Merge pull request #5345 from luotao1/ProtoDataProvider
remove unused ProtoDataProvider related code (release/0.11.0)
commit
ba86885456
File diff suppressed because it is too large
Load Diff
@ -1,179 +0,0 @@
|
|||||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License. */
|
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
#include "DataFormat.pb.h"
|
|
||||||
#include "paddle/utils/Stat.h"
|
|
||||||
|
|
||||||
#include "DataProvider.h"
|
|
||||||
#include "ProtoReader.h"
|
|
||||||
|
|
||||||
namespace paddle {
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Provides data from a protobuf data file, with each sample
|
|
||||||
* specified by proto message
|
|
||||||
*
|
|
||||||
* DataSample defined in DataFormat.proto.
|
|
||||||
*
|
|
||||||
* The file format is
|
|
||||||
*
|
|
||||||
* header
|
|
||||||
*
|
|
||||||
* sample1
|
|
||||||
*
|
|
||||||
* sample2
|
|
||||||
*
|
|
||||||
* ...
|
|
||||||
*
|
|
||||||
* sampleN
|
|
||||||
*
|
|
||||||
* @note: In the data file, each message is prefixed with its length.
|
|
||||||
* The read/write of the protobuf messages is implemented in ProtoReader.h
|
|
||||||
*/
|
|
||||||
class ProtoDataProvider : public DataProvider {
|
|
||||||
public:
|
|
||||||
ProtoDataProvider(const DataConfig& config,
|
|
||||||
bool useGpu,
|
|
||||||
bool loadDataAll = true);
|
|
||||||
virtual void reset();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @note this size includes the sequences which are skipped because they
|
|
||||||
* are longer than the batch size.
|
|
||||||
*/
|
|
||||||
virtual int64_t getSize() {
|
|
||||||
int64_t size = sampleNums_;
|
|
||||||
if (usageRatio_ < 1.0f) {
|
|
||||||
size = static_cast<int64_t>(size * usageRatio_);
|
|
||||||
}
|
|
||||||
return size;
|
|
||||||
}
|
|
||||||
virtual void shuffle();
|
|
||||||
|
|
||||||
void loadData(const std::vector<std::string>& fileList);
|
|
||||||
|
|
||||||
virtual int64_t getNextBatchInternal(int64_t size, DataBatch* batch);
|
|
||||||
|
|
||||||
protected:
|
|
||||||
/**
|
|
||||||
* @brief load protobuf data from a list of file
|
|
||||||
* @param[in] fileName file name of a file which contains
|
|
||||||
* a list of file names
|
|
||||||
*/
|
|
||||||
void loadData(const std::string& fileName);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief load protobuf data from file
|
|
||||||
* @param[in] fileName data file name
|
|
||||||
*/
|
|
||||||
void loadDataFile(const std::string& fileName);
|
|
||||||
/** @brief check data header of each data sample
|
|
||||||
* @param[in] header data header read from protobuf data
|
|
||||||
*/
|
|
||||||
void checkDataHeader(const DataHeader& header);
|
|
||||||
/**
|
|
||||||
* @brief fill protobuf data into slot_,
|
|
||||||
* slot_ is a vector of ProtoSlot in memory.
|
|
||||||
* @param[in] sample data sample read from protobuf data
|
|
||||||
*/
|
|
||||||
void fillSlots(const DataSample& sample);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief return true if each sample is one sequence, i.e., independent
|
|
||||||
* of other samples.
|
|
||||||
*/
|
|
||||||
inline bool iidData() const { return sequenceStartPositions_.empty(); }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief check that sample is consistent with header_
|
|
||||||
*/
|
|
||||||
void checkSample(const DataSample& sample);
|
|
||||||
|
|
||||||
template <class Op>
|
|
||||||
int64_t sequenceLoop(Op op, int64_t size);
|
|
||||||
|
|
||||||
template <class Op>
|
|
||||||
int64_t sampleLoop(Op op, int64_t size);
|
|
||||||
|
|
||||||
template <class Op>
|
|
||||||
int64_t subSampleLoop(Op op, int64_t size, int slot);
|
|
||||||
|
|
||||||
void showDataStats();
|
|
||||||
|
|
||||||
protected:
|
|
||||||
struct ProtoVarSlot {
|
|
||||||
std::vector<real> data;
|
|
||||||
std::vector<int> dims;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct ProtoSlot {
|
|
||||||
SlotDef::SlotType type;
|
|
||||||
int dim;
|
|
||||||
std::vector<int> indexData;
|
|
||||||
std::vector<real> denseData;
|
|
||||||
std::vector<sparse_non_value_t> sparseNonValueData;
|
|
||||||
std::vector<sparse_float_value_t> sparseFloatValueData;
|
|
||||||
std::vector<int64_t> indices;
|
|
||||||
std::vector<int64_t> subIndices;
|
|
||||||
|
|
||||||
std::vector<ProtoVarSlot> varDenseData;
|
|
||||||
std::vector<std::vector<int>> varIndices;
|
|
||||||
std::vector<std::string> strData;
|
|
||||||
};
|
|
||||||
DataHeader header_;
|
|
||||||
int numVecSlots_;
|
|
||||||
|
|
||||||
std::vector<ProtoSlot> slots_;
|
|
||||||
size_t sampleNums_;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The starting position of each sequence in samples.
|
|
||||||
* The last element should be num of samples.
|
|
||||||
* If empty, each sample is one sequence.
|
|
||||||
*/
|
|
||||||
std::vector<size_t> sequenceStartPositions_;
|
|
||||||
|
|
||||||
int64_t currentSequenceIndex_;
|
|
||||||
|
|
||||||
// The size should be the number of sequences.
|
|
||||||
std::vector<size_t> shuffledSequenceIds_;
|
|
||||||
|
|
||||||
ThreadLocalD<DataBatch> cpuBatch_;
|
|
||||||
ThreadLocalD<DataBatch> gpuBatch_;
|
|
||||||
|
|
||||||
RWLock lock_;
|
|
||||||
std::vector<StatPtr> nnzStats_; // stats for number of none-zeros entries
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Special use for Proto data: instances should contain sparse-non-value
|
|
||||||
* slots
|
|
||||||
* and label.
|
|
||||||
*
|
|
||||||
* @note ProtoSequenceDataProvider treats each SPARSE SLOT as a SEQUENCE
|
|
||||||
*/
|
|
||||||
class ProtoSequenceDataProvider : public ProtoDataProvider {
|
|
||||||
public:
|
|
||||||
ProtoSequenceDataProvider(const DataConfig& config,
|
|
||||||
bool useGpu,
|
|
||||||
bool loadDataAll = true);
|
|
||||||
~ProtoSequenceDataProvider() {}
|
|
||||||
virtual int64_t getNextBatchInternal(int64_t size, DataBatch* batch);
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace paddle
|
|
@ -1,2 +0,0 @@
|
|||||||
./test_ProtoDataProvider/data1.bin
|
|
||||||
./test_ProtoDataProvider/data2.bin
|
|
@ -1,2 +0,0 @@
|
|||||||
./test_ProtoDataProvider/data1.bin.gz
|
|
||||||
./test_ProtoDataProvider/data2.bin.gz
|
|
@ -0,0 +1,64 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from paddle.trainer_config_helpers import *
|
||||||
|
|
||||||
|
######################## data source ################################
dict_path = 'gserver/tests/Sequence/tour_dict_phrase.dict'

# Read the dictionary file once and close the handle deterministically;
# the lines are reused further down to compute dict_dim.
with open(dict_path, "r") as dict_fp:
    dict_lines = dict_fp.readlines()

# Map every stripped dictionary line to its zero-based line number
# (a repeated entry keeps the number of its last occurrence).
dict_file = dict()
for line_count, line in enumerate(dict_lines):
    dict_file[line.strip()] = line_count

define_py_data_sources2(
    train_list='gserver/tests/Sequence/train.list',
    test_list=None,
    module='sequenceGen',
    obj='process',
    args={"dict_file": dict_file})

settings(batch_size=5)
######################## network configure ################################
# One input dimension per line of the dictionary file.
dict_dim = len(dict_lines)
word_dim = 128
hidden_dim = 256
label_dim = 3
sparse_update = get_config_arg("sparse_update", bool, False)

data = data_layer(name="word", size=dict_dim)

emb = embedding_layer(
    input=data,
    size=word_dim,
    param_attr=ParamAttr(sparse_update=sparse_update))

# The LSTM input projection is four times the hidden size.
with mixed_layer(size=hidden_dim * 4) as lstm_input:
    lstm_input += full_matrix_projection(input=emb)

lstm = lstmemory(
    input=lstm_input,
    act=TanhActivation(),
    gate_act=SigmoidActivation(),
    state_act=TanhActivation())

lstm_last = last_seq(input=lstm)

with mixed_layer(
        size=label_dim, act=SoftmaxActivation(), bias_attr=True) as output:
    output += full_matrix_projection(input=lstm_last)

outputs(
    classification_cost(
        input=output, label=data_layer(
            name="label", size=1)))
|
@ -0,0 +1,56 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from paddle.trainer_config_helpers import *
|
||||||
|
|
||||||
|
######################## data source ################################
dict_path = 'gserver/tests/Sequence/tour_dict_phrase.dict'

# Read the dictionary file once and close the handle deterministically;
# the lines are reused further down to compute dict_dim.
with open(dict_path, "r") as dict_fp:
    dict_lines = dict_fp.readlines()

# Map every stripped dictionary line to its zero-based line number
# (a repeated entry keeps the number of its last occurrence).
dict_file = dict()
for line_count, line in enumerate(dict_lines):
    dict_file[line.strip()] = line_count

define_py_data_sources2(
    train_list='gserver/tests/Sequence/train.list',
    test_list=None,
    module='sequenceGen',
    obj='process',
    args={"dict_file": dict_file})

settings(batch_size=5)
######################## network configure ################################
# One input dimension per line of the dictionary file.
dict_dim = len(dict_lines)
word_dim = 128
hidden_dim = 128
label_dim = 3

# This config is designed to be equivalent with sequence_recurrent_group.py

data = data_layer(name="word", size=dict_dim)

emb = embedding_layer(
    input=data, size=word_dim, param_attr=ParamAttr(name="emb"))

recurrent = recurrent_layer(input=emb, bias_attr=False, act=SoftmaxActivation())

recurrent_last = last_seq(input=recurrent)

with mixed_layer(
        size=label_dim, act=SoftmaxActivation(), bias_attr=True) as output:
    output += full_matrix_projection(input=recurrent_last)

outputs(
    classification_cost(
        input=output, label=data_layer(
            name="label", size=1)))
|
@ -0,0 +1,70 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from paddle.trainer_config_helpers import *
|
||||||
|
|
||||||
|
######################## data source ################################
dict_path = 'gserver/tests/Sequence/tour_dict_phrase.dict'

# Read the dictionary file once and close the handle deterministically;
# the lines are reused further down to compute dict_dim.
with open(dict_path, "r") as dict_fp:
    dict_lines = dict_fp.readlines()

# Map every stripped dictionary line to its zero-based line number
# (a repeated entry keeps the number of its last occurrence).
dict_file = dict()
for line_count, line in enumerate(dict_lines):
    dict_file[line.strip()] = line_count

define_py_data_sources2(
    train_list='gserver/tests/Sequence/train.list',
    test_list=None,
    module='sequenceGen',
    obj='process',
    args={"dict_file": dict_file})

settings(batch_size=5)
######################## network configure ################################
# One input dimension per line of the dictionary file.
dict_dim = len(dict_lines)
word_dim = 128
hidden_dim = 128
label_dim = 3

# This config is designed to be equivalent with sequence_recurrent.py

data = data_layer(name="word", size=dict_dim)

emb = embedding_layer(
    input=data, size=word_dim, param_attr=ParamAttr(name="emb"))
|
||||||
|
|
||||||
|
|
||||||
|
def step(y):
    """Step function for the recurrent group.

    Declares a memory named "rnn_state" of size hidden_dim, then a mixed
    layer with the same name and size that sums an identity projection of
    `y` and a full-matrix projection of the memory, with softmax activation
    and no bias. Returns that mixed layer.
    """
    previous_state = memory(name="rnn_state", size=hidden_dim)
    with mixed_layer(
            name="rnn_state",
            size=hidden_dim,
            bias_attr=False,
            act=SoftmaxActivation()) as out:
        out += identity_projection(input=y)
        # The weight is given the explicit name "___recurrent_layer_0__".
        recurrent_weight = ParamAttr(name="___recurrent_layer_0__")
        out += full_matrix_projection(
            input=previous_state, param_attr=recurrent_weight)
    return out
|
||||||
|
|
||||||
|
|
||||||
|
# Run `step` over the embedded sequence and keep only its last element.
recurrent = recurrent_group(name="rnn", step=step, input=emb)
recurrent_last = last_seq(input=recurrent)

# Softmax classifier over label_dim classes on top of the last state.
with mixed_layer(
        size=label_dim, act=SoftmaxActivation(), bias_attr=True) as output:
    output += full_matrix_projection(input=recurrent_last)

label = data_layer(name="label", size=1)
outputs(classification_cost(input=output, label=label))
|
File diff suppressed because it is too large
Load Diff
@ -1 +0,0 @@
|
|||||||
trainer/tests/mnist_bin_part
|
|
Binary file not shown.
Binary file not shown.
@ -1 +0,0 @@
|
|||||||
./trainer/tests/pydata_provider_wrapper_dir/test_pydata_provider_wrapper.proto_data
|
|
@ -1,154 +0,0 @@
|
|||||||
#edit-mode: -*- python -*-
|
|
||||||
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
# TODO(luotao02): This config is only used for unit tests. It is out of date now and will be updated later.
|
|
||||||
|
|
||||||
# Note: when making change to this file, please make sure
|
|
||||||
# sample_trainer_config_rnn.conf is changed accordingly so that the unit test
|
|
||||||
# for comparing these two nets can pass (test_CompareTwoNets)
|
|
||||||
|
|
||||||
# Defaults applied to the declarations that follow.
default_initial_std(0.1)
default_device(0)

# word_dim, l1 and l2 are also the default arguments of ltr_network below.
word_dim = 999
l1 = 0
l2 = 0

model_type("nn")

sparse_update = get_config_arg("sparse_update", bool, False)

# `files` is a plain string: the parentheses in the original did not make
# it a tuple.
TrainData(ProtoData(
    type="proto_sequence",
    files='trainer/tests/train_sparse.list'))

Settings(
    algorithm='sgd',
    batch_size=100,
    learning_rate=0.0001,
    learning_rate_decay_a=4e-08,
    learning_rate_decay_b=0.0,
    learning_rate_schedule='poly')

# Layer sizes, used as default arguments of ltr_network below.
wordvec_dim = 32
layer2_dim = 16
layer3_dim = 16
hidden_dim = 32

slot_names = ["qb", "qw", "tb", "tw"]
|
|
||||||
|
|
||||||
def ltr_network(network_name,
                word_dim=word_dim,
                wordvec_dim=wordvec_dim,
                layer2_dim=layer2_dim,
                layer3_dim=layer3_dim,
                hidden_dim=hidden_dim,
                slot_names=slot_names,
                l1=l1,
                l2=l2):
    """Declare one network whose layer names carry `network_name`.

    For every slot it declares a "data" layer, a "mixed" embedding layer
    fed by a TableProjection, a "recurrent" layer and a "seqlastins" layer.
    Three "fc" layers follow: layer2_<network_name>, layer3_<network_name>
    and output_<network_name>. Parameter names do not contain
    `network_name`. `hidden_dim` is accepted but not used in this body.
    """
    # Every slot is registered as an input before any layer is declared.
    for slot_name in slot_names:
        Inputs(slot_name + network_name)

    for slot_name in slot_names:
        data_name = slot_name + network_name
        embedding_name = slot_name + "_embedding_" + network_name
        rnn_name = slot_name + "_rnn1_" + network_name

        Layer(name=data_name, type="data", size=word_dim, device=-1)
        Layer(
            name=embedding_name,
            type="mixed",
            size=wordvec_dim,
            bias=False,
            device=-1,
            inputs=TableProjection(
                data_name,
                parameter_name="embedding.w0",
                decay_rate_l1=l1,
                sparse_remote_update=True,
                sparse_update=sparse_update))
        Layer(
            name=rnn_name,
            type="recurrent",
            active_type="tanh",
            bias=Bias(initial_std=0, parameter_name="rnn1.bias"),
            inputs=Input(embedding_name, parameter_name="rnn1.w0"))
        Layer(
            name=slot_name + "_rnnlast_" + network_name,
            type="seqlastins",
            inputs=[rnn_name])

    # layer2 takes the last recurrent output of every slot, each through
    # its own "_layer2_<slot>.w" parameter.
    Layer(
        name="layer2_" + network_name,
        type="fc",
        active_type="tanh",
        size=layer2_dim,
        bias=Bias(parameter_name="layer2.bias"),
        inputs=[
            Input(
                slot_name + "_rnnlast_" + network_name,
                parameter_name="_layer2_" + slot_name + ".w",
                decay_rate=l2,
                initial_smart=True) for slot_name in slot_names
        ])
    Layer(
        name="layer3_" + network_name,
        type="fc",
        active_type="tanh",
        size=layer3_dim,
        bias=Bias(parameter_name="layer3.bias"),
        inputs=[
            Input(
                "layer2_" + network_name,
                parameter_name="_layer3.w",
                decay_rate=l2,
                initial_smart=True)
        ])
    Layer(
        name="output_" + network_name,
        type="fc",
        size=1,
        bias=False,
        inputs=[Input("layer3_" + network_name, parameter_name="_layerO.w")])
|
|
||||||
|
|
||||||
|
|
||||||
# Declare the "left" and "right" networks, in that order.
for side in ("left", "right"):
    ltr_network(side)

Inputs("label")
Layer(name="label", type="data", size=1)

Outputs("cost", "qb_rnnlast_left")
# The rank cost takes both network outputs plus the label.
Layer(
    name="cost",
    type="rank-cost",
    inputs=["output_left", "output_right", "label"])
|
|
@ -1,40 +0,0 @@
|
|||||||
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
from paddle.trainer_config_helpers import *
|
|
||||||
|
|
||||||
################################### Data Configuration ###################################
TrainData(ProtoData(files="trainer/tests/mnist.list"))
################################### Algorithm Configuration ###################################
settings(
    batch_size=1000,
    learning_method=MomentumOptimizer(momentum=0.5, sparse=False))
################################### Network Configuration ###################################
data = data_layer(name="input", size=784)

# Two sigmoid layers of 800 units each.
fc1 = fc_layer(
    input=data, size=800, bias_attr=True, act=SigmoidActivation())
fc2 = fc_layer(
    input=fc1, size=800, bias_attr=True, act=SigmoidActivation())

# The softmax layer takes both fc1 and fc2 as inputs.
output = fc_layer(
    input=[fc1, fc2], size=10, bias_attr=True, act=SoftmaxActivation())

lbl = data_layer(name="label", size=1)

cost = classification_cost(input=output, label=lbl)
outputs(cost)
|
|
@ -1,40 +0,0 @@
|
|||||||
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
from paddle.trainer_config_helpers import *
|
|
||||||
|
|
||||||
################################### Data Configuration ###################################
TrainData(ProtoData(files="trainer/tests/mnist.list"))
################################### Algorithm Configuration ###################################
settings(
    batch_size=1000,
    learning_method=MomentumOptimizer(momentum=0.5, sparse=False))
################################### Network Configuration ###################################
data = data_layer(name="input", size=784)

# Two sigmoid layers of 800 units each.
fc1 = fc_layer(
    input=data, size=800, bias_attr=True, act=SigmoidActivation())
fc2 = fc_layer(
    input=fc1, size=800, bias_attr=True, act=SigmoidActivation())

# The softmax layer takes both fc1 and fc2 as inputs.
output = fc_layer(
    input=[fc1, fc2], size=10, bias_attr=True, act=SoftmaxActivation())

lbl = data_layer(name="label", size=1)

cost = classification_cost(input=output, label=lbl)
outputs(cost)
|
|
@ -1,154 +0,0 @@
|
|||||||
#edit-mode: -*- python -*-
|
|
||||||
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
# TODO(luotao02): This config is only used for unit tests. It is out of date now and will be updated later.
|
|
||||||
|
|
||||||
# Note: when making change to this file, please make sure
|
|
||||||
# sample_trainer_config_rnn.conf is changed accordingly so that the unit test
|
|
||||||
# for comparing these two nets can pass (test_CompareTwoNets)
|
|
||||||
|
|
||||||
# Defaults applied to the declarations that follow.
default_initial_std(0.1)
default_device(0)

# word_dim, l1 and l2 are also the default arguments of ltr_network below.
word_dim = 1451594
l1 = 0
l2 = 0

model_type("nn")

sparse_update = get_config_arg("sparse_update", bool, False)

# `files` is a plain string: the parentheses in the original did not make
# it a tuple.
TrainData(ProtoData(
    type="proto_sequence",
    files='trainer/tests/train.list'))

Settings(
    algorithm='sgd',
    batch_size=100,
    learning_rate=0.0001,
    learning_rate_decay_a=4e-08,
    learning_rate_decay_b=0.0,
    learning_rate_schedule='poly')

# Layer sizes, used as default arguments of ltr_network below.
wordvec_dim = 128
layer2_dim = 96
layer3_dim = 96
hidden_dim = 128

slot_names = ["qb", "qw", "tb", "tw"]
|
|
||||||
|
|
||||||
def ltr_network(network_name,
                word_dim=word_dim,
                wordvec_dim=wordvec_dim,
                layer2_dim=layer2_dim,
                layer3_dim=layer3_dim,
                hidden_dim=hidden_dim,
                slot_names=slot_names,
                l1=l1,
                l2=l2):
    """Declare one network whose layer names carry `network_name`.

    For every slot it declares a "data" layer, a "mixed" embedding layer
    fed by a TableProjection, a "recurrent" layer and a "seqlastins" layer.
    Three "fc" layers follow: layer2_<network_name>, layer3_<network_name>
    and output_<network_name>. Parameter names do not contain
    `network_name`. `hidden_dim` is accepted but not used in this body.
    """
    # Every slot is registered as an input before any layer is declared.
    for slot_name in slot_names:
        Inputs(slot_name + network_name)

    for slot_name in slot_names:
        data_name = slot_name + network_name
        embedding_name = slot_name + "_embedding_" + network_name
        rnn_name = slot_name + "_rnn1_" + network_name

        Layer(name=data_name, type="data", size=word_dim, device=-1)
        Layer(
            name=embedding_name,
            type="mixed",
            size=wordvec_dim,
            bias=False,
            device=-1,
            inputs=TableProjection(
                data_name,
                parameter_name="embedding.w0",
                decay_rate_l1=l1,
                sparse_remote_update=True,
                sparse_update=sparse_update))
        Layer(
            name=rnn_name,
            type="recurrent",
            active_type="tanh",
            bias=Bias(initial_std=0, parameter_name="rnn1.bias"),
            inputs=Input(embedding_name, parameter_name="rnn1.w0"))
        Layer(
            name=slot_name + "_rnnlast_" + network_name,
            type="seqlastins",
            inputs=[rnn_name])

    # layer2 takes the last recurrent output of every slot, each through
    # its own "_layer2_<slot>.w" parameter.
    Layer(
        name="layer2_" + network_name,
        type="fc",
        active_type="tanh",
        size=layer2_dim,
        bias=Bias(parameter_name="layer2.bias"),
        inputs=[
            Input(
                slot_name + "_rnnlast_" + network_name,
                parameter_name="_layer2_" + slot_name + ".w",
                decay_rate=l2,
                initial_smart=True) for slot_name in slot_names
        ])
    Layer(
        name="layer3_" + network_name,
        type="fc",
        active_type="tanh",
        size=layer3_dim,
        bias=Bias(parameter_name="layer3.bias"),
        inputs=[
            Input(
                "layer2_" + network_name,
                parameter_name="_layer3.w",
                decay_rate=l2,
                initial_smart=True)
        ])
    Layer(
        name="output_" + network_name,
        type="fc",
        size=1,
        bias=False,
        inputs=[Input("layer3_" + network_name, parameter_name="_layerO.w")])
|
|
||||||
|
|
||||||
|
|
||||||
# Declare the "left" and "right" networks, in that order.
for side in ("left", "right"):
    ltr_network(side)

Inputs("label")
Layer(name="label", type="data", size=1)

Outputs("cost", "qb_rnnlast_left")
# The rank cost takes both network outputs plus the label.
Layer(
    name="cost",
    type="rank-cost",
    inputs=["output_left", "output_right", "label"])
|
|
@ -1,180 +0,0 @@
|
|||||||
#edit-mode: -*- python -*-
|
|
||||||
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
# TODO(luotao02): This config is only used for unit tests. It is out of date now and will be updated later.
|
|
||||||
|
|
||||||
# Note: when making change to this file, please make sure
|
|
||||||
# sample_trainer_config_qb_rnn.conf is changed accordingly so that the unit test
|
|
||||||
# for comparing these two nets can pass (test_CompareTwoNets)
|
|
||||||
|
|
||||||
# Defaults applied to the declarations that follow.
default_initial_std(0.1)
default_device(0)

# word_dim, l1 and l2 are also the default arguments of ltr_network below.
word_dim = 1451594
l1 = 0
l2 = 0

model_type("recurrent_nn")

sparse_update = get_config_arg("sparse_update", bool, False)

# `files` is a plain string: the parentheses in the original did not make
# it a tuple.
TrainData(ProtoData(
    type="proto_sequence",
    files='trainer/tests/train.list'))

Settings(
    algorithm='sgd',
    batch_size=100,
    learning_rate=0.0001,
    learning_rate_decay_a=4e-08,
    learning_rate_decay_b=0.0,
    learning_rate_schedule='poly')

# Layer sizes, used as default arguments of ltr_network below.
wordvec_dim = 128
layer2_dim = 96
layer3_dim = 96
hidden_dim = 128

slot_names = ["qb", "qw", "tb", "tw"]
|
|
||||||
|
|
||||||
def SimpleRecurrentLayer(name,
                         size,
                         active_type,
                         bias,
                         input_layer_name,
                         parameter_name,
                         seq_reversed=False):
    """Declare a recurrent layer group named `name + "_layer_group"`.

    The group links `input_layer_name` in and `name` out. Inside it, a
    Memory is declared for `name`, followed by a "mixed" layer called
    `name` whose inputs are an identity projection of the input layer and
    a full-matrix projection of the memory using `parameter_name`.
    """
    group_name = name + "_layer_group"
    RecurrentLayerGroupBegin(
        group_name,
        in_links=[input_layer_name],
        out_links=[name],
        seq_reversed=seq_reversed)

    # Memory and layer share `name`.
    # NOTE(review): presumably this ties the memory to the layer's own
    # previous output -- confirm against the config parser.
    state_memory = Memory(name=name, size=size)
    Layer(
        name=name,
        type="mixed",
        size=size,
        active_type=active_type,
        bias=bias,
        inputs=[
            IdentityProjection(input_layer_name),
            FullMatrixProjection(state_memory, parameter_name=parameter_name),
        ])

    RecurrentLayerGroupEnd(group_name)
|
|
||||||
|
|
||||||
|
|
||||||
def ltr_network(network_name,
                word_dim=word_dim,
                wordvec_dim=wordvec_dim,
                layer2_dim=layer2_dim,
                layer3_dim=layer3_dim,
                hidden_dim=hidden_dim,
                slot_names=slot_names,
                l1=l1,
                l2=l2):
    """Declare one ranking tower whose layer names embed ``network_name``.

    For every slot a data layer, an embedding layer, a simple recurrent
    layer and a "seqlastins" layer are declared. The per-slot results feed
    two tanh "fc" layers followed by a size-1 "fc" output layer named
    ``"output_" + network_name``. All parameter names are fixed strings, so
    they do not depend on ``network_name``.

    Args:
        network_name: string embedded in every layer name of this tower.
        word_dim: size of each data layer.
        wordvec_dim: size of each embedding layer.
        layer2_dim: size of the "layer2_" fc layer.
        layer3_dim: size of the "layer3_" fc layer.
        hidden_dim: size of each recurrent layer.
        slot_names: names of the input slots.
        l1: value passed as decay_rate_l1 of the embedding table projection.
        l2: value passed as decay_rate of the layer2/layer3 inputs.
    """
    # All input slots are registered before any layer is declared.
    for slot in slot_names:
        Inputs(slot + network_name)

    for slot in slot_names:
        data_layer = slot + network_name
        embedding_layer = slot + "_embedding_" + network_name
        rnn_layer = slot + "_rnn1_" + network_name

        Layer(
            name=data_layer,
            type="data",
            size=word_dim,
            device=-1,
        )
        Layer(
            name=embedding_layer,
            type="mixed",
            size=wordvec_dim,
            bias=False,
            device=-1,
            inputs=TableProjection(
                data_layer,
                parameter_name="embedding.w0",
                decay_rate_l1=l1,
                sparse_remote_update=True,
                # Module-level setting read via get_config_arg.
                sparse_update=sparse_update,
            ),
        )
        SimpleRecurrentLayer(
            name=rnn_layer,
            size=hidden_dim,
            active_type="tanh",
            bias=Bias(initial_std=0, parameter_name="rnn1.bias"),
            input_layer_name=embedding_layer,
            parameter_name="rnn1.w0",
        )
        Layer(
            name=slot + "_rnnlast_" + network_name,
            type="seqlastins",
            inputs=[rnn_layer],
        )

    # One input (with its own weight) per slot's "seqlastins" layer.
    Layer(
        name="layer2_" + network_name,
        type="fc",
        active_type="tanh",
        size=layer2_dim,
        bias=Bias(parameter_name="layer2.bias"),
        inputs=[
            Input(slot_name + "_rnnlast_" + network_name,
                  parameter_name="_layer2_" + slot_name + ".w",
                  decay_rate=l2,
                  initial_smart=True) for slot_name in slot_names
        ],
    )
    Layer(
        name="layer3_" + network_name,
        type="fc",
        active_type="tanh",
        size=layer3_dim,
        bias=Bias(parameter_name="layer3.bias"),
        inputs=[
            Input("layer2_" + network_name,
                  parameter_name="_layer3.w",
                  decay_rate=l2,
                  initial_smart=True),
        ],
    )
    Layer(
        name="output_" + network_name,
        type="fc",
        size=1,
        bias=False,
        inputs=[
            Input("layer3_" + network_name, parameter_name="_layerO.w"),
        ],
    )
|
|
||||||
|
|
||||||
|
|
||||||
# Declare the two towers; their layers are suffixed "left" and "right".
ltr_network("left")
ltr_network("right")

# Label input compared against the two tower outputs by the cost layer.
Inputs("label")
Layer(name="label", type="data", size=1)

Outputs("cost", "qb_rnnlast_left")
Layer(
    name="cost",
    type="rank-cost",
    inputs=["output_left", "output_right", "label"],
)
|
|
@ -1,184 +0,0 @@
|
|||||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
|
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
you may not use this file except in compliance with the License.
|
|
||||||
You may obtain a copy of the License at
|
|
||||||
|
|
||||||
http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
|
|
||||||
Unless required by applicable law or agreed to in writing, software
|
|
||||||
distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
See the License for the specific language governing permissions and
|
|
||||||
limitations under the License. */
|
|
||||||
|
|
||||||
#include <gtest/gtest.h>
|
|
||||||
#include <paddle/utils/PythonUtil.h>
|
|
||||||
#include <algorithm>
|
|
||||||
#include <cstdlib>
|
|
||||||
|
|
||||||
#include "paddle/trainer/Trainer.h"
|
|
||||||
|
|
||||||
using namespace paddle; // NOLINT
|
|
||||||
using namespace std; // NOLINT
|
|
||||||
|
|
||||||
// Flags declared here (their definitions live outside this file).
DECLARE_int32(gpu_id);
DECLARE_bool(local);
DECLARE_bool(use_gpu);
DECLARE_string(config);
DECLARE_string(nics);

// Flags defined by this test: the two configs to compare and the tolerance.
DEFINE_string(config_file_a, "", "config of one network to compare");
DEFINE_string(config_file_b, "", "config of another network to compare");
DEFINE_bool(need_high_accuracy,
            true,
            "whether need to run in double accuracy (recommended)");
// A value of 0 is replaced in main() by a precision-dependent default.
DEFINE_double(
    max_diff_ratio,
    0.0f,
    "max diff ratio allowed for outputs and parameters (value/gradient)");
|
|
||||||
|
|
||||||
struct ComData {
|
|
||||||
vector<Argument> outArgs;
|
|
||||||
vector<ParameterPtr> parameters;
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
 * @brief Train the network described by @p configFile and record its
 *        parameters in @p data.
 *
 * The global flags are overwritten so that training runs locally on CPU
 * with no NICs configured, and both random seeds are reset to 0 before the
 * trainer is created. Shuffling is skipped on the data provider.
 *
 * @param data        receives the gradient machine's parameters. Note that
 *                    data.outArgs is not written by this function.
 * @param configFile  path assigned to FLAGS_config. Taken by const
 *                    reference to avoid copying the string on every call.
 */
void calcGradient(ComData& data, const string& configFile) {
  FLAGS_config = configFile;

  FLAGS_local = true;
  FLAGS_use_gpu = false;

  FLAGS_nics = "";

  // Reset both random sources so that the two runs start from the same seed.
  *ThreadLocalRand::getSeed() = 0;
  srand(0);

  Trainer trainer;
  trainer.init(TrainerConfigHelper::createFromFlagConfig(), false);

  data.parameters = trainer.getGradientMachine()->getParameters();
  trainer.getDataProvider()->setSkipShuffle();
  trainer.train();
}
|
|
||||||
|
|
||||||
/**
 * @brief Compare two buffers element by element and expect no mismatch.
 *
 * An element counts as a mismatch when the two values differ and the
 * difference divided by the larger absolute value exceeds
 * FLAGS_max_diff_ratio. Every mismatch is logged together with its row
 * index (element index divided by @p width).
 *
 * @param A     first buffer
 * @param desA  label printed for values of @p A
 * @param B     second buffer
 * @param desB  label printed for values of @p B
 * @param len   number of elements compared
 * @param width number of elements per row, used only for the logged row
 */
void checkBuffer(real* A,
                 const char* desA,
                 real* B,
                 const char* desB,
                 size_t len,
                 size_t width = 1) {
  int mismatchCount = 0;
  for (size_t idx = 0; idx != len; ++idx) {
    const real diff = fabs(A[idx] - B[idx]);
    // The ratio is evaluated only for a non-zero difference, so two equal
    // values (including two zeros) never divide by zero.
    const bool exceeds =
        diff > 0.0f &&
        diff / std::max(fabs(A[idx]), fabs(B[idx])) > FLAGS_max_diff_ratio;
    if (!exceeds) {
      continue;
    }
    ++mismatchCount;
    LOG(INFO) << "Row: " << idx / width << ", " << desA << " : " << A[idx]
              << "    " << desB << " : " << B[idx];
  }
  EXPECT_EQ(0, mismatchCount);
  LOG(INFO) << "\n\n";
}
|
|
||||||
|
|
||||||
/**
 * @brief Compare the outputs and the parameters of two networks.
 *
 * Each pair of output matrices, parameter values and parameter gradients
 * is copied into CPU storage and checked with checkBuffer(). Before
 * anything is indexed pairwise, the counts and sizes of both sides are
 * asserted to be equal, so networks of different shape fail the test
 * instead of reading past the end of the smaller side.
 *
 * @param comDataA data collected from network A
 * @param comDataB data collected from network B
 */
void compareGradient(ComData& comDataA, ComData& comDataB) {
  // Bind by reference: the outputs are only read, so no copy is needed.
  const vector<Argument>& outArgsA = comDataA.outArgs;
  const vector<Argument>& outArgsB = comDataB.outArgs;
  ASSERT_EQ(outArgsA.size(), outArgsB.size())
      << "networks have a different number of outputs";

  for (size_t i = 0; i < outArgsA.size(); ++i) {
    CpuMatrix matA(outArgsA[i].value->getHeight(),
                   outArgsA[i].value->getWidth());
    CpuMatrix matB(outArgsB[i].value->getHeight(),
                   outArgsB[i].value->getWidth());

    matA.copyFrom(*outArgsA[i].value);
    matB.copyFrom(*outArgsB[i].value);

    // checkBuffer() walks both buffers using A's element count.
    ASSERT_EQ(matA.getElementCnt(), matB.getElementCnt())
        << "output " << i << " differs in size";

    LOG(INFO) << "\n--------------------------------"
              << " Check Network Output_" << i << ":"
              << " -------------------------------------\n";
    checkBuffer(matA.getData(),
                "network A output",
                matB.getData(),
                "network B output",
                matA.getElementCnt(),
                matA.getWidth());
  }

  vector<ParameterPtr>& parametersA = comDataA.parameters;
  vector<ParameterPtr>& parametersB = comDataB.parameters;
  ASSERT_EQ(parametersA.size(), parametersB.size())
      << "networks have a different number of parameters";

  LOG(INFO) << "\n\n--------------------------------"
            << " Check Gradient Machine Parameters:"
            << " -------------------------------------\n";
  for (size_t i = 0; i < parametersA.size(); ++i) {
    ParameterPtr parameterA, parameterB;
    parameterA = parametersA[i];
    parameterB = parametersB[i];

    CpuVector paraA(parameterA->getSize());
    CpuVector paraB(parameterB->getSize());
    ASSERT_EQ(paraA.getSize(), paraB.getSize())
        << "parameter " << parameterA->getName() << " differs in size";
    paraA.copyFrom(*parameterA->getBuf(PARAMETER_VALUE));
    paraB.copyFrom(*parameterB->getBuf(PARAMETER_VALUE));

    LOG(INFO) << "\n\n----------- PARAMETER_VALUE:  " << parameterA->getName()
              << " ; size : " << paraA.getSize() << " ------------";
    checkBuffer(paraA.getData(),
                "Network A",
                paraB.getData(),
                "Network B",
                paraA.getSize());

    CpuVector gradA(*parameterA->getBuf(PARAMETER_GRADIENT));
    CpuVector gradB(*parameterB->getBuf(PARAMETER_GRADIENT));
    ASSERT_EQ(gradA.getSize(), gradB.getSize())
        << "gradient of " << parameterA->getName() << " differs in size";

    LOG(INFO) << "\n\n----------- PARAMETER_GRADIENT: " << parameterA->getName()
              << " ; size : " << gradA.getSize() << " -----------";
    checkBuffer(gradA.getData(),
                "Network A",
                gradB.getData(),
                "Network B",
                gradA.getSize());
  }
}
|
|
||||||
|
|
||||||
// Trains network A and then network B from their respective config files,
// and finally compares what was collected from both runs.
TEST(Trainer, create) {
  ComData resultA;
  ComData resultB;

  calcGradient(resultA, FLAGS_config_file_a);
  LOG(INFO) << "\n\ntraining of Network A is finished\n\n";

  calcGradient(resultB, FLAGS_config_file_b);
  LOG(INFO) << "\n\ntraining of the Network B is finished\n\n";

  compareGradient(resultA, resultB);
}
|
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
|
||||||
paddle::initMain(argc, argv);
|
|
||||||
testing::InitGoogleTest(&argc, argv);
|
|
||||||
initPython(argc, argv);
|
|
||||||
|
|
||||||
#ifndef PADDLE_TYPE_DOUBLE
|
|
||||||
if (FLAGS_need_high_accuracy) {
|
|
||||||
LOG(INFO) << "skip test due to it's need high accuracy";
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
if (FLAGS_max_diff_ratio == 0.0f) {
|
|
||||||
FLAGS_max_diff_ratio = 2e-4;
|
|
||||||
LOG(INFO) << "auto set max_diff_ratio " << FLAGS_max_diff_ratio
|
|
||||||
<< " in low accuracy mode";
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
if (FLAGS_max_diff_ratio == 0.0f) {
|
|
||||||
FLAGS_max_diff_ratio = 2e-7;
|
|
||||||
LOG(INFO) << "auto set max_diff_ratio " << FLAGS_max_diff_ratio
|
|
||||||
<< " in high accuracy mode";
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
int ret = RUN_ALL_TESTS();
|
|
||||||
return ret;
|
|
||||||
}
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue