Merge pull request #5775 from luotao1/ProtoData

remove ProtoData
release/0.11.0
Tao Luo 8 years ago committed by GitHub
commit 3c4d4065ae
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -11,7 +11,6 @@ add_unittest_without_exec(test_Trainer
test_Trainer.cpp)
add_test(NAME test_Trainer
COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/
${PYTHON_EXECUTABLE} ${PADDLE_SOURCE_DIR}/paddle/trainer/tests/gen_proto_data.py &&
${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/
${CMAKE_CURRENT_BINARY_DIR}/test_Trainer
WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/)

@ -1,125 +0,0 @@
#edit-mode: -*- python -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#Todo(luotao02) This config is only used for unitest. It is out of date now, and will be updated later.
TrainData(ProtoData(
files = 'trainer/tests/train_files.txt',
usage_ratio = 1.0,
))
TestData(ProtoData(
files = 'trainer/tests/test_files.txt'
))
default_initial_std(1)
default_decay_rate(4e-4)
default_device(0)
Inputs("features", "word", "pos", "chunk")
Outputs("crf")
Layer(
name = "features",
type = "data",
size = 4339,
)
Layer(
name = "word",
type = "data",
size = 478,
)
Layer(
name = "pos",
type = "data",
size = 45
)
Layer(
name = "chunk",
type = "data",
size = 23
)
Layer(
name = "output",
type = "mixed",
size = 23,
bias = False,
device = -1,
inputs = [
FullMatrixProjection("features", parameter_name="feature_weights"),
# TableProjection("word"),
# TableProjection("pos"),
],
)
Layer(
name = "crf",
type = "crf",
size = 23,
device = -1,
inputs = [
Input("output", parameter_name="crfw"),
"chunk"
]
)
Layer(
name = "crf_decoding",
type = "crf_decoding",
size = 23,
device = -1,
inputs = [
Input("output", parameter_name="crfw"),
"chunk"
]
)
Evaluator(
name = "error",
type = "sum",
inputs = "crf_decoding",
)
'''
# chuck evaluator cannot be used for GPU training
Evaluator(
name = "chunk_f1",
type = "chunk",
inputs = ["crf_decoding", "chunk"],
chunk_scheme = "IOB",
num_chunk_types = 11,
)
'''
Settings(
algorithm = 'sgd',
batch_size = 100,
average_window = 0.5,
max_average_window = 2500,
learning_rate = 1e-1,
learning_rate_decay_a = 5e-7,
learning_rate_decay_b = 0.75,
l1weight = 0,
l2weight = 1,
c1 = 0.0001,
backoff = 0.5,
owlqn_steps = 100,
max_backoff = 5,
)

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -24,7 +24,6 @@ using namespace std; // NOLINT
static const string& configFile1 = "trainer/tests/sample_trainer_config.conf";
static const string& configFile2 =
"trainer/tests/sample_trainer_config_hsigmoid.conf";
static const string& configFile3 = "trainer/tests/chunking.conf";
static const string& configFile4 =
"trainer/tests/sample_trainer_config_parallel.conf";
@ -95,13 +94,6 @@ TEST(checkGradient, multi) {
TEST(checkGradient, hsigmoid) { checkGradientTest(configFile2, false, false); }
TEST(checkGradient, chunk) {
checkGradientTest(configFile3, false, false);
#ifdef PADDLE_WITH_CUDA
checkGradientTest(configFile3, true, true);
#endif
}
TEST(checkGradient, non_parallel) {
checkGradientTest(configFile4, false, false);
}

@ -15,12 +15,7 @@
from paddle.trainer_config_helpers import *
TrainData(ProtoData(
files = "dummy_list",
constant_slots = [1.0],
async_load_data = True))
TestData(SimpleData(
TrainData(SimpleData(
files = "trainer/tests/sample_filelist.txt",
feat_dim = 3,
context_len = 0,

@ -1 +0,0 @@
trainer/tests/test_proto.bin

@ -1 +0,0 @@
trainer/tests/data_bin_part

File diff suppressed because it is too large Load Diff

@ -1 +0,0 @@
trainer/tests/train_proto.bin

@ -1 +0,0 @@
trainer/tests/compare_sparse_data

@ -1116,35 +1116,6 @@ def PyData(files=None,
return data_config
@config_func
def ProtoData(files=None,
type=None,
file_group_queue_capacity=None,
load_file_count=None,
constant_slots=None,
load_thread_num=None,
**xargs):
data_config = create_data_config_proto(**xargs)
if type is None:
data_config.type = 'proto'
else:
data_config.type = type
data_config.files = files
# When type="proto_group", one data provider contains at most
# load_file_count files, and there are at most
# (queue_capacity + load_thread_num + 1) data providers in memory
if file_group_queue_capacity is not None:
data_config.file_group_conf.queue_capacity = file_group_queue_capacity
if load_file_count is not None:
data_config.file_group_conf.load_file_count = load_file_count
if load_thread_num is not None:
data_config.file_group_conf.load_thread_num = load_thread_num
if constant_slots:
data_config.constant_slots.extend(constant_slots)
return data_config
#real data for training is actually provided by "sub_data" data providers.
@config_func
def MultiData(sub_data=[]):
@ -2714,7 +2685,7 @@ Usage:
max_sort_size = -1, inputs = ["output", "score"])
Input data: Samples of the same query should be loaded as a sequence,
by ProtoDataProvider or PyDataProvider etc.. User should provide
by PyDataProvider etc.. User should provide
scores for each sample. The score slot should be the 2nd
input of lambdaRank layer.

Loading…
Cancel
Save