You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							155 lines
						
					
					
						
							4.5 KiB
						
					
					
				
			
		
		
	
	
							155 lines
						
					
					
						
							4.5 KiB
						
					
					
				#edit-mode: -*- python -*-
 | 
						|
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
 | 
						|
#
 | 
						|
# Licensed under the Apache License, Version 2.0 (the "License");
 | 
						|
# you may not use this file except in compliance with the License.
 | 
						|
# You may obtain a copy of the License at
 | 
						|
#
 | 
						|
#     http://www.apache.org/licenses/LICENSE-2.0
 | 
						|
#
 | 
						|
# Unless required by applicable law or agreed to in writing, software
 | 
						|
# distributed under the License is distributed on an "AS IS" BASIS,
 | 
						|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
						|
# See the License for the specific language governing permissions and
 | 
						|
# limitations under the License.
 | 
						|
 | 
						|
# TODO(luotao02): This config is only used for unit tests. It is out of date and will be updated later.
 | 
						|
 | 
						|
# Note: when making changes to this file, please make sure
 | 
						|
# sample_trainer_config_rnn.conf is changed accordingly so that the unit test
 | 
						|
# for comparing these two nets can pass (test_CompareTwoNets)
 | 
						|
 | 
						|
# Global defaults, set before any layer is declared.
default_initial_std(0.1)
default_device(0)

# word_dim is the size given to every "data" layer built by ltr_network.
# l1 / l2 are the decay rates forwarded to the projections and inputs;
# both are zero here.
word_dim = 1451594
l1 = 0
l2 = 0

model_type("nn")

# Boolean config argument "sparse_update"; the third argument (False) is
# presumably the value used when the argument is not supplied -- confirm
# against get_config_arg.
sparse_update = get_config_arg("sparse_update", bool, False)

# `files` is a single string path, not a tuple.
TrainData(
    ProtoData(
        type="proto_sequence",
        files="trainer/tests/train.list",
    )
)

Settings(
    algorithm="sgd",
    batch_size=100,
    learning_rate=0.0001,
    learning_rate_decay_a=4e-08,
    learning_rate_decay_b=0.0,
    learning_rate_schedule="poly",
)

# Layer sizes used as the default arguments of ltr_network.
wordvec_dim = 128
layer2_dim = 96
layer3_dim = 96
hidden_dim = 128

# One data/embedding/recurrent stack is built per slot name.
slot_names = ["qb", "qw", "tb", "tw"]
 | 
						|
 | 
						|
def ltr_network(network_name,
                word_dim=word_dim,
                wordvec_dim=wordvec_dim,
                layer2_dim=layer2_dim,
                layer3_dim=layer3_dim,
                hidden_dim=hidden_dim,
                slot_names=slot_names,
                l1=l1,
                l2=l2):
    """Declare one ranking sub-network whose layer names embed network_name.

    For every slot in ``slot_names`` this declares an input, a "data" layer,
    a "mixed" embedding layer, a "recurrent" layer and a "seqlastins" layer.
    The per-slot "seqlastins" layers feed two stacked "fc" layers and a
    final size-1 "fc" layer named ``"output_" + network_name``.

    Parameter names ("embedding.w0", "rnn1.w0", "rnn1.bias", "layer2.bias",
    "_layer2_<slot>.w", "_layer3.w", "layer3.bias", "_layerO.w") do not
    contain ``network_name``, so repeated calls refer to the same parameter
    names -- presumably this is how the sub-networks share weights; confirm
    against the config parser.

    Args:
        network_name: suffix/infix appended to every layer name.
        word_dim: size of each "data" layer.
        wordvec_dim: size of each embedding ("mixed") layer.
        layer2_dim: size of the first "fc" layer.
        layer3_dim: size of the second "fc" layer.
        hidden_dim: not used by the body; kept so existing callers that
            pass it keep working.
        slot_names: sequence of slot name prefixes; it is only read.
        l1: value passed as ``decay_rate_l1`` of the table projection.
        l2: value passed as ``decay_rate`` of the "fc" layer inputs.
    """
    # Register all inputs first, in slot order, before declaring any layer.
    # Iterating the names directly avoids the Python-2-only ``xrange``.
    for slot_name in slot_names:
        Inputs(slot_name + network_name)

    for slot_name in slot_names:
        data_name = slot_name + network_name
        embedding_name = slot_name + "_embedding_" + network_name
        rnn_name = slot_name + "_rnn1_" + network_name
        rnn_last_name = slot_name + "_rnnlast_" + network_name

        Layer(
            name=data_name,
            type="data",
            size=word_dim,
            device=-1,
        )
        Layer(
            name=embedding_name,
            type="mixed",
            size=wordvec_dim,
            bias=False,
            device=-1,
            inputs=TableProjection(
                data_name,
                parameter_name="embedding.w0",
                decay_rate_l1=l1,
                sparse_remote_update=True,
                # Module-level switch read from the config arguments.
                sparse_update=sparse_update,
            ),
        )
        Layer(
            name=rnn_name,
            type="recurrent",
            active_type="tanh",
            bias=Bias(initial_std=0, parameter_name="rnn1.bias"),
            inputs=Input(embedding_name, parameter_name="rnn1.w0"),
        )
        Layer(
            name=rnn_last_name,
            type="seqlastins",
            inputs=[rnn_name],
        )

    # First fully connected layer: one input (with its own weight name) per
    # slot's "seqlastins" layer.
    Layer(
        name="layer2_" + network_name,
        type="fc",
        active_type="tanh",
        size=layer2_dim,
        bias=Bias(parameter_name="layer2.bias"),
        inputs=[
            Input(
                slot_name + "_rnnlast_" + network_name,
                parameter_name="_layer2_" + slot_name + ".w",
                decay_rate=l2,
                initial_smart=True,
            )
            for slot_name in slot_names
        ],
    )
    Layer(
        name="layer3_" + network_name,
        type="fc",
        active_type="tanh",
        size=layer3_dim,
        bias=Bias(parameter_name="layer3.bias"),
        inputs=[
            Input(
                "layer2_" + network_name,
                parameter_name="_layer3.w",
                decay_rate=l2,
                initial_smart=True,
            ),
        ],
    )
    # Scalar output layer, no bias.
    Layer(
        name="output_" + network_name,
        type="fc",
        size=1,
        bias=False,
        inputs=[
            Input("layer3_" + network_name, parameter_name="_layerO.w"),
        ],
    )
 | 
						|
 | 
						|
 | 
						|
# Build the two sub-networks; their outputs are "output_left" and
# "output_right".
ltr_network("left")
ltr_network("right")

# Scalar label consumed by the cost layer.
Inputs("label")
Layer(name="label", type="data", size=1)

# Outputs are declared before the "cost" layer itself, as in the original
# statement order.
Outputs("cost", "qb_rnnlast_left")
Layer(
    name="cost",
    type="rank-cost",
    inputs=["output_left", "output_right", "label"],
)
 |