You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							87 lines
						
					
					
						
							3.4 KiB
						
					
					
				
			
		
		
	
	
							87 lines
						
					
					
						
							3.4 KiB
						
					
					
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
 | 
						|
// This schema is written against proto2 (explicit optional/required labels
// and per-field defaults are used below).
syntax = "proto2";

// Every message declared in this file belongs to the `paddle` package.
package paddle;
 | 
						|
 | 
						|
// Settings that control how data files are loaded by file-loading threads.
message FileGroupConf {
  // NOTE(review): presumably the capacity of the queue fed by the loading
  // threads -- confirm against the code that consumes this message.
  optional uint32 queue_capacity = 1 [default = 1];

  // Number of files that one file-loading thread loads.
  optional int32 load_file_count = 2 [default = 1];

  // Number of threads used to load files.
  // A value of 5~10 is appropriate when files are loaded through hadoop vfs.
  optional int32 load_thread_num = 3 [default = 1];
}
 | 
						|
 | 
						|
// Configuration of a data provider.
message DataConfig {
  // Field numbers 10, 11, 13, 17, 18 and 19 belonged to fields that have been
  // deprecated. Reserving them makes protoc reject any accidental reuse, which
  // would silently reinterpret data written with the old definitions.
  reserved 10, 11, 13, 17 to 19;
  // NOTE(review): field number 2 is also unused in this message; confirm
  // whether it was retired before assigning it to a new field.

  // Type of the data provider.
  // NOTE(review): the set of accepted values is not visible in this file.
  required string type = 1;

  // Name of a text file which contains a list of file names, one per line.
  optional string files = 3;

  // Feature dimension of one frame.
  optional int32 feat_dim = 4;

  // Feature slot dims.
  repeated int32 slot_dims = 5;

  // Max number of neighbour frames.
  optional int32 context_len = 6;

  // The number of samples.
  optional uint64 buffer_capacity = 7;

  // Part of the data used in training.
  // If not -1, only part of the train data is used in training.
  optional int64 train_sample_num = 8 [default = -1];

  // The number of documents processed at a time.
  optional int32 file_load_num = 9 [default = -1];

  // NOTE(review): presumably enables asynchronous data loading -- confirm
  // against the data provider implementation.
  optional bool async_load_data = 12 [default = false];

  // Whether this data is for test.
  optional bool for_test = 14 [default = false];

  // File-group loading settings; see FileGroupConf.
  optional FileGroupConf file_group_conf = 15;

  // NOTE(review): presumably the dims of float-valued slots, by analogy with
  // slot_dims -- confirm against the data provider implementation.
  repeated int32 float_slot_dims = 16;

  // A list of values which will be used to create additional one dimensional
  // float value slots. These one dimensional slots can be used as the weight
  // input for cost layers.
  // Currently this is only supported by ProtoDataProvider.
  repeated double constant_slots = 20;

  // For PyDataProvider.
  // Specify the load data script module name, object name and user args.
  optional string load_data_module = 21;
  optional string load_data_object = 22;
  optional string load_data_args = 23;

  // For MultiDataProvider: the sub data providers.
  repeated DataConfig sub_data_configs = 24;

  // The ratio of each sub data provider.
  // E.g. if sub data provider A's ratio is 1, B's ratio is 9 and batch_size
  // is 100, then each mini-batch is composed of 10 instances from A and
  // 90 instances from B.
  optional int32 data_ratio = 25;

  // If one of the sub data providers is running out of data, then
  // (1) if it is "main data", finish the current pass;
  // (2) if it is not "main data", reset it and try getNextBatch again.
  optional bool is_main_data = 26 [default = true];

  // The usage ratio of instances. Setting it to 1.0 means that all instances
  // are used.
  optional double usage_ratio = 27 [default = 1.0];
}
 |