You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							77 lines
						
					
					
						
							2.4 KiB
						
					
					
				
			
		
		
	
	
							77 lines
						
					
					
						
							2.4 KiB
						
					
					
				/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
 | 
						|
 | 
						|
Licensed under the Apache License, Version 2.0 (the "License");
 | 
						|
you may not use this file except in compliance with the License.
 | 
						|
You may obtain a copy of the License at
 | 
						|
 | 
						|
    http://www.apache.org/licenses/LICENSE-2.0
 | 
						|
 | 
						|
Unless required by applicable law or agreed to in writing, software
 | 
						|
distributed under the License is distributed on an "AS IS" BASIS,
 | 
						|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
						|
See the License for the specific language governing permissions and
 | 
						|
limitations under the License. */
 | 
						|
syntax = "proto2";
 | 
						|
 | 
						|
package paddle;
 | 
						|
 | 
						|
/*
 | 
						|
 If values is not empty and ids is empty, this is a dense vector.
 | 
						|
 If values is not empty and ids is not empty, this is a sparse vector. The
 | 
						|
 position of each value
 | 
						|
 is specified by ids.
 | 
						|
 If values is empty and ids is not empty, this is a sparse vector whose non-zero
 | 
						|
 values are 1.
 | 
						|
 The position of each 1 is specified by ids.
 | 
						|
*/
 | 
						|
message VectorSlot {
 | 
						|
  repeated float values = 1 [ packed = true ];
 | 
						|
  repeated uint32 ids = 2 [ packed = true ];
 | 
						|
  /* For multidimensional data, for example "image width height depth" */
 | 
						|
  repeated uint32 dims = 3 [ packed = true ];
 | 
						|
  repeated string strs = 4;
 | 
						|
};
 | 
						|
 | 
						|
/*
 | 
						|
 SubseqSlot use to record whether VectorSlot or any other slot in future has
 | 
						|
 subseq.
 | 
						|
 If not all VectorSlot have subseq, we only store the one who has subseq, and
 | 
						|
 use *slot_id* to record it.
 | 
						|
 One vector_slots has one sequence, and it may have N subseq, thus the number of
 | 
						|
 *lens* will be N too.
 | 
						|
*/
 | 
						|
message SubseqSlot {
 | 
						|
  required uint32 slot_id = 1; // the id of slot who has subseq
 | 
						|
  repeated uint32 lens = 2;    // lengths of sub-sequence in the slot
 | 
						|
};
 | 
						|
 | 
						|
message SlotDef {
 | 
						|
  enum SlotType {
 | 
						|
    VECTOR_DENSE = 0;
 | 
						|
    VECTOR_SPARSE_NON_VALUE = 1;
 | 
						|
    VECTOR_SPARSE_VALUE = 2;
 | 
						|
    INDEX = 3; // This can be used as label, or word id, etc.
 | 
						|
    VAR_MDIM_DENSE = 4;
 | 
						|
    VAR_MDIM_INDEX = 5;
 | 
						|
    STRING = 6;
 | 
						|
  }
 | 
						|
  required SlotType type = 1;
 | 
						|
  required uint32 dim =
 | 
						|
      2; // For INDEX slots, this means the maximal index plus 1.
 | 
						|
};
 | 
						|
 | 
						|
message DataHeader {
 | 
						|
  // INDEX slot should be always after VECTOR slots.
 | 
						|
  repeated SlotDef slot_defs = 1;
 | 
						|
};
 | 
						|
 | 
						|
message DataSample {
 | 
						|
  optional bool is_beginning = 1
 | 
						|
      [ default = true ]; // is the beginning of a sequence
 | 
						|
  repeated VectorSlot vector_slots = 2;
 | 
						|
  repeated uint32 id_slots = 3 [ packed = true ];
 | 
						|
  /* use ids of VectorSlot */
 | 
						|
  repeated VectorSlot var_id_slots = 4;
 | 
						|
  repeated SubseqSlot subseq_slots = 5;
 | 
						|
};
 |