/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
// This schema must stay proto2: the messages in this file use `required`
// fields and an explicit `[default = ...]` value, which proto3 does not allow.
syntax = "proto2";

package paddle;

// A vector stored in one slot; it is either dense or sparse.
//
// How `values` and `ids` are interpreted together:
//   - values non-empty, ids empty:
//       a dense vector.
//   - values non-empty, ids non-empty:
//       a sparse vector; the position of each value is specified by ids.
//   - values empty, ids non-empty:
//       a sparse vector whose non-zero values are all 1; the position of
//       each 1 is specified by ids.
message VectorSlot {
  // The vector values (all entries when dense, the non-zero entries when
  // sparse with values).
  repeated float values = 1 [packed = true];

  // Positions of the entries of a sparse vector; empty for a dense vector.
  repeated uint32 ids = 2 [packed = true];

  // For multidimensional data, for example "image width height depth".
  repeated uint32 dims = 3 [packed = true];

  // String payload.
  // NOTE(review): presumably filled for slots declared as SlotDef.STRING --
  // confirm against the code that produces these messages.
  repeated string strs = 4;
}

// SubseqSlot records whether a VectorSlot (or, in the future, any other kind
// of slot) has sub-sequences.
//
// If not every VectorSlot has sub-sequences, only the ones that do are
// stored, and `slot_id` records which slot each entry belongs to.
//
// Each entry of vector_slots holds one sequence, which may consist of N
// sub-sequences; `lens` then has N entries as well.
message SubseqSlot {
  // The id of the slot that has sub-sequences.
  required uint32 slot_id = 1;

  // Lengths of the sub-sequences in the slot.
  repeated uint32 lens = 2;
}

// Definition of one slot: the kind of data it carries and its dimension.
message SlotDef {
  // The kind of data a slot carries.
  // NOTE(review): the three VECTOR_* kinds appear to correspond to the three
  // VectorSlot layouts (dense, sparse without values, sparse with values) --
  // confirm against the data provider.
  enum SlotType {
    VECTOR_DENSE = 0;
    VECTOR_SPARSE_NON_VALUE = 1;
    VECTOR_SPARSE_VALUE = 2;
    // This can be used as label, or word id, etc.
    INDEX = 3;
    VAR_MDIM_DENSE = 4;
    VAR_MDIM_INDEX = 5;
    STRING = 6;
  }

  // Kind of this slot.
  required SlotType type = 1;

  // Dimension of this slot.
  // For INDEX slots, this means the maximal index plus 1.
  required uint32 dim = 2;
}

// Header listing the definitions of the slots.
message DataHeader {
  // Slot definitions.
  // INDEX slots should always come after VECTOR slots.
  repeated SlotDef slot_defs = 1;
}

// One data sample, made up of vector slots, id slots and optional
// sub-sequence information.
message DataSample {
  // Whether this sample is the beginning of a sequence. Defaults to true
  // when the field is not set.
  optional bool is_beginning = 1 [default = true];

  // Vector-valued slots of this sample.
  // NOTE(review): presumably ordered to match the VECTOR_* entries of
  // DataHeader.slot_defs -- confirm against the reader.
  repeated VectorSlot vector_slots = 2;

  // Id-valued slots of this sample.
  // NOTE(review): presumably one id per INDEX entry of
  // DataHeader.slot_defs -- confirm against the reader.
  repeated uint32 id_slots = 3 [packed = true];

  // Variable-length id slots; these use the `ids` field of VectorSlot.
  repeated VectorSlot var_id_slots = 4;

  // Sub-sequence lengths for the slots that have sub-sequences.
  repeated SubseqSlot subseq_slots = 5;
}