// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #pragma once #include #include #include #include #include #include class Index { public: int NumRecords() { return num_records_; } // Locate returns the index of chunk that contains the given record, // and the record index within the chunk. It returns (-1, -1) if the // record is out of range. void Locate(int record_idx, std::pair* out) { size_t sum = 0; for (size_t i = 0; i < chunk_lens_.size(); ++i) { sum += chunk_lens_[i]; if (static_cast(record_idx) < sum) { out->first = i; out->second = record_idx - sum + chunk_lens_[i]; return; } } // out->swap(std::make_pair(-1, -1)); out->first = -1; out->second = -1; } private: std::vector chunk_offsets_; std::vector chunk_lens_; int num_records_; std::vector chunk_records_; }; // RangeScanner // creates a scanner that sequencially reads records in the // range [start, start+len). If start < 0, it scans from the // beginning. If len < 0, it scans till the end of file. class RangeScanner { public: RangeScanner(std::istream is, Index idx, int start, int end); bool Scan(); const std::string Record(); private: std::istream stream_; Index index_; int start_, end_, cur_; int chunk_index_; std::unique_ptr chunk_; };