You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
Paddle/paddle/fluid/recordio/range_scanner.h

70 lines
2.0 KiB

// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cstddef>
#include <cstdint>
#include <fstream>
#include <memory>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
// Index holds per-chunk bookkeeping for a record file and maps a global
// record index to a (chunk, offset-within-chunk) position.
//
// Nothing in this header populates the members, so a default-constructed
// Index is empty: NumRecords() is 0 and Locate() always reports
// "out of range".
class Index {
 public:
  // Returns the stored total record count (0 for an empty index).
  int NumRecords() const { return num_records_; }

  // Locate finds the chunk that contains the given record.
  //
  // On success, out->first is the index of the chunk and out->second is
  // the index of the record within that chunk. If record_idx is negative
  // or not covered by any chunk, both fields are set to -1.
  //
  // `out` must point to a valid pair; it is always written.
  void Locate(int record_idx, std::pair<int, int>* out) const {
    // Reject negative indices explicitly rather than relying on the
    // wrap-around of a signed-to-unsigned conversion.
    if (record_idx >= 0) {
      const size_t target = static_cast<size_t>(record_idx);
      size_t chunk_end = 0;  // one past the last record of chunk i
      for (size_t i = 0; i < chunk_lens_.size(); ++i) {
        const size_t chunk_begin = chunk_end;
        chunk_end += chunk_lens_[i];
        if (target < chunk_end) {
          out->first = static_cast<int>(i);
          out->second = static_cast<int>(target - chunk_begin);
          return;
        }
      }
    }
    out->first = -1;
    out->second = -1;
  }

 private:
  // NOTE(review): not read anywhere in this header; presumably the
  // position of each chunk in the file -- confirm against the writer.
  std::vector<int64_t> chunk_offsets_;
  // Number of records in each chunk; Locate() accumulates these.
  std::vector<uint32_t> chunk_lens_;
  // Total record count reported by NumRecords(). Initialized so that an
  // empty index never exposes an indeterminate value.
  int num_records_ = 0;
  // NOTE(review): not read anywhere in this header; purpose unclear from
  // here -- confirm or remove.
  std::vector<int> chunk_records_;
};
// RangeScanner
// Creates a scanner that sequentially reads records in the
// range [start, start+len). If start < 0, it scans from the
// beginning. If len < 0, it scans till the end of file.
//
// NOTE(review): the text above speaks of `len`, but the constructor's
// last parameter is named `end` -- confirm which meaning the
// implementation uses.
// NOTE(review): std::istream cannot be copied and its move constructor is
// protected, so no caller can actually pass the by-value `is` argument;
// a pointer or reference is probably intended. The signature is left
// untouched here because the out-of-line definition must match it.
// NOTE(review): Chunk is not declared in this header; it has to be made
// visible by something included before this point.
class RangeScanner {
 public:
  // Builds a scanner over `is`, using `idx` together with the `start` and
  // `end` bounds. Defined outside this header.
  RangeScanner(std::istream is, Index idx, int start, int end);

  // Defined outside this header; presumably advances to the next record
  // and reports whether one is available -- confirm in the implementation.
  bool Scan();

  // Defined outside this header; presumably yields the current record's
  // payload -- confirm in the implementation.
  const std::string Record();

 private:
  std::istream stream_;
  Index index_;
  int start_;
  int end_;
  int cur_;
  int chunk_index_;
  std::unique_ptr<Chunk> chunk_;
};