You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
Paddle/paddle/gserver/tests/TestUtil.cpp

221 lines
7.1 KiB

/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "TestUtil.h"
#include "paddle/utils/CommandLineParser.h"
#include "paddle/math/SparseMatrix.h"
P_DEFINE_int32(fixed_seq_length, 0, "Produce some sequence of fixed length");
namespace paddle {
// Generates a random alphanumeric string of length `len` (characters drawn
// from [0-9A-Za-z]). Returns an empty string when len <= 0.
// NOLINT notes: uses rand(); fine for test fixtures, not for production RNG.
std::string randStr(const int len) {
  static const std::string kCharset =
      "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
  std::string s;
  if (len > 0) {
    s.reserve(len);  // single allocation instead of repeated growth
  }
  for (int i = 0; i < len; ++i) {
    // Index with the charset's own size instead of a hard-coded 62 so the
    // modulus cannot drift out of sync if the charset is ever edited.
    s += kCharset[rand() % kCharset.size()];  // NOLINT
  }
  return s;
}
// Builds a random height x width sparse matrix in CSR format for tests.
//
// height, width:     matrix dimensions.
// withValue:         true  -> FLOAT_VALUE matrix; entry values are
//                             rand()/RAND_MAX, i.e. in [0, 1].
//                    false -> NO_VALUE matrix (structure only).
// useGpu:            build a GpuSparseMatrix (device copy via
//                    HPPL_STREAM_DEFAULT); otherwise a CpuSparseMatrix.
// equalNnzPerSample: true  -> one entry count, drawn once via
//                             uniformRandom(10) and retried until non-zero,
//                             is shared by every row;
//                    false -> each row draws its own count via
//                             uniformRandom(10).
//
// NOTE(review): column indices within a row are drawn independently, so a
// row may contain duplicate column ids — presumably acceptable for test
// input; confirm if a consumer requires strictly unique columns.
MatrixPtr makeRandomSparseMatrix(size_t height, size_t width, bool withValue,
                                 bool useGpu, bool equalNnzPerSample) {
  std::vector<int64_t> ids(height);          // row ids, 0..height-1 in order
  std::vector<int64_t> indices(height + 1);  // CSR row offsets
  indices[0] = 0;

  // Per-row nnz generator; replaced below when all rows share one count.
  std::function<size_t()> randomer = [] { return uniformRandom(10); };
  if (equalNnzPerSample) {
    size_t n = 0;
    do {
      n = uniformRandom(10);  // retry until non-zero
    } while (!n);
    randomer = [=] { return n; };
  }
  for (size_t i = 0; i < height; ++i) {
    // A row can hold at most `width` entries.
    indices[i + 1] = indices[i] + std::min(randomer(), width);
    ids[i] = i;
  }
  if (!withValue) {
    // Structure-only matrix: each entry is just a column index.
    std::vector<sparse_non_value_t> data;
    data.resize(indices[height] - indices[0]);  // total nnz
    for (size_t i = 0; i < data.size(); ++i) {
      data[i].col = uniformRandom(width);
    }
    auto mat = Matrix::createSparseMatrix(height, width, data.size(), NO_VALUE,
                                          SPARSE_CSR, false, useGpu);
    if (useGpu) {
      std::dynamic_pointer_cast<GpuSparseMatrix>(mat)->copyFrom(
          ids.data(), indices.data(), data.data(), HPPL_STREAM_DEFAULT);
    } else {
      std::dynamic_pointer_cast<CpuSparseMatrix>(mat)
          ->copyFrom(ids.data(), indices.data(), data.data());
    }
    return mat;
  } else {
    // Valued matrix: column index plus a random value in [0, 1].
    std::vector<sparse_float_value_t> data;
    data.resize(indices[height] - indices[0]);  // total nnz
    for (size_t i = 0; i < data.size(); ++i) {
      data[i].col = uniformRandom(width);
      data[i].value = rand() / static_cast<float>(RAND_MAX);  // NOLINT
    }
    auto mat = Matrix::createSparseMatrix(
        height, width, data.size(), FLOAT_VALUE, SPARSE_CSR, false, useGpu);
    if (useGpu) {
      std::dynamic_pointer_cast<GpuSparseMatrix>(mat)->copyFrom(
          ids.data(), indices.data(), data.data(), HPPL_STREAM_DEFAULT);
    } else {
      std::dynamic_pointer_cast<CpuSparseMatrix>(mat)
          ->copyFrom(ids.data(), indices.data(), data.data());
    }
    return mat;
  }
}
// Convenience overload: delegates to the ICpuGpuVector version and hands
// back its CPU-side mutable IVector.
void generateSequenceStartPositions(size_t batchSize,
                                    IVectorPtr& sequenceStartPositions) {
  ICpuGpuVectorPtr combined;
  generateSequenceStartPositions(batchSize, combined);
  sequenceStartPositions = combined->getMutableVector(/* useGpu= */ false);
}
// Fills `sequenceStartPositions` with numSeqs + 1 offsets partitioning
// [0, batchSize) into sequences.
//
// When --fixed_seq_length is non-zero, every sequence has that length (the
// final offset is clamped to batchSize, so the last sequence may be shorter).
// Otherwise about batchSize/10 + 1 sequences of random length are produced,
// each at least 1 and capped so the remaining sequences still fit.
void generateSequenceStartPositions(size_t batchSize,
                                    ICpuGpuVectorPtr& sequenceStartPositions) {
  int numSeqs;
  if (FLAGS_fixed_seq_length != 0) {
    // Exact integer ceiling division. The previous
    // std::ceil((float)a / (float)b) could misround once the operands exceed
    // float's 24-bit integer precision.
    numSeqs =
        (batchSize + FLAGS_fixed_seq_length - 1) / FLAGS_fixed_seq_length;
  } else {
    numSeqs = batchSize / 10 + 1;
  }
  sequenceStartPositions =
      ICpuGpuVector::create(numSeqs + 1, /* useGpu= */ false);
  int* buf = sequenceStartPositions->getMutableData(false);
  int64_t pos = 0;
  int len = FLAGS_fixed_seq_length;
  int maxLen = 2 * batchSize / numSeqs;
  for (int i = 0; i < numSeqs; ++i) {
    if (FLAGS_fixed_seq_length == 0) {
      // Cap the draw so each of the remaining (numSeqs - i - 1) sequences
      // can still get at least one element; "+ 1" keeps len >= 1.
      len = uniformRandom(
          std::min<int64_t>(maxLen, batchSize - pos - numSeqs + i)) + 1;
    }
    buf[i] = pos;
    pos += len;
    VLOG(1) << " len=" << len;
  }
  buf[numSeqs] = batchSize;  // final offset always closes at batchSize
}
// Splits each sequence in `sequenceStartPositions` into two sub-sequences at
// a random interior point. Sequences of length 1 cannot be split and become
// a single sub-sequence.
void generateSubSequenceStartPositions(
    const ICpuGpuVectorPtr& sequenceStartPositions,
    ICpuGpuVectorPtr& subSequenceStartPositions) {
  const int seqCount = sequenceStartPositions->getSize() - 1;
  const int* starts = sequenceStartPositions->getData(false);

  // Pass 1: count length-1 sequences, which contribute one sub-sequence
  // instead of two, to size the output exactly.
  int singletons = 0;
  for (int i = 0; i < seqCount; ++i) {
    if (starts[i + 1] - starts[i] == 1) {
      ++singletons;
    }
  }
  const int subSeqCount = seqCount * 2 - singletons;

  subSequenceStartPositions =
      ICpuGpuVector::create(subSeqCount + 1, /* useGpu= */ false);
  int* subStarts = subSequenceStartPositions->getMutableData(false);

  // Pass 2: emit start offsets; a split point falls in [1, seqLen - 1].
  int out = 0;
  for (int i = 0; i < seqCount; ++i) {
    const int seqLen = starts[i + 1] - starts[i];
    subStarts[out++] = starts[i];
    if (seqLen != 1) {
      subStarts[out++] = starts[i] + uniformRandom(seqLen - 1) + 1;
    }
  }
  subStarts[out] = starts[seqCount];
}
// Generates per-sequence 2-D dims such that width * height equals the
// sequence length: width is a random divisor of the length and height is the
// matching cofactor. Output layout: bufDims[2i] = width, bufDims[2i+1] =
// height for sequence i.
void generateMDimSequenceData(const IVectorPtr& sequenceStartPositions,
                              IVectorPtr& cpuSequenceDims) {
  /* generate sequences with 2 dims */
  int numSeqs = sequenceStartPositions->getSize() - 1;
  int numDims = 2;
  cpuSequenceDims = IVector::create(numSeqs * numDims, /* useGpu= */ false);
  int* bufStarts = sequenceStartPositions->getData();
  int* bufDims = cpuSequenceDims->getData();
  for (int i = 0; i < numSeqs; i++) {
    int len = bufStarts[i + 1] - bufStarts[i];
    /* get width and height randomly */
    std::vector<int> dimVec;
    for (int j = 0; j < len; j++) {
      if (len % (j + 1) == 0) {
        // BUGFIX: record the divisor itself. The previous push_back(1)
        // stored the constant 1 for every divisor, so the "random" width was
        // always 1 and the dims were always (1, len).
        dimVec.push_back(j + 1);
      }
    }
    int idx = rand() % dimVec.size();  // NOLINT use rand_r
    bufDims[i * numDims] = dimVec[idx];
    bufDims[i * numDims + 1] = len / dimVec[idx];
  }
}
// Generates per-sequence 2-D dims such that width * height equals the
// sequence length: width is a random divisor of the length and height is the
// matching cofactor. Output layout: bufDims[2i] = width, bufDims[2i+1] =
// height for sequence i.
void generateMDimSequenceData(
    const ICpuGpuVectorPtr& sequenceStartPositions,
    IVectorPtr& cpuSequenceDims) {
  /* generate sequences with 2 dims */
  int numSeqs = sequenceStartPositions->getSize() - 1;
  int numDims = 2;
  cpuSequenceDims = IVector::create(numSeqs * numDims, /* useGpu= */ false);
  const int* bufStarts = sequenceStartPositions->getData(false);
  int* bufDims = cpuSequenceDims->getData();
  for (int i = 0; i < numSeqs; i++) {
    int len = bufStarts[i + 1] - bufStarts[i];
    /* get width and height randomly */
    std::vector<int> dimVec;
    for (int j = 0; j < len; j++) {
      if (len % (j + 1) == 0) {
        // BUGFIX: record the divisor itself. The previous push_back(1)
        // stored the constant 1 for every divisor, so the "random" width was
        // always 1 and the dims were always (1, len).
        dimVec.push_back(j + 1);
      }
    }
    int idx = rand() % dimVec.size();  // NOLINT use rand_r
    bufDims[i * numDims] = dimVec[idx];
    bufDims[i * numDims + 1] = len / dimVec[idx];
  }
}
// Asserts (via gtest expectations) that two dense matrices agree in shape,
// transpose flag, and every element (element-wise float comparison).
void checkMatrixEqual(const MatrixPtr& a, const MatrixPtr& b) {
  EXPECT_EQ(a->getWidth(), b->getWidth());
  EXPECT_EQ(a->getHeight(), b->getHeight());
  EXPECT_EQ(a->isTransposed(), b->isTransposed());
  const size_t numRows = a->getHeight();
  const size_t numCols = a->getWidth();
  for (size_t row = 0; row < numRows; ++row) {
    for (size_t col = 0; col < numCols; ++col) {
      EXPECT_FLOAT_EQ(a->getElement(row, col), b->getElement(row, col));
    }
  }
}
// Asserts (via gtest expectations) that two integer vectors agree in size and
// in every element.
// Uses EXPECT_EQ instead of EXPECT_FLOAT_EQ: IVector elements are ints (see
// the int* returned by IVector::getData() elsewhere in this file), and
// routing them through a float comparison silently loses precision for
// values above 2^24.
void checkVectorEqual(const IVectorPtr& a, const IVectorPtr& b) {
  EXPECT_EQ(a->getSize(), b->getSize());
  for (size_t r = 0; r < a->getSize(); ++r) {
    EXPECT_EQ(a->get(r), b->get(r));
  }
}
} // namespace paddle