commit
91e8aadaef
@ -0,0 +1,117 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#include "Layer.h"
|
||||
|
||||
namespace paddle {
|
||||
|
||||
class KmaxSeqScoreLayer : public Layer {
|
||||
private:
|
||||
MatrixPtr scores_;
|
||||
size_t beamSize_;
|
||||
void kmaxScorePerSeq(const real* score,
|
||||
real* sortedRes,
|
||||
const ICpuGpuVectorPtr seqStartPos);
|
||||
|
||||
public:
|
||||
explicit KmaxSeqScoreLayer(const LayerConfig& config) : Layer(config) {}
|
||||
|
||||
bool init(const LayerMap& layerMap,
|
||||
const ParameterMap& parameterMap) override;
|
||||
|
||||
void forward(PassType passType) override;
|
||||
void backward(const UpdateCallback& callback = nullptr) override;
|
||||
};
|
||||
|
||||
// Make the layer creatable from configs under the type name "kmax_seq_score".
REGISTER_LAYER(kmax_seq_score, KmaxSeqScoreLayer);
|
||||
|
||||
bool KmaxSeqScoreLayer::init(const LayerMap& layerMap,
|
||||
const ParameterMap& parameterMap) {
|
||||
bool ret = Layer::init(layerMap, parameterMap);
|
||||
CHECK_EQ(1U, inputLayers_.size());
|
||||
|
||||
beamSize_ = config_.beam_size();
|
||||
CHECK_GE(beamSize_, 1U);
|
||||
|
||||
setNeedSequenceInfo(false);
|
||||
setNeedGradient(false);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void KmaxSeqScoreLayer::kmaxScorePerSeq(const real* scores,
|
||||
real* sortedIds,
|
||||
const ICpuGpuVectorPtr seqStartPos) {
|
||||
int* starts = seqStartPos->getMutableData(false);
|
||||
std::vector<real> indices;
|
||||
for (size_t i = 0; i < seqStartPos->getSize() - 1; ++i) {
|
||||
int seqLen = starts[i + 1] - starts[i];
|
||||
int k = std::min(static_cast<int>(beamSize_), seqLen);
|
||||
|
||||
indices.resize(seqLen, 0);
|
||||
std::iota(begin(indices), end(indices), 0.);
|
||||
std::vector<real> tmpScore(scores + starts[i], scores + starts[i + 1]);
|
||||
std::partial_sort(
|
||||
begin(indices),
|
||||
begin(indices) + k,
|
||||
end(indices),
|
||||
[&](size_t a, size_t b) { return tmpScore[a] > tmpScore[b]; });
|
||||
memcpy(sortedIds + (i * beamSize_), indices.data(), k * sizeof(real));
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Validates the input, brings the scores to CPU if needed, allocates a CPU
 * output of shape (number of sequences, beamSize_) filled with -1, and fills
 * it with the top-scoring positions of each sequence.
 */
void KmaxSeqScoreLayer::forward(PassType passType) {
  Layer::forward(passType);

  const Argument& input = getInput(0);
  const MatrixPtr inputScore = getInputValue(0);

  CHECK(input.hasSeq() || input.hasSubseq())
      << "input of " << getName()
      << " must be a sequence or a nested sequence.";
  CHECK_EQ(input.value->getWidth(), 1UL)
      << "input of " << getName()
      << " is score over a sequence or a nested sequence, so its width "
      << " must be 1.";

  if (!useGpu_) {
    // Already on CPU: share the input matrix.
    scores_ = inputScore;
  } else {
    // This layer only runs on CPU; when the model runs on GPU the input is
    // copied from GPU to CPU first.
    Matrix::resizeOrCreate(scores_,
                           inputScore->getHeight(),
                           1,
                           false /* trans */,
                           false /* useGpu */);
    scores_->copyFrom(*inputScore);
  }

  // For a nested sequence the selection is done per sub-sequence.
  const bool isNested = input.hasSubseq();
  const auto outRowNum =
      isNested ? input.getNumSubSequences() : input.getNumSequences();

  Matrix::resizeOrCreate(output_.value, outRowNum, beamSize_, false, false);
  // Fill with -1 (ones scaled by -1) to mark unused slots.
  output_.value->one();
  output_.value->mulScalar(-1.);

  const ICpuGpuVectorPtr& startPositions =
      isNested ? input.subSequenceStartPositions
               : input.sequenceStartPositions;
  kmaxScorePerSeq(
      scores_->getData(), output_.value->getData(), startPositions);
}
|
||||
|
||||
// Intentionally empty: init() disables gradients for this layer.
void KmaxSeqScoreLayer::backward(const UpdateCallback& callback) {}
|
||||
|
||||
} // namespace paddle
|
@ -0,0 +1,176 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#include "Layer.h"
|
||||
#include "paddle/math/Matrix.h"
|
||||
#include "paddle/math/Vector.h"
|
||||
#include "paddle/utils/Logging.h"
|
||||
#include "paddle/utils/Stat.h"
|
||||
|
||||
namespace paddle {
|
||||
|
||||
class SubNestedSequenceLayer : public Layer {
|
||||
public:
|
||||
explicit SubNestedSequenceLayer(const LayerConfig& config) : Layer(config) {}
|
||||
|
||||
bool init(const LayerMap& layerMap,
|
||||
const ParameterMap& parameterMap) override;
|
||||
|
||||
void forward(PassType passType) override;
|
||||
void backward(const UpdateCallback& callback = nullptr) override;
|
||||
|
||||
private:
|
||||
/*
|
||||
* This functions generates the indices of rows in a batch according to the
|
||||
* indices of selected sub-sequence in each sequence.
|
||||
*
|
||||
* Examples:
|
||||
* selectedIndices:
|
||||
* [
|
||||
* [0, 1, -1],
|
||||
* [0, 1, 2],
|
||||
* [0, -1, -1],
|
||||
* [0, 2, 3],
|
||||
* ]
|
||||
* inputSeqInfo:
|
||||
* [
|
||||
* [0,3,4],
|
||||
* [4,5,7,10,15],
|
||||
* [15,20],
|
||||
* [20,22,23,25,28]
|
||||
* ]
|
||||
*
|
||||
* ths output is saved to private member rowIndice_;
|
||||
* [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
|
||||
* 16,17,18,19,20,21,22,23,24,25,26,27]
|
||||
*/
|
||||
|
||||
void calSelectedCols(const MatrixPtr selectedIndices,
|
||||
const std::vector<std::vector<int>>& inputSeqInfo);
|
||||
|
||||
// if the second input of this layer is on GPU memory, copy it to CPU memory.
|
||||
MatrixPtr selIdsCpu_;
|
||||
|
||||
// reorganized sequenceStartPositions and subSequenceStartPositions
|
||||
// into a 2d vector to facilitate the sequence selection process.
|
||||
std::vector<std::vector<int>> inputSeqInfoVec_;
|
||||
|
||||
// the final selected row indices in a batch,
|
||||
// rowIdx_ and selectedRows_ actually share a same memory.
|
||||
IVectorPtr rowIndice_;
|
||||
std::vector<int> selectedRows_;
|
||||
};
|
||||
|
||||
// Make the layer creatable from configs under the type name "sub_nested_seq".
REGISTER_LAYER(sub_nested_seq, SubNestedSequenceLayer);
|
||||
|
||||
bool SubNestedSequenceLayer::init(const LayerMap& layerMap,
|
||||
const ParameterMap& parameterMap) {
|
||||
/* Initialize the basic parent class */
|
||||
Layer::init(layerMap, parameterMap);
|
||||
CHECK_EQ(2U, inputLayers_.size());
|
||||
setNeedSequenceInfo(false);
|
||||
return true;
|
||||
}
|
||||
|
||||
void SubNestedSequenceLayer::calSelectedCols(
|
||||
const MatrixPtr selectedIndices,
|
||||
const std::vector<std::vector<int>>& inputSeqInfo) {
|
||||
selectedRows_.clear();
|
||||
|
||||
std::vector<int> outSeqStartInfo(1, 0);
|
||||
std::vector<int> outSubSeqStartInfo(1, 0);
|
||||
|
||||
size_t seqNum = selectedIndices->getHeight();
|
||||
size_t beamSize = selectedIndices->getWidth();
|
||||
for (size_t i = 0; i < seqNum; ++i) {
|
||||
for (size_t j = 0; j < beamSize; ++j) {
|
||||
if (selectedIndices->getElement(i, j) == -1.) break;
|
||||
int selSubSeqIdx = selectedIndices->getElement(i, j);
|
||||
CHECK_GT(inputSeqInfoVec_[i].size() - 1, selSubSeqIdx);
|
||||
|
||||
size_t subSeqLen = inputSeqInfoVec_[i][selSubSeqIdx + 1] -
|
||||
inputSeqInfoVec_[i][selSubSeqIdx];
|
||||
for (size_t k = 0; k < subSeqLen; ++k)
|
||||
selectedRows_.push_back(inputSeqInfoVec_[i][selSubSeqIdx] + k);
|
||||
outSubSeqStartInfo.push_back(outSubSeqStartInfo.back() + subSeqLen);
|
||||
}
|
||||
outSeqStartInfo.push_back(outSubSeqStartInfo.back());
|
||||
}
|
||||
|
||||
if (useGpu_) {
|
||||
rowIndice_ = IVector::create(selectedRows_.size(), useGpu_);
|
||||
rowIndice_->copyFrom(selectedRows_.data(), selectedRows_.size());
|
||||
} else {
|
||||
rowIndice_ =
|
||||
IVector::create(selectedRows_.data(), selectedRows_.size(), useGpu_);
|
||||
}
|
||||
|
||||
// create the sequence information for the output.
|
||||
ICpuGpuVector::resizeOrCreate(
|
||||
output_.sequenceStartPositions, outSeqStartInfo.size(), false);
|
||||
output_.sequenceStartPositions->copyFrom(
|
||||
outSeqStartInfo.data(), outSeqStartInfo.size(), false);
|
||||
|
||||
ICpuGpuVector::resizeOrCreate(
|
||||
output_.subSequenceStartPositions, outSubSeqStartInfo.size(), false);
|
||||
output_.subSequenceStartPositions->copyFrom(
|
||||
outSubSeqStartInfo.data(), outSubSeqStartInfo.size(), false);
|
||||
}
|
||||
|
||||
/**
 * Selects the requested sub-sequences of the first input and writes their
 * rows, in selection order, to the output.
 */
void SubNestedSequenceLayer::forward(PassType passType) {
  Layer::forward(passType);

  const Argument& nestedSeq = getInput(0);
  CHECK(nestedSeq.hasSubseq()) << "The first input of SubNestSequence layer "
                               << "must be a nested sequence.";
  const MatrixPtr selIds = getInputValue(1);
  CHECK_EQ(nestedSeq.getNumSequences(), selIds->getHeight());

  const bool idsOnGpu = dynamic_cast<GpuMatrix*>(selIds.get()) != nullptr;
  if (!idsOnGpu) {
    selIdsCpu_ = selIds;
  } else {
    /*
     * Currently, the second input of this layer is generated either by
     * kmax_sequence_score_layer, whose output is always stored on CPU, or by
     * a data_layer, which can be on GPU.
     *
     * If the second input is on GPU, copy it to CPU memory: it uses very
     * little memory, and the operations on it are control logic rather than
     * computation.
     */
    Matrix::resizeOrCreate(selIdsCpu_,
                           selIds->getHeight(),
                           selIds->getWidth(),
                           false /* trans */,
                           false /* useGpu */);
    selIdsCpu_->copyFrom(*selIds);
  }

  // Regroup the start positions per sequence, then work out which rows of
  // the batch are selected.
  Argument::reorganizeSeqInfo(nestedSeq.sequenceStartPositions,
                              nestedSeq.subSequenceStartPositions,
                              inputSeqInfoVec_);
  calSelectedCols(selIdsCpu_, inputSeqInfoVec_);

  resetOutput(selectedRows_.size(), getSize());
  getOutputValue()->selectRows(*getInputValue(0), *rowIndice_);
}
|
||||
|
||||
void SubNestedSequenceLayer::backward(const UpdateCallback& callback) {
|
||||
MatrixPtr inputSeqGrad = getInputGrad(0);
|
||||
MatrixPtr outputGrad = getOutputGrad();
|
||||
|
||||
if (inputSeqGrad) outputGrad->addToRows(*inputSeqGrad, *rowIndice_);
|
||||
}
|
||||
|
||||
} // namespace paddle
|
@ -0,0 +1,160 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "ModelConfig.pb.h"
|
||||
#include "paddle/gserver/layers/DataLayer.h"
|
||||
#include "paddle/trainer/Trainer.h"
|
||||
#include "paddle/utils/GlobalConstants.h"
|
||||
|
||||
#include "LayerGradUtil.h"
|
||||
#include "paddle/testing/TestUtil.h"
|
||||
|
||||
using namespace paddle; // NOLINT
|
||||
using namespace std; // NOLINT
|
||||
|
||||
DECLARE_bool(use_gpu);
|
||||
DECLARE_int32(gpu_id);
|
||||
DECLARE_bool(thread_local_rand_use_global_seed);
|
||||
|
||||
vector<int> randSampling(int range, int n) {
|
||||
CHECK_GE(range, n);
|
||||
vector<int> num(range);
|
||||
iota(begin(num), end(num), 0);
|
||||
if (range == n) return num;
|
||||
|
||||
random_shuffle(begin(num), end(num));
|
||||
num.resize(n);
|
||||
return num;
|
||||
}
|
||||
|
||||
/**
 * Generates random start positions for a nested sequence batch.
 *
 * A random number of sequences (1..100) is drawn; for each sequence one
 * random value in 1..100 is drawn and used both as the number of its
 * sub-sequences and as the length of each of them.
 *
 * @param seqStartPosition    receives the sequence start offsets (first is 0).
 * @param subSeqStartPosition receives the sub-sequence start offsets (first
 *                            is 0).
 *
 * Both outputs are fully overwritten. assign() is used instead of resize()
 * so that stale contents of a reused vector cannot leak into the result.
 */
void genRandomSeqInfo(std::vector<int>& seqStartPosition,
                      std::vector<int>& subSeqStartPosition) {
  const int maxSeqNum = 100;
  // generate random start position information
  const int seqNum = 1 + (rand() % maxSeqNum);
  seqStartPosition.assign(seqNum + 1, 0);
  subSeqStartPosition.assign(1, 0);

  for (int i = 0; i < seqNum; ++i) {
    const int subSeqLen = 1 + (rand() % maxSeqNum);
    for (int j = 0; j < subSeqLen; ++j)
      subSeqStartPosition.push_back(subSeqStartPosition.back() + subSeqLen);
    // A sequence ends where its last sub-sequence ends.
    seqStartPosition[i + 1] = subSeqStartPosition.back();
  }
}
|
||||
|
||||
void genRandomGroundTruth(real* values,
|
||||
vector<vector<int>>& groundTruth,
|
||||
vector<int>& startPos,
|
||||
size_t beamSize) {
|
||||
groundTruth.resize(startPos.size() - 1, vector<int>(beamSize, -1));
|
||||
for (size_t i = 0; i < startPos.size() - 1; ++i) {
|
||||
int seqLen = startPos[i + 1] - startPos[i];
|
||||
vector<int> pos =
|
||||
randSampling(seqLen, min(static_cast<int>(beamSize), seqLen));
|
||||
for (size_t j = 0; j < pos.size(); ++j) {
|
||||
groundTruth[i][j] = pos[j];
|
||||
values[startPos[i] + pos[j]] = 1.;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void checkLayerOut(vector<vector<int>> groundTruth,
|
||||
real* layerOut,
|
||||
size_t beamSize) {
|
||||
for (size_t i = 0; i < groundTruth.size(); ++i) {
|
||||
int begPos = i * beamSize;
|
||||
vector<real> tmp(layerOut + begPos, layerOut + begPos + beamSize);
|
||||
sort(begin(tmp), end(tmp));
|
||||
sort(begin(groundTruth[i]), end(groundTruth[i]));
|
||||
for (size_t j = 0; j < beamSize; ++j) CHECK_EQ(tmp[j], groundTruth[i][j]);
|
||||
}
|
||||
}
|
||||
|
||||
// Runs kmax_seq_score on random sequence / nested-sequence inputs, on CPU and
// GPU, and checks that the selected positions match the planted ground truth.
TEST(Layer, kmaxSeqScoreLayer) {
  const size_t maxBeamSize = 100;
  // size_t so that it compares against Matrix::getWidth() without a
  // signed/unsigned mismatch.
  const size_t beamSize = 1 + (rand() % maxBeamSize);

  vector<int> seqStartPosition;
  vector<int> subSeqStartPosition;
  genRandomSeqInfo(seqStartPosition, subSeqStartPosition);
  MatrixPtr inValue =
      Matrix::create(subSeqStartPosition.back(), 1, false, false);

  for (auto hasSubseq : {false, true}) {
    vector<vector<int>> groundTruth;
    inValue->randomizeUniform();
    // Plant 1s at random positions; these are the expected selections.
    genRandomGroundTruth(inValue->getData(),
                         groundTruth,
                         hasSubseq ? subSeqStartPosition : seqStartPosition,
                         beamSize);

    for (auto useGpu : {false, true}) {
      TestConfig config;
      config.layerConfig.set_type("kmax_seq_score");
      config.layerConfig.set_beam_size(beamSize);

      if (hasSubseq) {
        config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA,
                                    "scores",
                                    inValue,
                                    seqStartPosition,
                                    subSeqStartPosition});
      } else {
        config.inputDefs.push_back(
            {INPUT_SELF_DEFINE_DATA, "scores", inValue, seqStartPosition});
      }
      config.layerConfig.add_inputs();

      // data layer initialize
      std::vector<DataLayerPtr> dataLayers;
      LayerMap layerMap;
      vector<Argument> datas;
      initDataLayer(
          config,
          &dataLayers,
          &datas,
          &layerMap,
          "kmax_seq_score",
          100 /* actually this parameter is unused in self-defined input*/,
          false,
          useGpu);
      // test layer initialize
      std::vector<ParameterPtr> parameters;
      LayerPtr kmaxSeqScoreLayer;
      FLAGS_use_gpu = useGpu;
      initTestLayer(config, &layerMap, &parameters, &kmaxSeqScoreLayer);
      kmaxSeqScoreLayer->forward(PASS_TRAIN);

      const MatrixPtr outValue = kmaxSeqScoreLayer->getOutputValue();
      CHECK_EQ(outValue->getHeight(),
               hasSubseq ? subSeqStartPosition.size() - 1
                         : seqStartPosition.size() - 1);
      CHECK_EQ(outValue->getWidth(), beamSize);
      checkLayerOut(groundTruth, outValue->getData(), beamSize);
    }
  }
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
testing::InitGoogleTest(&argc, argv);
|
||||
initMain(argc, argv);
|
||||
FLAGS_thread_local_rand_use_global_seed = true;
|
||||
srand((size_t)(time(NULL)));
|
||||
return RUN_ALL_TESTS();
|
||||
}
|
@ -0,0 +1,84 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#include <random>
|
||||
#include <type_traits>
|
||||
#include "paddle/framework/op_registry.h"
|
||||
#include "paddle/framework/operator.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace operators {
|
||||
|
||||
// It seems that Eigen::Tensor::random in GPU will SEGFAULT.
|
||||
// Use std::random and thrust::random(thrust is a std library in CUDA) to
|
||||
// implement uniform random.
|
||||
template <typename T>
|
||||
class CPUUniformRandomKernel : public framework::OpKernel {
|
||||
public:
|
||||
void Compute(const framework::ExecutionContext& context) const override {
|
||||
auto* tensor = context.Output<framework::Tensor>(0);
|
||||
T* data = tensor->mutable_data<T>(context.GetPlace());
|
||||
unsigned int seed =
|
||||
static_cast<unsigned int>(context.op_.GetAttr<int>("seed"));
|
||||
std::minstd_rand engine;
|
||||
if (seed == 0) {
|
||||
seed = std::random_device()();
|
||||
}
|
||||
engine.seed(seed);
|
||||
std::uniform_real_distribution<T> dist(
|
||||
static_cast<T>(context.op_.GetAttr<float>("min")),
|
||||
static_cast<T>(context.op_.GetAttr<float>("max")));
|
||||
for (ssize_t i = 0; i < framework::product(tensor->dims()); ++i) {
|
||||
data[i] = dist(engine);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * Operator definition of uniform_random. Shape inference checks that the
 * "min" attribute is below "max" and resizes output 0 to the "dims"
 * attribute.
 */
class UniformRandomOp : public framework::OperatorWithKernel {
 protected:
  void InferShape(const framework::InferShapeContext& ctx) const override {
    PADDLE_ENFORCE(GetAttr<float>("min") < GetAttr<float>("max"),
                   "uniform_random's min must less then max");

    auto* out = ctx.Output<framework::Tensor>(0);
    out->Resize(framework::make_ddim(GetAttr<std::vector<int>>("dims")));
  }
};
|
||||
|
||||
/**
 * Declares the interface of the uniform_random op: one output ("Out") and
 * the attributes "dims", "min" (default -1), "max" (default 1) and "seed"
 * (default 0).
 */
class UniformRandomOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  UniformRandomOpMaker(framework::OpProto* proto,
                       framework::OpAttrChecker* op_checker)
      : framework::OpProtoAndCheckerMaker(proto, op_checker) {
    AddOutput("Out", "The output tensor of uniform random op");
    AddComment(R"DOC(Uniform random operator.

Used to initialize tensor with uniform random generator.
)DOC");

    // Shape of the generated tensor.
    AddAttr<std::vector<int>>("dims", "the dimension of random tensor");

    // Bounds of the distribution.
    AddAttr<float>("min", "Minimum value of uniform random").SetDefault(-1.0f);
    AddAttr<float>("max", "Maximun value of uniform random").SetDefault(1.0f);

    // Seed of the generator.
    AddAttr<int>(
        "seed",
        "Random seed of uniform random. 0 means generate a seed by system")
        .SetDefault(0);
  }
};
|
||||
} // namespace operators
|
||||
} // namespace paddle
|
||||
|
||||
// Register the operator together with its proto maker, then its CPU kernel
// for float.
REGISTER_OP(uniform_random,
            paddle::operators::UniformRandomOp,
            paddle::operators::UniformRandomOpMaker);
REGISTER_OP_CPU_KERNEL(uniform_random,
                       paddle::operators::CPUUniformRandomKernel<float>);
|
@ -0,0 +1,70 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#include <thrust/device_ptr.h>
|
||||
#include <thrust/iterator/counting_iterator.h>
|
||||
#include <thrust/random.h>
|
||||
#include <thrust/transform.h>
|
||||
#include "paddle/framework/op_registry.h"
|
||||
#include "paddle/framework/operator.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace operators {
|
||||
|
||||
template <typename T>
|
||||
struct UniformGenerator {
|
||||
T min_, max_;
|
||||
unsigned int seed_;
|
||||
|
||||
__host__ __device__ UniformGenerator(T min, T max, int seed)
|
||||
: min_(min), max_(max), seed_(seed) {}
|
||||
|
||||
__host__ __device__ T operator()(const unsigned int n) const {
|
||||
thrust::minstd_rand rng;
|
||||
rng.seed(seed_);
|
||||
thrust::uniform_real_distribution<T> dist(min_, max_);
|
||||
rng.discard(n);
|
||||
return dist(rng);
|
||||
}
|
||||
};
|
||||
|
||||
// It seems that Eigen::Tensor::random in GPU will SEGFAULT.
|
||||
// Use std::random and thrust::random(thrust is a std library in CUDA) to
|
||||
// implement uniform random.
|
||||
template <typename T>
|
||||
class GPUUniformRandomKernel : public framework::OpKernel {
|
||||
public:
|
||||
void Compute(const framework::ExecutionContext& context) const override {
|
||||
auto* tensor = context.Output<framework::Tensor>(0);
|
||||
T* data = tensor->mutable_data<T>(context.GetPlace());
|
||||
unsigned int seed =
|
||||
static_cast<unsigned int>(context.op_.GetAttr<int>("seed"));
|
||||
if (seed == 0) {
|
||||
seed = std::random_device()();
|
||||
}
|
||||
T min = static_cast<T>(context.op_.GetAttr<float>("min"));
|
||||
T max = static_cast<T>(context.op_.GetAttr<float>("max"));
|
||||
thrust::counting_iterator<unsigned int> index_sequence_begin(0);
|
||||
ssize_t N = framework::product(tensor->dims());
|
||||
thrust::transform(index_sequence_begin, index_sequence_begin + N,
|
||||
thrust::device_ptr<T>(data),
|
||||
UniformGenerator<T>(min, max, seed));
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace operators
|
||||
} // namespace paddle
|
||||
|
||||
// Register the GPU kernel of uniform_random for float.
REGISTER_OP_GPU_KERNEL(uniform_random,
                       paddle::operators::GPUUniformRandomKernel<float>);
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue