/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <cmath>
#include <cstddef>
#include <functional>
#include <memory>
#include <string>
#include <vector>

#include "GradientMachine.h"
#include "NeuralNetwork.h"

#include "paddle/utils/Locks.h"

namespace paddle {

/**
 * Forward declarations of the private data classes that hold the
 * user-customized beam search callbacks.
 */
class BeamSearchControlCallbacks;
class BeamSearchStatisticsCallbacks;

class RecurrentGradientMachine : public NeuralNetwork {
|
|
public:
|
|
RecurrentGradientMachine(const std::string& subModelName,
|
|
NeuralNetwork* rootNetwork);
|
|
|
|
// Disable copy and assign.
|
|
RecurrentGradientMachine(const RecurrentGradientMachine& other) = delete;
|
|
RecurrentGradientMachine& operator=(const RecurrentGradientMachine& other) =
|
|
delete;
|
|
|
|
virtual ~RecurrentGradientMachine() {
|
|
this->removeBeamSearchStatisticsCallbacks();
|
|
this->removeBeamSearchControlCallbacks();
|
|
}
|
|
|
|
virtual void init(const ModelConfig& config,
|
|
ParamInitCallback callback,
|
|
const std::vector<ParameterType>& parameterTypes,
|
|
bool useGpu);
|
|
|
|
virtual void prefetch(const std::vector<Argument>& inArgs);
|
|
|
|
virtual void forward(const std::vector<Argument>& inArgs,
|
|
std::vector<Argument>* outArgs,
|
|
PassType passType);
|
|
|
|
virtual void backward(const UpdateCallback& callback = nullptr);
|
|
|
|
void forwardBackward(const std::vector<Argument>& inArgs,
|
|
std::vector<Argument>* outArgs,
|
|
PassType passType,
|
|
const UpdateCallback& callback);
|
|
|
|
virtual void resetState() {}
|
|
virtual void eval(Evaluator* evaluator) const;
|
|
|
|
const std::vector<int>& getParameterIds() { return parameterIds_; }
|
|
|
|
/**
|
|
* @brief BeamSearchCandidatesAdjustCallback
|
|
*
|
|
* Adjust searching candidates to restrict beam search
|
|
* searching within a limited subset of all possibile paths.
|
|
*
|
|
* The first parameter is the prefixes of all formed paths in current
|
|
* beam search step, whose type is basically int[][].
|
|
*
|
|
* The second parameter is a pointer to the network used to generate sequence,
|
|
* user can use this pointer to tranverse each layer in the network to
|
|
* modify behaivors of a particular layer.
|
|
*
|
|
* The third parameter is an integer to indicate the iteration number of
|
|
* beam search, so that user can customize different operations in different
|
|
* beam search iterations.
|
|
*/
|
|
typedef std::function<void(
|
|
const std::vector<std::vector<int>*>&, NeuralNetwork*, const int)>
|
|
BeamSearchCandidatesAdjustCallback;
|
|
|
|
/**
|
|
* @brief DropCallback
|
|
*
|
|
* Drop a whole prefix or one candidate in beam search or not.
|
|
*
|
|
* The first parameter is sequence index in a batch
|
|
*
|
|
* The second parameter is one path in beam search,
|
|
* which is made up of node indices.
|
|
*
|
|
* The third parameter is probabilites for each node in this path.
|
|
*
|
|
* Return true if this prefix or candidate is expected to be dropped.
|
|
*/
|
|
typedef std::function<bool(
|
|
int seqId, const std::vector<int>&, const std::vector<real>&)>
|
|
DropCallback;
|
|
|
|
/**
|
|
* @brief NormOrDropNodeCallback
|
|
*
|
|
* Normalize a path's probabilities or just drop it by modifying path.logProb
|
|
*
|
|
* The first parameter is sequence index in a batch
|
|
*
|
|
* The second parameter is path.ids
|
|
*
|
|
* The third parameter is probabilites for each node in this path.
|
|
*
|
|
* The fourth parameter is the probability of the whole path.
|
|
*/
|
|
typedef std::function<void(
|
|
int seqId, const std::vector<int>&, std::vector<real>&, real*)>
|
|
NormOrDropNodeCallback;
|
|
|
|
/**
|
|
* @brief Register beam search control callbacks. Used for prediction.
|
|
*
|
|
* @param queryBeamSearch: Give the sequences already formed, return the
|
|
* nodes expected to be expanded.
|
|
* Input: A pointer to an array holding pathes which have been expanded
|
|
* Return: A pointer to an array holding nodes wanted to be expanded.
|
|
*
|
|
* @param dropOneNode: Early drop a node in one beam search step.
|
|
* Given the path formed and probability history, decide whether a node
|
|
* should be dropped or not.
|
|
*
|
|
* @param stopBeamSearch: Early stop a path in one beam search step.
|
|
* Given the path and probability history, decide whether a path
|
|
* should be dropped or not.
|
|
*/
|
|
void registerBeamSearchControlCallbacks(
|
|
const BeamSearchCandidatesAdjustCallback& adjustBeamSearch,
|
|
const NormOrDropNodeCallback& normOrDropNode,
|
|
const DropCallback& stopBeamSearch);
|
|
|
|
/**
|
|
* @brief Remove user costumized beam search callbacks,
|
|
*
|
|
* make sequence generation acts like normal beam search.
|
|
*/
|
|
void removeBeamSearchControlCallbacks();
|
|
|
|
/**
|
|
* @brief EachStepCallback
|
|
*
|
|
* Invoke with beam search step.
|
|
*/
|
|
typedef std::function<void(int)> EachStepCallback;
|
|
|
|
/**
|
|
* @brief register statistics methods for performance profile of beam search.
|
|
*
|
|
* @param onEachStepStarted: invoke once a beam search step starts.
|
|
* Its input is index of the beam search step.
|
|
*
|
|
* @param onEachStepStoped: invoke once a beam search step ends.
|
|
* Its input is index of the beam search step.
|
|
*/
|
|
void registerBeamSearchStatisticsCallbacks(
|
|
const EachStepCallback& onEachStepStarted,
|
|
const EachStepCallback& onEachStepStoped);
|
|
|
|
/**
|
|
* @brief Remove beam search callbacks.
|
|
*/
|
|
void removeBeamSearchStatisticsCallbacks();
|
|
|
|
/**
|
|
* @brief Stop beam search for current source.
|
|
*
|
|
* Will restart beam search in the next forward
|
|
*/
|
|
void stopBeamSearch();
|
|
|
|
struct Path {
|
|
/**
|
|
* @brief ids, path of beam search.
|
|
*/
|
|
std::vector<int> ids;
|
|
|
|
/**
|
|
* @brief logProb, current probability of path.
|
|
*/
|
|
real logProb;
|
|
|
|
int machineId; // index of sample in frame
|
|
int topIndex; // index of MaxIdLayer output in one sample
|
|
int seqId; // index of sequence in batch generation
|
|
std::vector<int> machineIdVec;
|
|
|
|
/**
|
|
* @brief A record of each node's probality in a formed path in beam search.
|
|
*
|
|
* @note It could be empty when history is not recorded. If the history is
|
|
* wanted to be recorded, recordHistory() MUST be invoked first.
|
|
*/
|
|
std::vector<real> probHistory;
|
|
|
|
/**
|
|
* @brief Path default ctor, first logProb is 0.
|
|
*/
|
|
Path() {
|
|
logProb = 0;
|
|
seqId = 0;
|
|
}
|
|
explicit Path(size_t seqId) : seqId(seqId) { logProb = 0; }
|
|
|
|
/**
|
|
* @brief Create a new path based on an old path and
|
|
* a new node with probability.
|
|
*
|
|
* @param old old path
|
|
* @param newId index of the new node
|
|
* @param logProb probability of the new node.
|
|
* @param machineId sample index of a frame in RNN
|
|
* @param topIndex index of MaxIdLayer output in one sample
|
|
*/
|
|
Path(Path& old, int newId, real logProb, int machineId, int topIndex)
|
|
: ids(old.ids),
|
|
logProb(old.logProb + logProb),
|
|
machineId(machineId),
|
|
topIndex(topIndex),
|
|
seqId(old.seqId) {
|
|
ids.push_back(newId);
|
|
if (!old.probHistory.empty()) {
|
|
this->probHistory = old.probHistory;
|
|
// probHistory store current prob, not sum
|
|
this->probHistory.push_back(logProb);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* @brief operator <
|
|
*
|
|
* Path a < Path b means log probability of a is smaller than that of b
|
|
*/
|
|
bool operator<(const Path& other) const {
|
|
return (logProb < other.logProb);
|
|
}
|
|
|
|
static bool greaterPath(const Path& a, const Path& b) { return (b < a); }
|
|
|
|
/**
|
|
* @brief Start recording history in this path.
|
|
*/
|
|
void recordHistory() { this->probHistory.push_back(this->logProb); }
|
|
|
|
/**
|
|
* @brief Adjust probability for DIY beam search interface.
|
|
* In normal situation, it will do nothing.
|
|
*
|
|
* @param calc_id: the object id for DIY beam search interface.
|
|
* @param atEos: at end of sequence or not.
|
|
*/
|
|
void adjustProb(int calc_id, bool atEos = false);
|
|
|
|
/**
|
|
* @brief isDropable indacating whether the current node will be
|
|
* dropped or not in beam search.
|
|
*
|
|
* @note: if logProb is -inf, current node will be dropped.
|
|
* @return true to drop the current node.
|
|
*/
|
|
bool isDropable() const { return std::isinf(logProb) && logProb < 0; }
|
|
};
|
|
|
|
/**
|
|
* @brief access beam search results.
|
|
* @return beam search results.
|
|
*/
|
|
const std::vector<std::vector<Path>>& getFinalPaths() const {
|
|
return this->finalPaths_;
|
|
}
|
|
|
|
protected:
|
|
std::vector<Argument::SeqInfo> commonSeqInfo_;
|
|
ICpuGpuVectorPtr sequenceStartPositions_;
|
|
void calcSequenceStartPositions();
|
|
void checkInputConsistency(int inlinkId,
|
|
const std::vector<Argument::SeqInfo>& seqInfo);
|
|
void reorganizeInput(PassType passType);
|
|
void reorganizeOutput(PassType passType);
|
|
void connectFrames(PassType passType);
|
|
void calcNumSequencesAtEachStep();
|
|
|
|
void resizeOrCreateFrames(int numFrames);
|
|
void resizeBootFrame(int numSequences);
|
|
|
|
void generateSequence();
|
|
void oneWaySearch(size_t batchSize);
|
|
void beamSearch(size_t batchSize);
|
|
|
|
struct InFrameLine {
|
|
std::string linkName;
|
|
LayerPtr inLayer;
|
|
std::vector<LayerPtr> agents; // Scatter Agents to reform batch input
|
|
Argument outArg; // scatter output argument
|
|
};
|
|
std::vector<InFrameLine> inFrameLines_;
|
|
|
|
struct OutFrameLine {
|
|
std::string layerName;
|
|
LayerPtr agentLayer;
|
|
std::vector<LayerPtr> frames;
|
|
};
|
|
std::vector<OutFrameLine> outFrameLines_;
|
|
|
|
struct MemoryFrameLine {
|
|
std::string layerName;
|
|
std::string linkName;
|
|
LayerPtr bootLayer; // actually used biasLayer or rootAgent
|
|
LayerPtr biasLayer;
|
|
LayerPtr rootLayer; // layer in root network to boot this memory
|
|
LayerPtr rootAgent; // agent to link rootLayer
|
|
std::vector<LayerPtr> frames;
|
|
std::vector<LayerPtr> agents;
|
|
std::vector<LayerPtr> scatterAgents; // scatter agent used by beam search
|
|
Argument outArg; // scatter output argument
|
|
// Different memoryFrameLine have different element as follows
|
|
IVectorPtr allIds; // scattered id of realLayer
|
|
ICpuGpuVectorPtr
|
|
sequenceStartPositions; // scattered sequenceStartPositions
|
|
};
|
|
std::vector<MemoryFrameLine> memoryFrameLines_;
|
|
|
|
// Each inFrameLines(inlinks) has its own info(elements) below,
|
|
// and all outFrameLines(outlinks) share the info with one inFrameLine,
|
|
// which is assigned by targetInfoInlinkId_.
|
|
struct Info {
|
|
// The original positions in the original batch
|
|
IVectorPtr allIds; // scattered id of realLayer [batchSize]
|
|
|
|
// index of allIds for each step [maxSequenceLength_]
|
|
// idIndex[i] is the total length of the first i sequences
|
|
std::vector<int> idIndex;
|
|
|
|
ICpuGpuVectorPtr
|
|
sequenceStartPositions; // scattered sequenceStartPositions
|
|
std::vector<int> seqStartPosIndex; // index of sequenceStartPositions
|
|
};
|
|
std::vector<Info> info_; // for input
|
|
|
|
// numSeqs_[i] is the number sequences which is longer than i (for sequence
|
|
// data) or has more than i subsequences (for subsequence data)
|
|
// Equivalently, numSeqs_[i] is the number of sequences at step i;
|
|
std::vector<int> numSeqs_;
|
|
|
|
std::vector<std::vector<Argument::SeqInfo>> seqInfos_;
|
|
|
|
void checkOutputConsistency(OutFrameLine& outFrameLine);
|
|
|
|
/* create scattered id infomation for all realLayer of inFrameLines one time.
|
|
* If hasSubseq, will also create scattered sequenceStartPositions infomation
|
|
* for all realLayer of inFrameLines one time.
|
|
*/
|
|
void createInFrameInfo(int inlinks_id,
|
|
const Argument& input,
|
|
PassType passType);
|
|
void createInFrameInfo_nonseq(int inlinks_id,
|
|
const Argument& input,
|
|
PassType passType);
|
|
void createInFrameInfo_seq(int inlinks_id,
|
|
const Argument& input,
|
|
PassType passType);
|
|
void createInFrameInfo_subseq(int inlinks_id,
|
|
const Argument& input,
|
|
PassType passType);
|
|
|
|
void createOutFrameInfo(OutFrameLine& outFrameLine,
|
|
Info& info,
|
|
ICpuGpuVectorPtr& sequenceStartPositions,
|
|
ICpuGpuVectorPtr& subSequenceStartPositions);
|
|
void createOutFrameInfo_seq(OutFrameLine& outFrameLine,
|
|
Info& info,
|
|
ICpuGpuVectorPtr& sequenceStartPositions,
|
|
ICpuGpuVectorPtr& subSequenceStartPositions);
|
|
void createOutFrameInfo_subseq(OutFrameLine& outFrameLine,
|
|
Info& info,
|
|
ICpuGpuVectorPtr& sequenceStartPositions,
|
|
ICpuGpuVectorPtr& subSequenceStartPositions);
|
|
|
|
void createMemoryFrameInfo(MemoryFrameLine* memoryFrameLine,
|
|
PassType passType);
|
|
|
|
void copyScattedId(std::vector<int>& srcIds, IVectorPtr* dstIds, int size);
|
|
|
|
void selectRowsOneTime(LayerPtr layer,
|
|
const IVectorPtr& allIds,
|
|
Argument* arg,
|
|
PassType passType);
|
|
|
|
void createSeqPos(const std::vector<int>& sequenceStartPosition,
|
|
ICpuGpuVectorPtr* sequenceStartPositions);
|
|
|
|
// for generator
|
|
struct EosFrameLine {
|
|
std::vector<LayerPtr> layers;
|
|
};
|
|
std::unique_ptr<EosFrameLine> eosFrameLine_;
|
|
|
|
struct Generator {
|
|
GeneratorConfig config;
|
|
std::vector<int> ids; // store generated sequences
|
|
Argument outArg; // final output argument
|
|
};
|
|
bool generating_;
|
|
Generator generator_;
|
|
|
|
std::vector<std::unique_ptr<NeuralNetwork>> frames_;
|
|
|
|
NeuralNetwork* rootNetwork_;
|
|
bool reversed_;
|
|
|
|
int maxSequenceLength_; // Max top-level length
|
|
bool useGpu_;
|
|
bool stopBeamSearch_;
|
|
|
|
std::vector<int>
|
|
parameterIds_; // parameters actually used by this Layer Group
|
|
|
|
// store final argument of outFrameLines_
|
|
std::vector<Argument> dataArgs_;
|
|
// store each frame's output argument of outFrameLines_
|
|
std::vector<std::vector<Argument>> dataArgsFrame_;
|
|
size_t dataArgsSize_; // size of dataArgs_ = size of dataArgsFrame_
|
|
|
|
IVectorPtr cpuId_;
|
|
MatrixPtr cpuProb_;
|
|
IVectorPtr cpuEos_;
|
|
|
|
private:
|
|
/*
|
|
* @return beam size in beam search
|
|
*/
|
|
size_t getBeamSize() { return generator_.config.beam_size(); }
|
|
|
|
/*
|
|
* @return number of sequence in a batch in generation
|
|
*/
|
|
size_t getGenBatchSize();
|
|
|
|
/*
|
|
* @brief store output of the machineCur-th frame during generation, for
|
|
* creating the final outlink after the entire generation process is finished.
|
|
*
|
|
* In generation, if the layer group has more than 1 outlink, the first
|
|
* one is reserved to store the generated word indices, the others are data
|
|
* outlinks, that can be used like a common layer in the network.
|
|
*
|
|
* @param machineCur : index to access the layer group frame in
|
|
* currrent generation step.
|
|
*/
|
|
void copyDataOutlinkFrame(size_t machineCur);
|
|
|
|
/*
|
|
* @brief In generation, if the layer group has more than 1 outlink, outlinks
|
|
* except the first one are data outlinks. This function creates the data
|
|
* outlinks.
|
|
* @note In beam search, only one generated sequence with the hightest log
|
|
* probabilites are retained.
|
|
* @param machineIdVec : select a row of output matrix in each frame
|
|
* that the generation process expanded.
|
|
*/
|
|
void createDataOutlink(std::vector<int>& machineIdVec);
|
|
|
|
/*
|
|
* @brief used in beam search, connect previous frame to form recurrent link
|
|
* @param stepId : iteration number of generation process.
|
|
* It equals to the length of longest half-generated sequence.
|
|
* @param paths : half-generated paths that are going to be expanded
|
|
* in current beam search iteration.
|
|
*/
|
|
void connectPrevFrame(int stepId, std::vector<Path>& paths);
|
|
|
|
/*
|
|
* @brief used in beam search, forward current recurrent frame
|
|
* @param machineCur : index to access the layer group frame in
|
|
* currrent generation step.
|
|
*/
|
|
void forwardFrame(int machineCur);
|
|
|
|
/*
|
|
* @brief reduce all expanded paths to beam size.
|
|
*
|
|
* @param newPaths : newPaths[totalExpandCount : ] stores all expanded paths
|
|
* for the seqId-th sequence
|
|
* @param seqId : sequence index in a batch
|
|
* @param totalExpandCount : number of already shrinked paths in newPaths
|
|
* @return size of retained paths at the end of a beam search iteration
|
|
*/
|
|
size_t beamShrink(std::vector<Path>& newPaths,
|
|
size_t seqId,
|
|
size_t totalExpandCount);
|
|
|
|
/*
|
|
* @brief expand a single path to expandWidth new paths
|
|
* with highest probability
|
|
* @param curPath : path to be expanded
|
|
* @param curPathId : index of curPath in member newPaths
|
|
* @param expandWidth : number of paths to be expanded
|
|
*/
|
|
void singlePathExpand(Path& curPath,
|
|
size_t curPathId,
|
|
std::vector<Path>& newPaths,
|
|
size_t expandWidth);
|
|
|
|
/*
|
|
* @brief A new beam search iteration. Each half-generated paths in previous
|
|
* beam search iteration are further expanded to beam_size new paths
|
|
* with highest probabilities, and then all the expanded paths are again
|
|
* reduced to beam_size paths according to their log probabilities.
|
|
* @param paths : half-generated paths in previous iteration.
|
|
* @param newPaths : paths expanded and then reduces in current iteration.
|
|
*/
|
|
void beamExpand(std::vector<Path>& paths, std::vector<Path>& newPaths);
|
|
|
|
/*
|
|
* @brief fill sequence start positions and some other information that are
|
|
* uesed by the "text_printer" evaluator.
|
|
*/
|
|
void fillGenOutputs();
|
|
|
|
std::vector<int> machineIds_;
|
|
std::vector<int> topIds_;
|
|
std::vector<int> seqIds_;
|
|
std::vector<int> batchMachineIdVec_;
|
|
std::vector<std::vector<Path>> finalPaths_;
|
|
std::vector<real> minFinalPathLogProb_;
|
|
BeamSearchControlCallbacks* beamSearchCtrlCallbacks_;
|
|
BeamSearchStatisticsCallbacks* beamSearchStatistics_;
|
|
};
|
|
}  // namespace paddle