commit 536dbc0cb6
@@ -0,0 +1,135 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include "CrossEntropyOverBeam.h"
#include "Layer.h"

namespace paddle {

/* This struct stores the beams in all search steps for a single sequence. */
struct BeamExpansion {
  std::vector<MatrixPtr> scores;
  std::vector<IVectorPtr> seqInfo;

  std::vector<MatrixPtr> candidateIds;
  std::vector<int> gold;  // the gold candidate id in each expansion

  std::vector<MatrixPtr> scoreGrad;

  size_t expansionCount;

  explicit BeamExpansion(int n) {
    expansionCount = n;
    scores.resize(expansionCount);
    seqInfo.resize(expansionCount);
    candidateIds.resize(expansionCount);
    scoreGrad.resize(expansionCount);

    gold.resize(expansionCount);
  }
};
typedef std::shared_ptr<BeamExpansion> BeamExpansionPtr;

class CostForOneSequence {
public:
  CostForOneSequence()
      : beamSize_(0), validExpansionCount_(0), goldAsExtraPath_(false) {}
  void setData(const BeamExpansionPtr bPtr, size_t beamSize) {
    beams_ = bPtr;
    beamSize_ = beamSize;

    expandedPathScores_.clear();
    expandedPathScores_.resize(beams_->expansionCount);

    goldRowIds_.clear();
    goldRowIds_.resize(beams_->expansionCount, 0);
    goldColIds_.clear();
    goldColIds_.resize(beams_->expansionCount, -1);
  }
  size_t getValidExpansionCount() { return validExpansionCount_; }

  real forward();
  void backward();

private:
  void calValidExpandStep();
  void constructTotalExpansion();
  size_t initLastExpansion();
  real globallyNormalizedScore();

  int getSeqStartPos(size_t beamId, size_t rowId) {
    CHECK_GT(beams_->seqInfo[beamId]->getSize() - 1, rowId);
    int* starts = beams_->seqInfo[beamId]->getData();
    return starts[rowId] - starts[0];
  }

  size_t beamSize_;
  size_t validExpansionCount_;
  /* true if beam search pruned the gold path, so that it has to be
   * appended as an extra path when computing the cost. */
  bool goldAsExtraPath_;
  std::vector<int> goldRowIds_;
  std::vector<int> goldColIds_;

  BeamExpansionPtr beams_;
  std::vector<std::vector<int>> pathRowIdsInEachBeam_;
  std::vector<int> parentIdsInBeam_;
  size_t goldIdsInFinalExpansion_;

  std::vector<MatrixPtr> expandedPathScores_;

  MatrixPtr softmaxOut_;
};

class CrossEntropyOverBeam : public Layer {
public:
  explicit CrossEntropyOverBeam(const LayerConfig& config) : Layer(config) {}
  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;
  void forward(PassType passType) override;
  void backward(const UpdateCallback& callback) override;

private:
  void checkInputs();
  void copyInputsToCpu();
  void resizeOutput();
  void copyGradToGpu(size_t copyCount);
  void splitBatchBeams();

  size_t beamExpanCount_;
  size_t batchSize_;
  size_t beamSize_;

  /*
   * Constructing beams is not GPU-friendly, so currently this layer runs
   * only on the CPU. If any of its inputs resides in GPU memory, it is first
   * copied to CPU memory.
   */
  std::vector<MatrixPtr> candidateScores_;
  std::vector<MatrixPtr> candidateScoreGrad_;
  std::vector<MatrixPtr> candidateInBeam_;
  std::vector<MatrixPtr> gradToInputs_;
  std::vector<IVectorPtr> goldSequence_;
  std::vector<std::vector<int>> beamSplitPos_;

  /*
   * Split the entire batch of beams into per-sequence beams and store the
   * result in this member.
   */
  std::vector<BeamExpansion> beamPerSeq_;
  /* beamCosts_ is used to propagate the error in one sequence. */
  std::vector<CostForOneSequence> beamCosts_;
};

}  // namespace paddle
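For intuition only (this sketch is not part of the commit): as the declarations above read, forward() gathers the scores of all paths that survive beam search, appends the gold path as an extra path when the beam pruned it (goldAsExtraPath_), and globallyNormalizedScore() converts the gold path's score into a cross-entropy cost via a softmax over all paths. A minimal NumPy rendering of that idea, with every name illustrative:

import numpy as np

def beam_cost_sketch(path_scores, gold_idx, gold_score, gold_in_beam):
    """Globally normalized cross entropy for one sequence (illustrative)."""
    scores = np.asarray(path_scores, dtype=np.float64)
    if not gold_in_beam:
        # the gold path was pruned by beam search: append it as an extra path
        scores = np.append(scores, gold_score)
        gold_idx = scores.size - 1
    # softmax over every expanded path, then the gold path's negative log-prob
    probs = np.exp(scores - scores.max())
    probs /= probs.sum()
    return -np.log(probs[gold_idx])

# toy usage: three surviving paths; the gold path (score 0.9) was pruned
print(beam_cost_sketch([1.2, 0.7, -0.3], None, 0.9, gold_in_beam=False))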
@@ -0,0 +1,207 @@
type: "nn"
layers {
  name: "sentence_states"
  type: "data"
  size: 32
  active_type: ""
}
layers {
  name: "sentence_scores"
  type: "data"
  size: 1
  active_type: ""
}
layers {
  name: "__kmax_sequence_score_layer_0__"
  type: "kmax_seq_score"
  active_type: ""
  inputs {
    input_layer_name: "sentence_scores"
  }
  beam_size: 5
}
layers {
  name: "__sub_nested_seq_layer_0__"
  type: "sub_nested_seq"
  size: 32
  active_type: ""
  inputs {
    input_layer_name: "sentence_states"
  }
  inputs {
    input_layer_name: "__kmax_sequence_score_layer_0__"
  }
}
layers {
  name: "__fc_layer_0__"
  type: "fc"
  size: 1
  active_type: ""
  inputs {
    input_layer_name: "__sub_nested_seq_layer_0__"
    input_parameter_name: "___fc_layer_0__.w0"
  }
  bias_parameter_name: "___fc_layer_0__.wbias"
}
layers {
  name: "__kmax_sequence_score_layer_1__"
  type: "kmax_seq_score"
  active_type: ""
  inputs {
    input_layer_name: "sentence_scores"
  }
  beam_size: 5
}
layers {
  name: "__seq_slice_layer_0__"
  type: "seq_slice"
  size: 32
  active_type: ""
  inputs {
    input_layer_name: "__sub_nested_seq_layer_0__"
  }
  inputs {
    input_layer_name: "__kmax_sequence_score_layer_1__"
  }
  select_first: true
}
layers {
  name: "__fc_layer_1__"
  type: "fc"
  size: 1
  active_type: ""
  inputs {
    input_layer_name: "__seq_slice_layer_0__"
    input_parameter_name: "___fc_layer_1__.w0"
  }
  bias_parameter_name: "___fc_layer_1__.wbias"
}
layers {
  name: "__kmax_sequence_score_layer_2__"
  type: "kmax_seq_score"
  active_type: ""
  inputs {
    input_layer_name: "__fc_layer_1__"
  }
  beam_size: 5
}
layers {
  name: "sentences_ids"
  type: "data"
  size: 1
  active_type: ""
}
layers {
  name: "start_ids"
  type: "data"
  size: 1
  active_type: ""
}
layers {
  name: "end_ids"
  type: "data"
  size: 1
  active_type: ""
}
layers {
  name: "__cross_entropy_over_beam_0__"
  type: "cross_entropy_over_beam"
  active_type: ""
  inputs {
    input_layer_name: "sentence_scores"
  }
  inputs {
    input_layer_name: "__kmax_sequence_score_layer_0__"
  }
  inputs {
    input_layer_name: "sentences_ids"
  }
  inputs {
    input_layer_name: "__fc_layer_0__"
  }
  inputs {
    input_layer_name: "__kmax_sequence_score_layer_1__"
  }
  inputs {
    input_layer_name: "start_ids"
  }
  inputs {
    input_layer_name: "__fc_layer_1__"
  }
  inputs {
    input_layer_name: "__kmax_sequence_score_layer_2__"
  }
  inputs {
    input_layer_name: "end_ids"
  }
}
parameters {
  name: "___fc_layer_0__.w0"
  size: 32
  initial_mean: 0.0
  initial_std: 0.176776695297
  dims: 32
  dims: 1
  initial_strategy: 0
  initial_smart: true
}
parameters {
  name: "___fc_layer_0__.wbias"
  size: 1
  initial_mean: 0.0
  initial_std: 0.0
  dims: 1
  dims: 1
  initial_strategy: 0
  initial_smart: false
}
parameters {
  name: "___fc_layer_1__.w0"
  size: 32
  initial_mean: 0.0
  initial_std: 0.176776695297
  dims: 32
  dims: 1
  initial_strategy: 0
  initial_smart: true
}
parameters {
  name: "___fc_layer_1__.wbias"
  size: 1
  initial_mean: 0.0
  initial_std: 0.0
  dims: 1
  dims: 1
  initial_strategy: 0
  initial_smart: false
}
input_layer_names: "sentence_scores"
input_layer_names: "sentences_ids"
input_layer_names: "sentence_states"
input_layer_names: "start_ids"
input_layer_names: "end_ids"
output_layer_names: "__cross_entropy_over_beam_0__"
sub_models {
  name: "root"
  layer_names: "sentence_states"
  layer_names: "sentence_scores"
  layer_names: "__kmax_sequence_score_layer_0__"
  layer_names: "__sub_nested_seq_layer_0__"
  layer_names: "__fc_layer_0__"
  layer_names: "__kmax_sequence_score_layer_1__"
  layer_names: "__seq_slice_layer_0__"
  layer_names: "__fc_layer_1__"
  layer_names: "__kmax_sequence_score_layer_2__"
  layer_names: "sentences_ids"
  layer_names: "start_ids"
  layer_names: "end_ids"
  layer_names: "__cross_entropy_over_beam_0__"
  input_layer_names: "sentence_scores"
  input_layer_names: "sentences_ids"
  input_layer_names: "sentence_states"
  input_layer_names: "start_ids"
  input_layer_names: "end_ids"
  output_layer_names: "__cross_entropy_over_beam_0__"
  is_recurrent_layer_group: false
}
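As a side note (not part of the commit), a fixture like the protostr above can be inspected programmatically with the protobuf text format. A minimal sketch, assuming the generated module paddle.proto.ModelConfig_pb2 that this era of the codebase shipped; the file name is hypothetical:

from google.protobuf import text_format
from paddle.proto.ModelConfig_pb2 import ModelConfig  # assumed module path

config = ModelConfig()
with open("test_cross_entropy_over_beam.protostr") as f:  # hypothetical name
    text_format.Parse(f.read(), config)

# the cost layer should consume nine inputs: three triples of
# (candidate_scores, selected_candidates, gold), one per beam expansion
cost = next(l for l in config.layers if l.type == "cross_entropy_over_beam")
assert len(cost.inputs) == 9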
@@ -0,0 +1,45 @@
#!/usr/bin/env python
# coding=utf-8

from paddle.trainer_config_helpers import *

beam_size = 5

# the first beam expansion.
sentence_states = data_layer(name="sentence_states", size=32)
sentence_scores = data_layer(name="sentence_scores", size=1)
topk_sentence_ids = kmax_sequence_score_layer(
    input=sentence_scores, beam_size=beam_size)

# the second beam expansion.
topk_sen = sub_nested_seq_layer(
    input=sentence_states, selected_indices=topk_sentence_ids)
start_pos_scores = fc_layer(input=topk_sen, size=1, act=LinearActivation())
topk_start_pos_ids = kmax_sequence_score_layer(
    input=sentence_scores, beam_size=beam_size)

# the final beam expansion.
topk_start_spans = seq_slice_layer(
    input=topk_sen, starts=topk_start_pos_ids, ends=None)
end_pos_scores = fc_layer(
    input=topk_start_spans, size=1, act=LinearActivation())
topk_end_pos_ids = kmax_sequence_score_layer(
    input=end_pos_scores, beam_size=beam_size)

# define the cost: one BeamInput per expansion, pairing that expansion's
# candidate scores and selected candidates with the gold indices.
sentence_idx = data_layer(name="sentences_ids", size=1)
start_idx = data_layer(name="start_ids", size=1)
end_idx = data_layer(name="end_ids", size=1)
cost = cross_entropy_over_beam(input=[
    BeamInput(
        candidate_scores=sentence_scores,
        selected_candidates=topk_sentence_ids,
        gold=sentence_idx),
    BeamInput(
        candidate_scores=start_pos_scores,
        selected_candidates=topk_start_pos_ids,
        gold=start_idx),
    BeamInput(
        candidate_scores=end_pos_scores,
        selected_candidates=topk_end_pos_ids,
        gold=end_idx)
])

outputs(cost)
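For a concrete sense of what the gold data layers carry, here is an illustrative toy sample for the config above (not part of the commit; the dict keys mirror the data_layer names, every value is made up, and the actual data-provider format is out of scope):

# Illustrative only: one toy sample for the config above, as plain Python.
sample = {
    # one score per candidate sentence (a sequence of size-1 values,
    # matching data_layer(name="sentence_scores", size=1))
    "sentence_scores": [0.1, 2.3, -0.7, 1.5],
    # one 32-d state vector per candidate sentence
    "sentence_states": [[0.0] * 32 for _ in range(4)],
    # gold indices, one per beam expansion
    "sentences_ids": [1],  # the gold sentence is candidate 1
    "start_ids": [0],      # the gold span starts at position 0 ...
    "end_ids": [3],        # ... and ends at position 3 in that sentence
}
for name, value in sample.items():
    print(name, "->", len(value), "element(s)")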