add forward computation of crf operator.

fix-typo
caoying03 7 years ago
parent cbcf11d930
commit cc220eec36

@@ -114,16 +114,19 @@ class Tensor {
                 const platform::DeviceContext& ctx);
 
   /**
-   * @brief   Return the slice of the tensor.
+   * @brief   Return a sub-tensor of the given tensor.
    *
-   * @param[in] begin_idx   The begin index of the slice.
-   * @param[in] end_idx     The end index of the slice.
+   * @param[in] begin_idx   The index of the start row (inclusive) to slice.
+   *                        The index number begins from 0.
+   * @param[in] end_idx     The index of the end row (exclusive) to slice.
+   *                        The index number begins from 0.
    */
   template <typename T>
   inline Tensor Slice(const int& begin_idx, const int& end_idx) const;
 
   platform::Place place() const {
-    PADDLE_ENFORCE_NOT_NULL(holder_, "Tensor get place() must contains holder");
+    PADDLE_ENFORCE_NOT_NULL(
+        holder_, "A holder must exist when calling the method place().");
     return holder_->place();
   }

@@ -168,10 +168,11 @@ inline void Tensor::CopyFromVector(const std::vector<T>& src,
 template <typename T>
 inline Tensor Tensor::Slice(const int& begin_idx, const int& end_idx) const {
   check_memory_size<T>();
-  PADDLE_ENFORCE_GE(begin_idx, 0, "Slice begin index is less than zero.");
-  PADDLE_ENFORCE_LE(end_idx, dims_[0], "Slice end index is out of bound.");
+  PADDLE_ENFORCE_GE(begin_idx, 0,
+                    "The start row index must be greater than or equal to 0.");
+  PADDLE_ENFORCE_LE(end_idx, dims_[0], "The end row index is out of bound.");
   PADDLE_ENFORCE_LT(begin_idx, end_idx,
-                    "Begin index must be less than end index.");
+                    "The start row index must be less than the end row index.");
   if (dims_[0] == 1) {
     return *this;

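Note: per the revised comments above, Slice() cuts only along the outermost (row) dimension, with a 0-based inclusive start row and an exclusive end row. A minimal NumPy sketch of the same convention (an illustration only, not the PaddlePaddle API):

import numpy as np

t = np.arange(12).reshape(4, 3)  # a 4x3 "tensor"
sub = t[1:3]                     # begin_idx=1 (inclusive), end_idx=3 (exclusive)
assert sub.shape == (2, 3)       # rows 1 and 2; row 3 is excluded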
@@ -49,7 +49,7 @@ class CrossEntropyOp : public framework::OperatorWithKernel {
     ctx->ShareLoD("X", /*->*/ "Y");
   }
 
-  // Explicitly set data type of output of the cross_entropy operator
+  // Explicitly set that the data type of the output of the cross_entropy operator
   // is determined by its input "X".
   framework::DataType IndicateDataType(
       const framework::ExecutionContext& ctx) const override {

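Note: IndicateDataType here only reports which input determines the kernel's data type. Conceptually (a hypothetical Python illustration, not Paddle's C++ API):

import numpy as np

def indicate_data_type(inputs):
    # the output of cross_entropy inherits the data type of input "X"
    return inputs["X"].dtype

x = np.zeros((4, 3), dtype=np.float32)
assert indicate_data_type({"X": x}) == np.float32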
(File diff suppressed because it is too large.)

@@ -19,27 +19,31 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using Tensor = framework::Tensor;
+using framework::Tensor;
 
 template <typename T, int MajorType = Eigen::RowMajor,
           typename IndexType = Eigen::DenseIndex>
 using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
 
-template <typename T>
+template <typename Place, typename T>
 class LinearChainCrfOpKernel : public framework::OpKernel<T> {
  public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    PADDLE_ENFORCE(platform::is_cpu_place(ctx.GetPlace()),
-                   "This kernel only runs on CPU.");
-  }
+  void Compute(const framework::ExecutionContext& ctx) const override;
+
+ protected:
+  T ForwardOneSequence(const platform::DeviceContext& ctx,
+                       const Tensor& emission, Tensor& emission_row_max,
+                       Tensor& emission_exps, const Tensor& trans_weights,
+                       Tensor& trans_weight_exps, const Tensor& label,
+                       Tensor& a) const;
+
+ private:
+  T NormalizeL1(T* x, size_t len) const;
 };
 
-template <typename T>
+template <typename Place, typename T>
 class LinearChainCrfGradOpKernel : public framework::OpKernel<T> {
  public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    PADDLE_ENFORCE(platform::is_cpu_place(ctx.GetPlace()),
-                   "This kernel only runs on CPU.");
-  }
+  void Compute(const framework::ExecutionContext& ctx) const override;
 };
 
 }  // namespace operators

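Note: the header above only declares ForwardOneSequence and NormalizeL1; their definitions live in the suppressed .cc diff, so the NumPy sketch below is a hedged reconstruction of the standard forward (alpha) recursion such a method computes, with the per-step L1 normalization that the NormalizeL1 declaration suggests. All names here are illustrative, not the kernel's actual code.

import numpy as np

def normalize_l1(x):
    # scale so the entries sum to 1; keeps the recursion numerically stable
    s = x.sum()
    return x / s, s

def forward_one_sequence(emission_exps, trans_exps, start_exps, stop_exps):
    # emission_exps: [seq_len, n_tags] exp of emission scores
    # trans_exps:    [n_tags, n_tags]  exp of transition weights
    seq_len, _ = emission_exps.shape
    alpha, s = normalize_l1(start_exps * emission_exps[0])
    log_z = np.log(s)                 # accumulated log partition function
    for k in range(1, seq_len):
        alpha, s = normalize_l1((alpha @ trans_exps) * emission_exps[k])
        log_z += np.log(s)
    # absorb the stop (end-of-sequence) weights into the partition function
    return log_z + np.log((alpha * stop_exps).sum())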
@@ -60,19 +60,23 @@ Because this operators performs a softmax on logits internally, it expects
 unscaled logits. Please do not call this op with the output of softmax operator,
 which will produce incorrect results.
 
-This operators expects mutually exclusive hard labels, each sample in a batch
-is in exactly one class with probabilities 1. Each sample in the batch with one
-and only one label.
+When the attribute softLabel is set false, this operator expects mutually
+exclusive hard labels: each sample in a batch is in exactly one class with
+probability 1. Each sample in the batch has one and only one label.
 
 Equation:
 
 1) hard label (one-hot label)
 
-Loss_j = -\text{Logit}_{Label_j} + \log\left(\sum_{i=0}^{K}\exp(\text{Logit}_i)\right), j = 1, ..., K
+Loss_j = \f$ -\text{Logit}_{Label_j} +
+         \log\left(\sum_{i=0}^{K}\exp(\text{Logit}_i)\right),
+         j = 1, ..., K \f$
 
 2) soft label (a distribution over all classes)
 
-Loss_j = -\sum_{i=0}^{K}\text{Label}_i\left(\text{Logit}_i-\log\left(\sum_{i=0}^{K}\exp(\text{Logit}_i)\right)\right), j = 1,...,K
+Loss_j = \f$ -\sum_{i=0}^{K}\text{Label}_i\left(\text{Logit}_i -
+         \log\left(\sum_{i=0}^{K}\exp(\text{Logit}_i)\right)\right),
+         j = 1,...,K \f$
 
 )DOC");
 }

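Note: a small self-contained check of the two equations above, written directly from the formulas rather than from the operator's kernel (illustrative code, not the Paddle implementation):

import numpy as np

def hard_label_loss(logits, label):
    # Loss = -Logit_{label} + log(sum_i exp(Logit_i))
    return -logits[label] + np.log(np.exp(logits).sum())

def soft_label_loss(logits, soft_label):
    # Loss = -sum_i Label_i * (Logit_i - log(sum_j exp(Logit_j)))
    log_z = np.log(np.exp(logits).sum())
    return -(soft_label * (logits - log_z)).sum()

logits = np.array([2.0, 0.5, -1.0])
one_hot = np.array([1.0, 0.0, 0.0])
# with a one-hot soft label the two forms agree
assert np.isclose(hard_label_loss(logits, 0), soft_label_loss(logits, one_hot))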
@@ -61,13 +61,13 @@ class LinearChainCrfForward(object):
             s += alpha[-1, i] * self.b_exps[i]
         log_likelihood -= np.log(s)
 
-        # calculate the noninator part.
+        # calculate the numerator part.
         log_likelihood += (
             self.a[label[0]] + self.x[0, label[0]] + self.b[label[-1]])
         for k in range(1, seq_len):
             log_likelihood += (
                 self.x[k, label[k]] + self.w[label[k - 1], label[k]])
-        return log_likelihood
+        return -log_likelihood
 
     def crf_forward_compute(self):
         for i in range(self.seq_num):
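Note: the sign flip above makes the reference return -log p(y|x) = log Z - score(x, y). For small problems this can be cross-checked by brute force; the helper below is hypothetical (it mirrors the test's a/b/w/x field names but is not part of the test file):

import itertools
import numpy as np

def neg_log_likelihood_bruteforce(a, b, w, x, label):
    seq_len, n_tags = x.shape

    def path_score(y):
        # start weight + first emission + stop weight, then the chain terms
        s = a[y[0]] + x[0, y[0]] + b[y[-1]]
        s += sum(x[k, y[k]] + w[y[k - 1], y[k]] for k in range(1, seq_len))
        return s

    # log Z by exhaustive enumeration over all n_tags**seq_len label paths
    paths = itertools.product(range(n_tags), repeat=seq_len)
    log_z = np.log(sum(np.exp(path_score(y)) for y in paths))
    return log_z - path_score(label)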
@@ -102,7 +102,7 @@ class TestLinearChainCrfOp(OpTest):
         self.inputs = {
             "Emission": (emission, lod),
             "Transition": transition,
-            "label": (labels, lod)
+            "Label": (labels, lod)
         }
 
         crf = LinearChainCrfForward(lod[0], emission, transition, labels)
