|
|
|
@ -19,12 +19,12 @@ namespace operators {
|
|
|
|
|
namespace math {
|
|
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
|
void MatrixBitCodeFunctor<T>::Add(const framework::LoDTensor& vec,
|
|
|
|
|
framework::LoDTensor* tmat) {
|
|
|
|
|
void MatrixBitCodeFunctor<T>::Add(const framework::Tensor& vec,
|
|
|
|
|
framework::Tensor* tmat) {
|
|
|
|
|
size_t batch_size = tmat->dims()[0];
|
|
|
|
|
size_t width = tmat->dims()[1];
|
|
|
|
|
for (size_t i = 0; i < batch_size; ++i) {
|
|
|
|
|
auto code = code_table->get_code(i);
|
|
|
|
|
auto code = code_table_->get_code(i);
|
|
|
|
|
int code_length = code->get_length();
|
|
|
|
|
for (int j = 0; j < code_length; ++j) {
|
|
|
|
|
size_t index = code->calc_index(j);
|
|
|
|
@ -34,12 +34,12 @@ void MatrixBitCodeFunctor<T>::Add(const framework::LoDTensor& vec,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
|
void MatrixBitCodeFunctor<T>::AddGrad(const framework::LoDTensor& tmat,
|
|
|
|
|
framework::LoDTensor* vec) {
|
|
|
|
|
void MatrixBitCodeFunctor<T>::AddGrad(const framework::Tensor& tmat,
|
|
|
|
|
framework::Tensor* vec) {
|
|
|
|
|
size_t batch_size = tmat.dims()[0];
|
|
|
|
|
size_t width = tmat.dims()[1];
|
|
|
|
|
for (size_t i = 0; i < batch_size; ++i) {
|
|
|
|
|
auto code = code_table->get_code(i);
|
|
|
|
|
auto code = code_table_->get_code(i);
|
|
|
|
|
int code_length = code->get_length();
|
|
|
|
|
for (int j = 0; j < code_length; ++j) {
|
|
|
|
|
size_t index = code->calc_index(j);
|
|
|
|
@ -49,17 +49,16 @@ void MatrixBitCodeFunctor<T>::AddGrad(const framework::LoDTensor& tmat,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
|
void MatrixBitCodeFunctor<T>::AddGrad(const framework::LoDTensor& tmat,
|
|
|
|
|
void MatrixBitCodeFunctor<T>::AddGrad(const framework::Tensor& tmat,
|
|
|
|
|
framework::SelectedRows* vec) {
|
|
|
|
|
size_t batch_size = tmat.dims()[0];
|
|
|
|
|
size_t width = tmat.dims()[1];
|
|
|
|
|
for (size_t i = 0; i < batch_size; ++i) {
|
|
|
|
|
auto code = code_table->get_code(i);
|
|
|
|
|
auto code = code_table_->get_code(i);
|
|
|
|
|
int code_length = code->get_length();
|
|
|
|
|
for (int j = 0; j < code_length; ++j) {
|
|
|
|
|
size_t index = code->calc_index(j);
|
|
|
|
|
int64_t row_index =
|
|
|
|
|
vec->AutoGrownIndex(static_cast<int64_t>(index), false, true);
|
|
|
|
|
int64_t row_index = vec->GetIndexFromId(static_cast<int64_t>(index));
|
|
|
|
|
vec->mutable_value()->data<T>()[row_index] +=
|
|
|
|
|
tmat.data<T>()[i * width + j];
|
|
|
|
|
}
|
|
|
|
@ -67,13 +66,13 @@ void MatrixBitCodeFunctor<T>::AddGrad(const framework::LoDTensor& tmat,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
|
void MatrixBitCodeFunctor<T>::Sum(const framework::LoDTensor& tmat,
|
|
|
|
|
framework::LoDTensor* sum, T scale_sum) {
|
|
|
|
|
void MatrixBitCodeFunctor<T>::Sum(const framework::Tensor& tmat,
|
|
|
|
|
framework::Tensor* sum, T scale_sum) {
|
|
|
|
|
size_t num_samples = tmat.dims()[0];
|
|
|
|
|
size_t o_width = tmat.dims()[1];
|
|
|
|
|
for (size_t i = 0; i < num_samples; ++i) {
|
|
|
|
|
T sm = static_cast<T>(0.0);
|
|
|
|
|
auto code = code_table->get_code(i);
|
|
|
|
|
auto code = code_table_->get_code(i);
|
|
|
|
|
int code_length = code->get_length();
|
|
|
|
|
for (int j = 0; j < code_length; ++j) {
|
|
|
|
|
if (code->calc_bit(j)) {
|
|
|
|
@ -87,9 +86,9 @@ void MatrixBitCodeFunctor<T>::Sum(const framework::LoDTensor& tmat,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
|
void MatrixBitCodeFunctor<T>::Mul(framework::LoDTensor* tmat,
|
|
|
|
|
const framework::LoDTensor& weight,
|
|
|
|
|
const framework::LoDTensor& input) {
|
|
|
|
|
void MatrixBitCodeFunctor<T>::Mul(framework::Tensor* tmat,
|
|
|
|
|
const framework::Tensor& weight,
|
|
|
|
|
const framework::Tensor& input) {
|
|
|
|
|
size_t num_samples = tmat->dims()[0];
|
|
|
|
|
size_t tmat_width = tmat->dims()[1];
|
|
|
|
|
size_t input_width = input.dims()[1];
|
|
|
|
@ -98,7 +97,7 @@ void MatrixBitCodeFunctor<T>::Mul(framework::LoDTensor* tmat,
|
|
|
|
|
auto weight_value = weight.data<T>();
|
|
|
|
|
auto input_value = input.data<T>();
|
|
|
|
|
for (size_t i = 0; i < num_samples; ++i) {
|
|
|
|
|
auto code = code_table->get_code(i);
|
|
|
|
|
auto code = code_table_->get_code(i);
|
|
|
|
|
int code_length = code->get_length();
|
|
|
|
|
for (int j = 0; j < code_length; ++j) {
|
|
|
|
|
size_t index = code->calc_index(j);
|
|
|
|
@ -113,9 +112,9 @@ void MatrixBitCodeFunctor<T>::Mul(framework::LoDTensor* tmat,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
|
void MatrixBitCodeFunctor<T>::MulGradWeight(const framework::LoDTensor& tmat,
|
|
|
|
|
framework::LoDTensor* weight,
|
|
|
|
|
const framework::LoDTensor& input) {
|
|
|
|
|
void MatrixBitCodeFunctor<T>::MulGradWeight(const framework::Tensor& tmat,
|
|
|
|
|
framework::Tensor* weight,
|
|
|
|
|
const framework::Tensor& input) {
|
|
|
|
|
size_t num_samples = tmat.dims()[0];
|
|
|
|
|
size_t input_width = input.dims()[1];
|
|
|
|
|
size_t tmat_width = tmat.dims()[1];
|
|
|
|
@ -124,7 +123,7 @@ void MatrixBitCodeFunctor<T>::MulGradWeight(const framework::LoDTensor& tmat,
|
|
|
|
|
auto weight_value = weight->data<T>();
|
|
|
|
|
auto input_value = input.data<T>();
|
|
|
|
|
for (size_t i = 0; i < num_samples; ++i) {
|
|
|
|
|
auto code = code_table->get_code(i);
|
|
|
|
|
auto code = code_table_->get_code(i);
|
|
|
|
|
int code_length = code->get_length();
|
|
|
|
|
for (int j = 0; j < code_length; ++j) {
|
|
|
|
|
size_t index = code->calc_index(j);
|
|
|
|
@ -138,9 +137,9 @@ void MatrixBitCodeFunctor<T>::MulGradWeight(const framework::LoDTensor& tmat,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
|
void MatrixBitCodeFunctor<T>::MulGradWeight(const framework::LoDTensor& tmat,
|
|
|
|
|
void MatrixBitCodeFunctor<T>::MulGradWeight(const framework::Tensor& tmat,
|
|
|
|
|
framework::SelectedRows* weight,
|
|
|
|
|
const framework::LoDTensor& input) {
|
|
|
|
|
const framework::Tensor& input) {
|
|
|
|
|
size_t num_samples = tmat.dims()[0];
|
|
|
|
|
size_t input_width = input.dims()[1];
|
|
|
|
|
size_t tmat_width = tmat.dims()[1];
|
|
|
|
@ -149,13 +148,12 @@ void MatrixBitCodeFunctor<T>::MulGradWeight(const framework::LoDTensor& tmat,
|
|
|
|
|
auto weight_value = weight->mutable_value()->data<T>();
|
|
|
|
|
auto input_value = input.data<T>();
|
|
|
|
|
for (size_t i = 0; i < num_samples; ++i) {
|
|
|
|
|
auto code = code_table->get_code(i);
|
|
|
|
|
auto code = code_table_->get_code(i);
|
|
|
|
|
int code_length = code->get_length();
|
|
|
|
|
for (int j = 0; j < code_length; ++j) {
|
|
|
|
|
size_t index = code->calc_index(j);
|
|
|
|
|
for (size_t k = 0; k < input_width; ++k) {
|
|
|
|
|
int64_t row_index =
|
|
|
|
|
weight->AutoGrownIndex(static_cast<int64_t>(index), false, true);
|
|
|
|
|
int64_t row_index = weight->GetIndexFromId(static_cast<int64_t>(index));
|
|
|
|
|
weight_value[row_index * weight_width + k] +=
|
|
|
|
|
tmat_value[i * tmat_width + j] * input_value[input_width * i + k];
|
|
|
|
|
}
|
|
|
|
@ -164,9 +162,9 @@ void MatrixBitCodeFunctor<T>::MulGradWeight(const framework::LoDTensor& tmat,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
|
void MatrixBitCodeFunctor<T>::MulGradError(const framework::LoDTensor& tmat,
|
|
|
|
|
const framework::LoDTensor& weight,
|
|
|
|
|
framework::LoDTensor* input) {
|
|
|
|
|
void MatrixBitCodeFunctor<T>::MulGradError(const framework::Tensor& tmat,
|
|
|
|
|
const framework::Tensor& weight,
|
|
|
|
|
framework::Tensor* input) {
|
|
|
|
|
size_t num_samples = tmat.dims()[0];
|
|
|
|
|
size_t tmat_width = tmat.dims()[1];
|
|
|
|
|
size_t input_width = input->dims()[1];
|
|
|
|
@ -176,7 +174,7 @@ void MatrixBitCodeFunctor<T>::MulGradError(const framework::LoDTensor& tmat,
|
|
|
|
|
auto input_value = input->data<T>();
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < num_samples; ++i) {
|
|
|
|
|
auto code = code_table->get_code(i);
|
|
|
|
|
auto code = code_table_->get_code(i);
|
|
|
|
|
int code_length = code->get_length();
|
|
|
|
|
for (int j = 0; j < code_length; ++j) {
|
|
|
|
|
size_t index = code->calc_index(j);
|
|
|
|
@ -191,11 +189,11 @@ void MatrixBitCodeFunctor<T>::MulGradError(const framework::LoDTensor& tmat,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
|
void MatrixBitCodeFunctor<T>::Sub(framework::LoDTensor* tmat) {
|
|
|
|
|
void MatrixBitCodeFunctor<T>::Sub(framework::Tensor* tmat) {
|
|
|
|
|
size_t num_samples = tmat->dims()[0];
|
|
|
|
|
size_t o_width = tmat->dims()[1];
|
|
|
|
|
for (size_t i = 0; i < num_samples; ++i) {
|
|
|
|
|
auto code = code_table->get_code(i);
|
|
|
|
|
auto code = code_table_->get_code(i);
|
|
|
|
|
int code_length = code->get_length();
|
|
|
|
|
for (int j = 0; j < code_length; ++j) {
|
|
|
|
|
if (code->calc_bit(j)) {
|
|
|
|
|