Merge pull request #13689 from sneaxiy/sparse_rmsprop

Fix sparse rmsprop
fix-readmd
Zeng Jinle 7 years ago committed by GitHub
commit 93606c2c2c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -18,6 +18,7 @@ limitations under the License. */
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/detail/safe_ref.h"
#include "paddle/fluid/operators/math/algorithm.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/platform/for_range.h"
@ -199,23 +200,9 @@ struct SparseAdamFunctor {
row_numel_(row_numel),
row_count_(row_count) {}
inline HOSTDEVICE int64_t BinarySearchInRows(int64_t row) const {
int64_t beg = 0, end = row_count_ - 1;
while (beg <= end) {
auto mid = ((beg + end) >> 1);
if (rows_[mid] == row)
return mid;
else if (rows_[mid] < row)
beg = mid + 1;
else
end = mid - 1;
}
return -1;
}
inline HOSTDEVICE void operator()(size_t i) const {
int64_t row = i / row_numel_;
auto row_idx = BinarySearchInRows(row);
auto row_idx =
math::BinarySearch<int64_t>(rows_, row_count_, i / row_numel_);
T g = row_idx >= 0 ? grad_[row_idx * row_numel_ + i % row_numel_] : 0;
// The following code is the same as dense

@ -0,0 +1,44 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include <cstdint> // for int64_t
#include <numeric>
#include "paddle/fluid/platform/hostdevice.h"
namespace paddle {
namespace operators {
namespace math {
template <typename T>
HOSTDEVICE inline int64_t BinarySearch(const T *x, int64_t num, const T &val) {
int64_t beg = 0, end = num - 1;
while (beg <= end) {
auto mid = ((beg + end) >> 1);
if (x[mid] == val)
return mid;
else if (x[mid] < val)
beg = mid + 1;
else
end = mid - 1;
}
return -1;
}
} // namespace math
} // namespace operators
} // namespace paddle

@ -12,9 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <map>
#include <set>
#include <vector>
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
namespace paddle {
@ -245,40 +247,42 @@ struct MergeAdd<platform::CPUDeviceContext, T> {
const framework::SelectedRows& input,
framework::SelectedRows* output) {
framework::SelectedRows& out = *output;
auto input_rows = input.rows();
std::vector<int64_t> merge_rows;
merge_rows.reserve(input_rows.size());
std::unordered_map<int64_t, size_t> rows_pos_map;
rows_pos_map.reserve(input_rows.size());
size_t idx = 0u;
for (std::vector<int64_t>::iterator iter = input_rows.begin();
iter != input_rows.end(); ++iter) {
if (rows_pos_map.find(*iter) == rows_pos_map.end()) {
rows_pos_map[*iter] = idx++;
merge_rows.emplace_back(*iter);
}
std::vector<int64_t> input_rows(input.rows());
std::map<int64_t, std::vector<int64_t>> merge_row_map;
for (size_t i = 0; i < input_rows.size(); ++i) {
merge_row_map[input_rows[i]].push_back(i);
}
auto input_width = input.value().dims()[1];
out.set_rows(merge_rows);
std::vector<int64_t> merge_rows(merge_row_map.size());
size_t idx = 0;
int64_t input_width = input.value().dims()[1];
out.set_height(input.height());
out.mutable_value()->mutable_data<T>(
T* out_data = out.mutable_value()->mutable_data<T>(
framework::make_ddim(
{static_cast<int64_t>(merge_rows.size()), input_width}),
context.GetPlace());
math::SetConstant<platform::CPUDeviceContext, T> constant_functor;
constant_functor(context, out.mutable_value(), 0.0);
auto* out_data = out.mutable_value()->data<T>();
auto* input_data = input.value().data<T>();
for (size_t i = 0; i < input_rows.size(); i++) {
size_t out_i = rows_pos_map[input_rows[i]];
for (int64_t j = 0; j < input_width; j++) {
out_data[out_i * input_width + j] += input_data[i * input_width + j];
const T* in_data = input.value().data<T>();
for (auto& row_pair : merge_row_map) {
auto* out_ptr = out_data + idx * input_width;
auto& rows = row_pair.second;
merge_rows[idx] = row_pair.first;
++idx;
// rows.size() is always larger than 0
std::memcpy(out_ptr, in_data + rows[0] * input_width,
sizeof(T) * input_width);
for (size_t i = 1; i < rows.size(); ++i) {
auto* in_ptr = in_data + rows[i] * input_width;
for (int64_t j = 0; j < input_width; ++j) {
out_ptr[j] += in_ptr[j];
}
}
}
out.set_rows(merge_rows);
}
};

@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <map>
#include <vector>
#include "paddle/fluid/framework/eigen.h"
@ -97,41 +98,39 @@ struct MergeAdd<platform::CPUDeviceContext, float> {
const framework::SelectedRows& input,
framework::SelectedRows* output) {
framework::SelectedRows& out = *output;
auto input_rows = input.rows();
std::vector<int64_t> merge_rows;
merge_rows.reserve(input_rows.size());
std::unordered_map<int64_t, size_t> rows_pos_map;
rows_pos_map.reserve(input_rows.size());
size_t idx = 0u;
for (std::vector<int64_t>::iterator iter = input_rows.begin();
iter != input_rows.end(); ++iter) {
if (rows_pos_map.find(*iter) == rows_pos_map.end()) {
rows_pos_map[*iter] = idx++;
merge_rows.emplace_back(*iter);
}
std::vector<int64_t> input_rows(input.rows());
std::map<int64_t, std::vector<int64_t>> merge_row_map;
for (size_t i = 0; i < input_rows.size(); ++i) {
merge_row_map[input_rows[i]].push_back(i);
}
auto input_width = input.value().dims()[1];
out.set_rows(merge_rows);
std::vector<int64_t> merge_rows(merge_row_map.size());
size_t idx = 0;
int64_t input_width = input.value().dims()[1];
out.set_height(input.height());
out.mutable_value()->mutable_data<float>(
auto* out_data = out.mutable_value()->mutable_data<float>(
framework::make_ddim(
{static_cast<int64_t>(merge_rows.size()), input_width}),
context.GetPlace());
math::SetConstant<platform::CPUDeviceContext, float> constant_functor;
constant_functor(context, out.mutable_value(), 0.0);
auto* out_data = out.mutable_value()->data<float>();
auto* input_data = input.value().data<float>();
auto* in_data = input.value().data<float>();
auto blas = GetBlas<platform::CPUDeviceContext, float>(context);
for (size_t i = 0; i < input_rows.size(); i++) {
size_t out_i = rows_pos_map[input_rows[i]];
float* y = out_data + out_i * input_width;
const float* x = input_data + i * input_width;
blas.AXPY(input_width, 1., x, y);
for (auto& row_pair : merge_row_map) {
auto* out_ptr = out_data + idx * input_width;
auto& rows = row_pair.second;
merge_rows[idx] = row_pair.first;
++idx;
// rows.size() is always larger than 0
blas.VCOPY(input_width, in_data + rows[0] * input_width, out_ptr);
for (size_t i = 1; i < rows.size(); ++i) {
blas.AXPY(input_width, 1., in_data + rows[i] * input_width, out_ptr);
}
}
out.set_rows(merge_rows);
}
};
@ -148,41 +147,39 @@ struct MergeAdd<platform::CPUDeviceContext, double> {
const framework::SelectedRows& input,
framework::SelectedRows* output) {
framework::SelectedRows& out = *output;
auto input_rows = input.rows();
std::vector<int64_t> merge_rows;
merge_rows.reserve(input_rows.size());
std::unordered_map<int64_t, size_t> rows_pos_map;
rows_pos_map.reserve(input_rows.size());
size_t idx = 0u;
for (std::vector<int64_t>::iterator iter = input_rows.begin();
iter != input_rows.end(); ++iter) {
if (rows_pos_map.find(*iter) == rows_pos_map.end()) {
rows_pos_map[*iter] = idx++;
merge_rows.emplace_back(*iter);
}
std::vector<int64_t> input_rows(input.rows());
std::map<int64_t, std::vector<int64_t>> merge_row_map;
for (size_t i = 0; i < input_rows.size(); ++i) {
merge_row_map[input_rows[i]].push_back(i);
}
auto input_width = input.value().dims()[1];
out.set_rows(merge_rows);
std::vector<int64_t> merge_rows(merge_row_map.size());
size_t idx = 0;
int64_t input_width = input.value().dims()[1];
out.set_height(input.height());
out.mutable_value()->mutable_data<double>(
auto* out_data = out.mutable_value()->mutable_data<double>(
framework::make_ddim(
{static_cast<int64_t>(merge_rows.size()), input_width}),
context.GetPlace());
math::SetConstant<platform::CPUDeviceContext, double> constant_functor;
constant_functor(context, out.mutable_value(), 0.0);
auto* out_data = out.mutable_value()->data<double>();
auto* input_data = input.value().data<double>();
auto* in_data = input.value().data<double>();
auto blas = GetBlas<platform::CPUDeviceContext, double>(context);
for (size_t i = 0; i < input_rows.size(); i++) {
size_t out_i = rows_pos_map[input_rows[i]];
double* y = out_data + out_i * input_width;
const double* x = input_data + i * input_width;
blas.AXPY(input_width, 1., x, y);
for (auto& row_pair : merge_row_map) {
auto* out_ptr = out_data + idx * input_width;
auto& rows = row_pair.second;
merge_rows[idx] = row_pair.first;
++idx;
// rows.size() is always larger than 0
blas.VCOPY(input_width, in_data + rows[0] * input_width, out_ptr);
for (size_t i = 1; i < rows.size(); ++i) {
blas.AXPY(input_width, 1., in_data + rows[i] * input_width, out_ptr);
}
}
out.set_rows(merge_rows);
}
};

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff
Loading…
Cancel
Save