|
|
@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
See the License for the specific language governing permissions and
|
|
|
|
See the License for the specific language governing permissions and
|
|
|
|
limitations under the License. */
|
|
|
|
limitations under the License. */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#include <cstdlib>
|
|
|
|
|
|
|
|
#include <ctime>
|
|
|
|
#include "paddle/fluid/framework/device_worker.h"
|
|
|
|
#include "paddle/fluid/framework/device_worker.h"
|
|
|
|
#include "paddle/fluid/platform/cpu_helper.h"
|
|
|
|
#include "paddle/fluid/platform/cpu_helper.h"
|
|
|
|
|
|
|
|
|
|
|
@ -65,6 +67,13 @@ void DownpourWorker::Initialize(const TrainerDesc& desc) {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
flag_partial_push_ = false;
|
|
|
|
|
|
|
|
for (auto& m : param_.program_config(0).partial_pushdense_condtable_map()) {
|
|
|
|
|
|
|
|
cond2table_map_[m.key()] = m.value();
|
|
|
|
|
|
|
|
condvalue_set_.insert(m.value());
|
|
|
|
|
|
|
|
flag_partial_push_ = true;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
skip_ops_.resize(param_.skip_ops_size());
|
|
|
|
skip_ops_.resize(param_.skip_ops_size());
|
|
|
|
for (int i = 0; i < param_.skip_ops_size(); ++i) {
|
|
|
|
for (int i = 0; i < param_.skip_ops_size(); ++i) {
|
|
|
|
skip_ops_[i] = param_.skip_ops(i);
|
|
|
|
skip_ops_[i] = param_.skip_ops(i);
|
|
|
@ -876,14 +885,42 @@ void DownpourWorker::TrainFiles() {
|
|
|
|
#endif
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
if (need_to_push_dense_) {
|
|
|
|
if (need_to_push_dense_) {
|
|
|
|
for (int i = 0; i < param_.program_config(0).push_dense_table_id_size();
|
|
|
|
if (flag_partial_push_) {
|
|
|
|
++i) {
|
|
|
|
Variable* var = (*thread_scope_).FindVar("cond_tag");
|
|
|
|
uint64_t tid = static_cast<uint64_t>(
|
|
|
|
LoDTensor* tensor = var->GetMutable<LoDTensor>();
|
|
|
|
param_.program_config(0).push_dense_table_id(i));
|
|
|
|
// cond_tag is read as int64 below; its dtype is checked in the Python code
|
|
|
|
fleet_ptr_->PushDenseVarsAsync(
|
|
|
|
int64_t* cond_value_batch = tensor->data<int64_t>();
|
|
|
|
*thread_scope_, tid, dense_grad_names_[tid], &push_sparse_status_,
|
|
|
|
|
|
|
|
scale_datanorm_, cur_batch);
|
|
|
|
for (int i = 0; i < param_.program_config(0).push_dense_table_id_size();
|
|
|
|
|
|
|
|
++i) {
|
|
|
|
|
|
|
|
uint64_t tid = static_cast<uint64_t>(
|
|
|
|
|
|
|
|
param_.program_config(0).push_dense_table_id(i));
|
|
|
|
|
|
|
|
if (condvalue_set_.find(tid) != condvalue_set_.end()) {
|
|
|
|
|
|
|
|
// common dense tables (not in condvalue_set_) are always pushed;
// a conditional table is pushed only when this batch selects it
|
|
|
|
|
|
|
|
if (cond2table_map_[cond_value_batch[0]] != tid) {
|
|
|
|
|
|
|
|
// this batch's condition maps to a different table: skip this one
|
|
|
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
VLOG(3) << "push multitask dense gradient " << tid;
|
|
|
|
|
|
|
|
fleet_ptr_->PushDenseVarsAsync(
|
|
|
|
|
|
|
|
*thread_scope_, tid, dense_grad_names_[tid], &push_sparse_status_,
|
|
|
|
|
|
|
|
scale_datanorm_, cur_batch);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
|
|
|
for (int i = 0; i < param_.program_config(0).push_dense_table_id_size();
|
|
|
|
|
|
|
|
++i) {
|
|
|
|
|
|
|
|
uint64_t tid = static_cast<uint64_t>(
|
|
|
|
|
|
|
|
param_.program_config(0).push_dense_table_id(i));
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
fleet_ptr_->PushDenseVarsAsync(
|
|
|
|
|
|
|
|
*thread_scope_, tid, dense_grad_names_[tid], &push_sparse_status_,
|
|
|
|
|
|
|
|
scale_datanorm_, cur_batch);
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
VLOG(3) << "push dense gradient done.";
|
|
|
|
VLOG(3) << "push dense gradient done.";
|
|
|
|
|
|
|
|
|
|
|
|
// the following code should be more precise and clean
|
|
|
|
// the following code should be more precise and clean
|
|
|
|