Merge branch 'develop' of github.com:PaddlePaddle/Paddle into parallel_graph_mode

test=develop
revert-15207-remove_op_handle_lock_and_fix_var
Yancey1989 6 years ago
commit e65436103f

@ -149,6 +149,14 @@ RUN git clone https://github.com/woboq/woboq_codebrowser /woboq && \
-DCMAKE_BUILD_TYPE=Release . \
make)
# ar mishandles 4GB files
# https://sourceware.org/bugzilla/show_bug.cgi?id=14625
# Remove this source build once apt-get provides binutils 2.27 or newer.
RUN wget -q https://launchpad.net/ubuntu/+archive/primary/+sourcefiles/binutils/2.27-9ubuntu1/binutils_2.27.orig.tar.gz && \
tar -xzf binutils_2.27.orig.tar.gz && \
cd binutils-2.27 && \
./configure && make -j && make install && cd .. && rm -rf binutils-2.27 binutils_2.27.orig.tar.gz
# Configure OpenSSH server. c.f. https://docs.docker.com/engine/examples/running_ssh_service
RUN mkdir /var/run/sshd
RUN echo 'root:root' | chpasswd

@ -16,14 +16,6 @@ IF(NOT ${WITH_MKLML})
return()
ENDIF(NOT ${WITH_MKLML})
IF(APPLE)
MESSAGE(WARNING
"Mac is not supported with MKLML in Paddle yet."
"Force WITH_MKLML=OFF")
SET(WITH_MKLML OFF CACHE STRING "Disable MKLML package in Windows and MacOS" FORCE)
return()
ENDIF()
INCLUDE(ExternalProject)
SET(MKLML_DST_DIR "mklml")
SET(MKLML_INSTALL_ROOT "${THIRD_PARTY_PATH}/install")
@ -47,10 +39,13 @@ SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${MKLML_ROOT}/lib")
IF((NOT DEFINED MKLML_VER) OR (NOT DEFINED MKLML_URL))
MESSAGE(STATUS "use pre defined download url")
if(WIN32)
SET(MKLML_VER "mklml_win_2019.0.20180710" CACHE STRING "" FORCE)
SET(MKLML_VER "mklml_win_2019.0.1.20180928" CACHE STRING "" FORCE)
SET(MKLML_URL "https://paddlepaddledeps.cdn.bcebos.com/${MKLML_VER}.zip" CACHE STRING "" FORCE)
elseif(APPLE)
SET(MKLML_VER "mklml_mac_2019.0.1.20180928" CACHE STRING "" FORCE)
SET(MKLML_URL "http://paddlepaddledeps.cdn.bcebos.com/${MKLML_VER}.tgz" CACHE STRING "" FORCE)
else()
SET(MKLML_VER "mklml_lnx_2019.0.20180710" CACHE STRING "" FORCE)
SET(MKLML_VER "mklml_lnx_2019.0.1.20180928" CACHE STRING "" FORCE)
SET(MKLML_URL "http://paddlepaddledeps.cdn.bcebos.com/${MKLML_VER}.tgz" CACHE STRING "" FORCE)
ENDIF()
endif()

@ -71,7 +71,7 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder {
context->endpoints_ = strategy_.trainers_endpoints_;
context->trainer_id_ = strategy_.trainer_id_;
PADDLE_ENFORCE(strategy_.trainer_id_ >= 0, "trainer_id_ >= 0");
if (strategy_.trainer_id_ > 0) {
if (strategy_.trainer_id_ > 0 && strategy_.trainers_endpoints_.size() > 0) {
PADDLE_ENFORCE((unsigned)(strategy_.trainer_id_) <
strategy_.trainers_endpoints_.size(),
"trainer_id_ < endpoints_ size");

@ -69,6 +69,15 @@ inline std::string GradVarName(const std::string& var_name) {
return result;
}
// Strips the gradient suffix from `grad_var_name`.
//
// Everything from the last occurrence of kGradVarSuffix onwards is dropped.
// A name that does not contain the suffix is returned unchanged.
inline std::string GradOriginalVarName(const std::string& grad_var_name) {
  const std::size_t suffix_pos = grad_var_name.rfind(kGradVarSuffix);
  return suffix_pos == std::string::npos
             ? grad_var_name
             : grad_var_name.substr(0, suffix_pos);
}
proto::VarType::Type GetDataTypeOfVar(const Variable* var);
const Tensor* GetLoDTensorOrSelectedRowsValueFromVar(const Variable& var);
Tensor* GetMutableLoDTensorOrSelectedRowsValueFromVar(Variable* var);

@ -288,3 +288,30 @@ TEST(OpKernel, multi_inputs) {
auto op = paddle::framework::OpRegistry::CreateOp(op_desc);
op->Run(scope, cpu_place);
}
// Round-trips several variable names (including the empty name) through
// GradVarName and GradOriginalVarName, and checks that stripping the suffix
// from an already-stripped name leaves it unchanged.
TEST(VarNameTest, all) {
  const std::string var_names[] = {"X", "XYZ", ""};
  for (const std::string& var_name : var_names) {
    const std::string grad_var_name =
        paddle::framework::GradVarName(var_name);
    ASSERT_EQ(grad_var_name, var_name + "@GRAD");

    std::string original_var_name =
        paddle::framework::GradOriginalVarName(grad_var_name);
    ASSERT_EQ(original_var_name, var_name);

    // A name without the gradient suffix must pass through untouched.
    original_var_name =
        paddle::framework::GradOriginalVarName(original_var_name);
    ASSERT_EQ(original_var_name, var_name);
  }
}

@ -21,6 +21,7 @@
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/string/printf.h"
namespace paddle {
@ -31,8 +32,14 @@ using framework::Variable;
void AddTo(Variable* src, Variable* dst) {
framework::LoDTensor* dst_tensor = dst->GetMutable<framework::LoDTensor>();
framework::LoDTensor* src_tensor = src->GetMutable<framework::LoDTensor>();
PADDLE_ENFORCE(dst_tensor->numel() == src_tensor->numel(), "%lld vs %lld",
dst_tensor->numel(), src_tensor->numel());
// FIXME(minqiyang): loss_grad op will pass a zero grad of label
// ugly fix for it
if (src_tensor->numel() == 0) {
return;
}
PADDLE_ENFORCE(dst_tensor->numel() == src_tensor->numel(),
"dst_numel %lld vs. src_numel %lld", dst_tensor->numel(),
src_tensor->numel());
float* dst_data = dst_tensor->mutable_data<float>(platform::CPUPlace());
const float* src_data = src_tensor->data<float>();
for (size_t i = 0; i < src_tensor->numel(); ++i) {
@ -45,6 +52,10 @@ class Autograd {
Autograd() {}
void RunBackward(VarBase* var) {
if (var->stop_gradient_) {
return;
}
std::deque<OpBase*> ready;
ready.push_back(var->pre_op_);
@ -60,6 +71,9 @@ class Autograd {
const std::vector<VarBase*>& ingrads = it.second;
for (size_t i = 0; i < ingrads.size(); ++i) {
if (!ingrads[i]) continue;
if (ready_op->input_vars_[it.first][i]->stop_gradient_) {
continue;
}
OpBase* pre_op = ready_op->pre_ops_[it.first][i];
if (!pre_op) continue;
@ -107,7 +121,7 @@ framework::LoDTensor& VarBase::Grad() {
std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
if (!grad_op_desc_) {
VLOG(3) << "op with no grad: " << op_desc_->Type();
LOG(WARNING) << "op with no grad: " << op_desc_->Type();
return {};
}
VLOG(3) << "op grad " << grad_op_desc_->Type();
@ -117,15 +131,18 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
for (auto it : grad_output_vars_) {
auto& outputs = grad_outputs[it.first];
for (size_t i = 0; i < it.second.size(); ++i) {
tmp_vars.emplace_back(new framework::Variable());
outputs.push_back(tmp_vars.back().get());
outputs.back()->GetMutable<framework::LoDTensor>();
// Allocate a new variable
Variable* tmp_var = new framework::Variable();
tmp_var->GetMutable<framework::LoDTensor>();
tmp_vars.emplace_back(tmp_var);
outputs.push_back(tmp_var);
}
}
framework::RuntimeContext ctx(grad_input_vars_, grad_outputs);
// No need to do static infer shape here.
// No need to do compile time infer shape here.
// grad_op_desc_->InferShape(*block_);
grad_op_desc_->InferVarType(block_);
@ -144,6 +161,7 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
for (auto it : grad_output_vars_) {
auto& outputs = grad_outputs[it.first];
auto& origin_outputs = it.second;
for (size_t i = 0; i < outputs.size(); ++i) {
framework::Variable* orig_grad = origin_outputs[i];
AddTo(outputs[i], orig_grad);

@ -86,23 +86,30 @@ class VarBase {
pre_op_out_idx_(-1),
var_desc_(nullptr),
var_(new framework::Variable()),
grads_(new framework::Variable()) {}
grads_(new framework::Variable()),
stop_gradient_(false) {}
virtual ~VarBase() {
if (var_) {
delete var_;
var_ = nullptr;
}
if (grads_) {
delete grads_;
grads_ = nullptr;
}
}
explicit VarBase(bool stop_gradient)
: pre_op_(nullptr),
pre_op_out_idx_(-1),
var_desc_(nullptr),
var_(new framework::Variable()),
grads_(new framework::Variable()),
stop_gradient_(stop_gradient) {}
virtual ~VarBase() {}
void RunBackward();
framework::LoDTensor& Grad();
// Builds the gradient variable name for this variable: the name stored in
// var_desc_ followed by "@IGrad". Enforces that var_desc_ has been set.
inline std::string GradName() const {
  PADDLE_ENFORCE(
      var_desc_,
      "Couldn't get gradient variable's name, please call backward() first");
  const std::string var_name = var_desc_->Name();
  return string::Sprintf("%s@IGrad", var_name);
}
OpBase* pre_op_;
std::string pre_op_out_name_;
int pre_op_out_idx_;
@ -110,6 +117,8 @@ class VarBase {
framework::VarDesc* var_desc_;
framework::Variable* var_;
framework::Variable* grads_;
bool stop_gradient_;
};
class OpBase {

@ -50,16 +50,14 @@ void InitVar(framework::Variable* var, framework::Variable* grad_var) {
class Tracer {
public:
explicit Tracer(framework::BlockDesc* root_block,
framework::BlockDesc* startup_block)
: root_block_(root_block), startup_block_(startup_block) {}
explicit Tracer(framework::BlockDesc* root_block) : root_block_(root_block) {}
virtual ~Tracer() {}
void Trace(OpBase* op,
const std::map<std::string, std::vector<VarBase*>>& inputs,
const std::map<std::string, std::vector<VarBase*>>& outputs,
framework::BlockDesc* block) {
framework::BlockDesc* block, const bool stop_gradient = false) {
std::map<std::string, VarBase*> vars;
framework::OpDesc* op_desc = op->op_desc_;
@ -107,6 +105,7 @@ class Tracer {
} else {
LOG(ERROR) << "tracer doesn't support yet";
}
out->stop_gradient_ = stop_gradient;
out->pre_op_ = op;
out->pre_op_out_name_ = it.first;
out->pre_op_out_idx_ = i;
@ -130,9 +129,7 @@ class Tracer {
p.op.RuntimeInferShape(scope, place, ctx);
p.func(framework::ExecutionContext(p.op, scope, *p.dev_ctx, p.ctx));
if (block == startup_block_) {
op->grad_op_desc_ = nullptr;
} else {
if (!stop_gradient) {
framework::OpDesc* grad_op_desc;
auto grad_to_var = new std::unordered_map<std::string, std::string>();
CreateGradOp(*op_desc, {}, {block}, &grad_op_desc, grad_to_var);
@ -156,6 +153,7 @@ class Tracer {
}
}
}
for (auto it : grad_op_desc->Outputs()) {
auto& grad_out_vars = op->grad_output_vars_[it.first];
for (const std::string& grad_outvar : it.second) {
@ -170,12 +168,12 @@ class Tracer {
}
}
}
op->block_ = block;
}
private:
framework::BlockDesc* root_block_;
framework::BlockDesc* startup_block_;
};
} // namespace imperative

@ -251,7 +251,12 @@ bool AnalysisPredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
input.set_lod(lod);
int idx = -1;
if (config_.specify_input_name) {
idx = feed_names_[inputs[i].name];
auto name = inputs[i].name;
if (feed_names_.find(name) == feed_names_.end()) {
LOG(ERROR) << "feed names from program do not have name: [" << name
<< "] from specified input";
}
idx = feed_names_[name];
} else {
idx = boost::get<int>(feeds_[i]->GetAttr("col"));
}

@ -90,6 +90,11 @@ set(SEQ_CONV1_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/seq_conv1")
download_model_and_data(${SEQ_CONV1_INSTALL_DIR} "seq_conv1_model.tar.gz" "seq_conv1_data.txt.tar.gz")
inference_analysis_api_test(test_analyzer_seq_conv1 ${SEQ_CONV1_INSTALL_DIR} analyzer_seq_conv1_tester.cc)
# seq_pool1
set(SEQ_POOL1_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/seq_pool")
download_model_and_data(${SEQ_POOL1_INSTALL_DIR} "seq_pool1_model_.tar.gz" "seq_pool1_data.txt.tar.gz")
inference_analysis_api_test(test_analyzer_seq_pool1 ${SEQ_POOL1_INSTALL_DIR} analyzer_seq_pool1_tester.cc)
# ocr
set(OCR_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/ocr")
if (NOT EXISTS ${OCR_INSTALL_DIR})
@ -108,10 +113,6 @@ inference_analysis_api_test_with_refer_result(test_analyzer_mobilenet_transpose
inference_analysis_api_test_with_fake_data(test_analyzer_resnet50
"${INFERENCE_DEMO_INSTALL_DIR}/resnet50" analyzer_resnet50_tester.cc "resnet50_model.tar.gz")
# seq_pool1
inference_analysis_api_test_with_fake_data(test_analyzer_seq_pool1
"${INFERENCE_DEMO_INSTALL_DIR}/seq_pool1" analyzer_seq_pool1_tester.cc "seq_pool1.tar.gz")
# mobilenet with depthwise_conv op
inference_analysis_api_test_with_fake_data(test_analyzer_mobilenet_depthwise_conv
"${INFERENCE_DEMO_INSTALL_DIR}/mobilenet_depthwise_conv" analyzer_resnet50_tester.cc "mobilenet_model.tar.gz")

@ -60,8 +60,7 @@ struct DataRecord {
}
};
void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
int batch_size) {
void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data) {
PaddleTensor lod_word_tensor, lod_mention_tensor;
lod_word_tensor.name = "word";
lod_mention_tensor.name = "mention";
@ -100,7 +99,7 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
int epoch = FLAGS_test_all_data ? data.num_samples / FLAGS_batch_size : 1;
LOG(INFO) << "number of samples: " << epoch * FLAGS_batch_size;
for (int bid = 0; bid < epoch; ++bid) {
PrepareInputs(&input_slots, &data, FLAGS_batch_size);
PrepareInputs(&input_slots, &data);
(*inputs).emplace_back(input_slots);
}
}

@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <algorithm>
#include <fstream>
#include <iostream>
#include "paddle/fluid/inference/tests/api/tester_helper.h"
@ -20,6 +21,106 @@ namespace paddle {
namespace inference {
namespace analysis {
// One input slot of a single mini-batch, as filled in by DataRecord::Prepare.
struct OneSlotInBatch {
// Slot name (the key the slot has in DataRecord::datasets).
std::string name;
// One float vector per sample of the batch.
std::vector<std::vector<float>> data;
// Set by Prepare to {total number of 11-float rows in the batch, 11}.
std::vector<int> shape;
// Cumulative row offsets: batch_size + 1 entries starting at 0, where each
// sample contributes (its float count / 11) rows.
std::vector<size_t> lod;
};
struct DataRecord {
std::vector<std::vector<OneSlotInBatch>> batched_data;
std::map<std::string, std::vector<std::vector<float>>> datasets;
size_t batch_iter{0}, num_samples; // total number of samples
DataRecord() = default;
explicit DataRecord(const std::string &path, int batch_size = 1) {
Load(path);
Prepare(batch_size);
}
void Load(const std::string &path) {
std::ifstream file(path);
constexpr int num_slots = 154;
std::string line;
int num_lines = 0;
while (std::getline(file, line)) {
num_lines++;
std::vector<std::string> data;
split(line, '\t', &data);
std::vector<float> slot_data;
split_to_float(data[1], ' ', &slot_data);
std::string name = data[0];
PADDLE_ENFORCE_EQ(slot_data.size() % 11, 0,
"line %d, %s should be divisible", num_lines, name);
datasets[name].emplace_back(std::move(slot_data));
}
num_samples = num_lines / num_slots;
PADDLE_ENFORCE_EQ(num_samples * num_slots, static_cast<size_t>(num_lines),
"num samples should be divisible");
PADDLE_ENFORCE_GT(num_samples, 0);
}
void Prepare(int bs) {
for (auto it = datasets.begin(); it != datasets.end(); ++it) {
PADDLE_ENFORCE_EQ(it->second.size(), num_samples,
"size of each slot should be equal");
}
size_t num_batches = num_samples / bs;
EXPECT_GT(num_batches, 0);
batched_data.resize(num_batches);
for (auto &one_batch : batched_data) {
one_batch.resize(datasets.size());
size_t i = 0;
for (auto it = datasets.begin(); it != datasets.end(); ++it) {
auto &slot = one_batch[i];
slot.name = it->first;
slot.data.resize(bs);
slot.lod.resize(bs + 1);
slot.lod[0] = 0;
auto &lod = slot.lod;
auto &datas = it->second;
for (int k = 0; k < bs; ++k) {
size_t id = k + batch_iter * bs;
std::copy(datas[id].begin(), datas[id].end(),
std::back_inserter(slot.data[k]));
size_t len = datas[id].size() / 11;
PADDLE_ENFORCE_EQ(len * 11, datas[id].size(),
"%s %d size should be divisible", slot.name, id);
lod[k + 1] = lod[k] + len;
}
slot.shape.assign({static_cast<int>(lod[bs]), 11});
i++;
}
}
}
const std::vector<OneSlotInBatch> &NextBatch() {
if (batch_iter >= batched_data.size() - 1) {
batch_iter = -1;
}
return batched_data[++batch_iter];
}
};
// Fills `tensor` from one batched slot: the tensor is named after the slot
// with an "_embed" suffix, typed FLOAT32, given the slot's shape, and the
// slot's offsets become its only LoD level before the data is copied in.
static void TensorAssignSlot(PaddleTensor *tensor, const OneSlotInBatch &slot) {
  tensor->name = slot.name + "_embed";
  tensor->dtype = PaddleDType::FLOAT32;
  tensor->shape = slot.shape;
  // Replace whatever LoD was there with a single level.
  tensor->lod.assign(1, slot.lod);
  TensorAssignData(tensor, slot.data);
}
void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data) {
const auto &one_batch = data->NextBatch();
input_slots->resize(one_batch.size());
for (size_t i = 0; i < one_batch.size(); ++i) {
auto &slot = one_batch[i];
TensorAssignSlot(&((*input_slots)[i]), slot);
}
}
void SetConfig(AnalysisConfig *cfg) {
cfg->param_file = FLAGS_infer_model + "/params";
cfg->prog_file = FLAGS_infer_model + "/model";
@ -27,62 +128,22 @@ void SetConfig(AnalysisConfig *cfg) {
cfg->device = 0;
cfg->enable_ir_optim = true;
cfg->specify_input_name = true;
cfg->pass_builder()->TurnOnDebug();
cfg->SetCpuMathLibraryNumThreads(FLAGS_paddle_num_threads);
}
void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
std::vector<std::string> feed_names = {
"slot10000_embed", "slot10001_embed", "slot10004_embed",
"slot10005_embed", "slot10008_embed", "slot10009_embed",
"slot10012_embed", "slot10013_embed", "slot10108_embed",
"slot13324_embed", "slot13325_embed", "slot13326_embed",
"slot13327_embed", "slot13328_embed", "slot13329_embed",
"slot13330_embed", "slot13331_embed", "slot15501_embed",
"slot15502_embed", "slot15503_embed", "slot15504_embed",
"slot15505_embed", "slot15506_embed", "slot15507_embed",
"slot15508_embed", "slot15516_embed", "slot15519_embed",
"slot15523_embed", "slot15531_embed", "slot15533_embed",
"slot15548_embed", "slot15564_embed", "slot15565_embed",
"slot15566_embed", "slot15570_embed", "slot15571_embed",
"slot15572_embed", "slot15573_embed", "slot15574_embed",
"slot15575_embed", "slot15576_embed", "slot15577_embed",
"slot15579_embed", "slot15581_embed", "slot15582_embed",
"slot15583_embed", "slot15584_embed", "slot5016_embed",
"slot5021_embed", "slot6002_embed", "slot6003_embed",
"slot6004_embed", "slot6005_embed", "slot6006_embed",
"slot6007_embed", "slot6008_embed", "slot6009_embed",
"slot6011_embed", "slot6014_embed", "slot6015_embed",
"slot6023_embed", "slot6024_embed", "slot6025_embed",
"slot6027_embed", "slot6029_embed", "slot6031_embed",
"slot6034_embed", "slot6035_embed", "slot6036_embed",
"slot6037_embed", "slot6039_embed", "slot6048_embed",
"slot6050_embed", "slot6058_embed", "slot6059_embed",
"slot6060_embed", "slot6066_embed", "slot6067_embed",
"slot6068_embed", "slot6069_embed", "slot6070_embed",
"slot6071_embed", "slot6072_embed", "slot6073_embed",
"slot6182_embed", "slot6183_embed", "slot6184_embed",
"slot6185_embed", "slot6186_embed", "slot6188_embed",
"slot6189_embed", "slot6190_embed", "slot6201_embed",
"slot6202_embed", "slot6203_embed", "slot6247_embed",
"slot6248_embed", "slot6250_embed", "slot6251_embed",
"slot6807_embed", "slot6808_embed", "slot6809_embed",
"slot6810_embed", "slot6811_embed", "slot6812_embed",
"slot6813_embed", "slot6814_embed", "slot6815_embed",
"slot6816_embed", "slot6817_embed", "slot6818_embed",
"slot6819_embed", "slot6820_embed", "slot6822_embed",
"slot6823_embed", "slot6826_embed", "slot7002_embed",
"slot7003_embed", "slot7004_embed", "slot7005_embed",
"slot7006_embed", "slot7008_embed", "slot7009_embed",
"slot7010_embed", "slot7011_embed", "slot7013_embed",
"slot7014_embed", "slot7015_embed", "slot7016_embed",
"slot7017_embed", "slot7019_embed", "slot7100_embed",
"slot7506_embed", "slot7507_embed", "slot7514_embed",
"slot7515_embed", "slot7516_embed"};
SetFakeImageInput(inputs, FLAGS_infer_model, true, "model", "params",
&feed_names);
DataRecord data(FLAGS_infer_data, FLAGS_batch_size);
std::vector<PaddleTensor> input_slots;
int epoch = FLAGS_test_all_data ? data.batched_data.size() : 1;
LOG(INFO) << "number of samples: "
<< data.batched_data.size() * FLAGS_batch_size;
for (int bid = 0; bid < epoch; ++bid) {
PrepareInputs(&input_slots, &data);
(*inputs).emplace_back(input_slots);
}
}
// Easy for profiling independently.
void profile(bool use_mkldnn = false) {
AnalysisConfig cfg;
SetConfig(&cfg);
@ -100,6 +161,17 @@ void profile(bool use_mkldnn = false) {
TEST(Analyzer_seq_pool1, profile) { profile(); }
// Compare result of NativeConfig and AnalysisConfig
// Feeds the same inputs through CompareNativeAndAnalysis using the
// seq_pool1 analysis configuration.
TEST(Analyzer_seq_pool1, compare) {
  AnalysisConfig analysis_config;
  SetConfig(&analysis_config);

  std::vector<std::vector<PaddleTensor>> all_inputs;
  SetInput(&all_inputs);

  const auto *base_config =
      reinterpret_cast<const PaddlePredictor::Config *>(&analysis_config);
  CompareNativeAndAnalysis(base_config, all_inputs);
}
// Check the fuse status
TEST(Analyzer_seq_pool1, fuse_statis) {
AnalysisConfig cfg;
@ -109,7 +181,7 @@ TEST(Analyzer_seq_pool1, fuse_statis) {
auto fuse_statis = GetFuseStatis(
static_cast<AnalysisPredictor *>(predictor.get()), &num_ops);
LOG(INFO) << "num_ops: " << num_ops;
EXPECT_EQ(num_ops, 314);
EXPECT_EQ(num_ops, 349);
}
} // namespace analysis

@ -38,13 +38,13 @@ class LoadCombineOp : public framework::OperatorBase {
static_cast<int>(out_var_names.size()), 0,
"The number of output variables should be greater than 0.");
if (!model_from_memory) {
std::ifstream fin(filename);
std::ifstream fin(filename, std::ios::binary);
PADDLE_ENFORCE(static_cast<bool>(fin),
"Cannot open file %s for load_combine op", filename);
LoadParamsFromBuffer(scope, place, &fin, load_as_fp16, out_var_names);
} else {
PADDLE_ENFORCE(!filename.empty(), "Cannot load file from memory");
std::stringstream fin(filename);
std::stringstream fin(filename, std::ios::in | std::ios::binary);
LoadParamsFromBuffer(scope, place, &fin, load_as_fp16, out_var_names);
}
}

@ -34,7 +34,7 @@ class LoadOp : public framework::OperatorBase {
// FIXME(yuyang18): We save variable to local file now, but we should change
// it to save an output stream.
auto filename = Attr<std::string>("file_path");
std::ifstream fin(filename);
std::ifstream fin(filename, std::ios::binary);
PADDLE_ENFORCE(static_cast<bool>(fin), "Cannot open file %s for load op",
filename);

@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/data_layout_transform.h"
#include "paddle/fluid/operators/pool_op.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
@ -71,7 +72,6 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
void Compute(const paddle::framework::ExecutionContext& ctx) const override {
PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()),
"It must use CPUPlace.");
auto& dev_ctx =
ctx.template device_context<platform::MKLDNNDeviceContext>();
const auto& mkldnn_engine = dev_ctx.GetEngine();
@ -130,20 +130,25 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
CorrectOutputSize(src_tz, dst_tz, ksize, paddings, strides,
padding_right_bottom);
}
auto src_md = platform::MKLDNNMemDesc(
src_tz, platform::MKLDNNGetDataType<T>(), input_format);
mkldnn::memory::data_type dt =
paddle::framework::ToMKLDNNDataType(input->type());
auto src_md = platform::MKLDNNMemDesc(src_tz, dt, input_format);
/* create memory descriptor for pooling without specified format
* ('any') which lets a primitive (pooling in this case) choose
* the memory format preferred for best performance
*/
auto dst_md = platform::MKLDNNMemDesc(dst_tz, mkldnn::memory::f32,
mkldnn::memory::format::any);
auto dst_md =
platform::MKLDNNMemDesc(dst_tz, dt, mkldnn::memory::format::any);
auto propagation = src_md.data.data_type == mkldnn_f32
? mkldnn::prop_kind::forward_training
: mkldnn::prop_kind::forward_scoring;
std::shared_ptr<mkldnn::pooling_forward::primitive_desc> pool_pd =
CreatePrimitiveDesc(src_md, dst_md, strides, padding_left_top,
padding_right_bottom, ksize, pooling_type,
mkldnn_engine, ceil_mode, is_test);
CreatePrimitiveDesc(src_md, dst_md, propagation, strides,
padding_left_top, padding_right_bottom, ksize,
pooling_type, mkldnn_engine, ceil_mode, is_test);
// save pool_pd into global device context to be referred in backward path
if (!is_test) dev_ctx.SetBlob(key_pool_pd, pool_pd);
@ -203,7 +208,8 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
private:
std::unique_ptr<mkldnn::pooling_forward::primitive_desc> CreatePrimitiveDesc(
const mkldnn::memory::desc& src, const mkldnn::memory::desc& dst,
const std::vector<int>& stride, const std::vector<int>& padding_left_top,
const mkldnn::prop_kind& propagation, const std::vector<int>& stride,
const std::vector<int>& padding_left_top,
const std::vector<int>& padding_right_bot, const std::vector<int>& kernel,
const std::string& pooling_type, const mkldnn::engine& engine,
bool ceil_mode, bool is_test) const {
@ -411,6 +417,9 @@ class PoolMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
namespace ops = paddle::operators;
REGISTER_OP_KERNEL(pool2d, MKLDNN, ::paddle::platform::CPUPlace,
ops::PoolMKLDNNOpKernel<float>);
ops::PoolMKLDNNOpKernel<float>,
ops::PoolMKLDNNOpKernel<int8_t>,
ops::PoolMKLDNNOpKernel<uint8_t>);
REGISTER_OP_KERNEL(pool2d_grad, MKLDNN, ::paddle::platform::CPUPlace,
ops::PoolMKLDNNGradOpKernel<float>);

@ -49,7 +49,7 @@ class SaveCombineOp : public framework::OperatorBase {
}
MkDirRecursively(DirName(filename).c_str());
std::ofstream fout(filename);
std::ofstream fout(filename, std::ios::binary);
PADDLE_ENFORCE(static_cast<bool>(fout), "Cannot open %s to write",
filename);

@ -80,7 +80,7 @@ class SaveOp : public framework::OperatorBase {
// FIXME(yuyang18): We save variable to local file now, but we should change
// it to save an output stream.
std::ofstream fout(filename);
std::ofstream fout(filename, std::ios::binary);
PADDLE_ENFORCE(static_cast<bool>(fout), "Cannot open %s to write",
filename);
@ -122,7 +122,7 @@ class SaveOp : public framework::OperatorBase {
// FIXME(yuyang18): We save variable to local file now, but we should change
// it to save an output stream.
std::ofstream fout(filename);
std::ofstream fout(filename, std::ios::binary);
PADDLE_ENFORCE(static_cast<bool>(fout), "Cannot open %s to write",
filename);
framework::SerializeToStream(fout, selectedRows, dev_ctx);

@ -16,6 +16,13 @@ limitations under the License. */
#include <stdlib.h>
#include "paddle/fluid/platform/port.h"
#ifdef _WIN32
// Windows stand-in for sleep(): converts `seconds` to milliseconds, passes
// that to Sleep(), and always returns 0.
static unsigned sleep(unsigned seconds) {
  const unsigned milliseconds = seconds * 1000;
  Sleep(milliseconds);
  return 0;
}
#endif
namespace paddle {
namespace platform {

@ -23,9 +23,8 @@ namespace pybind {
void BindTracer(pybind11::module *m) {
pybind11::class_<imperative::Tracer>(*m, "Tracer", "")
.def("__init__",
[](imperative::Tracer &self, framework::BlockDesc *root_block,
framework::BlockDesc *startup_block) {
new (&self) imperative::Tracer(root_block, startup_block);
[](imperative::Tracer &self, framework::BlockDesc *root_block) {
new (&self) imperative::Tracer(root_block);
})
.def("trace", &imperative::Tracer::Trace);
}

@ -125,11 +125,26 @@ PYBIND11_MODULE(core, m) {
m.add_object("_cleanup",
py::capsule([]() { ScopePool::Instance().Clear(); }));
py::class_<imperative::VarBase, PyVarBase>(m, "VarBase", R"DOC()DOC")
.def(py::init<>())
py::class_<imperative::VarBase, std::shared_ptr<imperative::VarBase>>(
m, "VarBase", R"DOC()DOC")
// .def(py::init<>())
.def(py::init<bool>(), py::arg("stop_gradient") = false)
.def("_run_backward",
[](imperative::VarBase &self) { self.RunBackward(); })
.def("_grad_name", &imperative::VarBase::GradName)
.def("_grad", &imperative::VarBase::Grad)
.def_property("grad_value",
[](const imperative::VarBase &self) { return self.grads_; },
[](imperative::VarBase &self, framework::Variable *grad) {
self.grads_ = grad;
},
py::return_value_policy::reference)
.def_property("value",
[](const imperative::VarBase &self) { return self.var_; },
[](imperative::VarBase &self, framework::Variable *var) {
self.var_ = var;
},
py::return_value_policy::reference)
.def_property(
"desc",
[](const imperative::VarBase &self) { return self.var_desc_; },
@ -137,12 +152,12 @@ PYBIND11_MODULE(core, m) {
self.var_desc_ = var_desc;
},
py::return_value_policy::reference)
.def_property("var",
[](const imperative::VarBase &self) { return self.var_; },
[](imperative::VarBase &self, framework::Variable *var) {
self.var_ = var;
},
py::return_value_policy::reference);
.def_property(
"stop_gradient",
[](const imperative::VarBase &self) { return self.stop_gradient_; },
[](imperative::VarBase &self, bool stop_gradient) {
self.stop_gradient_ = stop_gradient;
});
py::class_<imperative::OpBase, PyOpBase>(m, "OpBase", R"DOC()DOC")
.def(py::init<>())

@ -527,6 +527,18 @@ function assert_api_spec_approvals() {
fi
fi
pip install ${PADDLE_ROOT}/build/opt/paddle/share/wheels/*.whl
CHECK_DOCK_MD5=`python ${PADDLE_ROOT}/tools/check_doc_approval.py`
if [ "True" != ${CHECK_DOCK_MD5} ]; then
APPROVALS=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000 | \
python ${PADDLE_ROOT}/tools/check_pr_approval.py 1 35982308`
echo "current pr ${GIT_PR_ID} got approvals: ${APPROVALS}"
if [ "${APPROVALS}" == "FALSE" ]; then
echo "You must have shanyi15 approval for the api doc change! "
exit 1
fi
echo ${CHECK_DOCK_MD5} >/root/.cache/doc_md5.txt
fi
}
@ -906,11 +918,11 @@ function main() {
cmake_gen ${PYTHON_ABI:-""}
build
assert_api_not_changed ${PYTHON_ABI:-""}
assert_api_spec_approvals
run_test
gen_capi_package
gen_fluid_lib
test_fluid_lib
assert_api_spec_approvals
;;
assert_api)
assert_api_not_changed ${PYTHON_ABI:-""}

@ -20,7 +20,6 @@ import contextlib
import os
import re
import six
import sys
import numpy as np
@ -368,9 +367,10 @@ class Variable(object):
if _in_imperative_mode():
self._ivar = core.VarBase()
self._ivar.desc = self.desc
self._ivar.stop_gradient = stop_gradient
def _numpy(self):
tensor = self._ivar.var.get_tensor()
tensor = self._ivar.value.get_tensor()
return np.array(tensor)
def _backward(self):
@ -379,6 +379,14 @@ class Variable(object):
def _gradient(self):
return np.array(self._ivar._grad())
@property
def _value(self):
return self._ivar.value
@_value.setter
def _value(self, v):
self._ivar.value = v
def __str__(self):
return self.to_string(True)
@ -422,6 +430,14 @@ class Variable(object):
"""
self.desc = input
@property
def _stop_gradient(self):
return self._ivar.stop_gradient
@_stop_gradient.setter
def _stop_gradient(self, s):
self._ivar.stop_gradient = s
@property
def persistable(self):
return self.desc.persistable()
@ -681,9 +697,11 @@ class Operator(object):
self._update_desc_attr(attr_name, attr_val)
self.desc.check_attrs()
if self._has_kernel(type):
self.desc.infer_var_type(self.block.desc)
self.desc.infer_shape(self.block.desc)
if _in_imperative_mode():
self.iop = core.OpBase()
self.iop.desc = self.desc
@ -1266,12 +1284,22 @@ class Block(object):
Operator: the append Operator.
"""
op_desc = self.desc.append_op()
op = Operator(block=self, desc=op_desc, *args, **kwargs)
if _in_imperative_mode():
_imperative_tracer().trace(op.iop, op.inputs, op.outputs, self.desc)
op = Operator(
block=self,
desc=op_desc,
type=kwargs.get("type", None),
inputs=kwargs.get("inputs", None),
outputs=kwargs.get("outputs", None),
attrs=kwargs.get("attrs", None))
self.ops.append(op)
self._trace_op(op, kwargs.get("stop_gradient", False))
return op
def _trace_op(self, op, stop_gradient=False):
    """Hand ``op`` to the imperative tracer; do nothing outside imperative mode.

    Args:
        op: the Operator whose ``iop``, ``inputs`` and ``outputs`` are traced.
        stop_gradient (bool): forwarded unchanged to the tracer.
    """
    if not _in_imperative_mode():
        return
    tracer = _imperative_tracer()
    tracer.trace(op.iop, op.inputs, op.outputs, self.desc, stop_gradient)
def _insert_op(self, index, *args, **kwargs):
"""
Insert a Operator according to the giving arguments.
@ -1317,10 +1345,15 @@ class Block(object):
def _prepend_op(self, *args, **kwargs):
op_desc = self.desc._prepend_op()
op = Operator(self, op_desc, *args, **kwargs)
if _in_imperative_mode():
_imperative_tracer().trace(op.iop, op.inputs, op.outputs, self.desc)
op = Operator(
self,
op_desc,
type=kwargs.get("type", None),
inputs=kwargs.get("inputs", None),
outputs=kwargs.get("outputs", None),
attrs=kwargs.get("attrs", None))
self.ops.insert(0, op)
self._trace_op(op, kwargs.get("stop_gradient", False))
return op
def _sync_with_cpp(self):

@ -20,6 +20,10 @@ from .base import *
from . import layers
from .layers import *
from . import nn
from .nn import *
__all__ = []
__all__ += layers.__all__
__all__ += base.__all__
__all__ += nn.__all__

@ -28,8 +28,7 @@ def enabled():
def guard():
train = framework.Program()
startup = framework.Program()
tracer = core.Tracer(train.current_block().desc,
startup.current_block().desc)
tracer = core.Tracer(train.current_block().desc)
with framework.program_guard(train, startup):
with framework.unique_name.guard():
with framework._imperative_guard(tracer):
@ -46,7 +45,7 @@ def to_variable(value, block=None):
name=None,
shape=value.shape,
dtype=value.dtype)
var = py_var._ivar.var
var = py_var._ivar.value
tensor = var.get_tensor()
tensor.set(value, core.CPUPlace())
return py_var

@ -24,26 +24,21 @@ __all__ = ['PyLayer']
class PyLayer(core.Layer):
def __init__(self):
self._built = False
def __call__(self, inputs):
if not isinstance(inputs, list) and not isinstance(inputs, tuple):
inputs = [inputs]
var_inputs = []
for x in inputs:
py_var = base.to_variable(x)
var_inputs.append(py_var)
if not self._built:
self._build_once(inputs)
self._built = True
outputs = self.forward(var_inputs)
return outputs
def __init__(self, dtype=core.VarDesc.VarType.FP32, name=None):
self._once_built = False
self._dtype = dtype
def _build_once(self, inputs):
pass
def forward(self, inputs):
return []
def __call__(self, *inputs):
if not self._once_built:
self._build_once(*inputs)
self._once_built = True
outputs = self.forward(*inputs)
return outputs
def forward(self, *inputs):
raise NotImplementedError

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save