fix hang problem introduced by update to develop.

revert-14324-fix_vlog
dzhwinter 6 years ago
parent 804dd7da04
commit e41a3fcd68

@@ -43,13 +43,13 @@ nv_test(dim_test SRCS dim_test.cu DEPS ddim)
cc_library(data_type SRCS data_type.cc DEPS framework_proto ddim device_context)
cc_test(data_type_test SRCS data_type_test.cc DEPS data_type place tensor)
if(WITH_GPU)
if (WIN32)
windows_symbolic(tensor_util SRCS tensor_util.cu)
nv_library(tensor SRCS tensor.cc .tensor_util.cu DEPS place memory data_type device_context)
add_dependencies(tensor tensor_util)
else()
# // if (WIN32)
# // windows_symbolic(tensor_util SRCS tensor_util.cu)
# // nv_library(tensor SRCS tensor.cc .tensor_util.cu DEPS place memory data_type device_context)
# // add_dependencies(tensor tensor_util)
# // else()
nv_library(tensor SRCS tensor.cc tensor_util.cu DEPS place memory data_type device_context)
endif(WIN32)
# endif(WIN32)
else()
cc_library(tensor SRCS tensor.cc tensor_util.cc DEPS place memory data_type device_context)
endif()
@@ -93,15 +93,15 @@ nv_test(data_device_transform_test SRCS data_device_transform_test.cu
DEPS operator op_registry device_context math_function)
if(WITH_GPU)
if (WIN32)
# Windows treats a symbolic file as a real file, which is different from Unix.
# We create a hidden file and compile it instead of the original source file.
windows_symbolic(hidden_file SRCS data_type_transform.cu)
nv_library(data_type_transform SRCS .data_type_transform.cu DEPS tensor)
add_dependencies(data_type_transform hidden_file)
else()
# if (WIN32)
# # Windows treats a symbolic file as a real file, which is different from Unix.
# # We create a hidden file and compile it instead of the original source file.
# windows_symbolic(hidden_file SRCS data_type_transform.cu)
# nv_library(data_type_transform SRCS .data_type_transform.cu DEPS tensor)
# add_dependencies(data_type_transform hidden_file)
# else()
nv_library(data_type_transform SRCS data_type_transform.cu DEPS tensor)
endif(WIN32)
# endif(WIN32)
nv_test(data_type_transform_test SRCS data_type_transform_test.cc data_type_transform_test.cu DEPS data_type_transform)
else()
cc_library(data_type_transform SRCS data_type_transform.cc DEPS tensor)

@@ -1,15 +1,106 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
data_type_transform.cc
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/data_type_transform.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/platform/transform.h"
namespace paddle {
namespace framework {
template <typename InType, typename OutType>
struct CastDataTypeFunctor {
HOSTDEVICE inline OutType operator()(InType in) const {
return static_cast<OutType>(in);
}
};
template <typename InType>
struct CastDataType {
CastDataType(const framework::Tensor& in, framework::Tensor* out,
const platform::DeviceContext* ctx)
: in_(in), out_(out), ctx_(ctx) {}
const framework::Tensor in_;
framework::Tensor* out_;
const platform::DeviceContext* ctx_;
template <typename OutType>
void apply() {
auto* in_begin = in_.data<InType>();
auto* in_end = in_begin + in_.numel();
auto* out_begin = out_->mutable_data<OutType>(in_.place());
if (platform::is_cpu_place(in_.place())) {
platform::Transform<platform::CPUDeviceContext> trans;
auto* context = static_cast<const platform::CPUDeviceContext*>(ctx_);
trans(*context, in_begin, in_end, out_begin,
CastDataTypeFunctor<InType, OutType>());
#ifdef __NVCC__
} else if (platform::is_gpu_place(in_.place())) {
platform::Transform<platform::CUDADeviceContext> trans;
auto* context = static_cast<const platform::CUDADeviceContext*>(ctx_);
trans(*context, in_begin, in_end, out_begin,
CastDataTypeFunctor<InType, OutType>());
context->Wait();
#endif
} else {
PADDLE_THROW("Unsupported place!");
}
}
};
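For readers following the dispatch: VisitDataType (used below in TransDataType) maps the runtime destination type onto a compile-time template argument and invokes the visitor's apply<OutType>(). A minimal, simplified sketch of that mechanism, not Paddle's actual implementation:

#include <stdexcept>

// Simplified stand-in for the proto::VarType tags used in this file.
enum class DType { FP32, FP64, INT32 };

// Minimal sketch of the visitor dispatch: translate a runtime type tag
// into a compile-time template argument for the visitor's apply<T>().
template <typename Visitor>
void VisitDType(DType t, Visitor visitor) {
  switch (t) {
    case DType::FP32: visitor.template apply<float>(); break;
    case DType::FP64: visitor.template apply<double>(); break;
    case DType::INT32: visitor.template apply<int>(); break;
    default: throw std::runtime_error("unsupported dtype");
  }
}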
void TransDataType(const OpKernelType& kernel_type_for_var,
const OpKernelType& expected_kernel_type, const Tensor& in,
Tensor* out) {
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
out->Resize(in.dims());
auto src_type = kernel_type_for_var.data_type_;
auto dst_type = expected_kernel_type.data_type_;
auto ctx = pool.Get(in.place());
switch (src_type) {
case proto::VarType::FP16:
framework::VisitDataType(dst_type,
CastDataType<platform::float16>(in, out, ctx));
break;
case proto::VarType::FP32:
framework::VisitDataType(dst_type, CastDataType<float>(in, out, ctx));
break;
case proto::VarType::FP64:
framework::VisitDataType(dst_type, CastDataType<double>(in, out, ctx));
break;
case proto::VarType::INT32:
framework::VisitDataType(dst_type, CastDataType<int>(in, out, ctx));
break;
case proto::VarType::INT64:
framework::VisitDataType(dst_type, CastDataType<int64_t>(in, out, ctx));
break;
case proto::VarType::BOOL:
framework::VisitDataType(dst_type, CastDataType<bool>(in, out, ctx));
break;
case proto::VarType::INT16:
framework::VisitDataType(dst_type, CastDataType<int16_t>(in, out, ctx));
break;
case proto::VarType::UINT8:
framework::VisitDataType(dst_type, CastDataType<uint8_t>(in, out, ctx));
break;
default:
PADDLE_THROW("Unsupported type %d", src_type);
}
}
} // namespace framework
} // namespace paddle
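For context, a minimal usage sketch of the entry point above (the helper name and the OpKernelType constructor arguments are assumptions for illustration, not part of this commit):

#include "paddle/fluid/framework/data_type_transform.h"

// Hypothetical helper: cast an FP32 tensor to FP64 via TransDataType.
// The kernel-type objects exist solely to carry the source and
// destination data types; the switch above then selects
// CastDataType<float>.
void CastFp32ToFp64(const paddle::framework::Tensor& in,
                    paddle::framework::Tensor* out) {
  namespace fw = paddle::framework;
  fw::OpKernelType src(fw::proto::VarType::FP32, in.place());
  fw::OpKernelType dst(fw::proto::VarType::FP64, in.place());
  fw::TransDataType(src, dst, in, out);
}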

@@ -17,8 +17,11 @@ limitations under the License. */
namespace paddle {
namespace framework {
namespace ir {
constexpr char Node::kControlDepVarName[];
#if !defined(_WIN32)
constexpr char Node::kControlDepVarName[] = "__control_var";
#else
const char Node::kControlDepVarName[] = "__control_var";
#endif
int Node::count_ = 0;
} // namespace ir
} // namespace framework

@@ -27,7 +27,11 @@ namespace ir {
class Node {
public:
enum class Type { kOperation, kVariable };
#if !defined(_WIN32) // msvc not support constexpr correctly.
static constexpr char kControlDepVarName[] = "__control_var";
#else
static const char kControlDepVarName[];
#endif
explicit Node(const std::string& name, Type type)
: name_(name),
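The same workaround in isolation, as a standalone sketch (the struct name here is hypothetical): in C++11/14 an odr-used in-class constexpr array still needs an out-of-line definition, and pre-VS2017 MSVC mishandles the constexpr form, so the Windows branch falls back to a plain const member.

struct Tag {
#if !defined(_WIN32)
  static constexpr char kName[] = "__control_var";
#else
  static const char kName[];  // defined out of line below
#endif
};

#if !defined(_WIN32)
constexpr char Tag::kName[];  // out-of-line definition (pre-C++17 rule)
#else
const char Tag::kName[] = "__control_var";
#endif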

@@ -689,7 +689,7 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
auto expected_kernel_key =
this->GetExpectedKernelType(ExecutionContext(*this, scope, *dev_ctx));
VLOG(3) << "expected_kernel_key:" << expected_kernel_key;
VLOG(3) << "expected_kernel_key: " << expected_kernel_key;
auto kernel_iter = kernels.find(expected_kernel_key);
#ifdef PADDLE_WITH_MKLDNN

File diff suppressed because it is too large

File diff suppressed because it is too large

@@ -1,97 +0,0 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <chrono>
#include <iostream>
#include <fstream>
#include "paddle/fluid/inference/api/paddle_inference_api.h"
namespace paddle {
std::string DIRNAME = "./LB_icnet_model";
//std::string DIRNAME = "./infer_models";
NativeConfig GetConfig() {
NativeConfig config;
config.prog_file=DIRNAME + "/__model__";
config.param_file=DIRNAME + "/__params__";
config.fraction_of_gpu_memory = 0.8;
config.use_gpu = true;
config.device = 0;
return config;
}
using Time = decltype(std::chrono::high_resolution_clock::now());
Time time() { return std::chrono::high_resolution_clock::now(); };
double time_diff(Time t1, Time t2) {
typedef std::chrono::microseconds ms;
auto diff = t2 - t1;
ms counter = std::chrono::duration_cast<ms>(diff);
return counter.count() / 1000.0;
}
void test_naive(int batch_size){
NativeConfig config = GetConfig();
// config.model_dir = model_path;
auto predictor = CreatePaddlePredictor<NativeConfig>(config);
int height = 449;
int width = 581;
//int height = 3;
//int width = 3;
int num_sum = height * width * 3 * batch_size;
std::vector<float> data;
for(int i = 0; i < num_sum; i++) {
data.push_back(0.0);
}
PaddleTensor tensor;
tensor.shape = std::vector<int>({batch_size, 3, height, width});
tensor.data.Resize(sizeof(float) * batch_size * 3 * height * width);
std::copy(data.begin(), data.end(), static_cast<float*>(tensor.data.data()));
tensor.dtype = PaddleDType::FLOAT32;
std::vector<PaddleTensor> paddle_tensor_feeds(1, tensor);
PaddleTensor tensor_out;
std::vector<PaddleTensor> outputs(1, tensor_out);
predictor->Run(paddle_tensor_feeds, &outputs, batch_size);
std::cout << "start predict123:" << std::endl;
auto time1 = time();
for(size_t i = 0; i < 2; i++) {
predictor->Run(paddle_tensor_feeds, &outputs, batch_size);
std::cout << "pass " << i;
}
auto time2 = time();
std::ofstream ofresult("naive_test_result.txt", std::ios::app);
std::cout <<"batch: " << batch_size << " predict cost: " << time_diff(time1, time2) / 100.0 << "ms" << std::endl;
std::cout << outputs.size() << std::endl;
/*
int64_t * data_o = static_cast<int64_t*>(outputs[0].data.data());
for (size_t j = 0; j < outputs[0].data.length() / sizeof(int64_t); ++j) {
ofresult << std::to_string(data_o[j]) << " ";
}
ofresult << std::endl;
ofresult.close();
*/
}
} // namespace paddle
int main(int argc, char** argv) {
paddle::test_naive(1 << 0);
return 0;
}

@@ -43,6 +43,7 @@ template <typename T>
class CUDNNConvOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
VLOG(3) << "inside cudnn";
PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
"It must use CUDAPlace.");
auto* input = ctx.Input<Tensor>("Input");
@@ -59,7 +60,7 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
const T* input_data = input->data<T>();
const T* filter_data = filter->data<T>();
T* output_data = output->mutable_data<T>(ctx.GetPlace());
VLOG(3) << "get all inputs";
// ------------------- cudnn descriptors ---------------------
ScopedTensorDescriptor input_desc;
ScopedTensorDescriptor output_desc;
@@ -72,7 +73,7 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
cudnnConvolutionDescriptor_t cudnn_conv_desc =
conv_desc.descriptor<T>(paddings, strides, dilations);
VLOG(3) << "create tensor descriptor";
#if CUDNN_VERSION_MIN(7, 0, 1)
// cudnn 7 can support groups, no need to do it manually
// FIXME(typhoonzero): find a better way to disable groups
@@ -81,7 +82,7 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
cudnn_conv_desc, groups));
groups = 1;
#endif
VLOG(3) << "before create tensor descriptor";
cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor<T>(
layout, framework::vectorize2int(input->dims()), groups);
cudnnTensorDescriptor_t cudnn_output_desc = output_desc.descriptor<T>(
@@ -111,7 +112,7 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
output_height = output->dims()[2];
output_width = output->dims()[3];
}
VLOG(3) << "after create tensor descriptor";
int group_offset_in =
input_channels / groups * input_height * input_width * input_depth;
int group_offset_out =
@@ -129,6 +130,7 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
auto handle = dev_ctx.cudnn_handle();
VLOG(3) << "set cudnn algorithm";
CUDNN_ENFORCE(platform::dynload::cudnnGetConvolutionForwardAlgorithm(
handle, cudnn_input_desc, cudnn_filter_desc, cudnn_conv_desc,
cudnn_output_desc, CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
@@ -149,7 +151,7 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
cudnn_conv_desc, CUDNN_DEFAULT_MATH));
}
#endif
VLOG(3) << "before get workspace";
// get the workspace size that can be allocated
CUDNN_ENFORCE(platform::dynload::cudnnGetConvolutionForwardWorkspaceSize(
handle, cudnn_input_desc, cudnn_filter_desc, cudnn_conv_desc,
@@ -158,10 +160,12 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
// the limit because the algo is overridden to use tensor core.
PADDLE_ENFORCE_LE(workspace_size_in_bytes, workspace_size_limit,
"workspace_size to be allocated exceeds the limit");
VLOG(3) << "after get workspace";
// Allocate on GPU memory
platform::CUDAPlace gpu = boost::get<platform::CUDAPlace>(ctx.GetPlace());
workspace_size_in_bytes = 1024;
cudnn_workspace = paddle::memory::Alloc(gpu, workspace_size_in_bytes);
VLOG(3) << "allocate memory";
// ------------------- cudnn conv forward ---------------------
ScalingParamType<T> alpha = 1.0f, beta = 0.0f;
for (int i = 0; i < groups; i++) {
@@ -171,8 +175,10 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
cudnn_conv_desc, algo, cudnn_workspace, workspace_size_in_bytes,
&beta, cudnn_output_desc, output_data + i * group_offset_out));
}
VLOG(3) << "cudnn forward";
// Release the cudnn workspace
paddle::memory::Free(gpu, cudnn_workspace);
VLOG(3) << "cudnn pass";
}
};
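One design note on the forward path above: the workspace is released with an explicit paddle::memory::Free after the convolution, so an exception thrown by CUDNN_ENFORCE in between would leak it. A minimal RAII sketch of the alternative, with malloc/free standing in for Paddle's allocator (illustration only, not the commit's approach):

#include <cstddef>
#include <cstdlib>

// Scratch-buffer guard: the workspace is freed when the guard goes out
// of scope, even if an enforce macro throws mid-computation.
class WorkspaceGuard {
 public:
  explicit WorkspaceGuard(size_t bytes) : ptr_(std::malloc(bytes)) {}
  ~WorkspaceGuard() { std::free(ptr_); }
  void* get() const { return ptr_; }
  WorkspaceGuard(const WorkspaceGuard&) = delete;
  WorkspaceGuard& operator=(const WorkspaceGuard&) = delete;

 private:
  void* ptr_;
};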
@@ -318,6 +324,7 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
// Already on GPU
void* cudnn_workspace = nullptr;
platform::CUDAPlace gpu = boost::get<platform::CUDAPlace>(ctx.GetPlace());
workspace_size_in_bytes = 1024;
cudnn_workspace = paddle::memory::Alloc(gpu, workspace_size_in_bytes);
// ------------------- cudnn conv backward data ---------------------
ScalingParamType<T> alpha = 1.0f, beta = 0.0f;

@@ -33,8 +33,8 @@ class LoadCombineOp : public framework::OperatorBase {
auto filename = Attr<std::string>("file_path");
auto load_as_fp16 = Attr<bool>("load_as_fp16");
std::ifstream fin(filename);
PADDLE_ENFORCE(static_cast<bool>(fin),
std::ifstream fin(filename, std::ios_base::in | std::ios_base::binary);
PADDLE_ENFORCE(!fin.bad(),
"Cannot open file %s for load_combine op", filename);
auto out_var_names = Outputs("Out");
@@ -46,20 +46,21 @@ class LoadCombineOp : public framework::OperatorBase {
auto &dev_ctx = *pool.Get(place);
for (size_t i = 0; i < out_var_names.size(); i++) {
VLOG(3) << "load " << out_var_names[i];
auto *out_var = scope.FindVar(out_var_names[i]);
PADDLE_ENFORCE(out_var != nullptr, "Output variable %s cannot be found",
out_var_names[i]);
auto *tensor = out_var->GetMutable<framework::LoDTensor>();
VLOG(3) << "Get Tensor";
// Error checking
PADDLE_ENFORCE(static_cast<bool>(fin), "Cannot read more from file %s",
PADDLE_ENFORCE(!fin.bad(), "Cannot read more from file %s",
filename);
VLOG(3) << "before deserialization";
// Get data from fin to tensor
DeserializeFromStream(fin, tensor, dev_ctx);
DeserializeFromStream(fin, tensor, dev_ctx);
VLOG(3) << "after deserialization";
auto in_dtype = framework::ToDataType(tensor->type());
auto out_dtype =
load_as_fp16 ? framework::proto::VarType::FP16 : in_dtype;
@@ -80,6 +81,7 @@ class LoadCombineOp : public framework::OperatorBase {
tensor->set_lod(fp16_tensor.lod());
tensor->ShareDataWith(fp16_tensor);
}
VLOG(3) << "load " << out_var_names[i] << " finished";
}
}
};
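The switch to an explicitly binary ifstream above matters on Windows: a text-mode stream rewrites CRLF sequences and treats a 0x1A byte as end-of-file, silently corrupting or truncating serialized tensors. A minimal standalone sketch of the pattern (the helper is hypothetical):

#include <fstream>
#include <iterator>
#include <string>
#include <vector>

// Read a serialized blob byte-for-byte. Without std::ios_base::binary,
// Windows text-mode translation would alter "\r\n" bytes and stop at
// 0x1A (Ctrl-Z) before the real end of the payload.
std::vector<char> ReadBlob(const std::string& path) {
  std::ifstream fin(path, std::ios_base::in | std::ios_base::binary);
  return std::vector<char>(std::istreambuf_iterator<char>(fin),
                           std::istreambuf_iterator<char>());
}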

@@ -59,6 +59,7 @@ inline const char* cudnnGetErrorString(cudnnStatus_t status) {
#define CUDNN_VERSION_MIN(major, minor, patch) \
(CUDNN_VERSION >= ((major)*1000 + (minor)*100 + (patch)))
#if !defined(_WIN32)
#define CUDNN_ENFORCE(condition) \
do { \
cudnnStatus_t status = condition; \
@@ -66,6 +67,9 @@ inline const char* cudnnGetErrorString(cudnnStatus_t status) {
PADDLE_THROW(::paddle::platform::cudnnGetErrorString(status)); \
} \
} while (false)
#else
#define CUDNN_ENFORCE(condition)
#endif
enum class DataLayout { // Not used
kNHWC,

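A side note on the macro shape above: the do { ... } while (false) wrapper makes the multi-statement macro behave as a single statement, so it composes safely with if/else; note also that the Windows branch expanding to nothing means cuDNN errors are silently ignored there. A standalone sketch of the idiom (all names hypothetical):

#include <cstdio>

static int Init() { return 0; }
static void Cleanup() {}
static void HandleError(int status) { std::printf("error %d\n", status); }

// The wrapper turns the macro body into one statement, so the if/else
// below parses as intended even without braces around the macro call.
#define CHECK_STATUS(expr)                  \
  do {                                      \
    int status_ = (expr);                   \
    if (status_ != 0) HandleError(status_); \
  } while (false)

int main() {
  bool ready = true;
  if (ready)
    CHECK_STATUS(Init());
  else
    Cleanup();
  return 0;
}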