Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into omitlstmunit
commit
4c183b17f2
@ -0,0 +1,25 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#include "paddle/operators/pool_cudnn_op.h"
|
||||
|
||||
namespace ops = paddle::operators;
|
||||
|
||||
REGISTER_OP(pool2d_cudnn, ops::PoolOp, ops::Pool2dOpMaker, pool2d_cudnn_grad,
|
||||
ops::PoolOpGrad);
|
||||
|
||||
REGISTER_OP_CPU_KERNEL(pool2d_cudnn,
|
||||
ops::PoolKernel<paddle::platform::CPUPlace, float>);
|
||||
REGISTER_OP_CPU_KERNEL(pool2d_cudnn_grad,
|
||||
ops::PoolGradKernel<paddle::platform::CPUPlace, float>)
|
@ -0,0 +1,152 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#include "paddle/operators/pool_cudnn_op.h"
|
||||
#include "paddle/platform/cudnn_helper.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace operators {
|
||||
|
||||
using Tensor = framework::Tensor;
|
||||
using ScopedTensorDescriptor = platform::ScopedTensorDescriptor;
|
||||
using ScopedPoolingDescriptor = platform::ScopedPoolingDescriptor;
|
||||
using DataLayout = platform::DataLayout;
|
||||
using PoolingMode = platform::PoolingMode;
|
||||
|
||||
template <typename T>
|
||||
class PoolCudnnOpKernel : public framework::OpKernel<T> {
|
||||
public:
|
||||
void Compute(const framework::ExecutionContext &ctx) const override {
|
||||
PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
|
||||
"It must use GPUPlace.");
|
||||
|
||||
const Tensor *input = ctx.Input<Tensor>("X");
|
||||
Tensor *output = ctx.Output<Tensor>("Out");
|
||||
|
||||
const T *input_data = input->data<T>();
|
||||
T *output_data = output->mutable_data<T>(ctx.GetPlace());
|
||||
|
||||
std::string pooling_type = ctx.Attr<std::string>("poolingType");
|
||||
std::vector<int> ksize = ctx.Attr<std::vector<int>>("ksize");
|
||||
std::vector<int> strides = ctx.Attr<std::vector<int>>("strides");
|
||||
std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
|
||||
if (ctx.Attr<bool>("globalPooling")) {
|
||||
for (size_t i = 0; i < ksize.size(); ++i) {
|
||||
ksize[i] = static_cast<int>(input->dims()[i + 2]);
|
||||
}
|
||||
}
|
||||
|
||||
// ------------------- cudnn descriptors ---------------------
|
||||
ScopedTensorDescriptor input_desc;
|
||||
ScopedTensorDescriptor output_desc;
|
||||
ScopedPoolingDescriptor pool_desc;
|
||||
DataLayout layout = DataLayout::kNCHW;
|
||||
|
||||
cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor<T>(
|
||||
layout, framework::vectorize2int(input->dims()));
|
||||
cudnnTensorDescriptor_t cudnn_output_desc = output_desc.descriptor<T>(
|
||||
layout, framework::vectorize2int(output->dims()));
|
||||
|
||||
PoolingMode pooling_mode;
|
||||
if (pooling_type == "max") {
|
||||
pooling_mode = PoolingMode::kMaximum;
|
||||
} else {
|
||||
pooling_mode = PoolingMode::kAverage;
|
||||
}
|
||||
|
||||
cudnnPoolingDescriptor_t cudnn_pool_desc =
|
||||
pool_desc.descriptor(pooling_mode, ksize, paddings, strides);
|
||||
|
||||
// ------------------- cudnn pool algorithm ---------------------
|
||||
auto handle = ctx.cuda_device_context().cudnn_handle();
|
||||
T alpha = 1.0f, beta = 0.0f;
|
||||
|
||||
PADDLE_ENFORCE(platform::dynload::cudnnPoolingForward(
|
||||
handle, cudnn_pool_desc, &alpha, cudnn_input_desc, input_data, &beta,
|
||||
cudnn_output_desc, output_data));
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
class PoolCudnnGradOpKernel : public framework::OpKernel<T> {
|
||||
public:
|
||||
void Compute(const framework::ExecutionContext &ctx) const override {
|
||||
PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
|
||||
"It must use GPUPlace.");
|
||||
|
||||
const Tensor *input = ctx.Input<Tensor>("X");
|
||||
const Tensor *output = ctx.Input<Tensor>("Out");
|
||||
const Tensor *output_grad =
|
||||
ctx.Input<Tensor>(framework::GradVarName("Out"));
|
||||
Tensor *input_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
|
||||
|
||||
std::string pooling_type = ctx.Attr<std::string>("poolingType");
|
||||
std::vector<int> ksize = ctx.Attr<std::vector<int>>("ksize");
|
||||
std::vector<int> strides = ctx.Attr<std::vector<int>>("strides");
|
||||
std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
|
||||
|
||||
if (ctx.Attr<bool>("globalPooling")) {
|
||||
for (size_t i = 0; i < ksize.size(); ++i)
|
||||
ksize[i] = static_cast<int>(input->dims()[i + 2]);
|
||||
}
|
||||
|
||||
const T *input_data = input->data<T>();
|
||||
const T *output_data = output->data<T>();
|
||||
const T *output_grad_data = output_grad->data<T>();
|
||||
|
||||
// ------------------- cudnn descriptors ---------------------
|
||||
ScopedTensorDescriptor input_desc;
|
||||
ScopedTensorDescriptor output_desc;
|
||||
ScopedPoolingDescriptor pool_desc;
|
||||
DataLayout layout = DataLayout::kNCHW;
|
||||
|
||||
cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor<T>(
|
||||
layout, framework::vectorize2int(input->dims()));
|
||||
cudnnTensorDescriptor_t cudnn_output_desc = output_desc.descriptor<T>(
|
||||
layout, framework::vectorize2int(output->dims()));
|
||||
|
||||
PoolingMode pooling_mode;
|
||||
if (pooling_type == "max") {
|
||||
pooling_mode = PoolingMode::kMaximum;
|
||||
} else {
|
||||
pooling_mode = PoolingMode::kAverage;
|
||||
}
|
||||
|
||||
cudnnPoolingDescriptor_t cudnn_pool_desc =
|
||||
pool_desc.descriptor(pooling_mode, ksize, paddings, strides);
|
||||
|
||||
// ------------------- cudnn pool algorithm ---------------------
|
||||
auto handle = ctx.cuda_device_context().cudnn_handle();
|
||||
T alpha = 1.0f, beta = 0.0f;
|
||||
|
||||
if (input_grad) {
|
||||
T *input_grad_data = input_grad->mutable_data<T>(ctx.GetPlace());
|
||||
math::SetConstant<paddle::platform::GPUPlace, T> set_zero;
|
||||
set_zero(ctx.device_context(), input_grad, static_cast<T>(0));
|
||||
|
||||
PADDLE_ENFORCE(platform::dynload::cudnnPoolingBackward(
|
||||
handle, cudnn_pool_desc, &alpha, cudnn_output_desc, output_data,
|
||||
cudnn_output_desc, output_grad_data, cudnn_input_desc, input_data,
|
||||
&beta, cudnn_input_desc, input_grad_data));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace operators
|
||||
} // namespace paddle
|
||||
|
||||
namespace ops = paddle::operators;
|
||||
|
||||
REGISTER_OP_GPU_KERNEL(pool2d_cudnn, ops::PoolCudnnOpKernel<float>);
|
||||
REGISTER_OP_GPU_KERNEL(pool2d_cudnn_grad, ops::PoolCudnnGradOpKernel<float>);
|
@ -0,0 +1,19 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "paddle/framework/op_registry.h"
|
||||
#include "paddle/operators/pool_op.h"
|
||||
|
||||
namespace paddle {
|
||||
namespace operators {} // namespace operators
|
||||
} // namespace paddle
|
@ -0,0 +1,72 @@
|
||||
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import gzip
|
||||
import struct
|
||||
import os
|
||||
|
||||
from paddle.trainer_config_helpers.layers import LayerOutput
|
||||
from paddle.v2.parameters import Parameters
|
||||
from paddle.proto import ModelConfig_pb2
|
||||
from paddle.v2.topology import Topology
|
||||
|
||||
|
||||
def merge_v2_model(net, param_file, output_file):
    '''Integrate the model config and model parameters into one file.

    The model configuration file describes the model structure which
    ends with .py. The parameters file stores the parameters of the model
    which ends with .tar.gz.

    The merged file is written in binary form: first the length of the
    serialized model config packed with struct format 'q', then the
    serialized model config itself, then every parameter listed in the
    config, serialized in that order.

    @param  net            The output layer of the network.
    @param  param_file     Path of the model parameters(.tar.gz) which is
                           stored by v2 api.
    @param  output_file    Path of the merged file which will be generated.
                           An existing file at this path is removed first.

    Raises AssertionError when net is not a LayerOutput, when param_file
    does not exist, or when the topology proto is not a ModelConfig.

    Usage:

        from paddle.util.merge_model import merge_v2_model
        # import your network configuration
        from mobilenet import mobile_net

        net = mobile_net(3*224*224, 102)
        param_file = './param_pass_00000.tar.gz'
        output_file = './output.paddle'

        merge_v2_model(net, param_file, output_file)

    '''

    assert isinstance(net, LayerOutput), \
        "The net should be the output of the network"
    assert os.path.exists(param_file), \
        "The model parameters file %s does not exists " % (param_file)

    model_proto = Topology(net).proto()
    assert isinstance(model_proto, ModelConfig_pb2.ModelConfig)

    with gzip.open(param_file) as f:
        params = Parameters.from_tar(f)

    if os.path.exists(output_file):
        os.remove(output_file)

    # Everything written below is binary data (packed length, serialized
    # protobuf, parameter blobs), so the file must be opened in binary mode;
    # text mode can translate newline bytes on some platforms and rejects
    # bytes on Python 3.
    with open(output_file, 'wb') as f:
        param_names = [param.name for param in model_proto.parameters]
        conf_str = model_proto.SerializeToString()
        f.write(struct.pack('q', len(conf_str)))
        f.write(conf_str)
        for pname in param_names:
            params.serialize(pname, f)

    # Parenthesized single-argument form prints identically on Python 2 and
    # is also valid on Python 3.
    print('Generate %s success!' % (output_file))
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue