Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into fix-7555

Conflicts:
	python/paddle/v2/fluid/layers/nn.py
	python/paddle/v2/fluid/tests/test_layers.py
emailweixu-patch-1
yangyaming 7 years ago
commit 630a8646fe

@ -31,9 +31,6 @@ if(NOT CMAKE_CROSSCOMPILING)
endif(NOT CMAKE_CROSSCOMPILING)
find_package(Git REQUIRED)
find_package(Threads REQUIRED)
if(NOT ANDROID AND NOT IOS)
find_package(Boost QUIET)
endif()
include(simd)
@ -140,6 +137,7 @@ include(external/openblas) # download, build, install openblas
include(external/mkldnn) # download, build, install mkldnn
include(external/swig) # download, build, install swig
include(external/warpctc) # download, build, install warpctc
include(external/boost) # download, build, install boost
include(external/any) # download libn::any
include(external/eigen) # download eigen3
include(external/pybind11) # download pybind11
@ -164,7 +162,6 @@ include_directories("${PADDLE_SOURCE_DIR}")
include_directories("${PADDLE_SOURCE_DIR}/paddle/cuda/include")
include_directories("${CMAKE_CURRENT_BINARY_DIR}/proto")
include_directories("${CMAKE_CURRENT_BINARY_DIR}/go/pserver/client/c")
include_directories(${Boost_INCLUDE_DIRS})
set(EXTERNAL_LIBS
${GFLAGS_LIBRARIES}

@ -27,7 +27,7 @@ RUN apt-get update && \
curl sed grep graphviz libjpeg-dev zlib1g-dev \
python-matplotlib gcc-4.8 g++-4.8 \
automake locales clang-format swig doxygen cmake \
liblapack-dev liblapacke-dev libboost-dev \
liblapack-dev liblapacke-dev \
clang-3.8 llvm-3.8 libclang-3.8-dev \
net-tools libtool && \
apt-get clean -y

@ -0,0 +1,51 @@
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
include(ExternalProject)
set(BOOST_PROJECT "extern_boost")
set(BOOST_VER "1.66.0")
set(BOOST_TAR "boost_1_66_0")
set(BOOST_URL "https://dl.bintray.com/boostorg/release/${BOOST_VER}/source/${BOOST_TAR}.tar.gz")
set(BOOST_SOURCES_DIR ${THIRD_PARTY_PATH}/boost)
set(BOOST_DOWNLOAD_DIR "${BOOST_SOURCES_DIR}/src/${BOOST_PROJECT}")
set(BOOST_INCLUDE_DIR "${BOOST_DOWNLOAD_DIR}/${BOOST_TAR}" CACHE PATH "boost include directory." FORCE)
include_directories(${BOOST_INCLUDE_DIR})
ExternalProject_Add(
${BOOST_PROJECT}
${EXTERNAL_PROJECT_LOG_ARGS}
DOWNLOAD_DIR ${BOOST_DOWNLOAD_DIR}
DOWNLOAD_COMMAND wget --no-check-certificate ${BOOST_URL} -c -q -O ${BOOST_TAR}.tar.gz
&& tar zxf ${BOOST_TAR}.tar.gz
DOWNLOAD_NO_PROGRESS 1
PREFIX ${BOOST_SOURCES_DIR}
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
UPDATE_COMMAND ""
)
if (${CMAKE_VERSION} VERSION_LESS "3.3.0")
set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/boost_dummy.c)
file(WRITE ${dummyfile} "const char *dummy = \"${dummyfile}\";")
add_library(boost STATIC ${dummyfile})
else()
add_library(boost INTERFACE)
endif()
add_dependencies(boost ${BOOST_PROJECT})
list(APPEND external_project_dependencies boost)
set(Boost_INCLUDE_DIR ${BOOST_INCLUDE_DIR})

@ -18,6 +18,11 @@ dynamic_lstm
.. autofunction:: paddle.v2.fluid.layers.dynamic_lstm
:noindex:
dynamic_gru
-----------
.. autofunction:: paddle.v2.fluid.layers.dynamic_gru
:noindex:
data
----
.. autofunction:: paddle.v2.fluid.layers.data
@ -500,6 +505,11 @@ swish
.. autofunction:: paddle.v2.fluid.layers.swish
:noindex:
im2sequence
------
.. autofunction:: paddle.v2.fluid.layers.im2sequence
:noindex:
edit_distance
---------------
.. autofunction:: paddle.v2.fluid.layers.edit_distance_error

@ -0,0 +1,96 @@
# Design Doc: CSP in PaddlePaddle Fluid
## Motivation
Concurrent programming is important for deep learning. Few example applications are:
1. The main thread keeps reading the next mini-batch while another thread uses the GPU for computing.
2. The main thread performs the computation while another thread uploads the local gradients from each trainer to the parameter server.
Most DL systems, including TensorFlow, Caffe2, and MxNet, can asynchronously execute operators in a graph. However, Fluid doesn't have the concept of a graph at all, as the design goal of Fluid is that of a programming language.
## Concurrent Programming Models
There were many concurrent programming models, implemented in various forms:
| concurrent programming model | implementation |
|-----|-----|
| mutex | types and functions in standard libraries |
| semaphore | types and functions in standard libraries |
| communicating sequential processes (CSP) | Go programming language |
| actor model | Erlang programming language |
| message passing | MPI |
| bulk synchronous parallel (BSP) | Pregel distributed programming framework |
Since Fluid was designed to be a programming language, we would like to implement CSP in Fluid.
### CSP v.s. Actor Model
A well-known implementation of Actor Model is the Erlang programming language. In Actor Model, *processes* could send messages to another process and receive messages from another process given the process IDs. We can find the three ingredients, process with ID, send, and recv, in MPI too. Indeed, we can rewrite Erlang programs in Python + MPI with possibly fewer lines of code. Our concern with Actor Model is that it doesn't seem reasonable to implement process management in a programming language's runtime library; instead, it should be the operating systems' responsibility to manage processes and libraries like MPI for send/recv.
## CSP in Fluid
Fluid has two fundamental control-flows: *if-else* and *while*. If we are to implement CSP, we need the following:
1. a new data type: *channel* and operators *send* and *recv*,
1. *goroutine* or thread, and
1. a new control-flow: select.
We also need Python wrappers for the above components.
The type *channel* is conceptually the blocking queue. In Go, its implemented is a [blocking circular queue](https://github.com/golang/go/blob/68ce117cf17b8debf5754bfd476345779b5b6616/src/runtime/chan.go#L31-L50), which supports send and recv.
The `select` operation has been in OS kernels long before Go language. All Unix kernels implement system calls *poll* and *select*. They monitor multiple file descriptors to see if I/O is possible on any of them. This takes O(N) time. Since Linux 2.6, a new system call, *epoll*, can do the same in O(1) time. In BSD systems, there is a similar system call *kqueue*. Go's Linux implementation uses epoll.
It might be a good idea to implement Fluid's select using epoll too. In this design doc, we start from the O(N) way, so we could focus on Python binding and the syntax.
### Type Channel
Fluid supports many data types:
1. Tensor,
1. Row-sparse Tensor
1. LoD Tensor,
1. Tensor array, etc
Each data type is registered in the [`framework.proto`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/framework.proto#L117-L127) as an enum value. To add a new type channel, we need to add a new type enum.
To expose a C++ type to Python, we need to edit the [`pybind.cc`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/pybind/pybind.cc) file. [Here](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/pybind/pybind.cc#L120-L164) is an example how we expose C++ class LoDTensor.
## Syntax Design
### Create Channel
In Go, we create a channel by specifying the element type and buffer size:
```go
ch := make(chan int) // a channel without buffer
ch1 := make(chan int, 100) // a channel that can buffer 100 ints.
```
In Fluid, we should be able to do the same:
```python
ch = fluid.make_chan(dtype=INT)
ch1 = fluid.make_chan(dtype=INT, 100)
```
In addition to that, we want channels that can hold more complex element types, e.g., Tensors of float16:
```python
ch = fluid.make_chan(dtype=Tensor, etype=float16)
```
or Tensors of Tensors of float16 etc.
The point here is that we need a consistent way to compose types, like in C++ we can have `Tensor<Tensor<...<float16>...> >`.
### Send and Recv
### Select
## Example Programs
### 1. RPC between Trainers and Parameter Servers
### 2. Concurrent Minibatch Loading

@ -9,16 +9,16 @@ different purposes.
## Background
The previous implementations of the parameter server does not run a
The previous implementations of the parameter server do not run a
fluid sub-program. Parameter initialization, optimizer computation, network
communication and checkpointing are implemented twice on both the
trainer and the parameter server.
trainer as well as the parameter server.
It would be great if we can write code once and use them on both the
trainer and the parameter server: reduces code duplication and
improves extensibility. Given that after the current refactor, we are
representing everything as a computing graph on the
trainer. Representing everything as a computing graph on the parameter
It would be great if we can write code once and use them on both: the
trainer and the parameter server, since this reduces code duplication and
improves extensibility. Given that after the current refactoring, we are
representing everything as a computation graph on the
trainer. Representing everything as a computation graph on the parameter
server becomes a natural extension.
## Design
@ -30,9 +30,9 @@ into sub-programs to be scheduled on different nodes with the following
steps:
1. OP placement: the OPs will be placed on different nodes according
to heuristic that minimizes estimated total computation
to a heuristic that minimizes the estimated total computation
time. Currently we will use a simple heuristic that puts parameter
varable on parameter server workers and everything else on trainer
variable on parameter server workers and everything else on trainer
workers.
1. Add communication OPs to enable the communication between nodes.
@ -47,22 +47,22 @@ After converting:
<img src="src/dist-graph.png" width="700"/>
1. The parameter variable W and it's optimizer program are placed on the parameter server.
1. The parameter variable W and its optimizer program are placed on the parameter server.
1. Operators are added to the program.
- *Send* sends data to the connected *Recv* operator. The
scheduler on the receive node will only schedule *Recv* operator
to run when the *Send* operator has ran (the *Send* OP will mark
the *Recv* OP runnable automatically).
- *Enueue* enqueues the input variable, it can block until space
- *Enqueue* enqueues the input variable, it can block until space
become available in the queue.
- *Dequeue* outputs configurable numbers of tensors from the
queue. It will block until the queue have the required number of
queue. It will block until the queue has the required number of
tensors.
### Benefits
- Model parallelism become easier to implement: it's an extension to
- Model parallelism becomes easier to implement: it is an extension to
the trainer - parameter server approach. We can have several "Transpilers"
to achieve different goals.
- User-defined optimizer is easier to add - user can now express it as
@ -72,22 +72,22 @@ After converting:
### Challenges
- It's important to balance the parameter shards of on multiple
parameter server. If a single parameter is very big (some
- It is important to balance the parameter shards on multiple
parameter servers. If a single parameter is very big (for example: some
word-embedding, fully connected, softmax layer), we need to
automatically partition the single parameter onto different
parameter servers when possible (only element-wise optimizer depends
on the parameter variable).
- In the "Aync SGD" figure, the "W" variable on the parameter server
could be read and wrote concurrently. See
- In the "Async SGD" figure, the "W" variable on the parameter server
could be read and written concurrently. See
[here](https://github.com/PaddlePaddle/Paddle/pull/6394) for more
details about concurrent program in fluid.
details about concurrent program in Fluid.
### Discussion
- Can the Enqueue OP be implemented under our current tensor design
(puts the input tensor into the queue tensor)?
- *Dequeue* OP will have variable numbers of output (depends on the
(put the input tensor into the queue tensor)?
- *Dequeue* OP will have variable numbers of output (depending on the
`min_count` attribute), does our current design support it? (similar
question for the *Add* OP)

@ -22,7 +22,7 @@ The current `LoDTensor` is designed to store levels of variable-length sequences
The integers in each level represent the begin and end (not inclusive) offset of a sequence **in the underlying tensor**,
let's call this format the **absolute-offset LoD** for clarity.
The relative-offset LoD can retrieve any sequence very quickly but fails to represent empty sequences, for example, a two-level LoD is as follows
The absolute-offset LoD can retrieve any sequence very quickly but fails to represent empty sequences, for example, a two-level LoD is as follows
```python
[[0, 3, 9]
[0, 2, 3, 3, 3, 9]]
@ -119,7 +119,7 @@ def generate():
encoder_ctx_expanded = pd.lod_expand(encoder_ctx, target_word)
decoder_input = pd.fc(
act=pd.activation.Linear(),
input=[target_word, encoder_ctx],
input=[target_word, encoder_ctx_expanded],
size=3 * decoder_dim)
gru_out, cur_mem = pd.gru_step(
decoder_input, mem=decoder_mem, size=decoder_dim)

@ -25,14 +25,14 @@
.. code-block:: bash
docker pull docker.paddlepaddle.org/paddle
docker pull docker.paddlepaddlehub.com/paddle
下载GPU版本cuda8.0_cudnn5_avx_mkl的Docker镜像
.. code-block:: bash
docker pull paddlepaddle/paddle:latest-gpu
docker pull docker.paddlepaddle.org/paddle:latest-gpu
docker pull docker.paddlepaddlehub.com/paddle:latest-gpu
选择下载使用不同的BLAS库的Docker镜像
@ -49,7 +49,7 @@
docker pull paddlepaddle/paddle:[tag]
# 比如:
docker pull docker.paddlepaddle.org/paddle:0.10.0-gpu
docker pull docker.paddlepaddlehub.com/paddle:0.11.0-gpu
.. _docker_run:

@ -26,14 +26,14 @@ For users in China, we provide a faster mirror:
.. code-block:: bash
docker pull docker.paddlepaddle.org/paddle
docker pull docker.paddlepaddlehub.com/paddle
Download GPU version (cuda8.0_cudnn5_avx_mkl) images:
.. code-block:: bash
docker pull paddlepaddle/paddle:latest-gpu
docker pull docker.paddlepaddle.org/paddle:latest-gpu
docker pull docker.paddlepaddlehub.com/paddle:latest-gpu
Choose between different BLAS version:
@ -53,7 +53,7 @@ and run:
docker pull paddlepaddle/paddle:[tag]
# i.e.
docker pull docker.paddlepaddle.org/paddle:0.10.0-gpu
docker pull docker.paddlepaddlehub.com/paddle:0.11.0-gpu
.. _docker_run:

@ -60,8 +60,7 @@ each column is as follows:
| column | meaning |
| --- | --- |
| ncalls | the number of calls into a function |
| tottime | the total execution time of the function, not including the
execution time of other functions called by the function |
| tottime | the total execution time of the function, not including the execution time of other functions called by the function |
| percall | tottime divided by ncalls |
| cumtime | the total execution time of the function, including the execution time of other functions being called |
| percall | cumtime divided by ncalls |

@ -18,7 +18,7 @@ else()
add_subdirectory(capi)
endif()
if(Boost_FOUND)
if(NOT ANDROID AND NOT IOS)
add_subdirectory(memory)
add_subdirectory(platform)
add_subdirectory(framework)

@ -1,7 +1,7 @@
# ddim lib
proto_library(framework_proto SRCS framework.proto)
cc_library(ddim SRCS ddim.cc DEPS eigen3)
cc_library(ddim SRCS ddim.cc DEPS eigen3 boost)
cc_test(ddim_test SRCS ddim_test.cc DEPS ddim)
nv_test(dim_test SRCS dim_test.cu DEPS ddim)
@ -45,7 +45,7 @@ cc_test(data_layout_transform_test SRCS data_layout_transform_test.cc DEPS data_
cc_library(data_transform SRCS data_transform.cc DEPS math_function tensor
framework_proto selected_rows data_device_transform data_type_transform data_layout_transform)
cc_library(attribute SRCS attribute.cc DEPS framework_proto)
cc_library(attribute SRCS attribute.cc DEPS framework_proto boost)
cc_test(program_desc_test SRCS program_desc_test.cc DEPS proto_desc
device_context)
cc_library(op_proto_maker SRCS op_proto_maker.cc DEPS framework_proto attribute)

@ -61,6 +61,9 @@ Attribute GetAttrValue(const proto::OpDesc::Attr& attr_desc) {
}
return val;
}
case proto::AttrType::LONG: {
return attr_desc.l();
}
default:
PADDLE_THROW("Unsupport attr type %d", attr_desc.type());
}

@ -168,6 +168,32 @@ struct ExtractAttribute<bool> {
const std::string& attr_name_;
};
template <>
struct ExtractAttribute<int64_t> {
explicit ExtractAttribute(const std::string& attr_name)
: attr_name_(attr_name) {}
int64_t* operator()(Attribute& attr) const {
if (attr.type() == typeid(int)) { // NOLINT
int val = boost::get<int>(attr);
attr = static_cast<int64_t>(val);
} else if (attr.type() == typeid(float)) { // NOLINT
int val = boost::get<float>(attr);
attr = static_cast<int64_t>(val);
}
int64_t* attr_value = nullptr;
try {
attr_value = &boost::get<int64_t>(attr);
} catch (boost::bad_get& bad_get) {
PADDLE_THROW("Cannot get attribute %s by type int64_t, its type is %s",
attr_name_, attr.type().name());
}
return attr_value;
}
const std::string& attr_name_;
};
// check whether a certain attribute fit its limits
// an attribute can have more than one limits
template <typename T>

@ -75,7 +75,7 @@ std::vector<VarDesc *> BlockDesc::AllVars() const {
OpDesc *BlockDesc::AppendOp() {
need_update_ = true;
ops_.emplace_back(new OpDesc());
ops_.emplace_back(new OpDesc(this));
return ops_.back().get();
}
@ -86,7 +86,7 @@ void BlockDesc::AppendAllocatedOp(std::unique_ptr<OpDesc> &&op_desc) {
OpDesc *BlockDesc::PrependOp() {
need_update_ = true;
ops_.emplace_front(new OpDesc());
ops_.emplace_front(new OpDesc(this));
return ops_.front().get();
}
@ -153,7 +153,7 @@ BlockDesc::BlockDesc(ProgramDesc *prog, proto::BlockDesc *desc)
vars_[var_desc.name()].reset(new VarDesc(var_desc));
}
for (const proto::OpDesc &op_desc : desc_->ops()) {
ops_.emplace_back(new OpDesc(op_desc, prog));
ops_.emplace_back(new OpDesc(op_desc, prog, this));
}
}
@ -162,7 +162,7 @@ BlockDesc::BlockDesc(const BlockDesc &other, proto::BlockDesc *desc,
: prog_(prog), desc_(desc) {
need_update_ = true;
for (auto &op : other.ops_) {
ops_.emplace_back(new OpDesc(*op));
ops_.emplace_back(new OpDesc(*op, this));
}
for (auto &it : other.vars_) {

@ -116,8 +116,9 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id,
for (auto& op_desc : block.AllOps()) {
auto op = paddle::framework::OpRegistry::CreateOp(*op_desc);
VLOG(3) << op->DebugStringEx(local_scope);
VLOG(4) << op->DebugStringEx(local_scope);
op->Run(*local_scope, place_);
VLOG(3) << op->DebugStringEx(local_scope);
if (FLAGS_do_memory_benchmark) {
VLOG(2) << "Memory used after operator " + op->Type() + " running: "
<< memory::memory_usage(place_);

@ -26,6 +26,7 @@ enum AttrType {
BOOLEAN = 6;
BOOLEANS = 7;
BLOCK = 8;
LONG = 9;
}
// OpDesc describes an instance of a C++ framework::OperatorBase
@ -44,6 +45,7 @@ message OpDesc {
optional bool b = 10;
repeated bool bools = 11;
optional int32 block_idx = 12;
optional int64 l = 13;
};
message Var {

@ -107,9 +107,10 @@ LoD ToAbsOffset(const LoD &in) {
// the lowest level stores relative offsets
if (in.empty() || in.size() == 1) return in;
LoD result = in;
for (int level = result.size() - 2; level >= 0; level--) {
for (auto &ele : result[level]) {
ele = result[level + 1][ele];
for (auto level = static_cast<int>(in.size() - 2); level >= 0; level--) {
for (size_t i = 0; i < in[level].size(); ++i) {
size_t index = in[level][i];
result[level][i] = result[level + 1][index];
}
}
return result;

@ -97,7 +97,7 @@ void OpDesc::CopyFrom(const OpDesc &op_desc) {
need_update_ = true;
}
OpDesc::OpDesc(const proto::OpDesc &desc, ProgramDesc *prog)
OpDesc::OpDesc(const proto::OpDesc &desc, ProgramDesc *prog, BlockDesc *block)
: desc_(desc), need_update_(false) {
// restore inputs_
int input_size = desc_.inputs_size();
@ -131,6 +131,7 @@ OpDesc::OpDesc(const proto::OpDesc &desc, ProgramDesc *prog)
attrs_[attr_name] = prog->MutableBlock(bid);
}
}
this->block_ = block;
}
proto::OpDesc *OpDesc::Proto() {
@ -282,6 +283,7 @@ struct SetAttrDescVisitor : public boost::static_visitor<void> {
VectorToRepeated(v, attr_->mutable_bools());
}
void operator()(BlockDesc *desc) const { attr_->set_block_idx(desc->ID()); }
void operator()(int64_t v) const { attr_->set_l(v); }
void operator()(boost::blank) const { PADDLE_THROW("Unexpected branch"); }
};

@ -25,7 +25,6 @@ namespace framework {
class BlockDesc;
class ProgramDesc;
class OpDesc {
public:
OpDesc() {}
@ -33,7 +32,14 @@ class OpDesc {
OpDesc(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const AttributeMap &attrs);
OpDesc(const proto::OpDesc &desc, ProgramDesc *prog);
OpDesc(const proto::OpDesc &desc, ProgramDesc *prog, BlockDesc *block);
explicit OpDesc(BlockDesc *block) : block_(block) {}
OpDesc(const OpDesc &other, BlockDesc *block) {
*this = other;
block_ = block;
}
void CopyFrom(const OpDesc &op_desc);
@ -117,6 +123,10 @@ class OpDesc {
void Flush();
BlockDesc *Block() { return this->block_; }
void SetBlock(BlockDesc *block) { this->block_ = block; }
private:
template <typename MapType>
static std::vector<typename MapType::key_type> MapKeys(const MapType &map) {
@ -129,6 +139,7 @@ class OpDesc {
}
proto::OpDesc desc_;
BlockDesc *block_; // not_own
// input arg name => input variable names
VariableNameMap inputs_;
// output arg name => output variable names

@ -35,7 +35,7 @@ using VariableNameMap = std::map<std::string, std::vector<std::string>>;
using Attribute =
boost::variant<boost::blank, int, float, std::string, std::vector<int>,
std::vector<float>, std::vector<std::string>, bool,
std::vector<bool>, BlockDesc*>;
std::vector<bool>, BlockDesc*, int64_t>;
using AttributeMap = std::unordered_map<std::string, Attribute>;

@ -66,6 +66,8 @@ class VarDesc {
std::string Name() const { return desc_.name(); }
void SetName(std::string name) { desc_.set_name(name); }
void SetShape(const std::vector<int64_t> &dims);
void SetDataType(proto::DataType data_type);

@ -12,19 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
/*
Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include <memory>
#include <string>

@ -69,7 +69,7 @@ bool PriorBoxLayer::init(const LayerMap& layerMap,
if (maxSize_.size() > 0) CHECK_EQ(minSize_.size(), maxSize_.size());
// flip aspect ratios
for (int index = 0; index < tmp.size(); index++) {
for (unsigned index = 0; index < tmp.size(); index++) {
real ar = tmp[index];
if (fabs(ar - 1.) < 1e-6) continue;
aspectRatio_.push_back(ar);

@ -1,7 +1,7 @@
add_subdirectory(detail)
cc_library(memory SRCS memory.cc DEPS place enforce)
cc_library(memcpy SRCS memcpy.cc)
cc_library(memcpy SRCS memcpy.cc DEPS place)
cc_library(paddle_memory
DEPS

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save