diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec
index 452806a20e..59d18aceda 100644
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@@ -145,14 +145,14 @@ paddle.fluid.layers.unstack ArgSpec(args=['x', 'axis', 'num'], varargs=None, key
 paddle.fluid.layers.sequence_enumerate ArgSpec(args=['input', 'win_size', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0, None))
 paddle.fluid.layers.expand ArgSpec(args=['x', 'expand_times', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.sequence_concat ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,))
-paddle.fluid.layers.scale ArgSpec(args=['x', 'scale', 'bias', 'bias_after_scale', 'out', 'act', 'name'], varargs=None, keywords=None, defaults=(1.0, 0.0, True, None, None, None))
-paddle.fluid.layers.elementwise_add ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None))
-paddle.fluid.layers.elementwise_div ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None))
-paddle.fluid.layers.elementwise_sub ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None))
-paddle.fluid.layers.elementwise_mul ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None))
-paddle.fluid.layers.elementwise_max ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None))
-paddle.fluid.layers.elementwise_min ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None))
-paddle.fluid.layers.elementwise_pow ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None))
+paddle.fluid.layers.scale ArgSpec(args=['x', 'scale', 'bias', 'bias_after_scale', 'act', 'name'], varargs=None, keywords=None, defaults=(1.0, 0.0, True, None, None))
+paddle.fluid.layers.elementwise_add ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None))
+paddle.fluid.layers.elementwise_div ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None))
+paddle.fluid.layers.elementwise_sub ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None))
+paddle.fluid.layers.elementwise_mul ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None))
+paddle.fluid.layers.elementwise_max ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None))
+paddle.fluid.layers.elementwise_min ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None))
+paddle.fluid.layers.elementwise_pow ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None))
 paddle.fluid.layers.uniform_random_batch_size_like ArgSpec(args=['input', 'shape', 'dtype', 'input_dim_idx', 'output_dim_idx', 'min', 'max', 'seed'], varargs=None, keywords=None, defaults=('float32', 0, 0, -1.0, 1.0, 0))
 paddle.fluid.layers.gaussian_random ArgSpec(args=['shape', 'mean', 'std', 'seed', 'dtype', 'use_mkldnn'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0, 'float32', False))
 paddle.fluid.layers.sampling_id ArgSpec(args=['x', 'min', 'max', 'seed', 'dtype'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0, 'float32'))
@@ -160,6 +160,12 @@ paddle.fluid.layers.gaussian_random_batch_size_like ArgSpec(args=['input', 'shap
 paddle.fluid.layers.sum ArgSpec(args=['x', 'use_mkldnn'], varargs=None, keywords=None, defaults=(False,))
 paddle.fluid.layers.slice ArgSpec(args=['input', 'axes', 'starts', 'ends'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.layers.shape ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None)
+paddle.fluid.layers.logical_and ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None))
+paddle.fluid.layers.logical_or ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None))
+paddle.fluid.layers.logical_xor ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None))
+paddle.fluid.layers.logical_not ArgSpec(args=['x', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None))
+paddle.fluid.layers.clip ArgSpec(args=['x', 'min', 'max', 'name'], varargs=None, keywords=None, defaults=(None,))
+paddle.fluid.layers.clip_by_norm ArgSpec(args=['x', 'max_norm', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True))
 paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None))
 paddle.fluid.layers.read_file ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None)
@@ -225,12 +231,6 @@ paddle.fluid.layers.is_empty ArgSpec(args=['x', 'cond'], varargs=None, keywords=
 paddle.fluid.layers.mean ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
 paddle.fluid.layers.mul ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
 paddle.fluid.layers.sigmoid_cross_entropy_with_logits ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
-paddle.fluid.layers.clip ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
-paddle.fluid.layers.clip_by_norm ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
-paddle.fluid.layers.logical_and ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
-paddle.fluid.layers.logical_or ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
-paddle.fluid.layers.logical_xor ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
-paddle.fluid.layers.logical_not ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
 paddle.fluid.layers.maxout ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
 paddle.fluid.layers.sigmoid ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.logsigmoid ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
diff --git a/paddle/fluid/framework/scope.cc b/paddle/fluid/framework/scope.cc
index 40dee143f5..1a727a2c8c 100644
--- a/paddle/fluid/framework/scope.cc
+++ b/paddle/fluid/framework/scope.cc
@@ -20,13 +20,6 @@ limitations under the License. */
 #include "paddle/fluid/framework/threadpool.h"
 #include "paddle/fluid/string/printf.h"
 
-// The mutex is not needed by training and inference, only for distribution.
-#if PADDLE_WITH_DISTRIBUTE
-#define WITH_LOCK 1
-#else
-#define WITH_LOCK 0
-#endif
-
 DEFINE_bool(benchmark, false,
             "Doing memory benchmark. It will make deleting scope synchronized, "
             "and add some memory usage logs."
@@ -56,24 +49,18 @@ int64_t GetEagerDeletionThreshold() {
 Scope::~Scope() { DropKids(); }
 
 Scope& Scope::NewScope() const {
-#if WITH_LOCK
   std::unique_lock<std::mutex> lock(mutex_);
-#endif
   kids_.push_back(new Scope(this));
   return *kids_.back();
 }
 
 Variable* Scope::Var(const std::string& name) {
-#if WITH_LOCK
   std::unique_lock<std::mutex> lock(mutex_);
-#endif
   return VarInternal(name);
 }
 
 Variable* Scope::Var(std::string* name) {
-#if WITH_LOCK
   std::unique_lock<std::mutex> lock(mutex_);
-#endif
   auto new_name = string::Sprintf("%p.%d", this, vars_.size());
   if (name != nullptr) {
     *name = new_name;
@@ -82,39 +69,29 @@ Variable* Scope::Var(std::string* name) {
 }
 
 Variable* Scope::FindVar(const std::string& name) const {
-#if WITH_LOCK
   std::unique_lock<std::mutex> lock(mutex_);
-#endif
   return FindVarInternal(name);
 }
 
 const Scope* Scope::FindScope(const Variable* var) const {
-#if WITH_LOCK
   std::unique_lock<std::mutex> lock(mutex_);
-#endif
   return FindScopeInternal(var);
 }
 
 void Scope::DropKids() {
-#if WITH_LOCK
   std::unique_lock<std::mutex> lock(mutex_);
-#endif
   for (Scope* s : kids_) delete s;
   kids_.clear();
 }
 
 bool Scope::HasKid(const Scope* scope) const {
-#if WITH_LOCK
   std::unique_lock<std::mutex> lock(mutex_);
-#endif
   auto it = std::find(this->kids_.begin(), this->kids_.end(), scope);
   return it != this->kids_.end();
 }
 
 std::vector<std::string> Scope::LocalVarNames() const {
-#if WITH_LOCK
   std::unique_lock<std::mutex> lock(mutex_);
-#endif
   std::vector<std::string> known_vars;
   known_vars.reserve(this->vars_.size());
   for (auto& p : vars_) {
@@ -124,9 +101,7 @@ std::vector<std::string> Scope::LocalVarNames() const {
 }
 
 void Scope::DeleteScope(Scope* scope) const {
-#if WITH_LOCK
   std::unique_lock<std::mutex> lock(mutex_);
-#endif
   auto it = std::find(this->kids_.begin(), this->kids_.end(), scope);
   PADDLE_ENFORCE(it != this->kids_.end(), "Cannot find %p as kid scope", scope);
   this->kids_.erase(it);
@@ -139,9 +114,7 @@ void Scope::DeleteScope(Scope* scope) const {
 }
 
 void Scope::EraseVars(const std::vector<std::string>& var_names) {
-#if WITH_LOCK
   std::unique_lock<std::mutex> lock(mutex_);
-#endif
   std::set<std::string> var_set(var_names.begin(), var_names.end());
   for (auto it = vars_.begin(); it != vars_.end();) {
     if (var_set.find(it->first) != var_set.end()) {
@@ -154,16 +127,12 @@ void Scope::EraseVars(const std::vector<std::string>& var_names) {
 
 void Scope::Rename(const std::string& origin_name,
                    const std::string& new_name) const {
-#if WITH_LOCK
   std::unique_lock<std::mutex> lock(mutex_);
-#endif
   RenameInternal(origin_name, new_name);
 }
 
 std::string Scope::Rename(const std::string& origin_name) const {
-#if WITH_LOCK
   std::unique_lock<std::mutex> lock(mutex_);
-#endif
   auto new_name = string::Sprintf("%p.%d", this, vars_.size());
   RenameInternal(origin_name, new_name);
   return new_name;
diff --git a/paddle/fluid/platform/dynload/cublas.h b/paddle/fluid/platform/dynload/cublas.h
index c7c533bd42..4ea0cd7283 100644
--- a/paddle/fluid/platform/dynload/cublas.h
+++ b/paddle/fluid/platform/dynload/cublas.h
@@ -55,7 +55,7 @@ extern void *cublas_dso_handle;
   struct DynLoad__##__name {                         \
     template <typename... Args>                      \
     inline cublasStatus_t operator()(Args... args) { \
-      return __name(args...);                        \
+      return ::__name(args...);                      \
     }                                                \
   };                                                 \
   extern DynLoad__##__name __name
diff --git a/paddle/fluid/platform/dynload/cudnn.h b/paddle/fluid/platform/dynload/cudnn.h
index 0103e7a3ac..e6353f67ef 100644
--- a/paddle/fluid/platform/dynload/cudnn.h
+++ b/paddle/fluid/platform/dynload/cudnn.h
@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #pragma once
+#define GLOG_NO_ABBREVIATED_SEVERITIES
+#define GOOGLE_GLOG_DLL_DECL
+#include <glog/logging.h>
 
 #include <cudnn.h>
 #include <mutex>  // NOLINT
@@ -47,13 +50,13 @@ extern void EnforceCUDNNLoaded(const char* fn_name);
 
 #else
 
-#define DECLARE_DYNAMIC_LOAD_CUDNN_WRAP(__name)                  \
-  struct DynLoad__##__name {                                     \
-    template <typename... Args>                                  \
-    auto operator()(Args... args) -> decltype(__name(args...)) { \
-      return __name(args...);                                    \
-    }                                                            \
-  };                                                             \
+#define DECLARE_DYNAMIC_LOAD_CUDNN_WRAP(__name)     \
+  struct DynLoad__##__name {                        \
+    template <typename... Args>                     \
+    inline cudnnStatus_t operator()(Args... args) { \
+      return ::__name(args...);                     \
+    }                                               \
+  };                                                \
   extern DynLoad__##__name __name
 
 #endif
diff --git a/paddle/fluid/platform/dynload/curand.h b/paddle/fluid/platform/dynload/curand.h
index 2daf1b4215..0bb300ec33 100644
--- a/paddle/fluid/platform/dynload/curand.h
+++ b/paddle/fluid/platform/dynload/curand.h
@@ -44,7 +44,7 @@ extern void *curand_dso_handle;
   struct DynLoad__##__name {                     \
     template <typename... Args>                  \
     curandStatus_t operator()(Args... args) {    \
-      return __name(args...);                    \
+      return ::__name(args...);                  \
     }                                            \
   };                                             \
   extern DynLoad__##__name __name
diff --git a/paddle/fluid/platform/dynload/dynamic_loader.cc b/paddle/fluid/platform/dynload/dynamic_loader.cc
index 6a3ad21510..cc5cda6106 100644
--- a/paddle/fluid/platform/dynload/dynamic_loader.cc
+++ b/paddle/fluid/platform/dynload/dynamic_loader.cc
@@ -107,7 +107,11 @@ static inline void* GetDsoHandleFromDefaultPath(const std::string& dso_path,
 static inline void* GetDsoHandleFromSearchPath(const std::string& search_root,
                                                const std::string& dso_name,
                                                bool throw_on_error = true) {
+#if !defined(_WIN32)
   int dynload_flags = RTLD_LAZY | RTLD_LOCAL;
+#else
+  int dynload_flags = 0;
+#endif  // !_WIN32
   void* dso_handle = nullptr;
 
   std::string dlPath = dso_name;
@@ -117,10 +121,15 @@ static inline void* GetDsoHandleFromSearchPath(const std::string& search_root,
     // search xxx.so from custom path
     dlPath = join(search_root, dso_name);
     dso_handle = dlopen(dlPath.c_str(), dynload_flags);
+#if !defined(_WIN32)
+    auto errorno = dlerror();
+#else
+    auto errorno = GetLastError();
+#endif  // !_WIN32
     // if not found, search from default path
     if (nullptr == dso_handle) {
       LOG(WARNING) << "Failed to find dynamic library: " << dlPath << " ("
-                   << dlerror() << ")";
+                   << errorno << ")";
       if (dlPath.find("nccl") != std::string::npos) {
         std::cout
             << "You may need to install 'nccl2' from NVIDIA official website: "
@@ -139,10 +148,15 @@ static inline void* GetDsoHandleFromSearchPath(const std::string& search_root,
       "export LD_LIBRARY_PATH=... \n Note: After Mac OS 10.11, "
       "using the DYLD_LIBRARY_PATH is impossible unless System "
       "Integrity Protection (SIP) is disabled.";
+#if !defined(_WIN32)
+  auto errorno = dlerror();
+#else
+  auto errorno = GetLastError();
+#endif  // !_WIN32
   if (throw_on_error) {
-    PADDLE_ENFORCE(nullptr != dso_handle, error_msg, dlPath, dlerror());
+    PADDLE_ENFORCE(nullptr != dso_handle, error_msg, dlPath, errorno);
   } else if (nullptr == dso_handle) {
-    LOG(WARNING) << string::Sprintf(error_msg, dlPath, dlerror());
+    LOG(WARNING) << string::Sprintf(error_msg, dlPath, errorno);
   }
 
   return dso_handle;
diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh
index 7d2fb7c6ce..88a2434518 100755
--- a/paddle/scripts/paddle_build.sh
+++ b/paddle/scripts/paddle_build.sh
@@ -395,7 +395,7 @@ EOF
         ctest --output-on-failure -j $1     
         # make install should also be test when unittest 
         make install -j 8
-        pip install /usr/local/opt/paddle/share/wheels/*.whl
+        pip install ${INSTALL_PREFIX:-/paddle/build}/opt/paddle/share/wheels/*.whl
         if [[ ${WITH_FLUID_ONLY:-OFF} == "OFF" ]] ; then
             paddle version
         fi
@@ -750,7 +750,7 @@ function main() {
         cmake_gen ${PYTHON_ABI:-""}
         build
         run_test
-        assert_api_not_changed
+        assert_api_not_changed ${PYTHON_ABI:-""}
         ;;
       *)
         print_usage
diff --git a/python/paddle/fluid/clip.py b/python/paddle/fluid/clip.py
index e884185528..4c24d0d6a7 100644
--- a/python/paddle/fluid/clip.py
+++ b/python/paddle/fluid/clip.py
@@ -271,7 +271,8 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr):
                     "All parameters' 'clip_norm' of a same group should be the same"
                 )
 
-        local_norm_var = layers.reduce_sum(input=layers.pow(x=grad, factor=2.0))
+        square = grad * grad
+        local_norm_var = layers.cast(layers.reduce_sum(input=square), 'float64')
         context[self.group_name].append(local_norm_var)
 
         self.context = context
@@ -281,6 +282,7 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr):
         if group_scale_name not in self.context:
             group_norm_var = layers.sums(input=self.context[self.group_name])
             group_norm_var = layers.sqrt(x=group_norm_var)
+            group_norm_var = layers.cast(group_norm_var, 'float32')
             clip_var = self.context[self.group_name + "_clip"]
             group_scale_var = layers.elementwise_div(
                 x=clip_var,
diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py
index 0049773bbe..c6250ff6ce 100644
--- a/python/paddle/fluid/layers/control_flow.py
+++ b/python/paddle/fluid/layers/control_flow.py
@@ -21,7 +21,7 @@ from .. import core
 from ..framework import Program, Variable, Operator
 from ..layer_helper import LayerHelper, unique_name
 from ..initializer import force_init_on_cpu
-from .ops import logical_and, logical_not, logical_or
+from .nn import logical_and, logical_not, logical_or
 import numpy
 import warnings
 import six
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index a9696ac200..c41ed05247 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -51,7 +51,9 @@ __all__ = [
     'expand', 'sequence_concat', 'scale', 'elementwise_add', 'elementwise_div',
     'elementwise_sub', 'elementwise_mul', 'elementwise_max', 'elementwise_min',
     'elementwise_pow', 'uniform_random_batch_size_like', 'gaussian_random',
-    'sampling_id', 'gaussian_random_batch_size_like', 'sum', 'slice', 'shape'
+    'sampling_id', 'gaussian_random_batch_size_like', 'sum', 'slice', 'shape',
+    'logical_and', 'logical_or', 'logical_xor', 'logical_not', 'clip',
+    'clip_by_norm'
 ]
 
 
@@ -953,8 +955,8 @@ def cross_entropy(input, label, soft_label=False, ignore_index=-100):
         soft_label (bool): a flag indicating whether to
                                            interpretate the given labels as soft
                                            labels. Default: `False`.
-        ignore_index (int): Specifies a target value that is ignored and does 
-                            not contribute to the input gradient. Only valid 
+        ignore_index (int): Specifies a target value that is ignored and does
+                            not contribute to the input gradient. Only valid
                             if soft_label is set to False. Default: -100
 
     Returns:
@@ -2714,20 +2716,20 @@ def sequence_pad(x, pad_value, maxlen=None):
 
     Args:
         x(Variable): Input variable which should contain lod information.
-        pad_value(Variable): The Variable that holds values that will be fill 
-            into padded steps. It can be a scalar or a tensor whose shape 
-            equals to time steps in sequences. If it's a scalar, it will be 
+        pad_value(Variable): The Variable that holds values that will be fill
+            into padded steps. It can be a scalar or a tensor whose shape
+            equals to time steps in sequences. If it's a scalar, it will be
             automatically broadcasted to the shape of time step.
-        maxlen(int, default None): The length of padded sequences. It can be 
-            None or any positive int. When it is None, all sequences will be 
-            padded up to the length of the longest one among them; when it a 
-            certain positive value, it must be greater than the length of the 
+        maxlen(int, default None): The length of padded sequences. It can be
+            None or any positive int. When it is None, all sequences will be
+            padded up to the length of the longest one among them; when it a
+            certain positive value, it must be greater than the length of the
             longest original sequence."
-    
+
     Returns:
-        Variable: The padded sequence batch and the original lengths before 
+        Variable: The padded sequence batch and the original lengths before
                   padding. All sequences has the same length.
-    
+
     Examples:
         .. code-block:: python
 
@@ -4343,8 +4345,8 @@ def softmax_with_cross_entropy(logits,
             soft_label is set to true, Label is a Tensor<float/double> with
         soft_label (bool): A flag to indicate whether to interpretate the given
             labels as soft labels. By default, `soft_label` is set to False.
-        ignore_index (int): Specifies a target value that is ignored and does 
-                            not contribute to the input gradient. Only valid 
+        ignore_index (int): Specifies a target value that is ignored and does
+                            not contribute to the input gradient. Only valid
                             if soft_label is set to False. Default: -100
 
     Returns:
@@ -4601,14 +4603,14 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None):
 
 def squeeze(input, axes, name=None):
     """
-    Remove single-dimensional entries from the shape of a tensor. Takes a 
-    parameter axes with a list of axes to squeeze. If axes is not provided, all 
-    the single dimensions will be removed from the shape. If an axis is 
+    Remove single-dimensional entries from the shape of a tensor. Takes a
+    parameter axes with a list of axes to squeeze. If axes is not provided, all
+    the single dimensions will be removed from the shape. If an axis is
     selected with shape entry not equal to one, an error is raised.
-        
+
     Examples:
     Case 1:
-      Given 
+      Given
         X.shape = (1, 3, 1, 5)
       and
         axes = [0]
@@ -4617,11 +4619,11 @@ def squeeze(input, axes, name=None):
       Case 2:
         Given
           X.shape = (1, 3, 1, 5)
-        and 
+        and
           axes = []
         we get:
           Out.shape = (3, 5)
-    
+
     Args:
         input (Variable): The input variable to be squeezed.
         axes (list): List of integers, indicating the dimensions to be squeezed.
@@ -4651,14 +4653,14 @@ def squeeze(input, axes, name=None):
 
 def unsqueeze(input, axes, name=None):
     """
-    Insert single-dimensional entries to the shape of a tensor. Takes one 
-    required argument axes, a list of dimensions that will be inserted. 
-    Dimension indices in axes are as seen in the output tensor. 
+    Insert single-dimensional entries to the shape of a tensor. Takes one
+    required argument axes, a list of dimensions that will be inserted.
+    Dimension indices in axes are as seen in the output tensor.
 
-    For example: 
-      Given a tensor such that tensor with shape [3, 4, 5], 
+    For example:
+      Given a tensor such that tensor with shape [3, 4, 5],
       then Unsqueezed tensor with axes=[0, 4] has shape [1, 3, 4, 5, 1].
-    
+
     Args:
         input (Variable): The input variable to be unsqueezed.
         axes (list): List of integers, indicating the dimensions to be inserted.
@@ -5757,39 +5759,39 @@ def pad2d(input,
     Example:
 
       Given that X is a channel of image from input:
-      
+
       X = [[1, 2, 3],
            [4, 5, 6]]
-      
+
       Case 0:
-      
+
         paddings = [0, 1, 2, 3],
         mode = 'constant'
         pad_value = 0
-        
+
         Out = [[0, 0, 1, 2, 3, 0, 0, 0]
                [0, 0, 4, 5, 6, 0, 0, 0]
                [0, 0, 0, 0, 0, 0, 0, 0]]
-      
+
       Case 1:
-      
+
         paddings = [0, 1, 2, 1],
         mode = 'reflect'
-        
+
         Out = [[3, 2, 1, 2, 3, 2]
                [6, 5, 4, 5, 6, 5]
                [3, 2, 1, 2, 3, 2]]
-        
+
       Case 2:
-      
+
         paddings = [0, 1, 2, 1],
         mode = 'edge'
-        
+
         Out = [[1, 1, 1, 2, 3, 3]
                [4, 4, 4, 5, 6, 6]
                [4, 4, 4, 5, 6, 6]]
-    
-  
+
+
     Args:
         input (Variable): The input image with [N, C, H, W] format or [N, H, W, C] format.
         paddings (tuple|list): The padding size. If padding is a tuple, it must
@@ -5988,7 +5990,7 @@ def prelu(x, mode, param_attr=None, name=None):
  		       channel:elements in a channel share same weight
  		       element:each element has a weight
 	name(str|None): A name for this layer(optional). If set None, the layer
-                        will be named automatically. 
+                        will be named automatically.
 
     Returns:
         Variable: The output tensor with the same shape as input.
@@ -6166,10 +6168,10 @@ def flatten(x, axis=1, name=None):
 def sequence_enumerate(input, win_size, pad_value=0, name=None):
     """
     Generate a new sequence for the input index sequence, which enumerates all the
-    sub-sequences with length `win_size` of the input. 
+    sub-sequences with length `win_size` of the input.
     The enumerated sequence has the same 1st dimension with variable `input`, and
     the 2nd dimension is `win_size`, padded by `pad_value` if necessary in generation.
-    
+
     Examples:
     Case 1:
       Input:
@@ -6296,20 +6298,20 @@ def unstack(x, axis=0, num=None):
     **UnStack Layer**
 
     This layer unstacks input :code:`x` into several tensors along axis.
-   
+
     If :code:`axis` < 0, it would be replaced with :code:`axis+rank(x)`.
     If :code:`num` is None, it would be inferred from :code:`x.shape[axis]`,
     and if :code:`x.shape[axis]` <= 0 or is unknown, :code:`ValueError` is
-    raised. 
+    raised.
 
     Args:
-        x (Variable): Input variable. 
+        x (Variable): Input variable.
         axis (int): The axis along which the input is unstacked.
         num (int|None): The number of output variables.
-    
+
     Returns:
         list(Variable): The unstacked variables.
-    
+
     """
 
     helper = LayerHelper('unstack', **locals())
@@ -6342,21 +6344,21 @@ def expand(x, expand_times, name=None):
     .. code-block:: text
 
         Input(X) is a 3-D tensor with shape [2, 3, 1]:
-        
+
                 [
                    [[1], [2], [3]],
                    [[4], [5], [6]]
                 ]
-        
+
         Attr(expand_times):  [1, 2, 2]
-        
+
         Output(Out) is a 3-D tensor with shape [2, 6, 2]:
-        
+
                 [
                     [[1, 1], [2, 2], [3, 3], [1, 1], [2, 2], [3, 3]],
                     [[4, 4], [5, 5], [6, 6], [4, 4], [5, 5], [6, 6]]
                 ]
-        
+
     Args:
         x (Variable): A tensor with rank in [1, 6].
         expand_times (list|tuple): Expand times number for each dimension.
@@ -6630,14 +6632,12 @@ def _elementwise_op(helper):
     assert y is not None, 'y cannot be None in {}'.format(op_type)
     axis = helper.kwargs.get('axis', -1)
     use_mkldnn = helper.kwargs.get('use_mkldnn', False)
-    out = helper.kwargs.get('out', None)
-    if out is None:
-        name = helper.kwargs.get('name', None)
-        if name is None:
-            out = helper.create_tmp_variable(dtype=x.dtype)
-        else:
-            out = helper.create_variable(
-                name=name, dtype=x.dtype, persistable=False)
+    name = helper.kwargs.get('name', None)
+    if name is None:
+        out = helper.create_tmp_variable(dtype=x.dtype)
+    else:
+        out = helper.create_variable(
+            name=name, dtype=x.dtype, persistable=False)
 
     helper.append_op(
         type=op_type,
@@ -6650,13 +6650,7 @@ def _elementwise_op(helper):
 
 
 @templatedoc()
-def scale(x,
-          scale=1.0,
-          bias=0.0,
-          bias_after_scale=True,
-          out=None,
-          act=None,
-          name=None):
+def scale(x, scale=1.0, bias=0.0, bias_after_scale=True, act=None, name=None):
     """
     ${comment}
 
@@ -6665,21 +6659,19 @@ def scale(x,
         scale(${scale_type}): ${scale_comment}
         bias(${bias_type}): ${bias_comment}
         bias_after_scale(${bias_after_scale_type}): ${bias_after_scale_comment}
-        out(Tensor): Output tensor.
         act(basestring|None): Activation applied to the output.
-        name(basestring|None): Name of the output. 
+        name(basestring|None): Name of the output.
 
     Returns:
         out(${out_type}): ${out_comment}
     """
 
     helper = LayerHelper('scale', **locals())
-    if out is None:
-        if name is None:
-            out = helper.create_tmp_variable(dtype=x.dtype)
-        else:
-            out = helper.create_variable(
-                name=name, dtype=x.dtype, persistable=False)
+    if name is None:
+        out = helper.create_tmp_variable(dtype=x.dtype)
+    else:
+        out = helper.create_variable(
+            name=name, dtype=x.dtype, persistable=False)
 
     helper.append_op(
         type='scale',
@@ -6693,73 +6685,31 @@ def scale(x,
     return helper.append_activation(out)
 
 
-def elementwise_add(x,
-                    y,
-                    out=None,
-                    axis=-1,
-                    use_mkldnn=False,
-                    act=None,
-                    name=None):
+def elementwise_add(x, y, axis=-1, use_mkldnn=False, act=None, name=None):
     return _elementwise_op(LayerHelper('elementwise_add', **locals()))
 
 
-def elementwise_div(x,
-                    y,
-                    out=None,
-                    axis=-1,
-                    use_mkldnn=False,
-                    act=None,
-                    name=None):
+def elementwise_div(x, y, axis=-1, use_mkldnn=False, act=None, name=None):
     return _elementwise_op(LayerHelper('elementwise_div', **locals()))
 
 
-def elementwise_sub(x,
-                    y,
-                    out=None,
-                    axis=-1,
-                    use_mkldnn=False,
-                    act=None,
-                    name=None):
+def elementwise_sub(x, y, axis=-1, use_mkldnn=False, act=None, name=None):
     return _elementwise_op(LayerHelper('elementwise_sub', **locals()))
 
 
-def elementwise_mul(x,
-                    y,
-                    out=None,
-                    axis=-1,
-                    use_mkldnn=False,
-                    act=None,
-                    name=None):
+def elementwise_mul(x, y, axis=-1, use_mkldnn=False, act=None, name=None):
     return _elementwise_op(LayerHelper('elementwise_mul', **locals()))
 
 
-def elementwise_max(x,
-                    y,
-                    out=None,
-                    axis=-1,
-                    use_mkldnn=False,
-                    act=None,
-                    name=None):
+def elementwise_max(x, y, axis=-1, use_mkldnn=False, act=None, name=None):
     return _elementwise_op(LayerHelper('elementwise_max', **locals()))
 
 
-def elementwise_min(x,
-                    y,
-                    out=None,
-                    axis=-1,
-                    use_mkldnn=False,
-                    act=None,
-                    name=None):
+def elementwise_min(x, y, axis=-1, use_mkldnn=False, act=None, name=None):
     return _elementwise_op(LayerHelper('elementwise_min', **locals()))
 
 
-def elementwise_pow(x,
-                    y,
-                    out=None,
-                    axis=-1,
-                    use_mkldnn=False,
-                    act=None,
-                    name=None):
+def elementwise_pow(x, y, axis=-1, use_mkldnn=False, act=None, name=None):
     return _elementwise_op(LayerHelper('elementwise_pow', **locals()))
 
 
@@ -6771,7 +6721,168 @@ for func in [
     func.__doc__ = _generate_doc_string_(
         op_proto,
         additional_args_lines=[
-            "out (Tensor): The output tensor of elementwise op.",
             "act (basestring|None): Activation applied to the output.",
             "name (basestring|None): Name of the output."
         ])
+
+
+def _logical_op(op_name, x, y, out=None, name=None, binary_op=True):
+    helper = LayerHelper(op_name, **locals())
+
+    if binary_op:
+        assert x.dtype == y.dtype
+
+    if out is None:
+        if name is None:
+            out = helper.create_tmp_variable(dtype=x.dtype)
+        else:
+            out = helper.create_variable(
+                name=name, dtype=x.dtype, persistable=False)
+
+    if binary_op:
+        helper.append_op(
+            type=op_name, inputs={"X": x,
+                                  "Y": y}, outputs={"Out": out})
+    else:
+        helper.append_op(type=op_name, inputs={"X": x}, outputs={"Out": out})
+
+    return out
+
+
+@templatedoc()
+def logical_and(x, y, out=None, name=None):
+    """
+    ${comment}
+
+    Args:
+        x(${x_type}): ${x_comment}
+        y(${y_type}): ${y_comment}
+        out(Tensor): Output tensor of logical operation.
+        name(basestring|None): Name of the output.
+
+    Returns:
+        out(${out_type}): ${out_comment}
+    """
+
+    return _logical_op(
+        op_name="logical_and", x=x, y=y, name=name, out=out, binary_op=True)
+
+
+@templatedoc()
+def logical_or(x, y, out=None, name=None):
+    """
+    ${comment}
+
+    Args:
+        x(${x_type}): ${x_comment}
+        y(${y_type}): ${y_comment}
+        out(Tensor): Output tensor of logical operation.
+        name(basestring|None): Name of the output.
+
+    Returns:
+        out(${out_type}): ${out_comment}
+    """
+
+    return _logical_op(
+        op_name="logical_or", x=x, y=y, name=name, out=out, binary_op=True)
+
+
+@templatedoc()
+def logical_xor(x, y, out=None, name=None):
+    """
+    ${comment}
+
+    Args:
+        x(${x_type}): ${x_comment}
+        y(${y_type}): ${y_comment}
+        out(Tensor): Output tensor of logical operation.
+        name(basestring|None): Name of the output.
+
+    Returns:
+        out(${out_type}): ${out_comment}
+    """
+
+    return _logical_op(
+        op_name="logical_xor", x=x, y=y, name=name, out=out, binary_op=True)
+
+
+@templatedoc()
+def logical_not(x, out=None, name=None):
+    """
+    ${comment}
+
+    Args:
+        x(${x_type}): ${x_comment}
+        out(Tensor): Output tensor of logical operation.
+        name(basestring|None): Name of the output.
+
+    Returns:
+        out(${out_type}): ${out_comment}
+    """
+
+    return _logical_op(
+        op_name="logical_not", x=x, y=None, name=name, out=out, binary_op=False)
+
+
+@templatedoc()
+def clip(x, min, max, name=None):
+    """
+    ${comment}
+
+    Args:
+        x(${x_type}): ${x_comment}
+        min(${min_type}): ${min_comment}
+        max(${max_type}): ${max_comment}
+        name(basestring|None): Name of the output.
+
+    Returns:
+        out(${out_type}): ${out_comment}
+    """
+
+    helper = LayerHelper("clip", **locals())
+
+    if name is None:
+        out = helper.create_tmp_variable(dtype=x.dtype)
+    else:
+        out = helper.create_variable(
+            name=name, dtype=x.dtype, persistable=False)
+
+    helper.append_op(
+        type="clip",
+        inputs={"X": x},
+        attrs={"min": min,
+               "max": max},
+        outputs={"Out": out})
+
+    return out
+
+
+@templatedoc()
+def clip_by_norm(x, max_norm, name=None):
+    """
+    ${comment}
+
+    Args:
+        x(${x_type}): ${x_comment}
+        max_norm(${max_norm_type}): ${max_norm_comment}
+        name(basestring|None): Name of the output.
+
+    Returns:
+        out(${out_type}): ${out_comment}
+    """
+
+    helper = LayerHelper("clip_by_norm", **locals())
+
+    if name is None:
+        out = helper.create_tmp_variable(dtype=x.dtype)
+    else:
+        out = helper.create_variable(
+            name=name, dtype=x.dtype, persistable=False)
+
+    helper.append_op(
+        type="clip_by_norm",
+        inputs={"X": x},
+        attrs={"max_norm": max_norm},
+        outputs={"Out": out})
+
+    return out
diff --git a/python/paddle/fluid/layers/ops.py b/python/paddle/fluid/layers/ops.py
index 220d065f8f..824c5be0ff 100644
--- a/python/paddle/fluid/layers/ops.py
+++ b/python/paddle/fluid/layers/ops.py
@@ -39,12 +39,6 @@ __all__ = [
     'mean',
     'mul',
     'sigmoid_cross_entropy_with_logits',
-    'clip',
-    'clip_by_norm',
-    'logical_and',
-    'logical_or',
-    'logical_xor',
-    'logical_not',
     'maxout',
 ]
 
diff --git a/python/paddle/fluid/tests/unittests/test_dist_se_resnext.py b/python/paddle/fluid/tests/unittests/test_dist_se_resnext.py
index c0989ca709..43188bfefa 100644
--- a/python/paddle/fluid/tests/unittests/test_dist_se_resnext.py
+++ b/python/paddle/fluid/tests/unittests/test_dist_se_resnext.py
@@ -22,7 +22,7 @@ class TestDistSeResneXt2x2(TestDistBase):
         self._sync_mode = True
         self._use_reader_alloc = False
 
-    def test_dist_train(self):
+    def no_test_dist_train(self):
         self.check_with_place("dist_se_resnext.py", delta=100)
 
 
@@ -40,7 +40,7 @@ class TestDistSeResneXt2x2Async(TestDistBase):
         self._sync_mode = False
         self._use_reader_alloc = False
 
-    def test_dist_train(self):
+    def no_test_dist_train(self):
         self.check_with_place("dist_se_resnext.py", delta=100)