Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into pixel_softmax_layer

8 years ago · 3aa679814f
parent 29f25fbe03 6398c15c7f
commit 3aa679814f
59 changed files with 1045 additions and 198 deletions
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@ -21,3 +21,10 @@
    sha: 28c0ea8a67a3e2dbbf4822ef44e85b63a0080a29
    hooks:
    -   id: clang-formater
 -   repo: https://github.com/dnephin/pre-commit-golang
    sha: e4693a4c282b4fc878eda172a929f7a6508e7d16
    hooks:
      -   id: go-fmt
          files: (.*\.go)
      -   id: go-lint
          files: (.*\.go)
--- a/.travis.yml
+++ b/.travis.yml
@ -33,16 +33,17 @@ addons:
      - ccache
 before_install:
  - if [[ "$JOB" == "check_style" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi
-  # Paddle is using protobuf 3.1 currently. Protobuf 3.2 breaks the compatibility. So we specify the python 
+  # Paddle is using protobuf 3.1 currently. Protobuf 3.2 breaks the compatibility. So we specify the python
  # protobuf version.
  - pip install numpy wheel 'protobuf==3.1' sphinx==1.5.6 recommonmark sphinx-rtd-theme==0.1.9 virtualenv pre-commit requests==2.9.2 LinkChecker
  - pip install rarfile
  - curl https://glide.sh/get | bash
  - eval "$(GIMME_GO_VERSION=1.8.3 gimme)"
  - |
    function timeout() { perl -e 'alarm shift; exec @ARGV' "$@"; }
 script:
  - |
-    export WITH_GOLANG=ON && timeout 2580 paddle/scripts/travis/${JOB}.sh # 43min timeout
+    timeout 2580 paddle/scripts/travis/${JOB}.sh # 43min timeout
    RESULT=$?; if [ $RESULT -eq 0 ] || [ $RESULT -eq 142 ]; then true; else false; fi;
 notifications:
  email:
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -16,6 +16,7 @@ cmake_minimum_required(VERSION 3.0)
 set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
 set(PROJ_ROOT ${CMAKE_CURRENT_SOURCE_DIR})
 set(PROJ_BINARY_ROOT ${CMAKE_CURRENT_BINARY_DIR})
 include(system)
--- a/cmake/external/glog.cmake
+++ b/cmake/external/glog.cmake
@ -38,12 +38,14 @@ ExternalProject_Add(
    CMAKE_ARGS      -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
    CMAKE_ARGS      -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
    CMAKE_ARGS      -DCMAKE_INSTALL_PREFIX=${GLOG_INSTALL_DIR}
    CMAKE_ARGS      -DCMAKE_INSTALL_LIBDIR=${GLOG_INSTALL_DIR}/lib
    CMAKE_ARGS      -DCMAKE_POSITION_INDEPENDENT_CODE=ON
    CMAKE_ARGS      -DWITH_GFLAGS=ON
    CMAKE_ARGS      -Dgflags_DIR=${GFLAGS_INSTALL_DIR}/lib/cmake/gflags
    CMAKE_ARGS      -DBUILD_TESTING=OFF
    CMAKE_ARGS      -DCMAKE_BUILD_TYPE=Release
    CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GLOG_INSTALL_DIR}
                     -DCMAKE_INSTALL_LIBDIR:PATH=${GLOG_INSTALL_DIR}/lib
                     -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
                     -DCMAKE_BUILD_TYPE:STRING=Release
 )
--- a/cmake/external/protobuf.cmake
+++ b/cmake/external/protobuf.cmake
@ -17,6 +17,65 @@ INCLUDE(ExternalProject)
 FIND_PACKAGE(Protobuf QUIET)
 SET(PROTOBUF_FOUND "OFF")
 if(NOT COMMAND protobuf_generate_python)  # before cmake 3.4, protobuf_generate_python is not defined.
    # protobuf_generate_python(<out_var> <proto>...)
    #
    # Fallback used when the running CMake does not provide this command.
    # Adds one custom command per .proto file that runs protoc with
    # --python_out into CMAKE_CURRENT_BINARY_DIR, and stores the list of
    # generated <name>_pb2.py paths in <out_var> in the caller's scope.
    #
    # Honors PROTOBUF_GENERATE_CPP_APPEND_PATH (one -I per proto directory
    # instead of a single -I CMAKE_CURRENT_SOURCE_DIR) and
    # Protobuf_IMPORT_DIRS / PROTOBUF_IMPORT_DIRS (extra -I directories).
    function(protobuf_generate_python SRCS)
        # shameless copy from https://github.com/Kitware/CMake/blob/master/Modules/FindProtobuf.cmake
        if(NOT ARGN)
            message(SEND_ERROR "Error: PROTOBUF_GENERATE_PYTHON() called without any proto files")
            return()
        endif()

        # FindProtobuf modules old enough to need this fallback only define
        # the upper-case PROTOBUF_PROTOC_EXECUTABLE; map it to the name used
        # below so the generated command does not end up without a compiler.
        if(NOT Protobuf_PROTOC_EXECUTABLE AND PROTOBUF_PROTOC_EXECUTABLE)
            set(Protobuf_PROTOC_EXECUTABLE "${PROTOBUF_PROTOC_EXECUTABLE}")
        endif()

        if(PROTOBUF_GENERATE_CPP_APPEND_PATH)
            # Create an include path for each file specified
            foreach(FIL ${ARGN})
                get_filename_component(ABS_FIL "${FIL}" ABSOLUTE)
                get_filename_component(ABS_PATH "${ABS_FIL}" PATH)
                list(FIND _protobuf_include_path "${ABS_PATH}" _contains_already)
                if(_contains_already EQUAL -1)
                    list(APPEND _protobuf_include_path -I "${ABS_PATH}")
                endif()
            endforeach()
        else()
            set(_protobuf_include_path -I "${CMAKE_CURRENT_SOURCE_DIR}")
        endif()

        # Accept the legacy upper-case spelling of the import-dirs variable.
        if(DEFINED PROTOBUF_IMPORT_DIRS AND NOT DEFINED Protobuf_IMPORT_DIRS)
            set(Protobuf_IMPORT_DIRS "${PROTOBUF_IMPORT_DIRS}")
        endif()

        if(DEFINED Protobuf_IMPORT_DIRS)
            foreach(DIR ${Protobuf_IMPORT_DIRS})
                get_filename_component(ABS_PATH "${DIR}" ABSOLUTE)
                list(FIND _protobuf_include_path "${ABS_PATH}" _contains_already)
                if(_contains_already EQUAL -1)
                    list(APPEND _protobuf_include_path -I "${ABS_PATH}")
                endif()
            endforeach()
        endif()

        # Start from an empty result list, then add one rule per proto file.
        set(${SRCS})
        foreach(FIL ${ARGN})
            get_filename_component(ABS_FIL "${FIL}" ABSOLUTE)
            get_filename_component(FIL_WE "${FIL}" NAME_WE)
            if(NOT PROTOBUF_GENERATE_CPP_APPEND_PATH)
                # Keep the proto's relative directory in the output path.
                get_filename_component(FIL_DIR "${FIL}" DIRECTORY)
                if(FIL_DIR)
                    set(FIL_WE "${FIL_DIR}/${FIL_WE}")
                endif()
            endif()

            list(APPEND ${SRCS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}_pb2.py")
            add_custom_command(
                    OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}_pb2.py"
                    COMMAND  ${Protobuf_PROTOC_EXECUTABLE} --python_out ${CMAKE_CURRENT_BINARY_DIR} ${_protobuf_include_path} ${ABS_FIL}
                    DEPENDS ${ABS_FIL} ${Protobuf_PROTOC_EXECUTABLE}
                    COMMENT "Running Python protocol buffer compiler on ${FIL}"
                    VERBATIM )
        endforeach()

        # Hand the collected output paths back to the caller.
        set(${SRCS} ${${SRCS}} PARENT_SCOPE)
    endfunction()
 endif()
 # Print and set the protobuf library information,
 # finish this cmake process and exit from this file.
--- a/cmake/generic.cmake
+++ b/cmake/generic.cmake
@ -88,7 +88,7 @@
 #
 # including binary directory for generated headers.
-include_directories(${CMAKE_BINARY_DIR})
+include_directories(${CMAKE_CURRENT_BINARY_DIR})
 if(NOT APPLE)
    find_package(Threads REQUIRED)
@ -99,25 +99,44 @@ function(merge_static_libs TARGET_NAME)
  set(libs ${ARGN})
  list(REMOVE_DUPLICATES libs)
-  # First get the file names of the libraries to be merged
+  # Get all propagation dependencies from the merged libraries
  foreach(lib ${libs})
-    set(libfiles ${libfiles} $<TARGET_FILE:${lib}>)
+    list(APPEND libs_deps ${${lib}_LIB_DEPENDS})
  endforeach()
  if(APPLE) # Use OSX's libtool to merge archives
    # To produce a library we need at least one source file.
    # It is created by add_custom_command below and also helps
    # to track dependencies.
    set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}_dummy.c)
    # Make the generated dummy source file depended on all static input
    # libs. If input lib changes,the source file is touched
    # which causes the desired effect (relink).
    add_custom_command(OUTPUT ${dummyfile}
      COMMAND ${CMAKE_COMMAND} -E touch ${dummyfile}
      DEPENDS ${libs})
    # Generate dummy static lib
    file(WRITE ${dummyfile} "const char * dummy = \"${dummyfile}\";")
    add_library(${TARGET_NAME} STATIC ${dummyfile})
    target_link_libraries(${TARGET_NAME} ${libs_deps})
    foreach(lib ${libs})
      # Get the file names of the libraries to be merged
      set(libfiles ${libfiles} $<TARGET_FILE:${lib}>)
    endforeach()
 		add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
      COMMAND rm "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a"
      COMMAND /usr/bin/libtool -static -o "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a" ${libfiles})
-	else() # general UNIX: use "ar" to extract objects and re-add to a common lib
+  else() # general UNIX: use "ar" to extract objects and re-add to a common lib
    foreach(lib ${libs})
      set(objlistfile ${lib}.objlist) # list of objects in the input library
      set(objdir ${lib}.objdir)
      add_custom_command(OUTPUT ${objdir}
-        COMMAND ${CMAKE_COMMAND} -E make_directory ${objdir})
+        COMMAND ${CMAKE_COMMAND} -E make_directory ${objdir}
        DEPENDS ${lib})
      add_custom_command(OUTPUT ${objlistfile}
        COMMAND ${CMAKE_AR} -x "$<TARGET_FILE:${lib}>"
@ -134,18 +153,18 @@ function(merge_static_libs TARGET_NAME)
      list(APPEND mergebases "${mergebase}")
    endforeach()
    # We need a target for the output merged library
    add_library(${TARGET_NAME} STATIC ${mergebases})
    target_link_libraries(${TARGET_NAME} ${libs_deps})
    # Get the file name of the generated library
    set(outlibfile "$<TARGET_FILE:${TARGET_NAME}>")
    foreach(lib ${libs})
      add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
-      COMMAND ${CMAKE_AR} ru ${outlibfile} @"../${lib}.objlist"
+        COMMAND ${CMAKE_AR} cr ${outlibfile} *.o
-      WORKING_DIRECTORY ${lib}.objdir)
+        COMMAND ${CMAKE_RANLIB} ${outlibfile}
        WORKING_DIRECTORY ${lib}.objdir)
    endforeach()
    add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
      COMMAND ${CMAKE_RANLIB} ${outlibfile})
  endif()
 endfunction(merge_static_libs)
@ -194,7 +213,7 @@ function(cc_test TARGET_NAME)
    add_executable(${TARGET_NAME} ${cc_test_SRCS})
    target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} gtest gtest_main)
    add_dependencies(${TARGET_NAME} ${cc_test_DEPS} gtest gtest_main)
-    add_test(${TARGET_NAME} ${TARGET_NAME})
+    add_test(NAME ${TARGET_NAME} COMMAND ${TARGET_NAME} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
  endif()
 endfunction(cc_test)
@ -281,10 +300,11 @@ function(go_library TARGET_NAME)
  file(GLOB GO_SOURCE RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.go")
  string(REPLACE "${PADDLE_GO_PATH}/" "" CMAKE_CURRENT_SOURCE_REL_DIR ${CMAKE_CURRENT_SOURCE_DIR})
  # FIXME: link path
  add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
    COMMAND rm "${${TARGET_NAME}_LIB_PATH}"
    # Golang build source code
-    COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build ${BUILD_MODE}
+    COMMAND GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build ${BUILD_MODE}
    -o "${${TARGET_NAME}_LIB_PATH}"
    "./${CMAKE_CURRENT_SOURCE_REL_DIR}/${GO_SOURCE}"
    # must run under GOPATH
@ -299,11 +319,13 @@ function(go_binary TARGET_NAME)
  cmake_parse_arguments(go_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
  string(REPLACE "${PADDLE_GO_PATH}/" "" CMAKE_CURRENT_SOURCE_REL_DIR ${CMAKE_CURRENT_SOURCE_DIR})
  # FIXME: link path
  add_custom_command(OUTPUT ${TARGET_NAME}_timestamp
-    COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build
+      COMMAND env LIBRARY_PATH=${CMAKE_BINARY_DIR}/go/pserver/client/c/:$ENV{LIBRARY_PATH}
      GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build
    -o "${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}"
    "./${CMAKE_CURRENT_SOURCE_REL_DIR}/${go_binary_SRCS}"
-  WORKING_DIRECTORY "${PADDLE_IN_GOPATH}/go")
+    WORKING_DIRECTORY "${PADDLE_IN_GOPATH}/go")
  # TODO: don't know what ${TARGET_NAME}_link does
  add_custom_target(${TARGET_NAME} ALL DEPENDS go_vendor ${TARGET_NAME}_timestamp ${go_binary_DEPS})
  install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME} DESTINATION bin)
@ -332,3 +354,12 @@ function(proto_library TARGET_NAME)
  protobuf_generate_cpp(proto_srcs proto_hdrs ${proto_library_SRCS})
  cc_library(${TARGET_NAME} SRCS ${proto_srcs} DEPS ${proto_library_DEPS} protobuf)
 endfunction()
 # py_proto_compile(<target> SRCS <proto>...)
 #
 # Creates a custom target <target>, built as part of ALL, that depends on
 # the Python modules generated by protobuf_generate_python() for SRCS.
 function(py_proto_compile TARGET_NAME)
  # Declare every keyword list locally: a function starts with a copy of the
  # caller's variables, so an undeclared `options` could carry a stale value
  # from the calling scope into cmake_parse_arguments.
  set(options "")
  set(oneValueArgs "")
  set(multiValueArgs SRCS)
  cmake_parse_arguments(py_proto_compile "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
  # Receives the list of generated *_pb2.py files.
  set(py_srcs)
  protobuf_generate_python(py_srcs ${py_proto_compile_SRCS})
  add_custom_target(${TARGET_NAME} ALL DEPENDS ${py_srcs})
 endfunction()
--- a/doc_theme/templates/layout.html
+++ b/doc_theme/templates/layout.html
@ -101,7 +101,7 @@
    </div>
    <div class="site-nav-links">
      <div class="site-menu">
-        <a class="fork-on-github" href="https://github.com/PaddlePaddle/Paddle" target="_blank"><i class="fa fa-github"></i>Folk me on Github</a>
+        <a class="fork-on-github" href="https://github.com/PaddlePaddle/Paddle" target="_blank"><i class="fa fa-github"></i>Fork me on Github</a>
        <div class="language-switcher dropdown">
          <a type="button" data-toggle="dropdown">
            <span>English</span>
--- a/go/cmd/master/CMakeLists.txt
+++ b/go/cmd/master/CMakeLists.txt
@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-go_binary(master SRC master.go)
+go_binary(master SRC master.go DEPS paddle_go_optimizer)
--- a/go/cmd/pserver/CMakeLists.txt
+++ b/go/cmd/pserver/CMakeLists.txt
@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-go_binary(pserver SRCS pserver.go)
+go_binary(pserver SRCS pserver.go DEPS paddle_go_optimizer)
--- a/go/master/c/client.go
+++ b/go/master/c/client.go
@ -104,11 +104,22 @@ func paddle_set_dataset(client C.paddle_master_client, path **C.char, size C.int
 	return C.PADDLE_MASTER_OK
 }
 // return value:
 //     0:ok
 //    -1:error
 //export paddle_next_record
 func paddle_next_record(client C.paddle_master_client, record **C.uchar) C.int {
 	c := get(client)
-	r := c.NextRecord()
+	r, err := c.NextRecord()
 	if err != nil {
 		// Error
 		// TODO: return the type of error?
 		*record = (*C.uchar)(nullPtr)
 		return -1
 	}
 	if len(r) == 0 {
 		// Empty record
 		*record = (*C.uchar)(nullPtr)
 		return 0
 	}
--- a/go/master/client.go
+++ b/go/master/client.go
@ -11,7 +11,12 @@ import (
 // Client is the client of the master server.
 type Client struct {
 	conn *connection.Conn
-	ch   chan []byte
+	ch   chan record
 }
 // record is the item type carried by Client.ch. getRecords sends either a
 // scanned record with a nil error, or a nil payload with the scanner's error.
 type record struct {
 	// r is the raw record payload.
 	r   []byte
 	// err is the error associated with this item, if any.
 	err error
 }
 // NewClient creates a new Client.
@ -21,7 +26,7 @@ type Client struct {
 func NewClient(addrCh <-chan string, bufSize int) *Client {
 	c := &Client{}
 	c.conn = connection.New()
-	c.ch = make(chan []byte, bufSize)
+	c.ch = make(chan record, bufSize)
 	go c.monitorMaster(addrCh)
 	go c.getRecords()
 	return c
@ -46,10 +51,11 @@ func (c *Client) getRecords() {
 			s := recordio.NewRangeScanner(f, &chunk.Index, -1, -1)
 			for s.Scan() {
-				c.ch <- s.Record()
+				c.ch <- record{s.Record(), nil}
 			}
 			if s.Err() != nil {
 				c.ch <- record{nil, s.Err()}
 				log.Errorln(err, chunk.Path)
 			}
@ -116,6 +122,7 @@ func (c *Client) taskFinished(taskID int) error {
 //
 // NextRecord will block until the next record is available. It is
 // thread-safe.
-func (c *Client) NextRecord() []byte {
+func (c *Client) NextRecord() ([]byte, error) {
-	return <-c.ch
+	r := <-c.ch
 	return r.r, r.err
 }
--- a/go/master/client_test.go
+++ b/go/master/client_test.go
@ -68,12 +68,17 @@ func TestNextRecord(t *testing.T) {
 	for pass := 0; pass < 50; pass++ {
 		received := make(map[byte]bool)
 		for i := 0; i < total; i++ {
-			r := c.NextRecord()
+			r, err := c.NextRecord()
 			if err != nil {
 				t.Fatal(pass, i, "Read error:", err)
 			}
 			if len(r) != 1 {
-				t.Fatal("Length should be 1.", r)
+				t.Fatal(pass, i, "Length should be 1.", r)
 			}
 			if received[r[0]] {
-				t.Fatal("Received duplicate.", received, r)
+				t.Fatal(pass, i, "Received duplicate.", received, r)
 			}
 			received[r[0]] = true
 		}
--- a/go/pserver/client/c/CMakeLists.txt
+++ b/go/pserver/client/c/CMakeLists.txt
@ -1,5 +1,8 @@
 cc_library(paddle_go_optimizer DEPS paddle_optimizer paddle_proto glog gflags protobuf)
 target_link_libraries(paddle_go_optimizer stdc++ m)
 go_library(paddle_pserver_cclient STATIC DEPS paddle_go_optimizer)
 if(WITH_TESTING)
-  add_subdirectory(test)
+  # FIXME: this test requires pserver which is not managed by the test
  # we need some kind of e2e testing mechanism.
  # add_subdirectory(test)
 endif()
--- a/go/pserver/client/c/test/CMakeLists.txt
+++ b/go/pserver/client/c/test/CMakeLists.txt
@ -1,2 +1,2 @@
-cc_test(test_cclient SRCS test_cclient.c DEPS paddle_pserver_cclient)
+cc_test(test_cclient SRCS test_cclient.c DEPS paddle_pserver_cclient paddle_go_optimizer)
 add_style_check_target(test_cclient test_cclient.c)
--- a/go/pserver/optimizer.go
+++ b/go/pserver/optimizer.go
@ -2,7 +2,7 @@ package pserver
 // #cgo CFLAGS: -I ../../
 // //FIXME: ldflags contain "build" path
-// #cgo LDFLAGS: ../../build/go/pserver/client/c/libpaddle_go_optimizer.a -lstdc++ -lm
+// #cgo LDFLAGS: ${SRCDIR}/../../build/go/pserver/client/c/libpaddle_go_optimizer.a -lstdc++ -lm
 // #include "paddle/optimizer/optimizer.h"
 // #include <stdlib.h>
 // #include <string.h>
@ -56,8 +56,8 @@ func newOptimizer(paramWithConfigs ParameterWithConfig) *optimizer {
 func (o *optimizer) GetWeights() []byte {
 	var buffer unsafe.Pointer
-	buffer_len := C.paddle_optimizer_get_weights(o.opt, &buffer)
+	bufferLen := C.paddle_optimizer_get_weights(o.opt, &buffer)
-	return cArrayToSlice(buffer, int(buffer_len)*C.sizeof_float)
+	return cArrayToSlice(buffer, int(bufferLen)*C.sizeof_float)
 }
 func (o *optimizer) UpdateParameter(g Gradient) error {
--- a/go/pserver/service.go
+++ b/go/pserver/service.go
@ -10,8 +10,10 @@ import (
 type ElementType int
 const (
 	// AlreadyInitialized is true if pserver is initialized
 	AlreadyInitialized = "pserver already initialized"
-	Uninitialized      = "pserver not fully initialized"
+	// Uninitialized is true if pserver not fully initialized
 	Uninitialized = "pserver not fully initialized"
 )
 // Supported element types
@ -55,7 +57,7 @@ func NewService(idx int) (*Service, error) {
 	s := &Service{
 		idx: idx,
 	}
-  s.optMap = make(map[string]*optimizer)
+	s.optMap = make(map[string]*optimizer)
 	s.initialized = make(chan struct{})
 	return s, nil
 }
--- a/paddle/api/CMakeLists.txt
+++ b/paddle/api/CMakeLists.txt
@ -66,6 +66,7 @@ SWIG_LINK_LIBRARIES(swig_paddle
    paddle_trainer_lib
    paddle_network
    paddle_parameter
    paddle_optimizer
    paddle_math
    paddle_utils
    paddle_proto
--- a/paddle/framework/CMakeLists.txt
+++ b/paddle/framework/CMakeLists.txt
@ -9,6 +9,10 @@ cc_test(enforce_test SRCS enforce_test.cc)
 proto_library(attr_type SRCS attr_type.proto)
 proto_library(op_proto SRCS op_proto.proto DEPS attr_type)
 cc_test(op_proto_test SRCS op_proto_test.cc DEPS op_proto protobuf)
 proto_library(op_desc SRCS op_desc.proto DEPS attr_type)
 cc_test(op_desc_test SRCS op_desc_test.cc DEPS op_desc protobuf)
 cc_test(op_registry_test SRCS op_registry_test.cc DEPS op_proto op_desc)
 py_proto_compile(framework_py_proto SRCS attr_type.proto op_proto.proto op_desc.proto)
 # Generate an empty __init__.py to make framework_py_proto as a valid python module.
 add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
 add_dependencies(framework_py_proto framework_py_proto_init)
--- a/paddle/framework/attr_checker.h
+++ b/paddle/framework/attr_checker.h
@ -0,0 +1,119 @@
 #pragma once
 #include <boost/variant.hpp>
 #include <functional>
 #include <string>
 #include <unordered_map>
 #include <vector>
 #include "paddle/framework/enforce.h"
 namespace paddle {
 namespace framework {
 // Attribute is a tagged union over the value types an attribute can hold:
 // boost::blank, int, float, std::string, and vectors of int, float and
 // std::string.
 typedef boost::variant<boost::blank, int, float, std::string, std::vector<int>,
                       std::vector<float>, std::vector<std::string>>
    Attribute;
 // AttributeMap maps an attribute name to its value.
 typedef std::unordered_map<std::string, Attribute> AttributeMap;
 // Functor that enforces a strict lower bound on a value: calling it with
 // `value` fails through PADDLE_ENFORCE unless `value > bound`.
 template <typename T>
 class LargerThanChecker {
 public:
  LargerThanChecker(T lower_bound) : exclusive_min_(lower_bound) {}

  // The value is only inspected, never modified.
  void operator()(T& value) const {
    PADDLE_ENFORCE(value > exclusive_min_, "larger_than check fail");
  }

 private:
  // Bound captured at construction; accepted values must exceed it.
  T exclusive_min_;
 };
 // we can provide users more common Checker, like 'LessThanChecker',
 // 'BetweenChecker'...
 // Functor that overwrites its argument with a value captured at
 // construction time.
 template <typename T>
 class DefaultValueSetter {
 public:
  DefaultValueSetter(T default_value) : fallback_(default_value) {}

  // Assigns the stored default to `value`.
  void operator()(T& value) const {
    value = fallback_;
  }

 private:
  // The default handed out on every call.
  T fallback_;
 };
 // check whether a certain attribute fit its limits
 // an attribute can have more than one limits
 template <typename T>
 class TypedAttrChecker {
  typedef std::function<void(T&)> ValueChecker;
 public:
  TypedAttrChecker(const std::string& attr_name) : attr_name_(attr_name) {}
  TypedAttrChecker& LargerThan(const T& lower_bound) {
    value_checkers_.push_back(LargerThanChecker<T>(lower_bound));
    return *this;
  }
  // we can add more common limits, like LessThan(), Between()...
  TypedAttrChecker& SetDefault(const T& default_value) {
    PADDLE_ENFORCE(default_value_setter_.empty(),
                   "%s can't have more than one default value!", attr_name_);
    default_value_setter_.push_back(DefaultValueSetter<T>(default_value));
    return *this;
  }
  // allow users provide their own checker
  TypedAttrChecker& AddCustomChecker(const ValueChecker& checker) {
    value_checkers_.push_back(checker);
    return *this;
  }
  void operator()(AttributeMap& attr_map) const {
    if (!attr_map.count(attr_name_)) {
      // user do not set this attr
      PADDLE_ENFORCE(!default_value_setter_.empty(),
                     "Attribute '%s' is required!", attr_name_);
      // default_value_setter_ has no more than one element
      T val;
      (default_value_setter_[0])(val);
      attr_map[attr_name_] = val;
    }
    Attribute& attr = attr_map.at(attr_name_);
    T& attr_value = boost::get<T>(attr);
    for (const auto& checker : value_checkers_) {
      checker(attr_value);
    }
  }
 private:
  std::string attr_name_;
  std::vector<ValueChecker> value_checkers_;
  std::vector<ValueChecker> default_value_setter_;
 };
 // check whether op's all attributes fit their own limits
 class OpAttrChecker {
  typedef std::function<void(AttributeMap&)> AttrChecker;
 public:
  template <typename T>
  TypedAttrChecker<T>& AddAttrChecker(const std::string& attr_name) {
    attr_checkers_.push_back(TypedAttrChecker<T>(attr_name));
    AttrChecker& checker = attr_checkers_.back();
    return *(checker.target<TypedAttrChecker<T>>());
  }
  void Check(AttributeMap& attr_map) const {
    for (const auto& checker : attr_checkers_) {
      checker(attr_map);
    }
  }
 private:
  std::vector<AttrChecker> attr_checkers_;
 };
 }  // namespace framework
 }  // namespace paddle
--- a/paddle/framework/op_registry.h
+++ b/paddle/framework/op_registry.h
--- a/paddle/framework/op_registry_test.cc
+++ b/paddle/framework/op_registry_test.cc
@ -0,0 +1,122 @@
 #include "paddle/framework/op_registry.h"
 #include <gtest/gtest.h>
 // A cos_sim op created with an explicit "scale" attribute must report that
 // value in the string returned by Run().
 TEST(OpRegistry, CreateOp) {
  namespace fw = paddle::framework;

  fw::OpDesc desc;
  desc.set_type("cos_sim");
  desc.add_inputs("aa");
  desc.add_outputs("bb");

  auto scale_attr = desc.mutable_attrs()->Add();
  scale_attr->set_name("scale");
  scale_attr->set_type(fw::AttrType::FLOAT);
  scale_attr->set_f(3.3);

  fw::OpBase* op = fw::OpRegistry::CreateOp(desc);
  const std::string actual = op->Run();
  const std::string expected =
      "CosineOp runs! scale = " + std::to_string(3.3);

  // Same length first, then identical character by character.
  ASSERT_EQ(expected.size(), actual.size());
  for (size_t pos = 0; pos < actual.length(); ++pos) {
    ASSERT_EQ(actual[pos], expected[pos]);
  }
 }
 // Creating a cos_sim op whose "scale" attribute is -2.0 must throw
 // EnforceNotMet with a message starting with "larger_than check fail".
 TEST(OpRegistry, IllegalAttr) {
  paddle::framework::OpDesc op_desc;
  op_desc.set_type("cos_sim");
  op_desc.add_inputs("aa");
  op_desc.add_outputs("bb");
  auto attr = op_desc.mutable_attrs()->Add();
  attr->set_name("scale");
  attr->set_type(paddle::framework::AttrType::FLOAT);
  attr->set_f(-2.0);
  bool caught = false;
  try {
    paddle::framework::OpBase* op __attribute__((unused)) =
        paddle::framework::OpRegistry::CreateOp(op_desc);
  } catch (const paddle::framework::EnforceNotMet& err) {
    // Caught by const reference: no copy of the exception object is made.
    caught = true;
    std::string msg = "larger_than check fail";
    const char* err_msg = err.what();
    // Only the leading msg.length() characters of what() are compared.
    for (size_t i = 0; i < msg.length(); ++i) {
      ASSERT_EQ(err_msg[i], msg[i]);
    }
  }
  ASSERT_TRUE(caught);
 }
 // A cos_sim op created without a "scale" attribute must run with the
 // value 1.0, as shown in the string returned by Run().
 TEST(OpRegistry, DefaultValue) {
  namespace fw = paddle::framework;

  fw::OpDesc desc;
  desc.set_type("cos_sim");
  desc.add_inputs("aa");
  desc.add_outputs("bb");

  fw::OpBase* op = fw::OpRegistry::CreateOp(desc);
  const std::string actual = op->Run();

  float default_value = 1.0;
  const std::string expected =
      "CosineOp runs! scale = " + std::to_string(default_value);

  // Same length first, then identical character by character.
  ASSERT_EQ(expected.size(), actual.size());
  for (size_t pos = 0; pos < actual.length(); ++pos) {
    ASSERT_EQ(actual[pos], expected[pos]);
  }
 }
 // Exercises my_test_op's 'test_attr' in three steps: missing attribute,
 // illegal value (3), and legal value (4).
 TEST(OpRegistry, CustomChecker) {
  paddle::framework::OpDesc op_desc;
  op_desc.set_type("my_test_op");
  op_desc.add_inputs("ii");
  op_desc.add_outputs("oo");

  // Step 1: attr 'test_attr' is not set, so creation must be rejected.
  bool caught = false;
  try {
    paddle::framework::OpBase* op __attribute__((unused)) =
        paddle::framework::OpRegistry::CreateOp(op_desc);
  } catch (const paddle::framework::EnforceNotMet& err) {
    // Caught by const reference: no copy of the exception object is made.
    caught = true;
    std::string msg = "Attribute 'test_attr' is required!";
    const char* err_msg = err.what();
    // Only the leading msg.length() characters of what() are compared.
    for (size_t i = 0; i < msg.length(); ++i) {
      ASSERT_EQ(err_msg[i], msg[i]);
    }
  }
  ASSERT_TRUE(caught);

  // Step 2: set 'test_attr' to an illegal value.
  auto attr = op_desc.mutable_attrs()->Add();
  attr->set_name("test_attr");
  attr->set_type(paddle::framework::AttrType::INT);
  attr->set_i(3);
  caught = false;
  try {
    paddle::framework::OpBase* op __attribute__((unused)) =
        paddle::framework::OpRegistry::CreateOp(op_desc);
  } catch (const paddle::framework::EnforceNotMet& err) {
    caught = true;
    std::string msg = "'test_attr' must be even!";
    const char* err_msg = err.what();
    for (size_t i = 0; i < msg.length(); ++i) {
      ASSERT_EQ(err_msg[i], msg[i]);
    }
  }
  ASSERT_TRUE(caught);

  // Step 3: set 'test_attr' to a legal value; the op must be created and run.
  op_desc.mutable_attrs()->Clear();
  attr = op_desc.mutable_attrs()->Add();
  attr->set_name("test_attr");
  attr->set_type(paddle::framework::AttrType::INT);
  attr->set_i(4);
  paddle::framework::OpBase* op =
      paddle::framework::OpRegistry::CreateOp(op_desc);
  std::string debug_str = op->Run();
  std::string str = "MyTestOp runs! test_attr = " + std::to_string(4);
  ASSERT_EQ(str.size(), debug_str.size());
  for (size_t i = 0; i < debug_str.length(); ++i) {
    ASSERT_EQ(debug_str[i], str[i]);
  }
 }
--- a/paddle/gserver/layers/AverageLayer.h
+++ b/paddle/gserver/layers/AverageLayer.h
@ -25,6 +25,10 @@ namespace paddle {
 * If SequenceLevel = kNonSeq:
 *    Output: output size is the number of input sequences (NOT input instances)
 *    output[i] = average_{for each instance in this sequence}{input[i]}
 *    If stride_ > 0:
 *      Output: a shortened sequence. Stride is the step size by which we slide a
 *              window upon the input sequence, and the average pooling
 *              operation is then applied to each interval independently.
 * If SequenceLevel = kSeq:
 *    Check: the input sequence must have sub-sequences
 *    Output: output size is the number of input sub-sequences
--- a/paddle/gserver/layers/CrossChannelNormLayer.cpp
+++ b/paddle/gserver/layers/CrossChannelNormLayer.cpp
@ -36,6 +36,16 @@ MatrixPtr CrossChannelNormLayer::createSpatialMatrix(MatrixPtr data,
      data->getData() + iter * spatialDim, 1, spatialDim, false, useGpu_);
 }
 bool CrossChannelNormLayer::init(const LayerMap& layerMap,
                                 const ParameterMap& parameterMap) {
  Layer::init(layerMap, parameterMap);
  CHECK(parameters_[0]);
  const NormConfig& conf = config_.inputs(0).norm_conf();
  channels_ = conf.channels();
  scale_.reset(new Weight(channels_, 1, parameters_[0]));
  return true;
 }
 void CrossChannelNormLayer::forward(PassType passType) {
  Layer::forward(passType);
  MatrixPtr inV = getInputValue(0);
@ -51,9 +61,7 @@ void CrossChannelNormLayer::forward(PassType passType) {
  Matrix::resizeOrCreate(dataBuffer_, batchSize, dataDim, false, useGpu_);
  Matrix::resizeOrCreate(spatialBuffer_, 1, spatialDim, false, useGpu_);
  Matrix::resizeOrCreate(normBuffer_, batchSize, spatialDim, false, useGpu_);
-  normBuffer_->zeroMem();
+
  // add eps to avoid overflow
  normBuffer_->addScalar(*normBuffer_, 1e-6);
  inV->square2(*dataBuffer_);
  for (size_t i = 0; i < batchSize; i++) {
    const MatrixPtr inVTmp = createSampleMatrix(inV, i, spatialDim);
@ -63,6 +71,8 @@ void CrossChannelNormLayer::forward(PassType passType) {
    // compute norm.
    spatialBuffer_->sumCols(*dataTmp, 1, 0);
    // add eps to avoid overflow
    spatialBuffer_->add(1e-6);
    spatialBuffer_->sqrt2(*spatialBuffer_);
    normTmp->copyFrom(*spatialBuffer_);
    outVTmp->copyFrom(*inVTmp);
@ -82,6 +92,9 @@ void CrossChannelNormLayer::backward(const UpdateCallback& callback) {
  size_t dataDim = inG->getWidth();
  size_t spatialDim = dataDim / channels_;
  MatrixPtr inGBuffer;
  Matrix::resizeOrCreate(inGBuffer, channels_, spatialDim, false, useGpu_);
  dataBuffer_->dotMul(*outG, *outV);
  Matrix::resizeOrCreate(scaleDiff_, channels_, 1, false, useGpu_);
  Matrix::resizeOrCreate(channelBuffer_, channels_, 1, false, useGpu_);
@ -100,22 +113,24 @@ void CrossChannelNormLayer::backward(const UpdateCallback& callback) {
    scaleDiff_->add(*channelBuffer_, 1.);
    sampleBuffer_->dotMul(*inVTmp, *outGTmp);
-    spatialBuffer_->sumCols(*sampleBuffer_, 1., 1.);
+    spatialBuffer_->sumCols(*sampleBuffer_, 1., 0.);
    // scale the grad
-    inGTmp->copyFrom(*inVTmp);
+    inGBuffer->copyFrom(*inVTmp);
-    inGTmp->mulRowVector(*spatialBuffer_);
+    inGBuffer->mulRowVector(*spatialBuffer_);
    // divide by square of norm
    spatialBuffer_->dotMul(*normTmp, *normTmp);
-    inGTmp->divRowVector(*spatialBuffer_);
+    inGBuffer->divRowVector(*spatialBuffer_);
    // subtract
-    inGTmp->add(*outGTmp, -1, 1);
+    inGBuffer->add(*outGTmp, -1, 1);
    // divide by norm
-    inGTmp->divRowVector(*normTmp);
+    inGBuffer->divRowVector(*normTmp);
    // scale the diff
-    inGTmp->mulColVector(*scale_->getW());
+    inGBuffer->mulColVector(*scale_->getW());
    inGTmp->add(*inGBuffer);
  }
  // updata scale
-  if (scale_->getWGrad()) scale_->getWGrad()->copyFrom(*scaleDiff_);
+  if (scale_->getWGrad()) scale_->getWGrad()->add(*scaleDiff_);
  scale_->getParameterPtr()->incUpdate(callback);
 }
--- a/paddle/gserver/layers/MaxLayer.h
+++ b/paddle/gserver/layers/MaxLayer.h
@ -26,6 +26,10 @@ namespace paddle {
 * If SequenceLevel = kNonSeq:
 *    Output: output size is the number of input sequences (NOT input instances)
 *    output[i] = max_{for each instance in this sequence}{input[i]}
 *    If stride_ > 0:
 *      Output: a shortened sequence. Stride is the step size by which we slide a
 *              window upon the input sequence, and the max pooling operation is
 *              then applied to each interval independently.
 * If SequenceLevel = kSeq:
 *    Check: the input sequence must have sub-sequences
 *    Output: output size is the number of input sub-sequences
--- a/paddle/gserver/layers/NormLayer.cpp
+++ b/paddle/gserver/layers/NormLayer.cpp
@ -56,14 +56,4 @@ bool ResponseNormLayer::init(const LayerMap& layerMap,
  return true;
 }
 bool CrossChannelNormLayer::init(const LayerMap& layerMap,
                                 const ParameterMap& parameterMap) {
  Layer::init(layerMap, parameterMap);
  CHECK(parameters_[0]);
  const NormConfig& conf = config_.inputs(0).norm_conf();
  channels_ = conf.channels();
  scale_.reset(new Weight(channels_, 1, parameters_[0]));
  return true;
 }
 }  // namespace paddle
--- a/Show More
+++ b/Show More
`@ -1,2 +1,2 @@`
	`cc_test(test_cclient SRCS test_cclient.c DEPS paddle_pserver_cclient)`	`cc_test(test_cclient SRCS test_cclient.c DEPS paddle_pserver_cclient paddle_go_optimizer)`
	`add_style_check_target(test_cclient test_cclient.c)`	`add_style_check_target(test_cclient test_cclient.c)`