diff --git a/.travis.yml b/.travis.yml index f9b4a7e083..64961adcf2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,23 +1,22 @@ +group: deprecated-2017Q2 language: cpp cache: directories: - - $HOME/third_party - $HOME/.ccache - $HOME/.cache/pip + - $TRAVIS_BUILD_DIR/build/third_party sudo: required dist: trusty os: - linux env: - - JOB=DOCS - - JOB=BUILD_AND_TEST - - JOB=PRE_COMMIT + - JOB=build_doc + - JOB=check_style addons: apt: packages: - gcc-4.8 - g++-4.8 - - gfortran-4.8 - git - build-essential - python @@ -34,18 +33,7 @@ addons: - libtool - ccache before_install: - - | - if [ ${JOB} == "BUILD_AND_TEST" ]; then - local change_list=`git diff --name-only $TRAVIS_COMMIT_RANGE` - if [ $? -eq 0 ]; then # if git diff return no zero, then rerun unit test. - if ! echo ${change_list} | grep -qvE '(\.md$)|(\.rst$)|(\.jpg$)|(\.png$)' - then - echo "Only markdown docs were updated, stopping build process." - exit - fi - fi - fi - - if [[ "$JOB" == "PRE_COMMIT" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi + - if [[ "$JOB" == "check_style" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi # Paddle is using protobuf 3.1 currently. Protobuf 3.2 breaks the compatibility. So we specify the python # protobuf version. - pip install numpy wheel 'protobuf==3.1' sphinx==1.5.6 recommonmark sphinx-rtd-theme==0.1.9 virtualenv pre-commit requests==2.9.2 LinkChecker @@ -54,8 +42,8 @@ before_install: - | function timeout() { perl -e 'alarm shift; exec @ARGV' "$@"; } script: - - | - timeout 2580 paddle/scripts/travis/main.sh # 43min timeout + - | + timeout 2580 paddle/scripts/travis/${JOB}.sh # 43min timeout RESULT=$?; if [ $RESULT -eq 0 ] || [ $RESULT -eq 142 ]; then true; else false; fi; notifications: email: diff --git a/CMakeLists.txt b/CMakeLists.txt index c5d7f2c7ec..3c719d35ec 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -71,7 +71,7 @@ if(ANDROID) "Disable RDMA when cross-compiling for Android" FORCE) endif(ANDROID) -set(THIRD_PARTY_PATH "${PROJ_ROOT}/third_party" CACHE STRING +set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING "A path setting third party libraries download & build directories.") if (WITH_C_API AND WITH_PYTHON) diff --git a/Dockerfile b/Dockerfile index 39af60966b..bf227737c5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -25,7 +25,7 @@ COPY ./paddle/scripts/docker/root/ /root/ RUN apt-get update && \ apt-get install -y \ git python-pip python-dev openssh-server bison \ - wget unzip tar xz-utils bzip2 gzip coreutils \ + wget unzip tar xz-utils bzip2 gzip coreutils ntp \ curl sed grep graphviz libjpeg-dev zlib1g-dev \ python-numpy python-matplotlib gcc g++ \ automake locales clang-format-3.8 swig doxygen cmake \ diff --git a/cmake/external/openblas.cmake b/cmake/external/openblas.cmake index 2341e3785b..5b9d9844ed 100644 --- a/cmake/external/openblas.cmake +++ b/cmake/external/openblas.cmake @@ -21,7 +21,8 @@ IF(NOT ${CBLAS_FOUND}) SET(CBLAS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/openblas) SET(CBLAS_INC_DIR "${CBLAS_INSTALL_DIR}/include" CACHE PATH "openblas include directory." FORCE) - SET(CBLAS_LIBRARIES "${CBLAS_INSTALL_DIR}/lib/${LIBRARY_PREFIX}openblas${STATIC_LIBRARY_SUFFIX}" + SET(CBLAS_LIBRARIES + "${CBLAS_INSTALL_DIR}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}openblas${CMAKE_STATIC_LIBRARY_SUFFIX}" CACHE FILEPATH "openblas library." FORCE) SET(COMMON_ARGS CC=${CMAKE_C_COMPILER} NO_SHARED=1 NO_LAPACK=1 libs) diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake index 7340394b1e..d43badc1da 100644 --- a/cmake/external/protobuf.cmake +++ b/cmake/external/protobuf.cmake @@ -14,11 +14,41 @@ INCLUDE(ExternalProject) +# Print and set the protobuf library information, +# finish this cmake process and exit from this file. macro(PROMPT_PROTOBUF_LIB) + SET(protobuf_DEPS ${ARGN}) + MESSAGE(STATUS "Protobuf protoc executable: ${PROTOBUF_PROTOC_EXECUTABLE}") MESSAGE(STATUS "Protobuf library: ${PROTOBUF_LIBRARY}") MESSAGE(STATUS "Protobuf version: ${PROTOBUF_VERSION}") INCLUDE_DIRECTORIES(${PROTOBUF_INCLUDE_DIR}) + + # Assuming that all the protobuf libraries are of the same type. + IF(${PROTOBUF_LIBRARY} MATCHES "${CMAKE_STATIC_LIBRARY_SUFFIX}$") + SET(protobuf_LIBTYPE STATIC) + ELSEIF(${PROTOBUF_LIBRARY} MATCHES "${CMAKE_SHARED_LIBRARY_SUFFIX}$") + SET(protobuf_LIBTYPE SHARED) + ELSE() + MESSAGE(FATAL_ERROR "Unknown library type: ${PROTOBUF_LIBRARY}") + ENDIF() + + ADD_LIBRARY(protobuf ${protobuf_LIBTYPE} IMPORTED GLOBAL) + SET_PROPERTY(TARGET protobuf PROPERTY IMPORTED_LOCATION ${PROTOBUF_LIBRARY}) + + ADD_LIBRARY(protobuf_lite ${protobuf_LIBTYPE} IMPORTED GLOBAL) + SET_PROPERTY(TARGET protobuf_lite PROPERTY IMPORTED_LOCATION ${PROTOBUF_LITE_LIBRARY}) + + ADD_LIBRARY(protoc ${protobuf_LIBTYPE} IMPORTED GLOBAL) + SET_PROPERTY(TARGET protoc PROPERTY IMPORTED_LOCATION ${PROTOC_LIBRARY}) + + FOREACH(dep ${protobuf_DEPS}) + ADD_DEPENDENCIES(protobuf ${dep}) + ADD_DEPENDENCIES(protobuf_lite ${dep}) + ADD_DEPENDENCIES(protoc ${dep}) + ENDFOREACH() + + LIST(APPEND external_project_dependencies protobuf) RETURN() endmacro() macro(SET_PROTOBUF_VERSION) @@ -43,22 +73,23 @@ if (NOT "${PROTOBUF_ROOT}" STREQUAL "") endif() FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST) - SET(PROTOBUF_SOURCES_DIR ${THIRD_PARTY_PATH}/${TARGET_NAME}) - SET(PROTOBUF_INSTALL_DIR ${THIRD_PARTY_PATH}/install/${TARGET_NAME}) + STRING(REPLACE "extern_" "" TARGET_DIR_NAME "${TARGET_NAME}") + SET(PROTOBUF_SOURCES_DIR ${THIRD_PARTY_PATH}/${TARGET_DIR_NAME}) + SET(PROTOBUF_INSTALL_DIR ${THIRD_PARTY_PATH}/install/${TARGET_DIR_NAME}) SET(${TARGET_NAME}_INCLUDE_DIR "${PROTOBUF_INSTALL_DIR}/include" PARENT_SCOPE) SET(PROTOBUF_INCLUDE_DIR "${PROTOBUF_INSTALL_DIR}/include" PARENT_SCOPE) SET(${TARGET_NAME}_LITE_LIBRARY - "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite${STATIC_LIBRARY_SUFFIX}" + "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite${CMAKE_STATIC_LIBRARY_SUFFIX}" PARENT_SCOPE) SET(${TARGET_NAME}_LIBRARY - "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf${STATIC_LIBRARY_SUFFIX}" + "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf${CMAKE_STATIC_LIBRARY_SUFFIX}" PARENT_SCOPE) SET(${TARGET_NAME}_PROTOC_LIBRARY - "${PROTOBUF_INSTALL_DIR}/lib/libprotoc${STATIC_LIBRARY_SUFFIX}" + "${PROTOBUF_INSTALL_DIR}/lib/libprotoc${CMAKE_STATIC_LIBRARY_SUFFIX}" PARENT_SCOPE) SET(${TARGET_NAME}_PROTOC_EXECUTABLE - "${PROTOBUF_INSTALL_DIR}/bin/protoc${EXECUTABLE_SUFFIX}" + "${PROTOBUF_INSTALL_DIR}/bin/protoc${CMAKE_EXECUTABLE_SUFFIX}" PARENT_SCOPE) SET(OPTIONAL_CACHE_ARGS "") @@ -109,6 +140,8 @@ IF(NOT CMAKE_CROSSCOMPILING) SET_PROTOBUF_VERSION() IF("${PROTOBUF_VERSION}" VERSION_LESS "3.1.0") SET(PROTOBUF_FOUND OFF) + ELSE() + PROMPT_PROTOBUF_LIB() ENDIF() ENDIF(PROTOBUF_FOUND) ELSE() @@ -120,18 +153,22 @@ ELSE() ENDIF() IF(NOT PROTOBUF_FOUND) - build_protobuf(protobuf FALSE) - LIST(APPEND external_project_dependencies protobuf) + build_protobuf(extern_protobuf FALSE) - SET(PROTOBUF_INCLUDE_DIR ${protobuf_INCLUDE_DIR} + SET(PROTOBUF_INCLUDE_DIR ${extern_protobuf_INCLUDE_DIR} CACHE PATH "protobuf include directory." FORCE) - IF(NOT CMAKE_CROSSCOMPILING) - SET(PROTOBUF_PROTOC_EXECUTABLE ${protobuf_PROTOC_EXECUTABLE} + SET(PROTOBUF_LITE_LIBRARY ${extern_protobuf_LITE_LIBRARY} + CACHE FILEPATH "protobuf lite library." FORCE) + SET(PROTOBUF_LIBRARY ${extern_protobuf_LIBRARY} + CACHE FILEPATH "protobuf library." FORCE) + SET(PROTOBUF_PROTOC_LIBRARY ${extern_protobuf_PROTOC_LIBRARY} + CACHE FILEPATH "protoc library." FORCE) + + IF(CMAKE_CROSSCOMPILING) + PROMPT_PROTOBUF_LIB(protobuf_host extern_protobuf) + ELSE() + SET(PROTOBUF_PROTOC_EXECUTABLE ${extern_protobuf_PROTOC_EXECUTABLE} CACHE FILEPATH "protobuf executable." FORCE) + PROMPT_PROTOBUF_LIB(extern_protobuf) ENDIF() - SET(PROTOBUF_LITE_LIBRARY ${protobuf_LITE_LIBRARY} CACHE FILEPATH "protobuf lite library." FORCE) - SET(PROTOBUF_LIBRARY ${protobuf_LIBRARY} CACHE FILEPATH "protobuf library." FORCE) - SET(PROTOBUF_PROTOC_LIBRARY ${protobuf_PROTOC_LIBRARY} CACHE FILEPATH "protoc library." FORCE) ENDIF(NOT PROTOBUF_FOUND) - -PROMPT_PROTOBUF_LIB() \ No newline at end of file diff --git a/cmake/system.cmake b/cmake/system.cmake index 904652413e..3b5cbfdd63 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -84,24 +84,6 @@ IF(DEFINED CMAKE_SYSTEM_NAME) ENDIF() ENDIF() -# prefix and suffix on different os -IF(WIN32) - SET(LIBRARY_PREFIX "") - SET(SHARED_LIBRARY_SUFFIX ".dll") - SET(STATIC_LIBRARY_SUFFIX ".lib") - SET(EXECUTABLE_SUFFIX ".exe") -ELSE(WIN32) - SET(LIBRARY_PREFIX "lib") - IF(APPLE) - SET(SHARED_LIBRARY_SUFFIX ".dylib") - ELSE(APPLE) - SET(SHARED_LIBRARY_SUFFIX ".so") - ENDIF(APPLE) - - SET(STATIC_LIBRARY_SUFFIX ".a") - SET(EXECUTABLE_SUFFIX "") -ENDIF(WIN32) - # external dependencies log output SET(EXTERNAL_PROJECT_LOG_ARGS LOG_DOWNLOAD 0 # Wrap download in script to log output diff --git a/doc/api/v2/config/evaluators.rst b/doc/api/v2/config/evaluators.rst index 39db51fa4a..9ac972fb19 100644 --- a/doc/api/v2/config/evaluators.rst +++ b/doc/api/v2/config/evaluators.rst @@ -99,3 +99,12 @@ value_printer .. automodule:: paddle.v2.evaluator :members: value_printer :noindex: + +Detection +===== + +detection_map +------------- +.. automodule:: paddle.v2.evaluator + :members: detection_map + :noindex: diff --git a/go/cmd/master/master.go b/go/cmd/master/master.go index 25cd1cafcd..54fa254863 100644 --- a/go/cmd/master/master.go +++ b/go/cmd/master/master.go @@ -1,45 +1,69 @@ package main import ( + "fmt" "net" "net/http" "net/rpc" "strconv" + "strings" "time" "github.com/namsral/flag" + log "github.com/sirupsen/logrus" "github.com/PaddlePaddle/Paddle/go/master" + "github.com/PaddlePaddle/Paddle/go/utils/networkhelper" ) func main() { port := flag.Int("port", 8080, "port of the master server.") - - faultTolerance := flag.Bool("fault_tolerance", false, "enable fault tolerance (requires etcd).") + ttlSec := flag.Int("ttl", 60, "etcd lease TTL in seconds.") + endpoints := flag.String("endpoints", "http://127.0.0.1:2379", "comma separated etcd endpoints. If empty, fault tolerance will not be enabled.") taskTimeoutDur := flag.Duration("task_timout_dur", 20*time.Minute, "task timout duration.") taskTimeoutMax := flag.Int("task_timeout_max", 3, "max timtout count for each task before it being declared failed task.") chunkPerTask := flag.Int("chunk_per_task", 10, "chunk per task.") flag.Parse() - if *faultTolerance { - panic("fault tolernance not implemented.") + if *endpoints == "" { + log.Warningln("-endpoints not set, fault tolerance not be enabled.") + } + + var store master.Store + if *endpoints != "" { + eps := strings.Split(*endpoints, ",") + ip, err := networkhelper.GetExternalIP() + if err != nil { + log.Fatal(err) + } + addr := fmt.Sprintf("%s:%d", ip, *port) + store, err = master.NewEtcdClient(eps, addr, master.DefaultLockPath, master.DefaultAddrPath, master.DefaultStatePath, *ttlSec) + if err != nil { + log.Fatal(err) + } + } else { + store = &master.InMemStore{} + } + + s, err := master.NewService(store, *chunkPerTask, *taskTimeoutDur, *taskTimeoutMax) + if err != nil { + log.Fatal(err) } - s := master.NewService(*chunkPerTask, *taskTimeoutDur, *taskTimeoutMax) - err := rpc.Register(s) + err = rpc.Register(s) if err != nil { - panic(err) + log.Fatal(err) } rpc.HandleHTTP() l, err := net.Listen("tcp", ":"+strconv.Itoa(*port)) if err != nil { - panic(err) + log.Fatal(err) } err = http.Serve(l, nil) if err != nil { - panic(err) + log.Fatal(err) } } diff --git a/go/cmd/pserver/pserver.go b/go/cmd/pserver/pserver.go index f0be251c24..6c85b1804b 100644 --- a/go/cmd/pserver/pserver.go +++ b/go/cmd/pserver/pserver.go @@ -5,18 +5,36 @@ import ( "net/http" "net/rpc" "strconv" + "time" "github.com/namsral/flag" "github.com/PaddlePaddle/Paddle/go/pserver" + log "github.com/sirupsen/logrus" ) func main() { port := flag.Int("port", 0, "port of the pserver") + etcdEndpoint := flag.String("etcd-endpoint", "http://127.0.0.1:2379", + "comma separated endpoint string for pserver to connect to etcd") + etcdTimeout := flag.Int("etcd-timeout", 5, "timeout for etcd calls") + numPservers := flag.Int("num-pservers", 1, "total pserver count in a training job") + logLevel := flag.String("log-level", "info", + "log level, possible values: debug, info, warning, error, fatal, panic") flag.Parse() - s := pserver.NewService() - err := rpc.Register(s) + level, err := log.ParseLevel(*logLevel) + if err != nil { + panic(err) + } + log.SetLevel(level) + + timeout := time.Second * time.Duration((*etcdTimeout)) + s, err := pserver.NewService(*etcdEndpoint, *numPservers, timeout) + if err != nil { + panic(err) + } + err = rpc.Register(s) if err != nil { panic(err) } @@ -27,7 +45,9 @@ func main() { panic(err) } + log.Infof("start pserver at port %d", *port) err = http.Serve(l, nil) + if err != nil { panic(err) } diff --git a/go/master/client_internal_test.go b/go/master/client_internal_test.go index 00fcca0e2c..251225780a 100644 --- a/go/master/client_internal_test.go +++ b/go/master/client_internal_test.go @@ -47,9 +47,13 @@ func TestGetFinishTask(t *testing.T) { } go func(l net.Listener) { - s := NewService(chunkPerTask, time.Second, 1) + s, err := NewService(&InMemStore{}, chunkPerTask, time.Second, 1) + if err != nil { + panic(err) + } + server := rpc.NewServer() - err := server.Register(s) + err = server.Register(s) if err != nil { panic(err) } diff --git a/go/master/client_test.go b/go/master/client_test.go index 2b3f873ecf..85a86761c2 100644 --- a/go/master/client_test.go +++ b/go/master/client_test.go @@ -33,9 +33,13 @@ func TestNextRecord(t *testing.T) { } go func(l net.Listener) { - s := master.NewService(10, time.Second, 1) + s, err := master.NewService(&master.InMemStore{}, 10, time.Second, 1) + if err != nil { + panic(err) + } + server := rpc.NewServer() - err := server.Register(s) + err = server.Register(s) if err != nil { panic(err) } diff --git a/go/master/etcd_client.go b/go/master/etcd_client.go new file mode 100644 index 0000000000..b7293a7598 --- /dev/null +++ b/go/master/etcd_client.go @@ -0,0 +1,144 @@ +package master + +import ( + "context" + "time" + + "github.com/coreos/etcd/clientv3" + "github.com/coreos/etcd/clientv3/concurrency" + log "github.com/sirupsen/logrus" +) + +const ( + // DefaultLockPath is the default etcd master lock path. + DefaultLockPath = "/master/lock" + // DefaultStatePath is the default etcd key for master state. + DefaultStatePath = "/master/state" + // DefaultAddrPath is the default etcd key for master address. + DefaultAddrPath = "/master/addr" +) + +// EtcdClient is the etcd client that master uses for fault tolerance +// and service registry. +type EtcdClient struct { + lockPath string + statePath string + client *clientv3.Client + lock *concurrency.Mutex +} + +// NewEtcdClient creates a new EtcdClient. +func NewEtcdClient(endpoints []string, addr string, lockPath, addrPath, statePath string, ttlSec int) (*EtcdClient, error) { + log.Debugf("Connecting to etcd at %v", endpoints) + // TODO(helin): gracefully shutdown etcd store. Becuase etcd + // store holds a etcd lock, even though the lock will expire + // when the lease timeout, we need to implement graceful + // shutdown to release the lock. + cli, err := clientv3.New(clientv3.Config{ + Endpoints: endpoints, + DialTimeout: dialTimeout, + }) + if err != nil { + return nil, err + } + + sess, err := concurrency.NewSession(cli, concurrency.WithTTL(ttlSec)) + if err != nil { + return nil, err + } + + lock := concurrency.NewMutex(sess, lockPath) + // It's fine for the lock to get stuck, in this case we have + // multiple master servers running (only configured to have + // one master running, but split-brain problem may cuase + // multiple master servers running), and the cluster management + // software will kill one of them. + log.Debugf("Trying to acquire lock at %s.", lockPath) + err = lock.Lock(context.TODO()) + if err != nil { + return nil, err + } + log.Debugf("Successfully acquired lock at %s.", lockPath) + + put := clientv3.OpPut(addrPath, string(addr)) + resp, err := cli.Txn(context.Background()).If(lock.IsOwner()).Then(put).Commit() + if err != nil { + return nil, err + } + + if !resp.Succeeded { + log.Fatal("No longer owns the master lock. Exiting.") + } + + e := &EtcdClient{ + lockPath: lockPath, + statePath: statePath, + client: cli, + lock: lock, + } + + return e, nil +} + +// Save saves the state into the etcd. +func (e *EtcdClient) Save(state []byte) error { + ctx := context.TODO() + put := clientv3.OpPut(e.statePath, string(state)) + resp, err := e.client.Txn(ctx).If(e.lock.IsOwner()).Then(put).Commit() + if err != nil { + return err + } + + if !resp.Succeeded { + log.Errorln("No longer owns the lock, trying to lock again") + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + err := e.lock.Lock(ctx) + cancel() + if err != nil { + // We lost the master lock and can not acquire + // it back, it means some other master is + // already started. We don't want cluster + // managment system to kill the master server + // who is holding the lock and running + // correctly. So the most feasible solution is + // to kill current master server. The current + // state is not saved, but the trainer's RPC + // call will fail, so the trainer will retry. + log.Fatalf("Could not acquire the lock at %s: %v. Exiting.", e.lockPath, err) + } + log.Infof("Successfully acquired lock at %s.", e.lockPath) + return e.Save(state) + } + + return nil +} + +// Load loads the state from etcd. +func (e *EtcdClient) Load() ([]byte, error) { + ctx := context.TODO() + get := clientv3.OpGet(e.statePath) + + resp, err := e.client.Txn(ctx).If(e.lock.IsOwner()).Then(get).Commit() + if err != nil { + return nil, err + } + + if !resp.Succeeded { + log.Errorln("No longer owns the lock, trying to lock and load again.") + err = e.lock.Lock(context.Background()) + if err != nil { + return nil, err + } + + return e.Load() + } + + kvs := resp.Responses[0].GetResponseRange().Kvs + if len(kvs) == 0 { + // No state exists + return nil, nil + } + + state := kvs[0].Value + return state, nil +} diff --git a/go/master/inmem_store.go b/go/master/inmem_store.go new file mode 100644 index 0000000000..bcd549b20e --- /dev/null +++ b/go/master/inmem_store.go @@ -0,0 +1,28 @@ +package master + +import "sync" + +// InMemStore is an in memory implementation of Store interface. +// +// It does not tolerate the fault that casues the program to crash. +type InMemStore struct { + mu sync.Mutex + buf []byte +} + +// Save saves the state into the in-memory store. +func (m *InMemStore) Save(state []byte) error { + m.mu.Lock() + defer m.mu.Unlock() + + m.buf = state + return nil +} + +// Load loads the state from the in-memory store. +func (m *InMemStore) Load() ([]byte, error) { + m.mu.Lock() + defer m.mu.Unlock() + + return m.buf, nil +} diff --git a/go/master/service.go b/go/master/service.go index 55e1e2d1a4..58e68e7448 100644 --- a/go/master/service.go +++ b/go/master/service.go @@ -1,6 +1,9 @@ package master import ( + "bytes" + "compress/gzip" + "encoding/gob" "errors" "os" "path/filepath" @@ -12,24 +15,54 @@ import ( "github.com/PaddlePaddle/recordio" ) +const ( + dialTimeout = 5 * time.Second +) + +// Store is the interface for save and load the master state. +type Store interface { + Save([]byte) error + Load() ([]byte, error) +} + +// Chunk is a chunk of data consisted of several data instances. +type Chunk struct { + Path string + Index recordio.Index // chunk index +} + +// Task is the basic unit of data instances assigned to trainers. +type Task struct { + ID int + Chunks []Chunk +} + +type taskEntry struct { + Epoch int + NumTimeout int + Task Task +} + +type taskQueues struct { + Todo []taskEntry + Pending map[int]taskEntry // map from task ID to task entry + Done []taskEntry + Failed []Task +} + // Service is the master server service. type Service struct { chunksPerTask int timeoutDur time.Duration timeoutMax int ready chan struct{} + store Store mu sync.Mutex initDone bool taskQueues taskQueues } -// Recover recovers service state from etcd. -func Recover() (*Service, error) { - // TODO(helin): recover from snapshot state from etcd. - return nil, nil -} - func partition(chunks []Chunk, chunksPerTask int) []taskEntry { id := 0 if chunksPerTask <= 0 { @@ -58,7 +91,7 @@ func partition(chunks []Chunk, chunksPerTask int) []taskEntry { } // NewService creates a new service. -func NewService(chunksPerTask int, timeoutDur time.Duration, timeoutMax int) *Service { +func NewService(store Store, chunksPerTask int, timeoutDur time.Duration, timeoutMax int) (*Service, error) { s := &Service{} s.chunksPerTask = chunksPerTask s.timeoutDur = timeoutDur @@ -66,38 +99,82 @@ func NewService(chunksPerTask int, timeoutDur time.Duration, timeoutMax int) *Se s.taskQueues = taskQueues{} s.taskQueues.Pending = make(map[int]taskEntry) s.ready = make(chan struct{}) - return s -} + s.store = store + recovered, err := s.recover() + if err != nil { + return nil, err + } -// Chunk is a chunk of data consisted of several data instances. -type Chunk struct { - Path string - Index recordio.Index // chunk index -} + if recovered { + // Recovered. Now the state is already initialized, + // and the master is ready. + s.initDone = true + close(s.ready) + log.Info("Master recovered from saved state.") + } -// Task is the basic unit of data instances assigned to trainers. -type Task struct { - ID int - Chunks []Chunk + return s, nil } -type taskEntry struct { - Epoch int - NumTimeout int - Task Task -} +// recover recovers service state from etcd. +func (s *Service) recover() (bool, error) { + state, err := s.store.Load() + if err != nil { + return false, err + } -type taskQueues struct { - Todo []taskEntry - Pending map[int]taskEntry // map from task ID to task entry - Done []taskEntry - Failed []Task + if state == nil { + log.Infoln("No state exists, not recovered.") + return false, nil + } + + log.Infof("Loaded snapshot of size: %d bytes.", len(state)) + gr, err := gzip.NewReader(bytes.NewReader(state)) + if err != nil { + return false, err + } + + dec := gob.NewDecoder(gr) + var tqs taskQueues + err = dec.Decode(&tqs) + if err != nil { + return false, err + } + + err = gr.Close() + if err != nil { + // Only close failed, recover actually succeed, so + // just log error. + log.Errorln(err) + } + + s.taskQueues = tqs + return true, nil } -// *must* be called with s.mu being held. +// snapshot *must* be called with s.mu being held. func (s *Service) snapshot() error { - // TODO(helin): snapshot state on etcd. - return nil + // TOOD(helin): etcd request has a size limit, so the snapshot + // size is limited by the max request size. We should either + // divide the snapshot into smaller chunks and save under + // different keys, or configure the request size to be big + // enough: + // https://github.com/coreos/etcd/blob/2f84f3d8d8ed8f9537ab6ffa44a3a1c7eddfa9b1/embed/config.go#L44 + var buf bytes.Buffer + gw := gzip.NewWriter(&buf) + enc := gob.NewEncoder(gw) + err := enc.Encode(s.taskQueues) + if err != nil { + return err + } + err = gw.Close() + if err != nil { + return err + } + + state := buf.Bytes() + log.Infof("Saving snapshot of size: %d bytes.", len(state)) + return s.store.Save(state) } func readChunks(globPaths []string) ([]Chunk, error) { @@ -207,12 +284,12 @@ func (s *Service) checkTimeoutFunc(taskID int, epoch int) func() { t.NumTimeout++ if t.NumTimeout > s.timeoutMax { - log.Warningf("Task %v timed out %d times, discard.\n", t.Task, t.NumTimeout) + log.Warningf("Task %v timed out %d times, discard.", t.Task, t.NumTimeout) s.taskQueues.Failed = append(s.taskQueues.Failed, t.Task) return } - log.Warningf("Task %v timed out %d times, retry.\n", t.Task, t.NumTimeout) + log.Warningf("Task %v timed out %d times, retry.", t.Task, t.NumTimeout) s.taskQueues.Todo = append(s.taskQueues.Todo, t) } } diff --git a/go/pserver/cclient/cclient.go b/go/pserver/cclient/cclient.go index 92a41b7f54..bbaf43d9f1 100644 --- a/go/pserver/cclient/cclient.go +++ b/go/pserver/cclient/cclient.go @@ -133,7 +133,7 @@ func paddle_init_param(client C.paddle_pserver_client, param C.paddle_parameter, if err != nil { if err.Error() == pserver.AlreadyInitialized { - log.Warningf("parameter %s already initialized, treat paddle_init_param as sucessful.\n", name) + log.Warningf("parameter %s already initialized, treat paddle_init_param as sucessful.", name) return C.PSERVER_OK } log.Errorln(err) @@ -200,7 +200,7 @@ func paddle_get_params(client C.paddle_pserver_client, dst **C.paddle_parameter, for i, p := range ps { pn[i] = p.Name } - log.Errorf("pserver returned wrong number of parameters. Requested: %s, returned: %s.\n", strings.Join(pn, ", "), strings.Join(ns, ", ")) + log.Errorf("pserver returned wrong number of parameters. Requested: %s, returned: %s.", strings.Join(pn, ", "), strings.Join(ns, ", ")) return C.PSERVER_ERROR } @@ -210,7 +210,7 @@ func paddle_get_params(client C.paddle_pserver_client, dst **C.paddle_parameter, for i, p := range ps { pn[i] = p.Name } - log.Errorf("pserver returned wrong parameters, or not in requested order. Requested: %s, returned: %s.\n", strings.Join(pn, ", "), strings.Join(ns, ", ")) + log.Errorf("pserver returned wrong parameters, or not in requested order. Requested: %s, returned: %s.", strings.Join(pn, ", "), strings.Join(ns, ", ")) return C.PSERVER_ERROR } } diff --git a/go/pserver/client_test.go b/go/pserver/client_test.go index d0371a26a1..6ecf1fa08a 100644 --- a/go/pserver/client_test.go +++ b/go/pserver/client_test.go @@ -7,6 +7,7 @@ import ( "strconv" "strings" "testing" + "time" "github.com/PaddlePaddle/Paddle/go/pserver" ) @@ -30,9 +31,12 @@ func init() { port[i] = p go func(l net.Listener) { - s := pserver.NewService() + s, err := pserver.NewService("", time.Second*5) + if err != nil { + panic(err) + } server := rpc.NewServer() - err := server.Register(s) + err = server.Register(s) if err != nil { panic(err) } diff --git a/go/pserver/service.go b/go/pserver/service.go index 78a2bfaf63..f966595fdc 100644 --- a/go/pserver/service.go +++ b/go/pserver/service.go @@ -1,9 +1,18 @@ package pserver import ( + "context" "errors" "fmt" + "strconv" + "strings" "sync" + "time" + + "github.com/PaddlePaddle/Paddle/go/utils/networkhelper" + "github.com/coreos/etcd/clientv3" + "github.com/coreos/etcd/clientv3/concurrency" + log "github.com/sirupsen/logrus" ) // ElementType is the type of elements of a Parameter. @@ -24,6 +33,9 @@ const ( Float64 ) +// PsDesired is etcd path for store desired pserver count +const PsDesired = "/ps_desired" + // Parameter is a piece of data to sync with the parameter server. type Parameter struct { Name string @@ -47,14 +59,154 @@ type Service struct { mu sync.Mutex opt *optimizer paramMap map[string]Parameter + + etcdEndpoints string + etcdClient *clientv3.Client + // etcdTimeout is also used as retry intervals. + etcdTimeout time.Duration + // desired number of pservers in the job. + // assume desired will not change during one training job. + desired int + // FIXME: ensure GetExternalIP gets the correct ip for trainers to connect. + externalIP string } -// NewService creates a new service. -func NewService() *Service { +// NewService creates a new service, will bypass etcd registration if no +// endpoints specified. +func NewService(endpoints string, numPservers int, timeout time.Duration) (*Service, error) { s := &Service{opt: newOptimizer(sgd, 0.005)} s.paramMap = make(map[string]Parameter) s.initialized = make(chan struct{}) - return s + s.etcdEndpoints = endpoints + s.etcdTimeout = timeout + + var err error + s.externalIP, err = networkhelper.GetExternalIP() + if err != nil { + return nil, err + } + + if endpoints != "" { + // initialize connection to etcd, try + ep := strings.Split(s.etcdEndpoints, ",") + for { + cli, err := clientv3.New(clientv3.Config{ + Endpoints: ep, + DialTimeout: s.etcdTimeout, + }) + if err != nil { + log.Errorf("connect to etcd error: %v", err) + time.Sleep(s.etcdTimeout) + continue + } + s.etcdClient = cli + log.Debugf("inited client to %s", s.etcdEndpoints) + break + } + // init /ps_desired using transaction, for multiple pservers may want to write + // it at the same time. + for { + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + _, err := s.initDesiredPsercers(ctx, numPservers) + cancel() + if err != nil { + log.Warn(err) + time.Sleep(s.etcdTimeout) + continue + } + break + } + // TODO: when implementing extending or reducing pservers, /ps_desired is + // changed, then we need to watch /ps_desired node for events. For now, just + // write once when init and read from it. + // wait and set s.desired init value + for { + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + resp, err := s.etcdClient.Get(ctx, PsDesired) + cancel() + if err != nil { + log.Errorf("getting %s error: %v", PsDesired, err) + time.Sleep(s.etcdTimeout) + continue + } + if len(resp.Kvs) != 0 { + s.desired, err = strconv.Atoi(string(resp.Kvs[0].Value)) + if err != nil { + log.Errorf("value of %s invalid %v\n", PsDesired, err) + time.Sleep(s.etcdTimeout) + // NOTE: wait util ps_desired value change + continue + } + break + } + } + // try register pserver node on etcd + for { + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + _, err := s.registerPserverEtcd(ctx) + cancel() + if err != nil { + log.Warn(err) + time.Sleep(s.etcdTimeout) + continue + } + break + } + } // if endpoints != "" + // Bypass etcd registration if no endpoints specified + return s, nil +} + +func (s *Service) initDesiredPsercers(ctx context.Context, numPservers int) (*clientv3.TxnResponse, error) { + return concurrency.NewSTM(s.etcdClient, func(c concurrency.STM) error { + dsStr := c.Get(PsDesired) + if dsStr == "" { + c.Put(PsDesired, strconv.Itoa(numPservers)) + } + return nil + }, concurrency.WithAbortContext(ctx), concurrency.WithIsolation(concurrency.RepeatableReads)) +} + +// registerPserverEtcd registers pserver node on etcd using transaction. +func (s *Service) registerPserverEtcd(ctx context.Context) (*clientv3.TxnResponse, error) { + return concurrency.NewSTM(s.etcdClient, func(c concurrency.STM) error { + registered := false + for i := 0; i < s.desired; i++ { + psKey := "/ps/" + strconv.Itoa(i) + log.Debugf("checking %s", psKey) + ps := c.Get(psKey) + log.Debugf("got value (%s) for key: %s", ps, psKey) + + if ps == "" { + resp, err := s.etcdClient.Grant(context.TODO(), 5) + if err != nil { + log.Fatal(err) + } + // find the first id and write info + c.Put(psKey, s.externalIP, clientv3.WithLease(resp.ID)) + log.Debugf("set pserver node %s with value %s", psKey, s.externalIP) + ch, kaerr := s.etcdClient.KeepAlive(context.TODO(), resp.ID) + if kaerr != nil { + log.Errorf("keepalive etcd node error: %v", kaerr) + return kaerr + } + + // Eat the keep alive message so etcd + // will not expire the lease. + go func(ch <-chan *clientv3.LeaseKeepAliveResponse) { + ka := <-ch + log.Debugf("keepalive: %d\n", ka.TTL) + }(ch) + log.Debug("register finished") + registered = true + break + } + } + if registered == true { + return nil + } + return errors.New("not registerd, may due to already have enough pservers") + }, concurrency.WithAbortContext(ctx), concurrency.WithIsolation(concurrency.RepeatableReads)) } // InitParam initializes a parameter. diff --git a/go/pserver/service_test.go b/go/pserver/service_test.go index b746d13e1c..f317535592 100644 --- a/go/pserver/service_test.go +++ b/go/pserver/service_test.go @@ -10,12 +10,15 @@ import ( ) func TestFull(t *testing.T) { - s := pserver.NewService() + s, err := pserver.NewService("", time.Second*5) + if err != nil { + t.Error(err) + } var p pserver.Parameter p.Name = "param_a" p.Content = []byte{1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0} p.ElementType = pserver.Int32 - err := s.InitParam(pserver.ParameterWithConfig{Param: p, Config: nil}, nil) + err = s.InitParam(pserver.ParameterWithConfig{Param: p, Config: nil}, nil) if err != nil { t.FailNow() } @@ -72,8 +75,11 @@ func TestFull(t *testing.T) { } func TestMultipleInit(t *testing.T) { - s := pserver.NewService() - err := s.FinishInitParams(0, nil) + s, err := pserver.NewService("", time.Second*5) + if err != nil { + t.Error(err) + } + err = s.FinishInitParams(0, nil) if err != nil { t.FailNow() } @@ -85,15 +91,18 @@ func TestMultipleInit(t *testing.T) { } func TestUninitialized(t *testing.T) { - s := pserver.NewService() - err := s.SendGrad(pserver.Gradient{}, nil) + s, err := pserver.NewService("", time.Second*5) + err = s.SendGrad(pserver.Gradient{}, nil) if err.Error() != pserver.Uninitialized { t.FailNow() } } func TestBlockUntilInitialized(t *testing.T) { - s := pserver.NewService() + s, err := pserver.NewService("", time.Second*5) + if err != nil { + t.Error(err) + } ch := make(chan struct{}, 2) errCh := make(chan error, 2) var wg sync.WaitGroup @@ -133,7 +142,7 @@ func TestBlockUntilInitialized(t *testing.T) { p.Name = "param_a" p.Content = []byte{1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0} p.ElementType = pserver.Int32 - err := s.InitParam(pserver.ParameterWithConfig{Param: p, Config: nil}, nil) + err = s.InitParam(pserver.ParameterWithConfig{Param: p, Config: nil}, nil) if err != nil { t.FailNow() } diff --git a/go/utils/networkhelper/helper.go b/go/utils/networkhelper/helper.go new file mode 100644 index 0000000000..fbeaea8f5e --- /dev/null +++ b/go/utils/networkhelper/helper.go @@ -0,0 +1,45 @@ +package networkhelper + +import ( + "errors" + "net" +) + +// GetExternalIP returns the ip address of local network interface, not the +// loopback device. +func GetExternalIP() (string, error) { + ifaces, err := net.Interfaces() + if err != nil { + return "", err + } + for _, iface := range ifaces { + if iface.Flags&net.FlagUp == 0 { + continue // interface down + } + if iface.Flags&net.FlagLoopback != 0 { + continue // loopback interface + } + addrs, err := iface.Addrs() + if err != nil { + return "", err + } + for _, addr := range addrs { + var ip net.IP + switch v := addr.(type) { + case *net.IPNet: + ip = v.IP + case *net.IPAddr: + ip = v.IP + } + if ip == nil || ip.IsLoopback() { + continue + } + ip = ip.To4() + if ip == nil { + continue // not an ipv4 address + } + return ip.String(), nil + } + } + return "", errors.New("are you connected to the network?") +} diff --git a/go/utils/networkhelper/helper_test.go b/go/utils/networkhelper/helper_test.go new file mode 100644 index 0000000000..4208f9e358 --- /dev/null +++ b/go/utils/networkhelper/helper_test.go @@ -0,0 +1,10 @@ +package networkhelper + +import "testing" + +func TestGetIP(t *testing.T) { + _, err := GetExternalIP() + if err != nil { + t.Errorf("GetExternalIP returns error : %v\n", err) + } +} diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 673cfa19ac..e3c3155aa9 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -2,3 +2,5 @@ cc_library(ddim SRCS ddim.cc) cc_test(ddim_test SRCS ddim_test.cc DEPS ddim) nv_test(dim_test SRCS dim_test.cu DEPS ddim) + +cc_test(variable_test SRCS variable_test.cc) diff --git a/paddle/framework/ddim_test.cc b/paddle/framework/ddim_test.cc index e5c84d7abe..36eef02370 100644 --- a/paddle/framework/ddim_test.cc +++ b/paddle/framework/ddim_test.cc @@ -1,5 +1,3 @@ -//#include -//#include #include #include diff --git a/paddle/framework/variable.h b/paddle/framework/variable.h new file mode 100644 index 0000000000..b33e10e682 --- /dev/null +++ b/paddle/framework/variable.h @@ -0,0 +1,68 @@ +/* + Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ +#pragma once + +#include +#include +#include + +#include "paddle/platform/assert.h" + +namespace paddle { +namespace framework { + +class Variable { + public: + template + const T& Get() const { + PADDLE_ASSERT(holder_ != nullptr); + PADDLE_ASSERT(std::type_index(typeid(T)) == + std::type_index(holder_->Type())); + return *static_cast(holder_->Ptr()); + } + + template + T* GetMutable() { + if (holder_ == nullptr || + std::type_index(typeid(T)) != std::type_index(holder_->Type())) { + holder_.reset(new PlaceholderImpl(new T())); + } + return static_cast(holder_->Ptr()); + } + + private: + struct Placeholder { + virtual ~Placeholder() {} + virtual const std::type_info& Type() const = 0; + virtual void* Ptr() const = 0; + }; + + // Placeholder hides type T, so it doesn't appear as a template + // parameter of Variable. + template + struct PlaceholderImpl : public Placeholder { + PlaceholderImpl(T* ptr) : ptr_(ptr), type_(typeid(T)) {} + + virtual const std::type_info& Type() const { return type_; } + virtual void* Ptr() const { return static_cast(ptr_.get()); } + + std::unique_ptr ptr_; + const std::type_info& type_; + }; + + std::unique_ptr + holder_; // pointers to a PlaceholderImpl object indeed. +}; + +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/variable.md b/paddle/framework/variable.md new file mode 100644 index 0000000000..f44d5ea46e --- /dev/null +++ b/paddle/framework/variable.md @@ -0,0 +1,52 @@ +# Design Doc: Variable + + +Variable is also known as *blob* in MxNet and Caffe2. It is the input and output type of operators, where a neural network is a graph of operators. + +## Requirements: Lazy Memory Allocation + +For the flexibility of a DL system, a variable should be able to contain any typed value -- a tensor in most cases, but could also be some integer IDs or a scope of other variables in the case of RNN. + +To use the minimum amount of memory, we'd like that a variable to allocate memory when it has to, or, lazy memory allocation. Let's take the following example: + +```cpp +Variable vr, v1, v2; + +Tensor* t1 = new Tensor(); +Tensor* t2 = new Tensor(); + +Randomize( + /* malloc */ v1.GetMutable().mutable_data(DDim(100,200)), + /* size */ t1.Size()); + +Randomize( + /* malloc */ v2.GetMutable().mutable_data(DDim(200,300)), + /* size */ t2.Size()); + +Mult( + /*result*/ vr.GetMutable().mutable_data(SizeOfMult(v1, v2)), + /*input1*/ v1.Get().data(), + /*input2*/ v2.Get().data()); +``` + +We see that a variable holds nothing until `Variable::GetMutable()` allocates a tensor and puts it in the variable. Similarly, a tensor gets its memory until `Tensor::mutable_data()`. + +This syntax for lazy memory allocation when we call `Randomize` and `Mult`, those functions that mutate the variable, so it saves us some line of C++ code. + + +## Implementation: Type Hiding + +To make memory allocation lazy, we cannot assume that we know the type held by a variable at definition time. In other words, `class Variable` cannot be a template `template class Variable`. + +Because we don't know the type `T`, we cannot save a `T*` as `Variable's` data member. Instead, we save an interface object `Placeholder`, who can return the pointer to the saved object via `Placeholder::Ptr()` as `void*`. + +But anyway, Variable needs to know `T` so could it `delete(ptr)` and so could `Variable::Get` checks the expected type and the saved object's type. + +We save `T` in `PlaceholderImpl`, the implementation of `Placeholder`. Please be aware that `PlaceholderImpl` is a class template and `T` is passed in as a template parameter. + +Because `PlaceholderImpl` knows `T`, it can save and return `typeid(T)` for the type comparison in `Variable::Get` and `Variable::GetMutable`. + + +## Conclusion + +The technique type hiding utilizes C++ class templates, interface and derivation, and C++ RTTI (typeid). This combination saves us from definition something like `caffe2::TypeMata`, which takes hundreds of lines of C++ code. diff --git a/paddle/framework/variable_test.cc b/paddle/framework/variable_test.cc new file mode 100644 index 0000000000..aea03bcf57 --- /dev/null +++ b/paddle/framework/variable_test.cc @@ -0,0 +1,40 @@ +/* + Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include +#include + +#include "gtest/gtest.h" +#include "paddle/framework/variable.h" + +TEST(Variable, GetMutable) { + using paddle::framework::Variable; + + struct Tensor { + int content_; + }; + + std::unique_ptr v(new Variable()); + + Tensor* t = v->GetMutable(); + t->content_ = 1234; + + const Tensor& tt = v->Get(); + EXPECT_EQ(1234, tt.content_); + + std::string* s = v->GetMutable(); + *s = "hello"; + + const std::string& ss = v->Get(); + EXPECT_EQ("hello", ss); +} diff --git a/paddle/gserver/evaluators/DetectionMAPEvaluator.cpp b/paddle/gserver/evaluators/DetectionMAPEvaluator.cpp new file mode 100644 index 0000000000..9b825db574 --- /dev/null +++ b/paddle/gserver/evaluators/DetectionMAPEvaluator.cpp @@ -0,0 +1,308 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "Evaluator.h" +#include "paddle/gserver/layers/DetectionUtil.h" + +using std::map; +using std::vector; +using std::pair; +using std::make_pair; + +namespace paddle { + +/** + * @brief detection map Evaluator + * + * The config file api is detection_map_evaluator. + */ +class DetectionMAPEvaluator : public Evaluator { +public: + DetectionMAPEvaluator() + : evaluateDifficult_(false), cpuOutput_(nullptr), cpuLabel_(nullptr) {} + + virtual void start() { + Evaluator::start(); + allTruePos_.clear(); + allFalsePos_.clear(); + numPos_.clear(); + } + + virtual real evalImp(std::vector& arguments) { + overlapThreshold_ = config_.overlap_threshold(); + backgroundId_ = config_.background_id(); + evaluateDifficult_ = config_.evaluate_difficult(); + apType_ = config_.ap_type(); + + MatrixPtr detectTmpValue = arguments[0].value; + Matrix::resizeOrCreate(cpuOutput_, + detectTmpValue->getHeight(), + detectTmpValue->getWidth(), + false, + false); + + MatrixPtr labelTmpValue = arguments[1].value; + Matrix::resizeOrCreate(cpuLabel_, + labelTmpValue->getHeight(), + labelTmpValue->getWidth(), + false, + false); + + cpuOutput_->copyFrom(*detectTmpValue); + cpuLabel_->copyFrom(*labelTmpValue); + + Argument label = arguments[1]; + const int* labelIndex = label.sequenceStartPositions->getData(false); + size_t batchSize = label.getNumSequences(); + + vector>> allGTBBoxes; + vector>>> allDetectBBoxes; + + for (size_t n = 0; n < batchSize; ++n) { + map> bboxes; + for (int i = labelIndex[n]; i < labelIndex[n + 1]; ++i) { + vector bbox; + getBBoxFromLabelData(cpuLabel_->getData() + i * 6, 1, bbox); + int c = cpuLabel_->getData()[i * 6]; + bboxes[c].push_back(bbox[0]); + } + allGTBBoxes.push_back(bboxes); + } + + size_t n = 0; + const real* cpuOutputData = cpuOutput_->getData(); + for (size_t imgId = 0; imgId < batchSize; ++imgId) { + map>> bboxes; + size_t curImgId = static_cast((cpuOutputData + n * 7)[0]); + while (curImgId == imgId && n < cpuOutput_->getHeight()) { + vector label; + vector score; + vector bbox; + getBBoxFromDetectData(cpuOutputData + n * 7, 1, label, score, bbox); + bboxes[label[0]].push_back(make_pair(score[0], bbox[0])); + ++n; + curImgId = static_cast((cpuOutputData + n * 7)[0]); + } + allDetectBBoxes.push_back(bboxes); + } + + for (size_t n = 0; n < batchSize; ++n) { + for (map>::iterator it = + allGTBBoxes[n].begin(); + it != allGTBBoxes[n].end(); + ++it) { + size_t count = 0; + if (evaluateDifficult_) { + count = it->second.size(); + } else { + for (size_t i = 0; i < it->second.size(); ++i) + if (!(it->second[i].isDifficult)) ++count; + } + if (numPos_.find(it->first) == numPos_.end() && count != 0) { + numPos_[it->first] = count; + } else { + numPos_[it->first] += count; + } + } + } + + // calcTFPos + calcTFPos(batchSize, allGTBBoxes, allDetectBBoxes); + + return 0; + } + + virtual void printStats(std::ostream& os) const { + real mAP = calcMAP(); + os << "Detection mAP=" << mAP; + } + + virtual void distributeEval(ParameterClient2* client) { + LOG(FATAL) << "Distribute detection evaluation not implemented."; + } + +protected: + void calcTFPos(const size_t batchSize, + const vector>>& allGTBBoxes, + const vector>>>& + allDetectBBoxes) { + for (size_t n = 0; n < allDetectBBoxes.size(); ++n) { + if (allGTBBoxes[n].size() == 0) { + for (map>>::const_iterator + it = allDetectBBoxes[n].begin(); + it != allDetectBBoxes[n].end(); + ++it) { + size_t label = it->first; + for (size_t i = 0; i < it->second.size(); ++i) { + allTruePos_[label].push_back(make_pair(it->second[i].first, 0)); + allFalsePos_[label].push_back(make_pair(it->second[i].first, 1)); + } + } + } else { + for (map>>::const_iterator + it = allDetectBBoxes[n].begin(); + it != allDetectBBoxes[n].end(); + ++it) { + size_t label = it->first; + vector> predBBoxes = it->second; + if (allGTBBoxes[n].find(label) == allGTBBoxes[n].end()) { + for (size_t i = 0; i < predBBoxes.size(); ++i) { + allTruePos_[label].push_back(make_pair(predBBoxes[i].first, 0)); + allFalsePos_[label].push_back(make_pair(predBBoxes[i].first, 1)); + } + } else { + vector gtBBoxes = + allGTBBoxes[n].find(label)->second; + vector visited(gtBBoxes.size(), false); + // Sort detections in descend order based on scores + std::sort(predBBoxes.begin(), + predBBoxes.end(), + sortScorePairDescend); + for (size_t i = 0; i < predBBoxes.size(); ++i) { + real maxOverlap = -1.0; + size_t maxIdx = 0; + for (size_t j = 0; j < gtBBoxes.size(); ++j) { + real overlap = + jaccardOverlap(predBBoxes[i].second, gtBBoxes[j]); + if (overlap > maxOverlap) { + maxOverlap = overlap; + maxIdx = j; + } + } + if (maxOverlap > overlapThreshold_) { + if (evaluateDifficult_ || + (!evaluateDifficult_ && !gtBBoxes[maxIdx].isDifficult)) { + if (!visited[maxIdx]) { + allTruePos_[label].push_back( + make_pair(predBBoxes[i].first, 1)); + allFalsePos_[label].push_back( + make_pair(predBBoxes[i].first, 0)); + visited[maxIdx] = true; + } else { + allTruePos_[label].push_back( + make_pair(predBBoxes[i].first, 0)); + allFalsePos_[label].push_back( + make_pair(predBBoxes[i].first, 1)); + } + } + } else { + allTruePos_[label].push_back(make_pair(predBBoxes[i].first, 0)); + allFalsePos_[label].push_back( + make_pair(predBBoxes[i].first, 1)); + } + } + } + } + } + } + } + + real calcMAP() const { + real mAP = 0.0; + size_t count = 0; + for (map::const_iterator it = numPos_.begin(); + it != numPos_.end(); + ++it) { + size_t label = it->first; + size_t labelNumPos = it->second; + if (labelNumPos == 0 || allTruePos_.find(label) == allTruePos_.end()) + continue; + vector> labelTruePos = allTruePos_.find(label)->second; + vector> labelFalsePos = + allFalsePos_.find(label)->second; + // Compute average precision. + vector tpCumSum; + getAccumulation(labelTruePos, &tpCumSum); + vector fpCumSum; + getAccumulation(labelFalsePos, &fpCumSum); + std::vector precision, recall; + size_t num = tpCumSum.size(); + // Compute Precision. + for (size_t i = 0; i < num; ++i) { + CHECK_LE(tpCumSum[i], labelNumPos); + precision.push_back(static_cast(tpCumSum[i]) / + static_cast(tpCumSum[i] + fpCumSum[i])); + recall.push_back(static_cast(tpCumSum[i]) / labelNumPos); + } + // VOC2007 style + if (apType_ == "11point") { + vector maxPrecisions(11, 0.0); + int startIdx = num - 1; + for (int j = 10; j >= 0; --j) + for (int i = startIdx; i >= 0; --i) { + if (recall[i] < j / 10.) { + startIdx = i; + if (j > 0) maxPrecisions[j - 1] = maxPrecisions[j]; + break; + } else { + if (maxPrecisions[j] < precision[i]) + maxPrecisions[j] = precision[i]; + } + } + for (int j = 10; j >= 0; --j) mAP += maxPrecisions[j] / 11; + ++count; + } else if (apType_ == "Integral") { + // Nature integral + real averagePrecisions = 0.; + real prevRecall = 0.; + for (size_t i = 0; i < num; ++i) { + if (fabs(recall[i] - prevRecall) > 1e-6) + averagePrecisions += precision[i] * fabs(recall[i] - prevRecall); + prevRecall = recall[i]; + } + mAP += averagePrecisions; + ++count; + } else { + LOG(FATAL) << "Unkown ap version: " << apType_; + } + } + if (count != 0) mAP /= count; + return mAP * 100; + } + + void getAccumulation(vector> inPairs, + vector* accuVec) const { + std::stable_sort( + inPairs.begin(), inPairs.end(), sortScorePairDescend); + accuVec->clear(); + size_t sum = 0; + for (size_t i = 0; i < inPairs.size(); ++i) { + sum += inPairs[i].second; + accuVec->push_back(sum); + } + } + + std::string getTypeImpl() const { return "detection_map"; } + + real getValueImpl() const { return calcMAP(); } + +private: + real overlapThreshold_; // overlap threshold when determining whether matched + bool evaluateDifficult_; // whether evaluate difficult ground truth + size_t backgroundId_; // class index of background + std::string apType_; // how to calculate mAP (Integral or 11point) + + MatrixPtr cpuOutput_; + MatrixPtr cpuLabel_; + + map numPos_; // counts of true objects each classification + map>> + allTruePos_; // true positive prediction + map>> + allFalsePos_; // false positive prediction +}; + +REGISTER_EVALUATOR(detection_map, DetectionMAPEvaluator); + +} // namespace paddle diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.cpp b/paddle/gserver/gradientmachines/NeuralNetwork.cpp index 4512aacc81..2e839f6405 100644 --- a/paddle/gserver/gradientmachines/NeuralNetwork.cpp +++ b/paddle/gserver/gradientmachines/NeuralNetwork.cpp @@ -241,11 +241,14 @@ void NeuralNetwork::forward(const std::vector& inArgs, dataLayers_[i]->setData(inArgs[i]); } + gLayerStackTrace.set_stage(true); + { for (auto& layer : layers_) { REGISTER_TIMER_INFO("ForwardTimer", layer->getName().c_str()); gLayerStackTrace.push(layer->getName()); layer->forward(passType); + gLayerStackTrace.pop(layer->getName()); } } @@ -254,9 +257,6 @@ void NeuralNetwork::forward(const std::vector& inArgs, for (auto& layer : outputLayers_) { outArgs->push_back(layer->getOutput()); } - if (passType == PASS_TEST) { - gLayerStackTrace.clear(); - } } void NeuralNetwork::resetState() { @@ -283,9 +283,10 @@ void NeuralNetwork::getState(MachineState& machineState) { } void NeuralNetwork::backward(const UpdateCallback& callback) { - gLayerStackTrace.pop(""); // tell layer trace is during backward. + gLayerStackTrace.set_stage(false); FOR_EACH_R(layer, layers_) { REGISTER_TIMER_INFO("BackwardTimer", (*layer)->getName().c_str()); + gLayerStackTrace.push((*layer)->getName()); if ((*layer)->needGradient()) { (*layer)->backward(callback); } @@ -308,35 +309,35 @@ public: void addEvaluator(std::unique_ptr&& evaluator) { evaluators_.emplace_back(std::move(evaluator)); } - virtual void start() { + void start() override { for (auto& evaluator : evaluators_) { evaluator->start(); } } - virtual void finish() { + void finish() override { for (auto& evaluator : evaluators_) { evaluator->finish(); } } - virtual void eval(const NeuralNetwork& nn) { + void eval(const NeuralNetwork& nn) override { for (auto& evaluator : evaluators_) { evaluator->eval(nn); } } - virtual real evalImp(std::vector& arguments) { + real evalImp(std::vector& arguments) override { (void)arguments; return -1; } - virtual void printStats(std::ostream& os) const { + void printStats(std::ostream& os) const override { for (auto& evaluator : evaluators_) { evaluator->printStats(os); os << ' '; } } - virtual void distributeEval(ParameterClient2* client) { + void distributeEval(ParameterClient2* client) override { for (auto& evaluator : evaluators_) { evaluator->distributeEval(client); } @@ -351,7 +352,7 @@ public: * @brief getNames will return all inside evaluators' names. * @param names [out]: return names. */ - void getNames(std::vector* names) { + void getNames(std::vector* names) override { for (auto& eval : evaluators_) { eval->getNames(names); } @@ -360,7 +361,7 @@ public: /** * @brief getValue could get all inside evaluators' value. */ - real getValue(const std::string& name, Error* err) const { + real getValue(const std::string& name, Error* err) const override { return this->getMethodHelper( name, err, [&name, err](const std::unique_ptr& eval) { return eval->getValue(name, err); @@ -370,7 +371,7 @@ public: /** * @brief getType could get all inside evaluators' type. */ - std::string getType(const std::string& name, Error* err) const { + std::string getType(const std::string& name, Error* err) const override { return this->getMethodHelper( name, err, [&name, err](const std::unique_ptr& eval) { return eval->getType(name, err); @@ -395,6 +396,30 @@ private: } }; +class SubnetEvaluator : public CombinedEvaluator { +public: + SubnetEvaluator(const std::string& layerName, + std::unique_ptr&& evaluator) + : layerName_(layerName) { + addEvaluator(std::move(evaluator)); + } + virtual void eval(const NeuralNetwork& nn) override { + const LayerPtr& layer = nn.getLayer(layerName_); + CHECK(layer) << "Nonexisted layer: " << layerName_ << " in submodel " + << nn.getName(); + bool accessed = false; + layer->accessSubNetwork([this, &accessed](NeuralNetwork& subnet) { + subnet.eval(evaluators_[0].get()); + accessed = true; + }); + CHECK(accessed) << "There is no subnetwork for layer " << layerName_ + << " in submodel " << nn.getName(); + } + +protected: + std::string layerName_; +}; + Evaluator* NeuralNetwork::makeEvaluator() const { CombinedEvaluator* combinedEvaluator = new CombinedEvaluator(); auto subModelConfig = std::find_if(config_.sub_models().begin(), @@ -421,6 +446,15 @@ Evaluator* NeuralNetwork::makeEvaluator() const { combinedEvaluator->addEvaluator(std::move(evaluator)); } } + for (auto& layer : layers_) { + layer->accessSubNetwork( + [layer, combinedEvaluator](NeuralNetwork& subnet) { + std::unique_ptr subEvaluator(new SubnetEvaluator( + layer->getName(), + std::unique_ptr(subnet.makeEvaluator()))); + combinedEvaluator->addEvaluator(std::move(subEvaluator)); + }); + } } else { for (const EvaluatorConfig& evalConfig : config_.evaluators()) { std::unique_ptr evaluator(Evaluator::create(evalConfig)); diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.h b/paddle/gserver/gradientmachines/NeuralNetwork.h index e7b6c43840..12810f6425 100644 --- a/paddle/gserver/gradientmachines/NeuralNetwork.h +++ b/paddle/gserver/gradientmachines/NeuralNetwork.h @@ -129,6 +129,8 @@ public: static NeuralNetwork* newNeuralNetwork(const std::string& name = "", NeuralNetwork* rootNetwork = nullptr); + const std::string& getName() const { return subModelName_; } + protected: /** * The constructor of NeuralNetwork. diff --git a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp b/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp index 3e93038022..9a972466d6 100644 --- a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp +++ b/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp @@ -208,6 +208,7 @@ void RecurrentGradientMachine::init( }); CHECK(subModelConfig != config.sub_models().end()); reversed_ = subModelConfig->reversed(); + generating_ = subModelConfig->has_generator(); inFrameLines_.resize(subModelConfig->in_links_size()); for (size_t i = 0; i < inFrameLines_.size(); ++i) { @@ -287,10 +288,6 @@ void RecurrentGradientMachine::init( parameterIds_.push_back(para->getID()); } } - - if (subModelConfig->evaluator_names_size() > 0) { - evaluator_.reset(frames_[0]->makeEvaluator()); - } } void RecurrentGradientMachine::resizeOrCreateFrames(int numFrames) { @@ -538,7 +535,7 @@ void RecurrentGradientMachine::forward(const std::vector& inArgs, The outputs are outFramesLines_[i].agentLayer */ - if (inFrameLines_.empty() && passType == PASS_TEST) { + if (generating_) { generateSequence(); return; } // else forward.. @@ -561,14 +558,14 @@ void RecurrentGradientMachine::forward(const std::vector& inArgs, std::vector outArgs; frames_[i]->forward(inArgs, &outArgs, passType); } - if (evaluator_ && passType == PASS_TEST) { - this->eval(evaluator_.get()); - } reorganizeOutput(passType); } void RecurrentGradientMachine::backward(const UpdateCallback& callback) { + if (generating_) { + return; + } REGISTER_TIMER_INFO("RecurrentBwTime", "RecurrentBwTime"); AsyncGpuBlock asyncGpuBlock; for (int i = maxSequenceLength_ - 1; i >= 0; --i) { @@ -577,11 +574,6 @@ void RecurrentGradientMachine::backward(const UpdateCallback& callback) { for (auto& memoryFrameLine : memoryFrameLines_) { memoryFrameLine.bootLayer->backward(nullptr); } - - // call printers here so the gradient can be printed - if (evaluator_) { - this->eval(evaluator_.get()); - } } void RecurrentGradientMachine::forwardBackward( @@ -595,9 +587,9 @@ void RecurrentGradientMachine::forwardBackward( void RecurrentGradientMachine::eval(Evaluator* evaluator) const { // call printers frame by frame for (int i = 0; i < maxSequenceLength_; ++i) { - LOG(INFO) << "Recurrent Layer Group eval frame " << i << " begin"; + VLOG(2) << "Recurrent Layer Group eval frame " << i << " begin"; evaluator->eval(*(frames_[i].get())); - LOG(INFO) << "Recurrent Layer Group eval frame " << i << " end"; + VLOG(2) << "Recurrent Layer Group eval frame " << i << " end"; } } @@ -1093,10 +1085,6 @@ void RecurrentGradientMachine::oneWaySearch(size_t batchSize) { copyDataOutlinkFrame(machineCur); - // call value printer - if (evaluator_) { - evaluator_->eval(*(frames_[machineCur].get())); - } // check eos const IVectorPtr& eosVec = eosFrameLine_->layers[machineCur]->getOutput().ids; @@ -1321,11 +1309,10 @@ void RecurrentGradientMachine::fillGenOutputs() { batchMachineIdVec_.clear(); generator_.ids.clear(); + int* starts = generator_.outArg.sequenceStartPositions->getMutableData(false); + starts[0] = 0; if (numResults > 1) { real* probs = generator_.outArg.in->getData(); - int* starts = - generator_.outArg.sequenceStartPositions->getMutableData(false); - starts[0] = 0; for (size_t i = 0; i < finalPaths_.size(); ++i) { for (size_t j = 0; j < finalPaths_[i].size(); ++j) { Path& path = finalPaths_[i][j]; @@ -1348,7 +1335,10 @@ void RecurrentGradientMachine::fillGenOutputs() { } else { for (size_t i = 0; i < finalPaths_.size(); ++i) { CHECK(!finalPaths_[i].empty()); - generator_.ids = finalPaths_[i][0].ids; + generator_.ids.insert(generator_.ids.begin(), + finalPaths_[i][0].ids.begin(), + finalPaths_[i][0].ids.end()); + starts[i + 1] = starts[i] + finalPaths_[i][0].ids.size(); } } } diff --git a/paddle/gserver/gradientmachines/RecurrentGradientMachine.h b/paddle/gserver/gradientmachines/RecurrentGradientMachine.h index 8d94d7e2df..f245620cf6 100644 --- a/paddle/gserver/gradientmachines/RecurrentGradientMachine.h +++ b/paddle/gserver/gradientmachines/RecurrentGradientMachine.h @@ -414,6 +414,7 @@ protected: std::vector ids; // store generated sequences Argument outArg; // final output argument }; + bool generating_; Generator generator_; std::vector> frames_; @@ -428,8 +429,6 @@ protected: std::vector parameterIds_; // parameters actually used by this Layer Group - std::unique_ptr evaluator_; // frame printers in this layer group - // store final argument of outFrameLines_ std::vector dataArgs_; // store each frame's output argument of outFrameLines_ diff --git a/paddle/gserver/layers/AgentLayer.cpp b/paddle/gserver/layers/AgentLayer.cpp index 31463823b3..15e7411b5f 100644 --- a/paddle/gserver/layers/AgentLayer.cpp +++ b/paddle/gserver/layers/AgentLayer.cpp @@ -109,6 +109,40 @@ void GatherAgentLayer::forwardValue(PassType passType) { } } +namespace { + +// dest[index[i]] <- src[i] for each i +void copyElements(const IVector& srcVec, + const IVector& indexVec, + IVector& destVec) { + const int* src = srcVec.getData(); + const int* index = indexVec.getData(); + int* dest = destVec.getData(); + int len = indexVec.getSize(); + CHECK_EQ(srcVec.getSize(), indexVec.getSize()); + for (int i = 0; i < len; ++i) { + dest[index[i]] = src[i]; + } +} +} + +void GatherAgentLayer::forwardIds(PassType passType) { + IVectorPtr realId = realLayers_[0]->getOutputLabel(); + if (!realId) return; + + IVector::resizeOrCreate(output_.ids, allIds_->getSize(), useGpu_); + IVectorPtr outId = output_.ids; + idsVec_.resize(idIndex_.size()); + + for (size_t i = 0; i < realLayers_.size(); ++i) { + const IVectorPtr& realId = realLayers_[i]->getOutputLabel(); + idsVec_[i] = IVector::create(allIds_->getData() + idIndex_[i], + /* size */ realId->getSize(), + useGpu_); + execViaCpu(©Elements, *realId, *idsVec_[i], *outId); + } +} + void GatherAgentLayer::backward(const UpdateCallback& callback) { (void)callback; const MatrixPtr& outputGrad = getOutputGrad(); @@ -136,23 +170,22 @@ void ScatterAgentLayer::forward(PassType passType) { CHECK_EQ(realLayer_->getDeviceId(), this->getDeviceId()); int width = this->getSize(); - if (realOutArg_.hasSeq()) { - forwardSequence(passType); - } else if (realOutArg_.value || realOutArg_.ids) { - output_.subArgFrom( - realOutArg_, /* offset */ idIndex_, idSize_, width, useGpu_); - } else { // used in generation - if (realLayer_->getOutput().ids) { - IVector::resizeOrCreate(output_.ids, ids_->getSize(), useGpu_); - output_.ids->selectFrom(*realLayer_->getOutput().ids, *ids_); - } - if (realLayer_->getOutput().value) { - int height = ids_->getSize(); - resetOutput(height, width); - - const MatrixPtr& outV = getOutputValue(); - const MatrixPtr& realV = realLayer_->getOutputValue(); - outV->selectRows(*realV, *ids_); + if (selectionMode_) { + forwardWithSelection(passType); + } else { + if (realOutArg_.hasSeq()) { + output_.subArgFrom(realOutArg_, + /* offset */ idIndex_, + idSize_, + width, + useGpu_, + /* trans */ false, + /* seqFlag */ true, + /* seqStart */ seqStartPosIndex_, + /* seqSize */ numSequences_); + } else { + output_.subArgFrom( + realOutArg_, /* offset */ idIndex_, idSize_, width, useGpu_); } } } @@ -160,6 +193,8 @@ void ScatterAgentLayer::forward(PassType passType) { void ScatterAgentLayer::backward(const UpdateCallback& callback) { (void)callback; + CHECK(!selectionMode_); + const MatrixPtr& outputGrad = realOutArg_.grad; const MatrixPtr& realGrad = realLayer_->getOutputGrad(); if (realGrad) { @@ -174,42 +209,7 @@ void ScatterAgentLayer::backward(const UpdateCallback& callback) { REGISTER_LAYER(gather_agent, GatherAgentLayer); REGISTER_LAYER(scatter_agent, ScatterAgentLayer); -void GatherAgentLayer::forwardIds(PassType passType) { - int height = 0; - IVectorPtr idReal = realLayers_[0]->getOutputLabel(); - - if (!idReal) return; - - if (output_.subSequenceStartPositions) { - int* starts = output_.subSequenceStartPositions->getMutableData(false); - // Gather generator.idsVec - // if is beam search generation result. Get first result. - if (idReal->getData()[idReal->getSize() - 1] == -1) { - for (size_t i = 0; i < realLayers_.size(); ++i) { - // The first element stores first result size - idReal = realLayers_[i]->getOutputLabel(); - idReal->subVecFrom(*idReal, 1, idReal->getData()[0]); - } - } - for (size_t i = 0; i < realLayers_.size(); ++i) { - CHECK(realLayers_[i]->getOutputLabel()); - starts[i] = height; - height += realLayers_[i]->getOutputLabel()->getSize(); - } - starts[realLayers_.size()] = height; - output_.sequenceStartPositions->getMutableData(false)[1] = height; - - IVector::resizeOrCreate(output_.ids, height, false); - for (size_t i = 0; i < realLayers_.size(); ++i) { - output_.ids->subVec(starts[i], starts[i + 1] - starts[i]) - ->copyFrom(*realLayers_[i]->getOutputLabel()); - } - } else { - LOG(FATAL) << "Not implemented"; - } -} - -void ScatterAgentLayer::forwardSequence(PassType passType) { +void ScatterAgentLayer::forwardWithSelection(PassType passType) { Layer::forward(passType); CHECK_EQ(realLayer_->getDeviceId(), this->getDeviceId()); @@ -220,17 +220,19 @@ void ScatterAgentLayer::forwardSequence(PassType passType) { AsyncGpuBlock asyncGpuBlock; REGISTER_TIMER_INFO("SequenceAgentLayerForward", getName().c_str()); - if (realOutArg_.value || realOutArg_.ids) { - CHECK(realOutArg_.sequenceStartPositions); - output_.subArgFrom(realOutArg_, - /* offset */ idIndex_, - idSize_, - width, - useGpu_, - /* trans */ false, - /* seqFlag */ true, - /* seqStart */ seqStartPosIndex_, - /* seqSize */ numSequences_); + if (!input.hasSeq()) { + if (realLayer_->getOutput().ids) { + IVector::resizeOrCreate(output_.ids, ids_->getSize(), useGpu_); + output_.ids->selectFrom(*realLayer_->getOutput().ids, *ids_); + } + if (realLayer_->getOutput().value) { + int height = ids_->getSize(); + resetOutput(height, width); + + const MatrixPtr& outV = getOutputValue(); + const MatrixPtr& realV = realLayer_->getOutputValue(); + outV->selectRows(*realV, *ids_); + } } else { // Putting the generation logic here is really an ugly hack! // used in generation diff --git a/paddle/gserver/layers/AgentLayer.h b/paddle/gserver/layers/AgentLayer.h index 461b84b17e..29681b29c6 100644 --- a/paddle/gserver/layers/AgentLayer.h +++ b/paddle/gserver/layers/AgentLayer.h @@ -110,6 +110,9 @@ protected: // of real layer. ICpuGpuVectorPtr inputStartPos_; + // true for setRealLayer, false for setRealLayerAndOutput + bool selectionMode_; + public: explicit ScatterAgentLayer(const LayerConfig& config) : Layer(config) {} @@ -137,6 +140,7 @@ public: } else { cpuIds_ = ids_; } + selectionMode_ = true; } // set real layer and output, [idIndex, idIndex + idSize) of *ids* @@ -153,6 +157,7 @@ public: idIndex_ = idIndex; idSize_ = idSize; handleBackward_ = handleBackward; + selectionMode_ = false; } void setSequenceStartPositions(const ICpuGpuVectorPtr& sequenceStartPositions, @@ -166,7 +171,7 @@ public: void forward(PassType passType) override; void backward(const UpdateCallback& callback) override; - void forwardSequence(PassType passType); + void forwardWithSelection(PassType passType); }; } // namespace paddle diff --git a/paddle/gserver/tests/test_Evaluator.cpp b/paddle/gserver/tests/test_Evaluator.cpp index 4f5fdbb37c..93996392d2 100644 --- a/paddle/gserver/tests/test_Evaluator.cpp +++ b/paddle/gserver/tests/test_Evaluator.cpp @@ -138,6 +138,23 @@ void testEvaluatorAll(TestConfig testConf, testEvaluator(testConf, testEvaluatorName, batchSize, false); } +TEST(Evaluator, detection_map) { + TestConfig config; + config.evaluatorConfig.set_type("detection_map"); + config.evaluatorConfig.set_overlap_threshold(0.5); + config.evaluatorConfig.set_background_id(0); + config.evaluatorConfig.set_ap_type("Integral"); + config.evaluatorConfig.set_evaluate_difficult(0); + + config.inputDefs.push_back({INPUT_DATA, "output", 7}); + config.inputDefs.push_back({INPUT_SEQUENCE_DATA, "label", 6}); + config.evaluatorConfig.set_evaluate_difficult(false); + testEvaluatorAll(config, "detection_map", 100); + + config.evaluatorConfig.set_evaluate_difficult(true); + testEvaluatorAll(config, "detection_map", 100); +} + TEST(Evaluator, classification_error) { TestConfig config; config.evaluatorConfig.set_type("classification_error"); diff --git a/paddle/memory/README.md b/paddle/memory/README.md new file mode 100644 index 0000000000..e5f7880e4c --- /dev/null +++ b/paddle/memory/README.md @@ -0,0 +1,139 @@ +## Design + +### Usage + +To allocate 4KB CPU memory: + +```cpp +p = memory::Alloc(platform::CPUPlace(), 4*1024); +``` + +To allocate 4KB memory on the 3rd GPU: + +```cpp +p = memory::Alloc(platform::GPUPlace(2), 4*1024); +``` + +To free memory and check the so-far used amount of memory on a place: + +```cpp +auto pl = platform::GPUPlace(0); +p = memory::Alloc(pl, 4*1024); +cout << memory::Used(pl); +memory::Free(pl, p); +``` + +### API + +In `paddle/memory/memory.h` we have: + +```cpp +namespace memory { +template void* Alloc(Place, size_t); +template void Free(Place, void*); +template size_t Used(Place); +} // namespace memory +``` + +These function templates have specializations on either `platform::CPUPlace` or `platform::GPUPlace`: + +```cpp +template<> +void* Alloc(CPUPlace p, size_t size) { + return GetCPUBuddyAllocator()->Alloc(size); +} +``` + +and + +```cpp +template<> +void Alloc(GPUPlace p, size_t size) { + return GetGPUBuddyAllocator(p.id)->Alloc(size); +} +``` + +Similar specializations exist for `Free` and `Used`. + +### Implementation + +`GetCPUBuddyAllocator` and `GetGPUBuddyAllocator` are singletions. + +```cpp +BuddyAllocator* GetCPUBuddyAllocator() { + static BuddyAllocator* a = NULL; + if (a == NULL) { + a = new BuddyAllocator(new CPUAllocator /*backup allocator*/, ...); + } + return a; +} + +BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) { + static BuddyAllocator* as = NULL; + if (as == NULL) { + as = new BuddyAllocator*[platform::NumGPUs()]; + for (int gpu = 0; gpu < platform::NumGPUs(); gpu++) { + as[gpu] = new BuddyAllocator(new GPUAllocator(gpu) /* backup allocator */, ...); + } + } + return as[gpu_id); +``` + +#### `BuddyAllocator` + +`BuddyAllocator` implements the buddy allocation algorithm. Its constructor takes parameters only related with the algorithm: + +```cpp +BuddyAllocator::BuddyAllocator(initial_pool_size, max_pool_size) { + ... +} +``` + +Please be aware that **`BuddyAllocator` always allocate aligned memory**, aligned on 32-bytes, which can hold a `BuddyAllocator::Block` object: + +```cpp +class BuddyAllocator { + private: + struct Block { + size_t size; + Block* left, right; + }; + ... +}; +``` + +Because BuddyAllocator has the meta-data of each block, it can trace the used memory -- record the amount returned by `Alloc` freed in `Free`. Instead, `CPUAllocator` and `GPUAllocator` doesn't know the size of freed memory block and cannot do the trace. + +#### System Allocators + +The `GPUAllocator` and `CPUAllocator` are calls *system allocators*. They work as the fallback allocators of `BuddyAllocator`. + +## Justification + +I got inspiration from Majel and Caffe2, though above design look different from both. + +### Caffe2 + +In Caffe2, `Tensor::mutable_data()` allocates the memroy. In particular, [`Tensor::mutable_data`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/tensor.h#L523) calls [`Tensor::raw_mutable_data`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/tensor.h#L459), which in turn calls [`Context::New`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/tensor.h#L479). + +There are two implementations of `Context`: + +1. [`CPUContext`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context.h#L105), whose [`New` method](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context.h#L131) calls [`g_cpu_allocator.get()->New(size_t)`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context.cc#L15) to allocate the memory. + +1. [`CUDAContext`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context_gpu.h#L99), which has a data member [`int gpu_id_`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context_gpu.h#L202). This looks very similar to class `majel::GPUPlace`, who also has an `int id_` data member. `CUDAContext::New(size_t)` calls [`g_cub_allocator->DeviceAllocate(&ptr, nbytes)`](https://github.com/caffe2/caffe2/blob/v0.7.0/caffe2/core/context_gpu.cu#L355) to allocate the memory. + +### Majel + +In Majel, there are basically two allocator types: + +1. `cpu::SystemAllocator`, which has similar functionality to `caffe2::CPUContext::New/Delete`. +1. `gpu::SystemAllocator`, which has similar functionality to `caffe2::CUDAContext::New/Delete`. + +However, memory allocation is not via these two allocators. Instead, these two allocators are defined in hidden namespaces. + +In Majel there are hidden global variables like: + +1. `cpu::SystemAllocator g_cpu_allocator`, and +1. `vector g_gpu_allocators(NUM_GPUS)`. + +Programs allocate memory via a BuddyAllocator, which can take the `g_cpu_allocator` or a `g_gpu_allocators[gpu_id]` as its *fallback allocator*, so that if BuddyAllocator cannot find a block in its memory pool, it extends its memory pool by calling the fallback allocator's `New(size_t)`. diff --git a/paddle/parameter/ParameterUpdaterHook.cpp b/paddle/parameter/ParameterUpdaterHook.cpp index f826e8448c..c8b47687f5 100644 --- a/paddle/parameter/ParameterUpdaterHook.cpp +++ b/paddle/parameter/ParameterUpdaterHook.cpp @@ -14,11 +14,13 @@ limitations under the License. */ #include "ParameterUpdaterHook.h" +#include #include #include #include #include #include +#include #include "paddle/math/Vector.h" #include "paddle/parameter/Parameter.h" @@ -29,106 +31,76 @@ namespace paddle { /** * The static pruning hook - * - * Static means user load a mask map before training started. This map will - * define which link/weight between neural is disabled. + * Static means user specify a sparsity_ratio before training started, and the + * network will prune the parameters based on the sparsity_ratio. More details + * can be found https://arxiv.org/pdf/1506.02626.pdf. */ + class StaticPruningHook : public IParameterUpdaterHook { public: - /** - * The Mask Map Header. - * The map file started with this header. - * - * In Version 0, reset file will be: - * contains header.size bit, each bit means such weight is enabled or not. - * if bit is 1, then such weight is enabled. - * at end, the file will round to byte, and the low bits of end byte will be - * filled by zero. - * - */ - struct StaticMaskHeader { - uint32_t version; - size_t size; - } __attribute__((__packed__)); - - explicit StaticPruningHook(const std::string& mask_filename) : initCount_(0) { - bool ok = this->loadMaskFile(mask_filename); - if (!ok) { - LOG(WARNING) << "Fail to load mask file " << mask_filename - << " in current directory, searching in init_model_path"; - std::string combineMaskFilename = - path::join(FLAGS_init_model_path, mask_filename); - CHECK(this->loadMaskFile(combineMaskFilename)) - << "Cannot load " << mask_filename << " in ./" << mask_filename - << " and " << combineMaskFilename; - } - VLOG(3) << mask_filename << " mask size = " << this->mask_.size(); + explicit StaticPruningHook(const ParameterUpdaterHookConfig &hookConfig) + : initCount_(0) { + sparsityRatio_ = hookConfig.sparsity_ratio(); } - void update(Parameter* para) { + static bool sortPairAscend(const std::pair &pair1, + const std::pair &pair2) { + return pair1.first > pair2.first; + } + + void update(Parameter *para) { updateThreadChecker_.check(); - auto& vec = para->getBuf(PARAMETER_GRADIENT); + auto &vec = para->getBuf(PARAMETER_GRADIENT); if (vec) { vec->dotMul(*maskVec_); } } - void init(Parameter* para) { - size_t initCount = this->initCount_.fetch_add(1); - CHECK_EQ(initCount, 0UL) << "Currently the StaticPruningHook must invoke " - "in same ParamterUpdater"; - VLOG(3) << "Initialize Parameter " << para; - SetDevice device(para->getDeviceId()); + void generateMask(Parameter *para) { + VectorPtr maskTemp = Vector::create(para->getSize(), false); + maskTemp->zeroMem(); + real *maskTempData = maskTemp->getData(); + size_t nonZeroNum = para->getSize() * (1 - sparsityRatio_); - auto maskVec = Vector::create(this->mask_.size(), false); - { // Initialize maskVec with float mask vector - real* dataPtr = maskVec->getData(); - size_t i = 0; - for (bool m : mask_) { - dataPtr[i++] = m ? 1.0 : 0.0; - } - } + VectorPtr paraVec = para->getBuf(PARAMETER_VALUE); + VectorPtr paraCpuCopy = Vector::create(para->getSize(), false); + + paraCpuCopy->copyFrom(*paraVec); + std::vector> param; + + for (size_t i = 0; i < para->getSize(); i++) + param.push_back(std::make_pair(fabs(paraCpuCopy->getData()[i]), i)); + + std::partial_sort( + param.begin(), param.begin() + nonZeroNum, param.end(), sortPairAscend); + for (size_t i = 0; i < nonZeroNum; i++) maskTempData[param[i].second] = 1.0; // Currently just use a mask vector for hack. - // @TODO(yuyang18): Implemented the mask operation in vector. if (para->useGpu()) { - maskVec_ = Vector::create(this->mask_.size(), para->useGpu()); - maskVec_->copyFrom(*maskVec); + maskVec_ = Vector::create(para->getSize(), para->useGpu()); + maskVec_->copyFrom(*maskTemp); } else { - maskVec_ = maskVec; + maskVec_ = maskTemp; } - - auto& vec = para->getBuf(PARAMETER_VALUE); - vec->dotMul(*maskVec_); } -private: - bool loadMaskFile(const std::string& mask_filename) { - std::ifstream fin; - fin.open(mask_filename); - if (fin.is_open()) { - StaticMaskHeader header; - fin.read(reinterpret_cast(&header), sizeof(StaticMaskHeader)); - CHECK_EQ(header.version, 0UL); - mask_.resize(header.size); - uint8_t buf; - for (size_t i = 0; i < header.size; ++i, buf <<= 1) { - if (i % 8 == 0) { - fin.read(reinterpret_cast(&buf), sizeof(uint8_t)); - } - mask_[i] = buf & 0x80; - } - fin.close(); - return true; - } else { - return false; - } + void init(Parameter *para) { + generateMask(para); + size_t initCount = this->initCount_.fetch_add(1); + CHECK_EQ(initCount, 0UL) << "Currently the StaticPruningHook must invoke " + "in same ParamterUpdater"; + VLOG(3) << "Initialize Parameter " << para; + SetDevice device(para->getDeviceId()); + + auto ¶Vec = para->getBuf(PARAMETER_VALUE); + paraVec->dotMul(*maskVec_); } +private: SameThreadChecker updateThreadChecker_; std::atomic initCount_; VectorPtr maskVec_; - std::vector mask_; + real sparsityRatio_; }; IParameterUpdaterHook::IParameterUpdaterHook() {} @@ -145,7 +117,7 @@ IParameterUpdaterHook::~IParameterUpdaterHook() {} */ class StringIntPairHasher { public: - size_t operator()(const std::pair& k) const { + size_t operator()(const std::pair &k) const { return intHasher_(strHasher_(k.first) + k.second); } @@ -162,19 +134,19 @@ static WeakKVCache, /** * ParameterUpdaterHook actually factory method. */ -static IParameterUpdaterHook* createImpl( - const ParameterUpdaterHookConfig& config) { - auto& type = config.type(); +static IParameterUpdaterHook *createImpl( + const ParameterUpdaterHookConfig &config) { + auto &type = config.type(); if (type == "pruning") { - if (config.has_purning_mask_filename()) { - return new StaticPruningHook(config.purning_mask_filename()); - } + return new StaticPruningHook(config); } + + LOG(FATAL) << "Unknown Hook type: " << type; return nullptr; } std::shared_ptr IParameterUpdaterHook::create( - const ParameterConfig& paramConfig, int idx) { + const ParameterConfig ¶mConfig, int idx) { std::pair key = {paramConfig.name(), idx}; return g_hookCache_.get( key, [&] { return createImpl(paramConfig.update_hooks(idx)); }); diff --git a/paddle/platform/CMakeLists.txt b/paddle/platform/CMakeLists.txt index c7d7b14518..7abe2ab89e 100644 --- a/paddle/platform/CMakeLists.txt +++ b/paddle/platform/CMakeLists.txt @@ -2,3 +2,4 @@ nv_test(cuda_test SRCS cuda_test.cu) cc_library(place SRCS place.cc) cc_test(place_test SRCS place_test.cc DEPS place glog gflags) +cc_test(must_check_test SRCS must_check_test.cc) diff --git a/paddle/utils/Compiler.h b/paddle/platform/must_check.h similarity index 78% rename from paddle/utils/Compiler.h rename to paddle/platform/must_check.h index cebca5a2a3..4fcc62afc0 100644 --- a/paddle/utils/Compiler.h +++ b/paddle/platform/must_check.h @@ -10,24 +10,17 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once -/** - * This header defines some useful attribute by each compiler. It is the - * abstract layer of compilers. - */ -#ifdef __GNUC__ -#define GCC_VERSION \ - (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) -#else -#define GCC_VERSION -#endif - /** * __must_check macro. It make the function's return value must be used, * otherwise it will raise a compile warning. And also Paddle treat all compile * warnings as errors. */ -#if GCC_VERSION >= 30400 +#ifdef __GNUC__ +#if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) >= 30400 #define __must_check __attribute__((warn_unused_result)) #else #define __must_check #endif +#else +#define __must_check +#endif diff --git a/paddle/platform/must_check_test.cc b/paddle/platform/must_check_test.cc new file mode 100644 index 0000000000..6ee3ea49ac --- /dev/null +++ b/paddle/platform/must_check_test.cc @@ -0,0 +1,10 @@ +#include +#include + +int __must_check SomeFunctionMustCheck() { return 0; } + +TEST(MustCheck, all) { + // This line should not be compiled, because the + // return value of SomeFunctionMustCheck marked as __must_check + // SomeFunctionMustCheck(); +} \ No newline at end of file diff --git a/paddle/scripts/travis/build_and_test.sh b/paddle/scripts/travis/build_and_test.sh deleted file mode 100755 index f2cbc56165..0000000000 --- a/paddle/scripts/travis/build_and_test.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash -source ./common.sh - -NPROC=1 -export PYTHONPATH=/opt/python/2.7.12/lib/python2.7/site-packages -export PYTHONHOME=/opt/python/2.7.12 -export PATH=/opt/python/2.7.12/bin:${PATH} -cmake .. -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DON_TRAVIS=ON -DWITH_COVERAGE=ON -DCOVERALLS_UPLOAD=ON ${EXTRA_CMAKE_OPTS} -NRPOC=`nproc` -make -j $NPROC -make coveralls -sudo make install diff --git a/paddle/scripts/travis/docs.sh b/paddle/scripts/travis/build_doc.sh similarity index 84% rename from paddle/scripts/travis/docs.sh rename to paddle/scripts/travis/build_doc.sh index c784293695..a44bd35357 100755 --- a/paddle/scripts/travis/docs.sh +++ b/paddle/scripts/travis/build_doc.sh @@ -1,15 +1,19 @@ #!/bin/bash +set -e + +# Create the build directory for CMake. +mkdir -p $TRAVIS_BUILD_DIR/build +cd $TRAVIS_BUILD_DIR/build -# Add set -e, cd to directory. -source ./common.sh # Compile Documentation only. -cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DWITH_GPU=OFF -DWITH_DOC=OFF -DWITH_STYLE_CHECK=OFF ${EXTRA_CMAKE_OPTS} +cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_DOC=OFF -DWITH_STYLE_CHECK=OFF + mkdir output make -j `nproc` find .. -name '*whl' | xargs pip install # install all wheels. rm -rf * -cmake .. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_Fortran_COMPILER=/usr/bin/gfortran-4.8 -DWITH_GPU=OFF -DWITH_DOC=ON ${EXTRA_CMAKE_OPTS} -make paddle_docs paddle_docs_cn +cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_DOC=ON +make -j `nproc` paddle_docs paddle_docs_cn # check websites for broken links linkchecker doc/en/html/index.html diff --git a/paddle/scripts/travis/precommit.sh b/paddle/scripts/travis/check_style.sh similarity index 54% rename from paddle/scripts/travis/precommit.sh rename to paddle/scripts/travis/check_style.sh index 7a59b1131d..4754bdd4c8 100755 --- a/paddle/scripts/travis/precommit.sh +++ b/paddle/scripts/travis/check_style.sh @@ -1,14 +1,14 @@ #!/bin/bash function abort(){ - echo "Your commit not fit PaddlePaddle code style" 1>&2 - echo "Please use pre-commit scripts to auto-format your code" 1>&2 + echo "Your change doesn't follow PaddlePaddle's code style." 1>&2 + echo "Please use pre-commit to reformat your code and git push again." 1>&2 exit 1 } trap 'abort' 0 set -e -source common.sh -cd .. + +cd $TRAVIS_BUILD_DIR export PATH=/usr/bin:$PATH pre-commit install clang-format --version diff --git a/paddle/scripts/travis/common.sh b/paddle/scripts/travis/common.sh deleted file mode 100755 index f05c7530a3..0000000000 --- a/paddle/scripts/travis/common.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -set -e -mkdir -p ../../../build -cd ../../../build -mkdir -p $HOME/third_party -EXTRA_CMAKE_OPTS="-DTHIRD_PARTY_PATH=${HOME}/third_party" diff --git a/paddle/scripts/travis/main.sh b/paddle/scripts/travis/main.sh deleted file mode 100755 index 13f2552d29..0000000000 --- a/paddle/scripts/travis/main.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash -cd `dirname $0` - -if [ ${JOB} == "BUILD_AND_TEST" ]; then - ./build_and_test.sh -elif [ ${JOB} == "DOCS" ]; then - ./docs.sh -elif [ ${JOB} == "PRE_COMMIT" ]; then - ./precommit.sh -else - echo Unknown job ${JOB} - exit 1 -fi diff --git a/paddle/trainer/tests/sample_trainer_nest_rnn_gen.conf b/paddle/trainer/tests/sample_trainer_nest_rnn_gen.conf index d669fbc40c..741a0aa71d 100644 --- a/paddle/trainer/tests/sample_trainer_nest_rnn_gen.conf +++ b/paddle/trainer/tests/sample_trainer_nest_rnn_gen.conf @@ -35,7 +35,7 @@ def outer_step(dummy_data): embedding_size=num_words)] def inner_step(dummy_memory, predict_word): - + # simplified RNN for testing with mixed_layer(size=num_words) as layer: layer += full_matrix_projection(input=predict_word, @@ -46,15 +46,15 @@ def outer_step(dummy_data): param_attr=ParamAttr(name="wordvec")) return out - + beam_gen = beam_search(name="rnn_gen", step=inner_step, input=gen_inputs, bos_id=0, eos_id=num_words-1, beam_size=2 if beam_flag else 1, - num_results_per_sample=2 if beam_flag else 1, - max_length=10) + num_results_per_sample=1, + max_length=10) return beam_gen beam_gen_concat = recurrent_group(name="rnn_gen_concat", diff --git a/paddle/trainer/tests/sample_trainer_rnn_gen.conf b/paddle/trainer/tests/sample_trainer_rnn_gen.conf index 2b337282f6..58d27f15ae 100644 --- a/paddle/trainer/tests/sample_trainer_rnn_gen.conf +++ b/paddle/trainer/tests/sample_trainer_rnn_gen.conf @@ -33,7 +33,7 @@ gen_inputs = [StaticInput(input=dummy_data, size=2), embedding_size=num_words)] def step(dummy_memory, predict_word): - + # simplified RNN for testing with mixed_layer(size=num_words) as layer: layer += full_matrix_projection(input=predict_word, @@ -44,7 +44,7 @@ def step(dummy_memory, predict_word): param_attr=ParamAttr(name="wordvec")) return out - + beam_gen = beam_search(name="rnn_gen", step=step, input=gen_inputs, @@ -52,7 +52,7 @@ beam_gen = beam_search(name="rnn_gen", eos_id=num_words-1, beam_size=2 if beam_flag else 1, num_results_per_sample=2 if beam_flag else 1, - max_length=10) + max_length=10) seqtext_printer_evaluator(input=beam_gen, id_input=sent_id, diff --git a/paddle/utils/CustomStackTrace.h b/paddle/utils/CustomStackTrace.h index 6992e85622..52a6df9497 100644 --- a/paddle/utils/CustomStackTrace.h +++ b/paddle/utils/CustomStackTrace.h @@ -55,13 +55,17 @@ public: * Else, just set status to popping. */ void pop(const T& item) { - pushing() = false; auto& s = this->stack(); if (item == s.top()) { s.pop(); } } + /** + * @brief Indicate whether we are at forward or backward stage of computation + */ + void set_stage(bool isForward) { pushing() = isForward; } + /** * @brief clear current thread stack. */ diff --git a/paddle/utils/Error.h b/paddle/utils/Error.h index cda1b5c37d..f3d535c69c 100644 --- a/paddle/utils/Error.h +++ b/paddle/utils/Error.h @@ -19,7 +19,7 @@ limitations under the License. */ #include #include #include -#include "Compiler.h" +#include "paddle/platform/must_check.h" namespace paddle { diff --git a/paddle/utils/tests/test_CustomStackTrace.cpp b/paddle/utils/tests/test_CustomStackTrace.cpp index b5d9f93f13..c320074fba 100644 --- a/paddle/utils/tests/test_CustomStackTrace.cpp +++ b/paddle/utils/tests/test_CustomStackTrace.cpp @@ -72,7 +72,6 @@ TEST(CustomStackTrace, normalTrain) { for (size_t i = 0; i < layerSize; ++i) { tracer.push("layer_" + paddle::str::to_string(i)); } - tracer.pop(""); for (size_t i = 0; i < layerSize; ++i) { tracer.pop("layer_" + paddle::str::to_string(layerSize - 1 - i)); } diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto index 29270829bb..ebe4f5cbb5 100644 --- a/proto/ModelConfig.proto +++ b/proto/ModelConfig.proto @@ -489,6 +489,15 @@ message EvaluatorConfig { // Used by ClassificationErrorEvaluator // top # classification error optional int32 top_k = 13 [default = 1]; + + // Used by DetectionMAPEvaluator + optional double overlap_threshold = 14 [default = 0.5]; + + optional int32 background_id = 15 [default = 0]; + + optional bool evaluate_difficult = 16 [default = false]; + + optional string ap_type = 17 [default = "11point"]; } message LinkConfig { diff --git a/proto/ParameterConfig.proto b/proto/ParameterConfig.proto index cbcd0af598..580d663246 100644 --- a/proto/ParameterConfig.proto +++ b/proto/ParameterConfig.proto @@ -25,8 +25,10 @@ enum ParameterInitStrategy { } message ParameterUpdaterHookConfig { + // hook type such as 'pruning' required string type = 1; - optional string purning_mask_filename = 2; + // this represents the ratio of zero element to be set by the Parameter + optional double sparsity_ratio = 2 [default = 0.6]; } message ParameterConfig { diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index c11dc09a8b..58e4902f57 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -1280,20 +1280,23 @@ def parse_maxout(maxout, input_layer_name, maxout_conf): # Define an evaluator @config_func -def Evaluator( - name, - type, - inputs, - chunk_scheme=None, - num_chunk_types=None, - classification_threshold=None, - positive_label=None, - dict_file=None, - result_file=None, - num_results=None, - top_k=None, - delimited=None, - excluded_chunk_types=None, ): +def Evaluator(name, + type, + inputs, + chunk_scheme=None, + num_chunk_types=None, + classification_threshold=None, + positive_label=None, + dict_file=None, + result_file=None, + num_results=None, + top_k=None, + delimited=None, + excluded_chunk_types=None, + overlap_threshold=None, + background_id=None, + evaluate_difficult=None, + ap_type=None): evaluator = g_config.model_config.evaluators.add() evaluator.type = type evaluator.name = MakeLayerNameInSubmodel(name) @@ -1327,6 +1330,18 @@ def Evaluator( if excluded_chunk_types: evaluator.excluded_chunk_types.extend(excluded_chunk_types) + if overlap_threshold is not None: + evaluator.overlap_threshold = overlap_threshold + + if background_id is not None: + evaluator.background_id = background_id + + if evaluate_difficult is not None: + evaluator.evaluate_difficult = evaluate_difficult + + if ap_type is not None: + evaluator.ap_type = ap_type + class LayerBase(object): def __init__( @@ -3124,11 +3139,11 @@ def Layer(name, type, **xargs): @config_func def ParameterHook(type, **kwargs): if type == 'pruning': - mask_filename = kwargs.get('mask_filename', None) - assert mask_filename is not None hook = ParameterUpdaterHookConfig() hook.type = type - hook.purning_mask_filename = mask_filename + sparsity_ratio = kwargs.get('sparsity_ratio', None) + if sparsity_ratio is not None: + hook.sparsity_ratio = sparsity_ratio return hook else: return None @@ -3236,13 +3251,13 @@ def Parameter(name, if update_hooks is not None: if hasattr(update_hooks, '__call__'): - update_hooks = update_hooks(para.name) + update_hooks = update_hooks() if isinstance(update_hooks, list): for hook in update_hooks: para.update_hooks.extend([hook]) else: - para.update_hooks.extend(update_hooks) + para.update_hooks.extend([update_hooks]) g_parameter_map[name] = para if initializer is not None: diff --git a/python/paddle/trainer_config_helpers/attrs.py b/python/paddle/trainer_config_helpers/attrs.py index 4100697c9c..9b9f979bb6 100644 --- a/python/paddle/trainer_config_helpers/attrs.py +++ b/python/paddle/trainer_config_helpers/attrs.py @@ -14,7 +14,8 @@ from paddle.trainer.config_parser import * __all__ = [ - 'ParamAttr', 'ExtraAttr', 'ParameterAttribute', 'ExtraLayerAttribute' + 'HookAttr', 'ParamAttr', 'ExtraAttr', 'ParameterAttribute', + 'ExtraLayerAttribute' ] @@ -55,6 +56,40 @@ def is_compatible_with(x, Type): return False +class HookAttribute(object): + """ + Hook Attribute object. As a member of ParameterAttribute class, the hook is an auxiliary operation that occurs + during training process of a layer with parameters, such as img_conv layer, fc layer. + + :param type: Hook type, currently supported types: + 'pruning' : user specify a sparsity_ratio before training started, and the + network will prune the parameters based on the sparsity_ratio. + eg: The definition of Hook object can be hk = HookAttribute('pruning', 0.6) + The specific usage can be paddle.layer.img_conv(input=img, filter_size=3, + num_channels=3, num_filters=64, + param_attr=ParameterAttribute(update_hooks=hk) ) + The pruning details can be found https://arxiv.org/pdf/1506.02626.pdf + :type type: string + + :param sparsity_ratio: Must be specified if hook type is 'pruning', + it represents the ratio of the zero elements to be set by the Parameter. + :type sparsity_ratio: float or None + + """ + + def __init__(self, type, sparsity_ratio=None): + self.type = type + self.sparsity_ratio = sparsity_ratio + if self.sparsity_ratio is not None: + assert is_compatible_with( + self.sparsity_ratio, + float), 'sparisity_ratio must be float type' + assert self.sparsity_ratio <= 1 and self.sparsity_ratio >= 0, 'sparsity_ratio must be a float between [0, 1] ' + + def __call__(self): + return ParameterHook(self.type, sparsity_ratio=self.sparsity_ratio) + + class ParameterAttribute(object): """ Parameter Attributes object. To fine-tuning network training process, user @@ -114,6 +149,7 @@ class ParameterAttribute(object): momentum=None, gradient_clipping_threshold=None, sparse_update=False, + update_hooks=None, initializer=None): self.attr = {} @@ -169,6 +205,9 @@ class ParameterAttribute(object): if initializer is not None: self.attr['initializer'] = initializer + if update_hooks: + self.attr['update_hooks'] = update_hooks + def set_default_parameter_name(self, name): """ Set default parameter name. If parameter not set, then will use default @@ -244,5 +283,6 @@ class ExtraLayerAttribute(object): return attr.attr +HookAttr = HookAttribute ParamAttr = ParameterAttribute ExtraAttr = ExtraLayerAttribute diff --git a/python/paddle/trainer_config_helpers/evaluators.py b/python/paddle/trainer_config_helpers/evaluators.py index a5234f3e47..44d52edfa7 100644 --- a/python/paddle/trainer_config_helpers/evaluators.py +++ b/python/paddle/trainer_config_helpers/evaluators.py @@ -21,7 +21,8 @@ __all__ = [ "chunk_evaluator", "sum_evaluator", "column_sum_evaluator", "value_printer_evaluator", "gradient_printer_evaluator", "maxid_printer_evaluator", "maxframe_printer_evaluator", - "seqtext_printer_evaluator", "classification_error_printer_evaluator" + "seqtext_printer_evaluator", "classification_error_printer_evaluator", + "detection_map_evaluator" ] @@ -31,10 +32,11 @@ class EvaluatorAttribute(object): FOR_RANK = 1 << 2 FOR_PRINT = 1 << 3 FOR_UTILS = 1 << 4 + FOR_DETECTION = 1 << 5 KEYS = [ "for_classification", "for_regression", "for_rank", "for_print", - "for_utils" + "for_utils", "for_detection" ] @staticmethod @@ -57,22 +59,25 @@ def evaluator(*attrs): return impl -def evaluator_base( - input, - type, - label=None, - weight=None, - name=None, - chunk_scheme=None, - num_chunk_types=None, - classification_threshold=None, - positive_label=None, - dict_file=None, - result_file=None, - num_results=None, - delimited=None, - top_k=None, - excluded_chunk_types=None, ): +def evaluator_base(input, + type, + label=None, + weight=None, + name=None, + chunk_scheme=None, + num_chunk_types=None, + classification_threshold=None, + positive_label=None, + dict_file=None, + result_file=None, + num_results=None, + delimited=None, + top_k=None, + excluded_chunk_types=None, + overlap_threshold=None, + background_id=None, + evaluate_difficult=None, + ap_type=None): """ Evaluator will evaluate the network status while training/testing. @@ -107,6 +112,14 @@ def evaluator_base( :type weight: LayerOutput. :param top_k: number k in top-k error rate :type top_k: int + :param overlap_threshold: In detection tasks to filter detection results + :type overlap_threshold: float + :param background_id: Identifier of background class + :type background_id: int + :param evaluate_difficult: Whether to evaluate difficult objects + :type evaluate_difficult: bool + :param ap_type: How to calculate average persicion + :type ap_type: str """ # inputs type assertions. assert classification_threshold is None or isinstance( @@ -136,7 +149,61 @@ def evaluator_base( delimited=delimited, num_results=num_results, top_k=top_k, - excluded_chunk_types=excluded_chunk_types, ) + excluded_chunk_types=excluded_chunk_types, + overlap_threshold=overlap_threshold, + background_id=background_id, + evaluate_difficult=evaluate_difficult, + ap_type=ap_type) + + +@evaluator(EvaluatorAttribute.FOR_DETECTION) +@wrap_name_default() +def detection_map_evaluator(input, + label, + overlap_threshold=0.5, + background_id=0, + evaluate_difficult=False, + ap_type="11point", + name=None): + """ + Detection mAP Evaluator. It will print mean Average Precision (mAP) for detection. + + The detection mAP Evaluator based on the output of detection_output layer counts + the true positive and the false positive bbox and integral them to get the + mAP. + + The simple usage is: + + .. code-block:: python + + eval = detection_map_evaluator(input=det_output,label=lbl) + + :param input: Input layer. + :type input: LayerOutput + :param label: Label layer. + :type label: LayerOutput + :param overlap_threshold: The bbox overlap threshold of a true positive. + :type overlap_threshold: float + :param background_id: The background class index. + :type background_id: int + :param evaluate_difficult: Whether evaluate a difficult ground truth. + :type evaluate_difficult: bool + """ + if not isinstance(input, list): + input = [input] + + if label: + input.append(label) + + evaluator_base( + name=name, + type="detection_map", + input=input, + label=label, + overlap_threshold=overlap_threshold, + background_id=background_id, + evaluate_difficult=evaluate_difficult, + ap_type=ap_type) @evaluator(EvaluatorAttribute.FOR_CLASSIFICATION) diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index b8ce0373c0..84ed160773 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -3839,7 +3839,8 @@ def classification_cost(input, weight=None, name=None, evaluator=classification_error_evaluator, - layer_attr=None): + layer_attr=None, + coeff=1.): """ classification cost Layer. @@ -3855,6 +3856,8 @@ def classification_cost(input, :param evaluator: Evaluator method. :param layer_attr: layer's extra attribute. :type layer_attr: ExtraLayerAttribute + :param coeff: The coefficient affects the gradient in the backward. + :type coeff: float :return: LayerOutput object. :rtype: LayerOutput """ @@ -3868,6 +3871,7 @@ def classification_cost(input, name=name, type="multi-class-cross-entropy", inputs=ipts, + coeff=coeff, **ExtraLayerAttribute.to_kwargs(layer_attr)) def __add_evaluator__(e): diff --git a/python/paddle/v2/attr.py b/python/paddle/v2/attr.py index 32f78614e7..5d23894d73 100644 --- a/python/paddle/v2/attr.py +++ b/python/paddle/v2/attr.py @@ -17,10 +17,12 @@ import paddle.trainer_config_helpers.attrs __all__ = [ "Param", "Extra", + "Hook", ] Param = paddle.trainer_config_helpers.attrs.ParameterAttribute Extra = paddle.trainer_config_helpers.attrs.ExtraLayerAttribute +Hook = paddle.trainer_config_helpers.attrs.HookAttribute for each in paddle.trainer_config_helpers.attrs.__all__: globals()[each] = getattr(paddle.trainer_config_helpers.attrs, each) diff --git a/python/paddle/v2/dataset/cifar.py b/python/paddle/v2/dataset/cifar.py index 81af0a8e66..f885b2834e 100644 --- a/python/paddle/v2/dataset/cifar.py +++ b/python/paddle/v2/dataset/cifar.py @@ -31,10 +31,10 @@ images per class. import cPickle import itertools import numpy -from common import download +import paddle.v2.dataset.common import tarfile -__all__ = ['train100', 'test100', 'train10', 'test10'] +__all__ = ['train100', 'test100', 'train10', 'test10', 'convert'] URL_PREFIX = 'https://www.cs.toronto.edu/~kriz/' CIFAR10_URL = URL_PREFIX + 'cifar-10-python.tar.gz' @@ -75,7 +75,8 @@ def train100(): :rtype: callable """ return reader_creator( - download(CIFAR100_URL, 'cifar', CIFAR100_MD5), 'train') + paddle.v2.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5), + 'train') def test100(): @@ -88,7 +89,9 @@ def test100(): :return: Test reader creator. :rtype: callable """ - return reader_creator(download(CIFAR100_URL, 'cifar', CIFAR100_MD5), 'test') + return reader_creator( + paddle.v2.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5), + 'test') def train10(): @@ -102,7 +105,8 @@ def train10(): :rtype: callable """ return reader_creator( - download(CIFAR10_URL, 'cifar', CIFAR10_MD5), 'data_batch') + paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5), + 'data_batch') def test10(): @@ -116,9 +120,20 @@ def test10(): :rtype: callable """ return reader_creator( - download(CIFAR10_URL, 'cifar', CIFAR10_MD5), 'test_batch') + paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5), + 'test_batch') def fetch(): - download(CIFAR10_URL, 'cifar', CIFAR10_MD5) - download(CIFAR100_URL, 'cifar', CIFAR100_MD5) + paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5) + paddle.v2.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5) + + +def convert(path): + """ + Converts dataset to recordio format + """ + paddle.v2.dataset.common.convert(path, train100(), 10, "cifar_train100") + paddle.v2.dataset.common.convert(path, test100(), 10, "cifar_test100") + paddle.v2.dataset.common.convert(path, train10(), 10, "cifar_train10") + paddle.v2.dataset.common.convert(path, test10(), 10, "cifar_test10") diff --git a/python/paddle/v2/dataset/common.py b/python/paddle/v2/dataset/common.py index e09ac1a7a0..4a2eb59c34 100644 --- a/python/paddle/v2/dataset/common.py +++ b/python/paddle/v2/dataset/common.py @@ -23,17 +23,24 @@ import paddle.v2.dataset import cPickle import glob -__all__ = ['DATA_HOME', 'download', 'md5file', 'split', 'cluster_files_reader'] +__all__ = [ + 'DATA_HOME', 'download', 'md5file', 'split', 'cluster_files_reader', + 'convert' +] DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset') -if not os.path.exists(DATA_HOME): - try: - os.makedirs(DATA_HOME) - except OSError as exc: - if exc.errno != errno.EEXIST: - raise - pass +# When running unit tests, there could be multiple processes that +# trying to create DATA_HOME directory simultaneously, so we cannot +# use a if condition to check for the existence of the directory; +# instead, we use the filesystem as the synchronization mechanism by +# catching returned errors. +try: + os.makedirs(DATA_HOME) +except OSError as exc: + if exc.errno != errno.EEXIST: + raise + pass def md5file(fname): diff --git a/python/paddle/v2/dataset/conll05.py b/python/paddle/v2/dataset/conll05.py index 12d648bf65..f8aae52e7c 100644 --- a/python/paddle/v2/dataset/conll05.py +++ b/python/paddle/v2/dataset/conll05.py @@ -23,9 +23,9 @@ to initialize SRL model. import tarfile import gzip import itertools -from common import download +import paddle.v2.dataset.common -__all__ = ['test, get_dict', 'get_embedding'] +__all__ = ['test, get_dict', 'get_embedding', 'convert'] DATA_URL = 'http://www.cs.upc.edu/~srlconll/conll05st-tests.tar.gz' DATA_MD5 = '387719152ae52d60422c016e92a742fc' @@ -182,9 +182,15 @@ def get_dict(): """ Get the word, verb and label dictionary of Wikipedia corpus. """ - word_dict = load_dict(download(WORDDICT_URL, 'conll05st', WORDDICT_MD5)) - verb_dict = load_dict(download(VERBDICT_URL, 'conll05st', VERBDICT_MD5)) - label_dict = load_dict(download(TRGDICT_URL, 'conll05st', TRGDICT_MD5)) + word_dict = load_dict( + paddle.v2.dataset.common.download(WORDDICT_URL, 'conll05st', + WORDDICT_MD5)) + verb_dict = load_dict( + paddle.v2.dataset.common.download(VERBDICT_URL, 'conll05st', + VERBDICT_MD5)) + label_dict = load_dict( + paddle.v2.dataset.common.download(TRGDICT_URL, 'conll05st', + TRGDICT_MD5)) return word_dict, verb_dict, label_dict @@ -192,7 +198,7 @@ def get_embedding(): """ Get the trained word vector based on Wikipedia corpus. """ - return download(EMB_URL, 'conll05st', EMB_MD5) + return paddle.v2.dataset.common.download(EMB_URL, 'conll05st', EMB_MD5) def test(): @@ -209,15 +215,23 @@ def test(): """ word_dict, verb_dict, label_dict = get_dict() reader = corpus_reader( - download(DATA_URL, 'conll05st', DATA_MD5), + paddle.v2.dataset.common.download(DATA_URL, 'conll05st', DATA_MD5), words_name='conll05st-release/test.wsj/words/test.wsj.words.gz', props_name='conll05st-release/test.wsj/props/test.wsj.props.gz') return reader_creator(reader, word_dict, verb_dict, label_dict) def fetch(): - download(WORDDICT_URL, 'conll05st', WORDDICT_MD5) - download(VERBDICT_URL, 'conll05st', VERBDICT_MD5) - download(TRGDICT_URL, 'conll05st', TRGDICT_MD5) - download(EMB_URL, 'conll05st', EMB_MD5) - download(DATA_URL, 'conll05st', DATA_MD5) + paddle.v2.dataset.common.download(WORDDICT_URL, 'conll05st', WORDDICT_MD5) + paddle.v2.dataset.common.download(VERBDICT_URL, 'conll05st', VERBDICT_MD5) + paddle.v2.dataset.common.download(TRGDICT_URL, 'conll05st', TRGDICT_MD5) + paddle.v2.dataset.common.download(EMB_URL, 'conll05st', EMB_MD5) + paddle.v2.dataset.common.download(DATA_URL, 'conll05st', DATA_MD5) + + +def convert(path): + """ + Converts dataset to recordio format + """ + paddle.v2.dataset.common.convert(path, test(), 10, "conl105_train") + paddle.v2.dataset.common.convert(path, test(), 10, "conl105_test") diff --git a/python/paddle/v2/dataset/imdb.py b/python/paddle/v2/dataset/imdb.py index 5dc5abfe53..c0ec5992e0 100644 --- a/python/paddle/v2/dataset/imdb.py +++ b/python/paddle/v2/dataset/imdb.py @@ -28,7 +28,7 @@ import re import string import threading -__all__ = ['build_dict', 'train', 'test'] +__all__ = ['build_dict', 'train', 'test', 'convert'] URL = 'http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz' MD5 = '7c2ac02c03563afcf9b574c7e56c153a' @@ -166,3 +166,12 @@ def word_dict(): def fetch(): paddle.v2.dataset.common.download(URL, 'imdb', MD5) + + +def convert(path): + """ + Converts dataset to recordio format + """ + w = word_dict() + paddle.v2.dataset.common.convert(path, lambda: train(w), 10, "imdb_train") + paddle.v2.dataset.common.convert(path, lambda: test(w), 10, "imdb_test") diff --git a/python/paddle/v2/dataset/imikolov.py b/python/paddle/v2/dataset/imikolov.py index dd3a4552d2..b18ee8e9ba 100644 --- a/python/paddle/v2/dataset/imikolov.py +++ b/python/paddle/v2/dataset/imikolov.py @@ -22,7 +22,7 @@ import paddle.v2.dataset.common import collections import tarfile -__all__ = ['train', 'test', 'build_dict'] +__all__ = ['train', 'test', 'build_dict', 'convert'] URL = 'http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz' MD5 = '30177ea32e27c525793142b6bf2c8e2d' @@ -146,3 +146,15 @@ def test(word_idx, n, data_type=DataType.NGRAM): def fetch(): paddle.v2.dataset.common.download(URL, "imikolov", MD5) + + +def convert(path): + """ + Converts dataset to recordio format + """ + N = 5 + word_dict = build_dict() + paddle.v2.dataset.common.convert(path, + train(word_dict, N), 10, "imikolov_train") + paddle.v2.dataset.common.convert(path, + test(word_dict, N), 10, "imikolov_test") diff --git a/python/paddle/v2/dataset/mnist.py b/python/paddle/v2/dataset/mnist.py index 435556b292..ea5891f4f3 100644 --- a/python/paddle/v2/dataset/mnist.py +++ b/python/paddle/v2/dataset/mnist.py @@ -21,7 +21,7 @@ import paddle.v2.dataset.common import subprocess import numpy import platform -__all__ = ['train', 'test'] +__all__ = ['train', 'test', 'convert'] URL_PREFIX = 'http://yann.lecun.com/exdb/mnist/' TEST_IMAGE_URL = URL_PREFIX + 't10k-images-idx3-ubyte.gz' @@ -113,3 +113,11 @@ def fetch(): paddle.v2.dataset.common.download(TRAIN_LABEL_URL, 'mnist', TRAIN_LABEL_MD5) paddle.v2.dataset.common.download(TEST_IMAGE_URL, 'mnist', TEST_IMAGE_MD5) paddle.v2.dataset.common.download(TEST_LABEL_URL, 'mnist', TRAIN_LABEL_MD5) + + +def convert(path): + """ + Converts dataset to recordio format + """ + paddle.v2.dataset.common.convert(path, train(), 10, "minist_train") + paddle.v2.dataset.common.convert(path, test(), 10, "minist_test") diff --git a/python/paddle/v2/dataset/movielens.py b/python/paddle/v2/dataset/movielens.py index 837a859126..d9372d422a 100644 --- a/python/paddle/v2/dataset/movielens.py +++ b/python/paddle/v2/dataset/movielens.py @@ -23,14 +23,15 @@ set and test set into paddle reader creators. """ import zipfile -from common import download +import paddle.v2.dataset.common import re import random import functools __all__ = [ 'train', 'test', 'get_movie_title_dict', 'max_movie_id', 'max_user_id', - 'age_table', 'movie_categories', 'max_job_id', 'user_info', 'movie_info' + 'age_table', 'movie_categories', 'max_job_id', 'user_info', 'movie_info', + 'convert' ] age_table = [1, 18, 25, 35, 45, 50, 56] @@ -99,7 +100,7 @@ USER_INFO = None def __initialize_meta_info__(): - fn = download(URL, "movielens", MD5) + fn = paddle.v2.dataset.common.download(URL, "movielens", MD5) global MOVIE_INFO if MOVIE_INFO is None: pattern = re.compile(r'^(.*)\((\d+)\)$') @@ -246,7 +247,15 @@ def unittest(): def fetch(): - download(URL, "movielens", MD5) + paddle.v2.dataset.common.download(URL, "movielens", MD5) + + +def convert(path): + """ + Converts dataset to recordio format + """ + paddle.v2.dataset.common.convert(path, train(), 10, "movielens_train") + paddle.v2.dataset.common.convert(path, test(), 10, "movielens_test") if __name__ == '__main__': diff --git a/python/paddle/v2/dataset/sentiment.py b/python/paddle/v2/dataset/sentiment.py index 4dd34e7383..e33f120c87 100644 --- a/python/paddle/v2/dataset/sentiment.py +++ b/python/paddle/v2/dataset/sentiment.py @@ -26,9 +26,9 @@ from itertools import chain import nltk from nltk.corpus import movie_reviews -import common +import paddle.v2.dataset.common -__all__ = ['train', 'test', 'get_word_dict'] +__all__ = ['train', 'test', 'get_word_dict', 'convert'] NUM_TRAINING_INSTANCES = 1600 NUM_TOTAL_INSTANCES = 2000 @@ -39,12 +39,13 @@ def download_data_if_not_yet(): """ try: # make sure that nltk can find the data - if common.DATA_HOME not in nltk.data.path: - nltk.data.path.append(common.DATA_HOME) + if paddle.v2.dataset.common.DATA_HOME not in nltk.data.path: + nltk.data.path.append(paddle.v2.dataset.common.DATA_HOME) movie_reviews.categories() except LookupError: print "Downloading movie_reviews data set, please wait....." - nltk.download('movie_reviews', download_dir=common.DATA_HOME) + nltk.download( + 'movie_reviews', download_dir=paddle.v2.dataset.common.DATA_HOME) print "Download data set success....." print "Path is " + nltk.data.find('corpora/movie_reviews').path @@ -128,4 +129,13 @@ def test(): def fetch(): - nltk.download('movie_reviews', download_dir=common.DATA_HOME) + nltk.download( + 'movie_reviews', download_dir=paddle.v2.dataset.common.DATA_HOME) + + +def convert(path): + """ + Converts dataset to recordio format + """ + paddle.v2.dataset.common.convert(path, train, 10, "sentiment_train") + paddle.v2.dataset.common.convert(path, test, 10, "sentiment_test") diff --git a/python/paddle/v2/dataset/uci_housing.py b/python/paddle/v2/dataset/uci_housing.py index 3469fd9ce1..c715ea9681 100644 --- a/python/paddle/v2/dataset/uci_housing.py +++ b/python/paddle/v2/dataset/uci_housing.py @@ -14,14 +14,14 @@ """ UCI Housing dataset. -This module will download dataset from +This module will paddle.v2.dataset.common.download dataset from https://archive.ics.uci.edu/ml/machine-learning-databases/housing/ and parse training set and test set into paddle reader creators. """ import numpy as np import os -from common import download +import paddle.v2.dataset.common __all__ = ['train', 'test'] @@ -29,7 +29,7 @@ URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing MD5 = 'd4accdce7a25600298819f8e28e8d593' feature_names = [ 'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', - 'PTRATIO', 'B', 'LSTAT' + 'PTRATIO', 'B', 'LSTAT', 'convert' ] UCI_TRAIN_DATA = None @@ -82,7 +82,7 @@ def train(): :rtype: callable """ global UCI_TRAIN_DATA - load_data(download(URL, 'uci_housing', MD5)) + load_data(paddle.v2.dataset.common.download(URL, 'uci_housing', MD5)) def reader(): for d in UCI_TRAIN_DATA: @@ -102,7 +102,7 @@ def test(): :rtype: callable """ global UCI_TEST_DATA - load_data(download(URL, 'uci_housing', MD5)) + load_data(paddle.v2.dataset.common.download(URL, 'uci_housing', MD5)) def reader(): for d in UCI_TEST_DATA: @@ -112,4 +112,12 @@ def test(): def fetch(): - download(URL, 'uci_housing', MD5) + paddle.v2.dataset.common.download(URL, 'uci_housing', MD5) + + +def convert(path): + """ + Converts dataset to recordio format + """ + paddle.v2.dataset.common.convert(path, train(), 10, "uci_housing_train") + paddle.v2.dataset.common.convert(path, test(), 10, "uci_houseing_test") diff --git a/python/paddle/v2/dataset/wmt14.py b/python/paddle/v2/dataset/wmt14.py index 0902f87741..e1dc4f4c30 100644 --- a/python/paddle/v2/dataset/wmt14.py +++ b/python/paddle/v2/dataset/wmt14.py @@ -22,10 +22,10 @@ parse training set and test set into paddle reader creators. import tarfile import gzip -from paddle.v2.dataset.common import download +import paddle.v2.dataset.common from paddle.v2.parameters import Parameters -__all__ = ['train', 'test', 'build_dict'] +__all__ = ['train', 'test', 'build_dict', 'convert'] URL_DEV_TEST = 'http://www-lium.univ-lemans.fr/~schwenk/cslm_joint_paper/data/dev+test.tgz' MD5_DEV_TEST = '7d7897317ddd8ba0ae5c5fa7248d3ff5' @@ -115,7 +115,8 @@ def train(dict_size): :rtype: callable """ return reader_creator( - download(URL_TRAIN, 'wmt14', MD5_TRAIN), 'train/train', dict_size) + paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN), + 'train/train', dict_size) def test(dict_size): @@ -130,16 +131,18 @@ def test(dict_size): :rtype: callable """ return reader_creator( - download(URL_TRAIN, 'wmt14', MD5_TRAIN), 'test/test', dict_size) + paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN), + 'test/test', dict_size) def gen(dict_size): return reader_creator( - download(URL_TRAIN, 'wmt14', MD5_TRAIN), 'gen/gen', dict_size) + paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN), + 'gen/gen', dict_size) def model(): - tar_file = download(URL_MODEL, 'wmt14', MD5_MODEL) + tar_file = paddle.v2.dataset.common.download(URL_MODEL, 'wmt14', MD5_MODEL) with gzip.open(tar_file, 'r') as f: parameters = Parameters.from_tar(f) return parameters @@ -148,7 +151,7 @@ def model(): def get_dict(dict_size, reverse=True): # if reverse = False, return dict = {'a':'001', 'b':'002', ...} # else reverse = true, return dict = {'001':'a', '002':'b', ...} - tar_file = download(URL_TRAIN, 'wmt14', MD5_TRAIN) + tar_file = paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN) src_dict, trg_dict = __read_to_dict__(tar_file, dict_size) if reverse: src_dict = {v: k for k, v in src_dict.items()} @@ -157,5 +160,14 @@ def get_dict(dict_size, reverse=True): def fetch(): - download(URL_TRAIN, 'wmt14', MD5_TRAIN) - download(URL_MODEL, 'wmt14', MD5_MODEL) + paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN) + paddle.v2.dataset.common.download(URL_MODEL, 'wmt14', MD5_MODEL) + + +def convert(path): + """ + Converts dataset to recordio format + """ + dict_size = 30000 + paddle.v2.dataset.common.convert(path, train(dict_size), 10, "wmt14_train") + paddle.v2.dataset.common.convert(path, test(dict_size), 10, "wmt14_test") diff --git a/python/paddle/v2/layer.py b/python/paddle/v2/layer.py index bbb9c3ea8c..4ade1c6f32 100644 --- a/python/paddle/v2/layer.py +++ b/python/paddle/v2/layer.py @@ -45,12 +45,12 @@ __all__ = ['data', 'parse_network'] def __need_to_keep__(name): return name in [ 'StaticInput', 'SubsequenceInput', 'GeneratedInput', 'LayerType', - 'layer_support' + 'layer_support', 'BaseGeneratedInput' ] def __need_to_wrap__(name): - return name not in ['AggregateLevel', 'ExpandLevel'] + return name not in ['AggregateLevel', 'ExpandLevel', 'BaseGeneratedInput'] def __convert_name__(inname): @@ -199,6 +199,15 @@ def __get_used_submodels__(layer_names): return submodel_names +def __get_submodel_data_out_links__(): + data_links = set() + for submodel in cp.g_config.model_config.sub_models: + for link in submodel.out_links: + if cp.g_layer_map[link.link_name].type == 'data': + data_links.add(link.link_name) + return data_links + + def __get_used_evaluators__(layer_names): evaluator_names = set() for e in cp.g_config.model_config.evaluators: @@ -264,6 +273,7 @@ def parse_network(output_layers, extra_layers=None): submodel_names = __get_used_submodels__(layer_names) submodel_names.add('root') evaluator_names = __get_used_evaluators__(layer_names) + data_out_links = __get_submodel_data_out_links__() input_layer_names = set() output_layer_names = set() @@ -279,7 +289,7 @@ def parse_network(output_layers, extra_layers=None): continue model_config.layers.extend([l]) if l.type == 'data': - if l.name in model_config.output_layer_names: + if l.name in data_out_links: """ In text generation, the outlink to save the generated word indices is a data_layer defined in recurrent_group. This diff --git a/python/paddle/v2/reader/creator.py b/python/paddle/v2/reader/creator.py index 9f888b16d6..5e052026f6 100644 --- a/python/paddle/v2/reader/creator.py +++ b/python/paddle/v2/reader/creator.py @@ -16,6 +16,7 @@ Creator package contains some simple reader creator, which could be used in user program. """ + __all__ = ['np_array', 'text_file', "recordio"] @@ -75,4 +76,4 @@ def recordio(path): yield r f.close() - return reader + return reader \ No newline at end of file diff --git a/python/paddle/v2/reader/decorator.py b/python/paddle/v2/reader/decorator.py index c76faa596c..e432003129 100644 --- a/python/paddle/v2/reader/decorator.py +++ b/python/paddle/v2/reader/decorator.py @@ -230,7 +230,7 @@ class XmapEndSignal(): pass -def xmap_readers(mapper, reader, process_num, buffer_size): +def xmap_readers(mapper, reader, process_num, buffer_size, order=False): """ Use multiprocess to map samples from reader by a mapper defined by user. And this function contains a buffered decorator. @@ -242,12 +242,15 @@ def xmap_readers(mapper, reader, process_num, buffer_size): :type process_num: int :param buffer_size: max buffer size :type buffer_size: int + :param order: keep the order of reader + :type order: bool :return: the decarated reader :rtype: callable """ end = XmapEndSignal() in_queue = Queue(buffer_size) out_queue = Queue(buffer_size) + out_order = [0] # define a worker to read samples from reader to in_queue def read_worker(reader, in_queue): @@ -255,8 +258,17 @@ def xmap_readers(mapper, reader, process_num, buffer_size): in_queue.put(i) in_queue.put(end) + # define a worker to read samples from reader to in_queue with order flag + def order_read_worker(reader, in_queue): + in_order = 0 + for i in reader(): + in_queue.put((in_order, i)) + in_order += 1 + in_queue.put(end) + # start a read worker in a thread - t = Thread(target=read_worker, args=(reader, in_queue)) + target = order_read_worker if order else read_worker + t = Thread(target=target, args=(reader, in_queue)) t.daemon = True t.start() @@ -271,11 +283,28 @@ def xmap_readers(mapper, reader, process_num, buffer_size): in_queue.put(end) out_queue.put(end) + # define a worker to handle samples from in_queue by mapper + # and put mapped samples into out_queue by order + def order_handle_worker(in_queue, out_queue, mapper, out_order): + ins = in_queue.get() + while not isinstance(ins, XmapEndSignal): + order, sample = ins + r = mapper(sample) + while order != out_order[0]: + pass + out_queue.put(r) + out_order[0] += 1 + ins = in_queue.get() + in_queue.put(end) + out_queue.put(end) + # start several handle_workers + target = order_handle_worker if order else handle_worker + args = (in_queue, out_queue, mapper, out_order) if order else ( + in_queue, out_queue, mapper) workers = [] for i in xrange(process_num): - worker = Thread( - target=handle_worker, args=(in_queue, out_queue, mapper)) + worker = Thread(target=target, args=args) worker.daemon = True workers.append(worker) for w in workers: diff --git a/python/paddle/v2/reader/tests/creator_test.py b/python/paddle/v2/reader/tests/creator_test.py index e20af9e5e4..ba4f558874 100644 --- a/python/paddle/v2/reader/tests/creator_test.py +++ b/python/paddle/v2/reader/tests/creator_test.py @@ -13,9 +13,7 @@ # limitations under the License. import os import unittest - import numpy as np - import paddle.v2.reader.creator diff --git a/python/paddle/v2/reader/tests/decorator_test.py b/python/paddle/v2/reader/tests/decorator_test.py index 734154b979..bb3c5d220b 100644 --- a/python/paddle/v2/reader/tests/decorator_test.py +++ b/python/paddle/v2/reader/tests/decorator_test.py @@ -121,5 +121,27 @@ class TestShuffle(unittest.TestCase): self.assertEqual(total, 10) +class TestXmap(unittest.TestCase): + def test_xmap(self): + def mapper(x): + return (x + 1) + + orders = (True, False) + thread_nums = (1, 2, 4, 8, 16) + buffered_size = (1, 2, 4, 8, 16) + for order in orders: + for tNum in thread_nums: + for size in buffered_size: + result = [] + for i in paddle.v2.reader.xmap_readers(mapper, + reader_creator_10(0), + tNum, size, order)(): + result.append(i) + if not order: + result.sort() + for idx, e in enumerate(result): + self.assertEqual(e, mapper(idx)) + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/v2/reader/tests/test_recordio_creator.dat b/python/paddle/v2/reader/tests/test_recordio_creator.dat deleted file mode 100644 index 17aa89b679..0000000000 Binary files a/python/paddle/v2/reader/tests/test_recordio_creator.dat and /dev/null differ diff --git a/python/setup.py.in b/python/setup.py.in index 2e22f640cb..86fc0fc5c0 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -13,6 +13,7 @@ packages=['paddle', setup_requires=["requests", "numpy", "protobuf==3.1", + "recordio", "matplotlib", "rarfile"]