Fix conflict

avx_docs
liaogang 9 years ago
commit bd38facada

@ -35,6 +35,8 @@ addons:
- libgoogle-glog-dev
- libgflags-dev
- libgtest-dev
- curl
- lcov
- graphviz
before_install:
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then sudo paddle/scripts/travis/before_install.linux.sh; fi

@ -3,13 +3,13 @@ cmake_minimum_required(VERSION 2.8)
project(paddle CXX C)
set(PADDLE_MAJOR_VERSION 0)
set(PADDLE_MINOR_VERSION 8)
set(PADDLE_PATCH_VERSION 0b2)
set(PADDLE_PATCH_VERSION 0b3)
set(PADDLE_VERSION ${PADDLE_MAJOR_VERSION}.${PADDLE_MINOR_VERSION}.${PADDLE_PATCH_VERSION})
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake")
set(PROJ_ROOT ${CMAKE_SOURCE_DIR})
include(package)
include(swig)
find_package(SWIG 2.0)
find_package(CUDA QUIET)
find_package(Protobuf REQUIRED)
find_package(PythonLibs 2.7 REQUIRED)
@ -40,6 +40,9 @@ option(WITH_TESTING "Compile and run unittest for PaddlePaddle" ${GTEST_FOUND})
option(WITH_DOC "Compile PaddlePaddle with documentation" OFF)
option(WITH_SWIG_PY "Compile PaddlePaddle with py PaddlePaddle prediction api" ${SWIG_FOUND})
option(ON_TRAVIS "Running test on travis-ci or not." OFF)
option(ON_COVERALLS "Generating code coverage data on coveralls or not." OFF)
option(COVERALLS_UPLOAD "Uploading the generated coveralls json." ON)
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING
"Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel"
@ -49,11 +52,16 @@ endif()
include(enableCXX11)
include(cpplint)
include(ccache)
if(WITH_RDMA)
include(rdma)
endif()
include(util)
include(flags)
include(cudnn)
include(FindPythonModule)
include(check_packages)
include(swig)
include(coveralls)
# add PaddlePaddle version
if(DEFINED ENV{PADDLE_VERSION})
@ -129,12 +137,15 @@ else(WITH_PYTHON)
add_definitions(-DPADDLE_NO_PYTHON)
endif(WITH_PYTHON)
if(NOT WITH_RDMA)
add_definitions(-DPADDLE_DISABLE_RDMA)
endif()
if(WITH_RDMA)
include_directories("${RDMA_INC_DIR}")
else(WITH_RDMA)
add_definitions(-DPADDLE_DISABLE_RDMA)
endif(WITH_RDMA)
if(WITH_GLOG)
add_definitions(-DPADDLE_USE_GLOG)
include_directories(${LIBGLOG_INCLUDE_DIR})
endif()
if(WITH_GFLAGS)

@ -0,0 +1,14 @@
Thank you for contributing to PaddlePaddle. Submitting an issue is a great help for us.
Both Chinese and English issues are welcome.
It's hard to solve a problem when important details are missing.
Before submitting the issue, look over the following criteria before handing your request in.
- [ ] Was there a similar issue submitted or resolved before ? You could search issue in the github.
- [ ] Did you retrieve your issue from widespread search engines ?
- [ ] Is my description of the issue clear enough to reproduce this problem?
* If some errors occured, we need details about `how do you run your code?`, `what system do you use?`, `Are you using GPU or not?`, etc.
* If you use an recording [asciinema](https://asciinema.org/) to show what you are doing to make it happen, that's awesome! We could help you solve the problem more quickly.
- [ ] Is my description of the issue use the github markdown correctly?
* Please use the proper markdown syntaxes for styling all forms of writing, e.g, source code, error information, etc.
* Check out [this page](https://guides.github.com/features/mastering-markdown/) to find out much more about markdown.

@ -1,8 +1,10 @@
# PaddlePaddle
| **`Linux`** | **`License`** | **`Chat Room`** |
|----------------|---------------|-----------------|
|[![Build Status](https://travis-ci.org/baidu/Paddle.svg?branch=master)](https://travis-ci.org/baidu/Paddle)|[![License](https://img.shields.io/badge/license-Apache%202.0-green.svg)](LICENSE)|[![Join the chat at https://gitter.im/PaddlePaddle/Deep_Learning](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/PaddlePaddle/Deep_Learning?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)|
[![Build Status](https://travis-ci.org/baidu/Paddle.svg?branch=master)](https://travis-ci.org/baidu/Paddle)
[![Coverage Status](https://coveralls.io/repos/github/baidu/Paddle/badge.svg?branch=develop)](https://coveralls.io/github/baidu/Paddle?branch=develop)
[![Join the chat at https://gitter.im/PaddlePaddle/Deep_Learning](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/PaddlePaddle/Deep_Learning?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
[![License](https://img.shields.io/badge/license-Apache%202.0-green.svg)](LICENSE)
Welcome to the PaddlePaddle GitHub.

@ -1,4 +1,4 @@
# Find the CBlas libraries
# Find the CBlas and lapack libraries
#
# It will search MKL, atlas, OpenBlas, reference-cblas in order.
#
@ -19,6 +19,8 @@ set(MKL_ROOT $ENV{MKL_ROOT} CACHE PATH "Folder contains MKL")
find_path(MKL_INCLUDE_DIR mkl.h PATHS
${MKL_ROOT}/include)
find_path(MKL_INCLUDE_DIR mkl_lapacke.h PATHS
${MKL_ROOT}/include)
find_library(MKL_CORE_LIB NAMES mkl_core PATHS
${MKL_ROOT}/lib
${MKL_ROOT}/lib/intel64)
@ -37,6 +39,7 @@ if(MKL_INCLUDE_DIR AND MKL_CORE_LIB AND MKL_SEQUENTIAL_LIB AND MKL_INTEL_LP64)
${MKL_SEQUENTIAL_LIB}
${MKL_CORE_LIB})
add_definitions(-DPADDLE_USE_MKL)
message(STATUS "Found MKL (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBS})")
return() # return file.
endif()
@ -55,15 +58,19 @@ set(ATLAS_LIB_SEARCH_PATHS
)
find_path(ATLAS_INC_DIR NAMES cblas.h
PATHS ${ATLAS_INCLUDE_SEARCH_PATHS})
find_path(ATLAS_CLAPACK_INC_DIR NAMES clapack.h
PATHS ${ATLAS_INCLUDE_SEARCH_PATHS})
find_library(ATLAS_CBLAS_LIB NAMES cblas libcblas.so.3
PATHS ${ATLAS_LIB_SEARCH_PATHS})
find_library(ATLAS_LIB NAMES atlas libatlas.so.3
find_library(ATLAS_LIB NAMES lapack_atlas liblapack_atlas.so.3
PATHS ${ATLAS_LIB_SEARCH_PATHS})
if(ATLAS_INC_DIR AND ATLAS_CBLAS_LIB AND ATLAS_LIB)
set(CBLAS_PROVIDER ATLAS)
set(CBLAS_INC_DIR ${ATLAS_INC_DIR})
set(CBLAS_INC_DIR ${ATLAS_INC_DIR} ${ATLAS_CLAPACK_INC_DIR})
set(CBLAS_LIBS ${ATLAS_LIB} ${ATLAS_CBLAS_LIB})
add_definitions(-DPADDLE_USE_ATLAS)
message(STATUS "Found Atlas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBS})")
return()
endif()
@ -83,6 +90,8 @@ set(OPENBLAS_LIB_SEARCH_PATHS
find_path(OPENBLAS_INC_DIR NAMES cblas.h
PATHS ${OPENBLAS_INCLUDE_SEARCH_PATHS})
find_path(OPENBLAS_LAPACKE_INC_DIR NAMES lapacke.h
PATHS ${OPENBLAS_INCLUDE_SEARCH_PATHS})
find_library(OPENBLAS_LIB NAMES openblas
PATHS ${OPENBLAS_LIB_SEARCH_PATHS})
@ -90,6 +99,7 @@ if(OPENBLAS_INC_DIR AND OPENBLAS_LIB)
set(CBLAS_PROVIDER OPENBLAS)
set(CBLAS_INC_DIR ${OPENBLAS_INC_DIR})
set(CBLAS_LIBS ${OPENBLAS_LIB})
message(STATUS "Found OpenBlas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBS})")
return()
endif()

@ -0,0 +1,103 @@
# CMake script for code coverage.
# If _COVERALLS_UPLOAD is ON, it will upload json files to overalls.io automatically.
# Param _COVERAGE_SRCS A list of coverage source files.
# Param _COVERALLS_UPLOAD Upload the result to coveralls.
# Param _CMAKE_SCRIPT_PATH CMake script path.
function(code_coverage _COVERAGE_SRCS _COVERALLS_UPLOAD _CMAKE_SCRIPT_PATH)
# clean previous gcov data.
file(REMOVE_RECURSE ${PROJECT_BINARY_DIR}/*.gcda)
# find curl for upload JSON soon.
if (_COVERALLS_UPLOAD)
find_program(CURL_EXECUTABLE curl)
if (NOT CURL_EXECUTABLE)
message(FATAL_ERROR "Coveralls: curl not found!")
endif()
endif()
# When passing a CMake list to an external process, the list
# will be converted from the format "1;2;3" to "1 2 3".
set(COVERAGE_SRCS "")
foreach (SINGLE_SRC ${_COVERAGE_SRCS})
set(COVERAGE_SRCS "${COVERAGE_SRCS}*${SINGLE_SRC}")
endforeach()
# query number of logical cores
cmake_host_system_information(RESULT core_size QUERY NUMBER_OF_LOGICAL_CORES)
# coveralls json file.
set(COVERALLS_FILE ${PROJECT_BINARY_DIR}/coveralls.json)
add_custom_target(coveralls_generate
# Run regress tests.
COMMAND ${CMAKE_CTEST_COMMAND}
-j ${core_size}
--output-on-failure
# Generate Gcov and translate it into coveralls JSON.
COMMAND ${CMAKE_COMMAND}
-DCOVERAGE_SRCS="${COVERAGE_SRCS}"
-DCOVERALLS_OUTPUT_FILE="${COVERALLS_FILE}"
-DCOV_PATH="${PROJECT_BINARY_DIR}"
-DPROJECT_ROOT="${PROJECT_SOURCE_DIR}"
-P "${_CMAKE_SCRIPT_PATH}/coverallsGcovJsons.cmake"
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
COMMENT "Coveralls: generating coveralls output..."
)
if (_COVERALLS_UPLOAD)
message("COVERALLS UPLOAD: ON")
# Upload the JSON to coveralls.
add_custom_target(coveralls_upload
COMMAND ${CURL_EXECUTABLE}
-S -F json_file=@${COVERALLS_FILE}
https://coveralls.io/api/v1/jobs
DEPENDS coveralls_generate
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
COMMENT "Coveralls: uploading coveralls output...")
add_custom_target(coveralls DEPENDS coveralls_upload)
else()
message("COVERALLS UPLOAD: OFF")
add_custom_target(coveralls DEPENDS coveralls_generate)
endif()
endfunction()
if(ON_COVERALLS)
set(CMAKE_BUILD_TYPE "Debug")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0 -fprofile-arcs -ftest-coverage")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -O0 -fprofile-arcs -ftest-coverage")
set(EXCLUDE_DIRS
"demo/"
"build/"
"tests/"
".test_env/"
)
if(WITH_GPU)
file(GLOB_RECURSE PADDLE_SOURCES RELATIVE "${PROJECT_SOURCE_DIR}" "*.cpp" "*.cc" ".c" "*.cu")
else()
file(GLOB_RECURSE PADDLE_SOURCES RELATIVE "${PROJECT_SOURCE_DIR}" "*.cpp" "*.cc" "*.c")
endif()
# exclude trivial files in PADDLE_SOURCES
foreach(EXCLUDE_DIR ${EXCLUDE_DIRS})
foreach(TMP_PATH ${PADDLE_SOURCES})
string(FIND ${TMP_PATH} ${EXCLUDE_DIR} EXCLUDE_DIR_FOUND)
if(NOT ${EXCLUDE_DIR_FOUND} EQUAL -1)
list(REMOVE_ITEM PADDLE_SOURCES ${TMP_PATH})
endif()
endforeach(TMP_PATH)
endforeach()
# convert to absolute path
set(PADDLE_SRCS "")
foreach(PADDLE_SRC ${PADDLE_SOURCES})
set(PADDLE_SRCS "${PADDLE_SRCS};${PROJECT_SOURCE_DIR}/${PADDLE_SRC}")
endforeach()
code_coverage(
"${PADDLE_SRCS}"
${COVERALLS_UPLOAD}
"${PROJECT_SOURCE_DIR}/cmake"
)
endif()

File diff suppressed because it is too large Load Diff

@ -21,12 +21,6 @@ function(safe_set_flag is_c src_list flag_name)
endif()
if(${safe_name})
set(${src_list} "${${src_list}} ${flag_name}" PARENT_SCOPE)
if(is_c)
set(CUDA_NVCC_FLAGS
--compiler-options;${flag_name}
${CUDA_NVCC_FLAGS}
PARENT_SCOPE)
endif()
endif()
endfunction()
@ -40,6 +34,20 @@ macro(safe_set_cxxflag src_list flag_name)
safe_set_flag(OFF ${src_list} ${flag_name})
endmacro()
# helper macro to set nvcc flag
macro(safe_set_nvflag flag_name)
string(REPLACE "-" "_" safe_name ${flag_name})
string(REPLACE "=" "_" safe_name ${safe_name})
CHECK_C_COMPILER_FLAG(${flag_name} C_COMPILER_SUPPORT_FLAG_${safe_name})
set(safe_name C_COMPILER_SUPPORT_FLAG_${safe_name})
if(${safe_name})
set(CUDA_NVCC_FLAGS
--compiler-options;${flag_name}
${CUDA_NVCC_FLAGS})
endif()
endmacro()
CHECK_CXX_SYMBOL_EXISTS(UINT64_MAX "stdint.h" UINT64_MAX_EXISTS)
if(NOT UINT64_MAX_EXISTS)
set(CMAKE_REQUIRED_DEFINITIONS -D__STDC_LIMIT_MACROS)
@ -63,20 +71,43 @@ set(COMMON_FLAGS
-Wnon-virtual-dtor
-Wdelete-non-virtual-dtor
-Wno-unused-parameter
-Wno-unused-function
-Wno-error=literal-suffix
-Wno-error=unused-local-typedefs)
set(GPU_COMMON_FLAGS
-fPIC
-fno-omit-frame-pointer
-Wnon-virtual-dtor
-Wdelete-non-virtual-dtor
-Wno-unused-parameter
-Wno-unused-function
-Wno-error=literal-suffix
-Wno-error=unused-local-typedefs
-Wno-error=unused-function # Warnings in Numpy Header.
)
if (APPLE)
# On Mac OS X build fat binaries with x86_64 architectures by default.
set (CMAKE_OSX_ARCHITECTURES "x86_64" CACHE STRING "Build architectures for OSX" FORCE)
else()
set(GPU_COMMON_FLAGS
-Wall
-Wextra
-Werror
${GPU_COMMON_FLAGS})
endif()
foreach(flag ${COMMON_FLAGS})
safe_set_cflag(CMAKE_C_FLAGS ${flag})
safe_set_cxxflag(CMAKE_CXX_FLAGS ${flag})
endforeach()
# On Mac OS X build fat binaries with x86_64 architectures by default.
if (APPLE)
set (CMAKE_OSX_ARCHITECTURES "x86_64" CACHE STRING "Build architectures for OSX" FORCE)
endif ()
foreach(flag ${GPU_COMMON_FLAGS})
safe_set_nvflag(${flag})
endforeach()
# Release/Debug flags set by cmake. Such as -O3 -g -DNDEBUG etc.
# So, don't set these flags here.

@ -0,0 +1,76 @@
# user should download rdma first from subversion repository
# execute following instruction to download svn mannally
# svn co https://svn.baidu.com/sys/ip/trunk/rdma/sockrdmav1 rdma/
# svn co https://svn.baidu.com/sys/ip/trunk/rdma/thirdparty rdma/
# we use static output in svn repositories to avoid implict bugs from not standard runtime env.
set(RDMA_ROOT $ENV{RDMA_ROOT} CACHE PATH "Folder contains RDMA sock library and thirdparty library")
function(generate_rdma_links)
#redirect to current DIR to isolate the pollution from system runtime environment
#it can benifits unified control for different gcc environment.
#e.g, by default gcc48 did not refer /usr/lib64 which could contain low version
#runtime libraries that will crash process while loading it. That redirect trick
#can fix it.
execute_process(
COMMAND mkdir -p librdma
COMMAND ln -s -f /usr/lib64/libibverbs.so.1.0.0 librdma/libibverbs.so.1
COMMAND ln -s -f /usr/lib64/libibverbs.so.1.0.0 librdma/libibverbs.so
COMMAND ln -s -f /usr/lib64/librdmacm.so.1.0.0 librdma/librdmacm.so.1
COMMAND ln -s -f /usr/lib64/librdmacm.so.1.0.0 librdma/librdmacm.so
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
)
endfunction(generate_rdma_links)
#check and set headers
find_path(RDMA_INC_SXISOCK sxi_sock.h PATHS ${RDMA_ROOT}/sockrdmav1/output/include)
find_path(RDMA_INC_XIO libxio.h PATHS ${RDMA_ROOT}/thirdparty/output/accelio)
find_path(RDMA_INC_EVENT event2 PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
find_path(RDMA_INC_NUMA numa.h PATHS ${RDMA_ROOT}/thirdparty/output/libnuma)
#check and set libs
find_library(RDMA_LIB_SXISOCK NAMES sxisock PATHS ${RDMA_ROOT}/sockrdmav1/output)
find_library(RDMA_LIB_XIO NAMES xio PATHS ${RDMA_ROOT}/thirdparty/output/accelio)
find_library(RDMA_LIB_EVENT NAMES event PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
find_library(RDMA_LIB_EVENT_CORE NAMES event_core PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
find_library(RDMA_LIB_EVENT_EXTRA NAMES event_extra PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
find_library(RDMA_LIB_EVENT_PTHREADS NAMES event_pthreads PATHS ${RDMA_ROOT}/thirdparty/output/libevent)
find_library(RDMA_LIB_NUMA NAMES numa PATHS ${RDMA_ROOT}/thirdparty/output/libnuma)
if(
RDMA_INC_SXISOCK AND
RDMA_INC_XIO AND
RDMA_INC_EVENT AND
RDMA_INC_NUMA AND
RDMA_LIB_SXISOCK AND
RDMA_LIB_XIO AND
RDMA_LIB_EVENT AND
RDMA_LIB_EVENT_CORE AND
RDMA_LIB_EVENT_EXTRA AND
RDMA_LIB_EVENT_PTHREADS AND
RDMA_LIB_NUMA
)
set(RDMA_INC_DIR
${RDMA_INC_SXISOCK}
${RDMA_INC_XIO}
${RDMA_INC_EVENT}
${RDMA_INC_NUMA})
set(RDMA_LIBS
${RDMA_LIB_SXISOCK}
${RDMA_LIB_XIO}
${RDMA_LIB_EVENT}
${RDMA_LIB_EVENT_CORE}
${RDMA_LIB_EVENT_EXTRA}
${RDMA_LIB_EVENT_PTHREADS}
${RDMA_LIB_NUMA}
)
set(RDMA_LD_FLAGS "-L./librdma -libverbs -lrdmacm -Xlinker -rpath ./librdma")
return()
endif()
#if this module is not called, RDMA_INC_DIR RDMA_LIBS will be null, so top module always refer this variable
message(FATAL_ERROR, "RDMA libraries are not found, try to set RDMA_ROOT or check all related libraries.")

@ -1,25 +1,3 @@
find_program(
SWIG_BINARY_PATH
swig)
if(${SWIG_BINARY_PATH} STREQUAL "SWIG_BINARY_PATH-NOTFOUND")
set(SWIG_FOUND OFF)
else()
set(SWIG_FOUND ON)
endif()
set(MIN_SWIG_VERSION 2)
if(SWIG_FOUND)
execute_process(COMMAND sh -c "${SWIG_BINARY_PATH} -version | grep Version | cut -f3 -d' '"
OUTPUT_VARIABLE _SWIG_VERSION
OUTPUT_STRIP_TRAILING_WHITESPACE)
if(${_SWIG_VERSION} VERSION_LESS ${MIN_SWIG_VERSION})
message("swig version ${MIN_SWIG_VERSION} or greater is needed for generating python api. "
"Only version ${_SWIG_VERSION} is found. Set SWIG_FOUND to FALSE")
set(SWIG_FOUND FALSE)
endif(${_SWIG_VERSION} VERSION_LESS ${MIN_SWIG_VERSION})
endif(SWIG_FOUND)
function(generate_python_api target_name)
add_custom_command(OUTPUT ${PROJ_ROOT}/paddle/py_paddle/swig_paddle.py
${PROJ_ROOT}/paddle/Paddle_wrap.cxx
@ -27,6 +5,7 @@ function(generate_python_api target_name)
COMMAND swig -python -c++ -outcurrentdir -I../ api/Paddle.swig
&& mv ${PROJ_ROOT}/paddle/swig_paddle.py ${PROJ_ROOT}/paddle/py_paddle/swig_paddle.py
DEPENDS ${PROJ_ROOT}/paddle/api/Paddle.swig
${PROJ_ROOT}/paddle/api/PaddleAPI.h
WORKING_DIRECTORY ${PROJ_ROOT}/paddle
COMMENT "Generate Python API from swig")
add_custom_target(${target_name} ALL DEPENDS

@ -67,6 +67,10 @@ endmacro()
#
# It will handle WITH_PYTHON/WITH_GLOG etc.
function(link_paddle_exe TARGET_NAME)
if(WITH_RDMA)
generate_rdma_links()
endif()
if(WITH_METRIC)
if(WITH_GPU)
set(METRIC_LIBS paddle_metric_learning paddle_dserver_lib metric metric_cpu)
@ -110,6 +114,12 @@ function(link_paddle_exe TARGET_NAME)
${INTERAL_LIBS}
${CMAKE_DL_LIBS})
if(WITH_RDMA)
target_link_libraries(${TARGET_NAME}
${RDMA_LD_FLAGS}
${RDMA_LIBS})
endif()
if(WITH_PYTHON)
target_link_libraries(${TARGET_NAME}
${PYTHON_LIBRARIES})

@ -0,0 +1,4 @@
This folder contains scripts used in PaddlePaddle introduction.
- use `bash train.sh` to train a simple linear regression model
- use `python evaluate_model.py` to read model parameters. You can see that `w` and `b` are very close to [2, 0.3].

@ -0,0 +1,24 @@
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer.PyDataProvider2 import *
import random
# define data types of input: 2 real numbers
@provider(input_types=[dense_vector(1), dense_vector(1)],use_seq=False)
def process(settings, input_file):
for i in xrange(2000):
x = random.random()
yield [x], [2*x+0.3]

@ -0,0 +1,36 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Print model parameters in last model
Usage:
python evaluate_model.py
"""
import numpy as np
import os
def load(file_name):
with open(file_name, 'rb') as f:
f.read(16) # skip header for float type.
return np.fromfile(f, dtype=np.float32)
def main():
print 'w=%.6f, b=%.6f from pass 29' % (load('output/pass-00029/w'),
load('output/pass-00029/b'))
if __name__ == '__main__':
main()

@ -0,0 +1,21 @@
#!/bin/bash
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
paddle train \
--config=trainer_config.py \
--save_dir=./output \
--num_passes=30 \
2>&1 |tee 'train.log'

@ -0,0 +1,32 @@
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
# 1. read data. Suppose you saved above python code as dataprovider.py
data_file = 'empty.list'
with open(data_file, 'w') as f: f.writelines(' ')
define_py_data_sources2(train_list=data_file, test_list=None,
module='dataprovider', obj='process',args={})
# 2. learning algorithm
settings(batch_size=12, learning_rate=1e-3, learning_method=MomentumOptimizer())
# 3. Network configuration
x = data_layer(name='x', size=1)
y = data_layer(name='y', size=1)
y_predict = fc_layer(input=x, param_attr=ParamAttr(name='w'), size=1, act=LinearActivation(), bias_attr=ParamAttr(name='b'))
cost = regression_cost(input=y_predict, label=y)
outputs(cost)

@ -0,0 +1,114 @@
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import itertools
import random
from paddle.trainer.config_parser import parse_config
from py_paddle import swig_paddle as api
from py_paddle import DataProviderConverter
from paddle.trainer.PyDataProvider2 \
import integer_value, integer_value_sequence, sparse_binary_vector
def parse_arguments():
parser = argparse.ArgumentParser()
parser.add_argument("--train_data",
type=str, required=False, help="train data file")
parser.add_argument("--test_data", type=str, help="test data file")
parser.add_argument("--config",
type=str, required=True, help="config file name")
parser.add_argument("--dict_file", required=True, help="dictionary file")
parser.add_argument("--seq",
default=1, type=int,
help="whether use sequence training")
parser.add_argument("--use_gpu", default=0, type=int,
help="whether use GPU for training")
parser.add_argument("--trainer_count", default=1, type=int,
help="Number of threads for training")
parser.add_argument("--num_passes", default=5, type=int,
help="Number of training passes")
return parser.parse_args()
UNK_IDX = 0
def load_data(file_name, word_dict):
with open(file_name, 'r') as f:
for line in f:
label, comment = line.strip().split('\t')
words = comment.split()
word_slot = [word_dict.get(w, UNK_IDX) for w in words]
yield word_slot, int(label)
def load_dict(dict_file):
word_dict = dict()
with open(dict_file, 'r') as f:
for i, line in enumerate(f):
w = line.strip().split()[0]
word_dict[w] = i
return word_dict
def main():
options = parse_arguments()
api.initPaddle("--use_gpu=%s" % options.use_gpu,
"--trainer_count=%s" % options.trainer_count)
word_dict = load_dict(options.dict_file)
train_dataset = list(load_data(options.train_data, word_dict))
if options.test_data:
test_dataset = list(load_data(options.test_data, word_dict))
else:
test_dataset = None
trainer_config = parse_config(options.config,
"dict_file=%s" % options.dict_file)
# No need to have data provider for trainer
trainer_config.ClearField('data_config')
trainer_config.ClearField('test_data_config')
# create a GradientMachine from the model configuratin
model = api.GradientMachine.createFromConfigProto(
trainer_config.model_config)
# create a trainer for the gradient machine
trainer = api.Trainer.create(trainer_config, model)
# create a data converter which converts data to PaddlePaddle
# internal format
input_types = [
integer_value_sequence(len(word_dict)) if options.seq
else sparse_binary_vector(len(word_dict)),
integer_value(2)]
converter = DataProviderConverter(input_types)
batch_size = trainer_config.opt_config.batch_size
trainer.startTrain()
for train_pass in xrange(options.num_passes):
trainer.startTrainPass()
random.shuffle(train_dataset)
for pos in xrange(0, len(train_dataset), batch_size):
batch = itertools.islice(train_dataset, pos, pos + batch_size)
size = min(batch_size, len(train_dataset) - pos)
trainer.trainOneDataBatch(size, converter(batch))
trainer.finishTrainPass()
if test_dataset:
trainer.startTestPeriod();
for pos in xrange(0, len(test_dataset), batch_size):
batch = itertools.islice(test_dataset, pos, pos + batch_size)
size = min(batch_size, len(test_dataset) - pos)
trainer.testOneDataBatch(size, converter(batch))
trainer.finishTestPeriod()
trainer.finishTrain()
if __name__ == '__main__':
main()

@ -0,0 +1,29 @@
#!/bin/bash
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
# Note: if using trainer_config.emb.py, trainer_config.cnn.py
# or trainer_config.lstm.py, you need to change --seq to --seq=1
# because they are sequence models.
python api_train.py \
--config=trainer_config.lr.py \
--trainer_count=2 \
--num_passes=15 \
--use_gpu=0 \
--seq=0 \
--train_data=data/train.txt \
--test_data=data/test.txt \
--dict_file=data/dict.txt \
2>&1 | tee 'train.log'

@ -16,6 +16,7 @@ from paddle.trainer.PyDataProvider2 import *
UNK_IDX = 0
def initializer(settings, dictionary, **kwargs):
settings.word_dict = dictionary
settings.input_types = [

@ -24,7 +24,7 @@ paddle train \
--config=$cfg \
--save_dir=./output \
--trainer_count=4 \
--log_period=20 \
--log_period=100 \
--num_passes=15 \
--use_gpu=false \
--show_parameter_stats_period=100 \

@ -16,7 +16,7 @@
from paddle.trainer_config_helpers import *
dict_file = "./data/dict.txt"
dict_file = get_config_arg('dict_file', str, "./data/dict.txt")
word_dict = dict()
with open(dict_file, 'r') as f:
for i, line in enumerate(f):
@ -63,7 +63,6 @@ if not is_predict:
label = data_layer(name="label", size=2)
# Define cross-entropy classification loss and error.
classification_cost(input=output, label=label)
cls = classification_cost(input=output, label=label)
outputs(cls)
else:

@ -42,20 +42,13 @@ settings(
gradient_clipping_threshold=25
)
bias_attr = ParamAttr(initial_std=0.,l2_rate=0.)
data = data_layer(name="word", size=len(word_dict))
emb = embedding_layer(input=data, size=128)
fc = fc_layer(input=emb, size=512,
act=LinearActivation(),
bias_attr=bias_attr,
layer_attr=ExtraAttr(drop_rate=0.1))
lstm = lstmemory(input=fc, act=TanhActivation(),
bias_attr=bias_attr,
layer_attr=ExtraAttr(drop_rate=0.25))
lstm_last = pooling_layer(input=lstm, pooling_type=MaxPooling())
output = fc_layer(input=lstm_last, size=2,
bias_attr=bias_attr,
lstm = simple_lstm(input=emb, size=128,
lstm_cell_attr=ExtraAttr(drop_rate=0.25))
lstm_max = pooling_layer(input=lstm, pooling_type=MaxPooling())
output = fc_layer(input=lstm_max, size=2,
act=SoftmaxActivation())
if is_predict:
maxid = maxid_layer(output)

@ -0,0 +1,10 @@
*.pyc
train.log
data/feature
data/conll05st-release/
data/src.dict
data/test.wsj.props
data/test.wsj.seq_pair
data/test.wsj.words
data/tgt.dict
output

@ -46,8 +46,8 @@ class SentimentPrediction():
conf = parse_config(train_conf, "is_predict=1")
self.network = swig_paddle.GradientMachine.createFromConfigProto(conf.model_config)
self.network.loadParameters(self.model_dir)
slots = [integer_value_sequence(self.dict_dim)]
self.converter = DataProviderConverter(slots)
input_types = [integer_value_sequence(self.dict_dim)]
self.converter = DataProviderConverter(input_types)
def load_dict(self):
"""

@ -153,12 +153,12 @@ As a simple example, consider the following:
- **Only CPU**
```bash
cmake .. -DWITH_GPU=OFF -DWITH_DOC=OFF
cmake .. -DWITH_GPU=OFF
```
- **GPU**
```bash
cmake .. -DWITH_GPU=ON -DWITH_DOC=OFF
cmake .. -DWITH_GPU=ON
```
- **GPU with doc and swig**
@ -171,7 +171,7 @@ Finally, you can build PaddlePaddle:
```bash
# you can add build option here, such as:
cmake .. -DWITH_GPU=ON -DWITH_DOC=OFF -DCMAKE_INSTALL_PREFIX=<path to install>
cmake .. -DWITH_GPU=ON -DCMAKE_INSTALL_PREFIX=<path to install>
# please use sudo make install, if you want to install PaddlePaddle into the system
make -j `nproc` && make install
# set PaddlePaddle installation path in ~/.bashrc
@ -219,10 +219,9 @@ easy_install pip
# Install google test on Mac OS X
# Download gtest 1.7.0
wget https://github.com/google/googletest/archive/release-1.7.0.tar.gz
tar -xvf googletest-release-1.7.0.tar.gz && cd googletest-release-1.7.0
tar -xzf googletest-release-1.7.0.tar.gz && cd googletest-release-1.7.0
# Build gtest
mkdir build && cmake ..
make
mkdir build && cd build && cmake .. && make
# Install gtest library
sudo cp -r ../include/gtest /usr/local/include/
sudo cp lib*.a /usr/local/lib
@ -246,7 +245,7 @@ easy_install pip
```bash
sudo tar -xzf cudnn-7.5-osx-x64-v5.0-ga.tgz -C /usr/local
sudo chmod a+r /usr/local/cuda/include/cudnn.h /usr/local/cuda/lib64/libcudnn*
sudo chmod a+r /usr/local/cuda/include/cudnn.h /usr/local/cuda/lib/libcudnn*
```
2. Then you need to set DYLD\_LIBRARY\_PATH, PATH environment variables in ~/.bashrc.
@ -273,12 +272,12 @@ As a simple example, consider the following:
- **Only CPU**
```bash
cmake .. -DWITH_GPU=OFF -DWITH_DOC=OFF
cmake .. -DWITH_GPU=OFF
```
- **GPU**
```bash
cmake .. -DWITH_GPU=ON -DWITH_DOC=OFF
cmake .. -DWITH_GPU=ON
```
- **GPU with doc and swig**
@ -291,9 +290,9 @@ Finally, you can build PaddlePaddle:
```bash
# you can add build option here, such as:
cmake .. -DWITH_GPU=ON -DWITH_DOC=OFF -DCMAKE_INSTALL_PREFIX=<installation path>
cmake .. -DWITH_GPU=ON -DCMAKE_INSTALL_PREFIX=<installation path>
# please use sudo make install, if you want to install PaddlePaddle into the system
make -j `nproc` && make install
make -j `sysctl -n hw.ncpu` && make install
# set PaddlePaddle installation path in ~/.bashrc
export PATH=<installation path>/bin:$PATH
```

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save