!24 Synchronization code to ms-incubator

Merge pull request !24 from changzherui/sy-code
pull/915/head
mindspore-ci-bot committed by Gitee
commit 22cc03a54a

@@ -0,0 +1,26 @@
<!-- Thanks for sending a pull request! Here are some tips for you:
If this is your first time, please read our contributor guidelines: https://gitee.com/mindspore/mindspore/blob/master/CONTRIBUTING.md
-->
**What type of PR is this?**
> Uncomment only one ` /kind <>` line, hit enter to put it on a new line, and remove the leading whitespace from that line:
>
> /kind bug
> /kind task
> /kind feature
**What does this PR do / why do we need it**:
**Which issue(s) this PR fixes**:
<!--
*Automatically closes linked issue when PR is merged.
Usage: `Fixes #<issue number>`, or `Fixes (paste link of issue)`.
-->
Fixes #
**Special notes for your reviewers**:

@@ -0,0 +1,19 @@
---
name: RFC
about: Use this template for a new feature or enhancement
labels: kind/feature or kind/enhancement
---
## Background
- Describe the status of the problem you wish to solve
- Attach the relevant issue if there is one
## Introduction
- Describe the general solution, design and/or pseudo-code
## Trail
| No. | Task Description | Related Issue(URL) |
| --- | ---------------- | ------------------ |
| 1 | | |
| 2 | | |

@@ -0,0 +1,43 @@
---
name: Bug Report
about: Use this template for reporting a bug
labels: kind/bug
---
<!-- Thanks for sending an issue! Here are some tips for you:
If this is your first time, please read our contributor guidelines: https://github.com/mindspore-ai/mindspore/blob/master/CONTRIBUTING.md
-->
## Environment
### Hardware Environment (`Ascend`/`GPU`/`CPU`):
> Uncomment only one ` /device <>` line, hit enter to put it on a new line, and remove the leading whitespace from that line:
>
> `/device ascend`<br/>
> `/device gpu`<br/>
> `/device cpu`<br/>
### Software Environment:
- **MindSpore version (source or binary)**:
- **Python version (e.g., Python 3.7.5)**:
- **OS platform and distribution (e.g., Linux Ubuntu 16.04)**:
- **GCC/Compiler version (if compiled from source)**:
## Describe the current behavior
## Describe the expected behavior
## Steps to reproduce the issue
1.
2.
3.
## Related log / screenshot
## Special notes for this issue

@@ -0,0 +1,19 @@
---
name: Task
about: Use this template for task tracking
labels: kind/task
---
## Task Description
## Task Goal
## Sub Task
| No. | Task Description | Issue ID |
| --- | ---------------- | -------- |
| 1 | | |
| 2 | | |

@@ -0,0 +1,24 @@
<!-- Thanks for sending a pull request! Here are some tips for you:
If this is your first time, please read our contributor guidelines: https://github.com/mindspore-ai/mindspore/blob/master/CONTRIBUTING.md
-->
**What type of PR is this?**
> Uncomment only one ` /kind <>` line, hit enter to put it on a new line, and remove the leading whitespace from that line:
>
> `/kind bug`<br/>
> `/kind task`<br/>
> `/kind feature`<br/>
**What does this PR do / why do we need it**:
**Which issue(s) this PR fixes**:
<!--
*Automatically closes linked issue when PR is merged.
Usage: `Fixes #<issue number>`, or `Fixes (paste link of issue)`.
-->
Fixes #
**Special notes for your reviewers**:

@@ -38,15 +38,17 @@ set(MS_CCSRC_BUILD_PATH ${BUILD_PATH}/mindspore/mindspore/ccsrc)
 if (ENABLE_GE)
     link_directories(${CMAKE_SOURCE_DIR}/third_party/ge/lib)
-else()
+elseif(ENABLE_D OR ENABLE_TESTCASES)
     include(${CMAKE_SOURCE_DIR}/cmake/dependency_graphengine.cmake)
 endif()

-include_directories(${CMAKE_CURRENT_SOURCE_DIR}/graphengine/inc)
-include_directories(${CMAKE_CURRENT_SOURCE_DIR}/graphengine/inc/external)
-include_directories(${CMAKE_CURRENT_SOURCE_DIR}/graphengine/inc/framework)
-include_directories(${CMAKE_CURRENT_SOURCE_DIR}/graphengine/third_party/fwkacllib/inc)
-include_directories(${CMAKE_CURRENT_SOURCE_DIR}/graphengine/third_party/fwkacllib/inc/toolchain)
+if (ENABLE_GE OR ENABLE_D OR ENABLE_TESTCASES)
+    include_directories(${CMAKE_CURRENT_SOURCE_DIR}/graphengine/inc)
+    include_directories(${CMAKE_CURRENT_SOURCE_DIR}/graphengine/inc/external)
+    include_directories(${CMAKE_CURRENT_SOURCE_DIR}/graphengine/inc/framework)
+    include_directories(${CMAKE_CURRENT_SOURCE_DIR}/graphengine/third_party/fwkacllib/inc)
+    include_directories(${CMAKE_CURRENT_SOURCE_DIR}/graphengine/third_party/fwkacllib/inc/toolchain)
+endif()

 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden")
 add_subdirectory(mindspore/ccsrc)

@@ -78,7 +78,7 @@ Please follow this style to make MindSpore easy to review, maintain and develop.
 * Pull a request to MindSpore repository

-    In the last step, your need to pull a compare request between your new branch and MindSpore `master` branch. After finishing the pull request, the Jekins CI will be automatically set up for building test.
+    In the last step, your need to pull a compare request between your new branch and MindSpore `master` branch. After finishing the pull request, the Jenkins CI will be automatically set up for building test.

 ### Report issues

@@ -3,7 +3,7 @@
 ## Main Features

 ### Ascend 910 Training and Inference Framework
-* Recommended OS: Ubuntu 16.04 (or later) or EulerOS 2.0
+* Recommended OS: Ubuntu 16.04 (or later) or EulerOS 2.5 or EulerOS 2.8
 * Python version: 3.7.5
 * Preset models
     * ResNet-50: residual structure-based convolutional neural network (CNN) for image classification, which is widely used.

@@ -38,19 +38,19 @@ elseif (DEFINED ENV{D_LINK_PATH})
     find_library(cce libcce.so ${GE_LIB_PATH})
     find_library(resource libresource.so ${GE_LIB_PATH})
 else()
-    set(HIAI_INSTALLED_DIR /usr/local/HiAI)
-    set(HIAI_DRIVER_DIR ${HIAI_INSTALLED_DIR}/driver/lib64)
-    set(HIAI_RUNTIME_DIR ${HIAI_INSTALLED_DIR}/runtime/lib64)
-    find_library(c_sec libc_sec.so ${HIAI_DRIVER_DIR})
-    find_library(slog libslog.so ${HIAI_DRIVER_DIR})
-    find_library(mmpa libmmpa.so ${HIAI_DRIVER_DIR})
-    find_library(cce libcce.so ${HIAI_RUNTIME_DIR})
-    find_library(hccl libhccl.so ${HIAI_RUNTIME_DIR})
-    find_library(runtime libruntime.so ${HIAI_RUNTIME_DIR})
-    find_library(msprof libmsprof.so ${HIAI_RUNTIME_DIR})
-    find_library(register libregister.so ${HIAI_RUNTIME_DIR})
-    find_library(resource libresource.so ${HIAI_RUNTIME_DIR})
+    # Ascend mode
+    set(ASCEND_PATH /usr/local/Ascend)
+    set(ASCEND_DRIVER_PATH ${ASCEND_PATH}/driver/lib64/common)
+    set(ASCEND_RUNTIME_PATH ${ASCEND_PATH}/fwkacllib/lib64)
+    find_library(c_sec libc_sec.so ${ASCEND_DRIVER_PATH})
+    find_library(slog libslog.so ${ASCEND_DRIVER_PATH})
+    find_library(mmpa libmmpa.so ${ASCEND_DRIVER_PATH})
+    find_library(cce libcce.so ${ASCEND_RUNTIME_PATH})
+    find_library(hccl libhccl.so ${ASCEND_RUNTIME_PATH})
+    find_library(runtime libruntime.so ${ASCEND_RUNTIME_PATH})
+    find_library(msprof libmsprof.so ${ASCEND_RUNTIME_PATH})
+    find_library(register libregister.so ${ASCEND_RUNTIME_PATH})
+    find_library(resource libresource.so ${ASCEND_RUNTIME_PATH})
 endif()

 # compile libraries from following directories

@@ -40,7 +40,7 @@ if (ENABLE_GE)
     include_directories(${CMAKE_SOURCE_DIR}/third_party/ge/include)
     include_directories(${CMAKE_SOURCE_DIR}/third_party/ge/include/external)
     include_directories(${CMAKE_SOURCE_DIR}/third_party/ge/include/external/graph)
-else()
+elseif(ENABLE_D OR ENABLE_TESTCASES)
     include_directories(${CMAKE_SOURCE_DIR}/graphengine/inc)
     include_directories(${CMAKE_SOURCE_DIR}/graphengine/inc/ops)
     include_directories(${CMAKE_SOURCE_DIR}/graphengine/inc/external)

@@ -16,16 +16,34 @@ function(mindspore_add_submodule_obj des_submodule_objs sub_dir submodule_name_o
 endfunction()

-get_filename_component(_MS_LIB_CACHE ~/.mslib REALPATH)
+if (DEFINED ENV{MSLIBS_CACHE_PATH})
+    set(_MS_LIB_CACHE $ENV{MSLIBS_CACHE_PATH})
+else()
+    set(_MS_LIB_CACHE ${CMAKE_BINARY_DIR}/.mslib)
+endif ()
+message("MS LIBS CACHE PATH: ${_MS_LIB_CACHE}")
+
 if (NOT EXISTS ${_MS_LIB_CACHE})
     file(MAKE_DIRECTORY ${_MS_LIB_CACHE})
 endif ()
+
+# set(FETCHCONTENT_BASE_DIR ${_MS_LIB_CACHE})
+# set(CMAKE_PREFIX_PATH ${_MS_LIB_CACHE})

 if (DEFINED ENV{MSLIBS_SERVER})
     set(LOCAL_LIBS_SERVER $ENV{MSLIBS_SERVER})
     message("LOCAL_LIBS_SERVER: ${LOCAL_LIBS_SERVER}")
 endif ()

+include(ProcessorCount)
+ProcessorCount(N)
+if (JOBS)
+    set(THNUM ${JOBS})
+else()
+    set(JOBS 8)
+    if (${JOBS} GREATER ${N})
+        set(THNUM ${N})
+    endif()
+endif ()
+message("set make thread num: ${THNUM}")
+
 if(LOCAL_LIBS_SERVER)
     if (NOT ENV{no_proxy})
         set(ENV{no_proxy} "${LOCAL_LIBS_SERVER}")
@@ -287,7 +305,7 @@ function(mindspore_add_pkg pkg_name )
             -DCMAKE_INSTALL_PREFIX=${${pkg_name}_BASE_DIR} ..
             WORKING_DIRECTORY ${${pkg_name}_SOURCE_DIR}/_build)
-        __exec_cmd(COMMAND ${CMAKE_COMMAND} --build . --target install -- -j8
+        __exec_cmd(COMMAND ${CMAKE_COMMAND} --build . --target install -- -j${THNUM}
             WORKING_DIRECTORY ${${pkg_name}_SOURCE_DIR}/_build)
     else()
@@ -318,7 +336,7 @@ function(mindspore_add_pkg pkg_name )
         ${${pkg_name}_MAKE_CFLAGS} ${${pkg_name}_MAKE_CXXFLAGS} ${${pkg_name}_MAKE_LDFLAGS})
     endif ()
     # build
-    __exec_cmd(COMMAND ${CMAKE_MAKE_PROGRAM} ${${pkg_name}_BUILD_OPTION} -j8
+    __exec_cmd(COMMAND ${CMAKE_MAKE_PROGRAM} ${${pkg_name}_BUILD_OPTION} -j${THNUM}
         WORKING_DIRECTORY ${${pkg_name}_SOURCE_DIR})
 if (PKG_INSTALL_INCS OR PKG_INSTALL_LIBS)
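The replacement for the hard-coded -j8 derives build parallelism from ProcessorCount(N) plus an optional -DJOBS=<n> cache variable: an explicit JOBS is used unchanged, while the default request of 8 is capped at the detected core count (as committed, THNUM is only assigned in the default branch when 8 exceeds N). A minimal Python paraphrase of the intended selection; the function name is illustrative, not part of the repo:

import multiprocessing

def pick_thread_num(jobs=None, default=8):
    """Paraphrase of the cmake JOBS/THNUM selection above."""
    ncpu = multiprocessing.cpu_count()
    if jobs:                   # explicit -DJOBS=<n> wins unchanged
        return jobs
    return min(default, ncpu)  # default request capped at the core count

print(pick_thread_num())       # e.g. 8 on a 16-core machine, 4 on a 4-core one
print(pick_thread_num(16))     # 16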

@@ -6,17 +6,17 @@
         "net_name": "ResNet50",
         "mode": 0,
         "iteration": 0,
-        "kernels": ["TensorAdd"]
+        "kernels": ["Default/Conv2D-op2", "Default/TensorAdd-op10"]
     },
     "DumpSettingsSpec": {
-        "enable": "true: dump enable false: dump disable",
-        "trans_flag": "true: trans to host format,false: not trans format",
+        "enable": "true: dump enable, false: dump disable",
+        "trans_flag": "true: trans to host format, false: not trans format",
         "path": "the dump file folder",
         "net_name": "net name eg:ResNet50",
-        "mode": "0: dump all kernels 1: dump kernels in kernels list",
-        "iteration": "0: all iteration others: specified iteration ",
-        "kernels": "kernel name list need to be dump"
+        "mode": "0: dump all kernels, 1: dump kernels in kernels list",
+        "iteration": "0: all iteration, others: specified iteration ",
+        "kernels": "op's full scope name which need to be dump"
     },
     "other": {}
 }
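The kernels list changes from bare operator types to full scope names such as Default/Conv2D-op2. A quick sanity check for a local config, sketched in Python; the file name is an assumption, not fixed by this PR:

import json

# Hypothetical path: point this at your own e2e dump config file.
with open("e2e_dump_config.json") as f:
    dump_settings = json.load(f)["DumpSettings"]

# After this change every entry must be a full scope name like
# "Default/Conv2D-op2"; bare op types such as "TensorAdd" no longer match.
bare = [k for k in dump_settings["kernels"] if "/" not in k]
assert not bare, "bare op types will not match any kernel: %s" % bare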

@@ -6,17 +6,17 @@
         "net_name": "ResNet50",
         "mode": 0,
         "iteration": 0,
-        "kernels": ["AllReduce","BiasAddGrad","Conv2DBackpropFilter","SparseSoftmaxCrossEntropyWithLogits"]
+        "kernels": ["Default/Conv2D-op2", "Default/TensorAdd-op10"]
     },
     "DumpSettingsSpec": {
-        "enable": "true: dump enable false: dump disable",
-        "trans_flag": "true: trans to host format,false: not trans format",
+        "enable": "true: dump enable, false: dump disable",
+        "trans_flag": "true: trans to host format, false: not trans format",
         "path": "the dump file folder",
         "net_name": "net name eg:ResNet50",
-        "mode": "0: dump all kernels 1: dump kernels in kernels list",
-        "iteration": "0: all iteration others: specified iteration ",
-        "kernels": "kernel name list need to be dump"
+        "mode": "0: dump all kernels, 1: dump kernels in kernels list",
+        "iteration": "0: all iteration, others: specified iteration ",
+        "kernels": "op's full scope name which need to be dump"
     },
     "other": {}
 }

@@ -6,17 +6,17 @@
         "net_name": "ResNet50",
         "mode": 0,
         "iteration": 0,
-        "kernels": ["AllReduce","BiasAddGrad","Conv2DBackpropFilter","SparseSoftmaxCrossEntropyWithLogits"]
+        "kernels": ["Default/Conv2D-op2", "Default/TensorAdd-op10"]
     },
     "DumpSettingsSpec": {
-        "enable": "true: dump enable false: dump disable",
-        "trans_flag": "true: trans to host format,false: not trans format",
+        "enable": "true: dump enable, false: dump disable",
+        "trans_flag": "true: trans to host format, false: not trans format",
         "path": "the dump file folder",
         "net_name": "net name eg:ResNet50",
-        "mode": "0: dump all kernels 1: dump kernels in kernels list",
-        "iteration": "0: all iteration others: specified iteration ",
-        "kernels": "kernel name list need to be dump"
+        "mode": "0: dump all kernels, 1: dump kernels in kernels list",
+        "iteration": "0: all iteration, others: specified iteration ",
+        "kernels": "op's full scope name which need to be dump"
     },
     "other": {}
 }

@@ -12,20 +12,22 @@ RUN apt update \
     && DEBIAN_FRONTEND=noninteractive apt install -y \
     vim \
     wget \
+    curl \
     xz-utils \
     net-tools \
     openssh-client \
     git \
+    subversion \
     ntpdate \
     tzdata \
     tcl \
-    sudo
+    sudo \
+    bash-completion

 # Install compile tools
 RUN DEBIAN_FRONTEND=noninteractive apt install -y \
     gcc \
     g++ \
+    zlibc \
     make \
     libgmp-dev \
     patch \
@@ -39,7 +41,8 @@ RUN echo "dash dash/sh boolean false" | debconf-set-selections
 RUN DEBIAN_FRONTEND=noninteractive dpkg-reconfigure dash

 # Install python (v3.7.5)
-RUN apt install -y --no-install-recommends libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev libgdbm-dev liblzma-dev libreadline-dev \
+RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \
+    libgdbm-dev libgdbm-compat-dev liblzma-dev libreadline-dev libsqlite3-dev \
     && cd /tmp \
     && wget https://github.com/python/cpython/archive/v3.7.5.tar.gz \
     && tar -xvf v3.7.5.tar.gz \
@@ -62,12 +65,12 @@ RUN mkdir -pv /root/.pip \
     && echo "index-url=http://mirrors.aliyun.com/pypi/simple/" >> /root/.pip/pip.conf

 # Install pip package
-RUN pip install numpy \
-    && pip install wheel \
-    && pip install nose \
-    && pip install pytest \
-    && pip install pytest-xdist \
-    && pip list
+RUN pip install --no-cache-dir \
+    numpy \
+    wheel \
+    nose \
+    pytest \
+    pytest-xdist

 # Install cmake (v3.14.1)
 RUN cd /tmp \
@@ -77,4 +80,4 @@ RUN cd /tmp \
     && rm -f /tmp/cmake-3.14.1-Linux-x86_64.sh

 # Install MindSpore cpu whl package
-RUN pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.1.0-alpha/MindSpore/cpu/ubuntu-x86/mindspore-0.1.0-cp37-cp37m-linux_x86_64.whl
+RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.1.0-alpha/MindSpore/cpu/ubuntu-x86/mindspore-0.1.0-cp37-cp37m-linux_x86_64.whl

@@ -12,20 +12,22 @@ RUN apt update \
    && DEBIAN_FRONTEND=noninteractive apt install -y \
     vim \
     wget \
+    curl \
     xz-utils \
     net-tools \
     openssh-client \
     git \
+    subversion \
     ntpdate \
     tzdata \
     tcl \
-    sudo
+    sudo \
+    bash-completion

 # Install compile tools
 RUN DEBIAN_FRONTEND=noninteractive apt install -y \
     gcc \
     g++ \
+    zlibc \
     make \
     libgmp-dev \
     patch \
@@ -39,7 +41,8 @@ RUN echo "dash dash/sh boolean false" | debconf-set-selections
 RUN DEBIAN_FRONTEND=noninteractive dpkg-reconfigure dash

 # Install python (v3.7.5)
-RUN apt install -y --no-install-recommends libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev libgdbm-dev liblzma-dev libreadline-dev \
+RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \
+    libgdbm-dev libgdbm-compat-dev liblzma-dev libreadline-dev libsqlite3-dev \
     && cd /tmp \
     && wget https://github.com/python/cpython/archive/v3.7.5.tar.gz \
     && tar -xvf v3.7.5.tar.gz \
@@ -62,12 +65,12 @@ RUN mkdir -pv /root/.pip \
     && echo "index-url=http://mirrors.aliyun.com/pypi/simple/" >> /root/.pip/pip.conf

 # Install pip package
-RUN pip install numpy \
-    && pip install wheel \
-    && pip install nose \
-    && pip install pytest \
-    && pip install pytest-xdist \
-    && pip list
+RUN pip install --no-cache-dir \
+    numpy \
+    wheel \
+    nose \
+    pytest \
+    pytest-xdist

 # Install cmake (v3.14.1)
 RUN cd /tmp \
@@ -77,4 +80,4 @@ RUN cd /tmp \
     && rm -f /tmp/cmake-3.14.1-Linux-x86_64.sh

 # Install MindSpore cuda-10.1 whl package
-RUN pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.1.0-alpha/MindSpore/gpu/cuda-10.1/mindspore-0.1.0-cp37-cp37m-linux_x86_64.whl
+RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.1.0-alpha/MindSpore/gpu/cuda-10.1/mindspore-0.1.0-cp37-cp37m-linux_x86_64.whl

@@ -12,20 +12,22 @@ RUN apt update \
    && DEBIAN_FRONTEND=noninteractive apt install -y \
     vim \
     wget \
+    curl \
     xz-utils \
     net-tools \
     openssh-client \
     git \
+    subversion \
     ntpdate \
     tzdata \
     tcl \
-    sudo
+    sudo \
+    bash-completion

 # Install compile tools
 RUN DEBIAN_FRONTEND=noninteractive apt install -y \
     gcc \
     g++ \
+    zlibc \
     make \
     libgmp-dev \
     patch \
@@ -39,7 +41,8 @@ RUN echo "dash dash/sh boolean false" | debconf-set-selections
 RUN DEBIAN_FRONTEND=noninteractive dpkg-reconfigure dash

 # Install python (v3.7.5)
-RUN apt install -y --no-install-recommends libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev libgdbm-dev liblzma-dev libreadline-dev \
+RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \
+    libgdbm-dev libgdbm-compat-dev liblzma-dev libreadline-dev libsqlite3-dev \
     && cd /tmp \
     && wget https://github.com/python/cpython/archive/v3.7.5.tar.gz \
     && tar -xvf v3.7.5.tar.gz \
@@ -62,12 +65,12 @@ RUN mkdir -pv /root/.pip \
     && echo "index-url=http://mirrors.aliyun.com/pypi/simple/" >> /root/.pip/pip.conf

 # Install pip package
-RUN pip install numpy \
-    && pip install wheel \
-    && pip install nose \
-    && pip install pytest \
-    && pip install pytest-xdist \
-    && pip list
+RUN pip install --no-cache-dir \
+    numpy \
+    wheel \
+    nose \
+    pytest \
+    pytest-xdist

 # Install cmake (v3.14.1)
 RUN cd /tmp \
@@ -77,4 +80,4 @@ RUN cd /tmp \
     && rm -f /tmp/cmake-3.14.1-Linux-x86_64.sh

 # Install MindSpore cuda-9.2 whl package
-RUN pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.1.0-alpha/MindSpore/gpu/cuda-9.2/mindspore-0.1.0-cp37-cp37m-linux_x86_64.whl
+RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.1.0-alpha/MindSpore/gpu/cuda-9.2/mindspore-0.1.0-cp37-cp37m-linux_x86_64.whl

Binary image file changed but not shown (size: 121 KiB before, 35 KiB after).

@@ -1,55 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
network config setting, will be used in main.py
"""
from easydict import EasyDict as edict
import mindspore.common.dtype as mstype
from mindspore.model_zoo.Bert_NEZHA import BertConfig
bert_cfg = edict({
'epoch_size': 10,
'num_warmup_steps': 0,
'start_learning_rate': 1e-4,
'end_learning_rate': 1,
'decay_steps': 1000,
'power': 10.0,
'save_checkpoint_steps': 2000,
'keep_checkpoint_max': 10,
'checkpoint_prefix': "checkpoint_bert",
'DATA_DIR' = "/your/path/examples.tfrecord"
'SCHEMA_DIR' = "/your/path/datasetSchema.json"
'bert_config': BertConfig(
batch_size=16,
seq_length=128,
vocab_size=21136,
hidden_size=1024,
num_hidden_layers=24,
num_attention_heads=16,
intermediate_size=4096,
hidden_act="gelu",
hidden_dropout_prob=0.0,
attention_probs_dropout_prob=0.0,
max_position_embeddings=512,
type_vocab_size=2,
initializer_range=0.02,
use_relative_positions=True,
input_mask_from_dataset=True,
token_type_ids_from_dataset=True,
dtype=mstype.float32,
compute_type=mstype.float16,
)
})

@@ -0,0 +1,57 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
network config setting, will be used in train.py
"""
from easydict import EasyDict as edict
import mindspore.common.dtype as mstype
from mindspore.model_zoo.Bert_NEZHA import BertConfig
bert_train_cfg = edict({
'epoch_size': 10,
'num_warmup_steps': 0,
'start_learning_rate': 1e-4,
'end_learning_rate': 0.0,
'decay_steps': 1000,
'power': 10.0,
'save_checkpoint_steps': 2000,
'keep_checkpoint_max': 10,
'checkpoint_prefix': "checkpoint_bert",
# please add your own dataset path
'DATA_DIR': "/your/path/examples.tfrecord",
# please add your own dataset schema path
'SCHEMA_DIR': "/your/path/datasetSchema.json"
})
bert_net_cfg = BertConfig(
batch_size=16,
seq_length=128,
vocab_size=21136,
hidden_size=1024,
num_hidden_layers=24,
num_attention_heads=16,
intermediate_size=4096,
hidden_act="gelu",
hidden_dropout_prob=0.0,
attention_probs_dropout_prob=0.0,
max_position_embeddings=512,
type_vocab_size=2,
initializer_range=0.02,
use_relative_positions=True,
input_mask_from_dataset=True,
token_type_ids_from_dataset=True,
dtype=mstype.float32,
compute_type=mstype.float16,
)
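The old single bert_cfg edict (deleted above) mixed run hyperparameters with the model architecture and contained invalid dict syntax around DATA_DIR/SCHEMA_DIR; the new file splits them into bert_train_cfg and a standalone BertConfig. A small consumption sketch, assuming this file is importable as config:

from config import bert_train_cfg, bert_net_cfg

# EasyDict permits attribute-style access to the training hyperparameters...
print(bert_train_cfg.epoch_size, bert_train_cfg.checkpoint_prefix)

# ...while the network architecture now lives in its own BertConfig object.
print(bert_net_cfg.batch_size, bert_net_cfg.num_hidden_layers)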

@@ -14,7 +14,8 @@
 # ============================================================================

 """
-NEZHA (NEural contextualiZed representation for CHinese lAnguage understanding) is the Chinese pretrained language model currently based on BERT developed by Huawei.
+NEZHA (NEural contextualiZed representation for CHinese lAnguage understanding) is the Chinese pretrained language
+model currently based on BERT developed by Huawei.

 1. Prepare data
 Following the data preparation as in BERT, run command as below to get dataset for training:
     python ./create_pretraining_data.py \
@@ -28,36 +29,30 @@ Following the data preparation as in BERT, run command as below to get dataset f
     --random_seed=12345 \
     --dupe_factor=5

 2. Pretrain
-First, prepare the distributed training environment, then adjust configurations in config.py, finally run main.py.
+First, prepare the distributed training environment, then adjust configurations in config.py, finally run train.py.
 """

 import os
-import pytest
 import numpy as np
-from numpy import allclose
-from config import bert_cfg as cfg
+from config import bert_train_cfg, bert_net_cfg
+import mindspore.common.dtype as mstype
 import mindspore.dataset.engine.datasets as de
-import mindspore._c_dataengine as deMap
+import mindspore.dataset.transforms.c_transforms as C
 from mindspore import context
 from mindspore.common.tensor import Tensor
 from mindspore.train.model import Model
-from mindspore.train.callback import Callback
+from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor
-from mindspore.model_zoo.Bert_NEZHA import BertConfig, BertNetworkWithLoss, BertTrainOneStepCell
+from mindspore.model_zoo.Bert_NEZHA import BertNetworkWithLoss, BertTrainOneStepCell
 from mindspore.nn.optim import Lamb
-from mindspore import log as logger

 _current_dir = os.path.dirname(os.path.realpath(__file__))
-DATA_DIR = [cfg.DATA_DIR]
-SCHEMA_DIR = cfg.SCHEMA_DIR

-def me_de_train_dataset(batch_size):
-    """test me de train dataset"""
+def create_train_dataset(batch_size):
+    """create train dataset"""
     # apply repeat operations
-    repeat_count = cfg.epoch_size
-    ds = de.StorageDataset(DATA_DIR, SCHEMA_DIR, columns_list=["input_ids", "input_mask", "segment_ids",
-                                                               "next_sentence_labels", "masked_lm_positions",
-                                                               "masked_lm_ids", "masked_lm_weights"])
-    type_cast_op = deMap.TypeCastOp("int32")
+    repeat_count = bert_train_cfg.epoch_size
+    ds = de.StorageDataset([bert_train_cfg.DATA_DIR], bert_train_cfg.SCHEMA_DIR,
+                           columns_list=["input_ids", "input_mask", "segment_ids", "next_sentence_labels",
+                                         "masked_lm_positions", "masked_lm_ids", "masked_lm_weights"])
+    type_cast_op = C.TypeCast(mstype.int32)
     ds = ds.map(input_columns="masked_lm_ids", operations=type_cast_op)
     ds = ds.map(input_columns="masked_lm_positions", operations=type_cast_op)
     ds = ds.map(input_columns="next_sentence_labels", operations=type_cast_op)
@@ -69,43 +64,32 @@ def me_de_train_dataset(batch_size):
     ds = ds.repeat(repeat_count)
     return ds

 def weight_variable(shape):
     """weight variable"""
     np.random.seed(1)
     ones = np.random.uniform(-0.1, 0.1, size=shape).astype(np.float32)
     return Tensor(ones)

-class ModelCallback(Callback):
-    def __init__(self):
-        super(ModelCallback, self).__init__()
-        self.loss_list = []
-
-    def step_end(self, run_context):
-        cb_params = run_context.original_args()
-        self.loss_list.append(cb_params.net_outputs.asnumpy()[0])
-        logger.info("epoch: {}, outputs are {}".format(cb_params.cur_epoch_num, str(cb_params.net_outputs)))
-
-def test_bert_tdt():
-    """test bert tdt"""
+def train_bert():
+    """train bert"""
     context.set_context(mode=context.GRAPH_MODE)
     context.set_context(device_target="Ascend")
     context.set_context(enable_task_sink=True)
     context.set_context(enable_loop_sink=True)
     context.set_context(enable_mem_reuse=True)
-    parallel_callback = ModelCallback()
-    ds = me_de_train_dataset(cfg.bert_config.batch_size)
-    config = cfg.bert_config
-    netwithloss = BertNetworkWithLoss(config, True)
-    optimizer = Lamb(netwithloss.trainable_params(), decay_steps=cfg.decay_steps, start_learning_rate=cfg.start_learning_rate,
-                     end_learning_rate=cfg.end_learning_rate, power=cfg.power, warmup_steps=cfg.num_warmup_steps, decay_filter=lambda x: False)
+    ds = create_train_dataset(bert_net_cfg.batch_size)
+    netwithloss = BertNetworkWithLoss(bert_net_cfg, True)
+    optimizer = Lamb(netwithloss.trainable_params(), decay_steps=bert_train_cfg.decay_steps,
+                     start_learning_rate=bert_train_cfg.start_learning_rate,
+                     end_learning_rate=bert_train_cfg.end_learning_rate, power=bert_train_cfg.power,
+                     warmup_steps=bert_train_cfg.num_warmup_steps, decay_filter=lambda x: False)
     netwithgrads = BertTrainOneStepCell(netwithloss, optimizer=optimizer)
     netwithgrads.set_train(True)
     model = Model(netwithgrads)
-    config_ck = CheckpointConfig(save_checkpoint_steps=cfg.save_checkpoint_steps, keep_checkpoint_max=cfg.keep_checkpoint_max)
-    ckpoint_cb = ModelCheckpoint(prefix=cfg.checkpoint_prefix, config=config_ck)
-    model.train(ds.get_repeat_count(), ds, callbacks=[parallel_callback, ckpoint_cb], dataset_sink_mode=False)
+    config_ck = CheckpointConfig(save_checkpoint_steps=bert_train_cfg.save_checkpoint_steps,
+                                 keep_checkpoint_max=bert_train_cfg.keep_checkpoint_max)
+    ckpoint_cb = ModelCheckpoint(prefix=bert_train_cfg.checkpoint_prefix, config=config_ck)
+    model.train(ds.get_repeat_count(), ds, callbacks=[LossMonitor(), ckpoint_cb], dataset_sink_mode=False)

 if __name__ == '__main__':
-    test_bert_tdt()
+    train_bert()

@@ -0,0 +1,32 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
network config setting, will be used in train.py
"""
from easydict import EasyDict as edict
alexnet_cfg = edict({
'num_classes': 10,
'learning_rate': 0.002,
'momentum': 0.9,
'epoch_size': 1,
'batch_size': 32,
'buffer_size': 1000,
'image_height': 227,
'image_width': 227,
'save_checkpoint_steps': 1562,
'keep_checkpoint_max': 10,
})

@@ -0,0 +1,54 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
Produce the dataset
"""
from config import alexnet_cfg as cfg
import mindspore.dataset as ds
import mindspore.dataset.transforms.c_transforms as C
import mindspore.dataset.transforms.vision.c_transforms as CV
from mindspore.common import dtype as mstype
def create_dataset(data_path, batch_size=32, repeat_size=1, status="train"):
"""
create dataset for train or test
"""
cifar_ds = ds.Cifar10Dataset(data_path)
rescale = 1.0 / 255.0
shift = 0.0
resize_op = CV.Resize((cfg.image_height, cfg.image_width))
rescale_op = CV.Rescale(rescale, shift)
normalize_op = CV.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
if status == "train":
random_crop_op = CV.RandomCrop([32, 32], [4, 4, 4, 4])
random_horizontal_op = CV.RandomHorizontalFlip()
channel_swap_op = CV.HWC2CHW()
typecast_op = C.TypeCast(mstype.int32)
cifar_ds = cifar_ds.map(input_columns="label", operations=typecast_op)
if status == "train":
cifar_ds = cifar_ds.map(input_columns="image", operations=random_crop_op)
cifar_ds = cifar_ds.map(input_columns="image", operations=random_horizontal_op)
cifar_ds = cifar_ds.map(input_columns="image", operations=resize_op)
cifar_ds = cifar_ds.map(input_columns="image", operations=rescale_op)
cifar_ds = cifar_ds.map(input_columns="image", operations=normalize_op)
cifar_ds = cifar_ds.map(input_columns="image", operations=channel_swap_op)
cifar_ds = cifar_ds.shuffle(buffer_size=cfg.buffer_size)
cifar_ds = cifar_ds.batch(batch_size, drop_remainder=True)
cifar_ds = cifar_ds.repeat(repeat_size)
return cifar_ds
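A short usage sketch for create_dataset as defined above; the CIFAR-10 directory is a hypothetical local path:

from config import alexnet_cfg as cfg
from dataset import create_dataset

ds_train = create_dataset("/path/to/cifar-10-batches-bin",
                          batch_size=cfg.batch_size,
                          repeat_size=cfg.epoch_size,
                          status="train")
print(ds_train.get_dataset_size())  # number of batches per epoch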

@@ -0,0 +1,58 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
######################## eval alexnet example ########################
eval alexnet according to model file:
python eval.py --data_path /YourDataPath --ckpt_path Your.ckpt
"""
import argparse
from config import alexnet_cfg as cfg
from dataset import create_dataset
import mindspore.nn as nn
from mindspore import context
from mindspore.model_zoo.alexnet import AlexNet
from mindspore.train.serialization import load_checkpoint, load_param_into_net
from mindspore.train import Model
from mindspore.nn.metrics import Accuracy
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='MindSpore AlexNet Example')
parser.add_argument('--device_target', type=str, default="Ascend", choices=['Ascend', 'GPU'],
help='device where the code will be implemented (default: Ascend)')
parser.add_argument('--data_path', type=str, default="./", help='path where the dataset is saved')
parser.add_argument('--ckpt_path', type=str, default="./ckpt",
                    help='if running a test, must provide the path where the trained ckpt file is saved')
parser.add_argument('--dataset_sink_mode', type=bool, default=False, help='dataset_sink_mode is False or True')
args = parser.parse_args()
context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target, enable_mem_reuse=False)
network = AlexNet(cfg.num_classes)
loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
repeat_size = cfg.epoch_size
opt = nn.Momentum(network.trainable_params(), cfg.learning_rate, cfg.momentum)
model = Model(network, loss, opt, metrics={"Accuracy": Accuracy()}) # test
print("============== Starting Testing ==============")
param_dict = load_checkpoint(args.ckpt_path)
load_param_into_net(network, param_dict)
ds_eval = create_dataset(args.data_path,
cfg.batch_size,
1,
"test")
acc = model.eval(ds_eval, dataset_sink_mode=args.dataset_sink_mode)
print("============== Accuracy:{} ==============".format(acc))

@@ -0,0 +1,58 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
######################## train alexnet example ########################
train alexnet and get network model files(.ckpt) :
python train.py --data_path /YourDataPath
"""
import argparse
from config import alexnet_cfg as cfg
from dataset import create_dataset
import mindspore.nn as nn
from mindspore import context
from mindspore.train import Model
from mindspore.nn.metrics import Accuracy
from mindspore.model_zoo.alexnet import AlexNet
from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='MindSpore AlexNet Example')
parser.add_argument('--device_target', type=str, default="Ascend", choices=['Ascend', 'GPU'],
help='device where the code will be implemented (default: Ascend)')
parser.add_argument('--data_path', type=str, default="./", help='path where the dataset is saved')
parser.add_argument('--ckpt_path', type=str, default="./ckpt",
                    help='path where the trained ckpt files will be saved')
parser.add_argument('--dataset_sink_mode', type=bool, default=False, help='dataset_sink_mode is False or True')
args = parser.parse_args()
context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target, enable_mem_reuse=False)
network = AlexNet(cfg.num_classes)
loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean")
opt = nn.Momentum(network.trainable_params(), cfg.learning_rate, cfg.momentum)
model = Model(network, loss, opt, metrics={"Accuracy": Accuracy()}) # test
print("============== Starting Training ==============")
ds_train = create_dataset(args.data_path,
cfg.batch_size,
cfg.epoch_size,
"train")
config_ck = CheckpointConfig(save_checkpoint_steps=cfg.save_checkpoint_steps,
keep_checkpoint_max=cfg.keep_checkpoint_max)
ckpoint_cb = ModelCheckpoint(prefix="checkpoint_alexnet", directory=args.ckpt_path, config=config_ck)
model.train(cfg.epoch_size, ds_train, callbacks=[ckpoint_cb, LossMonitor()],
dataset_sink_mode=args.dataset_sink_mode)

Some files were not shown because too many files have changed in this diff.
