#!/usr/bin/env bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
#
# Launcher for training / inference.
#
# For every rank (0 .. RANK_SIZE-1) the script creates a fresh working
# directory "<task>_prophetnet_<DEVICE_ID>" next to the scripts directory,
# copies the python entry points, "src", "config", the configuration file and
# (optionally) the vocabulary into it, and runs train.py or eval.py there.
#
# Exported for the python processes: DEVICE_ID, RANK_ID, RANK_SIZE and, when
# more than one device is requested, HCCL_FLAG, DEPLOY_MODE and (if -j is
# given) RANK_TABLE_FILE.
#
# Requires the enhanced (util-linux) getopt because long options are used.

export DEVICE_ID=0
export RANK_ID=0
export RANK_SIZE=1

# Values filled in by the option parser below.
task=
configurations=
output=
vocab=
metric=

# Print an error message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print the command line usage to stdout.
echo_help() {
  echo "Usage:"
  echo "bash train.sh [-h] [-t t|i] [-n N] [-i N] [-j FILE] [-c FILE] [-o FILE] [-v FILE] [-m METRIC]"
  echo "options:"
  echo "        -h --help                show usage"
  echo "        -t --task                select task, 't' for training and 'i' for inference"
  echo "        -n --device_num          training with N devices"
  echo "        -i --device_id           training with device i"
  echo "        -j --hccl_json           set the rank table file"
  echo "        -c --config              set the configuration file"
  echo "        -o --output              set the output file of inference"
  echo "        -v --vocab               set the vocabulary"
  echo "        -m --metric              set the metric"
}

# Scan the given argument list for -j/--hccl_json and export the value that
# follows it as RANK_TABLE_FILE. Does nothing when the option is absent.
# Arguments: the full, getopt-normalized option list.
set_hccl_json() {
  while (( $# > 0 )); do
    if [[ "$1" == "-j" || "$1" == "--hccl_json" ]]; then
      export RANK_TABLE_FILE="$2"
      break
    fi
    shift
  done
}

# Scan the given argument list for -i/--device_id and export the value that
# follows it as DEVICE_ID, but only when it is an integer in the range 0..7.
# Any other value is ignored and DEVICE_ID keeps its previous value.
# Arguments: the full, getopt-normalized option list.
set_device_id() {
  while (( $# > 0 )); do
    if [[ "$1" == "-i" || "$1" == "--device_id" ]]; then
      # Check the shape with a regex first so arbitrary text never reaches
      # arithmetic evaluation; 10# keeps values such as "07" out of octal.
      if [[ "$2" =~ ^[0-9]+$ ]] && (( 10#$2 <= 7 )); then
        export DEVICE_ID="$2"
      fi
      break
    fi
    shift
  done
}

# Normalize the command line. getopt runs in its default (quoted) mode, so its
# output is safe to feed to "eval set --" and option values may contain
# whitespace or shell metacharacters.
if ! options=$(getopt -o ht:n:i:j:c:o:v:m: \
  -l help,task:,device_num:,device_id:,hccl_json:,config:,output:,vocab:,metric: \
  -- "$@"); then
  echo_help >&2
  exit 2
fi
eval set -- "$options"

# Keep the complete normalized list: the set_* helpers must see every option,
# including those that the loop below has already shifted away.
parsed_args=("$@")
echo "${parsed_args[*]}"

while (( $# > 0 )); do
  case "$1" in
    -h|--help)
      # Stop here: continuing would start a run with whatever else was parsed.
      echo_help
      exit 0
      ;;
    -t|--task)
      echo "task:"
      if [[ "$2" == "t" ]]; then
        task=train
      elif [[ "$2" == "i" ]]; then
        task=infer
      else
        die "unknown task '$2': expected 't' (training) or 'i' (inference)"
      fi
      shift 2
      ;;
    -n|--device_num)
      echo "device_num"
      [[ "$2" =~ ^[0-9]+$ ]] || die "device_num must be a non-negative integer, got '$2'"
      device_num=$(( 10#$2 ))
      if (( device_num == 1 )); then
        set_device_id "${parsed_args[@]}"
      elif (( device_num > 1 )); then
        export HCCL_FLAG=1
        export DEPLOY_MODE=0
        export RANK_SIZE="$device_num"
        set_hccl_json "${parsed_args[@]}"
      fi
      shift 2
      ;;
    -i|--device_id)
      echo "set device id"
      export DEVICE_ID="$2"
      shift 2
      ;;
    -j|--hccl_json)
      # Consumed by set_hccl_json when more than one device is requested.
      shift 2
      ;;
    -c|--config)
      echo "config"
      configurations=$2
      shift 2
      ;;
    -o|--output)
      echo "output"
      output=$2
      shift 2
      ;;
    -v|--vocab)
      echo "vocab"
      vocab=$2
      shift 2
      ;;
    -m|--metric)
      echo "metric"
      metric=$2
      shift 2
      ;;
    --)
      shift
      break
      ;;
    *)
      shift
      ;;
  esac
done

# Without a task the working directory name would be "_prophetnet_N" and
# nothing would be run; without a config the python entry points cannot start.
[[ -n "$task" ]] || die "missing required option -t/--task (use -h for help)"
[[ -n "$configurations" ]] || die "missing required option -c/--config (use -h for help)"

file_path=$(cd "$(dirname "$0")" || exit; pwd)

# Last non-zero exit status of a python run; becomes the script's exit status.
rc=0

for (( i = 0; i < RANK_SIZE; i++ )); do
  if (( RANK_SIZE > 1 )); then
    # Multi-device run: rank i is bound to device i.
    echo "$RANK_SIZE"
    export RANK_ID=$i
    export DEVICE_ID=$i
  fi
  echo "Working on device ${DEVICE_ID}"

  # Relative paths below (including a relative -c/-v value) are resolved from
  # the parent of the directory containing this script.
  cd "$file_path" || exit
  cd ../ || exit

  work_dir="./${task}_prophetnet_${DEVICE_ID}"
  rm -rf "$work_dir"
  mkdir "$work_dir" || exit
  cp train_gradient_accumulation.py train.py eval.py "$work_dir"
  cp -r src config "$work_dir"
  cp -- "$configurations" "$work_dir"
  if [[ -n "$vocab" ]]; then
    cp -- "$vocab" "$work_dir"
  fi

  cd "$work_dir" || exit
  env > log.log
  echo "$task"

  # The python processes run in the foreground, one rank after another.
  # To run them in the background instead, append ">>log.log 2>&1 &"
  # (training) or ">>log_infer.log 2>&1 &" (inference) to the commands.
  if [[ "$task" == "train" ]]; then
    python train.py --config "${configurations##*/}" --platform Ascend || rc=$?
  elif [[ "$task" == "infer" ]]; then
    # Pass optional flags only when a value was supplied, so an empty value
    # never leaves a dangling flag on the command line.
    eval_args=(--config "${configurations##*/}")
    if [[ -n "$output" ]]; then
      eval_args+=(--output "$output")
    fi
    if [[ -n "$vocab" ]]; then
      eval_args+=(--vocab "${vocab##*/}")
    fi
    if [[ -n "$metric" ]]; then
      eval_args+=(--metric "$metric")
    fi
    python eval.py "${eval_args[@]}" --platform Ascend || rc=$?
  fi
  cd ../ || exit
done

exit "$rc"