Synchronize latest Ascend software suite (18 Jul 2020) and merge branches

Branch: pull/3198/head
Author: yanghaoran (5 years ago)
Commit: 859acc6d2a

.gitmodules (vendored)

@@ -15,4 +15,4 @@
 url = https://gitee.com/mindspore/akg.git
 [submodule "graphengine"]
 path = graphengine
-url = https://gitee.com/ms-incubator/graphengine.git
+url = https://gitee.com/mindspore/graphengine.git

@@ -202,10 +202,10 @@ Check out how MindSpore Open Governance [works](https://gitee.com/mindspore/comm
 ### Communication
-- [MindSpore Slack](https://join.slack.com/t/mindspore/shared_invite/enQtOTcwMTIxMDI3NjM0LTNkMWM2MzI5NjIyZWU5ZWQ5M2EwMTQ5MWNiYzMxOGM4OWFhZjI4M2E5OGI2YTg3ODU1ODE2Njg1MThiNWI3YmQ) - Communication platform for developers.
+- [MindSpore Slack](https://join.slack.com/t/mindspore/shared_invite/zt-dgk65rli-3ex4xvS4wHX7UDmsQmfu8w) - Communication platform for developers.
 - IRC channel at `#mindspore` (only for meeting minutes logging purpose)
-- Video Conferencing: https://meet.jit.si
-- Mailing-list: https://mailweb.mindspore.cn/postorius/lists
+- Video Conferencing: TBD
+- Mailing-list: <https://mailweb.mindspore.cn/postorius/lists>
 ## Contributing

akg

@@ -1 +1 @@
-Subproject commit df57a6cf9450e347d1854687d1fe66a420ee3b35
+Subproject commit f60af9df4220bf3db5de2b224418953c0dc1f625

@@ -24,7 +24,7 @@ usage()
 {
 echo "Usage:"
 echo "bash build.sh [-d] [-r] [-v] [-c on|off] [-t on|off] [-g on|off] [-h] [-b ge] [-m infer|train] \\"
-echo " [-a on|off] [-Q on|off] [-S on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\"
+echo " [-a on|off] [-Q on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\"
 echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K] [-B on|off] [-E] [-l on|off]"
 echo ""
 echo "Options:"
@@ -48,7 +48,6 @@ usage()
 echo " -P Enable dump anf graph to file in ProtoBuffer format, default on"
 echo " -Q Enable dump memory, default off"
 echo " -D Enable dumping of function graph ir, default on"
-echo " -S Enable async data dump, default off"
 echo " -z Compile dataset & mindrecord, default on"
 echo " -M Enable MPI and NCCL for GPU training, gpu default on"
 echo " -V Specify the minimum required cuda version, default CUDA 10.1"
@@ -89,7 +88,6 @@ checkopts()
 ENABLE_TIMELINE="off"
 ENABLE_DUMP2PROTO="on"
 ENABLE_DUMPE2E="off"
-ENABLE_DATA_DUMP="off"
 ENABLE_DUMP_IR="on"
 COMPILE_MINDDATA="on"
 ENABLE_MPI="off"
@@ -104,7 +102,7 @@ checkopts()
 ENABLE_PYTHON="on"
 # Process the options
-while getopts 'drvj:c:t:hsb:a:g:p:ie:m:l:I:LRP:Q:S:D:zM:V:K:sB:E' opt
+while getopts 'drvj:c:t:hsb:a:g:p:ie:m:l:I:LRP:Q:D:zM:V:K:sB:E' opt
 do
 OPTARG=$(echo ${OPTARG} | tr '[A-Z]' '[a-z]')
 case "${opt}" in
@@ -186,6 +184,7 @@ checkopts()
 elif [[ "X$OPTARG" == "Xd" || "X$OPTARG" == "Xascend" ]]; then
 ENABLE_D="on"
 ENABLE_CPU="on"
+ENABLE_SERVING="on"
 elif [[ "X$OPTARG" == "Xcpu" ]]; then
 ENABLE_CPU="on"
 else
@@ -220,11 +219,6 @@ checkopts()
 ENABLE_DUMPE2E="$OPTARG"
 echo "enable dump end to end"
 ;;
-S)
-check_on_off $OPTARG S
-ENABLE_DATA_DUMP="$OPTARG"
-echo "enable data dump"
-;;
 D)
 check_on_off $OPTARG D
 ENABLE_DUMP_IR="$OPTARG"
@@ -328,9 +322,6 @@ build_mindspore()
 if [[ "X$ENABLE_DUMPE2E" = "Xon" ]]; then
 CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_E2E=ON"
 fi
-if [[ "X$ENABLE_DATA_DUMP" = "Xon" ]]; then
-CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DATA_DUMP=ON"
-fi
 CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_IR=${ENABLE_DUMP_IR}"
 CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_PYTHON=${ENABLE_PYTHON}"
 if [[ "X$ENABLE_MPI" = "Xon" ]]; then

@@ -1,4 +1,4 @@
-set(glog_CXXFLAGS "-D_FORTIFY_SOURCE=2 -O2 ${SECURE_CXX_FLAGS}")
+set(glog_CXXFLAGS "-D_FORTIFY_SOURCE=2 -O2 ${SECURE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=0")
 set(glog_CFLAGS "-D_FORTIFY_SOURCE=2 -O2")
 mindspore_add_pkg(glog
 VER 0.4.0

@@ -116,10 +116,10 @@ if(ENABLE_DUMP_E2E)
 add_compile_definitions(ENABLE_DUMP_E2E)
 endif()
-if(ENABLE_DATA_DUMP)
-add_compile_definitions(ENABLE_DATA_DUMP)
-endif()
 if(ENABLE_DEBUGGER)
 add_compile_definitions(ENABLE_DEBUGGER)
 endif()
+if(ENABLE_TESTCASES)
+add_compile_definitions(ENABLE_TESTCASES)
+endif()

@@ -1,13 +1,16 @@
 # find exec
 find_package(Python3 3.7 COMPONENTS Interpreter Development)
 if (NOT Python3_FOUND)
-message("No python3 found.")
-return ()
+message(FATAL_ERROR "No python3 found.")
 endif ()
 set(PYTHON ${Python3_EXECUTABLE})
 set(PYTHON_VERSION ${Python3_VERSION_MAJOR}.${Python3_VERSION_MINOR})
+if (NOT PYTHON_VERSION MATCHES "3.7")
+message(FATAL_ERROR "FIND PYTHON VERSION ${PYTHON_VERSION} BUT CAN NOT MATCH PYTHON VERSION 3.7")
+endif ()
 find_package(Git)
 if (NOT GIT_FOUND)
 message("No git found.")

@@ -1 +1 @@
-Subproject commit eee707935c066c16e9b9cd207f8125871b6b97cf
+Subproject commit 103f2d1019dc50d781d7a964551d9f1f50b3b009

@@ -17,7 +17,7 @@
 """Resources for ast tree parse."""
 import ast
 import math
-from mindspore import IndexedSlices
+from mindspore import IndexedSlices, SparseTensor
 from mindspore.ops.composite import multitype_ops
 from mindspore.ops import functional as F, composite as C
 from . import standard_method as M
@@ -140,4 +140,5 @@ convert_object_map = {
 # user defined
 IndexedSlices: F.make_indexed_slices,
+SparseTensor: F.make_sparse_tensor,
 }

@@ -44,7 +44,7 @@ if(ENABLE_GPU)
 "backend/kernel_compiler/akg/akg_kernel_attrs_process.cc"
 )
-list(APPEND CUDA_NVCC_FLAGS -arch=sm_53)
+list(APPEND CUDA_NVCC_FLAGS -arch=sm_53 --expt-relaxed-constexpr)
 list(REMOVE_ITEM GPU_SRC_LIST "runtime/device/gpu/blocking_queue.cc" "runtime/device/gpu/gpu_buffer_mgr.cc")
 list(REMOVE_ITEM GPU_SRC_LIST "runtime/device/gpu/mpi/mpi_initializer.cc"
 "runtime/device/gpu/distribution/collective_wrapper.cc"

@@ -26,14 +26,6 @@ if (ENABLE_CPU)
 "cpu/*.cc"
 )
-list(REMOVE_ITEM CPU_SRC_LIST "cpu/ps/push_kernel.cc"
-"cpu/ps/pull_kernel.cc"
-"cpu/ps/embedding_look_up_ps_kernel.cc"
-"cpu/ps/embedding_look_up_proxy_kernel.cc"
-"cpu/ps/apply_momentum_ps_kernel.cc"
-"cpu/ps/sparse_apply_adam_ps_kernel.cc"
-"cpu/ps/sparse_apply_ftrl_ps_kernel.cc")
 if (NOT ENABLE_MPI)
 list(REMOVE_ITEM CPU_SRC_LIST "cpu/allgather_cpu_kernel.cc")
 list(REMOVE_ITEM CPU_SRC_LIST "cpu/reduce_scatter_cpu_kernel.cc")
@@ -41,6 +33,17 @@ if (ENABLE_CPU)
 endif ()
 endif ()
+if (${CMAKE_SYSTEM_NAME} MATCHES "Windows" OR ENABLE_GE)
+list(REMOVE_ITEM CPU_SRC_LIST "cpu/ps/apply_momentum_ps_kernel.cc")
+list(REMOVE_ITEM CPU_SRC_LIST "cpu/ps/embedding_look_up_proxy_kernel.cc")
+list(REMOVE_ITEM CPU_SRC_LIST "cpu/ps/embedding_look_up_ps_kernel.cc")
+list(REMOVE_ITEM CPU_SRC_LIST "cpu/ps/pserver_kernel.cc")
+list(REMOVE_ITEM CPU_SRC_LIST "cpu/ps/pull_kernel.cc")
+list(REMOVE_ITEM CPU_SRC_LIST "cpu/ps/push_kernel.cc")
+list(REMOVE_ITEM CPU_SRC_LIST "cpu/ps/sparse_apply_adam_ps_kernel.cc")
+list(REMOVE_ITEM CPU_SRC_LIST "cpu/ps/sparse_apply_ftrl_ps_kernel.cc")
+endif()
 if (ENABLE_GPU)
 file(GLOB_RECURSE CUDA_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
 "gpu/*.cu"

@@ -18,6 +18,7 @@
 #include <algorithm>
 #include "backend/session/anf_runtime_algorithm.h"
 #include "backend/optimizer/common/helper.h"
+#include "backend/kernel_compiler/common_utils.h"
 namespace mindspore {
 namespace kernel {
@@ -75,15 +76,7 @@ void SetAkgAttrsForCast(const AnfNodePtr &anf_node) {
 std::string dst_type;
 TypeId output_type = AnfAlgo::GetOutputDeviceDataType(anf_node, 0);
-if (output_type == kFloat32->type_id()) {
-dst_type = "float32";
-} else if (output_type == kFloat16->type_id()) {
-dst_type = "float16";
-} else if (output_type == kInt32->type_id()) {
-dst_type = "int32";
-} else {
-MS_LOG(WARNING) << "Unknown cast_to type: " << TypeIdToType(output_type)->ToString();
-}
+dst_type = TypeId2String(output_type);
 AnfAlgo::SetNodeAttr("dst_type", MakeValue(dst_type), anf_node);
 }
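Note: the hand-written if/else chain is replaced by a single TypeId2String call, which is also why the extra include of backend/kernel_compiler/common_utils.h appears in the first hunk. A minimal, purely illustrative C++ sketch of the idea behind such a centralized id-to-name mapping (the enum and function names below are invented for the example and are not MindSpore's):

    #include <string>

    // One mapping function instead of repeated if/else chains: adding a type
    // means one edit here rather than edits at every call site.
    enum class TypeIdSketch { kFloat32, kFloat16, kInt32 };

    std::string TypeIdToNameSketch(TypeIdSketch id) {
      switch (id) {
        case TypeIdSketch::kFloat32: return "float32";
        case TypeIdSketch::kFloat16: return "float16";
        case TypeIdSketch::kInt32:   return "int32";
      }
      return "unknown";  // fallback plays the role of the old WARNING branch
    }

    int main() { return TypeIdToNameSketch(TypeIdSketch::kFloat32) == "float32" ? 0 : 1; }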

@@ -21,9 +21,7 @@
 #include <memory>
 #include "framework/ge_runtime/task_info.h"
 #include "backend/kernel_compiler/kernel.h"
-#ifdef ENABLE_DATA_DUMP
 #include "debug/data_dump_parser.h"
-#endif
 using TaskInfoPtr = std::shared_ptr<ge::model_runner::TaskInfo>;
 namespace mindspore {
@@ -34,13 +32,7 @@ class AscendKernelMod : public KernelMod {
 const std::vector<AddressPtr> &, uint32_t) = 0;
 uint32_t block_dim() { return block_dim_; }
 uint32_t stream_id() { return stream_id_; }
-virtual bool NeedDump() {
-#ifdef ENABLE_DATA_DUMP
-return DataDumpParser::GetInstance().NeedDump(kernel_name_);
-#else
-return false;
-#endif
-}
+virtual bool NeedDump() { return DataDumpParser::GetInstance().NeedDump(kernel_name_); }
 protected:
 uint32_t block_dim_{1};

File diff suppressed because it is too large.

@@ -73,9 +73,18 @@ class KernelMeta {
 };
 struct SparseGradient {
-float *value_;
-int *indices_;
-size_t indices_size_;
+float *value_{nullptr};
+int *indices_{nullptr};
+size_t indices_size_{0};
+};
+struct ReduceSparseGradientParam {
+SparseGradient *input_grad_{nullptr};
+SparseGradient *workspace_grad_{nullptr};
+SparseGradient *output_grad_{nullptr};
+size_t max_index_{0};
+size_t value_stride_{0};
+bool use_sort_reduce_{false};
 };
 struct MultiThreadComputeParams {
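Note: with brace initializers, a default-constructed SparseGradient (and the new ReduceSparseGradientParam) starts in a well-defined state instead of holding indeterminate pointers and sizes. A small standalone C++ sketch of the same idiom (names are illustrative, not the MindSpore definitions):

    #include <cstddef>
    #include <iostream>

    // Default member initializers: forgetting to set a field can no longer
    // leave an uninitialized pointer or size behind.
    struct SparseGradientSketch {
      float *value_{nullptr};
      int *indices_{nullptr};
      std::size_t indices_size_{0};
    };

    int main() {
      SparseGradientSketch g;  // no constructor needed; members are null/zero
      std::cout << (g.value_ == nullptr) << " " << g.indices_size_ << "\n";  // prints "1 0"
      return 0;
    }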
@@ -112,10 +121,6 @@ void SaveJsonInfo(const std::string &json_name, const std::string &info);
 std::string GetProcessor(const AnfNodePtr &anf_node);
 bool IsSameShape(const std::vector<size_t> &shape_a, const std::vector<size_t> &shape_b);
 int Sign(float x);
-void DeduplicateIndexedSlices(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad, size_t first_dim,
-size_t outer_dim);
-void ReduceSparseGradient(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad, size_t first_dim,
-size_t outer_dim, bool use_multi_threads = true);
 std::pair<AnfNodePtr, size_t> GetKernelInput(const AnfNodePtr &anf_node, size_t index);
 std::vector<std::pair<AnfNodePtr, std::pair<size_t, size_t>>> GetInputIndex(const std::vector<AnfNodePtr> &node_list,
 const std::vector<AnfNodePtr> &input_list);
@@ -130,14 +135,7 @@ void GetGraphRealOutput(const FuncGraphPtr &func_graph, std::vector<std::pair<An
 bool IsWeightBoundary(const AnfNodePtr &node);
 void MultiThreadCompute(const MultiThreadComputeFunc &func, MultiThreadComputeParams *params,
 size_t total_compute_size);
-void RunMultiThreadReduceSparseGradient(const SparseGradient &origin_sparse_grad, SparseGradient *unique_grad,
-size_t outer_dim, std::vector<std::pair<int, size_t>> *sorted_indices,
-std::vector<size_t> *slice_positions);
-void ReduceMultiSparseGradient(const std::vector<std::shared_ptr<SparseGradient>> &unique_slice_grads,
-SparseGradient *tmp_grad, SparseGradient *unique_grad, size_t first_dim,
-size_t outer_dim);
-void TwoLevelReduceSparseGradient(const SparseGradient &origin_sparse_grad, SparseGradient *tmp_grad,
-SparseGradient *unique_grad, size_t first_dim, size_t outer_dim);
+void BucketReduceSparseGradient(const ReduceSparseGradientParam &param);
 std::vector<int> GetReduceAttrAxis(const CNodePtr &cnode);
 } // namespace kernel
 } // namespace mindspore
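Note: several multi-stage reduce entry points are folded into one BucketReduceSparseGradient that takes a parameter struct, so call sites pass a single ReduceSparseGradientParam instead of a long positional argument list. A hedged C++ sketch of that parameter-object calling pattern (the reducer body here is a placeholder, not the real bucketed implementation):

    #include <cstddef>

    struct GradientSketch {               // stand-in for SparseGradient
      float *value_{nullptr};
      int *indices_{nullptr};
      std::size_t indices_size_{0};
    };

    // Parameter object: related arguments travel together, and new options can
    // be added without touching every caller's signature.
    struct ReduceParamSketch {
      GradientSketch *input_grad_{nullptr};
      GradientSketch *output_grad_{nullptr};
      std::size_t max_index_{0};
      std::size_t value_stride_{0};
      bool use_sort_reduce_{false};
    };

    void BucketReduceSketch(const ReduceParamSketch &param) {
      if (param.input_grad_ == nullptr || param.output_grad_ == nullptr) {
        return;  // placeholder: the real code buckets and merges duplicate indices
      }
    }

    int main() {
      GradientSketch in, out;
      ReduceParamSketch param;
      param.input_grad_ = &in;
      param.output_grad_ = &out;
      param.value_stride_ = 8;
      BucketReduceSketch(param);
      return 0;
    }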

@@ -46,7 +46,7 @@ class EmbeddingLookUpCPUKernel : public CPUKernel {
 bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
 const std::vector<AddressPtr> &outputs) override;
-private:
+protected:
 void LookUpTable(const std::vector<kernel::AddressPtr> &inputs, size_t dim0, size_t dim1, size_t dim2,
 float **output_addr);
 void CheckParam(const CNodePtr &kernel_node);

@@ -53,15 +53,15 @@ bool EmbeddingLookUpProxyKernel::Launch(const std::vector<kernel::AddressPtr> &i
 size_t output_size = outputs[0]->size;
 size_t size = input_size / sizeof(float);
-::ps::SArray<float> lookup_ids(size, 0);
+::ps::SArray<int> lookup_ids(size, 0);
 ::ps::SArray<int> lengths{size};
-::ps::SArray<float> lookup_result;
+::ps::SArray<float> lookup_result(output_size / sizeof(float), 0);
 auto ret = memcpy_s(lookup_ids.data(), input_size, indices_addr, input_size);
 if (ret != EOK) {
 MS_LOG(EXCEPTION) << "Lookup id memcpy failed.";
 }
-parallel::ps::Worker<float>::GetInstance().DoPSEmbeddingLookup({key_}, lookup_ids, lengths, lookup_result,
+parallel::ps::Worker<float>::GetInstance().DoPSEmbeddingLookup({key_}, lookup_ids, lengths, &lookup_result,
 parallel::ps::kEmbeddingLookupCmd);
 auto ret2 = memcpy_s(output_addr, output_size, lookup_result.data(), output_size);
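Note: lookup_ids now uses an int element type matching the indices it copies, and lookup_result is sized up front to output_size / sizeof(float) elements and passed by pointer, so DoPSEmbeddingLookup fills caller-owned storage. A rough std::vector analogue of that pre-sized out-parameter pattern (ps::SArray belongs to the bundled ps-lite code and is not reproduced here):

    #include <cstddef>
    #include <vector>

    // The callee writes into storage the caller already sized; the pointer at
    // the call site signals that the argument will be modified.
    void LookupSketch(const std::vector<int> &ids, std::vector<float> *result) {
      for (std::size_t i = 0; i < ids.size() && i < result->size(); ++i) {
        (*result)[i] = static_cast<float>(ids[i]);  // placeholder for the real lookup
      }
    }

    int main() {
      std::vector<int> ids = {3, 1, 2};
      std::vector<float> result(ids.size(), 0.0f);  // pre-sized, like lookup_result above
      LookupSketch(ids, &result);
      return 0;
    }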

@@ -50,7 +50,7 @@ void EmbeddingLookUpPSKernel::InitKernel(
 split_num_ = pserver_num_;
 // input shape should be sharded after computing offset_;
-Shard(input_shape_, axis_);
+Shard(&input_shape_, axis_);
 size_t output_size =
 std::accumulate(output_shape_.begin(), output_shape_.end(), sizeof(float), std::multiplies<size_t>());

@@ -34,5 +34,13 @@ MS_REG_CPU_KERNEL_T(Push,
 MS_REG_CPU_KERNEL_T(
 Push, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeUInt64),
 PushKernel, float);
+MS_REG_CPU_KERNEL_T(Push,
+KernelAttr()
+.AddInputAttr(kNumberTypeFloat32)
+.AddInputAttr(kNumberTypeFloat32)
+.AddInputAttr(kNumberTypeFloat32)
+.AddOutputAttr(kNumberTypeUInt64),
+PushKernel, float);
 } // namespace kernel
 } // namespace mindspore

@@ -43,7 +43,7 @@ class PushKernel : public CPUKernel {
 sizes.push_back(SizeToInt(input->size) / sizeof(T));
 }
 parallel::ps::Worker<T>::GetInstance().Push(keys, addrs, sizes);
-memcpy(outputs[0]->addr, &key_, sizeof(size_t));
+memcpy_s(outputs[0]->addr, sizeof(size_t), &key_, sizeof(size_t));
 return true;
 }
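Note: memcpy_s takes the destination capacity as its second argument and fails instead of writing past the buffer; in this codebase it comes from the bundled securec library. The sketch below defines a stand-in with the same argument order purely for illustration (it is not the securec implementation):

    #include <cstddef>
    #include <cstring>
    #include <iostream>

    // Stand-in with memcpy_s-style checking: copy only when count fits in dest_max.
    int MemcpySketch(void *dest, std::size_t dest_max, const void *src, std::size_t count) {
      if (dest == nullptr || src == nullptr || count > dest_max) {
        return -1;  // refuse to overflow the destination
      }
      std::memcpy(dest, src, count);
      return 0;  // 0 plays the role of EOK
    }

    int main() {
      std::size_t key = 42;
      unsigned char out[sizeof(std::size_t)];
      if (MemcpySketch(out, sizeof(out), &key, sizeof(key)) != 0) {
        std::cerr << "copy failed\n";
      }
      return 0;
    }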

@@ -75,7 +75,7 @@ void SparseApplyAdamPSKernel::ReInit(const std::shared_ptr<std::vector<std::shar
 void SparseApplyAdamPSKernel::ReInit(const std::vector<AddressPtr> &inputs) {
 const auto &indices_addr = inputs[10];
-indices_size_ = indices_addr->size;
+indices_size_ = indices_addr->size / sizeof(int);
 workspace_size_list_[0] = indices_size_ * var_outer_dim_size_ * sizeof(float);
 workspace_size_list_[1] = indices_size_ * sizeof(int);
 }

@@ -64,7 +64,7 @@ void SparseApplyFtrlPSKernel::ReInit(const std::shared_ptr<std::vector<std::shar
 void SparseApplyFtrlPSKernel::ReInit(const std::vector<AddressPtr> &inputs) {
 const auto &indices_addr = inputs[4];
-indices_size_ = indices_addr->size;
+indices_size_ = indices_addr->size / sizeof(int);
 workspace_size_list_[0] = indices_size_ * var_outer_dim_size_ * sizeof(float);
 workspace_size_list_[1] = indices_size_ * sizeof(int);
 }
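Note: both ReInit hunks fix the same unit mismatch: indices_addr->size is a byte count, while the workspace math below it expects the number of int32 indices, hence the division by sizeof(int). A tiny C++ illustration of the bytes-versus-elements distinction:

    #include <cstddef>
    #include <iostream>

    int main() {
      std::size_t byte_size = 16;                           // e.g. 16 bytes of int32 indices
      std::size_t indices_count = byte_size / sizeof(int);  // 4 indices, not 16
      std::size_t outer_dim = 8;                            // illustrative row width
      std::size_t workspace_bytes = indices_count * outer_dim * sizeof(float);
      std::cout << indices_count << " indices, " << workspace_bytes << " workspace bytes\n";
      return 0;
    }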

Some files were not shown because too many files have changed in this diff.
