!937 Fix failure to set the output address in GPU PyNative mode

Merge pull request !937 from JoyLvliang/fix-set-output-addr-fail-in-gpu-pynative
pull/937/MERGE
mindspore-ci-bot, committed by Gitee 5 years ago
commit c477029c97

@ -198,17 +198,16 @@ void KernelRuntime::RunOpAssignOutputMemory(const AnfNodePtr &kernel) {
if (output_sizes.empty()) {
return;
}
if (AnfAlgo::GetCNodeName(kernel) == "ApplyMomentum") {
auto device_address = AnfAlgo::GetPrevNodeMutableOutputAddr(kernel, 0);
AnfAlgo::SetOutputAddr(device_address, 0, kernel.get());
AnfAlgo::SetOutputAddr(device_address, 1, kernel.get());
return;
}
for (size_t i = 0; i < output_sizes.size(); ++i) {
if (AnfAlgo::OutputAddrExist(kernel, i)) {
continue;
}
if (AnfAlgo::GetCNodeName(kernel) == kApplyMomentumOpName) {
auto device_address = AnfAlgo::GetPrevNodeMutableOutputAddr(kernel, i);
AnfAlgo::SetOutputAddr(device_address, i, kernel.get());
continue;
}
std::string output_format = AnfAlgo::GetOutputFormat(kernel, i);
auto output_type = AnfAlgo::GetOutputDeviceDataType(kernel, i);
auto device_address = CreateDeviceAddress(nullptr, output_sizes[i], output_format, output_type);

@ -195,8 +195,13 @@ void AscendBackendIRFusionOptimization(const std::shared_ptr<session::KernelGrap
}
auto optimizer = std::make_shared<GraphOptimizer>();
auto ir_fusion_pm = std::make_shared<PassManager>("ir_fusion_pm");
ir_fusion_pm->AddPass(std::make_shared<BatchNormGradSplit>());
ir_fusion_pm->AddPass(std::make_shared<FusedBatchNormFusion>());
if (context_ptr->execution_mode() == kPynativeMode) {
ir_fusion_pm->AddPass(std::make_shared<BnSplit>());
ir_fusion_pm->AddPass(std::make_shared<BnGradSplit>());
} else {
ir_fusion_pm->AddPass(std::make_shared<BatchNormGradSplit>());
ir_fusion_pm->AddPass(std::make_shared<FusedBatchNormFusion>());
}
ir_fusion_pm->AddPass(std::make_shared<AddMemcpyAsync>());
if (context_ptr->ir_fusion_flag()) {
AddAscendBackendOptionalIRFusion(ir_fusion_pm.get());

@ -256,6 +256,8 @@ void ConvertPyObjectToTensor(const py::object &input_object, const PrimitivePtr
tensor_ptr = std::make_shared<tensor::Tensor>(py::cast<py::list>(input_object), nullptr);
} else if (py::isinstance<py::array>(input_object)) {
tensor_ptr = std::make_shared<tensor::Tensor>(py::cast<py::array>(input_object), nullptr);
} else if (py::isinstance<py::none>(input_object)) {
return;
} else if (py::isinstance<py::tuple>(input_object)) {
auto tuple_inputs = py::cast<py::tuple>(input_object);
if (py::isinstance<tensor::Tensor>(tuple_inputs[0])) {

@ -77,10 +77,12 @@ class Tensor(Tensor_):
def __eq__(self, other):
if not isinstance(other, Tensor):
return False
x = self.asnumpy()
y = other.asnumpy()
out = np.equal(x, y)
return Tensor(np.array(out))
return Tensor(np.array(self.asnumpy() == other.asnumpy()))
def __ne__(self, other):
if not isinstance(other, Tensor):
return True
return Tensor(np.array(self.asnumpy() != other.asnumpy()))
def __hash__(self):
return hash(id(self))

@ -82,6 +82,7 @@ class _BatchNorm(Cell):
self.dtype = P.DType()
self.reshape = P.Reshape()
self.is_ascend = context.get_context("device_target") == "Ascend"
self.is_graph_mode = context.get_context("mode") == context.GRAPH_MODE
if context.get_context("enable_ge"):
self.is_ge_backend = True
@ -89,7 +90,7 @@ class _BatchNorm(Cell):
else:
self.is_ge_backend = False
self.momentum = 1.0 - momentum
if self.is_ge_backend or self.is_ascend:
if self.is_graph_mode and (self.is_ge_backend or self.is_ascend):
self.bn_train = P.BatchNorm(is_training=True,
epsilon=self.eps)
else:
@ -147,7 +148,7 @@ class _BatchNorm(Cell):
if self.is_ge_backend and self.is_global:
axes, re_shape = _shape_infer(F.shape(x), self.num_features)
y = self._global_sync(x, axes, re_shape)
elif self.is_ge_backend or self.is_ascend:
elif self.is_graph_mode and (self.is_ge_backend or self.is_ascend):
y, batch_mean, batch_var, _, _ = \
self.bn_train(x,
self.gamma,

Loading…
Cancel
Save