!13699 [MS][LITE][Develop] Fix write for big-endian devices

From: @lx0095
Reviewed-by: 
Signed-off-by:
pull/13699/MERGE
mindspore-ci-bot 4 years ago committed by Gitee
commit acfa8f066a

@ -82,7 +82,7 @@ class MS_API CPUDeviceInfo : public DeviceInfoContext {
public:
enum DeviceType GetDeviceType() const override { return DeviceType::kCPU; };
/// \brief Set the thread affinity of CPU cores.
/// \brief Set the thread affinity to CPU cores.
///
/// \param mode: 0: no affinities, 1: big cores first, 2: little cores first
void SetThreadAffinity(int mode);

@ -53,7 +53,7 @@ class MS_API MSTensor {
virtual Vector<int> shape() const = 0;
/// \brief Set the shape of MSTensor.
virtual void set_shape(const Vector<int> &name) = 0;
virtual void set_shape(const Vector<int> &shape) = 0;
/// \brief Get the number of elements in MSTensor.
///
@ -71,7 +71,7 @@ class MS_API MSTensor {
virtual String tensor_name() const = 0;
/// \brief Set the name of MSTensor.
virtual void set_tensor_name(const String name) = 0;
virtual void set_tensor_name(const String &name) = 0;
/// \brief Get the pointer of data in MSTensor.
///

@ -458,115 +458,91 @@ LoopRow4:
b WriteEnd
Write2:
add x2, x2, #8
str d9, [x11]
st1 {v9.2s}, [x11], x8
cmp x6, #1
beq WriteEnd
add x11, x11, x8
str d11, [x11]
st1 {v11.2s}, [x11], x8
cmp x6, #2
beq WriteEnd
add x11, x11, x8
str d13, [x11]
st1 {v13.2s}, [x11], x8
cmp x6, #3
beq WriteEnd
add x11, x11, x8
str d15, [x11]
st1 {v15.2s}, [x11], x8
cmp x6, #4
beq WriteEnd
add x11, x11, x8
str d17, [x11]
st1 {v17.2s}, [x11], x8
cmp x6, #5
beq WriteEnd
add x11, x11, x8
str d19, [x11]
st1 {v19.2s}, [x11], x8
cmp x6, #6
beq WriteEnd
add x11, x11, x8
str d21, [x11]
st1 {v21.2s}, [x11], x8
cmp x6, #7
beq WriteEnd
add x11, x11, x8
str d23, [x11]
st1 {v23.2s}, [x11], x8
cmp x6, #8
beq WriteEnd
add x11, x11, x8
str d25, [x11]
st1 {v25.2s}, [x11], x8
cmp x6, #9
beq WriteEnd
add x11, x11, x8
str d27, [x11]
st1 {v27.2s}, [x11], x8
cmp x6, #10
beq WriteEnd
add x11, x11, x8
str d29, [x11]
st1 {v29.2s}, [x11], x8
cmp x6, #11
beq WriteEnd
add x11, x11, x8
str d31, [x11]
add x11, x11, x8
st1 {v31.2s}, [x11], x8
add x11, x11, #8
b WriteEnd
Write3:
add x2, x2, #12
add x19, x11, #8
str d9, [x11]
st1 {v9.2s}, [x11], x8
st1 {v9.s}[2], [x19], x8
cmp x6, #1
beq WriteEnd
add x11, x11, x8
str d11, [x11]
st1 {v11.2s}, [x11], x8
st1 {v11.s}[2], [x19], x8
cmp x6, #2
beq WriteEnd
add x11, x11, x8
str d13, [x11]
st1 {v13.2s}, [x11], x8
st1 {v13.s}[2], [x19], x8
cmp x6, #3
beq WriteEnd
add x11, x11, x8
str d15, [x11]
st1 {v15.2s}, [x11], x8
st1 {v15.s}[2], [x19], x8
cmp x6, #4
beq WriteEnd
add x11, x11, x8
str d17, [x11]
st1 {v17.2s}, [x11], x8
st1 {v17.s}[2], [x19], x8
cmp x6, #5
beq WriteEnd
add x11, x11, x8
str d19, [x11]
st1 {v19.2s}, [x11], x8
st1 {v19.s}[2], [x19], x8
cmp x6, #6
beq WriteEnd
add x11, x11, x8
str d21, [x11]
st1 {v21.2s}, [x11], x8
st1 {v21.s}[2], [x19], x8
cmp x6, #7
beq WriteEnd
add x11, x11, x8
str d23, [x11]
st1 {v23.2s}, [x11], x8
st1 {v23.s}[2], [x19], x8
cmp x6, #8
beq WriteEnd
add x11, x11, x8
str d25, [x11]
st1 {v25.2s}, [x11], x8
st1 {v25.s}[2], [x19], x8
cmp x6, #9
beq WriteEnd
add x11, x11, x8
str d27, [x11]
st1 {v27.2s}, [x11], x8
st1 {v27.s}[2], [x19], x8
cmp x6, #10
beq WriteEnd
add x11, x11, x8
str d29, [x11]
st1 {v29.2s}, [x11], x8
st1 {v29.s}[2], [x19], x8
cmp x6, #11
beq WriteEnd
add x11, x11, x8
str d31, [x11]
st1 {v31.2s}, [x11], x8
st1 {v31.s}[2], [x19]
add x11, x11, x8
add x11, x11, #12
b WriteEnd
Write4:

@ -129,7 +129,7 @@ asm_function ConvDwFp32Indirect3x3
tbnz w11, #1, Write2
tbnz w11, #0, Write1
Write2:
str d29, [x0], #8
st1 {v29.2s}, [x0], #8
ext v29.16b, v29.16b, v29.16b, #8
tbz w11, #0, NextPixel
Write1:

@ -260,7 +260,7 @@ asm_function ConvDwFp32Indirect5x5
tbnz w2, #1, Write2
tbnz w2, #0, Write1
Write2:
str d29, [x0], #8
st1 {v29.2s}, [x0], #8
ext v29.16b, v29.16b, v29.16b, #8
tbz w2, #0, NextPixel
Write1:

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -30,8 +30,8 @@ constexpr auto kModelOptionKirinNpuFrequency = "mindspore.option.kirin_npu.frequ
struct Context::Data {
std::vector<std::shared_ptr<DeviceInfoContext>> device_info_list;
int32_t thread_num;
std::shared_ptr<Allocator> allocator;
int32_t thread_num = 2;
std::shared_ptr<Allocator> allocator = nullptr;
};
struct DeviceInfoContext::Data {

@ -74,7 +74,7 @@ class Tensor : public mindspore::tensor::MSTensor {
virtual bool operator==(const Tensor &tensor);
void set_tensor_name(std::string name) override { tensor_name_ = name; }
void set_tensor_name(const std::string &name) override { tensor_name_ = name; }
std::string tensor_name() const override { return tensor_name_; }

Loading…
Cancel
Save