!13009 [debugger] offline debug feature
From: @islam_amin Reviewed-by: Signed-off-by:pull/13009/MERGE
commit
83b25e10e9
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,28 @@
|
||||
-----------------------------------------------------------
|
||||
tensor_info_1 attributes:
|
||||
node name = Default/network-TrainOneStepCell/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op169
|
||||
slot = 0
|
||||
iteration = 2
|
||||
device_id = None
|
||||
root_graph_id = 1
|
||||
is_parameter = False
|
||||
|
||||
tensor_data_1 attributes:
|
||||
data (printed in uint8) = [149 167 124 ... 158 212 164]
|
||||
size in bytes = 2076672
|
||||
debugger dtype = 10
|
||||
shape = [32, 192, 13, 13]
|
||||
-----------------------------------------------------------
|
||||
tensor_info_2 attributes:
|
||||
node name = Default/network-TrainOneStepCell/network-WithLossCell/_backbone-AlexNet/ReLUV2-op348
|
||||
slot = 1
|
||||
iteration = 2
|
||||
device_id = None
|
||||
root_graph_id = 1
|
||||
is_parameter = False
|
||||
|
||||
tensor_data_2 attributes:
|
||||
data (printed in uint8) = [ 20 21 18 ... 126 98 25]
|
||||
size in bytes = 129792
|
||||
debugger dtype = 6
|
||||
shape = [32, 12, 13, 13, 2]
|
@ -0,0 +1,72 @@
|
||||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""
|
||||
Read tensor test script for offline debugger APIs.
|
||||
"""
|
||||
|
||||
import mindspore.offline_debug.dbg_services as d
|
||||
import numpy as np
|
||||
|
||||
|
||||
def main():
    """Read two AlexNet output tensors from the async-sink dump and print them.

    Builds a DbgServices backend on the hard-coded dump directory, initializes
    it with is_sync_mode=False, requests one slot-0 and one slot-1 output
    tensor for iteration 2, and passes the requests together with the data
    that was read to print_read_tensors.
    """
    backend = d.DbgServices(
        dump_file_path="/opt/nvme2n1/j00455527/dumps/async_sink_true/032421")
    backend.initialize(net_name="alexnet", is_sync_mode=False)

    requests = [
        # output tensor with zero slot
        d.TensorInfo(
            node_name="Default/network-TrainOneStepCell/network-WithLossCell/_backbone-AlexNet/"
                      "conv3-Conv2d/Conv2D-op169",
            slot=0, iteration=2, device_id=0, root_graph_id=1, is_parameter=False),
        # output tensor with non-zero slot
        d.TensorInfo(
            node_name="Default/network-TrainOneStepCell/network-WithLossCell/_backbone-AlexNet/"
                      "ReLUV2-op348",
            slot=1, iteration=2, device_id=0, root_graph_id=1, is_parameter=False),
    ]

    readings = backend.read_tensors(requests)
    print_read_tensors(requests, readings)
|
||||
|
||||
|
||||
def print_read_tensors(tensor_info, tensor_data):
    """Print each requested tensor's attributes followed by the data read for it.

    Args:
        tensor_info: iterable of objects exposing node_name, slot, iteration,
            device_id, root_graph_id and is_parameter.
        tensor_data: sequence, index-aligned with tensor_info, of objects
            exposing data_ptr, data_size, dtype and shape.

    Raises:
        IndexError: if tensor_data has fewer entries than tensor_info.
    """
    for index, info in enumerate(tensor_info):
        data = tensor_data[index]
        label = str(index + 1)

        print("-----------------------------------------------------------")
        print("tensor_info_" + label + " attributes:")
        print("node name = ", info.node_name)
        print("slot = ", info.slot)
        print("iteration = ", info.iteration)
        print("device_id = ", info.device_id)
        print("root_graph_id = ", info.root_graph_id)
        print("is_parameter = ", info.is_parameter)
        print()

        py_byte_size = len(data.data_ptr)
        c_byte_size = data.data_size

        print("tensor_data_" + label + " attributes:")
        # Never ask numpy for more bytes than the Python buffer holds:
        # np.frombuffer raises ValueError in that case, which would hide the
        # size-mismatch report printed just below.
        print("data (printed in uint8) = ", np.frombuffer(
            data.data_ptr, np.uint8, min(py_byte_size, c_byte_size)))
        if c_byte_size != py_byte_size:
            print("The python byte size of ", py_byte_size,
                  " does not match the C++ byte size of ", c_byte_size)
        print("size in bytes = ", data.data_size)
        print("debugger dtype = ", data.dtype)
        print("shape = ", data.shape)


if __name__ == "__main__":
    main()
|
@ -0,0 +1,14 @@
|
||||
-----------------------------------------------------------
|
||||
watchpoint_hit for test_1 attributes:
|
||||
name = Default/network-TrainOneStepCell/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op169
|
||||
slot = 0
|
||||
condition = 6
|
||||
watchpoint_id = 1
|
||||
parameter 0 name = param
|
||||
parameter 0 disabled = False
|
||||
parameter 0 value = 0.0
|
||||
parameter 0 hit = True
|
||||
parameter 0 actual_value = -0.1417236328125
|
||||
error code = 0
|
||||
device_id = 0
|
||||
root_graph_id = 1
|
@ -0,0 +1,92 @@
|
||||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""
|
||||
Watchpoints test script for offline debugger APIs.
|
||||
"""
|
||||
|
||||
import mindspore.offline_debug.dbg_services as d
|
||||
|
||||
|
||||
def main():
    """Exercise add/check/remove watchpoint calls against the async-sink dump.

    Each test prints an "ERROR -> ..." line when check_watchpoints returns an
    unexpected result; the hits of test 1 are printed in full.
    """
    backend = d.DbgServices(
        dump_file_path="/opt/nvme2n1/j00455527/dumps/async_sink_true/032421")
    backend.initialize(net_name="alexnet", is_sync_mode=False)

    # NOTES:
    # -> watch_condition=6 is MIN_LT
    # -> watch_condition=18 is CHANGE_TOO_LARGE

    conv3_node = ("Default/network-TrainOneStepCell/network-WithLossCell/"
                  "_backbone-AlexNet/conv3-Conv2d/Conv2D-op169")

    # test 1: watchpoint set and hit (watch_condition=6)
    hit_threshold = d.Parameter(name="param", disabled=False, value=0.0)
    backend.add_watchpoint(
        watchpoint_id=1,
        watch_condition=6,
        check_node_list={
            conv3_node: {"device_id": [0], "root_graph_id": [1], "is_parameter": False},
        },
        parameter_list=[hit_threshold])

    first_hits = backend.check_watchpoints(iteration=2)
    if len(first_hits) != 1:
        print("ERROR -> test 1: watchpoint set but not hit just once")
    print_watchpoint_hits(first_hits, 1)

    # test 2: watchpoint remove and ensure it's not hit
    backend.remove_watchpoint(watchpoint_id=1)
    hits_after_removal = backend.check_watchpoints(iteration=2)
    if hits_after_removal:
        print("ERROR -> test 2: watchpoint removed but hit")

    # test 3: watchpoint set and not hit, then remove
    miss_threshold = d.Parameter(name="param", disabled=False, value=-1000.0)
    backend.add_watchpoint(
        watchpoint_id=2,
        watch_condition=6,
        check_node_list={
            conv3_node: {"device_id": [0], "root_graph_id": [1], "is_parameter": False},
        },
        parameter_list=[miss_threshold])

    unexpected_hits = backend.check_watchpoints(iteration=2)
    if unexpected_hits:
        print("ERROR -> test 3: watchpoint set but not supposed to be hit")
    backend.remove_watchpoint(watchpoint_id=2)
|
||||
|
||||
|
||||
def print_watchpoint_hits(watchpoint_hits, test_id):
    """Print every attribute of each watchpoint hit, labelled with the test id.

    Args:
        watchpoint_hits: iterable of hit objects exposing name, slot,
            condition, watchpoint_id, parameters, error_code, device_id and
            root_graph_id. Each entry of parameters exposes name, disabled,
            value, hit and actual_value.
        test_id: integer test number inserted into the header line.
    """
    # Iterate the hits directly instead of indexing by position, so any
    # iterable (not only subscriptable sequences) is accepted.
    for hit in watchpoint_hits:
        print("-----------------------------------------------------------")
        print("watchpoint_hit for test_%u attributes:" % test_id)
        print("name = ", hit.name)
        print("slot = ", hit.slot)
        print("condition = ", hit.condition)
        print("watchpoint_id = ", hit.watchpoint_id)
        for position, param in enumerate(hit.parameters):
            print("parameter ", position, " name = ", param.name)
            print("parameter ", position, " disabled = ", param.disabled)
            print("parameter ", position, " value = ", param.value)
            print("parameter ", position, " hit = ", param.hit)
            print("parameter ", position, " actual_value = ", param.actual_value)
        print("error code = ", hit.error_code)
        print("device_id = ", hit.device_id)
        print("root_graph_id = ", hit.root_graph_id)


if __name__ == "__main__":
    main()
|
@ -0,0 +1,49 @@
|
||||
# Run every offline debugger test script and compare its filtered output
# against the checked-in .expected file. Prints "<name> PASSED" or
# "<name> FAILED" per test and exits non-zero if any test failed.

failed=0

# run_test NAME: run NAME.py, write its stdout to NAME.actual, strip the
# warning/deprecation lines, then diff the result against NAME.expected.
run_test() {
    test_name="$1"
    python "${test_name}.py" > "${test_name}.actual"
    # Lines containing [WARNING] or Deprecated are removed so they do not
    # take part in the comparison.
    sed -i '/\[WARNING\]/d' "${test_name}.actual"
    sed -i '/Deprecated/d' "${test_name}.actual"
    if diff "${test_name}.actual" "${test_name}.expected"; then
        echo "${test_name} PASSED"
    else
        echo "${test_name} FAILED"
        failed=1
    fi
}

for script in \
    sync_trans_false_read_tensors \
    sync_trans_true_read_tensors \
    sync_trans_false_watchpoints \
    async_sink_mode_true_read_tensors \
    async_sink_mode_true_watchpoints
do
    run_test "$script"
done

# Report failure to the caller instead of always exiting 0.
exit "$failed"
|
@ -0,0 +1,70 @@
|
||||
-----------------------------------------------------------
|
||||
tensor_info_1 attributes:
|
||||
node name = Default/network-WithLossCell/_backbone-AlexNet/conv2-Conv2d/conv2.bias
|
||||
slot = 0
|
||||
iteration = 2
|
||||
device_id = None
|
||||
root_graph_id = 0
|
||||
is_parameter = True
|
||||
|
||||
tensor_data_1 attributes:
|
||||
data (printed in uint8) = [170 19 44 181 254 212 16 52 52 162 148 180 130 115 226 180 183 243
|
||||
101 52 224 79 189 51 10 70 69 51 199 75 159 52 79 98 104 52
|
||||
106 77 19 52 129 183 8 180 252 58 48 180 35 219 9 52 240 201
|
||||
179 51 142 151 158 51 210 145 182 53 140 219 0 53 140 219 22 181
|
||||
46 33 87 180 238 90 122 180 166 10 38 179 202 195 4 53 166 10
|
||||
150 51 214 120 209 52 235 115 37 180 92 177 215 180 0 136 84 51
|
||||
72 114 145 180 43 169 255 180 114 27 61 52 76 225 122 50 126 72
|
||||
159 51 58 35 202 51 114 61 106 51 60 223 63 52 209 179 1 52
|
||||
232 217 44 178 130 158 109 179 213 231 10 179 37 40 94 179 208 68
|
||||
64 53 6 52 249 52 162 35 1 181 231 29 155 52 30 201 69 180
|
||||
229 131 126 51 18 165 109 180 164 112 163 181 116 172 11 178 6 129
|
||||
37 52 54 205 203 180 115 104 145 52 232 106 219 179 36 40 214 52
|
||||
202 50 204 52 76 89 38 179 230 140 232 178 168 53 77 52 180 191
|
||||
108 51 128 183 64 51 56 137 161 180 247 6 143 180 126 63 197 180
|
||||
198 177 94 52 140 185 139 51 150 178 228 180 255 67 150 52 134 201
|
||||
164 52 107 43 14 53 174 216 63 179 40 160 41 53 120 88 72 179
|
||||
218 172 234 52 234 38 25 52 85 159 155 180 254 67 138 180 34 253
|
||||
118 180 218 61 17 52 242 133 253 52 175 37 180 52 171 62 163 52
|
||||
202 195 86 53 160 171 45 52 34 31 176 180 156 85 5 53 178 191
|
||||
68 180 42 203 140 52 248 117 72 52 248 253 212 176 195 100 202 51
|
||||
87 14 141 52 91 100 235 51 48 221 136 52 143 117 17 180 51 196
|
||||
25 52 127 29 112 180 152 144 207 178 219 104 64 52 21 174 251 52
|
||||
164 78 138 181 20 63 6 52 10 249 96 179 163 146 18 53 200 186
|
||||
236 52 2 188 85 52 124 140 121 179 246 185 22 181 246 74 249 51
|
||||
70 182 135 53 189 227 76 52 249 160 159 180 134 235 65 53 64 164
|
||||
255 51 224 156 41 53 142 117 69 181 247 151 101 53 185 175 35 52
|
||||
164 112 21 53 30 31 212 179 142 151 110 179 176 148 29 181 206 204
|
||||
88 53 116 215 214 180 172 173 216 51 106 222 153 180 200 152 19 181
|
||||
176 3 7 52 215 52 87 52]
|
||||
size in bytes = 512
|
||||
debugger dtype = 11
|
||||
shape = [128]
|
||||
-----------------------------------------------------------
|
||||
tensor_info_2 attributes:
|
||||
node name = Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op168
|
||||
slot = 0
|
||||
iteration = 2
|
||||
device_id = None
|
||||
root_graph_id = 0
|
||||
is_parameter = False
|
||||
|
||||
tensor_data_2 attributes:
|
||||
data (printed in uint8) = [181 167 46 ... 12 204 164]
|
||||
size in bytes = 2076672
|
||||
debugger dtype = 10
|
||||
shape = [32, 12, 13, 13, 16]
|
||||
-----------------------------------------------------------
|
||||
tensor_info_3 attributes:
|
||||
node name = Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op346
|
||||
slot = 1
|
||||
iteration = 2
|
||||
device_id = None
|
||||
root_graph_id = 0
|
||||
is_parameter = False
|
||||
|
||||
tensor_data_3 attributes:
|
||||
data (printed in uint8) = [ 50 17 122 ... 94 42 90]
|
||||
size in bytes = 129792
|
||||
debugger dtype = 6
|
||||
shape = [32, 12, 13, 13, 2]
|
@ -0,0 +1,74 @@
|
||||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""
|
||||
Read tensor test script for offline debugger APIs.
|
||||
"""
|
||||
|
||||
import mindspore.offline_debug.dbg_services as d
|
||||
import numpy as np
|
||||
|
||||
|
||||
def main():
    """Read one parameter and two output tensors from the sync dump and print them.

    Builds a DbgServices backend on the hard-coded sync_trans_false dump
    directory, initializes it with is_sync_mode=True, requests three tensors
    for iteration 2, and passes the requests together with the data that was
    read to print_read_tensors.
    """
    backend = d.DbgServices(
        dump_file_path="/opt/nvme2n1/j00455527/dumps/sync_trans_false/032421/alexnet")
    backend.initialize(net_name="Network Name goes here!", is_sync_mode=True)

    requests = [
        # parameter
        d.TensorInfo(
            node_name="Default/network-WithLossCell/_backbone-AlexNet/conv2-Conv2d/conv2.bias",
            slot=0, iteration=2, device_id=0, root_graph_id=0, is_parameter=True),
        # output tensor with zero slot
        d.TensorInfo(
            node_name="Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op168",
            slot=0, iteration=2, device_id=0, root_graph_id=0, is_parameter=False),
        # output tensor with non-zero slot
        d.TensorInfo(
            node_name="Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op346",
            slot=1, iteration=2, device_id=0, root_graph_id=0, is_parameter=False),
    ]

    readings = backend.read_tensors(requests)
    print_read_tensors(requests, readings)
|
||||
|
||||
|
||||
def print_read_tensors(tensor_info, tensor_data):
    """Print each requested tensor's attributes followed by the data read for it.

    Args:
        tensor_info: iterable of objects exposing node_name, slot, iteration,
            device_id, root_graph_id and is_parameter.
        tensor_data: sequence, index-aligned with tensor_info, of objects
            exposing data_ptr, data_size, dtype and shape.

    Raises:
        IndexError: if tensor_data has fewer entries than tensor_info.
    """
    for index, info in enumerate(tensor_info):
        data = tensor_data[index]
        label = str(index + 1)

        print("-----------------------------------------------------------")
        print("tensor_info_" + label + " attributes:")
        print("node name = ", info.node_name)
        print("slot = ", info.slot)
        print("iteration = ", info.iteration)
        print("device_id = ", info.device_id)
        print("root_graph_id = ", info.root_graph_id)
        print("is_parameter = ", info.is_parameter)
        print()

        py_byte_size = len(data.data_ptr)
        c_byte_size = data.data_size

        print("tensor_data_" + label + " attributes:")
        # Never ask numpy for more bytes than the Python buffer holds:
        # np.frombuffer raises ValueError in that case, which would hide the
        # size-mismatch report printed just below.
        print("data (printed in uint8) = ", np.frombuffer(
            data.data_ptr, np.uint8, min(py_byte_size, c_byte_size)))
        if c_byte_size != py_byte_size:
            print("The python byte size of ", py_byte_size,
                  " does not match the C++ byte size of ", c_byte_size)
        print("size in bytes = ", data.data_size)
        print("debugger dtype = ", data.dtype)
        print("shape = ", data.shape)


if __name__ == "__main__":
    main()
|
@ -0,0 +1,33 @@
|
||||
-----------------------------------------------------------
|
||||
watchpoint_hit for test_1 attributes:
|
||||
name = Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op168
|
||||
slot = 0
|
||||
condition = 6
|
||||
watchpoint_id = 1
|
||||
parameter 0 name = param
|
||||
parameter 0 disabled = False
|
||||
parameter 0 value = 0.0
|
||||
parameter 0 hit = True
|
||||
parameter 0 actual_value = -0.14013671875
|
||||
error code = 0
|
||||
device_id = 0
|
||||
root_graph_id = 0
|
||||
-----------------------------------------------------------
|
||||
watchpoint_hit for test_4 attributes:
|
||||
name = Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/Parameter[6]_11/fc3.bias
|
||||
slot = 0
|
||||
condition = 18
|
||||
watchpoint_id = 3
|
||||
parameter 0 name = abs_mean_update_ratio_gt
|
||||
parameter 0 disabled = False
|
||||
parameter 0 value = 0.0
|
||||
parameter 0 hit = True
|
||||
parameter 0 actual_value = 0.5243796973599475
|
||||
parameter 1 name = epsilon
|
||||
parameter 1 disabled = True
|
||||
parameter 1 value = 0.0
|
||||
parameter 1 hit = False
|
||||
parameter 1 actual_value = 0.0
|
||||
error code = 0
|
||||
device_id = 0
|
||||
root_graph_id = 0
|
@ -0,0 +1,109 @@
|
||||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""
|
||||
Watchpoints test script for offline debugger APIs.
|
||||
"""
|
||||
|
||||
import mindspore.offline_debug.dbg_services as d
|
||||
|
||||
|
||||
def main():
    """Run the watchpoint add/check/remove tests against the sync dump.

    Each test prints an "ERROR -> ..." line when check_watchpoints returns an
    unexpected result; the hits of tests 1 and 4 are printed in full.
    """
    debugger_backend = d.DbgServices(
        dump_file_path="/opt/nvme2n1/j00455527/dumps/sync_trans_false/032421/alexnet")

    _ = debugger_backend.initialize(
        net_name="Network Name goes here!", is_sync_mode=True)

    # NOTES:
    # -> watch_condition=6 is MIN_LT
    # -> watch_condition=18 is CHANGE_TOO_LARGE

    # Node watched by tests 1 and 3. Test 3 previously named "Conv2D-op308",
    # a node that appears nowhere else in these tests, so its "not hit" check
    # could pass without evaluating any tensor.
    # NOTE(review): confirm op168 is the conv3 Conv2D node in this dump.
    conv3_node = "Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op168"

    # test 1: watchpoint set and hit (watch_condition=6)
    param1 = d.Parameter(name="param", disabled=False, value=0.0)
    _ = debugger_backend.add_watchpoint(
        watchpoint_id=1, watch_condition=6,
        check_node_list={
            conv3_node: {"device_id": [0], "root_graph_id": [0], "is_parameter": False},
        },
        parameter_list=[param1])

    watchpoint_hits_test_1 = debugger_backend.check_watchpoints(iteration=2)
    if len(watchpoint_hits_test_1) != 1:
        print("ERROR -> test 1: watchpoint set but not hit just once")
    print_watchpoint_hits(watchpoint_hits_test_1, 1)

    # test 2: watchpoint remove and ensure it's not hit
    _ = debugger_backend.remove_watchpoint(watchpoint_id=1)
    watchpoint_hits_test_2 = debugger_backend.check_watchpoints(iteration=2)
    if watchpoint_hits_test_2:
        print("ERROR -> test 2: watchpoint removed but hit")

    # test 3: watchpoint set and not hit, then remove
    param2 = d.Parameter(name="param", disabled=False, value=-1000.0)
    _ = debugger_backend.add_watchpoint(
        watchpoint_id=2, watch_condition=6,
        check_node_list={
            conv3_node: {"device_id": [0], "root_graph_id": [0], "is_parameter": False},
        },
        parameter_list=[param2])

    watchpoint_hits_test_3 = debugger_backend.check_watchpoints(iteration=2)
    if watchpoint_hits_test_3:
        print("ERROR -> test 3: watchpoint set but not supposed to be hit")
    _ = debugger_backend.remove_watchpoint(watchpoint_id=2)

    # test 4: weight change watchpoint set and hit
    param_abs_mean_update_ratio_gt = d.Parameter(
        name="abs_mean_update_ratio_gt", disabled=False, value=0.0)
    param_epsilon = d.Parameter(name="epsilon", disabled=True, value=0.0)
    _ = debugger_backend.add_watchpoint(
        watchpoint_id=3, watch_condition=18,
        check_node_list={
            "Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/"
            "Parameter[6]_11/fc3.bias":
                {"device_id": [0], "root_graph_id": [0], "is_parameter": True},
        },
        parameter_list=[param_abs_mean_update_ratio_gt, param_epsilon])

    watchpoint_hits_test_4 = debugger_backend.check_watchpoints(iteration=3)
    if len(watchpoint_hits_test_4) != 1:
        print("ERROR -> test 4: watchpoint weight change set but not hit just once")
    print_watchpoint_hits(watchpoint_hits_test_4, 4)
|
||||
|
||||
|
||||
def print_watchpoint_hits(watchpoint_hits, test_id):
    """Print every attribute of each watchpoint hit, labelled with the test id.

    Args:
        watchpoint_hits: iterable of hit objects exposing name, slot,
            condition, watchpoint_id, parameters, error_code, device_id and
            root_graph_id. Each entry of parameters exposes name, disabled,
            value, hit and actual_value.
        test_id: integer test number inserted into the header line.
    """
    # Iterate the hits directly instead of indexing by position, so any
    # iterable (not only subscriptable sequences) is accepted.
    for hit in watchpoint_hits:
        print("-----------------------------------------------------------")
        print("watchpoint_hit for test_%u attributes:" % test_id)
        print("name = ", hit.name)
        print("slot = ", hit.slot)
        print("condition = ", hit.condition)
        print("watchpoint_id = ", hit.watchpoint_id)
        for position, param in enumerate(hit.parameters):
            print("parameter ", position, " name = ", param.name)
            print("parameter ", position, " disabled = ", param.disabled)
            print("parameter ", position, " value = ", param.value)
            print("parameter ", position, " hit = ", param.hit)
            print("parameter ", position, " actual_value = ", param.actual_value)
        print("error code = ", hit.error_code)
        print("device_id = ", hit.device_id)
        print("root_graph_id = ", hit.root_graph_id)


if __name__ == "__main__":
    main()
|
@ -0,0 +1,70 @@
|
||||
-----------------------------------------------------------
|
||||
tensor_info_1 attributes:
|
||||
node name = Default/network-WithLossCell/_backbone-AlexNet/conv2-Conv2d/conv2.bias
|
||||
slot = 0
|
||||
iteration = 2
|
||||
device_id = None
|
||||
root_graph_id = 0
|
||||
is_parameter = True
|
||||
|
||||
tensor_data_1 attributes:
|
||||
data (printed in uint8) = [230 208 10 52 104 34 252 52 4 231 144 52 188 150 64 180 88 236
|
||||
15 180 254 135 180 51 131 226 147 52 88 202 62 53 2 43 55 53
|
||||
231 29 87 180 220 249 30 180 157 17 177 180 81 107 140 181 8 95
|
||||
192 180 89 134 112 180 96 238 90 178 156 196 212 180 206 25 15 181
|
||||
212 154 6 180 91 211 116 52 191 14 140 51 128 106 124 53 28 158
|
||||
70 181 182 21 251 50 100 204 157 179 88 202 42 180 7 95 8 53
|
||||
128 251 238 52 241 133 241 52 111 86 157 179 48 221 148 180 200 7
|
||||
141 180 236 226 182 51 190 82 158 180 140 108 179 180 195 134 215 179
|
||||
103 213 39 179 89 168 149 180 42 58 58 180 64 53 62 179 250 126
|
||||
158 52 38 83 117 52 0 0 136 180 136 133 122 51 110 18 131 179
|
||||
238 13 94 51 102 136 15 181 134 90 227 180 16 11 117 180 35 74
|
||||
163 52 105 0 87 181 112 18 131 50 226 233 67 181 217 172 10 52
|
||||
206 25 217 52 208 213 22 52 146 203 87 180 74 46 207 52 178 191
|
||||
4 180 100 93 216 52 119 190 171 180 223 2 5 181 128 72 207 179
|
||||
58 146 11 179 224 79 137 52 143 228 154 180 246 219 215 179 14 79
|
||||
195 52 126 29 64 52 132 192 42 51 94 220 86 52 94 109 1 181
|
||||
72 37 117 178 110 197 94 180 160 94 153 179 118 224 80 181 156 17
|
||||
37 50 120 156 162 53 26 115 135 180 228 20 29 53 145 126 147 52
|
||||
99 16 48 180 211 188 199 180 52 51 99 180 93 254 227 52 152 126
|
||||
123 49 6 18 16 181 5 163 130 51 27 158 98 53 134 235 189 52
|
||||
119 45 9 180 130 115 110 52 158 128 162 52 232 251 197 180 178 46
|
||||
158 179 57 214 157 52 172 207 161 180 208 0 222 49 242 99 32 53
|
||||
20 174 135 50 247 117 176 52 194 57 43 180 140 108 135 51 243 65
|
||||
175 51 187 73 156 51 63 232 217 50 180 234 115 52 194 168 148 52
|
||||
27 192 183 180 45 178 157 52 125 208 17 53 236 192 65 53 190 193
|
||||
7 53 254 246 57 53 3 43 199 51 64 164 215 180 220 104 240 51
|
||||
23 72 24 180 68 173 9 51 72 114 29 53 105 0 57 181 188 150
|
||||
8 53 229 97 131 53 0 34 189 51 163 146 74 53 31 244 204 51
|
||||
86 193 220 180 156 51 146 179]
|
||||
size in bytes = 512
|
||||
debugger dtype = 11
|
||||
shape = [128]
|
||||
-----------------------------------------------------------
|
||||
tensor_info_2 attributes:
|
||||
node name = Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op171
|
||||
slot = 0
|
||||
iteration = 2
|
||||
device_id = None
|
||||
root_graph_id = 0
|
||||
is_parameter = False
|
||||
|
||||
tensor_data_2 attributes:
|
||||
data (printed in uint8) = [ 99 26 69 ... 154 218 164]
|
||||
size in bytes = 2076672
|
||||
debugger dtype = 10
|
||||
shape = [32, 192, 13, 13]
|
||||
-----------------------------------------------------------
|
||||
tensor_info_3 attributes:
|
||||
node name = Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op353
|
||||
slot = 1
|
||||
iteration = 2
|
||||
device_id = None
|
||||
root_graph_id = 0
|
||||
is_parameter = False
|
||||
|
||||
tensor_data_3 attributes:
|
||||
data (printed in uint8) = [19 17 27 ... 94 42 90]
|
||||
size in bytes = 129792
|
||||
debugger dtype = 6
|
||||
shape = [32, 12, 13, 13, 2]
|
@ -0,0 +1,74 @@
|
||||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""
|
||||
Read tensor test script for offline debugger APIs.
|
||||
"""
|
||||
|
||||
import mindspore.offline_debug.dbg_services as d
|
||||
import numpy as np
|
||||
|
||||
|
||||
def main():
    """Read one parameter and two output tensors from the sync dump and print them.

    Builds a DbgServices backend on the hard-coded sync_trans_true dump
    directory, initializes it with is_sync_mode=True, requests three tensors
    for iteration 2, and passes the requests together with the data that was
    read to print_read_tensors.
    """
    backend = d.DbgServices(
        dump_file_path="/opt/nvme2n1/j00455527/dumps/sync_trans_true/032421/alexnet")
    backend.initialize(net_name="Network Name goes here!", is_sync_mode=True)

    requests = [
        # parameter
        d.TensorInfo(
            node_name="Default/network-WithLossCell/_backbone-AlexNet/conv2-Conv2d/conv2.bias",
            slot=0, iteration=2, device_id=0, root_graph_id=0, is_parameter=True),
        # output tensor with zero slot
        d.TensorInfo(
            node_name="Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op171",
            slot=0, iteration=2, device_id=0, root_graph_id=0, is_parameter=False),
        # output tensor with non-zero slot
        d.TensorInfo(
            node_name="Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op353",
            slot=1, iteration=2, device_id=0, root_graph_id=0, is_parameter=False),
    ]

    readings = backend.read_tensors(requests)
    print_read_tensors(requests, readings)
|
||||
|
||||
|
||||
def print_read_tensors(tensor_info, tensor_data):
    """Print each requested tensor's attributes followed by the data read for it.

    Args:
        tensor_info: iterable of objects exposing node_name, slot, iteration,
            device_id, root_graph_id and is_parameter.
        tensor_data: sequence, index-aligned with tensor_info, of objects
            exposing data_ptr, data_size, dtype and shape.

    Raises:
        IndexError: if tensor_data has fewer entries than tensor_info.
    """
    for index, info in enumerate(tensor_info):
        data = tensor_data[index]
        label = str(index + 1)

        print("-----------------------------------------------------------")
        print("tensor_info_" + label + " attributes:")
        print("node name = ", info.node_name)
        print("slot = ", info.slot)
        print("iteration = ", info.iteration)
        print("device_id = ", info.device_id)
        print("root_graph_id = ", info.root_graph_id)
        print("is_parameter = ", info.is_parameter)
        print()

        py_byte_size = len(data.data_ptr)
        c_byte_size = data.data_size

        print("tensor_data_" + label + " attributes:")
        # Never ask numpy for more bytes than the Python buffer holds:
        # np.frombuffer raises ValueError in that case, which would hide the
        # size-mismatch report printed just below.
        print("data (printed in uint8) = ", np.frombuffer(
            data.data_ptr, np.uint8, min(py_byte_size, c_byte_size)))
        if c_byte_size != py_byte_size:
            print("The python byte size of ", py_byte_size,
                  " does not match the C++ byte size of ", c_byte_size)
        print("size in bytes = ", data.data_size)
        print("debugger dtype = ", data.dtype)
        print("shape = ", data.shape)


if __name__ == "__main__":
    main()
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,149 @@
|
||||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef DEBUG_DBG_SERVICES_H_
|
||||
#define DEBUG_DBG_SERVICES_H_
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <tuple>
|
||||
#include <iostream>
|
||||
#include <variant>
|
||||
#include "pybind11/pybind11.h"
|
||||
#include "pybind11/stl.h"
|
||||
#include "pybind11/stl_bind.h"
|
||||
|
||||
#include "debug/debug_services.h"
|
||||
namespace py = pybind11;
|
||||
|
||||
// Plain value type describing one watchpoint parameter: its name, whether it
// is disabled, the configured value, and the hit flag / actual value fields.
typedef struct parameter {
  parameter(const std::string &name, bool disabled, double value, bool hit, double actual_value)
      : name(name), disabled(disabled), value(value), hit(hit), actual_value(actual_value) {}

  // Getters return by value. The return types are deliberately not
  // const-qualified: top-level const on a by-value return has no effect for
  // scalars and prevents the returned std::string from being moved.
  std::string get_name() const { return name; }
  bool get_disabled() const { return disabled; }
  double get_value() const { return value; }
  bool get_hit() const { return hit; }
  double get_actual_value() const { return actual_value; }

  std::string name;
  bool disabled;
  double value;
  bool hit;
  double actual_value;
} parameter_t;
|
||||
|
||||
typedef struct watchpoint_hit {
|
||||
watchpoint_hit(const std::string &name, uint32_t slot, int condition, uint32_t watchpoint_id,
|
||||
const std::vector<parameter_t> ¶meters, int32_t error_code, uint32_t device_id,
|
||||
uint32_t root_graph_id)
|
||||
: name(name),
|
||||
slot(slot),
|
||||
condition(condition),
|
||||
watchpoint_id(watchpoint_id),
|
||||
parameters(parameters),
|
||||
error_code(error_code),
|
||||
device_id(device_id),
|
||||
root_graph_id(root_graph_id) {}
|
||||
const std::string get_name() const { return name; }
|
||||
const uint32_t get_slot() const { return slot; }
|
||||
const int get_condition() const { return condition; }
|
||||
const uint32_t get_watchpoint_id() const { return watchpoint_id; }
|
||||
const std::vector<parameter_t> get_parameters() const { return parameters; }
|
||||
const int32_t get_error_code() const { return error_code; }
|
||||
const uint32_t get_device_id() const { return device_id; }
|
||||
const uint32_t get_root_graph_id() const { return root_graph_id; }
|
||||
std::string name;
|
||||
uint32_t slot;
|
||||
int condition;
|
||||
uint32_t watchpoint_id;
|
||||
std::vector<parameter_t> parameters;
|
||||
int32_t error_code;
|
||||
uint32_t device_id;
|
||||
uint32_t root_graph_id;
|
||||
} watchpoint_hit_t;
|
||||
|
||||
// Plain value type identifying one tensor to read: node name, output slot,
// iteration, device id, root graph id, and whether the node is a parameter.
typedef struct tensor_info {
  tensor_info(const std::string &node_name, uint32_t slot, uint32_t iteration, uint32_t device_id,
              uint32_t root_graph_id, bool is_parameter)
      : node_name(node_name),
        slot(slot),
        iteration(iteration),
        device_id(device_id),
        root_graph_id(root_graph_id),
        is_parameter(is_parameter) {}

  // Getters return by value. Return types are not const-qualified: top-level
  // const on a by-value return has no effect for scalars and prevents the
  // returned std::string from being moved.
  std::string get_node_name() const { return node_name; }
  uint32_t get_slot() const { return slot; }
  uint32_t get_iteration() const { return iteration; }
  uint32_t get_device_id() const { return device_id; }
  uint32_t get_root_graph_id() const { return root_graph_id; }
  bool get_is_parameter() const { return is_parameter; }

  std::string node_name;
  uint32_t slot;
  uint32_t iteration;
  uint32_t device_id;
  uint32_t root_graph_id;
  bool is_parameter;
} tensor_info_t;
|
||||
|
||||
typedef struct tensor_data {
|
||||
tensor_data(char *data_ptr, uint64_t data_size, int dtype, const std::vector<int64_t> &shape)
|
||||
: data_size(data_size), dtype(dtype), shape(shape) {
|
||||
if (data_ptr != NULL) {
|
||||
this->data_ptr = py::bytes(data_ptr, data_size);
|
||||
} else {
|
||||
this->data_ptr = py::bytes();
|
||||
}
|
||||
}
|
||||
const py::bytes get_data_ptr() const { return data_ptr; }
|
||||
const uint64_t get_data_size() const { return data_size; }
|
||||
const int get_dtype() const { return dtype; }
|
||||
const std::vector<int64_t> &get_shape() const { return shape; }
|
||||
py::bytes data_ptr;
|
||||
uint64_t data_size;
|
||||
int dtype;
|
||||
std::vector<int64_t> shape;
|
||||
} tensor_data_t;
|
||||
|
||||
class DbgServices {
|
||||
private:
|
||||
DebugServices *debug_services;
|
||||
|
||||
public:
|
||||
explicit DbgServices(bool verbose = false);
|
||||
|
||||
DbgServices(const DbgServices &other);
|
||||
|
||||
DbgServices &operator=(const DbgServices &other);
|
||||
|
||||
~DbgServices();
|
||||
|
||||
int32_t Initialize(std::string net_name, std::string dump_folder_path, bool is_sync_mode);
|
||||
|
||||
int32_t AddWatchpoint(
|
||||
unsigned int id, unsigned int watch_condition,
|
||||
std::map<std::string, std::map<std::string, std::variant<bool, std::vector<std::string>>>> check_nodes,
|
||||
std::vector<parameter_t> parameter_list);
|
||||
|
||||
int32_t RemoveWatchpoint(unsigned int id);
|
||||
|
||||
std::vector<watchpoint_hit_t> CheckWatchpoints(unsigned int iteration);
|
||||
|
||||
std::vector<tensor_data_t> ReadTensors(std::vector<tensor_info_t> info);
|
||||
|
||||
std::string GetVersion();
|
||||
};
|
||||
|
||||
#endif // DEBUG_DBG_SERVICES_H_
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,24 @@
|
||||
# Run one offline-debugger test script and compare its stdout with the stored
# expected output.
#   $1 - test name; expects <name>.py and <name>.expected in the current
#        directory and writes <name>.actual next to them.
# Prints "<name> PASSED" when diff finds no differences, "<name> FAILED"
# otherwise (diff's own output is shown as before).
run_test() {
    name="$1"
    python "${name}.py" > "${name}.actual"
    # Test diff directly instead of inspecting $? afterwards.
    if diff "${name}.actual" "${name}.expected"; then
        echo "${name} PASSED"
    else
        echo "${name} FAILED"
    fi
}

# NOTE: as before, the script's exit status does not reflect test failures.
for test_name in \
    sync_trans_false_read_tensors \
    sync_trans_true_read_tensors \
    sync_trans_false_watchpoints
do
    run_test "${test_name}"
done
|
||||
|
@ -0,0 +1,70 @@
|
||||
-----------------------------------------------------------
|
||||
tensor_info_1 attributes:
|
||||
node name = Default/network-WithLossCell/_backbone-AlexNet/conv2-Conv2d/conv2.bias
|
||||
slot = 0
|
||||
iteration = 2
|
||||
device_id = None
|
||||
root_graph_id = 0
|
||||
is_parameter = True
|
||||
|
||||
tensor_data_1 attributes:
|
||||
data (printed in uint8) = [ 0 0 0 0 195 127 0 0 176 202 195 248 194 127 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 8 58 196 248
|
||||
194 127 0 0 17 0 0 0 0 0 0 0 160 76 6 140 195 127
|
||||
0 0 69 0 0 0 0 0 0 0 1 0 0 0 195 127 0 0
|
||||
64 195 195 248 194 127 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 88 1 196 248 194 127 0 0 18 0 0 0
|
||||
0 0 0 0 160 47 6 140 195 127 0 0 69 0 0 0 0 0
|
||||
0 0 1 0 0 0 195 127 0 0 176 203 195 248 194 127 0 0
|
||||
176 204 195 248 194 127 0 0 0 0 0 0 0 0 0 0 216 241
|
||||
195 248 194 127 0 0 19 0 0 0 0 0 0 0 96 39 6 140
|
||||
195 127 0 0 69 0 0 0 0 0 0 0 1 0 0 0 195 127
|
||||
0 0 112 52 196 248 194 127 0 0 176 52 196 248 194 127 0 0
|
||||
0 0 0 0 0 0 0 0 88 250 195 248 194 127 0 0 20 0
|
||||
0 0 0 0 0 0 128 130 5 140 195 127 0 0 69 0 0 0
|
||||
0 0 0 0 0 0 0 0 195 127 0 0 208 136 195 248 194 127
|
||||
0 0 176 202 195 248 194 127 0 0 48 52 196 248 194 127 0 0
|
||||
184 247 195 248 194 127 0 0 21 0 0 0 0 0 0 0 176 213
|
||||
4 140 195 127 0 0 69 0 0 0 0 0 0 0 0 0 0 0
|
||||
195 127 0 0 48 52 196 248 194 127 0 0 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 8 249 195 248 194 127 0 0
|
||||
22 0 0 0 0 0 0 0 16 46 4 140 195 127 0 0 69 0
|
||||
0 0 0 0 0 0 1 0 0 0 195 127 0 0 64 137 195 248
|
||||
194 127 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
0 0 88 12 196 248 194 127 0 0 23 0 0 0 0 0 0 0
|
||||
32 137 3 140 195 127 0 0 85 0 0 0 0 0 0 0 0 0
|
||||
0 0 195 127 0 0 176 202 195 248 194 127 0 0 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 104 246 195 248 194 127
|
||||
0 0 24 0 0 0 0 0 0 0 48 104 15 140 195 127 0 0
|
||||
32 104 15 140 195 127 0 0]
|
||||
size in bytes = 512
|
||||
debugger dtype = 11
|
||||
shape = [128]
|
||||
-----------------------------------------------------------
|
||||
tensor_info_2 attributes:
|
||||
node name = Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op308
|
||||
slot = 0
|
||||
iteration = 2
|
||||
device_id = None
|
||||
root_graph_id = 0
|
||||
is_parameter = False
|
||||
|
||||
tensor_data_2 attributes:
|
||||
data (printed in uint8) = [ 0 169 0 ... 152 242 63]
|
||||
size in bytes = 4153344
|
||||
debugger dtype = 11
|
||||
shape = [32, 192, 13, 13]
|
||||
-----------------------------------------------------------
|
||||
tensor_info_3 attributes:
|
||||
node name = Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op300
|
||||
slot = 1
|
||||
iteration = 2
|
||||
device_id = None
|
||||
root_graph_id = 0
|
||||
is_parameter = False
|
||||
|
||||
tensor_data_3 attributes:
|
||||
data (printed in uint8) = [ 0 169 0 ... 217 4 52]
|
||||
size in bytes = 831744
|
||||
debugger dtype = 8
|
||||
shape = [207936]
|
@ -0,0 +1,74 @@
|
||||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""
|
||||
Read tensor test script for offline debugger APIs.
|
||||
"""
|
||||
|
||||
import mindspore.offline_debug.dbg_services as d
|
||||
import numpy as np
|
||||
|
||||
|
||||
def main():
    """Read one parameter and two output tensors from the dump and print them."""
    backend = d.DbgServices(
        dump_file_path="/home/jtzanaka/dumps/sync_trans_false/032421/alexnet")
    backend.initialize(net_name="Network Name goes here!", is_sync_mode=True)

    # (node_name, slot, is_parameter) for every tensor to read, in print order.
    requests = (
        # parameter
        ("Default/network-WithLossCell/_backbone-AlexNet/conv2-Conv2d/conv2.bias", 0, True),
        # output tensor with zero slot
        ("Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op308", 0, False),
        # output tensor with non-zero slot
        ("Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op300", 1, False),
    )
    tensor_info = [
        d.TensorInfo(node_name=node_name, slot=slot, iteration=2, device_id=0,
                     root_graph_id=0, is_parameter=is_parameter)
        for node_name, slot, is_parameter in requests
    ]

    print_read_tensors(tensor_info, backend.read_tensors(tensor_info))
|
||||
|
||||
|
||||
def print_read_tensors(tensor_info, tensor_data):
    """Print each requested tensor's attributes followed by the data read for it.

    Args:
        tensor_info: Sequence of objects exposing ``node_name``, ``slot``,
            ``iteration``, ``device_id``, ``root_graph_id`` and
            ``is_parameter``.
        tensor_data: Sequence indexed in parallel with ``tensor_info``; each
            item exposes ``data_ptr`` (a bytes-like buffer), ``data_size``,
            ``dtype`` and ``shape``.

    Raises:
        IndexError: If ``tensor_data`` has fewer items than ``tensor_info``.
    """
    for index, info in enumerate(tensor_info):
        label = str(index + 1)
        print("-----------------------------------------------------------")
        print("tensor_info_" + label + " attributes:")
        print("node name = ", info.node_name)
        print("slot = ", info.slot)
        print("iteration = ", info.iteration)
        print("device_id = ", info.device_id)
        print("root_graph_id = ", info.root_graph_id)
        print("is_parameter = ", info.is_parameter)
        print()

        data = tensor_data[index]
        raw_bytes = data.data_ptr
        py_byte_size = len(raw_bytes)
        c_byte_size = data.data_size

        print("tensor_data_" + label + " attributes:")
        # Never request more items than the buffer holds: np.frombuffer raises
        # ValueError in that case, which would hide the mismatch report below.
        print("data (printed in uint8) = ", np.frombuffer(
            raw_bytes, np.uint8, min(c_byte_size, py_byte_size)))
        if c_byte_size != py_byte_size:
            print("The python byte size of ", py_byte_size,
                  " does not match the C++ byte size of ", c_byte_size)
        print("size in bytes = ", c_byte_size)
        print("debugger dtype = ", data.dtype)
        print("shape = ", data.shape)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
@ -0,0 +1,33 @@
|
||||
-----------------------------------------------------------
|
||||
watchpoint_hit for test_1 attributes:
|
||||
name = Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op308
|
||||
slot = 0
|
||||
condition = 6
|
||||
watchpoint_id = 1
|
||||
parameter 0 name = param
|
||||
parameter 0 disabled = False
|
||||
parameter 0 value = 0.0
|
||||
parameter 0 hit = True
|
||||
parameter 0 actual_value = -2.429065704345703
|
||||
error code = 0
|
||||
device_id = 0
|
||||
root_graph_id = 0
|
||||
-----------------------------------------------------------
|
||||
watchpoint_hit for test_4 attributes:
|
||||
name = Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/Parameter[6]_11/fc3.bias
|
||||
slot = 0
|
||||
condition = 18
|
||||
watchpoint_id = 3
|
||||
parameter 0 name = abs_mean_update_ratio_gt
|
||||
parameter 0 disabled = False
|
||||
parameter 0 value = 0.0
|
||||
parameter 0 hit = True
|
||||
parameter 0 actual_value = 1.793662034335766e-35
|
||||
parameter 1 name = epsilon
|
||||
parameter 1 disabled = True
|
||||
parameter 1 value = 0.0
|
||||
parameter 1 hit = False
|
||||
parameter 1 actual_value = 0.0
|
||||
error code = 0
|
||||
device_id = 0
|
||||
root_graph_id = 0
|
@ -0,0 +1,109 @@
|
||||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
"""
|
||||
Watchpoints test script for offline debugger APIs.
|
||||
"""
|
||||
|
||||
import mindspore.offline_debug.dbg_services as d
|
||||
|
||||
|
||||
def main():
    """Run the four watchpoint scenarios against the dump and print the hits."""
    backend = d.DbgServices(
        dump_file_path="/home/jtzanaka/dumps/sync_trans_false/032421/alexnet")
    backend.initialize(net_name="Network Name goes here!", is_sync_mode=True)

    # NOTES:
    # -> watch_condition=6 is MIN_LT
    # -> watch_condition=18 is CHANGE_TOO_LARGE
    conv3_node = "Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op308"
    fc3_bias_node = "Default/network-WithLossCell/_backbone-AlexNet/fc3-Dense/Parameter[6]_11/fc3.bias"

    # test 1: watchpoint set and hit (watch_condition=6)
    threshold_zero = d.Parameter(name="param", disabled=False, value=0.0)
    backend.add_watchpoint(
        watchpoint_id=1,
        watch_condition=6,
        check_node_list={
            conv3_node: {"device_id": [0], "root_graph_id": [0], "is_parameter": False},
        },
        parameter_list=[threshold_zero])
    hits_1 = backend.check_watchpoints(iteration=2)
    if len(hits_1) != 1:
        print("ERROR -> test 1: watchpoint set but not hit just once")
    print_watchpoint_hits(hits_1, 1)

    # test 2: watchpoint remove and ensure it's not hit
    backend.remove_watchpoint(watchpoint_id=1)
    hits_2 = backend.check_watchpoints(iteration=2)
    if hits_2:
        print("ERROR -> test 2: watchpoint removed but hit")

    # test 3: watchpoint set and not hit, then remove
    threshold_low = d.Parameter(name="param", disabled=False, value=-1000.0)
    backend.add_watchpoint(
        watchpoint_id=2,
        watch_condition=6,
        check_node_list={
            conv3_node: {"device_id": [0], "root_graph_id": [0], "is_parameter": False},
        },
        parameter_list=[threshold_low])
    hits_3 = backend.check_watchpoints(iteration=2)
    if hits_3:
        print("ERROR -> test 3: watchpoint set but not supposed to be hit")
    backend.remove_watchpoint(watchpoint_id=2)

    # test 4: weight change watchpoint set and hit
    update_ratio = d.Parameter(
        name="abs_mean_update_ratio_gt", disabled=False, value=0.0)
    epsilon = d.Parameter(name="epsilon", disabled=True, value=0.0)
    backend.add_watchpoint(
        watchpoint_id=3,
        watch_condition=18,
        check_node_list={
            fc3_bias_node: {"device_id": [0], "root_graph_id": [0], "is_parameter": True},
        },
        parameter_list=[update_ratio, epsilon])
    hits_4 = backend.check_watchpoints(iteration=3)
    if len(hits_4) != 1:
        print("ERROR -> test 4: watchpoint weight change set but not hit just once")
    print_watchpoint_hits(hits_4, 4)
|
||||
|
||||
|
||||
def print_watchpoint_hits(watchpoint_hits, test_id):
    """Print every watchpoint hit, including each of its parameters.

    Args:
        watchpoint_hits: Iterable of objects exposing ``name``, ``slot``,
            ``condition``, ``watchpoint_id``, ``parameters``, ``error_code``,
            ``device_id`` and ``root_graph_id``. Each entry of ``parameters``
            exposes ``name``, ``disabled``, ``value``, ``hit`` and
            ``actual_value``.
        test_id: Integer test number shown in the header line of every hit.
    """
    for hit in watchpoint_hits:
        print("-----------------------------------------------------------")
        print("watchpoint_hit for test_%u attributes:" % test_id)
        print("name = ", hit.name)
        print("slot = ", hit.slot)
        print("condition = ", hit.condition)
        print("watchpoint_id = ", hit.watchpoint_id)
        # Read the parameter list once per hit instead of once per printed field.
        for position, param in enumerate(hit.parameters):
            print("parameter ", position, " name = ", param.name)
            print("parameter ", position, " disabled = ", param.disabled)
            print("parameter ", position, " value = ", param.value)
            print("parameter ", position, " hit = ", param.hit)
            print("parameter ", position, " actual_value = ", param.actual_value)
        print("error code = ", hit.error_code)
        print("device_id = ", hit.device_id)
        print("root_graph_id = ", hit.root_graph_id)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
@ -0,0 +1,70 @@
|
||||
-----------------------------------------------------------
|
||||
tensor_info_1 attributes:
|
||||
node name = Default/network-WithLossCell/_backbone-AlexNet/conv2-Conv2d/conv2.bias
|
||||
slot = 0
|
||||
iteration = 2
|
||||
device_id = None
|
||||
root_graph_id = 0
|
||||
is_parameter = True
|
||||
|
||||
tensor_data_1 attributes:
|
||||
data (printed in uint8) = [ 1 0 0 0 195 127 0 0 80 58 118 65 195 127 0 0 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 40 186 117 65
|
||||
195 127 0 0 5 0 0 0 0 0 0 0 160 76 6 204 195 127
|
||||
0 0 69 0 0 0 0 0 0 0 1 0 0 0 195 127 0 0
|
||||
48 135 117 65 195 127 0 0 16 58 118 65 195 127 0 0 144 58
|
||||
118 65 195 127 0 0 168 186 117 65 195 127 0 0 6 0 0 0
|
||||
0 0 0 0 160 47 6 204 195 127 0 0 69 0 0 0 0 0
|
||||
0 0 1 0 0 0 195 127 0 0 80 58 118 65 195 127 0 0
|
||||
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 184 249
|
||||
117 65 195 127 0 0 7 0 0 0 0 0 0 0 96 39 6 204
|
||||
195 127 0 0 69 0 0 0 0 0 0 0 1 0 0 0 195 127
|
||||
0 0 224 218 117 65 195 127 0 0 0 0 0 0 0 0 0 0
|
||||
224 219 117 65 195 127 0 0 200 17 118 65 195 127 0 0 8 0
|
||||
0 0 0 0 0 0 128 130 5 204 195 127 0 0 69 0 0 0
|
||||
0 0 0 0 1 0 0 0 195 127 0 0 120 233 255 59 196 127
|
||||
0 0 224 217 117 65 195 127 0 0 224 214 117 65 195 127 0 0
|
||||
120 250 117 65 195 127 0 0 9 0 0 0 0 0 0 0 176 213
|
||||
4 204 195 127 0 0 69 0 0 0 0 0 0 0 1 0 0 0
|
||||
195 127 0 0 240 66 118 65 195 127 0 0 160 218 117 65 195 127
|
||||
0 0 224 215 117 65 195 127 0 0 40 9 118 65 195 127 0 0
|
||||
10 0 0 0 0 0 0 0 16 46 4 204 195 127 0 0 69 0
|
||||
0 0 0 0 0 0 1 0 0 0 195 127 0 0 208 59 118 65
|
||||
195 127 0 0 0 0 0 0 0 0 0 0 96 218 117 65 195 127
|
||||
0 0 56 251 117 65 195 127 0 0 11 0 0 0 0 0 0 0
|
||||
32 137 3 204 195 127 0 0 85 0 0 0 0 0 0 0 1 0
|
||||
0 0 195 127 0 0 224 214 117 65 195 127 0 0 144 59 118 65
|
||||
195 127 0 0 160 214 117 65 195 127 0 0 136 62 118 65 195 127
|
||||
0 0 12 0 0 0 0 0 0 0 48 104 15 204 195 127 0 0
|
||||
32 104 15 204 195 127 0 0]
|
||||
size in bytes = 512
|
||||
debugger dtype = 11
|
||||
shape = [128]
|
||||
-----------------------------------------------------------
|
||||
tensor_info_2 attributes:
|
||||
node name = Default/network-WithLossCell/_backbone-AlexNet/conv3-Conv2d/Conv2D-op308
|
||||
slot = 0
|
||||
iteration = 2
|
||||
device_id = None
|
||||
root_graph_id = 0
|
||||
is_parameter = False
|
||||
|
||||
tensor_data_2 attributes:
|
||||
data (printed in uint8) = [206 239 74 ... 53 201 62]
|
||||
size in bytes = 4153344
|
||||
debugger dtype = 11
|
||||
shape = [32, 192, 13, 13]
|
||||
-----------------------------------------------------------
|
||||
tensor_info_3 attributes:
|
||||
node name = Default/network-WithLossCell/_backbone-AlexNet/ReLUV2-op300
|
||||
slot = 1
|
||||
iteration = 2
|
||||
device_id = None
|
||||
root_graph_id = 0
|
||||
is_parameter = False
|
||||
|
||||
tensor_data_3 attributes:
|
||||
data (printed in uint8) = [206 239 74 ... 16 239 51]
|
||||
size in bytes = 831744
|
||||
debugger dtype = 8
|
||||
shape = [207936]
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue