# Copyright 2020 Huawei Technologies Co., Ltd # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # http://www.apache.org/licenses/LICENSE-2.0 # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================ import json import os import argparse import warnings import sys import numpy as np from tqdm import tqdm import cv2 from scipy.ndimage.filters import gaussian_filter from pycocotools.coco import COCO as LoadAnn from pycocotools.cocoeval import COCOeval as MapEval from mindspore import context, Tensor from mindspore.train.serialization import load_checkpoint, load_param_into_net from mindspore.communication.management import init, get_rank, get_group_size from mindspore.common import dtype as mstype from src.config import params, JointType from src.openposenet import OpenPoseNet from src.dataset import valdata warnings.filterwarnings("ignore") devid = int(os.getenv('DEVICE_ID')) context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False, device_id=devid) show_gt = 0 parser = argparse.ArgumentParser('mindspore openpose_net test') parser.add_argument('--model_path', type=str, default='./0-33_170000.ckpt', help='path of testing model') parser.add_argument('--imgpath_val', type=str, default='./dataset/coco/val2017', help='path of testing imgs') parser.add_argument('--ann', type=str, default='./dataset/coco/annotations/person_keypoints_val2017.json', help='path of annotations') parser.add_argument('--output_path', type=str, default='./output_img', help='path of testing imgs') # distributed related parser.add_argument('--is_distributed', type=int, default=0, help='if multi device') parser.add_argument('--rank', type=int, default=0, help='local rank of distributed') parser.add_argument('--group_size', type=int, default=1, help='world size of distributed') args, _ = parser.parse_known_args() def evaluate_mAP(res_file, ann_file, ann_type='keypoints', silence=True): class NullWriter(): def write(self, arg): pass if silence: nullwrite = NullWriter() oldstdout = sys.stdout sys.stdout = nullwrite # disable output Gt = LoadAnn(ann_file) Dt = Gt.loadRes(res_file) Eval = MapEval(Gt, Dt, ann_type) Eval.evaluate() Eval.accumulate() Eval.summarize() if silence: sys.stdout = oldstdout # enable output stats_names = ['AP', 'Ap .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5', 'AR .75', 'AR (M)', 'AR (L)'] info_str = {} for ind, name in enumerate(stats_names): info_str[name] = Eval.stats[ind] return info_str def load_model(test_net, model_path): assert os.path.exists(model_path) param_dict = load_checkpoint(model_path) param_dict_new = {} for key, values in param_dict.items(): if key.startswith('moment'): continue elif key.startswith('network'): param_dict_new[key[8:]] = values # else: # param_dict_new[key] = values load_param_into_net(test_net, param_dict_new) def preprocess(img): x_data = img.astype('f') x_data /= 255 x_data -= 0.5 x_data = x_data.transpose(2, 0, 1)[None] return x_data def getImgsPath(img_dir_path): filepaths = [] dirpaths = [] pathName = img_dir_path for root, dirs, files in os.walk(pathName): for file in files: file_path = os.path.join(root, file) filepaths.append(file_path) for d in dirs: dir_path = os.path.join(root, d) dirpaths.append(dir_path) return filepaths def compute_optimal_size(orig_img, img_size, stride=8): orig_img_h, orig_img_w, _ = orig_img.shape aspect = orig_img_h / orig_img_w if orig_img_h < orig_img_w: img_h = img_size img_w = np.round(img_size / aspect).astype(int) surplus = img_w % stride if surplus != 0: img_w += stride - surplus else: img_w = img_size img_h = np.round(img_size * aspect).astype(int) surplus = img_h % stride if surplus != 0: img_h += stride - surplus return (img_w, img_h) def compute_peaks_from_heatmaps(heatmaps): heatmaps = heatmaps[:-1] all_peaks = [] peak_counter = 0 for i, heatmap in enumerate(heatmaps): heatmap = gaussian_filter(heatmap, sigma=params['gaussian_sigma']) map_left = np.zeros(heatmap.shape) map_right = np.zeros(heatmap.shape) map_top = np.zeros(heatmap.shape) map_bottom = np.zeros(heatmap.shape) map_left[1:, :] = heatmap[:-1, :] map_right[:-1, :] = heatmap[1:, :] map_top[:, 1:] = heatmap[:, :-1] map_bottom[:, :-1] = heatmap[:, 1:] peaks_binary = np.logical_and.reduce(( heatmap > params['heatmap_peak_thresh'], heatmap > map_left, heatmap > map_right, heatmap > map_top, heatmap > map_bottom, )) peaks = zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0]) peaks_with_score = [(i,) + peak_pos + (heatmap[peak_pos[1], peak_pos[0]],) for peak_pos in peaks] peaks_id = range(peak_counter, peak_counter + len(peaks_with_score)) peaks_with_score_and_id = [peaks_with_score[i] + (peaks_id[i],) for i in range(len(peaks_id))] peak_counter += len(peaks_with_score_and_id) all_peaks.append(peaks_with_score_and_id) all_peaks = np.array([peak for peaks_each_category in all_peaks for peak in peaks_each_category]) return all_peaks def compute_candidate_connections(paf, cand_a, cand_b, img_len, params_): candidate_connections = [] for joint_a in cand_a: for joint_b in cand_b: vector = joint_b[:2] - joint_a[:2] norm = np.linalg.norm(vector) if norm == 0: continue ys = np.linspace(joint_a[1], joint_b[1], num=params_['n_integ_points']) xs = np.linspace(joint_a[0], joint_b[0], num=params_['n_integ_points']) integ_points = np.stack([ys, xs]).T.round().astype('i') paf_in_edge = np.hstack([paf[0][np.hsplit(integ_points, 2)], paf[1][np.hsplit(integ_points, 2)]]) unit_vector = vector / norm inner_products = np.dot(paf_in_edge, unit_vector) integ_value = inner_products.sum() / len(inner_products) integ_value_with_dist_prior = integ_value + min(params_['limb_length_ratio'] * img_len / norm - params_['length_penalty_value'], 0) n_valid_points = sum(inner_products > params_['inner_product_thresh']) if n_valid_points > params_['n_integ_points_thresh'] and integ_value_with_dist_prior > 0: candidate_connections.append([int(joint_a[3]), int(joint_b[3]), integ_value_with_dist_prior]) candidate_connections = sorted(candidate_connections, key=lambda x: x[2], reverse=True) return candidate_connections def compute_connections(pafs, all_peaks, img_len, params_): all_connections = [] for i in range(len(params_['limbs_point'])): paf_index = [i * 2, i * 2 + 1] paf = pafs[paf_index] # shape: (2, 320, 320) limb_point = params_['limbs_point'][i] # example: [, ] cand_a = all_peaks[all_peaks[:, 0] == limb_point[0]][:, 1:] cand_b = all_peaks[all_peaks[:, 0] == limb_point[1]][:, 1:] if cand_a.shape[0] > 0 and cand_b.shape[0] > 0: candidate_connections = compute_candidate_connections(paf, cand_a, cand_b, img_len, params_) connections = np.zeros((0, 3)) for index_a, index_b, score in candidate_connections: if index_a not in connections[:, 0] and index_b not in connections[:, 1]: connections = np.vstack([connections, [index_a, index_b, score]]) if len(connections) >= min(len(cand_a), len(cand_b)): break all_connections.append(connections) else: all_connections.append(np.zeros((0, 3))) return all_connections def grouping_key_points(all_connections, candidate_peaks, params_): subsets = -1 * np.ones((0, 20)) for l, connections in enumerate(all_connections): joint_a, joint_b = params_['limbs_point'][l] for ind_a, ind_b, score in connections[:, :3]: ind_a, ind_b = int(ind_a), int(ind_b) joint_found_cnt = 0 joint_found_subset_index = [-1, -1] for subset_ind, subset in enumerate(subsets): if subset[joint_a] == ind_a or subset[joint_b] == ind_b: joint_found_subset_index[joint_found_cnt] = subset_ind joint_found_cnt += 1 if joint_found_cnt == 1: found_subset = subsets[joint_found_subset_index[0]] if found_subset[joint_b] != ind_b: found_subset[joint_b] = ind_b found_subset[-1] += 1 # increment joint count found_subset[-2] += candidate_peaks[ind_b, 3] + score elif joint_found_cnt == 2: found_subset_1 = subsets[joint_found_subset_index[0]] found_subset_2 = subsets[joint_found_subset_index[1]] membership = ((found_subset_1 >= 0).astype(int) + (found_subset_2 >= 0).astype(int))[:-2] if not np.any(membership == 2): # merge two subsets when no duplication found_subset_1[:-2] += found_subset_2[:-2] + 1 # default is -1 found_subset_1[-2:] += found_subset_2[-2:] found_subset_1[-2] += score subsets = np.delete(subsets, joint_found_subset_index[1], axis=0) else: if found_subset_1[joint_a] == -1: found_subset_1[joint_a] = ind_a found_subset_1[-1] += 1 found_subset_1[-2] += candidate_peaks[ind_a, 3] + score elif found_subset_1[joint_b] == -1: found_subset_1[joint_b] = ind_b found_subset_1[-1] += 1 found_subset_1[-2] += candidate_peaks[ind_b, 3] + score if found_subset_2[joint_a] == -1: found_subset_2[joint_a] = ind_a found_subset_2[-1] += 1 found_subset_2[-2] += candidate_peaks[ind_a, 3] + score elif found_subset_2[joint_b] == -1: found_subset_2[joint_b] = ind_b found_subset_2[-1] += 1 found_subset_2[-2] += candidate_peaks[ind_b, 3] + score elif joint_found_cnt == 0 and l != 9 and l != 13: row = -1 * np.ones(20) row[joint_a] = ind_a row[joint_b] = ind_b row[-1] = 2 row[-2] = sum(candidate_peaks[[ind_a, ind_b], 3]) + score subsets = np.vstack([subsets, row]) elif joint_found_cnt >= 3: pass # delete low score subsets keep = np.logical_and(subsets[:, -1] >= params_['n_subset_limbs_thresh'], subsets[:, -2] / subsets[:, -1] >= params_['subset_score_thresh']) subsets = subsets[keep] return subsets def subsets_to_pose_array(subsets, all_peaks): person_pose_array = [] for subset in subsets: joints = [] for joint_index in subset[:18].astype('i'): if joint_index >= 0: joint = all_peaks[joint_index][1:3].tolist() joint.append(2) joints.append(joint) else: joints.append([0, 0, 0]) person_pose_array.append(np.array(joints)) person_pose_array = np.array(person_pose_array) return person_pose_array def detect(img, network): orig_img = img.copy() orig_img_h, orig_img_w, _ = orig_img.shape input_w, input_h = compute_optimal_size(orig_img, params['inference_img_size']) # 368 # map_w, map_h = compute_optimal_size(orig_img, params['heatmap_size']) # 320 map_w, map_h = compute_optimal_size(orig_img, params['inference_img_size']) # print("image size is: ", input_w, input_h) resized_image = cv2.resize(orig_img, (input_w, input_h)) x_data = preprocess(resized_image) x_data = Tensor(x_data, mstype.float32) x_data.requires_grad = False logit_pafs, logit_heatmap = network(x_data) logit_pafs = logit_pafs[-1].asnumpy()[0] logit_heatmap = logit_heatmap[-1].asnumpy()[0] pafs = np.zeros((logit_pafs.shape[0], map_h, map_w)) for i in range(logit_pafs.shape[0]): pafs[i] = cv2.resize(logit_pafs[i], (map_w, map_h)) if show_gt: save_path = "./test_output/" + str(i) + "pafs.png" cv2.imwrite(save_path, pafs[i]*255) heatmaps = np.zeros((logit_heatmap.shape[0], map_h, map_w)) for i in range(logit_heatmap.shape[0]): heatmaps[i] = cv2.resize(logit_heatmap[i], (map_w, map_h)) if show_gt: save_path = "./test_output/" + str(i) + "heatmap.png" cv2.imwrite(save_path, heatmaps[i]*255) all_peaks = compute_peaks_from_heatmaps(heatmaps) if all_peaks.shape[0] == 0: return np.empty((0, len(JointType), 3)), np.empty(0) all_connections = compute_connections(pafs, all_peaks, map_w, params) subsets = grouping_key_points(all_connections, all_peaks, params) all_peaks[:, 1] *= orig_img_w / map_w all_peaks[:, 2] *= orig_img_h / map_h poses = subsets_to_pose_array(subsets, all_peaks) scores = subsets[:, -2] return poses, scores def draw_person_pose(orig_img, poses): orig_img = cv2.cvtColor(orig_img, cv2.COLOR_BGR2RGB) if poses.shape[0] == 0: return orig_img limb_colors = [ [0, 255, 0], [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0.], [255, 0, 85], [170, 255, 0], [85, 255, 0], [170, 0, 255.], [0, 0, 255], [0, 0, 255], [255, 0, 255], [170, 0, 255], [255, 0, 170], ] joint_colors = [ [255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0], [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]] canvas = orig_img.copy() # limbs for pose in poses.round().astype('i'): for i, (limb, color) in enumerate(zip(params['limbs_point'], limb_colors)): if i not in (9, 13): # don't show ear-shoulder connection limb_ind = np.array(limb) if np.all(pose[limb_ind][:, 2] != 0): joint1, joint2 = pose[limb_ind][:, :2] cv2.line(canvas, tuple(joint1), tuple(joint2), color, 2) # joints for pose in poses.round().astype('i'): for i, ((x, y, v), color) in enumerate(zip(pose, joint_colors)): if v != 0: cv2.circle(canvas, (x, y), 3, color, -1) return canvas def depreprocess(img): #x_data = img.astype('f') x_data = img[0] x_data += 0.5 x_data *= 255 x_data = x_data.astype('uint8') x_data = x_data.transpose(1, 2, 0) return x_data def val(): if args.is_distributed: init() args.rank = get_rank() args.group_size = get_group_size() if not os.path.exists(args.output_path): os.mkdir(args.output_path) network = OpenPoseNet(vgg_with_bn=params['vgg_with_bn']) network.set_train(False) load_model(network, args.model_path) print("load models right") dataset = valdata(args.ann, args.imgpath_val, args.rank, args.group_size, mode='val') dataset_size = dataset.get_dataset_size() de_dataset = dataset.create_tuple_iterator() print("eval dataset size: ", dataset_size) kpt_json = [] for _, (img, img_id) in tqdm(enumerate(de_dataset), total=dataset_size): img = img.asnumpy() img_id = int((img_id.asnumpy())[0]) poses, scores = detect(img, network) if poses.shape[0] > 0: #print("got poses") for index, pose in enumerate(poses): data = dict() pose = pose[[0, 15, 14, 17, 16, 5, 2, 6, 3, 7, 4, 11, 8, 12, 9, 13, 10, 1], :].round().astype('i') keypoints = pose.reshape(-1).tolist() keypoints = keypoints[:-3] data['image_id'] = img_id data['score'] = scores[index] data['category_id'] = 1 data['keypoints'] = keypoints kpt_json.append(data) else: print("Predict poses size is zero.", flush=True) img = draw_person_pose(cv2.cvtColor(img, cv2.COLOR_BGR2RGB), poses) #print('Saving result into',str(img_id)+'.png...') save_path = os.path.join(args.output_path, str(img_id)+".png") cv2.imwrite(save_path, img) result_json = 'eval_result.json' with open(os.path.join(args.output_path, result_json), 'w') as fid: json.dump(kpt_json, fid) res = evaluate_mAP(os.path.join(args.output_path, result_json), ann_file=args.ann) print('result: ', res) if __name__ == "__main__": val()