import os
from argparse import ArgumentParser

import cv2
import torch

from mmpose.apis import (inference_bottom_up_pose_model, init_pose_model,
                         vis_pose_result)
from mmcv import Config, DictAction
from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
from mmcv.runner import get_dist_info, init_dist, load_checkpoint
from models import build_posenet


def _build_parser():
    """Create the command-line parser for the video demo."""
    parser = ArgumentParser()
    parser.add_argument('pose_config', help='Config file for pose')
    parser.add_argument('pose_checkpoint', help='Checkpoint file for pose')
    parser.add_argument('--video-path', type=str, help='Video path')
    parser.add_argument(
        '--show',
        action='store_true',
        default=False,
        help='whether to show visualizations.')
    parser.add_argument(
        '--out-video-root',
        default='',
        help='Root of the output video file. '
        'Default not saving the visualization video.')
    parser.add_argument(
        '--device', default='cuda:0', help='Device used for inference')
    parser.add_argument(
        '--kpt-thr', type=float, default=0.3, help='Keypoint score threshold')
    return parser


def _build_model(cfg, checkpoint, device):
    """Build the pose network from ``cfg``, load ``checkpoint``, prepare it.

    Args:
        cfg: Config object loaded from the pose config file.
        checkpoint: Path of the checkpoint file to load.
        device: Device string the model is moved to (e.g. ``'cuda:0'``).

    Returns:
        The model in eval mode on ``device`` with ``cfg`` attached as
        ``model.cfg``.
    """
    model = build_posenet(cfg.model)
    if cfg.get('fp16', None) is not None:
        # Imported lazily so the script still starts on installations where
        # only one of the two locations provides the helper.
        try:
            from mmcv.runner import wrap_fp16_model
        except ImportError:
            from mmpose.core import wrap_fp16_model
        wrap_fp16_model(model)
    load_checkpoint(model, checkpoint, map_location='cpu')

    # The model is handed straight to the mmpose inference helpers, so set
    # it up the way ``init_pose_model`` would (per the mmpose API): attach
    # the config, move to the requested device and switch to eval mode.
    model.cfg = cfg
    model.to(device)
    model.eval()
    return model


def _create_video_writer(cap, out_root, video_path):
    """Open an mp4 writer in ``out_root`` matching the fps/size of ``cap``."""
    os.makedirs(out_root, exist_ok=True)
    fps = cap.get(cv2.CAP_PROP_FPS)
    size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    return cv2.VideoWriter(
        os.path.join(out_root, f'vis_{os.path.basename(video_path)}'),
        fourcc, fps, size)


def main():
    """Run bottom-up pose estimation on a video and visualize each frame.

    Frames are read from ``--video-path``, passed through the pose model and
    drawn with ``vis_pose_result``. The rendered frames are shown in a window
    (``--show``) and/or written to ``--out-video-root`` as
    ``vis_<input file name>``. With ``--show``, pressing ``q`` stops early.

    Raises:
        SystemExit: If neither ``--show`` nor ``--out-video-root`` is given,
            or ``--video-path`` is missing (reported via ``parser.error``).
        OSError: If the input video cannot be opened.
    """
    parser = _build_parser()
    args = parser.parse_args()

    # Explicit checks instead of ``assert`` so they survive ``python -O``.
    if not (args.show or args.out_video_root):
        parser.error('specify --show and/or --out-video-root')
    if not args.video_path:
        parser.error('--video-path is required')

    cfg = Config.fromfile(args.pose_config)
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    model = _build_model(cfg, args.pose_checkpoint, args.device.lower())
    dataset = cfg.data['test']['type']

    cap = cv2.VideoCapture(args.video_path)
    video_writer = None
    try:
        if not cap.isOpened():
            raise OSError(f'Cannot open video: {args.video_path}')

        if args.out_video_root:
            video_writer = _create_video_writer(cap, args.out_video_root,
                                                args.video_path)

        # optional
        return_heatmap = False

        # e.g. use ('backbone', ) to return backbone feature
        output_layer_names = None

        while cap.isOpened():
            flag, img = cap.read()
            if not flag:
                break

            pose_results, _ = inference_bottom_up_pose_model(
                model,
                img,
                return_heatmap=return_heatmap,
                outputs=output_layer_names)

            # Draw the predicted poses onto the frame.
            vis_img = vis_pose_result(
                model,
                img,
                pose_results,
                dataset=dataset,
                kpt_score_thr=args.kpt_thr,
                show=False)

            if video_writer is not None:
                video_writer.write(vis_img)

            if args.show:
                cv2.imshow('Image', vis_img)
                # Key presses are only delivered while a window exists, so
                # polling is limited to --show (also keeps headless OpenCV
                # builds usable for save-only runs).
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
    finally:
        # Release capture/writer even when inference fails mid-video so the
        # output file is finalized.
        cap.release()
        if video_writer is not None:
            video_writer.release()
        if args.show:
            cv2.destroyAllWindows()


if __name__ == '__main__':
    main()