From ba52d5045ed54c0dd15031bf5a5ec50b6f410ec3 Mon Sep 17 00:00:00 2001 From: Rockey <41846794+RockeyCoss@users.noreply.github.com> Date: Fri, 14 Jan 2022 15:19:23 +0800 Subject: [PATCH] [Feature] add log collector (#1175) * [Feature] add log collector * Update .dev/log_collector/readme.md Co-authored-by: Miao Zheng <76149310+MeowZheng@users.noreply.github.com> * Update .dev/log_collector/example_config.py Co-authored-by: Miao Zheng <76149310+MeowZheng@users.noreply.github.com> * fix typo and so on * modify readme * fix some bugs and revise the readme.md * more elegant * Update .dev/log_collector/readme.md Co-authored-by: Junjun2016 Co-authored-by: Miao Zheng <76149310+MeowZheng@users.noreply.github.com> Co-authored-by: Junjun2016 --- .dev/log_collector/example_config.py | 18 ++++ .dev/log_collector/log_collector.py | 143 +++++++++++++++++++++++++++ .dev/log_collector/readme.md | 143 +++++++++++++++++++++++++++ .dev/log_collector/utils.py | 20 ++++ 4 files changed, 324 insertions(+) create mode 100644 .dev/log_collector/example_config.py create mode 100644 .dev/log_collector/log_collector.py create mode 100644 .dev/log_collector/readme.md create mode 100644 .dev/log_collector/utils.py diff --git a/.dev/log_collector/example_config.py b/.dev/log_collector/example_config.py new file mode 100644 index 0000000..bc2b4d6 --- /dev/null +++ b/.dev/log_collector/example_config.py @@ -0,0 +1,18 @@ +work_dir = '../../work_dirs' +metric = 'mIoU' + +# specify the log files we would like to collect in `log_items` +log_items = [ + 'segformer_mit-b5_512x512_160k_ade20k_cnn_lr_with_warmup', + 'segformer_mit-b5_512x512_160k_ade20k_cnn_no_warmup_lr', + 'segformer_mit-b5_512x512_160k_ade20k_mit_trans_lr', + 'segformer_mit-b5_512x512_160k_ade20k_swin_trans_lr' +] +# or specify ignore_keywords, then the folders whose name contain +# `'segformer'` won't be collected +# ignore_keywords = ['segformer'] + +# should not include metric +other_info_keys = ['mAcc'] +markdown_file = 
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import datetime
import json
import os
import os.path as osp
from collections import OrderedDict

# automatically collect all the results

# The structure of the directory:
# ├── work-dir
# │   ├── config_1
# │   │   ├── time1.log.json
# │   │   ├── time2.log.json
# │   │   ├── time3.log.json
# │   │   ├── time4.log.json
# │   ├── config_2
# │   │   ├── time5.log.json
# │   │   ├── time6.log.json
# │   │   ├── time7.log.json
# │   │   ├── time8.log.json


def parse_args():
    """Parse the command line; the only argument is the config file path."""
    parser = argparse.ArgumentParser(description='extract info from log.json')
    parser.add_argument('config_dir')
    return parser.parse_args()


def has_keyword(name: str, keywords: list):
    """Return True if any entry of ``keywords`` is a substring of ``name``."""
    return any(a_keyword in name for a_keyword in keywords)


def _as_list(value):
    """Normalise an optional config value to a list.

    ``None`` and the empty string become ``[]``, a non-empty string becomes
    a one-element list, anything else is returned unchanged.
    """
    if value is None or value == '':
        return []
    if isinstance(value, str):
        return [value]
    return value


def _log_start_time(log_name):
    """Sort key: the timestamp encoded in a ``%Y%m%d_%H%M%S.log.json`` name."""
    return datetime.datetime.strptime(log_name, '%Y%m%d_%H%M%S.log.json')


def _read_log_records(log_path):
    """Parse one ``.log.json`` file into a list of dicts (one per line).

    The first line is skipped (the original code treats it as an info line)
    and blank lines are ignored. Each remaining line is parsed exactly once.
    """
    with open(log_path, 'r') as f:
        # ignore the info line
        f.readline()
        return [json.loads(line) for line in f if line.strip()]


def collect_experiment(work_dir, config_dir, metric, other_info_keys=()):
    """Summarise all ``.log.json`` files of one experiment folder.

    The logs in ``work_dir/config_dir`` are processed in the order of the
    timestamps in their file names, so that later files are treated as
    continuations of earlier ones.

    Args:
        work_dir (str): Directory that contains the experiment folders.
        config_dir (str): Name of the experiment folder inside ``work_dir``.
        metric (str): Key used to pick the best evaluation.
        other_info_keys (Iterable[str]): Extra keys copied from every
            evaluation record when present.

    Returns:
        dict: ``method``, ``metric_used``, ``last_iter``, ``last eval`` and
        ``best eval``; the two evaluations are ``OrderedDict`` objects with
        ``eval_index`` (1-based), the metric and the extra keys.

    Raises:
        AssertionError: If no record with ``mode == 'val'`` was found.
        KeyError: If an evaluation record does not contain ``metric``.
    """
    preceding_path = osp.join(work_dir, config_dir)
    log_list = sorted(
        (item for item in os.listdir(preceding_path)
         if item.endswith('.log.json')),
        key=_log_start_time)

    val_records = []
    last_iter = 0
    for log_name in log_list:
        records = _read_log_records(osp.join(preceding_path, log_name))
        val_records.extend(
            record for record in records if record.get('mode') == 'val')
        # Only the last training record of each file matters for last_iter.
        for record in reversed(records):
            if record.get('mode') == 'train':
                last_iter = max(last_iter, record['iter'])
                break

    # Raised explicitly (not via `assert`) so the check survives `python -O`.
    if not val_records:
        raise AssertionError(
            f"work dir {config_dir} doesn't contain any evaluation.")

    evaluations = []
    for index, record in enumerate(val_records, 1):
        if metric not in record:
            raise KeyError(
                f'metric {metric!r} is missing from evaluation {index} '
                f'of {config_dir}')
        summary = OrderedDict()
        summary['eval_index'] = index
        summary[metric] = record[metric]
        for key in other_info_keys:
            if key in record:
                summary[key] = record[key]
        evaluations.append(summary)

    new_log_dict = dict(
        method=config_dir, metric_used=metric, last_iter=last_iter)
    new_log_dict['last eval'] = evaluations[-1]
    new_log_dict['best eval'] = max(evaluations, key=lambda x: x[metric])
    return new_log_dict


def _write_markdown(markdown_file, experiment_info_list, metric):
    """Write the collected experiments as one markdown table."""
    lines_to_write = [
        f"|{index}|{log['method']}|{log['best eval'][metric]}"
        f"|{log['best eval']['eval_index']}|"
        f"{log['last eval'][metric]}|"
        f"{log['last eval']['eval_index']}|{log['last_iter']}|\n"
        for index, log in enumerate(experiment_info_list, 1)
    ]
    with open(markdown_file, 'w') as f:
        f.write(f'|exp_num|method|{metric} best|best index|'
                f'{metric} last|last index|last iter num|\n')
        f.write('|:---:|:---:|:---:|:---:|:---:|:---:|:---:|\n')
        f.writelines(lines_to_write)


def main():
    """Collect the logs described by the config file and write the reports."""
    # Imported here so that the helpers above stay importable when the
    # sibling `utils` module is not on `sys.path`.
    from utils import load_config

    args = parse_args()
    cfg = load_config(args.config_dir)
    work_dir = cfg['work_dir']
    metric = cfg['metric']
    # Normalise before validating: with a plain string, `in` below would be
    # a substring test instead of a membership test.
    log_items = _as_list(cfg.get('log_items', []))
    ignore_keywords = _as_list(cfg.get('ignore_keywords', []))
    other_info_keys = _as_list(cfg.get('other_info_keys', []))
    markdown_file = cfg.get('markdown_file', None)
    json_file = cfg.get('json_file', None)

    # Explicit raises keep the original exception type but are not removed
    # by `python -O`.
    if log_items and ignore_keywords:
        raise AssertionError(
            'log_items and ignore_keywords cannot be specified at the same '
            'time')
    if metric in other_info_keys:
        raise AssertionError('other_info_keys should not contain metric')

    # Create the output folders only once the config is known to be valid.
    for output_file in (json_file, markdown_file):
        if output_file and osp.dirname(output_file) != '':
            os.makedirs(osp.dirname(output_file), exist_ok=True)

    if not log_items:
        # Plain files inside work_dir cannot hold logs, so skip them.
        log_items = [
            item for item in sorted(os.listdir(work_dir))
            if osp.isdir(osp.join(work_dir, item))
            and not has_keyword(item, ignore_keywords)
        ]

    experiment_info_list = []
    for config_dir in log_items:
        experiment_info_list.append(
            collect_experiment(work_dir, config_dir, metric, other_info_keys))
        print(f'{config_dir} is processed')

    if json_file:
        with open(json_file, 'w') as f:
            json.dump(experiment_info_list, f, indent=4)

    if markdown_file:
        _write_markdown(markdown_file, experiment_info_list, metric)

    print('processed successfully')


if __name__ == '__main__':
    main()
+# The folders specified should be the subdirectories of `work_dir`. +log_items = [ + 'segformer_mit-b5_512x512_160k_ade20k_cnn_lr_with_warmup', + 'segformer_mit-b5_512x512_160k_ade20k_cnn_no_warmup_lr', + 'segformer_mit-b5_512x512_160k_ade20k_mit_trans_lr', + 'segformer_mit-b5_512x512_160k_ade20k_swin_trans_lr' +] +# Or specify `ignore_keywords`. The folders whose names contain one +# of the keywords in the `ignore_keywords` list (e.g., `'segformer'`) +# won't be collected. +# ignore_keywords = ['segformer'] + +# Other log items in .log.json that you want to collect. +# Should not include metric. +other_info_keys = ["mAcc"] +# The output markdown file's name. +markdown_file = 'markdowns/lr_in_trans.json.md' +# The output json file's name. (optional) +json_file = 'jsons/trans_in_cnn.json' +``` + + The structure of the work-dir directory should be like: + +```text +├── work-dir +│ ├── folder1 +│ │ ├── time1.log.json +│ │ ├── time2.log.json +│ │ ├── time3.log.json +│ │ ├── time4.log.json +│ ├── folder2 +│ │ ├── time5.log.json +│ │ ├── time6.log.json +│ │ ├── time7.log.json +│ │ ├── time8.log.json +``` + +Then, cd to the log collector folder. 
+ +Now you can run log_collector.py by using command: + +```bash +python log_collector.py ./example_config.py +``` + +The output markdown file is like: + +|exp_num|method|mIoU best|best index|mIoU last|last index|last iter num| +|:---:|:---:|:---:|:---:|:---:|:---:|:---:| +|1|segformer_mit-b5_512x512_160k_ade20k_cnn_lr_with_warmup|0.2776|10|0.2776|10|160000| +|2|segformer_mit-b5_512x512_160k_ade20k_cnn_no_warmup_lr|0.2802|10|0.2802|10|160000| +|3|segformer_mit-b5_512x512_160k_ade20k_mit_trans_lr|0.4943|11|0.4943|11|160000| +|4|segformer_mit-b5_512x512_160k_ade20k_swin_trans_lr|0.4883|11|0.4883|11|160000| + +The output json file is like: +```json +[ + { + "method": "segformer_mit-b5_512x512_160k_ade20k_cnn_lr_with_warmup", + "metric_used": "mIoU", + "last_iter": 160000, + "last eval": { + "eval_index": 10, + "mIoU": 0.2776, + "mAcc": 0.3779 + }, + "best eval": { + "eval_index": 10, + "mIoU": 0.2776, + "mAcc": 0.3779 + } + }, + { + "method": "segformer_mit-b5_512x512_160k_ade20k_cnn_no_warmup_lr", + "metric_used": "mIoU", + "last_iter": 160000, + "last eval": { + "eval_index": 10, + "mIoU": 0.2802, + "mAcc": 0.3764 + }, + "best eval": { + "eval_index": 10, + "mIoU": 0.2802, + "mAcc": 0.3764 + } + }, + { + "method": "segformer_mit-b5_512x512_160k_ade20k_mit_trans_lr", + "metric_used": "mIoU", + "last_iter": 160000, + "last eval": { + "eval_index": 11, + "mIoU": 0.4943, + "mAcc": 0.6097 + }, + "best eval": { + "eval_index": 11, + "mIoU": 0.4943, + "mAcc": 0.6097 + } + }, + { + "method": "segformer_mit-b5_512x512_160k_ade20k_swin_trans_lr", + "metric_used": "mIoU", + "last_iter": 160000, + "last eval": { + "eval_index": 11, + "mIoU": 0.4883, + "mAcc": 0.6061 + }, + "best eval": { + "eval_index": 11, + "mIoU": 0.4883, + "mAcc": 0.6061 + } + } +] +``` diff --git a/.dev/log_collector/utils.py b/.dev/log_collector/utils.py new file mode 100644 index 0000000..848516a --- /dev/null +++ b/.dev/log_collector/utils.py @@ -0,0 +1,20 @@ +# Copyright (c) OpenMMLab. 
# All rights reserved.
# modified from https://github.dev/open-mmlab/mmcv
import os.path as osp
import sys
from importlib import import_module


def load_config(cfg_dir: str) -> dict:
    """Import a ``.py`` config file and return its public names as a dict.

    The file's directory is put at the front of ``sys.path`` while the file
    is imported as a module named after its stem. Both ``sys.path`` and
    ``sys.modules`` are restored afterwards, also when the import fails.

    Args:
        cfg_dir (str): Path of the config file; must end with ``.py``.

    Returns:
        dict: Every module-level name of the config that does not start
        with a double underscore.

    Raises:
        AssertionError: If ``cfg_dir`` does not end with ``.py``.
    """
    assert cfg_dir.endswith('.py'), \
        f'config file should be a .py file, but got {cfg_dir}'
    root_path, file_name = osp.split(cfg_dir)
    temp_module = osp.splitext(file_name)[0]

    # A module with the same name may already be imported (e.g. a config
    # called `json.py`). Without setting it aside, `import_module` would
    # return that cached module instead of the config file.
    shadowed = sys.modules.pop(temp_module, None)
    sys.path.insert(0, root_path)
    try:
        mod = import_module(temp_module)
    finally:
        # Undo the path change even if the config raised while importing.
        try:
            sys.path.remove(root_path)
        except ValueError:
            pass
        # Drop the temporary module and put back whatever it displaced.
        sys.modules.pop(temp_module, None)
        if shadowed is not None:
            sys.modules[temp_module] = shadowed

    return {
        k: v
        for k, v in mod.__dict__.items() if not k.startswith('__')
    }