From a2738fd9befee1a30aaecd60ec997394e439a542 Mon Sep 17 00:00:00 2001 From: yamengxi <49829199+yamengxi@users.noreply.github.com> Date: Tue, 22 Sep 2020 14:56:13 +0800 Subject: [PATCH] Add Pascal Context to mmsegmentation (#133) * Add Pascal Context to mmsegmentation * Add benchmark result to Pascal Context * fix mmcv version * fix code syntax * fix code syntax again * Update mmseg/models/segmentors/encoder_decoder.py update hint Co-authored-by: Jerry Jiarui XU * update comment * fix pascal context model path * fix model path mistake again * fix model path mistake again * fix model path mistakes again Co-authored-by: Jerry Jiarui XU --- configs/_base_/datasets/pascal_context.py | 60 +++++++++++++ configs/deeplabv3/README.md | 6 ++ ...abv3_r101-d8_480x480_40k_pascal_context.py | 2 + ...abv3_r101-d8_480x480_80k_pascal_context.py | 2 + ...labv3_r50-d8_480x480_40k_pascal_context.py | 9 ++ ...labv3_r50-d8_480x480_80k_pascal_context.py | 9 ++ configs/deeplabv3plus/README.md | 6 ++ ...plus_r101-d8_480x480_40k_pascal_context.py | 2 + ...plus_r101-d8_480x480_80k_pascal_context.py | 2 + ...3plus_r50-d8_480x480_40k_pascal_context.py | 9 ++ ...3plus_r50-d8_480x480_80k_pascal_context.py | 9 ++ configs/fcn/README.md | 6 ++ .../fcn_r101-d8_480x480_40k_pascal_context.py | 2 + .../fcn_r101-d8_480x480_80k_pascal_context.py | 2 + .../fcn_r50-d8_480x480_40k_pascal_context.py | 7 ++ .../fcn_r50-d8_480x480_80k_pascal_context.py | 7 ++ configs/hrnet/README.md | 6 ++ .../fcn_hr18_480x480_40k_pascal_context.py | 7 ++ .../fcn_hr18_480x480_80k_pascal_context.py | 7 ++ .../fcn_hr18s_480x480_40k_pascal_context.py | 9 ++ .../fcn_hr18s_480x480_80k_pascal_context.py | 9 ++ .../fcn_hr48_480x480_40k_pascal_context.py | 10 +++ .../fcn_hr48_480x480_80k_pascal_context.py | 10 +++ configs/pspnet/README.md | 6 ++ ...pnet_r101-d8_480x480_40k_pascal_context.py | 2 + ...pnet_r101-d8_480x480_80k_pascal_context.py | 2 + ...spnet_r50-d8_480x480_40k_pascal_context.py | 9 ++ 
...spnet_r50-d8_480x480_80k_pascal_context.py | 9 ++ docs/getting_started.md | 19 ++++ mmseg/datasets/__init__.py | 3 +- mmseg/datasets/pascal_context.py | 54 ++++++++++++ mmseg/models/segmentors/encoder_decoder.py | 8 +- setup.cfg | 2 +- tools/convert_datasets/pascal_context.py | 86 +++++++++++++++++++ 34 files changed, 393 insertions(+), 5 deletions(-) create mode 100644 configs/_base_/datasets/pascal_context.py create mode 100644 configs/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context.py create mode 100644 configs/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context.py create mode 100644 configs/deeplabv3/deeplabv3_r50-d8_480x480_40k_pascal_context.py create mode 100644 configs/deeplabv3/deeplabv3_r50-d8_480x480_80k_pascal_context.py create mode 100644 configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context.py create mode 100644 configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context.py create mode 100644 configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_40k_pascal_context.py create mode 100644 configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_80k_pascal_context.py create mode 100644 configs/fcn/fcn_r101-d8_480x480_40k_pascal_context.py create mode 100644 configs/fcn/fcn_r101-d8_480x480_80k_pascal_context.py create mode 100644 configs/fcn/fcn_r50-d8_480x480_40k_pascal_context.py create mode 100644 configs/fcn/fcn_r50-d8_480x480_80k_pascal_context.py create mode 100644 configs/hrnet/fcn_hr18_480x480_40k_pascal_context.py create mode 100644 configs/hrnet/fcn_hr18_480x480_80k_pascal_context.py create mode 100644 configs/hrnet/fcn_hr18s_480x480_40k_pascal_context.py create mode 100644 configs/hrnet/fcn_hr18s_480x480_80k_pascal_context.py create mode 100644 configs/hrnet/fcn_hr48_480x480_40k_pascal_context.py create mode 100644 configs/hrnet/fcn_hr48_480x480_80k_pascal_context.py create mode 100644 configs/pspnet/pspnet_r101-d8_480x480_40k_pascal_context.py create mode 100644 
configs/pspnet/pspnet_r101-d8_480x480_80k_pascal_context.py create mode 100644 configs/pspnet/pspnet_r50-d8_480x480_40k_pascal_context.py create mode 100644 configs/pspnet/pspnet_r50-d8_480x480_80k_pascal_context.py create mode 100644 mmseg/datasets/pascal_context.py create mode 100644 tools/convert_datasets/pascal_context.py diff --git a/configs/_base_/datasets/pascal_context.py b/configs/_base_/datasets/pascal_context.py new file mode 100644 index 0000000..a00e474 --- /dev/null +++ b/configs/_base_/datasets/pascal_context.py @@ -0,0 +1,60 @@ +# dataset settings +dataset_type = 'PascalContextDataset' +data_root = 'data/VOCdevkit/VOC2010/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) + +img_scale = (520, 520) +crop_size = (480, 480) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale, + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClassContext', + split='ImageSets/SegmentationContext/train.txt', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + 
data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClassContext', + split='ImageSets/SegmentationContext/val.txt', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClassContext', + split='ImageSets/SegmentationContext/val.txt', + pipeline=test_pipeline)) diff --git a/configs/deeplabv3/README.md b/configs/deeplabv3/README.md index 37e2ee6..e67857e 100644 --- a/configs/deeplabv3/README.md +++ b/configs/deeplabv3/README.md @@ -41,3 +41,9 @@ Note: `D-8` here corresponding to the output stride 8 setting for DeepLab series | DeepLabV3 | R-101-D8 | 512x512 | 20000 | 9.6 | 9.81 | 78.70 | 79.95 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_20k_voc12aug/deeplabv3_r101-d8_512x512_20k_voc12aug_20200617_010932-8d13832f.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_20k_voc12aug/deeplabv3_r101-d8_512x512_20k_voc12aug_20200617_010932.log.json) | | DeepLabV3 | R-50-D8 | 512x512 | 40000 | - | - | 77.68 | 78.78 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug/deeplabv3_r50-d8_512x512_40k_voc12aug_20200613_161546-2ae96e7e.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug/deeplabv3_r50-d8_512x512_40k_voc12aug_20200613_161546.log.json) | | DeepLabV3 | R-101-D8 | 512x512 | 40000 | - | - | 77.92 | 79.18 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_40k_voc12aug/deeplabv3_r101-d8_512x512_40k_voc12aug_20200613_161432-0017d784.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_40k_voc12aug/deeplabv3_r101-d8_512x512_40k_voc12aug_20200613_161432.log.json) | + +### Pascal Context +| 
Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|-----------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| DeepLabV3 | R-101-D8 | 480x480 | 40000 | 9.2 | 7.09 | 46.55 | 47.81 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context/deeplabv3_r101-d8_480x480_40k_pascal_context_20200911_204118-1aa27336.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context/deeplabv3_r101-d8_480x480_40k_pascal_context-20200911_204118.log.json) | +| DeepLabV3 | R-101-D8 | 480x480 | 80000 | - | - | 46.42 | 47.53 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context/deeplabv3_r101-d8_480x480_80k_pascal_context_20200911_170155-2a21fff3.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context/deeplabv3_r101-d8_480x480_80k_pascal_context-20200911_170155.log.json) | diff --git a/configs/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context.py b/configs/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context.py new file mode 100644 index 0000000..0b5256f --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_480x480_40k_pascal_context.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff 
--git a/configs/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context.py b/configs/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context.py new file mode 100644 index 0000000..001b7a6 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_480x480_80k_pascal_context.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3/deeplabv3_r50-d8_480x480_40k_pascal_context.py b/configs/deeplabv3/deeplabv3_r50-d8_480x480_40k_pascal_context.py new file mode 100644 index 0000000..0cdb262 --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r50-d8_480x480_40k_pascal_context.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', + '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=60), auxiliary_head=dict(num_classes=60)) +test_cfg = dict(mode='slide', crop_size=(480, 480), stride=(320, 320)) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/configs/deeplabv3/deeplabv3_r50-d8_480x480_80k_pascal_context.py b/configs/deeplabv3/deeplabv3_r50-d8_480x480_80k_pascal_context.py new file mode 100644 index 0000000..84e831a --- /dev/null +++ b/configs/deeplabv3/deeplabv3_r50-d8_480x480_80k_pascal_context.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', + '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=60), auxiliary_head=dict(num_classes=60)) +test_cfg = dict(mode='slide', crop_size=(480, 480), stride=(320, 320)) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/configs/deeplabv3plus/README.md b/configs/deeplabv3plus/README.md index 591554d..cdfaba1 100644 --- a/configs/deeplabv3plus/README.md +++ 
b/configs/deeplabv3plus/README.md @@ -41,3 +41,9 @@ Note: `D-8` here corresponding to the output stride 8 setting for DeepLab series | DeepLabV3+ | R-101-D8 | 512x512 | 20000 | 11 | 13.88 | 77.22 | 78.59 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_20k_voc12aug/deeplabv3plus_r101-d8_512x512_20k_voc12aug_20200617_102345-c7ff3d56.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_20k_voc12aug/deeplabv3plus_r101-d8_512x512_20k_voc12aug_20200617_102345.log.json) | | DeepLabV3+ | R-50-D8 | 512x512 | 40000 | - | - | 76.81 | 77.57 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug/deeplabv3plus_r50-d8_512x512_40k_voc12aug_20200613_161759-e1b43aa9.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug/deeplabv3plus_r50-d8_512x512_40k_voc12aug_20200613_161759.log.json) | | DeepLabV3+ | R-101-D8 | 512x512 | 40000 | - | - | 78.62 | 79.53 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_40k_voc12aug/deeplabv3plus_r101-d8_512x512_40k_voc12aug_20200613_205333-faf03387.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_40k_voc12aug/deeplabv3plus_r101-d8_512x512_40k_voc12aug_20200613_205333.log.json) | + +#### Pascal Context +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | 
+|------------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| DeepLabV3+ | R-101-D8 | 480x480 | 40000 | - | 9.09 | 47.30 | 48.47 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context/deeplabv3plus_r101-d8_480x480_40k_pascal_context_20200911_165459-d3c8a29e.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context/deeplabv3plus_r101-d8_480x480_40k_pascal_context-20200911_165459.log.json) | +| DeepLabV3+ | R-101-D8 | 480x480 | 80000 | - | - | 47.23 | 48.26 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context/deeplabv3plus_r101-d8_480x480_80k_pascal_context_20200911_155322-145d3ee8.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context/deeplabv3plus_r101-d8_480x480_80k_pascal_context-20200911_155322.log.json) | diff --git a/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context.py b/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context.py new file mode 100644 index 0000000..68e2b07 --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_480x480_40k_pascal_context.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff 
--git a/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context.py b/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context.py new file mode 100644 index 0000000..3a46c28 --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_480x480_80k_pascal_context.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_40k_pascal_context.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_40k_pascal_context.py new file mode 100644 index 0000000..ee548fb --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_40k_pascal_context.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', + '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=60), auxiliary_head=dict(num_classes=60)) +test_cfg = dict(mode='slide', crop_size=(480, 480), stride=(320, 320)) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_80k_pascal_context.py b/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_80k_pascal_context.py new file mode 100644 index 0000000..604cf2b --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_80k_pascal_context.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', + '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=60), auxiliary_head=dict(num_classes=60)) +test_cfg = dict(mode='slide', crop_size=(480, 480), stride=(320, 320)) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/configs/fcn/README.md b/configs/fcn/README.md index 6ec2080..25c966c 100644 --- 
a/configs/fcn/README.md +++ b/configs/fcn/README.md @@ -43,3 +43,9 @@ | FCN | R-101-D8 | 512x512 | 20000 | 9.2 | 14.81 | 71.16 | 73.57 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_20k_voc12aug/fcn_r101-d8_512x512_20k_voc12aug_20200617_010842-0bb4e798.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_20k_voc12aug/fcn_r101-d8_512x512_20k_voc12aug_20200617_010842.log.json) | | FCN | R-50-D8 | 512x512 | 40000 | - | - | 66.97 | 69.04 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_40k_voc12aug/fcn_r50-d8_512x512_40k_voc12aug_20200613_161222-5e2dbf40.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_40k_voc12aug/fcn_r50-d8_512x512_40k_voc12aug_20200613_161222.log.json) | | FCN | R-101-D8 | 512x512 | 40000 | - | - | 69.91 | 72.38 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_40k_voc12aug/fcn_r101-d8_512x512_40k_voc12aug_20200613_161240-4c8bcefd.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_40k_voc12aug/fcn_r101-d8_512x512_40k_voc12aug_20200613_161240.log.json) | + +### Pascal Context +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| FCN | R-101-D8 | 480x480 | 40000 | - | 9.93 | 44.14 | 45.67 | 
[model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_40k_pascal_context/fcn_r101-d8_480x480_40k_pascal_context_20200911_212515-9b565a6d.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_40k_pascal_context/fcn_r101-d8_480x480_40k_pascal_context-20200911_212515.log.json) | +| FCN | R-101-D8 | 480x480 | 80000 | - | - | 44.47 | 45.74 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_80k_pascal_context/fcn_r101-d8_480x480_80k_pascal_context_20200915_032644-a3828480.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_80k_pascal_context/fcn_r101-d8_480x480_80k_pascal_context-20200915_032644.log.json) | diff --git a/configs/fcn/fcn_r101-d8_480x480_40k_pascal_context.py b/configs/fcn/fcn_r101-d8_480x480_40k_pascal_context.py new file mode 100644 index 0000000..f3a15b4 --- /dev/null +++ b/configs/fcn/fcn_r101-d8_480x480_40k_pascal_context.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_480x480_40k_pascal_context.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/fcn/fcn_r101-d8_480x480_80k_pascal_context.py b/configs/fcn/fcn_r101-d8_480x480_80k_pascal_context.py new file mode 100644 index 0000000..bdccfd9 --- /dev/null +++ b/configs/fcn/fcn_r101-d8_480x480_80k_pascal_context.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_480x480_80k_pascal_context.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/fcn/fcn_r50-d8_480x480_40k_pascal_context.py b/configs/fcn/fcn_r50-d8_480x480_40k_pascal_context.py new file mode 100644 index 0000000..d124fbf --- /dev/null +++ b/configs/fcn/fcn_r50-d8_480x480_40k_pascal_context.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/pascal_context.py', + '../_base_/default_runtime.py', 
'../_base_/schedules/schedule_40k.py' +] +model = dict(decode_head=dict(num_classes=60)) +test_cfg = dict(mode='slide', crop_size=(480, 480), stride=(320, 320)) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/configs/fcn/fcn_r50-d8_480x480_80k_pascal_context.py b/configs/fcn/fcn_r50-d8_480x480_80k_pascal_context.py new file mode 100644 index 0000000..d84f1c8 --- /dev/null +++ b/configs/fcn/fcn_r50-d8_480x480_80k_pascal_context.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/pascal_context.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict(decode_head=dict(num_classes=60)) +test_cfg = dict(mode='slide', crop_size=(480, 480), stride=(320, 320)) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/configs/hrnet/README.md b/configs/hrnet/README.md index 4bb016e..153fbdd 100644 --- a/configs/hrnet/README.md +++ b/configs/hrnet/README.md @@ -44,3 +44,9 @@ | FCN | HRNetV2p-W18-Small | 512x512 | 40000 | - | - | 66.61 | 70.00 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_40k_voc12aug/fcn_hr18s_512x512_40k_voc12aug_20200614_000648-4f8d6e7f.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_40k_voc12aug/fcn_hr18s_512x512_40k_voc12aug_20200614_000648.log.json) | | FCN | HRNetV2p-W18 | 512x512 | 40000 | - | - | 72.90 | 75.59 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_40k_voc12aug/fcn_hr18_512x512_40k_voc12aug_20200613_224401-1b4b76cd.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_40k_voc12aug/fcn_hr18_512x512_40k_voc12aug_20200613_224401.log.json) | | FCN | HRNetV2p-W48 | 512x512 | 40000 | - | - | 76.24 | 78.49 | 
[model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_40k_voc12aug/fcn_hr48_512x512_40k_voc12aug_20200613_222111-1b0f18bc.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_40k_voc12aug/fcn_hr48_512x512_40k_voc12aug_20200613_222111.log.json) | + +### Pascal Context +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|--------|--------------------|-----------|--------:|----------|----------------|------:|--------------:|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| FCN | HRNetV2p-W48 | 480x480 | 40000 | 6.1 | 8.86 | 45.14 | 47.42 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_40k_pascal_context/fcn_hr48_480x480_40k_pascal_context_20200911_164852-667d00b0.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_40k_pascal_context/fcn_hr48_480x480_40k_pascal_context-20200911_164852.log.json) | +| FCN | HRNetV2p-W48 | 480x480 | 80000 | - | - | 45.84 | 47.84 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_80k_pascal_context/fcn_hr48_480x480_80k_pascal_context_20200911_155322-847a6711.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_80k_pascal_context/fcn_hr48_480x480_80k_pascal_context-20200911_155322.log.json) | diff --git a/configs/hrnet/fcn_hr18_480x480_40k_pascal_context.py b/configs/hrnet/fcn_hr18_480x480_40k_pascal_context.py new file mode 100644 index 0000000..54a412e --- /dev/null +++ 
b/configs/hrnet/fcn_hr18_480x480_40k_pascal_context.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/pascal_context.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict(decode_head=dict(num_classes=60)) +test_cfg = dict(mode='slide', crop_size=(480, 480), stride=(320, 320)) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/configs/hrnet/fcn_hr18_480x480_80k_pascal_context.py b/configs/hrnet/fcn_hr18_480x480_80k_pascal_context.py new file mode 100644 index 0000000..2dfba87 --- /dev/null +++ b/configs/hrnet/fcn_hr18_480x480_80k_pascal_context.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/pascal_context.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict(decode_head=dict(num_classes=60)) +test_cfg = dict(mode='slide', crop_size=(480, 480), stride=(320, 320)) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/configs/hrnet/fcn_hr18s_480x480_40k_pascal_context.py b/configs/hrnet/fcn_hr18s_480x480_40k_pascal_context.py new file mode 100644 index 0000000..d099310 --- /dev/null +++ b/configs/hrnet/fcn_hr18s_480x480_40k_pascal_context.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_480x480_40k_pascal_context.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/configs/hrnet/fcn_hr18s_480x480_80k_pascal_context.py b/configs/hrnet/fcn_hr18s_480x480_80k_pascal_context.py new file mode 100644 index 0000000..584b713 --- /dev/null +++ b/configs/hrnet/fcn_hr18s_480x480_80k_pascal_context.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_480x480_80k_pascal_context.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + 
backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/configs/hrnet/fcn_hr48_480x480_40k_pascal_context.py b/configs/hrnet/fcn_hr48_480x480_40k_pascal_context.py new file mode 100644 index 0000000..0e2d96c --- /dev/null +++ b/configs/hrnet/fcn_hr48_480x480_40k_pascal_context.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_480x480_40k_pascal_context.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/configs/hrnet/fcn_hr48_480x480_80k_pascal_context.py b/configs/hrnet/fcn_hr48_480x480_80k_pascal_context.py new file mode 100644 index 0000000..e28164e --- /dev/null +++ b/configs/hrnet/fcn_hr48_480x480_80k_pascal_context.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_480x480_80k_pascal_context.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/configs/pspnet/README.md b/configs/pspnet/README.md index ec31fee..304804b 100644 --- a/configs/pspnet/README.md +++ b/configs/pspnet/README.md @@ -39,3 +39,9 @@ | PSPNet | R-101-D8 | 512x512 | 20000 | 9.6 | 15.02 | 78.47 | 79.25 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_20k_voc12aug/pspnet_r101-d8_512x512_20k_voc12aug_20200617_102003-4aef3c9a.pth) | 
[log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_20k_voc12aug/pspnet_r101-d8_512x512_20k_voc12aug_20200617_102003.log.json) | | PSPNet | R-50-D8 | 512x512 | 40000 | - | - | 77.29 | 78.48 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_40k_voc12aug/pspnet_r50-d8_512x512_40k_voc12aug_20200613_161222-ae9c1b8c.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_40k_voc12aug/pspnet_r50-d8_512x512_40k_voc12aug_20200613_161222.log.json) | | PSPNet | R-101-D8 | 512x512 | 40000 | - | - | 78.52 | 79.57 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_40k_voc12aug/pspnet_r101-d8_512x512_40k_voc12aug_20200613_161222-bc933b18.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_40k_voc12aug/pspnet_r101-d8_512x512_40k_voc12aug_20200613_161222.log.json) | + +### Pascal Context +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | download | +|--------|----------|-----------|--------:|----------|----------------|------:|--------------:|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| PSPNet | R-101-D8 | 480x480 | 40000 | 8.8 | 9.68 | 46.60 | 47.78 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_40k_pascal_context/pspnet_r101-d8_480x480_40k_pascal_context_20200911_211210-bf0f5d7c.pth) | 
[log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_40k_pascal_context/pspnet_r101-d8_480x480_40k_pascal_context-20200911_211210.log.json) | +| PSPNet | R-101-D8 | 480x480 | 80000 | - | - | 46.03 | 47.15 | [model](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_80k_pascal_context/pspnet_r101-d8_480x480_80k_pascal_context_20200911_190530-c86d6233.pth) | [log](https://openmmlab.oss-accelerate.aliyuncs.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_80k_pascal_context/pspnet_r101-d8_480x480_80k_pascal_context-20200911_190530.log.json) | diff --git a/configs/pspnet/pspnet_r101-d8_480x480_40k_pascal_context.py b/configs/pspnet/pspnet_r101-d8_480x480_40k_pascal_context.py new file mode 100644 index 0000000..0b5a990 --- /dev/null +++ b/configs/pspnet/pspnet_r101-d8_480x480_40k_pascal_context.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_480x480_40k_pascal_context.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/pspnet/pspnet_r101-d8_480x480_80k_pascal_context.py b/configs/pspnet/pspnet_r101-d8_480x480_80k_pascal_context.py new file mode 100644 index 0000000..fda9110 --- /dev/null +++ b/configs/pspnet/pspnet_r101-d8_480x480_80k_pascal_context.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_480x480_80k_pascal_context.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/configs/pspnet/pspnet_r50-d8_480x480_40k_pascal_context.py b/configs/pspnet/pspnet_r50-d8_480x480_40k_pascal_context.py new file mode 100644 index 0000000..86da94d --- /dev/null +++ b/configs/pspnet/pspnet_r50-d8_480x480_40k_pascal_context.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', + '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=60), auxiliary_head=dict(num_classes=60)) 
+test_cfg = dict(mode='slide', crop_size=(480, 480), stride=(320, 320)) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/configs/pspnet/pspnet_r50-d8_480x480_80k_pascal_context.py b/configs/pspnet/pspnet_r50-d8_480x480_80k_pascal_context.py new file mode 100644 index 0000000..cbb0271 --- /dev/null +++ b/configs/pspnet/pspnet_r50-d8_480x480_80k_pascal_context.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', + '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=60), auxiliary_head=dict(num_classes=60)) +test_cfg = dict(mode='slide', crop_size=(480, 480), stride=(320, 320)) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/docs/getting_started.md b/docs/getting_started.md index 892060d..35ba57b 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -27,6 +27,14 @@ mmsegmentation │ │ │ ├── SegmentationClass │ │ │ ├── ImageSets │ │ │ │ ├── Segmentation +│ │ ├── VOC2010 +│ │ │ ├── JPEGImages +│ │ │ ├── SegmentationClassContext +│ │ │ ├── ImageSets +│ │ │ │ ├── SegmentationContext +│ │ │ │ │ ├── train.txt +│ │ │ │ │ ├── val.txt +│ │ │ ├── trainval_merged.json │ │ ├── VOCaug │ │ │ ├── dataset │ │ │ │ ├── cls @@ -69,6 +77,17 @@ Please refer to [concat dataset](https://github.com/open-mmlab/mmsegmentation/bl The training and validation set of ADE20K could be download from this [link](http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip). We may also download test set from [here](http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip). +### Pascal Context +The training and validation set of Pascal Context could be downloaded from [here](http://host.robots.ox.ac.uk/pascal/VOC/voc2010/VOCtrainval_03-May-2010.tar). 
You may also download the test set from [here](http://host.robots.ox.ac.uk:8080/eval/downloads/VOC2010test.tar) after registration. + +To split the training and validation set from the original dataset, you may download trainval_merged.json from [here](https://codalabuser.blob.core.windows.net/public/trainval_merged.json). + +If you would like to use the Pascal Context dataset, please install [Detail](https://github.com/ccvl/detail-api) and then run the following command to convert annotations into the proper format. + +```shell +python tools/convert_datasets/pascal_context.py data/VOCdevkit data/VOCdevkit/VOC2010/trainval_merged.json +``` + ## Inference with pretrained models We provide testing scripts to evaluate a whole dataset (Cityscapes, PASCAL VOC, ADE20k, etc.), diff --git a/mmseg/datasets/__init__.py b/mmseg/datasets/__init__.py index cb81b9a..dd4705c 100644 --- a/mmseg/datasets/__init__.py +++ b/mmseg/datasets/__init__.py @@ -3,10 +3,11 @@ from .builder import DATASETS, PIPELINES, build_dataloader, build_dataset from .cityscapes import CityscapesDataset from .custom import CustomDataset from .dataset_wrappers import ConcatDataset, RepeatDataset +from .pascal_context import PascalContextDataset from .voc import PascalVOCDataset __all__ = [ 'CustomDataset', 'build_dataloader', 'ConcatDataset', 'RepeatDataset', 'DATASETS', 'build_dataset', 'PIPELINES', 'CityscapesDataset', - 'PascalVOCDataset', 'ADE20KDataset' + 'PascalVOCDataset', 'ADE20KDataset', 'PascalContextDataset' ] diff --git a/mmseg/datasets/pascal_context.py b/mmseg/datasets/pascal_context.py new file mode 100644 index 0000000..ab42877 --- /dev/null +++ b/mmseg/datasets/pascal_context.py @@ -0,0 +1,54 @@ +import os.path as osp + +from .builder import DATASETS +from .custom import CustomDataset + + +@DATASETS.register_module() +class PascalContextDataset(CustomDataset): + """PascalContext dataset. + + In segmentation map annotation for PascalContext, 0 stands for background, + which is included in 60 categories. 
``reduce_zero_label`` is fixed to + False. The ``img_suffix`` is fixed to '.jpg' and ``seg_map_suffix`` is + fixed to '.png'. + + Args: + split (str): Split txt file for PascalContext. + """ + + CLASSES = ('background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', + 'bus', 'car', 'cat', 'chair', 'cow', 'table', 'dog', 'horse', + 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', + 'tvmonitor', 'bag', 'bed', 'bench', 'book', 'building', + 'cabinet', 'ceiling', 'cloth', 'computer', 'cup', 'door', + 'fence', 'floor', 'flower', 'food', 'grass', 'ground', + 'keyboard', 'light', 'mountain', 'mouse', 'curtain', 'platform', + 'sign', 'plate', 'road', 'rock', 'shelves', 'sidewalk', 'sky', + 'snow', 'bedclothes', 'track', 'tree', 'truck', 'wall', 'water', + 'window', 'wood') + + PALETTE = [[120, 120, 120], [180, 120, 120], [6, 230, 230], [80, 50, 50], + [4, 200, 3], [120, 120, 80], [140, 140, 140], [204, 5, 255], + [230, 230, 230], [4, 250, 7], [224, 5, 255], [235, 255, 7], + [150, 5, 61], [120, 120, 70], [8, 255, 51], [255, 6, 82], + [143, 255, 140], [204, 255, 4], [255, 51, 7], [204, 70, 3], + [0, 102, 200], [61, 230, 250], [255, 6, 51], [11, 102, 255], + [255, 7, 71], [255, 9, 224], [9, 7, 230], [220, 220, 220], + [255, 9, 92], [112, 9, 255], [8, 255, 214], [7, 255, 224], + [255, 184, 6], [10, 255, 71], [255, 41, 10], [7, 255, 255], + [224, 255, 8], [102, 8, 255], [255, 61, 6], [255, 194, 7], + [255, 122, 8], [0, 255, 20], [255, 8, 41], [255, 5, 153], + [6, 51, 255], [235, 12, 255], [160, 150, 20], [0, 163, 255], + [140, 140, 140], [250, 10, 15], [20, 255, 0], [31, 255, 0], + [255, 31, 0], [255, 224, 0], [153, 255, 0], [0, 0, 255], + [255, 71, 0], [0, 235, 255], [0, 173, 255], [31, 0, 255]] + + def __init__(self, split, **kwargs): + super(PascalContextDataset, self).__init__( + img_suffix='.jpg', + seg_map_suffix='.png', + split=split, + reduce_zero_label=False, + **kwargs) + assert osp.exists(self.img_dir) and self.split is not None diff --git 
a/mmseg/models/segmentors/encoder_decoder.py b/mmseg/models/segmentors/encoder_decoder.py index 3e11630..9adf65b 100644 --- a/mmseg/models/segmentors/encoder_decoder.py +++ b/mmseg/models/segmentors/encoder_decoder.py @@ -167,13 +167,15 @@ class EncoderDecoder(BaseSegmentor): # TODO refactor def slide_inference(self, img, img_meta, rescale): - """Inference by sliding-window with overlap.""" + """Inference by sliding-window with overlap. + + If h_crop > h_img or w_crop > w_img, the small patch will be used to + decode without padding. + """ h_stride, w_stride = self.test_cfg.stride h_crop, w_crop = self.test_cfg.crop_size batch_size, _, h_img, w_img = img.size() - assert h_crop <= h_img and w_crop <= w_img, ( - 'crop size should not greater than image size') num_classes = self.num_classes h_grids = max(h_img - h_crop + h_stride - 1, 0) // h_stride + 1 w_grids = max(w_img - w_crop + w_stride - 1, 0) // w_stride + 1 diff --git a/setup.cfg b/setup.cfg index 594abb8..21aad54 100644 --- a/setup.cfg +++ b/setup.cfg @@ -8,6 +8,6 @@ line_length = 79 multi_line_output = 0 known_standard_library = setuptools known_first_party = mmseg -known_third_party = PIL,cityscapesscripts,matplotlib,mmcv,numpy,onnxruntime,pytablewriter,pytest,scipy,torch +known_third_party = PIL,cityscapesscripts,detail,matplotlib,mmcv,numpy,onnxruntime,pytablewriter,pytest,scipy,torch no_lines_before = STDLIB,LOCALFOLDER default_section = THIRDPARTY diff --git a/tools/convert_datasets/pascal_context.py b/tools/convert_datasets/pascal_context.py new file mode 100644 index 0000000..e0a97ce --- /dev/null +++ b/tools/convert_datasets/pascal_context.py @@ -0,0 +1,86 @@ +import argparse +import os.path as osp +from functools import partial + +import mmcv +import numpy as np +from detail import Detail +from PIL import Image + +_mapping = np.sort( + np.array([ + 0, 2, 259, 260, 415, 324, 9, 258, 144, 18, 19, 22, 23, 397, 25, 284, + 158, 159, 416, 33, 162, 420, 454, 295, 296, 427, 44, 45, 46, 308, 59, + 440, 445, 
31, 232, 65, 354, 424, 68, 326, 72, 458, 34, 207, 80, 355, + 85, 347, 220, 349, 360, 98, 187, 104, 105, 366, 189, 368, 113, 115 + ])) +_key = np.array(range(len(_mapping))).astype('uint8') + + +def generate_labels(img_id, detail, out_dir): + + def _class_to_index(mask, _mapping, _key): + # assert the values + values = np.unique(mask) + for i in range(len(values)): + assert (values[i] in _mapping) + index = np.digitize(mask.ravel(), _mapping, right=True) + return _key[index].reshape(mask.shape) + + mask = Image.fromarray( + _class_to_index(detail.getMask(img_id), _mapping=_mapping, _key=_key)) + filename = img_id['file_name'] + mask.save(osp.join(out_dir, filename.replace('jpg', 'png'))) + return osp.splitext(osp.basename(filename))[0] + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert PASCAL VOC annotations to mmdetection format') + parser.add_argument('devkit_path', help='pascal voc devkit path') + parser.add_argument('json_path', help='annoation json filepath') + parser.add_argument('-o', '--out_dir', help='output path') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + devkit_path = args.devkit_path + if args.out_dir is None: + out_dir = osp.join(devkit_path, 'VOC2010', 'SegmentationClassContext') + else: + out_dir = args.out_dir + json_path = args.json_path + mmcv.mkdir_or_exist(out_dir) + img_dir = osp.join(devkit_path, 'VOC2010', 'JPEGImages') + + train_detail = Detail(json_path, img_dir, 'train') + train_ids = train_detail.getImgs() + + val_detail = Detail(json_path, img_dir, 'val') + val_ids = val_detail.getImgs() + + mmcv.mkdir_or_exist( + osp.join(devkit_path, 'VOC2010/ImageSets/SegmentationContext')) + + train_list = mmcv.track_progress( + partial(generate_labels, detail=train_detail, out_dir=out_dir), + train_ids) + with open( + osp.join(devkit_path, 'VOC2010/ImageSets/SegmentationContext', + 'train.txt'), 'w') as f: + f.writelines(line + '\n' for line in sorted(train_list)) + + val_list = 
mmcv.track_progress( + partial(generate_labels, detail=val_detail, out_dir=out_dir), val_ids) + with open( + osp.join(devkit_path, 'VOC2010/ImageSets/SegmentationContext', + 'val.txt'), 'w') as f: + f.writelines(line + '\n' for line in sorted(val_list)) + + print('Done!') + + +if __name__ == '__main__': + main()