Skip to content
Merged
Show file tree
Hide file tree
Changes from 39 commits
Commits
Show all changes
53 commits
Select commit Hold shift + click to select a range
6b5740e
Add CLIP model
Jun 8, 2023
b476263
Add bpe vocabulary dict
Jun 8, 2023
e7a9a7a
Add prompt templates for language models
Jun 8, 2023
3e56ded
Add backbone for CAT-Seg
Jun 8, 2023
eb34f1e
Add CAT-Seg decoder head
Jun 8, 2023
b0d9e45
Add CAT-Seg aggregator (neck)
Jun 8, 2023
40e675d
Support CLIP image encoder finetune
Jun 9, 2023
40cd281
Add CAT-Seg r101 training config
Jun 9, 2023
9849bb4
Fix coco-stuff164k typos
Jun 9, 2023
e2d278a
Fix yapf format
Jun 9, 2023
5089e34
Refactor CAT-Seg configs
Jun 9, 2023
0e1fec8
Fix feature extractor input transform
Jun 9, 2023
38f8258
Refactor aggregator & update config
Jun 9, 2023
7488480
Support slide inference
Jun 10, 2023
04a5270
Add README and model index
Jun 11, 2023
fa31087
Update configs & support vitg and vith
Jun 11, 2023
9c2a9d9
Enhance CLIP weights huggingface downloading
Jun 11, 2023
068a23d
Fix descriptions of classes
Jun 11, 2023
ecbcf5e
Fix docstring converge error
Jun 11, 2023
9d7fc3a
Update optional dependencies
Jun 11, 2023
659af17
Fix open_clip dependency
Jun 11, 2023
8925512
Update ViTH results
Jun 11, 2023
db2614a
Add regex dependency
Jun 11, 2023
0950ed9
Support ade20k and pascal-context-59
Jun 11, 2023
145f935
Update reproduction results
Jun 12, 2023
86d70d7
Fix redundant kwargs
Jun 13, 2023
f85fdac
Enhance open_clip weights loading
Jun 13, 2023
1db3679
Add Unit Tests
Jun 13, 2023
8740a56
Fix cat-seg head unit test
Jun 13, 2023
876906d
Reduce the test batch size
Jun 13, 2023
a113e9f
Fix over memory test error
Jun 13, 2023
bd3fe9a
Add unit test pseudo data
SheffieldCao Jun 13, 2023
3e3f8dd
Fix unit test configs
Jun 13, 2023
717d9ba
Merge branch 'support-cat-seg' of https://github.com/SheffieldCao/mms…
Jun 13, 2023
3e7dd26
Skip unit test for lower version torch
Jun 13, 2023
0d8a0a5
Skip unit test on windows due to limited memory
Jun 13, 2023
32985b2
Skip unit tests on cpu
Jun 14, 2023
f2afafd
Enhance backbone class embedding
Jun 14, 2023
d0aa6de
Skip unit test on cpu
Jun 14, 2023
a359a2c
Add type hints and reference
Jul 17, 2023
4bdade3
Delete relative position embedding
Jul 17, 2023
9e97075
Sync with dev1.x
SheffieldCao Jul 18, 2023
572752f
Fix inference pooling size
SheffieldCao Jul 18, 2023
d3824c1
Resolve conflicts with branch dev-1.x
SheffieldCao Aug 2, 2023
654eff1
Merge branch 'dev-1.x' into support-cat-seg
SheffieldCao Aug 2, 2023
796e4c0
Move to Project support
SheffieldCao Aug 8, 2023
1f1c66e
Rebase the configs
SheffieldCao Aug 8, 2023
e7a75f3
--refactor=support build
xiexinch Aug 9, 2023
9b32cb7
--fix=fix linear attn
xiexinch Aug 9, 2023
854d47f
--other=remove config
xiexinch Aug 9, 2023
4141077
--other=update model link
xiexinch Aug 9, 2023
1ed9947
--other=restore mmseg package
xiexinch Aug 9, 2023
8ca9250
fix
xiexinch Aug 9, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions configs/_base_/datasets/ade20k_384x384.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Dataset settings: ADE20K with 384x384 training crops.
dataset_type = 'ADE20KDataset'
data_root = 'data/ade/ADEChallengeData2016'
crop_size = (384, 384)

# Training-time transform sequence.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', reduce_zero_label=True),
    dict(
        type='RandomResize',
        scale=(2048, 512),
        ratio_range=(0.5, 2.0),
        keep_ratio=True,
    ),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='PackSegInputs'),
]

# Validation / test transform sequence.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='Resize', scale=(2048, 512), keep_ratio=True),
    # Annotations are loaded after ``Resize`` because the ground truth
    # does not need the resize transform.
    dict(type='LoadAnnotations', reduce_zero_label=True),
    dict(type='PackSegInputs'),
]

# Test-time augmentation: one ``Resize`` entry per ratio below, combined
# with a never-flip (prob=0.) and an always-flip (prob=1.) variant.
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
    dict(type='LoadImageFromFile', backend_args=None),
    dict(
        type='TestTimeAug',
        transforms=[
            [
                dict(type='Resize', scale_factor=ratio, keep_ratio=True)
                for ratio in img_ratios
            ],
            [
                dict(type='RandomFlip', prob=flip_prob, direction='horizontal')
                for flip_prob in (0., 1.)
            ],
            [dict(type='LoadAnnotations')],
            [dict(type='PackSegInputs')],
        ],
    ),
]

# Training loader: reads the ``training`` image/annotation folders.
train_dataloader = dict(
    batch_size=4,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='InfiniteSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='images/training',
            seg_map_path='annotations/training',
        ),
        pipeline=train_pipeline,
    ),
)

# Validation loader: reads the ``validation`` image/annotation folders.
val_dataloader = dict(
    batch_size=1,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='images/validation',
            seg_map_path='annotations/validation',
        ),
        pipeline=test_pipeline,
    ),
)
# Testing reuses the very same loader settings object as validation.
test_dataloader = val_dataloader

# Validation and testing share one evaluator settings object.
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator
2 changes: 1 addition & 1 deletion configs/_base_/datasets/coco-stuff164k.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='images/train2017', seg_map_path='annotations/val2017'),
img_path='images/train2017', seg_map_path='annotations/train2017'),
pipeline=train_pipeline))
val_dataloader = dict(
batch_size=1,
Expand Down
62 changes: 62 additions & 0 deletions configs/_base_/datasets/coco-stuff164k_384x384.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Dataset settings: COCO-Stuff 164k with 384x384 training crops.
dataset_type = 'COCOStuffDataset'
data_root = 'data/coco_stuff164k'
crop_size = (384, 384)

# Training-time transform sequence.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='PackSegInputs'),
]

# Validation / test transform sequence.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='Resize', scale=(2048, 512), keep_ratio=True),
    # Annotations are loaded after ``Resize`` because the ground truth
    # does not need the resize transform.
    dict(type='LoadAnnotations'),
    dict(type='PackSegInputs'),
]

# Test-time augmentation: one ``Resize`` entry per ratio below, combined
# with a never-flip (prob=0.) and an always-flip (prob=1.) variant.
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
    dict(type='LoadImageFromFile', backend_args=None),
    dict(
        type='TestTimeAug',
        transforms=[
            [
                dict(type='Resize', scale_factor=ratio, keep_ratio=True)
                for ratio in img_ratios
            ],
            [
                dict(type='RandomFlip', prob=flip_prob, direction='horizontal')
                for flip_prob in (0., 1.)
            ],
            [dict(type='LoadAnnotations')],
            [dict(type='PackSegInputs')],
        ],
    ),
]

# Training loader: reads the ``train2017`` image/annotation folders.
train_dataloader = dict(
    batch_size=2,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='InfiniteSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='images/train2017',
            seg_map_path='annotations/train2017',
        ),
        pipeline=train_pipeline,
    ),
)

# Validation loader: reads the ``val2017`` image/annotation folders.
val_dataloader = dict(
    batch_size=1,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='images/val2017',
            seg_map_path='annotations/val2017',
        ),
        pipeline=test_pipeline,
    ),
)
# Testing reuses the very same loader settings object as validation.
test_dataloader = val_dataloader

# Validation and testing share one evaluator settings object.
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator
72 changes: 72 additions & 0 deletions configs/_base_/datasets/pascal_context_59_384x384.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# Dataset settings: Pascal Context (59 classes) with 384x384 training crops.
dataset_type = 'PascalContextDataset59'
data_root = 'data/VOCdevkit/VOC2010/'

# ``img_scale`` feeds the resize transforms; ``crop_size`` feeds RandomCrop.
img_scale = (520, 520)
crop_size = (384, 384)

# Training-time transform sequence.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', reduce_zero_label=True),
    dict(
        type='RandomResize',
        scale=img_scale,
        ratio_range=(0.5, 2.0),
        keep_ratio=True,
    ),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='PackSegInputs'),
]

# Validation / test transform sequence.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='Resize', scale=img_scale, keep_ratio=True),
    # Annotations are loaded after ``Resize`` because the ground truth
    # does not need the resize transform.
    dict(type='LoadAnnotations', reduce_zero_label=True),
    dict(type='PackSegInputs'),
]

# Test-time augmentation: one ``Resize`` entry per ratio below, combined
# with a never-flip (prob=0.) and an always-flip (prob=1.) variant.
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
    dict(type='LoadImageFromFile', backend_args=None),
    dict(
        type='TestTimeAug',
        transforms=[
            [
                dict(type='Resize', scale_factor=ratio, keep_ratio=True)
                for ratio in img_ratios
            ],
            [
                dict(type='RandomFlip', prob=flip_prob, direction='horizontal')
                for flip_prob in (0., 1.)
            ],
            [dict(type='LoadAnnotations')],
            [dict(type='PackSegInputs')],
        ],
    ),
]

# Training loader: sample list comes from the ``train.txt`` split file.
train_dataloader = dict(
    batch_size=4,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='InfiniteSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='JPEGImages',
            seg_map_path='SegmentationClassContext',
        ),
        ann_file='ImageSets/SegmentationContext/train.txt',
        pipeline=train_pipeline,
    ),
)

# Validation loader: sample list comes from the ``val.txt`` split file.
val_dataloader = dict(
    batch_size=1,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='JPEGImages',
            seg_map_path='SegmentationClassContext',
        ),
        ann_file='ImageSets/SegmentationContext/val.txt',
        pipeline=test_pipeline,
    ),
)
# Testing reuses the very same loader settings object as validation.
test_dataloader = val_dataloader

# Validation and testing share one evaluator settings object.
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator
Loading