STDC/data/convert_to_cityscapes.py
charlie880624 ac3c30a1d2
Some checks failed
build / build_cpu (3.7, 1.5.1, torch1.5, 0.6.1) (push) Has been cancelled
build / build_cpu (3.7, 1.6.0, torch1.6, 0.7.0) (push) Has been cancelled
build / build_cpu (3.7, 1.7.0, torch1.7, 0.8.1) (push) Has been cancelled
build / build_cpu (3.7, 1.8.0, torch1.8, 0.9.0) (push) Has been cancelled
build / build_cpu (3.7, 1.9.0, torch1.9, 0.10.0) (push) Has been cancelled
build / build_cuda101 (3.7, 1.5.1+cu101, torch1.5, 0.6.1+cu101) (push) Has been cancelled
build / build_cuda101 (3.7, 1.6.0+cu101, torch1.6, 0.7.0+cu101) (push) Has been cancelled
build / build_cuda101 (3.7, 1.7.0+cu101, torch1.7, 0.8.1+cu101) (push) Has been cancelled
build / build_cuda101 (3.7, 1.8.0+cu101, torch1.8, 0.9.0+cu101) (push) Has been cancelled
build / build_cuda102 (3.6, 1.9.0+cu102, torch1.9, 0.10.0+cu102) (push) Has been cancelled
build / build_cuda102 (3.7, 1.9.0+cu102, torch1.9, 0.10.0+cu102) (push) Has been cancelled
build / build_cuda102 (3.8, 1.9.0+cu102, torch1.9, 0.10.0+cu102) (push) Has been cancelled
build / build_cuda102 (3.9, 1.9.0+cu102, torch1.9, 0.10.0+cu102) (push) Has been cancelled
build / test_windows (windows-2022, cpu, 3.8) (push) Has been cancelled
build / test_windows (windows-2022, cu111, 3.8) (push) Has been cancelled
deploy / build-n-publish (push) Has been cancelled
lint / lint (push) Has been cancelled
add data folder and update gitignore
2026-03-18 17:57:51 +08:00

134 lines
4.7 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
import cv2
import numpy as np
import pandas as pd
from pathlib import Path
from tqdm import tqdm
# Unified training label IDs: maps a lower-case class name to the ID written
# into the converted masks. Every source dataset is remapped onto this table.
name_to_unified_id = {
    "bunker": 0,
    "car": 1,
    "grass": 2,
    "greenery": 3,
    "person": 4,
    "pond": 5,
    "road": 6,
    "tree": 7,
    # Background is marked 255 so that it is ignored.
    "background": 255,
}
# Two source datasets, each with train/valid/test folders, are merged into the
# three Cityscapes-style splits (train / val / test) under one output tree.

# Folder that holds both the raw source datasets and the merged output tree.
_DATA_ROOT = r"C:\Users\rd_de\kneronstdc\data"

# (dataset-name prefix, source folder under _DATA_ROOT).
# NOTE(review): the original comments labelled these groups "05-13" and
# "05-19", which does not match the 06-18 / 06-10 folder names - confirm.
_SOURCES = (
    ("junior", "06-18_danger_object_segmentation"),
    ("may19", "06-10_danger_object_segmentation"),
)

# (output split name, split folder name inside the source dataset).
_SPLITS = (
    ("train", "train"),
    ("val", "valid"),
    ("test", "test"),
)


def _build_datasets():
    """Return one conversion job per (source dataset, split) pair.

    Each job is a dict with the keys ``name``, ``input_dir``,
    ``output_img_dir`` and ``output_mask_dir``. Paths are joined with a
    literal backslash so the resulting strings do not depend on the host OS.
    """
    jobs = []
    for prefix, source_folder in _SOURCES:
        for split, source_split in _SPLITS:
            jobs.append({
                "name": f"{prefix}_{split}",
                "input_dir": "\\".join((_DATA_ROOT, source_folder, source_split)),
                "output_img_dir": "\\".join(
                    (_DATA_ROOT, "cityscapes", "leftImg8bit", split)
                ),
                "output_mask_dir": "\\".join(
                    (_DATA_ROOT, "cityscapes", "gtFine", split)
                ),
            })
    return jobs


datasets = _build_datasets()
# Conversion: for every dataset, remap each "<stem>_mask.png" to the unified
# label IDs and write the image/mask pair under Cityscapes-style file names.

# Suffix that marks a file in the input folder as a segmentation mask.
_MASK_SUFFIX = "_mask.png"
# Extensions tried, in order, when looking for the image that matches a mask.
_IMAGE_EXTENSIONS = (".jpg", ".png")
# Label written for every pixel whose original ID has no known class.
_IGNORE_ID = 255


def _load_label_mapping(csv_path):
    """Return ``{original ID: unified ID}`` read from *csv_path*.

    The first CSV column is used as the original ID and the second as the
    class name. Names are stripped and lower-cased before being looked up in
    ``name_to_unified_id``; names that are not in that table map to 255.
    Raises whatever ``int()`` raises if an ID cell is not numeric.
    """
    df = pd.read_csv(csv_path)
    mapping = {}
    # Positional access so the exact header spelling does not matter.
    for orig_id, class_name in zip(df.iloc[:, 0], df.iloc[:, 1]):
        key = str(class_name).strip().lower()
        mapping[int(orig_id)] = name_to_unified_id.get(key, _IGNORE_ID)
    return mapping


def _find_image(input_dir, stem):
    """Return the path of the first existing ``stem + ext`` image, or None."""
    for ext in _IMAGE_EXTENSIONS:
        candidate = os.path.join(input_dir, stem + ext)
        if os.path.exists(candidate):
            return candidate
    return None


def _remap_mask(mask, label_mapping):
    """Return a uint8 copy of *mask* with every pixel value remapped.

    A 256-entry lookup table is built once and applied in a single indexing
    pass. Pixel values absent from *label_mapping* become 255. *mask* must
    hold integer values in 0..255 (it is read as 8-bit grayscale below).
    """
    lut = np.full(256, _IGNORE_ID, dtype=np.uint8)
    for old_id, new_id in label_mapping.items():
        # IDs outside the 8-bit range can never occur in the mask.
        if 0 <= old_id <= 255:
            lut[old_id] = new_id
    return lut[mask]


# Mask paths written during this run; several datasets share the same output
# folders, so an identical stem would silently replace an earlier result.
_written_mask_paths = set()

for dataset in datasets:
    name = dataset["name"]
    input_dir = dataset["input_dir"]
    output_img_dir = dataset["output_img_dir"]
    output_mask_dir = dataset["output_mask_dir"]

    os.makedirs(output_img_dir, exist_ok=True)
    os.makedirs(output_mask_dir, exist_ok=True)

    # Build the label mapping from this dataset's _classes.csv.
    csv_path = os.path.join(input_dir, "_classes.csv")
    if not os.path.exists(csv_path):
        print(f"❌ 缺少 _classes.csv: {csv_path}")
        continue
    label_mapping = _load_label_mapping(csv_path)

    print(f"\n📂 處理資料集: {name}")
    print(f"✅ Label 映射表: {label_mapping}")

    # Process every mask file found directly inside the input folder.
    for file in tqdm(os.listdir(input_dir), desc=f"轉換 {name}"):
        if not file.endswith(_MASK_SUFFIX):
            continue

        mask_path = os.path.join(input_dir, file)
        # Drop only the trailing suffix; str.replace would also remove the
        # same text if it appeared earlier in the file name.
        stem = file[: -len(_MASK_SUFFIX)]

        image_path = _find_image(input_dir, stem)
        if image_path is None:
            print(f"⚠️ 找不到圖片: {stem}")
            continue

        img = cv2.imread(image_path)
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if img is None or mask is None:
            print(f"❌ 無法讀取圖片或 mask: {file}")
            continue
        if img.shape[:2] != mask.shape:
            print(f"❌ 尺寸不一致: {file}")
            continue

        # Remap original label IDs to the unified IDs.
        remapped_mask = _remap_mask(mask, label_mapping)

        out_img_path = os.path.join(output_img_dir, f"{stem}_leftImg8bit.png")
        out_mask_path = os.path.join(
            output_mask_dir, f"{stem}_gtFine_labelIds.png"
        )
        if out_mask_path in _written_mask_paths:
            # Still overwrite, as before, but make the collision visible.
            print(f"⚠️ 輸出檔名重複, 將覆蓋先前結果: {out_mask_path}")

        # cv2.imwrite reports failure through its return value, not an
        # exception, so both results are checked explicitly.
        img_ok = cv2.imwrite(out_img_path, img)
        mask_ok = cv2.imwrite(out_mask_path, remapped_mask)
        if not (img_ok and mask_ok):
            print(f"❌ 寫入失敗: {file}")
            continue
        _written_mask_paths.add(out_mask_path)

        print(f"📊 {file} label: {np.unique(remapped_mask)}")

print("\n✅ 全部 train / val / test 合併並轉換完成Cityscapes OK")