2026-01-10 11:35:09 +01:00
|
|
|
import json
|
|
|
|
|
import os
|
|
|
|
|
import random
|
|
|
|
|
import boto3
|
|
|
|
|
from urllib.parse import urlparse
|
|
|
|
|
from tqdm.auto import tqdm
|
|
|
|
|
|
|
|
|
|
|
2026-01-10 11:53:07 +01:00
|
|
|
# S3 bucket configuration.
# NOTE(review): the access/secret keys were hard-coded in source; they are now
# read from the environment first so real secrets never have to live in this
# file. The previous literal values are kept as fallbacks, so behavior is
# unchanged when the variables are not set.
MINIO_CONFIG = {
    'endpoint_url': os.environ.get('MINIO_ENDPOINT_URL', 'https://minio.hgk.ch'),
    'access_key': os.environ.get('MINIO_ACCESS_KEY', 'meinAccessKey'),
    'secret_key': os.environ.get('MINIO_SECRET_KEY', 'meinSecretKey'),
    'bucket': os.environ.get('MINIO_BUCKET', 'skiai'),
}
# Input spec: Label Studio JSON export holding the pose annotations.
JSON_PATH = 'datasets/skier_pose/labelstudio_export.json'

# Input spec: keypoint order must stay consistent across the whole dataset
# (YOLO pose label columns are positional).
KP_ORDER = [
    "leftski_tip", "leftski_tail",
    "rightski_tip", "rightski_tail",
    "leftpole_top", "leftpole_bottom",
    "rightpole_top", "rightpole_bottom",
]

# Output spec: dataset root and train/val split ratio.
OUTPUT_DIR = 'datasets/skier_pose'
TRAIN_RATIO = 0.8
# create folder structure
def __setup_directories():
    """Create <OUTPUT_DIR>/{train,val}/{images,labels}; idempotent."""
    for split in ('train', 'val'):
        for subdir in ('images', 'labels'):
            os.makedirs(os.path.join(OUTPUT_DIR, split, subdir), exist_ok=True)
# download image from s3
def __download_from_minio(s3_path, local_path):
    """Download one object from the configured MinIO bucket to ``local_path``.

    ``s3_path`` is the storage URI from the Label Studio export
    (e.g. ``s3://skiai/img/001.jpg``). Only its path component is used as
    the object key; the bucket always comes from ``MINIO_CONFIG``.
    """
    # Reuse a single client across calls; the original rebuilt a boto3
    # client for every image, which is needlessly slow for large exports.
    client = getattr(__download_from_minio, '_client', None)
    if client is None:
        client = boto3.client(
            's3',
            endpoint_url=MINIO_CONFIG['endpoint_url'],
            aws_access_key_id=MINIO_CONFIG['access_key'],
            aws_secret_access_key=MINIO_CONFIG['secret_key'],
        )
        __download_from_minio._client = client

    # For 's3://bucket/key' URIs urlparse puts the bucket into netloc and
    # '/key' into path, so stripping leading slashes yields the object key.
    parsed = urlparse(s3_path)
    key = parsed.path.lstrip('/')

    client.download_file(MINIO_CONFIG['bucket'], key, local_path)
# create YOLO dataset
def build_yolo_label_lines(results, kp_order=None):
    """Convert one Label Studio ``result`` list into YOLO pose label lines.

    Parameters:
        results: list of Label Studio result dicts (types 'keypointlabels',
            'choices', 'rectanglelabels'); percentages are converted to the
            normalized 0..1 range YOLO expects.
        kp_order: keypoint name order for the output columns; defaults to
            the module-level KP_ORDER.

    Returns:
        One string per bounding box: ``"0 cx cy w h"`` followed by
        ``"x y v"`` triplets for every keypoint in ``kp_order``
        (v: 0 = missing, 1 = labeled but invisible, 2 = visible).
    """
    if kp_order is None:
        kp_order = KP_ORDER

    kp_map = {}          # region id -> {'label', 'x', 'y'} (normalized)
    visibility_map = {}  # region id -> 1 (the "1"/invisible choice was set)
    bboxes = []          # formatted "cx cy w h" strings, normalized

    for res in results:
        res_id = res['id']
        res_type = res['type']
        val = res.get('value', {})

        if res_type == 'keypointlabels':
            kp_map[res_id] = {
                'label': val['keypointlabels'][0],
                'x': val['x'] / 100.0,
                'y': val['y'] / 100.0,
            }

        elif res_type == 'choices':
            # A per-region "1" choice marks the keypoint as present but occluded.
            if "1" in val.get('choices', []):
                visibility_map[res_id] = 1

        elif res_type == 'rectanglelabels':
            # Label Studio gives the top-left corner in percent; YOLO wants a
            # normalized center point plus width/height.
            bw = val['width'] / 100.0
            bh = val['height'] / 100.0
            bx = (val['x'] / 100.0) + (bw / 2.0)
            by = (val['y'] / 100.0) + (bh / 2.0)
            bboxes.append(f"{bx:.6f} {by:.6f} {bw:.6f} {bh:.6f}")

    lines = []
    for bbox_coords in bboxes:
        # Class id 0, then the box. NOTE(review): every box receives the full
        # keypoint set -- with several skiers per image keypoints are NOT
        # matched to their own box (unchanged from the original logic).
        line = f"0 {bbox_coords}"
        for kp_name in kp_order:
            target_id = next(
                (rid for rid, d in kp_map.items() if d['label'] == kp_name),
                None,
            )
            # 'is not None' instead of truthiness: a falsy-but-valid id
            # must not be treated as a missing keypoint.
            if target_id is not None:
                coords = kp_map[target_id]
                # visibility: 0 missing, 1 invisible, 2 visible (default)
                v = visibility_map.get(target_id, 2)
                line += f" {coords['x']:.6f} {coords['y']:.6f} {v}"
            else:
                line += " 0.000000 0.000000 0"
        lines.append(line)
    return lines


def createYOLOdataset():
    """Build a YOLO pose dataset from the Label Studio export.

    Reads JSON_PATH, shuffles deterministically (seed 42), splits into
    train/val by TRAIN_RATIO, downloads each annotated image from MinIO and
    writes one YOLO label file per image under OUTPUT_DIR.
    """
    __setup_directories()

    # read annotations
    with open(JSON_PATH, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Deterministic shuffle so the train/val split is reproducible.
    random.seed(42)
    random.shuffle(data)
    split_idx = int(len(data) * TRAIN_RATIO)

    # loop over all images
    for i, entry in enumerate(tqdm(data, desc="Importing Images", unit="img")):
        split = 'train' if i < split_idx else 'val'

        # derive local file names from the image's S3 path
        image_s3_path = entry['data']['image']
        filename = os.path.basename(image_s3_path)
        base_name = os.path.splitext(filename)[0]

        img_local_path = os.path.join(OUTPUT_DIR, split, 'images', filename)
        label_local_path = os.path.join(OUTPUT_DIR, split, 'labels', f"{base_name}.txt")

        # Skip unannotated entries BEFORE downloading. The original code
        # downloaded first and skipped afterwards, leaving orphan images
        # without label files in the split folders.
        if not entry.get('annotations'):
            continue

        try:
            __download_from_minio(image_s3_path, img_local_path)
        except Exception as e:
            # was: "Error treating (unknown)" -- now names the failing file
            tqdm.write(f"Error treating {filename}: {e}")
            continue

        results = entry['annotations'][0].get('result', [])
        yolo_lines = build_yolo_label_lines(results)

        with open(label_local_path, 'w', encoding='utf-8') as f:
            f.write('\n'.join(yolo_lines))

    print(f"Finished! Dataset saved to: {os.path.abspath(OUTPUT_DIR)}")
if __name__ == "__main__":
    # Script entry point: run the full export -> YOLO conversion.
    createYOLOdataset()