def run_docker_cmd(cmd: List[str], timeout: int = 60) -> subprocess.CompletedProcess:
    """Run a docker command and return the completed process.

    Args:
        cmd: Full argument vector, e.g. ``["docker", "ps", "-a"]``.
        timeout: Seconds to wait before giving up on the command.

    Returns:
        The ``CompletedProcess`` from ``subprocess.run``. If the command
        cannot be started or times out, a synthetic result with
        ``returncode == 1``, empty stdout and the error text in stderr is
        returned instead of raising.
    """
    try:
        return subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
    except Exception as e:
        # Deliberate best-effort: the failure is reported through the result
        # object (returncode/stderr) so callers have a single code path.
        return subprocess.CompletedProcess(cmd, 1, "", str(e))


def parse_jsonl(text: str) -> List[dict]:
    """Parse JSON Lines text (one JSON object per line).

    Blank lines, lines that are not valid JSON, and lines whose JSON value is
    not an object are skipped, so every returned element is a ``dict``.
    """
    if not text:
        return []
    records = []
    for line in text.split("\n"):
        line = line.strip()
        if not line:
            continue
        try:
            value = json.loads(line)
        except json.JSONDecodeError:
            continue
        # Callers use .get() on each record; drop scalars/arrays.
        if isinstance(value, dict):
            records.append(value)
    return records


def get_docker_inspect(ids: List[str], object_type: str = "") -> List[dict]:
    """Run ``docker inspect`` on a list of IDs and return the parsed objects.

    Args:
        ids: Object IDs or names to inspect. An empty list returns ``[]``
            without invoking docker.
        object_type: Optional value for ``--type`` (e.g. ``"container"`` or
            ``"image"``); when empty, no ``--type`` flag is passed.

    Returns:
        A list of inspect dictionaries; ``[]`` if docker produced no parsable
        JSON array.
    """
    if not ids:
        return []
    cmd = ["docker", "inspect"]
    if object_type:
        cmd += ["--type", object_type]
    result = run_docker_cmd(cmd + list(ids), timeout=120)

    # The exit status is intentionally not checked: when only some IDs fail to
    # resolve (e.g. a container removed mid-scan) docker exits non-zero but
    # still prints the objects it found, and those references must be kept.
    output = (result.stdout or "").strip()
    if not output:
        return []
    try:
        parsed = json.loads(output)
    except json.JSONDecodeError:
        return []
    if not isinstance(parsed, list):
        return []
    return [item for item in parsed if isinstance(item, dict)]


def extract_layer_ids(graphdriver_data: dict, overlay_root: str = "") -> Set[str]:
    """Extract overlay2 layer IDs from GraphDriver data.

    Args:
        graphdriver_data: The ``GraphDriver.Data`` mapping from
            ``docker inspect``; ``None`` or an empty mapping yields an empty
            set.
        overlay_root: Directory that contains the layer directories. When
            empty, the module-level ``OVERLAY2_PATH`` is used.

    Returns:
        The set of layer directory names found under the overlay root. The
        ``l`` directory is never reported as a layer.
    """
    layer_ids = set()
    if not graphdriver_data:
        return layer_ids

    root = overlay_root or OVERLAY2_PATH
    prefix = root.rstrip("/") + "/"
    for key in ("MergedDir", "UpperDir", "WorkDir", "LowerDir"):
        value = graphdriver_data.get(key)
        if not isinstance(value, str):
            continue
        # LowerDir can hold multiple paths separated by ':'
        for path in value.split(":"):
            if not path.startswith(prefix):
                continue
            # Path looks like <root>/<layer_id>/diff; keep the first component.
            layer_id = path[len(prefix):].split("/", 1)[0]
            if layer_id and layer_id != "l":
                layer_ids.add(layer_id)
    return layer_ids


def _graphdriver_data(info: dict) -> dict:
    """Return ``GraphDriver.Data`` from an inspect record, or ``{}`` if absent/null."""
    return (info.get("GraphDriver") or {}).get("Data") or {}


def get_referenced_layers() -> Dict[str, str]:
    """Get all overlay2 layer IDs referenced by containers and images.

    Returns:
        Dict of layer_id -> description (``"container: <name>"`` or
        ``"image: <name>"``). A container reference takes precedence over an
        image reference to the same layer.

    NOTE(review): if ``docker ps`` / ``docker images`` fail, the corresponding
    references are simply missing from the result; callers cannot distinguish
    that from "nothing is referenced". Confirm callers treat an empty result
    with care before deleting anything.
    """
    referenced = {}

    # Map container ID -> name; dict order doubles as the de-duplicated ID list.
    result = run_docker_cmd(["docker", "ps", "-a", "--format", "{{json .}}"])
    containers = parse_jsonl(result.stdout) if result.returncode == 0 else []
    container_names = {}
    for c in containers:
        cid = c.get("ID", "")
        if cid:
            container_names[cid] = c.get("Names", cid)

    # Inspect all containers in one docker invocation (much faster than one each).
    for info in get_docker_inspect(list(container_names), "container"):
        # Inspect reports the full ID; the listing keys are the first 12 chars.
        cid = (info.get("Id") or "")[:12]
        cname = container_names.get(cid) or (info.get("Name") or cid).lstrip("/")
        for layer_id in extract_layer_ids(_graphdriver_data(info)):
            referenced[layer_id] = f"container: {cname}"

    # Map image ID -> display name. An image with several tags appears on
    # several rows with the same ID; it is inspected only once.
    result = run_docker_cmd(["docker", "images", "-a", "--format", "{{json .}}"])
    images = parse_jsonl(result.stdout) if result.returncode == 0 else []
    image_names = {}
    for img in images:
        img_id = img.get("ID", "")
        if not img_id:
            continue
        repo = img.get("Repository", "")
        tag = img.get("Tag", "")
        image_names[img_id] = f"{repo}:{tag}" if repo and tag else img_id[:12]

    # Inspect all images in one docker invocation.
    for info in get_docker_inspect(list(image_names), "image"):
        img_id = (info.get("Id") or "").replace("sha256:", "")[:12]
        img_name = image_names.get(img_id, img_id)
        for layer_id in extract_layer_ids(_graphdriver_data(info)):
            # Keep an existing (container) description if there is one.
            referenced.setdefault(layer_id, f"image: {img_name}")

    return referenced