Fix: Parse JSON Lines format from docker ps/images correctly

This commit is contained in:
Clawd
2026-02-06 18:00:23 +00:00
parent 13c2d1f43a
commit 8a59259165

View File

@@ -38,66 +38,108 @@ def human_size(size: int) -> str:
size /= 1024 size /= 1024
return f"{size:.1f} PB" return f"{size:.1f} PB"
def run_docker_cmd(cmd: List[str]) -> subprocess.CompletedProcess:
    """Run a docker command and return the completed process.

    Args:
        cmd: Full argv list, e.g. ["docker", "ps", "-a"].

    Returns:
        The subprocess.CompletedProcess from running *cmd*. If the command
        cannot be run at all (binary missing, timeout, etc.) a synthetic
        CompletedProcess with returncode 1, empty stdout, and the error
        message in stderr is returned, so callers never see an exception.
    """
    try:
        # 60s cap so a wedged docker daemon cannot hang the whole tool.
        return subprocess.run(cmd, capture_output=True, text=True, timeout=60)
    except Exception as e:
        # Deliberate best-effort: fold any failure into a non-zero result.
        return subprocess.CompletedProcess(cmd, 1, "", str(e))
def parse_jsonl(text: str) -> List[dict]:
    """Parse JSON Lines input (one JSON object per line).

    Blank lines and lines that fail to parse as JSON are silently
    skipped; everything that parses is returned in input order.
    """
    parsed: List[dict] = []
    for raw in text.strip().split('\n'):
        candidate = raw.strip()
        if not candidate:
            continue
        try:
            parsed.append(json.loads(candidate))
        except json.JSONDecodeError:
            # Best-effort: ignore malformed lines rather than abort.
            continue
    return parsed
def get_docker_inspect(ids: List[str]) -> List[dict]:
    """Run ``docker inspect`` on a list of IDs and return the parsed JSON.

    Args:
        ids: Container or image IDs/names to inspect; may be empty.

    Returns:
        The list of inspect dicts on success, or [] when *ids* is empty,
        the command fails, produces no output, or raises (e.g. docker
        binary missing, timeout, malformed JSON).
    """
    if not ids:
        return []
    try:
        # One batched inspect call; longer 120s timeout since the
        # argument list can be large.
        result = subprocess.run(
            ["docker", "inspect"] + ids,
            capture_output=True, text=True, timeout=120
        )
        if result.returncode == 0 and result.stdout.strip():
            return json.loads(result.stdout)
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate; any runtime failure degrades to "no data".
        pass
    return []
def extract_layer_ids(graphdriver_data: dict) -> Set[str]:
    """Extract overlay2 layer IDs from a GraphDriver ``Data`` mapping.

    Looks at the MergedDir/UpperDir/WorkDir/LowerDir entries and pulls
    the layer directory name out of each path under OVERLAY2_PATH.
    The special "l" shortlink directory is excluded.
    """
    prefix = OVERLAY2_PATH + "/"
    found: Set[str] = set()
    for key in ("MergedDir", "UpperDir", "WorkDir", "LowerDir"):
        # LowerDir may hold several ':'-separated paths; the others hold one.
        for entry in graphdriver_data.get(key, "").split(":"):
            if OVERLAY2_PATH not in entry:
                continue
            # Path shape: <OVERLAY2_PATH>/<layer_id>/diff -> take <layer_id>.
            segments = entry.replace(prefix, "").split("/")
            if segments and segments[0] and segments[0] != "l":
                found.add(segments[0])
    return found
def get_referenced_layers() -> Dict[str, str]: def get_referenced_layers() -> Dict[str, str]:
"""Get all overlay2 layer IDs referenced by containers and images. """Get all overlay2 layer IDs referenced by containers and images.
Returns dict of layer_id -> description.""" Returns dict of layer_id -> description."""
referenced = {} referenced = {}
# Get all container references # Get all container IDs and names
containers = get_docker_json(["docker", "ps", "-a", "--format", "{{json .}}"]) result = run_docker_cmd(["docker", "ps", "-a", "--format", "{{json .}}"])
containers = parse_jsonl(result.stdout) if result.returncode == 0 else []
# Build a map of container ID -> name
container_names = {}
container_ids = []
for c in containers: for c in containers:
if isinstance(c, str):
c = json.loads(c)
cid = c.get("ID", "") cid = c.get("ID", "")
cname = c.get("Names", cid) cname = c.get("Names", cid)
if cid:
container_ids.append(cid)
container_names[cid] = cname
# Inspect container for GraphDriver data # Inspect all containers at once (much faster)
inspect = get_docker_json(["docker", "inspect", cid]) if container_ids:
if inspect: inspected = get_docker_inspect(container_ids)
gd = inspect[0].get("GraphDriver", {}).get("Data", {}) for info in inspected:
for key in ["MergedDir", "UpperDir", "WorkDir", "LowerDir"]: cid = info.get("Id", "")[:12]
path = gd.get(key, "") cname = container_names.get(cid, info.get("Name", cid).lstrip("/"))
# LowerDir can have multiple paths separated by ':' gd = info.get("GraphDriver", {}).get("Data", {})
for p in path.split(":"): for layer_id in extract_layer_ids(gd):
if OVERLAY2_PATH in p:
# Extract layer ID from path
parts = p.replace(OVERLAY2_PATH + "/", "").split("/")
if parts:
layer_id = parts[0]
if layer_id and layer_id != "l":
referenced[layer_id] = f"container: {cname}" referenced[layer_id] = f"container: {cname}"
# Get all image references # Get all image IDs
images = get_docker_json(["docker", "images", "-a", "--format", "{{json .}}"]) result = run_docker_cmd(["docker", "images", "-a", "--format", "{{json .}}"])
for img in images: images = parse_jsonl(result.stdout) if result.returncode == 0 else []
if isinstance(img, str):
img = json.loads(img)
img_id = img.get("ID", "")
img_name = img.get("Repository", "") + ":" + img.get("Tag", "")
if img_name == ":":
img_name = img_id[:12]
inspect = get_docker_json(["docker", "inspect", img_id]) # Build a map of image ID -> name
if inspect: image_names = {}
gd = inspect[0].get("GraphDriver", {}).get("Data", {}) image_ids = []
for key in ["MergedDir", "UpperDir", "WorkDir", "LowerDir"]: for img in images:
path = gd.get(key, "") img_id = img.get("ID", "")
for p in path.split(":"): repo = img.get("Repository", "")
if OVERLAY2_PATH in p: tag = img.get("Tag", "")
parts = p.replace(OVERLAY2_PATH + "/", "").split("/") img_name = f"{repo}:{tag}" if repo and tag else img_id[:12]
if parts: if img_id:
layer_id = parts[0] image_ids.append(img_id)
if layer_id and layer_id != "l": image_names[img_id] = img_name
# Inspect all images at once
if image_ids:
inspected = get_docker_inspect(image_ids)
for info in inspected:
img_id = info.get("Id", "").replace("sha256:", "")[:12]
img_name = image_names.get(img_id, img_id)
gd = info.get("GraphDriver", {}).get("Data", {})
for layer_id in extract_layer_ids(gd):
if layer_id not in referenced: if layer_id not in referenced:
referenced[layer_id] = f"image: {img_name}" referenced[layer_id] = f"image: {img_name}"