src/pqc_content_provenance/embed.py

2.9 KB · 80 lines · python Raw

1	`"""Embed/extract manifests in/from content.`
2
3	`Two embedding modes:`
4	`1. Sidecar: manifest and base64-encoded content are bundled into one JSON envelope, meant to be saved as a .c2pa.json file.`
5	`2. Inline header: for text, we prepend a <!--PQC-PROV-BEGIN-->...<!--PQC-PROV-END--> JSON block.`
6
7	`For images/video, real C2PA stores inside metadata (XMP, EXIF). Here we keep it`
8	`simple: we provide sidecar packaging helpers. Callers can embed the bytes`
9	`however they want for their own file format.`
10	`"""`
11
12	`from __future__ import annotations`
13
14	`import base64`
15	`import json`
16
17	`from pqc_content_provenance.errors import InvalidManifestError`
18	`from pqc_content_provenance.manifest import ContentManifest`
19
20
# File suffix under which the 'sidecar' output of embed_manifest is meant to be saved.
SIDECAR_EXTENSION = ".c2pa.json"
# Markers that bracket the manifest JSON prepended to text content in 'text-header' mode.
TEXT_MARKER_BEGIN = "<!--PQC-PROV-BEGIN-->"
TEXT_MARKER_END = "<!--PQC-PROV-END-->"
24
25
def embed_manifest(content: bytes, manifest: ContentManifest, mode: str = "sidecar") -> bytes:
    """Package ``content`` together with ``manifest`` in the requested form.

    Args:
        content: Raw content bytes to package.
        manifest: Manifest describing the content.
        mode: ``'sidecar'`` returns a UTF-8 JSON envelope holding the manifest
            dict and the base64-encoded content (save it as a .c2pa.json
            file). ``'text-header'`` returns the content prefixed by the
            manifest JSON wrapped in the begin/end markers and a newline.

    Returns:
        The embedded blob as bytes.

    Raises:
        ValueError: If ``mode`` is not one of the supported modes.
    """
    if mode not in ("sidecar", "text-header"):
        raise ValueError(f"unknown embed mode: {mode}")

    if mode == "text-header":
        marked = "{}{}{}".format(TEXT_MARKER_BEGIN, manifest.to_json(), TEXT_MARKER_END)
        # A single newline separates the header from the untouched content bytes.
        return b"".join([marked.encode("utf-8"), b"\n", content])

    # Sidecar: manifest and content travel together in one JSON document.
    payload = {
        "manifest": manifest.to_dict(),
        "content_base64": _to_base64(content),
    }
    serialized = json.dumps(payload, indent=2)
    return serialized.encode("utf-8")
44
45
def extract_manifest(blob: bytes, mode: str = "sidecar") -> tuple[ContentManifest, bytes]:
    """Extract (manifest, content) from an embedded blob. Inverse of embed_manifest.

    Args:
        blob: Bytes previously produced by ``embed_manifest`` with the same mode.
        mode: ``'sidecar'`` parses a JSON envelope with ``manifest`` and
            ``content_base64`` keys. ``'text-header'`` reads the manifest JSON
            between the begin/end markers and treats everything after the end
            marker (minus one separating newline) as the content.

    Returns:
        A ``(manifest, content)`` tuple. In text-header mode the content bytes
        are returned exactly as they appear in ``blob``.

    Raises:
        InvalidManifestError: If the sidecar envelope is malformed, the
            text-header markers are missing or out of order, or the manifest
            region is not valid UTF-8.
        ValueError: If ``mode`` is not one of the supported modes.
    """
    if mode == "sidecar":
        try:
            envelope = json.loads(blob.decode("utf-8"))
            # Valid JSON can still be a list/str/number; reject it explicitly
            # so the failure surfaces as InvalidManifestError, not TypeError.
            if not isinstance(envelope, dict):
                raise ValueError("top-level JSON value is not an object")
            manifest = ContentManifest.from_dict(envelope["manifest"])
            encoded = envelope["content_base64"]
            if not isinstance(encoded, str):
                raise ValueError("content_base64 is not a string")
            content = _from_base64(encoded)
            return manifest, content
        except (ValueError, KeyError) as e:
            raise InvalidManifestError(f"invalid sidecar envelope: {e}") from e

    if mode == "text-header":
        # Work on the raw bytes: decoding the whole blob with errors="replace"
        # and re-encoding would silently corrupt content that is not valid
        # UTF-8. The markers are pure ASCII, so a byte search finds the same
        # positions a text search would.
        begin_marker = TEXT_MARKER_BEGIN.encode("utf-8")
        end_marker = TEXT_MARKER_END.encode("utf-8")

        begin_at = blob.find(begin_marker)
        if begin_at == -1:
            raise InvalidManifestError("text-header markers not found")
        start = begin_at + len(begin_marker)

        # The end marker must come after the begin marker; an earlier stray
        # end marker does not count.
        end = blob.find(end_marker, start)
        if end == -1:
            raise InvalidManifestError("text-header markers not found")

        try:
            manifest_json = blob[start:end].decode("utf-8")
        except UnicodeDecodeError as e:
            raise InvalidManifestError(
                f"text-header manifest is not valid UTF-8: {e}"
            ) from e
        manifest = ContentManifest.from_json(manifest_json)

        # content is everything after the end marker (skip the single newline
        # that embed_manifest inserts as a separator)
        rest = blob[end + len(end_marker):]
        if rest.startswith(b"\n"):
            rest = rest[1:]
        return manifest, rest

    raise ValueError(f"unknown embed mode: {mode}")
72
73
74	`def _to_base64(data: bytes) -> str:`
75	`return base64.b64encode(data).decode("ascii")`
76
77
78	`def _from_base64(s: str) -> bytes:`
79	`return base64.b64decode(s.encode("ascii"))`
80