tests/test_manifest.py
5.2 KB · 130 lines · python Raw
1 """Tests for ContentManifest and its helpers."""
2
3 from __future__ import annotations
4
5 import pytest
6
7 from pqc_content_provenance import (
8 AIGeneratedAssertion,
9 ContentManifest,
10 GenerationContext,
11 ModelAttribution,
12 UsageAssertion,
13 )
14 from pqc_content_provenance.errors import InvalidManifestError, UnknownAssertionError
15
16
def test_compute_content_hash_deterministic() -> None:
    """Hashing identical bytes twice yields one length-64 digest; other bytes differ."""
    payload = b"hello, world"
    first_digest = ContentManifest.compute_content_hash(payload)
    second_digest = ContentManifest.compute_content_hash(payload)

    # Repeated calls on the same input must agree.
    assert first_digest == second_digest
    assert len(first_digest) == 64

    # A different payload must not produce the same digest.
    other_digest = ContentManifest.compute_content_hash(b"other")
    assert other_digest != first_digest
25
26
def test_create_sets_required_fields(sample_manifest: ContentManifest) -> None:
    """The fixture manifest has its identity, content and attribution fields set and is unsigned."""
    # Identity and content description.
    assert sample_manifest.manifest_id.startswith("urn:pqc-prov:")
    assert len(sample_manifest.content_hash) == 64
    assert sample_manifest.content_type == "text/plain"
    assert sample_manifest.content_size > 0
    assert sample_manifest.created_at != ""

    # Attribution and attached assertions.
    attribution = sample_manifest.model_attribution
    assert attribution.model_name == "Llama-3-8B-Instruct"
    assert len(sample_manifest.assertions) == 2

    # Nothing has signed the manifest yet, so the signature fields are empty strings.
    assert sample_manifest.signature == ""
    assert sample_manifest.signer_did == ""
39
40
def test_to_dict_from_dict_roundtrip(sample_manifest: ContentManifest) -> None:
    """A manifest passed through ``to_dict`` and ``from_dict`` keeps its field values."""
    # Populate the signature-related fields so the round trip has to carry them too.
    signing_values = {
        "signer_did": "did:pqaid:abc",
        "algorithm": "ML-DSA-65",
        "signature": "deadbeef",
        "public_key": "cafebabe",
        "signed_at": "2026-04-20T12:34:56+00:00",
        "previous_manifest_id": "urn:pqc-prov:prev",
    }
    for field_name, value in signing_values.items():
        setattr(sample_manifest, field_name, value)

    restored = ContentManifest.from_dict(sample_manifest.to_dict())

    # Top-level fields are compared one by one, in the same order as before.
    top_level_fields = (
        "manifest_id",
        "content_hash",
        "content_type",
        "content_size",
        "created_at",
        "previous_manifest_id",
        "signer_did",
        "algorithm",
        "signature",
        "public_key",
        "signed_at",
    )
    for field_name in top_level_fields:
        assert getattr(restored, field_name) == getattr(sample_manifest, field_name), field_name

    # Spot-check one field of each nested structure.
    original_attribution = sample_manifest.model_attribution
    original_context = sample_manifest.generation_context
    assert restored.model_attribution.model_did == original_attribution.model_did
    assert restored.generation_context.prompt_hash == original_context.prompt_hash

    # The assertion count and both assertion labels survive the round trip.
    assert len(restored.assertions) == len(sample_manifest.assertions)
    restored_labels = {assertion.label for assertion in restored.assertions}
    assert "c2pa.ai_generated" in restored_labels
    assert "c2pa.usage" in restored_labels
71
72
def test_from_json_raises_on_invalid_json() -> None:
    """``from_json`` raises ``InvalidManifestError`` when given malformed JSON text."""
    malformed_text = "{not valid json"
    with pytest.raises(InvalidManifestError):
        ContentManifest.from_json(malformed_text)
76
77
def test_from_dict_raises_on_unknown_assertion(sample_manifest: ContentManifest) -> None:
    """``from_dict`` raises ``UnknownAssertionError`` for an unrecognised assertion label."""
    payload = sample_manifest.to_dict()

    # Append an assertion entry whose label is not one of the known types.
    unknown_entry = {"label": "c2pa.unknown_type", "foo": "bar"}
    payload["assertions"].append(unknown_entry)

    with pytest.raises(UnknownAssertionError):
        ContentManifest.from_dict(payload)
83
84
def test_canonical_bytes_stable(sample_manifest: ContentManifest) -> None:
    """``canonical_bytes`` is repeatable and matches for a field-by-field rebuilt manifest.

    NOTE(review): the rebuilt manifest copies ``parameters`` with ``dict(...)`` and
    ``input_content_hashes`` with ``list(...)``, both of which keep the original
    order, so independence from key ordering is not actually exercised here.
    """
    expected = sample_manifest.canonical_bytes()
    # Calling it a second time on the same object must give the same bytes.
    assert expected == sample_manifest.canonical_bytes()

    source_attribution = sample_manifest.model_attribution
    source_context = sample_manifest.generation_context

    # Rebuild every nested structure as a fresh object with equal values.
    rebuilt_attribution = ModelAttribution(
        model_did=source_attribution.model_did,
        model_name=source_attribution.model_name,
        model_version=source_attribution.model_version,
        registry_url=source_attribution.registry_url,
        model_manifest_hash=source_attribution.model_manifest_hash,
    )
    rebuilt_context = GenerationContext(
        prompt_hash=source_context.prompt_hash,
        input_content_hashes=list(source_context.input_content_hashes),
        parameters=dict(source_context.parameters),
        generated_at=source_context.generated_at,
    )
    # These literal values are expected to mirror the assertions attached by the
    # sample_manifest fixture (defined outside this file).
    rebuilt_assertions = [
        AIGeneratedAssertion(
            model_name="Llama-3-8B-Instruct",
            model_version="1.0",
            generator_type="text",
        ),
        UsageAssertion(
            license="cc-by-4.0",
            commercial_use=True,
            attribution_required=True,
        ),
    ]

    rebuilt = ContentManifest(
        manifest_id=sample_manifest.manifest_id,
        content_hash=sample_manifest.content_hash,
        content_type=sample_manifest.content_type,
        content_size=sample_manifest.content_size,
        model_attribution=rebuilt_attribution,
        generation_context=rebuilt_context,
        assertions=rebuilt_assertions,
        created_at=sample_manifest.created_at,
        previous_manifest_id=sample_manifest.previous_manifest_id,
    )
    assert rebuilt.canonical_bytes() == expected
124
125
def test_to_json_parses_back(sample_manifest: ContentManifest) -> None:
    """The output of ``to_json`` is accepted by ``from_json`` and keeps the manifest id."""
    reparsed = ContentManifest.from_json(sample_manifest.to_json())
    assert reparsed.manifest_id == sample_manifest.manifest_id
130