tests/test_integration.py
3.3 KB · 101 lines · python Raw
1 """End-to-end integration tests."""
2
3 from __future__ import annotations
4
5 import json
6 from pathlib import Path
7
8 from quantumshield.identity.agent import AgentIdentity
9
10 from pqc_mbom import (
11 ComponentType,
12 MBOM,
13 MBOMBuilder,
14 MBOMSigner,
15 MBOMVerifier,
16 diff_mboms,
17 )
18
19
def test_full_lifecycle(tmp_path: Path) -> None:
    """Build, sign, persist, reload and verify an MBOM end to end.

    The signed MBOM is written as JSON under ``tmp_path``, read back, and
    compared with the in-memory original before its signature is verified.
    """
    signer_identity = AgentIdentity.create("release-pipeline")

    # Eight add_* calls follow; the assertion after build() expects
    # eight components.
    mbom_builder = MBOMBuilder("Llama-3-8B-Instruct", "1.0.0", supplier="Meta")
    mbom_builder.set_description("End-to-end integration model")
    mbom_builder.add_base_architecture(
        "Llama-3", version="3.0", content_hash="a" * 64
    )
    mbom_builder.add_training_data(
        "common-crawl-2024", content_hash="b" * 64, content_size=10**12
    )
    mbom_builder.add_fine_tuning_data("instruct-v1", content_hash="c" * 64)
    mbom_builder.add_rlhf_data("hh-rlhf", content_hash="d" * 64)
    mbom_builder.add_tokenizer("llama3-tokenizer", content_hash="e" * 64)
    mbom_builder.add_weights(
        "model.safetensors",
        content_hash="f" * 64,
        content_size=16_000_000_000,
    )
    mbom_builder.add_evaluation("mmlu", content_hash="1" * 64)
    mbom_builder.add_quantization("int8-smoothquant")
    original = mbom_builder.build()

    assert len(original.components) == 8

    # Sign the freshly built MBOM with the pipeline identity.
    MBOMSigner(signer_identity).sign(original)

    # Persist to disk as UTF-8 JSON.
    json_path = tmp_path / "mbom.json"
    json_path.write_text(original.to_json(), encoding="utf-8")

    # Reload from disk and compare identifying fields with the original.
    reloaded = MBOM.from_json(json_path.read_text(encoding="utf-8"))
    assert reloaded.mbom_id == original.mbom_id
    assert reloaded.components_root_hash == original.components_root_hash

    # Verify the reloaded copy and check who signed it.
    outcome = MBOMVerifier.verify_or_raise(reloaded)
    assert outcome.valid
    assert outcome.signer_did == signer_identity.did

    # The file on disk must parse as JSON and carry non-empty values
    # for both of these keys.
    payload = json.loads(json_path.read_text(encoding="utf-8"))
    for required_key in ("schema_version", "signature"):
        assert payload[required_key]
57
58
def test_diff_between_two_versions() -> None:
    """Diff two signed MBOM versions and check the reported delta.

    Version 2 is cloned from version 1 via a JSON round-trip, then given a
    new training-data hash and one extra evaluation component. The diff is
    expected to report exactly one added and one changed component.
    """
    signer_identity = AgentIdentity.create("release-pipeline")

    # v1: three components, built step by step and then signed.
    v1_builder = MBOMBuilder("Llama-3-8B-Instruct", "1.0.0", supplier="Meta")
    v1_builder = v1_builder.add_base_architecture(
        "Llama-3", version="3.0", content_hash="a" * 64
    )
    v1_builder = v1_builder.add_weights(
        "model.safetensors",
        content_hash="w1" * 32,
        content_size=16_000_000_000,
    )
    v1_builder = v1_builder.add_training_data(
        "common-crawl-2024", content_hash="t1" * 32
    )
    v1 = v1_builder.build()
    MBOMSigner(signer_identity).sign(v1)

    # v2 starts as a JSON round-trip copy of v1 so the existing component
    # ids stay stable and the hash swap shows up as a change.
    v2 = MBOM.from_json(v1.to_json())
    v2.mbom_id = v1.mbom_id.replace("mbom:", "mbom-v2:")
    v2.model_version = "1.0.1"

    # Swap the content hash on every training-data component.
    training_entries = (
        entry
        for entry in v2.components
        if entry.component_type == ComponentType.TRAINING_DATA
    )
    for entry in training_entries:
        entry.content_hash = "t2" * 32

    # Reuse the class of an existing component to construct the new one.
    component_cls = type(v2.components[0])
    extra_eval = component_cls(
        component_id="eval-mmlu",
        component_type=ComponentType.EVALUATION_BENCHMARK,
        name="mmlu",
        content_hash="e" * 64,
    )
    v2.components.append(extra_eval)

    # Recompute the root after editing components, then sign v2.
    v2.recompute_root()
    MBOMSigner(signer_identity).sign(v2)

    delta = diff_mboms(v1, v2)

    assert len(delta.added) == 1
    assert delta.added[0].component_type == ComponentType.EVALUATION_BENCHMARK

    assert len(delta.changed) == 1
    before, after = delta.changed[0]
    assert before.content_hash != after.content_hash

    # Both versions still verify individually.
    for version in (v1, v2):
        assert MBOMVerifier.verify(version).valid
101