tests/test_integration.py
| 1 | """End-to-end integration tests.""" |
| 2 | |
| 3 | from __future__ import annotations |
| 4 | |
| 5 | import json |
| 6 | from pathlib import Path |
| 7 | |
| 8 | from quantumshield.identity.agent import AgentIdentity |
| 9 | |
| 10 | from pqc_mbom import ( |
| 11 | ComponentType, |
| 12 | MBOM, |
| 13 | MBOMBuilder, |
| 14 | MBOMSigner, |
| 15 | MBOMVerifier, |
| 16 | diff_mboms, |
| 17 | ) |
| 18 | |
| 19 | |
def test_full_lifecycle(tmp_path: Path) -> None:
    """Build, sign, save, reload and verify an MBOM end to end.

    An MBOM is assembled from eight components, signed with a freshly
    created identity, written to ``tmp_path`` as JSON, parsed back into an
    ``MBOM`` and verified. The saved file is finally inspected as plain JSON.
    """
    digest_len = 64  # every placeholder hash below is one character repeated
    signer_identity = AgentIdentity.create("release-pipeline")

    bom_builder = MBOMBuilder("Llama-3-8B-Instruct", "1.0.0", supplier="Meta")
    bom_builder.set_description("End-to-end integration model")
    bom_builder.add_base_architecture(
        "Llama-3", version="3.0", content_hash="a" * digest_len
    )
    bom_builder.add_training_data(
        "common-crawl-2024",
        content_hash="b" * digest_len,
        content_size=1_000_000_000_000,
    )
    bom_builder.add_fine_tuning_data("instruct-v1", content_hash="c" * digest_len)
    bom_builder.add_rlhf_data("hh-rlhf", content_hash="d" * digest_len)
    bom_builder.add_tokenizer("llama3-tokenizer", content_hash="e" * digest_len)
    bom_builder.add_weights(
        "model.safetensors",
        content_hash="f" * digest_len,
        content_size=16_000_000_000,
    )
    bom_builder.add_evaluation("mmlu", content_hash="1" * digest_len)
    bom_builder.add_quantization("int8-smoothquant")
    original = bom_builder.build()

    # One component is expected per add_* call made above.
    assert len(original.components) == 8

    # Attach a signature before anything is persisted.
    signer = MBOMSigner(signer_identity)
    signer.sign(original)

    # Persist the signed document.
    bom_file = tmp_path / "mbom.json"
    bom_file.write_text(original.to_json(), encoding="utf-8")

    # Parse it back and make sure the identifying fields survived.
    reloaded = MBOM.from_json(bom_file.read_text(encoding="utf-8"))
    assert reloaded.mbom_id == original.mbom_id
    assert reloaded.components_root_hash == original.components_root_hash

    # The reloaded copy must verify and name the signing identity.
    verdict = MBOMVerifier.verify_or_raise(reloaded)
    assert verdict.valid
    assert verdict.signer_did == signer_identity.did

    # The file on disk is parseable JSON carrying the expected top-level keys.
    raw = json.loads(bom_file.read_text(encoding="utf-8"))
    assert raw["schema_version"]
    assert raw["signature"]
| 57 | |
| 58 | |
def test_diff_between_two_versions() -> None:
    """Diff two signed MBOM versions and check what was added and changed.

    The second version is cloned from the first through a JSON round trip,
    then given a new id and version, a different training-data hash and one
    extra evaluation component.
    """
    identity = AgentIdentity.create("release-pipeline")

    # Baseline version: architecture, weights and training data.
    baseline_builder = MBOMBuilder("Llama-3-8B-Instruct", "1.0.0", supplier="Meta")
    baseline = (
        baseline_builder
        .add_base_architecture("Llama-3", version="3.0", content_hash="a" * 64)
        .add_weights(
            "model.safetensors",
            content_hash="w1" * 32,
            content_size=16_000_000_000,
        )
        .add_training_data("common-crawl-2024", content_hash="t1" * 32)
        .build()
    )
    MBOMSigner(identity).sign(baseline)

    # Successor version: start from a copy of the baseline so the existing
    # component ids carry over, then alter only what the diff should report.
    successor = MBOM.from_json(baseline.to_json())
    successor.mbom_id = baseline.mbom_id.replace("mbom:", "mbom-v2:")
    successor.model_version = "1.0.1"

    training_entries = [
        entry
        for entry in successor.components
        if entry.component_type == ComponentType.TRAINING_DATA
    ]
    for entry in training_entries:
        entry.content_hash = "t2" * 32

    # Reuse the class of an existing component to construct the new one.
    component_cls = type(successor.components[0])
    benchmark = component_cls(
        component_id="eval-mmlu",
        component_type=ComponentType.EVALUATION_BENCHMARK,
        name="mmlu",
        content_hash="e" * 64,
    )
    successor.components.append(benchmark)
    successor.recompute_root()
    MBOMSigner(identity).sign(successor)

    delta = diff_mboms(baseline, successor)

    # Exactly one addition: the evaluation benchmark.
    assert len(delta.added) == 1
    assert delta.added[0].component_type == ComponentType.EVALUATION_BENCHMARK

    # Exactly one change: the training data, whose hash differs.
    assert len(delta.changed) == 1
    before, after = delta.changed[0]
    assert before.content_hash != after.content_hash

    # Each version remains valid on its own.
    assert MBOMVerifier.verify(baseline).valid
    assert MBOMVerifier.verify(successor).valid
| 101 | |