# examples/build_llama_mbom.py
"""Build a realistic MBOM for Llama-3-8B, sign it, and verify the signature.

"Realistic" here means realistic in *shape* only: the content hashes are
SHA3-256 digests of placeholder label strings (computed with hashlib), not
digests of real artifacts, so the example is reproducible offline.
"""
6
7 from __future__ import annotations
8
9 import hashlib
10
11 from quantumshield.identity.agent import AgentIdentity
12
13 from pqc_mbom import (
14 ComponentType,
15 LicenseInfo,
16 MBOMBuilder,
17 MBOMSigner,
18 MBOMVerifier,
19 )
20
21
def _h(label: str) -> str:
    """Return the hex SHA3-256 digest of *label*.

    Stand-in for a real artifact hash: the digest is taken over the label
    text itself, so the same label always yields the same 64-character
    lowercase hex string.
    """
    # UTF-8 is str.encode()'s default; spelled out so the digest input is
    # unambiguous to readers.
    return hashlib.sha3_256(label.encode("utf-8")).hexdigest()
24
25
def main() -> None:
    """Build, sign, print, and verify an example MBOM for Llama-3-8B-Instruct.

    Steps, in order:
      1. Create a signing identity via ``AgentIdentity.create``.
      2. Register the model's components on an ``MBOMBuilder`` (architecture,
         tokenizer, datasets, weights, quantization, evaluations, safety
         model) using placeholder hashes from ``_h``.
      3. Build the MBOM and sign it with ``MBOMSigner``.
      4. Print a summary, the component list, and the result of
         ``MBOMVerifier.verify``.

    Everything is written to stdout; nothing is returned.
    """
    # Function-scope import keeps this block self-contained; it is only
    # needed for the generic add_component() call further down.
    from pqc_mbom import ModelComponent

    identity = AgentIdentity.create("meta-llama-release-pipeline")

    builder = MBOMBuilder("Llama-3-8B-Instruct", "1.0.0", supplier="Meta")
    builder.set_description(
        "Llama 3 8B instruction-tuned model. Canonical components enumerated "
        "with SHA3-256 content hashes and signed with ML-DSA."
    )

    # --- Model definition -------------------------------------------------
    builder.add_base_architecture(
        "Llama-3-architecture",
        version="3.0",
        content_hash=_h("llama-3-architecture-definition"),
        supplier="Meta",
        license=LicenseInfo(
            spdx_id="llama-3-community",
            name="Llama 3 Community License",
            url="https://llama.meta.com/llama3/license",
            commercial_use=True,
        ),
    )
    builder.add_tokenizer(
        "llama3-tokenizer",
        content_hash=_h("llama3-tiktoken-vocab"),
        supplier="Meta",
        properties={"vocab_size": "128256"},
    )

    # --- Datasets ---------------------------------------------------------
    builder.add_training_data(
        "pretraining-mix",
        content_hash=_h("pretraining-blob"),
        content_size=15 * 10**12,
        supplier="Meta",
        properties={"source": "15T tokens filtered web + code + books"},
    )
    builder.add_fine_tuning_data(
        "instruct-sft-v1",
        content_hash=_h("sft-dataset"),
        content_size=10 * 10**9,
        supplier="Meta",
    )
    builder.add_rlhf_data(
        "preference-pairs-v1",
        content_hash=_h("preference-pairs"),
        supplier="Meta",
        properties={"pair_count": "1.5M"},
    )

    # --- Weights and quantization -----------------------------------------
    builder.add_weights(
        "llama3-8b-instruct.safetensors",
        content_hash=_h("llama3-8b-instruct-weights"),
        content_size=16_060_522_240,
        supplier="Meta",
    )
    builder.add_quantization(
        # Named "bf16" so the component name agrees with the dtype property
        # (the name previously said fp16 while dtype said bfloat16).
        "no-quant-bf16",
        properties={"dtype": "bfloat16", "method": "none"},
    )

    # --- Evaluations and safety model -------------------------------------
    builder.add_evaluation(
        "mmlu-5shot",
        content_hash=_h("mmlu-benchmark-records"),
        external_url="https://github.com/hendrycks/test",
    )
    builder.add_evaluation(
        "human-eval",
        content_hash=_h("human-eval-records"),
    )
    builder.add_component(
        ModelComponent(
            component_id="safety-guard-1",
            component_type=ComponentType.SAFETY_MODEL,
            name="llama-guard-2",
            content_hash=_h("llama-guard-2-weights"),
        )
    )

    mbom = builder.build()

    # sign()'s return value is not used; the signature fields are read back
    # from ``mbom`` below, so it presumably signs in place -- confirm against
    # MBOMSigner.
    signer = MBOMSigner(identity)
    signer.sign(mbom)

    rule = "=" * 70

    print(rule)
    print(f"MBOM: {mbom.mbom_id}")
    print(f"Model: {mbom.model_name} v{mbom.model_version}")
    print(f"Supplier: {mbom.supplier}")
    print(f"Created: {mbom.created_at}")
    print(f"Components: {len(mbom.components)}")
    print(f"Root hash: {mbom.components_root_hash}")
    print(f"Signer DID: {mbom.signer_did}")
    print(f"Algorithm: {mbom.algorithm}")
    # Halving the length assumes the signature is a hex string (two
    # characters per byte) -- TODO confirm.
    print(f"Signature len: {len(mbom.signature) // 2} bytes")
    print(rule)
    print("Components:")
    for c in mbom.components:
        # Components with no (or zero) size get a blank column of the same
        # width as the formatted size: 15 digits/commas + " B" = 17 chars.
        size = f"{c.content_size:>15,} B" if c.content_size else " " * 17
        print(f" - [{c.component_type.value:<22}] {c.name:<35} {size}")

    print(rule)
    result = MBOMVerifier.verify(mbom)
    print(f"Verification: valid={result.valid}")
    print(f" signature_valid={result.signature_valid}")
    print(f" root_hash_valid={result.root_hash_valid}")
125
# Run the example only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    main()