examples/build_llama_mbom.py
| 1 | """Build a realistic MBOM for Llama-3-8B, sign it, and verify the signature. |
| 2 | |
| 3 | Realistic here = realistic *shape*. Content hashes come from hashlib over |
| 4 | dummy blobs so the example is reproducible offline. |
| 5 | """ |
| 6 | |
| 7 | from __future__ import annotations |
| 8 | |
| 9 | import hashlib |
| 10 | |
| 11 | from quantumshield.identity.agent import AgentIdentity |
| 12 | |
| 13 | from pqc_mbom import ( |
| 14 | ComponentType, |
| 15 | LicenseInfo, |
| 16 | MBOMBuilder, |
| 17 | MBOMSigner, |
| 18 | MBOMVerifier, |
| 19 | ) |
| 20 | |
| 21 | |
| 22 | def _h(label: str) -> str: |
| 23 | return hashlib.sha3_256(label.encode()).hexdigest() |
| 24 | |
| 25 | |
def _build_mbom():
    """Assemble the example Llama-3-8B-Instruct MBOM and return it unsigned.

    Every ``content_hash`` is derived by ``_h`` from a fixed label string, so
    no real model artifacts are read.

    Returns:
        The object produced by ``MBOMBuilder.build()``.
    """
    # Explicit import instead of ``__import__("pqc_mbom").ModelComponent`` so
    # the dependency is visible to readers, linters, and type checkers.
    from pqc_mbom import ModelComponent

    builder = MBOMBuilder("Llama-3-8B-Instruct", "1.0.0", supplier="Meta")
    builder.set_description(
        "Llama 3 8B instruction-tuned model. Canonical components enumerated "
        "with SHA3-256 content hashes and signed with ML-DSA."
    )

    builder.add_base_architecture(
        "Llama-3-architecture",
        version="3.0",
        content_hash=_h("llama-3-architecture-definition"),
        supplier="Meta",
        license=LicenseInfo(
            spdx_id="llama-3-community",
            name="Llama 3 Community License",
            url="https://llama.meta.com/llama3/license",
            commercial_use=True,
        ),
    )
    builder.add_tokenizer(
        "llama3-tokenizer",
        content_hash=_h("llama3-tiktoken-vocab"),
        supplier="Meta",
        properties={"vocab_size": "128256"},
    )
    builder.add_training_data(
        "pretraining-mix",
        content_hash=_h("pretraining-blob"),
        content_size=15 * 10**12,
        supplier="Meta",
        properties={"source": "15T tokens filtered web + code + books"},
    )
    builder.add_fine_tuning_data(
        "instruct-sft-v1",
        content_hash=_h("sft-dataset"),
        content_size=10 * 10**9,
        supplier="Meta",
    )
    builder.add_rlhf_data(
        "preference-pairs-v1",
        content_hash=_h("preference-pairs"),
        supplier="Meta",
        properties={"pair_count": "1.5M"},
    )
    builder.add_weights(
        "llama3-8b-instruct.safetensors",
        content_hash=_h("llama3-8b-instruct-weights"),
        content_size=16_060_522_240,
        supplier="Meta",
    )
    # NOTE(review): the component name says "fp16" while the dtype property
    # says "bfloat16" -- confirm which one is intended.
    builder.add_quantization(
        "no-quant-fp16",
        properties={"dtype": "bfloat16", "method": "none"},
    )
    builder.add_evaluation(
        "mmlu-5shot",
        content_hash=_h("mmlu-benchmark-records"),
        external_url="https://github.com/hendrycks/test",
    )
    builder.add_evaluation(
        "human-eval",
        content_hash=_h("human-eval-records"),
    )
    # No dedicated builder helper is used for this component type here, so
    # the component is constructed directly and added generically.
    builder.add_component(
        ModelComponent(
            component_id="safety-guard-1",
            component_type=ComponentType.SAFETY_MODEL,
            name="llama-guard-2",
            content_hash=_h("llama-guard-2-weights"),
        )
    )

    return builder.build()


def _print_summary(mbom) -> None:
    """Print the signed MBOM's header fields followed by its component list.

    Args:
        mbom: A signed MBOM; its ``signature`` must support ``len()``.
    """
    print("=" * 70)
    print(f"MBOM: {mbom.mbom_id}")
    print(f"Model: {mbom.model_name} v{mbom.model_version}")
    print(f"Supplier: {mbom.supplier}")
    print(f"Created: {mbom.created_at}")
    print(f"Components: {len(mbom.components)}")
    print(f"Root hash: {mbom.components_root_hash}")
    print(f"Signer DID: {mbom.signer_did}")
    print(f"Algorithm: {mbom.algorithm}")
    # presumably the signature is a hex string (two characters per byte),
    # hence the halving -- verify against MBOMSigner.
    print(f"Signature len: {len(mbom.signature) // 2} bytes")
    print("=" * 70)
    print("Components:")
    for component in mbom.components:
        # A falsy size (missing or zero) is rendered as 17 spaces, matching
        # the width of the formatted "<15 chars> B" column.
        size = (
            f"{component.content_size:>15,} B"
            if component.content_size
            else " " * 17
        )
        print(
            f" - [{component.component_type.value:<22}] "
            f"{component.name:<35} {size}"
        )


def main() -> None:
    """Build the example MBOM, sign it, print a summary, and verify it.

    The signing identity is created first, then the MBOM is assembled,
    signed in place via ``MBOMSigner.sign``, reported on stdout, and finally
    checked with ``MBOMVerifier.verify``.
    """
    identity = AgentIdentity.create("meta-llama-release-pipeline")

    mbom = _build_mbom()

    signer = MBOMSigner(identity)
    signer.sign(mbom)

    _print_summary(mbom)

    print("=" * 70)
    result = MBOMVerifier.verify(mbom)
    print(f"Verification: valid={result.valid}")
    print(f" signature_valid={result.signature_valid}")
    print(f" root_hash_valid={result.root_hash_valid}")
| 124 | |
| 125 | |
# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
| 128 | |