examples/mbom_to_spdx.py
2.8 KB · 85 lines · python Raw
"""Convert an MBOM to SPDX 2.3 JSON and print a summary of the result.

The SPDX output interoperates with existing SBOM tooling (Dependency-Track,
CycloneDX converters, SPDX CLI). AI-specific metadata (component_type,
ML-DSA signature, etc.) survives as structured annotations.
"""
7
8 from __future__ import annotations
9
10 import hashlib
11 import json
12
13 from quantumshield.identity.agent import AgentIdentity
14
15 from pqc_mbom import (
16 LicenseInfo,
17 MBOMBuilder,
18 MBOMSigner,
19 from_spdx_json,
20 to_spdx_json,
21 )
22
23
def _h(label: str) -> str:
    """Return the SHA3-256 hex digest of *label*.

    The demo uses this to fabricate deterministic placeholder content
    hashes from short labels instead of hashing real artifacts.

    Args:
        label: Arbitrary text; it is encoded as UTF-8 before hashing.

    Returns:
        The 64-character lowercase hexadecimal digest.
    """
    # UTF-8 is what ``str.encode()`` defaults to; spelled out so the digest
    # visibly does not depend on any ambient setting.
    return hashlib.sha3_256(label.encode("utf-8")).hexdigest()
26
27
def _first_checksum(pkg: dict) -> str:
    """Return the ``checksumValue`` of the package's first checksum, or ``""``.

    A missing ``checksums`` key and a present-but-empty list are treated the
    same way, so indexing the first entry can never raise ``IndexError``.
    """
    checksums = pkg.get("checksums") or [{}]
    return checksums[0].get("checksumValue", "")


def _component_type(pkg: dict) -> str:
    """Return the component type recorded in the package's annotations.

    Scans the ``annotations`` list for the first comment that starts with
    ``pqc-mbom:component_type=`` and returns the text after that prefix.
    Annotations without a ``comment`` field are skipped. Returns
    ``"unknown"`` when no annotation matches.
    """
    prefix = "pqc-mbom:component_type="
    for annotation in pkg.get("annotations") or []:
        comment = annotation.get("comment") or ""
        if comment.startswith(prefix):
            return comment[len(prefix):]
    return "unknown"


def main() -> None:
    """Build and sign a demo MBOM, export it to SPDX JSON, and print a report.

    Steps, in order:

    1. Create an ``AgentIdentity`` named ``"release-pipeline"``.
    2. Assemble an MBOM for "Mistral-7B-Instruct" 0.3 with ``MBOMBuilder``
       (architecture, tokenizer, training data, fine-tuning data, weights),
       using ``_h`` to produce placeholder content hashes.
    3. Sign it with ``MBOMSigner`` and serialize it with ``to_spdx_json``.
    4. Print document-level fields and one line per SPDX package.
    5. Parse the SPDX JSON back with ``from_spdx_json`` and report whether
       ``components_root_hash`` matches the original.
    6. Print the first 600 characters of the SPDX JSON.
    """
    rule = "=" * 70

    identity = AgentIdentity.create("release-pipeline")

    mbom = (
        MBOMBuilder("Mistral-7B-Instruct", "0.3", supplier="Mistral AI")
        .set_description("Demo MBOM exported to SPDX 2.3")
        .add_base_architecture(
            "mistral-7b-architecture",
            version="0.3",
            content_hash=_h("mistral-architecture"),
            # NOTE(review): the SPDX License List spells this identifier
            # "Apache-2.0"; confirm pqc_mbom normalizes the casing.
            license=LicenseInfo(spdx_id="apache-2.0", commercial_use=True),
        )
        .add_tokenizer("mistral-tokenizer", content_hash=_h("mistral-tok"))
        .add_training_data("pretraining-mix", content_hash=_h("train"), content_size=10**12)
        .add_fine_tuning_data("instruct-sft", content_hash=_h("sft"))
        .add_weights("mistral-7b.safetensors", content_hash=_h("weights"), content_size=14 * 10**9)
        .build()
    )
    # The return value is not used; presumably sign() attaches the signature
    # to ``mbom`` in place -- TODO confirm against MBOMSigner.
    MBOMSigner(identity).sign(mbom)

    spdx_blob = to_spdx_json(mbom)
    doc = json.loads(spdx_blob)

    print(rule)
    print(f"SPDX document: {doc['name']}")
    print(f" spdxVersion: {doc['spdxVersion']}")
    print(f" dataLicense: {doc['dataLicense']}")
    print(f" namespace: {doc['documentNamespace']}")
    print(f" packages: {len(doc['packages'])}")
    print(f" relationships: {len(doc['relationships'])}")
    print(rule)
    print("Packages:")
    for pkg in doc["packages"]:
        checksum = _first_checksum(pkg)
        ctype = _component_type(pkg)
        print(f" - {pkg['name']:<40} [{ctype:<22}] sha3-256={checksum[:16]}...")

    print(rule)
    print("Roundtripping SPDX -> MBOM...")
    recovered = from_spdx_json(spdx_blob)
    print(f" recovered model: {recovered.model_name} v{recovered.model_version}")
    print(f" recovered components: {len(recovered.components)}")
    print(f" root hash matches: {recovered.components_root_hash == mbom.components_root_hash}")

    print(rule)
    print("First 600 chars of SPDX JSON:")
    print(spdx_blob[:600])
    print("...")
81
82
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
85