examples/commit_corpus.py
2.0 KB · 68 lines · python Raw
1 """Commit a training corpus and publish the signed root.
2
3 Run: python examples/commit_corpus.py
4 """
5
6 from quantumshield import AgentIdentity
7
8 from pqc_training_data import (
9 CommitmentBuilder,
10 CommitmentSigner,
11 DataRecord,
12 )
13
14
def main() -> None:
    """Build, sign, and print a commitment over a small sample corpus.

    Creates a "model-creator" identity and a signer bound to it, assembles
    five in-memory ``DataRecord`` entries, hands them to a
    ``CommitmentBuilder`` together with license and tag lists, signs the
    built commitment, and prints the signed commitment's fields to stdout.
    """
    creator = AgentIdentity.create("model-creator")
    commitment_signer = CommitmentSigner(creator)

    # Stand-in training corpus, one (content, source, id) triple per record.
    samples = (
        (b"Patient records: de-identified dataset v3.", "ehr", 1),
        (b"Medical literature corpus 2024-2026.", "pubmed", 2),
        (b"Synthetic diagnostic transcripts.", "synthetic", 3),
        (b"Public domain medical textbooks.", "pd-books", 4),
        (b"FDA drug approval filings.", "fda", 5),
    )
    records = [
        DataRecord(
            content=payload,
            metadata={"source": origin, "id": record_id},
        )
        for payload, origin, record_id in samples
    ]

    corpus_builder = CommitmentBuilder(
        dataset_name="medical-diagnostics-train-v1",
        dataset_version="1.0.0",
    )
    corpus_builder.add_records(records)
    corpus_builder.licenses = ["cc-by-4.0", "public-domain"]
    corpus_builder.tags = ["medical", "diagnostics"]

    unsigned = corpus_builder.build(
        description="Training data for Medical Diagnostics model v1"
    )
    signed_commitment = commitment_signer.sign(unsigned)

    print("[OK] Commitment created")
    print(f" commitment_id: {signed_commitment.commitment_id}")
    print(
        f" dataset: {signed_commitment.dataset_name} "
        f"v{signed_commitment.dataset_version}"
    )
    print(f" record_count: {signed_commitment.record_count}")
    print(f" root: {signed_commitment.root}")
    print(f" signer_did: {signed_commitment.signer_did}")
    print(f" algorithm: {signed_commitment.algorithm}")
    # Only the first 48 characters of the signature are shown.
    signature_preview = signed_commitment.signature[:48]
    print(f" signature (truncated): {signature_preview}...")
64
65
# Run the example only when this file is executed as a script, so importing
# the module does not trigger the demo.
if __name__ == "__main__":
    main()
68