examples/commit_corpus.py
| 1 | """Commit a training corpus and publish the signed root. |
| 2 | |
| 3 | Run: python examples/commit_corpus.py |
| 4 | """ |
| 5 | |
| 6 | from quantumshield import AgentIdentity |
| 7 | |
| 8 | from pqc_training_data import ( |
| 9 | CommitmentBuilder, |
| 10 | CommitmentSigner, |
| 11 | DataRecord, |
| 12 | ) |
| 13 | |
| 14 | |
def main() -> None:
    """Build, sign, and print a commitment for a small demo corpus.

    Creates a "model-creator" identity and a signer around it, turns five
    sample byte strings into ``DataRecord`` objects, commits them under the
    dataset name and version given below, signs the resulting commitment,
    and prints the signed commitment's fields to stdout.
    """
    signer = CommitmentSigner(AgentIdentity.create("model-creator"))

    # Simulated training corpus as (content, source) pairs; the "id"
    # metadata value is the 1-based position of the pair in this table.
    samples = (
        (b"Patient records: de-identified dataset v3.", "ehr"),
        (b"Medical literature corpus 2024-2026.", "pubmed"),
        (b"Synthetic diagnostic transcripts.", "synthetic"),
        (b"Public domain medical textbooks.", "pd-books"),
        (b"FDA drug approval filings.", "fda"),
    )
    corpus = [
        DataRecord(content=payload, metadata={"source": origin, "id": number})
        for number, (payload, origin) in enumerate(samples, start=1)
    ]

    builder = CommitmentBuilder(
        dataset_name="medical-diagnostics-train-v1",
        dataset_version="1.0.0",
    )
    builder.add_records(corpus)
    builder.licenses = ["cc-by-4.0", "public-domain"]
    builder.tags = ["medical", "diagnostics"]

    # Build the commitment and sign it in one step; only the signed
    # result is needed for the report below.
    signed = signer.sign(
        builder.build(
            description="Training data for Medical Diagnostics model v1"
        )
    )

    print("[OK] Commitment created")
    print(f" commitment_id: {signed.commitment_id}")
    print(f" dataset: {signed.dataset_name} v{signed.dataset_version}")
    print(f" record_count: {signed.record_count}")
    print(f" root: {signed.root}")
    print(f" signer_did: {signed.signer_did}")
    print(f" algorithm: {signed.algorithm}")
    # Show only the first 48 elements of the signature to keep output short.
    signature_preview = signed.signature[:48]
    print(f" signature (truncated): {signature_preview}...")
| 64 | |
| 65 | |
if __name__ == "__main__":
    # Run the demo only when this file is executed as a script,
    # not when it is imported as a module.
    main()
| 68 | |