examples/simple_ingest.py
| 1 | """ |
| 2 | Simple RAG Ingest Example |
| 3 | |
| 4 | Shows how to sign a small document corpus with ML-DSA so it can be |
| 5 | verified at retrieval time. |
| 6 | """ |
| 7 | |
| 8 | from quantumshield import AgentIdentity |
| 9 | |
| 10 | from pqc_rag_signing import Corpus |
| 11 | |
| 12 | |
def main() -> None:
    """Sign a small two-document corpus and print its manifest.

    Creates an ingest identity, registers the example documents and their
    chunks with a corpus, signs every chunk, and then builds a manifest
    for the corpus. Progress is reported on stdout; nothing is returned.
    """
    # Identity under which the ingest pipeline operates
    ingest_identity = AgentIdentity.create("my-company-rag-ingest")
    print(f"Ingest DID: {ingest_identity.did}")
    algorithm = ingest_identity.signing_keypair.algorithm
    print(f"Algorithm: {algorithm.value}")

    # Example documents, as (source name, chunk texts) pairs in ingest order
    documents = (
        (
            "handbook-2026.pdf",
            [
                "QuantaMrkt employees use ML-DSA-87 for all model signing.",
                "All data in transit uses ML-KEM-1024 key encapsulation.",
                "Classical crypto (RSA, ECDSA) is deprecated for new systems.",
            ],
        ),
        (
            "security-policy.pdf",
            [
                "All AI agents must have PQ-AID credentials.",
                "Retrieval-augmented systems must use signed chunks.",
            ],
        ),
    )

    # Register every document with the corpus
    handbook = Corpus(name="company-handbook-v1", identity=ingest_identity)
    for source_name, texts in documents:
        handbook.add_document(source_name, chunks=texts)

    # Sign all chunks and list them with their 1-based position
    signed_chunks = handbook.sign_all()
    print(f"\nSigned {len(signed_chunks)} chunks")
    for chunk in signed_chunks:
        origin = f" {chunk.chunk_id} {chunk.metadata.source} "
        ordinal = chunk.metadata.chunk_index + 1
        print(f"{origin}[chunk {ordinal}/{chunk.metadata.total_chunks}]")

    # Manifest committing to the whole corpus
    corpus_manifest = handbook.build_manifest()
    print("\nCorpus manifest:")
    print(f" corpus_id = {corpus_manifest.corpus_id}")
    print(f" root = {corpus_manifest.root}")
    # Only the first 32 characters of the signature are shown
    signature_prefix = corpus_manifest.signature[:32]
    print(f" signature = {signature_prefix}...")
    print("\n[OK] Corpus ready for vector DB ingestion.")
| 53 | |
| 54 | |
# Run the example only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
| 57 | |