tests/test_chunk.py
| 1 | """Tests for SignedChunk and ChunkMetadata.""" |
| 2 | |
| 3 | from __future__ import annotations |
| 4 | |
| 5 | from pqc_rag_signing import ChunkMetadata, SignedChunk |
| 6 | |
| 7 | |
def test_content_hash_deterministic(sample_metadata: ChunkMetadata) -> None:
    """Hashing the same text, metadata and nonce twice gives the same digest."""
    payload = "deterministic content"
    fixed_nonce = "deadbeef"

    # Two independent calls with identical arguments.
    first, second = (
        SignedChunk.compute_content_hash(payload, sample_metadata, fixed_nonce)
        for _ in range(2)
    )

    assert first == second
    assert len(first) == 64  # SHA3-256 hex digest
| 15 | |
| 16 | |
def test_content_hash_changes_with_text(sample_metadata: ChunkMetadata) -> None:
    """Changing only the text must change the content hash."""
    shared_nonce = "deadbeef"

    # Metadata and nonce are held constant; only the text varies.
    digest_alpha, digest_beta = [
        SignedChunk.compute_content_hash(body, sample_metadata, shared_nonce)
        for body in ("alpha", "beta")
    ]

    assert digest_alpha != digest_beta
| 22 | |
| 23 | |
def test_content_hash_changes_with_metadata() -> None:
    """Changing only the metadata source must change the content hash."""
    # Build both metadata objects up front; they differ only in ``source``.
    metadata_variants = [
        ChunkMetadata(source=source_name, chunk_index=0, total_chunks=1)
        for source_name in ("a.txt", "b.txt")
    ]

    # Text and nonce are held constant across both hashes.
    digest_a, digest_b = [
        SignedChunk.compute_content_hash("same text", variant, "deadbeef")
        for variant in metadata_variants
    ]

    assert digest_a != digest_b
| 32 | |
| 33 | |
def test_content_hash_changes_with_nonce(sample_metadata: ChunkMetadata) -> None:
    """Changing only the nonce must change the content hash."""
    body = "same text"

    # Text and metadata are held constant; only the nonce varies.
    digest_a, digest_b = [
        SignedChunk.compute_content_hash(body, sample_metadata, nonce_value)
        for nonce_value in ("nonce-a", "nonce-b")
    ]

    assert digest_a != digest_b
| 39 | |
| 40 | |
def test_to_dict_roundtrip(sample_signed_chunk: SignedChunk) -> None:
    """A chunk serialized with ``to_dict`` and rebuilt with ``from_dict`` is unchanged.

    Checks every top-level field individually, then the metadata both by
    attribute and by its full serialized form.
    """
    restored = SignedChunk.from_dict(sample_signed_chunk.to_dict())

    top_level_fields = (
        "chunk_id",
        "text",
        "content_hash",
        "signature",
        "public_key",
        "signer_did",
        "algorithm",
        "nonce",
        "corpus_id",
    )
    for field_name in top_level_fields:
        # The field name is the assertion message so a failure says which field broke.
        assert getattr(restored, field_name) == getattr(
            sample_signed_chunk, field_name
        ), field_name

    original_meta = sample_signed_chunk.metadata
    assert restored.metadata.source == original_meta.source
    assert restored.metadata.chunk_index == original_meta.chunk_index
    # Compare the complete serialized metadata as well, so fields not checked
    # individually above (e.g. total_chunks, extra) are also covered.
    assert restored.metadata.to_dict() == original_meta.to_dict()
| 55 | |
| 56 | |
def test_metadata_extra_field() -> None:
    """Values passed via ``extra`` appear under the ``"extra"`` key of ``to_dict``."""
    serialized = ChunkMetadata(
        source="x.pdf",
        chunk_index=0,
        total_chunks=1,
        extra={"author": "Alice", "year": 2026},
    ).to_dict()

    # Each extra entry must come back with its original value.
    for key, expected in (("author", "Alice"), ("year", 2026)):
        assert serialized["extra"][key] == expected
| 67 | |