tests/test_record.py
| 1 | """Tests for DataRecord and RecordHash.""" |
| 2 | |
| 3 | from pqc_training_data import DataRecord |
| 4 | |
| 5 | |
def test_leaf_hash_deterministic() -> None:
    """Leaf hash must not depend on the insertion order of metadata keys."""
    # Same content and same key/value pairs; only the dict ordering differs.
    ordered = DataRecord(content=b"hello", metadata={"a": 1, "b": 2})
    shuffled = DataRecord(content=b"hello", metadata={"b": 2, "a": 1})

    ordered_hex = ordered.leaf_hash().hex
    shuffled_hex = shuffled.leaf_hash().hex
    assert ordered_hex == shuffled_hex

    # Sanity: a SHA3-256 digest renders as 64 hex characters.
    assert len(ordered.leaf_hash().hex) == 64
| 12 | |
| 13 | |
def test_leaf_hash_changes_with_content() -> None:
    """Records with identical metadata but different content hash differently."""
    alpha_record = DataRecord(content=b"alpha", metadata={"k": 1})
    beta_record = DataRecord(content=b"beta", metadata={"k": 1})

    alpha_hex = alpha_record.leaf_hash().hex
    beta_hex = beta_record.leaf_hash().hex
    assert alpha_hex != beta_hex
| 18 | |
| 19 | |
def test_leaf_hash_changes_with_metadata() -> None:
    """Records with identical content but different metadata hash differently."""
    tagged_x = DataRecord(content=b"same-content", metadata={"tag": "x"})
    tagged_y = DataRecord(content=b"same-content", metadata={"tag": "y"})

    hex_x = tagged_x.leaf_hash().hex
    hex_y = tagged_y.leaf_hash().hex
    assert hex_x != hex_y
| 24 | |
| 25 | |
def test_to_dict_does_not_include_raw_content() -> None:
    """``to_dict()`` must expose size and digests, never the raw content bytes."""
    secret_payload = b"super secret training text"
    record = DataRecord(content=secret_payload, metadata={"source": "private"})

    exported = record.to_dict()

    # No "content" key, and no trace of the payload anywhere in the rendering.
    assert "content" not in exported
    rendered = repr(exported).encode()
    assert b"super secret" not in rendered

    # The safe, derived fields are still present.
    assert exported["content_size"] == len(secret_payload)
    assert len(exported["content_sha3_256"]) == 64
    assert exported["leaf_hash"] == record.leaf_hash().hex
| 39 | |