tests/test_record.py
1.3 KB · 39 lines · python Raw
1 """Tests for DataRecord and RecordHash."""
2
3 from pqc_training_data import DataRecord
4
5
def test_leaf_hash_deterministic() -> None:
    """Check that metadata key order does not affect the leaf hash.

    Two records with identical content and the same metadata pairs, written
    in a different key order, must report the same hex digest.
    """
    forward = DataRecord(content=b"hello", metadata={"a": 1, "b": 2})
    reordered = DataRecord(content=b"hello", metadata={"b": 2, "a": 1})

    forward_hex = forward.leaf_hash().hex
    reordered_hex = reordered.leaf_hash().hex
    assert forward_hex == reordered_hex

    # Sanity: the hex digest is 64 characters long (SHA3-256).
    assert len(forward.leaf_hash().hex) == 64
12
13
def test_leaf_hash_changes_with_content() -> None:
    """Check that records differing only in content get different leaf hashes."""
    # Each record receives its own metadata dict; only the payload varies.
    alpha, beta = (
        DataRecord(content=payload, metadata={"k": 1})
        for payload in (b"alpha", b"beta")
    )

    alpha_hex = alpha.leaf_hash().hex
    beta_hex = beta.leaf_hash().hex
    assert alpha_hex != beta_hex
18
19
def test_leaf_hash_changes_with_metadata() -> None:
    """Check that records differing only in metadata get different leaf hashes."""
    # Same content for both records; only the value stored under "tag" varies.
    tagged_x, tagged_y = (
        DataRecord(content=b"same-content", metadata={"tag": tag})
        for tag in ("x", "y")
    )

    x_hex = tagged_x.leaf_hash().hex
    y_hex = tagged_y.leaf_hash().hex
    assert x_hex != y_hex
24
25
def test_to_dict_does_not_include_raw_content() -> None:
    """Check that ``to_dict()`` reports derived fields but not the raw content.

    The exported dict must have no ``"content"`` key and its repr must not
    contain the payload text, while the size, content digest, and leaf hash
    remain available.
    """
    secret = b"super secret training text"
    record = DataRecord(content=secret, metadata={"source": "private"})
    exported = record.to_dict()

    # The raw payload must be absent, both as a key and anywhere in the repr.
    assert "content" not in exported
    assert b"super secret" not in repr(exported).encode()

    # The safe, derived fields are still present.
    assert exported["content_size"] == len(secret)
    assert len(exported["content_sha3_256"]) == 64
    assert exported["leaf_hash"] == record.leaf_hash().hex
39