src/pqc_audit_log_fs/event.py
3.5 KB · 98 lines · python Raw
1 """InferenceEvent - one AI decision worth recording."""
2
3 from __future__ import annotations
4
5 import hashlib
6 import json
7 import uuid
8 from dataclasses import asdict, dataclass, field
9 from datetime import datetime, timezone
10 from typing import Any
11
12
@dataclass
class InferenceEvent:
    """A single AI inference event, reduced to hashes and short labels.

    Raw input/output is deliberately NOT stored (privacy). The event keeps
    SHA3-256 hex digests of the input and output bytes instead, so a forensic
    investigator can check whether a claimed input matches what the model
    actually saw.
    """

    event_id: str                   # create() generates 'urn:pqc-audit-evt:<uuid4 hex>'
    timestamp: str                  # create() stores an ISO-8601 UTC timestamp
    model_did: str                  # did:pqaid:... of the model
    model_version: str
    input_hash: str                 # SHA3-256 hex of canonical input
    output_hash: str                # SHA3-256 hex of canonical output
    reasoning_chain_hash: str = ""  # hash over chain-of-thought steps
    decision_type: str = ""         # 'classification' | 'generation' | 'tool_call' | ...
    decision_label: str = ""        # short label (e.g. 'approve' | 'deny')
    actor_did: str = ""             # who invoked the model (user/agent DID)
    session_id: str = ""
    metadata: dict[str, Any] = field(default_factory=dict)

    @classmethod
    def create(
        cls,
        model_did: str,
        model_version: str,
        input_bytes: bytes,
        output_bytes: bytes,
        reasoning_bytes: bytes | None = None,
        decision_type: str = "",
        decision_label: str = "",
        actor_did: str = "",
        session_id: str = "",
        metadata: dict[str, Any] | None = None,
    ) -> InferenceEvent:
        """Build a new event, hashing the raw payloads instead of keeping them.

        Args:
            model_did: DID of the model that produced the decision.
            model_version: Version string of that model.
            input_bytes: Input bytes; only their SHA3-256 digest is stored.
            output_bytes: Output bytes; only their SHA3-256 digest is stored.
            reasoning_bytes: Optional reasoning-chain bytes. ``None`` and
                empty bytes are both recorded as an empty hash string.
            decision_type: Kind of decision (free-form string).
            decision_label: Short outcome label.
            actor_did: DID of whoever invoked the model.
            session_id: Session identifier.
            metadata: Extra key/value data; shallow-copied so later changes
                to the caller's dict do not reach the event.

        Returns:
            A new event with a fresh random ``event_id`` and the current UTC
            time as ``timestamp``.
        """
        return cls(
            event_id=f"urn:pqc-audit-evt:{uuid.uuid4().hex}",
            timestamp=datetime.now(timezone.utc).isoformat(),
            model_did=model_did,
            model_version=model_version,
            input_hash=hashlib.sha3_256(input_bytes).hexdigest(),
            output_hash=hashlib.sha3_256(output_bytes).hexdigest(),
            # Truthiness check: b"" is treated the same as None here.
            reasoning_chain_hash=(
                hashlib.sha3_256(reasoning_bytes).hexdigest() if reasoning_bytes else ""
            ),
            decision_type=decision_type,
            decision_label=decision_label,
            actor_did=actor_did,
            session_id=session_id,
            metadata=dict(metadata or {}),
        )

    def canonical_bytes(self) -> bytes:
        """Return the deterministic serialization hashed by ``leaf_hash``.

        The event is rendered as compact JSON with sorted keys, non-ASCII
        characters left unescaped, and encoded as UTF-8. Fields are listed
        explicitly rather than derived from the dataclass.

        Raises:
            TypeError: If ``metadata`` holds a value ``json.dumps`` cannot
                serialize.
        """
        payload = {
            "event_id": self.event_id,
            "timestamp": self.timestamp,
            "model_did": self.model_did,
            "model_version": self.model_version,
            "input_hash": self.input_hash,
            "output_hash": self.output_hash,
            "reasoning_chain_hash": self.reasoning_chain_hash,
            "decision_type": self.decision_type,
            "decision_label": self.decision_label,
            "actor_did": self.actor_did,
            "session_id": self.session_id,
            "metadata": self.metadata,
        }
        return json.dumps(
            payload, sort_keys=True, separators=(",", ":"), ensure_ascii=False
        ).encode("utf-8")

    def leaf_hash(self) -> str:
        """Return the SHA3-256 hex digest of ``canonical_bytes()``."""
        return hashlib.sha3_256(self.canonical_bytes()).hexdigest()

    def to_dict(self) -> dict[str, Any]:
        """Return the event as a plain dict (via ``dataclasses.asdict``)."""
        return asdict(self)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> InferenceEvent:
        """Rebuild an event from a mapping of field names to values.

        The mapping is not modified. A dict-valued ``metadata`` entry is
        shallow-copied, matching ``create``, so that mutating the source
        mapping afterwards cannot change this event's canonical bytes or
        leaf hash.

        Args:
            data: Mapping whose keys are the dataclass field names, e.g. the
                result of ``to_dict()``.

        Returns:
            The reconstructed event.

        Raises:
            TypeError: If a required field is missing or an unknown key is
                present (raised by the dataclass constructor).
        """
        kwargs = dict(data)
        meta = kwargs.get("metadata")
        if isinstance(meta, dict):
            # Detach from the caller's dict; other value types pass through.
            kwargs["metadata"] = dict(meta)
        return cls(**kwargs)

    def to_jsonl(self) -> str:
        """Return the event as one compact JSON line, without a trailing newline.

        Keys follow dataclass field order. Unlike ``canonical_bytes``, this
        uses the ``json.dumps`` default of escaping non-ASCII characters.
        """
        return json.dumps(self.to_dict(), separators=(",", ":"))
98