src/pqc_audit_log_fs/event.py
| 1 | """InferenceEvent - one AI decision worth recording.""" |
| 2 | |
| 3 | from __future__ import annotations |
| 4 | |
| 5 | import hashlib |
| 6 | import json |
| 7 | import uuid |
| 8 | from dataclasses import asdict, dataclass, field |
| 9 | from datetime import datetime, timezone |
| 10 | from typing import Any |
| 11 | |
| 12 | |
@dataclass
class InferenceEvent:
    """A single AI inference event.

    Raw input/output is deliberately NOT stored (privacy). Only SHA3-256
    hashes of the canonical input and output are kept, so a forensic
    investigator can check that a claimed input matches what the model
    actually saw.
    """

    event_id: str
    timestamp: str
    model_did: str  # did:pqaid:... of the model
    model_version: str
    input_hash: str  # SHA3-256 hex of canonical input
    output_hash: str  # SHA3-256 hex of canonical output
    reasoning_chain_hash: str = ""  # hash over chain-of-thought steps
    decision_type: str = ""  # 'classification' | 'generation' | 'tool_call' | ...
    decision_label: str = ""  # short label (e.g. 'approve' | 'deny')
    actor_did: str = ""  # who invoked the model (user/agent DID)
    session_id: str = ""
    metadata: dict[str, Any] = field(default_factory=dict)

    @classmethod
    def create(
        cls,
        model_did: str,
        model_version: str,
        input_bytes: bytes,
        output_bytes: bytes,
        reasoning_bytes: bytes | None = None,
        decision_type: str = "",
        decision_label: str = "",
        actor_did: str = "",
        session_id: str = "",
        metadata: dict[str, Any] | None = None,
    ) -> InferenceEvent:
        """Build a new event, hashing the supplied payloads.

        A fresh ``urn:pqc-audit-evt:<uuid4 hex>`` identifier and the current
        UTC time (ISO 8601) are generated here. ``input_bytes`` and
        ``output_bytes`` are hashed with SHA3-256; only the hex digests are
        kept.

        ``reasoning_bytes`` is hashed the same way when it is non-empty.
        ``None`` *and* empty bytes both yield an empty
        ``reasoning_chain_hash``.

        ``metadata`` is shallow-copied, so rebinding keys on the caller's
        dict afterwards does not affect the event.
        """
        return cls(
            event_id=f"urn:pqc-audit-evt:{uuid.uuid4().hex}",
            timestamp=datetime.now(timezone.utc).isoformat(),
            model_did=model_did,
            model_version=model_version,
            input_hash=hashlib.sha3_256(input_bytes).hexdigest(),
            output_hash=hashlib.sha3_256(output_bytes).hexdigest(),
            reasoning_chain_hash=(
                hashlib.sha3_256(reasoning_bytes).hexdigest() if reasoning_bytes else ""
            ),
            decision_type=decision_type,
            decision_label=decision_label,
            actor_did=actor_did,
            session_id=session_id,
            metadata=dict(metadata or {}),
        )

    def canonical_bytes(self) -> bytes:
        """Return the deterministic serialization that ``leaf_hash`` hashes.

        The event is rendered as compact JSON with sorted keys and no
        ASCII-escaping, then encoded as UTF-8. ``metadata`` must therefore
        be JSON-serializable; ``json.dumps`` raises ``TypeError`` otherwise.
        """
        payload = {
            "event_id": self.event_id,
            "timestamp": self.timestamp,
            "model_did": self.model_did,
            "model_version": self.model_version,
            "input_hash": self.input_hash,
            "output_hash": self.output_hash,
            "reasoning_chain_hash": self.reasoning_chain_hash,
            "decision_type": self.decision_type,
            "decision_label": self.decision_label,
            "actor_did": self.actor_did,
            "session_id": self.session_id,
            "metadata": self.metadata,
        }
        # sort_keys + fixed separators make the byte output independent of
        # dict insertion order and of json's default whitespace.
        return json.dumps(
            payload, sort_keys=True, separators=(",", ":"), ensure_ascii=False
        ).encode("utf-8")

    def leaf_hash(self) -> str:
        """Return the SHA3-256 hex digest of ``canonical_bytes()``."""
        return hashlib.sha3_256(self.canonical_bytes()).hexdigest()

    def to_dict(self) -> dict[str, Any]:
        """Return the event as a plain dict (``dataclasses.asdict``)."""
        return asdict(self)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> InferenceEvent:
        """Rebuild an event from a mapping of field names to values.

        ``data`` itself is not modified. When ``metadata`` is a dict it is
        shallow-copied, mirroring ``create``, so that later changes to the
        caller's dict cannot silently alter this event's ``leaf_hash``.

        Raises:
            TypeError: if ``data`` lacks a required field or contains a key
                that is not a field of this class.
        """
        kwargs = dict(data)
        meta = kwargs.get("metadata")
        if isinstance(meta, dict):
            # Detach from the caller's dict; the leaf hash depends on it.
            kwargs["metadata"] = dict(meta)
        return cls(**kwargs)

    def to_jsonl(self) -> str:
        """Return the event as one compact JSON line (no trailing newline)."""
        return json.dumps(self.to_dict(), separators=(",", ":"))
| 98 | |