src/pqc_content_provenance/assertions/training.py
827 B · 24 lines · python Raw
1 """Assertion: training data summary (c2pa.training)."""
2
3 from __future__ import annotations
4
5 from dataclasses import dataclass, field
6 from typing import ClassVar
7
8 from pqc_content_provenance.assertions.base import Assertion
9
10
@dataclass
class TrainingAssertion(Assertion):
    """Describe the training data behind the model that generated an output.

    The assertion is labelled ``c2pa.training``. Every field declared here
    carries a default value.
    """

    # Assertion label; annotated as a ClassVar, so it is not a dataclass field.
    label: ClassVar[str] = "c2pa.training"

    # Name of the training dataset, e.g. "common-crawl-2024".
    dataset_name: str = ""

    # Merkle root over the training data.
    dataset_root_hash: str = ""

    # Optional fine-tuning dataset, e.g. "internal-medical-1k".
    fine_tune_dataset: str = ""

    # NOTE(review): presumably the root hash for `fine_tune_dataset`,
    # mirroring `dataset_root_hash` -- confirm.
    fine_tune_root_hash: str = ""

    # NOTE(review): this defaults to True, so an assertion built without
    # setting it reports PII as filtered -- confirm that is intended.
    pii_filtered: bool = True

    copyright_cleared: bool = False

    # License identifiers, e.g. ["cc-by-4.0", "apache-2.0"]. The
    # default_factory gives each instance its own fresh list.
    licenses: list[str] = field(default_factory=list)
24