src/pqc_content_provenance/assertions/training.py
| 1 | """Assertion: training data summary (c2pa.training).""" |
| 2 | |
| 3 | from __future__ import annotations |
| 4 | |
| 5 | from dataclasses import dataclass, field |
| 6 | from typing import ClassVar |
| 7 | |
| 8 | from pqc_content_provenance.assertions.base import Assertion |
| 9 | |
| 10 | |
@dataclass
class TrainingAssertion(Assertion):
    """Summarise the training data behind the model that produced an output.

    Every field has a default, so the assertion can be created empty and
    filled in afterwards.

    Attributes:
        label: Assertion label, fixed to ``"c2pa.training"``. It is declared
            as a ``ClassVar``, so the dataclass machinery does not turn it
            into an instance field or an ``__init__`` parameter.
        dataset_name: Name of the training dataset, e.g.
            ``"common-crawl-2024"``. Defaults to the empty string.
        dataset_root_hash: Merkle root over the training data. Defaults to
            the empty string.
        fine_tune_dataset: Optional name of a fine-tuning dataset, e.g.
            ``"internal-medical-1k"``. Defaults to the empty string.
        fine_tune_root_hash: Root hash paired with ``fine_tune_dataset``
            (presumably a Merkle root as well -- confirm with the producer).
            Defaults to the empty string.
        pii_filtered: Defaults to ``True``.
        copyright_cleared: Defaults to ``False``.
        licenses: License identifiers, e.g. ``["cc-by-4.0", "apache-2.0"]``.
            Each instance gets its own fresh empty list by default.
    """

    label: ClassVar[str] = "c2pa.training"

    # --- Base training dataset ---
    dataset_name: str = ""
    dataset_root_hash: str = ""

    # --- Optional fine-tuning dataset ---
    fine_tune_dataset: str = ""
    fine_tune_root_hash: str = ""

    # --- Data-handling claims ---
    # NOTE(review): pii_filtered is asserted as True unless the caller
    # overrides it; confirm that this optimistic default is intended.
    pii_filtered: bool = True
    copyright_cleared: bool = False
    licenses: list[str] = field(default_factory=list)
| 24 | |