src/pqc_gpu_driver/backends/cuda.py
| 1 | """CUDA backend (stub interface). |
| 2 | |
| 3 | Real integration uses the NVIDIA CUDA Driver / Runtime API. This stub |
| 4 | documents the expected shape; users plug in their real syscalls. |
| 5 | |
| 6 | A production implementation of this backend is expected to: |
| 7 | |
| 8 | * Initialize a CUDA context via ``cuInit`` / ``cuCtxCreate`` for the target |
| 9 | device (typically an H100 or H200 with Confidential Computing enabled). |
| 10 | * For :meth:`upload`, allocate device memory with ``cuMemAlloc`` and copy the |
| 11 | ciphertext bytes of the :class:`EncryptedTensor` from pinned host memory |
| 12 | with ``cuMemcpyHtoD``. Register the pointer with CUDA-IPC if cross-process |
| 13 | sharing is required. |
| 14 | * For :meth:`download`, issue ``cuMemcpyDtoH`` from the device buffer |
| 15 | associated with ``device_handle`` back into a host buffer and return it |
| 16 | wrapped in an :class:`EncryptedTensor`. |
| 17 | * For :meth:`free`, call ``cuMemFree`` and drop the IPC handle. |
| 18 | * Crucially, keep the tensor bytes encrypted at rest on device until the |
| 19 | workload actively decrypts them in enclave memory (H100 Confidential |
| 20 | Computing) - the framework never gives the host OS or hypervisor a view |
| 21 | of plaintext. |
| 22 | """ |
| 23 | |
| 24 | from __future__ import annotations |
| 25 | |
| 26 | from pqc_gpu_driver.backends.base import GPUBackend |
| 27 | from pqc_gpu_driver.errors import BackendError |
| 28 | from pqc_gpu_driver.tensor import EncryptedTensor |
| 29 | |
| 30 | |
class CUDABackend(GPUBackend):
    """Placeholder NVIDIA CUDA backend.

    Every operation raises :class:`BackendError` until the stub is wired
    to a real CUDA Driver/Runtime integration (see the module docstring
    for the expected production behavior).
    """

    # Registry identifiers used by the backend-selection machinery.
    name = "cuda"
    device_type = "cuda"

    def __init__(self, device_index: int = 0) -> None:
        # Ordinal of the CUDA device this backend would target.
        self.device_index = device_index

    def upload(self, tensor: EncryptedTensor) -> str:
        """Stub: would cuMemAlloc + cuMemcpyHtoD the ciphertext bytes."""
        message = (
            "CUDABackend.upload is a stub. A real implementation allocates "
            f"device memory on CUDA device {self.device_index} via cuMemAlloc "
            "and copies the ciphertext bytes with cuMemcpyHtoD."
        )
        raise BackendError(message)

    def download(self, device_handle: str) -> EncryptedTensor:
        """Stub: would cuMemcpyDtoH the device buffer back to host."""
        message = (
            "CUDABackend.download is a stub. A real implementation issues "
            f"cuMemcpyDtoH for handle {device_handle} to pull ciphertext "
            "bytes back to host memory."
        )
        raise BackendError(message)

    def free(self, device_handle: str) -> None:
        """Stub: would cuMemFree the pointer and release IPC handles."""
        message = (
            "CUDABackend.free is a stub. A real implementation calls "
            f"cuMemFree on the device pointer for {device_handle} and drops "
            "any CUDA-IPC handles."
        )
        raise BackendError(message)

    def device_info(self) -> dict:
        """Stub: would query name and compute capability for the device."""
        message = (
            "CUDABackend.device_info is a stub. A real implementation reads "
            f"device {self.device_index} name + compute_capability via "
            "cuDeviceGetName and cuDeviceComputeCapability."
        )
        raise BackendError(message)
| 70 | |