From 11041cca90540244b69dbf963432fc434b09f54d Mon Sep 17 00:00:00 2001
From: Michael Droettboom <mdboom@gmail.com>
Date: Wed, 28 Jan 2026 10:09:59 -0500
Subject: [PATCH 1/4] cuda.bindings.nvml: Small follow-on to #1524

---
 cuda_bindings/cuda/bindings/_nvml.pxd   |   2 +-
 cuda_bindings/cuda/bindings/_nvml.pyx   | 282 +++++++-----------------
 cuda_bindings/tests/nvml/test_device.py |   3 +-
 3 files changed, 79 insertions(+), 208 deletions(-)

diff --git a/cuda_bindings/cuda/bindings/_nvml.pxd b/cuda_bindings/cuda/bindings/_nvml.pxd
index fff04adea4..36ab860e64 100644
--- a/cuda_bindings/cuda/bindings/_nvml.pxd
+++ b/cuda_bindings/cuda/bindings/_nvml.pxd
@@ -17,6 +17,7 @@ ctypedef nvmlDramEncryptionInfo_v1_t DramEncryptionInfo_v1
 ctypedef nvmlMarginTemperature_v1_t MarginTemperature_v1
 ctypedef nvmlFanSpeedInfo_v1_t FanSpeedInfo_v1
 ctypedef nvmlDevicePerfModes_v1_t DevicePerfModes_v1
+ctypedef nvmlDeviceCurrentClockFreqs_v1_t DeviceCurrentClockFreqs_v1
 ctypedef nvmlVgpuHeterogeneousMode_v1_t VgpuHeterogeneousMode_v1
 ctypedef nvmlVgpuPlacementId_v1_t VgpuPlacementId_v1
 ctypedef nvmlVgpuRuntimeState_v1_t VgpuRuntimeState_v1
@@ -226,7 +227,6 @@ cpdef tuple device_get_min_max_clock_of_p_state(intptr_t device, int type, int p
 cpdef tuple device_get_gpc_clk_min_max_vf_offset(intptr_t device)
 cpdef tuple device_get_mem_clk_min_max_vf_offset(intptr_t device)
 cpdef device_set_clock_offsets(intptr_t device, intptr_t info)
-cpdef object device_get_current_clock_freqs(intptr_t device)
 cpdef unsigned int device_get_power_management_limit(intptr_t device) except? 0
 cpdef tuple device_get_power_management_limit_constraints(intptr_t device)
 cpdef unsigned int device_get_power_management_default_limit(intptr_t device) except? 0
diff --git a/cuda_bindings/cuda/bindings/_nvml.pyx b/cuda_bindings/cuda/bindings/_nvml.pyx
index 8252f781b8..e175c5d2f2 100644
--- a/cuda_bindings/cuda/bindings/_nvml.pyx
+++ b/cuda_bindings/cuda/bindings/_nvml.pyx
@@ -4605,142 +4605,6 @@ cdef class ClockOffset_v1:
         return obj
 
 
-cdef _get_device_current_clock_freqs_v1_dtype_offsets():
-    cdef nvmlDeviceCurrentClockFreqs_v1_t pod = nvmlDeviceCurrentClockFreqs_v1_t()
-    return _numpy.dtype({
-        'names': ['version', 'str'],
-        'formats': [_numpy.uint32, (_numpy.int8, 2048)],
-        'offsets': [
-            (<intptr_t>&(pod.version)) - (<intptr_t>&pod),
-            (<intptr_t>&(pod.str)) - (<intptr_t>&pod),
-        ],
-        'itemsize': sizeof(nvmlDeviceCurrentClockFreqs_v1_t),
-    })
-
-device_current_clock_freqs_v1_dtype = _get_device_current_clock_freqs_v1_dtype_offsets()
-
-cdef class DeviceCurrentClockFreqs_v1:
-    """Empty-initialize an instance of `nvmlDeviceCurrentClockFreqs_v1_t`.
-
-
-    .. seealso:: `nvmlDeviceCurrentClockFreqs_v1_t`
-    """
-    cdef:
-        nvmlDeviceCurrentClockFreqs_v1_t *_ptr
-        object _owner
-        bint _owned
-        bint _readonly
-
-    def __init__(self):
-        self._ptr = <nvmlDeviceCurrentClockFreqs_v1_t *>calloc(1, sizeof(nvmlDeviceCurrentClockFreqs_v1_t))
-        if self._ptr == NULL:
-            raise MemoryError("Error allocating DeviceCurrentClockFreqs_v1")
-        self._owner = None
-        self._owned = True
-        self._readonly = False
-
-    def __dealloc__(self):
-        cdef nvmlDeviceCurrentClockFreqs_v1_t *ptr
-        if self._owned and self._ptr != NULL:
-            ptr = self._ptr
-            self._ptr = NULL
-            free(ptr)
-
-    def __repr__(self):
-        return f"<{__name__}.DeviceCurrentClockFreqs_v1 object at {hex(id(self))}>"
-
-    @property
-    def ptr(self):
-        """Get the pointer address to the data as Python :class:`int`."""
-        return <intptr_t>(self._ptr)
-
-    cdef intptr_t _get_ptr(self):
-        return <intptr_t>(self._ptr)
-
-    def __int__(self):
-        return <intptr_t>(self._ptr)
-
-    def __eq__(self, other):
-        cdef DeviceCurrentClockFreqs_v1 other_
-        if not isinstance(other, DeviceCurrentClockFreqs_v1):
-            return False
-        other_ = other
-        return (memcmp(<void *><intptr_t>(self._ptr), <void *><intptr_t>(other_._ptr), sizeof(nvmlDeviceCurrentClockFreqs_v1_t)) == 0)
-
-    def __setitem__(self, key, val):
-        if key == 0 and isinstance(val, _numpy.ndarray):
-            self._ptr = <nvmlDeviceCurrentClockFreqs_v1_t *>malloc(sizeof(nvmlDeviceCurrentClockFreqs_v1_t))
-            if self._ptr == NULL:
-                raise MemoryError("Error allocating DeviceCurrentClockFreqs_v1")
-            memcpy(<void*>self._ptr, <void*><intptr_t>val.ctypes.data, sizeof(nvmlDeviceCurrentClockFreqs_v1_t))
-            self._owner = None
-            self._owned = True
-            self._readonly = not val.flags.writeable
-        else:
-            setattr(self, key, val)
-
-    @property
-    def version(self):
-        """int: the API version number"""
-        return self._ptr[0].version
-
-    @version.setter
-    def version(self, val):
-        if self._readonly:
-            raise ValueError("This DeviceCurrentClockFreqs_v1 instance is read-only")
-        self._ptr[0].version = val
-
-    @property
-    def str(self):
-        """~_numpy.int8: (array of length 2048).OUT: the current clock frequency string."""
-        return cpython.PyUnicode_FromString(self._ptr[0].str)
-
-    @str.setter
-    def str(self, val):
-        if self._readonly:
-            raise ValueError("This DeviceCurrentClockFreqs_v1 instance is read-only")
-        cdef bytes buf = val.encode()
-        if len(buf) >= 2048:
-            raise ValueError("String too long for field str, max length is 2047")
-        cdef char *ptr = buf
-        memcpy(<void *>(self._ptr[0].str), <void *>ptr, 2048)
-
-    @staticmethod
-    def from_data(data):
-        """Create an DeviceCurrentClockFreqs_v1 instance wrapping the given NumPy array.
-
-        Args:
-            data (_numpy.ndarray): a single-element array of dtype `device_current_clock_freqs_v1_dtype` holding the data.
-        """
-        return __from_data(data, "device_current_clock_freqs_v1_dtype", device_current_clock_freqs_v1_dtype, DeviceCurrentClockFreqs_v1)
-
-    @staticmethod
-    def from_ptr(intptr_t ptr, bint readonly=False, object owner=None):
-        """Create an DeviceCurrentClockFreqs_v1 instance wrapping the given pointer.
-
-        Args:
-            ptr (intptr_t): pointer address as Python :class:`int` to the data.
-            owner (object): The Python object that owns the pointer. If not provided, data will be copied.
-            readonly (bool): whether the data is read-only (to the user). default is `False`.
-        """
-        if ptr == 0:
-            raise ValueError("ptr must not be null (0)")
-        cdef DeviceCurrentClockFreqs_v1 obj = DeviceCurrentClockFreqs_v1.__new__(DeviceCurrentClockFreqs_v1)
-        if owner is None:
-            obj._ptr = <nvmlDeviceCurrentClockFreqs_v1_t *>malloc(sizeof(nvmlDeviceCurrentClockFreqs_v1_t))
-            if obj._ptr == NULL:
-                raise MemoryError("Error allocating DeviceCurrentClockFreqs_v1")
-            memcpy(<void*>(obj._ptr), <void*>ptr, sizeof(nvmlDeviceCurrentClockFreqs_v1_t))
-            obj._owner = None
-            obj._owned = True
-        else:
-            obj._ptr = <nvmlDeviceCurrentClockFreqs_v1_t *>ptr
-            obj._owner = owner
-            obj._owned = False
-        obj._readonly = readonly
-        return obj
-
-
 cdef _get_process_utilization_sample_dtype_offsets():
     cdef nvmlProcessUtilizationSample_t pod = nvmlProcessUtilizationSample_t()
     return _numpy.dtype({
@@ -10825,7 +10689,7 @@ cdef class ConfComputeGpuCertificate:
         """~_numpy.uint8: (array of length 4096)."""
         if self._ptr[0].certChainSize == 0:
             return _numpy.array([])
-        cdef view.array arr = view.array(shape=(4096,), itemsize=sizeof(unsigned char), format="B", mode="c", allocate_buffer=False)
+        cdef view.array arr = view.array(shape=(self._ptr[0].certChainSize,), itemsize=sizeof(unsigned char), format="B", mode="c", allocate_buffer=False)
         arr.data = <char *>(&(self._ptr[0].certChain))
         return _numpy.asarray(arr)
 
@@ -10838,7 +10702,7 @@ cdef class ConfComputeGpuCertificate:
         self._ptr[0].certChainSize = len(val)
         if len(val) == 0:
             return
-        cdef view.array arr = view.array(shape=(4096,), itemsize=sizeof(unsigned char), format="B", mode="c")
+        cdef view.array arr = view.array(shape=(self._ptr[0].certChainSize,), itemsize=sizeof(unsigned char), format="B", mode="c")
         arr[:] = _numpy.asarray(val, dtype=_numpy.uint8)
         memcpy(<void *>(&(self._ptr[0].certChain)), <void *>(arr.data), sizeof(unsigned char) * len(val))
 
@@ -10847,7 +10711,7 @@ cdef class ConfComputeGpuCertificate:
         """~_numpy.uint8: (array of length 5120)."""
         if self._ptr[0].attestationCertChainSize == 0:
             return _numpy.array([])
-        cdef view.array arr = view.array(shape=(5120,), itemsize=sizeof(unsigned char), format="B", mode="c", allocate_buffer=False)
+        cdef view.array arr = view.array(shape=(self._ptr[0].attestationCertChainSize,), itemsize=sizeof(unsigned char), format="B", mode="c", allocate_buffer=False)
         arr.data = <char *>(&(self._ptr[0].attestationCertChain))
         return _numpy.asarray(arr)
 
@@ -10860,7 +10724,7 @@ cdef class ConfComputeGpuCertificate:
         self._ptr[0].attestationCertChainSize = len(val)
         if len(val) == 0:
             return
-        cdef view.array arr = view.array(shape=(5120,), itemsize=sizeof(unsigned char), format="B", mode="c")
+        cdef view.array arr = view.array(shape=(self._ptr[0].attestationCertChainSize,), itemsize=sizeof(unsigned char), format="B", mode="c")
         arr[:] = _numpy.asarray(val, dtype=_numpy.uint8)
         memcpy(<void *>(&(self._ptr[0].attestationCertChain)), <void *>(arr.data), sizeof(unsigned char) * len(val))
 
@@ -11011,7 +10875,7 @@ cdef class ConfComputeGpuAttestationReport:
         """~_numpy.uint8: (array of length 8192)."""
         if self._ptr[0].attestationReportSize == 0:
             return _numpy.array([])
-        cdef view.array arr = view.array(shape=(8192,), itemsize=sizeof(unsigned char), format="B", mode="c", allocate_buffer=False)
+        cdef view.array arr = view.array(shape=(self._ptr[0].attestationReportSize,), itemsize=sizeof(unsigned char), format="B", mode="c", allocate_buffer=False)
         arr.data = <char *>(&(self._ptr[0].attestationReport))
         return _numpy.asarray(arr)
 
@@ -11024,7 +10888,7 @@ cdef class ConfComputeGpuAttestationReport:
         self._ptr[0].attestationReportSize = len(val)
         if len(val) == 0:
             return
-        cdef view.array arr = view.array(shape=(8192,), itemsize=sizeof(unsigned char), format="B", mode="c")
+        cdef view.array arr = view.array(shape=(self._ptr[0].attestationReportSize,), itemsize=sizeof(unsigned char), format="B", mode="c")
         arr[:] = _numpy.asarray(val, dtype=_numpy.uint8)
         memcpy(<void *>(&(self._ptr[0].attestationReport)), <void *>(arr.data), sizeof(unsigned char) * len(val))
 
@@ -11033,7 +10897,7 @@ cdef class ConfComputeGpuAttestationReport:
         """~_numpy.uint8: (array of length 4096)."""
         if self._ptr[0].cecAttestationReportSize == 0:
             return _numpy.array([])
-        cdef view.array arr = view.array(shape=(4096,), itemsize=sizeof(unsigned char), format="B", mode="c", allocate_buffer=False)
+        cdef view.array arr = view.array(shape=(self._ptr[0].cecAttestationReportSize,), itemsize=sizeof(unsigned char), format="B", mode="c", allocate_buffer=False)
         arr.data = <char *>(&(self._ptr[0].cecAttestationReport))
         return _numpy.asarray(arr)
 
@@ -11046,7 +10910,7 @@ cdef class ConfComputeGpuAttestationReport:
         self._ptr[0].cecAttestationReportSize = len(val)
         if len(val) == 0:
             return
-        cdef view.array arr = view.array(shape=(4096,), itemsize=sizeof(unsigned char), format="B", mode="c")
+        cdef view.array arr = view.array(shape=(self._ptr[0].cecAttestationReportSize,), itemsize=sizeof(unsigned char), format="B", mode="c")
         arr[:] = _numpy.asarray(val, dtype=_numpy.uint8)
         memcpy(<void *>(&(self._ptr[0].cecAttestationReport)), <void *>(arr.data), sizeof(unsigned char) * len(val))
 
@@ -11363,7 +11227,7 @@ cdef class NvlinkSupportedBwModes_v1:
         """~_numpy.uint8: (array of length 23)."""
         if self._ptr[0].totalBwModes == 0:
             return _numpy.array([])
-        cdef view.array arr = view.array(shape=(23,), itemsize=sizeof(unsigned char), format="B", mode="c", allocate_buffer=False)
+        cdef view.array arr = view.array(shape=(self._ptr[0].totalBwModes,), itemsize=sizeof(unsigned char), format="B", mode="c", allocate_buffer=False)
         arr.data = <char *>(&(self._ptr[0].bwModes))
         return _numpy.asarray(arr)
 
@@ -11376,7 +11240,7 @@ cdef class NvlinkSupportedBwModes_v1:
         self._ptr[0].totalBwModes = len(val)
         if len(val) == 0:
             return
-        cdef view.array arr = view.array(shape=(23,), itemsize=sizeof(unsigned char), format="B", mode="c")
+        cdef view.array arr = view.array(shape=(self._ptr[0].totalBwModes,), itemsize=sizeof(unsigned char), format="B", mode="c")
         arr[:] = _numpy.asarray(val, dtype=_numpy.uint8)
         memcpy(<void *>(&(self._ptr[0].bwModes)), <void *>(arr.data), sizeof(unsigned char) * len(val))
 
@@ -14863,8 +14727,11 @@ cdef class BridgeChipHierarchy:
         if self._readonly:
             raise ValueError("This BridgeChipHierarchy instance is read-only")
         cdef BridgeChipInfo val_ = val
-        if len(val) != self._ptr[0].bridgeCount:
-            raise ValueError(f"Expected length { self._ptr[0].bridgeCount } for field bridge_chip_info, got {len(val)}")
+        if len(val) > 128:  # inclusive upper bound: exactly 128 entries is accepted
+            raise ValueError(f"Expected length <= 128 for field bridge_chip_info, got {len(val)}")
+        self._ptr[0].bridgeCount = len(val)
+        if len(val) == 0:
+            return
         memcpy(<void *>&(self._ptr[0].bridgeChipInfo), <void *>(val_._get_ptr()), sizeof(nvmlBridgeChipInfo_t) * self._ptr[0].bridgeCount)
 
     @staticmethod
@@ -16008,8 +15875,11 @@ cdef class ClkMonStatus:
         if self._readonly:
             raise ValueError("This ClkMonStatus instance is read-only")
         cdef ClkMonFaultInfo val_ = val
-        if len(val) != self._ptr[0].clkMonListSize:
-            raise ValueError(f"Expected length { self._ptr[0].clkMonListSize } for field clk_mon_list, got {len(val)}")
+        if len(val) > 32:  # inclusive upper bound: exactly 32 entries is accepted
+            raise ValueError(f"Expected length <= 32 for field clk_mon_list, got {len(val)}")
+        self._ptr[0].clkMonListSize = len(val)
+        if len(val) == 0:
+            return
         memcpy(<void *>&(self._ptr[0].clkMonList), <void *>(val_._get_ptr()), sizeof(nvmlClkMonFaultInfo_t) * self._ptr[0].clkMonListSize)
 
     @property
@@ -19297,8 +19167,11 @@ cdef class GridLicensableFeatures:
         if self._readonly:
             raise ValueError("This GridLicensableFeatures instance is read-only")
         cdef GridLicensableFeature val_ = val
-        if len(val) != self._ptr[0].licensableFeaturesCount:
-            raise ValueError(f"Expected length { self._ptr[0].licensableFeaturesCount } for field grid_licensable_features, got {len(val)}")
+        if len(val) > 3:  # inclusive upper bound: exactly 3 entries is accepted
+            raise ValueError(f"Expected length <= 3 for field grid_licensable_features, got {len(val)}")
+        self._ptr[0].licensableFeaturesCount = len(val)
+        if len(val) == 0:
+            return
         memcpy(<void *>&(self._ptr[0].gridLicensableFeatures), <void *>(val_._get_ptr()), sizeof(nvmlGridLicensableFeature_t) * self._ptr[0].licensableFeaturesCount)
 
     @property
@@ -21007,26 +20880,6 @@ cpdef device_set_clock_offsets(intptr_t device, intptr_t info):
     check_status(__status__)
 
 
-cpdef object device_get_current_clock_freqs(intptr_t device):
-    """Retrieves a string with the associated current GPU Clock and Memory Clock values.
-
-    Args:
-        device (intptr_t): The identifier of the target device.
-
-    Returns:
-        nvmlDeviceCurrentClockFreqs_v1_t: Reference in which to return the performance level string.
-
-    .. seealso:: `nvmlDeviceGetCurrentClockFreqs`
-    """
-    cdef DeviceCurrentClockFreqs_v1 current_clock_freqs_py = DeviceCurrentClockFreqs_v1()
-    cdef nvmlDeviceCurrentClockFreqs_t *current_clock_freqs = <nvmlDeviceCurrentClockFreqs_t *><intptr_t>(current_clock_freqs_py._get_ptr())
-    current_clock_freqs.version = sizeof(nvmlDeviceCurrentClockFreqs_v1_t) | (1 << 24)
-    with nogil:
-        __status__ = nvmlDeviceGetCurrentClockFreqs(<Device>device, current_clock_freqs)
-    check_status(__status__)
-    return current_clock_freqs_py
-
-
 cpdef unsigned int device_get_power_management_limit(intptr_t device) except? 0:
     """Retrieves the power management limit associated with this device.
 
@@ -24567,7 +24420,7 @@ cpdef object device_get_topology_nearest_gpus(intptr_t device, unsigned int leve
     return deviceArray
 
 
-cpdef object device_get_temperature_v(intptr_t device, nvmlTemperatureSensors_t sensorType):
+cpdef int device_get_temperature_v(intptr_t device, nvmlTemperatureSensors_t sensorType):
     """Retrieves the current temperature readings (in degrees C) for the given device.
 
     Args:
@@ -24579,10 +24432,10 @@ cpdef object device_get_temperature_v(intptr_t device, nvmlTemperatureSensors_t
     .. seealso:: `nvmlDeviceGetTemperatureV`
     """
     cdef nvmlTemperature_v1_t[1] temperature
-    temperature[0].version = sizeof(nvmlTemperature_v1_t) | (1 << 24)
-    temperature[0].sensorType = <nvmlTemperatureSensors_t>sensorType
 
     with nogil:
+        temperature[0].version = sizeof(nvmlTemperature_v1_t) | (1 << 24)
+        temperature[0].sensorType = <nvmlTemperatureSensors_t>sensorType
         __status__ = nvmlDeviceGetTemperatureV(<Device>device, temperature)
     check_status(__status__)
     return temperature.temperature
@@ -24638,9 +24491,10 @@ cpdef object device_get_running_process_detail_list(intptr_t device, unsigned in
     with nogil:
         __status__ = nvmlDeviceGetRunningProcessDetailList(<Device>device, ptr)
     check_status(__status__)
+    return plist
 
 
-cpdef object device_get_samples(intptr_t device, int type, unsigned long long last_seen_time_stamp):
+cpdef tuple device_get_samples(intptr_t device, int type, unsigned long long last_seen_time_stamp):
     """Gets recent samples for the GPU.
 
     Args:
@@ -24665,7 +24519,7 @@ cpdef object device_get_samples(intptr_t device, int type, unsigned long long la
     return (sample_val_type[0], samples)
 
 
-cpdef object device_get_retired_pages_v2(intptr_t device, int cause):
+cpdef tuple device_get_retired_pages_v2(intptr_t device, int cause):
     """Returns the list of retired pages by source, including pages that are pending retirement
 
     Args:
@@ -24809,7 +24663,7 @@ cpdef object device_get_field_values(intptr_t device, values):
     return values_
 
 
-cpdef object device_clear_field_values(intptr_t device, values):
+cpdef device_clear_field_values(intptr_t device, values):
     """Clear values for a list of fields for a device. This API allows multiple fields to be cleared at once.
 
     Args:
@@ -24892,7 +24746,7 @@ cpdef object device_get_active_vgpus(intptr_t device):
     return vgpuInstances
 
 
-cpdef str vgpu_instance_get_vm_id(unsigned int vgpu_instance):
+cpdef tuple vgpu_instance_get_vm_id(unsigned int vgpu_instance):
     """Retrieve the VM ID associated with a vGPU instance.
 
     Args:
@@ -25304,8 +25158,8 @@ cpdef object device_get_gpu_fabric_info_v(intptr_t device):
     if CUDA_VERSION >= 13000:
         gpu_fabric_info_v3_py = GpuFabricInfo_v3()
         gpu_fabric_info = <nvmlGpuFabricInfoV_t *><intptr_t>(gpu_fabric_info_v3_py._get_ptr())
-        gpu_fabric_info.version = sizeof(nvmlGpuFabricInfo_v3_t) | (3 << 24)
         with nogil:
+            gpu_fabric_info.version = sizeof(nvmlGpuFabricInfo_v3_t) | (3 << 24)
             __status__ = nvmlDeviceGetGpuFabricInfoV(<Device>device, gpu_fabric_info)
         check_status(__status__)
         return gpu_fabric_info_v3_py
@@ -25313,8 +25167,8 @@ cpdef object device_get_gpu_fabric_info_v(intptr_t device):
     else:
         gpu_fabric_info_v2_py = GpuFabricInfo_v2()
         gpu_fabric_info = <nvmlGpuFabricInfoV_t *><intptr_t>(gpu_fabric_info_v2_py._get_ptr())
-        gpu_fabric_info.version = sizeof(nvmlGpuFabricInfo_v2_t) | (2 << 24)
         with nogil:
+            gpu_fabric_info.version = sizeof(nvmlGpuFabricInfo_v2_t) | (2 << 24)
             __status__ = nvmlDeviceGetGpuFabricInfoV(<Device>device, gpu_fabric_info)
         check_status(__status__)
         return gpu_fabric_info_v2_py
@@ -25338,8 +25192,8 @@ cpdef object device_get_platform_info(intptr_t device):
     if CUDA_VERSION >= 13000:
         platform_info_v2_py = PlatformInfo_v2()
         platform_info = <nvmlPlatformInfo_t *><intptr_t>(platform_info_v2_py._get_ptr())
-        platform_info.version = sizeof(nvmlPlatformInfo_v2_t) | (2 << 24)
         with nogil:
+            platform_info.version = sizeof(nvmlPlatformInfo_v2_t) | (2 << 24)
             __status__ = nvmlDeviceGetPlatformInfo(<Device>device, platform_info)
         check_status(__status__)
         return platform_info_v2_py
@@ -25347,8 +25201,8 @@ cpdef object device_get_platform_info(intptr_t device):
     else:
         platform_info_v1_py = PlatformInfo_v1()
         platform_info = <nvmlPlatformInfo_t *><intptr_t>(platform_info_v1_py._get_ptr())
-        platform_info.version = sizeof(nvmlPlatformInfo_v1_t) | (1 << 24)
         with nogil:
+            platform_info.version = sizeof(nvmlPlatformInfo_v1_t) | (1 << 24)
             __status__ = nvmlDeviceGetPlatformInfo(<Device>device, platform_info)
         check_status(__status__)
         return platform_info_v1_py
@@ -25372,8 +25226,8 @@ cpdef object device_get_nvlink_info(intptr_t device):
     if CUDA_VERSION >= 13000:
         info_v2_py = NvLinkInfo_v2()
         info = <nvmlNvLinkInfo_t *><intptr_t>(info_v2_py._get_ptr())
-        info.version = sizeof(nvmlNvLinkInfo_v2_t) | (2 << 24)
         with nogil:
+            info.version = sizeof(nvmlNvLinkInfo_v2_t) | (2 << 24)
             __status__ = nvmlDeviceGetNvLinkInfo(<Device>device, info)
         check_status(__status__)
         return info_v2_py
@@ -25381,8 +25235,8 @@ cpdef object device_get_nvlink_info(intptr_t device):
     else:
         info_v1_py = NvLinkInfo_v1()
         info = <nvmlNvLinkInfo_t *><intptr_t>(info_v1_py._get_ptr())
-        info.version = sizeof(nvmlNvLinkInfo_v1_t) | (1 << 24)
         with nogil:
+            info.version = sizeof(nvmlNvLinkInfo_v1_t) | (1 << 24)
             __status__ = nvmlDeviceGetNvLinkInfo(<Device>device, info)
         check_status(__status__)
         return info_v1_py
@@ -25416,10 +25270,10 @@ cpdef system_register_events(unsigned long long event_types, intptr_t event_set)
         event_set (intptr_t): The system event set handle.
     """
     cdef nvmlSystemRegisterEventRequest_v1_t[1] request
-    request[0].set = <SystemEventSet>event_set
-    request[0].eventTypes = event_types
     with nogil:
         request[0].version = sizeof(nvmlSystemRegisterEventRequest_v1_t) | (1 << 24)
+        request[0].set = <SystemEventSet>event_set
+        request[0].eventTypes = event_types
         __status__ = nvmlSystemRegisterEvents(<nvmlSystemRegisterEventRequest_t*>request)
     check_status(__status__)
 
@@ -25437,12 +25291,12 @@ cpdef object system_event_set_wait(intptr_t event_set, unsigned int timeout_ms,
     """
     cdef nvmlSystemEventSetWaitRequest_v1_t[1] request
     cdef SystemEventData_v1 event_data = SystemEventData_v1(buffer_size)
-    request[0].timeoutms = timeout_ms
-    request[0].set = <SystemEventSet>event_set
     request[0].data = <nvmlSystemEventData_v1_t *><intptr_t>(event_data._get_ptr())
-    request[0].dataSize = buffer_size
     with nogil:
         request[0].version = sizeof(nvmlSystemEventSetWaitRequest_v1_t) | (1 << 24)
+        request[0].timeoutms = timeout_ms
+        request[0].set = <SystemEventSet>event_set
+        request[0].dataSize = buffer_size
         __status__ = nvmlSystemEventSetWait(<nvmlSystemEventSetWaitRequest_t*>request)
     check_status(__status__)
     event_data._data.resize((request[0].numEvent,))
@@ -25462,9 +25316,9 @@ cpdef unsigned int device_get_fan_speed_rpm(intptr_t device, unsigned int fan):
     .. seealso:: `nvmlDeviceGetFanSpeedRPM`
     """
     cdef nvmlFanSpeedInfo_v1_t[1] fan_speed
-    fan_speed[0].version = sizeof(nvmlFanSpeedInfo_v1_t) | (1 << 24)
-    fan_speed[0].fan = fan
     with nogil:
+        fan_speed[0].version = sizeof(nvmlFanSpeedInfo_v1_t) | (1 << 24)
+        fan_speed[0].fan = fan
         __status__ = nvmlDeviceGetFanSpeedRPM(<Device>device, fan_speed)
     check_status(__status__)
     return fan_speed[0].speed
@@ -25482,8 +25336,8 @@ cpdef int device_get_margin_temperature(intptr_t device):
     .. seealso:: `nvmlDeviceGetMarginTemperature`
     """
     cdef nvmlMarginTemperature_v1_t[1] margin_temp_info
-    margin_temp_info[0].version = sizeof(nvmlMarginTemperature_v1_t) | (1 << 24)
     with nogil:
+        margin_temp_info[0].version = sizeof(nvmlMarginTemperature_v1_t) | (1 << 24)
         __status__ = nvmlDeviceGetMarginTemperature(<Device>device, margin_temp_info)
     check_status(__status__)
     return margin_temp_info[0].marginTemperature
@@ -25502,10 +25356,10 @@ cpdef object device_get_clock_offsets(intptr_t device, nvmlClockType_t clock_typ
     """
     cdef ClockOffset_v1 info_py = ClockOffset_v1()
     cdef nvmlClockOffset_v1_t *info = <nvmlClockOffset_v1_t *><intptr_t>(info_py._get_ptr())
-    info.version = sizeof(nvmlClockOffset_v1_t) | (1 << 24)
-    info.type = clock_type
-    info.pstate = pstate
     with nogil:
+        info.version = sizeof(nvmlClockOffset_v1_t) | (1 << 24)
+        info.type = clock_type
+        info.pstate = pstate
         __status__ = nvmlDeviceGetClockOffsets(<Device>device, info)
     check_status(__status__)
     return info_py
@@ -25558,8 +25412,8 @@ cpdef unsigned int vgpu_instance_get_placement_id(unsigned int vgpu_instance):
     .. seealso:: `nvmlVgpuInstanceGetPlacementId`
     """
     cdef nvmlVgpuPlacementId_t[1] p_placement
-    p_placement[0].version = sizeof(nvmlVgpuPlacementId_v1_t) | (1 << 24)
     with nogil:
+        p_placement[0].version = sizeof(nvmlVgpuPlacementId_v1_t) | (1 << 24)
         __status__ = nvmlVgpuInstanceGetPlacementId(<nvmlVgpuInstance_t>vgpu_instance, p_placement)
     check_status(__status__)
     return p_placement[0].placementId
@@ -25577,8 +25431,8 @@ cpdef object device_get_capabilities(intptr_t device):
     .. seealso:: `nvmlDeviceGetCapabilities`
     """
     cdef nvmlDeviceCapabilities_t[1] caps
-    caps[0].version = sizeof(nvmlDeviceCapabilities_v1_t) | (1 << 24)
     with nogil:
+        caps[0].version = sizeof(nvmlDeviceCapabilities_v1_t) | (1 << 24)
         __status__ = nvmlDeviceGetCapabilities(<Device>device, caps)
     check_status(__status__)
     return caps[0].capMask
@@ -25621,8 +25475,7 @@ cpdef tuple device_get_dram_encryption_mode(intptr_t device):
     cdef nvmlDramEncryptionInfo_t current
     cdef nvmlDramEncryptionInfo_t pending
     with nogil:
-        current.version = sizeof(nvmlDramEncryptionInfo_t) | (1 << 24)
-        pending.version = sizeof(nvmlDramEncryptionInfo_t) | (1 << 24)
+        current.version = pending.version = sizeof(nvmlDramEncryptionInfo_t) | (1 << 24)
         __status__ = nvmlDeviceGetDramEncryptionMode(<Device>device, &current, &pending)
     check_status(__status__)
     return (current.encryptionState, pending.encryptionState)
@@ -25659,8 +25512,8 @@ cpdef object device_get_gpu_instance_profile_info_by_id_v(intptr_t device, unsig
     """
     cdef GpuInstanceProfileInfo_v3 info_py = GpuInstanceProfileInfo_v3()
     cdef nvmlGpuInstanceProfileInfo_v3_t *info = <nvmlGpuInstanceProfileInfo_v3_t *><intptr_t>(info_py._get_ptr())
-    info.version = sizeof(nvmlGpuInstanceProfileInfo_v3_t) | (3 << 24)
     with nogil:
+        info.version = sizeof(nvmlGpuInstanceProfileInfo_v3_t) | (3 << 24)
         __status__ = nvmlDeviceGetGpuInstanceProfileInfoByIdV(<Device>device, profile_id, <nvmlGpuInstanceProfileInfo_v2_t *>info)
     check_status(__status__)
     return info_py
@@ -25680,8 +25533,8 @@ cpdef object device_get_gpu_instance_profile_info_v(intptr_t device, unsigned in
     """
     cdef GpuInstanceProfileInfo_v3 info_py = GpuInstanceProfileInfo_v3()
     cdef nvmlGpuInstanceProfileInfo_v3_t *info = <nvmlGpuInstanceProfileInfo_v3_t *><intptr_t>(info_py._get_ptr())
-    info.version = sizeof(nvmlGpuInstanceProfileInfo_v3_t) | (3 << 24)
     with nogil:
+        info.version = sizeof(nvmlGpuInstanceProfileInfo_v3_t) | (3 << 24)
         __status__ = nvmlDeviceGetGpuInstanceProfileInfoV(<Device>device, profile, <nvmlGpuInstanceProfileInfo_v2_t *>info)
     check_status(__status__)
     return info_py
@@ -25736,8 +25589,8 @@ cpdef unsigned long long device_get_pdi(intptr_t device):
     .. seealso:: `nvmlDeviceGetPdi`
     """
     cdef nvmlPdi_v1_t[1] pdi
-    pdi[0].version = sizeof(nvmlPdi_v1_t) | (1 << 24)
     with nogil:
+        pdi[0].version = sizeof(nvmlPdi_v1_t) | (1 << 24)
         __status__ = nvmlDeviceGetPdi(<Device>device, pdi)
     check_status(__status__)
     return pdi[0].value
@@ -25755,8 +25608,8 @@ cpdef str device_get_performance_modes(intptr_t device):
     .. seealso:: `nvmlDeviceGetPerformanceModes`
     """
     cdef nvmlDevicePerfModes_t[1] perf_modes
-    perf_modes[0].version = sizeof(nvmlDevicePerfModes_v1_t) | (1 << 24)
     with nogil:
+        perf_modes[0].version = sizeof(nvmlDevicePerfModes_v1_t) | (1 << 24)
         __status__ = nvmlDeviceGetPerformanceModes(<Device>device, perf_modes)
     check_status(__status__)
     return cpython.PyUnicode_FromString(perf_modes[0].str)
@@ -25988,7 +25841,7 @@ cpdef device_set_nvlink_device_low_power_threshold(intptr_t device, unsigned int
     check_status(__status__)
 
 
-cpdef unsigned int device_set_power_management_limit_v2(intptr_t device, int power_scope, unsigned int power_value_mw):
+cpdef device_set_power_management_limit_v2(intptr_t device, int power_scope, unsigned int power_value_mw):
     """Set new power limit of this device.
 
     Args:
@@ -26109,3 +25962,22 @@ cpdef unsigned int vgpu_type_get_max_instances_per_gpu_instance(unsigned int vgp
         __status__ = nvmlVgpuTypeGetMaxInstancesPerGpuInstance(max_instance)
     check_status(__status__)
     return max_instance[0].maxInstancePerGI
+
+
+cpdef str device_get_current_clock_freqs(intptr_t device):
+    """Return the device's current GPU and memory clock values as one string.
+
+    Args:
+        device (intptr_t): The identifier of the target device.
+
+    Returns:
+        str: The clock frequency text filled in by the NVML call.
+
+    .. seealso:: `nvmlDeviceGetCurrentClockFreqs`
+    """
+    cdef nvmlDeviceCurrentClockFreqs_t freqs
+    with nogil:
+        freqs.version = sizeof(nvmlDeviceCurrentClockFreqs_v1_t) | (1 << 24)
+        __status__ = nvmlDeviceGetCurrentClockFreqs(<Device>device, &freqs)
+    check_status(__status__)
+    return cpython.PyUnicode_FromString(freqs.str)
diff --git a/cuda_bindings/tests/nvml/test_device.py b/cuda_bindings/tests/nvml/test_device.py
index bd3d00b7a0..96f1777c5f 100644
--- a/cuda_bindings/tests/nvml/test_device.py
+++ b/cuda_bindings/tests/nvml/test_device.py
@@ -39,8 +39,7 @@ def test_clk_mon_status_t():
 def test_current_clock_freqs(all_devices):
     for device in all_devices:
         clk_freqs = nvml.device_get_current_clock_freqs(device)
-        assert isinstance(clk_freqs, nvml.DeviceCurrentClockFreqs_v1)
-        assert isinstance(clk_freqs.str, str)
+        assert isinstance(clk_freqs, str)
 
 
 def test_grid_licensable_features(all_devices):

From 43d1c8534a56d9ea0370fdff72519a52b0ceada1 Mon Sep 17 00:00:00 2001
From: Michael Droettboom <mdboom@gmail.com>
Date: Wed, 28 Jan 2026 14:14:44 -0500
Subject: [PATCH 2/4] cuda.bindings.nvml: Make public

---
 .../_internal/{_nvml.pxd => nvml.pxd}         |   2 +-
 .../{_nvml_linux.pyx => nvml_linux.pyx}       |   0
 .../{_nvml_windows.pyx => nvml_windows.pyx}   |   0
 .../cuda/bindings/_test_helpers/arch_check.py |   2 +-
 .../cuda/bindings/{cy_nvml.pxd => cynvml.pxd} |   0
 .../cuda/bindings/{cy_nvml.pyx => cynvml.pyx} |   2 +-
 .../cuda/bindings/{_nvml.pxd => nvml.pxd}     |   2 +-
 .../cuda/bindings/{_nvml.pyx => nvml.pyx}     | 124 +++++++++---------
 cuda_bindings/tests/nvml/conftest.py          |   4 +-
 cuda_bindings/tests/nvml/test_compute_mode.py |   2 +-
 cuda_bindings/tests/nvml/test_cuda.py         |   4 +-
 cuda_bindings/tests/nvml/test_device.py       |   2 +-
 cuda_bindings/tests/nvml/test_gpu.py          |   2 +-
 cuda_bindings/tests/nvml/test_init.py         |   4 +-
 cuda_bindings/tests/nvml/test_nvlink.py       |   4 +-
 .../tests/nvml/test_page_retirement.py        |   4 +-
 cuda_bindings/tests/nvml/test_pci.py          |   2 +-
 cuda_bindings/tests/nvml/test_pynvml.py       |   4 +-
 cuda_bindings/tests/nvml/util.py              |   4 +-
 cuda_core/cuda/core/system/_device.pyx        |   2 +-
 cuda_core/cuda/core/system/_nvml_context.pyx  |   4 +-
 cuda_core/cuda/core/system/_system.pyx        |   4 +-
 cuda_core/cuda/core/system/_system_events.pyx |   4 +-
 cuda_core/cuda/core/system/exceptions.py      |   4 +-
 cuda_core/tests/system/test_system_device.py  |   2 +-
 25 files changed, 96 insertions(+), 92 deletions(-)
 rename cuda_bindings/cuda/bindings/_internal/{_nvml.pxd => nvml.pxd} (99%)
 rename cuda_bindings/cuda/bindings/_internal/{_nvml_linux.pyx => nvml_linux.pyx} (100%)
 rename cuda_bindings/cuda/bindings/_internal/{_nvml_windows.pyx => nvml_windows.pyx} (100%)
 rename cuda_bindings/cuda/bindings/{cy_nvml.pxd => cynvml.pxd} (100%)
 rename cuda_bindings/cuda/bindings/{cy_nvml.pyx => cynvml.pyx} (99%)
 rename cuda_bindings/cuda/bindings/{_nvml.pxd => nvml.pxd} (99%)
 rename cuda_bindings/cuda/bindings/{_nvml.pyx => nvml.pyx} (99%)

diff --git a/cuda_bindings/cuda/bindings/_internal/_nvml.pxd b/cuda_bindings/cuda/bindings/_internal/nvml.pxd
similarity index 99%
rename from cuda_bindings/cuda/bindings/_internal/_nvml.pxd
rename to cuda_bindings/cuda/bindings/_internal/nvml.pxd
index b04df67558..e62b29b2c8 100644
--- a/cuda_bindings/cuda/bindings/_internal/_nvml.pxd
+++ b/cuda_bindings/cuda/bindings/_internal/nvml.pxd
@@ -4,7 +4,7 @@
 #
 # This code was automatically generated across versions from 12.9.1 to 13.1.1. Do not modify it directly.
 
-from ..cy_nvml cimport *
+from ..cynvml cimport *
 
 
 ###############################################################################
diff --git a/cuda_bindings/cuda/bindings/_internal/_nvml_linux.pyx b/cuda_bindings/cuda/bindings/_internal/nvml_linux.pyx
similarity index 100%
rename from cuda_bindings/cuda/bindings/_internal/_nvml_linux.pyx
rename to cuda_bindings/cuda/bindings/_internal/nvml_linux.pyx
diff --git a/cuda_bindings/cuda/bindings/_internal/_nvml_windows.pyx b/cuda_bindings/cuda/bindings/_internal/nvml_windows.pyx
similarity index 100%
rename from cuda_bindings/cuda/bindings/_internal/_nvml_windows.pyx
rename to cuda_bindings/cuda/bindings/_internal/nvml_windows.pyx
diff --git a/cuda_bindings/cuda/bindings/_test_helpers/arch_check.py b/cuda_bindings/cuda/bindings/_test_helpers/arch_check.py
index f9666c8a18..e063fd4ff8 100644
--- a/cuda_bindings/cuda/bindings/_test_helpers/arch_check.py
+++ b/cuda_bindings/cuda/bindings/_test_helpers/arch_check.py
@@ -5,7 +5,7 @@
 from contextlib import contextmanager
 
 import pytest
-from cuda.bindings import _nvml as nvml
+from cuda.bindings import nvml
 
 
 @contextmanager
diff --git a/cuda_bindings/cuda/bindings/cy_nvml.pxd b/cuda_bindings/cuda/bindings/cynvml.pxd
similarity index 100%
rename from cuda_bindings/cuda/bindings/cy_nvml.pxd
rename to cuda_bindings/cuda/bindings/cynvml.pxd
diff --git a/cuda_bindings/cuda/bindings/cy_nvml.pyx b/cuda_bindings/cuda/bindings/cynvml.pyx
similarity index 99%
rename from cuda_bindings/cuda/bindings/cy_nvml.pyx
rename to cuda_bindings/cuda/bindings/cynvml.pyx
index 82b4bf88bd..b395a6ac53 100644
--- a/cuda_bindings/cuda/bindings/cy_nvml.pyx
+++ b/cuda_bindings/cuda/bindings/cynvml.pyx
@@ -4,7 +4,7 @@
 #
 # This code was automatically generated across versions from 12.9.1 to 13.1.1. Do not modify it directly.
 
-from ._internal cimport _nvml as _nvml
+from ._internal cimport nvml as _nvml
 
 
 ###############################################################################
diff --git a/cuda_bindings/cuda/bindings/_nvml.pxd b/cuda_bindings/cuda/bindings/nvml.pxd
similarity index 99%
rename from cuda_bindings/cuda/bindings/_nvml.pxd
rename to cuda_bindings/cuda/bindings/nvml.pxd
index 36ab860e64..6cf00356c8 100644
--- a/cuda_bindings/cuda/bindings/_nvml.pxd
+++ b/cuda_bindings/cuda/bindings/nvml.pxd
@@ -6,7 +6,7 @@
 
 from libc.stdint cimport intptr_t
 
-from .cy_nvml cimport *
+from .cynvml cimport *
 
 
 ###############################################################################
diff --git a/cuda_bindings/cuda/bindings/_nvml.pyx b/cuda_bindings/cuda/bindings/nvml.pyx
similarity index 99%
rename from cuda_bindings/cuda/bindings/_nvml.pyx
rename to cuda_bindings/cuda/bindings/nvml.pyx
index e175c5d2f2..1a75193b2c 100644
--- a/cuda_bindings/cuda/bindings/_nvml.pyx
+++ b/cuda_bindings/cuda/bindings/nvml.pyx
@@ -37,6 +37,10 @@ cdef __from_data(data, dtype_name, expected_dtype, lowpp_type):
 
 
 
+cdef inline unsigned int NVML_VERSION_STRUCT(const unsigned int size, const unsigned int ver) nogil:
+    return (size | (ver << 24))  # struct size OR-ed with the version number shifted into bits 24 and above
+
+
 ###############################################################################
 # Enum
 ###############################################################################
@@ -20200,7 +20204,7 @@ cpdef object device_get_pci_info_ext(intptr_t device):
     """
     cdef PciInfoExt_v1 pci_py = PciInfoExt_v1()
     cdef nvmlPciInfoExt_t *pci = <nvmlPciInfoExt_t *><intptr_t>(pci_py._get_ptr())
-    pci.version = sizeof(nvmlPciInfoExt_v1_t) | (1 << 24)
+    pci.version = NVML_VERSION_STRUCT(sizeof(nvmlPciInfoExt_v1_t), 1)
     with nogil:
         __status__ = nvmlDeviceGetPciInfoExt(<Device>device, pci)
     check_status(__status__)
@@ -20643,7 +20647,7 @@ cpdef object device_get_cooler_info(intptr_t device):
     """
     cdef CoolerInfo_v1 cooler_info_py = CoolerInfo_v1()
     cdef nvmlCoolerInfo_t *cooler_info = <nvmlCoolerInfo_t *><intptr_t>(cooler_info_py._get_ptr())
-    cooler_info.version = sizeof(nvmlCoolerInfo_v1_t) | (1 << 24)
+    cooler_info.version = NVML_VERSION_STRUCT(sizeof(nvmlCoolerInfo_v1_t), 1)
     with nogil:
         __status__ = nvmlDeviceGetCoolerInfo(<Device>device, cooler_info)
     check_status(__status__)
@@ -21027,7 +21031,7 @@ cpdef object device_get_memory_info_v2(intptr_t device):
     """
     cdef Memory_v2 memory_py = Memory_v2()
     cdef nvmlMemory_v2_t *memory = <nvmlMemory_v2_t *><intptr_t>(memory_py._get_ptr())
-    memory.version = sizeof(nvmlMemory_v2_t) | (2 << 24)
+    memory.version = NVML_VERSION_STRUCT(sizeof(nvmlMemory_v2_t), 2)
     with nogil:
         __status__ = nvmlDeviceGetMemoryInfo_v2(<Device>device, memory)
     check_status(__status__)
@@ -21847,7 +21851,7 @@ cpdef object system_get_conf_compute_settings():
     """
     cdef SystemConfComputeSettings_v1 settings_py = SystemConfComputeSettings_v1()
     cdef nvmlSystemConfComputeSettings_t *settings = <nvmlSystemConfComputeSettings_t *><intptr_t>(settings_py._get_ptr())
-    settings.version = sizeof(nvmlSystemConfComputeSettings_v1_t) | (1 << 24)
+    settings.version = NVML_VERSION_STRUCT(sizeof(nvmlSystemConfComputeSettings_v1_t), 1)
     with nogil:
         __status__ = nvmlSystemGetConfComputeSettings(settings)
     check_status(__status__)
@@ -21907,7 +21911,7 @@ cpdef object device_get_sram_ecc_error_status(intptr_t device):
     """
     cdef EccSramErrorStatus_v1 status_py = EccSramErrorStatus_v1()
     cdef nvmlEccSramErrorStatus_t *status = <nvmlEccSramErrorStatus_t *><intptr_t>(status_py._get_ptr())
-    status.version = sizeof(nvmlEccSramErrorStatus_v1_t) | (1 << 24)
+    status.version = NVML_VERSION_STRUCT(sizeof(nvmlEccSramErrorStatus_v1_t), 1)
     with nogil:
         __status__ = nvmlDeviceGetSramEccErrorStatus(<Device>device, status)
     check_status(__status__)
@@ -22580,7 +22584,7 @@ cpdef object device_get_nvlink_supported_bw_modes(intptr_t device):
     """
     cdef NvlinkSupportedBwModes_v1 supported_bw_mode_py = NvlinkSupportedBwModes_v1()
     cdef nvmlNvlinkSupportedBwModes_t *supported_bw_mode = <nvmlNvlinkSupportedBwModes_t *><intptr_t>(supported_bw_mode_py._get_ptr())
-    supported_bw_mode.version = sizeof(nvmlNvlinkSupportedBwModes_v1_t) | (1 << 24)
+    supported_bw_mode.version = NVML_VERSION_STRUCT(sizeof(nvmlNvlinkSupportedBwModes_v1_t), 1)
     with nogil:
         __status__ = nvmlDeviceGetNvlinkSupportedBwModes(<Device>device, supported_bw_mode)
     check_status(__status__)
@@ -22600,7 +22604,7 @@ cpdef object device_get_nvlink_bw_mode(intptr_t device):
     """
     cdef NvlinkGetBwMode_v1 get_bw_mode_py = NvlinkGetBwMode_v1()
     cdef nvmlNvlinkGetBwMode_t *get_bw_mode = <nvmlNvlinkGetBwMode_t *><intptr_t>(get_bw_mode_py._get_ptr())
-    get_bw_mode.version = sizeof(nvmlNvlinkGetBwMode_v1_t) | (1 << 24)
+    get_bw_mode.version = NVML_VERSION_STRUCT(sizeof(nvmlNvlinkGetBwMode_v1_t), 1)
     with nogil:
         __status__ = nvmlDeviceGetNvlinkBwMode(<Device>device, get_bw_mode)
     check_status(__status__)
@@ -22616,7 +22620,7 @@ cpdef device_set_nvlink_bw_mode(intptr_t device, intptr_t set_bw_mode):
 
     .. seealso:: `nvmlDeviceSetNvlinkBwMode`
     """
-    set_bw_mode.version = sizeof(nvmlNvlinkSetBwMode_v1_t) | (1 << 24)
+    set_bw_mode.version = NVML_VERSION_STRUCT(sizeof(nvmlNvlinkSetBwMode_v1_t), 1)
     with nogil:
         __status__ = nvmlDeviceSetNvlinkBwMode(<Device>device, <nvmlNvlinkSetBwMode_t*>set_bw_mode)
     check_status(__status__)
@@ -23141,7 +23145,7 @@ cpdef object vgpu_type_get_bar1_info(unsigned int vgpu_type_id):
     """
     cdef VgpuTypeBar1Info_v1 bar1info_py = VgpuTypeBar1Info_v1()
     cdef nvmlVgpuTypeBar1Info_t *bar1info = <nvmlVgpuTypeBar1Info_t *><intptr_t>(bar1info_py._get_ptr())
-    bar1info.version = sizeof(nvmlVgpuTypeBar1Info_v1_t) | (1 << 24)
+    bar1info.version = NVML_VERSION_STRUCT(sizeof(nvmlVgpuTypeBar1Info_v1_t), 1)
     with nogil:
         __status__ = nvmlVgpuTypeGetBAR1Info(<nvmlVgpuTypeId_t>vgpu_type_id, bar1info)
     check_status(__status__)
@@ -23473,7 +23477,7 @@ cpdef gpu_instance_set_vgpu_scheduler_state(intptr_t gpu_instance, intptr_t p_sc
 
     .. seealso:: `nvmlGpuInstanceSetVgpuSchedulerState`
     """
-    (<nvmlVgpuSchedulerState_t*>p_scheduler).version = sizeof(nvmlVgpuSchedulerState_v1_t) | (1 << 24)
+    (<nvmlVgpuSchedulerState_t*>p_scheduler).version = NVML_VERSION_STRUCT(sizeof(nvmlVgpuSchedulerState_v1_t), 1)
     with nogil:
         __status__ = nvmlGpuInstanceSetVgpuSchedulerState(<GpuInstance>gpu_instance, <nvmlVgpuSchedulerState_t*>p_scheduler)
     check_status(__status__)
@@ -23492,7 +23496,7 @@ cpdef object gpu_instance_get_vgpu_scheduler_state(intptr_t gpu_instance):
     """
     cdef VgpuSchedulerStateInfo_v1 p_scheduler_state_info_py = VgpuSchedulerStateInfo_v1()
     cdef nvmlVgpuSchedulerStateInfo_t *p_scheduler_state_info = <nvmlVgpuSchedulerStateInfo_t *><intptr_t>(p_scheduler_state_info_py._get_ptr())
-    p_scheduler_state_info.version = sizeof(nvmlVgpuSchedulerState_v1_t) | (1 << 24)
+    p_scheduler_state_info.version = NVML_VERSION_STRUCT(sizeof(nvmlVgpuSchedulerState_v1_t), 1)
     with nogil:
         __status__ = nvmlGpuInstanceGetVgpuSchedulerState(<GpuInstance>gpu_instance, p_scheduler_state_info)
     check_status(__status__)
@@ -23512,7 +23516,7 @@ cpdef object gpu_instance_get_vgpu_scheduler_log(intptr_t gpu_instance):
     """
     cdef VgpuSchedulerLogInfo_v1 p_scheduler_log_info_py = VgpuSchedulerLogInfo_v1()
     cdef nvmlVgpuSchedulerLogInfo_t *p_scheduler_log_info = <nvmlVgpuSchedulerLogInfo_t *><intptr_t>(p_scheduler_log_info_py._get_ptr())
-    p_scheduler_log_info.version = sizeof(nvmlVgpuSchedulerLogInfo_v1_t) | (1 << 24)
+    p_scheduler_log_info.version = NVML_VERSION_STRUCT(sizeof(nvmlVgpuSchedulerLogInfo_v1_t), 1)
     with nogil:
         __status__ = nvmlGpuInstanceGetVgpuSchedulerLog(<GpuInstance>gpu_instance, p_scheduler_log_info)
     check_status(__status__)
@@ -23962,7 +23966,7 @@ cpdef object gpu_instance_get_compute_instance_profile_info_v(intptr_t gpu_insta
     """
     cdef ComputeInstanceProfileInfo_v2 info_py = ComputeInstanceProfileInfo_v2()
     cdef nvmlComputeInstanceProfileInfo_v2_t *info = <nvmlComputeInstanceProfileInfo_v2_t *><intptr_t>(info_py._get_ptr())
-    info.version = sizeof(nvmlComputeInstanceProfileInfo_v2_t) | (2 << 24)
+    info.version = NVML_VERSION_STRUCT(sizeof(nvmlComputeInstanceProfileInfo_v2_t), 2)
     with nogil:
         __status__ = nvmlGpuInstanceGetComputeInstanceProfileInfoV(<GpuInstance>gpu_instance, profile, eng_profile, info)
     check_status(__status__)
@@ -24265,7 +24269,7 @@ cpdef object device_get_addressing_mode(intptr_t device):
     """
     cdef DeviceAddressingMode_v1 mode_py = DeviceAddressingMode_v1()
     cdef nvmlDeviceAddressingMode_t *mode = <nvmlDeviceAddressingMode_t *><intptr_t>(mode_py._get_ptr())
-    mode.version = sizeof(nvmlDeviceAddressingMode_v1_t) | (1 << 24)
+    mode.version = NVML_VERSION_STRUCT(sizeof(nvmlDeviceAddressingMode_v1_t), 1)
     with nogil:
         __status__ = nvmlDeviceGetAddressingMode(<Device>device, mode)
     check_status(__status__)
@@ -24285,7 +24289,7 @@ cpdef object device_get_repair_status(intptr_t device):
     """
     cdef RepairStatus_v1 repair_status_py = RepairStatus_v1()
     cdef nvmlRepairStatus_t *repair_status = <nvmlRepairStatus_t *><intptr_t>(repair_status_py._get_ptr())
-    repair_status.version = sizeof(nvmlRepairStatus_v1_t) | (1 << 24)
+    repair_status.version = NVML_VERSION_STRUCT(sizeof(nvmlRepairStatus_v1_t), 1)
     with nogil:
         __status__ = nvmlDeviceGetRepairStatus(<Device>device, repair_status)
     check_status(__status__)
@@ -24357,7 +24361,7 @@ cpdef str system_get_driver_branch():
     # Calculation copied from the macro NVML_STRUCT_VERSION in nvml.h
     # Needs to be updated if the version of the nvmlSystemDriverBranchInfo_t
     # struct changes in the future.
-    info.version = sizeof(nvmlSystemDriverBranchInfo_v1_t) | (1 << 24)
+    info.version = NVML_VERSION_STRUCT(sizeof(nvmlSystemDriverBranchInfo_v1_t), 1)
     cdef unsigned int length = 80
     with nogil:
         __status__ = nvmlSystemGetDriverBranch(&info, length)
@@ -24434,7 +24438,7 @@ cpdef int device_get_temperature_v(intptr_t device, nvmlTemperatureSensors_t sen
     cdef nvmlTemperature_v1_t[1] temperature
 
     with nogil:
-        temperature[0].version = sizeof(nvmlTemperature_v1_t) | (1 << 24)
+        temperature[0].version = NVML_VERSION_STRUCT(sizeof(nvmlTemperature_v1_t), 1)
         temperature[0].sensorType = <nvmlTemperatureSensors_t>sensorType
         __status__ = nvmlDeviceGetTemperatureV(<Device>device, temperature)
     check_status(__status__)
@@ -24475,7 +24479,7 @@ cpdef object device_get_running_process_detail_list(intptr_t device, unsigned in
 
     # Get size of array
     with nogil:
-        ptr.version = sizeof(nvmlProcessDetailList_v1_t) | (1 << 24)
+        ptr.version = NVML_VERSION_STRUCT(sizeof(nvmlProcessDetailList_v1_t), 1)
         ptr.mode = mode
         ptr.numProcArrayEntries = 0
         ptr.procArray = NULL
@@ -24563,7 +24567,7 @@ cpdef object device_get_processes_utilization_info(intptr_t device, unsigned lon
 
     # Get size of array
     with nogil:
-        ptr.version = sizeof(nvmlProcessesUtilizationInfo_v1_t) | (1 << 24)
+        ptr.version = NVML_VERSION_STRUCT(sizeof(nvmlProcessesUtilizationInfo_v1_t), 1)
         ptr.processSamplesCount = 0
         ptr.lastSeenTimeStamp = last_seen_time_stamp
         ptr.procUtilArray = NULL
@@ -24779,7 +24783,7 @@ cpdef object gpu_instance_get_creatable_vgpus(intptr_t gpu_instance):
 
     # Get size of array
     with nogil:
-        ptr.version = sizeof(nvmlVgpuTypeIdInfo_v1_t) | (1 << 24)
+        ptr.version = NVML_VERSION_STRUCT(sizeof(nvmlVgpuTypeIdInfo_v1_t), 1)
         ptr.vgpuCount = 0
         ptr.vgpuTypeIds = NULL
         __status__ = nvmlGpuInstanceGetCreatableVgpus(<GpuInstance>gpu_instance, ptr)
@@ -24811,7 +24815,7 @@ cpdef object gpu_instance_get_active_vgpus(intptr_t gpu_instance):
     cdef nvmlActiveVgpuInstanceInfo_v1_t *ptr = <nvmlActiveVgpuInstanceInfo_v1_t *>activeVgpuInfo._get_ptr()
 
     with nogil:
-        ptr.version = sizeof(nvmlActiveVgpuInstanceInfo_v1_t) | (1 << 24)
+        ptr.version = NVML_VERSION_STRUCT(sizeof(nvmlActiveVgpuInstanceInfo_v1_t), 1)
         ptr.vgpuCount = 0
         ptr.vgpuInstances = NULL
         __status__ = nvmlGpuInstanceGetActiveVgpus(<GpuInstance>gpu_instance, ptr)
@@ -24846,7 +24850,7 @@ cpdef object gpu_instance_get_vgpu_type_creatable_placements(intptr_t gpu_instan
 
     # Get size of array
     with nogil:
-        ptr.version = sizeof(nvmlVgpuCreatablePlacementInfo_v1_t) | (1 << 24)
+        ptr.version = NVML_VERSION_STRUCT(sizeof(nvmlVgpuCreatablePlacementInfo_v1_t), 1)
         ptr.count = 0
         ptr.placementIds = NULL
         ptr.vgpuTypeId = vgpu_type_id
@@ -24883,7 +24887,7 @@ cpdef object device_get_vgpu_type_creatable_placements(intptr_t device, unsigned
 
     # Get size of array
     with nogil:
-        ptr.version = sizeof(nvmlVgpuPlacementList_v2_t) | (2 << 24)
+        ptr.version = NVML_VERSION_STRUCT(sizeof(nvmlVgpuPlacementList_v2_t), 2)
         ptr.count = 0
         ptr.placementIds = NULL
         ptr.mode = mode
@@ -25003,7 +25007,7 @@ cpdef object device_get_vgpu_instances_utilization_info(intptr_t device):
     cdef nvmlVgpuInstancesUtilizationInfo_v1_t *ptr = <nvmlVgpuInstancesUtilizationInfo_t *>vgpuUtilInfo._get_ptr()
 
     with nogil:
-        ptr.version = sizeof(nvmlVgpuInstancesUtilizationInfo_v1_t) | (1 << 24)
+        ptr.version = NVML_VERSION_STRUCT(sizeof(nvmlVgpuInstancesUtilizationInfo_v1_t), 1)
         ptr.vgpuInstanceCount = 0
         ptr.vgpuUtilArray = NULL
         __status__ = nvmlDeviceGetVgpuInstancesUtilizationInfo(<Device>device, ptr)
@@ -25036,7 +25040,7 @@ cpdef object device_get_vgpu_processes_utilization_info(intptr_t device, unsigne
     cdef nvmlVgpuProcessesUtilizationInfo_v1_t *ptr = <nvmlVgpuProcessesUtilizationInfo_v1_t *>vgpuProcUtilInfo._get_ptr()
 
     with nogil:
-        ptr.version = sizeof(nvmlVgpuProcessesUtilizationInfo_v1_t) | (1 << 24)
+        ptr.version = NVML_VERSION_STRUCT(sizeof(nvmlVgpuProcessesUtilizationInfo_v1_t), 1)
         ptr.vgpuProcessCount = 0
         ptr.vgpuProcUtilArray = NULL
         ptr.lastSeenTimeStamp = last_seen_time_stamp
@@ -25122,7 +25126,7 @@ cpdef object device_get_sram_unique_uncorrected_ecc_error_counts(intptr_t device
     cdef nvmlEccSramUniqueUncorrectedErrorCounts_v1_t *ptr = <nvmlEccSramUniqueUncorrectedErrorCounts_v1_t *>errorCounts._get_ptr()
 
     with nogil:
-        ptr.version = sizeof(nvmlEccSramUniqueUncorrectedErrorCounts_v1_t) | (1 << 24)
+        ptr.version = NVML_VERSION_STRUCT(sizeof(nvmlEccSramUniqueUncorrectedErrorCounts_v1_t), 1)
         ptr.entryCount = 0
         ptr.entries = NULL
         __status__ = nvmlDeviceGetSramUniqueUncorrectedEccErrorCounts(<Device>device, ptr)
@@ -25159,7 +25163,7 @@ cpdef object device_get_gpu_fabric_info_v(intptr_t device):
         gpu_fabric_info_v3_py = GpuFabricInfo_v3()
         gpu_fabric_info = <nvmlGpuFabricInfoV_t *><intptr_t>(gpu_fabric_info_v3_py._get_ptr())
         with nogil:
-            gpu_fabric_info.version = sizeof(nvmlGpuFabricInfo_v3_t) | (3 << 24)
+            gpu_fabric_info.version = NVML_VERSION_STRUCT(sizeof(nvmlGpuFabricInfo_v3_t), 3)
             __status__ = nvmlDeviceGetGpuFabricInfoV(<Device>device, gpu_fabric_info)
         check_status(__status__)
         return gpu_fabric_info_v3_py
@@ -25168,7 +25172,7 @@ cpdef object device_get_gpu_fabric_info_v(intptr_t device):
         gpu_fabric_info_v2_py = GpuFabricInfo_v2()
         gpu_fabric_info = <nvmlGpuFabricInfoV_t *><intptr_t>(gpu_fabric_info_v2_py._get_ptr())
         with nogil:
-            gpu_fabric_info.version = sizeof(nvmlGpuFabricInfo_v2_t) | (2 << 24)
+            gpu_fabric_info.version = NVML_VERSION_STRUCT(sizeof(nvmlGpuFabricInfo_v2_t), 2)
             __status__ = nvmlDeviceGetGpuFabricInfoV(<Device>device, gpu_fabric_info)
         check_status(__status__)
         return gpu_fabric_info_v2_py
@@ -25193,7 +25197,7 @@ cpdef object device_get_platform_info(intptr_t device):
         platform_info_v2_py = PlatformInfo_v2()
         platform_info = <nvmlPlatformInfo_t *><intptr_t>(platform_info_v2_py._get_ptr())
         with nogil:
-            platform_info.version = sizeof(nvmlPlatformInfo_v2_t) | (2 << 24)
+            platform_info.version = NVML_VERSION_STRUCT(sizeof(nvmlPlatformInfo_v2_t), 2)
             __status__ = nvmlDeviceGetPlatformInfo(<Device>device, platform_info)
         check_status(__status__)
         return platform_info_v2_py
@@ -25202,7 +25206,7 @@ cpdef object device_get_platform_info(intptr_t device):
         platform_info_v1_py = PlatformInfo_v1()
         platform_info = <nvmlPlatformInfo_t *><intptr_t>(platform_info_v1_py._get_ptr())
         with nogil:
-            platform_info.version = sizeof(nvmlPlatformInfo_v1_t) | (1 << 24)
+            platform_info.version = NVML_VERSION_STRUCT(sizeof(nvmlPlatformInfo_v1_t), 1)
             __status__ = nvmlDeviceGetPlatformInfo(<Device>device, platform_info)
         check_status(__status__)
         return platform_info_v1_py
@@ -25227,7 +25231,7 @@ cpdef object device_get_nvlink_info(intptr_t device):
         info_v2_py = NvLinkInfo_v2()
         info = <nvmlNvLinkInfo_t *><intptr_t>(info_v2_py._get_ptr())
         with nogil:
-            info.version = sizeof(nvmlNvLinkInfo_v2_t) | (2 << 24)
+            info.version = NVML_VERSION_STRUCT(sizeof(nvmlNvLinkInfo_v2_t), 2)
             __status__ = nvmlDeviceGetNvLinkInfo(<Device>device, info)
         check_status(__status__)
         return info_v2_py
@@ -25236,7 +25240,7 @@ cpdef object device_get_nvlink_info(intptr_t device):
         info_v1_py = NvLinkInfo_v1()
         info = <nvmlNvLinkInfo_t *><intptr_t>(info_v1_py._get_ptr())
         with nogil:
-            info.version = sizeof(nvmlNvLinkInfo_v1_t) | (1 << 24)
+            info.version = NVML_VERSION_STRUCT(sizeof(nvmlNvLinkInfo_v1_t), 1)
             __status__ = nvmlDeviceGetNvLinkInfo(<Device>device, info)
         check_status(__status__)
         return info_v1_py
@@ -25246,7 +25250,7 @@ cpdef intptr_t system_event_set_create():
     """Create an empty set of system events. Event set should be freed by ``nvmlSystemEventSetFree``."""
     cdef nvmlSystemEventSetCreateRequest_v1_t[1] request
     with nogil:
-        request[0].version = sizeof(nvmlSystemEventSetCreateRequest_v1_t) | (1 << 24)
+        request[0].version = NVML_VERSION_STRUCT(sizeof(nvmlSystemEventSetCreateRequest_v1_t), 1)
         __status__ = nvmlSystemEventSetCreate(<nvmlSystemEventSetCreateRequest_t*>request)
     check_status(__status__)
     return <intptr_t>(request[0].set)
@@ -25257,7 +25261,7 @@ cpdef system_event_set_free(intptr_t event_set):
     cdef nvmlSystemEventSetFreeRequest_v1_t[1] request
     request[0].set = <SystemEventSet>event_set
     with nogil:
-        request[0].version = sizeof(nvmlSystemEventSetFreeRequest_v1_t) | (1 << 24)
+        request[0].version = NVML_VERSION_STRUCT(sizeof(nvmlSystemEventSetFreeRequest_v1_t), 1)
         __status__ = nvmlSystemEventSetFree(<nvmlSystemEventSetFreeRequest_t*>request)
     check_status(__status__)
 
@@ -25271,7 +25275,7 @@ cpdef system_register_events(unsigned long long event_types, intptr_t event_set)
     """
     cdef nvmlSystemRegisterEventRequest_v1_t[1] request
     with nogil:
-        request[0].version = sizeof(nvmlSystemRegisterEventRequest_v1_t) | (1 << 24)
+        request[0].version = NVML_VERSION_STRUCT(sizeof(nvmlSystemRegisterEventRequest_v1_t), 1)
         request[0].set = <SystemEventSet>event_set
         request[0].eventTypes = event_types
         __status__ = nvmlSystemRegisterEvents(<nvmlSystemRegisterEventRequest_t*>request)
@@ -25293,7 +25297,7 @@ cpdef object system_event_set_wait(intptr_t event_set, unsigned int timeout_ms,
     cdef SystemEventData_v1 event_data = SystemEventData_v1(buffer_size)
     request[0].data = <nvmlSystemEventData_v1_t *><intptr_t>(event_data._get_ptr())
     with nogil:
-        request[0].version = sizeof(nvmlSystemEventSetWaitRequest_v1_t) | (1 << 24)
+        request[0].version = NVML_VERSION_STRUCT(sizeof(nvmlSystemEventSetWaitRequest_v1_t), 1)
         request[0].timeoutms = timeout_ms
         request[0].set = <SystemEventSet>event_set
         request[0].dataSize = buffer_size
@@ -25317,7 +25321,7 @@ cpdef unsigned int device_get_fan_speed_rpm(intptr_t device, unsigned int fan):
     """
     cdef nvmlFanSpeedInfo_v1_t[1] fan_speed
     with nogil:
-        fan_speed[0].version = sizeof(nvmlFanSpeedInfo_v1_t) | (1 << 24)
+        fan_speed[0].version = NVML_VERSION_STRUCT(sizeof(nvmlFanSpeedInfo_v1_t), 1)
         fan_speed[0].fan = fan
         __status__ = nvmlDeviceGetFanSpeedRPM(<Device>device, fan_speed)
     check_status(__status__)
@@ -25337,7 +25341,7 @@ cpdef int device_get_margin_temperature(intptr_t device):
     """
     cdef nvmlMarginTemperature_v1_t[1] margin_temp_info
     with nogil:
-        margin_temp_info[0].version = sizeof(nvmlMarginTemperature_v1_t) | (1 << 24)
+        margin_temp_info[0].version = NVML_VERSION_STRUCT(sizeof(nvmlMarginTemperature_v1_t), 1)
         __status__ = nvmlDeviceGetMarginTemperature(<Device>device, margin_temp_info)
     check_status(__status__)
     return margin_temp_info[0].marginTemperature
@@ -25357,7 +25361,7 @@ cpdef object device_get_clock_offsets(intptr_t device, nvmlClockType_t clock_typ
     cdef ClockOffset_v1 info_py = ClockOffset_v1()
     cdef nvmlClockOffset_v1_t *info = <nvmlClockOffset_v1_t *><intptr_t>(info_py._get_ptr())
     with nogil:
-        info.version = sizeof(nvmlClockOffset_v1_t) | (1 << 24)
+        info.version = NVML_VERSION_STRUCT(sizeof(nvmlClockOffset_v1_t), 1)
         info.type = clock_type
         info.pstate = pstate
         __status__ = nvmlDeviceGetClockOffsets(<Device>device, info)
@@ -25383,7 +25387,7 @@ cpdef object device_get_vgpu_type_supported_placements(intptr_t device, unsigned
     with nogil:
         p_placement_list.count = 0
         p_placement_list.placementIds = NULL
-        p_placement_list.version = sizeof(nvmlVgpuPlacementList_v2_t) | (2 << 24)
+        p_placement_list.version = NVML_VERSION_STRUCT(sizeof(nvmlVgpuPlacementList_v2_t), 2)
         __status__ = nvmlDeviceGetVgpuTypeSupportedPlacements(<Device>device, <nvmlVgpuTypeId_t>vgpu_type_id, p_placement_list)
     check_status_size(__status__)
 
@@ -25413,7 +25417,7 @@ cpdef unsigned int vgpu_instance_get_placement_id(unsigned int vgpu_instance):
     """
     cdef nvmlVgpuPlacementId_t[1] p_placement
     with nogil:
-        p_placement[0].version = sizeof(nvmlVgpuPlacementId_v1_t) | (1 << 24)
+        p_placement[0].version = NVML_VERSION_STRUCT(sizeof(nvmlVgpuPlacementId_v1_t), 1)
         __status__ = nvmlVgpuInstanceGetPlacementId(<nvmlVgpuInstance_t>vgpu_instance, p_placement)
     check_status(__status__)
     return p_placement[0].placementId
@@ -25432,7 +25436,7 @@ cpdef object device_get_capabilities(intptr_t device):
     """
     cdef nvmlDeviceCapabilities_t[1] caps
     with nogil:
-        caps[0].version = sizeof(nvmlDeviceCapabilities_v1_t) | (1 << 24)
+        caps[0].version = NVML_VERSION_STRUCT(sizeof(nvmlDeviceCapabilities_v1_t), 1)
         __status__ = nvmlDeviceGetCapabilities(<Device>device, caps)
     check_status(__status__)
     return caps[0].capMask
@@ -25475,7 +25479,7 @@ cpdef tuple device_get_dram_encryption_mode(intptr_t device):
     cdef nvmlDramEncryptionInfo_t current
     cdef nvmlDramEncryptionInfo_t pending
     with nogil:
-        current.version = pending.version = sizeof(nvmlDramEncryptionInfo_t) | (1 << 24)
+        current.version = pending.version = NVML_VERSION_STRUCT(sizeof(nvmlDramEncryptionInfo_t), 1)
         __status__ = nvmlDeviceGetDramEncryptionMode(<Device>device, &current, &pending)
     check_status(__status__)
     return (current.encryptionState, pending.encryptionState)
@@ -25492,7 +25496,7 @@ cpdef device_set_dram_encryption_mode(intptr_t device, int dram_encryption):
     """
     cdef nvmlDramEncryptionInfo_t[1] encryption
     with nogil:
-        encryption[0].version = sizeof(nvmlDramEncryptionInfo_t) | (1 << 24)
+        encryption[0].version = NVML_VERSION_STRUCT(sizeof(nvmlDramEncryptionInfo_t), 1)
         encryption[0].encryptionState = <nvmlEnableState_t>dram_encryption
         __status__ = nvmlDeviceSetDramEncryptionMode(<Device>device, encryption)
     check_status(__status__)
@@ -25513,7 +25517,7 @@ cpdef object device_get_gpu_instance_profile_info_by_id_v(intptr_t device, unsig
     cdef GpuInstanceProfileInfo_v3 info_py = GpuInstanceProfileInfo_v3()
     cdef nvmlGpuInstanceProfileInfo_v3_t *info = <nvmlGpuInstanceProfileInfo_v3_t *><intptr_t>(info_py._get_ptr())
     with nogil:
-        info.version = sizeof(nvmlGpuInstanceProfileInfo_v3_t) | (3 << 24)
+        info.version = NVML_VERSION_STRUCT(sizeof(nvmlGpuInstanceProfileInfo_v3_t), 3)
         __status__ = nvmlDeviceGetGpuInstanceProfileInfoByIdV(<Device>device, profile_id, <nvmlGpuInstanceProfileInfo_v2_t *>info)
     check_status(__status__)
     return info_py
@@ -25534,7 +25538,7 @@ cpdef object device_get_gpu_instance_profile_info_v(intptr_t device, unsigned in
     cdef GpuInstanceProfileInfo_v3 info_py = GpuInstanceProfileInfo_v3()
     cdef nvmlGpuInstanceProfileInfo_v3_t *info = <nvmlGpuInstanceProfileInfo_v3_t *><intptr_t>(info_py._get_ptr())
     with nogil:
-        info.version = sizeof(nvmlGpuInstanceProfileInfo_v3_t) | (3 << 24)
+        info.version = NVML_VERSION_STRUCT(sizeof(nvmlGpuInstanceProfileInfo_v3_t), 3)
         __status__ = nvmlDeviceGetGpuInstanceProfileInfoV(<Device>device, profile, <nvmlGpuInstanceProfileInfo_v2_t *>info)
     check_status(__status__)
     return info_py
@@ -25570,7 +25574,7 @@ cpdef intptr_t device_get_handle_by_uuidv(int type, bytes uuid) except? 0:
         raise ValueError("Invalid UUID format specified")
 
     with nogil:
-        uuid_struct[0].version = sizeof(nvmlUUID_v1_t) | (1 << 24)
+        uuid_struct[0].version = NVML_VERSION_STRUCT(sizeof(nvmlUUID_v1_t), 1)
         uuid_struct[0].type = type
         __status__ = nvmlDeviceGetHandleByUUIDV(uuid_struct, &device)
     check_status(__status__)
@@ -25590,7 +25594,7 @@ cpdef unsigned long long device_get_pdi(intptr_t device):
     """
     cdef nvmlPdi_v1_t[1] pdi
     with nogil:
-        pdi[0].version = sizeof(nvmlPdi_v1_t) | (1 << 24)
+        pdi[0].version = NVML_VERSION_STRUCT(sizeof(nvmlPdi_v1_t), 1)
         __status__ = nvmlDeviceGetPdi(<Device>device, pdi)
     check_status(__status__)
     return pdi[0].value
@@ -25609,7 +25613,7 @@ cpdef str device_get_performance_modes(intptr_t device):
     """
     cdef nvmlDevicePerfModes_t[1] perf_modes
     with nogil:
-        perf_modes[0].version = sizeof(nvmlDevicePerfModes_v1_t) | (1 << 24)
+        perf_modes[0].version = NVML_VERSION_STRUCT(sizeof(nvmlDevicePerfModes_v1_t), 1)
         __status__ = nvmlDeviceGetPerformanceModes(<Device>device, perf_modes)
     check_status(__status__)
     return cpython.PyUnicode_FromString(perf_modes[0].str)
@@ -25646,7 +25650,7 @@ cpdef unsigned int device_get_vgpu_heterogeneous_mode(intptr_t device):
     """
     cdef nvmlVgpuHeterogeneousMode_t[1] heterogeneous_mode
     with nogil:
-        heterogeneous_mode[0].version = sizeof(nvmlVgpuHeterogeneousMode_v1_t) | (1 << 24)
+        heterogeneous_mode[0].version = NVML_VERSION_STRUCT(sizeof(nvmlVgpuHeterogeneousMode_v1_t), 1)
         __status__ = nvmlDeviceGetVgpuHeterogeneousMode(<Device>device, heterogeneous_mode)
     check_status(__status__)
     return heterogeneous_mode[0].mode
@@ -25663,7 +25667,7 @@ cpdef device_set_vgpu_heterogeneous_mode(intptr_t device, int mode):
     """
     cdef nvmlVgpuHeterogeneousMode_t[1] heterogeneous_mode
     with nogil:
-        heterogeneous_mode[0].version = sizeof(nvmlVgpuHeterogeneousMode_v1_t) | (1 << 24)
+        heterogeneous_mode[0].version = NVML_VERSION_STRUCT(sizeof(nvmlVgpuHeterogeneousMode_v1_t), 1)
         heterogeneous_mode[0].mode = mode
         __status__ = nvmlDeviceSetVgpuHeterogeneousMode(<Device>device, heterogeneous_mode)
     check_status(__status__)
@@ -25682,7 +25686,7 @@ cpdef object gpu_instance_get_vgpu_heterogeneous_mode(intptr_t gpu_instance):
     """
     cdef nvmlVgpuHeterogeneousMode_t[1] heterogeneous_mode
     with nogil:
-        heterogeneous_mode[0].version = sizeof(nvmlVgpuHeterogeneousMode_v1_t) | (1 << 24)
+        heterogeneous_mode[0].version = NVML_VERSION_STRUCT(sizeof(nvmlVgpuHeterogeneousMode_v1_t), 1)
         __status__ = nvmlGpuInstanceGetVgpuHeterogeneousMode(<GpuInstance>gpu_instance, heterogeneous_mode)
     check_status(__status__)
     return heterogeneous_mode[0].mode
@@ -25699,7 +25703,7 @@ cpdef gpu_instance_set_vgpu_heterogeneous_mode(intptr_t gpu_instance, unsigned i
     """
     cdef nvmlVgpuHeterogeneousMode_t[1] heterogeneous_mode
     with nogil:
-        heterogeneous_mode[0].version = sizeof(nvmlVgpuHeterogeneousMode_v1_t) | (1 << 24)
+        heterogeneous_mode[0].version = NVML_VERSION_STRUCT(sizeof(nvmlVgpuHeterogeneousMode_v1_t), 1)
         heterogeneous_mode[0].mode = mode
         __status__ = nvmlGpuInstanceSetVgpuHeterogeneousMode(<GpuInstance>gpu_instance, heterogeneous_mode)
     check_status(__status__)
@@ -25854,7 +25858,7 @@ cpdef device_set_power_management_limit_v2(intptr_t device, int power_scope, uns
     cdef nvmlPowerValue_v2_t[1] power_value
 
     with nogil:
-        power_value[0].version = sizeof(nvmlPowerValue_v2_t) | (2 << 24)
+        power_value[0].version = NVML_VERSION_STRUCT(sizeof(nvmlPowerValue_v2_t), 2)
         power_value[0].powerScope = <nvmlPowerScopeType_t>power_scope
         power_value[0].powerValueMw = power_value_mw
         __status__ = nvmlDeviceSetPowerManagementLimit_v2(<Device>device, power_value)
@@ -25872,7 +25876,7 @@ cpdef device_set_rusd_settings_v1(intptr_t device, unsigned long long poll_mask)
     """
     cdef nvmlRusdSettings_v1_t[1] settings
     with nogil:
-        settings[0].version = sizeof(nvmlRusdSettings_v1_t) | (1 << 24)
+        settings[0].version = NVML_VERSION_STRUCT(sizeof(nvmlRusdSettings_v1_t), 1)
         settings[0].pollMask = poll_mask
         __status__ = nvmlDeviceSetRusdSettings_v1(<Device>device, settings)
     check_status(__status__)
@@ -25903,7 +25907,7 @@ cpdef unsigned long long system_get_conf_compute_key_rotation_threshold_info():
     """
     cdef nvmlConfComputeGetKeyRotationThresholdInfo_t[1] key_rotation_thr_info
     with nogil:
-        key_rotation_thr_info[0].version = sizeof(nvmlConfComputeGetKeyRotationThresholdInfo_v1_t) | (1 << 24)
+        key_rotation_thr_info[0].version = NVML_VERSION_STRUCT(sizeof(nvmlConfComputeGetKeyRotationThresholdInfo_v1_t), 1)
         __status__ = nvmlSystemGetConfComputeKeyRotationThresholdInfo(key_rotation_thr_info)
     check_status(__status__)
     return key_rotation_thr_info[0].attackerAdvantage
@@ -25919,7 +25923,7 @@ cpdef system_set_conf_compute_key_rotation_threshold_info(unsigned long long max
     """
     cdef nvmlConfComputeSetKeyRotationThresholdInfo_t[1] key_rotation_thr_info
     with nogil:
-        key_rotation_thr_info[0].version = sizeof(nvmlConfComputeSetKeyRotationThresholdInfo_v1_t) | (1 << 24)
+        key_rotation_thr_info[0].version = NVML_VERSION_STRUCT(sizeof(nvmlConfComputeSetKeyRotationThresholdInfo_v1_t), 1)
         key_rotation_thr_info[0].maxAttackerAdvantage = max_attacker_advantage
         __status__ = nvmlSystemSetConfComputeKeyRotationThresholdInfo(key_rotation_thr_info)
     check_status(__status__)
@@ -25938,7 +25942,7 @@ cpdef unsigned long long vgpu_instance_get_runtime_state_size(unsigned int vgpu_
     """
     cdef nvmlVgpuRuntimeState_t[1] p_state
     with nogil:
-        p_state[0].version = sizeof(nvmlVgpuRuntimeState_v1_t) | (1 << 24)
+        p_state[0].version = NVML_VERSION_STRUCT(sizeof(nvmlVgpuRuntimeState_v1_t), 1)
         __status__ = nvmlVgpuInstanceGetRuntimeStateSize(<nvmlVgpuInstance_t>vgpu_instance, p_state)
     check_status(__status__)
     return p_state[0].size
@@ -25957,7 +25961,7 @@ cpdef unsigned int vgpu_type_get_max_instances_per_gpu_instance(unsigned int vgp
     """
     cdef nvmlVgpuTypeMaxInstance_t[1] max_instance
     with nogil:
-        max_instance[0].version = sizeof(nvmlVgpuTypeMaxInstance_v1_t) | (1 << 24)
+        max_instance[0].version = NVML_VERSION_STRUCT(sizeof(nvmlVgpuTypeMaxInstance_v1_t), 1)
         max_instance[0].vgpuTypeId = <nvmlVgpuTypeId_t>vgpu_type_id
         __status__ = nvmlVgpuTypeGetMaxInstancesPerGpuInstance(max_instance)
     check_status(__status__)
@@ -25977,7 +25981,7 @@ cpdef str device_get_current_clock_freqs(intptr_t device):
     """
     cdef nvmlDeviceCurrentClockFreqs_t[1] current_clock_freqs
     with nogil:
-        current_clock_freqs[0].version = sizeof(nvmlDeviceCurrentClockFreqs_v1_t) | (1 << 24)
+        current_clock_freqs[0].version = NVML_VERSION_STRUCT(sizeof(nvmlDeviceCurrentClockFreqs_v1_t), 1)
         __status__ = nvmlDeviceGetCurrentClockFreqs(<Device>device, current_clock_freqs)
     check_status(__status__)
     return cpython.PyUnicode_FromString(current_clock_freqs[0].str)
diff --git a/cuda_bindings/tests/nvml/conftest.py b/cuda_bindings/tests/nvml/conftest.py
index 3bbbe2cb0a..9b7ef6abd1 100644
--- a/cuda_bindings/tests/nvml/conftest.py
+++ b/cuda_bindings/tests/nvml/conftest.py
@@ -1,10 +1,10 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
 from collections import namedtuple
 
 import pytest
-from cuda.bindings import _nvml as nvml
+from cuda.bindings import nvml
 from cuda.bindings._test_helpers.arch_check import unsupported_before  # noqa: F401
 
 
diff --git a/cuda_bindings/tests/nvml/test_compute_mode.py b/cuda_bindings/tests/nvml/test_compute_mode.py
index 0e9a67861f..1eb8b08288 100644
--- a/cuda_bindings/tests/nvml/test_compute_mode.py
+++ b/cuda_bindings/tests/nvml/test_compute_mode.py
@@ -5,7 +5,7 @@
 import sys
 
 import pytest
-from cuda.bindings import _nvml as nvml
+from cuda.bindings import nvml
 
 from .conftest import unsupported_before
 
diff --git a/cuda_bindings/tests/nvml/test_cuda.py b/cuda_bindings/tests/nvml/test_cuda.py
index 7be70a62dc..20ef7ed24e 100644
--- a/cuda_bindings/tests/nvml/test_cuda.py
+++ b/cuda_bindings/tests/nvml/test_cuda.py
@@ -1,8 +1,8 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
 import cuda.bindings.driver as cuda
-from cuda.bindings import _nvml as nvml
+from cuda.bindings import nvml
 
 from .conftest import NVMLInitializer
 
diff --git a/cuda_bindings/tests/nvml/test_device.py b/cuda_bindings/tests/nvml/test_device.py
index 96f1777c5f..82030e4408 100644
--- a/cuda_bindings/tests/nvml/test_device.py
+++ b/cuda_bindings/tests/nvml/test_device.py
@@ -5,7 +5,7 @@
 from functools import cache
 
 import pytest
-from cuda.bindings import _nvml as nvml
+from cuda.bindings import nvml
 
 from .conftest import unsupported_before
 
diff --git a/cuda_bindings/tests/nvml/test_gpu.py b/cuda_bindings/tests/nvml/test_gpu.py
index 52c5dc52fb..01ded61191 100644
--- a/cuda_bindings/tests/nvml/test_gpu.py
+++ b/cuda_bindings/tests/nvml/test_gpu.py
@@ -3,7 +3,7 @@
 
 import numpy as np
 import pytest
-from cuda.bindings import _nvml as nvml
+from cuda.bindings import nvml
 
 from . import util
 from .conftest import unsupported_before
diff --git a/cuda_bindings/tests/nvml/test_init.py b/cuda_bindings/tests/nvml/test_init.py
index 7970dc380a..8bf95ded0a 100644
--- a/cuda_bindings/tests/nvml/test_init.py
+++ b/cuda_bindings/tests/nvml/test_init.py
@@ -1,11 +1,11 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
 import sys
 import warnings
 
 import pytest
-from cuda.bindings import _nvml as nvml
+from cuda.bindings import nvml
 
 
 def assert_nvml_is_initialized():
diff --git a/cuda_bindings/tests/nvml/test_nvlink.py b/cuda_bindings/tests/nvml/test_nvlink.py
index 99407abc19..d8e782831e 100644
--- a/cuda_bindings/tests/nvml/test_nvlink.py
+++ b/cuda_bindings/tests/nvml/test_nvlink.py
@@ -1,8 +1,8 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
 
-from cuda.bindings import _nvml as nvml
+from cuda.bindings import nvml
 
 
 def test_nvlink_get_link_count(all_devices):
diff --git a/cuda_bindings/tests/nvml/test_page_retirement.py b/cuda_bindings/tests/nvml/test_page_retirement.py
index 4aa5260443..2f33c46050 100644
--- a/cuda_bindings/tests/nvml/test_page_retirement.py
+++ b/cuda_bindings/tests/nvml/test_page_retirement.py
@@ -1,8 +1,8 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
 import pytest
-from cuda.bindings import _nvml as nvml
+from cuda.bindings import nvml
 
 from . import util
 
diff --git a/cuda_bindings/tests/nvml/test_pci.py b/cuda_bindings/tests/nvml/test_pci.py
index 3e57f0267c..90605783d8 100644
--- a/cuda_bindings/tests/nvml/test_pci.py
+++ b/cuda_bindings/tests/nvml/test_pci.py
@@ -4,7 +4,7 @@
 
 import contextlib
 
-from cuda.bindings import _nvml as nvml
+from cuda.bindings import nvml
 
 from .conftest import unsupported_before
 
diff --git a/cuda_bindings/tests/nvml/test_pynvml.py b/cuda_bindings/tests/nvml/test_pynvml.py
index 4157bfb772..5a25f66f6f 100644
--- a/cuda_bindings/tests/nvml/test_pynvml.py
+++ b/cuda_bindings/tests/nvml/test_pynvml.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
 # A set of tests ported from https://github.com/gpuopenanalytics/pynvml/blob/11.5.3/pynvml/tests/test_nvml.py
@@ -7,7 +7,7 @@
 import time
 
 import pytest
-from cuda.bindings import _nvml as nvml
+from cuda.bindings import nvml
 
 from . import util
 from .conftest import unsupported_before
diff --git a/cuda_bindings/tests/nvml/util.py b/cuda_bindings/tests/nvml/util.py
index e815231423..545826a2eb 100644
--- a/cuda_bindings/tests/nvml/util.py
+++ b/cuda_bindings/tests/nvml/util.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
 
@@ -6,7 +6,7 @@
 import platform
 from pathlib import Path
 
-from cuda.bindings import _nvml as nvml
+from cuda.bindings import nvml
 
 current_os = platform.system()
 if current_os == "VMkernel":
diff --git a/cuda_core/cuda/core/system/_device.pyx b/cuda_core/cuda/core/system/_device.pyx
index ac633a3fea..f661c4e685 100644
--- a/cuda_core/cuda/core/system/_device.pyx
+++ b/cuda_core/cuda/core/system/_device.pyx
@@ -8,7 +8,7 @@ from libc.math cimport ceil
 from multiprocessing import cpu_count
 from typing import Iterable
 
-from cuda.bindings import _nvml as nvml
+from cuda.bindings import nvml
 
 from ._nvml_context cimport initialize
 
diff --git a/cuda_core/cuda/core/system/_nvml_context.pyx b/cuda_core/cuda/core/system/_nvml_context.pyx
index d6d9c46060..e32ff51835 100644
--- a/cuda_core/cuda/core/system/_nvml_context.pyx
+++ b/cuda_core/cuda/core/system/_nvml_context.pyx
@@ -1,10 +1,10 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # SPDX-License-Identifier: Apache-2.0
 
 import threading
 
-from cuda.bindings import _nvml as nvml
+from cuda.bindings import nvml
 
 from . import exceptions
 
diff --git a/cuda_core/cuda/core/system/_system.pyx b/cuda_core/cuda/core/system/_system.pyx
index 3e15420dc8..1ffbbed520 100644
--- a/cuda_core/cuda/core/system/_system.pyx
+++ b/cuda_core/cuda/core/system/_system.pyx
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # SPDX-License-Identifier: Apache-2.0
 
@@ -19,7 +19,7 @@ else:
 
 
 if CUDA_BINDINGS_NVML_IS_COMPATIBLE:
-    from cuda.bindings import _nvml as nvml
+    from cuda.bindings import nvml
     # TODO: We need to be even more specific than version numbers for development.
     # This can be removed once we have a release including everything we need.
     for member in ["FieldId", "ClocksEventReasons"]:
diff --git a/cuda_core/cuda/core/system/_system_events.pyx b/cuda_core/cuda/core/system/_system_events.pyx
index 8b1a91d7bd..d8a64b619b 100644
--- a/cuda_core/cuda/core/system/_system_events.pyx
+++ b/cuda_core/cuda/core/system/_system_events.pyx
@@ -1,11 +1,11 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # SPDX-License-Identifier: Apache-2.0
 
 
 from libc.stdint cimport intptr_t
 
-from cuda.bindings import _nvml as nvml
+from cuda.bindings import nvml
 
 from ._nvml_context cimport initialize
 
diff --git a/cuda_core/cuda/core/system/exceptions.py b/cuda_core/cuda/core/system/exceptions.py
index 65bcdd27b5..990648e531 100644
--- a/cuda_core/cuda/core/system/exceptions.py
+++ b/cuda_core/cuda/core/system/exceptions.py
@@ -1,9 +1,9 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # SPDX-License-Identifier: Apache-2.0
 
 
-from cuda.bindings import _nvml as nvml
+from cuda.bindings import nvml
 
 NvmlError = nvml.NvmlError
 UninitializedError = nvml.UninitializedError
diff --git a/cuda_core/tests/system/test_system_device.py b/cuda_core/tests/system/test_system_device.py
index d4e5a2abb0..cf5c6f57af 100644
--- a/cuda_core/tests/system/test_system_device.py
+++ b/cuda_core/tests/system/test_system_device.py
@@ -19,7 +19,7 @@
 from cuda.core import system
 
 if system.CUDA_BINDINGS_NVML_IS_COMPATIBLE:
-    from cuda.bindings import _nvml as nvml
+    from cuda.bindings import nvml
     from cuda.core.system import DeviceArch, _device
 
 

From cd948b68b05905993b9e18555193fde6c492e336 Mon Sep 17 00:00:00 2001
From: Michael Droettboom <mdboom@gmail.com>
Date: Thu, 29 Jan 2026 16:14:45 -0500
Subject: [PATCH 3/4] Fix NVML compatibility check when cuda.bindings.nvml cannot be imported

---
 cuda_core/cuda/core/system/_system.pyx | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/cuda_core/cuda/core/system/_system.pyx b/cuda_core/cuda/core/system/_system.pyx
index 1ffbbed520..ecc80fb4e9 100644
--- a/cuda_core/cuda/core/system/_system.pyx
+++ b/cuda_core/cuda/core/system/_system.pyx
@@ -19,13 +19,17 @@ else:
 
 
 if CUDA_BINDINGS_NVML_IS_COMPATIBLE:
-    from cuda.bindings import nvml
-    # TODO: We need to be even more specific than version numbers for development.
-    # This can be removed once we have a release including everything we need.
-    for member in ["FieldId", "ClocksEventReasons"]:
-        if not hasattr(nvml, member):
-            CUDA_BINDINGS_NVML_IS_COMPATIBLE = False
-            break
+    try:
+        from cuda.bindings import nvml
+    except ImportError:
+        CUDA_BINDINGS_NVML_IS_COMPATIBLE = False
+    else:
+        # TODO: We need to be even more specific than version numbers for development.
+        # This can be removed once we have a release including everything we need.
+        for member in ["FieldId", "ClocksEventReasons"]:
+            if not hasattr(nvml, member):
+                CUDA_BINDINGS_NVML_IS_COMPATIBLE = False
+                break
 
 if CUDA_BINDINGS_NVML_IS_COMPATIBLE:
     from ._nvml_context import initialize

From ce6810fc940239189c1d877a04c7f6431fd5bbbb Mon Sep 17 00:00:00 2001
From: Michael Droettboom <mdboom@gmail.com>
Date: Fri, 30 Jan 2026 08:59:55 -0500
Subject: [PATCH 4/4] Catch ModuleNotFoundError instead of ImportError in _system.pyx

Co-authored-by: Ralf W. Grosse-Kunstleve <rwgkio@gmail.com>
---
 cuda_core/cuda/core/system/_system.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cuda_core/cuda/core/system/_system.pyx b/cuda_core/cuda/core/system/_system.pyx
index ecc80fb4e9..606c8aed8d 100644
--- a/cuda_core/cuda/core/system/_system.pyx
+++ b/cuda_core/cuda/core/system/_system.pyx
@@ -21,7 +21,7 @@ else:
 if CUDA_BINDINGS_NVML_IS_COMPATIBLE:
     try:
         from cuda.bindings import nvml
-    except ImportError:
+    except ModuleNotFoundError:
         CUDA_BINDINGS_NVML_IS_COMPATIBLE = False
     else:
         # TODO: We need to be even more specific than version numbers for development.