diff --git a/.github/workflows/test-wheel-linux.yml b/.github/workflows/test-wheel-linux.yml
index 35c5e6c3734..a57107bb052 100644
--- a/.github/workflows/test-wheel-linux.yml
+++ b/.github/workflows/test-wheel-linux.yml
@@ -130,21 +130,21 @@ jobs:
           path: ./cuda_pathfinder
 
       - name: Download cuda-python build artifacts
-        if: ${{ env.USE_BACKPORT_BINDINGS == '0' }}
+        if: ${{ env.BINDINGS_SOURCE == 'main' }}
         uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c  # v8.0.1
         with:
           name: cuda-python-wheel
           path: .
 
       - name: Download cuda.bindings build artifacts
-        if: ${{ env.USE_BACKPORT_BINDINGS == '0' }}
+        if: ${{ env.BINDINGS_SOURCE == 'main' }}
         uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c  # v8.0.1
         with:
           name: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}
           path: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}
 
       - name: Download cuda-python & cuda.bindings build artifacts from the prior branch
-        if: ${{ env.USE_BACKPORT_BINDINGS == '1' }}
+        if: ${{ env.BINDINGS_SOURCE == 'backport' }}
         env:
           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         run: |
@@ -184,6 +184,7 @@ jobs:
           ls -lahR .
 
       - name: Display structure of downloaded cuda.bindings artifacts
+        if: ${{ env.BINDINGS_SOURCE != 'published' }}
         run: |
           pwd
           ls -lahR $CUDA_BINDINGS_ARTIFACTS_DIR
@@ -285,6 +286,7 @@ jobs:
         run: run-tests core
 
       - name: Ensure cuda-python installable
+        if: ${{ env.BINDINGS_SOURCE == 'main' }}
         run: |
           if [[ "${{ matrix.LOCAL_CTK }}" == 1 ]]; then
             pip install --only-binary=:all: cuda_python*.whl
diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml
index 765823c6bfc..b9e03d12851 100644
--- a/.github/workflows/test-wheel-windows.yml
+++ b/.github/workflows/test-wheel-windows.yml
@@ -125,21 +125,21 @@ jobs:
           path: ./cuda_pathfinder
 
       - name: Download cuda-python build artifacts
-        if: ${{ env.USE_BACKPORT_BINDINGS == '0' }}
+        if: ${{ env.BINDINGS_SOURCE == 'main' }}
         uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c  # v8.0.1
         with:
           name: cuda-python-wheel
           path: .
 
       - name: Download cuda.bindings build artifacts
-        if: ${{ env.USE_BACKPORT_BINDINGS == '0' }}
+        if: ${{ env.BINDINGS_SOURCE == 'main' }}
         uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c  # v8.0.1
         with:
           name: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}
           path: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}
 
       - name: Download cuda-python & cuda.bindings build artifacts from the prior branch
-        if: ${{ env.USE_BACKPORT_BINDINGS == '1' }}
+        if: ${{ env.BINDINGS_SOURCE == 'backport' }}
         env:
           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         run: |
@@ -170,6 +170,7 @@ jobs:
           Get-ChildItem -Recurse -Force | Select-Object Mode, LastWriteTime, Length, FullName
 
       - name: Display structure of downloaded cuda.bindings artifacts
+        if: ${{ env.BINDINGS_SOURCE != 'published' }}
         run: |
           Get-Location
           Get-ChildItem -Recurse -Force $env:CUDA_BINDINGS_ARTIFACTS_DIR | Select-Object Mode, LastWriteTime, Length, FullName
@@ -261,6 +262,7 @@ jobs:
         run: run-tests core
 
       - name: Ensure cuda-python installable
+        if: ${{ env.BINDINGS_SOURCE == 'main' }}
         run: |
           if ('${{ matrix.LOCAL_CTK }}' -eq '1') {
             pip install --only-binary=:all: (Get-ChildItem -Filter cuda_python*.whl).FullName
diff --git a/ci/tools/env-vars b/ci/tools/env-vars
index 17db607c29b..30fac1cdce8 100755
--- a/ci/tools/env-vars
+++ b/ci/tools/env-vars
@@ -52,34 +52,38 @@ elif [[ "${1}" == "test" ]]; then
   BUILD_CUDA_MAJOR="$(cut -d '.' -f 1 <<< ${BUILD_CUDA_VER})"
   TEST_CUDA_MAJOR="$(cut -d '.' -f 1 <<< ${CUDA_VER})"
   CUDA_BINDINGS_ARTIFACT_BASENAME="cuda-bindings-python${PYTHON_VERSION_FORMATTED}-cuda${BUILD_CUDA_VER}-${HOST_PLATFORM}"
-  # USE_BACKPORT_BINDINGS flags the CTK-major-mismatch case where the
-  # current-run bindings wheel was built for a different CTK major than the
-  # one under test, so we must pull the bindings wheel from the backport
-  # branch instead. This is independent of whether bindings tests run.
-  # SKIP_CUDA_BINDINGS_TEST is the test-time gate: it is set when the CTK
-  # majors differ OR when the caller tells us to skip for path-filter
-  # reasons via SKIP_BINDINGS_TEST_OVERRIDE.
+
+  # BINDINGS_SOURCE controls which cuda-bindings to install at test time:
+  #   main      — use the just-built bindings wheel from this CI run
+  #   backport  — fetch bindings from the prior (N-1) branch
+  #   published — install from PyPI (cuda-bindings==${TEST_CUDA_MAJOR}.${TEST_CUDA_MINOR}.*)
+  #
+  # SKIP_CUDA_BINDINGS_TEST / SKIP_CYTHON_TEST control which *tests* to run
+  # (they do NOT affect installation — that's BINDINGS_SOURCE's job).
+
+  BUILD_CUDA_MINOR="$(cut -d '.' -f 2 <<< ${BUILD_CUDA_VER})"
+  TEST_CUDA_MINOR="$(cut -d '.' -f 2 <<< ${CUDA_VER})"
+
   if [[ ${BUILD_CUDA_MAJOR} != ${TEST_CUDA_MAJOR} ]]; then
-    USE_BACKPORT_BINDINGS=1
+    # Major mismatch (e.g. build=13.x, test=12.x): use the backport branch.
+    BINDINGS_SOURCE=backport
+    SKIP_CUDA_BINDINGS_TEST=1
+    SKIP_CYTHON_TEST=1
+  elif [[ ${BUILD_CUDA_MINOR} != ${TEST_CUDA_MINOR} ]]; then
+    # Same major, minor mismatch (e.g. build=13.2, test=13.0): use published
+    # bindings from PyPI to test the real-world backward-compat scenario.
+    BINDINGS_SOURCE=published
     SKIP_CUDA_BINDINGS_TEST=1
     SKIP_CYTHON_TEST=1
   else
-    USE_BACKPORT_BINDINGS=0
-    # Path-filter override only skips bindings tests, NOT cython tests
-    # for other modules (e.g. cuda.core). Cython skip is driven solely
-    # by the build/test CTK minor-version mismatch.
+    # Exact match: use the just-built bindings wheel.
+    BINDINGS_SOURCE=main
     if [[ "${SKIP_BINDINGS_TEST_OVERRIDE:-0}" == "1" ]]; then
       SKIP_CUDA_BINDINGS_TEST=1
     else
       SKIP_CUDA_BINDINGS_TEST=0
     fi
-    BUILD_CUDA_MINOR="$(cut -d '.' -f 2 <<< ${BUILD_CUDA_VER})"
-    TEST_CUDA_MINOR="$(cut -d '.' -f 2 <<< ${CUDA_VER})"
-    if [[ ${BUILD_CUDA_MINOR} != ${TEST_CUDA_MINOR} ]]; then
-      SKIP_CYTHON_TEST=1
-    else
-      SKIP_CYTHON_TEST=0
-    fi
+    SKIP_CYTHON_TEST=0
   fi
   # We don't test compute-sanitizer on CTK<12 because backporting fixes is too much effort
   # We only test compute-sanitizer on python 3.12 arbitrarily; we don't need to use sanitizer on the entire matrix
@@ -93,10 +97,11 @@ elif [[ "${1}" == "test" ]]; then
   fi
   {
     echo "SETUP_SANITIZER=${SETUP_SANITIZER}"
+    echo "BINDINGS_SOURCE=${BINDINGS_SOURCE}"
     echo "SKIP_CUDA_BINDINGS_TEST=${SKIP_CUDA_BINDINGS_TEST}"
     echo "SKIP_CYTHON_TEST=${SKIP_CYTHON_TEST}"
     echo "TEST_CUDA_MAJOR=${TEST_CUDA_MAJOR}"
-    echo "USE_BACKPORT_BINDINGS=${USE_BACKPORT_BINDINGS}"
+    echo "TEST_CUDA_MINOR=${TEST_CUDA_MINOR}"
   } >> $GITHUB_ENV
 fi
 
diff --git a/ci/tools/run-tests b/ci/tools/run-tests
index d42634a7073..eb741f4f61f 100755
--- a/ci/tools/run-tests
+++ b/ci/tools/run-tests
@@ -54,10 +54,12 @@ elif [[ "${test_module}" == "bindings" ]]; then
   fi
   popd
 elif [[ "${test_module}" == "core" ]]; then
-  # If build/test majors match: cuda.bindings is installed in the previous step.
-  # If mismatch: cuda.bindings is installed from the backport branch.
-  if [[ "${SKIP_CUDA_BINDINGS_TEST}" == 1 ]]; then
-    echo "Installing bindings wheel"
+  # Install cuda.bindings for core tests based on BINDINGS_SOURCE.
+  if [[ "${BINDINGS_SOURCE}" == "published" ]]; then
+    echo "Installing published cuda-bindings==${TEST_CUDA_MAJOR}.${TEST_CUDA_MINOR}.* from PyPI"
+    pip install "cuda-bindings==${TEST_CUDA_MAJOR}.${TEST_CUDA_MINOR}.*"
+  elif [[ "${BINDINGS_SOURCE}" == "backport" || "${BINDINGS_SOURCE}" == "main" ]]; then
+    echo "Installing bindings wheel (source: ${BINDINGS_SOURCE})"
     if [[ "${LOCAL_CTK}" == 1 ]]; then
       pip install "${CUDA_BINDINGS_ARTIFACTS_DIR}"/*.whl
     else
@@ -83,6 +85,8 @@ elif [[ "${test_module}" == "core" ]]; then
   # Constrain cuda-toolkit to the requested CTK version to avoid
   # pip pulling in a newer nvidia-cuda-runtime that conflicts with it.
   pip install "${WHL_EXTRA[@]}" --group "test-cu${TEST_CUDA_MAJOR}${FREE_THREADING}" "cuda-toolkit==${CUDA_VER_MINOR}.*"
+  echo "Installed packages before core tests:"
+  pip list
   echo "Running core tests"
   ${SANITIZER_CMD} pytest -rxXs -v --durations=0 --randomly-dont-reorganize tests/
   # Currently our CI always installs the latest bindings (from either major version).
diff --git a/cuda_core/cuda/core/_cpp/resource_handles.cpp b/cuda_core/cuda/core/_cpp/resource_handles.cpp
index ff0e0db5066..f846308af70 100644
--- a/cuda_core/cuda/core/_cpp/resource_handles.cpp
+++ b/cuda_core/cuda/core/_cpp/resource_handles.cpp
@@ -77,6 +77,13 @@ decltype(&cuLinkDestroy) p_cuLinkDestroy = nullptr;
 decltype(&cuGraphicsUnmapResources) p_cuGraphicsUnmapResources = nullptr;
 decltype(&cuGraphicsUnregisterResource) p_cuGraphicsUnregisterResource = nullptr;
 
+// SM resource split (13.1+ — may be null on older drivers/bindings)
+#if CUDA_VERSION >= 13010
+decltype(&cuDevSmResourceSplit) p_cuDevSmResourceSplit = nullptr;
+#else
+void* p_cuDevSmResourceSplit = nullptr;
+#endif
+
 // NVRTC function pointers
 decltype(&nvrtcDestroyProgram) p_nvrtcDestroyProgram = nullptr;
 
@@ -1319,4 +1326,27 @@ FileDescriptorHandle create_fd_handle_ref(int fd) {
 #endif
 }
 
+// ============================================================================
+// SM resource split wrapper
+// ============================================================================
+
+CUresult sm_resource_split(CUdevResource* result, unsigned int nbGroups,
+                           const CUdevResource* input, CUdevResource* remainder,
+                           unsigned int flags, void* groupParams) {
+#if CUDA_VERSION >= 13010  // headers declare cuDevSmResourceSplit: dispatch through the pointer
+    if (!p_cuDevSmResourceSplit) {  // pointer is still null: report "not supported" instead of crashing
+        return CUDA_ERROR_NOT_SUPPORTED;
+    }
+    return p_cuDevSmResourceSplit(
+        result, nbGroups, input, remainder, flags,
+        static_cast<CU_DEV_SM_RESOURCE_GROUP_PARAMS*>(groupParams));  // restore the type erased to void* in the signature
+#else  // older headers: p_cuDevSmResourceSplit is only a void* placeholder and is never called
+    return CUDA_ERROR_NOT_SUPPORTED;
+#endif
+}
+
+bool has_sm_resource_split() noexcept {  // true only if sm_resource_split() can actually dispatch
+    return CUDA_VERSION >= 13010 && p_cuDevSmResourceSplit != nullptr;  // same build guard as sm_resource_split()
+}
+
 }  // namespace cuda_core
diff --git a/cuda_core/cuda/core/_cpp/resource_handles.hpp b/cuda_core/cuda/core/_cpp/resource_handles.hpp
index 3c195f1f1ad..1162ec89843 100644
--- a/cuda_core/cuda/core/_cpp/resource_handles.hpp
+++ b/cuda_core/cuda/core/_cpp/resource_handles.hpp
@@ -108,6 +108,15 @@ extern decltype(&cuLinkDestroy) p_cuLinkDestroy;
 extern decltype(&cuGraphicsUnmapResources) p_cuGraphicsUnmapResources;
 extern decltype(&cuGraphicsUnregisterResource) p_cuGraphicsUnregisterResource;
 
+// SM resource split (13.1+ — may be null on older drivers/bindings)
+#if CUDA_VERSION >= 13010
+extern decltype(&cuDevSmResourceSplit) p_cuDevSmResourceSplit;
+#else
+// cuDevSmResourceSplit doesn't exist in CUDA < 13.1 headers (12.x and 13.0),
+// so use a void* placeholder; sm_resource_split() never calls through it there.
+extern void* p_cuDevSmResourceSplit;
+#endif
+
 // ============================================================================
 // NVRTC function pointers
 //
@@ -747,4 +756,22 @@ inline PyObject* as_py(const FileDescriptorHandle& h) noexcept {
     return PyLong_FromSsize_t(as_intptr(h));
 }
 
+// ============================================================================
+// SM resource split wrapper (13.1+)
+//
+// Calls through p_cuDevSmResourceSplit if available, otherwise returns
+// CUDA_ERROR_NOT_SUPPORTED. This avoids a direct Cython cimport of the
+// cydriver cdef function, which would fail at module init on cuda-bindings
+// < 13.1 (see https://github.com/NVIDIA/cuda-python/issues/2063).
+// ============================================================================
+
+// groupParams is void* so the Cython declaration doesn't reference
+// CU_DEV_SM_RESOURCE_GROUP_PARAMS (absent from cuda-bindings 13.0 .pxd).
+CUresult sm_resource_split(CUdevResource* result, unsigned int nbGroups,
+                           const CUdevResource* input, CUdevResource* remainder,
+                           unsigned int flags, void* groupParams);
+
+// Returns true if the cuDevSmResourceSplit function pointer is available.
+bool has_sm_resource_split() noexcept;
+
 }  // namespace cuda_core
diff --git a/cuda_core/cuda/core/_device_resources.pyx b/cuda_core/cuda/core/_device_resources.pyx
index 40c0a874d05..5ddc76dcacd 100644
--- a/cuda_core/cuda/core/_device_resources.pyx
+++ b/cuda_core/cuda/core/_device_resources.pyx
@@ -203,6 +203,9 @@ cdef inline unsigned int _to_sm_count(object value) except? 0:
     return <unsigned int>(value)
 
 
+IF CUDA_CORE_BUILD_MAJOR >= 13:
+    from cuda.core._resource_handles cimport sm_resource_split, has_sm_resource_split
+
 cdef int _structured_split_checked = 0
 
 cdef inline bint _can_use_structured_sm_split():
@@ -211,7 +214,9 @@ cdef inline bint _can_use_structured_sm_split():
     if _structured_split_checked != 0:
         return _structured_split_checked == 1
     IF CUDA_CORE_BUILD_MAJOR >= 13:
-        if cy_driver_version() >= (13, 1, 0) and cy_binding_version() >= (13, 1, 0):
+        if (has_sm_resource_split()
+                and cy_driver_version() >= (13, 1, 0)
+                and cy_binding_version() >= (13, 1, 0)):
             _structured_split_checked = 1
             return True
     _structured_split_checked = -1
@@ -300,13 +305,13 @@ IF CUDA_CORE_BUILD_MAJOR >= 13:
 
             memset(&remaining, 0, sizeof(cydriver.CUdevResource))
             with nogil:
-                HANDLE_RETURN(cydriver.cuDevSmResourceSplit(
+                HANDLE_RETURN(sm_resource_split(
                     result,
                     <unsigned int>(n_groups),
                     &sm._resource,
                     &remaining,
                     0,
-                    params,
+                    <void*>params,
                 ))
 
             if result != NULL:
diff --git a/cuda_core/cuda/core/_resource_handles.pxd b/cuda_core/cuda/core/_resource_handles.pxd
index 8d07c27dedb..8d11ce4c735 100644
--- a/cuda_core/cuda/core/_resource_handles.pxd
+++ b/cuda_core/cuda/core/_resource_handles.pxd
@@ -222,3 +222,12 @@ cdef CuLinkHandle create_culink_handle_ref(cydriver.CUlinkState state) except+ n
 # File descriptor handles
 cdef FileDescriptorHandle create_fd_handle(int fd) except+ nogil
 cdef FileDescriptorHandle create_fd_handle_ref(int fd) except+ nogil
+
+# SM resource split (13.1+ — calls through function pointer, safe on older bindings)
+# groupParams is void* here to avoid referencing CU_DEV_SM_RESOURCE_GROUP_PARAMS
+# (which doesn't exist in cuda-bindings 13.0 .pxd). The C++ side casts it.
+cdef cydriver.CUresult sm_resource_split(
+    cydriver.CUdevResource* result, unsigned int nbGroups,
+    const cydriver.CUdevResource* input, cydriver.CUdevResource* remainder,
+    unsigned int flags, void* groupParams) nogil
+cdef bint has_sm_resource_split() noexcept nogil
diff --git a/cuda_core/cuda/core/_resource_handles.pyx b/cuda_core/cuda/core/_resource_handles.pyx
index a1dc05464ac..cbb0fdb8433 100644
--- a/cuda_core/cuda/core/_resource_handles.pyx
+++ b/cuda_core/cuda/core/_resource_handles.pyx
@@ -208,6 +208,15 @@ cdef extern from "_cpp/resource_handles.hpp" namespace "cuda_core":
     FileDescriptorHandle create_fd_handle_ref "cuda_core::create_fd_handle_ref" (
         int fd) except+ nogil
 
+    # SM resource split (13.1+ wrapper — avoids direct cydriver cimport)
+    # groupParams is void* to avoid referencing CU_DEV_SM_RESOURCE_GROUP_PARAMS
+    # (which doesn't exist in cuda-bindings 13.0 .pxd). The C++ side casts it.
+    cydriver.CUresult sm_resource_split "cuda_core::sm_resource_split" (
+        cydriver.CUdevResource* result, unsigned int nbGroups,
+        const cydriver.CUdevResource* input, cydriver.CUdevResource* remainder,
+        unsigned int flags, void* groupParams) nogil
+    bint has_sm_resource_split "cuda_core::has_sm_resource_split" () noexcept nogil
+
 
 # =============================================================================
 # CUDA Driver API capsule
@@ -290,6 +299,9 @@ cdef extern from "_cpp/resource_handles.hpp" namespace "cuda_core":
     void* p_cuGraphicsUnmapResources "reinterpret_cast<void*&>(cuda_core::p_cuGraphicsUnmapResources)"
     void* p_cuGraphicsUnregisterResource "reinterpret_cast<void*&>(cuda_core::p_cuGraphicsUnregisterResource)"
 
+    # SM resource split (13.1+)
+    void* p_cuDevSmResourceSplit "reinterpret_cast<void*&>(cuda_core::p_cuDevSmResourceSplit)"
+
     # NVRTC
     void* p_nvrtcDestroyProgram "reinterpret_cast<void*&>(cuda_core::p_nvrtcDestroyProgram)"
 
@@ -372,6 +384,9 @@ p_cuLinkDestroy = _get_driver_fn("cuLinkDestroy")
 p_cuGraphicsUnmapResources = _get_driver_fn("cuGraphicsUnmapResources")
 p_cuGraphicsUnregisterResource = _get_driver_fn("cuGraphicsUnregisterResource")
 
+# SM resource split (13.1+ — may not exist in older cuda-bindings)
+p_cuDevSmResourceSplit = _get_optional_driver_fn("cuDevSmResourceSplit")
+
 # =============================================================================
 # NVRTC function pointer initialization
 # =============================================================================
diff --git a/cuda_core/docs/source/release/1.0.1-notes.rst b/cuda_core/docs/source/release/1.0.1-notes.rst
index 8654f70e7fd..b3cc3b44965 100644
--- a/cuda_core/docs/source/release/1.0.1-notes.rst
+++ b/cuda_core/docs/source/release/1.0.1-notes.rst
@@ -10,6 +10,11 @@
 Fixes and enhancements
 ----------------------
 
+- Fixed ``ImportError`` when importing ``cuda.core`` with
+  ``cuda-bindings`` 13.0.x due to an unavailable driver function
+  (``cuDevSmResourceSplit``).
+  (`#2063 <https://github.com/NVIDIA/cuda-python/issues/2063>`__,
+  `#2064 <https://github.com/NVIDIA/cuda-python/pull/2064>`__)
 - When iterating over MIG devices with
   ``cuda.core.system.Device.mig.get_all_devices``, only available MIG devices will
   be returned. Previously, if any MIG device was unavailable, an exception would