36 commits
abdec47  wip (rparolin, Mar 17, 2026)
c418050  wip (rparolin, Mar 17, 2026)
b879fa5  fixing ci compiler errors (rparolin, Mar 17, 2026)
04ee3de  skipping tests that aren't supported (rparolin, Mar 17, 2026)
9ab3f46  cu12 support (rparolin, Mar 17, 2026)
bd75bc3  Merge branch 'main' into rparolin/managed_mem_advise_prefetch (rparolin, Mar 17, 2026)
1b1343b  Merge branch 'main' into rparolin/managed_mem_advise_prefetch (rparolin, Mar 17, 2026)
a948066  Moving to function from Buffer class methods to free standing functio… (rparolin, Mar 17, 2026)
1457599  precommit format (rparolin, Mar 17, 2026)
acb4024  iterating on implementation (rparolin, Mar 18, 2026)
d10ab07  Simplify managed-memory helpers: remove long-form aliases, cache look… (rparolin, Mar 18, 2026)
ae1de36  Merge branch 'main' into rparolin/managed_mem_advise_prefetch (rparolin, Mar 18, 2026)
c250c92  fix(test): reset _V2_BINDINGS cache so legacy-signature tests take th… (rparolin, Mar 18, 2026)
89329d9  fix(test): require concurrent_managed_access for advise tests that hi… (rparolin, Mar 18, 2026)
8a75d1b  fix: validate managed buffer before checking discard_prefetch binding… (rparolin, Mar 18, 2026)
9e9b1e0  refactor: extract managed memory ops into dedicated _managed_memory_o… (rparolin, Mar 18, 2026)
90f0711  pre-commit fix (rparolin, Mar 18, 2026)
b4d252c  Removing blank file (rparolin, Mar 19, 2026)
faaa1d8  wip (rparolin, Mar 19, 2026)
18786be  Merge branch 'main' into rparolin/managed_mem_advise_prefetch (rparolin, Apr 6, 2026)
9766ddc  Merge remote-tracking branch 'upstream/main' into rparolin/managed_me… (rparolin, Apr 27, 2026)
cf2f20d  fix(cuda.core): update binding_version import after upstream merge (rparolin, Apr 27, 2026)
db3bac2  revert: drop managed_memory shim in cuda.core.experimental (rparolin, Apr 27, 2026)
20d036e  feat(cuda.core): add Location dataclass for managed memory (rparolin, Apr 27, 2026)
c2dae53  feat(cuda.core): add _coerce_location helper (rparolin, Apr 28, 2026)
935c8ba  test(cuda.core): update monkeypatch target after binding_version rename (rparolin, Apr 28, 2026)
dc46535  refactor(cuda.core): tighten memory-attr query (rparolin, Apr 28, 2026)
818f5d2  feat(cuda.core): unified 1..N managed_memory.prefetch with cydriver (rparolin, Apr 28, 2026)
e296e72  feat(cuda.core): add managed_memory.discard (rparolin, Apr 28, 2026)
e697131  feat(cuda.core): unified 1..N managed_memory.discard_prefetch with cy… (rparolin, Apr 28, 2026)
3bc1021  feat(cuda.core): unified 1..N managed_memory.advise + drop legacy app… (rparolin, Apr 28, 2026)
fa23869  refactor(cuda.core): use Buffer.is_managed property in managed_memory… (rparolin, Apr 28, 2026)
68bdd14  docs(cuda.core): document Location, discard, and 1..N managed_memory ops (rparolin, Apr 28, 2026)
b4d9cbf  chore(cuda.core): drop narrative comments and tighten _coerce_locatio… (rparolin, Apr 28, 2026)
ee96758  chore(cuda.core): satisfy pre-commit hooks (rparolin, Apr 28, 2026)
d6f60f2  refactor(cuda.core): move managed_memory ops to cuda.core.utils (rparolin, Apr 28, 2026)
8 changes: 8 additions & 0 deletions cuda_core/cuda/core/_memory/_buffer.pxd
@@ -4,6 +4,7 @@
 from libc.stdint cimport uintptr_t
 
+from cuda.bindings cimport cydriver
 from cuda.core._resource_handles cimport DevicePtrHandle
 from cuda.core._stream cimport Stream
 
@@ -38,3 +39,10 @@ cdef Buffer Buffer_from_deviceptr_handle(
     MemoryResource mr,
     object ipc_descriptor = *
 )
+
+# Memory attribute query helpers (used by _managed_memory_ops)
+cdef void _init_mem_attrs(Buffer self)
+cdef int _query_memory_attrs(
+    _MemAttrs& out,
+    cydriver.CUdeviceptr ptr,
+) except -1 nogil
8 changes: 5 additions & 3 deletions cuda_core/cuda/core/_memory/_buffer.pyx
@@ -71,6 +71,7 @@ A type union of :obj:`~driver.CUdeviceptr`, `int` and `None` for hinting
 :attr:`Buffer.handle`.
 """
 
+
 cdef class Buffer:
     """Represent a handle to allocated memory.
 
@@ -455,12 +456,15 @@ cdef inline int _query_memory_attrs(
     ret = cydriver.cuPointerGetAttributes(3, attrs, <void**>vals, ptr)
     HANDLE_RETURN(ret)
 
+    # TODO: HMM/ATS-enabled sysmem should also report is_managed=True; the
+    # CU_POINTER_ATTRIBUTE_IS_MANAGED query does not capture that yet.
+    out.is_managed = is_managed != 0
+
     if memory_type == 0:
         # unregistered host pointer
         out.is_host_accessible = True
         out.is_device_accessible = False
         out.device_id = -1
-        out.is_managed = False
     elif (
         is_managed
         or memory_type == cydriver.CUmemorytype.CU_MEMORYTYPE_HOST
@@ -469,12 +473,10 @@
         out.is_host_accessible = True
         out.is_device_accessible = True
         out.device_id = device_id
-        out.is_managed = is_managed
     elif memory_type == cydriver.CUmemorytype.CU_MEMORYTYPE_DEVICE:
         out.is_host_accessible = False
         out.is_device_accessible = True
         out.device_id = device_id
-        out.is_managed = False
     else:
         with cython.gil:
             raise ValueError(f"Unsupported memory type: {memory_type}")
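For reference, the is_managed semantics that the tightened query encodes can be reproduced from Python through the public cuda.bindings driver API. This is a minimal sketch, not part of the PR: it assumes a managed-memory-capable GPU, and it uses single-attribute cuPointerGetAttribute calls where the Cython helper batches three lookups with cuPointerGetAttributes.

```python
from cuda.bindings import driver


def _check(res):
    # cuda.bindings driver calls return a tuple of (CUresult, *outputs).
    err, *out = res
    if err != driver.CUresult.CUDA_SUCCESS:
        raise RuntimeError(f"CUDA driver error: {err}")
    return out[0] if len(out) == 1 else out


_check(driver.cuInit(0))
dev = _check(driver.cuDeviceGet(0))
ctx = _check(driver.cuDevicePrimaryCtxRetain(dev))
_check(driver.cuCtxSetCurrent(ctx))

# Allocate 1 MiB of managed (unified) memory.
ptr = _check(driver.cuMemAllocManaged(1 << 20, driver.CUmemAttach_flags.CU_MEM_ATTACH_GLOBAL))

# The same attribute that _query_memory_attrs folds into _MemAttrs.is_managed.
is_managed = _check(
    driver.cuPointerGetAttribute(driver.CUpointer_attribute.CU_POINTER_ATTRIBUTE_IS_MANAGED, ptr)
)
print(bool(is_managed))  # True for cuMemAllocManaged allocations

_check(driver.cuMemFree(ptr))
_check(driver.cuDevicePrimaryCtxRelease(dev))
```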
75 changes: 75 additions & 0 deletions cuda_core/cuda/core/_memory/_managed_location.py
@@ -0,0 +1,75 @@
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

from __future__ import annotations

from dataclasses import dataclass
from typing import Literal

_VALID_KINDS = ("device", "host", "host_numa", "host_numa_current")
LocationKind = Literal["device", "host", "host_numa", "host_numa_current"]


@dataclass(frozen=True)
class Location:
    """Typed managed-memory location.

    Use the classmethod constructors (``device``, ``host``, ``host_numa``,
    ``host_numa_current``) rather than constructing directly.
    """

    kind: LocationKind
    id: int | None = None

    def __post_init__(self) -> None:
        if self.kind not in _VALID_KINDS:
            raise ValueError(f"kind must be one of {_VALID_KINDS!r}, got {self.kind!r}")
        if self.kind == "device":
            if not isinstance(self.id, int) or self.id < 0:
                raise ValueError("device id must be >= 0")
        elif self.kind == "host_numa":
            if not isinstance(self.id, int) or self.id < 0:
                raise ValueError("host_numa id must be >= 0")
        elif self.kind in ("host", "host_numa_current"):
            if self.id is not None:
                raise ValueError(f"{self.kind} location must have id=None")

    @classmethod
    def device(cls, device_id: int) -> Location:
        return cls(kind="device", id=device_id)

    @classmethod
    def host(cls) -> Location:
        return cls(kind="host", id=None)

    @classmethod
    def host_numa(cls, numa_id: int) -> Location:
        return cls(kind="host_numa", id=numa_id)

    @classmethod
    def host_numa_current(cls) -> Location:
        return cls(kind="host_numa_current", id=None)


def _coerce_location(value, *, allow_none: bool = False) -> Location | None:
    """Coerce ``Location`` / ``Device`` / int / ``None`` to ``Location``.

    Maps int ``-1`` to host and other non-negative ints to that device ordinal.
    """
    from cuda.core._device import Device  # avoid import cycle at module load

    if isinstance(value, Location):
        return value
    if isinstance(value, Device):
        return Location.device(value.device_id)
    if value is None:
        if allow_none:
            return None
        raise ValueError("location is required")
    if isinstance(value, int):
        if value == -1:
            return Location.host()
        if value >= 0:
            return Location.device(value)
        raise ValueError(f"device ordinal must be >= 0 (or -1 for host), got {value}")
    raise TypeError(f"location must be a Location, Device, int, or None; got {type(value).__name__}")
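A quick usage sketch for the new module, grounded in the code above. The private import path mirrors this file's location in the diff and may change once the managed-memory ops land in cuda.core.utils (see the final commit); the Device branch is omitted because it needs an initialized cuda.core Device.

```python
from cuda.core._memory._managed_location import Location, _coerce_location

# Classmethod constructors validate their ids via __post_init__.
gpu0 = Location.device(0)      # Location(kind="device", id=0)
host = Location.host()         # Location(kind="host", id=None)
numa1 = Location.host_numa(1)  # Location(kind="host_numa", id=1)

# _coerce_location maps the accepted shorthands onto Location:
assert _coerce_location(gpu0) is gpu0               # Location passes through
assert _coerce_location(-1) == Location.host()      # -1 means host
assert _coerce_location(2) == Location.device(2)    # ordinal means that device
assert _coerce_location(None, allow_none=True) is None

# Invalid inputs raise:
#   Location.device(-1)       -> ValueError (device id must be >= 0)
#   _coerce_location(-2)      -> ValueError (ordinal must be >= 0, or -1 for host)
#   _coerce_location("gpu0")  -> TypeError
```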