Skip to content

hardware

orchard.core.environment.hardware

Hardware Acceleration & Computing Environment.

This module provides high-level abstractions for hardware discovery (CUDA/MPS) and compute resource optimization. It manages the detection of available accelerators and synchronizes PyTorch threading with system capabilities.

configure_system_libraries()

Configures libraries for headless environments and reduces logging noise.

  • Sets Matplotlib to 'Agg' backend on Linux/Docker (no GUI)
  • Configures font embedding for PDF/PS exports
  • Suppresses verbose Matplotlib warnings
Source code in orchard/core/environment/hardware.py
def configure_system_libraries() -> None:
    """
    Prepares plotting/logging libraries for headless execution.

    - Forces the non-interactive 'Agg' Matplotlib backend on Linux/Docker
    - Embeds TrueType fonts (type 42) in PDF/PS exports
    - Raises Matplotlib's logger to WARNING to reduce noise
    """
    on_linux = platform.system() == "Linux"
    in_container = os.environ.get("IN_DOCKER") == "TRUE" or Path("/.dockerenv").exists()

    if not (on_linux or in_container):
        return

    matplotlib.use("Agg")
    # Type 42 (TrueType) keeps text editable/selectable in vector exports.
    for rc_key in ("pdf.fonttype", "ps.fonttype"):
        matplotlib.rcParams[rc_key] = 42
    logging.getLogger("matplotlib").setLevel(logging.WARNING)

has_mps_backend()

Check if MPS backend is available (macOS Apple Silicon).

Source code in orchard/core/environment/hardware.py
def has_mps_backend() -> bool:
    """Report whether the Metal (MPS) backend exists and is usable (Apple Silicon)."""
    mps = getattr(torch.backends, "mps", None)
    return mps is not None and mps.is_available()

detect_best_device()

Detects the most performant accelerator (CUDA > MPS > CPU).

Returns:

Type Description
str

Device string: 'cuda', 'mps', or 'cpu'

Source code in orchard/core/environment/hardware.py
def detect_best_device() -> str:
    """
    Picks the fastest available accelerator, preferring CUDA over MPS,
    and falling back to the CPU when neither is present.

    Returns:
        Device string: 'cuda', 'mps', or 'cpu'
    """
    if torch.cuda.is_available():
        return "cuda"
    return "mps" if has_mps_backend() else "cpu"

to_device_obj(device_str, local_rank=0)

Converts device string to PyTorch device object.

In distributed multi-GPU setups, uses local_rank to select the correct GPU and calls torch.cuda.set_device() for CUDA affinity.

Parameters:

Name Type Description Default
device_str str

'cuda', 'cpu', or 'auto' (auto-selects best available)

required
local_rank int

Node-local process rank for GPU assignment (default 0). Used to select cuda:{local_rank} in multi-GPU setups. Ignored for non-CUDA devices.

0

Returns:

Type Description
device

torch.device object

Raises:

Type Description
ValueError

If CUDA requested but unavailable, or invalid device string

Source code in orchard/core/environment/hardware.py
def to_device_obj(device_str: str, local_rank: int = 0) -> torch.device:
    """
    Converts device string to PyTorch device object.

    In distributed multi-GPU setups, uses ``local_rank`` to select the
    correct GPU and calls ``torch.cuda.set_device()`` for CUDA affinity.

    Args:
        device_str: 'cuda', 'mps', 'cpu', or 'auto' (auto-selects best
            available accelerator).
        local_rank: Node-local process rank for GPU assignment (default 0).
            Used to select ``cuda:{local_rank}`` in multi-GPU setups.
            Ignored for non-CUDA devices.

    Returns:
        torch.device object

    Raises:
        ValueError: If CUDA requested but unavailable, or invalid device string
    """
    if device_str == "auto":
        device_str = detect_best_device()

    # Reject unknown strings first so a typo produces a spelling error,
    # not a misleading hardware-availability error.
    if device_str not in ("cuda", "cpu", "mps"):
        raise ValueError(f"Unsupported device: {device_str}")

    if device_str == "cuda" and not torch.cuda.is_available():
        raise ValueError("CUDA requested but not available")

    if device_str == "cuda" and local_rank > 0:
        # Pin this process to its node-local GPU. Rank 0 falls through and
        # uses the default CUDA device (cuda:0), so no explicit pin is needed.
        torch.cuda.set_device(local_rank)
        return torch.device(f"cuda:{local_rank}")

    return torch.device(device_str)

get_accelerator_name()

Returns accelerator model name (CUDA GPU or Apple Silicon) or empty string.

Source code in orchard/core/environment/hardware.py
def get_accelerator_name() -> str:
    """Human-readable model name of the active accelerator, or '' when none."""
    if torch.cuda.is_available():
        # Query device 0; sufficient for identifying the node's GPU model.
        return torch.cuda.get_device_name(0)
    if has_mps_backend():
        arch = platform.machine()
        return f"Apple Silicon ({arch})"
    return ""

get_vram_info(device_idx=0)

Retrieves VRAM availability for a CUDA device.

Note

MPS (Apple Silicon) does not expose VRAM info via PyTorch — torch.mps.mem_get_info() does not exist. Returns 'N/A' for non-CUDA devices until Apple provides a public API.

Parameters:

Name Type Description Default
device_idx int

GPU index to query

0

Returns:

Type Description
str

Formatted string 'X.XX GB / Y.YY GB' or status message

Source code in orchard/core/environment/hardware.py
def get_vram_info(device_idx: int = 0) -> str:
    """
    Formats free/total VRAM for one CUDA device.

    Note:
        MPS (Apple Silicon) does not expose VRAM info via PyTorch —
        ``torch.mps.mem_get_info()`` does not exist. Returns 'N/A' for
        non-CUDA devices until Apple provides a public API.

    Args:
        device_idx: GPU index to query

    Returns:
        Formatted string 'X.XX GB / Y.YY GB' or status message
    """
    if not torch.cuda.is_available():
        return "N/A"

    try:
        if device_idx >= torch.cuda.device_count():
            return "Invalid Device Index"
        free_bytes, total_bytes = torch.cuda.mem_get_info(device_idx)
    except RuntimeError as err:
        logging.debug("VRAM query failed: %s", err)
        return "Query Failed"

    gib = 1024 ** 3
    return f"{free_bytes / gib:.2f} GB / {total_bytes / gib:.2f} GB"

get_num_workers()

Determines optimal DataLoader workers with RAM stability cap.

Returns:

Type Description
int

Recommended number of subprocesses (2-8 range)

Source code in orchard/core/environment/hardware.py
def get_num_workers() -> int:
    """
    Determines optimal DataLoader workers with RAM stability cap.

    Returns:
        Recommended number of subprocesses (2-8 range)
    """
    core_count = os.cpu_count() or _MIN_WORKERS
    # Small machines get the floor; larger ones dedicate half their cores,
    # capped so per-worker RAM usage stays bounded.
    if core_count > 4:
        return min(core_count // 2, _MAX_WORKERS)
    return _MIN_WORKERS

apply_cpu_threads(num_workers)

Sets optimal compute threads to avoid resource contention.

Synchronizes PyTorch, OMP, and MKL thread counts.

Parameters:

Name Type Description Default
num_workers int

Active DataLoader workers

required

Returns:

Type Description
int

Number of threads assigned to compute operations

Source code in orchard/core/environment/hardware.py
def apply_cpu_threads(num_workers: int) -> int:
    """
    Sets optimal compute threads to avoid resource contention.

    Synchronizes PyTorch, OMP, and MKL thread counts.

    Args:
        num_workers: Active DataLoader workers

    Returns:
        Number of threads assigned to compute operations
    """
    core_total = os.cpu_count() or 1
    # Reserve roughly one core per DataLoader worker, but never drop below
    # 2 compute threads.
    compute_threads = max(2, core_total - num_workers)

    torch.set_num_threads(compute_threads)
    # NOTE(review): OMP/MKL read these env vars at library init — setting
    # them here only affects libraries loaded afterwards; verify timing.
    for var in ("OMP_NUM_THREADS", "MKL_NUM_THREADS"):
        os.environ[var] = str(compute_threads)

    return compute_threads