Skip to content

architectures

orchard.architectures

Architectures Factory Package.

Implements the Factory Pattern to decouple model instantiation from the main execution logic. Routes requests to specific architecture definitions and ensures models are correctly adapted to the dataset geometry (channels and classes) resolved at runtime.

build_convnext_tiny(num_classes, in_channels, *, pretrained)

Constructs ConvNeXt-Tiny adapted for image classification datasets.

Workflow
  1. Load pretrained weights from ImageNet (if enabled)
  2. Modify first conv layer to accept custom input channels
  3. Apply weight morphing for channel compression (if grayscale)
  4. Replace classification head with dataset-specific linear layer

Parameters:

Name Type Description Default
num_classes int

Number of dataset classes for classification head

required
in_channels int

Input channels (1=Grayscale, 3=RGB)

required
pretrained bool

Whether to load ImageNet pretrained weights

required

Returns:

Type Description
Module

Adapted ConvNeXt-Tiny model (device placement handled by factory).

Source code in orchard/architectures/convnext_tiny.py
def build_convnext_tiny(
    num_classes: int,
    in_channels: int,
    *,
    pretrained: bool,
) -> nn.Module:
    """
    Build a ConvNeXt-Tiny backbone adapted to the target dataset geometry.

    The adaptation proceeds in four stages:
        1. Optionally initialize from ImageNet pretrained weights.
        2. Rebuild the stem convolution for the requested input channels.
        3. Morph pretrained stem weights across channels (grayscale case).
        4. Swap the ImageNet head for a dataset-sized linear projection.

    Args:
        num_classes: Number of dataset classes for classification head
        in_channels: Input channels (1=Grayscale, 3=RGB)
        pretrained: Whether to load ImageNet pretrained weights

    Returns:
        Adapted ConvNeXt-Tiny model (device placement handled by factory).
    """
    # Stage 1: base network, optionally carrying ImageNet-1k weights.
    model = models.convnext_tiny(
        weights=models.ConvNeXt_Tiny_Weights.IMAGENET1K_V1 if pretrained else None
    )

    # Keep a handle on the stock stem conv so its weights can be morphed
    # before it is replaced below.
    stem_conv = model.features[0][0]  # Conv2d(3, 96, kernel_size=4, stride=4)

    # Stage 2: stem conv rebuilt for the requested channel count
    # (ConvNeXt ships with a 3-channel stem).
    adapted_stem = nn.Conv2d(
        in_channels=in_channels,  # Custom: 1 or 3
        out_channels=96,  # ConvNeXt-Tiny standard stem width
        kernel_size=(4, 4),
        stride=(4, 4),
        padding=(0, 0),
        bias=True,  # ConvNeXt uses bias in stem conv
    )

    # Stage 3: transfer pretrained stem knowledge into the new layer.
    if pretrained:
        morph_conv_weights(stem_conv, adapted_stem, in_channels)

    # Install the adapted stem.
    model.features[0][0] = adapted_stem

    # Stage 4: dataset-specific projection replaces Linear(768, 1000).
    model.classifier[2] = nn.Linear(model.classifier[2].in_features, num_classes)

    return cast(nn.Module, model)

build_efficientnet_b0(num_classes, in_channels, *, pretrained)

Constructs EfficientNet-B0 adapted for image classification datasets.

Workflow
  1. Load pretrained weights from ImageNet (if enabled)
  2. Modify first conv layer to accept custom input channels
  3. Apply weight morphing for channel compression (if grayscale)
  4. Replace classification head with dataset-specific linear layer

Parameters:

Name Type Description Default
num_classes int

Number of dataset classes for classification head

required
in_channels int

Input channels (1=Grayscale, 3=RGB)

required
pretrained bool

Whether to load ImageNet pretrained weights

required

Returns:

Type Description
Module

Adapted EfficientNet-B0 model (device placement handled by factory).

Source code in orchard/architectures/efficientnet_b0.py
def build_efficientnet_b0(
    num_classes: int,
    in_channels: int,
    *,
    pretrained: bool,
) -> nn.Module:
    """
    Build an EfficientNet-B0 backbone adapted to the target dataset geometry.

    The adaptation proceeds in four stages:
        1. Optionally initialize from ImageNet pretrained weights.
        2. Rebuild the stem convolution for the requested input channels.
        3. Morph pretrained stem weights across channels (grayscale case).
        4. Swap the ImageNet head for a dataset-sized linear projection.

    Args:
        num_classes: Number of dataset classes for classification head
        in_channels: Input channels (1=Grayscale, 3=RGB)
        pretrained: Whether to load ImageNet pretrained weights

    Returns:
        Adapted EfficientNet-B0 model (device placement handled by factory).
    """
    # Stage 1: base network, optionally carrying ImageNet-1k weights.
    model = models.efficientnet_b0(
        weights=models.EfficientNet_B0_Weights.IMAGENET1K_V1 if pretrained else None
    )

    # Keep a handle on the stock stem conv so its weights can be morphed
    # before it is replaced below.
    stem_conv = model.features[0][0]

    # Stage 2: stem conv rebuilt for the requested channel count
    # (EfficientNet ships with a 3-channel stem).
    adapted_stem = nn.Conv2d(
        in_channels=in_channels,  # Custom: 1 or 3
        out_channels=32,  # EfficientNet standard stem width
        kernel_size=(3, 3),
        stride=(2, 2),  # Original EfficientNet stem (matches pretrained spatial statistics)
        padding=(1, 1),
        bias=False,
    )

    # Stage 3: transfer pretrained stem knowledge into the new layer.
    if pretrained:
        morph_conv_weights(stem_conv, adapted_stem, in_channels)

    # Install the adapted stem.
    model.features[0][0] = adapted_stem

    # Stage 4: dataset-specific projection replaces the 1000-class head.
    model.classifier[1] = nn.Linear(model.classifier[1].in_features, num_classes)  # 1280 features

    return cast(nn.Module, model)

get_model(device, dataset_cfg, arch_cfg, verbose=True)

Factory function to resolve, instantiate, and prepare architectures.

It maps configuration identifiers to specific builder functions via an internal registry. Structural parameters like input channels and class cardinality are derived from the 'effective' geometry resolved by the DatasetConfig.

Parameters:

Name Type Description Default
device device

Hardware accelerator target.

required
dataset_cfg DatasetConfig

Dataset sub-config with resolved metadata.

required
arch_cfg ArchitectureConfig

Architecture sub-config with model selection.

required
verbose bool

If True, emit builder-internal INFO logging.

True

Returns:

Type Description
Module

nn.Module: The instantiated model synchronized with the target device.

Example

model = get_model(device, dataset_cfg=cfg.dataset, arch_cfg=cfg.architecture)

Raises:

Type Description
ValueError

If the requested architecture is not found in the registry.

Source code in orchard/architectures/factory.py
def get_model(
    device: torch.device,
    dataset_cfg: DatasetConfig,
    arch_cfg: ArchitectureConfig,
    verbose: bool = True,
) -> nn.Module:
    """
    Factory function to resolve, instantiate, and prepare architectures.

    Configuration identifiers are mapped to builder functions through an
    internal registry. Structural parameters (input channels, class
    cardinality) come from the 'effective' geometry already resolved on
    the DatasetConfig.

    Args:
        device: Hardware accelerator target.
        dataset_cfg: Dataset sub-config with resolved metadata.
        arch_cfg: Architecture sub-config with model selection.
        verbose: If True, emit builder-internal INFO logging.

    Returns:
        nn.Module: The instantiated model synchronized with the target device.

    Example:
        >>> model = get_model(device, dataset_cfg=cfg.dataset, arch_cfg=cfg.architecture)

    Raises:
        ValueError: If the requested architecture is not found in the registry.
    """
    # Structural dimensions derive from the resolved dataset geometry.
    num_classes = dataset_cfg.num_classes
    in_channels = dataset_cfg.effective_in_channels
    arch_key = arch_cfg.name.lower()

    if verbose:
        logger.info(
            "%s%s %-18s: %s | Input: %dx%dx%d | Output: %d classes",
            LogStyle.INDENT,
            LogStyle.ARROW,
            "Architecture",
            arch_cfg.name,
            dataset_cfg.img_size,
            dataset_cfg.img_size,
            in_channels,
            num_classes,
        )

    # Instance construction and adaptation.
    # In quiet mode (e.g. export phase) the logger is temporarily raised to
    # WARNING so builder-internal INFO lines already shown during training
    # are not duplicated; the original level is always restored.
    saved_level = logger.level
    if not verbose:
        logger.setLevel(logging.WARNING)
    try:
        with _suppress_download_noise():
            model = _dispatch_builder(
                arch_key, num_classes, in_channels, arch_cfg, dataset_cfg.resolution
            )
    finally:
        logger.setLevel(saved_level)

    # Builders stay device-agnostic; placement is centralised here.
    model = model.to(device)

    # Parameter telemetry.
    if verbose:
        param_count = sum(p.numel() for p in model.parameters())
        logger.info(
            "%s%s %-18s: %s | Parameters: %s",
            LogStyle.INDENT,
            LogStyle.ARROW,
            "Deployed",
            str(device).upper(),
            f"{param_count:,}",
        )

    return model

build_mini_cnn(num_classes, in_channels, *, dropout)

Constructs MiniCNN for low-resolution image classification.

Parameters:

Name Type Description Default
num_classes int

Number of dataset classes

required
in_channels int

Input channels (1=Grayscale, 3=RGB)

required
dropout float

Dropout probability before final FC layer

required

Returns:

Type Description
Module

MiniCNN model (device placement handled by factory).

Source code in orchard/architectures/mini_cnn.py
def build_mini_cnn(
    num_classes: int,
    in_channels: int,
    *,
    dropout: float,
) -> nn.Module:
    """
    Construct a MiniCNN for low-resolution image classification.

    Args:
        num_classes: Number of dataset classes
        in_channels: Input channels (1=Grayscale, 3=RGB)
        dropout: Dropout probability before final FC layer

    Returns:
        MiniCNN model (device placement handled by factory).
    """
    # No adaptation needed: MiniCNN is parameterized directly.
    model = MiniCNN(
        in_channels=in_channels,
        num_classes=num_classes,
        dropout=dropout,
    )
    return model

build_resnet_18(num_classes, in_channels, *, pretrained, resolution)

Constructs ResNet-18 with resolution-aware architectural adaptation.

At 28x28/32x32, performs stem surgery to preserve spatial resolution. At 64x64, 128x128, and 224x224, uses the standard ResNet-18 architecture.

Workflow
  1. Load ImageNet pretrained ResNet-18 (if enabled)
  2. Apply resolution-specific stem adaptation
  3. Replace classification head with dataset-specific linear layer

Parameters:

Name Type Description Default
num_classes int

Number of dataset classes

required
in_channels int

Input channels (1=Grayscale, 3=RGB)

required
pretrained bool

Whether to load ImageNet pretrained weights

required
resolution int

Input image resolution (28, 32, 64, 128, or 224)

required

Returns:

Type Description
Module

Adapted ResNet-18 (device placement handled by factory).

Source code in orchard/architectures/resnet_18.py
def build_resnet_18(
    num_classes: int,
    in_channels: int,
    *,
    pretrained: bool,
    resolution: int,
) -> nn.Module:
    """
    Build a ResNet-18 whose stem matches the input resolution.

    Small inputs (28x28, 32x32) receive stem surgery so spatial resolution
    is preserved through the first layers; larger inputs (64x64, 128x128,
    224x224) keep the standard ResNet-18 stem.

    Workflow:
        1. Load ImageNet pretrained ResNet-18 (if enabled)
        2. Apply resolution-specific stem adaptation
        3. Replace classification head with dataset-specific linear layer

    Args:
        num_classes: Number of dataset classes
        in_channels: Input channels (1=Grayscale, 3=RGB)
        pretrained: Whether to load ImageNet pretrained weights
        resolution: Input image resolution (28, 32, 64, 128, or 224)

    Returns:
        Adapted ResNet-18 (device placement handled by factory).
    """
    # Step 1: base network, optionally carrying ImageNet-1k weights.
    net = models.resnet18(
        weights=models.ResNet18_Weights.IMAGENET1K_V1 if pretrained else None
    )

    # Step 2: stem selection by resolution.
    if resolution <= 32:
        # 28, 32: small stem (3x3 stride-1, no MaxPool) preserves detail.
        _adapt_stem_28(net, in_channels, pretrained)
    else:
        # 64, 128, 224: standard ImageNet stem.
        _adapt_stem_standard(net, in_channels, pretrained)

    # Step 3: dataset-specific classification head.
    net.fc = nn.Linear(net.fc.in_features, num_classes)

    return cast(nn.Module, net)

build_timm_model(num_classes, in_channels, *, arch_cfg)

Construct any timm-registered model with automatic adaptation.

timm.create_model handles:

  • Pretrained weight loading (from HuggingFace Hub or torch.hub)
  • Classification head replacement (num_classes)
  • Input channel adaptation with weight morphing (in_chans)
  • Dropout rate injection (drop_rate)

Parameters:

Name Type Description Default
num_classes int

Number of output classes for the classification head.

required
in_channels int

Number of input channels (1=grayscale, 3=RGB).

required
arch_cfg ArchitectureConfig

Architecture sub-config with name, pretrained, dropout.

required

Returns:

Type Description
Module

Adapted timm model (device placement handled by factory).

Raises:

Type Description
ValueError

If the timm model identifier is not found in the registry.

Source code in orchard/architectures/timm_backbone.py
def build_timm_model(
    num_classes: int,
    in_channels: int,
    *,
    arch_cfg: ArchitectureConfig,
) -> nn.Module:
    """
    Build any timm-registered model, delegating adaptation to timm itself.

    timm.create_model natively handles:

    - Pretrained weight loading (from HuggingFace Hub or torch.hub)
    - Classification head replacement (num_classes)
    - Input channel adaptation with weight morphing (in_chans)
    - Dropout rate injection (drop_rate)

    Args:
        num_classes: Number of output classes for the classification head.
        in_channels: Number of input channels (1=grayscale, 3=RGB).
        arch_cfg: Architecture sub-config with name, pretrained, dropout.

    Returns:
        Adapted timm model (device placement handled by factory).

    Raises:
        ValueError: If the timm model identifier is not found in the registry.
    """
    # Config names look like "prefix/<model_id>"; everything after the first
    # slash is the timm registry identifier.
    model_id = arch_cfg.name.split("/", 1)[1]

    try:
        return timm.create_model(
            model_id,
            pretrained=arch_cfg.pretrained,
            num_classes=num_classes,
            in_chans=in_channels,
            drop_rate=arch_cfg.dropout,
        )
    except Exception as e:  # timm raises diverse internal errors
        # Normalise to ValueError so callers have a single failure type.
        raise ValueError(
            f"Failed to create timm model '{model_id}'. "
            f"Verify the identifier is valid: https://huggingface.co/timm. "
            f"Original error: {e}"
        ) from e

build_vit_tiny(num_classes, in_channels, *, pretrained, weight_variant=None)

Constructs Vision Transformer Tiny adapted for image classification datasets.

Workflow
  1. Resolve pretrained weight variant from config (if enabled)
  2. Load model via timm with automatic head replacement
  3. Modify patch embedding layer for custom input channels
  4. Apply weight morphing for channel compression (if grayscale)

Parameters:

Name Type Description Default
num_classes int

Number of dataset classes for classification head

required
in_channels int

Input channels (1=Grayscale, 3=RGB)

required
pretrained bool

Whether to load pretrained weights

required
weight_variant str | None

Specific timm weight variant identifier

None

Returns:

Type Description
Module

Adapted ViT-Tiny model (device placement handled by factory).

Raises:

Type Description
OrchardConfigError

If weight variant is invalid or incompatible with pretrained flag

Source code in orchard/architectures/vit_tiny.py
def build_vit_tiny(
    num_classes: int,
    in_channels: int,
    *,
    pretrained: bool,
    weight_variant: str | None = None,
) -> nn.Module:
    """
    Constructs Vision Transformer Tiny adapted for image classification datasets.

    Workflow:
        1. Resolve pretrained weight variant from config (if enabled)
        2. Load model via timm with automatic head replacement
        3. Modify patch embedding layer for custom input channels
        4. Apply weight morphing for channel compression (if grayscale)

    Args:
        num_classes: Number of dataset classes for classification head
        in_channels: Input channels (1=Grayscale, 3=RGB)
        pretrained: Whether to load pretrained weights
        weight_variant: Specific timm weight variant identifier

    Returns:
        Adapted ViT-Tiny model (device placement handled by factory).

    Raises:
        OrchardConfigError: If timm fails to load the requested weight variant.
    """
    # --- Step 1: Resolve Weight Variant ---
    _weight_variant = weight_variant or "vit_tiny_patch16_224.augreg_in21k_ft_in1k"

    # Lazy %-style logging (consistent with the factory's logging style).
    if pretrained:
        logger.info("%s%s %-18s: %s", LogStyle.INDENT, LogStyle.ARROW, "Weights", _weight_variant)
        pretrained_flag = True
    else:
        logger.info("%s%s %-18s: %s", LogStyle.INDENT, LogStyle.ARROW, "Weights", "random init")
        pretrained_flag = False
        _weight_variant = "vit_tiny_patch16_224"  # Use base architecture

    # --- Step 2: Load Model via timm ---
    try:
        model = timm.create_model(
            _weight_variant,
            pretrained=pretrained_flag,
            num_classes=num_classes,
            in_chans=3,  # Initially load for 3 channels (will adapt below)
        )
    except (RuntimeError, ValueError) as e:
        logger.error("Failed to load ViT variant '%s': %s", _weight_variant, e)
        raise OrchardConfigError(f"Invalid ViT weight variant: {_weight_variant}") from e

    # --- Step 3: Adapt Patch Embedding Layer ---
    if in_channels != 3:
        logger.info("Adapting patch embedding from 3 to %d channels", in_channels)

        # type-narrow patch_embed.proj to Conv2d for mypy
        # Note: timm VisionTransformer.patch_embed has dynamic type, ignore for type checking
        old_proj = cast(nn.Conv2d, model.patch_embed.proj)  # type: ignore[union-attr]

        # Extract attributes (cast to specific types for mypy)
        kernel_size = cast("tuple[int, int]", old_proj.kernel_size)
        stride = cast("tuple[int, int]", old_proj.stride)
        padding = cast("tuple[int, int] | int", old_proj.padding)

        # Create new projection layer
        new_proj = nn.Conv2d(
            in_channels=in_channels,
            out_channels=old_proj.out_channels,  # 192 for ViT-Tiny
            kernel_size=kernel_size,  # (16, 16)
            stride=stride,  # (16, 16)
            padding=padding,
            bias=old_proj.bias is not None,
        )

        # --- Step 4: Weight Morphing (Transfer Pretrained Knowledge) ---
        if pretrained:
            morph_conv_weights(old_proj, new_proj, in_channels)

        # Replace patch embedding projection
        model.patch_embed.proj = new_proj  # type: ignore[union-attr]

    return model