job "ai-backend" {
  datacenters = ["Homelab-PTECH-DC"]
  region      = "home"
  type        = "service"

  group "ollama-group" {
    count = 1

    # Pin to P52 Laptop (eGPU Host)
    constraint {
      attribute = "${meta.device}"
      value     = "p52-laptop"
    }
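
    # For this constraint to match, the P52's Nomad client must advertise
    # the meta attribute. A minimal sketch of the client-side agent config
    # (the key name "device" is this homelab's own convention):
    #
    #   client {
    #     meta {
    #       device = "p52-laptop"
    #     }
    #   }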

    network {
      # Static port ensures it's always on 11434 on the P52
      port "api" { static = 11434 }
    }

    task "ollama" {
      driver = "podman"

      env {
        OLLAMA_HOST    = "0.0.0.0"
        OLLAMA_ORIGINS = "*"

        # 1. Unlock the 6900 XT (Navi 21) for ROCm: "10.3.0" is the gfx1030
        #    ISA that Navi 21 reports
        HSA_OVERRIDE_GFX_VERSION = "10.3.0"

        # 2. Enable debugging (check logs for "ROCm compute capability detected")
        OLLAMA_DEBUG = "1"
      }
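
      # To confirm the GPU was actually picked up after deployment, tail the
      # task's stderr (the alloc ID is a placeholder):
      #
      #   nomad alloc logs -stderr <alloc-id> ollama
      #
      # On the host, `rocminfo | grep gfx` (assuming the ROCm userland is
      # installed) should list gfx1030 for the 6900 XT.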

      config {
        # The "rocm" image tag bundles the ROCm runtime; the default
        # "latest" tag has no AMD GPU support
        image = "docker.io/ollama/ollama:rocm"
        ports = ["api"]

        # Required to talk to hardware
        privileged = true

        # --- CRITICAL FIXES FOR AMD ROCM ---
        # 1. Allow shared memory access (required for eGPU communication)
        ipc_mode = "host"

        # 2. Disable security labels that might block device access
        security_opt = ["label=disable"]

        # 3. Explicit device mapping (hides Nvidia/Intel cards from Ollama).
        #    We map the compute interface (kfd), the physical card (card1),
        #    and the render node (renderD128).
        devices = [
          "/dev/kfd",
          "/dev/dri/card1",
          "/dev/dri/renderD128"
        ]
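
        # DRI device numbering can shift across boots on a multi-GPU host;
        # to confirm which nodes belong to the eGPU, run on the host:
        #
        #   ls -l /dev/dri/by-path/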

        # 4. Storage volume for downloaded models (Ollama keeps them under
        #    /root/.ollama inside the container)
        volumes = [
          "/mnt/local-ssd/nomad/stacks/ai/ai-backend/ollama:/root/.ollama"
        ]
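
        # It's safest to create the host path once on the P52 before the
        # first deploy so ownership and permissions are predictable:
        #
        #   mkdir -p /mnt/local-ssd/nomad/stacks/ai/ai-backend/ollama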
      }

      service {
        name = "ollama"
        port = "api"

        check {
          type     = "http"
          path     = "/"
          interval = "20s"
          timeout  = "2s"
        }
      }
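
      # Ollama answers GET / with a plain HTTP 200 ("Ollama is running"),
      # so a bare check against the root path is enough here.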

      resources {
        cpu    = 2000 # MHz
        memory = 8192 # 8 GB system RAM
      }
    }
  }
}
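
# Deploy and smoke-test (the file name below is hypothetical):
#
#   nomad job run ai-backend.nomad.hcl
#   curl http://<p52-ip>:11434/   # expect: "Ollama is running"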