Fix Ollama GPU passthrough under the podman driver: replace the invalid
`devices` stanza with plain volume mounts of /dev/kfd and /dev/dri, and tidy
indentation/comments in the task block. (Reconstructed unified diff; leading
text above the `diff --git` header is ignored by patch/git-apply.)

diff --git a/stacks/ai/ai-backend.nomad b/stacks/ai/ai-backend.nomad
index fa4aec6..37a8375 100644
--- a/stacks/ai/ai-backend.nomad
+++ b/stacks/ai/ai-backend.nomad
@@ -16,21 +16,18 @@ job "ai-backend" {
     port "api" { static = 11434 }
   }
 
-task "ollama" {
+  task "ollama" {
     driver = "podman"
-    
+
     env {
       OLLAMA_HOST = "0.0.0.0"
       OLLAMA_ORIGINS = "*"
 
-      # 1. The Magic Key for the 6900XT
+      # 1. Unlock the 6900XT (Navi 21) for ROCm
       HSA_OVERRIDE_GFX_VERSION = "10.3.0"
 
       # 2. Enable Debugging
       OLLAMA_DEBUG = "1"
-
-      # 3. CRITICAL: Remove any ROCR_VISIBLE_DEVICES variable here!
-      # Let Ollama see all cards and pick the one that works.
     }
 
     config {
@@ -40,17 +37,17 @@ task "ollama" {
       # Required to talk to hardware
       privileged = true
 
-      # --- THE FIX ---
-      # 1. Map /dev/kfd (Compute Interface)
-      devices = ["/dev/kfd"]
-
-      # 2. Map the ENTIRE graphics folder as a Volume
-      # This ensures the driver sees card0, card1, renderD128, etc.
+      # --- THE FIX: CLEAN VOLUMES ONLY ---
+      # We mount the compute interface and the entire graphics directory.
+      # This avoids the 'devices' syntax error entirely.
       volumes = [
         "/mnt/local-ssd/nomad/stacks/ai/ai-backend/ollama:/root/.ollama",
+        "/dev/kfd:/dev/kfd",
         "/dev/dri:/dev/dri"
       ]
 
-    } service {
+    }
+
+    service {
       name = "ollama"
       port = "api"
@@ -64,7 +61,7 @@ task "ollama" {
     resources {
       cpu    = 2000
-      memory = 8192 # 8GB System RAM (The GPU has its own VRAM)
+      memory = 8192 # 8GB System RAM
     }
   }
 }