diff --git a/stacks/ai/ai-backend.nomad b/stacks/ai/ai-backend.nomad
index dd5e99a..678a8d6 100644
--- a/stacks/ai/ai-backend.nomad
+++ b/stacks/ai/ai-backend.nomad
@@ -6,7 +6,6 @@ job "ai-backend" {
   group "ollama-group" {
     count = 1

-    # Pin to P52 Laptop (eGPU Host)
     constraint {
       attribute = "${meta.device}"
       value     = "p52-laptop"
@@ -22,44 +21,34 @@ job "ai-backend" {
       env {
         OLLAMA_HOST    = "0.0.0.0"
         OLLAMA_ORIGINS = "*"
+        OLLAMA_DEBUG   = "1"

-        # 1. Force 6900XT Support
+        # --- THE FIX: USE VULKAN INSTEAD OF ROCm ---
+        OLLAMA_VULKAN = "1"
+
+        # We keep this just in case Vulkan falls back to ROCm,
+        # but Vulkan should take priority.
         HSA_OVERRIDE_GFX_VERSION = "10.3.0"
-
-        # 2. Debugging
-        OLLAMA_DEBUG = "1"
       }

       config {
         image = "docker.io/ollama/ollama:latest"
         ports = ["api"]
-
-        # Required to talk to hardware (This handles most security opts)
         privileged = true

-        # --- Explicit Device Mapping ---
-        # Map the Compute interface and the Physical Card
-        devices = [
-          "/dev/kfd",
-          "/dev/dri/card1",
-          "/dev/dri/renderD128"
-        ]
-
-        # --- Volumes ---
+        # --- MOUNT EVERYTHING ---
+        # Since Vulkan scans all GPUs to pick the best one,
+        # we give it access to the whole folder.
         volumes = [
-          # 1. Your Custom Storage Path
           "/mnt/local-ssd/nomad/stacks/ai/ai-backend/ollama:/root/.ollama",
-
-          # 2. Shared Memory Workaround (Replaces ipc_mode = "host")
-          # This helps the AMD driver communicate efficiently
-          "/dev/shm:/dev/shm"
+          "/dev/kfd:/dev/kfd",
+          "/dev/dri:/dev/dri"
         ]
       }

       service {
         name = "ollama"
         port = "api"
-
         check {
           type = "http"
           path = "/"
@@ -70,7 +59,7 @@ job "ai-backend" {

       resources {
         cpu    = 2000
-        memory = 8192 # 8GB System RAM
+        memory = 8192
       }
     }
   }
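
For readability, this is roughly how the task's env and config stanzas read once the patch above is applied. It is a reconstruction from the hunks, not part of the patch itself; the surrounding job/group/task boilerplate and exact indentation are assumed.

      env {
        OLLAMA_HOST    = "0.0.0.0"
        OLLAMA_ORIGINS = "*"
        OLLAMA_DEBUG   = "1"

        # Prefer the Vulkan backend; keep the ROCm gfx override
        # only as a fallback if Vulkan is not picked up.
        OLLAMA_VULKAN            = "1"
        HSA_OVERRIDE_GFX_VERSION = "10.3.0"
      }

      config {
        image      = "docker.io/ollama/ollama:latest"
        ports      = ["api"]
        privileged = true

        # Expose the whole /dev/dri directory (plus /dev/kfd) instead of
        # individual card/render nodes, since Vulkan enumerates GPUs itself.
        volumes = [
          "/mnt/local-ssd/nomad/stacks/ai/ai-backend/ollama:/root/.ollama",
          "/dev/kfd:/dev/kfd",
          "/dev/dri:/dev/dri"
        ]
      }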