From b104dc46405633711bb35719052a0340b810a89e Mon Sep 17 00:00:00 2001
From: Preston Hunter
Date: Sat, 27 Dec 2025 14:26:56 -0500
Subject: [PATCH] Fixed eGPU deploy?

---
 stacks/ai/ai-backend.nomad | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/stacks/ai/ai-backend.nomad b/stacks/ai/ai-backend.nomad
index af5ebf0..0c0a45d 100644
--- a/stacks/ai/ai-backend.nomad
+++ b/stacks/ai/ai-backend.nomad
@@ -16,34 +16,37 @@ job "ai-backend" {
       port "api" { static = 11434 }
     }
 
-    task "ollama" {
+task "ollama" {
       driver = "podman"
-
+
       env {
         OLLAMA_HOST = "0.0.0.0"
         OLLAMA_ORIGINS = "*"
-        # CRITICAL FOR 6900XT:
-        # This tells ROCm to treat the card like a supported Pro workstation card
+        # Unlock the 6900XT (Navi 21) for ROCm
         HSA_OVERRIDE_GFX_VERSION = "10.3.0"
+
+        # Debugging enabled so we can confirm it worked
+        OLLAMA_DEBUG = "1"
       }
 
       config {
-        # Standard image (contains ROCm libraries)
         image = "docker.io/ollama/ollama:latest"
         ports = ["api"]
-
-        # Required for hardware access
         privileged = true
-        # Pass the graphics hardware to the container
+        # --- THE FIX: STRICT MAPPING ---
+        # Only map the eGPU (renderD128) and the Compute interface (kfd)
+        devices = [
+          "/dev/kfd",
+          "/dev/dri/renderD128"
+        ]
+
+        # Do NOT map the whole /dev/dri folder, or it might peek at the others
         volumes = [
-          "/mnt/local-ssd/nomad/stacks/ai/ai-backend/ollama:/root/.ollama",
-          "/dev/kfd:/dev/kfd",
-          "/dev/dri:/dev/dri"
+          "/mnt/local-ssd/nomad/stacks/ai/ai-backend/ollama:/root/.ollama"
         ]
       }
-
     service {
       name = "ollama"
       port = "api"