From a6774d816d44e677f47dba6b836b3456b1ea1620 Mon Sep 17 00:00:00 2001 From: Preston Hunter Date: Sat, 27 Dec 2025 14:37:33 -0500 Subject: [PATCH] Maybe Fixed GPU? --- stacks/ai/ai-backend.nomad | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/stacks/ai/ai-backend.nomad b/stacks/ai/ai-backend.nomad index 0c0a45d..fa4aec6 100644 --- a/stacks/ai/ai-backend.nomad +++ b/stacks/ai/ai-backend.nomad @@ -23,31 +23,34 @@ task "ollama" { OLLAMA_HOST = "0.0.0.0" OLLAMA_ORIGINS = "*" - # Unlock the 6900XT (Navi 21) for ROCm + # 1. The Magic Key for the 6900XT HSA_OVERRIDE_GFX_VERSION = "10.3.0" - # Debugging enabled so we can confirm it worked + # 2. Enable Debugging OLLAMA_DEBUG = "1" + + # 3. CRITICAL: Remove any ROCR_VISIBLE_DEVICES variable here! + # Let Ollama see all cards and pick the one that works. } config { image = "docker.io/ollama/ollama:latest" ports = ["api"] + + # Required to talk to hardware privileged = true - # --- THE FIX: STRICT MAPPING --- - # Only map the eGPU (renderD128) and the Compute interface (kfd) - devices = [ - "/dev/kfd", - "/dev/dri/renderD128" - ] + # --- THE FIX --- + # 1. Map /dev/kfd (Compute Interface) + devices = ["/dev/kfd"] - # Do NOT map the whole /dev/dri folder, or it might peek at the others + # 2. Map the ENTIRE graphics folder as a Volume + # This ensures the driver sees card0, card1, renderD128, etc. volumes = [ - "/mnt/local-ssd/nomad/stacks/ai/ai-backend/ollama:/root/.ollama" + "/mnt/local-ssd/nomad/stacks/ai/ai-backend/ollama:/root/.ollama", + "/dev/dri:/dev/dri" ] - } - service { + } service { name = "ollama" port = "api"