commit 762bfb29fa (parent 619124a30c)
Date: 2025-12-27 14:49:00 -05:00

@@ -13,7 +13,6 @@ job "ai-backend" {
     }
     network {
       # Static port ensures it's always on 11434 on the P52
       port "api" { static = 11434 }
     }
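
A static port pins the host-side mapping instead of letting Nomad pick a dynamic one, so anything on the network can always reach the API at the P52's address on 11434. As a sketch of how a job would typically expose this, a Consul service registration against the "api" port label could look like the following (the service name and health-check path here are assumptions, not part of this commit):

service {
  name = "ollama"    # assumed service name, not in this commit
  port = "api"       # references the static port label above
  check {
    type     = "http"
    path     = "/"   # Ollama answers plain HTTP on its root path
    interval = "10s"
    timeout  = "2s"
  }
}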
@@ -24,10 +23,10 @@ job "ai-backend" {
        OLLAMA_HOST = "0.0.0.0"
        OLLAMA_ORIGINS = "*"
-       # 1. Unlock the 6900XT (Navi 21) for ROCm
+       # 1. Force 6900XT Support
        HSA_OVERRIDE_GFX_VERSION = "10.3.0"
-       # 2. Enable Debugging (Check logs for "ROCm compute capability detected")
+       # 2. Debugging
        OLLAMA_DEBUG = "1"
      }
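
The override works because ROCm ships prebuilt kernels for only a short list of GPU targets, and the value maps directly to a target name: "10.3.0" tells the HSA runtime to treat the card as gfx1030, the Navi 21 ISA used by the 6900 XT. A commented sketch of the same env stanza, spelling out what each value does (values exactly as in this commit):

env {
  OLLAMA_HOST    = "0.0.0.0"   # listen on all interfaces, not just loopback
  OLLAMA_ORIGINS = "*"         # accept cross-origin API calls from any host

  # "10.3.0" = gfx1030 (Navi 21); makes the HSA runtime load the
  # prebuilt ROCm kernels for this architecture.
  HSA_OVERRIDE_GFX_VERSION = "10.3.0"

  # Verbose startup logging, including which GPU was detected.
  OLLAMA_DEBUG = "1"
}

With debugging on, `nomad alloc logs <alloc-id>` should show the GPU detection lines when the task starts.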
@@ -35,27 +34,25 @@ job "ai-backend" {
       image = "docker.io/ollama/ollama:latest"
       ports = ["api"]
-      # Required to talk to hardware
+      # Required to talk to hardware (This handles most security opts)
       privileged = true
-      # --- CRITICAL FIXES FOR AMD ROCM ---
-      # 1. Allow shared memory access (Required for eGPU communication)
-      ipc_mode = "host"
-      # 2. Disable security labels that might block device access
-      security_opt = ["label=disable"]
-      # 3. Explicit Device Mapping (Hides Nvidia/Intel cards from Ollama)
-      # We map the Compute interface, the Physical Card (card1), and the Render Node (renderD128)
+      # --- Explicit Device Mapping ---
+      # Map the Compute interface and the Physical Card
       devices = [
         "/dev/kfd",
         "/dev/dri/card1",
         "/dev/dri/renderD128"
       ]
-      # 4. Storage Volume (Updated to your request)
+      # --- Volumes ---
       volumes = [
-        "/mnt/local-ssd/nomad/stacks/ai/ai-backend/ollama:/root/.ollama"
+        # 1. Your Custom Storage Path
+        "/mnt/local-ssd/nomad/stacks/ai/ai-backend/ollama:/root/.ollama",
+        # 2. Shared Memory Workaround (Replaces ipc_mode = "host")
+        # This helps the AMD driver communicate efficiently
+        "/dev/shm:/dev/shm"
       ]
     }
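
Taken together, the task's config block after this commit reads roughly as below. This is a sketch for reference, not new configuration: the device indices (card1, renderD128) are host-specific, since DRM node numbering depends on how the kernel enumerates GPUs, so on another machine the 6900 XT could just as well be card0 or renderD129.

config {
  image      = "docker.io/ollama/ollama:latest"
  ports      = ["api"]
  privileged = true   # broad device/capability access; per this commit,
                      # this also covers what security_opt used to disable

  devices = [
    "/dev/kfd",             # ROCm compute interface (Kernel Fusion Driver)
    "/dev/dri/card1",       # DRM node of the 6900 XT on this host
    "/dev/dri/renderD128"   # its render node, used for headless compute
  ]

  volumes = [
    # Model store on the local SSD
    "/mnt/local-ssd/nomad/stacks/ai/ai-backend/ollama:/root/.ollama",
    # Host shared memory, standing in for ipc_mode = "host"
    "/dev/shm:/dev/shm"
  ]
}

The /dev/shm bind is the narrower of the two shared-memory options: it hands the container the host's shared-memory filesystem without sharing the entire host IPC namespace the way ipc_mode = "host" did.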