# Nomad job: Ollama inference backend, pinned to the GPU-carrying laptop node.
job "ai-backend" {
  datacenters = ["Homelab-PTECH-DC"]
  region      = "home"
  type        = "service"

  group "ollama-group" {
    count = 1

    # Schedule only on the node whose meta.device is the p52 laptop,
    # since that is where the GPU lives.
    constraint {
      attribute = "${meta.device}"
      value     = "p52-laptop"
    }

    network {
      # Ollama's default API port, exposed statically on the host.
      port "api" {
        static = 11434
      }
    }

    task "ollama" {
      driver = "podman"

      env {
        OLLAMA_HOST    = "0.0.0.0:11434"
        OLLAMA_ORIGINS = "*"
        OLLAMA_DEBUG   = "1"

        # Prefer the Vulkan backend over ROCm.
        OLLAMA_VULKAN = "1"

        # Safety net in case Vulkan falls back to ROCm;
        # Vulkan should still take priority.
        HSA_OVERRIDE_GFX_VERSION = "10.3.0"
      }

      config {
        image      = "docker.io/ollama/ollama:latest"
        ports      = ["api"]
        privileged = true

        # Vulkan scans every GPU to pick the best one, so the container
        # gets the whole /dev/dri directory as well as /dev/kfd, plus the
        # persistent model store on the local SSD.
        # NOTE(review): device nodes are passed as bind mounts; this relies
        # on privileged = true for cgroup device access — confirm if
        # privileged is ever dropped.
        volumes = [
          "/mnt/local-ssd/nomad/stacks/ai/ai-backend/ollama:/root/.ollama",
          "/dev/kfd:/dev/kfd",
          "/dev/dri:/dev/dri"
        ]
      }

      service {
        name = "ollama"
        port = "api"

        # Ollama answers plain HTTP GET / when healthy.
        check {
          type     = "http"
          path     = "/"
          interval = "20s"
          timeout  = "2s"
        }
      }

      resources {
        cpu    = 2000 # MHz
        memory = 8192 # MiB
      }
    }
  }
}