job "ai-backend" {
  datacenters = ["Homelab-PTECH-DC"]
  region      = "home"
  type        = "service"

  group "ollama-group" {
    count = 1

    # Pin to P52 Laptop (eGPU Host)
    constraint {
      attribute = "${meta.device}"
      value     = "p52-laptop"
    }
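
    # For this constraint to match, the P52's Nomad client must advertise
    # the meta attribute. A minimal sketch of the client-side agent config
    # (the key name "device" is this homelab's own convention):
    #
    #   client {
    #     meta {
    #       device = "p52-laptop"
    #     }
    #   }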

    network {
      # Static port ensures it's always on 11434 on the P52
      port "api" { static = 11434 }
    }

    task "ollama" {
      driver = "podman"

      env {
        OLLAMA_HOST    = "0.0.0.0"
        OLLAMA_ORIGINS = "*"

        # 1. Unlock the 6900 XT (Navi 21) for ROCm: "10.3.0" is the gfx1030
        #    ISA that Navi 21 reports
        HSA_OVERRIDE_GFX_VERSION = "10.3.0"

        # 2. Enable debugging (check logs for "ROCm compute capability detected")
        OLLAMA_DEBUG = "1"
      }
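
      # To confirm the GPU was actually picked up after deployment, tail the
      # task's stderr (the alloc ID is a placeholder):
      #
      #   nomad alloc logs -stderr <alloc-id> ollama
      #
      # On the host, `rocminfo | grep gfx` (assuming the ROCm userland is
      # installed) should list gfx1030 for the 6900 XT.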

      config {
        # The "rocm" image tag bundles the ROCm runtime; the default
        # "latest" tag has no AMD GPU support
        image = "docker.io/ollama/ollama:rocm"
        ports = ["api"]

        # Required to talk to hardware
        privileged = true

        # --- CRITICAL FIXES FOR AMD ROCM ---
        # 1. Allow shared memory access (required for eGPU communication)
        ipc_mode = "host"

        # 2. Disable security labels that might block device access
        security_opt = ["label=disable"]

        # 3. Explicit device mapping (hides Nvidia/Intel cards from Ollama).
        #    We map the compute interface (kfd), the physical card (card1),
        #    and the render node (renderD128).
        devices = [
          "/dev/kfd",
          "/dev/dri/card1",
          "/dev/dri/renderD128"
        ]
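
        # DRI device numbering can shift across boots on a multi-GPU host;
        # to confirm which nodes belong to the eGPU, run on the host:
        #
        #   ls -l /dev/dri/by-path/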

        # 4. Storage volume for downloaded models (Ollama keeps them under
        #    /root/.ollama inside the container)
        volumes = [
          "/mnt/local-ssd/nomad/stacks/ai/ai-backend/ollama:/root/.ollama"
        ]
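
        # It's safest to create the host path once on the P52 before the
        # first deploy so ownership and permissions are predictable:
        #
        #   mkdir -p /mnt/local-ssd/nomad/stacks/ai/ai-backend/ollama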
      }

      service {
        name = "ollama"
        port = "api"

        check {
          type     = "http"
          path     = "/"
          interval = "20s"
          timeout  = "2s"
        }
      }
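
      # Ollama answers GET / with a plain HTTP 200 ("Ollama is running"),
      # so a bare check against the root path is enough here.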

      resources {
        cpu    = 2000 # MHz
        memory = 8192 # 8 GB system RAM
      }
    }
  }
}
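
# Deploy and smoke-test (the file name below is hypothetical):
#
#   nomad job run ai-backend.nomad.hcl
#   curl http://<p52-ip>:11434/   # expect: "Ollama is running"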