(no title)
predkambrij | 26 days ago
cat docker-compose.yml
# Docker Compose definition for a single llama.cpp server container that
# reserves the host's NVIDIA GPUs.
services:
  llamacpp:
    container_name: llamacpp
    image: ghcr.io/ggml-org/llama.cpp:server-cuda
    restart: unless-stopped
    # Uses the host's network stack directly, so no `ports:` mapping is declared.
    network_mode: host
    volumes:
      # Named volume (declared under the top-level `volumes:` key) mounted at
      # /root. NOTE(review): presumably this persists downloaded model files
      # across container restarts -- confirm where the image stores them.
      - llamacpp:/root
    # Alternative `-hf` model reference kept from the original file:
    #   unsloth/gpt-oss-120b-GGUF:Q2_K
    # It lives here as a real YAML comment; inside the block scalar below a
    # `#` line would be part of the command text, not a comment.
    #
    # Folded scalar (`>-`): the lines below are joined with single spaces into
    # one argument string with no trailing newline. One flag per line keeps
    # diffs small when a setting changes.
    command: >-
      -hf unsloth/Qwen3-Coder-Next-GGUF:Q4_K_XL
      --jinja
      --cpu-moe
      --n-gpu-layers 999
      --ctx-size 102400
      --temp 1.0
      --top-p 0.95
      --min-p 0.01
      --top-k 40
      --fit on
    deploy:
      resources:
        reservations:
          devices:
            # Request every GPU exposed by the `nvidia` driver.
            - driver: nvidia
              count: all
              capabilities: [gpu]

volumes:
  # Named volume with default settings; `{}` makes the empty config explicit.
  llamacpp: {}
No comments yet.