commit 900600e9959f0a1e66a812e71ca2e1193bfc9584 Author: jimmychen Date: Sun Mar 29 00:05:18 2026 -0400 添加llama-cpp server端部署指令 diff --git a/llama-cpp.md b/llama-cpp.md new file mode 100644 index 0000000..5cbacb9 --- /dev/null +++ b/llama-cpp.md @@ -0,0 +1,5 @@ +./llama-server --model /home/jimmy/NVME/model/Qwen3.5-35B-A3B-UD-IQ4_XS.gguf --alias "Qwen3.5-35B-A3B" --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 --port 11434 --kv-unified --cache-type-k q8_0 --cache-type-v q8_0 --flash-attn on --fit on --ctx-size 262144 --jinja --no-mmap + +Context size: the command above uses --ctx-size 262144 (256K); for a 128K context use --ctx-size 131072. + +To disable thinking, append: --chat-template-kwargs "{\"enable_thinking\": false}"