From 900600e9959f0a1e66a812e71ca2e1193bfc9584 Mon Sep 17 00:00:00 2001
From: jimmychen <jimmychen23333@gmail.com>
Date: Sun, 29 Mar 2026 00:05:18 -0400
Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0llama-cpp=20server=E7=AB=AF?=
 =?UTF-8?q?=E9=83=A8=E7=BD=B2=E6=8C=87=E4=BB=A4?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 llama-cpp.md | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 llama-cpp.md

diff --git a/llama-cpp.md b/llama-cpp.md
new file mode 100644
index 0000000..5cbacb9
--- /dev/null
+++ b/llama-cpp.md
@@ -0,0 +1,5 @@
+./llama-server --model /home/jimmy/NVME/model/Qwen3.5-35B-A3B-UD-IQ4_XS.gguf --alias "Qwen3.5-35B-A3B" --temp 0.6 --top-p 0.95 --top-k 20 --min-p 0.00 --port 11434 --kv-unified --cache-type-k q8_0 --cache-type-v q8_0 --flash-attn on --fit on --ctx-size 262144 --jinja --no-mmap
+
+context_size 128K = 131072 (the command above uses --ctx-size 262144 = 256K)
+
+Disable thinking by appending: --chat-template-kwargs "{\"enable_thinking\": false}"