From 14cad19e58faff7dee1bc7aa30c9323e50ccf230 Mon Sep 17 00:00:00 2001
From: JimmysAIPG <xiaojimmychen@gmail.com>
Date: Thu, 26 Mar 2026 22:07:32 +0800
Subject: [PATCH] add zimage local and qwen siliconflow

---
 .gitignore              |   2 +
 README.md               | 146 +++++++
 config.yaml             |  74 ++++
 config_qwen.yaml        |  55 +++
 poetry_to_image.py      | 854 ++++++++++++++++++++++++++++++++++++++++
 poetry_to_image_qwen.py | 529 +++++++++++++++++++++++++
 requirements.txt        |  16 +
 7 files changed, 1676 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 README.md
 create mode 100644 config.yaml
 create mode 100644 config_qwen.yaml
 create mode 100644 poetry_to_image.py
 create mode 100644 poetry_to_image_qwen.py
 create mode 100644 requirements.txt

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..333ae70
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+**/output/*
+**/output_qwen/*
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..c4d8ed0
--- /dev/null
+++ b/README.md
@@ -0,0 +1,146 @@
+# 古诗词意境图生成器
+
+基于 LLM 古诗词分析 + Z-Image-Turbo 本地文生图的自动化工具。
+
+输入一首古诗词（唐诗、宋词、元曲等均可），LLM 会以「信、雅、达」的标准分析意境，自动选择最合适的中国传统画风，并拆解为多个画面。然后调用本地 Z-Image-Turbo 模型逐一生成高质量图片。
+
+## 特性
+
+- 支持唐诗、宋词、元曲等所有中国古典诗词体裁
+- LLM 自动识别体裁并匹配最佳画风（水墨写意、青绿山水、工笔花鸟等 9 种风格）
+- 可选加载 LoRA 增强特定画风（如水墨风 LoRA）
+- Z-Image-Turbo 本地推理，16GB 显存即可运行
+
+## 环境要求
+
+- Python >= 3.10
+- GPU（至少 16GB 显存），支持以下任一：
+  - NVIDIA CUDA GPU
+  - Intel Arc GPU（A770 等，通过 XPU 支持）
+  - Apple Silicon（MPS）
+- 兼容 OpenAI API 的 LLM 端点
+
+## 安装
+
+```bash
+pip install -r requirements.txt
+```
+
+> diffusers 需要从源码安装（requirements.txt 已配置）。
+
+### Intel Arc GPU 额外步骤
+
+```bash
+# 安装 Intel Extension for PyTorch
+pip install intel-extension-for-pytorch
+```
+
+将 `config.yaml` 中的 `image.device` 设为 `"auto"` 或 `"xpu"` 即可自动适配。当 `image.torch_dtype` 为 `auto` 或 `bfloat16` 时，脚本会在 XPU 上自动改用 `float16`（Intel Arc 对 bfloat16 兼容性不佳）。
+
+## 配置
+
+编辑 `config.yaml`：
+
+| 配置项 | 说明 |
+|--------|------|
+| `llm.base_url` | LLM API 端点地址（兼容 OpenAI 格式） |
+| `llm.api_key` | API 密钥（建议通过环境变量 `LLM_API_KEY` 设置；环境变量非空时优先于配置文件中的值） |
+| `llm.model` | 模型名称 |
+| `image.model_id` | Z-Image-Turbo 的 HuggingFace ID 或本地 HF 格式目录 |
+| `image.comfyui.*` | ComfyUI 拆分文件路径（见下方说明） |
+| `image.device` | 推理设备：`auto` / `cuda` / `xpu` / `mps` / `cpu` |
+| `lora.enabled` | 是否启用 LoRA |
+| `lora.path` | LoRA 文件路径（.safetensors） |
+| `output.dir` | 图片输出目录 |
+
+### 模型加载方式
+
+支持三种模型来源（优先级：OpenVINO > ComfyUI > HuggingFace，详见 `config.yaml` 中的注释），下面介绍其中两种：
+
+**方式一：HuggingFace 格式**（默认）
+
+```yaml
+image:
+  model_id: "Tongyi-MAI/Z-Image-Turbo"   # 或本地 HF 格式目录
+```
+
+**方式二：ComfyUI 拆分文件**
+
+如果你已通过 ComfyUI 下载了模型，直接填写三个文件的路径即可：
+
+```yaml
+image:
+  comfyui:
+    text_encoder: "/path/to/ComfyUI/models/text_encoders/qwen_3_4b.safetensors"
+    transformer: "/path/to/ComfyUI/models/diffusion_models/z_image_turbo_bf16.safetensors"
+    vae: "/path/to/ComfyUI/models/vae/ae.safetensors"
+```
+
+> 首次运行 ComfyUI 模式时，脚本会自动从 HuggingFace 下载微型配置文件（< 100KB），之后自动缓存。
+
+### LoRA 推荐
+
+| LoRA | 风格 | 触发词 | 权重 | 来源 |
+|------|------|--------|------|------|
+| zyd232's Ink Style v1.2 | 水墨写意 | `ink style` | 0.6-1.2 | Civitai |
+| Painterly - CE | 水彩/油画 | - | 0.8 | Civitai |
+
+## 使用方法
+
+### 交互式输入
+
+```bash
+python poetry_to_image.py
+```
+
+运行后按提示输入古诗词，输入空行结束。
+
+### 命令行直接传入
+
+```bash
+# 唐诗
+python poetry_to_image.py -p "床前明月光，疑是地上霜。举头望明月，低头思故乡。"
+
+# 宋词
+python poetry_to_image.py -p "明月几时有？把酒问青天。不知天上宫阙，今夕是何年。"
+```
+
+### 仅分析不生成图片
+
+```bash
+python poetry_to_image.py -p "大漠孤烟直，长河落日圆。" --analyze-only
+```
+
+### 指定配置文件和输出目录
+
+```bash
+python poetry_to_image.py -c my_config.yaml -o ./my_output -p "春江潮水连海平，海上明月共潮生。"
+```
+
+## 输出
+
+- `output/poem_01.png` ... `poem_N.png` — 生成的图片
+- `output/poem_01_prompt.txt` ... — 每张图片的 prompt 与画风记录
+- `output/analysis.json` — LLM 完整分析结果（含体裁识别、风格选择）
+
+## 工作流程
+
+```
+古诗词输入 → LLM 体裁识别 → 意境分析 & 画风匹配 → 生成 prompt → [可选 LoRA 增强] → Z-Image-Turbo 逐一生图 → 输出
+```
+
+## 支持的画风
+
+LLM 会根据诗意自动选择：
+
+| 画风 | 适用场景 |
+|------|---------|
+| 水墨写意 | 山水、边塞、禅意 |
+| 青绿山水 | 春夏山水、壮丽河山 |
+| 工笔花鸟 | 花卉、仕女、精致细腻 |
+| 工笔重彩 | 华丽、宫廷、历史叙事 |
+| 没骨画法 | 花卉、蔬果、清新淡雅 |
+| 文人画 | 隐逸、高洁、书卷气 |
+| 泼墨大写意 | 豪放、苍茫、气势磅礴 |
+| 界画 | 楼阁、宫殿、城市 |
+| 浅绛山水 | 秋冬山水、怀古 |
diff --git a/config.yaml b/config.yaml
new file mode 100644
index 0000000..0f1abcc
--- /dev/null
+++ b/config.yaml
@@ -0,0 +1,74 @@
+# ========== LLM 配置（用于古诗词分析） ==========
+llm:
+  base_url: "https://api.siliconflow.cn/v1"    # OpenAI-compatible API endpoint
+  api_key: ""                                  # API key; never commit a real key - set the LLM_API_KEY environment variable instead
+  model: "Qwen/Qwen3.5-397B-A17B"                           # name of the deployed model
+  temperature: 0.9
+  max_tokens: 8192
+
+# ========== 图片生成配置 ==========
+image:
+  # --- 加载模式（三选一，优先级: openvino > comfyui > model_id） ---
+  # 模式一（OpenVINO）：填写 openvino.model_path，使用 OpenVINO IR 模型推理
+  # 模式二（ComfyUI）：  填写 comfyui 的三个 safetensors 路径
+  # 模式三（HuggingFace）：填写 model_id
+  model_id: "Tongyi-MAI/Z-Image-Turbo"     # HuggingFace 模型 ID 或本地 HF 格式目录
+
+  # --- OpenVINO 推理模式 ---
+  # 需要先通过 optimum-cli 导出模型：
+  #   optimum-cli export openvino --model Tongyi-MAI/Z-Image-Turbo --weight-format int8 z-image-turbo-ov
+  openvino:
+    model_path: ""                          # OpenVINO IR model directory; a non-empty value enables OpenVINO mode
+    device: "GPU"                           # OpenVINO device: GPU | CPU
+
+  # --- ComfyUI 拆分文件模式（三个路径都填则启用） ---
+  comfyui:
+    text_encoder: ""                        # safetensors 格式 text encoder 路径
+    transformer: ""                         # safetensors 格式 transformer 路径
+    vae: ""                                 # safetensors 格式 VAE 路径
+  torch_dtype: "float16"                       # auto | bfloat16 | float16 | float32
+                                            #   auto: CUDA/MPS→bfloat16, XPU→float16, CPU→float32
+  device: "auto"                            # auto | cuda | xpu | mps | cpu
+                                            #   auto: 自动检测可用设备（cuda > xpu > mps > cpu）
+  size_preset: "phone_hd"                     # 尺寸预设（优先于 height/width），可选值：
+                                            #   square       — 1024×1024  正方形（默认）
+                                            #   phone        — 576×1024   手机壁纸 9:16
+                                            #   phone_hd     — 768×1344   手机壁纸 9:16 高清
+                                            #   desktop      — 1024×576   电脑壁纸 16:9
+                                            #   desktop_hd   — 1344×768   电脑壁纸 16:9 高清
+                                            #   ultrawide    — 1536×640   带鱼屏壁纸 21:9
+                                            #   custom       — 使用下方 height/width 自定义尺寸
+  height: 1024                              # 仅 size_preset: custom 时生效
+  width: 1024                               # 仅 size_preset: custom 时生效
+  num_inference_steps: 9                    # Z-Image-Turbo 推荐 9（实际 8 步 DiT）
+  guidance_scale: 0.0                       # Turbo 模型应设为 0（不支持 negative prompt）
+  seed: -1                                  # -1 表示随机种子
+  images_per_prompt: 2                      # 每个 prompt 生成几张图（不同种子），1-10
+  enable_cpu_offload: "model"               # false: 全部常驻显卡（需≈24GB+）
+                                            # model: 组件级卸载（峰值≈4-6GB）
+                                            # sequential: 逐层卸载（最省显存但较慢）
+                                            # true: 等同于 model
+                                            # OpenVINO 模式下此选项无效
+  attention_backend: "sdpa"                 # sdpa | flash | flash_3（XPU 仅支持 sdpa）
+  prompt_language: "zh"                     # zh | en — 发送给 Z-Image-Turbo 的 prompt 语言
+                                            #   zh: 使用中文 prompt（Qwen3 中文编码器原生支持）
+                                            #   en: 使用英文 prompt
+  style_preference: ""                      # 风格期望（可选，留空则由 LLM 根据诗意自动选择）
+                                            #   可选值示例：水墨写意 / 青绿山水 / 工笔花鸟 / 工笔重彩
+                                            #             文人画 / 泼墨大写意 / 浅绛山水
+                                            #             具有电影光影质感的新国风写实
+
+# ========== LoRA 配置（可选） ==========
+# 加载 LoRA 可显著提升特定画风质量，如水墨风
+# 推荐：zyd232's Ink Style (Civitai) — 触发词: 水墨风 / ink style / zydink
+lora:
+  enabled: false
+  path: ""                                  # LoRA 文件路径（.safetensors）
+  weight: 0.8                               # LoRA 权重（推荐 0.6-1.2）
+  trigger_words: ""                         # 触发词，会自动追加到 prompt 开头
+
+# ========== 输出配置 ==========
+output:
+  dir: "./output"                           # 图片输出目录
+  filename_prefix: "poem"                   # 文件名前缀
+  save_prompts: true                        # 是否保存 prompt 到 txt 文件
diff --git a/config_qwen.yaml b/config_qwen.yaml
new file mode 100644
index 0000000..8256a8a
--- /dev/null
+++ b/config_qwen.yaml
@@ -0,0 +1,55 @@
+# ========== LLM 配置（用于古诗词分析） ==========
+llm:
+  base_url: "https://api.siliconflow.cn/v1"    # OpenAI-compatible API endpoint
+  api_key: ""                                  # API key; never commit a real key - set the LLM_API_KEY environment variable instead
+  model: "deepseek-ai/DeepSeek-V3.2"                           # name of the deployed model
+  temperature: 0.9
+  max_tokens: 8192
+
+# ========== Qwen-Image API 图片生成配置 ==========
+image:
+  base_url: "https://api.siliconflow.cn/v1"    # SiliconFlow API endpoint
+  api_key: ""                                  # image API key; never commit a real key (per the original note, empty reuses the LLM api_key)
+                                                # can also be supplied through the IMAGE_API_KEY environment variable
+  model: "Qwen/Qwen-Image"                     # model name on SiliconFlow
+
+  # --- 图片尺寸 ---
+  # Qwen-Image 推荐分辨率（与 Z-Image-Turbo 不同，请使用以下预设）：
+  size_preset: "square"                       # 尺寸预设，可选值：
+                                                #   square      — 1328×1328  正方形 1:1
+                                                #   phone       — 928×1664   手机壁纸 9:16
+                                                #   phone_hd    — 1056×1584  手机壁纸 2:3（高清）
+                                                #   desktop     — 1664×928   电脑壁纸 16:9
+                                                #   desktop_hd  — 1584×1056  电脑壁纸 3:2（高清）
+                                                #   landscape   — 1472×1140  横版 4:3
+                                                #   portrait    — 1140×1472  竖版 3:4
+                                                #   custom      — 使用下方 height/width 自定义尺寸
+  height: 1328                                  # 仅 size_preset: custom 时生效
+  width: 1328                                   # 仅 size_preset: custom 时生效
+
+  # --- 生成参数 ---
+  num_inference_steps: 20                       # 推理步数（1-100，默认 20，步数越多质量越高但越慢）
+  guidance_scale: 7.5                           # 引导系数（0-20，默认 7.5，越高越贴近 prompt）
+  # cfg:                                        # CFG 值（0.1-20，仅在需要图片中渲染文字时启用）
+                                                #   官方推荐：50 步 + CFG 4.0 用于文字渲染场景
+                                                #   CFG 设置过小时几乎无法生成文字
+  seed: -1                                      # -1 表示随机种子，设定固定值可复现结果
+  images_per_prompt: 2                          # 每个 prompt 生成几张图（不同种子），1-4
+
+  # --- 提示词配置 ---
+  negative_prompt: ""                           # 全局负向提示词（可选），会与 LLM 为每幅画面生成的
+                                                # 专属 negative_prompt 合并（LLM 生成的在前，全局的在后）
+                                                # 示例："低分辨率, 低质量, 肢体变形, 手指畸形, 过度饱和"
+  prompt_language: "zh"                         # zh | en — 发送给 Qwen-Image 的 prompt 语言
+                                                #   zh: 使用中文 prompt（Qwen-Image 对中文支持优秀）
+                                                #   en: 使用英文 prompt
+
+  # --- 网络配置 ---
+  max_retries: 3                                # API 调用失败时的最大重试次数
+  request_timeout: 180                          # 单次 API 请求超时时间（秒）
+
+# ========== 输出配置 ==========
+output:
+  dir: "./output_qwen"                          # 图片输出目录
+  filename_prefix: "poem"                       # 文件名前缀
+  save_prompts: true                            # 是否保存 prompt 到 txt 文件
diff --git a/poetry_to_image.py b/poetry_to_image.py
new file mode 100644
index 0000000..171a13c
--- /dev/null
+++ b/poetry_to_image.py
@@ -0,0 +1,854 @@
+"""
+古诗词意境图生成器
+将中国古典诗词通过 LLM 分析拆解为多个意境画面，
+再使用 Z-Image-Turbo 本地模型逐一生成高质量图片。
+"""
+
+import argparse
+import json
+import os
+import re
+import shutil
+import sys
+import tempfile
+import time
+from datetime import datetime
+from pathlib import Path
+
+import torch
+import yaml
+from openai import OpenAI
+from PIL import Image
+
+# ---------------------------------------------------------------------------
+# 设备检测与适配
+# ---------------------------------------------------------------------------
+
+def _init_xpu():
+    """Import intel-extension-for-pytorch (needed for Intel XPU); return whether it succeeded."""
+    try:
+        import intel_extension_for_pytorch  # noqa: F401
+    except ImportError:
+        return False
+    return True
+
+
+def resolve_device(configured_device: str) -> str:
+    """Decide which inference device to use from the configured value.
+
+    "auto" probes the hardware in the order cuda > xpu > mps > cpu.
+    Any other value is returned unchanged; "xpu" is verified first and exits on failure.
+    """
+    if configured_device == "auto":
+        if torch.cuda.is_available():
+            return "cuda"
+        if hasattr(torch, "xpu") and torch.xpu.is_available():
+            _init_xpu()
+            return "xpu"
+        if hasattr(torch, "backends") and hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
+            return "mps"
+        return "cpu"
+
+    if configured_device == "xpu":
+        if not (hasattr(torch, "xpu") and torch.xpu.is_available()):
+            print("警告: 配置了 xpu 设备但未检测到 Intel XPU，尝试初始化 IPEX...")
+            if not _init_xpu():
+                print("错误: 无法加载 intel-extension-for-pytorch，请确认已安装。")
+                print("安装命令: pip install intel-extension-for-pytorch")
+                sys.exit(1)
+            if not torch.xpu.is_available():
+                print("错误: IPEX 已加载但仍未检测到 XPU 设备。")
+                sys.exit(1)
+        else:
+            _init_xpu()
+
+    return configured_device
+
+
+def get_supported_dtype(device: str, configured_dtype: str) -> torch.dtype:
+    """Pick the torch dtype to run with on `device`.
+
+    "auto" maps xpu -> float16, cuda/mps -> bfloat16, anything else -> float32.
+    An explicit name is looked up (unknown names fall back to bfloat16), and
+    bfloat16 is replaced by float16 on xpu with a printed notice.
+    """
+    if configured_dtype == "auto":
+        auto_choice = {
+            "xpu": torch.float16,
+            "cuda": torch.bfloat16,
+            "mps": torch.bfloat16,
+        }
+        return auto_choice.get(device, torch.float32)
+
+    named = {
+        "bfloat16": torch.bfloat16,
+        "float16": torch.float16,
+        "float32": torch.float32,
+    }
+    chosen = named.get(configured_dtype, torch.bfloat16)
+    if chosen == torch.bfloat16 and device == "xpu":
+        print("提示: Intel Arc GPU 上 bfloat16 部分算子兼容性不佳，自动切换为 float16")
+        return torch.float16
+    return chosen
+
+
+def create_generator(device: str, seed: int) -> torch.Generator:
+    """Return a torch.Generator seeded with `seed`.
+
+    "xpu" and "cuda" get a generator created on that device; others use `torch.Generator()`.
+    """
+    gen = torch.Generator(device) if device in ("xpu", "cuda") else torch.Generator()
+    return gen.manual_seed(seed)
+
+
+# ---------------------------------------------------------------------------
+# 配置加载
+# ---------------------------------------------------------------------------
+
+def load_config(config_path: str = "config.yaml") -> dict:
+    """Load the YAML config; a non-empty LLM_API_KEY env var overrides `llm.api_key`."""
+    with open(config_path, "r", encoding="utf-8") as fh:
+        settings = yaml.safe_load(fh)
+    llm_section = settings["llm"]
+    llm_section["api_key"] = os.environ.get("LLM_API_KEY") or llm_section.get("api_key", "")
+    return settings
+
+
+# ---------------------------------------------------------------------------
+# LLM 古诗词分析
+# ---------------------------------------------------------------------------
+
+SYSTEM_PROMPT = """\
+# Role（角色设定）
+
+你是一位顶级的中国古典文学泰斗，同时也是一位精通 AI 文本到图像生成（Text-to-Image）\
+底层逻辑的顶级提示词工程师（Prompt Engineer）。\
+你对中国古诗词中的"意境"、"留白"、"虚实相生"有极其深刻的理解，\
+并且知道如何将这些抽象的美学概念转化为扩散模型（Diffusion Models）能够精准识别的\
+视觉特征参数（如光影、材质、构图、渲染引擎词汇）。
+
+# Objective（工作目标）
+
+你的任务是接收用户输入的古诗词，严格按照"四段式思维链"将其转化为最高质量的图像生成提示词。\
+你需要具备探索长诗或多句诗词连贯多图意象的能力，\
+确保最终生成的单张或多张分镜图像能够完美传达原诗的意境，而不只是生硬的元素堆砌。
+
+# Workflow（强制执行四段式思维链）
+
+对于用户的每一次输入，你必须严格按顺序在内部执行以下四个步骤，缺一不可：
+
+## 第一步：意境与分镜逻辑判断
+
+重要分析全诗的时空连贯性：
+- 如果全诗描绘的是同一时间、同一地点的统一场景，生成【单幅画面】
+- 如果诗句间存在明显的视角切换（如远景切特写）、时间推移（如白天到黑夜）或场景跳跃，\
+按内在逻辑拆分为 2 到 4 幅画面的【分镜序列】
+- 意境连贯的相邻诗句应合并为一幅，避免碎片化
+
+## 第二步：意境深度解析
+
+针对每一个分镜（或单幅画面），分析：
+- 核心情感基调（苍凉悲壮 / 空灵婉约 / 萧瑟肃杀 / 雄浑壮阔 / 闲适恬淡 / 凄婉哀怨等）
+- 季节时间与天气状态
+- "意境"类型与情感张力
+
+## 第三步：现代文视觉转义
+
+将每一个分镜扩写为极具画面感的现代文视觉脚本。\
+你必须大胆发挥想象力，补全诗句中省略的视觉细节，明确写出：
+- **主体景物**：人物姿态、动作、表情、服饰；核心景物的具体形态
+- **配景与地理环境**：山川、水域、植被、建筑等空间层次
+- **光线条件**：斜阳逆光、清冷月光、破晓微光、黄昏余晖等
+- **天气效果**：晨雾弥漫、细雨如织、大雪纷飞、长风浩荡等
+- **画面构图**：大远景 / 中景 / 特写 / 俯瞰 / 平视等
+
+## 第四步：图像生成 Prompt 生成
+
+基于第三步的现代文视觉脚本，为每一个分镜生成精确的图像 Prompt。
+
+### Prompt 结构（必须遵循）
+
+每个 Prompt 必须涵盖以下六大要素，按顺序自然融合为一段连贯流畅的描述文字：
+1. 画面主体：核心人物 / 景物及其状态
+2. 环境背景：空间层次、地理环境、建筑植被
+3. 场景光影：具体光源、光线方向、明暗对比
+4. 气候与氛围：天气、季节、情感色彩
+5. 艺术风格与媒介：中国传统画风关键词 + 媒介质感
+6. 图像质量词：masterpiece, 8k resolution, highly detailed 等
+
+【极其重要】最终输出的 prompt 和 prompt_en 必须是自然流畅的连续段落，\
+绝对不要使用方括号 [] 标注要素名称，不要出现类似"[画面主体：...]"的格式标签。\
+六大要素是你内部的组织逻辑，输出时必须将它们无缝融合为一段完整的、富有画面感的描述。
+
+### Prompt 长度要求
+
+Z-Image-Turbo 非常适合处理包含丰富细节的长描述提示词：
+- 中文 Prompt：80-250 字
+- 英文 Prompt：80-200 词
+
+### 风格约束（极其重要）
+
+Z-Image-Turbo 不支持负面提示词（Negative Prompts），所有约束必须以正向描述表达。\
+为确保生成"古诗词意境"而非现代写实照片，你必须在 Prompt 末尾加上强有力的风格约束词。\
+以下是可根据诗意灵活选用的风格约束：
+
+| 风格 | Prompt 约束词 |
+|------|-------------|
+| 水墨写意 | Traditional Chinese ink wash painting (中国传统水墨画), freehand brushwork (写意), \
+negative space (留白), ethereal atmosphere (空灵的氛围) |
+| 青绿山水 | Traditional Chinese blue-green landscape painting (青绿山水), mineral pigments (石青石绿), \
+golden and jade-like tones (金碧辉煌) |
+| 工笔花鸟 | Chinese meticulous brushwork (工笔), fine detailed rendering (精细渲染), \
+delicate line drawing (细腻勾勒) |
+| 工笔重彩 | Chinese meticulous heavy-color painting (工笔重彩), rich saturated pigments (浓墨重色), \
+elaborate detail (华丽精细) |
+| 文人画 | Chinese literati painting (文人画), poetry-calligraphy-painting unity (诗书画印一体), \
+lofty elegance (意趣高远) |
+| 泼墨大写意 | Splash ink painting (泼墨大写意), bold expressive brushstrokes (墨色淋漓), \
+majestic momentum (气势磅礴) |
+| 浅绛山水 | Light crimson landscape painting (浅绛山水), ochre wash (赭石淡彩), \
+sparse and distant (萧疏清远) |
+
+通用质量约束词（所有风格都应附加）：\
+masterpiece, 8k resolution, highly detailed, cinematic composition
+
+如果用户指定了风格期望，请优先使用用户指定的风格。\
+如果用户未指定风格，请根据诗意自动选择最契合的传统画风。
+
+### 中文 Prompt 要求
+- 使用中国传统绘画的专业术语
+- 具体且富有画面感，避免抽象空泛的概念
+- 末尾必须附加风格约束词和质量约束词
+
+### 英文 Prompt 要求
+- 中文 Prompt 的忠实翻译与适配，保持相同的画面内容和风格意图
+- 使用对应的英文艺术术语
+- 自然流畅的英文表达，非逐字翻译
+- 末尾必须附加英文风格约束词和质量约束词
+
+# Rules（输出规则）
+
+严格按照以下 JSON 格式输出结果，不要输出任何与格式无关的文字。\
+四段式思维链的推理过程请融入到对应的 JSON 字段中：
+
+```json
+{
+  "title": "诗词标题",
+  "author": "作者",
+  "dynasty": "朝代",
+  "genre": "体裁（如：五言绝句、七言律诗、词·水调歌头等）",
+  "analysis": "第一步【分镜逻辑判断】的理由 + 第二步【意境深度解析】的综合分析：包含分镜拆分依据、整首诗的意境类型、核心情感基调、时空特征（中文，3-5句话）",
+  "images": [
+    {
+      "scene": "这幅画对应的诗句（原文）",
+      "description": "第三步【现代文视觉转义】的完整输出：极具画面感的视觉脚本，包含主体景物、配景、光线、天气、构图等所有视觉细节（中文，100-200字）",
+      "style": "选用的画风（中文名称，如：水墨写意、青绿山水、工笔花鸟等）",
+      "prompt": "第四步生成的中文 Prompt，自然融合六大要素为连续流畅的段落（禁止使用方括号标注），末尾附加风格约束词和质量词，80-250字",
+      "prompt_en": "Step 4 English Prompt, naturally blending all six elements into a fluent paragraph (NO square brackets), ending with style and quality keywords, 80-200 words"
+    }
+  ]
+}
+```\
+"""
+
+
+def _build_user_message(poem: str, cfg: dict) -> str:
+    """Build the user message for the LLM: the poem plus an optional style preference.
+
+    A missing, empty or null `image.style_preference` selects the "auto" wording.
+    """
+    # `or ""` guards against a blank YAML value, which is loaded as None.
+    style_pref = (cfg["image"].get("style_preference") or "").strip()
+    style_line = f"【风格期望】：{style_pref or '默认（根据诗意自动选择最契合的传统画风）'}"
+    return (
+        f"请为以下古诗词生成图像提示词：\n\n"
+        f"【输入诗词】：\n{poem}\n\n"
+        f"{style_line}\n\n"
+        f"请严格按照 System Prompt 的要求，首先进行【意境与分镜逻辑判断】，"
+        f"随后针对单幅或多幅分镜依次输出对应的【意境深度解析】、"
+        f"【现代文视觉转义】以及最终的【图像生成 Prompt】。"
+    )
+
+
+def analyze_poetry(poem: str, cfg: dict) -> dict:
+    """Ask the LLM to analyse the poem and return the parsed JSON plan (exits if unparseable)."""
+    llm_cfg = cfg["llm"]
+
+    client = OpenAI(
+        base_url=llm_cfg["base_url"],
+        api_key=llm_cfg["api_key"],
+        timeout=60,
+    )
+
+    style_pref = (cfg["image"].get("style_preference") or "").strip()
+    print(f"\n{'='*60}")
+    print("正在调用 LLM 分析古诗词意境（四段式思维链）...")
+    print(f"模型: {llm_cfg['model']}")
+    if style_pref:
+        print(f"风格期望: {style_pref}")
+    print(f"{'='*60}\n")
+
+    user_message = _build_user_message(poem, cfg)
+
+    response = client.chat.completions.create(
+        model=llm_cfg["model"],
+        temperature=llm_cfg.get("temperature", 0.7),
+        max_tokens=llm_cfg.get("max_tokens", 4096),
+        messages=[
+            {"role": "system", "content": SYSTEM_PROMPT},
+            {"role": "user", "content": user_message},
+        ],
+    )
+
+    # The API may return `content=None`; treat that as empty text rather than crashing.
+    content = (response.choices[0].message.content or "").strip()
+
+    json_match = re.search(r"```(?:json)?\s*(.*?)```", content, re.DOTALL)
+    if json_match:
+        content = json_match.group(1).strip()
+
+    try:
+        return json.loads(content)
+    except json.JSONDecodeError:
+        json_match = re.search(r"\{.*\}", content, re.DOTALL)
+    try:
+        # Last resort: parse the outermost {...} span found in the surrounding text.
+        return json.loads(json_match.group() if json_match else "")
+    except json.JSONDecodeError:
+        print("LLM 返回内容无法解析为 JSON：")
+        print(content)
+        sys.exit(1)
+
+
+def display_analysis(analysis: dict) -> None:
+    """Print a human-readable summary of the LLM analysis result to stdout."""
+    rule = "=" * 60
+    print(f"\n{rule}")
+    title, author = analysis.get("title", "未知"), analysis.get("author", "未知")
+    dynasty, genre = analysis.get("dynasty", ""), analysis.get("genre", "")
+    print(f"📜  {title}  —  {dynasty} · {author}  [{genre}]")
+    print(rule)
+    print(f"\n🔍  意境分析：{analysis.get('analysis', '')}\n")
+
+    scenes = analysis["images"]
+    for index, scene in enumerate(scenes, start=1):
+        print("─" * 50)
+        print(f"🖼  第 {index} 幅  |  {scene['scene']}")
+        print(f"   画风选择：{scene.get('style', '未指定')}")
+        print(f"   中文描述：{scene['description']}")
+        print(f"   Prompt(zh)：{scene['prompt'][:120]}...")
+        english = scene.get("prompt_en")
+        if english:
+            print(f"   Prompt(en)：{english[:120]}...")
+    print(f"\n共 {len(scenes)} 幅画面\n")
+
+
+# ---------------------------------------------------------------------------
+# 尺寸预设
+# ---------------------------------------------------------------------------
+
+SIZE_PRESETS: dict[str, tuple[int, int]] = {  # preset name -> (width, height)
+    "square":      (1024, 1024),
+    "phone":       ( 576, 1024),
+    "phone_hd":    ( 768, 1344),
+    "desktop":     (1024,  576),
+    "desktop_hd":  (1344,  768),
+    "ultrawide":   (1536,  640),
+}
+
+
+def resolve_image_size(img_cfg: dict) -> tuple[int, int]:
+    """Return (width, height) from `size_preset`, else from the `width`/`height` keys (default 1024)."""
+    # A blank YAML value loads as None; `or ""` keeps that from crashing `.strip()`.
+    preset = str(img_cfg.get("size_preset") or "").strip().lower()
+    if preset != "custom" and preset in SIZE_PRESETS:  # "" and unknown names fall through
+        return SIZE_PRESETS[preset]
+    return img_cfg.get("width", 1024), img_cfg.get("height", 1024)
+
+
+# ---------------------------------------------------------------------------
+# Z-Image-Turbo 本地图片生成
+# ---------------------------------------------------------------------------
+
+HF_REPO = "Tongyi-MAI/Z-Image-Turbo"
+
+# Small config files fetched from the Hugging Face repo (network needed the first time; cached afterwards)
+_HF_CONFIG_FILES = [
+    "model_index.json",
+    "scheduler/scheduler_config.json",
+    "tokenizer/merges.txt",
+    "tokenizer/tokenizer_config.json",
+    "tokenizer/vocab.json",
+    "text_encoder/config.json",
+    "text_encoder/generation_config.json",
+    "transformer/config.json",
+    "vae/config.json",
+]
+
+
+def _force_link(src: Path, dst: Path) -> None:
+    """Make `dst` point at the content of `src`, replacing whatever is already at `dst`.
+
+    Three strategies are tried in order; an OSError moves on to the next one:
+      1. symbolic link - on Windows this needs admin rights or developer mode
+      2. hard link     - no privileges needed, but src/dst must share a drive
+      3. file copy     - last resort; slow for large files but always usable
+    """
+    target = Path(src).resolve()
+    link = Path(dst)
+    # Remove a previous file or (possibly dangling) symlink at the destination.
+    if link.exists() or link.is_symlink():
+        link.unlink()
+
+    # Each callable raises OSError when its kind of link is unavailable.
+    attempts = (
+        lambda: link.symlink_to(target),
+        lambda: os.link(str(target), str(link)),
+    )
+    for attempt in attempts:
+        try:
+            attempt()
+        except OSError:
+            continue
+        return
+
+    # Neither link type worked (e.g. no privileges and a different drive):
+    # fall back to copying the file.
+    print(f"  提示: 无法创建链接，正在复制文件: {target.name}（可能较慢）")
+    shutil.copy2(str(target), str(link))
+
+
+def _is_comfyui_mode(cfg: dict) -> bool:
+    """Return True when all three ComfyUI split-file paths are configured (non-empty).
+
+    A `comfyui:` key with no children loads from YAML as None; it is treated as
+    "not configured" instead of raising AttributeError.
+    """
+    comfyui = cfg["image"].get("comfyui") or {}
+    return all(comfyui.get(part) for part in ("text_encoder", "transformer", "vae"))
+
+
+def _is_openvino_mode(cfg: dict) -> bool:
+    """Return True when `image.openvino.model_path` is set; a null `openvino:` section counts as unset."""
+    return bool((cfg["image"].get("openvino") or {}).get("model_path"))
+
+
+def _load_pipeline_openvino(cfg: dict):
+    """Load the Z-Image-Turbo pipeline through OpenVINO.
+
+    The OpenVINO IR model must be exported beforehand with optimum-cli:
+        optimum-cli export openvino --model Tongyi-MAI/Z-Image-Turbo \\
+            --weight-format int8 z-image-turbo-ov
+    """
+    from optimum.intel import OVZImagePipeline
+
+    ov_cfg = cfg["image"]["openvino"]
+    model_path = ov_cfg["model_path"]
+    ov_device = ov_cfg.get("device", "GPU")
+
+    print(f"模式: OpenVINO 推理")
+    print(f"  模型路径 : {model_path}")
+    print(f"  OV 设备  : {ov_device}")
+
+    if not Path(model_path).exists():
+        print(f"错误: OpenVINO 模型目录不存在: {model_path}")
+        print("请先使用 optimum-cli 导出模型：")
+        print(f"  optimum-cli export openvino --model {HF_REPO} --weight-format int8 {model_path}")
+        sys.exit(1)
+
+    pipe = OVZImagePipeline.from_pretrained(model_path, device=ov_device)
+    return pipe
+
+
+def _build_hf_layout_from_comfyui(cfg: dict) -> str:
+    """Build a Hugging Face compatible directory layout from ComfyUI split files.
+
+    The small config files listed in `_HF_CONFIG_FILES` are downloaded from the
+    Hugging Face repo into a local cache directory, and the ComfyUI weight files
+    are linked into it via `_force_link`. Returns the cache directory path as a string.
+    """
+    from huggingface_hub import hf_hub_download
+
+    comfyui = cfg["image"]["comfyui"]
+    te_path = Path(comfyui["text_encoder"]).resolve()
+    tf_path = Path(comfyui["transformer"]).resolve()
+    vae_path = Path(comfyui["vae"]).resolve()
+
+    for name, p in [("text_encoder", te_path), ("transformer", tf_path), ("vae", vae_path)]:
+        if not p.exists():
+            print(f"错误: ComfyUI {name} 文件不存在: {p}")
+            sys.exit(1)
+
+    cache_dir = Path(".cache") / "comfyui_hf_layout"
+    cache_dir.mkdir(parents=True, exist_ok=True)
+
+    print("正在准备 HuggingFace 兼容目录结构（仅首次需下载配置文件）...")
+
+    for rel_path in _HF_CONFIG_FILES:
+        dest = cache_dir / rel_path
+        if not dest.exists():
+            dest.parent.mkdir(parents=True, exist_ok=True)
+            src = hf_hub_download(HF_REPO, rel_path)
+            shutil.copy2(src, dest)
+
+    # Link the weight file: text_encoder
+    te_link = cache_dir / "text_encoder" / "model.safetensors"
+    _force_link(te_path, te_link)
+    # Remove the shard index if present, because the ComfyUI file is a single unsharded file
+    shard_idx = cache_dir / "text_encoder" / "model.safetensors.index.json"
+    if shard_idx.exists():
+        shard_idx.unlink()
+
+    # Link the weight file: transformer
+    tf_link = cache_dir / "transformer" / "diffusion_pytorch_model.safetensors"
+    _force_link(tf_path, tf_link)
+
+    # Link the weight file: vae
+    vae_link = cache_dir / "vae" / "diffusion_pytorch_model.safetensors"
+    _force_link(vae_path, vae_link)
+
+    print(f"目录结构已就绪: {cache_dir}")
+    return str(cache_dir)
+
+
+
+def _load_pipeline_comfyui(cfg: dict, device: str, torch_dtype: torch.dtype):
+    """Build the pipeline component by component from ComfyUI split safetensors files (`device` is not used here)."""
+    from diffusers import (
+        AutoencoderKL,
+        FlowMatchEulerDiscreteScheduler,
+        ZImagePipeline,
+        ZImageTransformer2DModel,
+    )
+    from transformers import AutoTokenizer, Qwen3Model
+
+    comfyui = cfg["image"]["comfyui"]
+    te_path = comfyui["text_encoder"]
+    tf_path = comfyui["transformer"]
+    vae_path = comfyui["vae"]
+
+    print("模式: ComfyUI 拆分文件加载")
+    print(f"  Text Encoder : {te_path}")
+    print(f"  Transformer  : {tf_path}")
+    print(f"  VAE          : {vae_path}")
+
+    print("  加载 Scheduler & Tokenizer（配置来自 HuggingFace 缓存）...")
+    scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(HF_REPO, subfolder="scheduler")
+    tokenizer = AutoTokenizer.from_pretrained(HF_REPO, subfolder="tokenizer")
+
+    print("  加载 Transformer...")
+    transformer = ZImageTransformer2DModel.from_single_file(
+        tf_path,
+        config=HF_REPO,
+        subfolder="transformer",
+        torch_dtype=torch_dtype,
+    )
+
+    print("  加载 VAE...")
+    vae = AutoencoderKL.from_single_file(
+        vae_path,
+        config=HF_REPO,
+        subfolder="vae",
+        torch_dtype=torch_dtype,
+    )
+
+    print("  加载 Text Encoder (Qwen3 4B)...")
+    te_config_path = hf_hub_download_cached(HF_REPO, "text_encoder/config.json")
+    te_gen_config_path = hf_hub_download_cached(HF_REPO, "text_encoder/generation_config.json")
+
+    te_parent_dir = str(Path(te_path).resolve().parent)  # NOTE(review): presumably next to the weights so a hard link stays on one drive - confirm
+    with tempfile.TemporaryDirectory(dir=te_parent_dir) as tmpdir:
+        shutil.copy2(te_config_path, os.path.join(tmpdir, "config.json"))
+        shutil.copy2(te_gen_config_path, os.path.join(tmpdir, "generation_config.json"))
+        _force_link(Path(te_path), Path(tmpdir) / "model.safetensors")
+        text_encoder = Qwen3Model.from_pretrained(tmpdir, torch_dtype=torch_dtype)  # must load before tmpdir is removed
+
+    pipe = ZImagePipeline(
+        scheduler=scheduler,
+        vae=vae,
+        text_encoder=text_encoder,
+        tokenizer=tokenizer,
+        transformer=transformer,
+    )
+    return pipe
+
+
+def hf_hub_download_cached(repo_id: str, filename: str) -> str:
+    """Fetch `filename` from the Hugging Face repo `repo_id` and return what hf_hub_download returns."""
+    import huggingface_hub
+    return huggingface_hub.hf_hub_download(repo_id, filename)
+
+
+def load_pipeline(cfg: dict):
+    """Load the Z-Image-Turbo pipeline (OpenVINO, ComfyUI or Hugging Face); records `_resolved_device` and `_openvino_mode` in cfg."""
+    img_cfg = cfg["image"]
+
+    # OpenVINO mode: optimum.intel manages the device, so resolve_device is not called
+    if _is_openvino_mode(cfg):
+        ov_device = img_cfg["openvino"].get("device", "GPU")
+        print(f"\n{'='*60}")
+        print("正在加载 Z-Image-Turbo 模型 (OpenVINO)...")
+        print(f"OpenVINO 设备: {ov_device}")
+        print(f"{'='*60}\n")
+
+        pipe = _load_pipeline_openvino(cfg)
+        cfg["_resolved_device"] = "cpu"
+        cfg["_openvino_mode"] = True
+        return pipe
+
+    from diffusers import ZImagePipeline
+
+    device = resolve_device(img_cfg.get("device", "auto"))
+    torch_dtype = get_supported_dtype(device, img_cfg.get("torch_dtype", "auto"))
+
+    print(f"\n{'='*60}")
+    print("正在加载 Z-Image-Turbo 模型...")
+    print(f"推理设备: {device}")
+    print(f"数据类型: {torch_dtype}")
+    print(f"{'='*60}\n")
+
+    if _is_comfyui_mode(cfg):
+        pipe = _load_pipeline_comfyui(cfg, device, torch_dtype)
+    else:
+        model_id = img_cfg["model_id"]
+        print(f"模式: HuggingFace 标准加载")
+        print(f"模型路径: {model_id}")
+        pipe = ZImagePipeline.from_pretrained(
+            model_id,
+            torch_dtype=torch_dtype,
+            low_cpu_mem_usage=False,
+        )
+
+    raw_offload = str(img_cfg.get("enable_cpu_offload", "false")).strip().lower()  # accepts YAML booleans and strings alike
+    offload_mode = {
+        "false": None, "0": None, "no": None, "off": None,
+        "true": "model", "1": "model", "yes": "model", "on": "model",
+        "model": "model",
+        "sequential": "sequential",
+    }.get(raw_offload, None)
+
+    if offload_mode and device in ("cuda", "xpu", "mps"):
+        if offload_mode == "sequential":
+            print("启用 Sequential CPU Offload: 逐层搬入显卡，最省显存但较慢")
+            pipe.enable_sequential_cpu_offload(device=device)
+        else:
+            print("启用 Model CPU Offload: 组件级按需加载到显卡")
+            pipe.enable_model_cpu_offload(device=device)
+    else:
+        if device != "cpu" and not offload_mode:
+            print("提示: 所有模型将同时加载到显卡，如显存不足请在配置中开启 enable_cpu_offload")
+        pipe.to(device)
+
+    if device not in ("xpu", "cpu"):  # attention backend is left untouched on xpu and cpu
+        attn_backend = img_cfg.get("attention_backend", "sdpa")
+        if attn_backend == "flash":
+            pipe.transformer.set_attention_backend("flash")
+        elif attn_backend == "flash_3":
+            pipe.transformer.set_attention_backend("_flash_3")
+
+    lora_cfg = cfg.get("lora", {})
+    if lora_cfg.get("enabled") and lora_cfg.get("path"):
+        lora_path = lora_cfg["path"]
+        lora_weight = lora_cfg.get("weight", 0.8)
+        print(f"正在加载 LoRA: {lora_path} (权重: {lora_weight})")
+        pipe.load_lora_weights(lora_path)
+        pipe.fuse_lora(lora_scale=lora_weight)
+        print("LoRA 加载完成")
+
+    cfg["_resolved_device"] = device
+    cfg["_openvino_mode"] = False
+    return pipe
+
+
+def generate_images(pipe, analysis: dict, cfg: dict) -> list[Path]:
+    """Generate the image(s) for each analysed scene and return the saved file paths."""
+    img_cfg = cfg["image"]
+    out_cfg = cfg["output"]
+    lora_cfg = cfg.get("lora", {})
+    device = cfg.get("_resolved_device", "cpu")  # defaults to "cpu" when the key is absent
+
+    output_dir = Path(out_cfg.get("dir", "./output"))
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    prefix = out_cfg.get("filename_prefix", "poem")
+    width, height = resolve_image_size(img_cfg)
+    steps = img_cfg.get("num_inference_steps", 9)
+    guidance = img_cfg.get("guidance_scale", 0.0)
+    seed = img_cfg.get("seed", -1)  # negative => a time-derived seed is used per image
+
+    trigger_words = ""
+    if lora_cfg.get("enabled") and lora_cfg.get("trigger_words"):
+        trigger_words = lora_cfg["trigger_words"].strip()
+
+    preset = img_cfg.get("size_preset", "custom")
+    prompt_lang = img_cfg.get("prompt_language", "zh")
+    images_per_prompt = max(1, min(10, img_cfg.get("images_per_prompt", 1)))  # clamp to 1..10
+    print(f"图片尺寸: {width}×{height}" + (f" (预设: {preset})" if preset != "custom" else ""))
+    print(f"Prompt 语言: {prompt_lang}")
+    if images_per_prompt > 1:
+        print(f"每个 prompt 生成 {images_per_prompt} 张图（不同种子）")
+
+    saved_paths = []
+    total = len(analysis["images"])
+
+    for i, img_info in enumerate(analysis["images"], 1):
+        if prompt_lang == "en" and img_info.get("prompt_en"):
+            prompt = img_info["prompt_en"]
+        else:
+            prompt = img_info["prompt"]  # the Chinese prompt is the fallback
+        if trigger_words:
+            prompt = f"{trigger_words}, {prompt}"  # LoRA trigger words are prepended
+
+        print(f"\n[{i}/{total}] 正在生成: {img_info['scene']}")
+        print(f"  画风: {img_info.get('style', '未指定')}")
+        print(f"  Prompt({prompt_lang}): {prompt[:120]}...")
+
+        for j in range(images_per_prompt):
+            variant_offset = i * 100 + j  # distinct per (scene, variant) because j < 10
+            actual_seed = (seed + variant_offset) if seed >= 0 else (int(time.time() * 1000) % (2**32) + variant_offset)
+            generator = create_generator(device, actual_seed)
+
+            suffix = chr(ord("a") + j) if images_per_prompt > 1 else ""  # "a", "b", ... per variant
+            if images_per_prompt > 1:
+                print(f"  --- 第 {j+1}/{images_per_prompt} 张 (seed={actual_seed}) ---")
+
+            start_time = time.time()
+
+            result = pipe(
+                prompt=prompt,
+                height=height,
+                width=width,
+                num_inference_steps=steps,
+                guidance_scale=guidance,
+                generator=generator,
+            )
+            image: Image.Image = result.images[0]
+
+            elapsed = time.time() - start_time
+            print(f"  生成完成，耗时 {elapsed:.1f}s")
+
+            img_path = output_dir / f"{prefix}_{i:02d}{suffix}.png"
+            image.save(img_path)
+            saved_paths.append(img_path)
+            print(f"  已保存: {img_path}")
+
+        if out_cfg.get("save_prompts", True):  # one prompt record per scene, after its images
+            txt_path = output_dir / f"{prefix}_{i:02d}_prompt.txt"
+            prompt_zh = img_info["prompt"]
+            prompt_en = img_info.get("prompt_en", "")
+            txt_path.write_text(
+                f"Scene: {img_info['scene']}\n"
+                f"Style: {img_info.get('style', '')}\n"
+                f"Description: {img_info['description']}\n"
+                f"Prompt(zh): {prompt_zh}\n"
+                f"Prompt(en): {prompt_en}\n"
+                f"Used({prompt_lang}): {prompt}\n",
+                encoding="utf-8",
+            )
+
+    return saved_paths
+
+
+# ---------------------------------------------------------------------------
+# 主流程
+# ---------------------------------------------------------------------------
+
+def main():
+    """CLI entry point: read a poem, analyse it with the LLM, then render the images."""
+    parser = argparse.ArgumentParser(
+        description="古诗词意境图生成器 — 基于 LLM 分析 + Z-Image-Turbo 生成"
+    )
+    parser.add_argument(
+        "-c", "--config",
+        default="config.yaml",
+        help="配置文件路径（默认: config.yaml）",
+    )
+    parser.add_argument(
+        "-p", "--poem",
+        type=str,
+        default=None,
+        help="直接传入古诗词文本（如不指定则交互式输入）",
+    )
+    parser.add_argument(
+        "--analyze-only",
+        action="store_true",
+        help="仅进行 LLM 分析，不生成图片",
+    )
+    parser.add_argument(
+        "-o", "--output",
+        type=str,
+        default=None,
+        help="覆盖输出目录",
+    )
+    args = parser.parse_args()
+
+    cfg = load_config(args.config)
+
+    if args.output:
+        cfg["output"]["dir"] = args.output
+    else:
+        now = datetime.now()
+        run_dir = Path(now.strftime("%Y-%m-%d")) / now.strftime("%H-%M-%S")
+        cfg["output"]["dir"] = str(Path(cfg["output"].get("dir", "./output")) / run_dir)
+
+    if args.poem:
+        poem = args.poem
+    else:
+        print("请输入古诗词（输入空行结束）：")
+        lines = []
+        try:
+            while (line := input()).strip():
+                lines.append(line)
+        except EOFError:
+            pass  # stdin closed (e.g. piped input) before a blank line was entered
+        poem = "\n".join(lines)
+
+    if not poem.strip():
+        print("未输入任何内容，退出。")
+        sys.exit(0)
+
+    print(f"\n📝  输入的诗词：\n{poem}")
+
+    analysis = analyze_poetry(poem, cfg)
+    display_analysis(analysis)
+
+    output_dir = Path(cfg["output"].get("dir", "./output"))
+    output_dir.mkdir(parents=True, exist_ok=True)
+    analysis_path = output_dir / "analysis.json"
+    analysis_path.write_text(
+        json.dumps(analysis, ensure_ascii=False, indent=2),
+        encoding="utf-8",
+    )
+    print(f"分析结果已保存: {analysis_path}")
+
+    if args.analyze_only:
+        print("\n已完成分析（--analyze-only 模式），跳过图片生成。")
+        return
+
+    if _is_openvino_mode(cfg):
+        ov_device = cfg["image"]["openvino"].get("device", "GPU")
+        print(f"\n🖥  推理模式: OpenVINO ({ov_device})")
+        if ov_device.upper() == "GPU" and hasattr(torch, "xpu") and torch.xpu.is_available():
+            print(f"   Intel XPU: {torch.xpu.get_device_name(0)}")
+    else:
+        device = resolve_device(cfg["image"].get("device", "auto"))
+        print(f"\n🖥  推理设备: {device}")
+        if device == "xpu":
+            print(f"   Intel XPU: {torch.xpu.get_device_name(0)}")
+            print(f"   显存: {torch.xpu.get_device_properties(0).total_memory / 1024**3:.1f} GB")
+        elif device == "cuda":
+            print(f"   CUDA GPU: {torch.cuda.get_device_name(0)}")
+            print(f"   显存: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")
+
+    pipe = load_pipeline(cfg)
+    saved = generate_images(pipe, analysis, cfg)
+
+    print(f"\n{'='*60}")
+    print(f"全部完成！共生成 {len(saved)} 幅图片：")
+    for p in saved:
+        print(f"  📁 {p}")
+    print(f"{'='*60}\n")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/poetry_to_image_qwen.py b/poetry_to_image_qwen.py
new file mode 100644
index 0000000..f43e561
--- /dev/null
+++ b/poetry_to_image_qwen.py
@@ -0,0 +1,529 @@
+"""
+古诗词意境图生成器（Qwen-Image 云端版）
+将中国古典诗词通过 LLM 分析拆解为多个意境画面，
+再使用 Qwen-Image API（通过 SiliconFlow）逐一生成高质量图片。
+"""
+
+import argparse
+import json
+import os
+import re
+import sys
+import time
+from datetime import datetime
+from io import BytesIO
+from pathlib import Path
+
+import requests as http_requests
+import yaml
+from openai import OpenAI
+from PIL import Image
+
+
+# ---------------------------------------------------------------------------
+# 配置加载
+# ---------------------------------------------------------------------------
+
+def load_config(config_path: str = "config_qwen.yaml") -> dict:
+    """Load the YAML config; API keys from environment variables win over the file."""
+    with open(config_path, "r", encoding="utf-8") as stream:
+        settings = yaml.safe_load(stream)
+
+    # LLM key: LLM_API_KEY env var first, then llm.api_key from the file.
+    llm_key = os.environ.get("LLM_API_KEY") or settings["llm"].get("api_key", "")
+    settings["llm"]["api_key"] = llm_key
+
+    # Image key: IMAGE_API_KEY env var, then image.api_key, then the LLM key.
+    env_image_key = os.environ.get("IMAGE_API_KEY")
+    image_key = env_image_key or settings["image"].get("api_key", "") or llm_key
+    settings["image"]["api_key"] = image_key
+    return settings
+
+
+# ---------------------------------------------------------------------------
+# LLM 古诗词分析
+# ---------------------------------------------------------------------------
+
+SYSTEM_PROMPT = """\
+你是一位精通中国古典文学与视觉艺术的大师，同时深谙文生图 AI 的 prompt 工程。\
+你的任务是分析用户提供的古诗词，将其意境拆解为若干幅独立的画面，\
+每幅画面对应诗词中一个完整的意象或场景。
+
+## 核心原则：信、雅、达
+
+1. **信**（忠实）：画面内容必须忠于原诗的意象、情感和时代背景，不可凭空臆造。\
+诗中有月则画月，诗中无人则不强加人物。
+2. **雅**（优美）：画面描述应体现中国传统美学，注重意境营造、留白与含蓄之美。
+3. **达**（通畅）：prompt 要清晰、具体、富有画面感，\
+能被文生图模型准确理解并生成高质量图像。
+
+## 诗词体裁识别与风格匹配
+
+请先识别诗词的体裁（唐诗/宋词/元曲/其他），再根据题材选择最合适的中国传统画风。\
+以下是可选的风格菜单，请根据诗意灵活选取，同一首诗的不同画面可以使用不同风格：
+
+| 风格 | prompt 关键词 | 适用场景 |
+|------|-------------|---------|
+| 水墨写意 | 水墨写意，淡墨晕染，留白 | 山水、边塞、禅意、抒情 |
+| 青绿山水 | 青绿山水，石青石绿，金碧辉煌 | 春夏山水、游记、壮丽河山 |
+| 工笔花鸟 | 工笔花鸟，细腻勾勒，精细渲染 | 花卉、仕女、宫廷、精致细腻 |
+| 工笔重彩 | 工笔重彩，浓墨重色，华丽精细 | 华丽、富贵、节庆、历史叙事 |
+| 没骨画法 | 没骨画法，不勾轮廓，直接点染 | 花卉、蔬果、清新淡雅 |
+| 文人画 | 文人画风格，诗书画印，意趣高远 | 隐逸、高洁、书卷气 |
+| 泼墨大写意 | 泼墨大写意，墨色淋漓，气势磅礴 | 豪放、苍茫、雄壮 |
+| 界画/建筑 | 界画，工整精细，楼台亭阁 | 楼阁、宫殿、城市场景 |
+| 浅绛山水 | 浅绛山水，赭石淡彩，萧疏清远 | 秋冬山水、萧瑟、怀古 |
+
+### 体裁特点提示
+- **唐诗**（尤其五七言律绝）：意境开阔，气象宏大，多配水墨写意或青绿山水。
+- **宋词**：情感细腻，意象精致，婉约派多配工笔花鸟/没骨，豪放派可配泼墨写意。
+- **边塞诗**：苍凉壮阔，适合泼墨大写意或浅绛山水。
+- **田园诗**：恬淡自然，适合青绿山水或文人画。
+- **咏物诗/闺怨词**：精致细腻，适合工笔花鸟或没骨画法。
+
+## 分析步骤
+
+1. 识别诗词的标题、作者、体裁、题材和情感基调。
+2. 逐句/逐联理解字面意思与深层意境。
+3. 判断需要多少幅画来完整呈现意境（通常每一联或每一句对应一幅，\
+但意境连贯的句子可以合并为一幅）。
+4. 为每幅画从上方风格菜单中选择最匹配的画风。
+5. 为每幅画撰写**中文 prompt** 和**英文 prompt**，均采用「正向描述」策略（只描述要画什么，\
+不描述不要什么），包含：
+   - 画面主体（人物、景物、动作、姿态）
+   - 环境氛围（季节、天气、光线、时辰、色调）
+   - 选定的艺术风格关键词
+   - 构图与视角（远景/中景/特写，俯视/平视等）
+   - 画面质感（绢本/纸本/留白/墨色浓淡等细节）
+   - 画面氛围（清冷/温暖/苍茫/静谧等情感色彩）
+
+### 中文 prompt 要求
+- 使用中国传统绘画的专业术语（如水墨写意、工笔重彩、留白等）。
+- 具体且富有画面感，避免抽象空泛的概念。
+
+### 英文 prompt 要求
+- 中文 prompt 的忠实翻译与适配，保持相同的画面内容和风格意图。
+- 使用英文中对应的艺术术语（如 ink wash painting, meticulous brushwork, negative space 等）。
+- 自然流畅的英文表达，而非逐字翻译。
+
+## 重要提示
+- 文生图模型（Qwen-Image）对中英文 prompt 均有优秀支持，中文表现尤为突出。
+- 支持 negative prompt：请为每幅画面生成针对性的 negative_prompt，排除与目标画风冲突的元素。
+- 每个 prompt 建议 80-200 字（中文）/ 50-150 词（英文），确保细节充分。
+- 必须同时输出中文和英文两个版本的 prompt。
+
+### negative_prompt 编写要点
+- 针对所选画风排除冲突风格（如：水墨写意应排除"照片写实, 3D渲染, 油画质感"；\
+工笔花鸟应排除"粗犷笔触, 抽象风格, 泼墨"）。
+- 排除常见 AI 生成瑕疵（如：肢体变形, 手指畸形, 面部模糊, 文字乱码）。
+- 排除与诗词意境不符的元素（如：悲秋诗不应出现"鲜艳色彩, 欢快氛围"）。
+- 简洁有效，20-60 字（中文），以逗号分隔。
+
+## 输出格式
+
+严格按照以下 JSON 格式输出，不要包含任何其他文字：
+
+```json
+{
+  "title": "诗词标题",
+  "author": "作者",
+  "dynasty": "朝代",
+  "genre": "体裁（如：五言绝句、七言律诗、词·水调歌头等）",
+  "analysis": "对整首诗意境的简要分析（中文，2-3句话）",
+  "images": [
+    {
+      "scene": "这幅画对应的诗句（原文）",
+      "description": "画面内容的中文描述",
+      "style": "选用的画风（中文名称）",
+      "prompt": "详细的中文文生图提示词，80-200字，仅使用正向描述...",
+      "prompt_en": "Detailed English text-to-image prompt, 50-150 words, positive description only...",
+      "negative_prompt": "针对该画面的负向提示词，排除与画风冲突的元素和常见瑕疵，20-60字..."
+    }
+  ]
+}
+```\
+"""
+
+
+def analyze_poetry(poem: str, cfg: dict) -> dict:
+    """Ask the LLM to split a poem into a structured image plan and return the parsed JSON.
+
+    Prints the raw reply and exits with status 1 when no JSON can be extracted from it.
+    """
+    llm_cfg = cfg["llm"]
+    client = OpenAI(base_url=llm_cfg["base_url"], api_key=llm_cfg["api_key"])
+
+    print(f"\n{'='*60}")
+    print("正在调用 LLM 分析古诗词意境...")
+    print(f"模型: {llm_cfg['model']}")
+    print(f"{'='*60}\n")
+
+    response = client.chat.completions.create(
+        model=llm_cfg["model"],
+        temperature=llm_cfg.get("temperature", 0.7),
+        max_tokens=llm_cfg.get("max_tokens", 4096),
+        messages=[
+            {"role": "system", "content": SYSTEM_PROMPT},
+            {"role": "user", "content": f"请分析以下古诗词并生成图片方案：\n\n{poem}"},
+        ],
+    )
+
+    # `content` is None when the reply carries no text; treat that as an empty string.
+    content = (response.choices[0].message.content or "").strip()
+
+    json_match = re.search(r"```(?:json)?\s*(.*?)```", content, re.DOTALL)
+    if json_match:
+        content = json_match.group(1).strip()
+
+    try:
+        result = json.loads(content)
+    except json.JSONDecodeError:
+        # Fall back to the outermost {...} span in case prose surrounds the JSON.
+        json_match = re.search(r"\{.*\}", content, re.DOTALL)
+        try:
+            result = json.loads(json_match.group() if json_match else "")
+        except json.JSONDecodeError:
+            print("LLM 返回内容无法解析为 JSON：", content, sep="\n")
+            sys.exit(1)
+
+    return result
+
+
+def display_analysis(analysis: dict) -> None:
+    """Pretty-print the LLM analysis: a header block, then one entry per planned image."""
+    heavy_rule, light_rule = "=" * 60, "─" * 50
+    print(f"\n{heavy_rule}")
+    title, author = analysis.get("title", "未知"), analysis.get("author", "未知")
+    dynasty, genre = analysis.get("dynasty", ""), analysis.get("genre", "")
+    print(f"  {title}  —  {dynasty} · {author}  [{genre}]")
+    print(heavy_rule)
+    print(f"\n  意境分析：{analysis.get('analysis', '')}\n")
+
+    scenes = analysis["images"]
+    for index, scene in enumerate(scenes, start=1):
+        print(light_rule)
+        print(f"  第 {index} 幅  |  {scene['scene']}")
+        print(f"   画风选择：{scene.get('style', '未指定')}")
+        print(f"   中文描述：{scene['description']}")
+        print(f"   Prompt(zh)：{scene['prompt'][:120]}...")
+        if scene.get("prompt_en"):
+            print(f"   Prompt(en)：{scene['prompt_en'][:120]}...")
+        if scene.get("negative_prompt"):
+            print(f"   Negative  ：{scene['negative_prompt'][:120]}")
+
+    print(f"\n共 {len(scenes)} 幅画面\n")
+
+
+# ---------------------------------------------------------------------------
+# 尺寸预设（适配 Qwen-Image 推荐分辨率）
+# ---------------------------------------------------------------------------
+
+SIZE_PRESETS: dict[str, str] = {
+    "square":     "1328x1328",   # 1:1
+    "phone":      "928x1664",    # 9:16
+    "phone_hd":   "1056x1584",   # 2:3 (HD, close to 9:16)
+    "desktop":    "1664x928",    # 16:9
+    "desktop_hd": "1584x1056",   # 3:2 (HD, close to 16:9)
+    "landscape":  "1472x1140",   # 4:3
+    "portrait":   "1140x1472",   # 3:4
+}
+
+
+def resolve_image_size(img_cfg: dict) -> str:
+    """Return the 'WIDTHxHEIGHT' size string: a named preset, else width/height (default 1328)."""
+    # A blank `size_preset:` / `width:` / `height:` in YAML loads as None; treat it as unset.
+    preset = str(img_cfg.get("size_preset") or "").strip().lower()
+    if preset in SIZE_PRESETS:  # "" and "custom" are not keys, so they fall through
+        return SIZE_PRESETS[preset]
+    width, height = (img_cfg.get("width") or 1328), (img_cfg.get("height") or 1328)
+    return f"{width}x{height}"
+
+
+# ---------------------------------------------------------------------------
+# Qwen-Image API 图片生成
+# ---------------------------------------------------------------------------
+
+def _call_image_api(
+    prompt: str,
+    cfg: dict,
+    seed: int | None = None,
+    negative_prompt: str = "",
+) -> tuple[str, int]:
+    """Call the SiliconFlow Qwen-Image API and return (image_url, seed).
+
+    The image URL is valid for one hour, so callers should download it promptly.
+    negative_prompt: per-scene negative prompt; merged with the global negative_prompt from config.
+    """
+    img_cfg = cfg["image"]
+    base_url = img_cfg.get("base_url", "https://api.siliconflow.cn/v1").rstrip("/")
+    api_key = img_cfg["api_key"]
+
+    url = f"{base_url}/images/generations"
+    headers = {
+        "Authorization": f"Bearer {api_key}",
+        "Content-Type": "application/json",
+    }
+
+    payload: dict = {
+        "model": img_cfg.get("model", "Qwen/Qwen-Image"),
+        "prompt": prompt,
+        "image_size": resolve_image_size(img_cfg),
+    }
+
+    steps = img_cfg.get("num_inference_steps")
+    if steps is not None:  # optional settings are sent only when configured
+        payload["num_inference_steps"] = steps
+
+    guidance = img_cfg.get("guidance_scale")
+    if guidance is not None:
+        payload["guidance_scale"] = guidance
+
+    cfg_scale = img_cfg.get("cfg")
+    if cfg_scale is not None:
+        payload["cfg"] = cfg_scale
+
+    global_negative = img_cfg.get("negative_prompt", "").strip()
+    scene_negative = negative_prompt.strip()
+    parts = [p for p in (scene_negative, global_negative) if p]  # scene terms first, then global
+    merged_negative = ", ".join(parts)
+    if merged_negative:
+        payload["negative_prompt"] = merged_negative
+
+    if seed is not None and seed >= 0:  # a None or negative seed is left out of the request
+        payload["seed"] = seed
+
+    max_retries = img_cfg.get("max_retries", 3)
+    timeout = img_cfg.get("request_timeout", 180)
+
+    for attempt in range(max_retries):
+        try:
+            resp = http_requests.post(
+                url, headers=headers, json=payload, timeout=timeout
+            )
+
+            if resp.status_code == 429:
+                wait = min(60, 5 * (attempt + 1))  # linear back-off: 5s, 10s, 15s, ... capped at 60s
+                print(f"  API 限流 (429)，等待 {wait}s 后重试...")
+                time.sleep(wait)
+                continue
+
+            if resp.status_code != 200:
+                error_detail = resp.text[:500]
+                print(f"  API 返回错误 [{resp.status_code}]: {error_detail}")
+                if attempt < max_retries - 1:
+                    time.sleep(3)
+                    continue
+                resp.raise_for_status()  # last attempt: surface the HTTP error to the caller
+
+            data = resp.json()
+            img_url = data["images"][0]["url"]
+            returned_seed = data.get("seed", seed if seed and seed >= 0 else 0)  # echo request seed (or 0)
+            return img_url, returned_seed
+
+        except http_requests.exceptions.Timeout:
+            print(f"  请求超时 ({timeout}s)，" + (
+                f"重试 ({attempt+1}/{max_retries})..." if attempt < max_retries - 1 else "已达最大重试次数"
+            ))
+            if attempt < max_retries - 1:
+                time.sleep(3)
+                continue
+            raise
+
+        except http_requests.exceptions.ConnectionError as e:
+            print(f"  连接失败: {e}")
+            if attempt < max_retries - 1:
+                time.sleep(5)
+                continue
+            raise
+
+    raise RuntimeError("API 调用失败，已达最大重试次数")  # loop ended without a return (e.g. repeated 429s)
+
+
+def _download_image(url: str, save_path: Path, timeout: int = 120) -> None:
+    """Fetch the image at *url* and write it to *save_path* through Pillow."""
+    response = http_requests.get(url, timeout=timeout, stream=True)
+    response.raise_for_status()
+    payload = BytesIO(response.content)
+    Image.open(payload).save(save_path)
+
+
+def generate_images(analysis: dict, cfg: dict) -> list[Path]:
+    """Call the Qwen-Image API for each analysed scene and return the saved file paths."""
+    img_cfg = cfg["image"]
+    out_cfg = cfg["output"]
+
+    output_dir = Path(out_cfg.get("dir", "./output"))
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    prefix = out_cfg.get("filename_prefix", "poem")
+    image_size = resolve_image_size(img_cfg)
+    seed = img_cfg.get("seed", -1)  # negative => a time-derived seed is used per image
+    prompt_lang = img_cfg.get("prompt_language", "zh")
+    images_per_prompt = max(1, min(4, img_cfg.get("images_per_prompt", 1)))  # clamp to 1..4
+
+    print(f"\n{'='*60}")
+    print("Qwen-Image API 图片生成")
+    print(f"模型: {img_cfg.get('model', 'Qwen/Qwen-Image')}")
+    print(f"图片尺寸: {image_size}")
+    print(f"Prompt 语言: {prompt_lang}")
+    if images_per_prompt > 1:
+        print(f"每个 prompt 生成 {images_per_prompt} 张图（不同种子）")
+    print(f"{'='*60}\n")
+
+    saved_paths = []
+    total = len(analysis["images"])
+
+    for i, img_info in enumerate(analysis["images"], 1):
+        if prompt_lang == "en" and img_info.get("prompt_en"):
+            prompt = img_info["prompt_en"]
+        else:
+            prompt = img_info["prompt"]  # the Chinese prompt is the fallback
+
+        scene_negative = img_info.get("negative_prompt", "")
+
+        print(f"\n[{i}/{total}] 正在生成: {img_info['scene']}")
+        print(f"  画风: {img_info.get('style', '未指定')}")
+        print(f"  Prompt({prompt_lang}): {prompt[:120]}...")
+        if scene_negative:
+            print(f"  Negative: {scene_negative[:100]}")
+
+        for j in range(images_per_prompt):
+            variant_offset = i * 100 + j  # distinct per (scene, variant) because j < 4
+            if seed >= 0:
+                actual_seed = seed + variant_offset
+            else:
+                actual_seed = (int(time.time() * 1000) % (10**10)) + variant_offset  # clock-derived
+
+            suffix = chr(ord("a") + j) if images_per_prompt > 1 else ""  # "a", "b", ... per variant
+            if images_per_prompt > 1:
+                print(f"  --- 第 {j+1}/{images_per_prompt} 张 (seed={actual_seed}) ---")
+
+            start_time = time.time()
+
+            try:
+                img_url, returned_seed = _call_image_api(  # returned_seed is not used afterwards
+                    prompt, cfg, seed=actual_seed, negative_prompt=scene_negative
+                )
+            except Exception as e:  # best effort: report the failure and try the next variant
+                print(f"  生成失败: {e}")
+                continue
+
+            elapsed_api = time.time() - start_time
+            print(f"  API 响应完成，耗时 {elapsed_api:.1f}s")
+
+            img_path = output_dir / f"{prefix}_{i:02d}{suffix}.png"
+            try:
+                _download_image(img_url, img_path)
+                saved_paths.append(img_path)
+                print(f"  已保存: {img_path}")
+            except Exception as e:  # keep going; the URL is printed so it can be fetched by hand
+                print(f"  图片下载失败: {e}")
+                print(f"  URL（1小时内有效）: {img_url}")
+
+        if out_cfg.get("save_prompts", True):  # one prompt record per scene, after its images
+            txt_path = output_dir / f"{prefix}_{i:02d}_prompt.txt"
+            prompt_zh = img_info["prompt"]
+            prompt_en = img_info.get("prompt_en", "")
+            txt_path.write_text(
+                f"Scene: {img_info['scene']}\n"
+                f"Style: {img_info.get('style', '')}\n"
+                f"Description: {img_info['description']}\n"
+                f"Prompt(zh): {prompt_zh}\n"
+                f"Prompt(en): {prompt_en}\n"
+                f"Negative: {scene_negative}\n"
+                f"Used({prompt_lang}): {prompt}\n",
+                encoding="utf-8",
+            )
+
+    return saved_paths
+
+
+# ---------------------------------------------------------------------------
+# 主流程
+# ---------------------------------------------------------------------------
+
+def main():
+    """CLI entry point: read a poem, analyse it with the LLM, then generate images via the API."""
+    parser = argparse.ArgumentParser(
+        description="古诗词意境图生成器 — 基于 LLM 分析 + Qwen-Image API 生成"
+    )
+    parser.add_argument(
+        "-c", "--config",
+        default="config_qwen.yaml",
+        help="配置文件路径（默认: config_qwen.yaml）",
+    )
+    parser.add_argument(
+        "-p", "--poem",
+        type=str,
+        default=None,
+        help="直接传入古诗词文本（如不指定则交互式输入）",
+    )
+    parser.add_argument(
+        "--analyze-only",
+        action="store_true",
+        help="仅进行 LLM 分析，不生成图片",
+    )
+    parser.add_argument(
+        "-o", "--output",
+        type=str,
+        default=None,
+        help="覆盖输出目录",
+    )
+    args = parser.parse_args()
+
+    cfg = load_config(args.config)
+
+    if args.output:
+        cfg["output"]["dir"] = args.output
+    else:
+        # No -o/--output: use a fresh <dir>/<YYYY-MM-DD>/<HH-MM-SS> folder for this run.
+        now = datetime.now()
+        run_dir = Path(now.strftime("%Y-%m-%d")) / now.strftime("%H-%M-%S")
+        cfg["output"]["dir"] = str(Path(cfg["output"].get("dir", "./output")) / run_dir)
+
+    if args.poem:
+        poem = args.poem
+    else:
+        print("请输入古诗词（输入空行结束）：")
+        lines = []
+        try:
+            # A blank (or whitespace-only) line ends the interactive input.
+            while (line := input()).strip():
+                lines.append(line)
+        except EOFError:
+            pass  # stdin closed (e.g. piped input) before a blank line was entered
+        poem = "\n".join(lines)
+
+    if not poem.strip():
+        print("未输入任何内容，退出。")
+        sys.exit(0)
+
+    print(f"\n输入的诗词：\n{poem}")
+
+    analysis = analyze_poetry(poem, cfg)
+    display_analysis(analysis)
+
+    output_dir = Path(cfg["output"].get("dir", "./output"))
+    output_dir.mkdir(parents=True, exist_ok=True)
+    analysis_path = output_dir / "analysis.json"
+    analysis_path.write_text(
+        json.dumps(analysis, ensure_ascii=False, indent=2),
+        encoding="utf-8",
+    )
+    print(f"分析结果已保存: {analysis_path}")
+
+    if args.analyze_only:
+        print("\n已完成分析（--analyze-only 模式），跳过图片生成。")
+        return
+
+    saved = generate_images(analysis, cfg)
+
+    print(f"\n{'='*60}")
+    print(f"全部完成！共生成 {len(saved)} 幅图片：")
+    for p in saved:
+        print(f"  {p}")
+    print(f"{'='*60}\n")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..5a6ae3c
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,16 @@
+torch>=2.1.0
+diffusers @ git+https://github.com/huggingface/diffusers
+transformers>=4.40.0
+accelerate>=0.30.0
+sentencepiece>=0.2.0
+protobuf>=4.25.0
+safetensors>=0.4.0
+gguf>=0.6.0
+huggingface-hub>=0.23.0
+openai>=1.30.0
+pyyaml>=6.0
+Pillow>=10.0.0
+requests>=2.31.0
+
+# ===== Intel Arc GPU (XPU) 用户额外安装（取消下方注释并安装）=====
+# intel-extension-for-pytorch