"""
古诗词意境图生成器(Qwen-Image 云端版)
将中国古典诗词通过 LLM 分析拆解为多个意境画面,
再使用 Qwen-Image API(通过 SiliconFlow)逐一生成高质量图片。
"""
|
||
|
||
import argparse
|
||
import json
|
||
import os
|
||
import re
|
||
import sys
|
||
import time
|
||
from datetime import datetime
|
||
from io import BytesIO
|
||
from pathlib import Path
|
||
|
||
import requests as http_requests
|
||
import yaml
|
||
from openai import OpenAI
|
||
from PIL import Image
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# 配置加载
|
||
# ---------------------------------------------------------------------------
|
||
|
||
def load_config(config_path: str = "config_qwen.yaml") -> dict:
    """Load the YAML configuration file and resolve the API keys.

    Key resolution order:

    * LLM key: ``LLM_API_KEY`` environment variable, then ``llm.api_key``
      from the file.
    * Image key: ``IMAGE_API_KEY`` environment variable, then
      ``image.api_key`` from the file, then the resolved LLM key.

    An empty file, or ``llm`` / ``image`` / ``output`` sections that are
    missing or null, are normalised to empty mappings so that later
    ``cfg["llm"]``-style lookups do not fail on the section itself.

    Args:
        config_path: Path of the YAML file to read.

    Returns:
        The parsed configuration, with ``cfg["llm"]["api_key"]`` and
        ``cfg["image"]["api_key"]`` always present (possibly empty strings).

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the top level of the document is not a mapping.
    """
    with open(config_path, "r", encoding="utf-8") as f:
        cfg = yaml.safe_load(f)

    # yaml.safe_load returns None for an empty document.
    if cfg is None:
        cfg = {}
    if not isinstance(cfg, dict):
        raise ValueError(f"配置文件格式错误(顶层应为映射): {config_path}")

    # A section written as a bare `llm:` parses to None; treat it as empty.
    for section in ("llm", "image", "output"):
        if cfg.get(section) is None:
            cfg[section] = {}

    api_key = os.environ.get("LLM_API_KEY") or cfg["llm"].get("api_key") or ""
    cfg["llm"]["api_key"] = api_key

    img_api_key = (
        os.environ.get("IMAGE_API_KEY")
        or cfg["image"].get("api_key")
        or api_key
    )
    cfg["image"]["api_key"] = img_api_key
    return cfg
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# LLM 古诗词分析
|
||
# ---------------------------------------------------------------------------
|
||
|
||
SYSTEM_PROMPT = """\
|
||
你是一位精通中国古典文学与视觉艺术的大师,同时深谙文生图 AI 的 prompt 工程。\
|
||
你的任务是分析用户提供的古诗词,将其意境拆解为若干幅独立的画面,\
|
||
每幅画面对应诗词中一个完整的意象或场景。
|
||
|
||
## 核心原则:信、雅、达
|
||
|
||
1. **信**(忠实):画面内容必须忠于原诗的意象、情感和时代背景,不可凭空臆造。\
|
||
诗中有月则画月,诗中无人则不强加人物。
|
||
2. **雅**(优美):画面描述应体现中国传统美学,注重意境营造、留白与含蓄之美。
|
||
3. **达**(通畅):prompt 要清晰、具体、富有画面感,\
|
||
能被文生图模型准确理解并生成高质量图像。
|
||
|
||
## 诗词体裁识别与风格匹配
|
||
|
||
请先识别诗词的体裁(唐诗/宋词/元曲/其他),再根据题材选择最合适的中国传统画风。\
|
||
以下是可选的风格菜单,请根据诗意灵活选取,同一首诗的不同画面可以使用不同风格:
|
||
|
||
| 风格 | prompt 关键词 | 适用场景 |
|
||
|------|-------------|---------|
|
||
| 水墨写意 | 水墨写意,淡墨晕染,留白 | 山水、边塞、禅意、抒情 |
|
||
| 青绿山水 | 青绿山水,石青石绿,金碧辉煌 | 春夏山水、游记、壮丽河山 |
|
||
| 工笔花鸟 | 工笔花鸟,细腻勾勒,精细渲染 | 花卉、仕女、宫廷、精致细腻 |
|
||
| 工笔重彩 | 工笔重彩,浓墨重色,华丽精细 | 华丽、富贵、节庆、历史叙事 |
|
||
| 没骨画法 | 没骨画法,不勾轮廓,直接点染 | 花卉、蔬果、清新淡雅 |
|
||
| 文人画 | 文人画风格,诗书画印,意趣高远 | 隐逸、高洁、书卷气 |
|
||
| 泼墨大写意 | 泼墨大写意,墨色淋漓,气势磅礴 | 豪放、苍茫、雄壮 |
|
||
| 界画/建筑 | 界画,工整精细,楼台亭阁 | 楼阁、宫殿、城市场景 |
|
||
| 浅绛山水 | 浅绛山水,赭石淡彩,萧疏清远 | 秋冬山水、萧瑟、怀古 |
|
||
|
||
### 体裁特点提示
|
||
- **唐诗**(尤其五七言律绝):意境开阔,气象宏大,多配水墨写意或青绿山水。
|
||
- **宋词**:情感细腻,意象精致,婉约派多配工笔花鸟/没骨,豪放派可配泼墨写意。
|
||
- **边塞诗**:苍凉壮阔,适合泼墨大写意或浅绛山水。
|
||
- **田园诗**:恬淡自然,适合青绿山水或文人画。
|
||
- **咏物诗/闺怨词**:精致细腻,适合工笔花鸟或没骨画法。
|
||
|
||
## 分析步骤
|
||
|
||
1. 识别诗词的标题、作者、体裁、题材和情感基调。
|
||
2. 逐句/逐联理解字面意思与深层意境。
|
||
3. 判断需要多少幅画来完整呈现意境(通常每一联或每一句对应一幅,\
|
||
但意境连贯的句子可以合并为一幅)。
|
||
4. 为每幅画从上方风格菜单中选择最匹配的画风。
|
||
5. 为每幅画撰写**中文 prompt** 和**英文 prompt**,均采用「正向描述」策略(只描述要画什么,\
|
||
不描述不要什么),包含:
|
||
- 画面主体(人物、景物、动作、姿态)
|
||
- 环境氛围(季节、天气、光线、时辰、色调)
|
||
- 选定的艺术风格关键词
|
||
- 构图与视角(远景/中景/特写,俯视/平视等)
|
||
- 画面质感(绢本/纸本/留白/墨色浓淡等细节)
|
||
- 画面氛围(清冷/温暖/苍茫/静谧等情感色彩)
|
||
|
||
### 中文 prompt 要求
|
||
- 使用中国传统绘画的专业术语(如水墨写意、工笔重彩、留白等)。
|
||
- 具体且富有画面感,避免抽象空泛的概念。
|
||
|
||
### 英文 prompt 要求
|
||
- 中文 prompt 的忠实翻译与适配,保持相同的画面内容和风格意图。
|
||
- 使用英文中对应的艺术术语(如 ink wash painting, meticulous brushwork, negative space 等)。
|
||
- 自然流畅的英文表达,而非逐字翻译。
|
||
|
||
## 重要提示
|
||
- 文生图模型(Qwen-Image)对中英文 prompt 均有优秀支持,中文表现尤为突出。
|
||
- 支持 negative prompt:请为每幅画面生成针对性的 negative_prompt,排除与目标画风冲突的元素。
|
||
- 每个 prompt 建议 80-200 字(中文)/ 50-150 词(英文),确保细节充分。
|
||
- 必须同时输出中文和英文两个版本的 prompt。
|
||
|
||
### negative_prompt 编写要点
|
||
- 针对所选画风排除冲突风格(如:水墨写意应排除"照片写实, 3D渲染, 油画质感";\
|
||
工笔花鸟应排除"粗犷笔触, 抽象风格, 泼墨")。
|
||
- 排除常见 AI 生成瑕疵(如:肢体变形, 手指畸形, 面部模糊, 文字乱码)。
|
||
- 排除与诗词意境不符的元素(如:悲秋诗不应出现"鲜艳色彩, 欢快氛围")。
|
||
- 简洁有效,20-60 字(中文),以逗号分隔。
|
||
|
||
## 输出格式
|
||
|
||
严格按照以下 JSON 格式输出,不要包含任何其他文字:
|
||
|
||
```json
|
||
{
|
||
"title": "诗词标题",
|
||
"author": "作者",
|
||
"dynasty": "朝代",
|
||
"genre": "体裁(如:五言绝句、七言律诗、词·水调歌头等)",
|
||
"analysis": "对整首诗意境的简要分析(中文,2-3句话)",
|
||
"images": [
|
||
{
|
||
"scene": "这幅画对应的诗句(原文)",
|
||
"description": "画面内容的中文描述",
|
||
"style": "选用的画风(中文名称)",
|
||
"prompt": "详细的中文文生图提示词,80-200字,仅使用正向描述...",
|
||
"prompt_en": "Detailed English text-to-image prompt, 50-150 words, positive description only...",
|
||
"negative_prompt": "针对该画面的负向提示词,排除与画风冲突的元素和常见瑕疵,20-60字..."
|
||
}
|
||
]
|
||
}
|
||
```\
|
||
"""
|
||
|
||
|
||
def analyze_poetry(poem: str, cfg: dict) -> dict:
    """Ask the chat model to analyse a poem and return the image plan.

    The model is expected to answer with the JSON object described in
    ``SYSTEM_PROMPT``. The answer may arrive wrapped in a Markdown code fence
    or surrounded by extra prose, so parsing is best-effort.

    Args:
        poem: Raw poem text entered by the user.
        cfg: Loaded configuration; ``cfg["llm"]`` must provide ``base_url``,
            ``api_key`` and ``model`` and may provide ``temperature`` and
            ``max_tokens``.

    Returns:
        The parsed JSON object; it is guaranteed to contain an ``images`` list.

    Raises:
        SystemExit: With status 1 when the answer cannot be parsed into a
            JSON object that contains an ``images`` list.
    """
    llm_cfg = cfg["llm"]

    client = OpenAI(
        base_url=llm_cfg["base_url"],
        api_key=llm_cfg["api_key"],
    )

    print(f"\n{'='*60}")
    print("正在调用 LLM 分析古诗词意境...")
    print(f"模型: {llm_cfg['model']}")
    print(f"{'='*60}\n")

    response = client.chat.completions.create(
        model=llm_cfg["model"],
        temperature=llm_cfg.get("temperature", 0.7),
        max_tokens=llm_cfg.get("max_tokens", 4096),
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": f"请分析以下古诗词并生成图片方案:\n\n{poem}"},
        ],
    )

    # The message content may be None (e.g. an empty completion).
    content = (response.choices[0].message.content or "").strip()

    result = _parse_llm_json(content)
    if result is None:
        print("LLM 返回内容无法解析为 JSON:")
        print(content)
        sys.exit(1)

    # Downstream code iterates result["images"]; fail here with a clear
    # message instead of a KeyError later.
    if not isinstance(result.get("images"), list):
        print("LLM 返回的 JSON 缺少 images 列表:")
        print(content)
        sys.exit(1)

    return result


def _parse_llm_json(content: str) -> dict | None:
    """Extract a JSON object from raw model output, or return None."""
    fenced = re.search(r"```(?:json)?\s*(.*?)```", content, re.DOTALL)
    if fenced:
        content = fenced.group(1).strip()

    # Try the text as-is first, then the outermost {...} span within it.
    candidates = [content]
    braced = re.search(r"\{.*\}", content, re.DOTALL)
    if braced:
        candidates.append(braced.group())

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        if isinstance(parsed, dict):
            return parsed
    return None
|
||
|
||
|
||
def display_analysis(analysis: dict) -> None:
    """Print a human-readable summary of the LLM analysis to stdout.

    Every field is optional: the analysis comes from a language model, so a
    missing ``scene``, ``description``, ``prompt`` or ``images`` entry is
    rendered as an empty value instead of raising ``KeyError``.

    Args:
        analysis: The object returned by ``analyze_poetry``.
    """
    rule = "=" * 60
    print(f"\n{rule}")
    title = analysis.get("title", "未知")
    author = analysis.get("author", "未知")
    dynasty = analysis.get("dynasty", "")
    genre = analysis.get("genre", "")
    print(f" {title} — {dynasty} · {author} [{genre}]")
    print(rule)
    print(f"\n 意境分析:{analysis.get('analysis', '')}\n")

    images = analysis.get("images") or []
    for i, img in enumerate(images, 1):
        prompt_zh = img.get("prompt") or ""
        print("─" * 50)
        print(f" 第 {i} 幅 | {img.get('scene', '')}")
        print(f" 画风选择:{img.get('style', '未指定')}")
        print(f" 中文描述:{img.get('description', '')}")
        # Prompts are long; only a 120-character preview is shown.
        print(f" Prompt(zh):{prompt_zh[:120]}...")
        if img.get("prompt_en"):
            print(f" Prompt(en):{img['prompt_en'][:120]}...")
        if img.get("negative_prompt"):
            print(f" Negative :{img['negative_prompt'][:120]}")

    print(f"\n共 {len(images)} 幅画面\n")
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# 尺寸预设(适配 Qwen-Image 推荐分辨率)
|
||
# ---------------------------------------------------------------------------
|
||
|
||
# Named size presets mapped to the "WIDTHxHEIGHT" string sent to the image API.
SIZE_PRESETS: dict[str, str] = {
    "square": "1328x1328",      # 1:1
    "phone": "928x1664",        # 9:16
    "phone_hd": "1056x1584",    # 2:3 (close to 9:16, higher resolution)
    "desktop": "1664x928",      # 16:9
    "desktop_hd": "1584x1056",  # 3:2 (close to 16:9, higher resolution)
    "landscape": "1472x1140",   # 4:3
    "portrait": "1140x1472",    # 3:4
}


def resolve_image_size(img_cfg: dict) -> str:
    """Return the requested image size as a ``'WIDTHxHEIGHT'`` string.

    A ``size_preset`` that names an entry of ``SIZE_PRESETS`` (matched
    case-insensitively, surrounding whitespace ignored) wins. Any other value,
    including ``"custom"``, an unknown name, or a missing/null setting, falls
    back to the explicit ``width`` / ``height`` settings, each defaulting to
    1328 when absent or null.

    Args:
        img_cfg: The ``image`` section of the configuration.

    Returns:
        The size string, e.g. ``"928x1664"``.
    """
    # `or ""` covers a null value in the YAML, which .get() returns as None.
    preset = str(img_cfg.get("size_preset") or "").strip().lower()
    if preset in SIZE_PRESETS:
        return SIZE_PRESETS[preset]

    width = img_cfg.get("width") or 1328
    height = img_cfg.get("height") or 1328
    return f"{width}x{height}"
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Qwen-Image API 图片生成
|
||
# ---------------------------------------------------------------------------
|
||
|
||
def _call_image_api(
    prompt: str,
    cfg: dict,
    seed: int | None = None,
    negative_prompt: str = "",
) -> tuple[str, int]:
    """Request one image from the image API and return ``(image_url, seed)``.

    The request is a POST to ``{base_url}/images/generations``, where
    ``base_url`` defaults to ``https://api.siliconflow.cn/v1``. The original
    notes state that the returned URL is only valid for one hour, so callers
    should download it promptly.

    Args:
        prompt: Positive text prompt.
        cfg: Loaded configuration; ``cfg["image"]`` supplies ``api_key`` and
            optionally ``base_url``, ``model``, ``num_inference_steps``,
            ``guidance_scale``, ``cfg``, ``negative_prompt``, ``max_retries``
            and ``request_timeout``.
        seed: Seed to send; ``None`` or a negative value omits it from the
            request.
        negative_prompt: Scene-specific negative prompt. It is merged with the
            global ``image.negative_prompt`` (scene first, comma-separated).
            Non-string or blank values are ignored.

    Returns:
        The URL of the first generated image and the seed reported by the
        API (falling back to the requested seed, or 0).

    Raises:
        requests.exceptions.Timeout: The last attempt timed out.
        requests.exceptions.ConnectionError: The last attempt could not connect.
        requests.exceptions.HTTPError: The API answered with an error status
            that is not worth retrying, or retries were exhausted.
        RuntimeError: Retries were exhausted on rate limiting, or the response
            did not contain an image URL.
    """
    img_cfg = cfg["image"]
    base_url = img_cfg.get("base_url", "https://api.siliconflow.cn/v1").rstrip("/")
    api_key = img_cfg["api_key"]

    url = f"{base_url}/images/generations"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }

    payload: dict = {
        "model": img_cfg.get("model", "Qwen/Qwen-Image"),
        "prompt": prompt,
        "image_size": resolve_image_size(img_cfg),
    }

    # Optional tuning knobs are forwarded only when configured.
    for key in ("num_inference_steps", "guidance_scale", "cfg"):
        value = img_cfg.get(key)
        if value is not None:
            payload[key] = value

    # Scene-specific negatives first, then the global ones. The isinstance
    # check tolerates a null config value or a non-string LLM field.
    negatives = [
        text.strip()
        for text in (negative_prompt, img_cfg.get("negative_prompt"))
        if isinstance(text, str) and text.strip()
    ]
    if negatives:
        payload["negative_prompt"] = ", ".join(negatives)

    if seed is not None and seed >= 0:
        payload["seed"] = seed

    max_retries = img_cfg.get("max_retries", 3)
    timeout = img_cfg.get("request_timeout", 180)

    for attempt in range(max_retries):
        has_retry_left = attempt < max_retries - 1

        try:
            resp = http_requests.post(
                url, headers=headers, json=payload, timeout=timeout
            )
        except http_requests.exceptions.Timeout:
            print(f" 请求超时 ({timeout}s)," + (
                f"重试 ({attempt+1}/{max_retries})..." if has_retry_left else "已达最大重试次数"
            ))
            if not has_retry_left:
                raise
            time.sleep(3)
            continue
        except http_requests.exceptions.ConnectionError as e:
            print(f" 连接失败: {e}")
            if not has_retry_left:
                raise
            time.sleep(5)
            continue

        if resp.status_code == 429:
            if not has_retry_left:
                # No point in sleeping when no further attempt will follow.
                print(" API 限流 (429),已达最大重试次数")
                break
            wait = min(60, 5 * (attempt + 1))
            print(f" API 限流 (429),等待 {wait}s 后重试...")
            time.sleep(wait)
            continue

        if resp.status_code != 200:
            error_detail = resp.text[:500]
            print(f" API 返回错误 [{resp.status_code}]: {error_detail}")
            # Only server-side failures are worth retrying; a 4xx (bad key,
            # bad request, ...) will fail the same way on every attempt.
            if resp.status_code >= 500 and has_retry_left:
                time.sleep(3)
                continue
            resp.raise_for_status()

        data = resp.json()
        try:
            img_url = data["images"][0]["url"]
        except (KeyError, IndexError, TypeError) as exc:
            raise RuntimeError(
                f"API 响应中缺少图片 URL: {str(data)[:500]}"
            ) from exc
        returned_seed = data.get("seed", seed if seed is not None and seed >= 0 else 0)
        return img_url, returned_seed

    raise RuntimeError("API 调用失败,已达最大重试次数")
|
||
|
||
|
||
def _download_image(url: str, save_path: Path, timeout: int = 120) -> None:
    """Download an image from *url* and save it to *save_path*.

    The downloaded bytes are opened with Pillow and written back with
    ``Image.save``, rather than being copied to disk verbatim.

    Args:
        url: Location of the generated image.
        save_path: Destination file.
        timeout: Timeout in seconds passed to the HTTP request.

    Raises:
        requests.exceptions.RequestException: On network or HTTP errors.
        OSError: If the payload is not a readable image or cannot be written.
    """
    # The context manager releases the streamed connection even when
    # raise_for_status() fails before the body has been read.
    with http_requests.get(url, timeout=timeout, stream=True) as resp:
        resp.raise_for_status()
        raw = resp.content

    with Image.open(BytesIO(raw)) as img:
        img.save(save_path)
|
||
|
||
|
||
def generate_images(analysis: dict, cfg: dict) -> list[Path]:
    """Generate and save the images for every scene in *analysis*.

    For each scene the configured prompt language is used (``prompt_en`` when
    ``image.prompt_language`` is ``"en"`` and an English prompt exists,
    otherwise ``prompt``). Between 1 and 4 variants are requested per scene,
    each with its own seed. A failed API call or download is reported and
    skipped; it does not abort the remaining work.

    Args:
        analysis: The object returned by ``analyze_poetry``.
        cfg: Loaded configuration (``image`` and ``output`` sections).

    Returns:
        Paths of the image files that were saved successfully.
    """
    img_cfg = cfg["image"]
    out_cfg = cfg.get("output") or {}

    output_dir = Path(out_cfg.get("dir", "./output"))
    output_dir.mkdir(parents=True, exist_ok=True)

    prefix = out_cfg.get("filename_prefix", "poem")
    image_size = resolve_image_size(img_cfg)
    prompt_lang = img_cfg.get("prompt_language", "zh")

    # A null value in the YAML must behave like the documented defaults.
    seed = img_cfg.get("seed")
    if seed is None:
        seed = -1
    images_per_prompt = max(1, min(4, img_cfg.get("images_per_prompt") or 1))

    print(f"\n{'='*60}")
    print("Qwen-Image API 图片生成")
    print(f"模型: {img_cfg.get('model', 'Qwen/Qwen-Image')}")
    print(f"图片尺寸: {image_size}")
    print(f"Prompt 语言: {prompt_lang}")
    if images_per_prompt > 1:
        print(f"每个 prompt 生成 {images_per_prompt} 张图(不同种子)")
    print(f"{'='*60}\n")

    saved_paths: list[Path] = []
    scenes = analysis.get("images") or []
    total = len(scenes)

    for i, img_info in enumerate(scenes, 1):
        scene = img_info.get("scene", "")
        prompt_zh = img_info.get("prompt") or ""
        prompt_en = img_info.get("prompt_en") or ""
        prompt = prompt_en if prompt_lang == "en" and prompt_en else prompt_zh

        # The model is asked for a string, but tolerate a list or null.
        scene_negative = img_info.get("negative_prompt") or ""
        if isinstance(scene_negative, (list, tuple)):
            scene_negative = ", ".join(map(str, scene_negative))
        elif not isinstance(scene_negative, str):
            scene_negative = str(scene_negative)

        print(f"\n[{i}/{total}] 正在生成: {scene}")
        print(f" 画风: {img_info.get('style', '未指定')}")
        if not prompt:
            print(" 缺少 prompt,跳过该画面")
            continue
        print(f" Prompt({prompt_lang}): {prompt[:120]}...")
        if scene_negative:
            print(f" Negative: {scene_negative[:100]}")

        for j in range(images_per_prompt):
            # Distinct offset per scene and per variant.
            variant_offset = i * 100 + j
            if seed >= 0:
                actual_seed = seed + variant_offset
            else:
                # Apply the modulo after adding the offset so the value stays
                # below the 10**10 bound the time-based seed is clamped to.
                actual_seed = (int(time.time() * 1000) + variant_offset) % (10**10)

            suffix = chr(ord("a") + j) if images_per_prompt > 1 else ""
            if images_per_prompt > 1:
                print(f" --- 第 {j+1}/{images_per_prompt} 张 (seed={actual_seed}) ---")

            start_time = time.time()

            # Broad catch on purpose: one failed image must not stop the batch.
            try:
                img_url, _ = _call_image_api(
                    prompt, cfg, seed=actual_seed, negative_prompt=scene_negative
                )
            except Exception as e:
                print(f" 生成失败: {e}")
                continue

            elapsed_api = time.time() - start_time
            print(f" API 响应完成,耗时 {elapsed_api:.1f}s")

            img_path = output_dir / f"{prefix}_{i:02d}{suffix}.png"
            try:
                _download_image(img_url, img_path)
            except Exception as e:
                print(f" 图片下载失败: {e}")
                print(f" URL(1小时内有效): {img_url}")
            else:
                saved_paths.append(img_path)
                print(f" 已保存: {img_path}")

        # Written once per scene, whether or not image generation succeeded,
        # so the prompt is available for a manual retry.
        if out_cfg.get("save_prompts", True):
            txt_path = output_dir / f"{prefix}_{i:02d}_prompt.txt"
            txt_path.write_text(
                f"Scene: {scene}\n"
                f"Style: {img_info.get('style', '')}\n"
                f"Description: {img_info.get('description', '')}\n"
                f"Prompt(zh): {prompt_zh}\n"
                f"Prompt(en): {prompt_en}\n"
                f"Negative: {scene_negative}\n"
                f"Used({prompt_lang}): {prompt}\n",
                encoding="utf-8",
            )

    return saved_paths
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# 主流程
|
||
# ---------------------------------------------------------------------------
|
||
|
||
def main():
    """Command-line entry point: read a poem, analyse it, generate images.

    Steps:
      1. Parse the CLI options and load the configuration.
      2. Pick the output directory: ``--output`` verbatim, otherwise
         ``<configured dir>/<YYYY-MM-DD>/<HH-MM-SS>``.
      3. Take the poem from ``--poem`` or read it from stdin until a blank
         line or end of input.
      4. Run the LLM analysis, print it and save it as ``analysis.json``.
      5. Unless ``--analyze-only`` is given, generate the images.
    """
    parser = argparse.ArgumentParser(
        description="古诗词意境图生成器 — 基于 LLM 分析 + Qwen-Image API 生成"
    )
    parser.add_argument(
        "-c", "--config",
        default="config_qwen.yaml",
        help="配置文件路径(默认: config_qwen.yaml)",
    )
    parser.add_argument(
        "-p", "--poem",
        type=str,
        default=None,
        help="直接传入古诗词文本(如不指定则交互式输入)",
    )
    parser.add_argument(
        "--analyze-only",
        action="store_true",
        help="仅进行 LLM 分析,不生成图片",
    )
    parser.add_argument(
        "-o", "--output",
        type=str,
        default=None,
        help="覆盖输出目录",
    )
    args = parser.parse_args()

    cfg = load_config(args.config)

    # The `output` section is optional; create it so it can be written to.
    if cfg.get("output") is None:
        cfg["output"] = {}
    out_cfg = cfg["output"]

    if args.output:
        out_cfg["dir"] = args.output
    else:
        # One timestamped sub-directory per run keeps earlier results intact.
        now = datetime.now()
        date_dir = now.strftime("%Y-%m-%d")
        time_dir = now.strftime("%H-%M-%S")
        out_cfg["dir"] = str(
            Path(out_cfg.get("dir", "./output")) / date_dir / time_dir
        )

    if args.poem:
        poem = args.poem
    else:
        print("请输入古诗词(输入空行结束):")
        lines = []
        while True:
            try:
                line = input()
            except EOFError:
                # Piped or redirected stdin ended without a blank line.
                break
            if line.strip() == "":
                break
            lines.append(line)
        poem = "\n".join(lines)

    if not poem.strip():
        print("未输入任何内容,退出。")
        sys.exit(0)

    print(f"\n输入的诗词:\n{poem}")

    analysis = analyze_poetry(poem, cfg)
    display_analysis(analysis)

    output_dir = Path(out_cfg.get("dir", "./output"))
    output_dir.mkdir(parents=True, exist_ok=True)
    analysis_path = output_dir / "analysis.json"
    analysis_path.write_text(
        json.dumps(analysis, ensure_ascii=False, indent=2),
        encoding="utf-8",
    )
    print(f"分析结果已保存: {analysis_path}")

    if args.analyze_only:
        print("\n已完成分析(--analyze-only 模式),跳过图片生成。")
        return

    saved = generate_images(analysis, cfg)

    print(f"\n{'='*60}")
    print(f"全部完成!共生成 {len(saved)} 幅图片:")
    for p in saved:
        print(f" {p}")
    print(f"{'='*60}\n")
|
||
|
||
|
||
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|