From 44fd84d380893c40317ac666c6be10698c5c465a Mon Sep 17 00:00:00 2001 From: JimmysAIPG Date: Mon, 23 Mar 2026 22:31:48 +0800 Subject: [PATCH] =?UTF-8?q?=E6=89=80=E6=9C=89=E5=8A=9F=E8=83=BD=E5=B7=B2?= =?UTF-8?q?=E5=AE=8C=E6=88=90=EF=BC=8C=E8=BF=90=E8=A1=8COK?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 3 + README.md | 385 ++++++++++++++++ backend/main.py | 630 ++++++++++++++++++++++++++ classify_pois.py | 616 ++++++++++++++++++++++++++ frontend/index.html | 193 ++++++++ frontend/script.js | 630 ++++++++++++++++++++++++++ frontend/style.css | 1027 +++++++++++++++++++++++++++++++++++++++++++ requirements.txt | 11 + 8 files changed, 3495 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 backend/main.py create mode 100644 classify_pois.py create mode 100644 frontend/index.html create mode 100644 frontend/script.js create mode 100644 frontend/style.css create mode 100644 requirements.txt diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e69b384 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +**/__pycache__/* +**/*.json +**/*.jsonl diff --git a/README.md b/README.md new file mode 100644 index 0000000..37e9d15 --- /dev/null +++ b/README.md @@ -0,0 +1,385 @@ +# 古诗词阅读网站 + +基于大模型分类的中国古诗词阅读管理系统 + +## 系统架构 + +``` +┌─────────────────────────────────────────────────────────┐ +│ 古诗词阅读网站 │ +├─────────────────────────────────────────────────────────┤ +│ Frontend (原生 JS + CSS) │ Backend (FastAPI + SQLite)│ +│ - 多类别组合筛选 │ - RESTful API │ +│ - 阅读标记管理 │ - SQLite 数据库 │ +│ - 标签云浏览 │ - 分类查询 │ +│ - 进度统计 │ - 批量导入 │ +├─────────────────────────────────────────────────────────┤ +│ classify_pois.py (LLM 分类脚本) │ +│ - 20 维分类标签体系 │ +│ - 繁体转简体支持 │ +│ - 实时去重 + 持久化 │ +└─────────────────────────────────────────────────────────┘ +``` + +## 核心功能 + +### 1. 
20 维分类标签体系 + +| 维度分类 | 标签数量 | 示例 | +|---------|---------|------| +| **季节时序** | 39 | 春、夏、秋、冬、24 节气、时辰 | +| **题材类型** | 17 | 山水田园、边塞征战、咏史怀古等 | +| **情感心境** | 31 | 情感基调 11 种 + 具体情感 20 种 | +| **景物意象** | 77 | 自然、植物、动物、建筑、色彩、声音 | +| **哲理思想** | 7 | 儒家、道家、佛家、人生感悟等 | +| **艺术手法** | 18 | 写作手法、修辞手法 | +| **人物社会** | 33 | 人生阶段、社会身份、地理方位、节日 | + +### 2. 繁体转简体支持 + +- ✅ 自动识别繁体中文诗词 +- ✅ LLM 自动转换为简体中文 +- ✅ 保留原始繁体版本(可选) +- ✅ 基于简体内容去重(繁简同一首诗不会重复) +- ✅ 所有分类标签统一使用简体 + +**输入示例(繁体):** +```json +{ + "title": "山居秋暝", + "author": "王維", + "paragraphs": ["空山新雨後,天氣晚來秋。", "明月松間照,清泉石上流。"] +} +``` + +**输出示例(简体):** +```json +{ + "title": "山居秋暝", + "author": "王维", + "paragraphs": ["空山新雨后,天气晚来秋。", "明月松间照,清泉石上流。"], + "original_paragraphs": ["空山新雨後,天氣晚來秋。", "明月松間照,清泉石上流。"], + "llm_classification": { + "season": ["秋"], + "genre": ["山水田园", "隐逸闲适"], + ... + } +} +``` + +### 3. 多类别组合筛选 + +支持同时选择多个分类标签进行组合筛选: +- 题材=山水田园 AND 情感=宁静淡泊 +- 季节=春 AND 地点=江南 AND 情感=喜悦 +- 使用 SQL 查询优化,支持复杂筛选条件 + +### 4. 阅读管理 + +- 标记诗词为已读/未读 +- 阅读进度统计(百分比 + 数量) +- 阅读时间记录 +- 快速切换阅读状态 + +### 5. 批量导入 + +- 支持 JSON/JSONL 格式 +- 自动去重(基于内容签名 MD5) +- 实时写入数据库 +- 导入结果反馈 + +## 快速开始 + +### 1. 安装依赖 + +```bash +pip install -r requirements.txt +``` + +### 2. 使用 LLM 脚本分类诗词 + +```bash +# 本地模型 (Ollama 等) +python classify_pois.py ./poems ./output.jsonl --llm \ + --base-url http://localhost:11434/v1 \ + --model qwen:7b + +# 远程模型 (OpenAI 等) +python classify_pois.py ./poems ./output.jsonl --llm \ + --base-url https://api.openai.com/v1 \ + --model gpt-4 \ + --api-key sk-xxx + +# 繁体诗词测试 +python classify_pois.py ./ ./output.jsonl --llm \ + --base-url http://localhost:11434/v1 \ + --model qwen:7b +``` + +### 3. 启动后端服务 + +```bash +cd backend +python main.py +``` + +服务启动后访问:http://localhost:8000 + +### 4. 前端页面 + +浏览器打开:http://localhost:8000 + +### 5. 
上传诗词 + +通过前端界面的上传功能,上传分类后的 JSONL 文件到数据库。 + +## API 端点 + +| 方法 | 路径 | 说明 | +|------|------|------| +| GET | `/` | 前端页面 | +| GET | `/api/categories` | 获取分类体系 | +| GET | `/api/stats` | 获取统计数据 | +| POST | `/api/poems/import` | 批量导入诗词 | +| GET | `/api/poems` | 获取诗词列表(支持分页、筛选) | +| GET | `/api/poems/{id}` | 获取诗词详情 | +| PUT | `/api/poems/{id}/read` | 切换阅读状态 | +| GET | `/api/poems/random` | 随机一首 | +| GET | `/api/tags/cloud` | 标签云数据 | + +### API 使用示例 + +```bash +# 获取诗词列表(带多类别筛选) +GET /api/poems?page=1&page_size=20&categories=genre:山水田园,emotion_tone:宁静淡泊 + +# 搜索诗词 +GET /api/poems?search=李白 + +# 获取已读诗词 +GET /api/poems?is_read=true + +# 获取未读诗词 +GET /api/poems?is_read=false + +# 标记为已读 +PUT /api/poems/{id}/read?is_read=true + +# 标记为未读 +PUT /api/poems/{id}/read?is_read=false +``` + +## 数据格式 + +### 输入格式(JSON/JSONL) + +```json +{ + "id": "poem_001", + "title": "山居秋暝", + "author": "王维", + "paragraphs": [ + "空山新雨后,天气晚来秋。", + "明月松间照,清泉石上流。", + "竹喧归浣女,莲动下渔舟。", + "随意春芳歇,王孙自可留。" + ], + "llm_classification": { + "season": ["秋"], + "solar_terms": ["白露"], + "time_of_day": "黄昏", + "genre": ["山水田园", "隐逸闲适"], + "emotion_tone": "宁静淡泊", + "emotions": ["静", "喜", "乐"], + "nature_scenery": ["山", "水", "月"], + "plants": ["松", "竹"], + "animals": ["鸟"], + "buildings": [], + "imagery": ["空山", "新雨", "明月"], + "philosophy": ["道家思想", "自然之道"], + "life_stage": "中年", + "social_role": "隐士", + "technique": ["借景抒情", "动静结合"], + "rhetoric": ["拟人"], + "colors": ["青", "白"], + "sounds": [], + "location": "终南山", + "festival": "无", + "analysis": "这首诗描绘了秋日山居的幽静景色..." + } +} +``` + +### 输出格式(API 响应) + +```json +{ + "id": "poem_001", + "title": "山居秋暝", + "author": "王维", + "paragraphs": ["空山新雨后,天气晚来秋。", ...], + "original_paragraphs": null, + "classifications": { + "season": ["秋"], + "genre": ["山水田园", "隐逸闲适"], + ... 
+ }, + "is_read": false, + "read_at": null, + "created_at": "2024-01-01T00:00:00" +} +``` + +## 数据库结构 + +### poems 表 +```sql +CREATE TABLE poems ( + id TEXT PRIMARY KEY, + title TEXT NOT NULL, + author TEXT NOT NULL, + paragraphs TEXT, + signature TEXT UNIQUE, + created_at TIMESTAMP, + updated_at TIMESTAMP +); +``` + +### classifications 表 +```sql +CREATE TABLE classifications ( + id INTEGER PRIMARY KEY, + poem_id TEXT, + category TEXT, + tags TEXT, + FOREIGN KEY (poem_id) REFERENCES poems(id) +); +``` + +### reading_records 表 +```sql +CREATE TABLE reading_records ( + id INTEGER PRIMARY KEY, + poem_id TEXT UNIQUE, + is_read BOOLEAN, + read_at TIMESTAMP, + FOREIGN KEY (poem_id) REFERENCES poems(id) +); +``` + +## 完整分类标签体系 + +| 维度 | 键名 | 标签数量 | 标签示例 | +|------|------|---------|---------| +| 季节 | season | 6 | 春、夏、秋、冬、四季 | +| 节气 | solar_terms | 24 | 立春、雨水、清明、谷雨 | +| 时辰 | time_of_day | 9 | 清晨、黄昏、夜晚、黎明 | +| 题材 | genre | 17 | 山水田园、边塞征战、咏史怀古 | +| 情感基调 | emotion_tone | 11 | 宁静淡泊、喜悦欢快、悲伤哀愁 | +| 具体情感 | emotions | 20 | 喜、怒、哀、乐、忧、思 | +| 自然景物 | nature_scenery | 14 | 山、水、云、雨、日、月 | +| 植物 | plants | 15 | 松、竹、梅、兰、菊、荷 | +| 动物 | animals | 14 | 鸟、雁、燕、蝉、蛙、鱼 | +| 建筑 | buildings | 15 | 楼、阁、亭、台、桥、寺 | +| 哲理 | philosophy | 7 | 儒家思想、道家思想、佛家禅理 | +| 人生阶段 | life_stage | 5 | 少年、青年、中年、老年 | +| 社会身份 | social_role | 10 | 士人、官员、隐士、游子 | +| 写作手法 | technique | 9 | 比兴、借景抒情、托物言志 | +| 修辞手法 | rhetoric | 9 | 比喻、拟人、夸张、对偶 | +| 色彩 | colors | 10 | 青、绿、红、白、黄、紫 | +| 声音 | sounds | 9 | 钟声、琴声、鸟鸣、雨声 | +| 地理 | location | 9 | 江南、塞北、巴蜀、关中 | +| 节日 | festival | 9 | 春节、中秋、重阳、端午 | + +## 技术栈 + +- **后端**: FastAPI + SQLite + Uvicorn +- **前端**: 原生 JavaScript + CSS (无框架依赖) +- **分类**: 基于 LLM API (支持本地/远程模型) +- **简繁转换**: LLM 自动识别和转换 + +## 扩展开发 + +### 添加新的分类维度 + +1. 在 `backend/main.py` 的 `CATEGORY_SYSTEM` 中添加新维度 +2. 在 `classify_pois.py` 的 `CLASSIFICATION_PROMPT` 中添加对应提示 +3. 
在前端 CSS 中添加对应标签样式 + +### 部署建议 + +```bash +# 生产环境使用 +uvicorn backend.main:app --host 0.0.0.0 --port 8000 --workers 4 + +# 或使用 gunicorn +gunicorn backend.main:app -w 4 -k uvicorn.workers.UvicornWorker +``` + +### 简繁切换显示(可选) + +在前端添加简繁切换功能: + +```javascript +let showTraditional = false; + +function toggleTraditional() { + showTraditional = !showTraditional; + const poem = state.currentPoem; + const content = showTraditional && poem.original_paragraphs + ? poem.original_paragraphs + : poem.paragraphs; + // 重新渲染内容 +} +``` + +## 常见问题 + +**Q: 导入时提示重复?** +A: 系统基于诗词内容签名(MD5)去重,相同内容的诗词只会保留一份。繁体和简体版本会被识别为同一首诗。 + +**Q: 如何重置阅读进度?** +A: 直接修改数据库中 `reading_records` 表的 `is_read` 字段,或在前端重新标记。 + +**Q: 支持哪些 LLM 模型?** +A: 任何支持 OpenAI 兼容 API 的模型,包括 Ollama、vLLM、OpenAI GPT、Anthropic Claude 等。 + +**Q: 繁体诗词如何处理?** +A: 分类脚本会自动识别繁体中文,LLM 会返回简体版本。原始繁体会保存在 `original_paragraphs` 字段中。 + +**Q: JSONL 文件和 SQLite 数据库的关系?** +A: JSONL 是分类脚本的输出格式(中间产物),SQLite 是网站运行的数据库(最终存储)。通过 API 导入功能将 JSONL 导入数据库。 + +## 项目结构 + +``` +PoemClassify/ +├── classify_pois.py # LLM 分类脚本 +├── requirements.txt # Python 依赖 +├── backend/ +│ ├── main.py # FastAPI 后端 +│ └── poems.db # SQLite 数据库(运行时生成) +├── frontend/ +│ ├── index.html # 前端页面 +│ ├── script.js # 前端逻辑 +│ └── style.css # 样式文件 +├── test_traditional.json # 繁体测试文件 +└── README.md # 本文档 +``` + +## 数据备份 + +```bash +# 备份 SQLite 数据库 +cp backend/poems.db backup_$(date +%Y%m%d).db + +# 导出数据库为 JSONL +sqlite3 backend/poems.db "SELECT json_object('id',id,'title',title,'author',author,'paragraphs',json(paragraphs)) FROM poems;" > export.jsonl + +# 从 JSONL 恢复 +curl -X POST http://localhost:8000/api/poems/import -F "file=@export.jsonl" +``` + +## License + +MIT License \ No newline at end of file diff --git a/backend/main.py b/backend/main.py new file mode 100644 index 0000000..8a894c5 --- /dev/null +++ b/backend/main.py @@ -0,0 +1,630 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +古诗词阅读网站后端 API + +基于 FastAPI 的 RESTful API,提供: +- 诗词数据管理(CRUD) +- 多类别组合筛选 +- 阅读标记管理 +- 统计分析 +- 批量导入 
+""" + +import json +import os +import sqlite3 +from datetime import datetime +from typing import List, Optional, Dict, Any +from contextlib import contextmanager +from fastapi import FastAPI, HTTPException, UploadFile, File, Query +from fastapi.staticfiles import StaticFiles +from fastapi.responses import FileResponse, JSONResponse +from fastapi.middleware.cors import CORSMiddleware +import uvicorn + +app = FastAPI(title="古诗词阅读 API", version="2.0.0") + +# 启用 CORS +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +# 配置 +BASE_DIR = os.path.dirname(os.path.abspath(__file__)) +PROJECT_ROOT = os.path.dirname(BASE_DIR) +DB_PATH = os.path.join(BASE_DIR, 'poems.db') +FRONTEND_DIR = os.path.join(PROJECT_ROOT, 'frontend') + +# 分类标签体系定义 +CATEGORY_SYSTEM = { + "season": { + "name": "季节", + "tags": ["春", "夏", "秋", "冬", "四季", "无明确季节"] + }, + "solar_terms": { + "name": "节气", + "tags": ["立春", "雨水", "惊蛰", "春分", "清明", "谷雨", + "立夏", "小满", "芒种", "夏至", "小暑", "大暑", + "立秋", "处暑", "白露", "秋分", "寒露", "霜降", + "立冬", "小雪", "大雪", "冬至", "小寒", "大寒"] + }, + "time_of_day": { + "name": "时辰", + "tags": ["清晨", "上午", "正午", "下午", "黄昏", "夜晚", "深夜", "黎明", "不明确"] + }, + "genre": { + "name": "题材类型", + "tags": ["山水田园", "边塞征战", "咏史怀古", "咏物言志", "送别怀人", "思乡怀远", + "爱情闺怨", "友情赠答", "羁旅漂泊", "隐逸闲适", "讽喻时事", "节日习俗", + "宴饮酬唱", "读书治学", "农耕劳作", "宗教禅理", "其他"] + }, + "emotion_tone": { + "name": "情感基调", + "tags": ["喜悦欢快", "悲伤哀愁", "愤怒激愤", "忧郁伤感", "孤独寂寞", + "宁静淡泊", "豪迈激昂", "思念眷恋", "惆怅失落", "平和超脱", "复杂混合"] + }, + "emotions": { + "name": "具体情感", + "tags": ["喜", "怒", "哀", "乐", "忧", "思", "悲", "恐", "惊", "愁", + "恨", "爱", "恋", "盼", "悔", "愧", "傲", "谦", "静", "躁"] + }, + "nature_scenery": { + "name": "自然景物", + "tags": ["山", "水", "云", "雨", "雪", "风", "雷", "电", "日", "月", "星", "霜", "露", "霞"] + }, + "plants": { + "name": "植物", + "tags": ["松", "竹", "梅", "兰", "菊", "荷", "柳", "桃", "李", "杏", "梨", "枫", "梧桐", "芭蕉", "其他"] + }, + "animals": { + "name": "动物", + 
"tags": ["鸟", "雁", "燕", "鹊", "蝉", "蛙", "鱼", "龙", "凤", "马", "牛", "羊", "犬", "其他"] + }, + "buildings": { + "name": "建筑", + "tags": ["楼", "阁", "亭", "台", "轩", "榭", "桥", "寺", "塔", "城", "关", "宫", "殿", "院", "其他"] + }, + "philosophy": { + "name": "哲理思想", + "tags": ["儒家思想", "道家思想", "佛家禅理", "人生感悟", "历史兴叹", "自然之道", "无明显哲理"] + }, + "life_stage": { + "name": "人生阶段", + "tags": ["少年", "青年", "中年", "老年", "不明确"] + }, + "social_role": { + "name": "社会身份", + "tags": ["士人", "官员", "隐士", "游子", "征人", "商贾", "农夫", "僧道", "闺中", "其他"] + }, + "technique": { + "name": "写作手法", + "tags": ["比兴", "赋", "对仗", "用典", "借景抒情", "托物言志", "虚实结合", "动静结合", "其他"] + }, + "rhetoric": { + "name": "修辞手法", + "tags": ["比喻", "拟人", "夸张", "对偶", "排比", "反复", "设问", "反问", "其他"] + }, + "colors": { + "name": "色彩意象", + "tags": ["青", "绿", "红", "白", "黄", "紫", "碧", "翠", "苍", "金"] + }, + "sounds": { + "name": "声音意象", + "tags": ["钟声", "鼓声", "笛声", "琴声", "风声", "雨声", "鸟鸣", "蝉鸣", "其他"] + }, + "location": { + "name": "地理方位", + "tags": ["江南", "塞北", "中原", "巴蜀", "关中", "岭南", "吴越", "荆楚", "其他"] + }, + "festival": { + "name": "节日习俗", + "tags": ["春节", "元宵", "清明", "端午", "七夕", "中秋", "重阳", "除夕", "无"] + } +} + + +@contextmanager +def get_db_connection(): + """数据库连接上下文管理器""" + conn = sqlite3.connect(DB_PATH) + conn.row_factory = sqlite3.Row + try: + yield conn + finally: + conn.close() + + +def init_database(): + """初始化数据库表结构""" + with get_db_connection() as conn: + cursor = conn.cursor() + + # 诗词表 + cursor.execute(''' + CREATE TABLE IF NOT EXISTS poems ( + id TEXT PRIMARY KEY, + title TEXT NOT NULL, + author TEXT NOT NULL, + paragraphs TEXT, + signature TEXT UNIQUE, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + ) + ''') + + # 分类标签表 + cursor.execute(''' + CREATE TABLE IF NOT EXISTS classifications ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + poem_id TEXT NOT NULL, + category TEXT NOT NULL, + tags TEXT, + FOREIGN KEY (poem_id) REFERENCES poems(id) ON DELETE CASCADE + ) + ''') + + # 阅读记录表 + 
cursor.execute(''' + CREATE TABLE IF NOT EXISTS reading_records ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + poem_id TEXT UNIQUE NOT NULL, + is_read BOOLEAN DEFAULT FALSE, + read_at TIMESTAMP, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + FOREIGN KEY (poem_id) REFERENCES poems(id) ON DELETE CASCADE + ) + ''') + + # 创建索引 + cursor.execute('CREATE INDEX IF NOT EXISTS idx_classifications_poem ON classifications(poem_id)') + cursor.execute('CREATE INDEX IF NOT EXISTS idx_classifications_category ON classifications(category)') + cursor.execute('CREATE INDEX IF NOT EXISTS idx_reading_records_poem ON reading_records(poem_id)') + cursor.execute('CREATE INDEX IF NOT EXISTS idx_poems_signature ON poems(signature)') + + conn.commit() + + +def poem_to_dict(row: sqlite3.Row, classifications: Optional[List[sqlite3.Row]] = None, reading_record: Optional[sqlite3.Row] = None) -> Dict: + """将数据库行转换为诗词字典""" + result = { + 'id': row['id'], + 'title': row['title'], + 'author': row['author'], + 'paragraphs': json.loads(row['paragraphs']) if row['paragraphs'] else [], + 'created_at': row['created_at'], + 'updated_at': row['updated_at'] + } + + if classifications: + result['classifications'] = {} + for clf in classifications: + result['classifications'][clf['category']] = json.loads(clf['tags']) + + if reading_record: + result['is_read'] = bool(reading_record['is_read']) + result['read_at'] = reading_record['read_at'] + else: + result['is_read'] = False + result['read_at'] = None + + return result + + +# ===== API 端点 ===== + +# 挂载静态文件目录 +app.mount("/static", StaticFiles(directory=FRONTEND_DIR), name="static") + +@app.get("/") +async def serve_frontend(): + """提供前端页面""" + return FileResponse(os.path.join(FRONTEND_DIR, 'index.html')) + + +@app.get("/style.css") +async def serve_css(): + """提供 CSS 文件""" + return FileResponse(os.path.join(FRONTEND_DIR, 'style.css')) + + +@app.get("/script.js") +async def serve_js(): + """提供 JS 文件""" + return FileResponse(os.path.join(FRONTEND_DIR, 
'script.js')) + + +@app.get("/api/categories") +async def get_categories(): + """获取分类标签体系""" + return CATEGORY_SYSTEM + + +@app.get("/api/stats") +async def get_statistics(): + """获取统计数据""" + with get_db_connection() as conn: + cursor = conn.cursor() + + # 总数 + cursor.execute('SELECT COUNT(*) FROM poems') + total = cursor.fetchone()[0] + + # 已读数量 + cursor.execute('SELECT COUNT(*) FROM reading_records WHERE is_read = TRUE') + read_count = cursor.fetchone()[0] + + # 各分类统计 + cursor.execute(''' + SELECT category, COUNT(*) as count + FROM classifications + GROUP BY category + ''') + category_stats = {row['category']: row['count'] for row in cursor.fetchall()} + + # 热门标签 + cursor.execute(''' + SELECT category, tags FROM classifications + WHERE category = 'genre' OR category = 'emotion_tone' + ''') + popular_tags = {} + for row in cursor.fetchall(): + tags = json.loads(row['tags']) + for tag in tags: + if tag not in popular_tags: + popular_tags[tag] = 0 + popular_tags[tag] += 1 + + top_tags = sorted(popular_tags.items(), key=lambda x: x[1], reverse=True)[:20] + + return { + 'total_poems': total, + 'read_count': read_count, + 'unread_count': total - read_count, + 'category_stats': category_stats, + 'top_tags': top_tags, + 'reading_progress': round((read_count / total * 100) if total > 0 else 0, 1) + } + + +@app.post("/api/poems/import") +async def import_poems(file: UploadFile = File(...)): + """批量导入诗词(支持 JSON/JSONL)""" + if not file.filename or not file.filename.endswith(('.json', '.jsonl')): + raise HTTPException(status_code=400, detail="仅支持 JSON 和 JSONL 格式") + + content = await file.read() + content_str = content.decode('utf-8').strip() + + if not content_str: + raise HTTPException(status_code=400, detail="文件内容为空") + + # 解析文件 + raw_items = [] + try: + data = json.loads(content_str) + if isinstance(data, list): + raw_items = data + elif isinstance(data, dict): + raw_items = [data] + except json.JSONDecodeError: + for line in content_str.splitlines(): + line = 
line.strip() + if line: + try: + obj = json.loads(line) + if isinstance(obj, dict): + raw_items.append(obj) + except json.JSONDecodeError: + continue + + # 导入数据库 + imported_count = 0 + skipped_count = 0 + + with get_db_connection() as conn: + cursor = conn.cursor() + + for item in raw_items: + try: + # 验证基本格式 + if not all(k in item for k in ['title', 'author', 'paragraphs']): + skipped_count += 1 + continue + + poem_id = item.get('id', f"poem_{datetime.now().timestamp()}") + signature = item.get('signature', '') + paragraphs = json.dumps(item['paragraphs'], ensure_ascii=False) + + # 检查是否已存在 + if signature: + cursor.execute('SELECT id FROM poems WHERE signature = ?', (signature,)) + if cursor.fetchone(): + skipped_count += 1 + continue + + # 插入诗词 + cursor.execute(''' + INSERT OR REPLACE INTO poems (id, title, author, paragraphs, signature) + VALUES (?, ?, ?, ?, ?) + ''', (poem_id, item['title'], item['author'], paragraphs, signature or '')) + + # 插入分类 + if 'llm_classification' in item or 'classifications' in item: + classifications = item.get('llm_classification', item.get('classifications', {})) + + # 先删除旧分类 + cursor.execute('DELETE FROM classifications WHERE poem_id = ?', (poem_id,)) + + # 插入新分类 + for category, tags in classifications.items(): + if isinstance(tags, list): + tags_str = json.dumps(tags, ensure_ascii=False) + else: + tags_str = json.dumps([tags], ensure_ascii=False) + + cursor.execute(''' + INSERT INTO classifications (poem_id, category, tags) + VALUES (?, ?, ?) 
+ ''', (poem_id, category, tags_str)) + + # 初始化阅读记录 + cursor.execute(''' + INSERT OR IGNORE INTO reading_records (poem_id, is_read) + VALUES (?, FALSE) + ''', (poem_id,)) + + imported_count += 1 + + except Exception as e: + print(f"导入失败:{e}") + skipped_count += 1 + continue + + conn.commit() + + return { + 'message': f'成功导入 {imported_count} 首诗词', + 'imported': imported_count, + 'skipped': skipped_count + } + + +@app.get("/api/poems") +async def get_poems( + page: int = Query(1, ge=1), + page_size: int = Query(20, ge=1, le=100), + categories: Optional[str] = Query(None, description="多类别筛选,格式:category1:tag1,category2:tag2"), + search: Optional[str] = Query(None, description="搜索关键词"), + is_read: Optional[bool] = Query(None, description="阅读状态筛选") +): + """获取诗词列表(支持分页、多类别筛选、搜索)""" + with get_db_connection() as conn: + cursor = conn.cursor() + + # 构建查询条件 + conditions = [] + params = [] + + # 阅读状态筛选 + if is_read is not None: + conditions.append('rr.is_read = ?') + params.append(1 if is_read else 0) + + # 搜索 + if search: + conditions.append('(p.title LIKE ? OR p.author LIKE ?)') + params.extend([f'%{search}%', f'%{search}%']) + + # 多类别筛选 + if categories: + category_conditions = [] + for cat_filter in categories.split(','): + if ':' in cat_filter: + category, tag = cat_filter.split(':', 1) + category_conditions.append(''' + EXISTS ( + SELECT 1 FROM classifications c + WHERE c.poem_id = p.id + AND c.category = ? + AND c.tags LIKE ? 
+ ) + ''') + params.extend([category, f'%{tag}%']) + + if category_conditions: + conditions.append(' AND '.join(category_conditions)) + + where_clause = ' AND '.join(conditions) if conditions else '1=1' + + # 查询总数 + count_sql = f''' + SELECT COUNT(DISTINCT p.id) FROM poems p + LEFT JOIN reading_records rr ON p.id = rr.poem_id + WHERE {where_clause} + ''' + cursor.execute(count_sql, params) + total = cursor.fetchone()[0] + + # 查询数据 + sql = f''' + SELECT p.*, rr.is_read, rr.read_at + FROM poems p + LEFT JOIN reading_records rr ON p.id = rr.poem_id + WHERE {where_clause} + ORDER BY p.created_at DESC + LIMIT ? OFFSET ? + ''' + params.extend([page_size, (page - 1) * page_size]) + cursor.execute(sql, params) + poems = cursor.fetchall() + + # 获取每首诗的分类 + poem_ids = [row['id'] for row in poems] + classifications = {} + if poem_ids: + cursor.execute(''' + SELECT poem_id, category, tags FROM classifications + WHERE poem_id IN ({}) + '''.format(','.join('?' * len(poem_ids))), poem_ids) + + for clf in cursor.fetchall(): + if clf['poem_id'] not in classifications: + classifications[clf['poem_id']] = [] + classifications[clf['poem_id']].append(clf) + + # 转换结果 + result = [] + for row in poems: + poem_dict = { + 'id': row['id'], + 'title': row['title'], + 'author': row['author'], + 'paragraphs': json.loads(row['paragraphs']) if row['paragraphs'] else [], + 'created_at': row['created_at'], + 'updated_at': row['updated_at'], + 'is_read': bool(row['is_read']), + 'read_at': row['read_at'], + 'classifications': {} + } + + # 添加分类 + if row['id'] in classifications: + for clf in classifications[row['id']]: + poem_dict['classifications'][clf['category']] = json.loads(clf['tags']) + + result.append(poem_dict) + + return { + 'total': total, + 'page': page, + 'page_size': page_size, + 'total_pages': (total + page_size - 1) // page_size, + 'poems': result + } + + +@app.get("/api/poems/random") +async def get_random_poem(): + """获取随机一首诗词""" + with get_db_connection() as conn: + cursor = 
conn.cursor() + cursor.execute('SELECT * FROM poems ORDER BY RANDOM() LIMIT 1') + poem = cursor.fetchone() + + if not poem: + raise HTTPException(status_code=404, detail="没有可用的诗词") + + cursor.execute('SELECT * FROM classifications WHERE poem_id = ?', (poem['id'],)) + classifications = cursor.fetchall() + + cursor.execute('SELECT * FROM reading_records WHERE poem_id = ?', (poem['id'],)) + reading_record = cursor.fetchone() + + return poem_to_dict(poem, classifications, reading_record) + + +@app.get("/api/poems/{poem_id}") +async def get_poem(poem_id: str): + """获取单首诗词详情""" + with get_db_connection() as conn: + cursor = conn.cursor() + + # 获取诗词 + cursor.execute('SELECT * FROM poems WHERE id = ?', (poem_id,)) + poem = cursor.fetchone() + + if not poem: + raise HTTPException(status_code=404, detail="诗词不存在") + + # 获取分类 + cursor.execute('SELECT * FROM classifications WHERE poem_id = ?', (poem_id,)) + classifications = cursor.fetchall() + + # 获取阅读记录 + cursor.execute('SELECT * FROM reading_records WHERE poem_id = ?', (poem_id,)) + reading_record = cursor.fetchone() + + return poem_to_dict(poem, classifications, reading_record) + + +@app.put("/api/poems/{poem_id}/read") +async def toggle_read_status(poem_id: str, is_read: bool = Query(...)): + """切换阅读状态""" + with get_db_connection() as conn: + cursor = conn.cursor() + + # 检查诗词是否存在 + cursor.execute('SELECT id FROM poems WHERE id = ?', (poem_id,)) + if not cursor.fetchone(): + raise HTTPException(status_code=404, detail="诗词不存在") + + # 更新或插入阅读记录 + read_at = datetime.now().isoformat() if is_read else None + cursor.execute(''' + INSERT INTO reading_records (poem_id, is_read, read_at) + VALUES (?, ?, ?) 
+ ON CONFLICT(poem_id) DO UPDATE SET + is_read = excluded.is_read, + read_at = excluded.read_at + ''', (poem_id, 1 if is_read else 0, read_at)) + + conn.commit() + + return {'success': True, 'is_read': is_read, 'read_at': read_at} + + +@app.get("/api/tags/cloud") +async def get_tag_cloud(): + """获取标签云数据""" + with get_db_connection() as conn: + cursor = conn.cursor() + + cursor.execute(''' + SELECT category, tags FROM classifications + ''') + + tag_counts = {} + for row in cursor.fetchall(): + tags = json.loads(row['tags']) + for tag in tags: + key = f"{row['category']}:{tag}" + if key not in tag_counts: + tag_counts[key] = { + 'category': row['category'], + 'tag': tag, + 'count': 0 + } + tag_counts[key]['count'] += 1 + + return { + 'tags': list(tag_counts.values()), + 'categories': {k: v['name'] for k, v in CATEGORY_SYSTEM.items()} + } + + +# 启动时初始化数据库 +@app.on_event("startup") +async def startup_event(): + init_database() + print(f"\n数据库路径:{DB_PATH}") + print(f"前端目录:{FRONTEND_DIR}") + print("\nAPI 端点:") + print(" GET / - 前端页面") + print(" GET /api/categories - 分类体系") + print(" GET /api/stats - 统计数据") + print(" POST /api/poems/import - 批量导入") + print(" GET /api/poems - 诗词列表") + print(" GET /api/poems/{id} - 诗词详情") + print(" PUT /api/poems/{id}/read - 切换阅读状态") + print(" GET /api/poems/random - 随机诗词") + print(" GET /api/tags/cloud - 标签云") + print("\n按 Ctrl+C 停止服务\n") + + +if __name__ == "__main__": + uvicorn.run(app, host="0.0.0.0", port=8000) \ No newline at end of file diff --git a/classify_pois.py b/classify_pois.py new file mode 100644 index 0000000..e442981 --- /dev/null +++ b/classify_pois.py @@ -0,0 +1,616 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +Poem Classification System with LLM API Integration + +基于大模型的中国古代诗词多维度分类系统 + +分类维度包括: +- 季节时序(四季、节气、时辰) +- 题材类型(山水、边塞、咏史、咏物等) +- 情感心境(喜怒哀乐、忧思愁绪等) +- 景物意象(自然、植物、动物、建筑等) +- 哲理思想(儒释道、人生感悟等) +- 艺术手法(比兴、用典、对仗等) +- 人物关系(送别、怀人、思乡等) +- 生活场景(宴饮、耕作、读书等) + +Configuration via environment variables: +- LLM_BASE_URL: 
API base URL (e.g., http://localhost:11434/v1) +- LLM_API_KEY: API key (optional for local models) +- LLM_MODEL: Model name (e.g., qwen:7b, gpt-4) +""" + +import json +import os +import re +import hashlib +from typing import Dict, List, Set, Optional +from datetime import datetime +from concurrent.futures import ThreadPoolExecutor, as_completed +import urllib.request +import urllib.error +import threading + + +def generate_poem_signature(poem_data: Dict) -> str: + """ + 基于标题、作者、内容生成诗词的唯一签名(MD5) + 用于检测重复诗词 + """ + title = poem_data.get('title', '').strip() + author = poem_data.get('author', '').strip() + + content_parts = [] + if 'paragraphs' in poem_data: + for para in poem_data['paragraphs']: + if isinstance(para, str): + content_parts.append(para.strip()) + elif isinstance(para, list): + content_parts.extend([p.strip() for p in para]) + if 'content' in poem_data: + if isinstance(poem_data['content'], str): + content_parts.append(poem_data['content'].strip()) + elif isinstance(poem_data['content'], list): + content_parts.extend([p.strip() for p in poem_data['content']]) + + content = '\n'.join(content_parts) + signature_str = f"title:{title}|author:{author}|content:{content}" + return hashlib.md5(signature_str.encode('utf-8')).hexdigest() + + +def load_existing_poems(output_file: str) -> Set[str]: + """ + 从输出文件加载已存在的诗词签名 + """ + existing_signatures = set() + + if not os.path.exists(output_file): + return existing_signatures + + try: + with open(output_file, 'r', encoding='utf-8') as f: + for line_num, line in enumerate(f, 1): + line = line.strip() + if not line: + continue + try: + data = json.loads(line) + if 'title' in data and 'author' in data: + sig = generate_poem_signature({ + 'title': data.get('title', ''), + 'author': data.get('author', ''), + 'paragraphs': data.get('llm_classification', {}).get('analysis', '').split('\n') + if data.get('llm_classification') else [] + }) + existing_signatures.add(sig) + except json.JSONDecodeError: + print(f" Warning: 
Skipping invalid JSON on line {line_num}") + except IOError as e: + print(f" Warning: Could not read existing output file: {e}") + + return existing_signatures + + +def append_to_output_file(output_file: str, data: Dict, lock: threading.Lock): + """ + 线程安全地追加写入输出文件 + """ + with lock: + with open(output_file, 'a', encoding='utf-8') as f: + f.write(json.dumps(data, ensure_ascii=False) + '\n') + + +class LLMClassifier: + """ + 基于大模型的诗词分类器 + + 支持通过 OpenAI 兼容 API 调用本地或远程模型 + 提供 20+ 维度的诗词分类标签 + """ + + CLASSIFICATION_PROMPT = """你是一位中国古代诗词分类专家。请深入分析以下诗词,并从多个维度进行分类标注。 + +【诗词信息】 +- 标题:{title} +- 作者:{author} +- 内容:{content} + +【重要说明】 +- 如果诗词内容是繁体中文,请在输出中包含简体中文版本 +- 分类标签一律使用简体中文 + +【分类维度与标签选项】 + +1. 季节(season):["春", "夏", "秋", "冬", "四季", "无明确季节"] +2. 节气(solar_terms):24 节气中的具体节气,如 "立春"、"清明"、"冬至" 等,无则空数组 +3. 时辰(time_of_day):["清晨", "上午", "正午", "下午", "黄昏", "夜晚", "深夜", "黎明", "不明确"] +4. 题材类型(genre):["山水田园", "边塞征战", "咏史怀古", "咏物言志", "送别怀人", "思乡怀远", + "爱情闺怨", "友情赠答", "羁旅漂泊", "隐逸闲适", "讽喻时事", "节日习俗", + "宴饮酬唱", "读书治学", "农耕劳作", "宗教禅理", "其他"] +5. 情感基调(emotion_tone):["喜悦欢快", "悲伤哀愁", "愤怒激愤", "忧郁伤感", "孤独寂寞", + "宁静淡泊", "豪迈激昂", "思念眷恋", "惆怅失落", "平和超脱", "复杂混合"] +6. 具体情感(emotions):从以下选择 3-5 个最贴切的: + ["喜", "怒", "哀", "乐", "忧", "思", "悲", "恐", "惊", "愁", "恨", "爱", + "恋", "盼", "悔", "愧", "傲", "谦", "静", "躁"] +7. 景物 - 自然(nature_scenery):["山", "水", "云", "雨", "雪", "风", "雷", "电", "日", "月", "星", "霜", "露", "霞"] +8. 景物 - 植物(plants):["松", "竹", "梅", "兰", "菊", "荷", "柳", "桃", "李", "杏", "梨", "枫", "梧桐", "芭蕉", "其他"] +9. 景物 - 动物(animals):["鸟", "雁", "燕", "鹊", "蝉", "蛙", "鱼", "龙", "凤", "马", "牛", "羊", "犬", "其他"] +10. 景物 - 建筑(buildings):["楼", "阁", "亭", "台", "轩", "榭", "桥", "寺", "塔", "城", "关", "宫", "殿", "院", "其他"] +11. 意象关键词(imagery):提取 5-10 个诗中最具代表性的意象词汇(简体中文) +12. 哲理思想(philosophy):["儒家思想", "道家思想", "佛家禅理", "人生感悟", "历史兴叹", "自然之道", "无明显哲理"] +13. 人生阶段(life_stage):["少年", "青年", "中年", "老年", "不明确"] +14. 社会身份(social_role):["士人", "官员", "隐士", "游子", "征人", "商贾", "农夫", "僧道", "闺中", "其他"] +15. 
写作手法(technique):["比兴", "赋", "对仗", "用典", "借景抒情", "托物言志", "虚实结合", "动静结合", "其他"] +16. 修辞手法(rhetoric):["比喻", "拟人", "夸张", "对偶", "排比", "反复", "设问", "反问", "其他"] +17. 色彩意象(colors):提取诗中的色彩词,如 ["青", "绿", "红", "白", "黄", "紫", "碧", "翠", "苍", "金"] +18. 声音意象(sounds):["钟声", "鼓声", "笛声", "琴声", "风声", "雨声", "鸟鸣", "蝉鸣", "其他"] +19. 地理方位(location):["江南", "塞北", "中原", "巴蜀", "关中", "岭南", "吴越", "荆楚", "其他"] +20. 节日习俗(festival):["春节", "元宵", "清明", "端午", "七夕", "中秋", "重阳", "除夕", "无"] + +【返回格式】 +请返回严格的 JSON 格式,结构如下: +{{ + "original_text": ["空山新雨後,天氣晚來秋。", ...], // 原始文本(如果输入是繁体) + "simplified_text": ["空山新雨后,天气晚来秋。", ...], // 简体中文版本 + "season": ["秋"], + "solar_terms": ["白露"], + "time_of_day": "黄昏", + "genre": ["山水田园", "隐逸闲适"], + "emotion_tone": "宁静淡泊", + "emotions": ["静", "喜", "乐"], + "nature_scenery": ["山", "水", "月"], + "plants": ["松", "竹"], + "animals": ["鸟"], + "buildings": [], + "imagery": ["空山", "新雨", "明月", "青松"], + "philosophy": ["道家思想", "自然之道"], + "life_stage": "中年", + "social_role": "隐士", + "technique": ["借景抒情", "动静结合"], + "rhetoric": ["拟人"], + "colors": ["青", "白"], + "sounds": [], + "location": "终南山", + "festival": "无", + "analysis": "简要分析这首诗的主题思想、艺术特色和情感内涵(100-200 字,使用简体中文)" +}} + +【注意事项】 +- 只返回 JSON,不要有任何其他文字说明 +- 每个维度根据诗意选择最贴切的标签,可以是 1 个或多个 +- 如果某个维度没有明确对应,选择"无"、"不明确"或空数组 +- 意象关键词应从原诗中提取或合理归纳(使用简体中文) +- 分析要准确、深入、简洁(使用简体中文) +- 如果输入是繁体中文,original_text 和 simplified_text 都要填写 +- 如果输入已经是简体中文,original_text 和 simplified_text 填写相同内容""" + + def __init__(self, base_url: Optional[str] = None, api_key: Optional[str] = None, + model: Optional[str] = None, output_file: Optional[str] = None): + """ + 初始化 LLM 分类器 + + Args: + base_url: API 基础 URL + api_key: API 密钥(本地模型可选) + model: 模型名称 + output_file: 输出文件路径(用于实时写入) + """ + self.base_url = (base_url or os.getenv('LLM_BASE_URL') or 'https://api.siliconflow.cn/v1').rstrip('/') + self.api_key = api_key or os.getenv('LLM_API_KEY') or '' + self.model = model or os.getenv('LLM_MODEL') or 'qwen:7b' + self.output_file = output_file + self.file_lock = 
threading.Lock() if output_file else None + + def _call_api(self, messages: List[Dict], temperature: float = 0.3, max_retries: int = 3) -> str: + """ + 调用 LLM API,带重试机制 + + Args: + messages: 消息列表 + temperature: 温度参数 + max_retries: 最大重试次数 + + Returns: + API 返回的文本内容 + """ + url = f"{self.base_url}/chat/completions" + + headers = { + 'Content-Type': 'application/json', + } + + if self.api_key: + headers['Authorization'] = f'Bearer {self.api_key}' + + payload = { + 'model': self.model, + 'messages': messages, + 'temperature': temperature, + 'stream': False + } + + for attempt in range(max_retries): + try: + data = json.dumps(payload).encode('utf-8') + req = urllib.request.Request(url, data=data, headers=headers, method='POST') + + with urllib.request.urlopen(req, timeout=180) as response: + result = json.loads(response.read().decode('utf-8')) + return result['choices'][0]['message']['content'] + + except urllib.error.URLError as e: + print(f" API request failed (attempt {attempt + 1}/{max_retries}): {e}") + if attempt == max_retries - 1: + return "" + except json.JSONDecodeError as e: + print(f" Failed to parse API response: {e}") + return "" + except Exception as e: + print(f" Unexpected error (attempt {attempt + 1}/{max_retries}): {e}") + if attempt == max_retries - 1: + return "" + + return "" + + def classify_poem(self, poem_data: Dict, skip_if_exists: bool = False, + existing_signatures: Optional[Set[str]] = None) -> Optional[Dict]: + """ + 使用 LLM 对单首诗词进行分类 + + Args: + poem_data: 诗词数据字典 + skip_if_exists: 是否跳过已存在的诗词 + existing_signatures: 已存在的诗词签名集合 + + Returns: + 分类结果字典,失败返回 None + """ + title = poem_data.get('title', '') + author = poem_data.get('author', '') + + content_parts = [] + if 'paragraphs' in poem_data: + for para in poem_data['paragraphs']: + if isinstance(para, str): + content_parts.append(para) + elif isinstance(para, list): + content_parts.extend(para) + if 'content' in poem_data: + if isinstance(poem_data['content'], str): + 
content_parts.append(poem_data['content']) + elif isinstance(poem_data['content'], list): + content_parts.extend(poem_data['content']) + + content = '\n'.join(content_parts) + + if not title and not content: + return None + + # 检查是否已存在 + if skip_if_exists and existing_signatures is not None: + poem_sig = generate_poem_signature(poem_data) + if poem_sig in existing_signatures: + return None + + prompt = self.CLASSIFICATION_PROMPT.format( + title=title or '无题', + author=author or '佚名', + content=content + ) + + messages = [ + {'role': 'system', 'content': '你是一位中国古代诗词分类专家,精通诗词鉴赏和分类,能够准确识别诗词的题材、情感、意象和艺术特色。'}, + {'role': 'user', 'content': prompt} + ] + + response = self._call_api(messages, temperature=0.3) + + if not response: + return None + + try: + # 提取 JSON 部分 + json_match = re.search(r'\{[\s\S]*\}', response) + if json_match: + result = json.loads(json_match.group()) + else: + result = json.loads(response) + + # 处理简繁转换结果 + paragraphs = poem_data.get('paragraphs', []) + simplified_paragraphs = [] + + # 如果 LLM 返回了简体版本,使用它 + if 'simplified_text' in result: + simplified_paragraphs = result['simplified_text'] + elif 'original_text' in result: + # 如果只有 original_text,说明输入已经是简体 + simplified_paragraphs = paragraphs + else: + # 没有文本转换信息,使用原始内容 + simplified_paragraphs = paragraphs + + classification_result = { + 'id': poem_data.get('id', 'unknown'), + 'title': title, + 'author': author, + 'paragraphs': simplified_paragraphs, # 使用简体版本 + 'original_paragraphs': paragraphs if paragraphs != simplified_paragraphs else None, # 保留原始(如果不同) + 'llm_classification': result, + 'signature': generate_poem_signature({ + 'title': title, + 'author': author, + 'paragraphs': simplified_paragraphs + }), # 基于简体内容生成签名 + 'timestamp': datetime.now().isoformat(), + 'read_mark': False + } + + # 实时写入文件 + if self.output_file and self.file_lock: + append_to_output_file(self.output_file, classification_result, self.file_lock) + + return classification_result + except json.JSONDecodeError as e: + print(f" 
def is_valid_poem_data(poem_data: Dict) -> bool:
    """
    Check that a poem record has the shape the classifier expects.

    A record is valid when it is a dict whose ``id``, ``title`` and ``author``
    are strings (``title`` and ``author`` must not be blank) and whose
    ``paragraphs`` is a list containing only strings (an empty list passes).

    Args:
        poem_data: Candidate record, typically one element of a loaded file.

    Returns:
        True when every check passes, False otherwise.
    """
    if not isinstance(poem_data, dict):
        return False

    # All four keys must be present before their values are inspected.
    if any(key not in poem_data for key in ('author', 'paragraphs', 'title', 'id')):
        return False

    paragraphs = poem_data['paragraphs']
    if not isinstance(paragraphs, list):
        return False
    if not all(isinstance(para, str) for para in paragraphs):
        return False

    if not isinstance(poem_data['id'], str):
        return False

    # author and title have to contain something other than whitespace.
    for key in ('author', 'title'):
        value = poem_data[key]
        if not isinstance(value, str) or not value.strip():
            return False

    return True


def load_poems_from_file(file_path: str) -> List[Dict]:
    """
    Load poem records from a file.

    Three layouts are accepted: a JSON array, a single JSON object, or JSONL
    (one JSON object per line). Whatever the layout, only dict records are
    returned; other values (numbers, strings, nested lists, unparsable lines)
    are dropped so callers can rely on ``record.get(...)``.

    Args:
        file_path: Path of the file to read.

    Returns:
        The list of dict records; empty when the file is empty, unreadable,
        not valid UTF-8, or contains no objects.
    """
    try:
        # utf-8-sig reads plain UTF-8 and also strips a leading BOM, which
        # json.loads would otherwise reject.
        with open(file_path, 'r', encoding='utf-8-sig') as f:
            content = f.read().strip()
    except (OSError, UnicodeDecodeError) as e:
        print(f"Error reading file {file_path}: {e}")
        return []

    if not content:
        return []

    try:
        data = json.loads(content)
    except json.JSONDecodeError:
        # Not a single JSON document: fall back to line-by-line JSONL parsing.
        candidates = []
        for line in content.splitlines():
            line = line.strip()
            if not line:
                continue
            try:
                candidates.append(json.loads(line))
            except json.JSONDecodeError:
                continue
    else:
        candidates = data if isinstance(data, list) else [data]

    return [record for record in candidates if isinstance(record, dict)]
10 --llm \\ + --base-url https://api.openai.com/v1 --model gpt-4 --api-key sk-xxx + + # 断点续跑(自动跳过已处理的诗词) + python classify_pois.py ./poems ./output.jsonl 10 --llm \\ + --base-url http://localhost:11434/v1 --model qwen:7b + + # 只扫描当前目录,不递归子目录 + python classify_pois.py ./poems ./output.jsonl 10 --llm --no-recursive \\ + --base-url http://localhost:11434/v1 --model qwen:7b + +环境变量(备选): + LLM_BASE_URL, LLM_API_KEY, LLM_MODEL + """ + ) + + parser.add_argument('input_folder', help='输入文件夹路径(包含诗词 JSON/JSONL 文件)') + parser.add_argument('output_file', help='输出 JSONL 文件路径') + parser.add_argument('max_workers', nargs='?', type=int, default=10, help='并发数(默认:10)') + + parser.add_argument('--llm', action='store_true', help='使用 LLM 分类(默认)') + parser.add_argument('--base-url', type=str, help='LLM API 基础 URL') + parser.add_argument('--api-key', type=str, help='LLM API 密钥') + parser.add_argument('--model', type=str, help='LLM 模型名称') + parser.add_argument('--no-recursive', action='store_true', help='不递归搜索子目录') + + args = parser.parse_args() + + # 获取配置 + base_url = args.base_url or os.getenv('LLM_BASE_URL', 'http://localhost:11434/v1') + api_key = args.api_key or os.getenv('LLM_API_KEY', '') + model = args.model or os.getenv('LLM_MODEL', 'qwen:7b') + + print(f"\n{'='*70}") + print(f"基于大模型的诗词多维度分类系统") + print(f"{'='*70}") + print(f"输入目录:{args.input_folder}") + print(f"输出文件:{args.output_file}") + print(f"API 地址:{base_url}") + print(f"模型名称:{model}") + print(f"{'='*70}\n") + + # 加载已存在的诗词签名 + existing_signatures = load_existing_poems(args.output_file) + if existing_signatures: + print(f"检测到输出文件中已有 {len(existing_signatures)} 首诗词,将自动跳过重复项\n") + + # 创建分类器 + llm_classifier = LLMClassifier( + base_url=base_url, + api_key=api_key, + model=model, + output_file=args.output_file + ) + + # 扫描输入文件(支持递归搜索子目录) + valid_extensions = {'.json', '.jsonl'} + json_files = [] + + if args.no_recursive: + # 不递归,只扫描当前目录 + for f in os.listdir(args.input_folder): + if not any(f.lower().endswith(ext) for ext in 
valid_extensions): + continue + file_path = os.path.join(args.input_folder, f) + if not os.path.isfile(file_path): + continue + json_files.append(file_path) + else: + # 递归扫描所有子目录 + for root, dirs, files in os.walk(args.input_folder): + # 跳过隐藏目录 + dirs[:] = [d for d in dirs if not d.startswith('.')] + + for f in files: + if not any(f.lower().endswith(ext) for ext in valid_extensions): + continue + # 跳过隐藏文件 + if f.startswith('.'): + continue + file_path = os.path.join(root, f) + if os.path.isfile(file_path): + json_files.append(file_path) + + # 按文件路径排序,保证处理顺序一致 + json_files.sort() + + print(f"发现 {len(json_files)} 个有效的 JSON/JSONL 文件\n") + + # 显示目录结构(如果有子目录) + subdirs = set(os.path.dirname(f).replace(args.input_folder, '').lstrip('\\').lstrip('/') for f in json_files) + if subdirs: + print(f"包含子目录:{', '.join(sorted(subdirs))}") + + # 统计信息 + stats = { + 'processed': 0, + 'skipped': 0, + 'failed': 0, + 'invalid': 0, + 'total': 0 + } + stats_lock = threading.Lock() + + def process_poem_batch(file_path: str) -> dict: + """处理单个文件的诗词""" + file_stats = {'processed': 0, 'skipped': 0, 'failed': 0, 'invalid': 0} + poems = load_poems_from_file(file_path) + + for idx, poem in enumerate(poems): + if not is_valid_poem_data(poem): + file_stats['invalid'] += 1 + print(f" [{idx+1}/{len(poems)}] 格式无效:{poem.get('title', 'Unknown')}") + continue + + poem_sig = generate_poem_signature(poem) + if poem_sig in existing_signatures: + file_stats['skipped'] += 1 + print(f" [{idx+1}/{len(poems)}] 跳过(已存在): {poem.get('title', 'Unknown')}") + continue + + print(f" [{idx+1}/{len(poems)}] 分类中:{poem.get('title', 'Unknown')}") + result = llm_classifier.classify_poem(poem, skip_if_exists=True, existing_signatures=existing_signatures) + + if result: + file_stats['processed'] += 1 + existing_signatures.add(poem_sig) + print(f" ✓ 成功") + else: + file_stats['failed'] += 1 + print(f" ✗ 失败") + + return file_stats + + # 使用线程池并发处理 + print(f"使用 {args.max_workers} 个并发线程处理\n") + + with 
ThreadPoolExecutor(max_workers=args.max_workers) as executor: + # 提交所有文件处理任务 + future_to_file = {executor.submit(process_poem_batch, fp): fp for fp in json_files} + + # 收集结果 + for future in as_completed(future_to_file): + file_path = future_to_file[future] + try: + file_stats = future.result() + with stats_lock: + stats['processed'] += file_stats['processed'] + stats['skipped'] += file_stats['skipped'] + stats['failed'] += file_stats['failed'] + stats['invalid'] += file_stats['invalid'] + stats['total'] += file_stats['processed'] + file_stats['skipped'] + file_stats['failed'] + file_stats['invalid'] + except Exception as e: + print(f"处理文件 {file_path} 时出错:{e}") + + print() + + # 输出统计 + print(f"\n{'='*70}") + print(f"分类完成!") + print(f"{'='*70}") + print(f"诗词总数:{stats['total']}") + print(f"成功分类:{stats['processed']}") + print(f"跳过重复:{stats['skipped']}") + print(f"分类失败:{stats['failed']}") + print(f"格式无效:{stats['invalid']}") + print(f"输出文件:{args.output_file}") + print(f"{'='*70}\n") + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/frontend/index.html b/frontend/index.html new file mode 100644 index 0000000..26d4d36 --- /dev/null +++ b/frontend/index.html @@ -0,0 +1,193 @@ + + + + + + 古诗词阅读网 + + + + +
+
+
+ +
+

古诗词阅读网

+

品读经典 · 感悟人生

+
+
+ +
+
+ + +
+ + + + +
+ +
+

诗词列表

+ 0 首 +
+ + +
+ + + + + + + + + +
+
+ + + + + + + + + + \ No newline at end of file diff --git a/frontend/script.js b/frontend/script.js new file mode 100644 index 0000000..aeec51f --- /dev/null +++ b/frontend/script.js @@ -0,0 +1,630 @@ +/** + * 古诗词阅读网 - 前端 JavaScript + */ + +const API_BASE = window.location.origin; + +// 状态 +let state = { + poems: [], + currentPage: 1, + totalPages: 1, + pageSize: 20, + total: 0, + filters: { + view: 'all', + search: '', + categories: [] // 格式:[{category: 'genre', tag: '山水田园'}] + }, + currentPoem: null, + categories: {} +}; + +// 初始化 +document.addEventListener('DOMContentLoaded', async () => { + await loadCategories(); + await loadStats(); + await loadPoems(); + setupEventListeners(); + setupUpload(); +}); + +// 加载分类体系 +async function loadCategories() { + try { + const res = await fetch(`${API_BASE}/api/categories`); + if (res.ok) { + state.categories = await res.json(); + renderCategoryTabs(); + } + } catch (err) { + console.error('加载分类失败:', err); + } +} + +// 加载统计 +async function loadStats() { + try { + const res = await fetch(`${API_BASE}/api/stats`); + if (res.ok) { + const stats = await res.json(); + updateStats(stats); + } + } catch (err) { + console.error('加载统计失败:', err); + } +} + +// 加载诗词 +async function loadPoems() { + showLoading(true); + + try { + const params = new URLSearchParams({ + page: state.currentPage, + page_size: state.pageSize + }); + + // 搜索 + if (state.filters.search) { + params.append('search', state.filters.search); + } + + // 阅读状态 + if (state.filters.view === 'read') { + params.append('is_read', 'true'); + } else if (state.filters.view === 'unread') { + params.append('is_read', 'false'); + } + + // 多类别筛选 + if (state.filters.categories.length > 0) { + const catStr = state.filters.categories + .map(c => `${c.category}:${c.tag}`) + .join(','); + params.append('categories', catStr); + } + + const res = await fetch(`${API_BASE}/api/poems?${params}`); + if (res.ok) { + const data = await res.json(); + state.poems = data.poems; + state.total = 
data.total; + state.totalPages = data.total_pages; + + renderPoems(); + updateResultCount(); + } + } catch (err) { + console.error('加载诗词失败:', err); + showToast('加载失败', 'error'); + } finally { + showLoading(false); + } +} + +// 渲染分类标签页 +function renderCategoryTabs() { + const container = document.getElementById('categoryTabs'); + const categories = Object.entries(state.categories); + + // 显示所有分类 + container.innerHTML = categories.map(([key, data]) => ` +
+ ${data.name} +
+ `).join(''); + + // 点击事件 + container.querySelectorAll('.category-tab').forEach(tab => { + tab.addEventListener('click', () => { + const category = tab.dataset.category; + showTagsForCategory(category); + + // 切换激活状态 + container.querySelectorAll('.category-tab').forEach(t => t.classList.remove('active')); + tab.classList.add('active'); + }); + }); + + // 默认显示第一个分类的标签 + if (categories.length > 0) { + const firstCategory = categories[0][0]; + showTagsForCategory(firstCategory); + container.querySelector('.category-tab')?.classList.add('active'); + } +} + +// 显示某分类的标签 +function showTagsForCategory(category) { + const container = document.getElementById('tagCloud'); + const data = state.categories[category]; + + if (!data) return; + + container.innerHTML = data.tags.map(tag => ` + ${tag} + `).join(''); + + // 点击事件 + container.querySelectorAll('.tag-item').forEach(item => { + item.addEventListener('click', (e) => { + e.stopPropagation(); + addFilter(category, item.dataset.tag); + }); + }); +} + +// 添加筛选条件 +function addFilter(category, tag) { + // 检查是否已存在 + const exists = state.filters.categories.some( + c => c.category === category && c.tag === tag + ); + + if (!exists) { + state.filters.categories.push({ category, tag }); + renderSelectedFilters(); + state.currentPage = 1; + loadPoems(); + } +} + +// 渲染已选筛选 +function renderSelectedFilters() { + const container = document.getElementById('selectedFilters'); + const btn = document.getElementById('clearFiltersBtn'); + + if (state.filters.categories.length === 0) { + container.innerHTML = ''; + btn.style.display = 'none'; + return; + } + + container.innerHTML = state.filters.categories.map((f, i) => ` + + ${state.categories[f.category]?.name || f.category}: ${f.tag} + + + `).join(''); + + btn.style.display = 'inline-block'; +} + +// 移除筛选 +function removeFilter(index) { + state.filters.categories.splice(index, 1); + renderSelectedFilters(); + state.currentPage = 1; + loadPoems(); +} + +// 清除所有筛选 +function clearFilters() { + 
state.filters.categories = []; + state.filters.search = ''; + state.filters.view = 'all'; + document.getElementById('searchInput').value = ''; + renderSelectedFilters(); + + // 重置导航激活状态 + document.querySelectorAll('.nav-item').forEach(n => n.classList.remove('active')); + document.querySelector('[data-view="all"]').classList.add('active'); + + loadPoems(); +} + +// 渲染诗词列表 +function renderPoems() { + const grid = document.getElementById('poemGrid'); + const empty = document.getElementById('emptyState'); + + if (state.poems.length === 0) { + grid.innerHTML = ''; + empty.style.display = 'block'; + return; + } + + empty.style.display = 'none'; + grid.innerHTML = ''; + + state.poems.forEach(poem => { + const card = createPoemCard(poem); + grid.appendChild(card); + }); +} + +// 创建诗词卡片 +function createPoemCard(poem) { + const isRead = poem.is_read || false; + const paragraphs = poem.paragraphs || []; + const excerpt = paragraphs.slice(0, 2).join('
'); + const tags = getTopTags(poem.classifications); + + const card = document.createElement('div'); + card.className = `poem-card ${isRead ? 'read' : ''}`; + card.dataset.id = poem.id; + card.innerHTML = ` +
+

${escapeHtml(poem.title)}

+ 作者:${escapeHtml(poem.author)} +
+
${excerpt || '暂无内容'}
+
${tags}
+ + `; + + // 点击卡片显示详情(排除复选框和按钮) + card.addEventListener('click', (e) => { + if (!e.target.closest('.read-toggle') && !e.target.closest('button')) { + showPoemDetail(poem.id); + } + }); + + return card; +} + +// 获取主要标签 +function getTopTags(classifications) { + if (!classifications) return ''; + + // 显示所有分类标签 + const priority = ['genre', 'emotion_tone', 'season', 'location', 'time_of_day', + 'philosophy', 'nature_scenery', 'plants', 'animals']; + const tags = []; + const shown = new Set(); + + for (const cat of priority) { + if (classifications[cat] && classifications[cat].length > 0) { + // 每个分类最多显示 2 个标签 + classifications[cat].slice(0, 2).forEach(tag => { + if (!shown.has(tag)) { + tags.push(`${tag}`); + shown.add(tag); + } + }); + if (tags.length >= 6) break; + } + } + + return tags.join(''); +} + +// 显示诗词详情 +async function showPoemDetail(poemId) { + try { + const res = await fetch(`${API_BASE}/api/poems/${poemId}`); + if (!res.ok) { + showToast('加载失败', 'error'); + return; + } + + const poem = await res.json(); + state.currentPoem = poem; + + // 填充弹窗 + document.getElementById('modalTitle').textContent = poem.title; + document.getElementById('modalAuthor').textContent = poem.author; + document.getElementById('modalContent').innerHTML = poem.paragraphs + .map(p => `

${escapeHtml(p)}

`).join(''); + + // 渲染标签 + document.getElementById('modalTags').innerHTML = renderAllTags(poem.classifications); + + // 设置阅读状态 + document.getElementById('modalReadCheck').checked = poem.is_read || false; + + // 显示弹窗 + document.getElementById('poemModal').style.display = 'flex'; + document.body.style.overflow = 'hidden'; + + } catch (err) { + console.error('加载详情失败:', err); + showToast('加载失败', 'error'); + } +} + +// 渲染所有标签 +function renderAllTags(classifications) { + if (!classifications) return ''; + + const categoryNames = state.categories; + const categoryOrder = ['season', 'solar_terms', 'time_of_day', 'genre', 'emotion_tone', + 'emotions', 'nature_scenery', 'plants', 'animals', 'buildings', + 'philosophy', 'life_stage', 'social_role', 'technique', 'rhetoric', + 'colors', 'sounds', 'location', 'festival']; + + return categoryOrder.map(cat => { + if (!classifications[cat] || classifications[cat].length === 0) return ''; + + const catName = categoryNames[cat]?.name || cat; + const tags = classifications[cat]; + + return ` +
+ ${catName}: +
+ ${tags.map(tag => `${tag}`).join('')} +
+
// Send the read/unread flag of one poem to the backend.
// Resolves to true when the server accepted the change, false on a non-2xx reply.
// Network errors propagate to the caller.
async function updateReadStatus(poemId, isRead) {
    const res = await fetch(
        `${API_BASE}/api/poems/${encodeURIComponent(poemId)}/read?is_read=${isRead}`,
        { method: 'PUT' }
    );
    return res.ok;
}

// Toggle the read state of the poem shown in the modal.
async function toggleModalRead() {
    // Keep our own reference: closeModal() may null state.currentPoem while
    // the request is still in flight.
    const poem = state.currentPoem;
    if (!poem) return;

    const checkbox = document.getElementById('modalReadCheck');
    const isRead = checkbox.checked;

    let ok = false;
    try {
        ok = await updateReadStatus(poem.id, isRead);
    } catch (err) {
        console.error('更新状态失败:', err);
    }

    if (!ok) {
        // The server did not accept the change: undo the optimistic checkbox
        // toggle (only if the modal still shows this poem) and tell the user.
        if (state.currentPoem === poem) {
            checkbox.checked = !isRead;
        }
        showToast('更新失败', 'error');
        return;
    }

    poem.is_read = isRead;
    await loadStats();
    await loadPoems();
}

// Close the detail modal and restore page scrolling.
function closeModal() {
    document.getElementById('poemModal').style.display = 'none';
    document.body.style.overflow = '';
    state.currentPoem = null;
}

// Toggle the read state of a poem from its list card.
async function toggleRead(poemId, isRead) {
    let ok = false;
    try {
        ok = await updateReadStatus(poemId, isRead);
    } catch (err) {
        console.error('更新失败:', err);
    }

    if (!ok) {
        showToast('更新失败', 'error');
        // Re-render the list so the card's checkbox reflects the server state again.
        await loadPoems();
        return;
    }

    await loadStats();
    await loadPoems();
    showToast(isRead ? '已标记为已读' : '已标记为未读', 'success');
}

${escapeHtml(p)}

`).join(''); + document.getElementById('modalTags').innerHTML = renderAllTags(poem.classifications); + document.getElementById('modalReadCheck').checked = poem.is_read || false; + + document.getElementById('poemModal').style.display = 'flex'; + document.body.style.overflow = 'hidden'; + } + } catch (err) { + showToast('获取失败', 'error'); + } +} + +// 上传文件 +async function uploadFile() { + const fileInput = document.getElementById('fileInput'); + const file = fileInput.files[0]; + + if (!file) { + showToast('请选择文件', 'error'); + return; + } + + const formData = new FormData(); + formData.append('file', file); + + const status = document.getElementById('uploadStatus'); + status.textContent = '上传中...'; + status.className = 'status-msg'; + + try { + const res = await fetch(`${API_BASE}/api/poems/import`, { + method: 'POST', + body: formData + }); + + const data = await res.json(); + + if (res.ok) { + status.textContent = `成功导入 ${data.imported} 首,跳过 ${data.skipped} 首`; + status.className = 'status-msg success'; + fileInput.value = ''; + await loadStats(); + await loadPoems(); + } else { + status.textContent = data.detail || '导入失败'; + status.className = 'status-msg error'; + } + } catch (err) { + status.textContent = '上传失败:' + err.message; + status.className = 'status-msg error'; + } +} + +// 更新统计显示 +function updateStats(stats) { + document.getElementById('totalPoems').textContent = stats.total_poems; + document.getElementById('readCount').textContent = stats.read_count; + document.getElementById('unreadCount').textContent = stats.unread_count; + + const percent = stats.reading_progress || 0; + document.getElementById('progressPercent').textContent = percent; + document.getElementById('progressText').textContent = `${stats.read_count}/${stats.total_poems}`; + document.querySelector('.progress-fill').style.width = `${percent}%`; +} + +// 更新结果数量 +function updateResultCount() { + document.getElementById('resultCount').textContent = `${state.total} 首`; + updatePagination(); +} + 
+// 更新分页控件 +function updatePagination() { + const pagination = document.getElementById('pagination'); + const prevBtn = document.getElementById('prevPage'); + const nextBtn = document.getElementById('nextPage'); + const pageInfo = document.getElementById('pageInfo'); + + if (state.totalPages <= 1) { + pagination.style.display = 'none'; + return; + } + + pagination.style.display = 'flex'; + prevBtn.disabled = state.currentPage <= 1; + nextBtn.disabled = state.currentPage >= state.totalPages; + prevBtn.style.opacity = prevBtn.disabled ? '0.5' : '1'; + nextBtn.style.opacity = nextBtn.disabled ? '0.5' : '1'; + pageInfo.textContent = `第 ${state.currentPage} / ${state.totalPages} 页,共 ${state.total} 首`; +} + +// 跳转到指定页 +function goToPage(page) { + if (page < 1 || page > state.totalPages) return; + state.currentPage = page; + loadPoems(); + window.scrollTo({ top: 0, behavior: 'smooth' }); +} + +// 设置事件监听 +function setupEventListeners() { + // 导航 + document.querySelectorAll('.nav-item').forEach(item => { + item.addEventListener('click', (e) => { + e.preventDefault(); + + const view = item.dataset.view; + const action = item.dataset.action; + + if (action === 'random') { + showRandomPoem(); + return; + } + + if (view) { + state.filters.view = view; + state.currentPage = 1; + + document.querySelectorAll('.nav-item').forEach(n => n.classList.remove('active')); + item.classList.add('active'); + + loadPoems(); + } + }); + }); + + // 搜索 + let searchTimeout; + document.getElementById('searchInput').addEventListener('input', (e) => { + clearTimeout(searchTimeout); + searchTimeout = setTimeout(() => { + state.filters.search = e.target.value.trim(); + state.currentPage = 1; + loadPoems(); + }, 500); + }); +} + +// 设置上传区域 +function setupUpload() { + const area = document.getElementById('uploadArea'); + const fileInput = document.getElementById('fileInput'); + const uploadStatus = document.getElementById('uploadStatus'); + + area.addEventListener('dragover', (e) => { + 
e.preventDefault(); + area.classList.add('dragover'); + }); + + area.addEventListener('dragleave', () => { + area.classList.remove('dragover'); + }); + + area.addEventListener('drop', (e) => { + e.preventDefault(); + area.classList.remove('dragover'); + + const files = e.dataTransfer.files; + if (files.length > 0) { + fileInput.files = files; + // 触发 change 事件以更新文件名显示 + fileInput.dispatchEvent(new Event('change')); + } + }); + + // 文件选择后显示文件名 + fileInput.addEventListener('change', () => { + const file = fileInput.files[0]; + if (file) { + uploadStatus.textContent = `已选择:${file.name}`; + uploadStatus.className = 'status-msg info'; + } else { + uploadStatus.textContent = ''; + uploadStatus.className = 'status-msg'; + } + }); +} + +// 工具函数 +function showLoading(show) { + document.getElementById('loading').style.display = show ? 'flex' : 'none'; +} + +function showToast(message, type = 'info') { + const toast = document.getElementById('toast'); + const msg = document.getElementById('toastMessage'); + + msg.textContent = message; + toast.style.display = 'block'; + toast.className = `toast toast-${type}`; + + setTimeout(() => { + toast.style.display = 'none'; + }, 3000); +} + +function escapeHtml(text) { + if (!text) return ''; + const div = document.createElement('div'); + div.textContent = text; + return div.innerHTML; +} + +// 导出全局函数 +window.toggleRead = toggleRead; +window.showPoemDetail = showPoemDetail; +window.closeModal = closeModal; +window.uploadFile = uploadFile; +window.clearFilters = clearFilters; +window.removeFilter = removeFilter; +window.goToPage = goToPage; \ No newline at end of file diff --git a/frontend/style.css b/frontend/style.css new file mode 100644 index 0000000..3b1eec7 --- /dev/null +++ b/frontend/style.css @@ -0,0 +1,1027 @@ +/* 古诗词阅读网 - 样式表 */ + +:root { + --ink: #2c2416; + --ink-light: #5a4e3c; + --ink-muted: #8c7e6a; + --ink-faint: #b8ad9a; + --paper: #faf8f5; + --paper-warm: #f5f1eb; + --paper-deep: #ede8df; + --card: #ffffff; + 
--accent: #8b4513; + --accent-light: #a0522d; + --accent-soft: rgba(139, 69, 19, 0.08); + --green: #2d6a4f; + --green-soft: rgba(45, 106, 79, 0.1); + --amber: #b8860b; + --amber-soft: rgba(184, 134, 11, 0.1); + --border: #e8e2d8; + --shadow: 0 2px 8px rgba(44, 36, 22, 0.08); + --shadow-lg: 0 8px 30px rgba(44, 36, 22, 0.12); + --radius: 10px; + --radius-lg: 16px; + --font-serif: 'Noto Serif SC', 'Songti SC', 'SimSun', serif; + --font-sans: 'Noto Sans SC', 'Microsoft YaHei', sans-serif; +} + +* { + margin: 0; + padding: 0; + box-sizing: border-box; +} + +body { + font-family: var(--font-sans); + background: var(--paper); + color: var(--ink); + line-height: 1.6; + min-height: 100vh; +} + +/* Header */ +.header { + background: var(--ink); + color: var(--paper); + position: sticky; + top: 0; + z-index: 100; + border-bottom: 3px solid var(--accent); + box-shadow: 0 2px 8px rgba(0,0,0,0.1); +} + +.header-inner { + max-width: 1440px; + margin: 0 auto; + padding: 0 2rem; + height: 64px; + display: flex; + align-items: center; + justify-content: space-between; +} + +.brand { + display: flex; + align-items: center; + gap: 1rem; +} + +.brand-icon { + width: 40px; + height: 40px; + background: var(--accent); + border-radius: 6px; + display: flex; + align-items: center; + justify-content: center; + font-family: var(--font-serif); + font-size: 1.3rem; + font-weight: 700; + flex-shrink: 0; +} + +.brand h1 { + font-family: var(--font-serif); + font-size: 1.2rem; + font-weight: 600; + white-space: nowrap; +} + +.tagline { + font-size: 0.73rem; + opacity: 0.5; + letter-spacing: 0.15em; + margin-top: 1px; +} + +.nav { + display: flex; + gap: 0.25rem; + flex-shrink: 0; +} + +.nav-item { + padding: 0.45rem 1rem; + color: rgba(250, 248, 245, 0.6); + text-decoration: none; + font-size: 0.87rem; + font-weight: 500; + border-radius: 6px; + transition: 0.2s; + white-space: nowrap; +} + +.nav-item:hover { + color: var(--paper); + background: rgba(250, 248, 245, 0.08); +} + +.nav-item.active { 
+ color: var(--paper); + background: var(--accent); +} + +/* Main Layout */ +.main { + max-width: 1440px; + margin: 0 auto; + padding: 1.5rem 2rem; + display: grid; + grid-template-columns: 300px 1fr; + gap: 1.5rem; + min-height: calc(100vh - 64px); +} + +/* Sidebar */ +.sidebar { + display: flex; + flex-direction: column; + gap: 1rem; + position: sticky; + top: calc(64px + 1.5rem); + max-height: calc(100vh - 64px - 3rem); + overflow-y: auto; +} + +.sidebar::-webkit-scrollbar { + width: 6px; +} + +.sidebar::-webkit-scrollbar-thumb { + background: var(--border); + border-radius: 3px; +} + +.card { + background: var(--card); + border: 1px solid var(--border); + border-radius: var(--radius-lg); + padding: 1.25rem; + flex-shrink: 0; +} + +.card-title { + font-family: var(--font-serif); + font-size: 0.93rem; + font-weight: 600; + margin-bottom: 0.9rem; + padding-bottom: 0.6rem; + border-bottom: 1px solid var(--paper-deep); +} + +/* Stats */ +.stats-grid { + display: grid; + grid-template-columns: repeat(3, 1fr); + gap: 0.5rem; + margin-bottom: 1rem; +} + +.stat-item { + text-align: center; + padding: 0.7rem 0.4rem; + border-radius: var(--radius); + background: var(--paper-warm); +} + +.stat-item.read { background: var(--green-soft); } +.stat-item.unread { background: var(--amber-soft); } + +.stat-value { + display: block; + font-family: var(--font-serif); + font-size: 1.4rem; + font-weight: 700; + color: var(--ink); + line-height: 1.2; +} + +.stat-item.read .stat-value { color: var(--green); } +.stat-item.unread .stat-value { color: var(--amber); } + +.stat-label { + font-size: 0.73rem; + color: var(--ink-muted); + display: block; + margin-top: 2px; +} + +.progress { + margin-top: 0.25rem; +} + +.progress-bar { + height: 6px; + background: var(--paper-deep); + border-radius: 3px; + overflow: hidden; +} + +.progress-fill { + height: 100%; + background: linear-gradient(90deg, var(--green), #3a7d7b); + transition: width 0.5s; + border-radius: 3px; +} + +.progress-text { + 
display: flex; + justify-content: space-between; + margin-top: 0.5rem; + font-size: 0.75rem; + color: var(--ink-muted); +} + +/* Upload */ +.upload-area { + border: 2px dashed var(--border); + border-radius: var(--radius); + padding: 1rem; + text-align: center; + margin-bottom: 0.75rem; + position: relative; + transition: 0.2s; + min-height: 80px; + display: flex; + align-items: center; + justify-content: center; +} + +.upload-area:hover { + border-color: var(--accent); + background: var(--accent-soft); +} + +.upload-area.dragover { + border-color: var(--accent); + background: var(--accent-soft); +} + +.upload-area input[type="file"] { + position: absolute; + inset: 0; + opacity: 0; + cursor: pointer; + width: 100%; + height: 100%; +} + +.upload-label { + display: flex; + flex-direction: column; + align-items: center; + gap: 0.4rem; + color: var(--ink-muted); + pointer-events: none; + width: 100%; +} + +.upload-icon { + width: 24px; + height: 24px; + color: var(--ink-faint); + flex-shrink: 0; +} + +.upload-text { + font-size: 0.8rem; + font-weight: 500; + color: var(--ink-light); +} + +.upload-hint { + font-size: 0.68rem; + color: var(--ink-faint); +} + +.status-msg { + margin-top: 0.5rem; + font-size: 0.75rem; + padding: 0.4rem 0.6rem; + border-radius: 6px; + word-break: break-word; +} + +.status-msg:empty { display: none; } +.status-msg.success { background: var(--green-soft); color: var(--green); } +.status-msg.error { background: rgba(239, 68, 68, 0.1); color: #ef4444; } +.status-msg.info { background: rgba(74, 111, 165, 0.1); color: #4a6fa5; } + +/* Search */ +.search-box { + position: relative; +} + +.search-icon { + position: absolute; + left: 0.75rem; + top: 50%; + transform: translateY(-50%); + width: 14px; + height: 14px; + color: var(--ink-muted); + flex-shrink: 0; +} + +.search-input { + width: 100%; + padding: 0.55rem 0.75rem 0.55rem 2.1rem; + border: 1px solid var(--border); + border-radius: var(--radius); + font-size: 0.85rem; + background: 
var(--paper-warm); + transition: 0.2s; + font-family: var(--font-sans); +} + +.search-input:focus { + outline: none; /* outline suppressed; the box-shadow ring below marks focus instead */ + border-color: var(--accent); + background: var(--card); + box-shadow: 0 0 0 3px var(--accent-soft); +} + +.search-input::placeholder { + color: var(--ink-faint); +} + +/* Category Tabs: wrapping flex row of pill-shaped tabs; the .active tab is filled with the accent colour */ +.category-tabs { + display: flex; + flex-wrap: wrap; + gap: 0.4rem; + margin-bottom: 0.75rem; +} + +.category-tab { + padding: 0.35rem 0.75rem; + background: var(--paper-warm); + border-radius: 20px; + font-size: 0.78rem; + cursor: pointer; + transition: 0.2s; + border: 1px solid transparent; /* invisible border keeps the box size unchanged when :hover colours it */ +} + +.category-tab:hover { + border-color: var(--accent); + color: var(--accent); +} + +.category-tab.active { + background: var(--accent); + color: var(--paper); +} + +/* Tag Cloud: wrapping flex list of tags, capped at 200px tall; taller content scrolls vertically */ +.tag-cloud { + display: flex; + flex-wrap: wrap; + gap: 0.35rem; + margin-bottom: 0.75rem; + max-height: 200px; + overflow-y: auto; + padding: 0.25rem; +} + +.tag-cloud::-webkit-scrollbar { /* WebKit-prefixed pseudo-element; no standard scrollbar properties are set in this section */ + width: 4px; +} + +.tag-cloud::-webkit-scrollbar-thumb { + background: var(--border); + border-radius: 2px; +} + +.tag-item { + padding: 0.25rem 0.55rem; + background: var(--paper-warm); + border-radius: 20px; + font-size: 0.72rem; + cursor: pointer; + transition: 0.2s; + border: 1px solid transparent; /* same reserved-border technique as .category-tab */ +} + +.tag-item:hover { + background: var(--accent-soft); + color: var(--accent); + border-color: var(--accent); +} + +/* Selected Filters: wrapping row of accent-outlined chips (.filter-tag); each chip styles a nested button with no background or border (presumably the remove control; confirm against the markup) */ +.selected-filters { + display: flex; + flex-wrap: wrap; + gap: 0.35rem; + margin-top: 0.5rem; + min-height: 0; +} + +.filter-tag { + display: inline-flex; + align-items: center; + gap: 0.3rem; + padding: 0.2rem 0.45rem; + background: var(--accent-soft); + color: var(--accent); + border-radius: 20px; + font-size: 0.7rem; + border: 1px solid var(--accent); +} + +.filter-tag button { + background: none; + border: none; + color: var(--accent); + cursor: pointer; + font-size: 1rem; + line-height: 1; + padding: 0; + display: flex; + align-items: center; +} + +.filter-tag 
button:hover { + color: var(--ink); +} + +/* Buttons: .btn base (inline-flex, centred, no wrapping) with variants .btn-primary and .btn-ghost and modifiers .btn-block and .btn-sm */ +.btn { + padding: 0.55rem 1.1rem; + border: none; + border-radius: var(--radius); + font-size: 0.83rem; + font-weight: 600; + cursor: pointer; + transition: 0.2s; + display: inline-flex; + align-items: center; + justify-content: center; + gap: 0.4rem; + white-space: nowrap; +} + +.btn-primary { + background: var(--accent); + color: var(--paper); +} + +.btn-primary:hover { + background: var(--accent-light); + box-shadow: var(--shadow); +} + +.btn-ghost { + background: transparent; + color: var(--ink-muted); + padding: 0.45rem 0.8rem; + border: 1px solid transparent; /* overrides the border: none from .btn so :hover can colour the border */ +} + +.btn-ghost:hover { + color: var(--accent); + background: var(--accent-soft); + border-color: var(--accent-soft); +} + +.btn-block { + width: 100%; +} + +.btn-sm { + padding: 0.35rem 0.7rem; + font-size: 0.75rem; +} + +/* Content: main column wrapper, its header row and the count badge */ +.content { + min-width: 0; /* NOTE(review): presumably lets .content shrink below its content width inside the .main grid; confirm against the markup */ +} + +.content-header { + display: flex; + justify-content: space-between; + align-items: center; + margin-bottom: 1.25rem; + padding-bottom: 1rem; + border-bottom: 1px solid var(--border); +} + +.content-header h2 { + font-family: var(--font-serif); + font-size: 1.35rem; + font-weight: 600; +} + +.badge { + padding: 0.25rem 0.7rem; + background: var(--paper-deep); + border-radius: 20px; + font-size: 0.78rem; + color: var(--ink-muted); + font-weight: 500; +} + +/* Poem Grid: auto-fill grid with as many equal columns of at least 300px as fit the row */ +.poem-grid { + display: grid; + grid-template-columns: repeat(auto-fill, minmax(300px, 1fr)); + gap: 1rem; +} + +/* Poem Card: clickable column-flex card; the ::before strip on its left edge is amber by default and green when the card also has .read */ +.poem-card { + background: var(--card); + border: 1px solid var(--border); + border-radius: var(--radius-lg); + padding: 1.2rem; + cursor: pointer; + transition: all 0.25s; + position: relative; /* containing block for the absolutely positioned ::before strip */ + display: flex; + flex-direction: column; +} + +.poem-card::before { + content: ''; + position: absolute; + top: 0; + left: 0; + width: 3px; + height: 100%; + background: var(--amber); + transition: 0.2s; + border-radius: 3px 0 0 3px; +} + +.poem-card.read::before { + background: var(--green); 
+} + +.poem-card:hover { + transform: translateY(-2px); /* lifts the card 2px on hover; :active below resets the offset to 0 */ + box-shadow: var(--shadow-lg); + border-color: var(--accent); + background: var(--paper-warm); +} + +.poem-card:active { + transform: translateY(0); +} + +.poem-card-header { + margin-bottom: 0.7rem; +} + +.poem-title { + font-family: var(--font-serif); + font-size: 1.05rem; + font-weight: 600; + margin-bottom: 0.25rem; + line-height: 1.4; +} + +.poem-author { + font-size: 0.78rem; + color: var(--ink-muted); +} + +.poem-excerpt { + font-family: var(--font-serif); + font-size: 0.85rem; + color: var(--ink-light); + line-height: 1.8; + margin-bottom: 0.8rem; + flex: 1; /* grows to take the spare space along the flex parent's main axis */ +} + +.poem-tags { + display: flex; + flex-wrap: wrap; + gap: 0.25rem; + margin-bottom: 0.7rem; +} + +.tag { + padding: 0.15rem 0.45rem; + border-radius: 20px; + font-size: 0.65rem; + font-weight: 500; + border: 1px solid currentColor; /* border follows whatever text colour the element resolves to */ + white-space: nowrap; +} + +.tag-genre { background: rgba(107, 76, 138, 0.08); color: #6b4c8a; } /* .tag-* colour classes: each background is the text colour's RGB at 0.08 alpha */ +.tag-emotion_tone { background: rgba(160, 82, 45, 0.08); color: #a0522d; } +.tag-season { background: rgba(74, 111, 165, 0.08); color: #4a6fa5; } +.tag-location { background: rgba(45, 106, 79, 0.08); color: #2d6a4f; } +.tag-time_of_day { background: rgba(184, 134, 11, 0.08); color: #b8860b; } +.tag-philosophy { background: rgba(58, 125, 123, 0.08); color: #3a7d7b; } +.tag-nature_scenery { background: rgba(74, 111, 165, 0.08); color: #4a6fa5; } +.tag-plants { background: rgba(45, 106, 79, 0.08); color: #2d6a4f; } +.tag-animals { background: rgba(160, 82, 45, 0.08); color: #a0522d; } +.tag-buildings { background: rgba(139, 69, 19, 0.08); color: #8b4513; } +.tag-social_role { background: rgba(107, 76, 138, 0.08); color: #6b4c8a; } +.tag-technique { background: rgba(184, 134, 11, 0.08); color: #b8860b; } +.tag-rhetoric { background: rgba(160, 82, 45, 0.08); color: #a0522d; } +.tag-colors { background: rgba(184, 134, 11, 0.08); color: #b8860b; } +.tag-sounds { background: rgba(74, 111, 165, 0.08); color: #4a6fa5; } 
+.tag-solar_terms { background: rgba(184, 134, 11, 0.08); color: #b8860b; } /* .tag-* colour classes: each background is the text colour's RGB at 0.08 alpha */ +.tag-festival { background: rgba(184, 134, 11, 0.08); color: #b8860b; } +.tag-life_stage { background: rgba(58, 125, 123, 0.08); color: #3a7d7b; } +.tag-emotions { background: rgba(160, 82, 45, 0.08); color: #a0522d; } + +.poem-card-footer { + display: flex; + justify-content: space-between; + align-items: center; + padding-top: 0.75rem; + border-top: 1px solid var(--paper-deep); + margin-top: auto; /* in a column flex parent the auto margin absorbs spare space, pinning the footer to the bottom */ +} + +.read-toggle { + display: flex; + align-items: center; + gap: 0.4rem; + font-size: 0.75rem; + color: var(--ink-muted); + cursor: pointer; +} + +.read-toggle input { + width: 14px; + height: 14px; + accent-color: var(--green); + cursor: pointer; +} + +/* Pagination: centred flex row; the .page-btn and .page-info rules follow */ +.pagination { + display: flex; + align-items: center; + justify-content: center; + gap: 1rem; + margin-top: 1.5rem; + padding: 1rem; +} + +.page-btn { + display: inline-flex; + align-items: center; + gap: 0.4rem; + padding: 0.55rem 1rem; + background: var(--card); + border: 1px solid var(--border); + border-radius: var(--radius); + font-size: 0.83rem; + font-weight: 500; + color: var(--ink-light); + cursor: pointer; + transition: all 0.2s; +} + +.page-btn:hover:not(:disabled) { /* hover styling is skipped while the button is disabled */ + border-color: var(--accent); + color: var(--accent); + background: var(--accent-soft); +} + +.page-btn:disabled { + cursor: not-allowed; /* only the cursor changes in this rule; no dimming is applied here */ +} + +.page-btn svg { + width: 14px; + height: 14px; +} + +.page-info { + font-size: 0.85rem; + color: var(--ink-muted); + min-width: 120px; + text-align: center; +} + +/* Empty & Loading: centred placeholder states; .spinner rotates using the spin keyframes */ +.empty-state { + text-align: center; + padding: 4rem 2rem; +} + +.empty-icon svg { + width: 100px; + height: 100px; + margin-bottom: 1.5rem; + opacity: 0.5; +} + +.empty-state h3 { + font-family: var(--font-serif); + font-size: 1.15rem; + font-weight: 600; + color: var(--ink-light); + margin-bottom: 0.5rem; +} + +.empty-state p { + font-size: 0.85rem; + color: var(--ink-muted); +} + +.loading { + text-align: center; + padding: 
3rem; +} + +.spinner { + width: 32px; + height: 32px; + border: 3px solid var(--border); + border-top-color: var(--accent); /* one accent-coloured quarter on an otherwise neutral ring */ + border-radius: 50%; + animation: spin 0.8s linear infinite; + margin: 0 auto 1rem; +} + +@keyframes spin { + to { transform: rotate(360deg); } +} + +/* Modal: fixed full-viewport flex container (z-index 1000) that centres its children; .modal-backdrop fills it and .modal-panel is capped at 680px wide and 85vh tall */ +.modal { + position: fixed; + inset: 0; + z-index: 1000; + display: flex; + align-items: center; + justify-content: center; + padding: 1.5rem; +} + +.modal-backdrop { + position: absolute; + inset: 0; + background: rgba(44, 36, 22, 0.5); + backdrop-filter: blur(4px); /* unprefixed only; no -webkit- fallback is declared in this rule */ +} + +.modal-panel { + position: relative; + background: var(--card); + border-radius: 20px; + max-width: 680px; + width: 100%; + max-height: 85vh; + overflow: hidden; /* the panel itself never scrolls; .modal-body below is the scrolling region */ + display: flex; + flex-direction: column; + box-shadow: 0 20px 60px rgba(44, 36, 22, 0.15); +} + +.modal-header { + display: flex; + justify-content: space-between; + align-items: flex-start; + padding: 1.5rem 1.75rem 1rem; + border-bottom: 1px solid var(--paper-deep); +} + +.modal-title { + font-family: var(--font-serif); + font-size: 1.35rem; + font-weight: 700; +} + +.modal-author { + font-size: 0.85rem; + color: var(--ink-muted); + margin-top: 0.25rem; +} + +.modal-close { + background: none; + border: none; + color: var(--ink-muted); + cursor: pointer; + padding: 0.4rem; + border-radius: 6px; + display: flex; + align-items: center; + justify-content: center; +} + +.modal-close:hover { + background: var(--paper-warm); + color: var(--ink); +} + +.modal-close svg { + width: 18px; + height: 18px; +} + +.modal-body { + padding: 1.25rem 1.75rem; + overflow-y: auto; /* scrolls vertically when its content overflows */ + flex: 1; +} + +.modal-content { + background: var(--paper-warm); + border-radius: var(--radius); + padding: 1.2rem 1.5rem; + margin-bottom: 1.25rem; + border-left: 3px solid var(--accent); +} + +.modal-content p { + font-family: var(--font-serif); + font-size: 0.98rem; + line-height: 2; + margin-bottom: 0.5rem; +} + +.modal-section { + margin-bottom: 1rem; +} + +.section-title { + 
font-family: var(--font-serif); + font-size: 0.9rem; + font-weight: 600; + color: var(--ink-light); + margin-bottom: 0.6rem; +} + +.modal-tags { + display: flex; + flex-direction: column; + gap: 0.7rem; +} + +.tag-group { + width: 100%; + margin-bottom: 0.5rem; +} + +.tag-group-label { + font-size: 0.75rem; + color: var(--ink-muted); + margin-bottom: 0.35rem; + font-weight: 600; + display: block; +} + +.tag-group-tags { + display: flex; + flex-wrap: wrap; + gap: 0.3rem; +} + +.modal-footer { + display: flex; + justify-content: space-between; + align-items: center; + padding: 1rem 1.75rem; + border-top: 1px solid var(--paper-deep); +} + +.read-checkbox { + display: flex; + align-items: center; + gap: 0.4rem; + font-size: 0.85rem; + color: var(--ink-muted); + cursor: pointer; +} + +.read-checkbox input { + width: 15px; + height: 15px; + accent-color: var(--green); +} + +/* Toast: fixed bottom-right notification, at most 400px wide; z-index 2000 is higher than the 1000 set on .modal */ +.toast { + position: fixed; + bottom: 1.5rem; + right: 1.5rem; + padding: 0.75rem 1.2rem; + border-radius: var(--radius); + color: white; + font-size: 0.85rem; + font-weight: 500; + z-index: 2000; + box-shadow: 0 8px 30px rgba(0, 0, 0, 0.15); + max-width: 400px; +} + +.toast-success { background: #10b981; } +.toast-error { background: #ef4444; } +.toast-info { background: #1e293b; } + +/* Responsive: max-width breakpoints at 1024px, 860px and 600px */ +@media (max-width: 1024px) { + .main { + grid-template-columns: 280px 1fr; + padding: 1rem; + } + + .poem-grid { + grid-template-columns: repeat(auto-fill, minmax(280px, 1fr)); + } +} + +@media (max-width: 860px) { + .main { + grid-template-columns: 1fr; /* collapses .main to a single column */ + } + + .sidebar { + position: static; + max-height: none; + flex-direction: row; + flex-wrap: wrap; + } + + .card { + flex: 1; + min-width: 280px; + } + + .header-inner { + flex-direction: column; + height: auto; + padding: 1rem 1.5rem; + gap: 0.75rem; + } + + .nav { + width: 100%; + justify-content: center; + flex-wrap: wrap; + } +} + +@media (max-width: 600px) { + .header-inner { + padding: 0.75rem 1rem; + } + + .brand h1 { + font-size: 
1.1rem; + } + + .tagline { + display: none; + } + + .main { + padding: 0.75rem; + } + + .sidebar { + gap: 0.75rem; + } + + .card { + min-width: 100%; + } + + .poem-grid { + grid-template-columns: 1fr; + } + + .modal { + padding: 1rem; + } + + .modal-panel { + border-radius: var(--radius-lg); + } +} + +/* Print: hide .header and .sidebar, use a single column, and ask the renderer not to split a .poem-card across pages */ +@media print { + .header, .sidebar { display: none; } + .main { grid-template-columns: 1fr; } + .poem-card { break-inside: avoid; box-shadow: none; border: 1px solid #ccc; } +} + +/* Focus: accent outline when :focus-visible matches; the outline is removed for focus that does not match :focus-visible */ +*:focus-visible { + outline: 2px solid var(--accent); + outline-offset: 2px; +} + +*:focus:not(:focus-visible) { + outline: none; +} + +/* Selection: selected text gets the accent background and paper-coloured text */ +::selection { + background: var(--accent); + color: var(--paper); +} \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..8abfcd9 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,11 @@ +# 后端 API 依赖 +fastapi==0.104.1 +uvicorn[standard]==0.24.0 +python-multipart==0.0.6 + +# LLM 分类脚本(使用标准库,无需额外依赖) + +# 可选:前端开发 +# vite +# vue@3 +# element-plus \ No newline at end of file