From bfcf84001343e20bf272a47f76c3820106c79b60 Mon Sep 17 00:00:00 2001
From: along <1015042407@qq.com>
Date: Wed, 1 Apr 2026 15:55:53 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=94=B9GraphRAG=E5=90=8E=E7=AB=AF?=
 =?UTF-8?q?=EF=BC=8C=E5=8F=A6=E5=A4=96=E4=B8=A4=E6=9C=AC=E5=B0=8F=E8=AF=B4?=
 =?UTF-8?q?=E5=85=A5=E5=BA=93?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 backend/app.py           |  11 +--
 backend/graph_builder.py | 160 ++++++++++++++++++++++++++++++---------
 backend/prompts.py       |  61 ++++++++-------
 backend/run_import.py    |   8 +-
 4 files changed, 170 insertions(+), 70 deletions(-)

diff --git a/backend/app.py b/backend/app.py
index c764c2e..5003825 100644
--- a/backend/app.py
+++ b/backend/app.py
@@ -1,5 +1,5 @@
 """
-大唐双龙传 GraphRAG — FastAPI 后端
+武侠三部曲 GraphRAG — FastAPI 后端
 
 端点:
   GET  /api/health    — 健康检查（含 Neo4j 连通性）
@@ -13,11 +13,11 @@ from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
 
 from graph_query import get_driver, get_graph_stats
-from graph_builder import build_graph
+from graph_builder import build_all_graphs
 from llm_router import answer_question
 import uvicorn
 
-app = FastAPI(title="大唐双龙传 GraphRAG API", version="1.0.0")
+app = FastAPI(title="武侠三部曲 GraphRAG API", version="1.1.0")
 
 app.add_middleware(
     CORSMiddleware,
@@ -37,6 +37,7 @@ class ChatRequest(BaseModel):
 
 class ImportRequest(BaseModel):
     clear: bool = False  # True = 先清空图谱再重新导入
+    novels: list[str] | None = None  # 默认导入 dtslz/ldj/tlbb
 
 
 # ── Endpoints ─────────────────────────────────────────────
@@ -62,10 +63,10 @@ def stats():
 
 @app.post("/api/import")
 def import_data(req: ImportRequest = ImportRequest()):
-    """导入所有卷数据到 Neo4j（耗时约 1-3 分钟，请勿重复调用）"""
+    """导入小说数据到 Neo4j（耗时约 1-3 分钟，请勿重复调用）"""
     driver = get_driver()
     try:
-        build_graph(driver, clear=req.clear)
+        build_all_graphs(driver, novels=req.novels, clear=req.clear)
         stats = get_graph_stats()
         return {"status": "ok", "stats": stats}
     except Exception as e:
diff --git a/backend/graph_builder.py b/backend/graph_builder.py
index 8e24e8f..df6c8f9 100644
--- a/backend/graph_builder.py
+++ b/backend/graph_builder.py
@@ -10,7 +10,8 @@ import json
 from pathlib import Path
 from neo4j import Driver
 
-DATA_DIR = Path(__file__).parent.parent / "data"
+DATA_DIR_BASE = Path(__file__).parent.parent / "fiction"
+SUPPORTED_NOVELS = ("dtslz", "ldj", "tlbb")
 
 
 # ── 工具函数 ──────────────────────────────────────────────
@@ -26,32 +27,77 @@ def _split_leaders(leader: str) -> list[str]:
     return [p for p in parts if p not in ("未提及", "")]
 
 
+def _node_id(novel: str, raw_id: str) -> str:
+    return f"{novel}:{raw_id}"  # prefix the per-novel id with its novel key, e.g. "ldj:<raw_id>"
+
+
+def _get_data_dir(novel: str) -> Path:
+    """Return DATA_DIR_BASE/<novel>/data; raise ValueError unless that path is a directory."""
+    if not (data_dir := DATA_DIR_BASE / novel / "data").is_dir():  # exists() would accept a file
+        raise ValueError(f"小说数据目录不存在: {data_dir}")
+    return data_dir
+
+
+def _iter_volume_files(data_dir: Path):
+    """Yield (volume number, path) for each volNN.json in data_dir, ordered by volume number."""
+    named = (p for p in data_dir.glob("vol*.json") if len(p.stem) >= 5 and p.stem[:3] == "vol")
+    # Sort on the parsed number (a path sort puts vol100 before vol11); isdecimal() matches int().
+    yield from sorted((int(p.stem[3:]), p) for p in named if p.stem[3:].isdecimal())
+
+
 # ── Schema 初始化 ─────────────────────────────────────────
 
+def _drop_legacy_constraints(session):
+    """Drop the legacy uniqueness constraint on Character(name) so (novel, name) can replace it."""
+    rows = list(session.run(  # materialize before issuing DROP on the same session
+        """
+        SHOW CONSTRAINTS
+        YIELD name, labelsOrTypes, properties
+        RETURN name, labelsOrTypes, properties
+        """
+    ))
+    for row in rows:
+        labels = row.get("labelsOrTypes") or []
+        properties = row.get("properties") or []
+        if labels == ["Character"] and properties == ["name"]:
+            constraint_name = row["name"].replace("`", "``")  # escape, don't strip, backticks
+            session.run(f"DROP CONSTRAINT `{constraint_name}` IF EXISTS")
+
+
 def setup_schema(driver: Driver):
     with driver.session() as s:
-        s.run("CREATE CONSTRAINT IF NOT EXISTS FOR (n:Character) REQUIRE n.name IS UNIQUE")
+        _drop_legacy_constraints(s)
+        s.run("CREATE CONSTRAINT IF NOT EXISTS FOR (n:Character) REQUIRE (n.novel, n.name) IS UNIQUE")
         s.run("CREATE CONSTRAINT IF NOT EXISTS FOR (n:Location)  REQUIRE n.id   IS UNIQUE")
         s.run("CREATE CONSTRAINT IF NOT EXISTS FOR (n:Faction)   REQUIRE n.id   IS UNIQUE")
         s.run("CREATE CONSTRAINT IF NOT EXISTS FOR (n:Event)     REQUIRE n.id   IS UNIQUE")
         s.run("CREATE INDEX IF NOT EXISTS FOR (e:Event) ON (e.vol)")
+        s.run("CREATE INDEX IF NOT EXISTS FOR (e:Event) ON (e.novel)")
+        s.run("CREATE INDEX IF NOT EXISTS FOR (c:Character) ON (c.novel)")
+        s.run("CREATE INDEX IF NOT EXISTS FOR (l:Location) ON (l.novel)")
+        s.run("CREATE INDEX IF NOT EXISTS FOR (f:Faction) ON (f.novel)")
         s.run("CREATE INDEX IF NOT EXISTS FOR ()-[r:VISITED]-()   ON (r.vol)")
         s.run("CREATE INDEX IF NOT EXISTS FOR ()-[r:CONTROLS]-()  ON (r.vol)")
 
 
 # ── 各类型导入 ────────────────────────────────────────────
 
-def _import_locations(session, locations: list[dict]):
+def _import_locations(session, novel: str, locations: list[dict]):
     for loc in locations:
+        raw_id = loc["id"]
         session.run(
             """
             MERGE (l:Location {id: $id})
             SET l.name = $name,
+                l.source_id = $source_id,
+                l.novel = $novel,
                 l.type = $type,
                 l.lat  = $lat,
                 l.lng  = $lng
             """,
-            id=loc["id"],
+            id=_node_id(novel, raw_id),
+            source_id=raw_id,
+            novel=novel,
             name=loc["name"],
             type=loc.get("type", ""),
             lat=loc.get("lat"),
@@ -59,14 +105,22 @@ def _import_locations(session, locations: list[dict]):
         )
 
 
-def _import_factions(session, factions: list[dict], vol: int):
+def _import_factions(session, novel: str, factions: list[dict], vol: int):
     for f in factions:
+        raw_id = f["id"]
         session.run(
             """
             MERGE (n:Faction {id: $id})
-            SET n.name = $name, n.type = $type, n.color = $color
+            SET n.name = $name,
+                n.source_id = $source_id,
+                n.novel = $novel,
+                n.type = $type,
+                n.color = $color
             """,
-            id=f["id"], name=f["name"],
+            id=_node_id(novel, raw_id),
+            source_id=raw_id,
+            novel=novel,
+            name=f["name"],
             type=f.get("type", ""), color=f.get("color", ""),
         )
 
@@ -76,9 +130,12 @@ def _import_factions(session, factions: list[dict], vol: int):
                 """
                 MATCH (fac:Faction {id: $fid})
                 MATCH (loc:Location {id: $lid})
-                MERGE (fac)-[:CONTROLS {vol: $vol}]->(loc)
+                MERGE (fac)-[:CONTROLS {novel: $novel, vol: $vol}]->(loc)
                 """,
-                fid=f["id"], lid=loc_id, vol=vol,
+                fid=_node_id(novel, raw_id),
+                lid=_node_id(novel, loc_id),
+                novel=novel,
+                vol=vol,
             )
 
         # Faction → HAS_MEMBER → Character
@@ -87,36 +144,44 @@ def _import_factions(session, factions: list[dict], vol: int):
                 continue
             session.run(
                 """
-                MERGE (c:Character {name: $name})
+                MERGE (c:Character {novel: $novel, name: $name})
                 WITH c
                 MATCH (fac:Faction {id: $fid})
-                MERGE (fac)-[:HAS_MEMBER {vol: $vol}]->(c)
+                MERGE (fac)-[:HAS_MEMBER {novel: $novel, vol: $vol}]->(c)
                 """,
-                name=figure, fid=f["id"], vol=vol,
+                novel=novel,
+                name=figure,
+                fid=_node_id(novel, raw_id),
+                vol=vol,
             )
 
         # Character → LEADS → Faction
         for leader_name in _split_leaders(f.get("leader", "")):
             session.run(
                 """
-                MERGE (c:Character {name: $name})
+                MERGE (c:Character {novel: $novel, name: $name})
                 WITH c
                 MATCH (fac:Faction {id: $fid})
-                MERGE (c)-[:LEADS {vol: $vol}]->(fac)
+                MERGE (c)-[:LEADS {novel: $novel, vol: $vol}]->(fac)
                 """,
-                name=leader_name, fid=f["id"], vol=vol,
+                novel=novel,
+                name=leader_name,
+                fid=_node_id(novel, raw_id),
+                vol=vol,
             )
 
 
-def _import_routes(session, routes: list[dict], vol: int):
+def _import_routes(session, novel: str, routes: list[dict], vol: int):
     for route in routes:
         char_color = route.get("color", "")
         char_names = _split_characters(route["character"])
 
         for char_name in char_names:
             session.run(
-                "MERGE (c:Character {name: $name}) SET c.color = $color",
-                name=char_name, color=char_color,
+                "MERGE (c:Character {novel: $novel, name: $name}) SET c.color = $color",
+                novel=novel,
+                name=char_name,
+                color=char_color,
             )
 
             for wp in route.get("route", []):
@@ -128,28 +193,37 @@ def _import_routes(session, routes: list[dict], vol: int):
 
                 session.run(
                     """
-                    MATCH (c:Character {name: $char})
+                    MATCH (c:Character {novel: $novel, name: $char})
                     MATCH (l:Location {id: $lid})
-                    MERGE (c)-[v:VISITED {vol: $vol, chapter: $chapter}]->(l)
+                    MERGE (c)-[v:VISITED {novel: $novel, vol: $vol, chapter: $chapter}]->(l)
                     SET v.event = $event
                     """,
-                    char=char_name, lid=loc_id,
+                    novel=novel,
+                    char=char_name,
+                    lid=_node_id(novel, loc_id),
                     vol=vol, chapter=chapter, event=event,
                 )
 
 
-def _import_events(session, events: list[dict], vol: int):
+def _import_events(session, novel: str, events: list[dict], vol: int):
     for i, evt in enumerate(events):
-        event_id = f"v{vol:02d}_e{i:03d}"
+        event_id = _node_id(novel, f"v{vol:02d}_e{i:03d}")
         chapter = evt.get("chapter", 0)
         description = evt.get("event", "")
 
         session.run(
             """
             MERGE (e:Event {id: $id})
-            SET e.vol = $vol, e.chapter = $chapter, e.description = $description
+            SET e.novel = $novel,
+                e.vol = $vol,
+                e.chapter = $chapter,
+                e.description = $description
             """,
-            id=event_id, vol=vol, chapter=chapter, description=description,
+            id=event_id,
+            novel=novel,
+            vol=vol,
+            chapter=chapter,
+            description=description,
         )
 
         # 只在有命名地点 id 时建立关系（lat/lng 条目跳过）
@@ -161,13 +235,17 @@ def _import_events(session, events: list[dict], vol: int):
                 MATCH (l:Location {id: $lid})
                 MERGE (e)-[:OCCURRED_AT]->(l)
                 """,
-                eid=event_id, lid=loc_ref,
+                eid=event_id,
+                lid=_node_id(novel, loc_ref),
             )
 
 
 # ── 主入口 ────────────────────────────────────────────────
 
-def build_graph(driver: Driver, clear: bool = False):
+def build_graph(driver: Driver, novel: str, clear: bool = False):
+    if novel not in SUPPORTED_NOVELS:
+        raise ValueError(f"不支持的小说标识: {novel}，可选: {', '.join(SUPPORTED_NOVELS)}")
+
     if clear:
         print("Clearing existing graph data...")
         with driver.session() as s:
@@ -176,22 +254,30 @@ def build_graph(driver: Driver, clear: bool = False):
     print("Setting up schema constraints and indexes...")
     setup_schema(driver)
 
+    data_dir = _get_data_dir(novel)
     imported = 0
-    for vol_num in range(1, 64):
-        filepath = DATA_DIR / f"vol{vol_num:02d}.json"
-        if not filepath.exists():
-            continue
+    print(f"Importing novel: {novel} ({data_dir})")
+    for vol_num, filepath in _iter_volume_files(data_dir):
 
         with open(filepath, encoding="utf-8") as f:
             data = json.load(f)
 
         with driver.session() as session:
-            _import_locations(session, data.get("locations", []))
-            _import_factions(session, data.get("factions", []), vol_num)
-            _import_routes(session, data.get("character_routes", []), vol_num)
-            _import_events(session, data.get("key_events", []), vol_num)
+            _import_locations(session, novel, data.get("locations", []))
+            _import_factions(session, novel, data.get("factions", []), vol_num)
+            _import_routes(session, novel, data.get("character_routes", []), vol_num)
+            _import_events(session, novel, data.get("key_events", []), vol_num)
 
         imported += 1
-        print(f"  [✓] vol{vol_num:02d} imported")
+        print(f"  [✓] {novel}/vol{vol_num:02d} imported")
 
-    print(f"\nDone. Imported {imported} volumes.")
+    print(f"Done. Imported {imported} volumes for {novel}.\n")
+
+
+def build_all_graphs(driver: Driver, novels: list[str] | None = None, clear: bool = False):
+    """Import `novels` (all supported ones when None/empty); `clear` is applied to the first only."""
+    selected = novels or list(SUPPORTED_NOVELS)
+    if unknown := [n for n in selected if n not in SUPPORTED_NOVELS]:  # reject before any write
+        raise ValueError(f"不支持的小说标识: {', '.join(map(str, unknown))}，可选: {', '.join(SUPPORTED_NOVELS)}")
+    for i, novel in enumerate(selected):
+        build_graph(driver, novel=novel, clear=(clear and i == 0))
diff --git a/backend/prompts.py b/backend/prompts.py
index 6c8bf20..9b14992 100644
--- a/backend/prompts.py
+++ b/backend/prompts.py
@@ -1,44 +1,49 @@
 SCHEMA_DESCRIPTION = """
-大唐双龙传知识图谱 Schema（Neo4j）：
+武侠三部曲知识图谱 Schema（Neo4j）：
+
+小说标识（novel）：
+- dtslz = 大唐双龙传
+- ldj = 鹿鼎记
+- tlbb = 天龙八部
 
 节点类型：
-- Character {name, color}
-  主要人物：寇仲、徐子陵、宇文化及、傅君婥、宋师道、李靖、石青璇、李密、李子通、
-           杜伏威、跋锋寒、李世民、李渊、宋缺、寇仲、毕玄、阴后
+- Character {novel, name, color}
+  同名人物在不同小说中会按 novel 隔离
 
-- Location {id, name, type, lat, lng}
+- Location {id, novel, source_id, name, type, lat, lng}
   type 取值：city / town / waterway / landmark / grassland / forest / region
-  主要城市：扬州(yangzhou)、洛阳(luoyang)、长安/大兴(daxing)、丹阳(danyang)、
-           梁都、历阳(liyang)、江陵
+  id 为全局唯一键，格式：{novel}:{source_id}
 
-- Faction {id, name, type, color}
+- Faction {id, novel, source_id, name, type, color}
   type 取值：朝廷 / 门阀 / 义军 / 游牧政权 / 江湖势力 / 地方军阀 / 帮会 / 外族
-  主要势力：隋朝(sui)、李阀(li_clan)、宋阀(song_clan)、宇文阀(yuwen)、
-           瓦岗军(wagang_army)、突厥(turks)、慈航静斋、阴癸派
+  id 为全局唯一键，格式：{novel}:{source_id}
 
-- Event {id, vol, chapter, description}
-  vol 是卷号（整数 1-63），chapter 是章节号
+- Event {id, novel, vol, chapter, description}
+  id 为全局唯一键，格式：{novel}:vXX_eYYY
+  vol 是该小说内部卷号（整数），chapter 是章节号
 
 关系类型：
-- (Character)-[:VISITED {vol, chapter, event}]->(Location)
+- (Character)-[:VISITED {novel, vol, chapter, event}]->(Location)
   人物在某卷某章到访某地
 
-- (Faction)-[:CONTROLS {vol}]->(Location)
+- (Faction)-[:CONTROLS {novel, vol}]->(Location)
   势力在某卷控制某地
 
-- (Faction)-[:HAS_MEMBER {vol}]->(Character)
+- (Faction)-[:HAS_MEMBER {novel, vol}]->(Character)
   势力在某卷拥有某成员
 
-- (Character)-[:LEADS {vol}]->(Faction)
+- (Character)-[:LEADS {novel, vol}]->(Faction)
   人物在某卷领导某势力
 
 - (Event)-[:OCCURRED_AT]->(Location)
   事件发生于某地
 
-注意：vol 属性用整数表示（如 vol=1 代表第一卷，vol=20 代表第二十卷）
+查询建议：
+- 用户明确提到小说名时，务必加 novel 过滤
+- 未指定小说时，可跨小说查询
 """
 
-CYPHER_SYSTEM_PROMPT = f"""你是大唐双龙传知识图谱的 Cypher 查询专家。
+CYPHER_SYSTEM_PROMPT = f"""你是武侠三部曲知识图谱的 Cypher 查询专家。
 
 {SCHEMA_DESCRIPTION}
 
@@ -49,26 +54,30 @@ CYPHER_SYSTEM_PROMPT = f"""你是大唐双龙传知识图谱的 Cypher 查询专
 4. 默认加 LIMIT 30，除非用户指定数量
 5. 使用 DISTINCT 去重
 6. 属性名用 n.name、r.vol 格式，不要用整个节点
-7. 如果问题完全无法用图谱回答，只输出单词：UNSUPPORTED
+7. 若问题指定小说，优先使用 novel 过滤：
+  - 大唐双龙传 => novel = "dtslz"
+  - 鹿鼎记 => novel = "ldj"
+  - 天龙八部 => novel = "tlbb"
+8. 如果问题完全无法用图谱回答，只输出单词：UNSUPPORTED
 
 示例：
 Q: 寇仲去过哪些地方？
-A: MATCH (c:Character {{name: "寇仲"}})-[v:VISITED]->(l:Location) RETURN DISTINCT l.name, l.type, min(v.vol) AS first_vol ORDER BY first_vol LIMIT 30
+A: MATCH (c:Character {{novel: "dtslz", name: "寇仲"}})-[v:VISITED]->(l:Location) RETURN DISTINCT l.name, l.type, min(v.vol) AS first_vol ORDER BY first_vol LIMIT 30
 
 Q: 第30卷时宇文阀控制哪些城市？
-A: MATCH (f:Faction {{name: "宇文阀"}})-[r:CONTROLS]->(l:Location) WHERE r.vol <= 30 AND l.type = "city" RETURN DISTINCT l.name, r.vol ORDER BY r.vol LIMIT 30
+A: MATCH (f:Faction {{novel: "dtslz", name: "宇文阀"}})-[r:CONTROLS]->(l:Location) WHERE r.vol <= 30 AND l.type = "city" RETURN DISTINCT l.name, r.vol ORDER BY r.vol LIMIT 30
 
 Q: 扬州发生过哪些重要事件？
-A: MATCH (e:Event)-[:OCCURRED_AT]->(l:Location {{name: "扬州"}}) RETURN e.description, e.vol, e.chapter ORDER BY e.vol, e.chapter LIMIT 30
+A: MATCH (e:Event {{novel: "dtslz"}})-[:OCCURRED_AT]->(l:Location {{novel: "dtslz", name: "扬州"}}) RETURN e.description, e.vol, e.chapter ORDER BY e.vol, e.chapter LIMIT 30
 
 Q: 谁领导过瓦岗军？
-A: MATCH (c:Character)-[r:LEADS]->(f:Faction {{name: "瓦岗军"}}) RETURN DISTINCT c.name, r.vol ORDER BY r.vol LIMIT 30
+A: MATCH (c:Character)-[r:LEADS]->(f:Faction {{novel: "dtslz", name: "瓦岗军"}}) RETURN DISTINCT c.name, r.vol ORDER BY r.vol LIMIT 30
 
-Q: 寇仲和哪些势力有过关联？
-A: MATCH (c:Character {{name: "寇仲"}})-[:VISITED]->(l:Location)<-[:CONTROLS]-(f:Faction) RETURN DISTINCT f.name, f.type LIMIT 30
+Q: 韦小宝加入过哪些势力？
+A: MATCH (c:Character {{novel: "ldj", name: "韦小宝"}})<-[:HAS_MEMBER]-(f:Faction) RETURN DISTINCT f.name, f.type LIMIT 30
 """
 
-ANSWER_SYSTEM_PROMPT = """你是大唐双龙传的知识问答助手，熟悉小说中的人物、势力、地点和事件。
+ANSWER_SYSTEM_PROMPT = """你是武侠三部曲（大唐双龙传、鹿鼎记、天龙八部）的知识问答助手，熟悉小说中的人物、势力、地点和事件。
 
 请根据知识图谱的查询结果，用中文给出准确、自然的回答：
 - 直接回答问题，语言简洁流畅
diff --git a/backend/run_import.py b/backend/run_import.py
index 4d62237..76a2819 100644
--- a/backend/run_import.py
+++ b/backend/run_import.py
@@ -5,25 +5,29 @@
 用法:
   python run_import.py          # 增量导入（MERGE，不删除现有数据）
   python run_import.py --clear  # 清空图谱后全量重新导入
+  python run_import.py ldj      # 仅导入鹿鼎记
+  python run_import.py dtslz tlbb --clear  # 清空后导入指定小说
 """
 
 import sys
 from dotenv import load_dotenv
 from graph_query import get_driver
-from graph_builder import build_graph
+from graph_builder import build_all_graphs, SUPPORTED_NOVELS
 
 load_dotenv()
 
 
 def main():
     clear = "--clear" in sys.argv
+    novels = [arg for arg in sys.argv[1:] if not arg.startswith("--")]
+    selected = novels or list(SUPPORTED_NOVELS)
 
     print("Connecting to Neo4j...")
     driver = get_driver()
     driver.verify_connectivity()
     print("Connected.\n")
 
-    build_graph(driver, clear=clear)
+    build_all_graphs(driver, novels=selected, clear=clear)
 
     print("\nGraph stats:")
     from graph_query import get_graph_stats