添加MAP MCP、联网搜索skill
This commit is contained in:
12
MCP/mars-mcp/pyproject.toml
Normal file
12
MCP/mars-mcp/pyproject.toml
Normal file
@@ -0,0 +1,12 @@
|
||||
# PEP 621 project metadata for the Mars DataCube MCP server.
[project]
name = "mars-mcp"
version = "0.1.0"
description = "火星数据 MCP 服务器 - 为 nanobot 提供时空查询工具"
requires-python = ">=3.12"
dependencies = [
    "mcp[cli]>=1.0.0",       # FastMCP server framework + CLI tooling
    "chromadb>=1.4.1",       # persistent vector store for image descriptions
    "requests>=2.32.5",      # DataCube / embedding HTTP APIs
    "numpy>=1.26.0",         # cosine-similarity math
    "python-dotenv>=1.0.0",  # .env configuration loading
]
|
||||
312
MCP/mars-mcp/server.py
Normal file
312
MCP/mars-mcp/server.py
Normal file
@@ -0,0 +1,312 @@
|
||||
#!/usr/bin/env python3
"""
Mars data MCP server.

Provides spatiotemporal and spatiotemporal-semantic query tools for nanobot.
"""
import json
import logging
import os
from pathlib import Path

import numpy as np
import requests
import chromadb
from dotenv import load_dotenv
from mcp.server.fastmcp import FastMCP

# Load environment variables: prefer a local .env, fall back to the
# mars-data-serv-llm project's .env. Only the first file found is loaded.
_here = Path(__file__).parent
for _env in [_here / ".env", Path("~/studio/mars-data-serv-llm/.env").expanduser()]:
    if _env.exists():
        load_dotenv(_env)
        break

# DataCube endpoint used for bounding-box (spatial) image searches.
DATACUBE_API_URL = os.getenv(
    "DATACUBE_API_URL",
    "http://digitalmars.com.cn/datacube/datasets/search"
)
# Embedding endpoint (Ollama-style /api/embeddings) used to vectorize queries.
EMBEDDING_API_URL = os.getenv(
    "EMBEDDING_API_URL",
    "http://192.168.190.42:11434/api/embeddings"
)
EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "qwen3-embedding:8b-fp16")
# On-disk ChromaDB store holding per-image visual-description vectors.
CHROMADB_PATH = os.getenv(
    "CHROMADB_PATH",
    str(Path("~/studio/mars-data-serv-llm/data/chromadb").expanduser())
)

# WARNING level keeps MCP stdio traffic clean of routine log noise.
logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)

mcp = FastMCP("mars-datacube")
|
||||
|
||||
|
||||
# ─── 内部工具函数 ─────────────────────────────────────────────────────────────
|
||||
|
||||
def _validate_bbox(lat_min, lat_max, lon_min, lon_max):
|
||||
"""验证边界框参数,返回错误字符串或 None"""
|
||||
if not (-90 <= lat_min <= 90) or not (-90 <= lat_max <= 90):
|
||||
return "纬度必须在-90到90之间"
|
||||
if not (-180 <= lon_min <= 180) or not (-180 <= lon_max <= 180):
|
||||
return "经度必须在-180到180之间"
|
||||
if lat_min >= lat_max or lon_min >= lon_max:
|
||||
return "最小值必须小于最大值"
|
||||
return None
|
||||
|
||||
|
||||
def _parse_item(item: dict) -> dict:
|
||||
"""从 DataCube API 响应条目中提取标准化字段"""
|
||||
image_path = item.get("image_urls", {}).get("red", "")
|
||||
filename = image_path.split("/")[-1] if image_path else ""
|
||||
|
||||
coords = item.get("coordinates", [[]])
|
||||
if coords and len(coords[0]) > 0:
|
||||
lons = [p[0] for p in coords[0]]
|
||||
lats = [p[1] for p in coords[0]]
|
||||
coord_range = {
|
||||
"lon_min": min(lons), "lon_max": max(lons),
|
||||
"lat_min": min(lats), "lat_max": max(lats),
|
||||
}
|
||||
else:
|
||||
coord_range = None
|
||||
|
||||
return {
|
||||
"id": item.get("id", ""),
|
||||
"filename": filename,
|
||||
"product": item.get("product", ""),
|
||||
"image_path": image_path,
|
||||
"coordinates": coord_range,
|
||||
"time": str(item.get("time", "")) or "未知时间",
|
||||
}
|
||||
|
||||
|
||||
def _spatial_query(lat_min, lat_max, lon_min, lon_max, product) -> list:
    """Run a bounding-box search against the DataCube API.

    Returns the raw entry list (possibly empty). HTTP errors propagate
    as requests exceptions for the caller to handle.
    """
    query_params = {
        "min_lon": lon_min,
        "max_lon": lon_max,
        "min_lat": lat_min,
        "max_lat": lat_max,
        "product": product,
    }
    response = requests.get(DATACUBE_API_URL, params=query_params, timeout=30)
    response.raise_for_status()
    payload = response.json()
    return payload if payload else []
|
||||
|
||||
|
||||
def _get_embedding(text: str):
    """Request an embedding vector for *text* from the embedding API.

    Returns the vector (list of floats) on success, or None on any
    failure (network error, bad status, malformed response).
    """
    request_body = {"model": EMBEDDING_MODEL, "prompt": text}
    try:
        resp = requests.post(EMBEDDING_API_URL, json=request_body, timeout=30)
        resp.raise_for_status()
        return resp.json()["embedding"]
    except Exception as e:
        # Best-effort: callers treat None as "no semantic ranking possible".
        logger.error(f"Embedding API 请求失败: {e}")
        return None
|
||||
|
||||
|
||||
def _cosine_similarity(vec1, vec2) -> float:
|
||||
v1, v2 = np.array(vec1), np.array(vec2)
|
||||
norm = np.linalg.norm(v1) * np.linalg.norm(v2)
|
||||
return float(np.dot(v1, v2) / norm) if norm else 0.0
|
||||
|
||||
|
||||
def _get_chroma_collection():
    """Open the persistent 'mars_images' collection, creating it if absent."""
    chroma_client = chromadb.PersistentClient(path=CHROMADB_PATH)
    collection = chroma_client.get_or_create_collection("mars_images")
    return collection
|
||||
|
||||
|
||||
# ─── MCP 工具定义 ─────────────────────────────────────────────────────────────
|
||||
|
||||
# @mcp.tool()
|
||||
# def spatiotemporal_search(
|
||||
# lat_min: float,
|
||||
# lat_max: float,
|
||||
# lon_min: float,
|
||||
# lon_max: float,
|
||||
# product: str = "tianwen_moric",
|
||||
# top_k: int = 10,
|
||||
# ) -> str:
|
||||
# """
|
||||
# 按空间范围查询火星遥感影像。
|
||||
|
||||
# 返回指定经纬度矩形区域内的影像列表,包含影像 ID、文件路径、
|
||||
# 坐标范围和拍摄时间。适用于"查找某区域有哪些影像"类问题。
|
||||
|
||||
# Args:
|
||||
# lat_min: 最小纬度 (-90 到 90)
|
||||
# lat_max: 最大纬度 (-90 到 90)
|
||||
# lon_min: 最小经度 (-180 到 180)
|
||||
# lon_max: 最大经度 (-180 到 180)
|
||||
# product: 产品类型,默认 "tianwen_moric"(天问一号MoRIC)
|
||||
# top_k: 返回数量上限,默认 10,最大 100
|
||||
# """
|
||||
# err = _validate_bbox(lat_min, lat_max, lon_min, lon_max)
|
||||
# if err:
|
||||
# return json.dumps({"success": False, "error": err, "results": []}, ensure_ascii=False)
|
||||
|
||||
# top_k = min(max(1, top_k), 100)
|
||||
|
||||
# try:
|
||||
# raw = _spatial_query(lat_min, lat_max, lon_min, lon_max, product)
|
||||
|
||||
# if not raw:
|
||||
# return json.dumps({
|
||||
# "success": True,
|
||||
# "query": {"spatial": {"lat_min": lat_min, "lat_max": lat_max, "lon_min": lon_min, "lon_max": lon_max}, "product": product},
|
||||
# "total_results": 0,
|
||||
# "results": [],
|
||||
# "message": f"在指定区域未找到 {product} 影像",
|
||||
# }, ensure_ascii=False, indent=2)
|
||||
|
||||
# results = [_parse_item(item) for item in raw[:top_k]]
|
||||
|
||||
# return json.dumps({
|
||||
# "success": True,
|
||||
# "query": {"spatial": {"lat_min": lat_min, "lat_max": lat_max, "lon_min": lon_min, "lon_max": lon_max}, "product": product},
|
||||
# "total_results": len(raw),
|
||||
# "returned_results": len(results),
|
||||
# "results": results,
|
||||
# "message": f"找到 {len(raw)} 幅 {product} 影像,返回前 {len(results)} 幅",
|
||||
# }, ensure_ascii=False, indent=2)
|
||||
|
||||
# except requests.exceptions.RequestException as e:
|
||||
# return json.dumps({"success": False, "error": f"DataCube API 请求失败: {e}", "results": []}, ensure_ascii=False)
|
||||
# except Exception as e:
|
||||
# logger.exception("spatiotemporal_search 失败")
|
||||
# return json.dumps({"success": False, "error": str(e), "results": []}, ensure_ascii=False)
|
||||
|
||||
|
||||
@mcp.tool()
def spatiotemporal_semantic_search(
    query: str,
    lat_min: float,
    lat_max: float,
    lon_min: float,
    lon_max: float,
    product: str = "tianwen_moric",
    top_k: int = 10,
) -> str:
    """
    Spatiotemporal-semantic search: filter images by spatial extent, then
    rank them by semantic similarity.

    First fetches all images inside the bounding box from the DataCube API,
    then pulls those images' visual-description vectors from ChromaDB,
    computes cosine similarity against *query*, and returns the images
    sorted by relevance. Suited to questions with semantic content, e.g.
    "find dust devils / impact craters / dunes in some region".

    Results additionally include:
    - description: the image's visual-description text
    - similarity: cosine similarity to *query* (0-1)

    Args:
        query: Semantic query text, e.g. "尘卷风" (dust devil),
            "撞击坑" (impact crater), "沙丘" (dune)
        lat_min: Minimum latitude (-90 to 90)
        lat_max: Maximum latitude (-90 to 90)
        lon_min: Minimum longitude (-180 to 180)
        lon_max: Maximum longitude (-180 to 180)
        product: Product type, defaults to "tianwen_moric"
        top_k: Maximum results to return, default 10, capped at 100
    """
    if not query or not query.strip():
        return json.dumps({"success": False, "error": "语义查询不能为空", "results": []}, ensure_ascii=False)

    err = _validate_bbox(lat_min, lat_max, lon_min, lon_max)
    if err:
        return json.dumps({"success": False, "error": err, "results": []}, ensure_ascii=False)

    # Clamp top_k into [1, 100].
    top_k = min(max(1, top_k), 100)

    # Echoed back in every response so the caller can see what was searched.
    query_info = {
        "spatial": {"lat_min": lat_min, "lat_max": lat_max, "lon_min": lon_min, "lon_max": lon_max},
        "product": product,
    }

    try:
        # Step 1: spatial filtering via the DataCube API.
        raw = _spatial_query(lat_min, lat_max, lon_min, lon_max, product)

        if not raw:
            return json.dumps({
                "success": True, "query": query_info,
                "total_results": 0, "results": [],
                "message": f"在指定区域未找到 {product} 影像",
            }, ensure_ascii=False, indent=2)

        # Step 2: index parsed entries by filename — the filename doubles
        # as the ChromaDB document id. Entries without a filename (no red
        # band URL) cannot be matched and are dropped here.
        spatial_map = {}
        for item in raw:
            parsed = _parse_item(item)
            if parsed["filename"]:
                spatial_map[parsed["filename"]] = parsed

        # Step 3: batch-fetch descriptions and vectors from ChromaDB.
        # If ChromaDB is unavailable, degrade gracefully to plain spatial
        # results (success=True with a warning) instead of failing.
        try:
            collection = _get_chroma_collection()
            chroma_data = collection.get(
                ids=list(spatial_map.keys()),
                include=["documents", "embeddings"],
            )
        except Exception as e:
            logger.error(f"ChromaDB 查询失败: {e}")
            results = list(spatial_map.values())[:top_k]
            return json.dumps({
                "success": True, "query": query_info,
                "total_results": len(spatial_map),
                "returned_results": len(results),
                "results": results,
                "message": f"ChromaDB 不可用,返回 {len(results)} 幅时空搜索结果",
                "warning": str(e),
            }, ensure_ascii=False, indent=2)

        # None of the spatially-matched images have semantic vectors:
        # fall back to spatial-only results with a warning.
        if not chroma_data or not chroma_data["ids"]:
            results = list(spatial_map.values())[:top_k]
            return json.dumps({
                "success": True, "query": query_info,
                "total_results": len(spatial_map),
                "returned_results": len(results),
                "results": results,
                "message": f"区域内 {len(spatial_map)} 幅影像在 ChromaDB 中无语义数据",
                "warning": "ChromaDB 中没有这些影像的语义向量",
            }, ensure_ascii=False, indent=2)

        # Step 4: embed the query and rank by cosine similarity.
        # _get_embedding returns None on failure; then every item scores
        # 0.0 and the stable sort keeps the original ChromaDB order.
        query_vec = _get_embedding(query)

        scored = []
        for i, fname in enumerate(chroma_data["ids"]):
            if fname not in spatial_map:
                continue
            # Copy so the cached spatial entry is never mutated.
            item = dict(spatial_map[fname])
            item["description"] = chroma_data["documents"][i]

            if query_vec is not None and chroma_data["embeddings"] is not None:
                item["similarity"] = round(_cosine_similarity(query_vec, chroma_data["embeddings"][i]), 4)
            else:
                item["similarity"] = 0.0
            scored.append(item)

        scored.sort(key=lambda x: x["similarity"], reverse=True)
        final = scored[:top_k]

        # NOTE(review): total_results here is len(final), not the region
        # total — differs from the fallback branches above; confirm intent.
        return json.dumps({
            "success": True, "query": query_info,
            "total_results": len(final),
            "returned_results": len(final),
            "results": final,
            "message": (
                f"区域内 {len(spatial_map)} 幅影像,其中 {len(chroma_data['ids'])} 幅有语义数据,"
                f"按 '{query}' 排序后返回前 {len(final)} 幅"
            ),
        }, ensure_ascii=False, indent=2)

    except requests.exceptions.RequestException as e:
        # DataCube API unreachable or returned an HTTP error.
        return json.dumps({"success": False, "error": f"DataCube API 请求失败: {e}", "results": []}, ensure_ascii=False)
    except Exception as e:
        logger.exception("spatiotemporal_semantic_search 失败")
        return json.dumps({"success": False, "error": str(e), "results": []}, ensure_ascii=False)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Entry point: start the FastMCP server loop (transport chosen by
    # FastMCP; presumably stdio by default — verify against mcp docs).
    mcp.run()
|
||||
1943
MCP/mars-mcp/uv.lock
generated
Normal file
1943
MCP/mars-mcp/uv.lock
generated
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user