2024-12-17 22:09:47 +08:00
|
|
|
|
import os
|
|
|
|
|
import shutil
|
2024-12-21 12:03:54 +08:00
|
|
|
|
from datetime import timedelta
|
2024-12-17 22:09:47 +08:00
|
|
|
|
from dataclasses import dataclass
|
2024-12-19 20:48:53 +08:00
|
|
|
|
from typing import Dict
|
|
|
|
|
|
|
|
|
|
import matplotlib.pyplot as plt
|
|
|
|
|
import pandas as pd
|
2024-12-17 22:09:47 +08:00
|
|
|
|
from tqdm import tqdm
|
2024-12-19 20:48:53 +08:00
|
|
|
|
|
|
|
|
|
from preprocess.cluster import GPSCluster
|
|
|
|
|
from preprocess.command_runner import CommandRunner
|
|
|
|
|
from preprocess.gps_extractor import GPSExtractor
|
|
|
|
|
from preprocess.gps_filter import GPSFilter
|
|
|
|
|
from preprocess.grid_divider import GridDivider
|
|
|
|
|
from preprocess.logger import setup_logger
|
2024-12-17 22:09:47 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass
class PreprocessConfig:
    """Configuration for the image preprocessing pipeline.

    ``image_dir`` and ``output_dir`` are required; every other field has a
    default. Field order is part of the positional constructor interface and
    must not be changed.
    """

    image_dir: str  # directory the source images are read from
    output_dir: str  # directory that receives logs, plots and copied images

    # Cluster filtering parameters (forwarded to GPSCluster as eps / min_samples).
    cluster_eps: float = 0.01
    cluster_min_samples: int = 5

    # Isolated-point filtering parameters.
    filter_distance_threshold: float = 0.001  # distance in latitude/longitude terms
    filter_min_neighbors: int = 6

    # Dense-point filtering parameters.
    filter_grid_size: float = 0.001
    filter_dense_distance_threshold: float = 10  # ordinary distance, unit: meters
    filter_time_threshold: timedelta = timedelta(minutes=5)

    # Grid division parameters.
    grid_overlap: float = 0.05
    grid_size: float = 500  # NOTE(review): unit not stated here - presumably meters; confirm

    # Switches for the individual pipeline stages.
    enable_filter: bool = True
    enable_grid_division: bool = True
    enable_visualization: bool = True
    enable_copy_images: bool = True

    # Processing mode name handed to CommandRunner.
    mode: str = "快拼模式"
|
2024-12-17 22:09:47 +08:00
|
|
|
|
|
|
|
|
|
class ImagePreprocessor:
    """Drive the preprocessing pipeline configured by a ``PreprocessConfig``.

    ``process`` runs the stages in order: GPS extraction, clustering,
    filtering, grid division, image copying, visualization and finally the
    per-grid commands of ``CommandRunner``.
    """

    def __init__(self, config: "PreprocessConfig"):
        """Store the configuration and create the logger and command runner."""
        self.config = config
        self.logger = setup_logger(config.output_dir)
        self.gps_points = []
        # Filled by cluster(); declared here so the attribute always exists.
        self.clustered_points = None
        self.command_runner = CommandRunner(config.output_dir, mode=config.mode)

    def extract_gps(self) -> pd.DataFrame:
        """Extract GPS data for the images in ``config.image_dir``.

        Returns:
            The extracted points, which are also stored in ``self.gps_points``.
        """
        self.logger.info("开始提取GPS数据")
        extractor = GPSExtractor(self.config.image_dir)
        self.gps_points = extractor.extract_all_gps()
        self.logger.info(f"成功提取 {len(self.gps_points)} 个GPS点")
        return self.gps_points

    def cluster(self) -> pd.DataFrame:
        """Cluster the GPS points and keep only the main cluster.

        The original author's note says GPSCluster uses DBSCAN and that only
        the largest cluster is kept.

        Returns:
            The points of the main cluster, also stored in ``self.gps_points``.
        """
        self.logger.info("开始聚类")
        # Build the clusterer and run the clustering.
        clusterer = GPSCluster(
            self.gps_points,
            output_dir=self.config.output_dir,
            eps=self.config.cluster_eps,
            min_samples=self.config.cluster_min_samples,
        )
        # Keep only the points of the main cluster.
        self.clustered_points = clusterer.fit()
        self.gps_points = clusterer.get_main_cluster(self.clustered_points)
        # Fetch the statistics and log them.
        stats = clusterer.get_cluster_stats(self.clustered_points)
        self.logger.info(
            f"聚类完成:主要类别包含 {stats['main_cluster_points']} 个点,"
            f"噪声点 {stats['noise_points']} 个"
        )
        # The signature promises a result; the original fell off the end
        # and returned None.
        return self.gps_points

    # TODO: the filtering algorithm still needs to be updated.
    def filter_points(self) -> pd.DataFrame:
        """Filter isolated points, then overly dense points.

        Returns:
            The remaining points. When ``config.enable_filter`` is false the
            current points are returned untouched.
        """
        if not self.config.enable_filter:
            return self.gps_points

        self.logger.info("开始过滤GPS点")
        # Named gps_filter so the builtin ``filter`` is not shadowed.
        gps_filter = GPSFilter(self.config.output_dir)

        self.logger.info(
            f"开始过滤孤立点(距离阈值: {self.config.filter_distance_threshold}, 最小邻居数: {self.config.filter_min_neighbors})"
        )
        self.gps_points = gps_filter.filter_isolated_points(
            self.gps_points,
            self.config.filter_distance_threshold,
            self.config.filter_min_neighbors,
        )
        self.logger.info(f"孤立点过滤后剩余 {len(self.gps_points)} 个GPS点")

        self.logger.info(
            f"开始过滤密集点(网格大小: {self.config.filter_grid_size}, 距离阈值: {self.config.filter_dense_distance_threshold})"
        )
        self.gps_points = gps_filter.filter_dense_points(
            self.gps_points,
            grid_size=self.config.filter_grid_size,
            distance_threshold=self.config.filter_dense_distance_threshold,
            time_threshold=self.config.filter_time_threshold,
        )
        self.logger.info(f"密集点过滤后剩余 {len(self.gps_points)} 个GPS点")
        return self.gps_points

    def divide_grids(self) -> Dict[int, pd.DataFrame]:
        """Split the current points into grids.

        Returns:
            A mapping from grid index to the points assigned to that grid.
            When ``config.enable_grid_division`` is false, every point is
            placed in a single grid with index 0.
        """
        if not self.config.enable_grid_division:
            # Without grid division all points live in one grid.
            return {0: self.gps_points}

        self.logger.info(f"开始划分网格 (重叠率: {self.config.grid_overlap})")
        grid_divider = GridDivider(overlap=self.config.grid_overlap)
        grids = grid_divider.divide_grids(
            self.gps_points, grid_size=self.config.grid_size
        )
        grid_points = grid_divider.assign_to_grids(self.gps_points, grids)
        self.logger.info(f"成功划分为 {len(grid_points)} 个网格")
        return grid_points

    @staticmethod
    def _image_files(points) -> list:
        """Return the ``file`` value of every point as a plain list.

        Accepts either a DataFrame with a ``file`` column or an iterable of
        mappings with a ``file`` key. Iterating a DataFrame directly yields
        its column labels, not its rows, so it needs its own branch.
        """
        if isinstance(points, pd.DataFrame):
            return points["file"].tolist()
        return [point["file"] for point in points]

    def copy_images(self, grid_points: Dict[int, pd.DataFrame]):
        """Copy the images of each grid into its ``project/images`` folder.

        With grid division enabled the target is
        ``<output_dir>/grid_<n>/project/images`` (``n`` is 1-based), otherwise
        ``<output_dir>/project/images``. Does nothing when
        ``config.enable_copy_images`` is false.

        Args:
            grid_points: Mapping from grid index to that grid's points.
        """
        if not self.config.enable_copy_images:
            return

        self.logger.info("开始复制图像文件")

        for grid_idx, points in grid_points.items():
            if self.config.enable_grid_division:
                output_dir = os.path.join(
                    self.config.output_dir, f"grid_{grid_idx + 1}", "project", "images"
                )
            else:
                output_dir = os.path.join(
                    self.config.output_dir, "project", "images"
                )

            os.makedirs(output_dir, exist_ok=True)

            file_names = self._image_files(points)
            for file_name in tqdm(file_names, desc=f"复制网格 {grid_idx + 1} 的图像"):
                src = os.path.join(self.config.image_dir, file_name)
                dst = os.path.join(output_dir, file_name)
                shutil.copy(src, dst)
            self.logger.info(f"网格 {grid_idx + 1} 包含 {len(points)} 张图像")

    def _read_filtered_files(self) -> list:
        """Return the image names listed in ``<output_dir>/del_imgs.txt``.

        Blank lines are skipped and surrounding whitespace is stripped. A
        missing file is treated as "nothing was filtered" so that the plot can
        still be produced instead of aborting the pipeline.
        """
        list_path = os.path.join(self.config.output_dir, "del_imgs.txt")
        try:
            with open(list_path, "r", encoding="utf-8") as file:
                return [line.strip() for line in file if line.strip()]
        except FileNotFoundError:
            self.logger.warning(f"未找到 {list_path},视为没有被过滤的图像")
            return []

    def visualize_results(self):
        """Plot original versus filtered GPS points to ``filter_GPS.png``.

        The original points are re-extracted from ``config.image_dir``; the
        filtered ones are those whose file name appears in ``del_imgs.txt``.
        Does nothing when ``config.enable_visualization`` is false.
        """
        if not self.config.enable_visualization:
            return

        self.logger.info("开始生成可视化结果")
        extractor = GPSExtractor(self.config.image_dir)
        original_points_df = extractor.extract_all_gps()

        # Read the list of images that were filtered out.
        filtered_files = self._read_filtered_files()

        # Create a new figure; the finally clause closes it even if plotting
        # or saving fails.
        plt.figure(figsize=(20, 16))
        try:
            # Draw every original point.
            plt.scatter(
                original_points_df["lon"],
                original_points_df["lat"],
                color="blue",
                label="Original Points",
                alpha=0.6,
            )

            # Draw the points that were filtered out.
            filtered_points_df = original_points_df[
                original_points_df["file"].isin(filtered_files)
            ]
            plt.scatter(
                filtered_points_df["lon"],
                filtered_points_df["lat"],
                color="red",
                marker="x",
                label="Filtered Points",
                alpha=0.6,
            )

            # Configure the figure.
            plt.title("GPS Coordinates of Images", fontsize=14)
            plt.xlabel("Longitude", fontsize=12)
            plt.ylabel("Latitude", fontsize=12)
            plt.grid(True)
            plt.legend()

            # Save the figure.
            plt.savefig(os.path.join(self.config.output_dir, "filter_GPS.png"))
        finally:
            plt.close()
        self.logger.info("预处理结果图已保存")

    def process(self):
        """Run the complete preprocessing pipeline.

        Raises:
            Exception: Any error raised by a stage is logged with its
                traceback and then re-raised unchanged.
        """
        try:
            self.extract_gps()
            self.cluster()
            self.filter_points()
            grid_points = self.divide_grids()
            self.copy_images(grid_points)
            self.visualize_results()
            self.logger.info("预处理任务完成")
            self.command_runner.run_grid_commands(
                grid_points,
                self.config.enable_grid_division,
            )
            # TODO: image stitching.
        except Exception as e:
            self.logger.error(f"处理过程中发生错误: {str(e)}", exc_info=True)
            raise
|
|
|
|
|
|
|
|
|
|
|
2024-12-19 20:48:53 +08:00
|
|
|
|
if __name__ == "__main__":
    # Imported locally: only the script entry point needs it.
    import argparse

    # The two directories used to be fixed; they stay as defaults but may now
    # be overridden on the command line. Unknown arguments are ignored so that
    # existing invocations keep working.
    parser = argparse.ArgumentParser(description="Run the image preprocessing pipeline.")
    parser.add_argument(
        "--image-dir",
        default=r"E:\datasets\UAV\502",
        help="directory containing the source images",
    )
    parser.add_argument(
        "--output-dir",
        default=r"E:\studio2\ODM_pro\test",
        help="directory that receives the preprocessing output",
    )
    args, _ignored = parser.parse_known_args()

    # Build the configuration.
    config = PreprocessConfig(
        image_dir=args.image_dir,
        output_dir=args.output_dir,
        # Cluster filtering.
        cluster_eps=0.01,
        cluster_min_samples=5,
        # Isolated-point filtering.
        filter_distance_threshold=0.001,
        filter_min_neighbors=6,
        # Dense-point filtering.
        filter_grid_size=0.001,
        filter_dense_distance_threshold=10,
        filter_time_threshold=timedelta(minutes=5),
        # Grid division.
        grid_overlap=0.03,
        grid_size=500,
        # Pipeline stage switches.
        enable_filter=True,
        enable_grid_division=True,
        enable_visualization=True,
        enable_copy_images=True,
        # NOTE(review): this value looks like a placeholder - confirm which
        # mode names CommandRunner actually accepts.
        mode="sadf模式",
    )

    # Create the processor and run it.
    processor = ImagePreprocessor(config)
    processor.process()
|