diff --git a/filter/cluster_filter.py b/filter/cluster_filter.py index eadebf1..6607435 100644 --- a/filter/cluster_filter.py +++ b/filter/cluster_filter.py @@ -63,7 +63,7 @@ class GPSCluster: clustered_points: 带有聚类标签的DataFrame 返回: - 聚类统计信息的字典 + 聚类统计信息的字典, 主类, 噪声点 """ main_cluster_points = sum(clustered_points["cluster"] == 1) stats = { @@ -72,11 +72,4 @@ class GPSCluster: "noise_points": sum(clustered_points["cluster"] == -1), } - noise_cluster = self.get_noise_cluster(clustered_points) - return stats - - def get_main_cluster(self, clustered_points): - return clustered_points[clustered_points["cluster"] == 1] - - def get_noise_cluster(self, clustered_points): - return clustered_points[clustered_points["cluster"] == -1] + return stats, clustered_points[clustered_points["cluster"] == 1], clustered_points[clustered_points["cluster"] == -1] diff --git a/filter/gps_filter.py b/filter/gps_filter.py index 95d1e1b..40e7ea5 100644 --- a/filter/gps_filter.py +++ b/filter/gps_filter.py @@ -243,6 +243,7 @@ class GPSFilter: f"删除孤立点: {row['file']} (邻居数: {neighbors_count[i]})") filtered_df = points_df[~points_df['file'].isin(isolated_points)] + removed_df = points_df[points_df['file'].isin(isolated_points)] self.logger.info( f"孤立点过滤完成,共删除 {len(isolated_points)} 个点,剩余 {len(filtered_df)} 个点") - return filtered_df + return filtered_df, removed_df diff --git a/odm_preprocess.py b/odm_preprocess.py index 6034e5f..9309093 100644 --- a/odm_preprocess.py +++ b/odm_preprocess.py @@ -2,7 +2,7 @@ import os import shutil from datetime import timedelta from dataclasses import dataclass -from typing import Dict +from typing import Dict, Optional import matplotlib.pyplot as plt import pandas as pd @@ -27,7 +27,7 @@ class PreprocessConfig: """预处理配置类""" image_dir: str - output_dir: str + output_dir: Optional[str] = None # 聚类过滤参数 cluster_eps: float = 0.01 cluster_min_samples: int = 5 @@ -45,13 +45,15 @@ class PreprocessConfig: grid_overlap: float = 0.05 grid_size: float = 500 # 几个pipline过程是否开启 - mode: str = "快拼模式" + fast_mode: bool = False class ImagePreprocessor: def __init__(self, config: PreprocessConfig): self.config = config - + self.grandpa_dir = os.path.dirname( + os.path.dirname(self.config.image_dir)) + self.config.output_dir = os.path.join(self.grandpa_dir, 'preprocess') # 清理并重建输出目录 if os.path.exists(config.output_dir): self._clean_output_dir() @@ -61,7 +63,7 @@ class ImagePreprocessor: self.logger = setup_logger(config.output_dir) self.gps_points = None self.odm_monitor = ODMProcessMonitor( - config.output_dir, mode=config.mode) + config.output_dir, fast_mode=config.fast_mode) self.visualizer = FilterVisualizer(config.output_dir) def _clean_output_dir(self): @@ -98,22 +100,21 @@ class ImagePreprocessor: self.logger.info(f"成功提取 {len(self.gps_points)} 个GPS点") return self.gps_points - def cluster(self) -> pd.DataFrame: + def cluster(self, previous_points) -> pd.DataFrame: """使用DBSCAN对GPS点进行聚类,只保留最大的类""" self.logger.info("开始聚类") - previous_points = self.gps_points.copy() # 创建聚类器并执行聚类 clusterer = GPSCluster( - self.gps_points, output_dir=self.config.output_dir, + previous_points, output_dir=self.config.output_dir, eps=self.config.cluster_eps, min_samples=self.config.cluster_min_samples) # 获取主要类别的点 self.clustered_points = clusterer.fit() - self.gps_points = clusterer.get_main_cluster(self.clustered_points) # 获取统计信息并记录 - stats = clusterer.get_cluster_stats(self.clustered_points) + stats, retained_points, removed_points = clusterer.get_cluster_stats( + self.clustered_points) self.logger.info( f"聚类完成:主要类别包含 {stats['main_cluster_points']} 个点," f"噪声点 {stats['noise_points']} 个" @@ -121,16 +122,16 @@ class ImagePreprocessor: # 可视化聚类结果 self.visualizer.visualize_filter_step( - self.gps_points, previous_points, "1-Clustering") + retained_points, removed_points, "1-Clustering") + # 移动被过滤的图片 + self.move_images(removed_points, "cluster") + return retained_points - return self.gps_points - - def filter_time_group_overlap(self) -> pd.DataFrame: + def filter_time_group_overlap(self, previous_points) -> pd.DataFrame: """过滤重叠的时间组""" self.logger.info("开始过滤重叠时间组") self.logger.info("开始过滤重叠时间组") - previous_points = self.gps_points.copy() filter = TimeGroupOverlapFilter( self.config.image_dir, @@ -143,39 +144,43 @@ class ImagePreprocessor: ) # 更新GPS点数据,移除被删除的图像 - self.gps_points = self.gps_points[~self.gps_points['file'].isin( + retained_points = previous_points[~previous_points['file'].isin( deleted_files)] - self.logger.info(f"重叠时间组过滤后剩余 {len(self.gps_points)} 个GPS点") + removed_points = previous_points[previous_points['file'].isin( + deleted_files)] + self.logger.info(f"重叠时间组过滤后剩余 {len(retained_points)} 个GPS点") # 可视化过滤结果 self.visualizer.visualize_filter_step( - self.gps_points, previous_points, "2-Time Group Overlap") - - return self.gps_points + retained_points, removed_points, "2-Time Group Overlap") + # 移动被过滤的图片 + self.move_images(removed_points, "time_group_overlap") + return retained_points # TODO 过滤算法还需要更新 - def filter_points(self) -> pd.DataFrame: + def filter_points(self, previous_points) -> pd.DataFrame: """过滤GPS点""" self.logger.info("开始过滤GPS点") filter = GPSFilter(self.config.output_dir) # 过滤孤立点 - previous_points = self.gps_points.copy() self.logger.info( f"开始过滤孤立点(距离阈值: {self.config.filter_distance_threshold}, " f"最小邻居数: {self.config.filter_min_neighbors})" ) - self.gps_points = filter.filter_isolated_points( - self.gps_points, + retained_points, removed_points = filter.filter_isolated_points( + previous_points, self.config.filter_distance_threshold, self.config.filter_min_neighbors, ) - self.logger.info(f"孤立点过滤后剩余 {len(self.gps_points)} 个GPS点") + self.logger.info(f"孤立点过滤后剩余 {len(retained_points)} 个GPS点") # 可视化孤立点过滤结果 self.visualizer.visualize_filter_step( - self.gps_points, previous_points, "3-Isolated Points") + retained_points, removed_points, "3-Isolated Points") + # 移动被过滤的图片 + self.move_images(removed_points, "isolated_points") # # 过滤密集点 # previous_points = self.gps_points.copy() @@ -195,7 +200,7 @@ class ImagePreprocessor: # self.visualizer.visualize_filter_step( # self.gps_points, previous_points, "4-Dense Points") - return self.gps_points + return retained_points def divide_grids(self) -> Dict[int, pd.DataFrame]: """划分网格""" @@ -209,79 +214,102 @@ class ImagePreprocessor: ) grid_points = grid_divider.assign_to_grids(self.gps_points, grids) self.logger.info(f"成功划分为 {len(grid_points)} 个网格") - + # 生成image_groups.txt文件 try: - groups_file = os.path.join(self.config.output_dir, "image_groups.txt") + groups_file = os.path.join( + os.path.dirname(self.config.image_dir), "image_groups.txt" + ) self.logger.info(f"开始生成分组文件: {groups_file}") - + with open(groups_file, 'w') as f: for grid_idx, points_lt in grid_points.items(): # 使用ASCII字母作为组标识(A, B, C...) group_letter = chr(65 + grid_idx) # 65是ASCII中'A'的编码 - + # 为每个网格中的图像写入分组信息 for point in points_lt: f.write(f"{point['file']} {group_letter}\n") - + self.logger.info(f"分组文件生成成功: {groups_file}") except Exception as e: self.logger.error(f"生成分组文件时发生错误: {str(e)}", exc_info=True) raise - - return grid_points - def copy_images(self, grid_points: Dict[int, pd.DataFrame]): - """复制图像到目标文件夹""" - self.logger.info("开始复制图像文件") - self.logger.info("开始复制图像文件") + def move_images(self, removed_points: pd.DataFrame, step_name: str): + """ + 将被过滤掉的图片移动到ret文件夹中 - for grid_idx, points in grid_points.items(): - output_dir = os.path.join( - self.config.output_dir, f"grid_{grid_idx + 1}", "project", "images" - ) + Args: + removed_points: 被过滤掉的GPS点对应的数据 + step_name: 过滤步骤名称,用于创建子文件夹 + """ + if removed_points.empty: + return - os.makedirs(output_dir, exist_ok=True) + # 创建ret目录和对应步骤的子目录 + ret_dir = os.path.join(self.grandpa_dir, 'ret') + os.makedirs(ret_dir, exist_ok=True) - for point in tqdm(points, desc=f"复制网格 {grid_idx + 1} 的图像"): - src = os.path.join(self.config.image_dir, point["file"]) - dst = os.path.join(output_dir, point["file"]) - shutil.copy(src, dst) - self.logger.info(f"网格 {grid_idx + 1} 包含 {len(points)} 张图像") + self.logger.info(f"开始移动{step_name}步骤中被过滤的图片") - def merge_tif(self, grid_points: Dict[int, pd.DataFrame]): - """合并所有网格的影像产品""" - self.logger.info("开始合并所有影像产品") - merger = MergeTif(self.config.output_dir) - merger.merge_all_tifs(grid_points) + # 移动每张被过滤的图片 + for _, point in removed_points.iterrows(): + src_path = os.path.join(self.config.image_dir, point['file']) + dst_path = os.path.join(ret_dir, point['file']) - def merge_obj(self, grid_points: Dict[int, pd.DataFrame]): - """合并所有网格的OBJ模型""" - self.logger.info("开始合并OBJ模型") - merger = MergeObj(self.config.output_dir) - merger.merge_grid_obj(grid_points) + try: + shutil.move(src_path, dst_path) + except Exception as e: + self.logger.warning(f"移动图片 {point['file']} 时发生错误: {str(e)}") - def merge_ply(self, grid_points: Dict[int, pd.DataFrame]): - """合并所有网格的PLY点云""" - self.logger.info("开始合并PLY点云") - merger = MergePly(self.config.output_dir) - merger.merge_grid_ply(grid_points) + self.logger.info(f"完成移动 {len(removed_points)} 张被{step_name}过滤的图片") + + def restore_filtered_images(self): + """将ret文件夹中的图片恢复到原始图片目录""" + try: + # 获取ret文件夹路径 + ret_dir = os.path.join(self.grandpa_dir, 'ret') + + if not os.path.exists(ret_dir): + self.logger.info("没有找到ret文件夹,跳过恢复步骤") + return + + self.logger.info("开始恢复被过滤的图片") + + # 获取ret文件夹中的所有图片 + filtered_images = os.listdir(ret_dir) + + # 将图片移回原始目录 + for img in filtered_images: + src_path = os.path.join(ret_dir, img) + dst_path = os.path.join(self.config.image_dir, img) + try: + shutil.move(src_path, dst_path) + except Exception as e: + self.logger.warning(f"恢复图片 {img} 时发生错误: {str(e)}") + + self.logger.info(f"成功恢复 {len(filtered_images)} 张图片") + + except Exception as e: + self.logger.error(f"恢复图片过程中发生错误: {str(e)}", exc_info=True) + raise def process(self): """执行完整的预处理流程""" try: self.extract_gps() - self.cluster() - # self.filter_time_group_overlap() - self.filter_points() - grid_points = self.divide_grids() - self.copy_images(grid_points) + self.gps_points = self.cluster(self.gps_points) + # self.gps_points = self.filter_time_group_overlap(self.gps_points) + self.gps_points = self.filter_points(self.gps_points) + self.divide_grids() self.logger.info("预处理任务完成") - self.odm_monitor.process_all_grids(grid_points) - self.merge_tif(grid_points) - self.merge_obj(grid_points) - self.merge_ply(grid_points) + self.odm_monitor.run_odm_with_monitor( + self.grandpa_dir, self.config.fast_mode) + + self.restore_filtered_images() + except Exception as e: self.logger.error(f"处理过程中发生错误: {str(e)}", exc_info=True) raise @@ -290,8 +318,7 @@ class ImagePreprocessor: if __name__ == "__main__": # 创建配置 config = PreprocessConfig( - image_dir=r"E:\datasets\UAV\134\project\images", - output_dir=r"G:\ODM_output\134_test", + image_dir=r"G:\error_data\20241104140457\project\images", cluster_eps=0.01, cluster_min_samples=5, @@ -307,11 +334,11 @@ if __name__ == "__main__": filter_dense_distance_threshold=10, filter_time_threshold=timedelta(minutes=5), - grid_size=300, - grid_overlap=0.1, + grid_size=1000, + grid_overlap=0.05, - mode="重建模式", + fast_mode=False, ) # 创建处理器并执行 diff --git a/utils/odm_monitor.py b/utils/odm_monitor.py index 9b50c1a..6fd8a82 100644 --- a/utils/odm_monitor.py +++ b/utils/odm_monitor.py @@ -8,33 +8,31 @@ import pandas as pd class ODMProcessMonitor: """ODM处理监控器""" - def __init__(self, output_dir: str, mode: str = "快拼模式"): + def __init__(self, output_dir: str, fast_mode: bool): self.output_dir = output_dir self.logger = logging.getLogger('UAV_Preprocess.ODMMonitor') - self.mode = mode + self.fast_mode = fast_mode def _check_success(self, grid_dir: str) -> bool: """检查ODM是否执行成功""" success_markers = ['odm_orthophoto', 'odm_georeferencing'] - if self.mode != "快拼模式": + if not self.fast_mode: success_markers.append('odm_texturing') return all(os.path.exists(os.path.join(grid_dir, 'project', marker)) for marker in success_markers) - def run_odm_with_monitor(self, grid_dir: str, grid_idx: int, fast_mode: bool = True) -> Tuple[bool, str]: + def run_odm_with_monitor(self, project_dir: str, fast_mode: bool = True) -> Tuple[bool, str]: """运行ODM命令""" - self.logger.info(f"开始处理网格 {grid_idx + 1}") - # 构建Docker命令 - grid_dir = grid_dir[0].lower()+grid_dir[1:].replace('\\', '/') docker_command = ( f"docker run --gpus all -ti --rm " - f"-v {grid_dir}:/datasets " + f"-v {project_dir}:/datasets " f"opendronemap/odm:gpu " f"--project-path /datasets project " - f"--max-concurrency 10 " + f"--max-concurrency 15 " f"--force-gps " f"--feature-quality lowest " f"--orthophoto-resolution 10 " + f"--split-overlap 0 " ) if fast_mode: @@ -53,26 +51,7 @@ class ODMProcessMonitor: self.logger.info(f"==========stdout==========: {stdout}") self.logger.error(f"==========stderr==========: {stderr}") # 检查执行结果 - if self._check_success(grid_dir): - self.logger.info(f"网格 {grid_idx + 1} 处理成功") - return True, "" + if self._check_success(image_dir): + self.logger.info(f"处理成功") else: - self.logger.error(f"网格 {grid_idx + 1} 处理失败") - return False, f"网格 {grid_idx + 1} 处理失败" - - def process_all_grids(self, grid_points: Dict[int, pd.DataFrame]): - """处理所有网格""" - self.logger.info("开始执行网格处理") - for grid_idx in grid_points.keys(): - grid_dir = os.path.join( - self.output_dir, f'grid_{grid_idx + 1}' - ) - - success, error_msg = self.run_odm_with_monitor( - grid_dir=grid_dir, - grid_idx=grid_idx, - fast_mode=(self.mode == "快拼模式") - ) - - if not success: - raise Exception(f"网格 {grid_idx + 1} 处理失败: {error_msg}") + self.logger.error(f"处理失败") diff --git a/utils/visualizer.py b/utils/visualizer.py index f74f5a9..69cfe6e 100644 --- a/utils/visualizer.py +++ b/utils/visualizer.py @@ -7,86 +7,84 @@ from typing import Optional class FilterVisualizer: """过滤结果可视化器""" - + def __init__(self, output_dir: str): """ 初始化可视化器 - + Args: output_dir: 输出目录路径 """ self.output_dir = output_dir self.logger = logging.getLogger('UAV_Preprocess.Visualizer') - - def visualize_filter_step(self, - current_points: pd.DataFrame, - previous_points: pd.DataFrame, - step_name: str, - save_name: Optional[str] = None): + + def visualize_filter_step(self, + retained_points: pd.DataFrame, + filtered_points: pd.DataFrame, + step_name: str, + save_name: Optional[str] = None): """ 可视化单个过滤步骤的结果 - + Args: - current_points: 当前步骤后的点 - previous_points: 上一步骤的点 + retained_points: 留下的点 + filtered_points: 过滤掉的点 step_name: 步骤名称 save_name: 保存文件名,默认为step_name """ + total_points_len = len(retained_points) + len(filtered_points) self.logger.info(f"开始生成{step_name}的可视化结果") - - # 找出被过滤掉的点 - filtered_files = set(previous_points['file']) - set(current_points['file']) - filtered_points = previous_points[previous_points['file'].isin(filtered_files)] - + # 创建图形 plt.figure(figsize=(20, 16)) - + # 绘制保留的点 - plt.scatter(current_points['lon'], current_points['lat'], - color='blue', label='Retained Points', - alpha=0.6, s=50) - + plt.scatter(retained_points['lon'], retained_points['lat'], + color='blue', label='Retained Points', + alpha=0.6, s=50) + # 绘制被过滤的点 if not filtered_points.empty: plt.scatter(filtered_points['lon'], filtered_points['lat'], - color='red', marker='x', label='Filtered Points', - alpha=0.6, s=100) - + color='red', marker='x', label='Filtered Points', + alpha=0.6, s=100) + # 设置图形属性 plt.title(f"GPS Points After {step_name}\n" - f"(Filtered: {len(filtered_points)}, Retained: {len(current_points)})", - fontsize=14) + f"(Filtered: {len(filtered_points)}, Retained: {len(retained_points)})", + fontsize=14) plt.xlabel("Longitude", fontsize=12) plt.ylabel("Latitude", fontsize=12) plt.grid(True) - + # 添加统计信息 stats_text = ( - f"Original Points: {len(previous_points)}\n" + f"Original Points: {total_points_len}\n" f"Filtered Points: {len(filtered_points)}\n" - f"Remaining Points: {len(current_points)}\n" - f"Filter Rate: {len(filtered_points)/len(previous_points)*100:.1f}%" + f"Remaining Points: {len(retained_points)}\n" + f"Filter Rate: {len(filtered_points)/total_points_len*100:.1f}%" ) plt.figtext(0.02, 0.02, stats_text, fontsize=10, - bbox=dict(facecolor='white', alpha=0.8)) - + bbox=dict(facecolor='white', alpha=0.8)) + # 添加图例 plt.legend(loc='upper right', fontsize=10) - + # 调整布局 plt.tight_layout() - + # 保存图形 save_name = save_name or step_name.lower().replace(' ', '_') - save_path = os.path.join(self.output_dir, 'filter_imgs', f'filter_{save_name}.png') + save_path = os.path.join( + self.output_dir, 'filter_imgs_visual', f'filter_{save_name}.png') plt.savefig(save_path, dpi=300, bbox_inches='tight') plt.close() - + self.logger.info( f"{step_name}过滤可视化结果已保存至 {save_path}\n" f"过滤掉 {len(filtered_points)} 个点," - f"保留 {len(current_points)} 个点," - f"过滤率 {len(filtered_points)/len(previous_points)*100:.1f}%" + f"保留 {len(retained_points)} 个点," + f"过滤率 {len(filtered_points)/total_points_len*100:.1f}%" ) @@ -94,11 +92,11 @@ if __name__ == '__main__': # 测试代码 import numpy as np from datetime import datetime - + # 创建测试数据 np.random.seed(42) n_points = 1000 - + # 生成随机点 test_data = pd.DataFrame({ 'lon': np.random.uniform(120, 121, n_points), @@ -106,16 +104,16 @@ if __name__ == '__main__': 'file': [f'img_{i}.jpg' for i in range(n_points)], 'date': [datetime.now() for _ in range(n_points)] }) - + # 随机选择点作为过滤后的结果 filtered_data = test_data.sample(n=800) - + # 测试可视化 visualizer = FilterVisualizer('test_output') os.makedirs('test_output', exist_ok=True) - + visualizer.visualize_filter_step( filtered_data, test_data, "Test Filter" - ) \ No newline at end of file + )