聚类更新

This commit is contained in:
龙澳 2024-12-20 20:57:01 +08:00
parent 5cbc07ea53
commit 0d13b37368
6 changed files with 53 additions and 5 deletions

View File

@@ -22,6 +22,8 @@ class PreprocessConfig:
 image_dir: str
 output_dir: str
+eps: float = 0.01
+min_samples: int = 5
 filter_grid_size: float = 0.001
 filter_dense_distance_threshold: float = 10
 filter_distance_threshold: float = 0.001
@@ -57,11 +59,12 @@ class ImagePreprocessor:
 self.logger.info(f"时间过滤后剩余 {len(self.gps_points)} 个GPS点")
 return self.gps_points
+# TODO 添加聚类参数
 def cluster(self) -> pd.DataFrame:
 """使用DBSCAN对GPS点进行聚类只保留最大的类"""
 self.logger.info("开始聚类")
 # 创建聚类器并执行聚类
-clusterer = GPSCluster(self.gps_points, eps=0.01, min_samples=5)
+clusterer = GPSCluster(self.gps_points, output_dir=self.config.output_dir)
 # 获取主要类别的点
 self.gps_points = clusterer.get_main_cluster()
 # 获取统计信息并记录
@@ -199,7 +202,7 @@ class ImagePreprocessor:
 # self.filter_points()
 # grid_points = self.divide_grids()
 # self.copy_images(grid_points)
-# self.visualize_results()
+self.visualize_results()
 # self.logger.info("预处理任务完成")
 # self.command_runner.run_grid_commands(
 # grid_points,
@@ -213,8 +216,8 @@ class ImagePreprocessor:
 if __name__ == "__main__":
 # 创建配置
 config = PreprocessConfig(
-image_dir=r"../code/images",
-output_dir=r"../code/output",
+image_dir=r"E:\湖南省第二测绘院\11-06-项目移交文件(王辉给)\无人机二三维节点扩容生产影像\影像数据\199\code\images",
+output_dir=r"test",
 filter_grid_size=0.001,
 filter_dense_distance_threshold=10,
 filter_distance_threshold=0.001,

View File

@@ -1,9 +1,10 @@
 from sklearn.cluster import DBSCAN
 from sklearn.preprocessing import StandardScaler
+import os
 class GPSCluster:
-def __init__(self, gps_points, eps=0.01, min_samples=5):
+def __init__(self, gps_points, output_dir: str, eps=0.01, min_samples=5):
 """
 初始化GPS聚类器
@@ -17,6 +18,7 @@ class GPSCluster:
 self.scaler = StandardScaler()
 self.gps_points = gps_points
 self.clustered_points = self.fit()
+self.log_file = os.path.join(output_dir, 'del_imgs.txt')
 def fit(self):
 """
@@ -71,6 +73,12 @@ class GPSCluster:
 "main_cluster_points": main_cluster_points,
 "noise_points": sum(self.clustered_points["cluster"] == -1),
 }
+noise_cluster = self.get_noise_cluster()
+with open(self.log_file, 'a', encoding='utf-8') as f:
+for i, (_, row) in enumerate(noise_cluster.iterrows()):
+f.write(row['file']+'\n')
+f.write('\n')
 return stats
 def get_main_cluster(self):

21
test/del_imgs.txt Normal file
View File

@ -0,0 +1,21 @@
0097ad48530f4ae1af34c48e281aaf3d.jpg
1568ec834f2a4b119ef3292dc31dfa4d.jpg
181afdbcba774f769ab2fc4bb8064f3e.jpg
23ba09a2cf2240728c4ba60068352864.jpg
41099dc566814ddf96cf4e1c0d2bcea6.jpg
4213c5dd18784272a983a9a14e2944a4.jpg
54e5e0ad38f14ad48c82a15ccf3c7337.jpg
554e893c3dc5400fa9e9d282b7f333a3.jpg
743be156d28b4fd783508bf1897e7184.jpg
8a226f7400244b6a87d170bed292b719.jpg
94a05fc70d284c7282c332c90c6e28bf.jpg
bbe3e6dd7c354d5f9352d46078369f1d.jpg
bfd5e60af76243c29c217220e08f2559.jpg
d4f55a5f91e84df3a7f38e02363e8f5f.jpg
e024f299df454e5e87e2954754035966.jpg
e6bd58b1639140b3b2aa90cf0c12eaaf.jpg
eb35f2f429f84579b73ad47d7fe0e152.jpg
f1aa2a76d56c4904bf15de3ae4694821.jpg
f3c7ba174f8a427c9383dc4fcc3b2c72.jpg
fe94e1ff5dff4a0abe62f9e63f9f9340.jpg

BIN
test/filter_GPS.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 61 KiB

View File

@ -0,0 +1,6 @@
2024-12-20 20:38:18 - UAV_Preprocess - INFO - 开始提取GPS数据
2024-12-20 20:38:18 - UAV_Preprocess.GPSExtractor - INFO - 开始从目录提取GPS坐标和拍摄日期: E:\湖南省第二测绘院\11-06-项目移交文件(王辉给)\无人机二三维节点扩容生产影像\影像数据\199\code\images
2024-12-20 20:38:24 - UAV_Preprocess.GPSExtractor - INFO - GPS坐标和拍摄日期提取完成 - 总图片数: 199, 成功提取: 199, 失败: 0
2024-12-20 20:38:24 - UAV_Preprocess - INFO - 成功提取 199 个GPS点
2024-12-20 20:38:24 - UAV_Preprocess - INFO - 开始聚类
2024-12-20 20:38:24 - UAV_Preprocess - INFO - 聚类完成:主要类别包含 179 个点,噪声点 20 个

View File

@ -0,0 +1,10 @@
2024-12-20 20:53:29 - UAV_Preprocess - INFO - 开始提取GPS数据
2024-12-20 20:53:29 - UAV_Preprocess.GPSExtractor - INFO - 开始从目录提取GPS坐标和拍摄日期: E:\湖南省第二测绘院\11-06-项目移交文件(王辉给)\无人机二三维节点扩容生产影像\影像数据\199\code\images
2024-12-20 20:53:29 - UAV_Preprocess.GPSExtractor - INFO - GPS坐标和拍摄日期提取完成 - 总图片数: 199, 成功提取: 199, 失败: 0
2024-12-20 20:53:29 - UAV_Preprocess - INFO - 成功提取 199 个GPS点
2024-12-20 20:53:29 - UAV_Preprocess - INFO - 开始聚类
2024-12-20 20:53:29 - UAV_Preprocess - INFO - 聚类完成:主要类别包含 179 个点,噪声点 20 个
2024-12-20 20:53:29 - UAV_Preprocess - INFO - 开始生成可视化结果
2024-12-20 20:53:29 - UAV_Preprocess.GPSExtractor - INFO - 开始从目录提取GPS坐标和拍摄日期: E:\湖南省第二测绘院\11-06-项目移交文件(王辉给)\无人机二三维节点扩容生产影像\影像数据\199\code\images
2024-12-20 20:53:29 - UAV_Preprocess.GPSExtractor - INFO - GPS坐标和拍摄日期提取完成 - 总图片数: 199, 成功提取: 199, 失败: 0
2024-12-20 20:53:30 - UAV_Preprocess - INFO - 预处理结果图已保存