Change the data structures to pandas DataFrame, update the filtering algorithm, and add a command runner module

long.ao 2024-12-18 20:50:39 +08:00
parent 22a041db6c
commit 9e30a58fa9
12 changed files with 331 additions and 94 deletions
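Editorial note on the change below: after this commit every stage of the pipeline (extraction, time filter, GPS filters, grid division) exchanges a single pandas DataFrame instead of a list of dicts. A minimal sketch of the frame that GPSExtractor.extract_all_gps() returns and the filters consume — the column names come from the diff, while the file names and coordinate values are invented for illustration:

import pandas as pd
from datetime import datetime

# One row per image: file name, decimal-degree GPS, EXIF capture time.
gps_points = pd.DataFrame([
    {'file': 'IMG_0001.jpg', 'lat': 30.123456, 'lon': 120.654321,
     'date': datetime(2024, 12, 18, 10, 0, 5)},
    {'file': 'IMG_0002.jpg', 'lat': 30.123500, 'lon': 120.654400,
     'date': datetime(2024, 12, 18, 10, 0, 7)},
])

# The filters drop rows by file name, e.g.:
kept = gps_points[~gps_points['file'].isin(['IMG_0002.jpg'])]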

Main preprocessing script (class ImagePreprocessor)

@@ -1,7 +1,9 @@
-from gps_extractor import GPSExtractor
-from gps_filter import GPSFilter
-from grid_divider import GridDivider
-from logger import setup_logger
+from preprocess.gps_extractor import GPSExtractor
+from preprocess.time_filter import TimeFilter
+from preprocess.gps_filter import GPSFilter
+from preprocess.grid_divider import GridDivider
+from preprocess.logger import setup_logger
+from preprocess.command_runner import CommandRunner
 import os
 import pandas as pd
 import shutil

@@ -9,6 +11,8 @@ import matplotlib.pyplot as plt
 from typing import List, Dict, Optional
 from dataclasses import dataclass
 from tqdm import tqdm
+import subprocess
+from concurrent.futures import ThreadPoolExecutor

 @dataclass

@@ -21,9 +25,11 @@ class PreprocessConfig:
     filter_distance_threshold: float = 0.001
     filter_min_neighbors: int = 6
     grid_overlap: float = 0.05
+    grid_size: float = 250
     enable_filter: bool = True
     enable_grid_division: bool = True
     enable_visualization: bool = True
+    enable_copy_images: bool = True

 class ImagePreprocessor:

@@ -31,16 +37,26 @@ class ImagePreprocessor:
         self.config = config
         self.logger = setup_logger(config.output_dir)
         self.gps_points = []
+        self.command_runner = CommandRunner(config.output_dir)

-    def extract_gps(self) -> List[Dict]:
+    def extract_gps(self) -> pd.DataFrame:
         """提取GPS数据"""
         self.logger.info("开始提取GPS数据")
         extractor = GPSExtractor(self.config.image_dir)
         self.gps_points = extractor.extract_all_gps()
         self.logger.info(f"成功提取 {len(self.gps_points)} 个GPS点")
         return self.gps_points

+    def time_filter(self) -> pd.DataFrame:
+        """时间过滤"""
+        self.logger.info("开始时间过滤")
+        time_filter = TimeFilter(self.config.output_dir)
+        self.gps_points = time_filter.filter_by_date(self.gps_points)
+        self.logger.info(f"时间过滤后剩余 {len(self.gps_points)} 个GPS点")
+        return self.gps_points
+
-    def filter_points(self) -> List[Dict]:
+    # TODO 过滤密集点算法需要改进
+    def filter_points(self) -> pd.DataFrame:
         """过滤GPS点"""
         if not self.config.enable_filter:
             return self.gps_points

@@ -67,29 +83,30 @@ class ImagePreprocessor:
         self.logger.info(f"密集点过滤后剩余 {len(self.gps_points)} 个GPS点")
         return self.gps_points

-    def divide_grids(self) -> Dict[int, List[Dict]]:
+    def divide_grids(self) -> Dict[int, pd.DataFrame]:
         """划分网格"""
         if not self.config.enable_grid_division:
             return {0: self.gps_points}  # 不划分网格时,所有点放在一个网格中

         self.logger.info(f"开始划分网格 (重叠率: {self.config.grid_overlap})")
         grid_divider = GridDivider(overlap=self.config.grid_overlap)
-        grids = grid_divider.divide_grids(self.gps_points)
+        grids = grid_divider.divide_grids(self.gps_points, grid_size=self.config.grid_size)
         grid_points = grid_divider.assign_to_grids(self.gps_points, grids)
         self.logger.info(f"成功划分为 {len(grid_points)} 个网格")
         return grid_points

-    def copy_images(self, grid_points: Dict[int, List[Dict]]):
+    def copy_images(self, grid_points: Dict[int, pd.DataFrame]):
         """复制图像到目标文件夹"""
+        if not self.config.enable_copy_images:
+            return
+
         self.logger.info("开始复制图像文件")
+        os.makedirs(self.config.output_dir, exist_ok=True)
         for grid_idx, points in grid_points.items():
             if self.config.enable_grid_division:
-                output_dir = os.path.join(self.config.output_dir, f'grid_{
-                    grid_idx + 1}', 'images')
+                output_dir = os.path.join(self.config.output_dir, f'grid_{grid_idx + 1}', 'project', 'images')
             else:
-                output_dir = os.path.join(self.config.output_dir, 'images')
+                output_dir = os.path.join(self.config.output_dir, 'project', 'images')
             os.makedirs(output_dir, exist_ok=True)

@@ -106,38 +123,56 @@ class ImagePreprocessor:
         self.logger.info("开始生成可视化结果")
         extractor = GPSExtractor(self.config.image_dir)
-        original_points = extractor.extract_all_gps()
+        original_points_df = extractor.extract_all_gps()

+        # 读取被过滤的图片列表
         with open(os.path.join(self.config.output_dir, 'del_imgs.txt'), "r", encoding="utf-8") as file:
-            filtered_file = [line.strip() for line in file]
+            filtered_files = [line.strip() for line in file if line.strip()]

-        # 绘制散点图
-        plt.figure(figsize=(10, 8))
-        plt.scatter([p['lon'] for p in original_points],
-                    [p['lat'] for p in original_points],
-                    color='blue', label="Original Points", alpha=0.6)
-        plt.scatter([p['lon'] for p in original_points if p['file'] in filtered_file],
-                    [p['lat']
-                     for p in original_points if p['file'] in filtered_file],
-                    color="red", label="Filtered Points", alpha=0.6)
+        # 创建一个新的图形
+        plt.figure(figsize=(20, 16))
+
+        # 绘制所有原始点
+        plt.scatter(original_points_df['lon'],
+                    original_points_df['lat'],
+                    color='blue',
+                    label="Original Points",
+                    alpha=0.6)
+
+        # 绘制被过滤的点
+        filtered_points_df = original_points_df[original_points_df['file'].isin(filtered_files)]
+        plt.scatter(filtered_points_df['lon'],
+                    filtered_points_df['lat'],
+                    color="red",
+                    label="Filtered Points",
+                    alpha=0.6)

+        # 设置图形属性
         plt.title("GPS Coordinates of Images", fontsize=14)
         plt.xlabel("Longitude", fontsize=12)
         plt.ylabel("Latitude", fontsize=12)
         plt.grid(True)
         plt.legend()

+        # 保存图形
         plt.savefig(os.path.join(self.config.output_dir, 'filter_GPS.png'))
         plt.close()
         self.logger.info("预处理结果图已保存")

     def process(self):
         """执行完整的预处理流程"""
         try:
             self.extract_gps()
+            self.time_filter()
             self.filter_points()
             grid_points = self.divide_grids()
             self.copy_images(grid_points)
             self.visualize_results()
             self.logger.info("预处理任务完成")
+
+            self.command_runner.run_grid_commands(
+                grid_points,
+                self.config.enable_grid_division
+            )
         except Exception as e:
             self.logger.error(f"处理过程中发生错误: {str(e)}", exc_info=True)
             raise

@@ -146,16 +181,17 @@ class ImagePreprocessor:

 if __name__ == '__main__':
     # 创建配置
     config = PreprocessConfig(
-        image_dir=r'C:\datasets\1815\output\grid_5\images',
-        output_dir=r'C:\datasets\1815\output\grid_5',
+        image_dir=r'C:\datasets\1815\images',
+        output_dir=r'C:\datasets\1815\output',
         filter_grid_size=0.001,
         filter_dense_distance_threshold=10,
         filter_distance_threshold=0.001,
         filter_min_neighbors=6,
         grid_overlap=0.05,
-        enable_filter=False,
+        enable_filter=True,
         enable_grid_division=True,
-        enable_visualization=False
+        enable_visualization=True,
+        enable_copy_images=True
     )

     # 创建处理器并执行

Binary file not shown.

Binary file not shown.

preprocess/command_runner.py Normal file

@@ -0,0 +1,89 @@
import os
import logging
import subprocess
from typing import Dict
import pandas as pd


class CommandRunner:
    """执行网格处理命令的类"""

    def __init__(self, output_dir: str):
        """
        初始化命令执行器

        Args:
            output_dir: 输出目录路径
        """
        self.output_dir = output_dir
        self.logger = logging.getLogger('UAV_Preprocess.CommandRunner')

    def run_grid_commands(self, grid_points: Dict[int, pd.DataFrame], enable_grid_division: bool = True):
        """
        为每个网格顺序运行指定命令

        Args:
            grid_points: 网格点数据字典,键为网格索引,值为该网格的点数据
            enable_grid_division: 是否启用网格划分
        """
        if not enable_grid_division:
            self._run_command(0)
            return

        self.logger.info("开始执行网格处理命令")

        # 顺序执行每个网格的命令
        for grid_idx in grid_points.keys():
            try:
                self._run_command(grid_idx)
            except Exception as e:
                self.logger.error(f"网格 {grid_idx + 1} 处理命令执行失败: {str(e)}")
                raise  # 如果一个网格失败,停止后续执行

    def _run_command(self, grid_idx: int):
        """
        执行单个网格的命令

        Args:
            grid_idx: 网格索引

        Raises:
            Exception: 当命令执行失败时抛出异常
        """
        try:
            # 确定网格目录和命令
            grid_dir = os.path.join(self.output_dir, f'grid_{grid_idx + 1}')
            command = f"docker run -ti --rm -v {grid_dir}:/datasets opendronemap/odm --project-path /datasets project --feature-quality lowest --force-gps"

            self.logger.info(f"执行命令: {command} 在目录: {grid_dir}")

            # 在指定目录下执行命令
            process = subprocess.Popen(
                command,
                shell=True,
                cwd=grid_dir,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True
            )

            # 获取命令输出
            stdout, stderr = process.communicate()

            # 检查命令执行结果
            if process.returncode == 0:
                self.logger.info(f"网格 {grid_idx + 1} 命令执行成功")
                self.logger.debug(f"命令输出至日志文件")
                with open(os.path.join(grid_dir, 'odm_success.log'), 'a', encoding='utf-8') as f:
                    f.write(f"{stdout}")
            else:
                self.logger.error(f"网格 {grid_idx + 1} 命令执行失败")
                self.logger.error(f"错误信息输出至日志文件")
                with open(os.path.join(grid_dir, 'odm_error.log'), 'a', encoding='utf-8') as f:
                    f.write(f"{stdout}")
                    f.write(f"\n错误日志:\n")
                    f.write(f"{stderr}")
                raise Exception(f"命令执行失败: {stderr}")

        except Exception as e:
            self.logger.error(f"网格 {grid_idx + 1} 命令执行出错: {str(e)}")
            raise

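For reference, a hedged sketch of driving the new CommandRunner on its own; the output path is hypothetical, and only the keys of grid_points are consumed (the values are the per-grid point collections returned by GridDivider.assign_to_grids):

from preprocess.command_runner import CommandRunner

# Only the keys of grid_points are iterated by run_grid_commands(); the values
# are whatever assign_to_grids() produced for each grid. Paths are hypothetical.
grid_points = {0: [], 1: []}

runner = CommandRunner(r'C:\datasets\1815\output')
# Assumes copy_images() has already created
#   <output_dir>\grid_1\project\images and <output_dir>\grid_2\project\images,
# since ODM is started inside each grid_N directory with
# --project-path /datasets project.
runner.run_grid_commands(grid_points, enable_grid_division=True)

One design note, offered tentatively: the -ti flags in the docker command request an interactive TTY, which can fail when the process is launched from a non-interactive environment; dropping them is a common adjustment if that happens.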
preprocess/gps_extractor.py

@@ -2,10 +2,12 @@ import os
 from PIL import Image
 import piexif
 import logging
+import pandas as pd
+from datetime import datetime

 class GPSExtractor:
-    """从图像文件提取GPS坐标"""
+    """从图像文件提取GPS坐标和拍摄日期"""

     def __init__(self, image_dir):
         self.image_dir = image_dir

@@ -16,28 +18,60 @@ class GPSExtractor:
         """将DMS格式转换为十进制度"""
         return dms[0][0] / dms[0][1] + (dms[1][0] / dms[1][1]) / 60 + (dms[2][0] / dms[2][1]) / 3600

-    def get_gps(self, image_path):
-        """提取单张图片的GPS坐标"""
+    @staticmethod
+    def _parse_datetime(datetime_str):
+        """解析EXIF中的日期时间字符串"""
+        try:
+            # EXIF日期格式通常为 'YYYY:MM:DD HH:MM:SS'
+            return datetime.strptime(datetime_str.decode(), '%Y:%m:%d %H:%M:%S')
+        except Exception:
+            return None
+
+    def get_gps_and_date(self, image_path):
+        """提取单张图片的GPS坐标和拍摄日期"""
         try:
             image = Image.open(image_path)
             exif_data = piexif.load(image.info['exif'])
+
+            # 提取GPS信息
             gps_info = exif_data.get("GPS", {})
+            lat = lon = None
             if gps_info:
                 lat = self._dms_to_decimal(gps_info.get(2, []))
                 lon = self._dms_to_decimal(gps_info.get(4, []))
                 self.logger.debug(f"成功提取图片GPS坐标: {image_path} - 纬度: {lat}, 经度: {lon}")
-                return lat, lon
-            else:
-                self.logger.warning(f"图片无GPS信息: {image_path}")
-                return None, None
+
+            # 提取拍摄日期
+            date_info = None
+            if "Exif" in exif_data:
+                # 优先使用DateTimeOriginal
+                date_str = exif_data["Exif"].get(36867)  # DateTimeOriginal
+                if not date_str:
+                    # 备选DateTime
+                    date_str = exif_data["Exif"].get(36868)  # DateTimeDigitized
+                if not date_str:
+                    # 最后使用基本DateTime
+                    date_str = exif_data["0th"].get(306)  # DateTime
+                if date_str:
+                    date_info = self._parse_datetime(date_str)
+                    self.logger.debug(f"成功提取图片拍摄日期: {image_path} - {date_info}")
+
+            if not gps_info:
+                self.logger.warning(f"图片无GPS信息: {image_path}")
+            if not date_info:
+                self.logger.warning(f"图片无拍摄日期信息: {image_path}")
+
+            return lat, lon, date_info
         except Exception as e:
-            self.logger.error(f"提取GPS坐标时发生错误: {image_path} - {str(e)}")
-            return None, None
+            self.logger.error(f"提取图片信息时发生错误: {image_path} - {str(e)}")
+            return None, None, None

     def extract_all_gps(self):
-        """提取所有图片的GPS坐标"""
-        self.logger.info(f"开始从目录提取GPS坐标: {self.image_dir}")
-        gps_points = []
+        """提取所有图片的GPS坐标和拍摄日期"""
+        self.logger.info(f"开始从目录提取GPS坐标和拍摄日期: {self.image_dir}")
+        gps_data = []
         total_images = 0
         successful_extractions = 0

@@ -45,11 +79,15 @@ class GPSExtractor:
             if image_file.lower().endswith('.jpg'):
                 total_images += 1
                 image_path = os.path.join(self.image_dir, image_file)
-                lat, lon = self.get_gps(image_path)
-                if lat and lon:
+                lat, lon, date = self.get_gps_and_date(image_path)
+                if lat and lon:  # 仍然以GPS信息作为主要判断依据
                     successful_extractions += 1
-                    gps_points.append(
-                        {'file': image_file, 'lat': lat, 'lon': lon})
+                    gps_data.append({
+                        'file': image_file,
+                        'lat': lat,
+                        'lon': lon,
+                        'date': date
+                    })

-        self.logger.info(f"GPS坐标提取完成 - 总图片数: {total_images}, 成功提取: {successful_extractions}, 失败: {total_images - successful_extractions}")
-        return gps_points
+        self.logger.info(f"GPS坐标和拍摄日期提取完成 - 总图片数: {total_images}, 成功提取: {successful_extractions}, 失败: {total_images - successful_extractions}")
+        return pd.DataFrame(gps_data)

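The raw tag numbers 36867, 36868 and 306 used in the fallback above have named constants in piexif. A small standalone sketch of the same DateTimeOriginal -> DateTimeDigitized -> DateTime fallback, not part of this commit:

import piexif

def read_capture_time_tag(image_path):
    """Return the raw EXIF capture-time string for image_path, or None."""
    exif_dict = piexif.load(image_path)  # piexif.load also accepts a file path
    exif_ifd = exif_dict.get("Exif", {})
    date_bytes = (exif_ifd.get(piexif.ExifIFD.DateTimeOriginal)       # 36867
                  or exif_ifd.get(piexif.ExifIFD.DateTimeDigitized)   # 36868
                  or exif_dict.get("0th", {}).get(piexif.ImageIFD.DateTime))  # 306
    return date_bytes.decode() if date_bytes else None

# capture_time = read_capture_time_tag("IMG_0001.jpg")  # hypothetical file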
preprocess/gps_filter.py

@@ -4,6 +4,7 @@ from itertools import combinations
 import numpy as np
 from scipy.spatial import KDTree
 import logging
+import pandas as pd

 class GPSFilter:

@@ -33,25 +34,23 @@ class GPSFilter:
         grid_y = int((lon - min_lon) // grid_size)
         return grid_x, grid_y

-    def _get_distances(self, points, grid_size):
+    def _get_distances(self, points_df, grid_size):
         """读取图片 GPS 坐标,计算点对之间的距离并排序"""
         # 确定经纬度范围
-        coords = np.array([[p['lat'], p['lon']] for p in points])
-        min_lat, min_lon = np.min(coords, axis=0)
-        max_lat, max_lon = np.max(coords, axis=0)
+        min_lat, max_lat = points_df['lat'].min(), points_df['lat'].max()
+        min_lon, max_lon = points_df['lon'].min(), points_df['lon'].max()
         self.logger.info(
             f"经纬度范围:纬度[{min_lat:.6f}, {max_lat:.6f}],纬度范围[{max_lat-min_lat:.6f}]"
             f"经度[{min_lon:.6f}, {max_lon:.6f}],经度范围[{max_lon-min_lon:.6f}]")

         # 分配到网格
         grid_map = {}
-        for img_info_dict in points:
+        for _, row in points_df.iterrows():
             grid = self._assign_to_grid(
-                img_info_dict['lat'], img_info_dict['lon'], grid_size, min_lat, min_lon)
+                row['lat'], row['lon'], grid_size, min_lat, min_lon)
             if grid not in grid_map:
                 grid_map[grid] = []
-            grid_map[grid].append(
-                (img_info_dict['file'], img_info_dict['lat'], img_info_dict['lon']))
+            grid_map[grid].append((row['file'], row['lat'], row['lon']))

         self.logger.info(f"图像点已分配到 {len(grid_map)} 个网格中")

@@ -68,14 +67,13 @@ class GPSFilter:
         return sorted_distances

-    def filter_dense_points(self, points, grid_size=0.001, distance_threshold=13):
+    def filter_dense_points(self, points_df, grid_size=0.001, distance_threshold=13):
         """过滤密集点,根据提供的距离阈值"""
         self.logger.info(f"开始过滤密集点 (网格大小: {grid_size}, 距离阈值: {distance_threshold}米)")

-        # 获取每个网格中的图片的两两距离信息
-        sorted_distances = self._get_distances(points, grid_size)
+        sorted_distances = self._get_distances(points_df, grid_size)

         to_del_imgs = []
         """遍历每个网格,删除网格中距离小于阈值的点"""
         for grid, distances in sorted_distances.items():
             grid_del_count = 0

@@ -119,28 +117,28 @@ class GPSFilter:
             for img in to_del_imgs:
                 f.write(img+'\n')

-        filtered_points = [point for point in points if point['file'] not in to_del_imgs]
-        self.logger.info(f"密集点过滤完成,共删除 {len(to_del_imgs)} 个点,剩余 {len(filtered_points)} 个点")
-        return filtered_points
+        filtered_df = points_df[~points_df['file'].isin(to_del_imgs)]
+        self.logger.info(f"密集点过滤完成,共删除 {len(to_del_imgs)} 个点,剩余 {len(filtered_df)} 个点")
+        return filtered_df

-    def filter_isolated_points(self, points, threshold_distance=0.001, min_neighbors=6):
+    def filter_isolated_points(self, points_df, threshold_distance=0.001, min_neighbors=6):
         """过滤孤立点"""
         self.logger.info(f"开始过滤孤立点 (距离阈值: {threshold_distance}, 最小邻居数: {min_neighbors})")

-        coords = np.array([[p['lat'], p['lon']] for p in points])
+        coords = points_df[['lat', 'lon']].values
         kdtree = KDTree(coords)
         neighbors_count = [len(kdtree.query_ball_point(
             coord, threshold_distance)) for coord in coords]

         isolated_points = []
         with open(self.log_file, 'a', encoding='utf-8') as f:
-            for i, p in enumerate(points):
+            for i, (_, row) in enumerate(points_df.iterrows()):
                 if neighbors_count[i] < min_neighbors:
-                    isolated_points.append(p['file'])
-                    f.write(p['file']+'\n')
-                    self.logger.debug(f"删除孤立点: {p['file']} (邻居数: {neighbors_count[i]})")
+                    isolated_points.append(row['file'])
+                    f.write(row['file']+'\n')
+                    self.logger.debug(f"删除孤立点: {row['file']} (邻居数: {neighbors_count[i]})")
             f.write('\n')

-        filtered_points = [p for i, p in enumerate(points) if neighbors_count[i] >= min_neighbors]
-        self.logger.info(f"孤立点过滤完成,共删除 {len(isolated_points)} 个点,剩余 {len(filtered_points)} 个点")
-        return filtered_points
+        filtered_df = points_df[~points_df['file'].isin(isolated_points)]
+        self.logger.info(f"孤立点过滤完成,共删除 {len(isolated_points)} 个点,剩余 {len(filtered_df)} 个点")
+        return filtered_df

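As a standalone illustration of the isolated-point rule above (a point is kept only if it has at least min_neighbors points within threshold_distance, the KDTree count including the point itself), here is a minimal sketch on toy data; it omits the del_imgs.txt bookkeeping that GPSFilter performs:

import numpy as np
import pandas as pd
from scipy.spatial import KDTree

# Toy data: three clustered points and one stray point far away (values invented).
df = pd.DataFrame({
    'file': ['a.jpg', 'b.jpg', 'c.jpg', 'd.jpg'],
    'lat':  [30.0000, 30.0002, 30.0001, 30.0500],
    'lon':  [120.0000, 120.0001, 120.0002, 120.0500],
})

coords = df[['lat', 'lon']].values
kdtree = KDTree(coords)
# query_ball_point counts the point itself, exactly as in filter_isolated_points.
neighbors = [len(kdtree.query_ball_point(c, r=0.001)) for c in coords]

kept = df[np.array(neighbors) >= 2]   # min_neighbors=2 for this tiny example
print(kept['file'].tolist())          # ['a.jpg', 'b.jpg', 'c.jpg']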
preprocess/grid_divider.py

@@ -1,4 +1,5 @@
 import logging
+from geopy.distance import geodesic

 class GridDivider:
     """划分九宫格,并将图片分配到对应网格"""

@@ -8,58 +9,60 @@ class GridDivider:
         self.logger = logging.getLogger('UAV_Preprocess.GridDivider')
         self.logger.info(f"初始化网格划分器,重叠率: {overlap}")

-    def divide_grids(self, points):
+    def divide_grids(self, points_df, grid_size=250):
         """计算边界框并划分九宫格"""
         self.logger.info("开始划分九宫格")

-        lats = [p['lat'] for p in points]
-        lons = [p['lon'] for p in points]
-        min_lat, max_lat = min(lats), max(lats)
-        min_lon, max_lon = min(lons), max(lons)
+        min_lat, max_lat = points_df['lat'].min(), points_df['lat'].max()
+        min_lon, max_lon = points_df['lon'].min(), points_df['lon'].max()
+
+        # 计算区域的实际距离(米)
+        width = geodesic((min_lat, min_lon), (min_lat, max_lon)).meters
+        height = geodesic((min_lat, min_lon), (max_lat, min_lon)).meters

         self.logger.info(
-            f"区域边界: 纬度[{min_lat:.6f}, {max_lat:.6f}], "
-            f"经度[{min_lon:.6f}, {max_lon:.6f}]"
+            f"区域宽度: {width:.2f}米, 高度: {height:.2f}"
         )

-        lat_step = (max_lat - min_lat) / 3
-        lon_step = (max_lon - min_lon) / 3
-
-        self.logger.debug(f"网格步长: 纬度{lat_step:.6f}, 经度{lon_step:.6f}")
+        # 计算需要划分的网格数量
+        num_grids_width = int(width / grid_size) if int(width / grid_size) > 0 else 1
+        num_grids_height = int(height / grid_size) if int(height / grid_size) > 0 else 1
+
+        # 计算每个网格对应的经纬度步长
+        lat_step = (max_lat - min_lat) / num_grids_height
+        lon_step = (max_lon - min_lon) / num_grids_width

         grids = []
-        for i in range(3):
-            for j in range(3):
+        for i in range(num_grids_height):
+            for j in range(num_grids_width):
                 grid_min_lat = min_lat + i * lat_step - self.overlap * lat_step
-                grid_max_lat = min_lat + \
-                    (i + 1) * lat_step + self.overlap * lat_step
+                grid_max_lat = min_lat + (i + 1) * lat_step + self.overlap * lat_step
                 grid_min_lon = min_lon + j * lon_step - self.overlap * lon_step
-                grid_max_lon = min_lon + \
-                    (j + 1) * lon_step + self.overlap * lon_step
-                grids.append((grid_min_lat, grid_max_lat,
-                              grid_min_lon, grid_max_lon))
+                grid_max_lon = min_lon + (j + 1) * lon_step + self.overlap * lon_step
+                grids.append((grid_min_lat, grid_max_lat, grid_min_lon, grid_max_lon))
                 self.logger.debug(
                     f"网格[{i},{j}]: 纬度[{grid_min_lat:.6f}, {grid_max_lat:.6f}], "
                     f"经度[{grid_min_lon:.6f}, {grid_max_lon:.6f}]"
                 )

-        self.logger.info(f"成功划分为 {len(grids)} 个网格")
+        self.logger.info(f"成功划分为 {len(grids)} 个网格 ({num_grids_width}x{num_grids_height})")
         return grids

-    def assign_to_grids(self, points, grids):
+    def assign_to_grids(self, points_df, grids):
         """将点分配到对应网格"""
-        self.logger.info(f"开始将 {len(points)} 个点分配到网格中")
+        self.logger.info(f"开始将 {len(points_df)} 个点分配到网格中")

         grid_points = {i: [] for i in range(len(grids))}
         points_assigned = 0
         multiple_grid_points = 0

-        for point in points:
+        for _, point in points_df.iterrows():
             point_assigned = False
             for i, (min_lat, max_lat, min_lon, max_lon) in enumerate(grids):
                 if min_lat <= point['lat'] <= max_lat and min_lon <= point['lon'] <= max_lon:
-                    grid_points[i].append(point)
+                    grid_points[i].append(point.to_dict())
                     if point_assigned:
                         multiple_grid_points += 1
                     else:

@@ -76,7 +79,7 @@ class GridDivider:
             self.logger.info(f"网格 {grid_idx} 包含 {len(points)} 个点")

         self.logger.info(
-            f"点分配完成: 总点数 {len(points)}, "
+            f"点分配完成: 总点数 {len(points_df)}, "
             f"成功分配 {points_assigned} 个点, "
             f"{multiple_grid_points} 个点被分配到多个网格"
         )

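A worked example of the new sizing logic above: the bounding box is measured in metres with geodesic, the grid counts per axis are floored to at least one, and the degree steps follow from those counts. The coordinates below are invented for illustration:

from geopy.distance import geodesic

# Bounding box of a hypothetical survey area.
min_lat, max_lat = 30.0000, 30.0090
min_lon, max_lon = 120.0000, 120.0120

width = geodesic((min_lat, min_lon), (min_lat, max_lon)).meters   # roughly 1157 m
height = geodesic((min_lat, min_lon), (max_lat, min_lon)).meters  # roughly 998 m

grid_size = 250  # metres, the new PreprocessConfig.grid_size default
num_grids_width = int(width / grid_size) or 1    # 4  (same "at least 1" guard as divide_grids)
num_grids_height = int(height / grid_size) or 1  # 3
lat_step = (max_lat - min_lat) / num_grids_height
lon_step = (max_lon - min_lon) / num_grids_width
print(num_grids_width, num_grids_height)  # 4 3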
preprocess/time_filter.py Normal file

@@ -0,0 +1,73 @@
import os
import logging
import pandas as pd
from datetime import datetime, timedelta


class TimeFilter:
    """基于拍摄时间过滤图片"""

    def __init__(self, output_dir):
        self.log_file = os.path.join(output_dir, 'del_imgs.txt')
        self.logger = logging.getLogger('UAV_Preprocess.TimeFilter')
        self.time_threshold = timedelta(minutes=5)  # 5分钟阈值

    def filter_by_date(self, points_df: pd.DataFrame) -> pd.DataFrame:
        """根据拍摄时间过滤图片
        如果相邻两张图片的拍摄时间差超过5分钟,过滤掉后续所有图片

        Args:
            points_df: 包含图片信息的DataFrame,必须包含'file'和'date'列

        Returns:
            过滤后的DataFrame
        """
        self.logger.info("开始基于拍摄时间进行过滤")

        # 确保date列存在且不为空
        if 'date' not in points_df.columns:
            self.logger.error("输入数据中缺少date列")
            return points_df

        # 删除date为空的行
        points_df = points_df.dropna(subset=['date'])

        if len(points_df) == 0:
            self.logger.warning("没有有效的拍摄时间数据")
            return points_df

        # 按时间排序
        points_df = points_df.sort_values('date')
        self.logger.info(f"排序后的时间范围: {points_df['date'].min()} 到 {points_df['date'].max()}")

        # 计算时间差
        time_diffs = points_df['date'].diff()

        # 找到第一个时间差超过阈值的位置
        break_idx = None
        for idx, time_diff in enumerate(time_diffs):
            if time_diff and time_diff > self.time_threshold:
                break_idx = idx
                break_time = points_df.iloc[idx]['date']
                self.logger.info(f"在索引 {idx} 处发现时间断点,时间差为 {time_diff}")
                self.logger.info(f"断点时间: {break_time}")
                break

        # 如果找到断点,过滤掉后续图片
        if break_idx is not None:
            to_delete = points_df.iloc[break_idx:]['file'].tolist()
            self.logger.info(f"将删除 {len(to_delete)} 张断点后的图片")

            # 记录被删除的图片
            with open(self.log_file, 'a', encoding='utf-8') as f:
                for img in to_delete:
                    f.write(img + '\n')
                f.write('\n')

            # 保留断点之前的图片
            filtered_df = points_df.iloc[:break_idx]
            self.logger.info(f"时间过滤完成,保留了 {len(filtered_df)} 张图片")
            return filtered_df

        self.logger.info("未发现时间断点,保留所有图片")
        return points_df
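A minimal sketch of the break-detection rule implemented above: sort by capture time, take successive differences, and cut everything from the first gap larger than five minutes onward. The timestamps are invented for illustration:

import pandas as pd
from datetime import datetime, timedelta

df = pd.DataFrame({
    'file': ['a.jpg', 'b.jpg', 'c.jpg', 'd.jpg'],
    'date': [datetime(2024, 12, 18, 10, 0, 0),
             datetime(2024, 12, 18, 10, 0, 30),
             datetime(2024, 12, 18, 10, 1, 0),
             datetime(2024, 12, 18, 10, 20, 0)],  # 19-minute gap before this row
})

df = df.sort_values('date')
gaps = df['date'].diff()  # NaT for the first row, then Timedelta values

# First position whose gap to the previous shot exceeds the 5-minute threshold
break_idx = next((i for i, gap in enumerate(gaps)
                  if pd.notna(gap) and gap > timedelta(minutes=5)), None)
if break_idx is not None:
    df = df.iloc[:break_idx]  # keep only the shots before the break

print(df['file'].tolist())  # ['a.jpg', 'b.jpg', 'c.jpg']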