数据结构修改为DataFrame,过滤算法更新,增加run command模块
This commit is contained in:
parent
22a041db6c
commit
9e30a58fa9
@ -1,7 +1,9 @@
|
||||
from gps_extractor import GPSExtractor
|
||||
from gps_filter import GPSFilter
|
||||
from grid_divider import GridDivider
|
||||
from logger import setup_logger
|
||||
from preprocess.gps_extractor import GPSExtractor
|
||||
from preprocess.time_filter import TimeFilter
|
||||
from preprocess.gps_filter import GPSFilter
|
||||
from preprocess.grid_divider import GridDivider
|
||||
from preprocess.logger import setup_logger
|
||||
from preprocess.command_runner import CommandRunner
|
||||
import os
|
||||
import pandas as pd
|
||||
import shutil
|
||||
@ -9,6 +11,8 @@ import matplotlib.pyplot as plt
|
||||
from typing import List, Dict, Optional
|
||||
from dataclasses import dataclass
|
||||
from tqdm import tqdm
|
||||
import subprocess
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
|
||||
@dataclass
|
||||
@ -21,9 +25,11 @@ class PreprocessConfig:
|
||||
filter_distance_threshold: float = 0.001
|
||||
filter_min_neighbors: int = 6
|
||||
grid_overlap: float = 0.05
|
||||
grid_size: float = 250
|
||||
enable_filter: bool = True
|
||||
enable_grid_division: bool = True
|
||||
enable_visualization: bool = True
|
||||
enable_copy_images: bool = True
|
||||
|
||||
|
||||
class ImagePreprocessor:
|
||||
@ -31,8 +37,9 @@ class ImagePreprocessor:
|
||||
self.config = config
|
||||
self.logger = setup_logger(config.output_dir)
|
||||
self.gps_points = []
|
||||
self.command_runner = CommandRunner(config.output_dir)
|
||||
|
||||
def extract_gps(self) -> List[Dict]:
|
||||
def extract_gps(self) -> pd.DataFrame:
|
||||
"""提取GPS数据"""
|
||||
self.logger.info("开始提取GPS数据")
|
||||
extractor = GPSExtractor(self.config.image_dir)
|
||||
@ -40,7 +47,16 @@ class ImagePreprocessor:
|
||||
self.logger.info(f"成功提取 {len(self.gps_points)} 个GPS点")
|
||||
return self.gps_points
|
||||
|
||||
def filter_points(self) -> List[Dict]:
|
||||
def time_filter(self) -> pd.DataFrame:
|
||||
"""时间过滤"""
|
||||
self.logger.info("开始时间过滤")
|
||||
time_filter = TimeFilter(self.config.output_dir)
|
||||
self.gps_points = time_filter.filter_by_date(self.gps_points)
|
||||
self.logger.info(f"时间过滤后剩余 {len(self.gps_points)} 个GPS点")
|
||||
return self.gps_points
|
||||
|
||||
# TODO 过滤密集点算法需要改进
|
||||
def filter_points(self) -> pd.DataFrame:
|
||||
"""过滤GPS点"""
|
||||
if not self.config.enable_filter:
|
||||
return self.gps_points
|
||||
@ -67,29 +83,30 @@ class ImagePreprocessor:
|
||||
self.logger.info(f"密集点过滤后剩余 {len(self.gps_points)} 个GPS点")
|
||||
return self.gps_points
|
||||
|
||||
def divide_grids(self) -> Dict[int, List[Dict]]:
|
||||
def divide_grids(self) -> Dict[int, pd.DataFrame]:
|
||||
"""划分网格"""
|
||||
if not self.config.enable_grid_division:
|
||||
return {0: self.gps_points} # 不划分网格时,所有点放在一个网格中
|
||||
|
||||
self.logger.info(f"开始划分网格 (重叠率: {self.config.grid_overlap})")
|
||||
grid_divider = GridDivider(overlap=self.config.grid_overlap)
|
||||
grids = grid_divider.divide_grids(self.gps_points)
|
||||
grids = grid_divider.divide_grids(self.gps_points, grid_size=self.config.grid_size)
|
||||
grid_points = grid_divider.assign_to_grids(self.gps_points, grids)
|
||||
self.logger.info(f"成功划分为 {len(grid_points)} 个网格")
|
||||
return grid_points
|
||||
|
||||
def copy_images(self, grid_points: Dict[int, List[Dict]]):
|
||||
def copy_images(self, grid_points: Dict[int, pd.DataFrame]):
|
||||
"""复制图像到目标文件夹"""
|
||||
if not self.config.enable_copy_images:
|
||||
return
|
||||
|
||||
self.logger.info("开始复制图像文件")
|
||||
os.makedirs(self.config.output_dir, exist_ok=True)
|
||||
|
||||
for grid_idx, points in grid_points.items():
|
||||
if self.config.enable_grid_division:
|
||||
output_dir = os.path.join(self.config.output_dir, f'grid_{
|
||||
grid_idx + 1}', 'images')
|
||||
output_dir = os.path.join(self.config.output_dir, f'grid_{grid_idx + 1}', 'project', 'images')
|
||||
else:
|
||||
output_dir = os.path.join(self.config.output_dir, 'images')
|
||||
output_dir = os.path.join(self.config.output_dir, 'project', 'images')
|
||||
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
|
||||
@ -106,25 +123,38 @@ class ImagePreprocessor:
|
||||
|
||||
self.logger.info("开始生成可视化结果")
|
||||
extractor = GPSExtractor(self.config.image_dir)
|
||||
original_points = extractor.extract_all_gps()
|
||||
original_points_df = extractor.extract_all_gps()
|
||||
|
||||
# 读取被过滤的图片列表
|
||||
with open(os.path.join(self.config.output_dir, 'del_imgs.txt'), "r", encoding="utf-8") as file:
|
||||
filtered_file = [line.strip() for line in file]
|
||||
filtered_files = [line.strip() for line in file if line.strip()]
|
||||
|
||||
# 绘制散点图
|
||||
plt.figure(figsize=(10, 8))
|
||||
plt.scatter([p['lon'] for p in original_points],
|
||||
[p['lat'] for p in original_points],
|
||||
color='blue', label="Original Points", alpha=0.6)
|
||||
plt.scatter([p['lon'] for p in original_points if p['file'] in filtered_file],
|
||||
[p['lat']
|
||||
for p in original_points if p['file'] in filtered_file],
|
||||
color="red", label="Filtered Points", alpha=0.6)
|
||||
# 创建一个新的图形
|
||||
plt.figure(figsize=(20, 16))
|
||||
|
||||
# 绘制所有原始点
|
||||
plt.scatter(original_points_df['lon'],
|
||||
original_points_df['lat'],
|
||||
color='blue',
|
||||
label="Original Points",
|
||||
alpha=0.6)
|
||||
|
||||
# 绘制被过滤的点
|
||||
filtered_points_df = original_points_df[original_points_df['file'].isin(filtered_files)]
|
||||
plt.scatter(filtered_points_df['lon'],
|
||||
filtered_points_df['lat'],
|
||||
color="red",
|
||||
label="Filtered Points",
|
||||
alpha=0.6)
|
||||
|
||||
# 设置图形属性
|
||||
plt.title("GPS Coordinates of Images", fontsize=14)
|
||||
plt.xlabel("Longitude", fontsize=12)
|
||||
plt.ylabel("Latitude", fontsize=12)
|
||||
plt.grid(True)
|
||||
plt.legend()
|
||||
|
||||
# 保存图形
|
||||
plt.savefig(os.path.join(self.config.output_dir, 'filter_GPS.png'))
|
||||
plt.close()
|
||||
self.logger.info("预处理结果图已保存")
|
||||
@ -133,11 +163,16 @@ class ImagePreprocessor:
|
||||
"""执行完整的预处理流程"""
|
||||
try:
|
||||
self.extract_gps()
|
||||
self.time_filter()
|
||||
self.filter_points()
|
||||
grid_points = self.divide_grids()
|
||||
self.copy_images(grid_points)
|
||||
self.visualize_results()
|
||||
self.logger.info("预处理任务完成")
|
||||
self.command_runner.run_grid_commands(
|
||||
grid_points,
|
||||
self.config.enable_grid_division
|
||||
)
|
||||
except Exception as e:
|
||||
self.logger.error(f"处理过程中发生错误: {str(e)}", exc_info=True)
|
||||
raise
|
||||
@ -146,16 +181,17 @@ class ImagePreprocessor:
|
||||
if __name__ == '__main__':
|
||||
# 创建配置
|
||||
config = PreprocessConfig(
|
||||
image_dir=r'C:\datasets\1815\output\grid_5\images',
|
||||
output_dir=r'C:\datasets\1815\output\grid_5',
|
||||
image_dir=r'C:\datasets\1815\images',
|
||||
output_dir=r'C:\datasets\1815\output',
|
||||
filter_grid_size=0.001,
|
||||
filter_dense_distance_threshold=10,
|
||||
filter_distance_threshold=0.001,
|
||||
filter_min_neighbors=6,
|
||||
grid_overlap=0.05,
|
||||
enable_filter=False,
|
||||
enable_filter=True,
|
||||
enable_grid_division=True,
|
||||
enable_visualization=False
|
||||
enable_visualization=True,
|
||||
enable_copy_images=True
|
||||
)
|
||||
|
||||
# 创建处理器并执行
|
||||
|
BIN
preprocess/__pycache__/command_runner.cpython-312.pyc
Normal file
BIN
preprocess/__pycache__/command_runner.cpython-312.pyc
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
preprocess/__pycache__/time_filter.cpython-312.pyc
Normal file
BIN
preprocess/__pycache__/time_filter.cpython-312.pyc
Normal file
Binary file not shown.
89
preprocess/command_runner.py
Normal file
89
preprocess/command_runner.py
Normal file
@ -0,0 +1,89 @@
|
||||
import os
|
||||
import logging
|
||||
import subprocess
|
||||
from typing import Dict
|
||||
import pandas as pd
|
||||
|
||||
class CommandRunner:
    """Run the OpenDroneMap (ODM) docker command for each prepared grid.

    The runner expects the images of a grid to already be laid out under
    ``<work_dir>/project/images`` where ``work_dir`` is
    ``<output_dir>/grid_<n>`` when grid division is enabled, and
    ``<output_dir>`` itself when it is disabled.
    """

    def __init__(self, output_dir: str):
        """Initialise the runner.

        Args:
            output_dir: Root output directory that contains the grid folders.
        """
        self.output_dir = output_dir
        self.logger = logging.getLogger('UAV_Preprocess.CommandRunner')

    def run_grid_commands(self, grid_points: Dict[int, pd.DataFrame], enable_grid_division: bool = True):
        """Run the ODM command for every grid, one after another.

        Args:
            grid_points: Mapping of grid index to the points assigned to that
                grid. Only the keys and the number of points are used here.
            enable_grid_division: When False a single command is run against
                the un-gridded project located directly in ``output_dir``.

        Raises:
            Exception: Re-raised from the first grid whose command fails;
                the remaining grids are not processed.
        """
        if not enable_grid_division:
            self._run_command(0, enable_grid_division=False)
            return

        self.logger.info("开始执行网格处理命令")

        for grid_idx, points in grid_points.items():
            # Every grid cell gets a key even when no image falls inside it;
            # running ODM on an empty image folder would only fail and abort
            # the remaining grids, so such cells are skipped.
            if len(points) == 0:
                self.logger.warning(f"网格 {grid_idx + 1} 不包含任何图片,跳过命令执行")
                continue
            try:
                self._run_command(grid_idx)
            except Exception as e:
                self.logger.error(f"网格 {grid_idx + 1} 处理命令执行失败: {str(e)}")
                raise  # Stop at the first failing grid.

    def _resolve_work_dir(self, grid_idx: int, enable_grid_division: bool = True) -> str:
        """Return the absolute directory that is mounted into the container.

        Docker only accepts absolute host paths for bind mounts, hence the
        ``abspath`` normalisation.
        """
        root = os.path.abspath(self.output_dir)
        if not enable_grid_division:
            return root
        return os.path.join(root, f'grid_{grid_idx + 1}')

    def _build_command(self, work_dir: str) -> list:
        """Build the docker argument vector for one work directory.

        The command is returned as a list so it can be executed without a
        shell (paths containing spaces stay intact). No ``-t``/``-i`` flags
        are passed: the process is started with piped stdio, so there is no
        TTY to attach and docker would refuse to start with ``-t``.
        """
        return [
            "docker", "run", "--rm",
            "-v", f"{work_dir}:/datasets",
            "opendronemap/odm",
            "--project-path", "/datasets", "project",
            "--feature-quality", "lowest",
            "--force-gps",
        ]

    def _run_command(self, grid_idx: int, enable_grid_division: bool = True):
        """Run the ODM command for a single grid and store its output.

        Standard output is appended to ``odm_success.log`` on success; on
        failure stdout and stderr are appended to ``odm_error.log``. Both
        files live in the work directory.

        Args:
            grid_idx: Zero-based grid index.
            enable_grid_division: Selects the per-grid directory (True) or
                the root output directory (False) as work directory.

        Raises:
            OSError: If the process cannot be started (for example docker is
                not installed or the work directory does not exist).
            RuntimeError: If the command exits with a non-zero status.
        """
        grid_dir = self._resolve_work_dir(grid_idx, enable_grid_division)
        command = self._build_command(grid_dir)

        self.logger.info(f"执行命令: {' '.join(command)} 在目录: {grid_dir}")

        try:
            completed = subprocess.run(
                command,
                cwd=grid_dir,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                # Undecodable bytes in the tool output must not abort the run.
                errors="replace",
            )
        except OSError as e:
            self.logger.error(f"网格 {grid_idx + 1} 命令执行出错: {str(e)}")
            raise

        if completed.returncode == 0:
            self.logger.info(f"网格 {grid_idx + 1} 命令执行成功")
            self.logger.debug("命令输出至日志文件")
            with open(os.path.join(grid_dir, 'odm_success.log'), 'a', encoding='utf-8') as f:
                f.write(f"{completed.stdout}")
            return

        self.logger.error(f"网格 {grid_idx + 1} 命令执行失败")
        self.logger.error("错误信息输出至日志文件")
        with open(os.path.join(grid_dir, 'odm_error.log'), 'a', encoding='utf-8') as f:
            f.write(f"{completed.stdout}")
            f.write("\n错误日志:\n")
            f.write(f"{completed.stderr}")
        raise RuntimeError(f"命令执行失败: {completed.stderr}")
|
@ -2,10 +2,12 @@ import os
|
||||
from PIL import Image
|
||||
import piexif
|
||||
import logging
|
||||
import pandas as pd
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
class GPSExtractor:
|
||||
"""从图像文件提取GPS坐标"""
|
||||
"""从图像文件提取GPS坐标和拍摄日期"""
|
||||
|
||||
def __init__(self, image_dir):
|
||||
self.image_dir = image_dir
|
||||
@ -16,28 +18,60 @@ class GPSExtractor:
|
||||
"""将DMS格式转换为十进制度"""
|
||||
return dms[0][0] / dms[0][1] + (dms[1][0] / dms[1][1]) / 60 + (dms[2][0] / dms[2][1]) / 3600
|
||||
|
||||
def get_gps(self, image_path):
|
||||
"""提取单张图片的GPS坐标"""
|
||||
@staticmethod
|
||||
def _parse_datetime(datetime_str):
|
||||
"""解析EXIF中的日期时间字符串"""
|
||||
try:
|
||||
# EXIF日期格式通常为 'YYYY:MM:DD HH:MM:SS'
|
||||
return datetime.strptime(datetime_str.decode(), '%Y:%m:%d %H:%M:%S')
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
def get_gps_and_date(self, image_path):
|
||||
"""提取单张图片的GPS坐标和拍摄日期"""
|
||||
try:
|
||||
image = Image.open(image_path)
|
||||
exif_data = piexif.load(image.info['exif'])
|
||||
|
||||
# 提取GPS信息
|
||||
gps_info = exif_data.get("GPS", {})
|
||||
lat = lon = None
|
||||
if gps_info:
|
||||
lat = self._dms_to_decimal(gps_info.get(2, []))
|
||||
lon = self._dms_to_decimal(gps_info.get(4, []))
|
||||
self.logger.debug(f"成功提取图片GPS坐标: {image_path} - 纬度: {lat}, 经度: {lon}")
|
||||
return lat, lon
|
||||
else:
|
||||
|
||||
# 提取拍摄日期
|
||||
date_info = None
|
||||
if "Exif" in exif_data:
|
||||
# 优先使用DateTimeOriginal
|
||||
date_str = exif_data["Exif"].get(36867) # DateTimeOriginal
|
||||
if not date_str:
|
||||
# 备选DateTime
|
||||
date_str = exif_data["Exif"].get(36868) # DateTimeDigitized
|
||||
if not date_str:
|
||||
# 最后使用基本DateTime
|
||||
date_str = exif_data["0th"].get(306) # DateTime
|
||||
|
||||
if date_str:
|
||||
date_info = self._parse_datetime(date_str)
|
||||
self.logger.debug(f"成功提取图片拍摄日期: {image_path} - {date_info}")
|
||||
|
||||
if not gps_info:
|
||||
self.logger.warning(f"图片无GPS信息: {image_path}")
|
||||
return None, None
|
||||
if not date_info:
|
||||
self.logger.warning(f"图片无拍摄日期信息: {image_path}")
|
||||
|
||||
return lat, lon, date_info
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"提取GPS坐标时发生错误: {image_path} - {str(e)}")
|
||||
return None, None
|
||||
self.logger.error(f"提取图片信息时发生错误: {image_path} - {str(e)}")
|
||||
return None, None, None
|
||||
|
||||
def extract_all_gps(self):
|
||||
"""提取所有图片的GPS坐标"""
|
||||
self.logger.info(f"开始从目录提取GPS坐标: {self.image_dir}")
|
||||
gps_points = []
|
||||
"""提取所有图片的GPS坐标和拍摄日期"""
|
||||
self.logger.info(f"开始从目录提取GPS坐标和拍摄日期: {self.image_dir}")
|
||||
gps_data = []
|
||||
total_images = 0
|
||||
successful_extractions = 0
|
||||
|
||||
@ -45,11 +79,15 @@ class GPSExtractor:
|
||||
if image_file.lower().endswith('.jpg'):
|
||||
total_images += 1
|
||||
image_path = os.path.join(self.image_dir, image_file)
|
||||
lat, lon = self.get_gps(image_path)
|
||||
if lat and lon:
|
||||
lat, lon, date = self.get_gps_and_date(image_path)
|
||||
if lat and lon: # 仍然以GPS信息作为主要判断依据
|
||||
successful_extractions += 1
|
||||
gps_points.append(
|
||||
{'file': image_file, 'lat': lat, 'lon': lon})
|
||||
gps_data.append({
|
||||
'file': image_file,
|
||||
'lat': lat,
|
||||
'lon': lon,
|
||||
'date': date
|
||||
})
|
||||
|
||||
self.logger.info(f"GPS坐标提取完成 - 总图片数: {total_images}, 成功提取: {successful_extractions}, 失败: {total_images - successful_extractions}")
|
||||
return gps_points
|
||||
self.logger.info(f"GPS坐标和拍摄日期提取完成 - 总图片数: {total_images}, 成功提取: {successful_extractions}, 失败: {total_images - successful_extractions}")
|
||||
return pd.DataFrame(gps_data)
|
||||
|
@ -4,6 +4,7 @@ from itertools import combinations
|
||||
import numpy as np
|
||||
from scipy.spatial import KDTree
|
||||
import logging
|
||||
import pandas as pd
|
||||
|
||||
|
||||
class GPSFilter:
|
||||
@ -33,25 +34,23 @@ class GPSFilter:
|
||||
grid_y = int((lon - min_lon) // grid_size)
|
||||
return grid_x, grid_y
|
||||
|
||||
def _get_distances(self, points, grid_size):
|
||||
def _get_distances(self, points_df, grid_size):
|
||||
"""读取图片 GPS 坐标,计算点对之间的距离并排序"""
|
||||
# 确定经纬度范围
|
||||
coords = np.array([[p['lat'], p['lon']] for p in points])
|
||||
min_lat, min_lon = np.min(coords, axis=0)
|
||||
max_lat, max_lon = np.max(coords, axis=0)
|
||||
min_lat, max_lat = points_df['lat'].min(), points_df['lat'].max()
|
||||
min_lon, max_lon = points_df['lon'].min(), points_df['lon'].max()
|
||||
self.logger.info(
|
||||
f"经纬度范围:纬度[{min_lat:.6f}, {max_lat:.6f}],纬度范围[{max_lat-min_lat:.6f}],"
|
||||
f"经度[{min_lon:.6f}, {max_lon:.6f}],经度范围[{max_lon-min_lon:.6f}]")
|
||||
|
||||
# 分配到网格
|
||||
grid_map = {}
|
||||
for img_info_dict in points:
|
||||
for _, row in points_df.iterrows():
|
||||
grid = self._assign_to_grid(
|
||||
img_info_dict['lat'], img_info_dict['lon'], grid_size, min_lat, min_lon)
|
||||
row['lat'], row['lon'], grid_size, min_lat, min_lon)
|
||||
if grid not in grid_map:
|
||||
grid_map[grid] = []
|
||||
grid_map[grid].append(
|
||||
(img_info_dict['file'], img_info_dict['lat'], img_info_dict['lon']))
|
||||
grid_map[grid].append((row['file'], row['lat'], row['lon']))
|
||||
|
||||
self.logger.info(f"图像点已分配到 {len(grid_map)} 个网格中")
|
||||
|
||||
@ -68,14 +67,13 @@ class GPSFilter:
|
||||
|
||||
return sorted_distances
|
||||
|
||||
def filter_dense_points(self, points, grid_size=0.001, distance_threshold=13):
|
||||
def filter_dense_points(self, points_df, grid_size=0.001, distance_threshold=13):
|
||||
"""过滤密集点,根据提供的距离阈值"""
|
||||
self.logger.info(f"开始过滤密集点 (网格大小: {grid_size}, 距离阈值: {distance_threshold}米)")
|
||||
|
||||
# 获取每个网格中的图片的两两距离信息
|
||||
sorted_distances = self._get_distances(points, grid_size)
|
||||
|
||||
sorted_distances = self._get_distances(points_df, grid_size)
|
||||
to_del_imgs = []
|
||||
|
||||
"""遍历每个网格,删除网格中距离小于阈值的点"""
|
||||
for grid, distances in sorted_distances.items():
|
||||
grid_del_count = 0
|
||||
@ -119,28 +117,28 @@ class GPSFilter:
|
||||
for img in to_del_imgs:
|
||||
f.write(img+'\n')
|
||||
|
||||
filtered_points = [point for point in points if point['file'] not in to_del_imgs]
|
||||
self.logger.info(f"密集点过滤完成,共删除 {len(to_del_imgs)} 个点,剩余 {len(filtered_points)} 个点")
|
||||
return filtered_points
|
||||
filtered_df = points_df[~points_df['file'].isin(to_del_imgs)]
|
||||
self.logger.info(f"密集点过滤完成,共删除 {len(to_del_imgs)} 个点,剩余 {len(filtered_df)} 个点")
|
||||
return filtered_df
|
||||
|
||||
def filter_isolated_points(self, points, threshold_distance=0.001, min_neighbors=6):
|
||||
def filter_isolated_points(self, points_df, threshold_distance=0.001, min_neighbors=6):
|
||||
"""过滤孤立点"""
|
||||
self.logger.info(f"开始过滤孤立点 (距离阈值: {threshold_distance}, 最小邻居数: {min_neighbors})")
|
||||
|
||||
coords = np.array([[p['lat'], p['lon']] for p in points])
|
||||
coords = points_df[['lat', 'lon']].values
|
||||
kdtree = KDTree(coords)
|
||||
neighbors_count = [len(kdtree.query_ball_point(
|
||||
coord, threshold_distance)) for coord in coords]
|
||||
|
||||
isolated_points = []
|
||||
with open(self.log_file, 'a', encoding='utf-8') as f:
|
||||
for i, p in enumerate(points):
|
||||
for i, (_, row) in enumerate(points_df.iterrows()):
|
||||
if neighbors_count[i] < min_neighbors:
|
||||
isolated_points.append(p['file'])
|
||||
f.write(p['file']+'\n')
|
||||
self.logger.debug(f"删除孤立点: {p['file']} (邻居数: {neighbors_count[i]})")
|
||||
isolated_points.append(row['file'])
|
||||
f.write(row['file']+'\n')
|
||||
self.logger.debug(f"删除孤立点: {row['file']} (邻居数: {neighbors_count[i]})")
|
||||
f.write('\n')
|
||||
|
||||
filtered_points = [p for i, p in enumerate(points) if neighbors_count[i] >= min_neighbors]
|
||||
self.logger.info(f"孤立点过滤完成,共删除 {len(isolated_points)} 个点,剩余 {len(filtered_points)} 个点")
|
||||
return filtered_points
|
||||
filtered_df = points_df[~points_df['file'].isin(isolated_points)]
|
||||
self.logger.info(f"孤立点过滤完成,共删除 {len(isolated_points)} 个点,剩余 {len(filtered_df)} 个点")
|
||||
return filtered_df
|
||||
|
@ -1,4 +1,5 @@
|
||||
import logging
|
||||
from geopy.distance import geodesic
|
||||
|
||||
class GridDivider:
|
||||
"""划分九宫格,并将图片分配到对应网格"""
|
||||
@ -8,58 +9,60 @@ class GridDivider:
|
||||
self.logger = logging.getLogger('UAV_Preprocess.GridDivider')
|
||||
self.logger.info(f"初始化网格划分器,重叠率: {overlap}")
|
||||
|
||||
def divide_grids(self, points):
|
||||
def divide_grids(self, points_df, grid_size=250):
|
||||
"""计算边界框并划分九宫格"""
|
||||
self.logger.info("开始划分九宫格")
|
||||
|
||||
lats = [p['lat'] for p in points]
|
||||
lons = [p['lon'] for p in points]
|
||||
min_lat, max_lat = min(lats), max(lats)
|
||||
min_lon, max_lon = min(lons), max(lons)
|
||||
min_lat, max_lat = points_df['lat'].min(), points_df['lat'].max()
|
||||
min_lon, max_lon = points_df['lon'].min(), points_df['lon'].max()
|
||||
|
||||
# 计算区域的实际距离(米)
|
||||
width = geodesic((min_lat, min_lon), (min_lat, max_lon)).meters
|
||||
height = geodesic((min_lat, min_lon), (max_lat, min_lon)).meters
|
||||
|
||||
self.logger.info(
|
||||
f"区域边界: 纬度[{min_lat:.6f}, {max_lat:.6f}], "
|
||||
f"经度[{min_lon:.6f}, {max_lon:.6f}]"
|
||||
f"区域宽度: {width:.2f}米, 高度: {height:.2f}米"
|
||||
)
|
||||
|
||||
lat_step = (max_lat - min_lat) / 3
|
||||
lon_step = (max_lon - min_lon) / 3
|
||||
# 计算需要划分的网格数量
|
||||
num_grids_width = int(width / grid_size) if int(width / grid_size) > 0 else 1
|
||||
num_grids_height = int(height / grid_size) if int(height / grid_size) > 0 else 1
|
||||
|
||||
# 计算每个网格对应的经纬度步长
|
||||
lat_step = (max_lat - min_lat) / num_grids_height
|
||||
lon_step = (max_lon - min_lon) / num_grids_width
|
||||
|
||||
self.logger.debug(f"网格步长: 纬度{lat_step:.6f}, 经度{lon_step:.6f}")
|
||||
|
||||
grids = []
|
||||
for i in range(3):
|
||||
for j in range(3):
|
||||
for i in range(num_grids_height):
|
||||
for j in range(num_grids_width):
|
||||
grid_min_lat = min_lat + i * lat_step - self.overlap * lat_step
|
||||
grid_max_lat = min_lat + \
|
||||
(i + 1) * lat_step + self.overlap * lat_step
|
||||
grid_max_lat = min_lat + (i + 1) * lat_step + self.overlap * lat_step
|
||||
grid_min_lon = min_lon + j * lon_step - self.overlap * lon_step
|
||||
grid_max_lon = min_lon + \
|
||||
(j + 1) * lon_step + self.overlap * lon_step
|
||||
grids.append((grid_min_lat, grid_max_lat,
|
||||
grid_min_lon, grid_max_lon))
|
||||
grid_max_lon = min_lon + (j + 1) * lon_step + self.overlap * lon_step
|
||||
grids.append((grid_min_lat, grid_max_lat, grid_min_lon, grid_max_lon))
|
||||
|
||||
self.logger.debug(
|
||||
f"网格[{i},{j}]: 纬度[{grid_min_lat:.6f}, {grid_max_lat:.6f}], "
|
||||
f"经度[{grid_min_lon:.6f}, {grid_max_lon:.6f}]"
|
||||
)
|
||||
|
||||
self.logger.info(f"成功划分为 {len(grids)} 个网格")
|
||||
self.logger.info(f"成功划分为 {len(grids)} 个网格 ({num_grids_width}x{num_grids_height})")
|
||||
return grids
|
||||
|
||||
def assign_to_grids(self, points, grids):
|
||||
def assign_to_grids(self, points_df, grids):
|
||||
"""将点分配到对应网格"""
|
||||
self.logger.info(f"开始将 {len(points)} 个点分配到网格中")
|
||||
self.logger.info(f"开始将 {len(points_df)} 个点分配到网格中")
|
||||
|
||||
grid_points = {i: [] for i in range(len(grids))}
|
||||
points_assigned = 0
|
||||
multiple_grid_points = 0
|
||||
|
||||
for point in points:
|
||||
for _, point in points_df.iterrows():
|
||||
point_assigned = False
|
||||
for i, (min_lat, max_lat, min_lon, max_lon) in enumerate(grids):
|
||||
if min_lat <= point['lat'] <= max_lat and min_lon <= point['lon'] <= max_lon:
|
||||
grid_points[i].append(point)
|
||||
grid_points[i].append(point.to_dict())
|
||||
if point_assigned:
|
||||
multiple_grid_points += 1
|
||||
else:
|
||||
@ -76,7 +79,7 @@ class GridDivider:
|
||||
self.logger.info(f"网格 {grid_idx} 包含 {len(points)} 个点")
|
||||
|
||||
self.logger.info(
|
||||
f"点分配完成: 总点数 {len(points)}, "
|
||||
f"点分配完成: 总点数 {len(points_df)}, "
|
||||
f"成功分配 {points_assigned} 个点, "
|
||||
f"{multiple_grid_points} 个点被分配到多个网格"
|
||||
)
|
||||
|
73
preprocess/time_filter.py
Normal file
73
preprocess/time_filter.py
Normal file
@ -0,0 +1,73 @@
|
||||
import os
|
||||
import logging
|
||||
import pandas as pd
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
class TimeFilter:
    """Filter images by capture time.

    Images are ordered by their capture time; as soon as two consecutive
    images are further apart than ``time_threshold`` every image from that
    point on is discarded. Names of discarded images are appended to
    ``del_imgs.txt`` in the output directory.
    """

    def __init__(self, output_dir, time_threshold=timedelta(minutes=5)):
        """Initialise the filter.

        Args:
            output_dir: Directory in which ``del_imgs.txt`` is written.
            time_threshold: Largest allowed gap between two consecutive
                images. Defaults to five minutes.
        """
        self.log_file = os.path.join(output_dir, 'del_imgs.txt')
        self.logger = logging.getLogger('UAV_Preprocess.TimeFilter')
        self.time_threshold = time_threshold

    def _record_deleted(self, files):
        """Append the given file names to the deletion log.

        A blank line is written after the group as a separator. Nothing is
        written for an empty list.
        """
        if not files:
            return
        with open(self.log_file, 'a', encoding='utf-8') as f:
            f.writelines(name + '\n' for name in files)
            f.write('\n')

    def filter_by_date(self, points_df: pd.DataFrame) -> pd.DataFrame:
        """Drop every image that follows the first large capture-time gap.

        Args:
            points_df: Image table; must contain the columns ``file`` and
                ``date``.

        Returns:
            The rows before the first gap larger than ``time_threshold``,
            sorted by ``date``. Rows without a date are removed. When the
            ``date`` column is missing the input is returned unchanged.
        """
        self.logger.info("开始基于拍摄时间进行过滤")

        if 'date' not in points_df.columns:
            self.logger.error("输入数据中缺少date列")
            return points_df

        # Rows without a capture time cannot be ordered. They are removed,
        # and recorded like every other removed image so the deletion log
        # stays complete.
        missing_date = points_df['date'].isna()
        if missing_date.any():
            undated = points_df.loc[missing_date, 'file'].tolist()
            self.logger.warning(f"{len(undated)} 张图片缺少拍摄时间,已被过滤")
            self._record_deleted(undated)
            points_df = points_df[~missing_date]

        if len(points_df) == 0:
            self.logger.warning("没有有效的拍摄时间数据")
            return points_df

        # A stable sort keeps the input order of images with equal timestamps.
        points_df = points_df.sort_values('date', kind='stable')
        self.logger.info(f"排序后的时间范围: {points_df['date'].min()} 到 {points_df['date'].max()}")

        # Gap between each image and its predecessor; the first entry is NaT
        # and compares as False against the threshold.
        time_diffs = points_df['date'].diff()
        exceeds = (time_diffs > self.time_threshold).to_numpy()

        if not exceeds.any():
            self.logger.info("未发现时间断点,保留所有图片")
            return points_df

        # argmax on a boolean array gives the position of the first True.
        break_idx = int(exceeds.argmax())
        break_time = points_df.iloc[break_idx]['date']
        self.logger.info(f"在索引 {break_idx} 处发现时间断点,时间差为 {time_diffs.iloc[break_idx]}")
        self.logger.info(f"断点时间: {break_time}")

        to_delete = points_df.iloc[break_idx:]['file'].tolist()
        self.logger.info(f"将删除 {len(to_delete)} 张断点后的图片")
        self._record_deleted(to_delete)

        filtered_df = points_df.iloc[:break_idx]
        self.logger.info(f"时间过滤完成,保留了 {len(filtered_df)} 张图片")
        return filtered_df
|
Loading…
Reference in New Issue
Block a user