磁盘空间、容错优化

This commit is contained in:
龙澳 2025-01-06 15:50:11 +08:00
parent 0c44cde378
commit a0a7f6930a
3 changed files with 158 additions and 77 deletions

View File

@ -3,6 +3,7 @@ import shutil
from datetime import timedelta
from dataclasses import dataclass
from typing import Dict, Tuple
import psutil # 需要添加到 requirements.txt
import matplotlib.pyplot as plt
import pandas as pd
@ -53,6 +54,9 @@ class ImagePreprocessor:
def __init__(self, config: PreprocessConfig):
self.config = config
# 检查磁盘空间
self._check_disk_space()
# 清理并重建输出目录
if os.path.exists(config.output_dir):
self._clean_output_dir()
@ -91,6 +95,45 @@ class ImagePreprocessor:
print(f"创建输出目录时发生错误: {str(e)}")
raise
def _get_directory_size(self, path):
"""获取目录的总大小(字节)"""
total_size = 0
for dirpath, dirnames, filenames in os.walk(path):
for filename in filenames:
file_path = os.path.join(dirpath, filename)
try:
total_size += os.path.getsize(file_path)
except (OSError, FileNotFoundError):
continue
return total_size
def _check_disk_space(self):
"""检查磁盘空间是否足够"""
# 获取输入目录大小
input_size = self._get_directory_size(self.config.image_dir)
# 获取输出目录所在磁盘的剩余空间
output_drive = os.path.splitdrive(
os.path.abspath(self.config.output_dir))[0]
if not output_drive: # 处理Linux/Unix路径
output_drive = '/'
disk_usage = psutil.disk_usage(output_drive)
free_space = disk_usage.free
# 计算所需空间输入大小的1.5倍)
required_space = input_size * 1.5
if free_space < required_space:
error_msg = (
f"磁盘空间不足!\n"
f"输入目录大小: {input_size / (1024**3):.2f} GB\n"
f"所需空间: {required_space / (1024**3):.2f} GB\n"
f"可用空间: {free_space / (1024**3):.2f} GB\n"
f"在驱动器 {output_drive}"
)
raise RuntimeError(error_msg)
def extract_gps(self) -> pd.DataFrame:
"""提取GPS数据"""
self.logger.info("开始提取GPS数据")
@ -217,9 +260,9 @@ class ImagePreprocessor:
for grid_id, points in grid_points.items():
output_dir = os.path.join(
self.config.output_dir,
f"grid_{grid_id[0]}_{grid_id[1]}",
"project",
self.config.output_dir,
f"grid_{grid_id[0]}_{grid_id[1]}",
"project",
"images"
)
@ -229,7 +272,8 @@ class ImagePreprocessor:
src = os.path.join(self.config.image_dir, point["file"])
dst = os.path.join(output_dir, point["file"])
shutil.copy(src, dst)
self.logger.info(f"网格 ({grid_id[0]},{grid_id[1]}) 包含 {len(points)} 张图像")
self.logger.info(
f"网格 ({grid_id[0]},{grid_id[1]}) 包含 {len(points)} 张图像")
def merge_tif(self, grid_points: Dict[tuple, pd.DataFrame], produce_dem: bool):
"""合并所有网格的影像产品"""
@ -248,13 +292,17 @@ class ImagePreprocessor:
self.logger.info("开始合并OBJ模型")
merger = MergeObj(self.config.output_dir)
merger.merge_grid_obj(grid_points, translations)
def post_process(self, successful_grid_points: Dict[tuple, pd.DataFrame], grid_points: Dict[tuple, pd.DataFrame], translations: Dict[tuple, tuple]):
if len(successful_grid_points) < len(grid_points):
self.logger.warning(
f"{len(grid_points) - len(successful_grid_points)} 个网格处理失败,"
f"将只合并成功处理的 {len(successful_grid_points)} 个网格"
)
if len(successful_grid_points) == 1:
self.logger.info(f"只有一个网格{successful_grid_points.keys()},无需合并")
self.logger.info(f"生产结果请在{successful_grid_points.keys()[0]}目录下查看")
return
elif len(successful_grid_points) < len(grid_points):
self.logger.warning(
f"{len(grid_points) - len(successful_grid_points)} 个网格处理失败,"
f"将只合并成功处理的 {len(successful_grid_points)} 个网格"
)
self.merge_tif(successful_grid_points, self.config.produce_dem)
if self.config.mode != "快拼模式":
self.merge_ply(successful_grid_points)
@ -271,10 +319,12 @@ class ImagePreprocessor:
self.copy_images(grid_points)
self.logger.info("预处理任务完成")
successful_grid_points = self.odm_monitor.process_all_grids(grid_points, self.config.produce_dem)
successful_grid_points = self.odm_monitor.process_all_grids(
grid_points, self.config.produce_dem)
self.post_process(successful_grid_points,
grid_points, translations)
self.post_process(successful_grid_points, grid_points, translations)
except Exception as e:
self.logger.error(f"处理过程中发生错误: {str(e)}", exc_info=True)
raise
@ -283,8 +333,8 @@ class ImagePreprocessor:
if __name__ == "__main__":
# 创建配置
config = PreprocessConfig(
image_dir=r"/home/cug/datasets/error3/project/images",
output_dir=r"/home/cug/ODM_output/error3",
image_dir=r"E:\datasets\UAV\1619\project\images",
output_dir=r"G:\ODM_output\1619",
cluster_eps=0.01,
cluster_min_samples=5,
@ -300,7 +350,7 @@ if __name__ == "__main__":
filter_dense_distance_threshold=10,
filter_time_threshold=timedelta(minutes=5),
grid_size=800,
grid_size=1000,
grid_overlap=0.05,

View File

@ -4,5 +4,5 @@ scikit-learn
matplotlib
piexif
geopy
psutil
psutil>=5.8.0
docker>=6.1.3

View File

@ -11,14 +11,27 @@ class NotOverlapError(Exception):
"""图像重叠度不足异常"""
pass
class DockerNotRunError(Exception):
    """Signals that the Docker service is not running."""
class DockerShareError(Exception):
    """Signals a Docker directory-sharing problem."""
class OutOfMemoryError(Exception):
    """Signals that processing ran out of memory."""
class StrangeValuesError(Exception):
    """Signals that abnormal ("strange") values were detected."""
class ODMProcessMonitor:
"""ODM处理监控器"""
@ -29,7 +42,7 @@ class ODMProcessMonitor:
def _check_success(self, grid_dir: str) -> bool:
"""检查ODM是否执行成功
检查项目:
1. 必要的文件夹是否存在
2. 正射影像是否生成且有效
@ -39,41 +52,42 @@ class ODMProcessMonitor:
success_markers = ['odm_orthophoto']
if self.mode != "快拼模式":
success_markers.extend(['odm_texturing', 'odm_georeferencing'])
if not all(os.path.exists(os.path.join(grid_dir, 'project', marker)) for marker in success_markers):
self.logger.error("必要的文件夹未生成")
return False
# 检查正射影像文件
ortho_path = os.path.join(grid_dir, 'project', 'odm_orthophoto', 'odm_orthophoto.original.tif')
ortho_path = os.path.join(
grid_dir, 'project', 'odm_orthophoto', 'odm_orthophoto.original.tif')
if not os.path.exists(ortho_path):
self.logger.error("正射影像文件未生成")
return False
# 检查文件大小
file_size_mb = os.path.getsize(ortho_path) / (1024 * 1024) # 转换为MB
if file_size_mb < 1:
self.logger.error(f"正射影像文件过小: {file_size_mb:.2f}MB")
return False
try:
# 打开影像文件
ds = gdal.Open(ortho_path)
if ds is None:
self.logger.error("无法打开正射影像文件")
return False
# 读取第一个波段
band = ds.GetRasterBand(1)
# 获取统计信息
stats = band.GetStatistics(False, True)
if stats is None:
self.logger.error("无法获取影像统计信息")
return False
min_val, max_val, mean, std = stats
# 计算空值比例
no_data_value = band.GetNoDataValue()
array = band.ReadAsArray()
@ -81,20 +95,20 @@ class ODMProcessMonitor:
no_data_ratio = np.sum(array == no_data_value) / array.size
else:
no_data_ratio = 0
# 检查空值比例是否过高超过50%
if no_data_ratio > 0.5:
self.logger.error(f"正射影像空值比例过高: {no_data_ratio:.2%}")
return False
# 检查影像是否全黑或全白
if max_val - min_val < 1:
self.logger.error("正射影像可能无效:像素值范围过小")
return False
ds = None # 关闭数据集
return True
except Exception as e:
self.logger.error(f"检查正射影像时发生错误: {str(e)}")
return False
@ -112,7 +126,8 @@ class ODMProcessMonitor:
while current_try < max_retries:
current_try += 1
self.logger.info(f"{current_try} 次尝试处理网格 ({grid_id[0]},{grid_id[1]})")
self.logger.info(
f"{current_try} 次尝试处理网格 ({grid_id[0]},{grid_id[1]})")
try:
# 构建Docker命令
@ -146,54 +161,59 @@ class ODMProcessMonitor:
docker_command += "--rerun-all"
self.logger.info(docker_command)
result = subprocess.run(
docker_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout, stderr = result.stdout.decode(
'utf-8'), result.stderr.decode('utf-8')
self.logger.error(f"==========stderr==========: {stderr}")
# 检查是否有错误
stdout_lines = stdout.strip().split('\n')
last_lines = stdout_lines[-10:] if len(stdout_lines) > 10 else stdout_lines
# 检查Docker是否未运行
if any("docker not run" in line.lower() for line in last_lines) or \
any("docker daemon" in line.lower() for line in last_lines) or \
any("cannot connect to the docker daemon" in line.lower() for line in last_lines):
raise DockerNotRunError("Docker服务未启动")
last_lines = '\n'.join(
stdout_lines[-50:] if len(stdout_lines) > 10 else stdout_lines)
self.logger.info(f"==========stdout==========: {last_lines}")
# 检查目录共享问题
if any("not share" in line.lower() for line in last_lines) or \
any("permission denied" in line.lower() for line in last_lines) or \
any("access is denied" in line.lower() for line in last_lines):
raise DockerShareError("Docker无法访问目录")
# 检查是否有重叠度不足错误
if any("not overlap" in line.lower() for line in last_lines):
raise NotOverlapError("检测到图像重叠度不足错误")
# 检查执行结果
if self._check_success(grid_dir):
self.logger.info(f"网格 ({grid_id[0]},{grid_id[1]}) 处理成功")
return True, ""
if current_try < max_retries:
self.logger.warning(f"网格处理失败,准备第 {current_try + 1} 次重试")
if stderr:
self.logger.error(f"docker run指令执行失败")
self.logger.error(f"==========stderr==========: {stderr}")
if "error during connect" in stderr or "The system cannot find the file specified" in stderr:
raise DockerNotRunError
elif "user declined directory sharing" in stderr:
raise DockerShareError
else:
raise Exception(f"Docker运行失败需要人工排查错误")
# TODO 处理时间组删除,删多了的情况
else:
self.logger.error(f"网格 ({grid_id[0]},{grid_id[1]}) 处理失败,已达到最大重试次数")
return False, f"网格 ({grid_id[0]},{grid_id[1]}) 处理失败,已重试{max_retries}"
self.logger.info("docker run指令执行成功")
if "ODM app finished" in last_lines:
self.logger.info("ODM处理完成")
if self._check_success(grid_dir):
self.logger.info(
f"网格 ({grid_id[0]},{grid_id[1]}) 处理成功")
return True, ""
else:
self.logger.error(
f"虽然ODM处理完成但是生产产品质量可能不合格需要人工检查")
raise Exception(f"虽然ODM处理完成但是生产产品质量可能不合格需要人工检查")
elif "enough overlap" in last_lines:
raise NotOverlapError
elif "out of memory" in last_lines:
raise OutOfMemoryError
elif "strange values" in last_lines:
raise StrangeValuesError
else:
raise Exception(f"ODM处理失败需要人工排查错误")
except NotOverlapError:
if use_lowest_quality:
self.logger.warning("检测到'not overlap'错误移除lowest quality参数后重试")
self.logger.warning(
"检测到not overlap错误移除lowest quality参数后重试")
use_lowest_quality = False
continue
else:
self.logger.error("即使移除lowest quality参数后仍然出现'not overlap'错误")
return False, "图像重叠度不足"
self.logger.error(
"即使移除lowest quality参数后仍然出现not overlap错误")
return False, "图像重叠度不足,请检查数据集的采样间隔情况"
except DockerNotRunError:
self.logger.error("Docker服务未启动")
return False, "Docker没有启动请启动Docker"
@ -202,18 +222,26 @@ class ODMProcessMonitor:
self.logger.error("Docker无法访问目录")
return False, "Docker无法访问数据目录或输出目录请检查目录权限和共享设置"
except OutOfMemoryError:
self.logger.error("内存不足,请减少输入图像的数量")
return False, "内存不足"
except StrangeValuesError:
# TODO 怎么处理异常值
self.logger.error("重建过程中出现异常值")
return False, "检测到异常值,请检查输入图像"
return False, f"网格 ({grid_id[0]},{grid_id[1]}) 处理失败"
def process_all_grids(self, grid_points: Dict[tuple, pd.DataFrame], produce_dem: bool) -> Dict[tuple, pd.DataFrame]:
"""处理所有网格
Returns:
Dict[tuple, pd.DataFrame]: 成功处理的网格点数据字典
"""
self.logger.info("开始执行网格处理")
successful_grid_points = {}
failed_grids = []
for grid_id, points in grid_points.items():
grid_dir = os.path.join(
self.output_dir, f'grid_{grid_id[0]}_{grid_id[1]}'
@ -230,27 +258,30 @@ class ODMProcessMonitor:
if success:
successful_grid_points[grid_id] = points
else:
self.logger.error(f"网格 ({grid_id[0]},{grid_id[1]}) 处理失败: {error_msg}")
self.logger.error(
f"网格 ({grid_id[0]},{grid_id[1]}) 处理失败: {error_msg}")
failed_grids.append((grid_id, error_msg))
except Exception as e:
error_msg = str(e)
self.logger.error(f"处理网格 ({grid_id[0]},{grid_id[1]}) 时发生异常: {error_msg}")
self.logger.error(
f"处理网格 ({grid_id[0]},{grid_id[1]}) 时发生异常: {error_msg}")
failed_grids.append((grid_id, error_msg))
# 汇总处理结果
total_grids = len(grid_points)
failed_count = len(failed_grids)
success_count = len(successful_grid_points)
self.logger.info(f"网格处理完成。总计: {total_grids}, 成功: {success_count}, 失败: {failed_count}")
self.logger.info(
f"网格处理完成。总计: {total_grids}, 成功: {success_count}, 失败: {failed_count}")
if failed_grids:
self.logger.error("失败的网格:")
for grid_id, error_msg in failed_grids:
self.logger.error(f"网格 ({grid_id[0]},{grid_id[1]}): {error_msg}")
self.logger.error(
f"网格 ({grid_id[0]},{grid_id[1]}): {error_msg}")
if len(successful_grid_points) == 0:
raise Exception("所有网格处理都失败,无法继续处理")
return successful_grid_points