def _run_command(self, grid_idx: int):
    """Run the ODM docker pipeline for a single grid.

    Builds the ``docker run`` command for ``<output_dir>/grid_<grid_idx + 1>``
    and hands it to ``self.monitor.run_odm_with_monitor``, which returns a
    ``(success, error_msg)`` pair.

    Args:
        grid_idx: Zero-based grid index. Directory names and log messages
            use ``grid_idx + 1``.

    Raises:
        RuntimeError: The monitor reported failure; the message is the
            monitor's error text.
        Exception: Anything raised while building or running the command is
            logged and re-raised unchanged.
    """
    try:
        # Docker interprets a relative ``-v`` source as a named volume rather
        # than a host directory, so always hand it an absolute path.
        grid_dir = os.path.abspath(
            os.path.join(self.output_dir, f'grid_{grid_idx + 1}')
        )
        # The mount spec is double-quoted because the command is executed
        # through a shell and the path may contain spaces.
        # NOTE(review): ``-ti`` asks docker for an interactive TTY; presumably
        # this is always launched from a terminal - confirm before running it
        # from a service or cron job.
        command = (
            f'docker run -ti --rm -v "{grid_dir}:/datasets" opendronemap/odm '
            '--project-path /datasets project '
            '--feature-quality lowest --force-gps --use-3dmesh'
        )

        self.logger.info(f"开始执行命令: {command}")
        success, error_msg = self.monitor.run_odm_with_monitor(
            command, grid_dir, grid_idx
        )

        if not success:
            # RuntimeError is still an Exception, so existing handlers match.
            raise RuntimeError(error_msg)

    except Exception as e:
        self.logger.error(f"网格 {grid_idx + 1} 处理失败: {str(e)}")
        raise
class ODMProcessMonitor:
    """Run an ODM shell command as a subprocess and retry it on failure.

    Each attempt writes the captured stdout/stderr to
    ``<grid_dir>/odm_attempt_<n>.log``. An attempt counts as successful when
    the command exits with status 0 and the expected ODM output folders exist.
    """

    # Folders whose presence marks a finished ODM run.
    _SUCCESS_MARKERS = ('odm_orthophoto', 'odm_georeferencing', 'odm_texturing')

    def __init__(self, max_retries: int = 3, check_interval: int = 5):
        """Store the retry policy.

        Args:
            max_retries: Maximum number of attempts per command.
            check_interval: Seconds between two liveness checks of a running
                attempt.
        """
        self.max_retries = max_retries
        self.check_interval = check_interval
        self.logger = logging.getLogger('UAV_Preprocess.ODMMonitor')

    def _is_process_running(self, pid: int) -> bool:
        """Return True if ``pid`` names a live, non-zombie process.

        Kept for callers that only hold a PID; ``run_odm_with_monitor`` no
        longer relies on it because it owns the ``Popen`` handle.
        """
        try:
            process = psutil.Process(pid)
            # psutil reports an exited-but-unreaped child (zombie) as running,
            # so the status has to be checked as well.
            return (
                process.is_running()
                and process.status() != psutil.STATUS_ZOMBIE
            )
        except psutil.NoSuchProcess:
            return False

    def _check_success(self, grid_dir: str) -> bool:
        """Return True if every ODM output folder exists for this grid.

        The folders are accepted either directly in ``grid_dir`` or in
        ``grid_dir/project``.
        """
        # NOTE(review): the docker command built by CommandRunner passes
        # ``--project-path /datasets project``, so ODM is expected to write
        # its outputs under ``<grid_dir>/project`` - confirm against a real
        # run. ``grid_dir`` itself is still accepted for compatibility.
        candidate_roots = (grid_dir, os.path.join(grid_dir, 'project'))
        return any(
            all(
                os.path.exists(os.path.join(root, marker))
                for marker in self._SUCCESS_MARKERS
            )
            for root in candidate_roots
        )

    def _run_attempt(self, command: str, grid_dir: str, attempt_no: int) -> int:
        """Run ``command`` once, save its output to a log file, return its exit code."""
        process = subprocess.Popen(
            command,
            shell=True,
            cwd=grid_dir,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            # Tool output is not guaranteed to match the locale encoding.
            errors='replace',
        )
        self.logger.info(f"ODM进程启动,PID: {process.pid}")

        # A non-positive interval would turn the loop below into a busy wait.
        timeout = self.check_interval if self.check_interval > 0 else None
        try:
            while True:
                try:
                    # communicate() drains both pipes while waiting, so the
                    # child can never block on a full pipe, and it reaps the
                    # child on exit. Retrying after a timeout loses no output.
                    stdout, stderr = process.communicate(timeout=timeout)
                    break
                except subprocess.TimeoutExpired:
                    self.logger.debug(f"ODM进程仍在运行,PID: {process.pid}")
        except BaseException:
            # Do not leave the child behind on errors or Ctrl-C.
            process.kill()
            process.wait()
            raise

        log_file = os.path.join(grid_dir, f'odm_attempt_{attempt_no}.log')
        with open(log_file, 'w', encoding='utf-8') as f:
            f.write(f"=== 标准输出 ===\n{stdout}\n\n=== 错误输出 ===\n{stderr}")

        return process.returncode

    def run_odm_with_monitor(self, command: str, grid_dir: str, grid_idx: int) -> Tuple[bool, str]:
        """Run the ODM command, retrying up to ``max_retries`` times.

        Args:
            command: Shell command line that starts ODM.
            grid_dir: Working directory of the command; attempt logs are
                written here and the output folders are looked up here.
            grid_idx: Zero-based grid index, used only in log messages
                (shown as ``grid_idx + 1``).

        Returns:
            Tuple[bool, str]: ``(True, "")`` on success, otherwise
            ``(False, error_message)``. An exception while launching or
            supervising an attempt aborts immediately without further retries.
        """
        for attempt_no in range(1, self.max_retries + 1):
            try:
                self.logger.info(f"网格 {grid_idx + 1} 第 {attempt_no} 次尝试执行ODM")
                returncode = self._run_attempt(command, grid_dir, attempt_no)

                # Require a clean exit as well as the marker folders: folders
                # left behind by an earlier attempt must not turn a crashed
                # run into a success.
                if returncode == 0 and self._check_success(grid_dir):
                    self.logger.info(f"网格 {grid_idx + 1} ODM处理成功")
                    return True, ""

                self.logger.warning(f"网格 {grid_idx + 1} 第 {attempt_no} 次尝试失败")

                # Linear back-off (30 s, 60 s, ...), skipped after the last attempt.
                if attempt_no < self.max_retries:
                    wait_time = attempt_no * 30
                    self.logger.info(f"等待 {wait_time} 秒后重试...")
                    time.sleep(wait_time)

            except Exception as e:
                error_msg = f"监控进程发生异常: {str(e)}"
                self.logger.error(error_msg)
                return False, error_msg

        error_msg = f"网格 {grid_idx + 1} 在 {self.max_retries} 次尝试后仍然失败,需要人工查看"
        self.logger.error(error_msg)
        return False, error_msg