import os
import time
import logging
import subprocess
from typing import Optional, Tuple

try:
    import psutil
except ImportError:  # only the PID-based helper needs psutil; the main loop does not
    psutil = None


class ODMProcessMonitor:
    """Run an ODM command, wait for it to finish, and retry on failure.

    Success is decided by the presence of ODM output folders inside the
    grid directory (see ``_check_success``), not by the process exit code.
    """

    # Mode string for which the texturing output is not required.
    FAST_MODE = "快拼模式"
    _FAST_MARKERS = ('odm_orthophoto', 'odm_georeferencing')
    _FULL_MARKERS = ('odm_orthophoto', 'odm_georeferencing', 'odm_texturing')

    def __init__(self, max_retries: int = 3, check_interval: int = 300,
                 mode: Optional[str] = None):
        """Initialise the monitor.

        Args:
            max_retries: Maximum number of attempts per grid.
            check_interval: Seconds to wait between checks on the running
                ODM process.
            mode: Processing mode. ``"快拼模式"`` only requires the
                orthophoto and georeferencing outputs; any other value
                (including the default ``None``) also requires the
                texturing output.
        """
        self.max_retries = max_retries
        self.check_interval = check_interval
        self.mode = mode
        self.logger = logging.getLogger('UAV_Preprocess.ODMMonitor')

    def _is_process_running(self, pid: int) -> bool:
        """Return True if a live (non-zombie) process with ``pid`` exists.

        Raises:
            RuntimeError: If psutil is not installed.
        """
        if psutil is None:
            raise RuntimeError("psutil is required for PID-based process checks")
        try:
            proc = psutil.Process(pid)
            # is_running() is also True for zombies; an exited-but-unreaped
            # child must count as finished, so check the status as well.
            return proc.is_running() and proc.status() != psutil.STATUS_ZOMBIE
        except psutil.NoSuchProcess:
            return False

    def _check_success(self, grid_dir: str) -> bool:
        """Return True if every expected ODM output path exists in ``grid_dir``."""
        if self.mode == self.FAST_MODE:
            success_markers = self._FAST_MARKERS
        else:
            success_markers = self._FULL_MARKERS
        return all(
            os.path.exists(os.path.join(grid_dir, marker))
            for marker in success_markers
        )

    def _run_attempt(self, command: str, grid_dir: str, attempt: int) -> None:
        """Run ``command`` once in ``grid_dir`` and save its output to a log file.

        The output is written to ``odm_attempt_<attempt + 1>.log`` inside
        ``grid_dir``.
        """
        process = subprocess.Popen(
            command,
            shell=True,
            cwd=grid_dir,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            # Undecodable bytes in tool output must not abort the attempt.
            errors="replace",
        )
        self.logger.info(f"ODM进程启动,PID: {process.pid}")

        # communicate() drains both pipes while it waits, so a verbose child
        # cannot block on a full pipe buffer. Retrying after TimeoutExpired
        # does not lose any output already read.
        while True:
            try:
                stdout, stderr = process.communicate(timeout=self.check_interval)
                break
            except subprocess.TimeoutExpired:
                continue

        log_file = os.path.join(grid_dir, f'odm_attempt_{attempt + 1}.log')
        with open(log_file, 'w', encoding='utf-8') as f:
            f.write(f"=== 标准输出 ===\n{stdout}\n\n=== 错误输出 ===\n{stderr}")

    def run_odm_with_monitor(self, command: str, grid_dir: str, grid_idx: int) -> Tuple[bool, str]:
        """Run the ODM command, retrying up to ``max_retries`` times.

        Args:
            command: Shell command line that starts ODM.
            grid_dir: Grid directory; used as the working directory and as
                the location of the per-attempt log files.
            grid_idx: Zero-based grid index (logged one-based).

        Returns:
            Tuple[bool, str]: ``(True, "")`` on success, otherwise
            ``(False, error_message)``. Any exception raised during an
            attempt ends the retries and is reported in the message.
        """
        # TODO: include rerun information
        for attempt in range(self.max_retries):
            try:
                self.logger.info(f"网格 {grid_idx + 1} 第 {attempt + 1} 次尝试执行ODM")
                self._run_attempt(command, grid_dir, attempt)

                if self._check_success(grid_dir):
                    self.logger.info(f"网格 {grid_idx + 1} ODM处理成功")
                    return True, ""
                self.logger.warning(f"网格 {grid_idx + 1} 第 {attempt + 1} 次尝试失败")

                # Linear back-off before the next attempt; none after the last.
                if attempt < self.max_retries - 1:
                    wait_time = (attempt + 1) * 30
                    self.logger.info(f"等待 {wait_time} 秒后重试...")
                    time.sleep(wait_time)

            except Exception as e:
                error_msg = f"监控进程发生异常: {str(e)}"
                self.logger.error(error_msg)
                return False, error_msg

        error_msg = f"网格 {grid_idx + 1} 在 {self.max_retries} 次尝试后仍然失败,需要人工查看"
        self.logger.error(error_msg)
        return False, error_msg