diff --git a/utils/command_runner.py b/utils/command_runner.py index b098ca1..66411fa 100644 --- a/utils/command_runner.py +++ b/utils/command_runner.py @@ -8,19 +8,20 @@ from utils.odm_monitor import ODMProcessMonitor class CommandRunner: """执行网格处理命令的类""" - def __init__(self, output_dir: str, max_retries: int = 3, mode: str = "快拼模式"): + def __init__(self, output_dir: str, mode: str = "快拼模式"): self.output_dir = output_dir - self.max_retries = max_retries self.logger = logging.getLogger('UAV_Preprocess.CommandRunner') - self.monitor = ODMProcessMonitor(max_retries=max_retries, mode=mode) + self.monitor = ODMProcessMonitor(mode=mode) self.mode = mode - def _run_command(self, grid_idx: int): - """执行单个网格的处理""" - try: - grid_dir = os.path.join(self.output_dir, f'grid_{grid_idx + 1}') - grid_dir = os.path.abspath(grid_dir) # 确保使用绝对路径 - + def run_grid_commands(self, grid_points: Dict[int, pd.DataFrame]): + """处理所有网格""" + self.logger.info("开始执行网格处理") + for grid_idx in grid_points.keys(): + grid_dir = os.path.abspath(os.path.join( + self.output_dir, f'grid_{grid_idx + 1}' + )) + success, error_msg = self.monitor.run_odm_with_monitor( grid_dir=grid_dir, grid_idx=grid_idx, @@ -28,19 +29,4 @@ class CommandRunner: ) if not success: - raise Exception(error_msg) - - except Exception as e: - self.logger.error(f"网格 {grid_idx + 1} 处理失败: {str(e)}") - raise - - def run_grid_commands(self, grid_points: Dict[int, pd.DataFrame]): - """处理所有网格""" - self.logger.info("开始执行网格处理") - - for grid_idx in grid_points.keys(): - try: - self._run_command(grid_idx) - except Exception as e: - self.logger.error(f"网格 {grid_idx + 1} 处理失败,停止后续执行: {str(e)}") - raise + raise Exception(f"网格 {grid_idx + 1} 处理失败: {error_msg}") diff --git a/utils/odm_monitor.py b/utils/odm_monitor.py index 3001fcd..a4cf994 100644 --- a/utils/odm_monitor.py +++ b/utils/odm_monitor.py @@ -1,5 +1,4 @@ import os -import time import logging import docker from typing import Tuple @@ -23,66 +22,54 @@ class ODMProcessMonitor: def run_odm_with_monitor(self, grid_dir: str, grid_idx: int, fast_mode: bool = True) -> Tuple[bool, str]: """运行ODM容器""" - attempt = 0 - while attempt < self.max_retries: - try: - self.logger.info(f"网格 {grid_idx + 1} 第 {attempt + 1} 次尝试") + try: + self.logger.info(f"开始处理网格 {grid_idx + 1}") - # 准备容器配置 - volumes = { - grid_dir: {'bind': '/datasets', 'mode': 'rw'} - } + # 准备容器配置 + volumes = { + grid_dir: {'bind': '/datasets', 'mode': 'rw'} + } - # 准备命令参数 - command = [ - "--project-path", "/datasets", "project", - "--max-concurrency", "10", - "--force-gps", - "--rerun-all" - ] + # 准备命令参数 + command = [ + "--project-path", "/datasets", "project", + "--max-concurrency", "10", + "--force-gps", + "--rerun-all" + ] - if fast_mode: - command.extend([ - "--feature-quality", "lowest", - "--orthophoto-resolution", "8", - "--fast-orthophoto", - "--skip-3dmodel" - ]) + if fast_mode: + command.extend([ + "--feature-quality", "lowest", + "--orthophoto-resolution", "8", + "--fast-orthophoto", + "--skip-3dmodel" + ]) - # 运行容器 - container = self.client.containers.run( - "opendronemap/odm", - command=command, - volumes=volumes, - detach=True, - remove=True - ) + # 运行容器并等待完成 + container = self.client.containers.run( + "opendronemap/odm", + command=command, + volumes=volumes, + detach=True, + remove=True, + environment={"PYTHONUNBUFFERED": "1"}, + mem_limit="0", # 不限制内存 + cpu_count=0, # 使用所有CPU + network_mode="host" # 使用主机网络模式 + ) - # 等待容器完成 - result = container.wait() + # 等待容器完成并获取状态码 + result = container.wait() - # 只在失败时获取日志 - if result['StatusCode'] != 0: - logs = container.logs().decode('utf-8') - self.logger.error("容器执行失败,最后10行日志:") - self.logger.error(''.join(logs.split('\n')[-10:])) + # 检查是否成功完成 + if result['StatusCode'] == 0 and self._check_success(grid_dir): + self.logger.info(f"网格 {grid_idx + 1} 处理成功") + return True, "" - # 检查是否成功完成 - if result['StatusCode'] == 0 and self._check_success(grid_dir): - self.logger.info(f"网格 {grid_idx + 1} ODM处理成功") - return True, "" + return False, f"网格 {grid_idx + 1} 处理失败" - self.logger.warning(f"网格 {grid_idx + 1} 第 {attempt + 1} 次尝试失败") - - except Exception as e: - error_msg = f"执行异常: {str(e)}" - self.logger.error(error_msg) - return False, error_msg - - attempt += 1 - if attempt < self.max_retries: - time.sleep(30) - - error_msg = f"网格 {grid_idx + 1} 在 {self.max_retries} 次尝试后仍然失败" - self.logger.error(error_msg) - return False, error_msg + except Exception as e: + error_msg = f"执行异常: {str(e)}" + self.logger.error(error_msg) + return False, error_msg