添加守护进程

This commit is contained in:
龙澳 2024-12-20 21:30:44 +08:00
parent 0f6a30f11e
commit 1d3c1bd71b
3 changed files with 139 additions and 58 deletions

View File

@ -204,10 +204,10 @@ class ImagePreprocessor:
self.copy_images(grid_points) self.copy_images(grid_points)
self.visualize_results() self.visualize_results()
# self.logger.info("预处理任务完成") # self.logger.info("预处理任务完成")
# self.command_runner.run_grid_commands( self.command_runner.run_grid_commands(
# grid_points, grid_points,
# self.config.enable_grid_division self.config.enable_grid_division
# ) )
except Exception as e: except Exception as e:
self.logger.error(f"处理过程中发生错误: {str(e)}", exc_info=True) self.logger.error(f"处理过程中发生错误: {str(e)}", exc_info=True)
raise raise

View File

@ -1,22 +1,51 @@
import os import os
import logging import logging
import subprocess import subprocess
import time
from typing import Dict from typing import Dict
import pandas as pd import pandas as pd
from preprocess.odm_monitor import ODMProcessMonitor
class CommandRunner: class CommandRunner:
"""执行网格处理命令的类""" """执行网格处理命令的类"""
def __init__(self, output_dir: str): def __init__(self, output_dir: str, max_retries: int = 3):
""" """
初始化命令执行器 初始化命令执行器

Args: Args:
output_dir: 输出目录路径 output_dir: 输出目录路径
max_retries: 最大重试次数
""" """
self.output_dir = output_dir self.output_dir = output_dir
self.max_retries = max_retries
self.logger = logging.getLogger('UAV_Preprocess.CommandRunner') self.logger = logging.getLogger('UAV_Preprocess.CommandRunner')
self.monitor = ODMProcessMonitor(max_retries=max_retries)
def _run_command(self, grid_idx: int):
"""
执行单个网格的命令
Args:
grid_idx: 网格索引
Raises:
Exception: 当命令执行失败时抛出异常
"""
try:
grid_dir = os.path.join(self.output_dir, f'grid_{grid_idx + 1}')
command = f"docker run -ti --rm -v {grid_dir}:/datasets opendronemap/odm --project-path /datasets project --feature-quality lowest --force-gps --use-3dmesh"
self.logger.info(f"开始执行命令: {command}")
success, error_msg = self.monitor.run_odm_with_monitor(command, grid_dir, grid_idx)
if not success:
raise Exception(error_msg)
except Exception as e:
self.logger.error(f"网格 {grid_idx + 1} 处理失败: {str(e)}")
raise
def run_grid_commands(self, grid_points: Dict[int, pd.DataFrame], enable_grid_division: bool = True): def run_grid_commands(self, grid_points: Dict[int, pd.DataFrame], enable_grid_division: bool = True):
""" """
@ -32,59 +61,9 @@ class CommandRunner:
self.logger.info("开始执行网格处理命令") self.logger.info("开始执行网格处理命令")
# 顺序执行每个网格的命令
for grid_idx in grid_points.keys(): for grid_idx in grid_points.keys():
try: try:
self._run_command(grid_idx) self._run_command(grid_idx)
except Exception as e: except Exception as e:
self.logger.error(f"网格 {grid_idx + 1} 处理命令执行失败: {str(e)}") self.logger.error(f"网格 {grid_idx + 1} 处理失败,停止后续执行: {str(e)}")
raise # 如果一个网格失败,停止后续执行 raise
def _run_command(self, grid_idx: int):
"""
执行单个网格的命令
Args:
grid_idx: 网格索引
Raises:
Exception: 当命令执行失败时抛出异常
"""
try:
# 确定网格目录和命令
grid_dir = os.path.join(self.output_dir, f'grid_{grid_idx + 1}')
command = f"docker run -ti --rm -v {grid_dir}:/datasets opendronemap/odm --project-path /datasets project --feature-quality lowest --force-gps --use-3dmesh"
self.logger.info(f"执行命令: {command} 在目录: {grid_dir}")
# 在指定目录下执行命令
process = subprocess.Popen(
command,
shell=True,
cwd=grid_dir,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True
)
# 获取命令输出
stdout, stderr = process.communicate()
# 检查命令执行结果
if process.returncode == 0:
self.logger.info(f"网格 {grid_idx + 1} 命令执行成功")
self.logger.debug(f"命令输出至日志文件")
with open(os.path.join(grid_dir, 'odm_success.log'), 'a', encoding='utf-8') as f:
f.write(f"{stdout}")
else:
self.logger.error(f"网格 {grid_idx + 1} 命令执行失败")
self.logger.error(f"错误信息输出至日志文件")
with open(os.path.join(grid_dir, 'odm_error.log'), 'a', encoding='utf-8') as f:
f.write(f"{stdout}")
f.write(f"\n错误日志:\n")
f.write(f"{stderr}")
raise Exception(f"命令执行失败: {stderr}")
except Exception as e:
self.logger.error(f"网格 {grid_idx + 1} 命令执行出错: {str(e)}")
raise

102
preprocess/odm_monitor.py Normal file
View File

@ -0,0 +1,102 @@
import os
import time
import psutil
import logging
import subprocess
from typing import Optional, Tuple
class ODMProcessMonitor:
    """Run an ODM shell command with bounded retries.

    Each attempt launches the command, waits for it to exit while draining
    its output, stores the captured output in a per-attempt log file inside
    the grid directory, and then decides success by looking for ODM's output
    directories. The process exit code is not part of the success decision.
    """

    # Directories that must all exist in the grid directory for an attempt
    # to count as successful.
    SUCCESS_MARKERS = ('odm_orthophoto', 'odm_georeferencing', 'odm_texturing')

    def __init__(self, max_retries: int = 3, check_interval: int = 5,
                 retry_wait_base: int = 30):
        """Configure the monitor.

        Args:
            max_retries: Maximum number of attempts per command.
            check_interval: Seconds between checks on a running process.
            retry_wait_base: Seconds multiplied by the attempt number to get
                the pause before the next attempt (30, 60, ... by default).
        """
        self.max_retries = max_retries
        self.check_interval = check_interval
        self.retry_wait_base = retry_wait_base
        self.logger = logging.getLogger('UAV_Preprocess.ODMMonitor')

    def _is_process_running(self, pid: int) -> bool:
        """Return whether psutil reports a running process with this PID.

        Not used by ``run_odm_with_monitor``: a child that exited but has
        not been waited on yet remains a zombie, and psutil's documentation
        notes that ``is_running()`` may still report such a process as
        running (verify for the installed psutil version).
        """
        try:
            process = psutil.Process(pid)
            return process.is_running()
        except psutil.NoSuchProcess:
            return False

    def _check_success(self, grid_dir: str) -> bool:
        """Return True when every success marker directory exists in grid_dir."""
        # NOTE(review): markers left by an earlier run also satisfy this
        # check, and a command using `--project-path /datasets project` may
        # write below `<grid_dir>/project` instead -- confirm the layout.
        return all(
            os.path.exists(os.path.join(grid_dir, marker))
            for marker in self.SUCCESS_MARKERS
        )

    def _wait_for_exit(self, process: subprocess.Popen) -> Tuple[str, str]:
        """Block until the process exits and return its (stdout, stderr)."""
        while True:
            try:
                # communicate() keeps reading both pipes while it waits, so
                # the child can never stall on a full pipe buffer, and it
                # reaps the child once it has exited.
                return process.communicate(timeout=self.check_interval)
            except subprocess.TimeoutExpired:
                # Still running. Output read so far is retained and returned
                # by the communicate() call that finally succeeds.
                continue

    def run_odm_with_monitor(self, command: str, grid_dir: str, grid_idx: int) -> Tuple[bool, str]:
        """Run the ODM command, retrying until it succeeds or attempts run out.

        Args:
            command: Shell command line to execute.
            grid_dir: Grid directory; used as working directory, as the
                location of the per-attempt logs and for the success check.
            grid_idx: Zero-based grid index (messages show ``grid_idx + 1``).

        Returns:
            Tuple[bool, str]: ``(True, "")`` on success, otherwise
            ``(False, error_message)``. An exception raised while running an
            attempt ends the retries immediately and is reported through the
            error message instead of propagating.
        """
        grid_no = grid_idx + 1
        for attempt in range(1, self.max_retries + 1):
            try:
                self.logger.info(f"网格 {grid_no} 第 {attempt} 次尝试执行ODM")

                process = subprocess.Popen(
                    command,
                    shell=True,
                    cwd=grid_dir,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    text=True,
                    # Undecodable bytes in tool output must not abort the run.
                    errors='replace',
                )
                self.logger.info(f"ODM进程启动PID: {process.pid}")

                stdout, stderr = self._wait_for_exit(process)

                # Keep the full output of every attempt for later inspection.
                log_file = os.path.join(grid_dir, f'odm_attempt_{attempt}.log')
                with open(log_file, 'w', encoding='utf-8') as f:
                    f.write(f"=== 标准输出 ===\n{stdout}\n\n=== 错误输出 ===\n{stderr}")

                if self._check_success(grid_dir):
                    self.logger.info(f"网格 {grid_no} ODM处理成功")
                    return True, ""
                self.logger.warning(f"网格 {grid_no} 第 {attempt} 次尝试失败")

                # Back off a little longer after each failed attempt, but do
                # not wait after the last one.
                if attempt < self.max_retries:
                    wait_time = attempt * self.retry_wait_base
                    self.logger.info(f"等待 {wait_time} 秒后重试...")
                    time.sleep(wait_time)
            except Exception as e:
                error_msg = f"监控进程发生异常: {str(e)}"
                self.logger.error(error_msg)
                return False, error_msg

        error_msg = f"网格 {grid_no} 在 {self.max_retries} 次尝试后仍然失败,需要人工查看"
        self.logger.error(error_msg)
        return False, error_msg