添加守护进程
This commit is contained in:
parent
0f6a30f11e
commit
1d3c1bd71b
@ -204,10 +204,10 @@ class ImagePreprocessor:
|
||||
self.copy_images(grid_points)
|
||||
self.visualize_results()
|
||||
# self.logger.info("预处理任务完成")
|
||||
# self.command_runner.run_grid_commands(
|
||||
# grid_points,
|
||||
# self.config.enable_grid_division
|
||||
# )
|
||||
self.command_runner.run_grid_commands(
|
||||
grid_points,
|
||||
self.config.enable_grid_division
|
||||
)
|
||||
except Exception as e:
|
||||
self.logger.error(f"处理过程中发生错误: {str(e)}", exc_info=True)
|
||||
raise
|
||||
|
@ -1,22 +1,51 @@
|
||||
import os
|
||||
import logging
|
||||
import subprocess
|
||||
import time
|
||||
from typing import Dict
|
||||
import pandas as pd
|
||||
from preprocess.odm_monitor import ODMProcessMonitor
|
||||
|
||||
|
||||
class CommandRunner:
|
||||
"""执行网格处理命令的类"""
|
||||
|
||||
def __init__(self, output_dir: str):
|
||||
def __init__(self, output_dir: str, max_retries: int = 3):
    """
    Set up the grid command runner.

    Args:
        output_dir: Path of the output directory; stored as
            ``self.output_dir``.
        max_retries: Maximum number of retries; stored on the runner and
            forwarded to the ODMProcessMonitor created here.
    """
    self.logger = logging.getLogger('UAV_Preprocess.CommandRunner')
    self.max_retries = max_retries
    self.output_dir = output_dir
    # The monitor receives the same retry budget that is kept on the runner.
    self.monitor = ODMProcessMonitor(max_retries=self.max_retries)
|
||||
|
||||
def _run_command(self, grid_idx: int):
|
||||
"""
|
||||
执行单个网格的命令
|
||||
|
||||
Args:
|
||||
grid_idx: 网格索引
|
||||
|
||||
Raises:
|
||||
Exception: 当命令执行失败时抛出异常
|
||||
"""
|
||||
try:
|
||||
grid_dir = os.path.join(self.output_dir, f'grid_{grid_idx + 1}')
|
||||
command = f"docker run -ti --rm -v {grid_dir}:/datasets opendronemap/odm --project-path /datasets project --feature-quality lowest --force-gps --use-3dmesh"
|
||||
|
||||
self.logger.info(f"开始执行命令: {command}")
|
||||
success, error_msg = self.monitor.run_odm_with_monitor(command, grid_dir, grid_idx)
|
||||
|
||||
if not success:
|
||||
raise Exception(error_msg)
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"网格 {grid_idx + 1} 处理失败: {str(e)}")
|
||||
raise
|
||||
|
||||
def run_grid_commands(self, grid_points: Dict[int, pd.DataFrame], enable_grid_division: bool = True):
|
||||
"""
|
||||
@ -32,59 +61,9 @@ class CommandRunner:
|
||||
|
||||
self.logger.info("开始执行网格处理命令")
|
||||
|
||||
# 顺序执行每个网格的命令
|
||||
for grid_idx in grid_points.keys():
|
||||
try:
|
||||
self._run_command(grid_idx)
|
||||
except Exception as e:
|
||||
self.logger.error(f"网格 {grid_idx + 1} 处理命令执行失败: {str(e)}")
|
||||
raise # 如果一个网格失败,停止后续执行
|
||||
|
||||
def _run_command(self, grid_idx: int):
|
||||
"""
|
||||
执行单个网格的命令
|
||||
|
||||
Args:
|
||||
grid_idx: 网格索引
|
||||
|
||||
Raises:
|
||||
Exception: 当命令执行失败时抛出异常
|
||||
"""
|
||||
try:
|
||||
# 确定网格目录和命令
|
||||
grid_dir = os.path.join(self.output_dir, f'grid_{grid_idx + 1}')
|
||||
command = f"docker run -ti --rm -v {grid_dir}:/datasets opendronemap/odm --project-path /datasets project --feature-quality lowest --force-gps --use-3dmesh"
|
||||
|
||||
self.logger.info(f"执行命令: {command} 在目录: {grid_dir}")
|
||||
|
||||
# 在指定目录下执行命令
|
||||
process = subprocess.Popen(
|
||||
command,
|
||||
shell=True,
|
||||
cwd=grid_dir,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
text=True
|
||||
)
|
||||
|
||||
# 获取命令输出
|
||||
stdout, stderr = process.communicate()
|
||||
|
||||
# 检查命令执行结果
|
||||
if process.returncode == 0:
|
||||
self.logger.info(f"网格 {grid_idx + 1} 命令执行成功")
|
||||
self.logger.debug(f"命令输出至日志文件")
|
||||
with open(os.path.join(grid_dir, 'odm_success.log'), 'a', encoding='utf-8') as f:
|
||||
f.write(f"{stdout}")
|
||||
else:
|
||||
self.logger.error(f"网格 {grid_idx + 1} 命令执行失败")
|
||||
self.logger.error(f"错误信息输出至日志文件")
|
||||
with open(os.path.join(grid_dir, 'odm_error.log'), 'a', encoding='utf-8') as f:
|
||||
f.write(f"{stdout}")
|
||||
f.write(f"\n错误日志:\n")
|
||||
f.write(f"{stderr}")
|
||||
raise Exception(f"命令执行失败: {stderr}")
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"网格 {grid_idx + 1} 命令执行出错: {str(e)}")
|
||||
self.logger.error(f"网格 {grid_idx + 1} 处理失败,停止后续执行: {str(e)}")
|
||||
raise
|
||||
|
102
preprocess/odm_monitor.py
Normal file
102
preprocess/odm_monitor.py
Normal file
@ -0,0 +1,102 @@
|
||||
import os
|
||||
import time
|
||||
import psutil
|
||||
import logging
|
||||
import subprocess
|
||||
from typing import Optional, Tuple
|
||||
|
||||
class ODMProcessMonitor:
    """Run an ODM command as a child process, watch it, and retry on failure.

    Each attempt starts the command through the shell inside the grid
    directory, captures its stdout/stderr into ``odm_attempt_<n>.log`` in that
    directory, and is considered successful only if the command exits with
    status 0 and the expected ODM output directories exist afterwards.
    """

    # Directories ODM is expected to have produced when a run completed.
    SUCCESS_MARKERS = ('odm_orthophoto', 'odm_georeferencing', 'odm_texturing')

    def __init__(self, max_retries: int = 3, check_interval: int = 5):
        """
        Initialise the monitor.

        Args:
            max_retries: Maximum number of attempts per grid.
            check_interval: Seconds between liveness checks while the child
                process is running.
        """
        self.max_retries = max_retries
        self.check_interval = check_interval
        self.logger = logging.getLogger('UAV_Preprocess.ODMMonitor')

    def _is_process_running(self, pid: int) -> bool:
        """Return True if a live (non-zombie) process with this PID exists.

        Kept for callers that only have a PID; the monitor itself waits on the
        ``Popen`` object it created, which also reaps the child.
        """
        try:
            process = psutil.Process(pid)
            # psutil reports an exited-but-unreaped child (zombie) as still
            # "running"; treat it as finished so pollers cannot spin forever.
            return process.is_running() and process.status() != psutil.STATUS_ZOMBIE
        except psutil.NoSuchProcess:
            return False

    def _check_success(self, grid_dir: str) -> bool:
        """Return True if every success-marker directory exists in grid_dir."""
        return all(
            os.path.exists(os.path.join(grid_dir, marker))
            for marker in self.SUCCESS_MARKERS
        )

    def _wait_for_exit(self, process) -> Tuple[str, str]:
        """Block until the child exits and return its (stdout, stderr).

        The pipes are drained while waiting; reading them only after the child
        exited would let a chatty command fill the pipe buffer and block
        forever. ``communicate`` can be retried after a timeout without losing
        output, which gives a periodic wake-up every ``check_interval`` seconds.
        """
        interval = self.check_interval
        timeout = interval if interval and interval > 0 else None
        while True:
            try:
                return process.communicate(timeout=timeout)
            except subprocess.TimeoutExpired:
                self.logger.debug(f"ODM进程仍在运行,PID: {process.pid}")

    def run_odm_with_monitor(self, command: str, grid_dir: str, grid_idx: int) -> Tuple[bool, str]:
        """
        Run the ODM command and monitor the process, retrying on failure.

        Args:
            command: ODM command line, executed through the shell.
            grid_dir: Grid directory; used as the working directory, as the
                location of the per-attempt log files and for the success check.
            grid_idx: Grid index (log messages use ``grid_idx + 1``).

        Returns:
            Tuple[bool, str]: ``(True, "")`` on success, otherwise
            ``(False, error message)``. An exception while starting or
            monitoring the process ends the retries immediately.
        """
        for attempt in range(self.max_retries):
            self.logger.info(f"网格 {grid_idx + 1} 第 {attempt + 1} 次尝试执行ODM")

            process = None
            try:
                # Start the ODM process.
                process = subprocess.Popen(
                    command,
                    shell=True,
                    cwd=grid_dir,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    text=True
                )
                self.logger.info(f"ODM进程启动,PID: {process.pid}")

                stdout, stderr = self._wait_for_exit(process)

                # Keep the full output of every attempt for later inspection.
                log_file = os.path.join(grid_dir, f'odm_attempt_{attempt + 1}.log')
                with open(log_file, 'w', encoding='utf-8') as f:
                    f.write(f"=== 标准输出 ===\n{stdout}\n\n=== 错误输出 ===\n{stderr}")

            except Exception as e:
                # Do not leave a half-finished child behind when monitoring fails.
                if process is not None and process.poll() is None:
                    process.kill()
                    process.wait()
                error_msg = f"监控进程发生异常: {str(e)}"
                self.logger.error(error_msg)
                return False, error_msg

            # Marker directories alone are not proof of success: they may be
            # left over from an earlier or partially completed run, so the
            # exit status has to be clean as well.
            if process.returncode == 0 and self._check_success(grid_dir):
                self.logger.info(f"网格 {grid_idx + 1} ODM处理成功")
                return True, ""

            self.logger.warning(f"网格 {grid_idx + 1} 第 {attempt + 1} 次尝试失败")

            # Back off a little longer before each further attempt.
            if attempt < self.max_retries - 1:
                wait_time = (attempt + 1) * 30
                self.logger.info(f"等待 {wait_time} 秒后重试...")
                time.sleep(wait_time)

        error_msg = f"网格 {grid_idx + 1} 在 {self.max_retries} 次尝试后仍然失败,需要人工查看"
        self.logger.error(error_msg)
        return False, error_msg
|
Loading…
Reference in New Issue
Block a user