ODM_pro/utils/odm_monitor.py

140 lines
5.7 KiB
Python
Raw Permalink Normal View History

2024-12-20 21:30:44 +08:00
import os
import time
import psutil
import logging
import subprocess
from typing import Optional, Tuple
2024-12-22 14:28:07 +08:00
2024-12-20 21:30:44 +08:00
class ODMProcessMonitor:
"""ODM进程监控器"""
2024-12-22 14:28:07 +08:00
def __init__(self, max_retries: int = 3, check_interval: int = 10, mode: str = "快拼模式"):
2024-12-20 21:30:44 +08:00
"""
初始化监控器
2024-12-22 14:28:07 +08:00
2024-12-20 21:30:44 +08:00
Args:
max_retries: 最大重试次数
check_interval: 检查间隔
2024-12-21 12:36:14 +08:00
mode: 模式
2024-12-20 21:30:44 +08:00
"""
self.max_retries = max_retries
self.check_interval = check_interval
self.logger = logging.getLogger('UAV_Preprocess.ODMMonitor')
2024-12-21 12:36:14 +08:00
self.mode = mode
2024-12-20 21:30:44 +08:00
2024-12-22 14:28:07 +08:00
def _check_docker_container(self, process_name: str = "opendronemap/odm") -> bool:
"""检查是否有指定的Docker容器在运行"""
2024-12-20 21:30:44 +08:00
try:
2024-12-22 14:28:07 +08:00
result = subprocess.run(
["docker", "ps", "--filter",
f"ancestor={process_name}", "--format", "{{.ID}}"],
capture_output=True,
text=True
)
return bool(result.stdout.strip())
except Exception as e:
self.logger.error(f"检查Docker容器状态时发生错误: {str(e)}")
2024-12-20 21:30:44 +08:00
return False
def _check_success(self, grid_dir: str) -> bool:
"""检查ODM是否执行成功"""
2024-12-21 12:03:54 +08:00
if self.mode == "快拼模式":
success_markers = ['odm_orthophoto', 'odm_georeferencing']
else:
2024-12-22 14:28:07 +08:00
success_markers = ['odm_orthophoto',
'odm_georeferencing', 'odm_texturing']
return all(os.path.exists(os.path.join(grid_dir, 'project', marker)) for marker in success_markers)
2024-12-20 21:30:44 +08:00
def run_odm_with_monitor(self, command: str, grid_dir: str, grid_idx: int) -> Tuple[bool, str]:
"""
运行ODM命令并监控进程
"""
attempt = 0
while attempt < self.max_retries:
try:
self.logger.info(f"网格 {grid_idx + 1}{attempt + 1} 次尝试执行ODM")
2024-12-22 16:11:55 +08:00
# 创建日志文件
log_file = os.path.join(grid_dir, f'odm_attempt_{attempt + 1}.log')
with open(log_file, 'w', encoding='utf-8') as f:
f.write(f"=== ODM处理日志 ===\n开始时间: {time.strftime('%Y-%m-%d %H:%M:%S')}\n\n")
# 启动ODM进程实时获取输出
2024-12-20 21:30:44 +08:00
process = subprocess.Popen(
command,
shell=True,
cwd=grid_dir,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
2024-12-22 16:11:55 +08:00
text=True,
bufsize=1, # 行缓冲
universal_newlines=True
2024-12-20 21:30:44 +08:00
)
2024-12-22 16:11:55 +08:00
2024-12-22 14:28:07 +08:00
self.logger.info("ODM进程已启动开始监控Docker容器")
2024-12-22 16:11:55 +08:00
2024-12-22 14:28:07 +08:00
# 等待进程启动
time.sleep(10)
2024-12-22 16:11:55 +08:00
# 实时读取输出并写入日志
def log_output(pipe, log_file, prefix=""):
with open(log_file, 'a', encoding='utf-8') as f:
for line in pipe:
f.write(f"{prefix}{line}")
f.flush() # 确保立即写入
# 创建线程读取输出
from threading import Thread
stdout_thread = Thread(target=log_output,
args=(process.stdout, log_file))
stderr_thread = Thread(target=log_output,
args=(process.stderr, log_file, "ERROR: "))
stdout_thread.daemon = True
stderr_thread.daemon = True
stdout_thread.start()
stderr_thread.start()
2024-12-22 14:28:07 +08:00
# 监控Docker容器
2024-12-20 21:30:44 +08:00
while True:
2024-12-22 14:28:07 +08:00
if not self._check_docker_container():
2024-12-22 16:11:55 +08:00
# Docker容器已结束
process.wait() # 等待进程完全结束
# 等待输出线程结束
stdout_thread.join(timeout=5)
stderr_thread.join(timeout=5)
# 记录结束时间
with open(log_file, 'a', encoding='utf-8') as f:
f.write(f"\n=== 处理结束 ===\n结束时间: {time.strftime('%Y-%m-%d %H:%M:%S')}\n")
2024-12-22 14:28:07 +08:00
# 检查是否成功完成
2024-12-20 21:30:44 +08:00
if self._check_success(grid_dir):
self.logger.info(f"网格 {grid_idx + 1} ODM处理成功")
return True, ""
else:
2024-12-22 14:28:07 +08:00
self.logger.warning(
f"网格 {grid_idx + 1}{attempt + 1} 次尝试失败")
2024-12-20 21:30:44 +08:00
break
2024-12-22 16:11:55 +08:00
2024-12-20 21:30:44 +08:00
time.sleep(self.check_interval)
2024-12-22 16:11:55 +08:00
2024-12-20 21:30:44 +08:00
# 如果不是最后一次尝试,等待后重试
if attempt < self.max_retries - 1:
wait_time = (attempt + 1) * 30
self.logger.info(f"等待 {wait_time} 秒后重试...")
time.sleep(wait_time)
2024-12-22 16:11:55 +08:00
2024-12-20 21:30:44 +08:00
attempt += 1
2024-12-22 16:11:55 +08:00
2024-12-20 21:30:44 +08:00
except Exception as e:
error_msg = f"监控进程发生异常: {str(e)}"
self.logger.error(error_msg)
return False, error_msg
error_msg = f"网格 {grid_idx + 1}{self.max_retries} 次尝试后仍然失败,需要人工查看"
self.logger.error(error_msg)
2024-12-22 14:28:07 +08:00
return False, error_msg