import logging
import os
from datetime import datetime

import pandas as pd
import piexif
from PIL import Image


class GPSExtractor:
    """Extract GPS coordinates and capture dates from the images in a directory.

    Coordinates are returned as signed decimal degrees (south and west are
    negative). Dates are naive ``datetime`` objects parsed from the EXIF
    date/time tags.
    """

    # Numeric EXIF tag ids read by this class.
    _TAG_GPS_LAT_REF = 1        # GPSLatitudeRef: 'N' or 'S'
    _TAG_GPS_LAT = 2            # GPSLatitude: three rationals (deg, min, sec)
    _TAG_GPS_LON_REF = 3        # GPSLongitudeRef: 'E' or 'W'
    _TAG_GPS_LON = 4            # GPSLongitude: three rationals (deg, min, sec)
    _TAG_DATETIME_ORIGINAL = 36867
    _TAG_DATETIME_DIGITIZED = 36868
    _TAG_DATETIME = 306

    # Column order of the DataFrame returned by extract_all_gps().
    _COLUMNS = ['file', 'lat', 'lon', 'date']

    def __init__(self, image_dir):
        """Remember the directory to scan and set up the class logger.

        Args:
            image_dir: Path of the directory that contains the images.
        """
        self.image_dir = image_dir
        self.logger = logging.getLogger('UAV_Preprocess.GPSExtractor')

    @staticmethod
    def _dms_to_decimal(dms):
        """Convert a degrees/minutes/seconds triple to decimal degrees.

        Args:
            dms: Sequence of three ``(numerator, denominator)`` pairs holding
                degrees, minutes and seconds.

        Returns:
            The unsigned angle in decimal degrees.

        Raises:
            IndexError, TypeError: If ``dms`` does not have the expected shape.
            ZeroDivisionError: If any denominator is zero.
        """
        degrees = dms[0][0] / dms[0][1]
        minutes = dms[1][0] / dms[1][1]
        seconds = dms[2][0] / dms[2][1]
        return degrees + minutes / 60 + seconds / 3600

    @staticmethod
    def _hemisphere_sign(ref, negative_letter):
        """Return -1 when ``ref`` names the negative hemisphere, otherwise 1.

        Args:
            ref: Hemisphere reference as bytes or str (e.g. ``b'S'``), or
                ``None`` when the tag is absent.
            negative_letter: ``'S'`` for latitude or ``'W'`` for longitude.
        """
        if isinstance(ref, bytes):
            ref = ref.decode('ascii', errors='ignore')
        if isinstance(ref, str) and ref.strip('\x00 ').upper() == negative_letter:
            return -1
        # A missing or unrecognised reference is treated as north/east.
        return 1

    @staticmethod
    def _parse_datetime(datetime_str):
        """Parse an EXIF date/time value of the form 'YYYY:MM:DD HH:MM:SS'.

        Args:
            datetime_str: The raw tag value, as bytes or str.

        Returns:
            The parsed ``datetime``, or ``None`` when the value is missing or
            malformed.
        """
        try:
            if isinstance(datetime_str, bytes):
                datetime_str = datetime_str.decode()
            # Strip padding (NULs / blanks) before parsing.
            cleaned = datetime_str.strip('\x00 ')
            return datetime.strptime(cleaned, '%Y:%m:%d %H:%M:%S')
        except (AttributeError, TypeError, ValueError):
            # ValueError also covers UnicodeDecodeError from decode().
            return None

    def _extract_coordinates(self, gps_info):
        """Return ``(lat, lon)`` in signed decimal degrees from a GPS IFD dict.

        Returns ``(None, None)`` when either coordinate is missing or
        malformed, so a half-populated GPS block never yields a partial fix.
        """
        lat_dms = gps_info.get(self._TAG_GPS_LAT)
        lon_dms = gps_info.get(self._TAG_GPS_LON)
        if not lat_dms or not lon_dms:
            return None, None

        try:
            lat = self._dms_to_decimal(lat_dms)
            lon = self._dms_to_decimal(lon_dms)
        except (IndexError, TypeError, ZeroDivisionError):
            return None, None

        # The DMS values are unsigned; the hemisphere lives in the *Ref tags.
        if self._hemisphere_sign(gps_info.get(self._TAG_GPS_LAT_REF), 'S') < 0:
            lat = -lat
        if self._hemisphere_sign(gps_info.get(self._TAG_GPS_LON_REF), 'W') < 0:
            lon = -lon
        return lat, lon

    def _extract_date(self, exif_data):
        """Return the capture date from the first date tag that parses.

        Preference order: DateTimeOriginal, DateTimeDigitized, then the basic
        DateTime tag of the 0th IFD. Returns ``None`` when none is usable.
        """
        exif_ifd = exif_data.get("Exif") or {}
        zeroth_ifd = exif_data.get("0th") or {}
        candidates = (
            exif_ifd.get(self._TAG_DATETIME_ORIGINAL),
            exif_ifd.get(self._TAG_DATETIME_DIGITIZED),
            zeroth_ifd.get(self._TAG_DATETIME),
        )
        for raw_value in candidates:
            if not raw_value:
                continue
            parsed = self._parse_datetime(raw_value)
            if parsed is not None:
                return parsed
        return None

    def get_gps_and_date(self, image_path):
        """Extract the GPS position and capture date of a single image.

        Args:
            image_path: Path of the image file.

        Returns:
            A ``(lat, lon, date)`` tuple. ``lat`` and ``lon`` are signed
            decimal degrees, or both ``None`` when no usable position exists.
            ``date`` is a ``datetime`` or ``None``. When the file cannot be
            read at all, the error is logged and ``(None, None, None)`` is
            returned.
        """
        try:
            # The context manager releases the file handle as soon as the
            # raw EXIF payload has been read.
            with Image.open(image_path) as image:
                exif_bytes = image.info.get('exif')
            exif_data = piexif.load(exif_bytes) if exif_bytes else {}

            lat, lon = self._extract_coordinates(exif_data.get("GPS") or {})
            if lat is not None and lon is not None:
                self.logger.debug(f"成功提取图片GPS坐标: {image_path} - 纬度: {lat}, 经度: {lon}")
            else:
                self.logger.warning(f"图片无GPS信息: {image_path}")

            date_info = self._extract_date(exif_data)
            if date_info is not None:
                self.logger.debug(f"成功提取图片拍摄日期: {image_path} - {date_info}")
            else:
                self.logger.warning(f"图片无拍摄日期信息: {image_path}")

            return lat, lon, date_info
        except Exception as e:
            # Best-effort boundary: unreadable or non-image files must not
            # abort a directory scan.
            self.logger.error(f"提取图片信息时发生错误: {image_path} - {str(e)}")
            return None, None, None

    def extract_all_gps(self):
        """Extract GPS positions and capture dates for every file in the directory.

        Sub-directories are skipped and files are visited in sorted name
        order. Only files with a usable GPS position are kept.

        Returns:
            A ``pandas.DataFrame`` with columns ``file``, ``lat``, ``lon`` and
            ``date``, one row per image with a position. The columns are
            present even when no row is.
        """
        self.logger.info(f"开始从目录提取GPS坐标和拍摄日期: {self.image_dir}")
        gps_data = []
        total_images = 0

        for image_file in sorted(os.listdir(self.image_dir)):
            image_path = os.path.join(self.image_dir, image_file)
            if not os.path.isfile(image_path):
                continue
            total_images += 1

            lat, lon, date = self.get_gps_and_date(image_path)
            # Compare against None: 0.0 is a valid latitude/longitude.
            if lat is None or lon is None:
                continue
            gps_data.append({
                'file': image_file,
                'lat': lat,
                'lon': lon,
                'date': date,
            })

        successful_extractions = len(gps_data)
        self.logger.info(f"GPS坐标和拍摄日期提取完成 - 总图片数: {total_images}, 成功提取: {successful_extractions}, 失败: {total_images - successful_extractions}")
        return pd.DataFrame(gps_data, columns=self._COLUMNS)