From 9e30a58fa93cfa8b187d0d18692ab1e91e8182f4 Mon Sep 17 00:00:00 2001 From: "long.ao" Date: Wed, 18 Dec 2024 20:50:39 +0800 Subject: [PATCH] =?UTF-8?q?=E6=95=B0=E6=8D=AE=E7=BB=93=E6=9E=84=E4=BF=AE?= =?UTF-8?q?=E6=94=B9=E4=B8=BADataFrame=EF=BC=8C=E8=BF=87=E6=BB=A4=E7=AE=97?= =?UTF-8?q?=E6=B3=95=E6=9B=B4=E6=96=B0=EF=BC=8C=E5=A2=9E=E5=8A=A0run=20com?= =?UTF-8?q?mand=E6=A8=A1=E5=9D=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- odm_preprocess.py | 94 ++++++++++++------ .../command_runner.cpython-312.pyc | Bin 0 -> 4686 bytes .../__pycache__/gps_extractor.cpython-312.pyc | Bin 3727 -> 5037 bytes .../__pycache__/gps_filter.cpython-312.pyc | Bin 8844 -> 8633 bytes .../__pycache__/grid_divider.cpython-312.pyc | Bin 4662 -> 5080 bytes preprocess/__pycache__/logger.cpython-312.pyc | Bin 1586 -> 1609 bytes .../__pycache__/time_filter.cpython-312.pyc | Bin 0 -> 3977 bytes preprocess/command_runner.py | 89 +++++++++++++++++ preprocess/gps_extractor.py | 72 ++++++++++---- preprocess/gps_filter.py | 46 ++++----- preprocess/grid_divider.py | 51 +++++----- preprocess/time_filter.py | 73 ++++++++++++++ 12 files changed, 331 insertions(+), 94 deletions(-) create mode 100644 preprocess/__pycache__/command_runner.cpython-312.pyc create mode 100644 preprocess/__pycache__/time_filter.cpython-312.pyc create mode 100644 preprocess/command_runner.py create mode 100644 preprocess/time_filter.py diff --git a/odm_preprocess.py b/odm_preprocess.py index bdc56c7..37faaf6 100644 --- a/odm_preprocess.py +++ b/odm_preprocess.py @@ -1,7 +1,9 @@ -from gps_extractor import GPSExtractor -from gps_filter import GPSFilter -from grid_divider import GridDivider -from logger import setup_logger +from preprocess.gps_extractor import GPSExtractor +from preprocess.time_filter import TimeFilter +from preprocess.gps_filter import GPSFilter +from preprocess.grid_divider import GridDivider +from preprocess.logger import setup_logger +from 
preprocess.command_runner import CommandRunner import os import pandas as pd import shutil @@ -9,6 +11,8 @@ import matplotlib.pyplot as plt from typing import List, Dict, Optional from dataclasses import dataclass from tqdm import tqdm +import subprocess +from concurrent.futures import ThreadPoolExecutor @dataclass @@ -21,9 +25,11 @@ class PreprocessConfig: filter_distance_threshold: float = 0.001 filter_min_neighbors: int = 6 grid_overlap: float = 0.05 + grid_size: float = 250 enable_filter: bool = True enable_grid_division: bool = True enable_visualization: bool = True + enable_copy_images: bool = True class ImagePreprocessor: @@ -31,16 +37,26 @@ class ImagePreprocessor: self.config = config self.logger = setup_logger(config.output_dir) self.gps_points = [] + self.command_runner = CommandRunner(config.output_dir) - def extract_gps(self) -> List[Dict]: + def extract_gps(self) -> pd.DataFrame: """提取GPS数据""" self.logger.info("开始提取GPS数据") extractor = GPSExtractor(self.config.image_dir) self.gps_points = extractor.extract_all_gps() self.logger.info(f"成功提取 {len(self.gps_points)} 个GPS点") return self.gps_points + + def time_filter(self) -> pd.DataFrame: + """时间过滤""" + self.logger.info("开始时间过滤") + time_filter = TimeFilter(self.config.output_dir) + self.gps_points = time_filter.filter_by_date(self.gps_points) + self.logger.info(f"时间过滤后剩余 {len(self.gps_points)} 个GPS点") + return self.gps_points - def filter_points(self) -> List[Dict]: + # TODO 过滤密集点算法需要改进 + def filter_points(self) -> pd.DataFrame: """过滤GPS点""" if not self.config.enable_filter: return self.gps_points @@ -67,29 +83,30 @@ class ImagePreprocessor: self.logger.info(f"密集点过滤后剩余 {len(self.gps_points)} 个GPS点") return self.gps_points - def divide_grids(self) -> Dict[int, List[Dict]]: + def divide_grids(self) -> Dict[int, pd.DataFrame]: """划分网格""" if not self.config.enable_grid_division: return {0: self.gps_points} # 不划分网格时,所有点放在一个网格中 self.logger.info(f"开始划分网格 (重叠率: {self.config.grid_overlap})") grid_divider = 
GridDivider(overlap=self.config.grid_overlap) - grids = grid_divider.divide_grids(self.gps_points) + grids = grid_divider.divide_grids(self.gps_points, grid_size=self.config.grid_size) grid_points = grid_divider.assign_to_grids(self.gps_points, grids) self.logger.info(f"成功划分为 {len(grid_points)} 个网格") return grid_points - def copy_images(self, grid_points: Dict[int, List[Dict]]): + def copy_images(self, grid_points: Dict[int, pd.DataFrame]): """复制图像到目标文件夹""" + if not self.config.enable_copy_images: + return + self.logger.info("开始复制图像文件") - os.makedirs(self.config.output_dir, exist_ok=True) for grid_idx, points in grid_points.items(): if self.config.enable_grid_division: - output_dir = os.path.join(self.config.output_dir, f'grid_{ - grid_idx + 1}', 'images') + output_dir = os.path.join(self.config.output_dir, f'grid_{grid_idx + 1}', 'project', 'images') else: - output_dir = os.path.join(self.config.output_dir, 'images') + output_dir = os.path.join(self.config.output_dir, 'project', 'images') os.makedirs(output_dir, exist_ok=True) @@ -106,38 +123,56 @@ class ImagePreprocessor: self.logger.info("开始生成可视化结果") extractor = GPSExtractor(self.config.image_dir) - original_points = extractor.extract_all_gps() + original_points_df = extractor.extract_all_gps() + # 读取被过滤的图片列表 with open(os.path.join(self.config.output_dir, 'del_imgs.txt'), "r", encoding="utf-8") as file: - filtered_file = [line.strip() for line in file] + filtered_files = [line.strip() for line in file if line.strip()] - # 绘制散点图 - plt.figure(figsize=(10, 8)) - plt.scatter([p['lon'] for p in original_points], - [p['lat'] for p in original_points], - color='blue', label="Original Points", alpha=0.6) - plt.scatter([p['lon'] for p in original_points if p['file'] in filtered_file], - [p['lat'] - for p in original_points if p['file'] in filtered_file], - color="red", label="Filtered Points", alpha=0.6) + # 创建一个新的图形 + plt.figure(figsize=(20, 16)) + + # 绘制所有原始点 + plt.scatter(original_points_df['lon'], + 
original_points_df['lat'], + color='blue', + label="Original Points", + alpha=0.6) + + # 绘制被过滤的点 + filtered_points_df = original_points_df[original_points_df['file'].isin(filtered_files)] + plt.scatter(filtered_points_df['lon'], + filtered_points_df['lat'], + color="red", + label="Filtered Points", + alpha=0.6) + + # 设置图形属性 plt.title("GPS Coordinates of Images", fontsize=14) plt.xlabel("Longitude", fontsize=12) plt.ylabel("Latitude", fontsize=12) plt.grid(True) plt.legend() + + # 保存图形 plt.savefig(os.path.join(self.config.output_dir, 'filter_GPS.png')) plt.close() self.logger.info("预处理结果图已保存") def process(self): """执行完整的预处理流程""" - try: + try: self.extract_gps() + self.time_filter() self.filter_points() grid_points = self.divide_grids() self.copy_images(grid_points) self.visualize_results() self.logger.info("预处理任务完成") + self.command_runner.run_grid_commands( + grid_points, + self.config.enable_grid_division + ) except Exception as e: self.logger.error(f"处理过程中发生错误: {str(e)}", exc_info=True) raise @@ -146,16 +181,17 @@ class ImagePreprocessor: if __name__ == '__main__': # 创建配置 config = PreprocessConfig( - image_dir=r'C:\datasets\1815\output\grid_5\images', - output_dir=r'C:\datasets\1815\output\grid_5', + image_dir=r'C:\datasets\1815\images', + output_dir=r'C:\datasets\1815\output', filter_grid_size=0.001, filter_dense_distance_threshold=10, filter_distance_threshold=0.001, filter_min_neighbors=6, grid_overlap=0.05, - enable_filter=False, + enable_filter=True, enable_grid_division=True, - enable_visualization=False + enable_visualization=True, + enable_copy_images=True ) # 创建处理器并执行 diff --git a/preprocess/__pycache__/command_runner.cpython-312.pyc b/preprocess/__pycache__/command_runner.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2336853d073743bdd0aca74b75fa75eb20ffc52a GIT binary patch literal 4686 zcmbtYe{d639)G*p&5t&vX-U%-DQp3yL0f+?DrP*TSP6GPL4g|x(HOGZl9DFz?S|4$ 
ziX4MdygJYm42X>72G4T!Xq|JkA}HQC!i@LF_FAvK#hY=)Zd(4hxhmXnga6$3?QYVv zfTP~a^!@z4@Atj;{q@;?QBaVFpwyn()~MJK`i2ZLg)ur;>!6cDJmRSc^64$*qqH&Y zqoJiEre?;+Q2H9{Gt`&Sz~Z@Gjvi& zKtuv?r+g-!7MKPa_%S|~&*K@vELa3xG33OF$j=+g(@o;cY#e!eT+eNH{KZ~V|pYS;O(k4}xhzHj{9 zkCePvLW(D(03XI7<2IT!6V&AI>Po0mC`lzzKwBV{L{_xYlzGR@1hhQWOX)+{lcj8> z-KInY=JPnCs2wNNw|}d4zNQ~YKd5@FCrgm+)Z#|blO2)mQ>XTD6Cb~-?ms(z{ET}3 z$ixSI>c{)Cq-}FI)&4rL9t&|C3kjlFGwprq-3&Ivi4;pD*4P-1HY&D8L0SWS0V`%Q zCtx?NFrpA?&aP>T9a1dr7o`Lrj``Qstqp)ce>^MK zAJX*_z?x!e;;o7`5C})ZQXtUgns%s~%sjDOk!Wxanw*91xU+-lEd0t9@*8oO55!~Ps3a;*AsXBk5dzvAAKn=j!?9>6s|lbdyfy9CfTlpZ zv{;-(O@`J8!%UgdC52jPpvkongQzKE-I-0$@}^#c8wIx0WXV}Fy1hnQhn{gF)NZn& zTc04A!>QB7z$q3Y$(rG}2;rse%x%t3M*h7!-`3iu$#q1ySR{44zkCpyxLMy zZT@tpyNLj75{ZYQ+JnAzAjhInyJN&&F=($i8XUHBqxRyjt@eZY2l7X(WrNl-x%`n~ zt7p=T$`_54RSlL^{k?2S$Er!1DR7NCoFk6PK}ThG)t8RDzhzPR-T%F2M$U@w5Us7I z^1Jd43UAM<{^w7x>3K|kWaIFr&GHuC(59{OM!&q^nPJDXqmGjACt28ZLo9=XeaaNo zmeHS<*;Y8&Pb+Iy6taIQWTB7ez~Z!6Zi5NU+m}PMP9YnTU_viH)}O({NTi%mcq&Qp zwC+tO=u9?jhisOl!M~UjrgjtT%zEq8_NH6;CYv-7uOxeOeQ?T4n`TXM$n?OUElFA; zde%x4v4$DthgW&aJJw#~b`VRLrr9&L!!Q%A{NM@gY`Yn#h=t53ylqDn*yD>o;m&ze zFUaSg$cc5g;ZZt6Bn)|bZ#LFS1R`kVXXu#b3o<(7XYqyaIC_no@gQoqBrQ$EmuHkD zdmK$k=ZnZM!viu#(QR%~F@p^Z`P`Q%{t+*6D;~3_&hmf0gfnxCAN>;MA9@LCe&)t} zNfIc*TMV7;r52+5kw~@B6xxC)__Y@4nP%pE=fcdqL|}ndwESoNnXl}UDl>eN*35aP z`yS-KqITrWw{e6*}>Dnuqey` zg4yig;nef5PD7Gfib<#5PrrN|@S%EcuR1W0<)yG%KETA++e-OZXor9~$O2ro6y~a{ zaWhxFlWPll?&X70P!uGQi^YW~k7H4xIT*h;#sfh$fV?IlBvr?Q(spi&(wGK<1|cXV zuu%PcA{Yrvtz0D5B8U=Mh~bb>-53`WB%~Q{sNr%Cmmsl>Q{U*(A+3ko=w=mG+%7~S ziYe5>D`rvR0RU?)q(p_0gk6%BqcQnxNRT^mxlfVqIHbP(rk0EKX&o|8?EiB*^_Kcc zD*f92v5($wD}-AG#6(C#$^d^AD%i#*q=xEc@KIwGqM;ZMAi4f#{;KxW31UrG4j-H7 z>l^#zWcoLKx0X^sq-dGE24o~i7|lJa&I?|lWaceEiNGB&Ly)*t}so2c{OmZ zK?h8BDS2XIn}PThc0Ey?!mR)0`X`lqlK&IYa40AVN*)$KIpkJ?!t%nlM5DrJv}_9w zO9CcI7L$03Nlp=u$Xd}52`0C0 zn=>7Q+uWZyRsA^`k57ut%)O3PC%T`j3(SF*eA?R6*D^(O{4U!p$r57f&Kt{y5~ z)3Iu_?B0HBPunCyo~<<0rGC1@`?cM1aP@)JBlfw2_PIxQ_cZow>5p7kc43KJv3A&A 
z4?v`}a>RAlpzE$tN9m|*&Zu+lSEZGy7rI_Bn$eQ-YXz3lSsmUMTilySztt z9^F3dth#POE7Q1;%sa&IO2?2g%9 zo9VL}E9pO6>xSlYZnn3?^!ZX3^e@_2Z?Wa#EEDM$Yjp|hooBf?$3kW*ZL2Cw7njtm zDl=UwW1$cKjM(TdAfKAiV6zYiD0zWEbBs^Gw_yNqcYuT?W5p5(KoATBG+W2yy9kry zubAMAQlaC#VylBdxDo>&h1nL1MetlgL+to8!X$|4hE0$+CKdS-5Nn~@hyKA-O)^$? z71h1;DkAN-PABW?+1S6ZAAgX1Hz_~x#9(#ZB!WrV|J+s5PTo_^7I$yBhM>{86P|7> zz6%;m03~)qrI@AGIK(9VtmZiU>mw*)4_Vd@@(a?-9!{}7s^9)Gyc`z5yO791HEE(K e>N2ujMzb!XqJN^&%V_p>W<5m}e}_n|arht9vg+0V literal 0 HcmV?d00001 diff --git a/preprocess/__pycache__/gps_extractor.cpython-312.pyc b/preprocess/__pycache__/gps_extractor.cpython-312.pyc index 0a9f3f8056d09938896b4937e08a58f62b42c544..bd6097bc705ca9ca608fbda10e0256f8512df28b 100644 GIT binary patch delta 2594 zcmaJ@Yfu~46~3!|Xcw&zSP}>%EXioGg>8(1U`nYS8}qVb48|`Ah03fTqJY$0sXRuM z*5esN;|8glsYo2xcsjK`bv+?zr{f=;HZz%IoXL-_Mx+vpGvjnX4*z7k50CSw_X^~} zWZFB@IrrXkzH`r>J?Gq=ExNli`*W=}3&8RDYZnG|x^T7$YYXpm_V9qmhR7}oa(qZV ztnN}{Kn0orPh0{#c}ImH0Cdsgn3GyV(^e;@kS)Wm0XLk`yvy(QdxqV8NR$~HAn^1^ z_^qAgZW7tLvmxwK^W-IHkEbAI#??+*VVl}I8piyBtIzKh0(-+>*mbMFypnwL7s>FY zWaPKW_pXQk{b2u7Zn0&+s3MZe%f@l+_@J=N#s-3rq% z*5`(M-VqpYHWTk&s3@XmunAfO)b;vYg6Qte$R{kDgIcylYJ?E68j_!a6h$zm6}>sG zcSQA$`R+JZ6Xj}RT&-MJdNQVOli9Xs4|Jv{0Aox``ur3Q`E!wl*o(y_;3iXne}Gls z(g%2+Y6yqXn`3!=44Wh;sURWrOSg!_(oLcaBc(5hLzPqzy9#)9kc5;DQdhw>>dwZC zz+nDOs(B`e{}>4VAkH%d@Qgu?bb~bHWt1lAAT1g)Lg^4;%-4KN#v&k+?bmLW2^B#w zsE490BS^o{QcG*Z?#k@6le(Z9x<_PW7zh&Lc4#4c3mh&7jnZL4zc&ZruBkR5)c+sR zkozqJ8Ki>hemtFSd6|=7x>Zdw|KscebavCWb)Ws_3?|5A{CN2(I#HOJpD^S4bv}x{ zNMS=@uk;ORFWORj3gI5}^eZ^br&d8PIjNRCo!WuP0|itFymaz516&GnY5la;0CNilFv}Mr3}o(#l_NB!BjPtu0WD z>}|5NE`SxaIvalU_Vu;5-}(A|sOQ96TU(vk(w`}=62(o>K%$Vm`$Awqu%Nkm)Fq5~ zMh1k!w8Sk4UO~ZO6*;p6Rx+xqhZs~ly`GVN?|NH9C8l6*5p_-5i?LpAIRkEg?|@J2 zb&c@pnwuzksou7Vw?iA^Js2QZa0JIVq_@NGyNfbaTRFky*=h zeW-XzmoI&#wn+a_H%vjjf}-h`gw8lu5Y_E?Xm-q>jhgqw&E-*Z`GPTKJ`kdp@~sc8 zyKd)2t>tlRRn%Iwa3*H`Zm4-<-x#&-i(4zB*2;zYn6)O<{FUCEusPzks;I4M;e5031L2qx{P3#OLzmPC#rq6ur_ 
zImOYOV!5RLqhlY{$&R+goc4$Myg7WXQ`VQrY{|0~)IC&;Bf;j(9-lcLXN#h2k!(xr z-xn7<&dTS`$2vOYQ(f}**B9CD2m0b~QX~XC7fmp17aJ-Ys=+7KOk*Yf$)VE5GW^pr zaywkS)WR94nQq~`;1v;(yjgps$FvW-EL~teHbTK9HK~T(gA%bTgF!WBn5yE zYg9qz#hMUnQne#?O4XxE9RR?k8dW_;%Tcu3t{OwEa9s6I0GJ8Y16}THa3(m{9Mf?! z%?apBpw*crUDTGHK#g5tMZe4M=^J+Y-|+H6x|5*pA)ts0jffZ!LAZ1y5YRV4nxJrs zuR?MOED;B!XIjftiU!oS$0~|6J;mWHg=SB3c9M>#r?5P-WZeof0AJd7AXD(Rme&N7 z#exG7WKyAgu*JAU;Z?MQz7f`seAx|!t{e99fg$%np%tp2D^)Ds4Jo{?F>DzyUjpMY Z&@O}QWuX5O6#a)fjbSBE0HSF({tN30>zx1q delta 1457 zcmZ8hZ%kWN6uRod8zY+?6F1#l!jRW>MWE!qRthQ3 zn3=I;2{d<+&52`v!Yn$YiJ2da==`w6FONV%_n5kvfbLrd8qG|m_jY5O-sJwyIq&|? zIWOm&dq3Gc?sxv6C^kTry7XG)Ybot4giYzq+I9xC0i8VsbnXg6j{!Ks4?~T=L)DN5 zExx_KI}+}p=K6X_1Z#EaPd5j3?i4s8>O4VdSkOev(bmzqD;Y=KJ@FVy1TYUv@iXxA zVv+b8)HfKsSJl&a)QBhaXsqt&!JUV&k9h&wzIcUkKfdQ+fR!CN^Me<&0{78}A&j06w9AyilJDR>d+LW(CSri6=NO1J`P z8~`9l@hRar(+Tip$qQ9{Sy0O)%AaB$bB5(I5cfRr0H7S82K=q)gI>y3ZMo?7xNrSD zCE}kYU$J9Ud`ldb^_(1nLm218I&NHJ8oRI#{lGJ4!%Nu5w0J|MT_pJd-wjR}Pk zK@w_Zd}4Y0J#x4_HL)^%dS&v<7#CoE%M$P9L0Up7+*{?YZe9#j$V1 zAK7=E%y^5=w@f4_T({kxNqgG9=q{UcmzkRyuQgt)Hp|-<+o`PIE&` z0r>8@0Kw}8a;wT-FR5yEvfn#dzE#C8#mn4>xKgl%ZIPUr|% z(>oYJZlY?e#T}3Kg;6CjkVYV}iNK?d;(uJtiapmg z`YL2@+iUvvEl4kB^1Wj=+_j-lo-b-x1(LiQ;*kwaCyPriHCS|_O0>}R4>7WFc}ll%S#lX3Y>$%p7u5YlAa zmsfLAp`~#c@$PuEr#~D&5z|rb9?%M+=PA%p&AH^}8u8Smkb8qD?G(7lRjzUj-v+Z1 z5c2La3?GJTEK##Lbyafnfwl5HzUV<5>a7W=hKOSp+tI!c(RTqqs6&>85H11vN07e+lqKN&Q+OH{tpWnMWd8yca(EyB diff --git a/preprocess/__pycache__/gps_filter.cpython-312.pyc b/preprocess/__pycache__/gps_filter.cpython-312.pyc index e1c83c9dd5cdab3681cfb7133eb0d6a5e0f9a613..7740c089f46cb8c0709bfc6798d5b6bf032adf31 100644 GIT binary patch delta 2893 zcmah~U2Idy6`t#BU;keJ$3MAF949!z2}xwK#w>;;5IX@9b{7ccXH8v`>jVeKHupLS z(CdXts0~sol1`%CqdN1ujEJdED68@5D1pABb(y{jHqg5;t=XgR``KIq0LIOB$7zp zRFK%K3XUw5l?h;6Djw?%7?;DFgy|qDkgW8&l$8lmv84p5Kwd!ZiX*uoTbrfCRv}P= zY<*UFgT6&nj1nLYvT}hKSE5ta>GlB-Mt6}_WR+-lHS|hN0cFJxL0LJjAQ4DGc1u=S zpH&GeK|V!rvN0gY1=UU@#;xcWjn^BLtT;mJ#rqJH@VU~&3`)U&_3u7o-jdN(y%yxv 
z!(Ioh<_1|EiAPrp@^J$?y1Pe})r^5GoiVMi)39{)VT0hLui{!5Jx*4e)n)Z!&yY1{ zP3Rys_^35HK`$5u)2+2@&zc2#Ty(;!6%2xT^tHWb{m20t-dCCHS?y);OWnq?1X@t9 z-2{3;_zBbE(+etec*AQ>t-k=08*j)qR?p5M!|KDpV}hTk4?wwQgeb!^nhOa3tnnFo zk$b!X!6lG0E9!_JBFq}$&~w*hHsnUQ(`jzyHyb`j9tYC|ylnnG5rFU3^vcSFiu1vr*BrR05ucmuM!Rr_Fzi84Cex`j z*NcZiJd~!=u_UL)UD-5;VgiK+A5U_-k;G#Xqk;=zZ5$6}BOB*4(d0-hk~z)A_|vHb z%MIZPbtV;IV~I$7VvOeovHm;ogyk*bAiQcBGN?!62^4KH76leB-q+tPLsUOf&$9}Yj*6aH}T?c=K?us6axw%+f7hV3th z2%t62ZCYrbZ(lOyrXSJ%W$zPf;~ZJi8y4jA@qtTZF9^ia-~Rs#&x4QUWurA8!UD4|rF1^-ZMuaRddlr$9?dVL;@YjJ&nT1HPRGlq3J0Z*zNTdSL(e9Eqs3YAze)LBh6 z?F-}?+*j2Q8VH#_&)O``?o}J8Pri{9M^RG5Ys}E( zaA*Vq@lG^3&Y;9?)>kc8xR<5z6}q*rxvi*bAj`Vfapxna zqKQOA%*)*ScuwT1GM1d2h;dQGsva!t#Sq32#BdS;vW|1Q$lkJXov#rXtG6o3)JQ7D zu{`{7{Z_{qQY#a24pn%-?Z*&Dz~f@k0Do6ML)slmEP!TK5MkG=rn9<%F9UmzEPKXTSxEcpV6c1qhSKw?w}n7SF=Q`+!vynE{0 zoU<1_fr2Nn;@S1s**&kGBj&fy?VdxKy>ZD~Fu8N|ipf_pHxwCV;Gx-H zGS@5&%nzV^PhA_#NehlG56$f*vu$B;ez0hEzh5xB^PAsZZdx9_+qPVHZz4ZDl4oOu z;nDozvHaNSd~^Jfd3;p~8hTb$fYx3exH2#ooIUW0#&p;J2ioYME6S# zjmI)8vB^9;m7mJy?H@c;T-=~Lv+I?j%61&!_sPbPNBSAj7V02BYv0|BnqPOwLVo!> z9?4(6`}p~T1E=2W`5KeWBjL`&O|B!G#iTusB-}X+ehfH!a+rD?RRtBi>Dt9OWeqR( z5O2wtmkGiwV=9VR$D@JYx?Iw-B$!)3+Ji4;4Th;?&6 cLCbM%;+tW#vDW+qm|ju+5`y^#U=+9b7s|BL+5i9m delta 3211 zcma)8du&tJ89(Q~{Jef%CysB7^B@U^JTR*RO_MYs!GV@AUI7Au%;Y-3G>Ow|lK{PL zsMJZR7L{aY5`hXW{cAyVS}LT;Xw$}Mso3_9on(#Y@&|uRnmTEcXVM`klXi|12h$>L zw)B1Hp7WjWb-(ZX-P>)Gy9|Hkc`d-dpMG^>pi!JQ*x~-^swRN~9YA8<2NHXeBF6w6 z*L-?u0UE3N8hXN!b5bz0t*5K?2s%$~hx%Ld)P5S8Zb9x_2%YFZs-vt-18@b^(Iwkq zJGe~uQ3IflI;jUBN-MMki?Bw_DZ&S+X=>8I;PWVb6B1ikib21nT{cFdKVfnn^k7e- zY?MQ3+Ew)Z6IE2LfU|0puvo`+t6K7@#44=BeZ)vAMU~}hMWwJ8aq!tEjl!*+sZA|*qs2D)d^==4j)T*#1*u&PQlS)9;cl|4qEVfqQ#7Mc zW(Rb@-Ljba=*G2gO5H&<>>neE3n0q-`&buntgWr7A7&S%B z*_9<~jS31tTXjN{HuO7PCohv$ zaKjYiKOYPuoxTq~MIrq~O#h+xT3hj9nDo*mFlV0g+z>9|p!sIKAw><3q*%W!`!Apn zZ$lsQoUnnF?VYwDR%XMT^HRM0vJLr$BEkeU7^?)3d zdWZZYDLyzf&>I}?5A{mHz6hcX2iXFArv!By+Iu$QRoI0+z64e{U=gm^r|q$VFC8uO 
ztuw7NfvdY`{d4TcXBNb|1xL%GxpjhjAUc0uf3<$La_;RV(UVZyCsb2~FWp2dhPP^+ z}xarV0G~~NrT1WJyO(1pHv{MEg`_((ahp z^Ps_ZXY~Htiwy@dfa1N#Zfcq({wh^;V8y6tCX-z#eLD^meC zNk$St55+C$7e$U+Ci8n#HJ(kDyxC+?ERr?bXzVBq!%3ZDS)EO|h%MwMEktbSpVmU% z^Cqw&qfi8`$SpX{C(hH|U<|HRj%5s>PT^0m5!r1m>{F(NPD=eSNbV8>)qFme|m z?U3%MnB2qlhouQkUjHamlh+n-FW~M&~aqimh?A{b7N9LYGv(k#yB9ySxdP zchR*y1Ijh0AnGk_eIvQCCIe_&*^D|Vy6289h~DJ3hM4EmE%D;IzZRFKRroBe1{Jmc zQwlvSluED*^{@Y%h83q^((SpkIexfjVQ0^^-RbPsj0OelEfvK0%3cTzmZ^@kgZu?KbDFCE>pT`@M+f1AI5VqwRzYu>n{ zd%}nb`qo}{B(mr~7yr>{d^8#_dUr`Zo-}TVt2aE!IEgclO+*VZ@GPSS;&B)z?e|%g z#MbhdcH|dk5fx#PgK>@%++^}GND$lHYsYv#>vJmcntowc_u*NsMVG{_dtdI?WgSi;o&ESQz0|{=H4how z=oSV;UsQ)bJ|2oS<$qvLCm1cDD4A6Ea}15&}VT2l}P6QY-H!>phsDubkV=LA*%8 zpgf3qn0($Kzrl(cH@wQdOLp#}cQ(|F|IKXuKS0G?IK!b06Wl}ZYq)X6rew8eg$Bj0 znbBoeeZp0r)SKhh3XBmYRo?(uyeSQ^(1bC|aJ!4n3F8Ca{GCqbG>(|f^T1v@Te)a= zX8^+o;WRS^r#xt@t8m;icjU&aOM*8k*sgR;cVN8hzq)5uy1H*k*pd|NSKgd{b6KcJ z2o>>7uf|GazT57Yc&9Ud&>uhDmpCZJ4+P?Y{&?lUUxYJh4bh#}0^V@Bw#GvD{fXxN@xuoHVzd96C+;{sVN5tqqd&UJ#?LPX zF2tpaX#n1VJ1NW@ZPW??nszETNJzH}&^tH~zKId3yirUz6A8rYZR% zYAfm5l*OANyd_^CWCtOnp=I)6D04ZYL3c`;#8S58kq>%V*UB&ySl6l`k#TF0y|k2- xN%5hk(jxcj%@?&jRW}ZWq;rFTX8Ffhj(pjN%h69t9p=Z}9tyfLfY7Yc{{k=V0Qvv` diff --git a/preprocess/__pycache__/grid_divider.cpython-312.pyc b/preprocess/__pycache__/grid_divider.cpython-312.pyc index e6b87941eaf9cf621cddac3854755a8fa5843c72..c8f1835a45e1d2d12baff2deb6745b9a63f109b6 100644 GIT binary patch delta 2864 zcma)8T}&HS7QT1>JY$dTF$MxSfPp~jVxWW&{@VNqR^w?ZoEJjAnHYs* zM3{hp0R2EDZvc^+Bd`wuVe&c*&=2wB*#I=zSae*B%JHEgtSO8<7Hl;j%`FfGVJLzy zA;K^zG9qyUhAGj8;=(X3Qa5ZM%v`4f?5A%<-~k)*kg>nt|J~7{?tzdTmE?iAJUX5j z=!?o%lH(Htaxx{x#|O^!of({v#x)nw18v$#qR&Sb5>6|V^)?3QvuhMEDy5PLNB)RdISh`Kep)a_rITF2sE`UhOd!b&6NQr> zvEEt%k(*W96FwC7e7X-h43)))k){-zNWV>qjKW&8?Ol2f=E6ddbQU&JYzi^y0H9zK z*(-ejNUoVt+Zik)yp$O^kh1dnLLf92bAt%wlR_r(d)uAv7ZLK+L&BuW1OuDxd zwHH$C1DC!)c-Iaax5M@cvbhxxskju!B)S8WD1rh;1no=8TbI+IO;rIMzf0sS>l3)j zZH?+tXvMAYPejUTaHJxP$VYkt=ib(+Ud1`$#aR>yibp9@yu(x>ZKzfAWw1p1lJGX! 
z71!H9bSQ2lc|{PN=q0)ok2SlMqWs!x&4TtP`bdW-xo7*7oQ06cyGY@X^eT2Fi{G^s zET}}BFM>dc_7mpRc^C5Bn7X@h`}W4ipKsj#pxZx5HXj&H;S2KZr@ssOAI@bhbqZG- z(nz>^AL5&o!WaJA&u@M+^T$_B7);>{d`X*OYSJdWloHVDo6Pja8@D$yZ*63L^m{2$PH_k|r%h60xX+ z_cfWw#6&b9nzR^wEfq7#(P$!2Ea45z5^iD~!R0}5*yLhTTpX0+lTqyrTYW2iH7+JE zo6O~CJa##0x)Z6fL5z?Ga|YqDA_vhvwuLK-QB?mA49Tha*+vF$DvxLv*z&Y*-%QBx zHLm(v3}4HVuXVm_A!M}muD10XZT(AaC#LsgY->LT^oRkT^6dVZ4!yi_Ha34%Z#uFh z99Out?coV~hW)0>Yjd5*1mjvHyu@eFgH<==F??pS5Z47O~|wKp4@ zY0I9PQRgo%^v<7NBz3;;5lL|NM|RNOxj-*+M(2s8gD3A@`0C)2bVfga;j2Rz-f7GJVwN+iTbF$Y^p{`J*|JRrlY`vr+dt{}gaT=~^zvZf`XKQ97 zck1UmjX;l4-LoKmG5Xo)y;qFGA)`7p&0Cm5{|6(K+~RNY+0oh43&DH5UKv^zF5VZ4 zHPU|0;RdC_O%fCZHY>DC_VUBZ;yJzdCG;2di^F;-rpGSptZwr2^Ffqd4#^!>Nh{>e5q{3Xd?%E{rpe2_^^^0<~Z|wR)8kb4))w zvQ3@&T+lR^e=H=0sDMM;F`{h}6}-w!{Y;_r&$P&@+*CxX;>)226VnSEO2UqJKy>0L zBrWoYo}7jBtd(mg)l7Xr0fj=mm*MM4m+D5nsmX&0PDpO|XT`1klV5k>mM#@x;nn`) ztV4A8ALk!lwm;?u$&ZbdNSJIKv4S*yRW`XOD%q$MLF+1Pu_ea`#rRNCs>UlC;l|{E zU=f2RL{yaOFlZkZ^;qB!9rQ7TsU$95GnpK} z$e81H(*ojzl)E+Ej!g1AqRC%^@6}_r`~9KWzM+4s{WTR(-h zv@hp^(E&n^JC*;YBYLSI)yB_KgqZ& ZZjunfuYv1tpzhz)ixB$$1+cU{{|Bk%z;plr delta 2498 zcma)7U2I!b59iwQ?)gLE$DLD}uRj3NS!5sskE4T#LxafRP9cN8 z$T1v32k~n-DBNb--J^_F*RmbmX`A}1U*}JugQ6h-m0lBrw!dE*!A%lCqYM@Z+zpOy zIUJ3}jj`h!4jvCP1oIR(sR+p8LGvICzXBszTuiJ)M2PU!=rM>_S@(37WQnoPdtl)K;F`{oEgN+S z>xM)_JSP|uk;>dgY|FA-=JK>sjRGGe5pe=8-%uu0_#B1E#I{1VL%arh1lJ|(L@9f# z#LoD1?Np4y$WmkAU0Fsg`T!dK5@I@j#daBNP9hQqQ6}IEC%~PcqmF8vdz{%50Dv?t zjX4;OGWd76Rl-_2u@NV+|Hd&DJ3Gb2@XM!Ew0nL%yINSwP1K|My}YYCq7j|Ai2KNj zJFDL7Iwx_QLxx72Ky{)mzm#6)HsV^A-DNJ*H>JKlSIk|DtX5wVyxr5aQi69?_d$tb z_*vYaSL}iw`hzs&@E4R)}(zCYJiMx3CHV=k)9gti9p1PU&{N|@0 z+&=T=mcXs6mlm&n7?;7&6}a`;X%KF+riinetw0am#a(gM#=}+kp|}ggo5|GTTjv*( zZ!ad_yfJ$+6Aez8mktyek| zEI@QKjDk1Ng6uf0{!RT__T^YHJoY9ypY|O z*)`)X@!0v;w>+=dOLpXKPKWb#?OAvJp+}Q)p{{AASc~77mm71<-7~(~H8a}m%hyik zp4)dVlJg&!lb>I3d6MG1yFT?Q^(`T{rg)M&=Dq%O5B=j3aymUUWnbM<@Ybi@QwP#7 z=j%HD&*Q+_LU7&X=8v0a<$Ul-rr3c~js=>tuNwG$P?z_tW6F^p&+aU=1hY@TH?7+I 
zEyC_CB?-B`v`yWkdg1UA5BZb=^mVmy+eEJMsX4j7pw}fG-!JuJql#K}8s4#adm=#N<0)(42Z90kelfBijtYl4GpE%8PK-egXSlgbu^wTl`@GsAAbI z)q+{XrX?HJqufdE6+cv_MC^=5@uWWj9W~iPRKQ60VI0ett}uMqu*Ig zuqx1 zhSf}LWn>K_Z9s~0*c>xn95YTvH4uN^y3n^JS5IOC7@}DiKlk-*dJ8mQ$Awoi!?_JZ zxyGGy@~#^lotOG&#EZ{f@96!iepAMsdNtiT70=agN~%B#{-&f}csP)gn-{!IOGs0+ z%pE!2hh>wT1n|)wlGd>D` X2Yv)?i}8Pv_MNa7W4J0t%Wgjb*iuWl diff --git a/preprocess/__pycache__/logger.cpython-312.pyc b/preprocess/__pycache__/logger.cpython-312.pyc index c45ca20deb68ae36079ee65ebdcca98a25530e21..5212144568af7860461616d16a136162e01af1df 100644 GIT binary patch delta 70 zcmdnQbCQSqG%qg~0}!0|OxnmjlSR$S*(xTqIJKxaCbKvvKQG25wK%&ZzaXZ#q%q^= CUk>X4 diff --git a/preprocess/__pycache__/time_filter.cpython-312.pyc b/preprocess/__pycache__/time_filter.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..99244d3c728d7d5c896dae16a373197b3c1824ed GIT binary patch literal 3977 zcmb6cZA?_z^}d-m-w#BX!5QRZjI20rBwcs4F0!bB)mpo#6*ntoeHnQV9GnmKeW;Kj zaV@$-WEI@nfuaQ)+s+nsCym)6l6BKRZPTQAQ?Rg4lWtOGkY6S1CMNr<=gyn=X0Vv7 zm*L)X?>*n=+xYu3Z4 z^OFIxnG6`34FuAmy@=8GB8D8-DIA)OU4+N5h$oF6oor^j0xS3etZczO!}3FC!Wn^^YgaQGLQQZ?(sSu;TMdo0b)%7mq zF*J5l;9%MpXy>biGeWoP$nJOPgB;t*g<4sjug*GS5$o$AWK+Nw6a|*|SY%y@m&s1A za6%?KLcXAE_J`W(HlLrBO#rp~g6*=UofUopFw4nCyvA}I_L7{BLrV)MIF>&V@-q;2 zl4t#GvNa?MouWW9K90vubGyIWSzps~gl9Rv#mDNHE@Id6+o3y)9y?1$O~a;9YM2^pj+ak6 zcSXqlyvN05v&B`3;woul{d92yEZH8HSH+2$@+}eap?U37phdE!6FeaBG}9&7T)?Jdb(WV3jc0%n#F#a_B6T-AX7R3ThlT(jWjH&sm@1@F^ynB%k6u`#u7@=% z8qoWGgjyh%6@ni#}Bhl{)lRbKfOi6Y}g~BAZQPHQz_!9oH zhDosA7B-BaWww3)T%^KApw;Xm5P^aOiNg91|M3D{^^BziF?s2zl&KbEN6mMpm#`y0 zCl^g>K7nPcuIFU7P@zfz&TbXXj!Xfw`c~n1CXcqF9&^|PaVmxwS;FRy^;t|63R{?> zZkVVN3DoZd+gHS8in3UY1^=nnI?kn)qRLpp`V!P*eF;Czx z2p6k-u%3s}dh{A(<7pj%jzX-#V)~5!C^}7e)`+!mEv3fpr9Qs%qPNlN&NK~3{_b4r z*Vh(CCKk`#pP#syoElSJ(S=|2<}ykC>CThS&%-mjH5s|EcxCjzQ~k=Do4VAO9JsV_ z|8}bHZ0cH{LacGufZbd>U!%>i658cl<4*RUPY#SLr1KAMq(1x%q&Ij4?;g$@U_n&! 
z!TH4-_Z74%{F+rIuk=^pqNFMri7sQSO@5lb@t+f#sS`lq903}MSP8k5oI0EQ{l%3^ zjFBfdGDgcJmMxY7`jam{NuK|0>S`RkK0k4LVQMlNAH=($7(C5JUOdjT0*4i1>a#PdBL3)gZV1F#7&sw+89s>00c7W3$}%1Q)|dC279m&%V!dp9h!f~jUcbolvL5Os=)7kjj=r`w*}?|J0LwwpIQ&cj zV}GbsHVPrXj~8Sz)X4@p{Bp{M)0|IWJvOcoW*mN2WNW&z!EAWha-3tmCutvZM%Lpl zT`WW01R|`;7Dk5U5DgU`E3h)r2ABa*PbrO928tEl27EKe6c3yo%gI+T=;K{9&aQ4( zPTjP83&)m!8xrm;S}I1Cyvw#e+pO7{FgqpJ&KdKrB_pB=hg`F+O$qqfHqB85m-qMW zpQXwYRGCy>J43xSS5zudrGV%tpS4#e?3J;1V?UDYmDBdE5zD+SKhkxvZs^cIZRHwm zMS`l3-1Re5!!lY=!tRM}n6|$bvCNsN98K14jr+bdCbsRJ-L^lmZU6MPcciV2e`Ten z_oZV@qKTbt@+F#l(@iI(!+yyXm@x-Io8pS0Qwhh0S;wmh$E(wh%@HeB(G{^k@fWlH ztNiu8#z_5uOJ3z1IvTUjdTJA%+Uc^|xzdUO%Us!p(PP8MV)co#AH<6jW!nd=a|QOO zZO}Gb;7$~{$7*7&u_Mx!_oSo8rT1E;b<9iwyBwn~zA0WRt*e_U*!fj~V<`_+ys=b> zD2HTo&pFqP<`3tOb&YjP&Mon631`i$b9chId)iqa*{6A_G(nXPb>CVa^Nm+ao;Rg8 z4@hMPXQ)Hp8c|{K022)i2F4D@fB10qj)@N*t*soWpR+rndk6Op)sIyU|M;Q3YDteu zU-`OVb<{Ly8X|v@_soO}YnLdrwsN$3xH?u6^GGFcMD{NeC{@%=@n7zk-g!W(J18A& zmdbuML$%Bml}l9l*UN(9dV9vaXKs~KGCQBG0mjnm9pCZzqwO!&&HEhszc~#1%1w

oZx`$;g%l^-AY(asMge@b!{T zZw`MWr8^ApuRzy}z9Kh%L+TB$FIjgOTru5e)Th)E0&t@6LFv8HZ!xHFz%(8chp$5p zXTNOh^adF(&%K3La6OU!!cm0m#8XYMY_3)Guj6*Z2CnOQTof(o34(ZpERRtBBUJQn RwDB2PPY|we5bo3D{{ttSHMsx) literal 0 HcmV?d00001 diff --git a/preprocess/command_runner.py b/preprocess/command_runner.py new file mode 100644 index 0000000..2b8c9b1 --- /dev/null +++ b/preprocess/command_runner.py @@ -0,0 +1,89 @@ +import os +import logging +import subprocess +from typing import Dict +import pandas as pd + +class CommandRunner: + """执行网格处理命令的类""" + + def __init__(self, output_dir: str): + """ + 初始化命令执行器 + + Args: + output_dir: 输出目录路径 + """ + self.output_dir = output_dir + self.logger = logging.getLogger('UAV_Preprocess.CommandRunner') + + def run_grid_commands(self, grid_points: Dict[int, pd.DataFrame], enable_grid_division: bool = True): + """ + 为每个网格顺序运行指定命令 + + Args: + grid_points: 网格点数据字典,键为网格索引,值为该网格的点数据 + enable_grid_division: 是否启用网格划分 + """ + if not enable_grid_division: + self._run_command(0) + return + + self.logger.info("开始执行网格处理命令") + + # 顺序执行每个网格的命令 + for grid_idx in grid_points.keys(): + try: + self._run_command(grid_idx) + except Exception as e: + self.logger.error(f"网格 {grid_idx + 1} 处理命令执行失败: {str(e)}") + raise # 如果一个网格失败,停止后续执行 + + def _run_command(self, grid_idx: int): + """ + 执行单个网格的命令 + + Args: + grid_idx: 网格索引 + + Raises: + Exception: 当命令执行失败时抛出异常 + """ + try: + # 确定网格目录和命令 + grid_dir = os.path.join(self.output_dir, f'grid_{grid_idx + 1}') + command = f"docker run -ti --rm -v {grid_dir}:/datasets opendronemap/odm --project-path /datasets project --feature-quality lowest --force-gps" + + self.logger.info(f"执行命令: {command} 在目录: {grid_dir}") + + # 在指定目录下执行命令 + process = subprocess.Popen( + command, + shell=True, + cwd=grid_dir, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True + ) + + # 获取命令输出 + stdout, stderr = process.communicate() + + # 检查命令执行结果 + if process.returncode == 0: + self.logger.info(f"网格 {grid_idx + 1} 命令执行成功") + self.logger.debug(f"命令输出至日志文件") + 
with open(os.path.join(grid_dir, 'odm_success.log'), 'a', encoding='utf-8') as f: + f.write(f"{stdout}") + else: + self.logger.error(f"网格 {grid_idx + 1} 命令执行失败") + self.logger.error(f"错误信息输出至日志文件") + with open(os.path.join(grid_dir, 'odm_error.log'), 'a', encoding='utf-8') as f: + f.write(f"{stdout}") + f.write(f"\n错误日志:\n") + f.write(f"{stderr}") + raise Exception(f"命令执行失败: {stderr}") + + except Exception as e: + self.logger.error(f"网格 {grid_idx + 1} 命令执行出错: {str(e)}") + raise \ No newline at end of file diff --git a/preprocess/gps_extractor.py b/preprocess/gps_extractor.py index 5b217db..9bb2e57 100644 --- a/preprocess/gps_extractor.py +++ b/preprocess/gps_extractor.py @@ -2,10 +2,12 @@ import os from PIL import Image import piexif import logging +import pandas as pd +from datetime import datetime class GPSExtractor: - """从图像文件提取GPS坐标""" + """从图像文件提取GPS坐标和拍摄日期""" def __init__(self, image_dir): self.image_dir = image_dir @@ -16,28 +18,60 @@ class GPSExtractor: """将DMS格式转换为十进制度""" return dms[0][0] / dms[0][1] + (dms[1][0] / dms[1][1]) / 60 + (dms[2][0] / dms[2][1]) / 3600 - def get_gps(self, image_path): - """提取单张图片的GPS坐标""" + @staticmethod + def _parse_datetime(datetime_str): + """解析EXIF中的日期时间字符串""" + try: + # EXIF日期格式通常为 'YYYY:MM:DD HH:MM:SS' + return datetime.strptime(datetime_str.decode(), '%Y:%m:%d %H:%M:%S') + except Exception: + return None + + def get_gps_and_date(self, image_path): + """提取单张图片的GPS坐标和拍摄日期""" try: image = Image.open(image_path) exif_data = piexif.load(image.info['exif']) + + # 提取GPS信息 gps_info = exif_data.get("GPS", {}) + lat = lon = None if gps_info: lat = self._dms_to_decimal(gps_info.get(2, [])) lon = self._dms_to_decimal(gps_info.get(4, [])) self.logger.debug(f"成功提取图片GPS坐标: {image_path} - 纬度: {lat}, 经度: {lon}") - return lat, lon - else: + + # 提取拍摄日期 + date_info = None + if "Exif" in exif_data: + # 优先使用DateTimeOriginal + date_str = exif_data["Exif"].get(36867) # DateTimeOriginal + if not date_str: + # 备选DateTime + date_str = 
exif_data["Exif"].get(36868) # DateTimeDigitized + if not date_str: + # 最后使用基本DateTime + date_str = exif_data["0th"].get(306) # DateTime + + if date_str: + date_info = self._parse_datetime(date_str) + self.logger.debug(f"成功提取图片拍摄日期: {image_path} - {date_info}") + + if not gps_info: self.logger.warning(f"图片无GPS信息: {image_path}") - return None, None + if not date_info: + self.logger.warning(f"图片无拍摄日期信息: {image_path}") + + return lat, lon, date_info + except Exception as e: - self.logger.error(f"提取GPS坐标时发生错误: {image_path} - {str(e)}") - return None, None + self.logger.error(f"提取图片信息时发生错误: {image_path} - {str(e)}") + return None, None, None def extract_all_gps(self): - """提取所有图片的GPS坐标""" - self.logger.info(f"开始从目录提取GPS坐标: {self.image_dir}") - gps_points = [] + """提取所有图片的GPS坐标和拍摄日期""" + self.logger.info(f"开始从目录提取GPS坐标和拍摄日期: {self.image_dir}") + gps_data = [] total_images = 0 successful_extractions = 0 @@ -45,11 +79,15 @@ class GPSExtractor: if image_file.lower().endswith('.jpg'): total_images += 1 image_path = os.path.join(self.image_dir, image_file) - lat, lon = self.get_gps(image_path) - if lat and lon: + lat, lon, date = self.get_gps_and_date(image_path) + if lat and lon: # 仍然以GPS信息作为主要判断依据 successful_extractions += 1 - gps_points.append( - {'file': image_file, 'lat': lat, 'lon': lon}) + gps_data.append({ + 'file': image_file, + 'lat': lat, + 'lon': lon, + 'date': date + }) - self.logger.info(f"GPS坐标提取完成 - 总图片数: {total_images}, 成功提取: {successful_extractions}, 失败: {total_images - successful_extractions}") - return gps_points + self.logger.info(f"GPS坐标和拍摄日期提取完成 - 总图片数: {total_images}, 成功提取: {successful_extractions}, 失败: {total_images - successful_extractions}") + return pd.DataFrame(gps_data) diff --git a/preprocess/gps_filter.py b/preprocess/gps_filter.py index 8c61088..2d7ef22 100644 --- a/preprocess/gps_filter.py +++ b/preprocess/gps_filter.py @@ -4,6 +4,7 @@ from itertools import combinations import numpy as np from scipy.spatial import KDTree import logging 
+import pandas as pd class GPSFilter: @@ -33,25 +34,23 @@ class GPSFilter: grid_y = int((lon - min_lon) // grid_size) return grid_x, grid_y - def _get_distances(self, points, grid_size): + def _get_distances(self, points_df, grid_size): """读取图片 GPS 坐标,计算点对之间的距离并排序""" # 确定经纬度范围 - coords = np.array([[p['lat'], p['lon']] for p in points]) - min_lat, min_lon = np.min(coords, axis=0) - max_lat, max_lon = np.max(coords, axis=0) + min_lat, max_lat = points_df['lat'].min(), points_df['lat'].max() + min_lon, max_lon = points_df['lon'].min(), points_df['lon'].max() self.logger.info( f"经纬度范围:纬度[{min_lat:.6f}, {max_lat:.6f}],纬度范围[{max_lat-min_lat:.6f}]," f"经度[{min_lon:.6f}, {max_lon:.6f}],经度范围[{max_lon-min_lon:.6f}]") # 分配到网格 grid_map = {} - for img_info_dict in points: + for _, row in points_df.iterrows(): grid = self._assign_to_grid( - img_info_dict['lat'], img_info_dict['lon'], grid_size, min_lat, min_lon) + row['lat'], row['lon'], grid_size, min_lat, min_lon) if grid not in grid_map: grid_map[grid] = [] - grid_map[grid].append( - (img_info_dict['file'], img_info_dict['lat'], img_info_dict['lon'])) + grid_map[grid].append((row['file'], row['lat'], row['lon'])) self.logger.info(f"图像点已分配到 {len(grid_map)} 个网格中") @@ -68,14 +67,13 @@ class GPSFilter: return sorted_distances - def filter_dense_points(self, points, grid_size=0.001, distance_threshold=13): + def filter_dense_points(self, points_df, grid_size=0.001, distance_threshold=13): """过滤密集点,根据提供的距离阈值""" self.logger.info(f"开始过滤密集点 (网格大小: {grid_size}, 距离阈值: {distance_threshold}米)") - # 获取每个网格中的图片的两两距离信息 - sorted_distances = self._get_distances(points, grid_size) - + sorted_distances = self._get_distances(points_df, grid_size) to_del_imgs = [] + """遍历每个网格,删除网格中距离小于阈值的点""" for grid, distances in sorted_distances.items(): grid_del_count = 0 @@ -119,28 +117,28 @@ class GPSFilter: for img in to_del_imgs: f.write(img+'\n') - filtered_points = [point for point in points if point['file'] not in to_del_imgs] - 
self.logger.info(f"密集点过滤完成,共删除 {len(to_del_imgs)} 个点,剩余 {len(filtered_points)} 个点") - return filtered_points + filtered_df = points_df[~points_df['file'].isin(to_del_imgs)] + self.logger.info(f"密集点过滤完成,共删除 {len(to_del_imgs)} 个点,剩余 {len(filtered_df)} 个点") + return filtered_df - def filter_isolated_points(self, points, threshold_distance=0.001, min_neighbors=6): + def filter_isolated_points(self, points_df, threshold_distance=0.001, min_neighbors=6): """过滤孤立点""" self.logger.info(f"开始过滤孤立点 (距离阈值: {threshold_distance}, 最小邻居数: {min_neighbors})") - coords = np.array([[p['lat'], p['lon']] for p in points]) + coords = points_df[['lat', 'lon']].values kdtree = KDTree(coords) neighbors_count = [len(kdtree.query_ball_point( coord, threshold_distance)) for coord in coords] isolated_points = [] with open(self.log_file, 'a', encoding='utf-8') as f: - for i, p in enumerate(points): + for i, (_, row) in enumerate(points_df.iterrows()): if neighbors_count[i] < min_neighbors: - isolated_points.append(p['file']) - f.write(p['file']+'\n') - self.logger.debug(f"删除孤立点: {p['file']} (邻居数: {neighbors_count[i]})") + isolated_points.append(row['file']) + f.write(row['file']+'\n') + self.logger.debug(f"删除孤立点: {row['file']} (邻居数: {neighbors_count[i]})") f.write('\n') - filtered_points = [p for i, p in enumerate(points) if neighbors_count[i] >= min_neighbors] - self.logger.info(f"孤立点过滤完成,共删除 {len(isolated_points)} 个点,剩余 {len(filtered_points)} 个点") - return filtered_points + filtered_df = points_df[~points_df['file'].isin(isolated_points)] + self.logger.info(f"孤立点过滤完成,共删除 {len(isolated_points)} 个点,剩余 {len(filtered_df)} 个点") + return filtered_df diff --git a/preprocess/grid_divider.py b/preprocess/grid_divider.py index 733ce8c..c7bdb51 100644 --- a/preprocess/grid_divider.py +++ b/preprocess/grid_divider.py @@ -1,4 +1,5 @@ import logging +from geopy.distance import geodesic class GridDivider: """划分九宫格,并将图片分配到对应网格""" @@ -8,58 +9,60 @@ class GridDivider: self.logger = 
logging.getLogger('UAV_Preprocess.GridDivider') self.logger.info(f"初始化网格划分器,重叠率: {overlap}") - def divide_grids(self, points): + def divide_grids(self, points_df, grid_size=250): """计算边界框并划分九宫格""" self.logger.info("开始划分九宫格") - lats = [p['lat'] for p in points] - lons = [p['lon'] for p in points] - min_lat, max_lat = min(lats), max(lats) - min_lon, max_lon = min(lons), max(lons) + min_lat, max_lat = points_df['lat'].min(), points_df['lat'].max() + min_lon, max_lon = points_df['lon'].min(), points_df['lon'].max() + # 计算区域的实际距离(米) + width = geodesic((min_lat, min_lon), (min_lat, max_lon)).meters + height = geodesic((min_lat, min_lon), (max_lat, min_lon)).meters + self.logger.info( - f"区域边界: 纬度[{min_lat:.6f}, {max_lat:.6f}], " - f"经度[{min_lon:.6f}, {max_lon:.6f}]" + f"区域宽度: {width:.2f}米, 高度: {height:.2f}米" ) - lat_step = (max_lat - min_lat) / 3 - lon_step = (max_lon - min_lon) / 3 + # 计算需要划分的网格数量 + num_grids_width = int(width / grid_size) if int(width / grid_size) > 0 else 1 + num_grids_height = int(height / grid_size) if int(height / grid_size) > 0 else 1 - self.logger.debug(f"网格步长: 纬度{lat_step:.6f}, 经度{lon_step:.6f}") + # 计算每个网格对应的经纬度步长 + lat_step = (max_lat - min_lat) / num_grids_height + lon_step = (max_lon - min_lon) / num_grids_width + grids = [] - for i in range(3): - for j in range(3): + for i in range(num_grids_height): + for j in range(num_grids_width): grid_min_lat = min_lat + i * lat_step - self.overlap * lat_step - grid_max_lat = min_lat + \ - (i + 1) * lat_step + self.overlap * lat_step + grid_max_lat = min_lat + (i + 1) * lat_step + self.overlap * lat_step grid_min_lon = min_lon + j * lon_step - self.overlap * lon_step - grid_max_lon = min_lon + \ - (j + 1) * lon_step + self.overlap * lon_step - grids.append((grid_min_lat, grid_max_lat, - grid_min_lon, grid_max_lon)) + grid_max_lon = min_lon + (j + 1) * lon_step + self.overlap * lon_step + grids.append((grid_min_lat, grid_max_lat, grid_min_lon, grid_max_lon)) self.logger.debug( f"网格[{i},{j}]: 
class TimeFilter:
    """Filter images by gaps in capture time.

    When two consecutively captured images are more than
    ``time_threshold`` apart, the later image and everything after it is
    assumed to belong to a different flight session and is discarded.
    """

    def __init__(self, output_dir):
        # Names of removed images are appended to this shared deletion log.
        self.log_file = os.path.join(output_dir, 'del_imgs.txt')
        self.logger = logging.getLogger('UAV_Preprocess.TimeFilter')
        self.time_threshold = timedelta(minutes=5)  # gap that splits two sessions

    def filter_by_date(self, points_df: pd.DataFrame) -> pd.DataFrame:
        """Filter images by capture time.

        If the time difference between two adjacent images exceeds the
        threshold, all subsequent images are filtered out.

        Args:
            points_df: DataFrame with 'file' and 'date' columns.

        Returns:
            The filtered DataFrame (input returned unchanged when no
            'date' column exists or no break point is found).
        """
        self.logger.info("开始基于拍摄时间进行过滤")

        # Guard: without capture times we cannot filter at all.
        if 'date' not in points_df.columns:
            self.logger.error("输入数据中缺少date列")
            return points_df

        # Rows without a capture time carry no ordering information.
        points_df = points_df.dropna(subset=['date'])

        if len(points_df) == 0:
            self.logger.warning("没有有效的拍摄时间数据")
            return points_df

        points_df = points_df.sort_values('date')
        self.logger.info(f"排序后的时间范围: {points_df['date'].min()} 到 {points_df['date'].max()}")

        # Gap between each image and the previous one; the first entry is NaT.
        time_diffs = points_df['date'].diff()

        # Vectorized comparison: NaT > Timedelta is always False, so the
        # first row can never be flagged. This avoids relying on the
        # truthiness of a possibly-NaT value (`if time_diff and ...`).
        gap_mask = time_diffs > self.time_threshold

        break_idx = None
        if gap_mask.any():
            # Positional index of the first over-threshold gap.
            break_idx = int(gap_mask.to_numpy().argmax())
            time_diff = time_diffs.iloc[break_idx]
            break_time = points_df.iloc[break_idx]['date']
            self.logger.info(f"在索引 {break_idx} 处发现时间断点,时间差为 {time_diff}")
            self.logger.info(f"断点时间: {break_time}")

        # If a break point was found, drop it and everything after it.
        if break_idx is not None:
            to_delete = points_df.iloc[break_idx:]['file'].tolist()
            self.logger.info(f"将删除 {len(to_delete)} 张断点后的图片")

            # Record the removed images in the shared deletion log.
            with open(self.log_file, 'a', encoding='utf-8') as f:
                for img in to_delete:
                    f.write(img + '\n')
                f.write('\n')

            filtered_df = points_df.iloc[:break_idx]
            self.logger.info(f"时间过滤完成,保留了 {len(filtered_df)} 张图片")
            return filtered_df

        self.logger.info("未发现时间断点,保留所有图片")
        return points_df