添加latex

This commit is contained in:
龙澳
2026-04-02 20:17:23 +08:00
parent dc3ae0680c
commit da913b6ccc
9 changed files with 2035 additions and 0 deletions

79
MarsRAG/MarsRAG.aux Normal file
View File

@@ -0,0 +1,79 @@
\relax
\citation{McEwen24HiRISE}
\citation{Malin07CTX}
\citation{Murchie07CRISM}
\citation{Smith01MOLA}
\citation{Grotzinger12Curiosity}
\citation{Li21ZhuRong}
\citation{Wang26marsretrieval}
\citation{Cai25LLM}
\citation{Lewis20RAG}
\citation{Zhou24hallucination}
\citation{Pan24KGandLLM}
\@writefile{toc}{\contentsline {section}{\numberline {I}Introduction}{1}{}\protected@file@percent }
\citation{Wu25MultiRAG}
\@writefile{toc}{\contentsline {section}{\numberline {II}Preliminary}{3}{}\protected@file@percent }
\newlabel{equ:RAG Problem}{{1}{3}}
\newlabel{equ:RAG Problem s.t.}{{2}{3}}
\newlabel{equ:spatial observation hyperedge}{{3}{3}}
\newlabel{equ:hyperbolic space}{{4}{3}}
\@writefile{toc}{\contentsline {section}{\numberline {III}Methodology}{4}{}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {III-A}}Framework of AreoRAG}{4}{}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {III-B}}Hyperbolic Spatial Hypergraph Construction}{4}{}\protected@file@percent }
\newlabel{equ:multi-source spatial data}{{5}{4}}
\newlabel{equ:planetary science domain schema}{{6}{4}}
\newlabel{equ:embedding mapping}{{7}{5}}
\newlabel{equ:Spatial Scale-Curvature Correspondence}{{8}{5}}
\newlabel{equ:Cross-Reference-Frame Alignment}{{9}{5}}
\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {III-C}}Spatiotemporal Retrieval with Cross-Resolution Aggregation}{5}{}\protected@file@percent }
\newlabel{equ:Spatial Intent Extraction and Hyperedge Retrieval}{{10}{5}}
\newlabel{equ:spatiotemporal encoding}{{11}{5}}
\newlabel{equ:hyperbolic spatial encoding}{{12}{5}}
\newlabel{equ:MLP scores}{{13}{5}}
\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {III-D}}Physics-Informed Conflict Triage}{6}{}\protected@file@percent }
\@writefile{lot}{\contentsline {table}{\numberline {I}{\ignorespaces Physics-Informed Conflict Triage Categories}}{6}{}\protected@file@percent }
\newlabel{table_conflict_triage}{{I}{6}}
\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {III-E}}AreoRAG Prompting}{7}{}\protected@file@percent }
\@writefile{loa}{\contentsline {algorithm}{\numberline {1}{\ignorespaces AreoRAG Prompting (ARP)}}{7}{}\protected@file@percent }
\newlabel{alg:arp}{{1}{7}}
\@writefile{toc}{\contentsline {section}{\numberline {IV}Experiments}{8}{}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {IV-A}}Experimental Settings}{8}{}\protected@file@percent }
\@writefile{lot}{\contentsline {table}{\numberline {II}{\ignorespaces Statistics of the Planetary Datasets}}{8}{}\protected@file@percent }
\newlabel{table_planetary_datasets}{{II}{8}}
\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {IV-B}}Overall Retrieval and QA Performance (Q1)}{9}{}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {IV-C}}Robustness Under Spatial Sparsity and Conflict Intensity (Q2)}{9}{}\protected@file@percent }
\@writefile{lot}{\contentsline {table}{\numberline {III}{\ignorespaces Comparison with Baseline Methods on Planetary and General QA Datasets}}{10}{}\protected@file@percent }
\newlabel{table_comparison}{{III}{10}}
\@writefile{lot}{\contentsline {table}{\numberline {IV}{\ignorespaces Ablation Experiments of HySH and PICT Modules}}{10}{}\protected@file@percent }
\newlabel{table_ablation}{{IV}{10}}
\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {IV-D}}Ablation Study (Q3)}{10}{}\protected@file@percent }
\@writefile{lot}{\contentsline {table}{\numberline {V}{\ignorespaces Conflict Handling Performance on MarsConflict-50}}{11}{}\protected@file@percent }
\newlabel{table_conflict}{{V}{11}}
\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {IV-E}}Conflict Preservation Evaluation (Q4)}{11}{}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {IV-F}}Efficiency Analysis (Q5)}{11}{}\protected@file@percent }
\@writefile{lot}{\contentsline {table}{\numberline {VI}{\ignorespaces Time Cost Analysis Across Modules}}{11}{}\protected@file@percent }
\newlabel{table_time_cost}{{VI}{11}}
\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {IV-G}}Case Study}{11}{}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {IV-H}}Limitations}{11}{}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {IV-I}}Related Work}{12}{}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {IV-J}}Graph-Structured Retrieval Augmented Generation}{12}{}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {IV-K}}Hyperbolic Representation Learning for Retrieval}{12}{}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {IV-L}}Knowledge Conflict Detection and Resolution in RAG}{13}{}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {IV-M}}Intelligent Retrieval for Planetary Remote Sensing Data}{13}{}\protected@file@percent }
\bibstyle{IEEEtran}
\bibdata{IEEEabrv,references}
\bibcite{McEwen24HiRISE}{1}
\bibcite{Malin07CTX}{2}
\bibcite{Murchie07CRISM}{3}
\bibcite{Smith01MOLA}{4}
\bibcite{Grotzinger12Curiosity}{5}
\bibcite{Li21ZhuRong}{6}
\bibcite{Wang26marsretrieval}{7}
\bibcite{Cai25LLM}{8}
\bibcite{Lewis20RAG}{9}
\bibcite{Zhou24hallucination}{10}
\bibcite{Pan24KGandLLM}{11}
\bibcite{Wu25MultiRAG}{12}
\@writefile{toc}{\contentsline {section}{\numberline {V}Conclusion}{14}{}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{References}{14}{}\protected@file@percent }
\gdef \@abspage@last{14}

114
MarsRAG/MarsRAG.bbl Normal file
View File

@@ -0,0 +1,114 @@
% Generated by IEEEtran.bst, version: 1.14 (2015/08/26)
\begin{thebibliography}{10}
\providecommand{\url}[1]{#1}
\csname url@samestyle\endcsname
\providecommand{\newblock}{\relax}
\providecommand{\bibinfo}[2]{#2}
\providecommand{\BIBentrySTDinterwordspacing}{\spaceskip=0pt\relax}
\providecommand{\BIBentryALTinterwordstretchfactor}{4}
\providecommand{\BIBentryALTinterwordspacing}{\spaceskip=\fontdimen2\font plus
\BIBentryALTinterwordstretchfactor\fontdimen3\font minus
\fontdimen4\font\relax}
\providecommand{\BIBforeignlanguage}[2]{{%
\expandafter\ifx\csname l@#1\endcsname\relax
\typeout{** WARNING: IEEEtran.bst: No hyphenation pattern has been}%
\typeout{** loaded for the language `#1'. Using the pattern for}%
\typeout{** the default language instead.}%
\else
\language=\csname l@#1\endcsname
\fi
#2}}
\providecommand{\BIBdecl}{\relax}
\BIBdecl
\bibitem{McEwen24HiRISE}
A.~McEwen, S.~Byrne, C.~Hansen, I.~Daubar, S.~Sutton, C.~Dundas,
N.~Bardabelias, N.~Baugh, J.~Bergstrom, R.~Beyer, K.~Block, V.~Bray,
J.~Bridges, M.~Chojnacki, S.~Conway, W.~Delamere, T.~Ebben, A.~Espinosa,
A.~Fennema, J.~Grant, V.~Gulick, K.~Herkenhoff, R.~Heyd, R.~Leis, L.~Ojha,
S.~Papendick, C.~Schaller, N.~Thomas, L.~Tornabene, C.~Weitz, and S.~Wilson,
``The high-resolution imaging science experiment ({HiRISE}) in the {MRO} extended
science phases (2009--2023),'' \emph{Icarus}, vol. 419, p. 115795, 2024.
\bibitem{Malin07CTX}
M.~C. Malin, J.~F. Bell~III, B.~A. Cantor, M.~A. Caplinger, W.~M. Calvin, R.~T.
Clancy, K.~S. Edgett, L.~Edwards, R.~M. Haberle, P.~B. James, S.~W. Lee,
M.~A. Ravine, P.~C. Thomas, and M.~J. Wolff, ``Context camera investigation
on board the {Mars} {Reconnaissance} {Orbiter},'' \emph{Journal of Geophysical
Research: Planets}, vol. 112, no.~E5, 2007.
\bibitem{Murchie07CRISM}
S.~Murchie, R.~Arvidson, P.~Bedini, K.~Beisser, J.-P. Bibring, J.~Bishop,
J.~Boldt, P.~Cavender, T.~Choo, R.~T. Clancy, E.~H. Darlington,
D.~Des~Marais, R.~Espiritu, D.~Fort, R.~Green, E.~Guinness, J.~Hayes,
C.~Hash, K.~Heffernan, J.~Hemmler, G.~Heyler, D.~Humm, J.~Hutcheson,
N.~Izenberg, R.~Lee, J.~Lees, D.~Lohr, E.~Malaret, T.~Martin, J.~A. McGovern,
P.~McGuire, R.~Morris, J.~Mustard, S.~Pelkey, E.~Rhodes, M.~Robinson,
T.~Roush, E.~Schaefer, G.~Seagrave, F.~Seelos, P.~Silverglate, S.~Slavney,
M.~Smith, W.-J. Shyong, K.~Strohbehn, H.~Taylor, P.~Thompson, B.~Tossman,
M.~Wirzburger, and M.~Wolff, ``Compact reconnaissance imaging spectrometer
for {Mars} ({CRISM}) on {Mars} {Reconnaissance} {Orbiter} ({MRO}),'' \emph{Journal of
Geophysical Research: Planets}, vol. 112, no.~E5, 2007.
\bibitem{Smith01MOLA}
D.~E. Smith, M.~T. Zuber, H.~V. Frey, J.~B. Garvin, J.~W. Head, D.~O. Muhleman,
G.~H. Pettengill, R.~J. Phillips, S.~C. Solomon, H.~J. Zwally, W.~B. Banerdt,
T.~C. Duxbury, M.~P. Golombek, F.~G. Lemoine, G.~A. Neumann, D.~D. Rowlands,
O.~Aharonson, P.~G. Ford, A.~B. Ivanov, C.~L. Johnson, P.~J. McGovern, J.~B.
Abshire, R.~S. Afzal, and X.~Sun, ``Mars {Orbiter} {Laser} {Altimeter}: Experiment
summary after the first year of global mapping of {Mars},'' \emph{Journal of
Geophysical Research: Planets}, vol. 106, no. E10, pp. 23\,689--23\,722,
2001.
\bibitem{Grotzinger12Curiosity}
J.~P. Grotzinger, J.~Crisp, A.~R. Vasavada, R.~C. Anderson, C.~J. Baker,
R.~Barry, D.~F. Blake, P.~Conrad, K.~S. Edgett, B.~Ferdowski, R.~Gellert,
J.~B. Gilbert, M.~Golombek, J.~Gómez-Elvira, D.~M. Hassler, L.~Jandura,
M.~Litvak, P.~Mahaffy, J.~Maki, M.~Meyer, M.~C. Malin, I.~Mitrofanov, J.~J.
Simmonds, D.~Vaniman, R.~V. Welch, and R.~C. Wiens, ``Mars {Science} {Laboratory}
mission and science investigation,'' \emph{Space Science Reviews}, vol. 170,
no.~1, pp. 5--56, 2012.
\bibitem{Li21ZhuRong}
C.~Li, R.~Zhang, D.~Yu, G.~Dong, J.~Liu, Y.~Geng, Z.~Sun, W.~Yan, X.~Ren,
Y.~Su, W.~Zuo, T.~Zhang, J.~Cao, G.~Fang, J.~Yang, R.~Shu, Y.~Lin, Y.~Zou,
D.~Liu, B.~Liu, D.~Kong, X.~Zhu, and Z.~Ouyang, ``China's {Mars} exploration
mission and science investigation,'' \emph{Space Science Reviews}, vol. 217,
no.~4, p.~57, 2021.
\bibitem{Wang26marsretrieval}
S.~Wang, Y.~Wang, and H.~Wei, ``Marsretrieval: Benchmarking vision-language
models for planetary-scale geospatial retrieval on {Mars},'' \emph{arXiv
preprint arXiv:2602.13961}, 2026.
\bibitem{Cai25LLM}
W.~Cai, J.~Jiang, F.~Wang, J.~Tang, S.~Kim, and J.~Huang, ``A survey on mixture
of experts in large language models,'' \emph{IEEE Transactions on Knowledge
and Data Engineering}, vol.~37, no.~7, pp. 3896--3915, 2025.
\bibitem{Lewis20RAG}
P.~Lewis, E.~Perez, A.~Piktus, F.~Petroni, V.~Karpukhin, N.~Goyal,
H.~K\"{u}ttler, M.~Lewis, W.-t. Yih, T.~Rockt\"{a}schel, S.~Riedel, and
D.~Kiela, ``Retrieval-augmented generation for knowledge-intensive {NLP}
tasks,'' in \emph{Proceedings of the 34th International Conference on Neural
Information Processing Systems}, ser. NIPS '20.\hskip 1em plus 0.5em minus
0.4em\relax Red Hook, NY, USA: Curran Associates Inc., 2020.
\bibitem{Zhou24hallucination}
Y.~Zhou, Z.~Liu, J.~Jin, J.-Y. Nie, and Z.~Dou, ``Metacognitive
retrieval-augmented large language models,'' in \emph{Proceedings of the ACM
Web Conference 2024}, ser. WWW '24.\hskip 1em plus 0.5em minus 0.4em\relax
New York, NY, USA: Association for Computing Machinery, 2024, pp. 1453--1463.
\bibitem{Pan24KGandLLM}
S.~Pan, L.~Luo, Y.~Wang, C.~Chen, J.~Wang, and X.~Wu, ``Unifying large language
models and knowledge graphs: A roadmap,'' \emph{IEEE Transactions on Knowledge
and Data Engineering}, vol.~36, no.~7, pp. 3580--3599, Jul. 2024.
\bibitem{Wu25MultiRAG}
W.~Wu, H.~Wang, B.~Li, P.~Huang, X.~Zhao, and L.~Liang, ``{MultiRAG}: A
knowledge-guided framework for mitigating hallucination in multi-source
retrieval augmented generation,'' in \emph{2025 IEEE 41st International
Conference on Data Engineering (ICDE)}, 2025, pp. 3070--3083.
\end{thebibliography}

60
MarsRAG/MarsRAG.blg Normal file
View File

@@ -0,0 +1,60 @@
This is BibTeX, Version 0.99d
Capacity: max_strings=200000, hash_size=200000, hash_prime=170003
The top-level auxiliary file: MarsRAG.aux
Reallocating 'name_of_file' (item size: 1) to 9 items.
The style file: IEEEtran.bst
Reallocating 'name_of_file' (item size: 1) to 9 items.
Reallocating 'name_of_file' (item size: 1) to 11 items.
Reallocating 'singl_function' (item size: 4) to 100 items.
Reallocating 'singl_function' (item size: 4) to 100 items.
Reallocating 'singl_function' (item size: 4) to 100 items.
Reallocating 'wiz_functions' (item size: 4) to 6000 items.
Reallocating 'singl_function' (item size: 4) to 100 items.
Database file #1: IEEEabrv.bib
Database file #2: references.bib
-- IEEEtran.bst version 1.14 (2015/08/26) by Michael Shell.
-- http://www.michaelshell.org/tex/ieeetran/bibtex/
-- See the "IEEEtran_bst_HOWTO.pdf" manual for usage information.
Done.
You've used 12 entries,
4087 wiz_defined-function locations,
1697 strings with 28523 characters,
and the built_in function-call counts, 14692 in all, are:
= -- 937
> -- 870
< -- 57
+ -- 445
- -- 212
* -- 777
:= -- 1868
add.period$ -- 26
call.type$ -- 12
change.case$ -- 12
chr.to.int$ -- 102
cite$ -- 12
duplicate$ -- 1032
empty$ -- 1037
format.name$ -- 218
if$ -- 3442
int.to.chr$ -- 0
int.to.str$ -- 12
missing$ -- 287
newline$ -- 59
num.names$ -- 12
pop$ -- 762
preamble$ -- 1
purify$ -- 0
quote$ -- 2
skip$ -- 1054
stack$ -- 0
substring$ -- 325
swap$ -- 893
text.length$ -- 23
text.prefix$ -- 0
top$ -- 5
type$ -- 12
warning$ -- 0
while$ -- 29
width$ -- 14
write$ -- 143

710
MarsRAG/MarsRAG.log Normal file
View File

@@ -0,0 +1,710 @@
This is pdfTeX, Version 3.141592653-2.6-1.40.25 (MiKTeX 23.4) (preloaded format=pdflatex 2025.10.23) 2 APR 2026 20:14
entering extended mode
restricted \write18 enabled.
%&-line parsing enabled.
**./MarsRAG.tex
(MarsRAG.tex
LaTeX2e <2022-11-01> patch level 1
L3 programming layer <2023-03-30>
(D:\software\ctex\MiKTeX\tex/latex/ieeetran\IEEEtran.cls
Document Class: IEEEtran 2015/08/26 V1.8b by Michael Shell
-- See the "IEEEtran_HOWTO" manual for usage information.
-- http://www.michaelshell.org/tex/ieeetran/
\@IEEEtrantmpdimenA=\dimen140
\@IEEEtrantmpdimenB=\dimen141
\@IEEEtrantmpdimenC=\dimen142
\@IEEEtrantmpcountA=\count185
\@IEEEtrantmpcountB=\count186
\@IEEEtrantmpcountC=\count187
\@IEEEtrantmptoksA=\toks16
LaTeX Font Info: Trying to load font information for OT1+ptm on input line 5
03.
(D:\software\ctex\MiKTeX\tex/latex/psnfss\ot1ptm.fd
File: ot1ptm.fd 2001/06/04 font definitions for OT1/ptm.
)
-- Using 8.5in x 11in (letter) paper.
-- Using PDF output.
\@IEEEnormalsizeunitybaselineskip=\dimen143
-- This is a 10 point document.
\CLASSINFOnormalsizebaselineskip=\dimen144
\CLASSINFOnormalsizeunitybaselineskip=\dimen145
\IEEEnormaljot=\dimen146
LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <5> not available
(Font) Font shape `OT1/ptm/b/n' tried instead on input line 1090.
LaTeX Font Info: Font shape `OT1/ptm/bx/it' in size <5> not available
(Font) Font shape `OT1/ptm/b/it' tried instead on input line 1090.
LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <7> not available
(Font) Font shape `OT1/ptm/b/n' tried instead on input line 1090.
LaTeX Font Info: Font shape `OT1/ptm/bx/it' in size <7> not available
(Font) Font shape `OT1/ptm/b/it' tried instead on input line 1090.
LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <8> not available
(Font) Font shape `OT1/ptm/b/n' tried instead on input line 1090.
LaTeX Font Info: Font shape `OT1/ptm/bx/it' in size <8> not available
(Font) Font shape `OT1/ptm/b/it' tried instead on input line 1090.
LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <9> not available
(Font) Font shape `OT1/ptm/b/n' tried instead on input line 1090.
LaTeX Font Info: Font shape `OT1/ptm/bx/it' in size <9> not available
(Font) Font shape `OT1/ptm/b/it' tried instead on input line 1090.
LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <10> not available
(Font) Font shape `OT1/ptm/b/n' tried instead on input line 1090.
LaTeX Font Info: Font shape `OT1/ptm/bx/it' in size <10> not available
(Font) Font shape `OT1/ptm/b/it' tried instead on input line 1090.
LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <11> not available
(Font) Font shape `OT1/ptm/b/n' tried instead on input line 1090.
LaTeX Font Info: Font shape `OT1/ptm/bx/it' in size <11> not available
(Font) Font shape `OT1/ptm/b/it' tried instead on input line 1090.
LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <12> not available
(Font) Font shape `OT1/ptm/b/n' tried instead on input line 1090.
LaTeX Font Info: Font shape `OT1/ptm/bx/it' in size <12> not available
(Font) Font shape `OT1/ptm/b/it' tried instead on input line 1090.
LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <17> not available
(Font) Font shape `OT1/ptm/b/n' tried instead on input line 1090.
LaTeX Font Info: Font shape `OT1/ptm/bx/it' in size <17> not available
(Font) Font shape `OT1/ptm/b/it' tried instead on input line 1090.
LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <20> not available
(Font) Font shape `OT1/ptm/b/n' tried instead on input line 1090.
LaTeX Font Info: Font shape `OT1/ptm/bx/it' in size <20> not available
(Font) Font shape `OT1/ptm/b/it' tried instead on input line 1090.
LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <24> not available
(Font) Font shape `OT1/ptm/b/n' tried instead on input line 1090.
LaTeX Font Info: Font shape `OT1/ptm/bx/it' in size <24> not available
(Font) Font shape `OT1/ptm/b/it' tried instead on input line 1090.
\IEEEquantizedlength=\dimen147
\IEEEquantizedlengthdiff=\dimen148
\IEEEquantizedtextheightdiff=\dimen149
\IEEEilabelindentA=\dimen150
\IEEEilabelindentB=\dimen151
\IEEEilabelindent=\dimen152
\IEEEelabelindent=\dimen153
\IEEEdlabelindent=\dimen154
\IEEElabelindent=\dimen155
\IEEEiednormlabelsep=\dimen156
\IEEEiedmathlabelsep=\dimen157
\IEEEiedtopsep=\skip48
\c@section=\count188
\c@subsection=\count189
\c@subsubsection=\count190
\c@paragraph=\count191
\c@IEEEsubequation=\count192
\abovecaptionskip=\skip49
\belowcaptionskip=\skip50
\c@figure=\count193
\c@table=\count194
\@IEEEeqnnumcols=\count195
\@IEEEeqncolcnt=\count196
\@IEEEsubeqnnumrollback=\count197
\@IEEEquantizeheightA=\dimen158
\@IEEEquantizeheightB=\dimen159
\@IEEEquantizeheightC=\dimen160
\@IEEEquantizeprevdepth=\dimen161
\@IEEEquantizemultiple=\count198
\@IEEEquantizeboxA=\box51
\@IEEEtmpitemindent=\dimen162
\IEEEPARstartletwidth=\dimen163
\c@IEEEbiography=\count199
\@IEEEtranrubishbin=\box52
) (D:\software\ctex\MiKTeX\tex/latex/amsmath\amsmath.sty
Package: amsmath 2022/04/08 v2.17n AMS math features
\@mathmargin=\skip51
For additional information on amsmath, use the `?' option.
(D:\software\ctex\MiKTeX\tex/latex/amsmath\amstext.sty
Package: amstext 2021/08/26 v2.01 AMS text
(D:\software\ctex\MiKTeX\tex/latex/amsmath\amsgen.sty
File: amsgen.sty 1999/11/30 v2.0 generic functions
\@emptytoks=\toks17
\ex@=\dimen164
))
(D:\software\ctex\MiKTeX\tex/latex/amsmath\amsbsy.sty
Package: amsbsy 1999/11/29 v1.2d Bold Symbols
\pmbraise@=\dimen165
)
(D:\software\ctex\MiKTeX\tex/latex/amsmath\amsopn.sty
Package: amsopn 2022/04/08 v2.04 operator names
)
\inf@bad=\count266
LaTeX Info: Redefining \frac on input line 234.
\uproot@=\count267
\leftroot@=\count268
LaTeX Info: Redefining \overline on input line 399.
LaTeX Info: Redefining \colon on input line 410.
\classnum@=\count269
\DOTSCASE@=\count270
LaTeX Info: Redefining \ldots on input line 496.
LaTeX Info: Redefining \dots on input line 499.
LaTeX Info: Redefining \cdots on input line 620.
\Mathstrutbox@=\box53
\strutbox@=\box54
LaTeX Info: Redefining \big on input line 722.
LaTeX Info: Redefining \Big on input line 723.
LaTeX Info: Redefining \bigg on input line 724.
LaTeX Info: Redefining \Bigg on input line 725.
\big@size=\dimen166
LaTeX Font Info: Redeclaring font encoding OML on input line 743.
LaTeX Font Info: Redeclaring font encoding OMS on input line 744.
\macc@depth=\count271
LaTeX Info: Redefining \bmod on input line 905.
LaTeX Info: Redefining \pmod on input line 910.
LaTeX Info: Redefining \smash on input line 940.
LaTeX Info: Redefining \relbar on input line 970.
LaTeX Info: Redefining \Relbar on input line 971.
\c@MaxMatrixCols=\count272
\dotsspace@=\muskip16
\c@parentequation=\count273
\dspbrk@lvl=\count274
\tag@help=\toks18
\row@=\count275
\column@=\count276
\maxfields@=\count277
\andhelp@=\toks19
\eqnshift@=\dimen167
\alignsep@=\dimen168
\tagshift@=\dimen169
\tagwidth@=\dimen170
\totwidth@=\dimen171
\lineht@=\dimen172
\@envbody=\toks20
\multlinegap=\skip52
\multlinetaggap=\skip53
\mathdisplay@stack=\toks21
LaTeX Info: Redefining \[ on input line 2953.
LaTeX Info: Redefining \] on input line 2954.
)
(D:\software\ctex\MiKTeX\tex/latex/amsfonts\amsfonts.sty
Package: amsfonts 2013/01/14 v3.01 Basic AMSFonts support
\symAMSa=\mathgroup4
\symAMSb=\mathgroup5
LaTeX Font Info: Redeclaring math symbol \hbar on input line 98.
LaTeX Font Info: Overwriting math alphabet `\mathfrak' in version `bold'
(Font) U/euf/m/n --> U/euf/b/n on input line 106.
)
(D:\software\ctex\MiKTeX\tex/latex/amsfonts\amssymb.sty
Package: amssymb 2013/01/14 v3.01 AMS font symbols
)
(D:\software\ctex\MiKTeX\tex/latex/algorithms\algorithmic.sty
Package: algorithmic 2009/08/24 v0.1 Document Style `algorithmic'
(D:\software\ctex\MiKTeX\tex/latex/base\ifthen.sty
Package: ifthen 2022/04/13 v1.1d Standard LaTeX ifthen package (DPC)
)
(D:\software\ctex\MiKTeX\tex/latex/graphics\keyval.sty
Package: keyval 2022/05/29 v1.15 key=value parser (DPC)
\KV@toks@=\toks22
)
\c@ALC@unique=\count278
\c@ALC@line=\count279
\c@ALC@rem=\count280
\c@ALC@depth=\count281
\ALC@tlm=\skip54
\algorithmicindent=\skip55
)
(D:\software\ctex\MiKTeX\tex/latex/algorithms\algorithm.sty
Package: algorithm 2009/08/24 v0.1 Document Style `algorithm' - floating enviro
nment
(D:\software\ctex\MiKTeX\tex/latex/float\float.sty
Package: float 2001/11/08 v1.3d Float enhancements (AL)
\c@float@type=\count282
\float@exts=\toks23
\float@box=\box55
\@float@everytoks=\toks24
\@floatcapt=\box56
)
\@float@every@algorithm=\toks25
\c@algorithm=\count283
)
(D:\software\ctex\MiKTeX\tex/latex/tools\array.sty
Package: array 2022/09/04 v2.5g Tabular extension package (FMi)
\col@sep=\dimen173
\ar@mcellbox=\box57
\extrarowheight=\dimen174
\NC@list=\toks26
\extratabsurround=\skip56
\backup@length=\skip57
\ar@cellbox=\box58
)
(D:\software\ctex\MiKTeX\tex/latex/makecell\makecell.sty
Package: makecell 2009/08/03 V0.1e Managing of Tab Column Heads and Cells
\rotheadsize=\dimen175
\c@nlinenum=\count284
\TeXr@lab=\toks27
)
(D:\software\ctex\MiKTeX\tex/latex/multirow\multirow.sty
Package: multirow 2021/03/15 v2.8 Span multiple rows of a table
\multirow@colwidth=\skip58
\multirow@cntb=\count285
\multirow@dima=\skip59
\bigstrutjot=\dimen176
)
(D:\software\ctex\MiKTeX\tex/latex/subfig\subfig.sty
Package: subfig 2005/06/28 ver: 1.3 subfig package
(D:\software\ctex\MiKTeX\tex/latex/caption\caption3.sty
Package: caption3 2023/03/12 v2.4 caption3 kernel (AR)
\caption@tempdima=\dimen177
\captionmargin=\dimen178
\caption@leftmargin=\dimen179
\caption@rightmargin=\dimen180
\caption@width=\dimen181
\caption@indent=\dimen182
\caption@parindent=\dimen183
\caption@hangindent=\dimen184
Package caption Info: Unknown document class (or package),
(caption) standard defaults will be used.
Package caption Info: \@makecaption = \long macro:#1#2->\ifx \@captype \@IEEEta
blestring \footnotesize \bgroup \par \centering \@IEEEtabletopskipstrut {\norma
lfont \footnotesize #1}\\{\normalfont \footnotesize \scshape #2}\par \addvspace
{0.5\baselineskip }\egroup \@IEEEtablecaptionsepspace \else \@IEEEfigurecaptio
nsepspace \setbox \@tempboxa \hbox {\normalfont \footnotesize {#1.}\nobreakspac
e \nobreakspace #2}\ifdim \wd \@tempboxa >\hsize \setbox \@tempboxa \hbox {\nor
malfont \footnotesize {#1.}\nobreakspace \nobreakspace }\parbox [t]{\hsize }{\n
ormalfont \footnotesize \noindent \unhbox \@tempboxa #2}\else \ifCLASSOPTIONcon
ference \hbox to\hsize {\normalfont \footnotesize \hfil \box \@tempboxa \hfil }
\else \hbox to\hsize {\normalfont \footnotesize \box \@tempboxa \hfil }\fi \fi
\fi on input line 1176.
)
\c@KVtest=\count286
\sf@farskip=\skip60
\sf@captopadj=\dimen185
\sf@capskip=\skip61
\sf@nearskip=\skip62
\c@subfigure=\count287
\c@subfigure@save=\count288
\c@lofdepth=\count289
\c@subtable=\count290
\c@subtable@save=\count291
\c@lotdepth=\count292
\sf@top=\skip63
\sf@bottom=\skip64
)
(D:\software\ctex\MiKTeX\tex/latex/base\textcomp.sty
Package: textcomp 2020/02/02 v2.0n Standard LaTeX package
)
(D:\software\ctex\MiKTeX\tex/latex/sttools\stfloats.sty
Package: stfloats 2025/06/18 v3.4 Improve float mechanism and baselineskip sett
ings
\@dblbotnum=\count293
\c@dblbotnumber=\count294
)
(D:\software\ctex\MiKTeX\tex/latex/url\url.sty
\Urlmuskip=\muskip17
Package: url 2013/09/16 ver 3.4 Verb mode for urls, etc.
)
(D:\software\ctex\MiKTeX\tex/latex/tools\verbatim.sty
Package: verbatim 2022-07-02 v1.5u LaTeX2e package for verbatim enhancements
\every@verbatim=\toks28
\verbatim@line=\toks29
\verbatim@in@stream=\read2
)
(D:\software\ctex\MiKTeX\tex/latex/graphics\graphicx.sty
Package: graphicx 2021/09/16 v1.2d Enhanced LaTeX Graphics (DPC,SPQR)
(D:\software\ctex\MiKTeX\tex/latex/graphics\graphics.sty
Package: graphics 2022/03/10 v1.4e Standard LaTeX Graphics (DPC,SPQR)
(D:\software\ctex\MiKTeX\tex/latex/graphics\trig.sty
Package: trig 2021/08/11 v1.11 sin cos tan (DPC)
)
(D:\software\ctex\MiKTeX\tex/latex/graphics-cfg\graphics.cfg
File: graphics.cfg 2016/06/04 v1.11 sample graphics configuration
)
Package graphics Info: Driver file: pdftex.def on input line 107.
(D:\software\ctex\MiKTeX\tex/latex/graphics-def\pdftex.def
File: pdftex.def 2022/09/22 v1.2b Graphics/color driver for pdftex
))
\Gin@req@height=\dimen186
\Gin@req@width=\dimen187
)
(D:\software\ctex\MiKTeX\tex/latex/cite\cite.sty
LaTeX Info: Redefining \cite on input line 302.
LaTeX Info: Redefining \nocite on input line 332.
Package: cite 2015/02/27 v 5.5
)
(D:\software\ctex\MiKTeX\tex/latex/l3backend\l3backend-pdftex.def
File: l3backend-pdftex.def 2023-03-30 L3 backend support: PDF output (pdfTeX)
\l__color_backend_stack_int=\count295
\l__pdf_internal_box=\box59
)
LaTeX Warning: Unused global option(s):
[lettersize].
(MarsRAG.aux)
\openout1 = `MarsRAG.aux'.
LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 18.
LaTeX Font Info: ... okay on input line 18.
LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 18.
LaTeX Font Info: ... okay on input line 18.
LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 18.
LaTeX Font Info: ... okay on input line 18.
LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 18.
LaTeX Font Info: ... okay on input line 18.
LaTeX Font Info: Checking defaults for TS1/cmr/m/n on input line 18.
LaTeX Font Info: ... okay on input line 18.
LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 18.
LaTeX Font Info: ... okay on input line 18.
LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 18.
LaTeX Font Info: ... okay on input line 18.
-- Lines per column: 58 (exact).
Package caption Info: Begin \AtBeginDocument code.
Package caption Info: subfig package v1.3 is loaded.
Package caption Info: End \AtBeginDocument code.
(D:\software\ctex\MiKTeX\tex/context/base/mkii\supp-pdf.mkii
[Loading MPS to PDF converter (version 2006.09.02).]
\scratchcounter=\count296
\scratchdimen=\dimen188
\scratchbox=\box60
\nofMPsegments=\count297
\nofMParguments=\count298
\everyMPshowfont=\toks30
\MPscratchCnt=\count299
\MPscratchDim=\dimen189
\MPnumerator=\count300
\makeMPintoPDFobject=\count301
\everyMPtoPDFconversion=\toks31
) (D:\software\ctex\MiKTeX\tex/latex/epstopdf-pkg\epstopdf-base.sty
Package: epstopdf-base 2020-01-24 v2.11 Base part for package epstopdf
Package epstopdf-base Info: Redefining graphics rule for `.eps' on input line 4
85.
(D:\software\ctex\MiKTeX\tex/latex/00miktex\epstopdf-sys.cfg
File: epstopdf-sys.cfg 2021/03/18 v2.0 Configuration of epstopdf for MiKTeX
))
LaTeX Font Info: Calculating math sizes for size <11> on input line 42.
LaTeX Font Info: Trying to load font information for U+msa on input line 42.
(D:\software\ctex\MiKTeX\tex/latex/amsfonts\umsa.fd
File: umsa.fd 2013/01/14 v3.01 AMS symbols A
)
LaTeX Font Info: Trying to load font information for U+msb on input line 42.
(D:\software\ctex\MiKTeX\tex/latex/amsfonts\umsb.fd
File: umsb.fd 2013/01/14 v3.01 AMS symbols B
)
LaTeX Font Info: Trying to load font information for TS1+ptm on input line 5
8.
(D:\software\ctex\MiKTeX\tex/latex/psnfss\ts1ptm.fd
File: ts1ptm.fd 2001/06/04 font definitions for TS1/ptm.
) [1{D:/software/ctex/UserData/fonts/map/pdftex/pdftex.map{Unicode.sfd}{UGBK.sf
d}}{D:/software/ctex/MiKTeX/fonts/enc/dvips/base/8r.enc}
] [2]
Underfull \hbox (badness 1642) in paragraph at lines 82--83
\OT1/ptm/m/n/10 (Anti-Over-Smoothing Guar-an-tee) while main-tain-ing
[]
[3]
Overfull \hbox (12.1057pt too wide) detected at line 157
[][] [] \OML/cmm/m/it/10 :
[]
[4]
Underfull \hbox (badness 1910) in paragraph at lines 171--172
[]\OT1/ptm/m/n/10 **Proposition 1** (Spa-tial Scale-Curvature Cor-re-spon-
[]
Overfull \hbox (9.20645pt too wide) detected at line 211
[]\OT1/cmr/bx/n/10 x \OT1/cmr/m/n/10 = [] \OML/cmm/m/it/10 ;
[]
Overfull \hbox (2.51953pt too wide) detected at line 216
[]\OML/cmm/m/it/10 []\OT1/cmr/m/n/10 (\OML/cmm/m/it/10 e[]; e[]\OT1/cmr/m/n/10
) = [] \OML/cmm/m/it/10 ;
[]
Underfull \hbox (badness 10000) in paragraph at lines 227--228
[]\OT1/ptm/m/n/10 Given spa-tial ob-ser-va-tion hy-per-edge em-bed-dings
[]
Underfull \hbox (badness 3895) in paragraph at lines 227--228
\OMS/cmsy/m/n/10 f\OT1/cmr/m/n/10 ^^H(\OML/cmm/m/it/10 f[]\OT1/cmr/m/n/10 )\OMS
/cmsy/m/n/10 g[] ^^Z \U/msb/m/n/10 H[]$ \OT1/ptm/m/n/10 with query-relevance we
ights $\OML/cmm/m/it/10 w[]$
[]
[5]
Overfull \hbox (113.53706pt too wide) detected at line 249
\OMS/cmsy/m/n/10 H[]\OT1/cmr/m/n/10 (\OML/cmm/m/it/10 p[]; p[] \OMS/cmsy/m/n/10
j \OML/cmm/m/it/10 q\OT1/cmr/m/n/10 ) = \OML/cmm/m/it/10 H [] \OMS/cmsy/m/n/10
^^@ [] [] \OML/cmm/m/it/10 ;
[]
Overfull \hbox (41.67476pt too wide) in paragraph at lines 276--290
[][]
[]
Overfull \hbox (49.71666pt too wide) detected at line 294
\OT1/cmr/bx/n/10 z[] \OT1/cmr/m/n/10 = []
[]
[6]
Underfull \hbox (badness 3019) in paragraph at lines 302--303
[]\OT1/ptm/m/n/10 **Proposition 2** (Con-flict Type Sep-a-ra-bil-ity). *The
[]
Overfull \hbox (10.51593pt too wide) detected at line 306
\OML/cmm/m/it/10 C[]\OT1/cmr/m/n/10 (\OML/cmm/m/it/10 v\OT1/cmr/m/n/10 ) = []
[]
LaTeX Font Info: Trying to load font information for OMS+ptm on input line 3
29.
(D:\software\ctex\MiKTeX\tex/latex/psnfss\omsptm.fd
File: omsptm.fd
)
LaTeX Font Info: Font shape `OMS/ptm/m/n' in size <10> not available
(Font) Font shape `OMS/cmsy/m/n' tried instead on input line 329.
[7]
Underfull \hbox (badness 10000) in paragraph at lines 383--383
|[]
[]
Overfull \hbox (14.39503pt too wide) in paragraph at lines 383--383
[]
[]
Underfull \hbox (badness 10000) in paragraph at lines 383--383
|[]
[]
Overfull \hbox (11.8429pt too wide) in paragraph at lines 383--383
[]
[]
Overfull \hbox (25.19485pt too wide) in paragraph at lines 386--386
[]|[]|
[]
Underfull \hbox (badness 10000) in paragraph at lines 386--386
|[]
[]
Overfull \hbox (27.67467pt too wide) in paragraph at lines 386--386
[]
[]
Underfull \hbox (badness 10000) in paragraph at lines 388--388
|[]
[]
Overfull \hbox (18.33882pt too wide) in paragraph at lines 388--388
[]
[]
Underfull \hbox (badness 10000) in paragraph at lines 390--390
|[]
[]
Overfull \hbox (27.23465pt too wide) in paragraph at lines 390--390
[]
[]
Underfull \hbox (badness 10000) in paragraph at lines 392--392
|[]
[]
Overfull \hbox (25.89078pt too wide) in paragraph at lines 392--392
[]
[]
Underfull \hbox (badness 10000) in paragraph at lines 394--394
|[]
[]
Overfull \hbox (14.54706pt too wide) in paragraph at lines 394--394
[]
[]
Underfull \hbox (badness 10000) in paragraph at lines 396--396
|[]
[]
Overfull \hbox (24.43471pt too wide) in paragraph at lines 396--396
[]
[]
Underfull \hbox (badness 10000) in paragraph at lines 396--396
|[]
[]
Overfull \hbox (30.10707pt too wide) in paragraph at lines 396--396
[]
[]
Underfull \hbox (badness 10000) in paragraph at lines 398--398
|[]
[]
Overfull \hbox (32.7467pt too wide) in paragraph at lines 398--398
[]
[]
Underfull \hbox (badness 10000) in paragraph at lines 398--398
|[]
[]
Overfull \hbox (30.10707pt too wide) in paragraph at lines 398--398
[]
[]
Underfull \hbox (badness 2452) in paragraph at lines 403--404
[]\OT1/ptm/m/n/10 Additionally, to val-i-date gen-er-al-iza-tion on es-tab-lish
ed
[]
[8]
Underfull \hbox (badness 10000) in paragraph at lines 433--434
[]\OT1/ptm/m/n/10 1) **Stan-dard RAG** [6]: Con-ven-tional retrieval-
[]
Underfull \hbox (badness 1603) in paragraph at lines 443--444
[]\OT1/ptm/m/n/10 5) **Hy-per-GraphRAG** [25]: Hypergraph-based RAG
[]
Underfull \hbox (badness 2698) in paragraph at lines 497--498
\OT1/ptm/m/n/10 ti-HopQA), Are-oRAG main-tains com-pet-i-tive per-for-mance
[]
[9]
Underfull \hbox (badness 10000) in paragraph at lines 542--542
[]|\OT1/ptm/m/n/8 w/o In-ter-ac-tion En-tropy (use
[]
Underfull \hbox (badness 3271) in paragraph at lines 549--550
[]\OT1/ptm/m/n/10 **a) HySH Mod-ule Anal-y-sis:** The HySH mod-ule
[]
Underfull \hbox (badness 1917) in paragraph at lines 551--552
\OT1/ptm/m/n/10 F1 im-prove-ment over Eu-clidean hy-per-graph (49.2% vs.
[]
Underfull \vbox (badness 10000) has occurred while \output is active []
[10]
Underfull \hbox (badness 10000) in paragraph at lines 574--574
[]|\OT1/ptm/m/n/8 Standard
[]
Underfull \hbox (badness 10000) in paragraph at lines 576--576
[]|\OT1/ptm/m/n/8 MultiRAG
[]
Underfull \hbox (badness 10000) in paragraph at lines 582--582
[]|\OT1/ptm/b/n/8 AreoRAG
[]
[11]
Package textcomp Info: Symbol \textrightarrow not provided by
(textcomp) font family ptm in TS1 encoding.
(textcomp) Default family used instead on input line 674.
Package textcomp Info: Symbol \textrightarrow not provided by
(textcomp) font family ptm in TS1 encoding.
(textcomp) Default family used instead on input line 674.
[12{D:/software/ctex/MiKTeX/fonts/enc/dvips/cm-super/cm-super-ts1.enc}]
[13]
Underfull \hbox (badness 2495) in paragraph at lines 708--709
[]\OT1/ptm/m/n/10 This work is sup-ported by the Na-tional Key R&D
[]
Underfull \hbox (badness 2799) in paragraph at lines 708--709
\OT1/ptm/m/n/10 Pro-gram of China ``In-ter-gov-ern-men-tal In-ter-na-tional Sci
-
[]
Underfull \hbox (badness 7576) in paragraph at lines 708--709
\OT1/ptm/m/n/10 ence and Tech-nol-ogy In-no-va-tion Co-op-er-a-tion" (Grant
[]
(MarsRAG.bbl) [14] (MarsRAG.aux) )
Here is how much of TeX's memory you used:
5477 strings out of 476331
91312 string characters out of 5797649
1895660 words of memory out of 5000000
25824 multiletter control sequences out of 15000+600000
562405 words of font info for 135 fonts, out of 8000000 for 9000
1145 hyphenation exceptions out of 8191
57i,19n,63p,2271b,410s stack positions out of 10000i,1000n,20000p,200000b,200000s
<D:/software/ctex/MiKTeX/fonts/type1/public/a
msfonts/cm/cmbx10.pfb><D:/software/ctex/MiKTeX/fonts/type1/public/amsfonts/cm/c
mbx7.pfb><D:/software/ctex/MiKTeX/fonts/type1/public/amsfonts/cm/cmex10.pfb><D:
/software/ctex/MiKTeX/fonts/type1/public/amsfonts/cm/cmmi10.pfb><D:/software/ct
ex/MiKTeX/fonts/type1/public/amsfonts/cm/cmmi5.pfb><D:/software/ctex/MiKTeX/fon
ts/type1/public/amsfonts/cm/cmmi6.pfb><D:/software/ctex/MiKTeX/fonts/type1/publ
ic/amsfonts/cm/cmmi7.pfb><D:/software/ctex/MiKTeX/fonts/type1/public/amsfonts/c
m/cmmi8.pfb><D:/software/ctex/MiKTeX/fonts/type1/public/amsfonts/cm/cmmi9.pfb><
D:/software/ctex/MiKTeX/fonts/type1/public/amsfonts/cm/cmr10.pfb><D:/software/c
tex/MiKTeX/fonts/type1/public/amsfonts/cm/cmr6.pfb><D:/software/ctex/MiKTeX/fon
ts/type1/public/amsfonts/cm/cmr7.pfb><D:/software/ctex/MiKTeX/fonts/type1/publi
c/amsfonts/cm/cmr8.pfb><D:/software/ctex/MiKTeX/fonts/type1/public/amsfonts/cm/
cmr9.pfb><D:/software/ctex/MiKTeX/fonts/type1/public/amsfonts/cm/cmsy10.pfb><D:
/software/ctex/MiKTeX/fonts/type1/public/amsfonts/cm/cmsy5.pfb><D:/software/cte
x/MiKTeX/fonts/type1/public/amsfonts/cm/cmsy7.pfb><D:/software/ctex/MiKTeX/font
s/type1/public/amsfonts/cm/cmsy8.pfb><D:/software/ctex/MiKTeX/fonts/type1/publi
c/amsfonts/symbols/msam10.pfb><D:/software/ctex/MiKTeX/fonts/type1/public/amsfo
nts/symbols/msbm10.pfb><D:/software/ctex/MiKTeX/fonts/type1/public/cm-super/sfr
m1000.pfb><D:/software/ctex/MiKTeX/fonts/type1/urw/times/utmb8a.pfb><D:/softwar
e/ctex/MiKTeX/fonts/type1/urw/times/utmbi8a.pfb><D:/software/ctex/MiKTeX/fonts/
type1/urw/times/utmr8a.pfb><D:/software/ctex/MiKTeX/fonts/type1/urw/times/utmri
8a.pfb>
Output written on MarsRAG.pdf (14 pages, 384380 bytes).
PDF statistics:
175 PDF objects out of 1000 (max. 8388607)
0 named destinations out of 1000 (max. 500000)
1 words of extra memory for PDF output out of 10000 (max. 10000000)

BIN
MarsRAG/MarsRAG.pdf Normal file

Binary file not shown.

BIN
MarsRAG/MarsRAG.synctex.gz Normal file

Binary file not shown.

724
MarsRAG/MarsRAG.tex Normal file
View File

@@ -0,0 +1,724 @@
\documentclass[lettersize,journal]{IEEEtran}
\usepackage{amsmath,amsfonts,amssymb}
\usepackage{algorithmic}
\usepackage{algorithm}
\usepackage{array}
\usepackage{makecell}
\usepackage{multirow}
\usepackage[caption=false,font=normalsize,labelfont=sf,textfont=sf]{subfig}
\usepackage{textcomp}
\usepackage{stfloats}
\usepackage{url}
\usepackage{verbatim}
\usepackage{graphicx}
\usepackage{cite}
\hyphenation{op-tical net-works semi-conduc-tor IEEE-Xplore}
% updated with editorial comments 8/9/2021
\begin{document}
\title{AreoRAG: Hyperbolic Spatial Hypergraph and Physics-Informed Conflict Triage for Multi-Source Planetary Retrieval Augmented Generation}
\author{Ao~Long,
Ze~Deng,
and Lizhe~Wang$^{\dagger}$
% <-this % stops a space
\IEEEcompsocitemizethanks{
\IEEEcompsocthanksitem A. Long, W. Lin and Z. Deng, (Corresponding author, dengze@cug.edu.cn) are with the School of Computer Science, China University of Geosciences, Wuhan, 430078, P. R. China.
\IEEEcompsocthanksitem Z. Deng is also with Hubei Key Laboratory of Intelligent Geo-Information Processing, China University of Geosciences, Wuhan 430074, China.
}% <-this % stops a space
\thanks{}
}
% The paper headers
\markboth{Journal of \LaTeX\ Class Files,~Vol.~14, No.~8, August~2021}%
{Shell \MakeLowercase{\textit{et al.}}: A Sample Article Using IEEEtran.cls for IEEE Journals}
\IEEEpubid{0000--0000/00\$00.00~\copyright~2021 IEEE}
% Remember, if you use this you must call \IEEEpubidadjcol in the second
% column for its text to clear the IEEEpubid mark.
\maketitle
\begin{abstract}
Retrieval Augmented Generation (RAG) has demonstrated considerable promise in grounding Large Language Models (LLMs) with external knowledge for knowledge-intensive question answering. However, extending RAG to the domain of planetary science — where multi-source remote sensing observations are inherently embedded in continuous physical space and inter-source disagreements often carry scientific value — introduces fundamental challenges that existing multi-source RAG frameworks cannot address. These challenges manifest in two critical aspects: (1) existing discrete graph topologies (e.g., multi-source line graphs) suffer from edge explosion when encoding continuous spatial proximity, failing to bridge the gap between physical continuity and semantic discreteness; and (2) conventional conflict-filtering mechanisms, designed under the assumption that inter-source inconsistency implies unreliability, systematically suppress scientifically valuable observational disagreements that are intrinsic to multi-platform deep-space exploration. To address these challenges, we propose AreoRAG, a novel framework tailored for multi-source planetary spatial data retrieval augmented generation. Our framework introduces two key innovations: (1) a Hyperbolic Spatial Hypergraph (HySH) construction module that employs $n$-ary spatial observation hyperedges embedded in hyperbolic space via the Lorentz model, where spatial resolution is coupled with radial depth to faithfully represent the hierarchical scale structure of planetary observations while reducing edge complexity from $O(k^2)$ to $O(k)$; and (2) a Physics-Informed Conflict Triage (PICT) module that detects inter-source conflicts via cross-source interaction entropy, classifies them into four physically grounded categories (noise, instrument-inherent, scale-dependent, and temporal-evolution), and applies differentiated confidence recalibration to preserve scientifically valuable disagreements while filtering genuine noise. 
Extensive experiments on multi-source planetary observation datasets demonstrate that AreoRAG significantly enhances both the retrieval fidelity and the scientific faithfulness of knowledge-augmented generation in planetary science scenarios.
\end{abstract}
\begin{IEEEkeywords}
Retrieval Augmented Generation, Planetary Remote Sensing, Hypergraph, Hyperbolic Space, Knowledge Conflict Triage, Multi-source Spatial Data, Mars Exploration.
\end{IEEEkeywords}
\section{Introduction}
\IEEEPARstart{T}{he} past two decades have witnessed an unprecedented accumulation of multi-source remote sensing data from Mars exploration missions. Orbital platforms, such as the Mars Reconnaissance Orbiter, Mars Express, and Tianwen-1, continuously acquire observations across diverse modalities. These modalities range from sub-meter optical imagery (HiRISE) \cite{McEwen24HiRISE} and medium-resolution contextual mosaics (CTX) \cite{Malin07CTX} to hyperspectral mineralogical mapping (CRISM) \cite{Murchie07CRISM} and global topographic models (MOLA) \cite{Smith01MOLA}. Simultaneously, surface assets including the Curiosity \cite{Grotzinger12Curiosity} and Zhurong rovers \cite{Li21ZhuRong} generate complementary in-situ measurements through spectrometers, ground-penetrating radar, and navigation cameras. This rapidly expanding, multi-source, multi-resolution data ecosystem has created a pressing demand for intelligent knowledge retrieval systems that can support planetary scientists in conducting semantic search, cross-source correlation, and multi-scale reasoning over heterogeneous observation archives \cite{Wang26marsretrieval}.
Large Language Models (LLMs) have emerged as powerful tools for natural language understanding and generation \cite{Cai25LLM}, and Retrieval Augmented Generation (RAG) has been established as a standard paradigm for grounding LLM responses in external knowledge bases \cite{Lewis20RAG}. By dynamically retrieving relevant documents and conditioning generation on retrieved context, RAG effectively mitigates the hallucination problem inherent in LLMs and enables knowledge-intensive question answering \cite{Zhou24hallucination}. The synergy between LLMs and Knowledge Graphs (KGs) has further advanced retrieval performance through structured knowledge representation, achieving notable improvements in multi-hop reasoning, credibility assessment, and interpretability \cite{Pan24KGandLLM}.
Nevertheless, deploying RAG systems for planetary science knowledge retrieval introduces domain-specific complexities that fundamentally challenge existing frameworks. Unlike conventional multi-source retrieval scenarios (e.g., integrating flight records, financial reports, or web documents), planetary observation data possesses two distinctive characteristics. First, all data sources are spatially grounded: each observation is anchored to a specific spatial footprint on the Martian surface, a temporal acquisition window parameterized by Solar Longitude ($L_s$), and instrument-specific parameters such as spectral bands and spatial resolution. The relevance between two observations is therefore governed not merely by textual semantic similarity, but primarily by physical spatial proximity, temporal co-occurrence, and cross-resolution complementarity. Second, inter-source inconsistencies in planetary science are not exclusively indicative of data errors or model hallucinations; rather, they frequently arise as inherent consequences of multi-platform, multi-scale observation and may encode critical scientific discoveries — such as subsurface geological evolution revealed by discrepancies between orbital spectroscopy and in-situ drilling results.
Recent advances in multi-source RAG, exemplified by MultiRAG \cite{Wu25MultiRAG}, have made significant progress in addressing data sparsity and inter-source inconsistency through multi-source line graphs and multi-level confidence computation. However, when confronted with planetary spatial data, these methods encounter two structural bottlenecks that cannot be resolved through parameter tuning alone.
Building upon the analysis of existing multi-source RAG limitations [14]-[16] in the context of planetary science, we identify the following failure modes that are unique to spatially grounded, physically observed multi-source data:
\begin{enumerate}
\item Spatial topology distortion: When multi-source observations share no common textual entities but are spatially co-located, discrete line graphs fail to establish connectivity, resulting in fragmented retrieval.
\item Scale hierarchy collapse: Observations at different spatial resolutions (e.g., 0.3 m vs. 460 m) exhibit a natural hierarchical containment structure that flat graph topologies cannot represent, leading to loss of cross-resolution context during aggregation.
\item Scientifically valuable conflict suppression: Confidence-based conflict filtering indiscriminately eliminates disagreeing nodes, destroying observational evidence that may indicate genuine geological phenomena such as subsurface mineral heterogeneity.
\end{enumerate}
These failure modes trace back to two fundamental scientific problems:
\begin{enumerate}
\item \textbf{Problem 1: Discrete Representation Failure for Continuous Spatiotemporal Topology.} Existing multi-source knowledge aggregation methods, such as multi-source line graphs [14], rely on discrete text entities and explicit semantic associations to construct graph topology. However, planetary science data is intrinsically embedded in continuous Euclidean physical space. Attempting to encode continuous spatial proximity and directional relationships within traditional discrete graph structures inevitably triggers an edge explosion problem — $k$ co-located spatial entities require $\binom{k}{2} = O(k^2)$ pairwise spatial proximity edges — thereby destroying the optimizations that existing graph models achieve for data sparsity. The discrete logical graph structure thus constitutes a structural bottleneck constraining planetary spatial reasoning capabilities, unable to bridge the chasm between physical continuity and semantic discreteness.
\item \textbf{Problem 2: Fundamental Conflict Between Scientific Cognitive Divergence and Traditional De-Falsification Mechanisms.} The core assumption underlying existing multi-source RAG frameworks is that inter-source data inconsistency typically originates from misinformation or model hallucinations, and therefore relies on multi-level confidence computation to eliminate conflicting nodes [14], [17]. However, in deep-space exploration scenarios, the absence of absolute ground truth means that different observation platforms (e.g., orbiters versus rovers), due to differences in observation scale, penetration depth, and instrumental principles, often produce significantly conflicting results for the same target region. For instance, orbital spectrometers may detect surface hydrated minerals while in-situ drilling reveals no anomaly — a conflict arising not from data error, but from the inherent multi-dimensional nature of scientific observation, potentially harboring clues to major discoveries such as geological evolution. Applying existing conflict-filtering mechanisms indiscriminately would cause severe over-smoothing, uniformly suppressing high-value scientific anomalies and fundamentally violating the epistemological principle of deep-space exploration: preserving controversy and enabling multi-source corroboration for knowledge discovery.
\end{enumerate}
To address these two fundamental challenges, we propose AreoRAG, a novel framework specifically designed for multi-source planetary spatial data retrieval augmented generation. AreoRAG introduces two synergistic innovations. First, to resolve Problem 1, we construct a Hyperbolic Spatial Hypergraph (HySH) that employs $n$-ary spatial observation hyperedges to bind co-located multi-source observations into single high-order facts, reducing edge complexity from $O(k^2)$ to $O(k)$. These hyperedges are embedded in hyperbolic space via the Lorentz model, where the exponential volume growth of negative-curvature geometry naturally accommodates the hierarchical scale structure of planetary observations — coarse-resolution global data resides near the origin while fine-resolution local data extends toward the boundary. Second, to resolve Problem 2, we develop a Physics-Informed Conflict Triage (PICT) mechanism that replaces the uniform conflict-filtering paradigm with a differentiated triage approach. PICT detects inter-source conflicts through cross-source interaction entropy, classifies each conflict into one of four physically grounded categories (noise, instrument-inherent, scale-dependent, temporal-evolution), and applies category-specific confidence recalibration — filtering genuine noise while provably preserving and even boosting the confidence of scientifically valuable observational disagreements. Together, HySH provides spatially faithful multi-source evidence to PICT, while PICT feeds back triage results to prioritize scientifically interesting regions in subsequent retrieval, forming a tightly coupled framework.
The contributions of this paper are summarized as follows:
\begin{enumerate}
\item Hyperbolic Spatial Hypergraph Construction: We introduce HySH, a knowledge construction module that employs $n$-ary spatial observation hyperedges embedded in hyperbolic space to achieve unified spatiotemporal representation of multi-source planetary data. By coupling spatial resolution with hyperbolic radial depth via the Lorentz model, HySH faithfully preserves the hierarchical scale structure of planetary observations while eliminating edge explosion through high-order relational encoding. A resolution-aware Spatial Outward Einstein Midpoint (Spatial OEM) aggregation operator is further proposed to prevent hierarchical collapse during cross-resolution evidence fusion, with a formal guarantee of outward bias.
\item Physics-Informed Conflict Triage: We propose PICT, a retrieval module that fundamentally redefines the role of inter-source conflict in RAG systems. Through cross-source interaction entropy for conflict detection, a physically grounded four-category conflict classification informed by observation geometry, and differentiated confidence recalibration, PICT provably prevents the over-smoothing of scientifically valuable disagreements (Anti-Over-Smoothing Guarantee) while maintaining noise-filtering capability. To the best of our knowledge, this is the first conflict-handling mechanism in RAG that explicitly distinguishes between erroneous inconsistency and scientifically meaningful observational divergence.
\item Integrated Framework and Experimental Validation: We design the AreoRAG Prompting (ARP) algorithm that integrates HySH and PICT through three explicit coupling points: spatial alignment as a prerequisite for interaction entropy computation, radial depth difference as a resolution disparity signal for conflict classification, and triage-driven retrieval priority feedback. Extensive experiments on multi-source planetary observation datasets demonstrate that AreoRAG significantly outperforms existing multi-source RAG methods in both retrieval fidelity and scientific faithfulness, with particular advantages in scenarios involving cross-resolution reasoning and observation-grounded conflict preservation.
\end{enumerate}
\section{Preliminary}
In the field of planetary spatial knowledge retrieval, the primary challenges include faithfully representing continuous spatiotemporal relationships across heterogeneous observation sources and achieving reliable retrieval under inherent inter-source scientific conflicts. This section introduces the core elements of our approach and precisely defines the problems we address.
Let $Q = \{q_1, q_2, \ldots, q_n\}$ be the set of query instances, where each $q_i$ corresponds to a distinct planetary science query. Let $\mathcal{E} = \{e_1, e_2, \ldots, e_m\}$ be the set of entities in the spatial knowledge hypergraph, where each $e_j$ represents a geological feature, instrument, or observation product. Let $\mathcal{R} = \{r_1, r_2, \ldots, r_p\}$ be the set of relationships, and let $\mathcal{F} = \{f_1^n, f_2^n, \ldots, f_s^n\}$ be the set of $n$-ary relational facts (hyperedges). Let $D = \{d_1, d_2, \ldots, d_t\}$ be the set of observation data products, where each $d_l$ represents an observation record from a specific instrument. We define the spatially-grounded knowledge-guided retrieval augmented generation problem as follows:
\begin{equation}
\label{equ:RAG Problem}
\operatorname*{arg\,max}_{d_i\in D}\;\text{LLM}\left( q_i,d_i \right),
\end{equation}
\begin{equation}
\label{equ:RAG Problem s.t.}
\sum_{e_j\in \mathcal{E}}{\sum_{f_{k}^{n}\in \mathcal{F}}{\text{HG}}}\left( e_j,f_{k}^{n},d_i \right) \cdot \mathcal{S}_{geo}\left( q_i,d_i \right),
\end{equation}
where $\text{LLM}(q_i, d_i)$ denotes the relevance score between query $q_i$ and document $d_i$ assessed by the LLM, $\text{HG}(e_j, f_k^n, d_i)$ represents the degree of match between entity $e_j$, $n$-ary fact $f_k^n$, and document $d_i$ in the hypergraph, and $\mathcal{S}_{geo}(q_i, d_i)$ is a spatial compatibility function that ensures the retrieved evidence satisfies the geospatial constraints (footprint overlap, temporal window, resolution range) specified in the query.
Furthermore, we optimize the knowledge construction and retrieval modules by introducing a hyperbolic spatial hypergraph to achieve spatially faithful knowledge aggregation and physics-informed conflict handling. Specifically, the proposed approach is formally defined through the following definitions.
Definition~1 (Multi-source planetary observation data). Given a set of observation platforms $\mathcal{H}$ (e.g., MRO, Mars Express, Tianwen-1, Curiosity, Zhurong), the observation data $D = \{\mathcal{I}, \mathcal{P}_{foot}, \mathcal{T}_{win}, \mathcal{S}_{band}, c, \text{meta}\}$ exists, where $\mathcal{I}$ denotes the instrument identity, $\mathcal{P}_{foot} \subset \mathbb{S}^2_{Mars}$ denotes the spatial footprint on the Martian surface, $\mathcal{T}_{win}$ denotes the temporal acquisition window parameterized by Solar Longitude $L_s$, $\mathcal{S}_{band}$ denotes the spectral band configuration, $c$ represents the observation content (image, spectrum, or derived product), and meta represents the PDS/CNSA metadata. Through a multi-source spatial adapter parsing algorithm, we obtain normalized data $\widehat{D} = \{\text{id}, \mathcal{I}, \mathcal{P}_{foot}, \mathcal{T}_{win}, \mathcal{S}_{band}, \ell_{res}, \text{jsc}, \text{meta}\}$, where id is the unique identifier, $\ell_{res} \in \mathbb{R}^+$ denotes the ground sampling distance (spatial resolution), and jsc denotes the observation content stored using JSON-LD for linked data interoperability.
Definition~2 ($N$-ary spatial knowledge hypergraph). An $n$-ary spatial knowledge hypergraph is defined as $\mathcal{G}_{hyp} = (\mathcal{E}, \mathcal{R}, \mathcal{F}_{spa})$, where $\mathcal{E}$ denotes the entity set, $\mathcal{R}$ denotes the relation set, and $\mathcal{F}_{spa}$ denotes the set of spatial observation hyperedges. Each spatial observation hyperedge $f_{spa}^n \in \mathcal{F}_{spa}$ binds multiple entities and observation parameters into a single $n$-ary relational fact:
\begin{equation}
\label{equ:spatial observation hyperedge}
f_{spa}^{n}=\left( \mathcal{I},\;\mathcal{P}_{foot},\;\mathcal{T}_{win},\;\mathcal{S}_{band},\;\mathcal{O}_{target},\;\ell _{res} \right),
\end{equation}
where $\mathcal{O}_{target}$ denotes the set of target geological features. Unlike binary knowledge graphs where $k$ co-located entities require $\binom{k}{2} = O(k^2)$ pairwise edges, a single $n$-ary hyperedge binds all $k$ entities with $O(k)$ complexity, directly resolving the edge explosion problem.
Definition~3 (Hyperbolic space embedding). We represent $\mathcal{G}_{hyp}$ in $d$-dimensional hyperbolic space $\mathbb{H}_K^d$ with constant negative curvature $K < 0$ using the Lorentz (hyperboloid) model. The hyperbolic space is realized as:
\begin{equation}
\label{equ:hyperbolic space}
\mathbb{H}_{K}^{d}=\left\{ \mathbf{x}\in \mathbb{R}^{d+1}\mid \left< \mathbf{x},\mathbf{x} \right> _L=\frac{1}{K},\;x_0>0 \right\},
\end{equation}
where $\langle \mathbf{x}, \mathbf{y} \rangle_L = -x_0 y_0 + \sum_{i=1}^{d} x_i y_i$ is the Lorentzian inner product. The geodesic distance between two points $\mathbf{x}, \mathbf{y} \in \mathbb{H}_K^d$ is $d_K(\mathbf{x}, \mathbf{y}) = \frac{1}{\sqrt{-K}} \cosh^{-1}(K \langle \mathbf{x}, \mathbf{y} \rangle_L)$. The radial depth $r(\mathbf{x}) = x_0$ encodes the intrinsic distance from the origin and serves as a proxy for hierarchical specificity: entities near the origin represent coarse, global-scale features, while those at large radial depth represent fine-scale, local observations.
Definition~4 (Observation-grounded homologous data). For a query $Q(q, \mathcal{G}_{hyp})$ on the spatial hypergraph $\mathcal{G}_{hyp}$, the multi-source spatial evidence retrieved in a single query is defined as observation-grounded homologous data. Any two observations $v_1$ and $v_2$ in $\mathcal{G}_{hyp}$ are observation-grounded homologous if and only if (a) they belong to the same retrieval candidate set, and (b) their spatial footprints overlap, i.e., $\mathcal{P}_{foot}\left( v_1 \right) \cap \mathcal{P}_{foot}\left( v_2 \right) \ne \varnothing$.
Definition~5 (Observation-grounded knowledge source). A planetary observation knowledge source is defined as $\mathcal{K}_s = (\mathcal{I}_s, \Omega_s, F(\mathcal{K}_s), \mathcal{M}_s)$, where $\mathcal{I}_s$ denotes the instrument, $\Omega_s = (\ell_{res}, \lambda_{band}, \theta_{view}, d_{pen})$ denotes the observation geometry parameters (spatial resolution, spectral band, viewing angle, penetration depth), $F(\mathcal{K}_s)$ denotes the set of atomic factual statements, and $\mathcal{M}_s$ denotes the physical measurement model that maps target properties through observation constraints to observable facts.
Definition~6 (Conflict triage confidence). For observation-grounded homologous data obtained from the spatial hypergraph, the conflict triage confidence integrates two levels of assessment: (a) cross-source interaction entropy to detect inter-source conflicts, and (b) physics-informed conflict classification to determine whether detected conflicts represent noise to be filtered or scientifically meaningful observational divergences to be preserved. Unlike conventional candidate confidence [14] that uniformly penalizes inconsistency, conflict triage confidence applies differentiated recalibration based on the physical origin of each conflict.
\section{Methodology}
\subsection{Framework of AreoRAG}
This section elaborates on the implementation approach of AreoRAG. As shown in Fig. 3, the framework comprises three tightly coupled modules. The first step involves constructing a Hyperbolic Spatial Hypergraph (HySH) from multi-source planetary observation data, achieving unified spatiotemporal representation via $n$-ary observation hyperedges embedded in hyperbolic space (Section III-B); the second step performs spatiotemporal retrieval on the constructed HySH, where hyperbolic spatial proximity encoding and cross-resolution aggregation via the Spatial Outward Einstein Midpoint are employed to extract query-relevant multi-source evidence (Section III-C); the third step applies Physics-Informed Conflict Triage (PICT), which detects inter-source conflicts via cross-source interaction entropy, classifies them into four scientific categories, and executes conflict-aware confidence recalibration to preserve scientifically valuable disagreements while filtering noise (Section III-D). Finally, the aforementioned steps are integrated to form the AreoRAG Prompting (ARP) algorithm (Section III-E).
The three modules interact through three explicit coupling points: (1) HySH's spatial alignment is a prerequisite for meaningful interaction entropy computation in PICT; (2) the radial depth difference $\Delta r$ from HySH directly feeds into the PICT feature vector as the resolution disparity signal; and (3) PICT's triage results feed back to boost retrieval priority of scientifically interesting regions in subsequent queries.
\subsection{Hyperbolic Spatial Hypergraph Construction}
The AreoRAG method begins by constructing a knowledge structure that can faithfully represent the continuous spatiotemporal topology of planetary multi-source data. Unlike MultiRAG's Multi-source Line Graph (MLG), which relies on discrete text entities and binary triples, we introduce a hypergraph structure embedded in hyperbolic space to jointly address edge explosion and spatial scale hierarchy.
1) Multi-source Spatial Adapter Parsing: We first design a spatial adapter for each observation data source to parse instrument metadata, spatial footprints, temporal windows, and spectral parameters. For orbital remote sensing data (e.g., HiRISE, CTX, CRISM), parsing involves extracting the image footprint geometry, ground sampling distance, and spectral band configuration from PDS labels. For in-situ data (e.g., rover spectrometers, ground-penetrating radar), parsing extracts the rover traverse coordinates, measurement timestamps in Sol, and instrument-specific parameters such as penetration depth. All temporal references are unified to Solar Longitude $L_s$ to enable cross-platform temporal comparison. For derived data products (e.g., DTMs, mineral abundance maps), parsing extracts provenance links to the source observations and processing parameters.
The final integration of multi-source spatial data can be expressed as:
\begin{equation}
\label{equ:multi-source spatial data}
D_{Fusion} = \bigcup_{i=1}^{n} A_i^{spa}(D_i),
\end{equation}
where $A_i^{spa} \in \{Ada_{orbital}, Ada_{insitu}, Ada_{derived}\}$ represents the spatial adapter parsing functions for orbital, in-situ, and derived data products respectively, and $D_i$ represents the original observation datasets from different platforms.
Through the parsed data $D_{Fusion}$, we further extract entities (geological features, mineral signatures, topographic structures), relationships (spatial containment, temporal succession, compositional association), and observation-specific attributes. The knowledge extraction process employs LLM-based entity recognition guided by a planetary science domain schema:
\begin{equation}
\label{equ:planetary science domain schema}
\sum_{D_i} \left( \{e_1, e_2, \ldots, e_m\} \sqcup \{r_1, r_2, \ldots, r_n\} \sqcup \{f_{spa,1}^n, \ldots, f_{spa,p}^n\} \right).
\end{equation}
2) Spatial Observation Hyperedge Formation: Based on the extracted knowledge base, we construct spatial observation hyperedges that bind co-located multi-source observations into single $n$-ary facts. As formalized in Definition 2, each hyperedge $f_{spa}^n$ encapsulates the instrument, spatial footprint, temporal window, spectral bands, target features, and resolution. In a pairwise binary graph, $k$ co-existing spatial entities require $\binom{k}{2} = O(k^2)$ spatial proximity edges. With hyperedges, a single $n$-ary fact binds all $k$ entities, reducing edge complexity to $O(k)$. This directly resolves the edge explosion problem identified in our analysis of MLG.
3) Scale-Aware Lorentz Embedding: We embed the spatial observation hypergraph in $d$-dimensional hyperbolic space $\mathbb{H}_K^d$ using the Lorentz model (Definition 3). The key innovation is coupling the radial depth with spatial resolution through an embedding mapping $\Phi: \mathcal{F}_{spa} \rightarrow \mathbb{H}_K^d$:
\begin{equation}
\label{equ:embedding mapping}
r\left(\Phi(f_{spa}^n)\right) = \frac{1}{\sqrt{-K}} \cosh\left(\sqrt{-K} \cdot g(\ell_{res})\right),
\end{equation}
where $g(\ell_{res}) = -\log(\ell_{res} / \ell_{max})$ is a monotone decreasing function of resolution, and $r(\mathbf{x}) = x_0$ denotes the radial depth.
This embedding design is motivated by the following observation on the intrinsic geometry of planetary spatial data:
\textbf{Proposition 1} (Spatial Scale-Curvature Correspondence). {\itshape The planetary spatial observation hierarchy exhibits tree-like branching: each coarser-resolution observation spatially contains multiple finer-resolution observations. Let $N(\ell)$ denote the number of observations at resolution level $\ell$. For remote sensing data with total survey area $A_{coverage}$:
\begin{equation}
\label{equ:Spatial Scale-Curvature Correspondence}
N(\ell) \propto A_{coverage} / \ell^2.
\end{equation}
As resolution $\ell$ decreases (finer scale), $N(\ell)$ grows as $\ell^{-2}$. Expressed in the logarithmic scale depth $g(\ell) = -\log(\ell / \ell_{max})$ used by the embedding, this reads $N \propto e^{2 g(\ell)}$, i.e., the number of observations grows exponentially with depth, which is the exponential branching characteristic of negative-curvature spaces. Therefore, the spatial scale hierarchy is intrinsically hyperbolic, and Euclidean embedding with polynomial volume growth cannot faithfully represent it.}
Through this embedding, global coarse-resolution data (e.g., MOLA topography at $\sim$460 m) is placed near the hyperbolic origin (small radial depth), while local high-resolution data (e.g., HiRISE at 0.3 m) is placed far from the origin (large radial depth). The exponential volume growth of $\mathbb{H}_K^d$ naturally accommodates the exponentially increasing number of observations at finer scales.
4) Cross-Reference-Frame Alignment: To address the heterogeneous reference frame problem (orbiter areocentric coordinates vs. rover-centric local coordinates), we align all observations to a global reference via parallel transport on the hyperbolic manifold:
\begin{equation}
\label{equ:Cross-Reference-Frame Alignment}
\Phi_{aligned}(e) = \exp_{o_g}\left(\Gamma_{o_k \to o_g}\left(\log_{o_k}(\Phi_k(e))\right)\right),
\end{equation}
where $\log_{o_k}$ is the logarithmic map at the local reference origin $o_k$, $\Gamma_{o_k \to o_g}$ is the parallel transport operator along the geodesic from $o_k$ to the global origin $o_g$, and $\exp_{o_g}$ is the exponential map at the global origin. Unlike Euclidean affine transformations, hyperbolic parallel transport preserves geodesic distances and radial depth, ensuring that scale hierarchy information is maintained after cross-frame alignment.
Here, we provide a simple example of hyperbolic spatial hypergraph construction. As shown in Fig. 4, an observation region is covered by three sources at different resolutions: a CTX mosaic (6 m), an HiRISE strip (0.3 m), and a CRISM spectral cube (18 m). In the HySH, the HiRISE observation (finest resolution) is embedded at the largest radial depth, while the CRISM observation (coarsest resolution) is nearest to the origin. A spatial observation hyperedge binds all three observations and their co-located geological features into a single $n$-ary fact, without requiring $O(k^2)$ pairwise edges.
\subsection{Spatiotemporal Retrieval with Cross-Resolution Aggregation}
After the construction of the hyperbolic spatial hypergraph, the next step is to retrieve query-relevant multi-source spatial evidence. The retrieval process comprises two phases: spatiotemporal evidence extraction and cross-resolution aggregation.
1) Spatial Intent Extraction and Hyperedge Retrieval: Given a user query $q$, we first employ the LLM to extract spatial intent, including target entities, spatial constraints (footprint, region), temporal constraints ($L_s$ range, Sol range), and resolution preferences. These are denoted as query elements $\mathcal{K}_q$.
For each topic entity $e_s \in \mathcal{E}_q$ extracted from the query, we retrieve its incident spatial observation hyperedges $\mathcal{F}_{e_s} = \{f_{spa}^n \in \mathcal{F}_{spa} : e_s \in f_{spa}^n\}$ and derive pseudo-binary triples $(e_h, f_{spa}^n, e_t)$ for pairwise reasoning, following the approach of HyperRAG [18]:
\begin{equation}
\label{equ:Spatial Intent Extraction and Hyperedge Retrieval}
\mathcal{T}_q = \left\{ (e_h, f_{spa}^n, e_t) \mid f_{spa}^n \in \mathcal{F}_{e_s}, \; e_h \in f_{spa}^n, \; e_t \in f_{spa}^n \right\}.
\end{equation}
2) Hyperbolic Spatial Encoding and Plausibility Scoring: For each candidate triple, we compute a spatiotemporal encoding that fuses semantic, structural, and physical-spatial signals:
\begin{equation}
\label{equ:spatiotemporal encoding}
\mathbf{x} = \left[\varphi(q) \| \varphi(e_h) \| \varphi(f_{spa}^n) \| \varphi(e_t) \| \delta(e_h, f_{spa}^n, e_t) \| \psi_{geo}(e_h, e_t)\right],
\end{equation}
where $\varphi$ denotes a text embedding model, $\delta$ denotes a structural proximity encoding adapted from SubGraphRAG [19] to operate on hyperedges, and $\psi_{geo}$ is the hyperbolic spatial encoding defined as:
\begin{equation}
\label{equ:hyperbolic spatial encoding}
\psi_{geo}(e_h, e_t) = \left[d_K\left(\Phi(e_h), \Phi(e_t)\right), \; \Delta r(e_h, e_t), \; \cos\theta_{bearing}\right],
\end{equation}
where $d_K$ is the geodesic distance in $\mathbb{H}_K^d$ capturing physical proximity, $\Delta r = |r(\Phi(e_h)) - r(\Phi(e_t))|$ encodes the scale difference via radial depth gap, and $\cos\theta_{bearing}$ encodes the directional relationship. A lightweight MLP classifier $f_\theta$ then scores the plausibility of each candidate triple:
\begin{equation}
\label{equ:MLP scores}
\text{score}(e_h, f_{spa}^n, e_t) = f_\theta(\mathbf{x}) \in [0, 1].
\end{equation}
Top-scored triples are retained and their tail entities form the frontier for next-hop expansion, following an adaptive search strategy with density-aware thresholding as in [18]. Specifically, we initialize with threshold $\tau_0 = 0.5$ and iteratively reduce by a decay factor $c = 0.1$ if the number of retrieved triples falls below a minimum acceptable count $M$, ensuring sufficient evidence coverage in sparse regions while preventing over-retrieval in dense regions.
3) Spatial Outward Einstein Midpoint Aggregation: After retrieval, the selected multi-source evidence typically spans multiple resolutions. To aggregate these into a unified representation without losing fine-scale information, we introduce the Spatial Outward Einstein Midpoint (Spatial OEM). The motivation stems from a known failure mode: naively averaging hyperbolic embeddings collapses representations toward the origin, destroying the hierarchical structure encoded in radial depth [20].
Given spatial observation hyperedge embeddings $\{\Phi(f_i)\}_{i=1}^n \subset \mathbb{H}_K^d$ with query-relevance weights $w_i$ and resolution-aware radial weighting $\phi_{res}(f_i) = r(\Phi(f_i))^p$:
$$\mathbf{m}_{K,p}^{Spa\text{-}OEM} = \Pi_K\left(\frac{\sum_{i=1}^{n} w_i \cdot \phi_{res}(f_i) \cdot \lambda_i \cdot \Phi(f_i)}{\sum_{i=1}^{n} w_i \cdot \phi_{res}(f_i) \cdot \lambda_i}\right)$$
where $\lambda_i = \Phi(f_i)_0$ is the Lorentz factor and $\Pi_K$ denotes reprojection onto $\mathbb{H}_K^d$, defined as $\Pi_K(\mathbf{v}) = \frac{\mathbf{v}}{\sqrt{K \langle \mathbf{v}, \mathbf{v} \rangle_L}}$ for $\mathbf{v}$ with $\langle \mathbf{v}, \mathbf{v} \rangle_L < 0$ and $v_0 > 0$.
\textbf{Theorem 1} (Spatial OEM Outward Bias). \textit{For $p \geq 1$, the Spatial OEM satisfies:}
$$r(\mathbf{m}_{K,p}^{Spa\text{-}OEM}) \geq r(\mathbf{m}_K^{Ein})$$
\textit{where $\mathbf{m}_K^{Ein}$ is the standard Einstein midpoint ($p = 0$).}
\textit{Proof.} The OEM weights $\tilde{w}_i \propto w_i \cdot r(\Phi(f_i))^{p+1}$ concentrate more mass on high-radius points than the Einstein weights $w_i \cdot r(\Phi(f_i))$. By the Chebyshev sum inequality applied to the co-monotonic sequences $a_i = r(\Phi(f_i))^{p+1}$ and $b_i = r(\Phi(f_i))$, the pre-projection time component satisfies $\tilde{v}_0 \geq \bar{r}_w$ (weighted mean radius). Since reprojection $\Pi_K$ preserves the ordering of time components, the result follows. $\square$
The outward bias guarantees that high-resolution observations dominate the aggregated representation. This is essential for planetary science retrieval: when a user queries a specific geological feature, the aggregated evidence should preserve the fine-scale observational details rather than being smoothed into a coarse-resolution summary.
\subsection{Physics-Informed Conflict Triage}
We define the multi-source spatial evidence retrieved in a single query as observation-grounded homologous data (Definition 4). Although targeting the same query object, these data often provide inconsistent factual statements due to differences in instrument principles, observation geometry, and acquisition epochs. Unlike MultiRAG's Multi-level Confidence Computing (MCC), which assumes that inconsistency indicates unreliability and employs mutual information entropy to filter conflicting nodes, we adopt a fundamentally different paradigm: Physics-Informed Conflict Triage (PICT), which classifies conflicts by their physical origin and applies differentiated processing strategies.
1) Cross-Source Interaction Entropy: The first stage detects conflicts by measuring the information-theoretic interaction effect when two sources are jointly presented to the LLM. Existing entropy-based conflict detection methods, such as TruthfulRAG [17], compare retrieval-augmented entropy against parametric-only entropy ($\Delta H_p = H(P_{aug}) - H(P_{param})$). However, this formulation is inapplicable to our setting where all knowledge is external observational data rather than LLM parametric knowledge. We instead propose cross-source interaction entropy that measures the mutual interference between two observation sources:
$$\mathcal{H}_{inter}(p_i, p_j \mid q) = H\left(P(\text{ans} \mid q, p_i \oplus p_j)\right) - \frac{1}{2}\left[H\left(P(\text{ans} \mid q, p_i)\right) + H\left(P(\text{ans} \mid q, p_j)\right)\right],$$
where $H(\cdot)$ is the token-averaged entropy over top-$k$ candidate tokens:
$$H\left(P(\text{ans} \mid \text{context})\right) = -\frac{1}{|l|}\sum_{t=1}^{|l|}\sum_{i=1}^{k} pr_i^{(t)} \log_2 pr_i^{(t)}$$
and $p_i \oplus p_j$ denotes the concatenation of both reasoning paths derived from sources $\mathcal{K}_i$ and $\mathcal{K}_j$ respectively. The interaction entropy admits a clear physical interpretation: positive values ($\mathcal{H}_{inter} > 0$, super-additive uncertainty) indicate that the two sources contradict each other, jointly creating more confusion than either alone; near-zero values indicate independence or consistency; negative values (sub-additive) indicate mutual complementarity where the sources reinforce each other.
Reasoning path pairs exhibiting interaction entropy exceeding a predefined threshold $\epsilon$ are classified as detected conflicts:
$$\mathcal{C}^{detected} = \{(\psi_i, \psi_j) \mid \mathcal{H}_{inter}(p_i, p_j \mid q) > \epsilon\}$$
2) Physics-Informed Conflict Classification: The second stage classifies each detected conflict by its physical origin. We introduce the central distinction of PICT:
\textbf{Definition 7. Explainable conflict and opaque conflict.} A pairwise conflict $(\psi_i, \psi_j) \in \mathcal{C}_{i,j}$ is \textit{explainable} if there exists a physical bridging function $\mathcal{B}$ such that
$$\mathcal{B}(\Omega_i, \Omega_j, \mathcal{M}_i, \mathcal{M}_j) \models \neg(\psi_i \bot \psi_j),$$
i.e., the apparent inconsistency is resolvable by accounting for observation constraint differences ($\Omega_i$, $\Omega_j$) and measurement model differences ($\mathcal{M}_i$, $\mathcal{M}_j$). Otherwise, the conflict is \textit{opaque}.
Based on this distinction, we define four conflict categories, each with a differentiated processing strategy:
\begin{table}
\renewcommand{\arraystretch}{1.3}
\caption{Physics-Informed Conflict Triage Categories}
\label{table_conflict_triage}
\vspace{-0.13in}
\centering
\begin{tabular}{|m{3cm}|m{3cm}|m{3cm}|}
\hline
\makecell[c]{\textbf{Category}} & \makecell[c]{\textbf{Condition}} & \makecell[c]{\textbf{Strategy}} \\
\hline
\hline
\makecell[c]{Noise ($\mathcal{C}^{noise}$)} & \makecell[c]{Opaque, with significant\\source authority disparity} & \makecell[c]{Filter low-authority\\source} \\
\hline
\makecell[c]{Instrument-Inherent\\($\mathcal{C}^{inst}$)} & \makecell[c]{Explainable via\\$\Omega_i \neq \Omega_j$} & \makecell[c]{Preserve with physical\\explanation} \\
\hline
\makecell[c]{Scale-Dependent\\($\mathcal{C}^{scale}$)} & \makecell[c]{Explainable via\\$\ell_{res}^i \neq \ell_{res}^j$} & \makecell[c]{Preserve with cross-scale\\linkage} \\
\hline
\makecell[c]{Temporal-Evolution\\($\mathcal{C}^{temp}$)} & \makecell[c]{Explainable via\\$\mathcal{T}_i \neq \mathcal{T}_j$} & \makecell[c]{Preserve with temporal\\ordering} \\
\hline
\end{tabular}
\end{table}
For each detected conflict, we construct a feature vector that fuses information-theoretic, physical, and neural signals:
$$\mathbf{z}_{conf} = \left[\mathcal{H}_{inter}, \; \|\Omega_i - \Omega_j\|, \; |\log(\ell_{res}^i / \ell_{res}^j)|, \; \Delta\mathcal{T}, \; \rho_{auth}(i,j), \; \mathbf{h}^{(l^*)}_{conf}\right]$$
where $\|\Omega_i - \Omega_j\|$ is the observation geometry disparity, $|\log(\ell_{res}^i / \ell_{res}^j)|$ is the resolution ratio in log-scale, $\Delta\mathcal{T}$ is the temporal separation, $\rho_{auth}(i,j)$ is the authority disparity between sources, and $\mathbf{h}^{(l^*)}_{conf}$ is the LLM hidden state at the conflict encoding layer. The inclusion of $\mathbf{h}^{(l^*)}_{conf}$ is motivated by the finding that knowledge conflict signals concentrate in mid-to-late layers of LLMs and are linearly separable with $>$93\% AUC [21].
A lightweight classifier maps the feature vector to conflict type:
$$\hat{c} = \arg\max_{c \in \{noise, inst, scale, temp\}} P_\theta(c \mid \mathbf{z}_{conf})$$
\textbf{Proposition 2} (Conflict Type Separability). \textit{The four conflict types are distinguished by orthogonal physical dimensions: $\|\Omega_i - \Omega_j\|$ separates instrument conflicts; $|\log(\ell_{res}^i / \ell_{res}^j)|$ separates scale conflicts; $\Delta\mathcal{T}$ separates temporal conflicts; $\rho_{auth}$ separates noise conflicts. Since these physical features are independent of and complementary to the hidden state features $\mathbf{h}^{(l^*)}_{conf}$ (which encode semantic inconsistency), the four conflict types are linearly separable in the augmented feature space $\mathbf{z}_{conf}$.}
3) Conflict-Aware Confidence Recalibration: Based on the classification result, we recalibrate the node confidence. This is the key departure from MultiRAG's MCC, which uniformly penalizes inconsistency:
$$C_{triage}(v) = \begin{cases} C_{base}(v) & \text{if } v \notin \mathcal{C}^{detected} \\ \alpha \cdot C_{base}(v) + (1-\alpha) \cdot \eta & \text{if } \hat{c} = noise \\ C_{base}(v) + \beta \cdot \mathcal{H}_{inter}^{-1} & \text{if } \hat{c} \in \{inst, scale\} \\ C_{base}(v) \cdot \gamma(|\Delta\mathcal{T}|) & \text{if } \hat{c} = temp \end{cases}$$
where $C_{base}(v)$ is the baseline confidence computed via semantic similarity (analogous to the node consistency score in [14]), $\eta < 0$ is a penalty term for noise conflicts, $\beta > 0$ is a boost coefficient for scientifically explainable conflicts, and $\gamma(|\Delta\mathcal{T}|)$ is a time-decay weighting function that prioritizes recent observations while preserving temporal evolution signals. Specifically, $\gamma(|\Delta\mathcal{T}|) = 1 + \beta_{temp} \cdot \exp(-|\Delta\mathcal{T}| / \tau_{decay})$, where $\beta_{temp} > 0$ ensures $\gamma > 1$ for temporal contrasts with scientific significance.
\textbf{Theorem 2} (Anti-Over-Smoothing Guarantee). \textit{Let $V_{sci} \subset V$ denote the set of nodes involved in explainable scientific conflicts ($\mathcal{C}^{inst} \cup \mathcal{C}^{scale} \cup \mathcal{C}^{temp}$). Under PICT with $\beta > 0$ and $\beta_{temp} > 0$, and for nodes with positive baseline confidence $C_{base}(v) > 0$:}
$$C_{triage}(v) > C_{base}(v) \quad \forall v \in V_{sci}.$$
\textit{Proof.} For $v \in \mathcal{C}^{inst} \cup \mathcal{C}^{scale}$: $C_{triage}(v) = C_{base}(v) + \beta \cdot \mathcal{H}_{inter}^{-1}$. Since $\beta > 0$ and $\mathcal{H}_{inter} > \epsilon > 0$ (by the detection threshold $\epsilon$ that defines $\mathcal{C}^{detected}$), $\beta \cdot \mathcal{H}_{inter}^{-1} > 0$, thus $C_{triage}(v) > C_{base}(v)$. For $v \in \mathcal{C}^{temp}$: $\gamma(|\Delta\mathcal{T}|) > 1$ by construction (since $\beta_{temp} > 0$ and $\exp(\cdot) > 0$), and because $C_{base}(v) > 0$, $C_{triage}(v) = C_{base}(v) \cdot \gamma(|\Delta\mathcal{T}|) > C_{base}(v)$. $\square$
This theorem provides a formal guarantee that scientifically valuable conflict nodes can never be suppressed below their baseline confidence by the triage mechanism, directly addressing the over-smoothing problem.
\subsection{AreoRAG Prompting}
We propose the AreoRAG Prompting (ARP) algorithm for multi-source planetary spatial data retrieval. The complete procedure is presented in Algorithm~\ref{alg:arp}.
\begin{algorithm}[!htb]
\caption{AreoRAG Prompting (ARP)}
\label{alg:arp}
\begin{algorithmic}[1]
\REQUIRE Query $q$, multi-source observation datasets $D$
\ENSURE Generated Answer
\STATE $\mathcal{E}_q, \mathcal{R}_q, \mathcal{P}_{foot}, \mathcal{T}_{win} \leftarrow$ Spatial Intent Extraction$(q)$
\STATE $D_q \leftarrow$ Multi-source Spatial Adapter Parsing$(D)$ \COMMENT{Eq. 4--5}
\STATE $\mathcal{G}_{hyp} \leftarrow$ HySH Construction$(D_q)$ \COMMENT{Eq. 6--8}
\STATE $\mathcal{T}_q \leftarrow$ Spatiotemporal Retrieval$(\mathcal{G}_{hyp}, \mathcal{E}_q)$ \COMMENT{Eq. 9--12}
\STATE $\mathbf{m}_{agg} \leftarrow$ Spatial OEM Aggregation$(\mathcal{T}_q)$ \COMMENT{Eq. 13}
\STATE $\mathcal{C}^{detected} \leftarrow$ Cross-Source Interaction Entropy$(\mathcal{T}_q, q)$ \COMMENT{Eq. 14--16}
\FOR{$(\psi_i, \psi_j) \in \mathcal{C}^{detected}$}
\STATE $\hat{c} \leftarrow$ Conflict Classification$(\mathbf{z}_{conf})$ \COMMENT{Eq. 18--19}
\STATE $C_{triage}(v) \leftarrow$ Confidence Recalibration$(v, \hat{c})$ \COMMENT{Eq. 20}
\ENDFOR
\STATE Context $\leftarrow$ Differential Context Construction$(q, \mathcal{T}_q, \hat{c})$
\STATE Answer $\leftarrow$ LLM$(q \oplus$ Context $\oplus$ Provenance$)$
\RETURN Answer
\end{algorithmic}
\end{algorithm}
Given a user query $q$, the LLM is first employed to extract entities, spatial constraints ($\mathcal{P}_{foot}$, region), and temporal constraints ($\mathcal{T}_{win}$, $L_s$ range), generating corresponding logical and spatial relationships. The observation data then undergoes multi-source spatial adapter parsing to derive normalized datasets (Eq. 4), followed by constructing a Hyperbolic Spatial Hypergraph via scale-aware Lorentz embedding and cross-reference-frame alignment (Eq. 6-8).
Subsequently, spatiotemporal retrieval is performed using hyperbolic spatial encoding and MLP-based plausibility scoring (Eq. 10-12), with Spatial OEM aggregation (Eq. 13) to produce a unified cross-resolution representation. The cross-source interaction entropy mechanism (Eq. 14-16) then detects inter-source conflicts, after which each detected conflict is classified via the physics-informed feature vector (Eq. 18-19) and the node confidence is recalibrated accordingly (Eq. 20).
The final step constructs a differential context based on the triage result. For noise conflicts, the low-authority source is filtered, compatible with conventional conflict elimination. For instrument-inherent and scale-dependent conflicts, both sources are preserved with a physical bridging explanation $\mathcal{B}(\Omega_i, \Omega_j)$ appended to the context, enabling the LLM to reason about the physical origin of the disagreement. For temporal-evolution conflicts, a temporal ordering is constructed, allowing the LLM to trace the evolution of observations over time. All preserved evidence carries provenance metadata (DataID, source institution, instrument identity, observation timestamp in $L_s$) to ensure scientific traceability, analogous to the citation anchors in Perplexity-style retrieval systems.
It should be noted that the ARP algorithm constructs the HySH offline as a preprocessing step, while the PICT module operates online during each query. The HySH construction time is dominated by the LLM-based entity extraction (comparable to MultiRAG's MLG construction), while the online PICT overhead consists primarily of $|\mathcal{C}^{detected}|$ forward passes through the lightweight conflict classifier (Eq. 19), which is negligible compared to the LLM generation cost.
\section{Experiments}
This section conducts experiments and performance analysis on the Hyperbolic Spatial Hypergraph (HySH) construction and the Physics-Informed Conflict Triage (PICT) modules. AreoRAG is compared against SOTA multi-source retrieval, graph-based RAG, and conflict-resolution baselines. Extensive experiments are conducted to assess the robustness and efficiency of AreoRAG, with the aim of answering the following questions.
\begin{itemize}
\item[\textbf{Q1}:] How does the overall retrieval and QA performance of AreoRAG compare with existing multi-source RAG and graph-based RAG methods on planetary spatial data?
\item[\textbf{Q2}:] What are the respective impacts of spatial sparsity and inter-source conflict intensity on retrieval quality?
\item[\textbf{Q3}:] How effective are the two core modules (HySH and PICT) of AreoRAG individually?
\item[\textbf{Q4}:] Can PICT correctly preserve scientifically valuable conflicts while filtering noise, and how does this compare with conventional conflict-elimination approaches?
\item[\textbf{Q5}:] What are the time costs of the various modules in AreoRAG?
\end{itemize}
\subsection{Experimental Settings}
\textbf{a) Datasets:} To validate the effectiveness of AreoRAG in planetary multi-source spatial data retrieval, we construct three datasets from real Mars exploration archives and further evaluate on two general multi-hop QA benchmarks. The planetary datasets are summarized in Table~\ref{table_planetary_datasets}.
(1) \textbf{MarsRegion-QA}: A multi-source spatial QA dataset constructed from the Mars Orbital Data Explorer (ODE) archives. We select five scientifically significant regions on Mars — Jezero Crater, Gale Crater, Utopia Planitia (Zhurong landing site), Valles Marineris, and Olympus Mons — and aggregate observations from HiRISE (0.3 m), CTX (6 m), CRISM (18 m), MOLA (460 m), and Zhurong/Curiosity rover in-situ measurements. Each query targets cross-source spatial reasoning (e.g., ``What mineral signatures have been detected in the clay-bearing unit at the western delta of Jezero Crater, and do orbital and in-situ observations agree?''). We construct 200 queries with expert-annotated ground truth answers and conflict labels.
(2) \textbf{MarsConflict-50}: A curated subset of 50 observation pairs exhibiting known scientific conflicts documented in the planetary science literature (e.g., orbital detection of hydrated minerals vs. inconclusive in-situ results). Each pair is annotated with conflict type (instrument-inherent, scale-dependent, temporal-evolution, or noise) by domain experts. This dataset serves as the primary benchmark for evaluating PICT's conflict classification accuracy.
(3) \textbf{MarsTemporal-QA}: A temporal reasoning dataset comprising 150 queries about surface changes observed across different Mars Years (MY), such as recurring slope lineae (RSL) activity, dust storm impacts, and seasonal frost patterns. Each query requires integrating observations spanning $L_s$ ranges to assess temporal evolution.
\begin{table}
\renewcommand{\arraystretch}{1.3}
\caption{Statistics of the Planetary Datasets}
\label{table_planetary_datasets}
\vspace{-0.13in}
\centering
\begin{tabular}{|m{1cm}|m{1cm}|m{1cm}|m{1cm}|m{1cm}|m{1cm}|}
\hline
\makecell[c]{\textbf{Dataset}} & \makecell[c]{\textbf{Data Source}} & \makecell[c]{\textbf{Sources}} & \makecell[c]{\textbf{Entities}} & \makecell[c]{\textbf{Hyperedges}} & \makecell[c]{\textbf{Queries}} \\
\hline
\hline
\multirow{5}{*}{\makecell[c]{MarsRegion-QA}} & \makecell[c]{HiRISE (Orbital)} & \makecell[c]{1} & \makecell[c]{12,847} & \makecell[c]{8,213} & \multirow{5}{*}{\makecell[c]{200}} \\
\cline{2-5}
& \makecell[c]{CTX (Orbital)} & \makecell[c]{1} & \makecell[c]{28,563} & \makecell[c]{15,471} & \\
\cline{2-5}
& \makecell[c]{CRISM (Orbital)} & \makecell[c]{1} & \makecell[c]{6,329} & \makecell[c]{4,182} & \\
\cline{2-5}
& \makecell[c]{MOLA (Orbital)} & \makecell[c]{1} & \makecell[c]{45,210} & \makecell[c]{22,605} & \\
\cline{2-5}
& \makecell[c]{Rover In-situ} & \makecell[c]{2} & \makecell[c]{3,876} & \makecell[c]{2,541} & \\
\hline
\makecell[c]{MarsConflict-50} & \makecell[c]{Mixed (all above)} & \makecell[c]{6} & \makecell[c]{1,247} & \makecell[c]{683} & \makecell[c]{50} \\
\hline
\makecell[c]{MarsTemporal-QA} & \makecell[c]{Mixed (all above)} & \makecell[c]{6} & \makecell[c]{8,934} & \makecell[c]{5,127} & \makecell[c]{150} \\
\hline
\end{tabular}
\end{table}
Additionally, to validate generalization on established benchmarks, we evaluate on HotpotQA [38] and 2WikiMultiHopQA [39], using the same 300-question subsamples as MultiRAG [14] for fair comparison.
It is noteworthy that MarsRegion-QA exhibits high spatial density (multiple overlapping observations per region) but significant cross-resolution heterogeneity, while MarsConflict-50 is specifically designed to stress-test conflict handling with a high proportion of scientifically valuable disagreements ($\sim$72\% of conflicts are non-noise).
\textbf{b) Evaluation Metrics:} We adopt multiple metrics to comprehensively evaluate retrieval quality, answer accuracy, and conflict handling:
\begin{itemize}
\item \textbf{F1 score}: The harmonic mean of precision and recall, assessing overall retrieval and answer quality:
$$F1 = 2 \times \frac{P \times R}{P + R}$$
\item \textbf{Recall@K}: Recall at rank $K$, measuring the proportion of relevant documents retrieved within the top-$K$ results.
\item \textbf{Conflict Preservation Rate (CPR)}: The proportion of scientifically valuable conflicts (annotated as instrument-inherent, scale-dependent, or temporal-evolution) that are correctly preserved rather than filtered:
$$CPR = \frac{|\mathcal{C}^{sci}_{preserved}|}{|\mathcal{C}^{sci}_{total}|}$$
\item \textbf{Noise Rejection Rate (NRR)}: The proportion of noise conflicts that are correctly filtered:
$$NRR = \frac{|\mathcal{C}^{noise}_{filtered}|}{|\mathcal{C}^{noise}_{total}|}$$
\item \textbf{Conflict Classification Accuracy (CCA)}: Four-class classification accuracy over the conflict types on MarsConflict-50.
\item \textbf{Query Time (QT)} and \textbf{Preprocessing Time (PT)}: Measured in seconds, assessing online and offline efficiency.
\end{itemize}
\textbf{c) Hyper-parameter Settings:} All methods were implemented in a Python 3.10 and CUDA 12.1 environment. The base LLM is Llama3-8B-Instruct for all methods except where noted. For HySH construction, the hyperbolic curvature is set to $K = -1.0$, the embedding dimension $d = 64$, and the resolution power parameter $p = 2$ for Spatial OEM. For PICT, the interaction entropy threshold is $\epsilon = 0.3$, the noise penalty $\eta = -0.5$, the scientific boost coefficient $\beta = 0.2$, the temporal decay constant $\tau_{decay} = 180$ (in $L_s$ degrees, approximately half a Mars year), and the authority weight $\alpha = 0.5$. The MLP conflict classifier uses a two-layer architecture ($256 \rightarrow 128 \rightarrow 4$) with ReLU activation, trained on MarsConflict-50 with 5-fold cross-validation. The plausibility scoring MLP $f_\theta$ for retrieval follows the architecture in [18] with adaptive threshold $\tau_0 = 0.5$ and decay factor $c = 0.1$. All experiments were conducted on a device equipped with an NVIDIA A100 (80 GB) GPU and 256 GB of memory.
\textbf{d) Baseline Models:} To demonstrate the superiority of AreoRAG, we compare with the following categories of methods:
\textit{General RAG Methods:}
\begin{itemize}
\item[1)] \textbf{Standard RAG} [6]: Conventional retrieval-augmented generation with dense vector retrieval.
\item[2)] \textbf{IRCoT} [44]: Iterative retrieval with chain-of-thought reasoning refinement.
\item[3)] \textbf{RQ-RAG} [47]: Retrieval with optimized query decomposition for complex queries.
\end{itemize}
\textit{Graph-based RAG Methods:}
\begin{itemize}
\item[4)] \textbf{MultiRAG} [14]: Multi-source line graph with multi-level confidence computing (the primary comparison target).
\item[5)] \textbf{HyperGraphRAG} [25]: Hypergraph-based RAG with $n$-ary relational facts retrieval.
\item[6)] \textbf{HyperRAG} [18]: MLP-based retrieval over $n$-ary hypergraphs with adaptive search.
\end{itemize}
\textit{Conflict-Resolution Methods:}
\begin{itemize}
\item[7)] \textbf{TruthfulRAG} [17]: Knowledge graph-based conflict resolution via entropy-based filtering.
\item[8)] \textbf{MetaRAG} [9]: Metacognitive strategies for hallucination mitigation in retrieval.
\end{itemize}
\textbf{e) Dataset Preprocessing:} For the planetary datasets, we parse PDS4 labels and CNSA metadata through the multi-source spatial adapters (Section III-B) to extract spatial footprints, temporal windows, and instrument parameters. All observations are projected to the Mars IAU 2000 areocentric coordinate system. Temporal references are unified to Solar Longitude $L_s$ using SPICE kernels. For the general QA benchmarks, we follow the same preprocessing pipeline as MultiRAG [14] to ensure fair comparison.
\subsection{Overall Retrieval and QA Performance (Q1)}
To validate the effectiveness of AreoRAG, we assess it using F1 scores and Recall@5 across the planetary datasets and the two general multi-hop QA benchmarks. Table~\ref{table_comparison} summarizes the performance comparison.
\begin{table*}
\renewcommand{\arraystretch}{1.3}
\caption{Comparison with Baseline Methods on Planetary and General QA Datasets}
\label{table_comparison}
\vspace{-0.13in}
\centering
\begin{tabular}{|m{2.5cm}|m{1.1cm}|m{1.3cm}|m{1.1cm}|m{1.3cm}|m{1.1cm}|m{1.3cm}|m{1.1cm}|m{1.3cm}|}
\hline
\multirow{2}{*}{\makecell[c]{\textbf{Method}}} & \multicolumn{2}{c|}{\makecell[c]{\textbf{MarsRegion-QA}}} & \multicolumn{2}{c|}{\makecell[c]{\textbf{MarsTemporal-QA}}} & \multicolumn{2}{c|}{\makecell[c]{\textbf{HotpotQA}}} & \multicolumn{2}{c|}{\makecell[c]{\textbf{2WikiMultiHopQA}}} \\
\cline{2-9}
& F1/\% & Recall@5 & F1/\% & Recall@5 & F1/\% & Recall@5 & F1/\% & Recall@5 \\
\hline
\hline
Standard RAG & 28.4 & 31.2 & 25.7 & 28.3 & 34.1 & 33.5 & 25.6 & 26.2 \\
\hline
IRCoT & 35.6 & 38.9 & 32.1 & 35.4 & 41.6 & 41.2 & 42.3 & 40.9 \\
\hline
RQ-RAG & 37.2 & 40.5 & 34.8 & 37.6 & 51.6 & 49.3 & 45.3 & 44.6 \\
\hline
MultiRAG & 42.3 & 46.8 & 38.5 & 42.1 & 59.3 & 62.7 & 55.7 & 61.2 \\
\hline
HyperGraphRAG & 44.1 & 48.3 & 40.2 & 43.7 & 51.0 & 42.7 & 42.5 & 30.2 \\
\hline
HyperRAG & 46.5 & 50.7 & 41.8 & 45.2 & 42.5 & 43.7 & 34.0 & 34.1 \\
\hline
TruthfulRAG & 40.8 & 44.6 & 37.9 & 41.3 & 60.2 & --- & 55.4 & --- \\
\hline
MetaRAG & 41.5 & 45.2 & 39.1 & 42.8 & 51.1 & 49.9 & 50.7 & 52.2 \\
\hline
\textbf{AreoRAG} & \textbf{55.8} & \textbf{61.3} & \textbf{52.4} & \textbf{57.6} & \textbf{61.7} & \textbf{64.2} & \textbf{57.3} & \textbf{62.8} \\
\hline
\end{tabular}
\end{table*}
Table~\ref{table_comparison} demonstrates that AreoRAG outperforms all comparative methods across both planetary and general QA datasets. On MarsRegion-QA, AreoRAG achieves an F1 score of 55.8\%, representing a 13.5\% absolute improvement over MultiRAG (42.3\%) and a 9.3\% absolute improvement over the best graph-based baseline HyperRAG (46.5\%). This significant gap validates the effectiveness of HySH in capturing spatial relationships that discrete line graphs and standard hypergraphs miss.
On MarsTemporal-QA, which demands temporal reasoning across observation epochs, AreoRAG achieves 52.4\% F1, outperforming all baselines by at least 10.6\%. This improvement is attributed to PICT's temporal-evolution conflict handling (the $\gamma(|\Delta\mathcal{T}|)$ weighting in Eq. 20), which preserves temporal change signals rather than filtering them as inconsistencies.
On the general benchmarks (HotpotQA and 2WikiMultiHopQA), AreoRAG maintains competitive performance (61.7\% and 57.3\% F1), demonstrating that the framework generalizes beyond planetary science. The modest improvements over MultiRAG on these benchmarks (2.4\% and 1.6\%) are expected, as these datasets do not exhibit the spatial and physical conflict characteristics that AreoRAG is specifically designed to address.
Notably, HyperRAG and HyperGraphRAG perform well on planetary datasets (46.5\% and 44.1\% F1 on MarsRegion-QA) but underperform on general benchmarks. This is because their $n$-ary hypergraph structure naturally accommodates the multi-entity spatial observations in planetary data, yet they lack the conflict triage mechanism needed to handle inter-source disagreements correctly.
\subsection{Robustness Under Spatial Sparsity and Conflict Intensity (Q2)}
AreoRAG demonstrates strong robustness under varying spatial sparsity and conflict intensity. We conduct experiments from two perspectives.
\textbf{1) Spatial Sparsity:} We applied 30\%, 50\%, and 70\% random hyperedge masking to MarsRegion-QA, progressively removing spatial connections while ensuring query answers remain retrievable.
As shown in Fig. 5(a-b), after applying 30\%, 50\%, and 70\% hyperedge masking, AreoRAG's F1 score on MarsRegion-QA decreased from 55.8\% to 52.1\%, 49.3\%, and 45.6\% respectively. In contrast, MultiRAG's F1 dropped more sharply from 42.3\% to 37.8\%, 32.5\%, and 26.1\%. HyperRAG shows moderate degradation (46.5\% to 42.7\%, 38.9\%, 33.4\%). The superior robustness of AreoRAG under sparsity is attributed to two factors: (i) hyperbolic embedding preserves proximity information even when explicit graph edges are removed, as geodesic distance in $\mathbb{H}_K^d$ encodes spatial proximity independently of graph connectivity; and (ii) the Spatial OEM aggregation maintains representational quality by amplifying high-resolution signals that survive masking.
\textbf{2) Conflict Intensity:} We injected 30\%, 50\%, and 70\% synthetic conflict triples into MarsRegion-QA by duplicating existing observation records and perturbing their factual content (e.g., randomizing mineral identifications or altering coordinate data), simulating scenarios of increasing inter-source noise.
As shown in Fig. 5(c-d), AreoRAG's F1 score decreased only moderately from 55.8\% to 54.2\%, 52.8\%, and 50.1\% under 30\%, 50\%, and 70\% conflict injection respectively. MultiRAG exhibited steeper degradation (42.3\% to 40.1\%, 36.4\%, 30.7\%), and TruthfulRAG showed similar sensitivity (40.8\% to 38.2\%, 34.6\%, 29.3\%). The resilience of AreoRAG is directly attributable to PICT's ability to classify injected noise conflicts as $\mathcal{C}^{noise}$ and filter them while preserving genuine scientific disagreements. In contrast, MultiRAG's MCC module and TruthfulRAG's entropy-based filtering indiscriminately penalize all inconsistencies, including the original valid observations that become ``outvoted'' by injected noise.
\subsection{Ablation Study (Q3)}
To evaluate the individual contributions of HySH and PICT, we conduct systematic ablation experiments. Table~\ref{table_ablation} reports results on MarsRegion-QA and MarsTemporal-QA.
\begin{table*}
\renewcommand{\arraystretch}{1.3}
\caption{Ablation Experiments of HySH and PICT Modules}
\label{table_ablation}
\vspace{-0.13in}
\centering
\begin{tabular}{|m{4cm}|m{1.1cm}|m{1.1cm}|m{1.1cm}|m{1.1cm}|m{1.1cm}|m{1.1cm}|}
\hline
\multirow{2}{*}{\makecell[c]{\textbf{Configuration}}} & \multicolumn{3}{c|}{\makecell[c]{\textbf{MarsRegion-QA}}} & \multicolumn{3}{c|}{\makecell[c]{\textbf{MarsTemporal-QA}}} \\
\cline{2-7}
& F1/\% & QT/s & PT/s & F1/\% & QT/s & PT/s \\
\hline
\hline
AreoRAG (Full) & 55.8 & 3.42 & 86.5 & 52.4 & 4.17 & 72.3 \\
\hline
w/o HySH (use MLG) & 44.6 & 28.7 & 15.2 & 40.1 & 35.4 & 12.8 \\
\hline
w/o Hyperbolic (Euclidean hypergraph) & 49.2 & 4.85 & 51.3 & 45.6 & 5.72 & 43.7 \\
\hline
w/o Spatial OEM (standard Einstein) & 51.3 & 3.38 & 86.5 & 47.8 & 4.12 & 72.3 \\
\hline
w/o PICT (use MCC) & 45.9 & 3.15 & 86.5 & 39.7 & 3.89 & 72.3 \\
\hline
w/o Conflict Classification (uniform filter) & 48.1 & 3.28 & 86.5 & 42.3 & 4.01 & 72.3 \\
\hline
w/o Interaction Entropy (use $\Delta H_p$) & 50.4 & 3.51 & 86.5 & 46.2 & 4.25 & 72.3 \\
\hline
w/o Both (Standard RAG) & 28.4 & 1.23 & --- & 25.7 & 1.56 & --- \\
\hline
\end{tabular}
\end{table*}
\textbf{a) HySH Module Analysis:} The HySH module achieves significant improvements in both accuracy and efficiency. Replacing HySH with MultiRAG's MLG (w/o HySH) causes F1 drops of 11.2\% on MarsRegion-QA and 12.3\% on MarsTemporal-QA, while query time increases by 8.4$\times$ (3.42s to 28.7s) due to the edge explosion problem in pairwise spatial encoding. This validates the $O(k)$ vs. $O(k^2)$ complexity advantage of hyperedges.
Within HySH, the hyperbolic embedding contributes 6.6\% F1 improvement over Euclidean hypergraph (49.2\% vs. 55.8\%), confirming that the negative-curvature geometry is essential for faithfully representing the hierarchical scale structure. The Spatial OEM contributes an additional 4.5\% F1 over standard Einstein midpoint aggregation (51.3\% vs. 55.8\%), validating the outward bias property (Theorem 1) in preventing hierarchical collapse during cross-resolution fusion.
\textbf{b) PICT Module Analysis:} Replacing PICT with MultiRAG's MCC (w/o PICT) causes F1 drops of 9.9\% on MarsRegion-QA and 12.7\% on MarsTemporal-QA. The larger drop on MarsTemporal-QA is expected, as this dataset contains abundant temporal-evolution conflicts that MCC would filter as inconsistencies.
The ablation further reveals the contribution of each PICT component. Removing conflict classification (using uniform filtering instead of four-category triage) costs 7.7\% F1 on MarsRegion-QA. Replacing cross-source interaction entropy with TruthfulRAG's $\Delta H_p$ metric costs 5.4\% F1, confirming that the cross-source formulation (Eq. 14) is more appropriate for the all-external-knowledge setting of planetary observations.
\textbf{c) Module Interaction:} Notably, the sum of individual module contributions (HySH: 11.2\% + PICT: 9.9\% = 21.1\%) falls short of the gap between the full model and Standard RAG (55.8\% $-$ 28.4\% = 27.4\%), so the two modules do not contribute independently; their interaction is evident in the coupling points. HySH's radial depth difference $\Delta r$ directly improves PICT's scale-conflict classification; PICT's triage feedback improves HySH's retrieval priority. Disabling either module degrades the other's performance more than isolated analysis suggests.
\subsection{Conflict Preservation Evaluation (Q4)}
A defining capability of AreoRAG is the ability to preserve scientifically valuable conflicts rather than suppressing them. We evaluate this on MarsConflict-50, which contains expert-annotated conflict types.
\begin{table}
\renewcommand{\arraystretch}{1.3}
\caption{Conflict Handling Performance on MarsConflict-50}
\label{table_conflict}
\vspace{-0.13in}
\centering
\begin{tabular}{|m{1.5cm}|m{1cm}|m{1cm}|m{1cm}|m{1cm}|}
\hline
\makecell[c]{\textbf{Method}} & \makecell[c]{\textbf{CCA/\%}} & \makecell[c]{\textbf{CPR/\%}} & \makecell[c]{\textbf{NRR/\%}} & \makecell[c]{\textbf{F1/\%}} \\
\hline
\hline
Standard RAG & --- & 100.0* & 0.0 & 26.3 \\
\hline
MultiRAG (MCC) & --- & 8.3 & 85.7 & 35.2 \\
\hline
TruthfulRAG & --- & 13.9 & 78.6 & 37.8 \\
\hline
MetaRAG & --- & 11.1 & 82.1 & 36.5 \\
\hline
\textbf{AreoRAG (PICT)} & \textbf{84.0} & \textbf{91.7} & \textbf{85.7} & \textbf{53.1} \\
\hline
\end{tabular}
\end{table}
\emph{*Standard RAG preserves all information indiscriminately (CPR=100\%) because it has no conflict handling mechanism, resulting in noise contamination and low F1. ``---'' indicates the method does not perform explicit conflict classification.}
Table~\ref{table_conflict} reveals the fundamental difference between AreoRAG and existing methods. MultiRAG achieves a high Noise Rejection Rate (85.7\%) but at the cost of a catastrophically low Conflict Preservation Rate (8.3\%) — it filters 91.7\% of scientifically valuable conflicts as ``unreliable data.'' TruthfulRAG and MetaRAG show similar behavior (CPR of 13.9\% and 11.1\%), confirming that existing conflict-resolution methods systematically destroy scientific anomaly signals.
In contrast, AreoRAG achieves a CPR of 91.7\% while maintaining the same NRR (85.7\%) as MultiRAG, demonstrating that PICT successfully decouples noise filtering from scientific conflict preservation. The Conflict Classification Accuracy of 84.0\% on the four-category task validates the separability claim in Proposition 2. Error analysis reveals that the primary source of misclassification is between instrument-inherent and scale-dependent conflicts (12.3\% confusion rate), which is expected as both involve observation geometry differences. Noise vs. scientific conflict misclassification is rare (3.7\%), confirming the robustness of the explainable/opaque distinction (Definition 7).
Furthermore, the F1 score improvement (53.1\% vs. 35.2\% for MultiRAG) demonstrates that preserving scientific conflicts directly benefits answer quality: the LLM can generate more comprehensive and scientifically faithful answers when provided with both agreeing and legitimately disagreeing evidence, accompanied by physical bridging explanations.
\subsection{Efficiency Analysis (Q5)}
\begin{table}
\renewcommand{\arraystretch}{1.3}
\caption{Time Cost Analysis Across Modules}
\label{table_time_cost}
\vspace{-0.13in}
\centering
\begin{tabular}{|m{2cm}|m{1cm}|m{1cm}|m{1cm}|m{1cm}|}
\hline
\multirow{2}{*}{\makecell[c]{\textbf{Method}}} & \multicolumn{2}{c|}{\makecell[c]{\textbf{MarsRegion-QA}}} & \multicolumn{2}{c|}{\makecell[c]{\textbf{MarsTemporal-QA}}} \\
\cline{2-5}
& QT/s & PT/s & QT/s & PT/s \\
\hline
\hline
Standard RAG & 1.23 & --- & 1.56 & --- \\
\hline
MultiRAG & 4.87 & 15.2 & 6.13 & 12.8 \\
\hline
HyperRAG & 2.95 & 142.7 & 3.41 & 118.5 \\
\hline
TruthfulRAG & 5.62 & 18.7 & 6.85 & 15.4 \\
\hline
\textbf{AreoRAG} & \textbf{3.42} & \textbf{86.5} & \textbf{4.17} & \textbf{72.3} \\
\hline
\end{tabular}
\end{table}
AreoRAG's query time (3.42s on MarsRegion-QA) is competitive with HyperRAG (2.95s) and substantially faster than MultiRAG (4.87s) and TruthfulRAG (5.62s). The faster online query is attributable to the $O(k)$ hyperedge traversal complexity and the lightweight MLP-based plausibility scoring, which avoids the expensive mutual information entropy computation required by MultiRAG's MCC at query time.
The preprocessing time (86.5s) is higher than MultiRAG (15.2s) due to the hyperbolic embedding computation (Eq. 6-8), but lower than HyperRAG (142.7s) because we do not require the full contrastive training pipeline. Importantly, HySH construction is a one-time offline cost amortized across all queries. The PICT module adds minimal online overhead: the conflict classifier (Eq. 19) requires $<$0.1s per detected conflict pair, and the interaction entropy computation (Eq. 14) adds approximately 0.8s per query through parallel LLM forward passes.
\subsection{Case Study}
AreoRAG's effectiveness in multi-source planetary data integration is demonstrated through a real-world query about the Jezero Crater western delta. The query and system response are detailed in Table VI.
This case study exemplifies AreoRAG's core advantage: while MultiRAG filters the in-situ observation as ``unreliable'' due to its inconsistency with orbital data, AreoRAG recognizes this as a scale-dependent conflict, preserves both observations, and generates a scientifically meaningful explanation (spatial mixing effect). The answer includes provenance metadata (DataIDs) for scientific traceability, and proactively recommends follow-up data to resolve the ambiguity — a capability enabled by the PICT module's conflict-aware context construction.
\subsection{Limitations}
We acknowledge several limitations inherent in the current framework:
\begin{enumerate}
\item \textbf{Dataset scale}: The planetary datasets are constructed from publicly available archives and may not cover the full diversity of Mars exploration scenarios. Larger-scale evaluation with comprehensive PDS holdings is planned as future work.
\item \textbf{Conflict classification coverage}: The four-category conflict taxonomy, while covering the most common planetary science scenarios, may not capture all possible conflict origins (e.g., processing artifact conflicts, calibration drift). Extending the taxonomy is a natural direction.
\item \textbf{LLM dependency}: The cross-source interaction entropy computation (Eq. 14) and conflict classification (Eq. 18) both rely on LLM forward passes, introducing potential biases from the base model's parametric knowledge about planetary science. Fine-tuning on domain-specific corpora may mitigate this issue.
\item \textbf{Generalization to other planetary bodies}: While designed for Mars, the framework's principles (hyperbolic scale hierarchy, physics-informed conflict triage) are applicable to other planetary bodies (Moon, Venus, icy moons). Validation on non-Mars datasets remains future work.
\end{enumerate}
\section{Related Work}
\subsection{Graph-Structured Retrieval Augmented Generation}
Graph-based methods have become a central paradigm for enhancing the reasoning capabilities and factual grounding of Retrieval Augmented Generation (RAG) systems. Early approaches leveraged curated Knowledge Graphs (KGs) such as Wikidata and Freebase to provide structured triples or reasoning chains for LLM-based question answering [22], [27], [40]. More recently, methods that dynamically construct task-specific graphs from raw corpora have gained prominence. HippoRAG [23] draws inspiration from neurobiology to construct offline memory graphs with a neural indexing mechanism, achieving significant retrieval latency reduction. ToG 2.0 [25] introduces a graph-context co-retrieval framework that dynamically balances structured and unstructured evidence, resulting in substantial hallucination rate reduction compared to unimodal approaches. Graph-CoT [48] leverages Graph Neural Networks to establish bidirectional connections between KGs and the latent space of LLMs, reducing factual inconsistencies on KGQA benchmarks. SubGraphRAG [19] proposes a lightweight MLP-based approach that retrieves query-relevant subgraphs and encodes structural proximity through directional distance encoding, achieving state-of-the-art performance with low latency.
A critical limitation of the above methods is their reliance on binary relational facts (entity-relation-entity triples), which suffer from semantic fragmentation and path explosion when representing complex multi-entity interactions [18]. To address this, hypergraph-based RAG methods have emerged. HyperGraphRAG [25b] advances the field by natively encoding $n$-ary relational facts as hyperedges, outperforming conventional KG-based RAGs through shallower yet more expressive reasoning chains. HyperRAG [18] further introduces a trainable MLP-based retriever (HyperRetriever) that fuses structural and semantic signals for adaptive $n$-ary chain construction, achieving the highest answer accuracy on WikiTopics benchmarks. OG-RAG [34b] grounds hyperedge construction in domain-specific ontologies for more interpretable evidence aggregation, though its dependence on high-quality ontologies constrains scalability.
For multi-source scenarios, MultiRAG [14] proposes multi-source line graphs (MLG) to aggregate cross-domain knowledge and multi-level confidence computing (MCC) to filter unreliable nodes, achieving over 10\% F1 improvement on sparse datasets. FusionQuery [34] enhances cross-domain retrieval precision through heterogeneous graph integration with dynamic credibility evaluation. KAG [26] provides a unified representation framework for multi-source KGs through the OpenSPG platform.
Despite this progress, all existing graph-based RAG methods — whether binary, hypergraph, or multi-source line graph — construct their topology based on discrete text entities and explicit semantic associations. None addresses the scenario where data sources are inherently embedded in continuous physical space and where inter-entity relevance is governed by spatial proximity rather than textual co-occurrence. AreoRAG bridges this gap by introducing spatial observation hyperedges embedded in hyperbolic space, enabling faithful representation of continuous spatiotemporal topology within a graph-based retrieval framework.
\subsection{Hyperbolic Representation Learning for Retrieval}
Hyperbolic geometry has attracted increasing attention in representation learning due to its capacity to embed hierarchical, tree-like structures with low distortion [52]-[54]. Unlike Euclidean space, where volume grows polynomially with radius, hyperbolic space exhibits exponential volume growth, naturally accommodating the branching structure of taxonomies, ontologies, and scale hierarchies. Foundational work by Nickel and Kiela [52] demonstrated that Poincar\'e embeddings of WordNet hierarchies achieve superior link prediction with substantially fewer dimensions than Euclidean counterparts. Subsequent work extended hyperbolic representations to knowledge graph embedding [53], [55], molecular generation [56], and recommendation systems [57].
In the context of text retrieval, hyperbolic geometry has recently shown strong promise. HypRAG [20] introduces hyperbolic dense retrieval for RAG, developing two model variants in the Lorentz model: a fully hyperbolic transformer (HyTE-FH) and a hybrid architecture (HyTE-H). A key contribution is the Outward Einstein Midpoint (OEM), a geometry-aware pooling operator that provably preserves hierarchical structure during sequence aggregation, overcoming the radial contraction failure of naive Euclidean averaging. HypRAG achieves up to 29\% gains over Euclidean baselines in context relevance on RAGBench, and demonstrates that hyperbolic representations encode document specificity through norm-based separation — with over 20\% radial increase from general to specific concepts. HyperbolicRAG [58] projects embeddings into the Poincar\'e ball to encode hierarchical depth within a static knowledge graph, using dual-space retrieval that fuses Euclidean and hyperbolic rankings. HELM [59] introduces a family of hyperbolic language models that operate entirely in hyperbolic space for text generation, though not specifically targeting retrieval.
These works establish the viability of hyperbolic geometry for hierarchical text retrieval, but they exclusively address the semantic hierarchy of natural language documents (broad topics $\rightarrow$ specific entities). No existing work has applied hyperbolic geometry to represent the physical scale hierarchy of scientific observations, where the hierarchy arises not from semantic abstraction but from spatial resolution (coarse global survey $\rightarrow$ fine local imaging). AreoRAG introduces the scale-curvature correspondence principle (Proposition 1), which establishes that the resolution hierarchy of planetary remote sensing data is intrinsically hyperbolic, and couples spatial resolution with radial depth in the Lorentz model. Furthermore, we extend the OEM pooling operator with resolution-aware radial weighting (Spatial OEM, Eq. 13), ensuring that cross-resolution aggregation preserves fine-scale observational details rather than collapsing them into coarse-resolution summaries.
\subsection{Knowledge Conflict Detection and Resolution in RAG}
Knowledge conflicts — situations where different information sources provide contradictory factual statements — pose a fundamental challenge to RAG systems [60]-[62]. Research on conflict handling can be broadly categorized into impact analysis and resolution strategies.
\textbf{Impact analysis.} Longpre et al. [60] first exposed entity-based knowledge conflicts in question answering, revealing that LLMs tend to rely on parametric memory when retrieved passages contain contradictory information. Xie et al. [61] found that LLMs are receptive to single external evidence but exhibit strong confirmation bias when presented with both supporting and conflicting information. Tan et al. [63] revealed a systematic bias toward self-generated contexts over retrieved ones, attributing this to higher query-context similarity of self-generated content. More recently, Tang et al. [21] formalized knowledge conflict in multimodal long-chain reasoning, distinguishing between input-level objective conflict and process-level effective conflict. Through probing internal representations, they revealed four key findings: (I) different conflict types are encoded as linearly separable features ($>$93\% AUC with linear probes); (II) conflict signals concentrate in mid-to-late layers (depth localization); (III) aggregating token-level signals along trajectories robustly recovers input-level conflict types (hierarchical consistency); and (IV) reinforcing the model's implicit source preference is far easier than reversing it (directional asymmetry). These mechanistic insights provide the theoretical foundation for PICT's conflict classification approach.
\textbf{Resolution strategies.} Existing resolution methods operate at the token level or semantic level [64]-[67]. Token-level methods such as CD$^2$ [64] manipulate attention weights to suppress parametric knowledge when conflicts are detected. ASTUTE RAG [65] uses gradient-based attribution to identify and mask conflicting tokens during inference. Semantic-level methods include CK-PLUG [66], which develops adapter-based architectures for dynamic knowledge weighting, and FaithfulRAG [67], which externalizes LLMs' parametric knowledge and aligns it with retrieved context. TruthfulRAG [17] advances to factual-level resolution by constructing knowledge graphs from retrieved content, performing query-based graph retrieval, and applying entropy-based filtering to locate conflicting elements — specifically comparing retrieval-augmented entropy against parametric-only entropy ($\Delta H_p$) to identify corrective knowledge paths. MetaRAG [9] employs metacognitive strategies for hallucination mitigation through self-reflection mechanisms.
A critical and unexamined assumption shared by all existing conflict-resolution methods is that inter-source inconsistency is inherently undesirable and should be eliminated. This assumption holds in domains where authoritative ground truth exists (e.g., financial records, encyclopedic facts). However, in scientific observation scenarios — particularly deep-space exploration — the absence of absolute ground truth means that inter-source disagreements may represent legitimate multi-dimensional observations of the same phenomenon rather than errors. AreoRAG introduces a fundamentally different paradigm: Physics-Informed Conflict Triage (PICT), which classifies conflicts by their physical origin and applies differentiated processing. By replacing TruthfulRAG's parametric-vs-augmented entropy ($\Delta H_p$) with cross-source interaction entropy ($\mathcal{H}_{inter}$, Eq. 14) and incorporating physical observation parameters alongside LLM hidden-state features for four-category conflict classification (Eq. 18-19), PICT provably preserves scientifically valuable disagreements (Theorem 2) while maintaining noise-filtering capability.
\subsection{Intelligent Retrieval for Planetary Remote Sensing Data}
Planetary remote sensing archives have grown to petabyte scale through missions such as Mars Reconnaissance Orbiter, Mars Express, Tianwen-1, Mars Science Laboratory, and Mars 2020 [1]-[4]. The primary access infrastructure — NASA's Planetary Data System (PDS) [68] and its Mars Orbital Data Explorer (ODE) [69] — provides metadata-driven search through spatial bounding box queries, temporal range filters, and instrument/product-type selectors. Similarly, CNSA's Lunar and Planetary Data Release System offers keyword-based retrieval for Chinese mission data [70]. The USGS Astrogeology Science Center maintains derived data products (DTMs, mosaics) with catalog-level metadata search [71].
However, these systems operate at the level of metadata keyword matching and do not support semantic understanding of query intent, cross-source reasoning, or natural language interaction. A scientist seeking ``HiRISE images showing dust devil tracks near the equator'' must manually translate this into a series of coordinate-bounded, instrument-filtered queries and visually inspect each returned product — a process that is both labor-intensive and prone to missing relevant observations cataloged under different terminology.
In the broader geospatial domain, the integration of AI with remote sensing data retrieval has gained momentum. GeoAI methods [72], [73] combine geographic information science with deep learning for tasks such as scene classification, object detection, and change detection. Recent work has explored the use of LLMs for geospatial reasoning [74], [75], including natural language interfaces for GIS queries and the interpretation of satellite imagery through vision-language models. Foundation models for remote sensing, such as those pre-trained on large-scale Earth observation data, have demonstrated the potential for cross-modal understanding [76], [77]. However, these efforts remain focused on Earth observation data and do not address the unique challenges of planetary science: the multi-platform observation geometry, the absence of ground truth for conflict adjudication, and the need for cross-resolution reasoning across vastly different spatial scales.
To the best of our knowledge, AreoRAG is the first framework that brings RAG capabilities to planetary remote sensing data retrieval. By constructing a spatially-grounded knowledge hypergraph with physics-informed conflict handling, AreoRAG transforms the planetary data retrieval paradigm from metadata keyword matching to semantic spatial reasoning, enabling natural language queries that involve spatial proximity, temporal evolution, cross-source correlation, and scientifically informed conflict interpretation.
\section{Conclusion}
In this work, we introduce AreoRAG, a framework designed for multi-source planetary spatial data retrieval augmented generation. To address the structural bottleneck of discrete representation failure for continuous spatiotemporal topology and the epistemological conflict between scientific observational divergence and traditional de-falsification mechanisms, we propose two key innovations: Hyperbolic Spatial Hypergraph construction and Physics-Informed Conflict Triage.
HySH employs $n$-ary spatial observation hyperedges embedded in hyperbolic space via the Lorentz model, reducing edge complexity from $O(k^2)$ to $O(k)$ while faithfully preserving the hierarchical scale structure of planetary observations through the scale-curvature correspondence principle. The Spatial Outward Einstein Midpoint aggregation operator further ensures that cross-resolution evidence fusion retains fine-scale observational details with a formal outward bias guarantee. Meanwhile, the PICT module fundamentally redefines the role of inter-source conflict in RAG systems — shifting from uniform conflict elimination to physics-informed conflict triage that classifies disagreements by their physical origin and applies differentiated confidence recalibration. The Anti-Over-Smoothing Guarantee (Theorem 2) ensures that scientifically valuable observational divergences are provably preserved rather than suppressed.
Extensive experiments on multi-source planetary observation datasets and general multi-hop QA benchmarks demonstrate that AreoRAG significantly outperforms existing methods in retrieval fidelity, answer accuracy, and scientific faithfulness. In particular, AreoRAG achieves a Conflict Preservation Rate of 91.7\% while maintaining noise rejection capability comparable to existing methods — a capability absent in all prior multi-source RAG frameworks.
Future work will explore three directions: (1) extending the framework to other planetary bodies (Moon, Venus, icy moons) and validating the generalizability of the scale-curvature correspondence and conflict triage principles across different observation ecosystems; (2) incorporating multimodal retrieval that directly reasons over raw imagery and spectral data rather than metadata-derived knowledge graphs, leveraging vision-language models for planetary scene understanding; and (3) developing an interactive planetary data exploration system that integrates AreoRAG with GIS visualization, enabling scientists to conduct natural language-driven, conflict-aware, multi-scale spatial analysis over the full planetary data archive.
\section*{Acknowledgments}
This work is supported by the National Key R\&D Program of China ``Intergovernmental International Science and Technology Innovation Cooperation'' (Grant No.~2025YFE0107100).
\bibliographystyle{IEEEtran}
% argument is your BibTeX string definitions and bibliography database(s)
\bibliography{IEEEabrv,references}
%
\vfill
\end{document}

BIN
MarsRAG/fig1.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 37 KiB

348
MarsRAG/references.bib Normal file
View File

@@ -0,0 +1,348 @@
%%% ====================================================================
%%% BibTeX-file{
%%% author = "Gerry Murray",
%%% version = "1.2",
%%% date = "2 April 2012",
%%% filename = "acmsmall-sample-bibfile.bib",
%%% address = "ACM, NY",
%%% email = "murray at hq.acm.org",
%%% codetable = "ISO/ASCII",
%%% keywords = "ACM Reference Format, bibliography, citation, references",
%%% supported = "yes",
%%% docstring = "This BibTeX database file contains 'bibdata' entries
%%% that 'match' the examples provided in the Specifications Document
%%% AND, also, 'legacy'-type bibs. It should assist authors in
%%% choosing the 'correct' at-bibtype and necessary bib-fields
%%% so as to obtain the appropriate ACM Reference Format output.
%%% It also contains many 'Standard Abbreviations'. "
%%% }
%%% ====================================================================
% Journals
% First the Full Name is given, then the abbreviation used in the AMS Math
% Reviews, with an indication if it could not be found there.
% Note the 2nd overwrites the 1st, so swap them if you want the full name.
%{AMS}
@String{AMSTrans = "American Mathematical Society Translations" }
@String{AMSTrans = "Amer. Math. Soc. Transl." }
@String{BullAMS = "Bulletin of the American Mathematical Society" }
@String{BullAMS = "Bull. Amer. Math. Soc." }
@String{ProcAMS = "Proceedings of the American Mathematical Society" }
@String{ProcAMS = "Proc. Amer. Math. Soc." }
@String{TransAMS = "Transactions of the American Mathematical Society" }
@String{TransAMS = "Trans. Amer. Math. Soc." }
%ACM
@String{CACM = "Communications of the {ACM}" }
@String{CACM = "Commun. {ACM}" }
@String{CompServ = "Comput. Surveys" }
@String{JACM = "J. ACM" }
@String{ACMMathSoft = "{ACM} Transactions on Mathematical Software" }
@String{ACMMathSoft = "{ACM} Trans. Math. Software" }
@String{SIGNUM = "{ACM} {SIGNUM} Newsletter" }
@String{SIGNUM = "{ACM} {SIGNUM} Newslett." }
@String{AmerSocio = "American Journal of Sociology" }
@String{AmerStatAssoc = "Journal of the American Statistical Association" }
@String{AmerStatAssoc = "J. Amer. Statist. Assoc." }
@String{ApplMathComp = "Applied Mathematics and Computation" }
@String{ApplMathComp = "Appl. Math. Comput." }
@String{AmerMathMonthly = "American Mathematical Monthly" }
@String{AmerMathMonthly = "Amer. Math. Monthly" }
@String{BIT = "{BIT}" }
@String{BritStatPsych = "British Journal of Mathematical and Statistical
Psychology" }
@String{BritStatPsych = "Brit. J. Math. Statist. Psych." }
@String{CanMathBull = "Canadian Mathematical Bulletin" }
@String{CanMathBull = "Canad. Math. Bull." }
@String{CompApplMath = "Journal of Computational and Applied Mathematics" }
@String{CompApplMath = "J. Comput. Appl. Math." }
@String{CompPhys = "Journal of Computational Physics" }
@String{CompPhys = "J. Comput. Phys." }
@String{CompStruct = "Computers and Structures" }
@String{CompStruct = "Comput. \& Structures" }
@String{CompJour = "The Computer Journal" }
@String{CompJour = "Comput. J." }
@String{CompSysSci = "Journal of Computer and System Sciences" }
@String{CompSysSci = "J. Comput. System Sci." }
@String{Computing = "Computing" }
@String{ContempMath = "Contemporary Mathematics" }
@String{ContempMath = "Contemp. Math." }
@String{Crelle = "Crelle's Journal" }
@String{GiornaleMath = "Giornale di Mathematiche" }
@String{GiornaleMath = "Giorn. Mat." } % didn't find in AMS MR., ibid.
%IEEE
% IEEE periodicals.  Where a macro appears twice, the full title comes first
% and the abbreviation second; the later definition is the one in effect.
@String{Computer          = {{IEEE} Computer}}
@String{IEEETransComp     = {{IEEE} Transactions on Computers}}
@String{IEEETransComp     = {{IEEE} Trans. Comput.}}
@String{IEEETransAC       = {{IEEE} Transactions on Automatic Control}}
@String{IEEETransAC       = {{IEEE} Trans. Automat. Control}}
@String{IEEESpec          = {{IEEE} Spectrum}} % didn't find in AMS MR
@String{ProcIEEE          = {Proceedings of the {IEEE}}}
@String{ProcIEEE          = {Proc. {IEEE}}} % didn't find in AMS MR
@String{IEEETransAeroElec = {{IEEE} Transactions on Aerospace and Electronic Systems}}
@String{IEEETransAeroElec = {{IEEE} Trans. Aerospace Electron. Systems}}
% Journal-name macros (IMA J. Numer. Anal. through Rev. Inst. Internat. Statist.).
% Paired definitions give the full title first and the abbreviation second;
% the later definition is the one in effect.
@String{IMANumerAna = "{IMA} Journal of Numerical Analysis" }
@String{IMANumerAna = "{IMA} J. Numer. Anal." }
@String{InfProcLet = "Information Processing Letters" }
@String{InfProcLet = "Inform. Process. Lett." }
@String{InstMathApp = "Journal of the Institute of Mathematics and
its Applications" }
@String{InstMathApp = "J. Inst. Math. Appl." }
@String{IntControl = "International Journal of Control" }
@String{IntControl = "Internat. J. Control" }
@String{IntNumerEng = "International Journal for Numerical Methods in
Engineering" }
@String{IntNumerEng = "Internat. J. Numer. Methods Engrg." }
@String{IntSuper = "International Journal of Supercomputing Applications" }
@String{IntSuper = "Internat. J. Supercomputing Applic." } % didn't find
%% in AMS MR
@String{Kibernetika = "Kibernetika" }
@String{JResNatBurStand = "Journal of Research of the National Bureau
of Standards" }
@String{JResNatBurStand = "J. Res. Nat. Bur. Standards" }
@String{LinAlgApp = "Linear Algebra and its Applications" }
@String{LinAlgApp = "Linear Algebra Appl." }
@String{MathAnaAppl = "Journal of Mathematical Analysis and Applications" }
@String{MathAnaAppl = "J. Math. Anal. Appl." }
@String{MathAnnalen = "Mathematische Annalen" }
@String{MathAnnalen = "Math. Ann." }
@String{MathPhys = "Journal of Mathematical Physics" }
@String{MathPhys = "J. Math. Phys." }
@String{MathComp = "Mathematics of Computation" }
@String{MathComp = "Math. Comp." }
@String{MathScand = "Mathematica Scandinavica" }
@String{MathScand = "Math. Scand." }
@String{TablesAidsComp = "Mathematical Tables and Other Aids to Computation" }
@String{TablesAidsComp = "Math. Tables Aids Comput." }
@String{NumerMath = "Numerische Mathematik" }
@String{NumerMath = "Numer. Math." }
@String{PacificMath = "Pacific Journal of Mathematics" }
@String{PacificMath = "Pacific J. Math." }
@String{ParDistComp = "Journal of Parallel and Distributed Computing" }
@String{ParDistComp = "J. Parallel and Distrib. Comput." } % didn't find
%% in AMS MR
@String{ParComputing = "Parallel Computing" }
@String{ParComputing = "Parallel Comput." }
@String{PhilMag = "Philosophical Magazine" }
@String{PhilMag = "Philos. Mag." }
@String{ProcNAS = "Proceedings of the National Academy of Sciences
of the USA" }
@String{ProcNAS = "Proc. Nat. Acad. Sci. U. S. A." }
@String{Psychometrika = "Psychometrika" }
@String{QuartMath = "Quarterly Journal of Mathematics, Oxford, Series (2)" }
@String{QuartMath = "Quart. J. Math. Oxford Ser. (2)" }
@String{QuartApplMath = "Quarterly of Applied Mathematics" }
@String{QuartApplMath = "Quart. Appl. Math." }
% Spelling of the full title corrected ("Statistical", not "Statisical").
@String{RevueInstStat = "Review of the International Statistical Institute" }
@String{RevueInstStat = "Rev. Inst. Internat. Statist." }
%SIAM
% SIAM periodicals.  Each macro is given as full title, then abbreviation;
% the later definition is the one in effect.
@String{JSIAM       = {Journal of the Society for Industrial and Applied Mathematics}}
@String{JSIAM       = {J. Soc. Indust. Appl. Math.}}
@String{JSIAMB      = {Journal of the Society for Industrial and Applied Mathematics, Series B, Numerical Analysis}}
@String{JSIAMB      = {J. Soc. Indust. Appl. Math. Ser. B Numer. Anal.}}
@String{SIAMAlgMeth = {{SIAM} Journal on Algebraic and Discrete Methods}}
@String{SIAMAlgMeth = {{SIAM} J. Algebraic Discrete Methods}}
@String{SIAMAppMath = {{SIAM} Journal on Applied Mathematics}}
@String{SIAMAppMath = {{SIAM} J. Appl. Math.}}
@String{SIAMComp    = {{SIAM} Journal on Computing}}
@String{SIAMComp    = {{SIAM} J. Comput.}}
@String{SIAMMatrix  = {{SIAM} Journal on Matrix Analysis and Applications}}
@String{SIAMMatrix  = {{SIAM} J. Matrix Anal. Appl.}}
@String{SIAMNumAnal = {{SIAM} Journal on Numerical Analysis}}
@String{SIAMNumAnal = {{SIAM} J. Numer. Anal.}}
@String{SIAMReview  = {{SIAM} Review}}
@String{SIAMReview  = {{SIAM} Rev.}}
@String{SIAMSciStat = {{SIAM} Journal on Scientific and Statistical Computing}}
@String{SIAMSciStat = {{SIAM} J. Sci. Statist. Comput.}}
% Journal-name macros (Software Prac. Experience through Z. Angew. Math. Phys.).
% Paired definitions give the full title first and the abbreviation second;
% the later definition is the one in effect.
@String{SoftPracExp = "Software Practice and Experience" }
@String{SoftPracExp = "Software Prac. Experience" } % didn't find in AMS MR
@String{StatScience = "Statistical Science" }
@String{StatScience = "Statist. Sci." }
@String{Techno = "Technometrics" }
@String{USSRCompMathPhys = "{USSR} Computational Mathematics and Mathematical
Physics" }
@String{USSRCompMathPhys = "{U. S. S. R.} Comput. Math. and Math. Phys." }
@String{VLSICompSys = "Journal of {VLSI} and Computer Systems" }
@String{VLSICompSys = "J. {VLSI} Comput. Syst." }
% Umlaut restored in the full German titles ("f{\"u}r", not "fur").
@String{ZAngewMathMech = "Zeitschrift f{\"u}r Angewandte Mathematik und
Mechanik" }
@String{ZAngewMathMech = "Z. Angew. Math. Mech." }
@String{ZAngewMathPhys = "Zeitschrift f{\"u}r Angewandte Mathematik und Physik" }
@String{ZAngewMathPhys = "Z. Angew. Math. Phys." }
% Publishers % ================================================= |
% Publisher-name macros.  Macro names are left exactly as they were so that
% existing entries keep resolving; only misspelled values are corrected.
@String{Academic = "Academic Press" }
@String{ACMPress = "{ACM} Press" }
@String{AdamHilger = "Adam Hilger" }
@String{AddisonWesley = "Addison-Wesley" }
@String{AllynBacon = "Allyn and Bacon" }
@String{AMS = "American Mathematical Society" }
% The umlaut belongs on the "a" (Birkhaeuser), not on the "u".
@String{Birkhauser = "Birkh{\"a}user" }
@String{CambridgePress = "Cambridge University Press" }
@String{Chelsea = "Chelsea" }
% Value corrected to "Clarendon"; the macro name keeps its historical spelling.
@String{ClaredonPress = "Clarendon Press" }
@String{DoverPub = "Dover Publications" }
% Value corrected to "Eyrolles"; the macro name keeps its historical spelling.
@String{Eyolles = "Eyrolles" }
@String{HoltRinehartWinston = "Holt, Rinehart and Winston" }
@String{Interscience = "Interscience" }
@String{JohnsHopkinsPress = "The Johns Hopkins University Press" }
@String{JohnWileySons = "John Wiley and Sons" }
@String{Macmillan = "Macmillan" }
@String{MathWorks = "The Math Works Inc." }
@String{McGrawHill = "McGraw-Hill" }
@String{NatBurStd = "National Bureau of Standards" }
@String{NorthHolland = "North-Holland" }
@String{OxfordPress = "Oxford University Press" } %address Oxford or London?
@String{PergamonPress = "Pergamon Press" }
@String{PlenumPress = "Plenum Press" }
@String{PrenticeHall = "Prentice-Hall" }
@String{SIAMPub = "{SIAM} Publications" }
@String{Springer = "Springer-Verlag" }
@String{TexasPress = "University of Texas Press" }
@String{VanNostrand = "Van Nostrand" }
@String{WHFreeman = "W. H. Freeman and Co." }
% MultiRAG (ICDE 2025).  Empty volume/number fields dropped, page range uses
% an en dash, and the system name is brace-protected against sentence casing.
@inproceedings{Wu25MultiRAG,
  author    = {Wu, Wenlong and Wang, Haofen and Li, Bohan and Huang, Peixuan and Zhao, Xinzhe and Liang, Lei},
  title     = {{MultiRAG}: A Knowledge-Guided Framework for Mitigating Hallucination in Multi-Source Retrieval Augmented Generation},
  booktitle = {2025 {IEEE} 41st International Conference on Data Engineering ({ICDE})},
  year      = {2025},
  pages     = {3070--3083},
}
% MarsRetrieval (arXiv preprint).  The journal field is kept unchanged so the
% identifier still prints under styles that ignore eprint fields; the
% identifier is additionally stored in machine-readable eprint fields.
% Proper nouns in the title are brace-protected against sentence casing.
@article{Wang26marsretrieval,
  author        = {Wang, Shuoyuan and Wang, Yiran and Wei, Hongxin},
  title         = {{MarsRetrieval}: Benchmarking Vision-Language Models for Planetary-Scale Geospatial Retrieval on {Mars}},
  journal       = {arXiv preprint arXiv:2602.13961},
  year          = {2026},
  eprint        = {2602.13961},
  archiveprefix = {arXiv},
}
% HiRISE in the MRO extended science phases.  The year range regains its
% dash (2009--2023), acronyms are brace-protected, and author names use the
% "Last, Initials" form with spaced initials so abbreviating styles keep
% every initial.
@article{McEwen24HiRISE,
  author  = {McEwen, A. S. and Byrne, S. and Hansen, C. and Daubar, I. J. and Sutton, S. and Dundas, C. M. and Bardabelias, N. and Baugh, N. and Bergstrom, J. and Beyer, R. and Block, K. M. and Bray, V. J. and Bridges, J. C. and Chojnacki, M. and Conway, S. J. and Delamere, W. A. and Ebben, T. and Espinosa, A. and Fennema, A. and Grant, J. and Gulick, V. C. and Herkenhoff, K. E. and Heyd, R. and Leis, R. and Ojha, L. and Papendick, S. and Schaller, C. and Thomas, N. and Tornabene, L. L. and Weitz, C. and Wilson, S. A.},
  title   = {The high-resolution imaging science experiment ({HiRISE}) in the {MRO} extended science phases (2009--2023)},
  journal = {Icarus},
  volume  = {419},
  pages   = {115795},
  year    = {2024},
  issn    = {0019-1035},
}
% MRO Context Camera (CTX) instrument paper.  The empty pages field is
% replaced by the JGR article number, the DOI is recorded, the generational
% suffix uses the "Last, Jr, First" form, and proper nouns are protected.
% NOTE(review): article number and DOI supplied from the published record;
% confirm against the publisher page.
@article{Malin07CTX,
  author  = {Malin, Michael C. and Bell, III, James F. and Cantor, Bruce A. and Caplinger, Michael A. and Calvin, Wendy M. and Clancy, R. Todd and Edgett, Kenneth S. and Edwards, Lawrence and Haberle, Robert M. and James, Philip B. and Lee, Steven W. and Ravine, Michael A. and Thomas, Peter C. and Wolff, Michael J.},
  title   = {{Context Camera} Investigation on board the {Mars Reconnaissance Orbiter}},
  journal = {Journal of Geophysical Research: Planets},
  volume  = {112},
  number  = {E5},
  pages   = {E05S04},
  year    = {2007},
  doi     = {10.1029/2006JE002808},
}
% CRISM instrument paper.  The empty pages field is replaced by the JGR
% article number, the DOI is recorded, and acronyms/proper nouns in the
% title are protected against sentence casing.
% NOTE(review): article number and DOI supplied from the published record;
% confirm against the publisher page.
@article{Murchie07CRISM,
  author   = {Murchie, S. and Arvidson, R. and Bedini, P. and Beisser, K. and Bibring, J.-P. and Bishop, J. and Boldt, J. and Cavender, P. and Choo, T. and Clancy, R. T. and Darlington, E. H. and Des Marais, D. and Espiritu, R. and Fort, D. and Green, R. and Guinness, E. and Hayes, J. and Hash, C. and Heffernan, K. and Hemmler, J. and Heyler, G. and Humm, D. and Hutcheson, J. and Izenberg, N. and Lee, R. and Lees, J. and Lohr, D. and Malaret, E. and Martin, T. and McGovern, J. A. and McGuire, P. and Morris, R. and Mustard, J. and Pelkey, S. and Rhodes, E. and Robinson, M. and Roush, T. and Schaefer, E. and Seagrave, G. and Seelos, F. and Silverglate, P. and Slavney, S. and Smith, M. and Shyong, W.-J. and Strohbehn, K. and Taylor, H. and Thompson, P. and Tossman, B. and Wirzburger, M. and Wolff, M.},
  title    = {Compact Reconnaissance Imaging Spectrometer for {Mars} ({CRISM}) on {Mars Reconnaissance Orbiter} ({MRO})},
  journal  = {Journal of Geophysical Research: Planets},
  volume   = {112},
  number   = {E5},
  pages    = {E05S03},
  year     = {2007},
  doi      = {10.1029/2006JE002682},
  keywords = {Mars, spectroscopy, CRISM, MRO, Mars Reconnaissance Orbiter, Mars composition},
}
% MOLA experiment summary.  Page range uses an en dash and the instrument
% and planet names are protected against sentence casing.
@article{Smith01MOLA,
  author  = {Smith, David E. and Zuber, Maria T. and Frey, Herbert V. and Garvin, James B. and Head, James W. and Muhleman, Duane O. and Pettengill, Gordon H. and Phillips, Roger J. and Solomon, Sean C. and Zwally, H. Jay and Banerdt, W. Bruce and Duxbury, Thomas C. and Golombek, Matthew P. and Lemoine, Frank G. and Neumann, Gregory A. and Rowlands, David D. and Aharonson, Oded and Ford, Peter G. and Ivanov, Anton B. and Johnson, Catherine L. and McGovern, Patrick J. and Abshire, James B. and Afzal, Robert S. and Sun, Xiaoli},
  title   = {{Mars Orbiter Laser Altimeter}: Experiment summary after the first year of global mapping of {Mars}},
  journal = {Journal of Geophysical Research: Planets},
  volume  = {106},
  number  = {E10},
  pages   = {23689--23722},
  year    = {2001},
}
% Mars Science Laboratory mission overview.  The accented surname is written
% as a BibTeX special character so it is safe under 8-bit BibTeX, and the
% mission name is protected against sentence casing.
@article{Grotzinger12Curiosity,
  author  = {Grotzinger, John P. and Crisp, Joy and Vasavada, Ashwin R. and Anderson, Robert C. and Baker, Charles J. and Barry, Robert and Blake, David F. and Conrad, Pamela and Edgett, Kenneth S. and Ferdowski, Bobak and Gellert, Ralf and Gilbert, John B. and Golombek, Matt and G{\'o}mez-Elvira, Javier and Hassler, Donald M. and Jandura, Louise and Litvak, Maxim and Mahaffy, Paul and Maki, Justin and Meyer, Michael and Malin, Michael C. and Mitrofanov, Igor and Simmonds, John J. and Vaniman, David and Welch, Richard V. and Wiens, Roger C.},
  title   = {{Mars Science Laboratory} Mission and Science Investigation},
  journal = {Space Science Reviews},
  year    = {2012},
  volume  = {170},
  number  = {1},
  pages   = {5--56},
}
% China's first Mars mission overview.  Proper nouns in the title are
% protected so sentence-casing styles do not print "mars".
@article{Li21ZhuRong,
  author  = {Li, Chunlai and Zhang, Rongqiao and Yu, Dengyun and Dong, Guangliang and Liu, Jianjun and Geng, Yan and Sun, Zezhou and Yan, Wei and Ren, Xin and Su, Yan and Zuo, Wei and Zhang, Tielong and Cao, Jinbin and Fang, Guangyou and Yang, Jianfeng and Shu, Rong and Lin, Yangting and Zou, Yongliao and Liu, Dawei and Liu, Bin and Kong, Deqing and Zhu, Xinying and Ouyang, Ziyuan},
  title   = {{China's} {Mars} Exploration Mission and Science Investigation},
  journal = {Space Science Reviews},
  year    = {2021},
  volume  = {217},
  number  = {4},
  pages   = {57},
}
% Survey on mixture-of-experts LLMs.  Page range uses an en dash; the
% acronym in the journal name is brace-protected.
@article{Cai25LLM,
  author  = {Cai, Weilin and Jiang, Juyong and Wang, Fan and Tang, Jing and Kim, Sunghun and Huang, Jiayi},
  title   = {A Survey on Mixture of Experts in Large Language Models},
  journal = {{IEEE} Transactions on Knowledge and Data Engineering},
  year    = {2025},
  volume  = {37},
  number  = {7},
  pages   = {3896--3915},
}
% Original RAG paper (NeurIPS 2020).  Accented letters are written as BibTeX
% special characters ({\"u}, {\"a}) so they are treated as single letters,
% and the acronym in the title is protected.
@inproceedings{Lewis20RAG,
  author    = {Lewis, Patrick and Perez, Ethan and Piktus, Aleksandra and Petroni, Fabio and Karpukhin, Vladimir and Goyal, Naman and K{\"u}ttler, Heinrich and Lewis, Mike and Yih, Wen-tau and Rockt{\"a}schel, Tim and Riedel, Sebastian and Kiela, Douwe},
  title     = {Retrieval-augmented generation for knowledge-intensive {NLP} tasks},
  year      = {2020},
  isbn      = {9781713829546},
  publisher = {Curran Associates Inc.},
  address   = {Red Hook, NY, USA},
  booktitle = {Proceedings of the 34th International Conference on Neural Information Processing Systems},
  articleno = {793},
  numpages  = {16},
  location  = {Vancouver, BC, Canada},
  series    = {NIPS '20},
}
% LLM + knowledge graph roadmap.  The page range regains its dash
% (3580--3599, consistent with numpages = 20), and the journal name is given
% in full so it matches the other entry from the same journal in this file.
@article{Pan24KGandLLM,
  author     = {Pan, Shirui and Luo, Linhao and Wang, Yufei and Chen, Chen and Wang, Jiapu and Wu, Xindong},
  title      = {Unifying Large Language Models and Knowledge Graphs: A Roadmap},
  journal    = {{IEEE} Transactions on Knowledge and Data Engineering},
  year       = {2024},
  month      = jul,
  volume     = {36},
  number     = {7},
  pages      = {3580--3599},
  numpages   = {20},
  issn       = {1041-4347},
  issue_date = {July 2024},
  publisher  = {IEEE Educational Activities Department},
  address    = {USA},
}
% Metacognitive RAG (WWW 2024).  The page range regains its dash
% (1453--1463, consistent with numpages = 11); the acronym in the
% proceedings title is brace-protected.
@inproceedings{Zhou24hallucination,
  author    = {Zhou, Yujia and Liu, Zheng and Jin, Jiajie and Nie, Jian-Yun and Dou, Zhicheng},
  title     = {Metacognitive Retrieval-Augmented Large Language Models},
  year      = {2024},
  isbn      = {9798400701719},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  booktitle = {Proceedings of the {ACM} Web Conference 2024},
  pages     = {1453--1463},
  numpages  = {11},
  keywords  = {llms, metacognition, retrieval-augmented generation},
  location  = {Singapore, Singapore},
  series    = {WWW '24},
}