修改latex的双引号

This commit is contained in:
2026-02-02 14:41:35 +08:00
parent 4bef1dfe15
commit 54bc3a2abf
5 changed files with 86 additions and 71 deletions

View File

@@ -49,20 +49,22 @@
\@writefile{toc}{\contentsline {section}{\numberline {IV}I/O-aware Indexing Structure}{4}{}\protected@file@percent } \@writefile{toc}{\contentsline {section}{\numberline {IV}I/O-aware Indexing Structure}{4}{}\protected@file@percent }
\newlabel{sec:Index}{{IV}{4}} \newlabel{sec:Index}{{IV}{4}}
\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {IV-A}}Index schema design}{4}{}\protected@file@percent } \@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {IV-A}}Index schema design}{4}{}\protected@file@percent }
\newlabel{eqn_pre_gridkey}{{5}{4}}
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Index schema design}}{5}{}\protected@file@percent } \@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Index schema design}}{5}{}\protected@file@percent }
\newlabel{fig:index}{{1}{5}} \newlabel{fig:index}{{1}{5}}
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Retrieval-time Execution}}{5}{}\protected@file@percent } \@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Retrieval-time Execution}}{5}{}\protected@file@percent }
\newlabel{fig_ST_Query}{{2}{5}} \newlabel{fig_ST_Query}{{2}{5}}
\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {IV-B}}Retrieval-time Execution}{5}{}\protected@file@percent } \@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {IV-B}}Retrieval-time Execution}{5}{}\protected@file@percent }
\newlabel{eqn_pre_spatial_query}{{5}{5}} \newlabel{eqn_pre_lookup_return}{{6}{5}}
\newlabel{eqn_pre_spatial_query}{{7}{5}}
\citation{Hong25HDCC} \citation{Hong25HDCC}
\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {IV-C}}Why I/O-aware}{6}{}\protected@file@percent } \@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {IV-C}}Why I/O-aware}{6}{}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {V}Hybrid Concurrency-Aware I/O Coordination}{6}{}\protected@file@percent } \@writefile{toc}{\contentsline {section}{\numberline {V}Hybrid Concurrency-Aware I/O Coordination}{6}{}\protected@file@percent }
\newlabel{sec:CC}{{V}{6}} \newlabel{sec:CC}{{V}{6}}
\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {V-A}}Retrieval Admission and I/O Plan Generation}{6}{}\protected@file@percent } \@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {V-A}}Retrieval Admission and I/O Plan Generation}{6}{}\protected@file@percent }
\newlabel{eq:io_plan}{{6}{6}} \newlabel{eq:io_plan}{{8}{6}}
\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {V-B}}Contention Estimation and Path Selection}{6}{}\protected@file@percent } \@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {V-B}}Contention Estimation and Path Selection}{6}{}\protected@file@percent }
\newlabel{eqn_tuning_table}{{7}{6}} \newlabel{eqn_tuning_table}{{9}{6}}
\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Hybrid Concurrency-Aware I/O Coordination.}}{7}{}\protected@file@percent } \@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Hybrid Concurrency-Aware I/O Coordination.}}{7}{}\protected@file@percent }
\newlabel{fig:cc}{{3}{7}} \newlabel{fig:cc}{{3}{7}}
\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {V-C}}Deterministic Coordinated and Non-deterministic Execution}{7}{}\protected@file@percent } \@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {V-C}}Deterministic Coordinated and Non-deterministic Execution}{7}{}\protected@file@percent }
@@ -70,8 +72,8 @@
\@writefile{toc}{\contentsline {section}{\numberline {VI}I/O Stack Tuning}{8}{}\protected@file@percent } \@writefile{toc}{\contentsline {section}{\numberline {VI}I/O Stack Tuning}{8}{}\protected@file@percent }
\newlabel{sec:Tuning}{{VI}{8}} \newlabel{sec:Tuning}{{VI}{8}}
\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {VI-A}}Formulation of Online I/O Tuning}{8}{}\protected@file@percent } \@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {VI-A}}Formulation of Online I/O Tuning}{8}{}\protected@file@percent }
\newlabel{eqn_tuning_table}{{8}{8}} \newlabel{eqn_tuning_table}{{10}{8}}
\newlabel{eqn_tuning_table}{{9}{8}} \newlabel{eqn_tuning_table}{{11}{8}}
\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {VI-B}}Surrogate-Assisted GMAB for Online I/O Tuning}{8}{}\protected@file@percent } \@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {VI-B}}Surrogate-Assisted GMAB for Online I/O Tuning}{8}{}\protected@file@percent }
\@writefile{loa}{\contentsline {algocf}{\numberline {1}{\ignorespaces Surrogate-Assisted Genetic Multi-Armed Bandit (SA-GMAB)}}{9}{}\protected@file@percent } \@writefile{loa}{\contentsline {algocf}{\numberline {1}{\ignorespaces Surrogate-Assisted Genetic Multi-Armed Bandit (SA-GMAB)}}{9}{}\protected@file@percent }
\newlabel{alg:sa-gmab}{{1}{9}} \newlabel{alg:sa-gmab}{{1}{9}}
@@ -153,7 +155,7 @@
\@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {Time (mins)}}}{14}{}\protected@file@percent } \@writefile{lof}{\contentsline {subfigure}{\numberline{(b)}{\ignorespaces {Time (mins)}}}{14}{}\protected@file@percent }
\newlabel{fig:tune_exp1}{{12}{14}} \newlabel{fig:tune_exp1}{{12}{14}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {\mbox {VII-D}1}Convergence Speed and Tuning Cost}{14}{}\protected@file@percent } \@writefile{toc}{\contentsline {subsubsection}{\numberline {\mbox {VII-D}1}Convergence Speed and Tuning Cost}{14}{}\protected@file@percent }
\newlabel{eq:roti}{{10}{14}} \newlabel{eq:roti}{{12}{14}}
\@writefile{lof}{\contentsline {figure}{\numberline {13}{\ignorespaces Mode Switching}}{14}{}\protected@file@percent } \@writefile{lof}{\contentsline {figure}{\numberline {13}{\ignorespaces Mode Switching}}{14}{}\protected@file@percent }
\newlabel{fig:tune_exp3}{{13}{14}} \newlabel{fig:tune_exp3}{{13}{14}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {\mbox {VII-D}2}Adaptation to Workload Shifts}{14}{}\protected@file@percent } \@writefile{toc}{\contentsline {subsubsection}{\numberline {\mbox {VII-D}2}Adaptation to Workload Shifts}{14}{}\protected@file@percent }

View File

@@ -1,4 +1,4 @@
This is pdfTeX, Version 3.141592653-2.6-1.40.25 (MiKTeX 23.4) (preloaded format=pdflatex 2025.10.23) 2 FEB 2026 11:13 This is pdfTeX, Version 3.141592653-2.6-1.40.25 (MiKTeX 23.4) (preloaded format=pdflatex 2025.10.23) 2 FEB 2026 14:38
entering extended mode entering extended mode
restricted \write18 enabled. restricted \write18 enabled.
%&-line parsing enabled. %&-line parsing enabled.
@@ -469,23 +469,23 @@ File: fig/st-query.png Graphic file (type png)
Package pdftex.def Info: fig/st-query.png used on input line 184. Package pdftex.def Info: fig/st-query.png used on input line 184.
(pdftex.def) Requested size: 158.99377pt x 365.21727pt. (pdftex.def) Requested size: 158.99377pt x 365.21727pt.
[4] [4]
Underfull \hbox (badness 2293) in paragraph at lines 237--240 Underfull \hbox (badness 2293) in paragraph at lines 239--242
[]\OT1/ptm/m/n/10 The I/O-aware in-dex en-ables ef-fi-cient spatio-temporal []\OT1/ptm/m/n/10 The I/O-aware in-dex en-ables ef-fi-cient spatio-temporal
[] []
Underfull \hbox (badness 4752) in paragraph at lines 237--240 Underfull \hbox (badness 4752) in paragraph at lines 239--242
\OT1/ptm/m/n/10 range re-trievals by di-rectly trans-lat-ing re-trieval pred-i- \OT1/ptm/m/n/10 range re-trievals by di-rectly trans-lat-ing re-trieval pred-i-
[] []
Underfull \hbox (badness 3735) in paragraph at lines 237--240 Underfull \hbox (badness 3735) in paragraph at lines 239--242
\OT1/ptm/m/n/10 cates into win-dowed read plans, while avoid-ing both \OT1/ptm/m/n/10 cates into win-dowed read plans, while avoid-ing both
[] []
Underfull \hbox (badness 5756) in paragraph at lines 237--240 Underfull \hbox (badness 5756) in paragraph at lines 239--242
\OT1/ptm/m/n/10 Given a user-specified spatio-temporal re-trieval $\OML/cmm/m/i \OT1/ptm/m/n/10 Given a user-specified spatio-temporal re-trieval $\OML/cmm/m/i
t/10 q \OT1/cmr/m/n/10 = t/10 q \OT1/cmr/m/n/10 =
[] []
@@ -494,28 +494,28 @@ t/10 q \OT1/cmr/m/n/10 =
<fig/cc.png, id=38, 1046.4696pt x 340.6326pt> <fig/cc.png, id=38, 1046.4696pt x 340.6326pt>
File: fig/cc.png Graphic file (type png) File: fig/cc.png Graphic file (type png)
<use fig/cc.png> <use fig/cc.png>
Package pdftex.def Info: fig/cc.png used on input line 307. Package pdftex.def Info: fig/cc.png used on input line 304.
(pdftex.def) Requested size: 464.39685pt x 151.16782pt. (pdftex.def) Requested size: 464.39685pt x 151.16782pt.
[6] [7 <./fig/cc.png>] [6] [7 <./fig/cc.png>]
Underfull \hbox (badness 4518) in paragraph at lines 409--409 Underfull \hbox (badness 4518) in paragraph at lines 406--406
[]\OT1/ptm/b/n/10 Algorithm 1: \OT1/ptm/m/n/10 Surrogate-Assisted Ge-netic Mult []\OT1/ptm/b/n/10 Algorithm 1: \OT1/ptm/m/n/10 Surrogate-Assisted Ge-netic Mult
i- i-
[] []
LaTeX Font Info: Trying to load font information for OT1+pcr on input line 4 LaTeX Font Info: Trying to load font information for OT1+pcr on input line 4
17. 14.
(D:\software\ctex\MiKTeX\tex/latex/psnfss\ot1pcr.fd (D:\software\ctex\MiKTeX\tex/latex/psnfss\ot1pcr.fd
File: ot1pcr.fd 2001/06/04 font definitions for OT1/pcr. File: ot1pcr.fd 2001/06/04 font definitions for OT1/pcr.
) [8] ) [8]
Underfull \vbox (badness 1097) has occurred while \output is active [] Underfull \vbox (badness 1097) has occurred while \output is active []
Overfull \hbox (0.88164pt too wide) in paragraph at lines 497--497 Overfull \hbox (0.88164pt too wide) in paragraph at lines 494--494
[]|\OT1/ptm/b/n/8 Resolution| []|\OT1/ptm/b/n/8 Resolution|
[] []
Overfull \hbox (2.45601pt too wide) in paragraph at lines 495--509 Overfull \hbox (2.45601pt too wide) in paragraph at lines 492--506
[][] [][]
[] []
@@ -523,22 +523,22 @@ Overfull \hbox (2.45601pt too wide) in paragraph at lines 495--509
<exp/index_exp1_1.pdf, id=55, 253.94875pt x 206.7725pt> <exp/index_exp1_1.pdf, id=55, 253.94875pt x 206.7725pt>
File: exp/index_exp1_1.pdf Graphic file (type pdf) File: exp/index_exp1_1.pdf Graphic file (type pdf)
<use exp/index_exp1_1.pdf> <use exp/index_exp1_1.pdf>
Package pdftex.def Info: exp/index_exp1_1.pdf used on input line 575. Package pdftex.def Info: exp/index_exp1_1.pdf used on input line 572.
(pdftex.def) Requested size: 111.27748pt x 90.60472pt. (pdftex.def) Requested size: 111.27748pt x 90.60472pt.
<exp/index_exp1_2.pdf, id=56, 253.94875pt x 204.765pt> <exp/index_exp1_2.pdf, id=56, 253.94875pt x 204.765pt>
File: exp/index_exp1_2.pdf Graphic file (type pdf) File: exp/index_exp1_2.pdf Graphic file (type pdf)
<use exp/index_exp1_2.pdf> <use exp/index_exp1_2.pdf>
Package pdftex.def Info: exp/index_exp1_2.pdf used on input line 581. Package pdftex.def Info: exp/index_exp1_2.pdf used on input line 578.
(pdftex.def) Requested size: 111.27748pt x 89.72505pt. (pdftex.def) Requested size: 111.27748pt x 89.72505pt.
<exp/index_exp2_1.pdf, id=57, 253.94875pt x 214.8025pt> <exp/index_exp2_1.pdf, id=57, 253.94875pt x 214.8025pt>
File: exp/index_exp2_1.pdf Graphic file (type pdf) File: exp/index_exp2_1.pdf Graphic file (type pdf)
<use exp/index_exp2_1.pdf> <use exp/index_exp2_1.pdf>
Package pdftex.def Info: exp/index_exp2_1.pdf used on input line 598. Package pdftex.def Info: exp/index_exp2_1.pdf used on input line 595.
(pdftex.def) Requested size: 111.27748pt x 94.12335pt. (pdftex.def) Requested size: 111.27748pt x 94.12335pt.
<exp/index_exp2_2.pdf, id=58, 253.94875pt x 196.735pt> <exp/index_exp2_2.pdf, id=58, 253.94875pt x 196.735pt>
File: exp/index_exp2_2.pdf Graphic file (type pdf) File: exp/index_exp2_2.pdf Graphic file (type pdf)
<use exp/index_exp2_2.pdf> <use exp/index_exp2_2.pdf>
Package pdftex.def Info: exp/index_exp2_2.pdf used on input line 604. Package pdftex.def Info: exp/index_exp2_2.pdf used on input line 601.
(pdftex.def) Requested size: 111.27748pt x 86.20642pt. (pdftex.def) Requested size: 111.27748pt x 86.20642pt.
[10 <./exp/index_exp1_1.pdf> <./exp/index_exp1_2.pdf [10 <./exp/index_exp1_1.pdf> <./exp/index_exp1_2.pdf
@@ -556,27 +556,27 @@ iple pdfs with page group included in a single page
<exp/index_exp3_1.pdf, id=131, 253.94875pt x 194.7275pt> <exp/index_exp3_1.pdf, id=131, 253.94875pt x 194.7275pt>
File: exp/index_exp3_1.pdf Graphic file (type pdf) File: exp/index_exp3_1.pdf Graphic file (type pdf)
<use exp/index_exp3_1.pdf> <use exp/index_exp3_1.pdf>
Package pdftex.def Info: exp/index_exp3_1.pdf used on input line 622. Package pdftex.def Info: exp/index_exp3_1.pdf used on input line 619.
(pdftex.def) Requested size: 105.4204pt x 80.83417pt. (pdftex.def) Requested size: 105.4204pt x 80.83417pt.
<exp/index_exp3_2.pdf, id=132, 253.94875pt x 196.735pt> <exp/index_exp3_2.pdf, id=132, 253.94875pt x 196.735pt>
File: exp/index_exp3_2.pdf Graphic file (type pdf) File: exp/index_exp3_2.pdf Graphic file (type pdf)
<use exp/index_exp3_2.pdf> <use exp/index_exp3_2.pdf>
Package pdftex.def Info: exp/index_exp3_2.pdf used on input line 628. Package pdftex.def Info: exp/index_exp3_2.pdf used on input line 625.
(pdftex.def) Requested size: 105.4204pt x 81.6675pt. (pdftex.def) Requested size: 105.4204pt x 81.6675pt.
<exp/index_exp3_3.pdf, id=133, 253.94875pt x 204.765pt> <exp/index_exp3_3.pdf, id=133, 253.94875pt x 204.765pt>
File: exp/index_exp3_3.pdf Graphic file (type pdf) File: exp/index_exp3_3.pdf Graphic file (type pdf)
<use exp/index_exp3_3.pdf> <use exp/index_exp3_3.pdf>
Package pdftex.def Info: exp/index_exp3_3.pdf used on input line 636. Package pdftex.def Info: exp/index_exp3_3.pdf used on input line 633.
(pdftex.def) Requested size: 130.08621pt x 104.8943pt. (pdftex.def) Requested size: 130.08621pt x 104.8943pt.
<exp/index_exp4_2.pdf, id=134, 253.94875pt x 194.7275pt> <exp/index_exp4_2.pdf, id=134, 253.94875pt x 194.7275pt>
File: exp/index_exp4_2.pdf Graphic file (type pdf) File: exp/index_exp4_2.pdf Graphic file (type pdf)
<use exp/index_exp4_2.pdf> <use exp/index_exp4_2.pdf>
Package pdftex.def Info: exp/index_exp4_2.pdf used on input line 655. Package pdftex.def Info: exp/index_exp4_2.pdf used on input line 652.
(pdftex.def) Requested size: 114.79138pt x 88.02173pt. (pdftex.def) Requested size: 114.79138pt x 88.02173pt.
<exp/index_exp4_1.pdf, id=135, 253.94875pt x 200.75pt> <exp/index_exp4_1.pdf, id=135, 253.94875pt x 200.75pt>
File: exp/index_exp4_1.pdf Graphic file (type pdf) File: exp/index_exp4_1.pdf Graphic file (type pdf)
<use exp/index_exp4_1.pdf> <use exp/index_exp4_1.pdf>
Package pdftex.def Info: exp/index_exp4_1.pdf used on input line 661. Package pdftex.def Info: exp/index_exp4_1.pdf used on input line 658.
(pdftex.def) Requested size: 106.5929pt x 84.26234pt. (pdftex.def) Requested size: 106.5929pt x 84.26234pt.
[11 <./exp/index_exp3_1.pdf> <./exp/index_exp3_2.pdf [11 <./exp/index_exp3_1.pdf> <./exp/index_exp3_2.pdf
@@ -595,7 +595,7 @@ iple pdfs with page group included in a single page
pdfTeX warning: pdflatex.exe (file ./exp/index_exp4_1.pdf): PDF inclusion: mult pdfTeX warning: pdflatex.exe (file ./exp/index_exp4_1.pdf): PDF inclusion: mult
iple pdfs with page group included in a single page iple pdfs with page group included in a single page
>] >]
Underfull \hbox (badness 1939) in paragraph at lines 676--678 Underfull \hbox (badness 1939) in paragraph at lines 673--675
[]\OT1/ptm/m/n/10 For com-par-i-son, we eval-u-ate the fol-low-ing ex-e-cu-tion []\OT1/ptm/m/n/10 For com-par-i-son, we eval-u-ate the fol-low-ing ex-e-cu-tion
[] []
@@ -603,36 +603,36 @@ Underfull \hbox (badness 1939) in paragraph at lines 676--678
<exp/cc_exp1_3.pdf, id=192, 253.94875pt x 205.76875pt> <exp/cc_exp1_3.pdf, id=192, 253.94875pt x 205.76875pt>
File: exp/cc_exp1_3.pdf Graphic file (type pdf) File: exp/cc_exp1_3.pdf Graphic file (type pdf)
<use exp/cc_exp1_3.pdf> <use exp/cc_exp1_3.pdf>
Package pdftex.def Info: exp/cc_exp1_3.pdf used on input line 685. Package pdftex.def Info: exp/cc_exp1_3.pdf used on input line 682.
(pdftex.def) Requested size: 151.76744pt x 122.97867pt. (pdftex.def) Requested size: 151.76744pt x 122.97867pt.
<exp/cc_exp1_2.pdf, id=193, 253.94875pt x 205.76875pt> <exp/cc_exp1_2.pdf, id=193, 253.94875pt x 205.76875pt>
File: exp/cc_exp1_2.pdf Graphic file (type pdf) File: exp/cc_exp1_2.pdf Graphic file (type pdf)
<use exp/cc_exp1_2.pdf> <use exp/cc_exp1_2.pdf>
Package pdftex.def Info: exp/cc_exp1_2.pdf used on input line 687. Package pdftex.def Info: exp/cc_exp1_2.pdf used on input line 684.
(pdftex.def) Requested size: 151.76744pt x 122.97867pt. (pdftex.def) Requested size: 151.76744pt x 122.97867pt.
<exp/cc_exp1_1.pdf, id=194, 253.94875pt x 205.76875pt> <exp/cc_exp1_1.pdf, id=194, 253.94875pt x 205.76875pt>
File: exp/cc_exp1_1.pdf Graphic file (type pdf) File: exp/cc_exp1_1.pdf Graphic file (type pdf)
<use exp/cc_exp1_1.pdf> <use exp/cc_exp1_1.pdf>
Package pdftex.def Info: exp/cc_exp1_1.pdf used on input line 691. Package pdftex.def Info: exp/cc_exp1_1.pdf used on input line 688.
(pdftex.def) Requested size: 151.76744pt x 122.97867pt. (pdftex.def) Requested size: 151.76744pt x 122.97867pt.
<exp/cc_exp3_1.pdf, id=195, 253.94875pt x 205.76875pt> <exp/cc_exp3_1.pdf, id=195, 253.94875pt x 205.76875pt>
File: exp/cc_exp3_1.pdf Graphic file (type pdf) File: exp/cc_exp3_1.pdf Graphic file (type pdf)
<use exp/cc_exp3_1.pdf> <use exp/cc_exp3_1.pdf>
Package pdftex.def Info: exp/cc_exp3_1.pdf used on input line 715. Package pdftex.def Info: exp/cc_exp3_1.pdf used on input line 712.
(pdftex.def) Requested size: 110.10678pt x 89.21667pt. (pdftex.def) Requested size: 110.10678pt x 89.21667pt.
<exp/cc_exp3_2.pdf, id=196, 253.94875pt x 196.735pt> <exp/cc_exp3_2.pdf, id=196, 253.94875pt x 196.735pt>
File: exp/cc_exp3_2.pdf Graphic file (type pdf) File: exp/cc_exp3_2.pdf Graphic file (type pdf)
<use exp/cc_exp3_2.pdf> <use exp/cc_exp3_2.pdf>
Package pdftex.def Info: exp/cc_exp3_2.pdf used on input line 721. Package pdftex.def Info: exp/cc_exp3_2.pdf used on input line 718.
(pdftex.def) Requested size: 113.62068pt x 88.02258pt. (pdftex.def) Requested size: 113.62068pt x 88.02258pt.
[12] [12]
<exp/cc_exp4.pdf, id=200, 253.94875pt x 210.7875pt> <exp/cc_exp4.pdf, id=200, 253.94875pt x 210.7875pt>
File: exp/cc_exp4.pdf Graphic file (type pdf) File: exp/cc_exp4.pdf Graphic file (type pdf)
<use exp/cc_exp4.pdf> <use exp/cc_exp4.pdf>
Package pdftex.def Info: exp/cc_exp4.pdf used on input line 739. Package pdftex.def Info: exp/cc_exp4.pdf used on input line 736.
(pdftex.def) Requested size: 130.08621pt x 107.97943pt. (pdftex.def) Requested size: 130.08621pt x 107.97943pt.
Underfull \hbox (badness 1874) in paragraph at lines 760--761 Underfull \hbox (badness 1874) in paragraph at lines 757--758
\OT1/ptm/m/n/10 high-impact pa-ram-e-ter se-lec-tion and Re-in-force-ment \OT1/ptm/m/n/10 high-impact pa-ram-e-ter se-lec-tion and Re-in-force-ment
[] []
@@ -660,17 +660,17 @@ pdfs with page group included in a single page
<exp/tune_exp1_1.pdf, id=270, 253.94875pt x 208.78pt> <exp/tune_exp1_1.pdf, id=270, 253.94875pt x 208.78pt>
File: exp/tune_exp1_1.pdf Graphic file (type pdf) File: exp/tune_exp1_1.pdf Graphic file (type pdf)
<use exp/tune_exp1_1.pdf> <use exp/tune_exp1_1.pdf>
Package pdftex.def Info: exp/tune_exp1_1.pdf used on input line 771. Package pdftex.def Info: exp/tune_exp1_1.pdf used on input line 768.
(pdftex.def) Requested size: 110.10678pt x 90.52228pt. (pdftex.def) Requested size: 110.10678pt x 90.52228pt.
<exp/tune_exp1_2.pdf, id=271, 253.94875pt x 204.765pt> <exp/tune_exp1_2.pdf, id=271, 253.94875pt x 204.765pt>
File: exp/tune_exp1_2.pdf Graphic file (type pdf) File: exp/tune_exp1_2.pdf Graphic file (type pdf)
<use exp/tune_exp1_2.pdf> <use exp/tune_exp1_2.pdf>
Package pdftex.def Info: exp/tune_exp1_2.pdf used on input line 777. Package pdftex.def Info: exp/tune_exp1_2.pdf used on input line 774.
(pdftex.def) Requested size: 113.62068pt x 91.61536pt. (pdftex.def) Requested size: 113.62068pt x 91.61536pt.
<exp/tune_exp3_1.pdf, id=272, 253.94875pt x 216.81pt> <exp/tune_exp3_1.pdf, id=272, 253.94875pt x 216.81pt>
File: exp/tune_exp3_1.pdf Graphic file (type pdf) File: exp/tune_exp3_1.pdf Graphic file (type pdf)
<use exp/tune_exp3_1.pdf> <use exp/tune_exp3_1.pdf>
Package pdftex.def Info: exp/tune_exp3_1.pdf used on input line 800. Package pdftex.def Info: exp/tune_exp3_1.pdf used on input line 797.
(pdftex.def) Requested size: 130.08621pt x 111.06456pt. (pdftex.def) Requested size: 130.08621pt x 111.06456pt.
Underfull \vbox (badness 10000) has occurred while \output is active [] Underfull \vbox (badness 10000) has occurred while \output is active []
@@ -683,16 +683,32 @@ ple pdfs with page group included in a single page
pdfTeX warning: pdflatex.exe (file ./exp/tune_exp3_1.pdf): PDF inclusion: multi pdfTeX warning: pdflatex.exe (file ./exp/tune_exp3_1.pdf): PDF inclusion: multi
ple pdfs with page group included in a single page ple pdfs with page group included in a single page
>] (rs_retrieval.bbl) [15] (rs_retrieval.aux) >]
Underfull \hbox (badness 2495) in paragraph at lines 812--813
[]\OT1/ptm/m/n/10 This work is sup-ported by the Na-tional Key R&D
[]
Underfull \hbox (badness 2799) in paragraph at lines 812--813
\OT1/ptm/m/n/10 Pro-gram of China ``In-ter-gov-ern-men-tal In-ter-na-tional Sci
-
[]
Underfull \hbox (badness 7576) in paragraph at lines 812--813
\OT1/ptm/m/n/10 ence and Tech-nol-ogy In-no-va-tion Co-op-er-a-tion" (Grant
[]
(rs_retrieval.bbl) [15] (rs_retrieval.aux)
LaTeX Warning: There were multiply-defined labels. LaTeX Warning: There were multiply-defined labels.
) )
Here is how much of TeX's memory you used: Here is how much of TeX's memory you used:
5745 strings out of 476331 5747 strings out of 476331
98021 string characters out of 5797649 98061 string characters out of 5797649
1882660 words of memory out of 5000000 1882660 words of memory out of 5000000
26051 multiletter control sequences out of 15000+600000 26053 multiletter control sequences out of 15000+600000
561830 words of font info for 131 fonts, out of 8000000 for 9000 561830 words of font info for 131 fonts, out of 8000000 for 9000
1145 hyphenation exceptions out of 8191 1145 hyphenation exceptions out of 8191
62i,17n,67p,1675b,499s stack positions out of 10000i,1000n,20000p,200000b,200000s 62i,17n,67p,1675b,499s stack positions out of 10000i,1000n,20000p,200000b,200000s
@@ -714,7 +730,7 @@ urier/ucrr8a.pfb><D:/software/ctex/MiKTeX/fonts/type1/urw/times/utmb8a.pfb><D:/
software/ctex/MiKTeX/fonts/type1/urw/times/utmbi8a.pfb><D:/software/ctex/MiKTeX software/ctex/MiKTeX/fonts/type1/urw/times/utmbi8a.pfb><D:/software/ctex/MiKTeX
/fonts/type1/urw/times/utmr8a.pfb><D:/software/ctex/MiKTeX/fonts/type1/urw/time /fonts/type1/urw/times/utmr8a.pfb><D:/software/ctex/MiKTeX/fonts/type1/urw/time
s/utmri8a.pfb> s/utmri8a.pfb>
Output written on rs_retrieval.pdf (15 pages, 2366099 bytes). Output written on rs_retrieval.pdf (15 pages, 2366253 bytes).
PDF statistics: PDF statistics:
400 PDF objects out of 1000 (max. 8388607) 400 PDF objects out of 1000 (max. 8388607)
0 named destinations out of 1000 (max. 500000) 0 named destinations out of 1000 (max. 500000)

Binary file not shown.

Binary file not shown.

View File

@@ -53,7 +53,7 @@
\maketitle \maketitle
\begin{abstract} \begin{abstract}
High-performance remote sensing analytics workflows require ingesting and retrieving massive image archives to support real-time spatio-temporal applications. While modern systems utilize window-based I/O reading to reduce data transfer, they face a dual bottleneck: (1) the prohibitive overhead of runtime geospatial computations caused by the decoupling of logical indexing from physical storage, and (2) severe storage-level I/O contention triggered by uncoordinated concurrent reads. To address these limitations, we present a comprehensive I/O-aware retrieval approach based on a novel "Index-as-an-Execution-Plan" paradigm. We introduce a dual-layer inverted index that serves as an I/O planner, pre-materializing grid-to-pixel mappings to completely eliminate runtime geometric calculations. Furthermore, we design a hybrid concurrency-aware I/O coordination protocol that adaptively integrates Calvin-style deterministic ordering with optimistic execution, effectively converting I/O contention into request merging opportunities. To handle fluctuating workloads, we incorporate a Surrogate-Assisted Genetic Multi-Armed Bandit (SA-GMAB) for automatic parameter tuning. Evaluated on a distributed cluster with Martian datasets, the experimental results indicate that: (1) I/O-aware indexing reduces retrieval latency by an order of magnitude; (2) hybrid concurrency-aware I/O coordination achieves a 54x speedup under high contention through request merging and automates optimal mode switching; and (3) SA-GMAB has the fastest convergence speed and recovers from workload shifts $2\times$ faster than TunIO. High-performance remote sensing analytics workflows require ingesting and retrieving massive image archives to support real-time spatio-temporal applications. 
While modern systems utilize window-based I/O reading to reduce data transfer, they face a dual bottleneck: (1) the prohibitive overhead of runtime geospatial computations caused by the decoupling of logical indexing from physical storage, and (2) severe storage-level I/O contention triggered by uncoordinated concurrent reads. To address these limitations, we present a comprehensive I/O-aware retrieval approach based on a novel ``Index-as-an-Execution-Plan'' paradigm. We introduce a dual-layer inverted index that serves as an I/O planner, pre-materializing grid-to-pixel mappings to completely eliminate runtime geometric calculations. Furthermore, we design a hybrid concurrency-aware I/O coordination protocol that adaptively integrates Calvin-style deterministic ordering with optimistic execution, effectively converting I/O contention into request merging opportunities. To handle fluctuating workloads, we incorporate a Surrogate-Assisted Genetic Multi-Armed Bandit (SA-GMAB) for automatic parameter tuning. Evaluated on a distributed cluster with Martian datasets, the experimental results indicate that: (1) I/O-aware indexing reduces retrieval latency by an order of magnitude; (2) hybrid concurrency-aware I/O coordination achieves a 54x speedup under high contention through request merging and automates optimal mode switching; and (3) SA-GMAB has the fastest convergence speed and recovers from workload shifts $2\times$ faster than TunIO.
\end{abstract} \end{abstract}
\begin{IEEEkeywords} \begin{IEEEkeywords}
@@ -70,14 +70,14 @@ Existing RS data management systems \cite{LEWIS17datacube, Yan21RS_manage1, liu2
The second phase is the data extraction phase, where the system reads the actual pixel data from the identified raw image files stored in distributed file systems or object stores. A critical observation in modern high-performance RS analytics is that the primary system bottleneck has fundamentally shifted from the first phase to the second. While the metadata search completes in milliseconds, the end-to-end retrieval latency is now dominated by the massive I/O overhead required to fetch, decompress, and process large-scale raw images. Traditional systems attempted to reduce I/O overhead by pre-slicing tiles and building pyramids (e.g., approaches used in Google Earth Engine \cite{gorelick17GEE} that store metadata in HBase and serve pre-tiled image pyramids), but aggressive tiling increases management complexity and produces many small files. More recent Cloud-Optimized GeoTIFF (COG) formats and COG-aware frameworks \cite{LEWIS17datacube}, \cite{riotiler25riotiler} exploit internal overviews and window-based I/O to read only the portions of files that spatially intersect a retrieval. The second phase is the data extraction phase, where the system reads the actual pixel data from the identified raw image files stored in distributed file systems or object stores. A critical observation in modern high-performance RS analytics is that the primary system bottleneck has fundamentally shifted from the first phase to the second. While the metadata search completes in milliseconds, the end-to-end retrieval latency is now dominated by the massive I/O overhead required to fetch, decompress, and process large-scale raw images. Traditional systems attempted to reduce I/O overhead by pre-slicing tiles and building pyramids (e.g., approaches used in Google Earth Engine \cite{gorelick17GEE} that store metadata in HBase and serve pre-tiled image pyramids), but aggressive tiling increases management complexity and produces many small files. 
More recent Cloud-Optimized GeoTIFF (COG) formats and COG-aware frameworks \cite{LEWIS17datacube}, \cite{riotiler25riotiler} exploit internal overviews and window-based I/O to read only the portions of files that spatially intersect a retrieval.
\par \par
While window-based I/O effectively reduces raw data transfer, it introduces a new computational burden due to the decoupling of logical indexing from physical storage. Current systems operate on a "Search-then-Compute-then-Read" model: after identifying candidate files, they must perform fine-grained, per-image geospatial computations at runtime to map retrieval coordinates to precise file offsets and clip boundaries. This runtime geometric resolution becomes computationally prohibitive when processing a large volume of candidate images, often negating the benefits of I/O reduction. Moreover, under concurrent workloads, the lack of coordination among these independent read requests leads to severe I/O contention and storage thrashing, rendering traditional indexing-centric optimizations insufficient for real-time applications. While window-based I/O effectively reduces raw data transfer, it introduces a new computational burden due to the decoupling of logical indexing from physical storage. Current systems operate on a ``Search-then-Compute-then-Read'' model: after identifying candidate files, they must perform fine-grained, per-image geospatial computations at runtime to map retrieval coordinates to precise file offsets and clip boundaries. This runtime geometric resolution becomes computationally prohibitive when processing a large volume of candidate images, often negating the benefits of I/O reduction. Moreover, under concurrent workloads, the lack of coordination among these independent read requests leads to severe I/O contention and storage thrashing, rendering traditional indexing-centric optimizations insufficient for real-time applications.
To address the aforementioned problems, we propose a novel "Index-as-an-Execution-Plan" paradigm to bound the retrieval latency. Unlike conventional approaches that treat indexing and I/O execution as separate stages, our approach integrates fine-grained partial retrieval directly into the indexing structure. By pre-materializing the mapping between logical spatial grids and physical pixel windows, our system enables deterministic I/O planning without runtime geometric computation. To further ensure scalability, we introduce a concurrency control protocol tailored for spatio-temporal range retrievals and an automatic I/O tuning mechanism. The principal contributions of this paper are summarized as follows: To address the aforementioned problems, we propose a novel ``Index-as-an-Execution-Plan'' paradigm to bound the retrieval latency. Unlike conventional approaches that treat indexing and I/O execution as separate stages, our approach integrates fine-grained partial retrieval directly into the indexing structure. By pre-materializing the mapping between logical spatial grids and physical pixel windows, our system enables deterministic I/O planning without runtime geometric computation. To further ensure scalability, we introduce a concurrency control protocol tailored for spatio-temporal range retrievals and an automatic I/O tuning mechanism. The principal contributions of this paper are summarized as follows:
\begin{enumerate} \begin{enumerate}
\item We propose an I/O-aware index schema. Instead of merely returning candidate image identifiers, our index directly translates high-level spatio-temporal predicates into concrete, byte-level windowed read plans. This design bridges the semantic gap between logical retrievals and physical storage, eliminating expensive runtime geospatial computations and ensuring that I/O cost is strictly proportional to the retrieval footprint. \item We propose an I/O-aware index schema. Instead of merely returning candidate image identifiers, our index directly translates high-level spatio-temporal predicates into concrete, byte-level windowed read plans. This design bridges the semantic gap between logical retrievals and physical storage, eliminating expensive runtime geospatial computations and ensuring that I/O cost is strictly proportional to the retrieval footprint.
\item We propose a hybrid concurrency-aware I/O coordination protocol. This protocol adapts transaction processing principles by integrating Calvin-style deterministic ordering \cite{Thomson12Calvin} with optimistic execution \cite{Lim17OCC}. It shifts the focus from protecting database rows to coordinating shared I/O flows. This protocol dynamically switches strategies based on spatial contention, effectively converting "I/O contention" into "request merging opportunities." \item We propose a hybrid concurrency-aware I/O coordination protocol. This protocol adapts transaction processing principles by integrating Calvin-style deterministic ordering \cite{Thomson12Calvin} with optimistic execution \cite{Lim17OCC}. It shifts the focus from protecting database rows to coordinating shared I/O flows. This protocol dynamically switches strategies based on spatial contention, effectively converting I/O contention into request merging opportunities.
\item We propose an automatic I/O tuning method to improve the I/O performance of spatio-temporal range retrievals over RS data. The method extends an existing AI-powered I/O tuning framework \cite{Rajesh24TunIO} based on a surrogate-assisted genetic multi-armed bandit algorithm \cite{Preil25GMAB}. \item We propose an automatic I/O tuning method to improve the I/O performance of spatio-temporal range retrievals over RS data. The method extends an existing AI-powered I/O tuning framework \cite{Rajesh24TunIO} based on a surrogate-assisted genetic multi-armed bandit algorithm \cite{Preil25GMAB}.
\end{enumerate} \end{enumerate}
@@ -93,7 +93,7 @@ Section~\ref{sec:EXP} presents the experiments and results.
Section~\ref{sec:Con} concludes this paper with a summary. Section~\ref{sec:Con} concludes this paper with a summary.
\section{Related Work}\label{sec:RW} \section{Related Work}\label{sec:RW}
This section describes the most salient studies of I/O-efficient spatio-temporal retrieval processing, concurrency control, and I/O performance tuning.
\subsection{I/O-Efficient Spatio-Temporal Retrieval Processing} \subsection{I/O-Efficient Spatio-Temporal Retrieval Processing}
Efficient spatio-temporal query processing for RS data has been extensively studied, with early efforts primarily focusing on metadata organization and index-level pruning in relational database systems. Traditional approaches typically extend tree-based spatial indexes, such as R-tree \cite{Strobl08PostGIS}, quadtree \cite{Tang12Quad-Tree}, and their spatio-temporal variants \cite{Simoes16PostGIST}, to organize image footprints together with temporal attributes, and are commonly implemented on relational backends (e.g., MySQL and PostgreSQL). These methods provide efficient range filtering for moderate-scale datasets, but their reliance on balanced tree structures often leads to high maintenance overhead and limited scalability as the volume of remote sensing metadata grows rapidly. With the continuous increase in data volume and ingestion rate, recent systems have gradually shifted toward grid-based spatio-temporal indexing schemes deployed on distributed NoSQL stores. By encoding spatial footprints into uniform spatial grids \cite{suwardi15geohash, Yan21RS_manage1} or space-filling curves \cite{liu24mstgi, Yang24GridMesa} and combining them with temporal identifiers, these approaches enable lightweight index construction and better horizontal scalability on backends such as HBase and Elasticsearch. Such grid-based indexes can effectively reduce the candidate search space through coarse-grained pruning and are more suitable for large-scale, continuously growing remote sensing archives. Efficient spatio-temporal query processing for RS data has been extensively studied, with early efforts primarily focusing on metadata organization and index-level pruning in relational database systems. 
Traditional approaches typically extend tree-based spatial indexes, such as R-tree \cite{Strobl08PostGIS}, quadtree \cite{Tang12Quad-Tree}, and their spatio-temporal variants \cite{Simoes16PostGIST}, to organize image footprints together with temporal attributes, and are commonly implemented on relational backends (e.g., MySQL and PostgreSQL). These methods provide efficient range filtering for moderate-scale datasets, but their reliance on balanced tree structures often leads to high maintenance overhead and limited scalability as the volume of remote sensing metadata grows rapidly. With the continuous increase in data volume and ingestion rate, recent systems have gradually shifted toward grid-based spatio-temporal indexing schemes deployed on distributed NoSQL stores. By encoding spatial footprints into uniform spatial grids \cite{suwardi15geohash, Yan21RS_manage1} or space-filling curves \cite{liu24mstgi, Yang24GridMesa} and combining them with temporal identifiers, these approaches enable lightweight index construction and better horizontal scalability on backends such as HBase and Elasticsearch. Such grid-based indexes can effectively reduce the candidate search space through coarse-grained pruning and are more suitable for large-scale, continuously growing remote sensing archives.
@@ -102,7 +102,7 @@ Efficient spatio-temporal query processing for RS data has been extensively stud
However, index pruning alone is insufficient to guarantee end-to-end retrieval efficiency for remote sensing workloads, where individual images are usually large and retrieval results require further pixel-level processing. To reduce the amount of raw I/O, Google Earth Engine \cite{gorelick17GEE} relies on tiling and multi-resolution pyramids that physically split images into small blocks. While more recent solutions leverage COG and window-based I/O to enable partial reads from monolithic image files, frameworks such as OpenDataCube \cite{LEWIS17datacube} exploit these features to read only the image regions intersecting a retrieval window, thereby reducing unnecessary data transfer. Nevertheless, after candidate images are identified, most systems still perform fine-grained geospatial computations for each image, including coordinate transformations and precise pixel-window derivation, which may incur substantial overhead when many images are involved. However, index pruning alone is insufficient to guarantee end-to-end retrieval efficiency for remote sensing workloads, where individual images are usually large and retrieval results require further pixel-level processing. To reduce the amount of raw I/O, Google Earth Engine \cite{gorelick17GEE} relies on tiling and multi-resolution pyramids that physically split images into small blocks. While more recent solutions leverage COG and window-based I/O to enable partial reads from monolithic image files, frameworks such as OpenDataCube \cite{LEWIS17datacube} exploit these features to read only the image regions intersecting a retrieval window, thereby reducing unnecessary data transfer. Nevertheless, after candidate images are identified, most systems still perform fine-grained geospatial computations for each image, including coordinate transformations and precise pixel-window derivation, which may incur substantial overhead when many images are involved.
\subsection{Concurrency Control} \subsection{Concurrency Control}
Concurrency control has long been studied to provide correctness and high throughput in multi-user database and storage systems, with two broad paradigms dominating the literature: deterministic scheduling \cite{Thomson12Calvin, hong2025deterministic} and non-deterministic schemes \cite{Bernstein812PL}, \cite{KungR81OCC}. Hybrid approaches \cite{WangK16MVOCC}, \cite{Hong25HDCC} that adaptively combine these paradigms seek to exploit the low-conflict efficiency of deterministic execution while retaining the flexibility of optimistic techniques. More recent proposals, such as OOCC \cite{Wu25OOCC}, target read-heavy, disaggregated settings by reducing validation and round-trips for read-only transactions, achieving low latency under OLTP-like workloads. These methods are primarily optimized for record- or key-level access patterns: their metrics and designs emphasize transaction latency, abort rates, and throughput under workloads with small, well-defined read/write sets.
\par \par
Overall, existing concurrency control mechanisms are largely designed around transaction-level correctness and throughput, assuming record- or key-based access patterns and treating storage I/O as a black box. Their optimization objectives rarely account for I/O amplification or fine-grained storage contention induced by concurrent range retrievals. Consequently, these approaches are ill-suited for data-intensive spatio-temporal workloads, where coordinating overlapping window reads and mitigating storage-level interference are critical to achieving scalable performance under multi-user access. Overall, existing concurrency control mechanisms are largely designed around transaction-level correctness and throughput, assuming record- or key-based access patterns and treating storage I/O as a black box. Their optimization objectives rarely account for I/O amplification or fine-grained storage contention induced by concurrent range retrievals. Consequently, these approaches are ill-suited for data-intensive spatio-temporal workloads, where coordinating overlapping window reads and mitigating storage-level interference are critical to achieving scalable performance under multi-user access.
@@ -209,10 +209,12 @@ As illustrated in Fig.~\ref{fig:index}(b), the row key of the I2G table is the \
\par \par
\textit{GridWindow Mapping.} \textit{GridWindow Mapping.}
This column family records the list of grid cells intersected by the image together with their corresponding pixel windows in the image coordinate space. Each entry has the form:
\begin{equation}
\label{eqn_pre_gridkey}
\langle \textit{GridKey}, W_{ImageKey\_GridKey} \rangle,
\end{equation}
where \textit{GridKey} identifies a grid cell at the chosen global resolution, and $W_{ImageKey\_GridKey}$ denotes the minimal pixel bounding rectangle within the image that exactly covers that grid cell.
\par \par
@@ -250,9 +252,10 @@ Each grid cell corresponds to a unique 64-bit \textit{GridKey}, which directly m
\par \par
\textbf{Candidate Image Retrieval with Temporal Pruning.} \textbf{Candidate Image Retrieval with Temporal Pruning.}
Given the enumerated grid set $\{g_1, \ldots, g_k\}$, the retrieval processor performs a batched multi-get on the G2I table. Each G2I row corresponds to a single grid cell and stores the identifiers of all images whose spatial footprints intersect that cell. For each grid $g_i$, the lookup returns:
\begin{equation}
\label{eqn_pre_lookup_return}
G2I[g_i] = \{ imgKey_1, \ldots, imgKey_m \}.
\end{equation}
\par \par
All retrieved image identifiers are unioned to form the spatial candidate set All retrieved image identifiers are unioned to form the spatial candidate set
@@ -285,13 +288,7 @@ This tight coupling fundamentally changes the optimization objective. Instead of
\par \par
\textbf{Theoretical Cost Analysis.} \textbf{Theoretical Cost Analysis.}
To rigorously quantify the performance advantage, we revisit the retrieval cost model defined in Eq. (\ref{eqn:cost_total}). In traditional full-image reading systems, although the geospatial computation cost is negligible ($C_{geo} = 0$) as no clipping is performed, the I/O cost $C_{io}$ is determined by the full file size. Consequently, the total latency is entirely dominated by massive I/O overhead, rendering $C_{meta}$ (typically milliseconds) irrelevant.
\par
Existing window-based I/O systems (e.g., ODC or COG-aware libraries) successfully reduce the I/O cost to the size of the requested window. However, this reduction comes at the expense of a significant surge in $C_{geo}$. For every candidate image, the system must perform on-the-fly coordinate transformations and polygon clipping to calculate read offsets. When a retrieval involves thousands of images, the accumulated CPU time ($\sum C_{geo}$) becomes a new bottleneck (e.g., hundreds of milliseconds to seconds), often negating the benefits of I/O reduction (detailed quantitative comparisons are provided in Sec.~\ref{sec:Index_exp_2}). Existing window-based I/O systems (e.g., ODC or COG-aware libraries) successfully reduce the I/O cost to the size of the requested window. However, this reduction comes at the expense of a significant surge in $C_{geo}$. For every candidate image, the system must perform on-the-fly coordinate transformations and polygon clipping to calculate read offsets. When a retrieval involves thousands of images, the accumulated CPU time ($\sum C_{geo}$) becomes a new bottleneck (e.g., hundreds of milliseconds to seconds), often negating the benefits of I/O reduction (detailed quantitative comparisons are provided in Sec.~\ref{sec:Index_exp_2}).
@@ -561,7 +558,7 @@ For comparison, we compare three representative execution schemes:
\begin{enumerate} \begin{enumerate}
\item \textbf{Baseline 1 (Full-file Retrieval):} A traditional system that utilizes spatio-temporal indexing for metadata filtering but performs full-file retrieval during data extraction. \item \textbf{Baseline 1 (Full-file Retrieval):} A traditional system that utilizes spatio-temporal indexing for metadata filtering but performs full-file retrieval during data extraction.
\item \textbf{Baseline 2 (Window-based I/O):} A state-of-the-art system (e.g., OpenDataCube) that supports fine-grained partial reading to minimize I/O volume, representing the theoretical optimum for data selectivity. \item \textbf{Baseline 2 (Window-based I/O):} A state-of-the-art system (e.g., OpenDataCube) that supports fine-grained partial reading to minimize I/O volume, representing the theoretical optimum for data selectivity.
\item \textbf{Ours (I/O-aware Indexing):} The proposed approach uses the dual-layer G2I and I2G inverted structure, which pre-materializes grid-to-pixel mappings to enable deterministic partial reading without runtime geometric computations.
\end{enumerate} \end{enumerate}
\subsubsection{I/O Selectivity Analysis}\label{sec:Index_exp_1} \subsubsection{I/O Selectivity Analysis}\label{sec:Index_exp_1}
@@ -585,7 +582,7 @@ For comparison, we compare three representative execution schemes:
\end{figure} \end{figure}
\par \par
First, we evaluated the effectiveness of data reduction by measuring the I/O selectivity, defined as the ratio of the retrieved data volume to the total file size. Fig.~\ref{fig:index_exp1} compares our method against Baseline 1 and Baseline 2. As illustrated in Fig.~\ref{fig:index_exp1}(a), Baseline 1 always reads the entire image regardless of the proportion of the intersection between the query range and the image. In contrast, both Baseline 2 and Ours significantly reduce I/O traffic by enabling partial reads. It is worth noting that our method incurs slightly higher I/O volume compared to the theoretically optimal Baseline 2.
This marginal data redundancy is attributed to the grid alignment effect: our index retrieves pixel blocks based on fixed grid boundaries, whereas Baseline 2 performs precise geospatial clipping. Fig.~\ref{fig:index_exp1}(b) further presents the distribution of unnecessary data fraction. While our method introduces a small amount of over-reading due to grid padding, it successfully avoids the massive data waste observed in Baseline 1. As we will demonstrate in the next section, this slight compromise in I/O precision is a strategic trade-off that eliminates expensive runtime computations.
\subsubsection{End-to-End Retrieval Latency}\label{sec:Index_exp_2} \subsubsection{End-to-End Retrieval Latency}\label{sec:Index_exp_2}
@@ -639,9 +636,9 @@ To empirically validate the cost model proposed in Eq.~\ref{eqn:cost_total}, we
\end{figure} \end{figure}
\par \par
To quantify the individual contributions of the G2I (coarse filtering) and I2G (fine-grained access) components, we decomposed the system into four variants. Fig.~\ref{fig:index_exp3} breaks down the performance in terms of I/O volume and latency components. Fig.~\ref{fig:index_exp3}(a) confirms that removing either component leads to suboptimal I/O behavior. The \textit{No Index} and \textit{G2I Only} variants result in 100\% I/O volume, as they lack the window information required for partial access. Conversely, \textit{I2G Only} and \textit{G2I+I2G} achieve minimal I/O volume ($\approx 10\%$).
Fig.~\ref{fig:index_exp3}(b) reveals the latency breakdown. \textit{No Index} suffers from both high full table scanning cost and high storage I/O cost. \textit{G2I Only} efficiently reduces metadata lookup time ($\approx 50$ ms) but fails to reduce storage I/O ($\approx 8000$ ms). Although \textit{I2G Only} minimizes storage I/O ($\approx 100$ ms), it incurs prohibitive metadata lookup overhead ($\approx 1500$ ms) because the system must scan the entire I2G table to identify relevant images without spatial pruning. \textit{G2I+I2G} achieves the best performance, maintaining low metadata latency ($\approx 60$ ms) via G2I pruning while ensuring minimal storage I/O ($\approx 100$ ms) via I2G windowing.
Moreover, the choice of grid resolution (Zoom Level) is a critical parameter that dictates the trade-off between metadata management overhead and I/O precision. To justify our selection of Zoom Level 14, we conducted a sensitivity analysis by varying the grid resolution from Level 12 to Level 16 under a fixed workload of medium-scale range queries. Fig.~\ref{fig:index_exp3_3} illustrates the latency breakdown across different resolutions. The results reveal a clear convex trajectory in total query latency, driven by two opposing forces. For coarse-grained grids (Level $\le 13$), while metadata lookup is extremely fast ($C_{meta} < 30$ ms) due to the small number of grid keys, the I/O cost is prohibitively high. Large grid cells force the system to read significant amounts of irrelevant pixel data outside the actual query boundary, serving as the dominant bottleneck. Conversely, finer grids (Level 15, 16) maximize I/O precision, reducing $C_{io}$ to its theoretical minimum. However, this comes at the expense of increased metadata volume. A single query may intersect thousands of Level 16 micro-grids, causing $C_{meta}$ to surge drastically ($>100$ ms) due to the overhead of scanning and processing massive key lists in the G2I/I2G tables. As evidenced by the trough in the total latency curve, Zoom Level 14 represents the optimal spot for our dataset. At this resolution, the grid cell size roughly matches the typical internal tile size of remote sensing images, keeping I/O waste low while maintaining a manageable number of index keys. Consequently, our system adopts Level 14 as the default global configuration. Moreover, the choice of grid resolution (Zoom Level) is a critical parameter that dictates the trade-off between metadata management overhead and I/O precision. To justify our selection of Zoom Level 14, we conducted a sensitivity analysis by varying the grid resolution from Level 12 to Level 16 under a fixed workload of medium-scale range queries. 
Fig.~\ref{fig:index_exp3_3} illustrates the latency breakdown across different resolutions. The results reveal a clear convex trajectory in total query latency, driven by two opposing forces. For coarse-grained grids (Level $\le 13$), while metadata lookup is extremely fast ($C_{meta} < 30$ ms) due to the small number of grid keys, the I/O cost is prohibitively high. Large grid cells force the system to read significant amounts of irrelevant pixel data outside the actual query boundary, serving as the dominant bottleneck. Conversely, finer grids (Level 15, 16) maximize I/O precision, reducing $C_{io}$ to its theoretical minimum. However, this comes at the expense of increased metadata volume. A single query may intersect thousands of Level 16 micro-grids, causing $C_{meta}$ to surge drastically ($>100$ ms) due to the overhead of scanning and processing massive key lists in the G2I/I2G tables. As evidenced by the trough in the total latency curve, Zoom Level 14 represents the optimal spot for our dataset. At this resolution, the grid cell size roughly matches the typical internal tile size of remote sensing images, keeping I/O waste low while maintaining a manageable number of index keys. Consequently, our system adopts Level 14 as the default global configuration.
@@ -671,7 +668,7 @@ Finally, we evaluated the scalability and cost of maintaining the index. Fig.~\r
In this section, we evaluate the proposed hybrid coordination mechanism on a distributed storage cluster to assess its scalability, robustness under contention, and internal storage efficiency. In this section, we evaluate the proposed hybrid coordination mechanism on a distributed storage cluster to assess its scalability, robustness under contention, and internal storage efficiency.
\par \par
To systematically control the workload characteristics, we developed a synthetic workload generator. We define the Spatial Overlap Ratio ($\sigma$) to quantify the extent of shared data regions among concurrent queries, ranging from $\sigma=0$ (disjoint) to $\sigma=0.9$ (highly concentrated hotspots). The number of concurrent clients varies from $N=1$ to $N=64$. It is worth noting that, given the data-intensive nature of retrievals where a single request triggers GB-scale I/O and complex decoding, 64 concurrent streams are sufficient to fully saturate the aggregate I/O bandwidth and CPU resources of our experimental cluster, representing a heavy-load scenario in operational scientific computing environments.
For comparison, we evaluate the following execution schemes: For comparison, we evaluate the following execution schemes:
\begin{enumerate} \begin{enumerate}
@@ -701,7 +698,7 @@ To evaluate the system's robustness under different workload characteristics, we
The results reveal a fundamental divergence in how the two systems respond to data contention. As shown in Fig.~\ref{fig:cc_exp1}(a), when query footprints are spatially dispersed, the opportunities for physical I/O merging are minimal. Consequently, the performance of both systems is primarily constrained by the aggregate physical bandwidth of the storage cluster. Both approaches exhibit linear latency growth with respect to the client count. At $N=64$, the Baseline reaches a mean latency of approx. 37,000 ms, while our method records approx. 30,000 ms. Although our method maintains a slight performance edge due to the reduced read amplification provided by the I/O-aware index, it inevitably degrades to a linear query processing mode similar to the Baseline. This confirms that without spatial locality to leverage request collapsing, the system is bound by the hardware's I/O throughput limits. The results reveal a fundamental divergence in how the two systems respond to data contention. As shown in Fig.~\ref{fig:cc_exp1}(a), when query footprints are spatially dispersed, the opportunities for physical I/O merging are minimal. Consequently, the performance of both systems is primarily constrained by the aggregate physical bandwidth of the storage cluster. Both approaches exhibit linear latency growth with respect to the client count. At $N=64$, the Baseline reaches a mean latency of approx. 37,000 ms, while our method records approx. 30,000 ms. Although our method maintains a slight performance edge due to the reduced read amplification provided by the I/O-aware index, it inevitably degrades to a linear query processing mode similar to the Baseline. This confirms that without spatial locality to leverage request collapsing, the system is bound by the hardware's I/O throughput limits.
A sharp performance divergence is observed as the overlap ratio increases to $\sigma=0.8$ (Fig.~\ref{fig:cc_exp1}(b)). The Baseline suffers from severe performance degradation, with latency spiking from 37,000 ms (at $\sigma=0.2$) to 60,000 ms (at $\sigma=0.8$) under peak load. This deterioration is attributed to the I/O blender effect and lock convoys: highly concurrent requests competing for the same index pages and disk blocks cause excessive disk seek thrashing and thread blocking, significantly reducing effective throughput. Conversely, our method demonstrates sub-linear scalability in this scenario. The latency at $N=64$ drops significantly to 1,100 ms—a $54\times$ speedup over the Baseline. This result validates the efficacy of the \textit{Request Collapse} mechanism. As $\sigma$ increases, the probability of multiple logical queries targeting the same physical byte ranges rises, allowing the scheduler to merge $N$ concurrent requests into a single physical I/O operation.
The medium-overlap scenario (Fig.~\ref{fig:cc_exp1}(c)) serves as a transition point, where our method achieves a mean latency of approx. 6,000 ms at peak load, compared to 40,000 ms for the Baseline. This indicates that the system's efficiency scales dynamically with the degree of data contention. The experimental results demonstrate the workload-adaptive nature of the proposed architecture. While the system performs comparably to traditional approaches under dispersed workloads (limited by physical bandwidth), its advantages become order-of-magnitude significant in data-intensive, high-contention scenarios, effectively turning I/O contention into an opportunity for optimization. The medium-overlap scenario (Fig.~\ref{fig:cc_exp1}(c)) serves as a transition point, where our method achieves a mean latency of approx. 6,000 ms at peak load, compared to 40,000 ms for the Baseline. This indicates that the system's efficiency scales dynamically with the degree of data contention. The experimental results demonstrate the workload-adaptive nature of the proposed architecture. While the system performs comparably to traditional approaches under dispersed workloads (limited by physical bandwidth), its advantages become order-of-magnitude significant in data-intensive, high-contention scenarios, effectively turning I/O contention into an opportunity for optimization.
For comparison, we benchmark against three representative tuning strategies:
We first initiated a cold-start tuning session to evaluate how efficiently each method identifies high-quality configurations starting from a default, unoptimized state. Fig.~\ref{fig:tune_exp1}(a) reports the convergence trajectory of the best-observed latency over tuning steps.
As illustrated in Fig.~\ref{fig:tune_exp1}(a), the three methods exhibit distinct search behaviors. The GA baseline demonstrates the slowest convergence. It exhibits a staircase-like descent with prolonged plateaus, requiring over 100 steps to reduce latency significantly. This sluggishness is attributed to its mutation mechanism, which lacks historical memory and repeatedly explores ineffective parameter spaces. The RL-based TunIO outperforms GA but still suffers from a slow start. While it eventually reaches a competitive latency ($\approx 277$ ms at step 140), its exploration phase is costly. The reinforcement learning agent requires a substantial number of interaction samples to learn the complex mapping between I/O parameters and reward signals.
Our method achieves the fastest latency drop, rapidly decreasing from $500$ ms to a near-optimal zone ($\approx 315$ ms) within a short window. Unlike GA and TunIO, SA-GMAB leverages the surrogate model to pre-screen candidates. By effectively pruning unpromising configurations before they incur actual execution costs, SA-GMAB maximizes the information gain per step, making it particularly suitable for online scenarios where tuning overhead must be minimized.
To strictly quantify the cost-effectiveness of the tuning process, we adopt the \textit{Return on Tuning Investment} (RoTI) metric proposed in TunIO \cite{Rajesh24TunIO}. We define the application performance $\mathcal{P}$ as the reciprocal of the query latency (i.e., $\mathcal{P} \propto 1/\mathcal{L}$). The RoTI metric is formalized as follows:
\begin{equation}
\label{eq:roti}
RoTI(t) = \frac{\mathcal{P}_{achieved}(t) - \mathcal{P}_{initial}}{t},
\end{equation}
where $t$ denotes the cumulative tuning time (overhead). $\mathcal{P}_{initial} = 1 / \mathcal{L}_{0}$ represents the baseline performance derived from the default configuration, and $\mathcal{P}_{achieved}(t) = 1 / \mathcal{L}_{t}$ represents the maximum performance achieved up to time $t$. Functionally, this metric represents the performance gain purchased per unit of tuning time. A higher RoTI value signifies that the optimizer rapidly identifies low-latency configurations with minimal computational overhead.
Fig.~\ref{fig:tune_exp1}(b) plots the RoTI curves over time. Our method (SA-GMAB) reaches a remarkable RoTI peak ($\approx 100$) at the early stage ($t=825$). This indicates that SA-GMAB yields the highest immediate return on investment, successfully locating high-quality configurations when the tuning budget is strictly limited. In contrast, TunIO peaks at a significantly lower value ($\approx 68$), while GA remains flat and inefficient ($\approx 46$). This confirms that the surrogate-assisted mechanism effectively amplifies the value of each exploration step. All curves exhibit a decaying trend as time progresses ($t \rightarrow \infty$). This is expected behavior: as the system converges to the global optimum, the marginal performance gain ($\Delta \mathcal{P}$) saturates while the accumulated time $t$ continues to grow. Notably, SA-GMAB's RoTI decays faster in the late stages simply because it has already exhausted the potential for improvement much earlier than the baselines.
We further investigated the system's resilience in non-stationary environments.
Fig.~\ref{fig:tune_exp3} illustrates the latency evolution before and after the shift. At $t=60$, the workload transition causes an immediate performance collapse across all methods, with latency spiking from a stable $\approx 50$ ms to $>300$ ms. This confirms that the configuration optimal for the previous phase is detrimental in the new environment. The GA-based method fails to adapt effectively. Post-shift, its latency hovers around $290-300$ ms. Lacking a mechanism to quickly reset or guide exploration, the genetic algorithm remains trapped in the local optima of the previous workload, exhibiting almost zero recovery within the observation window. TunIO manages to reduce latency but at a slow pace.
It takes 40 steps to lower the latency from 308 ms to 134 ms ($t=100$). While the RL agent eventually learns the new reward function, the high sample complexity delays the recovery, leaving the system in a suboptimal state for a prolonged period. In contrast, SA-GMAB executes a decisive recovery. By leveraging the surrogate model to filter high-uncertainty candidates, it rapidly identifies the new optimal region. The latency drops to $\approx 88$ ms at $t=80$ and further stabilizes at $\approx 74$ ms at $t=100$.
\section{Conclusions}\label{sec:Con}
This paper presents an I/O-aware retrieval approach designed to bound retrieval latency and maximize throughput for large-scale spatio-temporal analytics. By introducing the ``Index-as-an-Execution-Plan'' paradigm, the dual-layer inverted index bridges the semantic gap between logical indexing and physical storage, effectively shifting the computational burden from retrieval time to ingestion time. To address the scalability challenges in concurrent environments, we developed a hybrid concurrency-aware I/O coordination protocol that adaptively switches between deterministic ordering and optimistic execution based on spatial contention. Furthermore, to handle the complexity of parameter configuration in fluctuating workloads, we integrated the SA-GMAB method for online automatic I/O tuning.
The experimental results indicate that: (1) I/O-aware indexing achieves an order-of-magnitude latency reduction with negligible storage overhead; (2) the hybrid coordination protocol realizes a $54\times$ throughput improvement in high-overlap scenarios; and (3) the SA-GMAB method recovers from workload shifts $2\times$ faster than RL baselines while maximizing RoTI.
\section*{Acknowledgments}
This work is supported by the National Key R\&D Program of China ``Intergovernmental International Science and Technology Innovation Cooperation'' (Grant No.~2025YFE0107100).