%% bare_jrnl_compsoc.tex
%% V1.3
%% 2007/01/11
%% by Michael Shell
%% See:
%% http://www.michaelshell.org/
%% for current contact information.
%%
%% This is a skeleton file demonstrating the use of IEEEtran.cls
%% (requires IEEEtran.cls version 1.7 or later) with an IEEE Computer
%% Society journal paper.
%%
%% Support sites:
%% http://www.michaelshell.org/tex/ieeetran/
%% http://www.ctan.org/tex-archive/macros/latex/contrib/IEEEtran/
%% and
%% http://www.ieee.org/

%%*************************************************************************
%% Legal Notice:
%% This code is offered as-is without any warranty either expressed or
%% implied; without even the implied warranty of MERCHANTABILITY or
%% FITNESS FOR A PARTICULAR PURPOSE!
%% User assumes all risk.
%% In no event shall IEEE or any contributor to this code be liable for
%% any damages or losses, including, but not limited to, incidental,
%% consequential, or any other damages, resulting from the use or misuse
%% of any information contained here.
%%
%% All comments are the opinions of their respective authors and are not
%% necessarily endorsed by the IEEE.
%%
%% This work is distributed under the LaTeX Project Public License (LPPL)
%% ( http://www.latex-project.org/ ) version 1.3, and may be freely used,
%% distributed and modified. A copy of the LPPL, version 1.3, is included
%% in the base LaTeX documentation of all distributions of LaTeX released
%% 2003/12/01 or later.
%% Retain all contribution notices and credits.
%% ** Modified files should be clearly indicated as such, including  **
%% ** renaming them and changing author support contact information. **
%%
%% File list of work: IEEEtran.cls, IEEEtran_HOWTO.pdf, bare_adv.tex,
%%                    bare_conf.tex, bare_jrnl.tex, bare_jrnl_compsoc.tex
%%*************************************************************************
% *** Authors should verify (and, if needed, correct) their LaTeX system ***
% *** with the testflow diagnostic prior to trusting their LaTeX platform ***
% *** with production work. IEEE's font choices can trigger bugs that do ***
% *** not appear when using other class files.                           ***
% The testflow support page is at:
% http://www.michaelshell.org/tex/testflow/


% Note that the a4paper option is mainly intended so that authors in
% countries using A4 can easily print to A4 and see how their papers will
% look in print - the typesetting of the document will not typically be
% affected with changes in paper size (but the bottom and side margins will).
% Use the testflow package mentioned above to verify correct handling of
% both paper sizes by the user's LaTeX system.
%
% Also note that the "draftcls" or "draftclsnofoot", not "draft", option
% should be used if it is desired that the figures are to be displayed in
% draft mode.
%
% The Computer Society usually requires 10pt for submissions.
%
\documentclass[9pt,journal,cspaper,compsoc]{IEEEtran}
\linespread{0.885}
%
% If IEEEtran.cls has not been installed into the LaTeX system files,
% manually specify the path to it like:
% \documentclass[12pt,journal,compsoc]{../sty/IEEEtran}


% Some very useful LaTeX packages include:
% (uncomment the ones you want to load)
% *** MISC UTILITY PACKAGES ***
%
%\usepackage{ifpdf}
% Heiko Oberdiek's ifpdf.sty is very useful if you need conditional
% compilation based on whether the output is pdf or dvi.
% usage:
% \ifpdf
%   % pdf code
% \else
%   % dvi code
% \fi
% The latest version of ifpdf.sty can be obtained from:
% http://www.ctan.org/tex-archive/macros/latex/contrib/oberdiek/
% Also, note that IEEEtran.cls V1.7 and later provides a builtin
% \ifCLASSINFOpdf conditional that works the same way.
% When switching from latex to pdflatex and vice-versa, the compiler may
% have to be run twice to clear warning/error messages.
% *** CITATION PACKAGES ***
%
\ifCLASSOPTIONcompsoc
% IEEE Computer Society needs nocompress option
% requires cite.sty v4.0 or later (November 2003)
% \usepackage[nocompress]{cite}
\else
% normal IEEE
% \usepackage{cite}
\fi
% cite.sty was written by Donald Arseneau
% V1.6 and later of IEEEtran pre-defines the format of the cite.sty package
% \cite{} output to follow that of IEEE. Loading the cite package will
% result in citation numbers being automatically sorted and properly
% "compressed/ranged". e.g., [1], [9], [2], [7], [5], [6] without using
% cite.sty will become [1], [2], [5]--[7], [9] using cite.sty. cite.sty's
% \cite will automatically add leading space, if needed. Use cite.sty's
% noadjust option (cite.sty V3.8 and later) if you want to turn this off.
% cite.sty is already installed on most LaTeX systems. Be sure and use
% version 4.0 (2003-05-27) and later if using hyperref.sty. cite.sty does
% not currently provide for hyperlinked citations.
% The latest version can be obtained at:
% http://www.ctan.org/tex-archive/macros/latex/contrib/cite/
% The documentation is contained in the cite.sty file itself.
%
% Note that some packages require special options to format as the Computer
% Society requires. In particular, Computer Society papers do not use
% compressed citation ranges as is done in typical IEEE papers
% (e.g., [1]-[4]). Instead, they list every citation separately in order
% (e.g., [1], [2], [3], [4]). To get the latter we need to load the cite
% package with the nocompress option which is supported by cite.sty v4.0
% and later. Note also the use of a CLASSOPTION conditional provided by
% IEEEtran.cls V1.7 and later.
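% As a minimal illustration of the nocompress behavior described above
% (the reference keys are hypothetical, assuming cite.sty is uncommented):
%   "as shown in \cite{ref1,ref3,ref2,ref4}" typesets roughly as
%     [1]--[4]             with plain cite.sty (regular IEEE journals)
%     [1], [2], [3], [4]   with the nocompress option (Computer Society)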
% *** GRAPHICS RELATED PACKAGES ***
%
\ifCLASSINFOpdf
\usepackage[pdftex]{graphicx}
% declare the path(s) where your graphic files are
\graphicspath{{../pdf/}{../jpeg/}}
% and their extensions so you won't have to specify these with
% every instance of \includegraphics
\DeclareGraphicsExtensions{.pdf,.jpeg,.png}
\else
% or other class option (dvipsone, dvipdf, if not using dvips). graphicx
% will default to the driver specified in the system graphics.cfg if no
% driver is specified.
\usepackage[dvips]{graphicx}
% declare the path(s) where your graphic files are
\graphicspath{{../eps/}}
% and their extensions so you won't have to specify these with
% every instance of \includegraphics
\DeclareGraphicsExtensions{.eps}
\fi
% graphicx was written by David Carlisle and Sebastian Rahtz. It is
% required if you want graphics, photos, etc. graphicx.sty is already
% installed on most LaTeX systems. The latest version and documentation can
% be obtained at:
% http://www.ctan.org/tex-archive/macros/latex/required/graphics/
% Another good source of documentation is "Using Imported Graphics in
% LaTeX2e" by Keith Reckdahl which can be found as epslatex.ps or
% epslatex.pdf at: http://www.ctan.org/tex-archive/info/
%
% latex, and pdflatex in dvi mode, support graphics in encapsulated
% postscript (.eps) format. pdflatex in pdf mode supports graphics
% in .pdf, .jpeg, .png and .mps (metapost) formats. Users should ensure
% that all non-photo figures use a vector format (.eps, .pdf, .mps) and
% not a bitmapped format (.jpeg, .png). IEEE frowns on bitmapped formats
% which can result in "jaggedy"/blurry rendering of lines and letters as
% well as large increases in file sizes.
%
% You can find documentation about the pdfTeX application at:
% http://www.tug.org/applications/pdftex
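% Usage sketch: with the \graphicspath and \DeclareGraphicsExtensions
% declarations above, a figure can be included without its path or file
% extension (the file name "mysimfig" is a hypothetical example):
% \begin{figure}[!t]
% \centering
% \includegraphics[width=2.5in]{mysimfig}
% \caption{Simulation results.}
% \label{fig_sim}
% \end{figure}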
% *** MATH PACKAGES ***
%
\usepackage[cmex10]{amsmath}
% A popular package from the American Mathematical Society that provides
% many useful and powerful commands for dealing with mathematics. If using
% it, be sure to load this package with the cmex10 option to ensure that
% only type 1 fonts will be utilized at all point sizes. Without this option,
% it is possible that some math symbols, particularly those within
% footnotes, will be rendered in bitmap form which will result in a
% document that can not be IEEE Xplore compliant!
%
% Also, note that the amsmath package sets \interdisplaylinepenalty to 10000
% thus preventing page breaks from occurring within multiline equations. Use:
\interdisplaylinepenalty=2500
% after loading amsmath to restore such page breaks as IEEEtran.cls normally
% does. amsmath.sty is already installed on most LaTeX systems. The latest
% version and documentation can be obtained at:
% http://www.ctan.org/tex-archive/macros/latex/required/amslatex/math/
% *** SPECIALIZED LIST PACKAGES ***
%
\usepackage{algorithmic}
% algorithmic.sty was written by Peter Williams and Rogerio Brito.
% This package provides an algorithmic environment for describing algorithms.
% You can use the algorithmic environment in-text or within a figure
% environment to provide for a floating algorithm. Do NOT use the algorithm
% floating environment provided by algorithm.sty (by the same authors) or
% algorithm2e.sty (by Christophe Fiorio) as IEEE does not use dedicated
% algorithm float types and packages that provide these will not provide
% correct IEEE style captions. The latest version and documentation of
% algorithmic.sty can be obtained at:
% http://www.ctan.org/tex-archive/macros/latex/contrib/algorithms/
% There is also a support site at:
% http://algorithms.berlios.de/index.html
% Also of interest may be the (relatively newer and more customizable)
% algorithmicx.sty package by Szasz Janos:
% http://www.ctan.org/tex-archive/macros/latex/contrib/algorithmicx/
% *** ALIGNMENT PACKAGES ***
%
\usepackage{array}
% Frank Mittelbach's and David Carlisle's array.sty patches and improves
% the standard LaTeX2e array and tabular environments to provide better
% appearance and additional user controls. As the default LaTeX2e table
% generation code is lacking to the point of almost being broken with
% respect to the quality of the end results, all users are strongly
% advised to use an enhanced (at the very least that provided by array.sty)
% set of table tools. array.sty is already installed on most systems. The
% latest version and documentation can be obtained at:
% http://www.ctan.org/tex-archive/macros/latex/required/tools/


%\usepackage{mdwmath}
%\usepackage{mdwtab}
% Also highly recommended is Mark Wooding's extremely powerful MDW tools,
% especially mdwmath.sty and mdwtab.sty which are used to format equations
% and tables, respectively. The MDWtools set is already installed on most
% LaTeX systems. The latest version and documentation is available at:
% http://www.ctan.org/tex-archive/macros/latex/contrib/mdwtools/


% IEEEtran contains the IEEEeqnarray family of commands that can be used to
% generate multiline equations as well as matrices, tables, etc., of high
% quality.

\usepackage{makecell}

\usepackage{eqparbox}
% Also of notable interest is Scott Pakin's eqparbox package for creating
% (automatically sized) equal width boxes - aka "natural width parboxes".
% Available at:
% http://www.ctan.org/tex-archive/macros/latex/contrib/eqparbox/
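% Usage sketch of the IEEEeqnarray environment mentioned above (the
% derivation shown is purely illustrative):
% \begin{IEEEeqnarray}{rCl}
% y & = & (x + 1)^2 \nonumber\\
%   & = & x^2 + 2x + 1
% \end{IEEEeqnarray}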
% *** SUBFIGURE PACKAGES ***
\ifCLASSOPTIONcompsoc
\usepackage[tight,normalsize,sf,SF]{subfigure}
\else
\usepackage[tight,footnotesize]{subfigure}
\fi
% subfigure.sty was written by Steven Douglas Cochran. This package makes it
% easy to put subfigures in your figures. e.g., "Figure 1a and 1b". For IEEE
% work, it is a good idea to load it with the tight package option to reduce
% the amount of white space around the subfigures. Computer Society papers
% use a larger font and \sffamily font for their captions, hence the
% additional options needed under compsoc mode. subfigure.sty is already
% installed on most LaTeX systems. The latest version and documentation can
% be obtained at:
% http://www.ctan.org/tex-archive/obsolete/macros/latex/contrib/subfigure/
% subfigure.sty has been superseded by subfig.sty.


%\ifCLASSOPTIONcompsoc
% \usepackage[caption=false]{caption}
% \usepackage[font=normalsize,labelfont=sf,textfont=sf]{subfig}
%\else
% \usepackage[caption=false]{caption}
% \usepackage[font=footnotesize]{subfig}
%\fi
% subfig.sty, also written by Steven Douglas Cochran, is the modern
% replacement for subfigure.sty. However, subfig.sty requires and
% automatically loads Axel Sommerfeldt's caption.sty which will override
% IEEEtran.cls handling of captions and this will result in non-IEEE style
% figure/table captions. To prevent this problem, be sure and preload
% caption.sty with its "caption=false" package option. This will preserve
% IEEEtran.cls handling of captions. Version 1.3 (2005/06/28) and later
% (recommended due to many improvements over 1.2) of subfig.sty supports
% the caption=false option directly:
%\ifCLASSOPTIONcompsoc
% \usepackage[caption=false,font=normalsize,labelfont=sf,textfont=sf]{subfig}
%\else
% \usepackage[caption=false,font=footnotesize]{subfig}
%\fi
%
% The latest version and documentation can be obtained at:
% http://www.ctan.org/tex-archive/macros/latex/contrib/subfig/
% The latest version and documentation of caption.sty can be obtained at:
% http://www.ctan.org/tex-archive/macros/latex/contrib/caption/
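% Usage sketch of subfigure.sty as loaded above (the graphic file names
% are hypothetical):
% \begin{figure}[!t]
% \centering
% \subfigure[Case I]{\includegraphics[width=1.5in]{subfigcase1}%
% \label{fig_first_case}}
% \hfil
% \subfigure[Case II]{\includegraphics[width=1.5in]{subfigcase2}%
% \label{fig_second_case}}
% \caption{Simulation results for the two cases.}
% \label{fig_sim_cases}
% \end{figure}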
% *** FLOAT PACKAGES ***
%
%\usepackage{fixltx2e}
% fixltx2e, the successor to the earlier fix2col.sty, was written by
% Frank Mittelbach and David Carlisle. This package corrects a few problems
% in the LaTeX2e kernel, the most notable of which is that in current
% LaTeX2e releases, the ordering of single and double column floats is not
% guaranteed to be preserved. Thus, an unpatched LaTeX2e can allow a
% single column figure to be placed prior to an earlier double column
% figure. The latest version and documentation can be found at:
% http://www.ctan.org/tex-archive/macros/latex/base/


%\usepackage{stfloats}
% stfloats.sty was written by Sigitas Tolusis. This package gives LaTeX2e
% the ability to do double column floats at the bottom of the page as well
% as the top. (e.g., "\begin{figure*}[!b]" is not normally possible in
% LaTeX2e). It also provides a command:
%\fnbelowfloat
% to enable the placement of footnotes below bottom floats (the standard
% LaTeX2e kernel puts them above bottom floats). This is an invasive package
% which rewrites many portions of the LaTeX2e float routines. It may not work
% with other packages that modify the LaTeX2e float routines. The latest
% version and documentation can be obtained at:
% http://www.ctan.org/tex-archive/macros/latex/contrib/sttools/
% Documentation is contained in the stfloats.sty comments as well as in the
% presfull.pdf file. Do not use the stfloats baselinefloat ability as IEEE
% does not allow \baselineskip to stretch. Authors submitting work to the
% IEEE should note that IEEE rarely uses double column equations and
% that authors should try to avoid such use. Do not be tempted to use the
% cuted.sty or midfloat.sty packages (also by Sigitas Tolusis) as IEEE does
% not format its papers in such ways.


%\ifCLASSOPTIONcaptionsoff
% \usepackage[nomarkers]{endfloat}
% \let\MYoriglatexcaption\caption
% \renewcommand{\caption}[2][\relax]{\MYoriglatexcaption[#2]{#2}}
%\fi
% endfloat.sty was written by James Darrell McCauley and Jeff Goldberg.
% This package may be useful when used in conjunction with IEEEtran.cls'
% captionsoff option. Some IEEE journals/societies require that submissions
% have lists of figures/tables at the end of the paper and that
% figures/tables without any captions are placed on a page by themselves at
% the end of the document. If needed, the draftcls IEEEtran class option or
% \CLASSINPUTbaselinestretch interface can be used to increase the line
% spacing as well. Be sure and use the nomarkers option of endfloat to
% prevent endfloat from "marking" where the figures would have been placed
% in the text. The two hack lines of code above are a slight modification of
% that suggested in the endfloat docs (section 8.3.1) to ensure that
% the full captions always appear in the list of figures/tables - even if
% the user used the short optional argument of \caption[]{}.
% IEEE papers do not typically make use of \caption[]'s optional argument,
% so this should not be an issue. A similar trick can be used to disable
% captions of packages such as subfig.sty that lack options to turn off
% the subcaptions:
% For subfig.sty:
% \let\MYorigsubfloat\subfloat
% \renewcommand{\subfloat}[2][\relax]{\MYorigsubfloat[]{#2}}
% For subfigure.sty:
% \let\MYorigsubfigure\subfigure
% \renewcommand{\subfigure}[2][\relax]{\MYorigsubfigure[]{#2}}
% However, the above trick will not work if both optional arguments of
% the \subfloat/subfig command are used. Furthermore, there needs to be a
% description of each subfigure *somewhere* and endfloat does not add
% subfigure captions to its list of figures. Thus, the best approach is to
% avoid the use of subfigure captions (many IEEE journals avoid them anyway)
% and instead reference/explain all the subfigures within the main caption.
% The latest version of endfloat.sty and its documentation can be obtained at:
% http://www.ctan.org/tex-archive/macros/latex/contrib/endfloat/
%
% The IEEEtran \ifCLASSOPTIONcaptionsoff conditional can also be used
% later in the document, say, to conditionally put the References on a
% page by themselves.
% *** PDF, URL AND HYPERLINK PACKAGES ***
%
%\usepackage{url}
% url.sty was written by Donald Arseneau. It provides better support for
% handling and breaking URLs. url.sty is already installed on most LaTeX
% systems. The latest version can be obtained at:
% http://www.ctan.org/tex-archive/macros/latex/contrib/misc/
% Read the url.sty source comments for usage information. Basically,
% \url{my_url_here}.
% *** Do not adjust lengths that control margins, column widths, etc. ***
% *** Do not use packages that alter fonts (such as pslatex).         ***
% There should be no need to do such things with IEEEtran.cls V1.6 and later.
% (Unless specifically asked to do so by the journal or conference you plan
% to submit to, of course.)


% special characters
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}

% for algorithms
\usepackage[linesnumbered,lined,ruled]{algorithm2e}
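% Usage sketch of algorithm2e with the options loaded above
% (linesnumbered,lined,ruled); the environment is used in the document
% body, e.g. (purely illustrative pseudocode):
% \begin{algorithm}
% \KwIn{a spatio-temporal range query $q$}
% \KwOut{the set of matching image windows $R$}
% \ForEach{grid cell $g$ overlapping $q$}{
%   add the pre-materialized pixel window of $g$ to $R$\;
% }
% \caption{Window lookup for a range query (illustrative only).}
% \end{algorithm}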
\usepackage{enumerate}
\usepackage{amssymb}

\usepackage{xcolor} % load the color package

% correct bad hyphenation here
\hyphenation{op-tical net-works semi-conduc-tor}
% comment-printing switch: set \toPrintComments to "true" to show
% inline \kol{...} comments, or anything else to suppress them
\usepackage{ifthen} % needed for \ifthenelse below
\newcommand\toPrintComments{true}
\newcommand{\kol}[1]{%
\ifthenelse{\equal{\toPrintComments}{true}}{%
{\em #1}%
}{}%
}
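% Usage sketch of the \kol command defined above: it prints its argument
% in italics when \toPrintComments is "true" and nothing otherwise, e.g.
% ... in the final version.\kol{TODO: re-check the throughput numbers.}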
\begin{document}
%
% paper title
% can use linebreaks \\ within to get better formatting as desired
\title{An I/O-efficient approach for concurrent spatio-temporal range queries over large-scale remote sensing image data}
%
%
% author names and IEEE memberships
% note positions of commas and nonbreaking spaces ( ~ ) LaTeX will not break
% a structure at a ~ so this keeps an author's name from being broken across
% two lines.
% use \thanks{} to gain access to the first footnote area
% a separate \thanks must be used for each paragraph as LaTeX2e's \thanks
% was not built to handle multiple paragraphs
%
%
%\IEEEcompsocitemizethanks is a special \thanks that produces the bulleted
% lists the Computer Society journals use for "first footnote" author
% affiliations. Use \IEEEcompsocthanksitem which works much like \item
% for each affiliation group. When not in compsoc mode,
% \IEEEcompsocitemizethanks becomes like \thanks and
% \IEEEcompsocthanksitem becomes a line break with indentation. This
% facilitates dual compilation, although admittedly the differences in the
% desired content of \author between the different types of papers makes a
% one-size-fits-all approach a daunting prospect. For instance, compsoc
% journal papers have the author affiliations above the "Manuscript
% received ..." text while in non-compsoc journals this is reversed. Sigh.

%%\author{Michael~Shell,~\IEEEmembership{Member,~IEEE,}
%% John~Doe,~\IEEEmembership{Fellow,~OSA,}
%% and~Jane~Doe,~\IEEEmembership{Life~Fellow,~IEEE}% <-this % stops a space
%%\IEEEcompsocitemizethanks{\IEEEcompsocthanksitem M. Shell is with the Department
%%of Electrical and Computer Engineering, Georgia Institute of Technology, Atlanta,
%%GA, 30332.\protect\\
% note need leading \protect in front of \\ to get a newline within \thanks as
% \\ is fragile and will error, could use \hfil\break instead.
%%E-mail: see http://www.michaelshell.org/contact.html
%%\IEEEcompsocthanksitem J. Doe and J. Doe are with Anonymous University.}% <-this % stops a space
%%\thanks{}}
\author{Ze~Deng,
Yue~Wang,
Tao~Liu,
Schahram~Dustdar,~\IEEEmembership{Fellow,~IEEE,}
Rajiv~Ranjan,
Albert~Zomaya,~\IEEEmembership{Fellow,~IEEE,}
Yizhi~Liu,
and~Lizhe~Wang$^{\dagger}$,~\IEEEmembership{Fellow,~IEEE}% <-this % stops a space
\IEEEcompsocitemizethanks{
\IEEEcompsocthanksitem Z. Deng, L. Wang (Corresponding author, lizhe.Wang@gmail.com), Y. Wang, T. Liu, and Y. Liu are with the School of Computer Science, China University of Geosciences, Wuhan 430078, P.R. China.
\IEEEcompsocthanksitem Z. Deng and L. Wang are also with the Hubei Key Laboratory of Intelligent Geo-Information Processing, China University of Geosciences, Wuhan 430074, P.R. China.
\IEEEcompsocthanksitem S. Dustdar is with the Technische Universit\"{a}t Wien, Austria.
\IEEEcompsocthanksitem R. Ranjan is with the School of Computing, Newcastle University, U.K.
\IEEEcompsocthanksitem A. Zomaya is with the School of Information Technologies, The University of Sydney, Sydney, Australia.
%%\IEEEcompsocthanksitem M. Shell is with the Department
%%of Electrical and Computer Engineering, Georgia Institute of Technology, Atlanta,
%%GA, 30332.\protect\\
% note need leading \protect in front of \\ to get a newline within \thanks as
% \\ is fragile and will error, could use \hfil\break instead.
%%%E-mail: see http://www.michaelshell.org/contact.html
%%\IEEEcompsocthanksitem J. Doe and J. Doe are with Anonymous %%University.
}% <-this % stops a space
\thanks{}
}
% note the % following the last \IEEEmembership and also \thanks -
% these prevent an unwanted space from occurring between the last author name
% and the end of the author line. i.e., if you had this:
%
% \author{....lastname \thanks{...} \thanks{...} }
%          ^------------^------------^----Do not want these spaces!
%
% a space would be appended to the last name and could cause every name on that
% line to be shifted left slightly. This is one of those "LaTeX things". For
% instance, "\textbf{A} \textbf{B}" will typeset as "A B" not "AB". To get
% "AB" then you have to do: "\textbf{A}\textbf{B}"
% \thanks is no different in this regard, so shield the last } of each \thanks
% that ends a line with a % and do not let a space in before the next \thanks.
% Spaces after \IEEEmembership other than the last one are OK (and needed) as
% you are supposed to have spaces between the names. For what it is worth,
% this is a minor point as most people would not even notice if the said evil
% space somehow managed to creep in.
% The paper headers
\markboth{IEEE Transactions on Computers,~Vol.~XX, No.~X, January~2014}%
{Deng \MakeLowercase{\textit{et al.}}: An I/O-Efficient Approach for Concurrent Spatio-Temporal Range Queries over Large-Scale Remote Sensing Image Data}
% The only time the second header will appear is for the odd numbered pages
% after the title page when using the twoside option.
%
% *** Note that you probably will NOT want to include the author's ***
% *** name in the headers of peer review papers.                   ***
% You can use \ifCLASSOPTIONpeerreview for conditional compilation here if
% you desire.


% The publisher's ID mark at the bottom of the page is less important with
% Computer Society journal papers as those publications place the marks
% outside of the main text columns and, therefore, unlike regular IEEE
% journals, the available text space is not reduced by their presence.
% If you want to put a publisher's ID mark on the page you can do it like
% this:
%\IEEEpubid{0000--0000/00\$00.00~\copyright~2007 IEEE}
% or like this to get the Computer Society new two part style.
%\IEEEpubid{\makebox[\columnwidth]{\hfill 0000--0000/00/\$00.00~\copyright~2007 IEEE}%
%\hspace{\columnsep}\makebox[\columnwidth]{Published by the IEEE Computer Society\hfill}}
% Remember, if you use this you must call \IEEEpubidadjcol in the second
% column for its text to clear the IEEEpubid mark (Computer Society journal
% papers don't need this extra clearance.)
% for Computer Society papers, we must declare the abstract and index terms
|
|||
|
|
% PRIOR to the title within the \IEEEcompsoctitleabstractindextext IEEEtran
|
|||
|
|
% command as these need to go into the title area created by \maketitle.
|
|||
|
|
\IEEEcompsoctitleabstractindextext{%
|
|||
|
|
\begin{abstract}
|
|||
|
|
%\boldmath
|
|||
|
|
High-performance remote sensing analytics workflows require ingesting and querying massive image archives to support real-time spatio-temporal applications. While modern systems utilize window-based I/O reading to reduce data transfer, they face a dual bottleneck: the prohibitive overhead of runtime geospatial computations caused by the decoupling of logical indexing from physical storage, and severe storage-level I/O contention triggered by uncoordinated concurrent reads. To address these limitations, we present a comprehensive I/O-aware query processing approach based on a novel "Index-as-an-Execution-Plan" paradigm. We introduce a dual-layer inverted structure that serves as a deterministic I/O planner, pre-materializing grid-to-pixel mappings to completely eliminate runtime geometric calculations. Furthermore, we design a hybrid concurrency-aware I/O coordination protocol that adaptively integrates Calvin-style deterministic ordering with optimistic execution, effectively converting I/O contention into request merging opportunities. To handle fluctuating workloads, we incorporate a Surrogate-Assisted Genetic Multi-Armed Bandit mechanism for automatic parameter tuning. Evaluated on a distributed cluster with Sentinel-2 datasets, our approach reduces end-to-end latency by an order of magnitude compared to standard window-based reading, achieves linear throughput scaling under high concurrency, and demonstrates superior convergence speed in automatic tuning.
\end{abstract}

% IEEEtran.cls defaults to using nonbold math in the Abstract.
% This preserves the distinction between vectors and scalars. However,
% if the journal you are submitting to favors bold math in the abstract,
% then you can use LaTeX's standard command \boldmath at the very start
% of the abstract to achieve this. Many IEEE journals frown on math
% in the abstract anyway. In particular, the Computer Society does
% not want either math or citations to appear in the abstract.

% Note that keywords are not normally used for peer review papers.
\begin{keywords}
Remote sensing data management, Spatio-temporal range queries, I/O-aware indexing, Concurrency control, I/O tuning
\end{keywords}}

% make the title area
\maketitle

% To allow for easy dual compilation without having to reenter the
% abstract/keywords data, the \IEEEcompsoctitleabstractindextext text will
% not be used in maketitle, but will appear (i.e., to be "transported")
% here as \IEEEdisplaynotcompsoctitleabstractindextext when compsoc mode
% is not selected <OR> if conference mode is selected - because compsoc
% conference papers position the abstract like regular (non-compsoc)
% papers do!
\IEEEdisplaynotcompsoctitleabstractindextext
% \IEEEdisplaynotcompsoctitleabstractindextext has no effect when using
% compsoc under a non-conference mode.

% For peer review papers, you can put extra information on the cover
% page as needed:
% \ifCLASSOPTIONpeerreview
% \begin{center} \bfseries EDICS Category: 3-BBND \end{center}
% \fi
%
% For peerreview papers, this IEEEtran command inserts a page break and
% creates the second title. It will be ignored for other modes.
\IEEEpeerreviewmaketitle

\section{Introduction}
% Computer Society journal papers do something a tad strange with the very
% first section heading (almost always called "Introduction"). They place it
% ABOVE the main text! IEEEtran.cls currently does not do this for you.
% However, You can achieve this effect by making LaTeX jump through some
% hoops via something like:
%
%\ifCLASSOPTIONcompsoc
% \noindent\raisebox{2\baselineskip}[0pt][0pt]%
% {\parbox{\columnwidth}{\section{Introduction}\label{sec:introduction}%
% \global\everypar=\everypar}}%
% \vspace{-1\baselineskip}\vspace{-\parskip}\par
%\else
% \section{Introduction}\label{sec:introduction}\par
%\fi
%
% Admittedly, this is a hack and may well be fragile, but seems to do the
% trick for me. Note the need to keep any \label that may be used right
% after \section in the above as the hack puts \section within a raised box.

% The very first letter is a 2 line initial drop letter followed
% by the rest of the first word in caps (small caps for compsoc).
%
% form to use if the first word consists of a single letter:
% \IEEEPARstart{A}{demo} file is ....
%
% form to use if you need the single drop letter followed by
% normal text (unknown if ever used by IEEE):
% \IEEEPARstart{A}{}demo file is ....
%
% Some journals put the first two words in caps:
% \IEEEPARstart{T}{his demo} file is ....
%
% Here we have the typical use of a "T" for an initial drop letter
% and "HIS" in caps to complete the first word.
%%\IEEEPARstart{T}{his} demo file is intended to serve as a ``starter file''
%%for IEEE Computer Society journal papers produced under \LaTeX\ using
%%IEEEtran.cls version 1.7 and later.
% You must have at least 2 lines in the paragraph with the drop letter
% (should never be an issue)
%%I wish you the best of success.

%%\hfill mds

%%\hfill January 11, 2007
\IEEEPARstart{A}{massive} amount of remote sensing (RS) data, characterized by high spatial, temporal, and spectral resolutions, is being generated at an unprecedented speed due to the rapid advancement of Earth observation missions \cite{Ma15RS_bigdata}. For instance, NASA’s AVIRIS-NG acquires nearly 9 GB of data per hour, while the EO-1 Hyperion sensor generates over 1.6 TB daily \cite{Haut21DDL_RS}. Beyond the sheer volume of data, these datasets are increasingly subjected to intensive concurrent access from global research communities and real-time emergency response systems (e.g., multi-departmental coordination during natural disasters). Consequently, modern RS platforms are required to provide not only massive storage capacity but also high-throughput query capabilities to satisfy the simultaneous demands of numerous spatio-temporal analysis tasks.
\par
Existing RS data management systems \cite{LEWIS17datacube, Yan21RS_manage1, liu24mstgi} typically decompose a spatio-temporal range query into a decoupled two-phase execution model. The first phase is the metadata filtering phase, which utilizes spatio-temporal metadata (e.g., footprints, timestamps) to identify candidate image files that intersect the query predicate. Recent advancements have transitioned from traditional tree-based indexes \cite{Strobl08PostGIS, Simoes16PostGIST} to scalable distributed schemes based on grid encodings and space-filling curves, such as GeoHash \cite{suwardi15geohash}, GeoSOT \cite{Yan21RS_manage1}, and GeoMesa \cite{hughes15geomesa}. By leveraging these high-dimensional indexing structures, the search complexity of the first phase has been effectively reduced to $O(\log N)$ or even $O(1)$, making metadata discovery extremely efficient even for billion-scale datasets.
\par
The second phase is the data extraction phase, where the system reads the actual pixel data from the identified raw image files stored in distributed file systems or object stores. A critical observation in modern high-performance RS analytics is that the primary system bottleneck has fundamentally shifted from the first phase to the second. While the metadata search completes in milliseconds, the end-to-end query latency is now dominated by the massive I/O overhead required to fetch, decompress, and process large-scale raw images. Traditional systems attempted to reduce I/O overhead by pre-slicing tiles and building pyramids (e.g., approaches used in Google Earth Engine \cite{gorelick17GEE} that store metadata in HBase and serve pre-tiled image pyramids), but aggressive tiling increases management complexity and produces many small files. More recent Cloud-Optimized GeoTIFF (COG) formats and COG-aware frameworks \cite{LEWIS17datacube}, \cite{riotiler25riotiler} exploit internal overviews and window-based I/O to read only the portions of files that spatially intersect a query.
\par
While window-based I/O effectively reduces raw data transfer, it introduces a new ``computation wall'' due to the decoupling of logical indexing from physical storage. Current state-of-the-art systems operate on a ``Search-then-Compute-then-Read'' model: after identifying candidate files, they must perform fine-grained, per-image geospatial computations at runtime to map query coordinates to precise file offsets and clip boundaries. This runtime geometric resolution ($C_{geo}$) becomes computationally prohibitive when processing a large volume of candidate images, often negating the benefits of I/O reduction. Moreover, under concurrent workloads, the lack of coordination among these independent read requests leads to severe I/O contention and storage thrashing, rendering traditional indexing-centric optimizations insufficient for real-time applications.
\par
To address the problems above, we propose a novel ``Index-as-an-Execution-Plan'' paradigm to strictly bound the query latency. Unlike conventional approaches that treat indexing and I/O execution as separate stages, our approach integrates fine-grained partial querying directly into the indexing structure. By pre-materializing the mapping between logical spatial grids and physical pixel windows, our system enables deterministic I/O planning without runtime geometric computation. To further ensure scalability, we introduce a concurrency control protocol tailored for spatio-temporal range queries and an automatic I/O tuning mechanism. The principal contributions of this paper are summarized as follows:
\begin{enumerate}
\item We propose an I/O-aware ``Index-as-an-Execution-Plan'' schema. Instead of merely returning candidate image identifiers, our index directly translates high-level spatio-temporal predicates into concrete, byte-level windowed read plans. This design bridges the semantic gap between logical queries and physical storage, eliminating expensive runtime geospatial computations and ensuring that I/O cost is strictly proportional to the query footprint.
\item We propose a hybrid concurrency-aware I/O coordination protocol. This protocol adapts transaction processing principles by integrating Calvin-style deterministic ordering \cite{Thomson12Calvin} with optimistic execution \cite{Lim17OCC}. It shifts the focus from protecting database rows to coordinating shared I/O flows, and dynamically switches strategies based on spatial contention, effectively converting ``I/O contention'' into ``request merging opportunities.''
\item We propose an automatic I/O tuning method to improve the I/O performance of spatio-temporal range queries over remote sensing data. The method extends an existing AI-powered I/O tuning framework \cite{Rajesh24TunIO} based on a surrogate-assisted genetic multi-armed bandit algorithm \cite{Preil25GMAB}.
\end{enumerate}

\par
The remainder of this paper is organized as follows:
Section~\ref{sec:RW} reviews related work.
Section~\ref{sec:DF} formalizes the spatio-temporal range query problem.
Section~\ref{sec:Index} presents the I/O-aware indexing structure.
Section~\ref{sec:CC} describes the hybrid concurrency control protocol.
Section~\ref{sec:Tuning} presents the automatic I/O tuning method.
Section~\ref{sec:EXP} reports the experiments and results.
Section~\ref{sec:Con} concludes the paper.

\section{Related Work}\label{sec:RW}
This section reviews the most salient studies on I/O-efficient spatio-temporal query processing, concurrency control, and I/O performance tuning.
\subsection{I/O-Efficient Spatio-Temporal Query Processing}
Efficient spatio-temporal query processing for remote sensing data has been extensively studied, with early efforts primarily focusing on metadata organization and index-level pruning in relational database systems. Traditional approaches typically extend tree-based spatial indexes, such as R-tree \cite{Strobl08PostGIS}, quadtree \cite{Tang12Quad-Tree}, and their spatio-temporal variants \cite{Simoes16PostGIST}, to organize image footprints together with temporal attributes, and are commonly implemented on relational backends (e.g., MySQL and PostgreSQL). These methods provide efficient range filtering for moderate-scale datasets, but their reliance on balanced tree structures often leads to high maintenance overhead and limited scalability as the volume of remote sensing metadata grows rapidly. With the continuous increase in data volume and ingestion rate, recent systems have gradually shifted toward grid-based spatio-temporal indexing schemes deployed on distributed NoSQL stores. By encoding spatial footprints into uniform spatial grids using GeoHash \cite{suwardi15geohash}, GeoSOT \cite{Yan21RS_manage1}, or space-filling curves \cite{hughes15geomesa}, \cite{liu24mstgi}, and combining them with temporal identifiers, these approaches enable lightweight index construction and better horizontal scalability on backends such as HBase and Elasticsearch. Such grid-based indexes can effectively reduce the candidate search space through coarse-grained pruning and are more suitable for large-scale, continuously growing remote sensing archives.
\par
However, index pruning alone is insufficient to guarantee end-to-end query efficiency for remote sensing workloads, where individual images are usually large and query results require further pixel-level processing. To reduce the amount of raw I/O, Google Earth Engine \cite{gorelick17GEE} relies on tiling and multi-resolution pyramids that physically split images into small blocks, while more recent solutions leverage COG and window-based I/O to enable partial reads from monolithic image files. Frameworks such as OpenDataCube \cite{LEWIS17datacube} exploit these features to read only the image regions intersecting a query window, thereby reducing unnecessary data transfer. Nevertheless, after candidate images are identified, most systems still perform fine-grained geospatial computations for each image, including coordinate transformations and precise pixel-window derivation, which may incur substantial overhead when many images are involved.
\subsection{Concurrency Control}
Concurrency control has long been studied to provide correctness and high throughput in multi-user database and storage systems, with two broad paradigms dominating the literature: deterministic scheduling \cite{Thomson12Calvin} and non-deterministic schemes \cite{Bernstein812PL}, \cite{KungR81OCC}. Hybrid approaches \cite{WangK16MVOCC}, \cite{Hong25HDCC} that adaptively combine these paradigms seek to exploit the low-conflict efficiency of deterministic execution while retaining the flexibility of optimistic techniques. More recent proposals such as OOCC target read-heavy, disaggregated settings by reducing validation and round-trips for read-only transactions, achieving low latency under OLTP-like workloads \cite{Wu25OOCC}. These CC families are primarily optimized for record- or key-level access patterns: their metrics and designs emphasize transaction latency, abort rates, and throughput under workloads with small, well-defined read/write sets.
\par
Overall, existing concurrency control mechanisms are largely designed around transaction-level correctness and throughput, assuming record- or key-based access patterns and treating storage I/O as a black box. Their optimization objectives rarely account for I/O amplification or fine-grained storage contention induced by concurrent range queries. Consequently, these approaches are ill-suited for data-intensive spatio-temporal workloads, where coordinating overlapping window reads and mitigating storage-level interference are critical to achieving scalable performance under multi-user access.
\subsection{I/O Performance Tuning in Storage Systems}
I/O performance tuning has been extensively studied in the context of HPC and data-intensive storage systems, where complex multi-layer I/O stacks expose a large number of tunable parameters. These parameters span different layers, including application-level I/O libraries, middleware, and underlying storage systems, and their interactions often lead to highly non-linear performance behaviors. As a result, manual tuning is time-consuming and error-prone, motivating a wide range of auto-tuning approaches.
\par
Several studies focus on improving the efficiency of the tuning pipeline itself by reformulating the search space or optimization objectives. Chen et al. \cite{Chen21Tuning1} proposed a meta multi-objectivization (MMO) model that introduces auxiliary performance objectives to mitigate premature convergence to local optima. While such techniques can improve optimization robustness, they are largely domain-agnostic and do not explicitly account for the characteristics of I/O-intensive workloads. Other works, such as the contextual bandit-based approach by Bez et al. \cite{Bez20TuningLayer}, optimize specific layers of the I/O stack (e.g., I/O forwarding) by exploiting observed access patterns. However, these methods are primarily designed for administrator-level tuning and target isolated components rather than end-to-end application I/O behavior.
\par
User-level I/O tuning has also been explored, most notably by H5Tuner \cite{Behzad13HDF5}, which employs genetic algorithms to optimize the configuration of the HDF5 I/O library. Although effective for single-layer tuning, H5Tuner does not consider cross-layer interactions and lacks mechanisms for reducing tuning cost, such as configuration prioritization or early stopping.
\par
More recently, TunIO \cite{Rajesh24TunIO} proposed an AI-powered I/O tuning framework that explicitly targets the growing configuration spaces of modern I/O stacks. TunIO integrates several advanced techniques, including I/O kernel extraction, smart selection of high-impact parameters, and reinforcement learning–driven early stopping, to balance tuning cost and performance gain across multiple layers. Despite its effectiveness, TunIO and related frameworks primarily focus on single-application or isolated workloads, assuming stable access patterns during tuning. Query-level I/O behaviors, such as fine-grained window access induced by spatio-temporal range queries, as well as interference among concurrent users, are generally outside the scope of existing I/O tuning approaches.
\section{Problem Definition}\label{sec:DF}
This section formalizes the spatio-temporal range query problem and establishes the cost models for query execution. We assume a distributed storage environment where large-scale remote sensing images are stored as objects or files.
\par
Definition~1 (Spatio-temporal Remote Sensing Image). A remote sensing image $R$ is defined as a tuple:
\vspace{-0.05in}
\begin{equation}
\label{eqn:pre_rs}
R=\langle id, \Omega, \mathcal{D}, t \rangle,
\end{equation}
where $id$ is the unique identifier; $\Omega = [0, W] \times [0, H]$ denotes the pixel coordinate space; $\mathcal{D}$ represents the raw pixel data; and $t$ is the temporal validity interval. The image is associated with a spatial footprint $MBR(R)$ in the global coordinate reference system.
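For concreteness, the image record of Definition~1 and the predicate of the following definition can be sketched in a few lines; the field names, types, and helper below are illustrative rather than a schema prescribed by our system:

```python
from dataclasses import dataclass
from typing import Tuple

@dataclass(frozen=True)
class RSImage:
    """A remote sensing image R = <id, Omega, D, t> (Definition 1)."""
    id: str                                  # unique identifier
    omega: Tuple[int, int]                   # pixel coordinate space extent (W, H)
    data_ref: str                            # pointer to the raw pixel data D
    t: Tuple[float, float]                   # temporal validity interval
    mbr: Tuple[float, float, float, float]   # footprint (xmin, ymin, xmax, ymax)

def matches(img: RSImage, s, t) -> bool:
    """True iff MBR(R) intersects S and R.t intersects T (Definition 2)."""
    xmin, ymin, xmax, ymax = s
    return (img.mbr[0] <= xmax and img.mbr[2] >= xmin and
            img.mbr[1] <= ymax and img.mbr[3] >= ymin and
            img.t[0] <= t[1] and img.t[1] >= t[0])
```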
\par
Definition~2 (Spatio-temporal Range Query). Given a dataset $\mathbb{R}$, a query $Q$ is defined by a spatio-temporal predicate $Q = \langle S, T \rangle$, where $S$ is the spatial bounding box and $T$ is the time interval. The query result set $\mathcal{R}_Q$ is defined as:
\vspace{-0.05in}
\begin{equation}
\label{eqn:pre_st_query}
\mathcal{R}_Q=\left\{ R\in \mathbb{R}\mid MBR\left( R \right) \cap S\ne \emptyset \land R.t\cap T\ne \emptyset \right\} .
\end{equation}
\par
For each $R \in \mathcal{R}_Q$, the system must return the pixel matrix corresponding to the intersection region $MBR(R) \cap S$.
\par
Definition~3 (Query Execution Cost Model). The execution latency of a query $Q$, denoted as $Cost(Q)$, is composed of two phases: metadata filtering and data extraction.
\begin{equation}
\label{eqn:cost_total}
Cost\left( Q \right) =C_{meta}\left( Q \right) +\sum_{R\in \mathcal{R}_Q}{\left( C_{geo}\left( R,Q \right) +C_{io}\left( R,Q \right) \right)}.
\end{equation}
\par
Here, $C_{meta}(Q)$ is the cost of identifying candidate images $\mathcal{R}_Q$ using indices. The data extraction cost for each image consists of two components: geospatial computation cost ($C_{geo}$) and I/O access cost ($C_{io}$). $C_{geo}$ is the CPU time required to calculate the pixel-to-geographic mapping, determine the exact read windows (offsets and lengths), and handle boundary clipping. In window-based partial reading schemes, this cost is non-negligible due to the complexity of coordinate transformations. $C_{io}$ is the latency to fetch the actual binary data from storage.
\par
Definition~4 (Concurrent Spatio-temporal Queries). Let $\mathcal{Q} = \{Q_1, Q_2, \ldots, Q_N\}$ denote a set of spatio-temporal range queries issued concurrently by multiple users.
Each query $Q_i$ independently specifies a spatio-temporal window $\langle S_i, T_i \rangle$ and may overlap with others in both spatial and temporal dimensions. Concurrent execution of $\mathcal{Q}$ may induce overlapping partial reads over the same images or image regions, leading to redundant I/O and storage-level contention if queries are processed independently.
\par
\textbf{Problem Statement (Latency-Optimized Concurrent Query Processing).} Given a dataset $\mathbb{R}$ and a concurrent workload $\mathcal{Q}$, the objective is to minimize the total execution latency:
\vspace{-0.05in}
\begin{equation}
\label{eqn_pre_objective}
\min \sum_{Q_i\in \mathcal{Q}}{\left( C_{meta}\left( Q_i \right) +\sum_{R\in \mathcal{R}_{Q_i}}{\left( C_{geo}\left( R,Q_i \right) +C_{io}\left( R,Q_i \right) \right)} \right)},
\end{equation}
subject to:
\begin{enumerate}
\item \textit{Correctness:} The returned data must strictly match the spatio-temporal predicate defined in Eq. (\ref{eqn:pre_st_query}).
\item \textit{Isolation:} Concurrent reads must effectively share I/O bandwidth without causing starvation or excessive thrashing.
\end{enumerate}

\section{I/O-Aware Indexing Structure}\label{sec:Index}
This section introduces the details of the indexing structure for spatio-temporal range queries over remote sensing image data.
\begin{figure*}[htb]
\centering
\includegraphics[width=0.90\textwidth]{fig/index.png}
\caption{Index schema design.}
\label{fig:index}
\end{figure*}
\subsection{Index schema design}
\par
To enable I/O-efficient spatio-temporal query processing, we first decompose the global spatial domain into a uniform grid that serves as the basic unit for query pruning and data access coordination. Specifically, we adopt a fixed-resolution global tiling scheme based on the Web Mercator (or EPSG:4326) coordinate system, using zoom level 14 to partition the Earth’s surface into fine-grained grid cells (our experiments in Section~\ref{sec:Index_exp_3} show that the level-14 grid yields the highest indexing efficiency). This resolution strikes a practical balance between spatial selectivity and index size: finer levels would significantly increase metadata volume and maintenance cost, while coarser levels would reduce pruning effectiveness and lead to unnecessary image I/O. At this scale, each grid cell typically corresponds to a spatial extent comparable to common query footprints and to the internal tiling granularity used by modern raster formats, making it well suited for partial data access.
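Assuming the standard Web Mercator ``slippy map'' tile layout (a common convention; our grid may differ in origin or datum), mapping a WGS84 coordinate to its zoom-14 cell reduces to a few arithmetic operations:

```python
import math

def lonlat_to_tile(lon: float, lat: float, zoom: int = 14):
    """Map a WGS84 coordinate to its (x, y) tile index at the given zoom
    level, using the standard Web Mercator tiling. This is a sketch of the
    grid decomposition; the exact tiling formulas are an assumption."""
    n = 2 ** zoom
    x = int((lon + 180.0) / 360.0 * n)
    lat_rad = math.radians(lat)
    y = int((1.0 - math.asinh(math.tan(lat_rad)) / math.pi) / 2.0 * n)
    return x, y
```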
\par
\textbf{Grid-to-Image Mapping (G2I).}
Based on the grid decomposition, we construct a grid-centric inverted index to associate spatial units with covering images. In our system, each grid cell is assigned a unique \emph{GridKey}, encoded as a 64-bit Z-order value to preserve spatial locality and enable efficient range scans in key-value stores such as HBase. The \emph{G2I table} stores one row per grid cell, where the row key is the GridKey and the value maintains the list of image identifiers (ImageKeys) whose spatial footprints intersect the corresponding cell, as illustrated in Fig.~\ref{fig:index}(a).
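The GridKey itself can be produced by a textbook Morton (Z-order) bit interleave; the exact 64-bit layout below (packing the zoom level into the high bits) is an illustrative assumption, and only the Z-order interleaving is part of the design:

```python
def grid_key(x: int, y: int, zoom: int = 14) -> int:
    """Encode tile coordinates as a 64-bit Z-order (Morton) key that
    preserves spatial locality for range scans over a key-value store."""
    def spread(v: int) -> int:
        # Interleave a zero bit after each of the low 16 bits of v.
        v &= 0xFFFF
        v = (v | (v << 8)) & 0x00FF00FF
        v = (v | (v << 4)) & 0x0F0F0F0F
        v = (v | (v << 2)) & 0x33333333
        v = (v | (v << 1)) & 0x55555555
        return v
    # x bits land on odd positions, y bits on even positions.
    return (zoom << 32) | (spread(x) << 1) | spread(y)
```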
\par
This grid-to-image mapping allows query processing to begin with a lightweight enumeration of grid cells covered by a query region, followed by direct lookups of candidate images via exact GridKey matches. By treating each grid cell as an independent spatial bucket, the G2I table provides efficient metadata-level pruning and avoids costly geometric intersection tests over large image footprints.
\par
However, the G2I table alone is insufficient for I/O-efficient query execution. While it identifies which images are relevant to a given grid cell, it does not capture how the grid cell maps to pixel regions within each image. As a result, a grid-only representation cannot directly guide partial reads and would still require per-image geospatial computations at query time. Therefore, the G2I table functions as a coarse spatial filter and must be complemented by an image-centric structure that materializes the correspondence between grid cells and pixel windows, enabling fine-grained, window-based I/O.
\par
\textbf{Image-to-Grid Mapping (I2G).}
To complement the grid-centric G2I table and enable fine-grained, I/O-efficient data access, we introduce an image-centric inverted structure, referred to as the Image-to-Grid mapping (I2G). In contrast to G2I, which organizes metadata by spatial grids, the I2G table stores all grid-level access information of a remote sensing image in a single row. Each image therefore occupies exactly one row in the table, significantly improving locality during query execution.
\par
As illustrated in Fig.~\ref{fig:index}(b), the row key of the I2G table is the \emph{ImageKey}, i.e., the unique identifier of a remote sensing image. The row value is organized into three column families, each serving a distinct role in query-time pruning and I/O coordination:
\par
\textit{Grid–Window Mapping.}
This column family records the list of grid cells intersected by the image together with their corresponding pixel windows in the image coordinate space. Each entry has the form
\[
\langle \textit{GridKey}, W_{ImageKey\_GridKey} \rangle,
\]
where \textit{GridKey} identifies a grid cell at the chosen global resolution, and $W_{ImageKey\_GridKey}$ denotes the minimal pixel bounding rectangle within the image that exactly covers that grid cell.
\par
These precomputed window offsets allow the query executor to directly issue windowed reads on large raster files without loading entire images into memory or recomputing geographic-to-pixel transformations at query time. As a result, grid cells become the smallest unit of coordinated I/O, enabling precise partial reads and effective elimination of redundant disk accesses.
\par
\textit{Temporal Metadata.}
To support spatio-temporal range queries, each image row includes a lightweight temporal column family that stores its acquisition time information, such as the sensing timestamp or time interval. This metadata enables efficient temporal filtering to be performed jointly with spatial grid matching, without consulting external catalogs or secondary indexes.
\par
\textit{Storage Pointer.}
This column family contains the information required to retrieve image data from the underlying storage system. It stores a stable file identifier, such as an object key in an object store (e.g., MinIO/S3) or an absolute path in a POSIX-compatible file system. By decoupling logical image identifiers from physical storage locations, this design supports flexible deployment across heterogeneous storage backends while allowing the query engine to directly access image files once relevant pixel windows have been identified.
\par
The I2G table offers several advantages. First, all grid-level access information for the same image is colocated in a single row, avoiding repeated random lookups and improving cache locality during query execution. Second, by materializing grid-to-window correspondences at ingestion time, the system completely avoids expensive per-query geometric computations and directly translates spatial overlap into byte-range I/O requests. Third, the number of rows in the I2G table scales with the number of images rather than the number of grid cells, substantially reducing metadata volume and maintenance overhead.
\par
During data ingestion, the grid–window mappings are generated by projecting grid boundaries into the image coordinate system using the image’s georeferencing parameters. This process requires only lightweight affine or RPC transformations and does not involve storing explicit geometries or performing polygon clipping. As a result, the I2G structure enables efficient partial reads while keeping metadata compact and ingestion costs manageable.
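For a north-up image, this ingestion-time projection reduces to a single inverse affine computation; the sketch below assumes a GDAL-style geotransform with zero rotation terms, which is an assumption rather than a requirement of our system:

```python
import math

def grid_to_pixel_window(grid_bounds, geotransform, img_w, img_h):
    """Project a grid cell's geographic bounds into a pixel window
    (col_off, row_off, width, height), clipped to the image extent.
    `geotransform` is GDAL-style (x0, px, 0, y0, 0, -py) for a
    north-up image; the negative y term maps larger y to smaller rows."""
    xmin, ymin, xmax, ymax = grid_bounds
    x0, px, _, y0, _, ny = geotransform          # ny is negative (north-up)
    col0 = max(0, math.floor((xmin - x0) / px))
    col1 = min(img_w, math.ceil((xmax - x0) / px))
    row0 = max(0, math.floor((ymax - y0) / ny))  # ymax maps to the top row
    row1 = min(img_h, math.ceil((ymin - y0) / ny))
    return col0, row0, col1 - col0, row1 - row0
```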
\subsection{Query-time Execution}

\begin{figure}
\centering
\includegraphics[width=2.2in]{fig/st-query.png}
\caption{Query-time execution.}
\label{fig_ST_Query}
\end{figure}
The I/O-aware index enables efficient spatio-temporal range queries by directly translating query predicates into windowed read plans, while avoiding both full-image loading and expensive geometric computations. Given a user-specified spatio-temporal query
$q = \langle [x_{\min}, y_{\min}, x_{\max}, y_{\max}], [t_s, t_e] \rangle$,
the system resolves the query through three consecutive stages: \emph{Grid Enumeration}, \emph{Candidate Image Retrieval with Temporal Pruning}, and \emph{Windowed Read Plan Generation}. As illustrated in Fig.~\ref{fig_ST_Query}, this execution pipeline bridges high-level query predicates and low-level I/O operations in a fully deterministic manner.
\par
\textbf{Grid Enumeration.}
As shown in Step~1 and Step~2 of Fig.~\ref{fig_ST_Query}, the query execution starts by rasterizing the spatial footprint of $q$ into the fixed global grid at zoom level 14. Instead of performing recursive space decomposition as in quadtrees or hierarchical spatial indexes, our system enumerates the minimal set of grid cells
$\{g_1, \ldots, g_k\}$
whose footprints intersect the query bounding box.
\par
Each grid cell corresponds to a unique 64-bit \textit{GridKey}, which directly matches the primary key of the G2I table. This design has important implications: grid enumeration has constant depth and low computational cost, and the resulting GridKeys can be directly used as lookup keys without any geometric refinement. Consequently, spatial key generation is reduced to simple arithmetic operations on integer grid coordinates.
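Grid enumeration therefore reduces to converting the two corners of the query box into tile indices and iterating over the rectangle between them; a sketch, again assuming the standard Web Mercator tiling rather than our exact grid layout:

```python
import math

def tiles_for_bbox(xmin, ymin, xmax, ymax, zoom=14):
    """Enumerate the (x, y) tile indices whose cells intersect a query
    bounding box -- the Grid Enumeration step. No recursion is needed:
    the two corner tiles bound a rectangle of cells."""
    def to_tile(lon, lat):
        n = 2 ** zoom
        tx = min(n - 1, max(0, int((lon + 180.0) / 360.0 * n)))
        ty = min(n - 1, max(0, int(
            (1.0 - math.asinh(math.tan(math.radians(lat))) / math.pi) / 2.0 * n)))
        return tx, ty
    x0, y0 = to_tile(xmin, ymax)   # top-left tile (max latitude -> min y)
    x1, y1 = to_tile(xmax, ymin)   # bottom-right tile
    return [(x, y) for y in range(y0, y1 + 1) for x in range(x0, x1 + 1)]
```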
\par
\textbf{Candidate Image Retrieval with Temporal Pruning.}
Given the enumerated grid set $\{g_1, \ldots, g_k\}$, the query processor performs a batched multi-get on the G2I table. Each G2I row corresponds to a single grid cell and stores the identifiers of all images whose spatial footprints intersect that cell. For each grid $g_i$, the lookup returns:
\[
G2I[g_i] = \{ imgKey_1, \ldots, imgKey_m \}.
\]
\par
All retrieved image identifiers are unioned to form the spatial candidate set
$C_s = \bigcup_{i=1}^{k} G2I[g_i]$.
This step eliminates the need for per-image polygon intersection tests that are commonly required in spatial databases and data cube systems.
|
|||
|
|
|
|||
|
|
\par
|
|||
|
|
To incorporate the temporal constraint $[t_s, t_e]$, each candidate image in $C_s$ is further filtered using the temporal column family of the Image-to-Grid (I2G) table. Images whose acquisition time does not intersect the query interval are discarded early, yielding the final candidate set $C$. This lightweight temporal pruning is performed without accessing any image data and introduces negligible overhead.
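A minimal sketch of this pruning step, assuming for illustration that the temporal column family stores a single acquisition timestamp per image key:

```python
from datetime import datetime

def temporal_prune(c_s, acq_times, t_s, t_e):
    """Keep only candidates whose acquisition time falls inside [t_s, t_e].

    acq_times maps each image key to its acquisition timestamp, as it would
    be stored in the temporal column family of the I2G table (assumed layout).
    """
    return {img for img in c_s if t_s <= acq_times[img] <= t_e}

acq_times = {
    "img_a": datetime(2021, 6, 1),
    "img_b": datetime(2021, 8, 15),
    "img_c": datetime(2022, 1, 3),
}
C_s = {"img_a", "img_b", "img_c"}
C = temporal_prune(C_s, acq_times, datetime(2021, 5, 1), datetime(2021, 12, 31))
# C keeps img_a and img_b; img_c falls outside the interval
```

The filter touches only per-image metadata, never the raster payload, which is why its overhead is negligible.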
\par
\textbf{Windowed Read Plan Generation.}
As shown in Step~3 of Fig.~\ref{fig_ST_Query}, the final stage translates the candidate image set into a concrete I/O plan. For each image $I \in C$, the query executor issues a selective range-get on the I2G table to retrieve only the grid–window mappings relevant to the query grids:
\begin{equation}
\label{eqn_pre_spatial_query}
I2G\left[ I,\{g_1,\ldots,g_k\} \right] =\left\{ W_{I\_g_i}\mid g_i\cap I\ne \emptyset \right\} .
\end{equation}
\par
Each $W_{I\_g_i}$ specifies the exact pixel window in the original raster file that corresponds to grid cell $g_i$. Since these window offsets are precomputed during ingestion, query execution requires only key-based lookups and arithmetic filtering. No geographic coordinate transformation, polygon clipping, or raster–vector intersection is performed at query time.
\par
The resulting collection of pixel windows constitutes a \emph{windowed read plan}, which can be directly translated into byte-range I/O requests against the storage backend. This approach avoids loading entire scenes and ensures that the total I/O volume is proportional to the queried spatial extent rather than the image size.
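For illustration, assuming an uncompressed row-major raster layout (for tiled COGs the mapping would be per internal tile instead), a pixel window translates into byte ranges as follows; note that the bytes requested scale with the window area, not the image size.

```python
def window_to_byte_ranges(win, image_width, bytes_per_px=2, header=0):
    """Translate a pixel window into byte ranges for range-GET requests.

    win = (col_off, row_off, width, height); one half-open [start, end)
    range is emitted per raster row of the window.
    """
    col, row, w, h = win
    row_stride = image_width * bytes_per_px
    ranges = []
    for r in range(row, row + h):
        start = header + r * row_stride + col * bytes_per_px
        ranges.append((start, start + w * bytes_per_px))
    return ranges
```

For a 4$\times$3 window at offset (10, 2) in a 100-pixel-wide image with 2-byte pixels, this yields three 8-byte ranges, i.e., exactly the window's 24 bytes of payload.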
\subsection{Why I/O-aware}
The key reason our indexing design is I/O-aware lies in the fact that the index lookup results are not merely candidate identifiers, but constitute a concrete I/O access plan. Unlike traditional spatial indexes, where query processing yields a set of objects that must still be fetched through opaque storage accesses, our Grid-to-Image and Image-to-Grid lookups deterministically produce the exact pixel windows to be read from disk. As a result, the logical query plan and the physical I/O plan are tightly coupled: resolving a spatio-temporal predicate directly specifies which byte ranges should be accessed and which can be skipped.
\par
This tight coupling fundamentally changes the optimization objective. Instead of minimizing index traversal cost or result-set size, the system explicitly minimizes data movement by ensuring that disk I/O is proportional to the query’s spatio-temporal footprint. Consequently, the index serves as an execution-aware abstraction that bridges query semantics and storage behavior, enabling predictable, bounded I/O under both single-query and concurrent workloads.
\par
\textbf{Theoretical Cost Analysis.}
To rigorously quantify the performance advantage, we revisit the query cost model defined in Eq. (\ref{eqn:cost_total}):
\begin{equation*}
Cost(Q) = C_{meta}(Q) + \sum_{R \in \mathcal{R}_Q} \left( C_{geo}(R, Q) + C_{io}(R, Q) \right).
\end{equation*}
\par
In traditional full-image reading systems, although the geospatial computation cost is negligible ($C_{geo} = 0$) as no clipping is performed, the I/O cost $C_{io}$ is determined by the full file size. Consequently, the total latency is entirely dominated by massive I/O overhead, rendering $C_{meta}$ (typically milliseconds) irrelevant.
\par
Existing window-based I/O systems (e.g., ODC or COG-aware libraries) successfully reduce the I/O cost to the size of the requested window. However, this reduction comes at the expense of a significant surge in $C_{geo}$. For every candidate image, the system must perform on-the-fly coordinate transformations and polygon clipping to calculate read offsets. When a query involves thousands of images, the accumulated CPU time ($\sum C_{geo}$) becomes a new bottleneck (e.g., hundreds of milliseconds to seconds), often negating the benefits of I/O reduction (detailed quantitative comparisons are provided in Sec.~\ref{sec:Index_exp_2}).
\par
In contrast, our I/O-aware indexing approach fundamentally alters this trade-off. By materializing the grid-to-pixel mapping in the I2G table, we effectively shift the computational burden from query time to ingestion time. Although the two-phase lookup (G2I and I2G) introduces a slight overhead compared to simple tree traversals, $C_{meta}$ remains on the order of milliseconds, orders of magnitude smaller than disk I/O latency. Since the precise pixel windows are pre-calculated and stored, the runtime geospatial computation is effectively eliminated, i.e., $C_{geo} = 0$. The system retains the minimal I/O cost characteristic of window-based approaches, fetching only relevant byte ranges. Therefore, our design achieves the theoretical minimum for both computation and I/O components within the query execution critical path.
\section{Hybrid Concurrency-Aware I/O Coordination}\label{sec:CC}
In this section, we propose a hybrid coordination mechanism that adaptively employs either lock-free non-deterministic execution or deterministic coordinated scheduling based on the real-time contention level of spatio-temporal workloads.
\begin{figure}
\centering
\includegraphics[width=3.0in]{fig/cc.png}
\caption{Hybrid Concurrency-Aware I/O Coordination.}
\label{fig:cc}
\end{figure}
\subsection{Query Admission and I/O Plan Generation}
When a spatio-temporal range query $Q$ arrives, the system first performs index-driven plan generation. The query footprint is rasterized into the global grid to enumerate the intersecting grid cells. The G2I table is then consulted to retrieve the set of candidate images, followed by selective lookups in the I2G table to obtain the corresponding pixel windows.
\par
As a result, each query is translated into an explicit \emph{I/O access plan} consisting of image–window pairs:
\vspace{-0.05in}
\begin{equation}
\label{eq:io_plan}
Plan\left( Q \right) =\left\{ \left( img_1,w_1 \right) ,\left( img_1,w_2 \right) ,\left( img_3,w_5 \right) ,\ldots \right\},
\end{equation}
where each window $w$ denotes a concrete pixel range to be accessed via byte-range I/O. Upon admission, the system assigns each query a unique \emph{QueryID} and records its arrival timestamp.
\subsection{Contention Estimation and Path Selection}
To minimize the overhead of global ordering in low-contention scenarios, the system introduces a Contention-Aware Switch. Upon the arrival of a query batch $\mathcal{Q} = \{Q_1, Q_2, ..., Q_n\}$, the system first estimates the Spatial Overlap Ratio ($\sigma$) among their generated I/O plans.
\par
Let $A(Plan(Q_i))$ be the aggregate spatial area of all pixel windows in the I/O plan of query $Q_i$. The overlap ratio $\sigma$ for a batch is defined as:
\vspace{-0.05in}
\begin{equation}
\vspace{-0.05in}
\label{eqn_overlap_ratio}
\sigma = 1 - \frac{\text{A}(\bigcup_{i=1}^n Plan(Q_i))}{\sum_{i=1}^n \text{A}(Plan(Q_i))},
\end{equation}
where $\sigma \in [0, 1]$. A high $\sigma$ indicates that multiple queries are competing for the same image regions, leading to high I/O amplification if executed independently.
\par
The system utilizes a rule-based assignment mechanism similar to HDCC \cite{Hong25HDCC} to select the execution path:
\begin{enumerate}
\item Path A (Non-deterministic/OCC-style): If $\sigma < \tau$ (where $\tau$ is a configurable threshold), queries proceed directly to execution to maximize concurrency.
\item Path B (Deterministic/Calvin-style): If $\sigma \ge \tau$, queries are routed to the Global I/O Plan Queue for coordinated merging.
\end{enumerate}
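The switch can be sketched as follows, modeling each plan as a set of (image, window) pairs so that set cardinality stands in for the aggregate area $A(\cdot)$; the threshold value \texttt{TAU} is an illustrative assumption.

```python
TAU = 0.3  # assumed contention threshold; tunable in the real system

def overlap_ratio(plans):
    """Spatial Overlap Ratio sigma for a batch of I/O plans.

    Each plan is modeled as a set of (img, window) pairs, so set
    cardinality serves as the aggregate area A(Plan(Q_i)).
    """
    union_area = len(set().union(*plans))
    total_area = sum(len(p) for p in plans)
    return 1.0 - union_area / total_area

def select_path(plans, tau=TAU):
    """Route the batch: OCC-style Path A below tau, Calvin-style Path B otherwise."""
    return "deterministic" if overlap_ratio(plans) >= tau else "optimistic"

# Q1 and Q2 share window 2 of img1: sigma = 1 - 3/4 = 0.25
q1 = {("img1", 1), ("img1", 2)}
q2 = {("img1", 2), ("img1", 3)}
sigma = overlap_ratio([q1, q2])
```

With $\tau = 0.3$, this batch takes the non-deterministic path; fully disjoint plans give $\sigma = 0$ and always stay on Path A.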
\subsection{Deterministic Coordinated and Non-deterministic Execution}
When $\sigma \ge \tau$, the system switches to a deterministic path to mitigate storage-level contention and I/O amplification, as shown in Fig.~\ref{fig:cc}. To coordinate concurrent access to shared storage resources, we introduce a \emph{Global I/O Plan Queue} that enforces a deterministic ordering over all admitted I/O plans. Each windowed access $(img, w)$ derived from incoming queries is inserted into this queue according to a predefined policy, such as FIFO based on arrival time or lexicographic ordering by $(timestamp, QueryID)$.
\par
This design is inspired by deterministic scheduling in systems such as Calvin, but differs fundamentally in its scope: the ordering is imposed on \emph{window-level I/O operations} rather than on transactions. As a result, accesses to the same image region across different queries follow a globally consistent order, preventing uncontrolled interleaving of reads and reducing contention at the storage layer. The deterministic ordering also provides a stable foundation for subsequent I/O coordination and sharing.
\par
The core of our approach lies in coordinating concurrent windowed reads at the image level. Windows originating from different queries may overlap spatially, be adjacent, or even be identical. Executing these requests independently would lead to redundant reads and excessive I/O amplification.
\par
To address this, the system performs three coordination steps within each scheduling interval. \textbf{Stage 1: Global De-duplication.} The system first extracts all windowed access pairs $(img, w)$ from the admitted queries and inserts them into a global window set $\mathcal{W}_{total}$. If multiple queries $Q_1, Q_2, \ldots, Q_n$ request the same pixel window $w$ from image $img$, the system retains only one entry in $\mathcal{W}_{total}$. This stage ensures that any specific byte range is identified as a single logical requirement, preventing redundant retrieval of overlapping spatial grids. \textbf{Stage 2: Range Merging.} After de-duplication, the system analyzes the physical disk offsets of all unique windows in $\mathcal{W}_{total}$. To improve access locality, windows that are physically contiguous or separated by a gap smaller than a threshold $\theta$ are merged into a single read. \textbf{Stage 3: Dispatching.} This stage maintains a mapping between the physical byte offsets in the shared buffer and the logical window requirements of each active query. Each query $Q_i$ receives only the exact pixel windows $w \in Plan(Q_i)$ it originally requested, either via zero-copy memory mapping where possible or by slicing the shared system buffer into per-thread local structures. Thus, while the physical I/O is shared to reduce amplification, the logical execution of each query remains independent and free from irrelevant data.
\par
For example, when $Q_1$ requests grids $\{1, 2\}$ and $Q_2$ requests grids $\{2, 3\}$, Stage 1 identifies the unique requirement set $\{1, 2, 3\}$. Stage 2 then merges these into a single contiguous I/O operation covering the entire range $[1, 3]$. In Stage 3, the dispatcher identifies memory offsets corresponding to grids $1$ and $2$ within the buffer and maps these slices to the private cache of $Q_1$. For $Q_2$, similarly, the dispatcher extracts and delivers slices for grids $2$ and $3$ to $Q_2$.
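The three stages can be sketched on this example, with windows simplified to integer grid indices and the merge-gap threshold $\theta$ set to one grid cell:

```python
def coordinate(plans, theta=1):
    """Stage 1 (global de-duplication) and Stage 2 (range merging).

    Windows are simplified to integer grid indices of one image; real plans
    carry (img, window) pairs with physical byte offsets.
    """
    w_total = sorted(set().union(*plans.values()))   # Stage 1: unique windows
    merged = []                                      # Stage 2: merge near ranges
    for g in w_total:
        if merged and g - merged[-1][1] <= theta:
            merged[-1] = (merged[-1][0], g)          # extend the current range
        else:
            merged.append((g, g))                    # start a new range
    return merged

def dispatch(plans, shared_buffer):
    """Stage 3: each query gets back only the slices it originally requested."""
    return {q: {g: shared_buffer[g] for g in wins} for q, wins in plans.items()}

plans = {"Q1": {1, 2}, "Q2": {2, 3}}
reads = coordinate(plans)                            # one merged read over [1, 3]
shared_buffer = {g: f"px{g}" for g in range(1, 4)}   # buffer contents after the read
per_query = dispatch(plans, shared_buffer)
```

The worked example reproduces the text: the unique set $\{1, 2, 3\}$ collapses into a single read covering $[1, 3]$, and each query is handed back exactly its own grids.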
\par
Through these mechanisms, concurrent queries collaboratively share I/O, and the execution unit becomes a coordinated window read rather than an isolated request. Importantly, this coordination operates entirely at the I/O planning level and does not require any form of locking or transaction-level synchronization.
\par
When contention remains below the threshold ($\sigma < \tau$), the system prioritizes low latency over merging efficiency by adopting an optimistic dispatch mechanism, as shown in Fig.~\ref{fig:cc}. Instead of undergoing heavy-weight sorting, I/O plans are immediately offloaded to the execution engine. By utilizing thread-local sublists, each thread independently handles its byte-range requests.
\subsection{Optimistic Read Execution and Completion}
Once a coordinated window read is scheduled, the system issues the corresponding byte-range I/O request immediately. Read execution is fully optimistic: there is no validation phase, no abort, and no rollback. This is enabled by the immutability of remote-sensing imagery and by the deterministic ordering of I/O plans, which together ensure consistent and repeatable read behavior.
\par
A query is considered complete when all windows in its I/O plan have been served and the associated local processing (e.g., reprojection or mosaicking) has finished. By eliminating validation overhead and allowing read execution to proceed independently once scheduled, the system achieves low-latency query completion while maintaining predictable I/O behavior under concurrency.
\par
Overall, this concurrency-aware I/O coordination mechanism reinterprets concurrency control as a problem of \emph{coordinating shared I/O flows}. By operating at the granularity of windowed reads and leveraging deterministic ordering and optimistic execution, it effectively reduces redundant I/O and improves scalability for multi-user spatio-temporal query workloads.
\section{I/O Stack Tuning}\label{sec:Tuning}
We first formulate the I/O stack tuning problem, and then propose a surrogate-assisted GMAB algorithm to solve it.
\subsection{Formulation of Online I/O Tuning}
% TODO: subsection title: Tuning Model?
We study a concurrent spatio-temporal query engine that processes many range queries at the same time. The system operates on large remote sensing images stored in shared storage. Unlike traditional HPC jobs or single-application I/O workloads, the system does not run one fixed job; instead, it continuously receives a stream of user queries, each of which is turned into many small I/O operations that often touch overlapping regions in large raster files.
\par
Let $\mathcal{Q} = \{Q_1, Q_2, \ldots, Q_N\}$ denote a stream of spatio-temporal range queries submitted by multiple users. Each query $q$ is decomposed by the I/O-aware index into a set of grid-aligned spatial windows based on a predefined global grid system. These windows are further mapped to sub-regions of one or more large remote sensing images. In this way, every query produces an I/O execution context $c= \langle W,M,S \rangle$, where $W$ describes the set of image windows to be accessed, including their sizes, spatial overlap, and distribution across images. $M$ captures window-level coordination opportunities, such as window merging, deduplication, or shared reads across concurrent queries. $S$ represents system-level execution decisions, including batching strategies, I/O scheduling order, and concurrency limits. Importantly, the I/O behavior of the system is not determined solely by static application code, but emerges dynamically from the interaction between query workloads, execution plans, and system policies.
\par
The goal of I/O tuning in this system is to optimize the performance of query-induced I/O execution under continuous, concurrent workloads. We focus on minimizing the observed I/O cost per query, which may be measured by metrics such as average query latency, effective I/O throughput, or amortized disk read time. Let $\theta \in \varTheta$ denote a tuning configuration, where each configuration specifies a combination of system-level I/O control parameters, including window batching size, merge thresholds, queue depth, concurrency limits, and selected storage-level parameters exposed to the engine. Unlike traditional I/O tuning frameworks, the decision variables $\theta$ are applied at the query execution level, rather than at application startup or compilation time.
\par
For a given tuning configuration $\theta$ and execution context $c$, the observed I/O performance is inherently stochastic due to interference among concurrent queries, shared storage contention, and variability in window overlap and access locality. We model the observed performance outcome as a random variable:
\vspace{-0.05in}
\begin{equation}
\vspace{-0.05in}
\label{eqn_perf_model}
Y\left( \theta ,c \right) =f\left( \theta ,c \right) +\epsilon ,
\end{equation}
where $f\left( \cdot \right) $ is an unknown performance function and $\epsilon$ captures stochastic noise. Moreover, as query workloads evolve over time, the distribution of execution contexts $c$ may change, making the tuning problem non-stationary.
\par
Given a stream of queries $\mathcal{Q}$ and the resulting sequence of execution contexts $\left\{ c_t \right\} $, the problem is to design an online tuning strategy that adaptively selects tuning configurations $\theta _t$ for query execution, so as to minimize the long-term expected I/O cost:
\vspace{-0.05in}
\begin{equation}
\vspace{-0.05in}
\label{eqn_tuning_objective}
\min_{\left\{ \theta _t \right\}}\mathbb{E}\left[ \sum_{t=1}^T{Y}\left( \theta _t,c_t \right) \right] ,
\end{equation}
subject to practical constraints on tuning overhead and system stability.
% TODO: add a table of the constraints
\subsection{Surrogate-Assisted GMAB for Online I/O Tuning}
\begin{algorithm}[!htb]
\caption{Surrogate-Assisted Genetic Multi-Armed Bandit (SA-GMAB)}
\label{alg:sa-gmab}
\SetKwInOut{Input}{Input}\SetKwInOut{Output}{Output}
\Input{Configuration space $\Theta$, Initial population size $P$, Exploration parameter $\alpha$, Surrogate update interval $\Delta$}
\Output{Online selection of I/O coordination configuration $\theta_t$}
\BlankLine
\tcp{Initialization}
Initialize memory table $\mathcal{M} = \emptyset$\;
Initialize surrogate model $\tilde{f}$ with empty training data\;
Generate an initial population $\mathcal{P}_0 \subset \Theta$\;
Set tuning step counter $t \leftarrow 0$\;
\BlankLine
\tcp{Online Tuning Loop}
\While{arrival of query $q_t$ with execution context $c_t$}{
\tcp{Candidate Generation}
Apply genetic operators (selection, crossover, mutation) on current population to generate candidate set $\mathcal{C}_t \subset \Theta$\;
\tcp{Surrogate-based Pre-evaluation}
\ForEach{$\theta \in \mathcal{C}_t$}{
$\hat{r}_\theta \leftarrow \tilde{f}(\theta, c_t)$\;
}
\tcp{Candidate Filtering}
Select top-$K$ configurations $\mathcal{C}'_t \subset \mathcal{C}_t$ based on $\hat{r}_\theta$ or uncertainty\;
\tcp{Bandit-based Selection}
\ForEach{$\theta \in \mathcal{C}'_t$}{
$\text{Score}(\theta) = \hat{\mu}_\theta + \alpha \sqrt{\frac{\log(t+1)}{n_\theta + 1}}$\;
}
Select configuration: $\theta_t = \arg\max_{\theta \in \mathcal{C}'_t} \text{Score}(\theta)$\;
\tcp{Query Execution \& Reward Observation}
Execute query $q_t$ using I/O coordination policy $\theta_t$\;
Measure performance outcome and compute reward $r_t$\;
\tcp{State Update}
Update memory entry for $\theta_t$: $n_{\theta_t} \leftarrow n_{\theta_t} + 1$\;
$\hat{\mu}_{\theta_t} \leftarrow \hat{\mu}_{\theta_t} + \frac{r_t - \hat{\mu}_{\theta_t}}{n_{\theta_t}}$\;
Update population $\mathcal{P}$ by inserting $\theta_t$ (optionally evicting low-performing ones)\;
\If{$t \bmod \Delta = 0$}{
Retrain surrogate model $\tilde{f}$ using observations in $\mathcal{M}$\;
}
$t \leftarrow t + 1$\;
}
\end{algorithm}
\par
To address the online I/O tuning problem, we use a Surrogate-Assisted Genetic Multi-Armed Bandit (SA-GMAB) framework that combines genetic search, bandit-style exploration, and a lightweight surrogate performance model. The goal is to handle workloads whose behavior drifts over time, whose performance observations are noisy, and in which concurrent queries may interfere with one another. The main steps of this framework are shown in Algorithm~\ref{alg:sa-gmab}.
\par
We first initialize the memory table and the surrogate model, and then generate an initial population of configurations (lines 1--4). In our system, each arm is an I/O tuning configuration $\theta \in \varTheta$. A configuration is a group of I/O control parameters, such as merge thresholds, batch size, queue depth, and limits on parallel requests. The configuration space is large and discrete, so it is infeasible to enumerate or test all arms in advance. Instead, new configurations are created dynamically by genetic operators during candidate generation (line 6). Each configuration acts as a policy that tells the system how to run I/O plans during a scheduling period.
\par
When a query $q_t$ with context $c_t$ arrives, the framework enters the online tuning loop (line 5). For this query, a set of candidate configurations is created through selection, crossover, and mutation (line 6). For every candidate configuration, the surrogate model predicts its reward under the current context (lines 7--9). These predicted rewards are then used to filter and keep only the most promising configurations, or those with high uncertainty (line 10).
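One possible form of these genetic operators over a discrete parameter space is sketched below; the parameter names and value ranges in \texttt{SPACE} are illustrative placeholders, not the system's actual knobs.

```python
import random

# Illustrative discrete configuration space; the real parameter lists and
# value ranges are system-specific and not prescribed by the paper.
SPACE = {
    "batch_size":   [16, 32, 64, 128],
    "merge_gap":    [0, 4096, 65536],
    "queue_depth":  [8, 16, 32],
    "max_parallel": [2, 4, 8],
}

def crossover(a, b):
    """Uniform crossover: each parameter is inherited from either parent."""
    return {k: random.choice([a[k], b[k]]) for k in SPACE}

def mutate(theta, rate=0.2):
    """With probability rate, resample a parameter from its domain."""
    return {k: (random.choice(SPACE[k]) if random.random() < rate else v)
            for k, v in theta.items()}

def generate_candidates(population, size=8):
    """One round of selection, crossover, and mutation (line 6 of the algorithm)."""
    return [mutate(crossover(*random.sample(population, 2)))
            for _ in range(size)]
```

Because candidates are synthesized on demand, the bandit never needs the full arm set up front, which is what makes the large discrete space tractable.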
\par
When a configuration $\theta$ is used to process a query $q_t$ with context $c_t$, the system observes a random performance result $Y_t=Y\left( \theta ,c_t \right)$. We define the reward as a simple transformation of I/O cost so that a higher reward means better performance. A common form is the negative latency of the query, or the negative I/O time per unit work. Because other queries run at the same time, the reward may change even for the same configuration. Thus, many samples are needed to estimate the expected reward.
\par
For the remaining candidates, the framework computes a bandit score from both the historical average reward and an exploration term (lines 11--13), and then selects the configuration with the highest score (line 14). In this way, the method prefers configurations that have performed well before, but it also tries configurations that have been used only a few times.
\par
The selected configuration is then applied to execute the query (line 15). After execution, the system observes the performance result and converts it into a reward value (line 16). For each configuration $\theta$, the system keeps a memory entry that records how many times it has been used and its average reward. These values are updated after each execution (lines 17--18). This retains all historical observations instead of discarding older ones, so estimates become more accurate over time and poor configurations are not repeatedly tried.
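The per-arm bookkeeping and bandit score of Algorithm~\ref{alg:sa-gmab} amount to an incremental mean plus a UCB-style exploration bonus, sketched below:

```python
import math

class ArmStats:
    """Memory entry for one configuration: pull count and running mean reward."""

    def __init__(self):
        self.n = 0      # times this configuration was selected
        self.mu = 0.0   # running average reward

    def update(self, r):
        # Incremental mean: mu <- mu + (r - mu) / n  (lines 17--18)
        self.n += 1
        self.mu += (r - self.mu) / self.n

    def score(self, t, alpha=1.0):
        # Exploitation plus UCB-style exploration bonus (lines 11--13)
        return self.mu + alpha * math.sqrt(math.log(t + 1) / (self.n + 1))

arm = ArmStats()
for r in [10.0, 20.0, 30.0]:
    arm.update(r)
# arm.mu is now 20.0 after the three rewards
```

The incremental form avoids storing every past reward while remaining exactly equal to the running average, and the bonus term shrinks as an arm accumulates pulls.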
\par
The selected configuration may also be added into the population, while poor ones may be removed (line 19). The surrogate model is retrained periodically using data stored in memory (lines 20--22), so that its predictions track the most recent workload. The tuning step counter is then incremented (line 23), and the framework continues with the next query.
\section{Performance Evaluation}\label{sec:EXP}
First, we introduce the experimental setup, covering the dataset characteristics, query workload generation, and the distributed cluster environment. Then, we present the experimental results evaluating the proposed I/O-aware indexing structure, the hybrid concurrency-aware I/O coordination mechanism, and the online I/O tuning framework, respectively.
\subsection{Experimental Setup}
\subsubsection{Dataset}
We employed a large-scale real-world remote sensing dataset derived from the Sentinel-2 mission \footnote[1]{https://sentinel.esa.int/web/sentinel/missions/sentinel-2}, specifically the Level-2A atmospherically corrected products. The dataset comprises multi-spectral images covering global land surfaces from 2019 to 2023. To simulate a cloud-native storage environment, all images are converted into Cloud-Optimized GeoTIFF (COG) format and stored in a distributed object store. The statistics of the dataset are summarized in Table~\ref{table_dataset}.
% table 1: Dataset
\begin{table}
\renewcommand{\arraystretch}{1.3}
\caption{Dataset Statistics}
\label{table_dataset}
\vspace{-0.13in}
\centering
\begin{tabular}{|m{1.5cm}|m{1.5cm}|m{1.5cm}|m{2.0cm}|}
\hline
\makecell[c]{\textbf{Dataset}} &\bfseries Resolution & \bfseries Time Span & \bfseries Total Volume \\
\hline
\hline
\makecell[c]{Sentinel-2}&\makecell[c]{10m--60m} & \makecell[c]{2019--2023} & \makecell[c]{15.4 TB}\\
\hline
\makecell[c]{Landsat-8}&\makecell[c]{30m} & \makecell[c]{2020--2022} & \makecell[c]{4.2 TB}\\
\hline
\end{tabular}
\end{table}
\subsubsection{Query Workload}
\par
To evaluate the system performance under diverse scenarios, we developed a synthetic workload generator that simulates concurrent spatio-temporal range queries. The query parameters are configured as follows:
\begin{itemize}
\item \textbf{Spatial Extent:} The spatial range of queries follows a log-uniform distribution, ranging from small tile-level access ($0.001\%$ of the scene) to large-scale regional mosaics ($1\%$ to $100\%$ of the scene).
\item \textbf{Temporal Range:} Each query specifies a time interval randomly chosen between 1 day and 1 month.
\item \textbf{Concurrency \& Contention:} The number of concurrent clients $N$ varies from 1 to 64. To test the coordination mechanism, we control the Spatial Overlap Ratio $\sigma \in [0, 0.9]$ to simulate workloads ranging from disjoint access to highly concentrated hotspots.
\end{itemize}
\subsubsection{Experimental Environment}
\label{sec_exp_env}
All experiments are conducted on a cluster of 9 homogeneous nodes (1 master node and 8 worker nodes). The cluster is connected via 10Gbps high-speed Ethernet to ensure that network bandwidth is not the primary bottleneck compared to storage I/O. Table \ref{table_config} lists the detailed hardware and software configurations. The I/O-aware index (G2I/I2G) is deployed on HBase, while the raw image data is served by a MinIO distributed object storage cluster.
% table 2: Environment
\begin{table}
\renewcommand{\arraystretch}{1.3}
\caption{Cluster Configurations}
\label{table_config}
\vspace{-0.13in}
\centering
\begin{tabular}{|m{2.2cm}|m{5.5cm}|}
\hline
\multicolumn{2}{|c|}{\textbf{Hardware Configuration (Per Node)}} \\
\hline
\makecell[c]{CPU} & Dual Intel Xeon Gold 6248 (20 cores, 2.50GHz)\\
\hline
\makecell[c]{Memory} & \makecell[c]{128GB DDR4 ECC}\\
\hline
\makecell[c]{Storage} & \makecell[c]{4TB NVMe SSD (Data) + 500GB SSD (OS)}\\
\hline
\makecell[c]{Network} & \makecell[c]{10 Gigabit Ethernet (10GbE)}\\
\hline\hline
\multicolumn{2}{|c|}{\textbf{Software Stack}} \\
\hline
\makecell[c]{OS} & \makecell[c]{Ubuntu 20.04 LTS} \\
\hline
\makecell[c]{Storage} & \makecell[c]{Hadoop 3.3.1, HBase 2.4.5, Lustre}\\
\hline
\makecell[c]{Framework} & \makecell[c]{OpenJDK 11, Spark 3.2.1}\\
\hline
\end{tabular}
\end{table}
\subsection{Evaluating the data indexing structure}
In the following experiments, we measured the index on a single node of the cluster, because each node performs the index lookups for spatial queries. We investigated the query performance of the index for remote sensing images.
\subsubsection{I/O Selectivity Analysis}\label{sec:Index_exp_1}
\begin{figure}[tb]
\centering
\subfigure[I/O Selectivity]{
\begin{minipage}[b]{0.227\textwidth}
\includegraphics[width=0.98\textwidth]{exp/index_exp1_1.pdf}
\end{minipage}
}
\label{fig:index_exp1_1}
\subfigure[Unnecessary data fraction]{
\begin{minipage}[b]{0.227\textwidth}
\includegraphics[width=0.905\textwidth]{exp/index_exp1_2.pdf}
\end{minipage}
}
\label{fig:index_exp1_2}
\caption{I/O selectivity and unnecessary data fraction}
\label{fig:index_exp1}
\end{figure}
\par
First, we evaluated the effectiveness of data reduction by measuring the I/O selectivity, defined as the ratio of the retrieved data volume to the total file size. Fig.~\ref{fig:index_exp1} compares our method against Baseline 1 (full-file retrieval) and Baseline 2 (exact window-based reading, e.g., OpenDataCube). As illustrated in Fig.~\ref{fig:index_exp1}(a), Baseline 1 exhibits a linear increase in I/O volume proportional to the file size, resulting in poor selectivity regardless of the query footprint. In contrast, both Baseline 2 and Ours significantly reduce I/O traffic by enabling partial reads. It is worth noting that our method incurs slightly higher I/O volume (approximately 16\%--23\% of the file size for small queries) compared to the theoretically optimal Baseline 2 (10\%--20\%). This marginal data redundancy is attributed to the grid alignment effect: our index retrieves pixel blocks based on fixed grid boundaries, whereas Baseline 2 performs precise geospatial clipping. Fig.~\ref{fig:index_exp1}(b) further presents the distribution of unnecessary data fraction. While our method introduces a small amount of ``over-reading'' due to grid padding, it successfully avoids the massive data waste observed in Baseline 1. As we will demonstrate in the next section, this slight compromise in I/O precision is a strategic trade-off that eliminates expensive runtime computations.
\subsubsection{End-to-End Query Latency}\label{sec:Index_exp_2}
\begin{figure}[tb]
\centering
\subfigure[The query latency]{
\begin{minipage}[b]{0.227\textwidth}
\includegraphics[width=0.98\textwidth]{exp/index_exp2_1.pdf}
\end{minipage}
}
\label{fig:index_exp2_1}
\subfigure[Latency Breakdown]{
\begin{minipage}[b]{0.227\textwidth}
\includegraphics[width=0.905\textwidth]{exp/index_exp2_2.pdf}
\end{minipage}
}
\label{fig:index_exp2_2}
\caption{End-to-End Query Latency}
\label{fig:index_exp2}
\end{figure}
\par
We next measured the end-to-end query latency to verify whether the I/O reduction translates into time efficiency. Fig.~\ref{fig:index_exp2}(a) reports the mean and 95th-percentile (P95) latency across varying query footprint ratios (log scale). The results reveal three distinct performance behaviors. Baseline 1 shows a high, flat latency curve ($\approx 4500$ ms), dominated by the cost of transferring entire images. Baseline 2, despite its optimal I/O selectivity, exhibits a significant latency floor ($\approx 380$ ms for small queries); this overhead stems from the on-the-fly geospatial computations required to calculate precise read windows. Ours achieves the lowest latency, ranging from 34 ms to 59 ms for typical tile-level queries ($10^{-4}$ coverage). Crucially, for small-to-medium queries, our method outperforms Baseline 2 by an order of magnitude. The gap between the two curves highlights the advantage of our deterministic indexing approach: by pre-materializing grid-to-window mappings, we eliminate runtime coordinate transformations. Although our I/O volume is slightly larger (as shown in Sec.~\ref{sec:Index_exp_1}), the time saved by avoiding computational overhead far outweighs the cost of transferring a few extra kilobytes of padding data.
To empirically validate the cost model proposed in Eq.~\ref{eqn:cost_total}, we further decomposed the query latency into three components: metadata lookup ($C_{meta}$), geospatial computation ($C_{geo}$), and I/O access ($C_{io}$). Fig.~\ref{fig:index_exp2}(b) presents the time consumption breakdown for a representative medium-scale query (involving approx. 50 image tiles). As expected, the latency of Baseline 1 is entirely dominated by $C_{io}$ ($>99\%$), rendering $C_{meta}$ and $C_{geo}$ negligible. The massive data transfer masks all other overheads. While $C_{io}$ of Baseline 2 is successfully reduced to the window size, a new bottleneck emerges in $C_{geo}$. The runtime coordinate transformations and polygon clipping consume nearly $70\%$ of the total execution time (approx. 350 ms). This observation confirms our theoretical analysis that window-based I/O shifts the bottleneck from storage to CPU. The proposed method exhibits a balanced profile. Although $C_{meta}$ increases slightly (approx. 60 ms) due to the two-phase index lookup (G2I + I2G), this cost is well-amortized. Crucially, $C_{geo}$ is effectively eliminated ($<1$ ms) thanks to the pre-computed grid-window mappings. Consequently, our approach achieves a total latency of approx. 150 ms, providing a $3\times$ speedup over Baseline 2 by removing the computational bottleneck without regressing on I/O performance.
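The breakdown arithmetic can be checked directly against the additive cost model ($C_{total} = C_{meta} + C_{geo} + C_{io}$). The millisecond figures below are the approximate values quoted in the text, used here only to illustrate how the components combine:

```python
# Toy decomposition of the additive cost model, using the approximate
# millisecond figures quoted in the text (illustrative, not measured).

breakdowns = {
    "Baseline 2": {"C_meta": 50, "C_geo": 350, "C_io": 100},  # C_geo dominates (~70%)
    "Ours":       {"C_meta": 60, "C_geo": 1,   "C_io": 100},  # C_geo eliminated
}

totals = {name: sum(parts.values()) for name, parts in breakdowns.items()}
speedup = totals["Baseline 2"] / totals["Ours"]
print(totals, f"speedup={speedup:.1f}x")
```

Removing the computational component alone accounts for the roughly $3\times$ gap reported above.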
\subsubsection{Ablation Study}\label{sec:Index_exp_3}
\begin{figure}[tb]
\centering
\subfigure[I/O Reduction Analysis]{
\begin{minipage}[b]{0.227\textwidth}
\includegraphics[width=0.9\textwidth]{exp/index_exp3_1.pdf}
\end{minipage}
}
\label{fig:index_exp3_1}
\subfigure[Latency Breakdown]{
\begin{minipage}[b]{0.227\textwidth}
\includegraphics[width=0.9\textwidth]{exp/index_exp3_2.pdf}
\end{minipage}
}
\label{fig:index_exp3_2}
\caption{Ablation Analysis}
\label{fig:index_exp3}
\end{figure}
\begin{figure}
\centering
\includegraphics[width=1.8in]{exp/index_exp3_3.pdf}
\caption{Impact of grid resolution on query latency}
\label{fig:index_exp3_3}
\end{figure}
\par
To quantify the individual contributions of the G2I (coarse filtering) and I2G (fine-grained access) components, we decomposed the system into four variants. Fig.~\ref{fig:index_exp3} breaks down the performance in terms of I/O volume and latency components (metadata lookup vs. storage I/O). Fig.~\ref{fig:index_exp3}(a) confirms that removing either component leads to suboptimal I/O behavior. The ``No Index'' and ``G2I Only'' variants incur 100\% I/O volume (full-file reads), as they lack the window information required for partial access. Conversely, ``I2G Only'' and ``Full'' (Ours) achieve minimal I/O volume ($\approx 10\%$). However, I/O volume alone does not tell the full story. Fig.~\ref{fig:index_exp3}(b) reveals the latency breakdown:
\begin{itemize}
\item \textbf{No Index:} suffers from both high metadata scanning cost (full table scan) and high storage I/O cost.
\item \textbf{G2I Only:} efficiently reduces metadata lookup time ($\approx 50$ ms) but fails to reduce storage I/O ($\approx 8000$ ms).
\item \textbf{I2G Only:} minimizes storage I/O ($\approx 100$ ms) but incurs prohibitive metadata lookup overhead ($\approx 1500$ ms), because the system must scan the entire I2G table to identify relevant images without spatial pruning.
\item \textbf{G2I + I2G (Ours):} achieves the best of both worlds, maintaining low metadata latency ($\approx 60$ ms) via G2I pruning while ensuring minimal storage I/O ($\approx 100$ ms) via I2G windowing.
\end{itemize}
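The two-phase lookup that the ablation isolates can be sketched with two in-memory tables (the table contents and key names below are hypothetical, not the system's actual schema): G2I prunes candidate images per grid cell, and I2G then yields the precomputed byte window for each (image, cell) pair.

```python
# Minimal sketch of the two-phase G2I + I2G lookup (hypothetical tables).

g2i = {  # grid key -> image ids covering that cell (coarse filter)
    "cell_41": ["img_a", "img_b"],
    "cell_42": ["img_b"],
}
i2g = {  # (image id, grid key) -> (byte offset, length) read window
    ("img_a", "cell_41"): (0, 4096),
    ("img_b", "cell_41"): (8192, 4096),
    ("img_b", "cell_42"): (12288, 4096),
}

def plan_reads(query_cells):
    """Phase 1 (G2I): prune images; phase 2 (I2G): emit read windows."""
    plan = []
    for cell in query_cells:
        for img in g2i.get(cell, []):   # coarse spatial pruning
            window = i2g[(img, cell)]   # fine-grained window lookup
            plan.append((img, *window))
    return plan

print(plan_reads(["cell_41", "cell_42"]))
```

Dropping G2I forces a scan over the whole I2G table (high $C_{meta}$); dropping I2G leaves no windows to read (full-file $C_{io}$), matching the ablation results.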
Moreover, the choice of grid resolution (Zoom Level) is a critical parameter that dictates the trade-off between metadata management overhead ($C_{meta}$) and I/O precision ($C_{io}$). To justify our selection of Zoom Level 14, we conducted a sensitivity analysis by varying the grid resolution from Level 12 to Level 16 under a fixed workload of medium-scale range queries. Fig.~\ref{fig:index_exp3_3} illustrates the latency breakdown across different resolutions. The results reveal a clear convex trajectory in total query latency, driven by two opposing forces. For coarse-grained grids (Level $\le 13$), while metadata lookup is extremely fast ($C_{meta} < 30$ ms) due to the small number of grid keys, the I/O cost ($C_{io}$) is prohibitively high. Large grid cells force the system to read significant amounts of irrelevant pixel data outside the actual query boundary (high read amplification), serving as the dominant bottleneck. Conversely, finer grids (Level 15, 16) maximize I/O precision, reducing $C_{io}$ to its theoretical minimum. However, this comes at the cost of an explosion in metadata volume. A single query may intersect thousands of Level 16 micro-grids, causing $C_{meta}$ to surge drastically ($>280$ ms) due to the overhead of scanning and processing massive key lists in the G2I/I2G tables. As evidenced by the trough in the total latency curve, Zoom Level 14 represents the optimal ``sweet spot'' for our dataset. At this resolution, the grid cell size (approx. $20 \times 20$ meters at the equator) roughly matches the typical internal tile size of remote sensing images, keeping I/O waste low while maintaining a manageable number of index keys. Consequently, our system adopts Level 14 as the default global configuration.
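The convex shape of the curve follows from two opposing terms: the number of intersected cells quadruples per zoom level (driving $C_{meta}$ up), while read amplification halves (driving $C_{io}$ down). The sketch below reproduces this shape with arbitrary constants chosen purely for illustration; they are not fitted to the measurements:

```python
# Toy convex cost model behind the zoom-level sweep. Constants are
# invented for illustration; only the qualitative shape matters.

def total_cost(level, base_level=12):
    cells = 4 ** (level - base_level)            # intersected grid cells
    c_meta = 4.0 * cells                         # per-key lookup overhead
    c_io = 800.0 / (2 ** (level - base_level))   # read amplification decays
    return c_meta + c_io

costs = {lvl: total_cost(lvl) for lvl in range(12, 17)}
best = min(costs, key=costs.get)
print(costs, "best level:", best)
```

With these constants the minimum falls at Level 14, mirroring the trough observed in Fig.~\ref{fig:index_exp3_3}.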
\subsubsection{Index Construction and Storage Overhead}
\begin{figure}[tb]
\centering
\subfigure[Ingestion Scalability]{
\begin{minipage}[b]{0.227\textwidth}
\includegraphics[width=0.9\textwidth]{exp/index_exp4_1.pdf}
\end{minipage}
}
\label{fig:index_exp4_1}
\subfigure[Storage Consumption Overhead]{
\begin{minipage}[b]{0.227\textwidth}
\includegraphics[width=0.9\textwidth]{exp/index_exp4_2.pdf}
\end{minipage}
}
\label{fig:index_exp4_2}
\caption{Index Construction and Storage Overhead}
\label{fig:index_exp4}
\end{figure}
\par
Finally, we evaluated the scalability and cost of maintaining the index. Fig.~\ref{fig:index_exp4} compares our method against PostGIS (R-tree) and GeoMesa (Z-order) during the ingestion of $10^6$ images. Fig.~\ref{fig:index_exp4}(a) illustrates the ingestion throughput. PostGIS exhibits a degrading trend as the dataset grows, bottlenecked by the logarithmic cost of R-tree rebalancing. In contrast, Ours maintains a stable throughput ($\approx 2100$ img/sec). Although slightly lower than the lightweight GeoMesa ($\approx 2500$ img/sec) due to the dual-table write overhead, our method demonstrates linear scalability suitable for high-velocity streaming data. Regarding storage cost (Fig.~\ref{fig:index_exp4}(b)), our index occupies approximately 0.83\% of the raw data size. While this is higher than GeoMesa (0.15\%) and PostGIS (0.51\%) due to the storage of grid-window mappings, it remains strictly below the 1\% threshold. This result validates that the proposed method achieves significant performance gains with a negligible storage penalty.
\subsection{Evaluating the Concurrency Control}
In this section, we evaluate the proposed hybrid coordination mechanism on a distributed storage cluster to assess its scalability, robustness under contention, and internal storage efficiency. We investigated end-to-end latency, throughput, tail latency, and I/O amplification under varying degrees of concurrency and spatial contention.
\par
To systematically control the workload characteristics, we developed a synthetic workload generator. We define the \textit{Spatial Overlap Ratio} ($\sigma$) to quantify the extent of shared data regions among concurrent queries, ranging from $\sigma=0$ (disjoint) to $\sigma=0.9$ (highly concentrated hotspots). The number of concurrent clients varies from $N=1$ to $N=64$.
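A generator of this kind can be sketched as follows. The design below is an assumption for illustration (not the paper's exact tool): each client query is a small square placed inside a shared hotspot with probability $\sigma$, and uniformly at random otherwise, so $\sigma$ directly controls how much concurrent queries overlap.

```python
# Sketch of a sigma-controlled synthetic workload generator (assumed
# design): hotspot placement with probability sigma, uniform otherwise.
import random

def generate_queries(n_clients, sigma, extent=1000.0, size=10.0, seed=42):
    rng = random.Random(seed)           # seeded for reproducible workloads
    hotspot = (extent / 2, extent / 2)  # shared region for contended reads
    queries = []
    for _ in range(n_clients):
        if rng.random() < sigma:        # contended: jitter around hotspot
            x = hotspot[0] + rng.uniform(-size, size)
            y = hotspot[1] + rng.uniform(-size, size)
        else:                           # disjoint: uniform placement
            x = rng.uniform(0, extent - size)
            y = rng.uniform(0, extent - size)
        queries.append((x, y, x + size, y + size))
    return queries

qs = generate_queries(n_clients=32, sigma=0.8)
print(len(qs), "queries")
```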
For comparison, we evaluate the following execution schemes:
\begin{enumerate}
\item \textbf{Baseline A (Naive):} Queries function as isolated threads with independent I/O execution.
\item \textbf{Baseline B (Shared Index):} Metadata access is shared, but data retrieval remains uncoordinated, representing the state of the practice in systems like GeoMesa.
\item \textbf{Ours:} The proposed mechanism featuring contention-aware switching, global I/O plan ordering, and window merging.
\end{enumerate}
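The window-merging step in Ours amounts to interval coalescing: concurrent byte-range requests on the same file are sorted and collapsed into maximal contiguous reads. A minimal sketch (assumed representation of windows as `(offset, length)` pairs, not the system's actual code):

```python
# Sketch of window merging: coalesce overlapping/adjacent byte ranges
# so that overlapping queries collapse into one physical I/O.

def merge_windows(windows, gap=0):
    """Coalesce (offset, length) ranges; `gap` allows near-adjacent merges."""
    if not windows:
        return []
    ranges = sorted((off, off + ln) for off, ln in windows)
    merged = [list(ranges[0])]
    for start, end in ranges[1:]:
        if start <= merged[-1][1] + gap:   # overlapping or adjacent
            merged[-1][1] = max(merged[-1][1], end)
        else:
            merged.append([start, end])
    return [(s, e - s) for s, e in merged]

# Three clients request overlapping windows; two physical reads suffice.
print(merge_windows([(0, 100), (50, 100), (400, 50)]))  # → [(0, 150), (400, 50)]
```

Higher spatial overlap produces more coalescing, which is why contention becomes an optimization opportunity in the experiments below.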
\subsubsection{Concurrency Scalability}
\begin{figure}[tb]
\centering
\subfigure[The query latency]{
\begin{minipage}[b]{0.227\textwidth}
\includegraphics[width=0.98\textwidth]{exp/cc_exp1_1.pdf}
\end{minipage}
}
\label{fig:cc_exp1_1}
\subfigure[Aggregate Throughput]{
\begin{minipage}[b]{0.227\textwidth}
\includegraphics[width=0.905\textwidth]{exp/cc_exp1_2.pdf}
\end{minipage}
}
\label{fig:cc_exp1_2}
\caption{Query latency and aggregate throughput under increasing concurrency}
\label{fig:cc_exp1}
\end{figure}
\par
First, we investigated the system scalability by increasing the number of concurrent clients from 1 to 64 under a high-overlap scenario ($\sigma \approx 0.8$). Fig.~\ref{fig:cc_exp1} reports the mean latency, P95 tail latency, and aggregate throughput. Note that the latency axes in Fig.~\ref{fig:cc_exp1}(a) are plotted on a log scale to visualize the orders-of-magnitude difference.
\par
As shown in Fig.~\ref{fig:cc_exp1}(a), both Baseline A and Baseline B exhibit exponential latency degradation. At 64 clients, the mean latency of Baseline A spikes to 12,000 ms, indicating severe storage saturation. This bottleneck arises from the ``I/O blender effect,'' where randomized concurrent reads trigger severe disk seek thrashing. In contrast, Ours maintains a stable latency profile, increasing only marginally to 110 ms at 64 clients.
\par
Fig.~\ref{fig:cc_exp1}(b) further demonstrates the throughput advantage. While both baselines saturate at approximately 16--32 clients, Ours demonstrates super-linear throughput scaling relative to logical requests. This is attributed to the request collapse mechanism, where higher concurrency increases the probability of window merging, thereby reducing the physical I/O cost per query.
\subsubsection{Tail Latency and Contention Sensitivity}
\begin{figure}[tb]
\centering
\subfigure[Tail Latency Sensitivity]{
\begin{minipage}[b]{0.227\textwidth}
\includegraphics[width=0.9\textwidth]{exp/cc_exp2_1.pdf}
\end{minipage}
}
\label{fig:cc_exp2_1}
\subfigure[Fairness under Contention]{
\begin{minipage}[b]{0.227\textwidth}
\includegraphics[width=0.9\textwidth]{exp/cc_exp2_2.pdf}
\end{minipage}
}
\label{fig:cc_exp2_2}
\caption{Tail Latency and Contention Sensitivity}
\label{fig:cc_exp2}
\end{figure}
\par
Next, we fixed the concurrency at $N=32$ and swept the Spatial Overlap Ratio $\sigma$ from 0 to 0.9 to evaluate the system's resilience to hotspots. Fig.~\ref{fig:cc_exp2} depicts the P95 latency and fairness index.
\par
Intuitively, higher contention typically degrades performance. However, Fig.~\ref{fig:cc_exp2}(a) reveals a \emph{counter-intuitive} phenomenon for our system: the P95 latency remains flat ($\approx 48$ ms) even as $\sigma$ approaches 0.9. This indicates that our coordination mechanism successfully converts ``contention'' into ``optimization opportunities'' via window merging. Conversely, both baselines exhibit a sharp ``performance cliff'' when $\sigma > 0.5$, with Baseline A reaching 8,500 ms at $\sigma=0.9$.
\par
Furthermore, Fig.~\ref{fig:cc_exp2}(b) shows that our system maintains a Jain's fairness index near 1.0, whereas the baselines drop to 0.25--0.35. This confirms that the deterministic plan queue effectively prevents the starvation of queries accessing contended regions.
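Jain's fairness index over per-client throughputs is $J = (\sum_i x_i)^2 / (n \sum_i x_i^2)$; $J=1$ means perfectly even service, and $J \approx 1/n$ means one client monopolizes the resource. A direct computation:

```python
# Jain's fairness index: J = (sum x_i)^2 / (n * sum x_i^2).
# The throughput vectors below are illustrative, not measured values.

def jains_index(xs):
    n = len(xs)
    return sum(xs) ** 2 / (n * sum(x * x for x in xs))

print(jains_index([10, 10, 10, 10]))  # even service -> 1.0
print(jains_index([40, 1, 1, 1]))     # one client starves the rest
```

The skewed example lands near 0.29, in the 0.25--0.35 band observed for the uncoordinated baselines.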
\subsubsection{Storage-Level Effects and Request Collapse}
\begin{figure}[tb]
\centering
\subfigure[Data Volume Reduction]{
\begin{minipage}[b]{0.227\textwidth}
\includegraphics[width=0.9\textwidth]{exp/cc_exp3_1.pdf}
\end{minipage}
}
\label{fig:cc_exp3_1}
\subfigure[Request Collapse (IOPS)]{
\begin{minipage}[b]{0.227\textwidth}
\includegraphics[width=0.9\textwidth]{exp/cc_exp3_2.pdf}
\end{minipage}
}
\label{fig:cc_exp3_2}
\caption{Storage-Level Effects and Request Collapse}
\label{fig:cc_exp3}
\end{figure}
\begin{figure}
\centering
\includegraphics[width=1.8in]{exp/cc_exp3_3.pdf}
\caption{Merging Efficiency}
\label{fig:cc_exp3_3}
\end{figure}
\par
To explain the performance gains observed above, we analyzed the internal I/O behavior. Fig.~\ref{fig:cc_exp3} compares the physical data movement against logical query demands. Note that in this experiment, Baseline A and Baseline B are grouped as a single baseline, as neither implements window-level coordination.
\par
Fig.~\ref{fig:cc_exp3}(a) and Fig.~\ref{fig:cc_exp3}(b) demonstrate the request collapse effect. While 64 concurrent clients generate 12,800 IOPS in the baseline, our system collapses them into fewer than 600 physical operations. Fig.~\ref{fig:cc_exp3_3} quantifies this using the merging efficiency: as the overlap ratio $\sigma$ increases, the I/O amplification factor of our system drops linearly from 1.0 to 0.15. This confirms that the throughput gains derive from a fundamental reduction in physical I/O volume rather than mere CPU scheduling.
\subsubsection{Deterministic vs Non-Deterministic Modes}
\begin{figure}
\centering
\includegraphics[width=1.8in]{exp/cc_exp4_1.pdf}
\caption{Adaptive Performance Switching}
\label{fig:cc_exp4}
\end{figure}
\par
We then validated the effectiveness of the hybrid switching logic by comparing it against static ``Forced Optimistic'' and ``Forced Deterministic'' policies. As shown in Fig.~\ref{fig:cc_exp4}, the static policies exhibit distinct weaknesses: the deterministic mode incurs high coordination overhead ($\approx 60$ ms) at low $\sigma$, while the optimistic mode suffers from exponential thrashing at high $\sigma$. The hybrid curve successfully tracks the lower performance envelope of the two.
\subsubsection{Microbenchmark of Window Merging}
\begin{figure*}[htb]
\centering
\subfigure[Reduction Pipeline]{\label{fig:cc_exp5_1}
\includegraphics[width=2.1in]{exp/cc_exp5_1.pdf}}
\subfigure[Run Length Distribution]{\label{fig:cc_exp5_2}
\includegraphics[width=2.1in]{exp/cc_exp5_2.pdf}}
\subfigure[Cost-Benefit Analysis]{\label{fig:cc_exp5_3}
\includegraphics[width=2.1in]{exp/cc_exp5_3.pdf}}
\caption{Microbenchmark of Window Merging}
\label{fig:cc_exp5}
\end{figure*}
\par
Finally, we dissected the efficiency of the three-stage reduction pipeline. Fig.~\ref{fig:cc_exp5_1} shows that the combination of De-duplication (Stage 1) and Range Merging (Stage 2) achieves a cumulative reduction in request count consistent with the findings in Section 5.3.3.
\par
Fig.~\ref{fig:cc_exp5_2} presents the run-length distribution (CDF) of I/O requests. The proposed mechanism shifts the I/O pattern from small, fragmented reads (typical of the baselines) to larger, sequential chunks, which significantly amortizes disk seek times. Fig.~\ref{fig:cc_exp5_3} presents the cost-benefit analysis. The CPU overhead of the dispatcher remains negligible ($< 2.5\,\mu$s per window) compared to the benefit of achieving a $>90\%$ zero-copy ratio, verifying that the algorithmic complexity of coordination yields a high return on investment in terms of system throughput.
\subsection{Evaluating the I/O Tuning}
In this section, we evaluate the effectiveness of the proposed SA-GMAB tuning framework. The experiments are designed to verify four key properties: fast convergence speed, robustness against stochastic noise, adaptability to workload shifts, and tangible end-to-end performance gains.
\subsubsection{Convergence Speed and Tuning Cost}
\begin{figure*}[htb]
\centering
\subfigure[Convergence Speed]{\label{fig:tune_exp1_1}
\includegraphics[width=2.1in]{exp/tune_exp1_1.pdf}}
\subfigure[Cumulative Tuning Overhead]{\label{fig:tune_exp1_2}
\includegraphics[width=2.1in]{exp/tune_exp1_2.pdf}}
\subfigure[Search Efficiency]{\label{fig:tune_exp1_3}
\includegraphics[width=2.1in]{exp/tune_exp1_3.pdf}}
\caption{Convergence Speed and Tuning Cost}
\label{fig:tune_exp1}
\end{figure*}
\par
First, we initiated a cold-start tuning session to evaluate how efficiently each method identifies high-quality configurations. Fig.~\ref{fig:tune_exp1} reports the convergence trajectory, cumulative tuning cost, and search efficiency.
\par
As shown in Fig.~\ref{fig:tune_exp1_1}, the \textbf{Default} configuration remains trapped in a high-latency state ($\approx 450$ ms). While \textbf{H5Tuner} and \textbf{TunIO} gradually improve performance, they exhibit slow decay rates, requiring over 80 steps to stabilize. In contrast, \textbf{SA-GMAB} achieves a sharp drop in best-observed latency within the first 15--20 steps. This acceleration is attributed to the surrogate model, which effectively prunes unpromising configurations before costly execution.
\par
Fig.~\ref{fig:tune_exp1_2} plots the cumulative tuning overhead (regret). The steep slope of the GA-based baselines indicates that they repeatedly explore poor configurations due to their memory-less nature. Our method exhibits the flattest curve, minimizing the cumulative performance loss during exploration. Furthermore, Fig.~\ref{fig:tune_exp1_3} confirms the high sample efficiency: SA-GMAB reaches the near-optimal zone ($\approx 50$ ms) after evaluating significantly fewer unique configurations compared to H5Tuner and TunIO.
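The cumulative-regret metric can be computed as the running sum of the gap between each step's observed latency and the (post hoc) optimal latency; flatter curves mean cheaper exploration. The latency traces below are invented for illustration only:

```python
# Sketch of the cumulative-regret curve: per-step exploration cost is
# the gap to the best achievable latency. Traces are illustrative.

def cumulative_regret(observed_latencies, optimal_latency):
    total, curve = 0.0, []
    for lat in observed_latencies:
        total += max(0.0, lat - optimal_latency)  # cost of this trial
        curve.append(total)
    return curve

ga_like = [450, 430, 440, 300, 410, 120, 390, 80]  # memory-less: re-explores
ours    = [450, 200, 90, 60, 55, 52, 50, 50]       # converges, then stays
print(cumulative_regret(ga_like, 50)[-1], cumulative_regret(ours, 50)[-1])
```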
\subsubsection{Robustness under Stochastic Interference}
\begin{figure*}[htb]
\centering
\subfigure[Reward Stability under Noise]{\label{fig:tune_exp2_1}
\includegraphics[width=2.1in]{exp/tune_exp2_1.pdf}}
\subfigure[Regret Growth]{\label{fig:tune_exp2_2}
\includegraphics[width=2.1in]{exp/tune_exp2_2.pdf}}
\subfigure[Configuration Stability]{\label{fig:tune_exp2_3}
\includegraphics[width=2.1in]{exp/tune_exp2_3.pdf}}
\caption{Robustness under Stochastic Interference}
\label{fig:tune_exp2}
\end{figure*}
\par
In concurrent I/O environments, performance measurements are inherently noisy. Fig.~\ref{fig:tune_exp2} evaluates the robustness of the tuning algorithms under such stochastic interference.
\par
Fig.~\ref{fig:tune_exp2_1} tracks the instantaneous reward over time. \textbf{H5Tuner} exhibits high variance, frequently dropping to low-reward regions because it discards good configurations that perform poorly once due to transient noise. In contrast, \textbf{SA-GMAB} maintains a stable high-reward trajectory. By aggregating historical observations in the memory table, our method ``smooths out'' the noise and correctly identifies optimal configurations despite fluctuations. Fig.~\ref{fig:tune_exp2_3} further breaks down the decision quality. Our method selects the \textbf{Optimal Configuration} for \textbf{88\%} of the rounds, whereas H5Tuner selects it only \textbf{35\%} of the time, wasting the majority of its budget on suboptimal or poor parameters.
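The memory-table idea can be sketched as follows: rewards for each configuration are averaged across rounds, so a single noisy measurement cannot evict a good candidate. The class name, structure, and reward values below are illustrative, not the actual SA-GMAB code:

```python
# Sketch of reward aggregation in a memory table: averaging across
# rounds smooths out transient noise (names and values are illustrative).

class MemoryTable:
    def __init__(self):
        self.stats = {}  # config -> [total_reward, n_observations]

    def record(self, config, reward):
        total, n = self.stats.get(config, [0.0, 0])
        self.stats[config] = [total + reward, n + 1]

    def mean(self, config):
        total, n = self.stats[config]
        return total / n

table = MemoryTable()
for r in (0.9, 0.95, 0.2, 0.92):  # one transient noise dip (0.2)
    table.record("cfg_A", r)
table.record("cfg_B", 0.6)

# Averaging keeps cfg_A ahead despite the outlier measurement;
# a memory-less tuner would have discarded it after the dip.
best = max(table.stats, key=table.mean)
print(best, table.mean("cfg_A"))
```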
\subsubsection{Adaptation to Workload Shifts}
\begin{figure*}[htb]
\centering
\subfigure[Response to Workload Shift]{\label{fig:tune_exp3_1}
\includegraphics[width=2.1in]{exp/tune_exp3_1.pdf}}
\subfigure[Parameter Adaptation]{\label{fig:tune_exp3_2}
\includegraphics[width=2.1in]{exp/tune_exp3_2.pdf}}
\subfigure[Speed of Adaptation]{\label{fig:tune_exp3_3}
\includegraphics[width=2.1in]{exp/tune_exp3_3.pdf}}
\caption{Adaptation to Workload Shifts}
\label{fig:tune_exp3}
\end{figure*}
\par
We then investigated the system's ability to adapt to non-stationary environments. We introduced a sudden workload shift at $t=60$, changing the query pattern from sparse random access to dense sequential scans.
\par
As illustrated in Fig.~\ref{fig:tune_exp3_1}, the shift causes an immediate latency spike ($>300$ ms) for all methods. The \textbf{Default} policy fails to adapt. \textbf{H5Tuner} reacts sluggishly, requiring many generations to evolve parameters for the new regime. \textbf{SA-GMAB}, however, detects the context change and leverages its surrogate model to rapidly propose new candidates, achieving a full recovery to the new optimal latency ($\approx 80$ ms) within fewer than 15 batches (Fig.~\ref{fig:tune_exp3_3}). Fig.~\ref{fig:tune_exp3_2} traces the evolution of the \textit{Merge Threshold} parameter. While the baselines drift slowly, our method executes a decisive shift from 0.2 to 0.8, effectively locking onto the new optimal region required by the sequential workload.
\subsubsection{Impact on End-to-End Query Performance}
\begin{figure*}[htb]
\centering
\subfigure[Steady-State Stability]{\label{fig:tune_exp4_1}
\includegraphics[width=2.1in]{exp/tune_exp4_1.pdf}}
\subfigure[End-to-End Throughput]{\label{fig:tune_exp4_2}
\includegraphics[width=2.1in]{exp/tune_exp4_2.pdf}}
\subfigure[I/O Efficiency Tuning]{\label{fig:tune_exp4_3}
\includegraphics[width=2.1in]{exp/tune_exp4_3.pdf}}
\caption{Impact on End-to-End Query Performance}
\label{fig:tune_exp4}
\end{figure*}
\par
Finally, we measured the steady-state performance of the fully optimized system. Fig.~\ref{fig:tune_exp4} compares the end-to-end metrics across different tuning methods.
\par
Fig.~\ref{fig:tune_exp4_1} presents a latency trace during steady-state operation. While \textbf{Default} suffers from high latency and \textbf{GA-based} methods exhibit jitter due to unstable exploration, \textbf{SA-GMAB} maintains a consistently low and smooth latency profile ($\approx 45$ ms). This stability is critical for meeting SLA requirements in real-time analytics. Fig.~\ref{fig:tune_exp4_2} summarizes the aggregate throughput gain. Our method achieves a \textbf{5.6$\times$} improvement over the default configuration. Fig.~\ref{fig:tune_exp4_3} reveals the underlying reason: under high contention, the tuner automatically selects aggressive batching and merging parameters, driving the I/O amplification factor down to \textbf{0.2}. This confirms that SA-GMAB effectively aligns the system configuration with real-time workload characteristics to maximize I/O efficiency.
\section{Conclusions}\label{sec:Con}
Modern high-performance remote sensing data management systems face a critical bottleneck shift from metadata discovery to data extraction, driven by prohibitive runtime geospatial computations ($C_{geo}$) and severe I/O contention under concurrent access. This paper presents a comprehensive I/O-aware query processing framework designed to strictly bound query latency and maximize throughput for large-scale spatio-temporal analytics. By introducing the ``Index-as-an-Execution-Plan'' paradigm and a dual-layer inverted structure (G2I and I2G), we bridge the semantic gap between logical indexing and physical storage, effectively shifting the computational burden from query time to ingestion time.
\par
To address the scalability challenges of multi-user environments, we developed a hybrid concurrency-aware I/O coordination protocol that adaptively switches between deterministic ordering and optimistic execution based on spatial contention. Furthermore, to handle the complexity of parameter configuration under fluctuating workloads, we integrated a Surrogate-Assisted Genetic Multi-Armed Bandit (SA-GMAB) mechanism for online automatic I/O tuning.
\par
Our empirical evaluation on large-scale Sentinel-2 datasets demonstrates that the proposed I/O-aware index reduces end-to-end latency by an order of magnitude compared to standard window-based reading approaches. The hybrid coordination mechanism effectively converts I/O contention into request-merging opportunities, achieving linear throughput scaling significantly superior to traditional isolated execution. Additionally, the SA-GMAB tuning method exhibits faster convergence and greater robustness against stochastic noise compared to existing genetic baselines. These findings provide a scalable and predictable path for next-generation remote sensing platforms to support real-time, data-intensive concurrent workloads.
% if have a single appendix:
%\appendix[Proof of the Zonklar Equations]
% or
%\appendix % for no appendix heading
% do not use \section anymore after \appendix, only \section*
% is possibly needed

% use appendices with more than one appendix
% then use \section to start each appendix
% you must declare a \section before using any
% \subsection or using \label (\appendices by itself
% starts a section numbered zero.)
%

%%\appendices
%%\section{Proof of the First Zonklar Equation}
%%Appendix one text goes here.

% you can choose not to have a title for an appendix
% if you want by leaving the argument blank
%%\section{}
%%Appendix two text goes here.
% use section* for acknowledgement
\ifCLASSOPTIONcompsoc
% The Computer Society usually uses the plural form
\section*{Acknowledgments}
This work is supported in part by the National Natural Science Foundation of China (No. U21A2013, No. 41925007, and No. 62076224), and the Open Research Project of the Hubei Key Laboratory of Intelligent Geo-Information Processing (KLIGIP-2019B14).
\else
% regular IEEE prefers the singular form
% \section*{Acknowledgment}
%%Dr. L. Wang's work is funded by ``One-Hundred Talents Program'' of
%Chinese Academy of Sciences. Drs. X. Chen, Z. Deng and D. Chen were
%supported in part by the by the National Natural Science Foundation of
%China (No. 61272314), the Program for New Century Excellent Talents in
%University (NCET-11-0722), the Excellent Youth Foundation of Hubei
%Scientific Committee (No. 2012FFA025), the Natural Science Foundation
%of Hubei Province (No. 2011CDB159), the Specialized Research Fund for
%the Doctoral Program of Higher Education (20110145110010), the
%Fundamental Research Funds for the Central Universities, China University of Geosciences(Wuhan) (No.
%CUG120114, No. CUG130617), and Beijing Microelectronics Technology Institute under
%the University Research Programme (No. BM-KJ-FK-WX-20130731-0013).
\fi
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
% Can use something like this to put references on a page
% by themselves when using endfloat and the captionsoff option.
\ifCLASSOPTIONcaptionsoff
\newpage
\fi

% trigger a \newpage just before the given reference
% number - used to balance the columns on the last page
% adjust value as needed - may need to be readjusted if
% the document is modified later
%\IEEEtriggeratref{8}
% The "triggered" command can be changed if desired:
%\IEEEtriggercmd{\enlargethispage{-5in}}

% references section

% can use a bibliography generated by BibTeX as a .bbl file
% BibTeX documentation can be easily obtained at:
% http://www.ctan.org/tex-archive/biblio/bibtex/contrib/doc/
% The IEEEtran BibTeX style support page is at:
% http://www.michaelshell.org/tex/ieeetran/bibtex/
\bibliographystyle{IEEEtran}
\bibliography{bib/references}

% argument is your BibTeX string definitions and bibliography database(s)
%\bibliography{IEEEabrv,../bib/paper}
%
% <OR> manually copy in the resultant .bbl file
% set second argument of \begin to the number of references
% (used to reserve space for the reference number labels box)
%%\begin{thebibliography}{1}

%%\bibitem{IEEEhowto:kopka}
%This is an example of a book reference
%%H. Kopka and P.W. Daly, \emph{A Guide to {\LaTeX}}, third ed. Harlow, U.K.: Addison-Wesley, 1999.

%This is an example of a Transactions article reference
%D.S. Coming and O.G. Staadt, "Velocity-Aligned Discrete Oriented Polytopes for Dynamic Collision Detection," IEEE Trans. Visualization and Computer Graphics, vol. 14, no. 1, pp. 1-12, Jan/Feb 2008, doi:10.1109/TVCG.2007.70405.

%This is an example of an article from a conference proceeding
%H. Goto, Y. Hasegawa, and M. Tanaka, "Efficient Scheduling Focusing on the Duality of MPL Representation," Proc. IEEE Symp. Computational Intelligence in Scheduling (SCIS '07), pp. 57-64, Apr. 2007, doi:10.1109/SCIS.2007.367670.

%This is an example of a PrePrint reference
%J.M.P. Martinez, R.B. Llavori, M.J.A. Cabo, and T.B. Pedersen, "Integrating Data Warehouses with Web Data: A Survey," IEEE Trans. Knowledge and Data Eng., preprint, 21 Dec. 2007, doi:10.1109/TKDE.2007.190746.

%Again, see the IEEEtran_HOWTO.pdf for several more bibliographical examples. Also, more style examples
%can be seen at http://www.computer.org/author/style/transref.htm
%%\end{thebibliography}

% biography section
%
% If you have an EPS/PDF photo (graphicx package needed) extra braces are
% needed around the contents of the optional argument to biography to prevent
% the LaTeX parser from getting confused when it sees the complicated
% \includegraphics command within an optional argument. (You could create
% your own custom macro containing the \includegraphics command to make things
% simpler here.)
%\begin{biography}[{\includegraphics[width=1in,height=1.25in,clip,keepaspectratio]{mshell}}]{Michael Shell}

%\begin{biography}[{\includegraphics[width=1in,height=1.25in,clip,keepaspectratio]{bio/dz.pdf}}]{Ze Deng}

% or if you just want to reserve a space for a photo:
%\begin{IEEEbiography}[{ \includegraphics[width=1.2in,height=1.25in,angle=180,clip,
%keepaspectratio]{dz.eps}}]{Dan Chen} (M' 2002)
%received the B.Sc. degree in applied physics from Wuhan University, Wuhan, China, and the M.Eng. degree in computer science from Huazhong University of Science and Technology, Wuhan, China. He received the M.Eng. and the Ph.D. degrees in computer engineering from Nanyang Technological University, Singapore.

%%He is currently a Professor, Head of the Department of Network Engineering, and the Director of the Scientific Computing lab with the School of Computer Science, China University of Geosciences, Wuhan, China. He was an HEFCE Research Fellow with the University of Birmingham, U.K. His research interests include computer-based modelling and simulation, high performance computing, and neuroinformatics.
%\end{IEEEbiography}

%\begin{IEEEbiography}[{ \includegraphics[width=1in,height=1.25in,angle =-90,keepaspectratio]{bio/dz.eps}}]{Ze Deng}
%%received the B.Sc. degree from China University of Geosciences, the M.Eng. degree from Yunnan University, and the Ph.D. degree from Huazhong University of Science and Technology, China. He is currently an Assistant Professor with the School of Computer Science, China University of Geosciences, Wuhan, China. He is currently also a Postdoctoral Researcher with the Faculty of Resources, China University of Geosciences, Wuhan, China.
%\end{IEEEbiography}

%\begin{IEEEbiography}[{ \includegraphics[width=1in,height=1.25in,angle =-90,clip,keepaspectratio]{bio/wxm.eps}}]{Xiaomin Wu}
%%received the B.Sc. degree from China University of Geosciences. He is currently a master's degree candidate with the School of Computer Science, China University of Geosciences, Wuhan, China. His research interests include data management, high-performance computing, and neuroinformatics.
%\end{IEEEbiography}

%\begin{IEEEbiography}[{ \includegraphics[width=1in,height=1.25in,angle =180,clip,keepaspectratio]{bio/Wang.eps}}]{Lizhen Wang} (SM' 2009)
%received the
%%B.Eng. degree (with honors) in electrical engineering with a minor in applied mathematics and the M.Eng. degree in electrical engineering, both from Tsinghua University, Beijing, China, and the Doctor of Engineering degree in applied computer science (magna cum laude) from the University of Karlsruhe (now Karlsruhe Institute of Technology), Karlsruhe, Germany. He is a ``100-Talent Program'' Professor at the Institute of Remote Sensing \& Digital Earth, Chinese Academy of Sciences (CAS), Beijing, China, and a ``ChuTian'' Chair Professor at the School of Computer Science, China University of Geosciences, Wuhan, China.
%\end{IEEEbiography}

%\begin{IEEEbiography}[{ \includegraphics[width=1in,height=1.25in,angle =0,clip,keepaspectratio]{bio/Chen.eps}}]{Xiaodao Chen}
%%received the B.Eng. degree in telecommunication from the Wuhan University of Technology, Wuhan, China, in 2006, the M.Sc. degree in electrical engineering from Michigan Technological University, Houghton, USA, in 2009, and the Ph.D. degree in computer engineering from Michigan Technological University, Houghton, USA, in 2012. He is currently an Assistant Professor with the School of Computer Science, China University of Geosciences, Wuhan, China.
%\end{IEEEbiography}

%\begin{IEEEbiography}[{ \includegraphics[width=1in,height=1.25in,angle =180,clip,keepaspectratio]{bio/Rajiv.eps}}]{Rajiv Ranjan}
%%is a Research Scientist and a Julius Fellow in the CSIRO Computational Informatics Division (formerly known as the CSIRO ICT Centre). His expertise is in datacenter cloud computing, application provisioning, and performance optimization. He has a PhD (2009) in Engineering from the University of Melbourne. He has published 62 scientific, peer-reviewed papers (7 books, 25 journals, 25 conferences, and 5 book chapters). His h-index is 20, with a lifetime citation count of 1660+ (Google Scholar). His papers have also received 140+ ISI citations. 70\% of his journal papers and 60\% of his conference papers have been A*/A-ranked ERA publications. Dr. Ranjan has been invited to serve as a Guest Editor for leading distributed systems journals including IEEE Transactions on Cloud Computing, Future Generation Computing Systems, and Software: Practice and Experience. One of his papers appeared in 2011's top computer science journal, IEEE Communications Surveys and Tutorials.
%\end{IEEEbiography}

%\begin{IEEEbiography}[{ \includegraphics[width=1in,height=1.25in,angle =0,clip,keepaspectratio]{bio/zomaya.eps}}]{Albert Zomaya} (F' 2004)
%%is currently the Chair Professor of High Performance Computing \& Networking and an Australian Research Council Professorial Fellow in the School of Information Technologies, The University of Sydney. He is also the Director of the Centre for Distributed and High Performance Computing, which was established in late 2009. Professor Zomaya held the CISCO Systems Chair Professorship of Internetworking during the period 2002-2007 and was also Head of School for 2006-2007 in the same school. Prior to his current appointment he was a Full Professor in the School of Electrical, Electronic and Computer Engineering at the University of Western Australia, where he also led the Parallel Computing Research Laboratory during the period 1990-2002. He served as Associate, Deputy, and Acting Head in the same department, held numerous visiting positions, and has extensive industry involvement. Professor Zomaya received his PhD from the Department of Automatic Control and Systems Engineering, Sheffield University, in the United Kingdom.
%\end{IEEEbiography}

%\begin{IEEEbiography}[{ \includegraphics[width=1.2in,height=1.25in,angle=180,clip,
%keepaspectratio]{bio/Dan.eps}}]{Dan Chen} (M' 2002)
%%received the B.Sc. degree in applied physics from Wuhan University, Wuhan, China, and the M.Eng. degree in computer science from Huazhong University of Science and Technology, Wuhan, China. He received the M.Eng. and the Ph.D. degrees in computer engineering from Nanyang Technological University, Singapore. He is currently a Professor, Head of the Department of Network Engineering, and the Director of the Scientific Computing lab with the School of Computer Science, China University of Geosciences, Wuhan, China. He was an HEFCE Research Fellow with the University of Birmingham, U.K. His research interests include modelling and simulation of complex systems, neuroinformatics, and high performance computing.
%\end{IEEEbiography}

% if you will not have a photo at all:
%\begin{IEEEbiographynophoto}{John Doe}
%%Biography text here.
%\end{IEEEbiographynophoto}

% insert where needed to balance the two columns on the last page with
% biographies
%\newpage

%\begin{IEEEbiographynophoto}{Jane Doe}
%%Biography text here.
%\end{IEEEbiographynophoto}

% You can push biographies down or up by placing
% a \vfill before or after them. The appropriate
% use of \vfill depends on what kind of text is
% on the last page and whether or not the columns
% are being equalized.

%\vfill

% Can be used to pull up biographies so that the bottom of the last one
% is flush with the other column.
%\enlargethispage{-5in}

% that's all folks
\end{document}