修改bib引用2
This commit is contained in:
@@ -1,112 +1,80 @@
|
||||
@article{gorelickGoogleEarthEngine2017,
  title = {Google {{Earth Engine}}: {{Planetary-scale}} Geospatial Analysis for Everyone},
  shorttitle = {Google {{Earth Engine}}},
  author = {Gorelick, Noel and Hancher, Matt and Dixon, Mike and Ilyushchenko, Simon and Thau, David and Moore, Rebecca},
  year = 2017,
  month = dec,
  journal = {Remote Sensing of Environment},
  series = {Big {{Remotely Sensed Data}}: Tools, Applications and Experiences},
  volume = {202},
  pages = {18--27},
  issn = {0034-4257},
  doi = {10.1016/j.rse.2017.06.031},
  urldate = {2026-01-18},
  abstract = {Google Earth Engine is a cloud-based platform for planetary-scale geospatial analysis that brings Google's massive computational capabilities to bear on a variety of high-impact societal issues including deforestation, drought, disaster, disease, food security, water management, climate monitoring and environmental protection. It is unique in the field as an integrated platform designed to empower not only traditional remote sensing scientists, but also a much wider audience that lacks the technical capacity needed to utilize traditional supercomputers or large-scale commodity cloud computing resources.},
  keywords = {Analysis,Big data,Cloud computing,Data democratization,Earth Engine,Platform},
  file = {E\:\\zotero\\storage\\63PKW7Q5\\Gorelick 等 - 2017 - Google Earth Engine Planetary-scale geospatial analysis for everyone.pdf;E\:\\zotero\\storage\\E9IJLV2Y\\S0034425717302900.html}
}
|
||||
|
||||
@article{2024gridmesa,
  title = {{GridMesa}: A {NoSQL}-based big spatial data management system with an adaptive grid approximation model},
  shorttitle = {{GridMesa}},
  year = 2024,
  month = jun,
  journal = {Future Generation Computer Systems},
  volume = {155},
  pages = {324--339},
  publisher = {North-Holland},
  issn = {0167-739X},
  doi = {10.1016/j.future.2024.02.010},
  urldate = {2025-05-13},
  abstract = {Due to the urgent demand for managing massive spatial data, various spatial data management systems built on distributed NoSQL databases have emerged.\dots},
  langid = {english},
  file = {E:\zotero\storage\28M2A6FZ\2024 - GridMesa A NoSQL-based big spatial data managemen.pdf}
}
|
||||
|
||||
@article{gorelick2017google,
  title = {Google {Earth Engine}: Planetary-scale geospatial analysis for everyone},
  author = {Gorelick, Noel and Hancher, Matt and Dixon, Mike and Ilyushchenko, Simon and Thau, David and Moore, Rebecca},
  year = 2017,
  month = dec,
  journal = {Remote Sensing of Environment},
  series = {Big Remotely Sensed Data: tools, applications and experiences},
  volume = {202},
  pages = {18--27},
  issn = {0034-4257},
  doi = {10.1016/j.rse.2017.06.031},
  urldate = {2026-01-18}
}
|
||||
|
||||
@article{hong2025hybrid,
  title = {A Hybrid Approach to Integrating Deterministic and Non-Deterministic Concurrency Control in Database Systems},
  author = {Hong, Yinhao and Zhao, Hongyao and Lu, Wei and Du, Xiaoyong and Chen, Yuxing and Pan, Anqun and Zheng, Lixiong},
  year = 2025,
  month = aug,
  journal = {Proc. VLDB Endow.},
  volume = {18},
  number = {5},
  pages = {1376--1389},
  issn = {2150-8097},
  doi = {10.14778/3718057.3718066},
  urldate = {2025-10-28},
  abstract = {Deterministic and non-deterministic concurrency control algorithms have shown respective advantages under diverse workloads. Thus, a natural idea is to blend them together. However, because deterministic algorithms work with stringent assumptions, e.g., batched execution and non-interactive transactions, they hardly work together with non-deterministic algorithms. To address this issue, we propose HDCC, a hybrid approach that adaptively employs Calvin and OCC, which have distinct concurrency control and logging schemes, in the same database system. To ensure serializability and recovery correctness, we introduce lock-sharing, global validation, and two-log-interleaving mechanisms. Additionally, we introduce a rule-based assignment mechanism to dynamically select Calvin or OCC based on workload characteristics. Experimental results using TPC-C and YCSB benchmarks demonstrate that HDCC surpasses existing hybrid approaches by up to 3.1\texttimes.},
  file = {E:\zotero\storage\UIIZSXL7\Hong 等 - 2025 - A Hybrid Approach to Integrating Deterministic and Non-Deterministic Concurrency Control in Database.pdf}
}
|
||||
|
||||
@article{lewis2017australiana,
  title = {The {Australian Geoscience Data Cube} --- Foundations and lessons learned},
  author = {Lewis, Adam and Oliver, Simon and Lymburner, Leo and Evans, Ben and Wyborn, Lesley and Mueller, Norman and Raevksi, Gregory and Hooke, Jeremy and Woodcock, Rob and Sixsmith, Joshua and Wu, Wenjun and Tan, Peter and Li, Fuqin and Killough, Brian and Minchin, Stuart and Roberts, Dale and Ayers, Damien and Bala, Biswajit and Dwyer, John and Dekker, Arnold and Dhu, Trevor and Hicks, Andrew and Ip, Alex and Purss, Matt and Richards, Clare and Sagar, Stephen and Trenham, Claire and Wang, Peter and Wang, Lan-Wei},
  year = 2017,
  month = dec,
  journal = {Remote Sensing of Environment},
  series = {Big Remotely Sensed Data: tools, applications and experiences},
  volume = {202},
  pages = {276--292},
  issn = {0034-4257},
  doi = {10.1016/j.rse.2017.03.015},
  urldate = {2026-01-18},
  abstract = {The Australian Geoscience Data Cube (AGDC) aims to realise the full potential of Earth observation data holdings by addressing the Big Data challenges of volume, velocity, and variety that otherwise limit the usefulness of Earth observation data. There have been several iterations and AGDC version 2 is a major advance on previous work. The foundations and core components of the AGDC are: (1) data preparation, including geometric and radiometric corrections to Earth observation data to produce standardised surface reflectance measurements that support time-series analysis, and collection management systems which track the provenance of each Data Cube product and formalise re-processing decisions; (2) the software environment used to manage and interact with the data; and (3) the supporting high performance computing environment provided by the Australian National Computational Infrastructure (NCI). A growing number of examples demonstrate that our data cube approach allows analysts to extract rich new information from Earth observation time series, including through new methods that draw on the full spatial and temporal coverage of the Earth observation archives. To enable easy-uptake of the AGDC, and to facilitate future cooperative development, our code is developed under an open-source, Apache License, Version 2.0. This open-source approach is enabling other organisations, including the Committee on Earth Observing Satellites (CEOS), to explore the use of similar data cubes in developing countries.},
  keywords = {Australian Geoscience Data Cube,Big data,Collection management,Data cube,Geometric correction,High performance computing,High performance data,Landsat,Pixel quality,Time-series},
  file = {E:\zotero\storage\9QEM6RZG\S0034425717301086.html}
}
|
||||
|
||||
@article{mstgi,
  title = {{MSTGI}: a multi-scale spatio-temporal grid index model for remote-sensing big data retrieval},
  shorttitle = {{MSTGI}},
  journal = {Remote Sensing Letters},
  urldate = {2025-05-13},
  abstract = {To promote the transformation of remote sensing (RS) data into geoscience knowledge, it is necessary to provide better data discovery capabilities, especially when large amounts of RS data have bee...},
  langid = {english},
  file = {E\:\\zotero\\storage\\MNGVJEEV\\MSTGI a multi-scale spatio-temporal grid index mo.pdf;E\:\\zotero\\storage\\LHUZXQP8\\2150704X.2023.html}
}
|
||||
|
||||
@article{preil2025genetic,
  title = {Genetic Multi-Armed Bandits: A Reinforcement Learning Inspired Approach for Simulation Optimization},
  shorttitle = {Genetic Multi-Armed Bandits},
  author = {Preil, Deniz and Krapp, Michael},
  year = 2025,
  month = apr,
  journal = {IEEE Transactions on Evolutionary Computation},
  volume = {29},
  number = {2},
  pages = {360--374},
  issn = {1941-0026},
  doi = {10.1109/TEVC.2024.3524505},
  urldate = {2025-11-03},
  abstract = {Many real-world problems are inherently stochastic, complicating, or even precluding the use of analytical methods. These problems are often characterized by high dimensionality, large solution spaces, and numerous local optima, which make finding optimal solutions challenging. Therefore, simulation optimization is frequently employed. This article specifically focuses on the discrete case, also known as discrete optimization via simulation. Despite their adaptions for stochastic problems, previous evolutionary algorithms face a major limitation in these problems. They discard all information about solutions that are not involved in the most recent population. However, this is ineffective, as each simulation observation gathered over the course of iterations provides valuable information that should guide the selection of subsequent solutions. Inspired by the domain of reinforcement learning (RL), we propose a novel memory concept for evolutionary algorithms that ensures global convergence and significantly improves their finite time performance. Unlike previous evolutionary algorithms, our approach permanently preserves simulation observations to progressively improve the accuracy of sample means when revisiting solutions in later iterations. Moreover, the selection of new solutions is based on the entire memory rather than just the last population. The numerical experiments demonstrate that this novel approach, which combines a genetic algorithm (GA) with such memory, consistently outperforms popular convergent state-of-the-art benchmark algorithms in a large variety of established test problems while requiring considerably less computational effort. This marks the so-called genetic multi-armed bandit (MAB) as one of the currently most powerful algorithms for solving stochastic problems.},
  keywords = {Complexity theory,Convergence,Evolutionary computation,Genetic algorithms,Genetic algorithms (GAs),Genetics,multi-armed bandits (MABs),Optimization,Program processors,Reinforcement learning,reinforcement learning (RL),Resource management,Search problems,simulation,simulation optimization},
  file = {E:\zotero\storage\4NPKAMXS\Preil和Krapp - 2025 - Genetic Multi-Armed Bandits A Reinforcement Learning Inspired Approach for Simulation Optimization.pdf}
}
|
||||
|
||||
@inproceedings{rajesh2024tunio,
  title = {{TunIO}: An {AI}-powered Framework for Optimizing {HPC} {I/O}},
  booktitle = {2024 {IEEE} International Parallel and Distributed Processing Symposium ({IPDPS})},
  author = {Rajesh, Neeraj and Bateman, Keith and Bez, Jean Luca and Byna, Suren and Kougkas, Anthony and Sun, Xian-He},
  year = 2024,
  month = may,
  pages = {494--505},
  issn = {1530-2075},
  doi = {10.1109/IPDPS57955.2024.00050},
  urldate = {2025-06-10},
  abstract = {I/O operations are a known performance bottleneck of HPC applications. To achieve good performance, users often employ an iterative multistage tuning process to find an optimal I/O stack configuration. However, an I/O stack contains multiple layers, such as high-level I/O libraries, I/O middleware, and parallel file systems, and each layer has many parameters. These parameters and layers are entangled and influenced by each other. The tuning process is time-consuming and complex. In this work, we present TunIO, an AI-powered I/O tuning framework that implements several techniques to balance the tuning cost and performance gain, including tuning the high-impact parameters first. Furthermore, TunIO analyzes the application source code to extract its I/O kernel while retaining all statements necessary to perform I/O. It utilizes a smart selection of high-impact configuration parameters of the given tuning objective. Finally, it uses a novel Reinforcement Learning (RL)-driven early stopping mechanism to balance the cost and performance gain. Experimental results show that TunIO leads to a reduction of up to {$\approx$}73\% in tuning time while achieving the same performance gain when compared to H5Tuner. It achieves a significant performance gain/cost of 208.4 MBps/min (I/O bandwidth for each minute spent in tuning) over existing approaches under our testing.},
  langid = {english},
  keywords = {AI-powered I/O tuning,autotuning,Costs,I/O performance optimization,Kernel,Libraries,Middleware,Performance gain,Reinforcement learning,source code transformations,Source coding,storage stack tuning},
  file = {E:\zotero\storage\WK48YMSE\Rajesh 等 - 2024 - TunIO An AI-powered Framework for Optimizing HPC .pdf}
}
|
||||
|
||||
@@ -1,7 +1,3 @@
|
||||
---
|
||||
bibliography: ../references.bib
|
||||
---
|
||||
遥感数据的高效时空查询处理已经得到了广泛的研究,早期的工作主要集中在关系数据库系统中的元数据组织和索引级修剪。传统的方法通常扩展基于树的空间索引,如R-tree,四叉树及其时空变体,将图像足迹与时间属性组织在一起,并且通常在关系后端(例如MySQL和PostgreSQL)上实现。这些方法为中等规模的数据集提供了有效的距离过滤,但随着遥感元数据量的快速增长,它们对平衡树结构的依赖往往导致较高的维护开销和有限的可扩展性。随着数据量和数据摄取速度的不断增加,近年来的系统逐渐转向部署在分布式NoSQL存储上的基于网格的时空索引方案。通过使用GeoHash、GeoSOT或空间填充曲线[@mstgi; @2024gridmesa]将空间足迹编码为统一的空间网格,并将其与时间标识符结合,这些方法可以实现轻量级索引构建,并在HBase和Elasticsearch等后端具有更好的水平可扩展性。这种基于网格的索引可以通过粗粒度的剪枝有效地减少候选搜索空间,更适合于大规模、持续增长的遥感档案。
|
||||
|
||||
遥感数据的高效时空查询处理已经得到了广泛的研究,早期的工作主要集中在关系数据库系统中的元数据组织和索引级修剪。传统的方法通常扩展基于树的空间索引,如R-tree,四叉树及其时空变体,将图像足迹与时间属性组织在一起,并且通常在关系后端(例如MySQL和PostgreSQL)上实现。这些方法为中等规模的数据集提供了有效的距离过滤,但随着遥感元数据量的快速增长,它们对平衡树结构的依赖往往导致较高的维护开销和有限的可扩展性。随着数据量和数据摄取速度的不断增加,近年来的系统逐渐转向部署在分布式NoSQL存储上的基于网格的时空索引方案。通过使用GeoHash、GeoSOT或空间填充曲线[@GridMesaNoSQLbasedBig2024; @MSTGIMultiscaleSpatiotemporal]将空间足迹编码为统一的空间网格,并将其与时间标识符结合,这些方法可以实现轻量级索引构建,并在HBase和Elasticsearch等后端具有更好的水平可扩展性。这种基于网格的索引可以通过粗粒度的剪枝有效地减少候选搜索空间,更适合于大规模、持续增长的遥感档案。
|
||||
|
||||
然而,对于遥感工作负载,单靠索引修剪不足以保证端到端的查询效率,因为遥感工作负载中的单个图像通常很大,查询结果需要进一步的像素级处理。为了减少原始I/O的数量,谷歌Earth系统[@gorelickGoogleEarthEngine2017]依赖于平铺和多分辨率金字塔,将图像物理地分成小块。而最近的解决方案则利用COG和基于窗口的I/O来实现对整体映像文件的部分读取。OpenDataCube[@lewisAustralianGeoscienceData2017a]等框架利用这些特性只读取与查询窗口相交的图像区域,从而减少不必要的数据传输。然而,在确定候选图像之后,大多数系统仍然对每个图像执行细粒度的地理空间计算,包括坐标转换和精确的像素窗口推导,当涉及许多图像时,这可能会产生大量开销。
|
||||
然而,对于遥感工作负载,单靠索引修剪不足以保证端到端的查询效率,因为遥感工作负载中的单个图像通常很大,查询结果需要进一步的像素级处理。为了减少原始I/O的数量,谷歌Earth系统[@gorelick2017google]依赖于平铺和多分辨率金字塔,将图像物理地分成小块。而最近的解决方案则利用COG和基于窗口的I/O来实现对整体映像文件的部分读取。OpenDataCube[@lewis2017australiana]等框架利用这些特性只读取与查询窗口相交的图像区域,从而减少不必要的数据传输。然而,在确定候选图像之后,大多数系统仍然对每个图像执行细粒度的地理空间计算,包括坐标转换和精确的像素窗口推导,当涉及许多图像时,这可能会产生大量开销。
|
||||
11
绪论.md
11
绪论.md
@@ -1,18 +1,15 @@
|
||||
---
|
||||
bibliography: references.bib
|
||||
---
|
||||
|
||||
# 智能火星平台关键技术研究
|
||||
|
||||
## 1.1 选题背景
|
||||
|
||||
## 1.2 核心科学问题
|
||||
|
||||
1. 时空数据查询与读取效率低: 现有的遥感数据管理系统,如OpenDataCube、GeoMesa,通常将时空范围查询分解为解耦的两阶段执行模型。第一个阶段是元数据查询阶段,它利用时空元数据(例如足迹、时间戳)来识别与查询谓词相交的候选图像文件。最近的进展已经从传统的基于树的索引转变为基于网格编码和空间填充曲线的可扩展分布式方案,如GeoHash、GeoSOT和GeoMesa。通过利用这些高维索引结构,第一阶段的搜索复杂度已经有效地降低到$O(\log N)$甚至$O(1)$,使得元数据发现即使对于十亿规模的数据集也非常高效。 第二阶段是数据提取阶段,系统从存储在分布式文件系统或对象存储中的已识别的原始图像文件中读取实际像素数据。在现代高性能遥感数据分析中,一个关键的观察是,主要的系统瓶颈已经从根本上从第一阶段转移到第二阶段。虽然元数据搜索在几毫秒内完成,但端到端查询延迟现在主要由获取、解压缩和处理大规模原始图像所需的大量I/O开销所主导。传统的系统试图通过预切片和构建金字塔来减少I/O开销(例如,谷歌Earth Engine中使用的方法,它将元数据存储在HBase中并提供预平铺图像金字塔),但是激进的平铺增加了管理的复杂性并产生了许多小文件。最近的云优化GeoTIFF (COG)格式和支持COG的框架利用内部概述和基于窗口的I/O,只读取在空间上与查询相交的文件部分。基于windows的I/O有效地减少了原始数据传输。然而,这些系统仍然需要细粒度的、逐图像的地理空间计算来映射查询坐标并确定精确的剪辑/马赛克边界。当处理大量的候选图像时,这样的操作在计算上变得令人望而却步。此外,并发工作负载下的I/O性能进一步降低,其中多个重叠的数据检索请求争夺有限的I/O带宽和存储吞吐量,使得传统的以索引为中心的优化不足以满足实时应用程序。
|
||||
1. 时空数据查询与读取效率低:
|
||||
现有的遥感数据管理系统,如OpenDataCube、GeoMesa,通常将时空范围查询分解为解耦的两阶段执行模型。第一个阶段是元数据查询阶段,它利用时空元数据(例如足迹、时间戳)来识别与查询谓词相交的候选图像文件。最近的进展已经从传统的基于树的索引转变为基于网格编码和空间填充曲线的可扩展分布式方案,如GeoHash、GeoSOT和GeoMesa。通过利用这些高维索引结构,第一阶段的搜索复杂度已经有效地降低到$O(\log N)$甚至$O(1)$,使得元数据发现即使对于十亿规模的数据集也非常高效。 第二阶段是数据提取阶段,系统从存储在分布式文件系统或对象存储中的已识别的原始图像文件中读取实际像素数据。在现代高性能遥感数据分析中,一个关键的观察是,主要的系统瓶颈已经从根本上从第一阶段转移到第二阶段。虽然元数据搜索在几毫秒内完成,但端到端查询延迟现在主要由获取、解压缩和处理大规模原始图像所需的大量I/O开销所主导。传统的系统试图通过预切片和构建金字塔来减少I/O开销(例如,谷歌Earth Engine中使用的方法,它将元数据存储在HBase中并提供预平铺图像金字塔),但是激进的平铺增加了管理的复杂性并产生了许多小文件。最近的云优化GeoTIFF (COG)格式和支持COG的框架利用内部概述和基于窗口的I/O,只读取在空间上与查询相交的文件部分。基于窗口的I/O有效地减少了原始数据传输。然而,这些系统仍然需要细粒度的、逐图像的地理空间计算来映射查询坐标并确定精确的剪辑/马赛克边界。当处理大量的候选图像时,这样的操作在计算上变得令人望而却步。此外,并发工作负载下的I/O性能进一步降低,其中多个重叠的数据检索请求争夺有限的I/O带宽和存储吞吐量,使得传统的以索引为中心的优化不足以满足实时应用程序。
|
||||
|
||||
## 1.3 主要研究内容
|
||||
|
||||
1. 大规模遥感影像数据并发时空范围查询的I/O高效方法:\
|
||||
首先,我们提出了一种具有I/O意识的索引方法,以实现对大规模遥感图像的细粒度部分查询和读取避免昂贵的每次查询地理空间计算;通过修改事务级混合并发方法[@hongHybridApproachIntegrating2025],将Calvin确定性并发控制与乐观并发控制相结合,提出了一种用于并发环境下时空范围查询的混合并发控制协议。此外,为了提高遥感数据时空范围查询的I/O性能,提出了一种自动I/O调优方法。该方法扩展了现有的基于代理辅助的遗传多臂老虎机算法[@preilGeneticMultiArmedBandits2025]的AI驱动I/O调优框架[@rajeshTunIOAIpoweredFramework2024]。
|
||||
1. 大规模遥感影像数据并发时空范围查询的I/O高效方法:
|
||||
首先,我们提出了一种具有I/O意识的索引方法,以实现对大规模遥感图像的细粒度部分查询和读取避免昂贵的每次查询地理空间计算;通过修改事务级混合并发方法[@hong2025hybrid],将Calvin确定性并发控制与乐观并发控制相结合,提出了一种用于并发环境下时空范围查询的混合并发控制协议。此外,为了提高遥感数据时空范围查询的I/O性能,提出了一种自动I/O调优方法。该方法扩展了现有的基于代理辅助的遗传多臂老虎机算法[@preil2025genetic]的AI驱动I/O调优框架[@rajesh2024tunio]。
|
||||
|
||||
## 1.4 主要创新点和贡献
|
||||
Reference in New Issue
Block a user