Files
secondo/Algebras/CDACSpatialJoin/CacheTest.cpp
2026-01-23 17:03:45 +08:00

595 lines
22 KiB
C++

/*
----
This file is part of SECONDO.
Copyright (C) 2019,
Faculty of Mathematics and Computer Science,
Database Systems for New Applications.
SECONDO is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
SECONDO is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with SECONDO; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
----
//[<] [\ensuremath{<}]
//[>] [\ensuremath{>}]
\setcounter{tocdepth}{2}
\tableofcontents
1 CacheTest Operator
1.1 Imports
*/
#include <cassert>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <random>
#include <utility>
#include <vector>
#include "CacheTest.h"
#include "Utils.h"
#include "QueryProcessor.h" // -> AlgebraManager.h -> NestedList.h
#include "Symbols.h"
#include "StandardTypes.h"
#include "ListUtils.h"
using namespace cdacspatialjoin;
using namespace std;
/*
1.2 Class OperatorInfo
A subclass of class ~OperatorInfo~ is defined with information on the operator.
*/
class CacheTest::Info : public OperatorInfo {
public:
   /* Fills the inherited description fields of the cacheTest operator. */
   Info() {
      // identification and calling convention
      name = "cacheTest";
      signature = "int -> bool";
      syntax = "cacheTest (_)";
      example = "query cacheTest(128);";

      // free-text description of the operator
      meaning =
         "Tests the speed and cooperation of the available caches and the main "
         "memory by performing both sequential and random read accesses to "
         "array scopes of different sizes. "
         "The intensity parameter should be 128 or higher for results to be "
         "significant. "
         "Test duration is a few seconds * intensity.";
   }
};
std::shared_ptr<Operator> CacheTest::getOperator() {
return std::make_shared<Operator>(
Info(),
&CacheTest::valueMapping,
&CacheTest::typeMapping);
}
/*
1.3 Type Mapping
*/
/* Accepts exactly one argument of type int and maps it to bool; any other
   argument list yields a type error. */
ListExpr CacheTest::typeMapping(ListExpr args) {
   // exactly one argument must be given ...
   const bool singleArgument = (nl->ListLength(args) == 1);
   if (!singleArgument) {
      return listutils::typeError("One argument expected.");
   }

   // ... and that argument (the intensity) must be an int
   if (CcInt::checkType(nl->First(args))) {
      return nl->SymbolAtom(CcBool::BasicType());
   }
   return listutils::typeError("argument 1: int expected");
}
/*
1.4 Value Mapping
*/
/* Runs the cache test once for every access type and prints the results to
   cout.

   args[0] holds the intensity (CcInt). An undefined intensity yields an
   undefined result without running any test. A negative intensity is treated
   like 0 (which CacheTestLocalInfo turns into a single quick run) instead of
   being converted into a huge unsigned value.

   The parameters "message" and "local" are not used. Always returns 0. */
int CacheTest::valueMapping(
        Word* args, Word& result, int message, Word& local, Supplier s) {
   CcInt* const intensityArg = static_cast<CcInt*>(args[0].addr);

   // nothing sensible can be tested with an undefined intensity
   if (!intensityArg->IsDefined()) {
      qp->ResultStorage<CcBool>(result, s).Set(false, false);
      return 0;
   }

   // clamp negative values BEFORE converting to size_t; a plain cast would
   // wrap around and make the test loops run practically forever
   const auto rawIntensity = intensityArg->GetValue();
   const size_t intensity =
           (rawIntensity > 0) ? static_cast<size_t>(rawIntensity) : 0;

#ifdef TIMER_USES_PAPI
   // Timer::testPAPIOverhead(cout);
#endif

   // report the available caches
   CacheInfos::report(cout);

   // perform one complete test series per access type
   const CacheTestLocalInfo::ACCESS_TYPE accessTypes[] {
           CacheTestLocalInfo::ACCESS_TYPE::sequentialAccess,
           CacheTestLocalInfo::ACCESS_TYPE::reverseSequentialAccess,
           CacheTestLocalInfo::ACCESS_TYPE::sequentialTwoLists,
           CacheTestLocalInfo::ACCESS_TYPE::randomAccess };
   for (const CacheTestLocalInfo::ACCESS_TYPE accessType : accessTypes) {
      CacheTestLocalInfo li {cout, accessType, intensity};
      li.test(cout);
   }

   // print warning if intensity is small (small value may of course be used
   // for purposes of testing the operator)
   if (intensity < CacheTestLocalInfo::RECOMMENDED_INTENSITY) {
      cout << endl << "##### please use intensity = "
           << CacheTestLocalInfo::RECOMMENDED_INTENSITY << " or higher "
           << "for significant results #####" << endl;
   }
   cout << endl;

   qp->ResultStorage<CcBool>(result, s).Set(true, true);
   return 0;
}
// ========================================================
/*
1.5 CacheTestLocalInfo class
*/
/* Prepares one cache test series for the given access type:
   - prints the title of the series to "out",
   - determines the largest available data cache,
   - allocates the "data" array (8 times the cache size rounded up to a power
     of two, at least 8 MiB) and the "overwriteData" array (size of the
     largest cache) which is used to evict previous test data from the caches,
   - prints an explanation of the test and the header of the result table.

   An intensity of 0 is treated as intensity 1 with a single test run
   (instead of NORMAL_TEST_COUNT runs). */
CacheTestLocalInfo::CacheTestLocalInfo(ostream& out,
        const ACCESS_TYPE accessType_,
        const size_t intensity_ /* = 128 */) :
        accessType(accessType_),
        testCount(intensity_ == 0 ? 1 : NORMAL_TEST_COUNT),
        intensity(intensity_ == 0 ? 1 : intensity_) {

   // print cache test title
   out << setfill(' ') << endl;
   switch (accessType) {
      case sequentialAccess:
         out << "Cache test with sequential read access:" << endl;
         break;
      case reverseSequentialAccess:
         out << "Cache test with reverse sequential read access:" << endl;
         break;
      case sequentialTwoLists:
         out << "Cache test with sequential read access on two lists:" << endl;
         break;
      case randomAccess:
         out << "Cache test with random read access:" << endl;
         break;
      default:
         assert (false); // unexpected type
         break;
   }

   // since random number generation takes a lot of time, each random array
   // entry is only accessed "intensity / randomDenom" times (on average),
   // rather than "intensity" times:
   static_assert (1024 % randomDenom == 0,
           "randomDenom must be a factor of 1024");

   // retrieve the size of the largest cache (e.g. 6 MB L3 cache) by probing
   // the levels from 5 downwards; in case getCacheInfo() does not work on
   // this system, use 8 MB as a standard value and cache level 0
   size_t maxCacheSize = 8 * 1024 * 1024;
   maxCacheLevel = 0;
   for (unsigned level = 5; level > 0; --level) {
      const CacheInfoPtr cacheInfo =
              CacheInfos::getCacheInfo(CacheType::Data, level);
      if (cacheInfo) {
         maxCacheSize = cacheInfo->sizeInBytes;
         maxCacheLevel = level;
         break;
      }
   }

   // create a "data" array of integer values that is at least 8 times larger
   // than the largest available cache; the byte count is kept a power of two
   // so it can be evenly divided into scopes of 1 KiB, 2 KiB, 4 KiB, ...
   size_t dataByteCount = 1024 * 1024; // at least 1 MiB
   while (dataByteCount < maxCacheSize)
      dataByteCount *= 2;
   dataByteCount *= 8;
   dataCount = dataByteCount / ENTRY_BYTE_COUNT;
   data = new entryType[dataCount];

   // create another array large enough to "clear" all caches when starting a
   // test (so cache content from a previous test does not influence the test)
   overwriteDataCount = maxCacheSize / ENTRY_BYTE_COUNT;
   overwriteData = new entryType[overwriteDataCount];
   for (size_t i = 0; i < overwriteDataCount; ++i)
      overwriteData[i] = i;

   // explain the tests to be performed
   out << "- using data array of " << formatInt(dataCount) << " entries"
       << " * " << formatInt(ENTRY_BYTE_COUNT) << " bytes = "
       << formatInt(ENTRY_BYTE_COUNT * dataCount / (1024 * 1024)) << " MiB"
       << endl;
   switch (accessType) {
      case sequentialAccess: {
         out << "- sequentially reading scopes of different sizes, "
             << "repeating each scope " << intensity << " times" << endl;
         out << "- e.g., array entries 0..1023 (scope size 8 KiB) are read "
             << intensity << " times, then entries 1024..2047 etc." << endl;
         break;
      }
      case reverseSequentialAccess: {
         out << "- reading scopes of different sizes in reverse sequential "
             << "order, repeating each scope " << intensity << " times" << endl;
         out << "- e.g., array entries 0..1023 (scope size 8 KiB) are read "
             << intensity << " times, then entries 1024..2047 etc." << endl;
         break;
      }
      case sequentialTwoLists: {
         out << "- for different scope sizes, two scopes are randomly selected "
             << "and then scanned " << intensity << " times, alternately "
             << "reading entries from scope 1 and 2" << endl;
         out << "- e.g., a read sequence is entries 0, 8192, 1, 8193, 2, 8194, "
             << "..., 1023, 9215 for scope size 8 KiB." << endl;
         break;
      }
      case randomAccess: {
         out << "- reading random entries from scopes of different sizes; "
             << "on average, each entry is read "
             << intensity / static_cast<double>(randomDenom)
             << " times" << endl;
         out << "- e.g., "
             << intensity * 1024 / static_cast<double>(randomDenom) << " "
             << "random entries are read from array scope 0..1023 "
             << "(scope size 8 KiB), then random entries from scope 1024..2047 "
             << "etc." << endl;
         break;
      }
      default:
         assert (false); // unexpected type
         break;
   }
   if (testCount > 1) {
      out << "- each test is performed " << testCount << " times, "
          << "average results are reported" << endl;
   }
   out << "- horizontal separators show into which cache level the different "
       << "scope sizes fit" << endl;
   out << endl;

   // print the table header; the column widths (17 | 24 | 13 | 16 | 15 | 15)
   // match the separator line and the data rows printed by reportTest()
   out << "      scope size |   read access duration |";
#ifdef TIMER_USES_PAPI
   out << " L1-I Misses | L1-Data Misses |     L2 Misses |     L3 Misses |";
#endif
   out << "  test avg - loops only = access only" << endl;
}
/* Releases the two arrays allocated by the constructor. */
CacheTestLocalInfo::~CacheTestLocalInfo() {
   // free in reverse order of allocation
   delete[] overwriteData;
   delete[] data;
}
/* Runs the test series: starting with a scope size of 1 KiB, the scope size
   is doubled until it no longer evenly divides the data array. Each scope
   size is tested by testScope() and reported as one table row by
   reportTest(). Finally, the check sums are printed. */
void CacheTestLocalInfo::test(ostream& out) {
   // a constant seed keeps the pseudo-random sequences (and therefore the
   // test) reproducible
   const unsigned long seed = 1;

   // the check sums make the array accesses observable so the compiler
   // cannot "optimize away" the test loops
   size_t accessCheckSum = 0;
   size_t loopCheckSum = 0;

   const size_t totalBytes = dataCount * ENTRY_BYTE_COUNT;
   unsigned level = 1; // is increased once a scope exceeds the cache size
   bool levelChanged = true; // the first row always gets a separator
   Timer timer { TASK_COUNT };

   for (size_t sizeKiB = 1; totalBytes % (sizeKiB * 1024) == 0;
        sizeKiB *= 2) {
      // sequentialTwoLists needs two distinct scopes, so the last scope
      // size (which covers the full array) is omitted for this access type
      const size_t scopes = totalBytes / (sizeKiB * 1024);
      if (scopes == 1 && accessType == sequentialTwoLists)
         break;

      // measure this scope size
      timer.reset();
      testScope(sizeKiB, seed, accessCheckSum, loopCheckSum, timer);

      // does this scope size exceed the cache level reported so far?
      const CacheInfoPtr levelInfo =
              CacheInfos::getCacheInfo(CacheType::Data, level);
      if (levelInfo && sizeKiB > levelInfo->getSizeInKiB()) {
         ++level;
         levelChanged = true;
      }

      // print the table row (preceded by a separator on a level change)
      reportTest(out, sizeKiB, level, levelChanged, timer);
      levelChanged = false;
   }

   // printing the check sums is what keeps the compiler from removing them,
   // so this output must not be deleted
   out << "(check-sums: " << formatInt(accessCheckSum) << ", "
       << formatInt(loopCheckSum) << ")" << endl;
}
/* Performs the test for one scope size (testCount) times and records the
   durations in the given timer.

   scopeSizeKiB: size of a scope in KiB; must be positive and must evenly
                 divide the data array
   rndSeed:      seed for the order of the scopes and for the random cycles
   sum1:         check sum of the actual data accesses
   sum2:         check sum of the cache clearing and the "loops only" run
   timer:        receives the CacheTestTask::fullTest and
                 CacheTestTask::loopTest durations */
void CacheTestLocalInfo::testScope(const size_t scopeSizeKiB,
        const unsigned long rndSeed,
        size_t& sum1, size_t& sum2, Timer& timer) {
   const size_t entriesPerScope = scopeSizeKiB * 1024 / ENTRY_BYTE_COUNT;
   // must be checked before entriesPerScope is used as a divisor
   assert (entriesPerScope > 0);

   const size_t scopeCount = dataCount / entriesPerScope;
   const size_t scopeCountHalf = scopeCount / 2;
   const size_t iterationsPerScope = (accessType == randomAccess) ?
           entriesPerScope * intensity / randomDenom :
           entriesPerScope * intensity;
   assert (scopeCount * entriesPerScope == dataCount);
   assert (accessType != sequentialTwoLists || scopeCount % 2 == 0);

   const bool singleList = (accessType == sequentialAccess ||
                            accessType == reverseSequentialAccess ||
                            accessType == randomAccess);
   const bool twoLists = (accessType == sequentialTwoLists);

   // create a random sequence of scopes. The generator, the distribution and
   // the swap loop are deliberately kept as they are, so a given seed keeps
   // producing the same scope order
   std::mt19937 rndGenerator(rndSeed);
   std::uniform_int_distribution<size_t> randomScope(0, scopeCount - 1);
   // a vector owns the start indices, so they are released on every exit
   // path (including exceptions)
   std::vector<size_t> scopeStarts(scopeCount);
   // reverse access starts at the last entry of a scope, all other access
   // types start at the first entry
   const size_t startOffset =
           (accessType == reverseSequentialAccess) ? entriesPerScope - 1 : 0;
   for (size_t i = 0; i < scopeCount; ++i)
      scopeStarts[i] = i * entriesPerScope + startOffset;
   for (size_t i = 0; i < scopeCount; ++i) {
      const size_t j = randomScope(rndGenerator);
      std::swap(scopeStarts[i], scopeStarts[j]);
   }
   // the timed loops read the start indices through a plain pointer
   const size_t* const randomScopeStart = scopeStarts.data();

   // prepare data[] by writing into each entry the index of the next entry
   // that must be visited within the scope
   if (accessType == sequentialAccess || accessType == sequentialTwoLists) {
      createSequentialCycles(scopeCount, entriesPerScope);
   } else if (accessType == reverseSequentialAccess) {
      createReverseSequentialCycles(scopeCount, entriesPerScope);
   } else if (accessType == randomAccess) {
      createRandomCycles(scopeCount, entriesPerScope, rndSeed);
   } else {
      assert (false); // unexpected accessType
   }

   // perform the test (testCount) times
   for (unsigned test = 0; test < testCount; ++test) {
      // clear all caches from data from the last test
      overwriteCaches(sum2);

      // perform the actual test; the use of locality depends on the scope
      // size ("entriesPerScope")
      timer.start(CacheTestTask::fullTest);
      if (singleList) {
         // iterate over the scopes of the given size
         for (size_t scope = 0; scope < scopeCount; ++scope) {
            size_t index = randomScopeStart[scope];
            // follow the cycle stored in this scope's entries
            for (size_t entry = 0; entry < iterationsPerScope; ++entry) {
               index = data[index];
            }
            // ensure that the loop is not "optimized away"
            sum1 += index;
         }
      } else if (twoLists) {
         // the outer loop uses only half the scopeCount as two scopes will
         // be accessed each time
         for (size_t scope = 0; scope < scopeCountHalf; ++scope) {
            // two different scopes from the random sequence
            size_t index1 = randomScopeStart[scope];
            size_t index2 = randomScopeStart[scopeCountHalf + scope];
            // alternately access the entries of both scopes
            for (size_t entry = 0; entry < iterationsPerScope; ++entry) {
               index1 = data[index1];
               index2 = data[index2];
            }
            // ensure that the loop is not "optimized away"
            sum1 += index1 + index2;
         }
      } else {
         assert (false); // unexpected accessType
      }
      timer.stop();

      // measure the time used for the loops only (without data access) to
      // subtract it from the first duration
      timer.start(CacheTestTask::loopTest);
      if (singleList) {
         // increment sum2 using the same loop ranges as above
         for (size_t scope = 0; scope < scopeCount; ++scope) {
            size_t index = randomScopeStart[scope];
            for (size_t entry = 0; entry < iterationsPerScope; ++entry) {
               ++index;
            }
            // ensure that the loop is not "optimized away"
            sum2 += index;
         }
      } else if (twoLists) {
         // increment sum2 using the same loop ranges and scopes as above
         for (size_t scope = 0; scope < scopeCountHalf; ++scope) {
            size_t index1 = randomScopeStart[scope];
            size_t index2 = randomScopeStart[scopeCountHalf + scope];
            for (size_t entry = 0; entry < iterationsPerScope; ++entry) {
               ++index1;
               ++index2;
            }
            // ensure that the loop is not "optimized away"
            sum2 += index1 + index2;
         }
      } else {
         assert (false); // unexpected accessType
      }
      timer.stop();
   }
}
/* Links the entries of every scope into one ascending cycle: each entry
   holds the index of its successor, the last entry of a scope points back to
   the first entry of that scope. */
void CacheTestLocalInfo::createSequentialCycles(const size_t scopeCount,
        const size_t entriesPerScope) const {
   size_t scopeBegin = 0;
   for (size_t scope = 0; scope < scopeCount; ++scope) {
      const size_t scopeEnd = scopeBegin + entriesPerScope;

      // every entry refers to the entry that follows it ...
      for (size_t i = scopeBegin; i != scopeEnd; ++i) {
         data[i] = i + 1;
      }
      // ... except for the last one, which closes the cycle
      data[scopeEnd - 1] = scopeBegin;

      scopeBegin = scopeEnd;
   }
}
/* Links the entries of every scope into one descending cycle: each entry
   holds the index of its predecessor, the first entry of a scope points to
   the last entry of that scope. */
void CacheTestLocalInfo::createReverseSequentialCycles(const size_t scopeCount,
        const size_t entriesPerScope) const {
   size_t scopeBegin = 0;
   for (size_t scope = 0; scope < scopeCount; ++scope) {
      const size_t scopeEnd = scopeBegin + entriesPerScope;

      // the first entry closes the cycle by referring to the last entry
      data[scopeBegin] = scopeEnd - 1;
      // all other entries refer to the entry preceding them
      for (size_t i = scopeBegin + 1; i < scopeEnd; ++i) {
         data[i] = i - 1;
      }

      scopeBegin = scopeEnd;
   }
}
void CacheTestLocalInfo::createRandomCycles(size_t scopeCount,
size_t entriesPerScope, const unsigned long rndSeed) const {
// initialize the random number generator
std::mt19937 rndGenerator(rndSeed);
// use auxiliary array
auto aux = new entryType[entriesPerScope];
// iterate over the scopes of the given size
for (size_t scope = 0; scope < scopeCount; ++scope) {
const size_t start = scope * entriesPerScope;
// fill aux array with entry indices of this scope,
// omitting first entry (e.g., aux = { 1025, 1026, ... 2047 })
size_t auxSize = entriesPerScope - 1;
for (size_t i = 0; i < auxSize; ++i)
aux[i] = start + i + 1;
// fill data with a random sequence of indices in this scope that
// form a single cycle (so, by following this cycle, all entries in
// this scope are being visited)
size_t entry = start;
while (auxSize > 0) {
// get random aux index
std::uniform_int_distribution<size_t> randomAux(0, auxSize - 1);
const size_t auxIndex = randomAux(rndGenerator);
const size_t nextEntry = aux[auxIndex];
data[entry] = nextEntry;
entry = nextEntry;
// remove entry from aux (replacing it with the last aux entry)
aux[auxIndex] = aux[--auxSize];
}
// set last entry in this sequence to the index of the first in scope
data[entry] = start;
}
/*
// test the sequence
for (size_t entry = 0; entry < entriesPerScope; ++entry)
aux[entry] = 0;
size_t index = 0;
for (size_t entry = 0; entry < entriesPerScope; ++entry) {
++aux[index];
index = data[index];
}
for (size_t entry = 0; entry < entriesPerScope; ++entry)
assert (aux[entry] == 1);
*/
delete[] aux;
}
void CacheTestLocalInfo::overwriteCaches(size_t& sum) {
// clear all caches by sequentially reading the overwriteData
// which is large enough to fill the largest cache on this machine
size_t count = overwriteDataCount;
for (size_t i = 0; i < count; ++i)
sum += overwriteData[i];
for (size_t i = 0; i < count; ++i)
sum -= overwriteData[i];
}
/* Prints one row of the result table for the given scope size.

   out:             the stream to print to
   scopeSizeKiB:    the scope size that was tested
   cacheLevel:      the cache level the scope size fits into; a value above
                    maxCacheLevel is reported as "RAM"
   printCacheLevel: if true, a horizontal separator is printed first and the
                    row is labeled with the cache level
   timer:           provides the average durations (and, with PAPI, the
                    average cache misses) of the test */
void CacheTestLocalInfo::reportTest(ostream& out, const size_t scopeSizeKiB,
        const unsigned cacheLevel, const bool printCacheLevel,
        Timer& timer) const {
   if (printCacheLevel) {
      // print horizontal separator
      out << "-----------------+------------------------+";
#ifdef TIMER_USES_PAPI
      out << "-------------+----------------+---------------+---------------+";
#endif
      out << "-------------------------------------" << endl;
      // report which cache level the current scope size fits into
      if (cacheLevel <= maxCacheLevel)
         out << "L" << cacheLevel << " "; // e.g., "L2 " for L2 data cache
      else
         out << "RAM";
   } else {
      // same width as the labels "L2 " / "RAM", so the columns of unlabeled
      // rows line up with the labeled rows and with the separator
      out << "   ";
   }

   // determine the number of read access operations performed; the product
   // is calculated in double so it cannot overflow size_t
   double accessCount =
           static_cast<double>(dataCount) * static_cast<double>(intensity);
   if (accessType == randomAccess) {
      accessCount /= static_cast<double>(randomDenom);
   } // otherwise, keep accessCount

   // get average duration of the test (the timer keeps track of the number of
   // task calls (testCount) and can therefore provide the average values)
   const Task* fullTest = timer.getTask(CacheTestTask::fullTest);
   const Task* loopTest = timer.getTask(CacheTestTask::loopTest);
   const clock_t fullTestTime = fullTest->getAvgTime();
   const clock_t loopTestTime = loopTest->getAvgTime();
   // duration of the array accesses without the loop overhead
   const clock_t arrayAccessTime = fullTestTime - loopTestTime;
   const auto arrayAccessTimePer1E9 = static_cast<clock_t>(
           arrayAccessTime * 1.0E9 / accessCount);

   // report test result (i.e. one line of the result table)
   // note that 1E09 = German "Milliarde" = English "billion" (used here)
   // but 1E12 = German "Billion" = English "trillion" (not used here)
   out << setw(9) << formatInt(scopeSizeKiB) << " KiB |"
       << setw(11) << formatMillis(arrayAccessTimePer1E9) << " per billion |";
#ifdef TIMER_USES_PAPI
   out << setw(12) << formatInt(fullTest->getAvgL1InstrCacheMisses()) << " |"
       << setw(15) << formatInt(fullTest->getAvgL1DataCacheMisses()) << " |"
       << setw(14) << formatInt(fullTest->getAvgL2CacheMisses()) << " |"
       << setw(14) << formatInt(fullTest->getAvgL3CacheMisses()) << " |";
#endif
   out << setw(10) << formatMillis(fullTestTime) << " - "
       << setw(10) << formatMillis(loopTestTime) << " = "
       << setw(10) << formatMillis(arrayAccessTime) << endl;
}