Skip to content

Commit

Permalink
Fixes to the archives checksumming performance (and correctness) (#1914)
Browse files Browse the repository at this point in the history
* Attempt to fix the performance degradation during the archives checksumming
* Report the game/map archives checksum acquisition time
* Try to scrape date available in ArchiveCache17.lua
  • Loading branch information
lhog authored Jan 24, 2025
1 parent 0d20f38 commit 3b024ff
Show file tree
Hide file tree
Showing 13 changed files with 85 additions and 74 deletions.
4 changes: 4 additions & 0 deletions rts/Game/PreGame.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,7 @@ void CPreGame::StartServer(const std::string& setupscript)
startGameSetup->LoadStartPositions();

{
const auto st = spring_gettime();
archiveScanner->ResetNumFilesHashed();
const std::string mapArchive = archiveScanner->ArchiveFromName(startGameSetup->mapName);
const auto mapChecksum = archiveScanner->GetArchiveCompleteChecksumBytes(mapArchive);
Expand All @@ -323,6 +324,9 @@ void CPreGame::StartServer(const std::string& setupscript)
sha512::dump_digest(modChecksum, modChecksumHex);

LOG("[PreGame::%s]\n\tmod-checksum=%s\n\tmap-checksum=%s", __func__, modChecksumHex.data(), mapChecksumHex.data());
LOG("[PreGame::%s] Game/Map archives checksum acquisition took = %ld microseconds", __func__, (spring_gettime() - connectTimer).toMilliSecsi());

archiveScanner->WriteCache();
}

good_fpu_control_registers("before CGameServer creation");
Expand Down
93 changes: 53 additions & 40 deletions rts/System/FileSystem/ArchiveScanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "System/Exceptions.h"
#include "System/Threading/ThreadPool.h"
#include "System/FileSystem/RapidHandler.h"
#include "System/FileSystem/Archives/PoolArchive.h"
#include "System/Log/ILog.h"
#include "System/Threading/SpringThreading.h"
#include "System/UnorderedMap.hpp"
Expand Down Expand Up @@ -53,7 +54,7 @@ LOG_REGISTER_SECTION_GLOBAL(LOG_SECTION_ARCHIVESCANNER)
* but mapping them all, every time to make the list is)
*/

constexpr static int INTERNAL_VER = 17;
constexpr static int INTERNAL_VER = 18;


/*
Expand Down Expand Up @@ -611,20 +612,22 @@ void CArchiveScanner::ReadCache()
{
Clear();

const std::array<std::string, 2> cachePaths {
FileSystem::EnsurePathSepAtEnd(FileSystem::GetCacheDir() ) + IntToString(INTERNAL_VER, "ArchiveCache%i.lua"),
FileSystem::EnsurePathSepAtEnd(FileSystem::GetOldCacheDir()) + IntToString(INTERNAL_VER, "ArchiveCache%i.lua")
};

for (const auto& cachePath : cachePaths) {
if (ReadCacheData(cachePath)) {
break;
const auto oldCacheFile = FileSystem::EnsurePathSepAtEnd(FileSystem::GetCacheDir()) + IntToString(INTERNAL_VER - 1, "ArchiveCache%i.lua");
cacheFile = FileSystem::EnsurePathSepAtEnd(FileSystem::GetCacheDir()) + IntToString(INTERNAL_VER , "ArchiveCache%i.lua");

if (!FileSystem::FileExists(cacheFile)) {
// Try to save initial scanning of assets, but will have to redo hashing
// as the previous version had bugs in that area
if (ReadCacheData(oldCacheFile, true)) {
// nullify hashes
for (auto& ai : archiveInfos) {
memset(ai.checksum, 0, sizeof(ai.checksum));
isDirty = true;
}
}
}

// file to write to in WriteCache()
cacheFile = cachePaths.front();

ReadCacheData(GetFilepath());
ScanAllDirs();
}

Expand Down Expand Up @@ -953,18 +956,35 @@ bool CArchiveScanner::GetArchiveChecksum(const std::string& archiveName, Archive
if (ar == nullptr)
return false;

int numParallelFileReads = 1; // compressed archives have a mutex, can't read files in parallel
#ifdef _WIN32
static constexpr int NUM_PARALLEL_FILE_READS_SD = 4;
#else
// Linux FS even on spinning disk seems far more tollerant to parallel reads, use all threads
const int NUM_PARALLEL_FILE_READS_SD = ThreadPool::GetNumThreads();
#endif // _WIN32

if (ar->GetType() == ARCHIVE_TYPE_SDD) {
if (bool isOnSpinningDisk = FileSystem::IsPathOnSpinningDisk(archiveName); isOnSpinningDisk) {
// decrease spinning disk thrashing, read using one thread only
numParallelFileReads = 1;
} else {
numParallelFileReads = ThreadPool::GetNumThreads();
}
int numParallelFileReads;

switch (ar->GetType())
{
case ARCHIVE_TYPE_SDP: {
auto isOnSpinningDisk = FileSystem::IsPathOnSpinningDisk(CPoolArchive::GetPoolRootDirectory(archiveName));
// each file is one gzip instance, can MT
numParallelFileReads = isOnSpinningDisk ? NUM_PARALLEL_FILE_READS_SD : ThreadPool::GetNumThreads();
} break;
case ARCHIVE_TYPE_SDD: {
auto isOnSpinningDisk = FileSystem::IsPathOnSpinningDisk(archiveName);
// just a file, can MT
numParallelFileReads = isOnSpinningDisk ? NUM_PARALLEL_FILE_READS_SD : ThreadPool::GetNumThreads();
} break;
case ARCHIVE_TYPE_SDZ: [[fallthrough]]; // mutex locked, not thread safe, makes no sense to throw more threads on it
case ARCHIVE_TYPE_SD7: [[fallthrough]]; // mutex locked, not thread safe, makes no sense to throw more threads on it
default: // just default to 1 thread
numParallelFileReads = 1;
break;
}

numParallelFileReads = std::max(numParallelFileReads, 1);
numParallelFileReads = std::min(numParallelFileReads, ThreadPool::GetNumThreads());

// load ignore list, and insert all files to check in lowercase format
std::unique_ptr<IFileFilter> ignore(CreateIgnoreFilter(ar.get()));
Expand All @@ -979,14 +999,14 @@ bool CArchiveScanner::GetArchiveChecksum(const std::string& archiveName, Archive
fileNames.reserve(ar->NumFiles());
fileHashes.reserve(ar->NumFiles());

for (unsigned fid = 0; fid != ar->NumFiles(); ++fid) {
const std::pair<std::string, int>& info = ar->FileInfo(fid);
for (unsigned fid = 0; fid < ar->NumFiles(); ++fid) {
const auto& [filename, fileSize] = ar->FileInfo(fid);

if (ignore->Match(info.first))
if (ignore->Match(filename))
continue;

// create case-insensitive hashes
fileNames.push_back(StringToLower(info.first));
fileNames.push_back(StringToLower(filename));
fileHashes.emplace_back();
}

Expand All @@ -1000,20 +1020,14 @@ bool CArchiveScanner::GetArchiveChecksum(const std::string& archiveName, Archive

std::counting_semaphore sem(numParallelFileReads);

auto ComputeHashesTask = [&ar, &fileNames, &fileHashes, &sem, this](size_t fidx) -> void {
const auto& fileName = fileNames[fidx];
auto ComputeHashesTask = [&ar, &fileHashes, &sem, this](size_t fidx) -> void {
auto& fileHash = fileHashes[fidx];
auto& fileBuffer = fileBuffers[ThreadPool::GetThreadNum()];
fileBuffer.clear();

sem.acquire();
ar->GetFile(fileName, fileBuffer);
numFilesHashed.fetch_add(static_cast<uint32_t>(ar->CalcHash(fidx, fileHash.data(), fileBuffer)));
sem.release();

if (!fileBuffer.empty())
sha512::calc_digest(fileBuffer.data(), fileBuffer.size(), fileHash.data());

numFilesHashed.fetch_add(1);
};

for (size_t i = 0; i < fileNames.size(); ++i) {
Expand All @@ -1031,12 +1045,11 @@ bool CArchiveScanner::GetArchiveChecksum(const std::string& archiveName, Archive
}
#else
for_mt(0, fileNames.size(), [&](const int i) {
const auto& fileName = fileNames[i];
auto& fileHash = fileHashes[i];
auto ComputeHashesTask = [&ar, &fileName, &fileHash]() -> void {
ar->CalcHash(ar->FindFile(fileName), fileHash.data(), fileBuffers[ThreadPool::GetThreadNum()]);
auto& fileHash = fileHashes[i];
auto ComputeHashesTask = [&ar, &fileHash](int fidx) -> void {
ar->CalcHash(fidx, fileHash.data(), fileBuffers[ThreadPool::GetThreadNum()]);
};
ComputeHashesTask();
ComputeHashesTask(i);
});
#endif

Expand All @@ -1060,7 +1073,7 @@ bool CArchiveScanner::GetArchiveChecksum(const std::string& archiveName, Archive
}


bool CArchiveScanner::ReadCacheData(const std::string& filename)
bool CArchiveScanner::ReadCacheData(const std::string& filename, bool loadOldVersion)
{
std::lock_guard<decltype(scannerMutex)> lck(scannerMutex);
if (!FileSystem::FileExists(filename)) {
Expand All @@ -1080,7 +1093,7 @@ bool CArchiveScanner::ReadCacheData(const std::string& filename)

// Do not load old version caches
const int ver = archiveCacheTbl.GetInt("internalver", (INTERNAL_VER + 1));
if (ver != INTERNAL_VER)
if (ver != INTERNAL_VER && !loadOldVersion)
return false;

for (int i = 1; archivesTbl.KeyExists(i); ++i) {
Expand Down
5 changes: 3 additions & 2 deletions rts/System/FileSystem/ArchiveScanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,8 @@ class CArchiveScanner
void Clear();
void Reload();

void WriteCache();

std::string ArchiveFromName(const std::string& versionedName) const;
std::string NameFromArchive(const std::string& archiveName) const;
std::string GameHumanNameFromArchive(const std::string& archiveName) const;
Expand Down Expand Up @@ -189,7 +191,6 @@ class CArchiveScanner

private:
void ReadCache();
void WriteCache();

ArchiveInfo& GetAddArchiveInfo(const std::string& lcfn);
BrokenArchive& GetAddBrokenArchive(const std::string& lcfn);
Expand All @@ -207,7 +208,7 @@ class CArchiveScanner
std::string SearchMapFile(const IArchive* ar, std::string& error);


bool ReadCacheData(const std::string& filename);
bool ReadCacheData(const std::string& filename, bool loadOldVersion = false);
void WriteCacheData(const std::string& filename);

IFileFilter* CreateIgnoreFilter(IArchive* ar);
Expand Down
4 changes: 0 additions & 4 deletions rts/System/FileSystem/Archives/BufferedArchive.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,6 @@

#include <cassert>

spring::mutex CBufferedArchive::archiveLock;


CBufferedArchive::~CBufferedArchive()
{
// filter archives for which only {map,mod}info.lua was accessed
Expand All @@ -21,7 +18,6 @@ CBufferedArchive::~CBufferedArchive()

bool CBufferedArchive::GetFile(unsigned int fid, std::vector<std::uint8_t>& buffer)
{
std::scoped_lock lck(archiveLock);
assert(IsFileId(fid));

int ret = 0;
Expand Down
6 changes: 0 additions & 6 deletions rts/System/FileSystem/Archives/BufferedArchive.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,6 @@ class CBufferedArchive : public IArchive

// indexed by file-id
std::vector<FileBuffer> fileCache;
// neither 7zip (.sd7) nor minizip (.sdz) are thread-safe
// zlib (used to extract pool archive .gz entries) should
// not need this, but currently each buffered GetFileImpl
// call is protected
static spring::mutex archiveLock;

private:
uint32_t cacheSize = 0;
uint32_t fileCount = 0;
Expand Down
14 changes: 11 additions & 3 deletions rts/System/FileSystem/Archives/PoolArchive.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,7 @@ CPoolArchive::CPoolArchive(const std::string& name): CBufferedArchive(name)
stats.reserve(1024);

// get pool dir from .sdp absolute path
assert(FileSystem::IsAbsolutePath(name));
poolRootDir = FileSystem::GetParent(FileSystem::GetDirectory(name));
assert(!poolRootDir.empty());
poolRootDir = GetPoolRootDirectory(name);

while (gz_really_read(in, &length, 1)) {
if (!gz_really_read(in, &c_name, length)) break;
Expand Down Expand Up @@ -123,6 +121,16 @@ CPoolArchive::~CPoolArchive()
}
}

std::string CPoolArchive::GetPoolRootDirectory(const std::string& sdpName)
{
// get pool dir from .sdp absolute path
assert(FileSystem::IsAbsolutePath(sdpName));
std::string poolRootDir = FileSystem::GetParent(FileSystem::GetDirectory(sdpName));
assert(!poolRootDir.empty());

return poolRootDir;
}

int CPoolArchive::GetFileImpl(unsigned int fid, std::vector<std::uint8_t>& buffer)
{
assert(IsFileId(fid));
Expand Down
2 changes: 1 addition & 1 deletion rts/System/FileSystem/Archives/PoolArchive.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ class CPoolArchive : public CBufferedArchive
memcpy(hash, fd.shasum.data(), sha512::SHA_LEN);
return (memcmp(fd.shasum.data(), dummyFileHash.data(), sizeof(fd.shasum)) != 0);
}

static std::string GetPoolRootDirectory(const std::string& sdpName);
protected:
int GetFileImpl(unsigned int fid, std::vector<std::uint8_t>& buffer) override;

Expand Down
6 changes: 3 additions & 3 deletions rts/System/FileSystem/Archives/SevenZipArchive.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ CSevenZipArchive::CSevenZipArchive(const std::string& name)
, allocImp({SzAlloc, SzFree})
, allocTempImp({SzAllocTemp, SzFreeTemp})
{
std::lock_guard<spring::mutex> lck(archiveLock);
std::scoped_lock lck(archiveLock);

constexpr const size_t kInputBufSize = (size_t)1 << 18;

Expand Down Expand Up @@ -179,7 +179,7 @@ CSevenZipArchive::CSevenZipArchive(const std::string& name)

CSevenZipArchive::~CSevenZipArchive()
{
std::lock_guard<spring::mutex> lck(archiveLock);
std::scoped_lock lck(archiveLock);

if (outBuffer != nullptr) {
IAlloc_Free(&allocImp, outBuffer);
Expand All @@ -193,7 +193,7 @@ CSevenZipArchive::~CSevenZipArchive()

int CSevenZipArchive::GetFileImpl(unsigned int fid, std::vector<std::uint8_t>& buffer)
{
// assert(archiveLock.locked());
std::scoped_lock lck(archiveLock);
assert(IsFileId(fid));

size_t offset = 0;
Expand Down
2 changes: 2 additions & 0 deletions rts/System/FileSystem/Archives/SevenZipArchive.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ class CSevenZipArchive : public CBufferedArchive
void FileInfo(unsigned int fid, std::string& name, int& size) const override;

private:
static inline spring::mutex archiveLock;

// actual data is in BufferedArchive
struct FileEntry {
int fp;
Expand Down
6 changes: 4 additions & 2 deletions rts/System/FileSystem/Archives/ZipArchive.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ IArchive* CZipArchiveFactory::DoCreateArchive(const std::string& filePath) const

CZipArchive::CZipArchive(const std::string& archiveName): CBufferedArchive(archiveName)
{
std::lock_guard<spring::mutex> lck(archiveLock);
std::scoped_lock lck(archiveLock);

if ((zip = unzOpen(archiveName.c_str())) == nullptr) {
LOG_L(L_ERROR, "[%s] error opening \"%s\"", __func__, archiveName.c_str());
Expand Down Expand Up @@ -63,7 +63,7 @@ CZipArchive::CZipArchive(const std::string& archiveName): CBufferedArchive(archi

CZipArchive::~CZipArchive()
{
std::lock_guard<spring::mutex> lck(archiveLock);
std::scoped_lock lck(archiveLock);

if (zip != nullptr) {
unzClose(zip);
Expand All @@ -86,6 +86,8 @@ void CZipArchive::FileInfo(unsigned int fid, std::string& name, int& size) const
// than one file at a time
int CZipArchive::GetFileImpl(unsigned int fid, std::vector<std::uint8_t>& buffer)
{
std::scoped_lock lck(archiveLock);

// Prevent opening files on missing/invalid archives
if (zip == nullptr)
return -4;
Expand Down
4 changes: 3 additions & 1 deletion rts/System/FileSystem/Archives/ZipArchive.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,9 @@ class CZipArchive : public CBufferedArchive
}
#endif

protected:
private:
static inline spring::mutex archiveLock;

unzFile zip;

// actual data is in BufferedArchive
Expand Down
12 changes: 1 addition & 11 deletions rts/System/FileSystem/FileSystem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -267,14 +267,4 @@ const std::string& FileSystem::GetCacheDir()
{
static const std::string cacheDir = EnsureNoPathSepAtEnd(GetCacheBaseDir());
return cacheDir;
}

const std::string& FileSystem::GetOldCacheDir()
{
// old code
static const std::string cacheType[2] = { "dev-", "rel-" };
static const std::string cacheVersion = SpringVersion::GetMajor() + cacheType[SpringVersion::IsRelease()] + SpringVersion::GetBranch();
static const std::string cacheDir = EnsurePathSepAtEnd(GetCacheBaseDir()) + cacheVersion;
return cacheDir;
}

}
1 change: 0 additions & 1 deletion rts/System/FileSystem/FileSystem.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,6 @@ class FileSystem : public FileSystemAbstraction

static const std::string& GetCacheBaseDir();
static const std::string& GetCacheDir();
static const std::string& GetOldCacheDir();
};

#endif // !FILE_SYSTEM_H

0 comments on commit 3b024ff

Please sign in to comment.