Skip to content

Commit

Permalink
Modify scraper code to accommodate all cpid total credit computation …
Browse files Browse the repository at this point in the history
…(part 1)

This commit modifies the file manifest side of the scraper to accommodate the
calculation and storage of the all-CPID total credit for each project. This
is the total credit computed across the entire project, without regard to
whether the CPIDs are active or not, and is required for the automatic
greylisting rules.
  • Loading branch information
jamescowens committed Jan 8, 2025
1 parent 9ec2140 commit 946595c
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 42 deletions.
85 changes: 43 additions & 42 deletions src/gridcoin/scraper/scraper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,7 @@ bool MarkScraperFileManifestEntryNonCurrent(ScraperFileManifestEntry& entry);
* @param all_cpid_total_credit
*/
void AlignScraperFileManifestEntries(const fs::path& file, const std::string& filetype, const std::string& sProject,
const bool& excludefromcsmanifest, const double& all_cpid_total_credit);
const bool& excludefromcsmanifest, const double& all_cpid_total_credit, const bool& no_records);
/**
* @brief Constructs the scraper statistics from the current state of the scraper, which is all of the in scope files at the
* time the function is called
Expand Down Expand Up @@ -2113,7 +2113,7 @@ bool DownloadProjectHostFiles(const WhitelistSnapshot& projectWhitelist)
}

// Save host xml files to file manifest map with exclude from CSManifest flag set to true.
AlignScraperFileManifestEntries(host_file, "host", prjs.m_name, true, 0);
AlignScraperFileManifestEntries(host_file, "host", prjs.m_name, true, 0, false);
}

return true;
Expand Down Expand Up @@ -2287,7 +2287,7 @@ bool DownloadProjectTeamFiles(const WhitelistSnapshot& projectWhitelist)
// If in explorer mode and new file downloaded, save team xml files to file manifest map with exclude from CSManifest
// flag set to true. If not in explorer mode, this is not necessary, because the team xml file is just temporary and
// can be discarded after processing.
if (explorer_mode() && bDownloadFlag) AlignScraperFileManifestEntries(team_file, "team", prjs.m_name, true, 0);
if (explorer_mode() && bDownloadFlag) AlignScraperFileManifestEntries(team_file, "team", prjs.m_name, true, 0, false);

// If require team whitelist is set and bETagChanged is true, then process the file. This also populates/updated the
// team whitelist TeamIDs in the TeamIDMap and the ETag entries in the ProjTeamETags map.
Expand Down Expand Up @@ -2571,7 +2571,7 @@ bool DownloadProjectRacFilesByCPID(const WhitelistSnapshot& projectWhitelist)

// If in explorer mode, save user (rac) source xml files to file manifest map with exclude from CSManifest flag set
// to true.
if (explorer_mode()) AlignScraperFileManifestEntries(rac_file, "user_source", prjs.m_name, true, all_cpid_total_credit);
if (explorer_mode()) AlignScraperFileManifestEntries(rac_file, "user_source", prjs.m_name, true, all_cpid_total_credit, false);
} // for prjs : projectWhitelist

// Get the global verified beacons and copy the incoming verified beacons from the
Expand Down Expand Up @@ -2624,8 +2624,8 @@ bool ProcessProjectRacFileByCPID(const std::string& project, const fs::path& fil
auto explorer_mode = []() { LOCK(cs_ScraperGlobals); return fExplorer; };
auto require_team_whitelist_membership = []() { LOCK(cs_ScraperGlobals); return REQUIRE_TEAM_WHITELIST_MEMBERSHIP; };

// Set fileerror flag to true until made false by the completion of one successful injection of user stats into stream.
bool bfileerror = true;
// Set no_records to true until made false by the completion of one successful injection of user stats into stream.
bool no_records = true;

// If passed an empty file, immediately return false.
if (file.string().empty())
Expand Down Expand Up @@ -2735,6 +2735,8 @@ bool ProcessProjectRacFileByCPID(const std::string& project, const fs::path& fil
continue;
}

all_cpid_total_credit += cpid_total_credit;

// We do NOT want to add a just verified CPID to the statistics this iteration, if it was
// not already active, because we may be halfway through processing the set of projects.
// Instead, add to the incoming verification map (above), which will be handled in the
Expand Down Expand Up @@ -2785,35 +2787,15 @@ bool ProcessProjectRacFileByCPID(const std::string& project, const fs::path& fil
<< std::endl;

// If we get here at least once then there is at least one CPID being put in the file.
// So set the bfileerror flag to false.
bfileerror = false;
// So set the no_records flag to false.
no_records = false;
}
else
{
builder.append(line);
}
}

if (bfileerror)
{
_log(logattribute::WARNING, "ProcessProjectRacFileByCPID", "Data processing of " + file.string()
+ " yielded no CPIDs with stats; file may have been truncated. Removing source file.");

ingzfile.close();
outgzfile.flush();
outgzfile.close();

// Remove the source file because it was bad. (Probable incomplete download.)
if (fs::exists(file))
fs::remove(file);

// Remove the errored out processed file.
if (fs::exists(gzetagfile))
fs::remove(gzetagfile);

return false;
}

_log(logattribute::INFO, "ProcessProjectRacFileByCPID", "Finished processing " + file.string());

ingzfile.close();
Expand Down Expand Up @@ -2849,7 +2831,7 @@ bool ProcessProjectRacFileByCPID(const std::string& project, const fs::path& fil

// Here, regardless of explorer mode, save processed rac files to file manifest map with exclude from CSManifest flag
// set to false.
AlignScraperFileManifestEntries(gzetagfile, "user", project, false, all_cpid_total_credit);
AlignScraperFileManifestEntries(gzetagfile, "user", project, false, all_cpid_total_credit, no_records);

_log(logattribute::INFO, "ProcessProjectRacFileByCPID", "Complete Process");

Expand Down Expand Up @@ -3236,7 +3218,7 @@ EXCLUSIVE_LOCKS_REQUIRED(cs_StructScraperFileManifest)

void AlignScraperFileManifestEntries(const fs::path& file, const std::string& filetype,
const std::string& sProject, const bool& excludefromcsmanifest,
const double& all_cpid_total_credit)
const double& all_cpid_total_credit, const bool& no_records)
{
ScraperFileManifestEntry NewRecord;

Expand All @@ -3254,6 +3236,7 @@ void AlignScraperFileManifestEntries(const fs::path& file, const std::string& fi
NewRecord.excludefromcsmanifest = excludefromcsmanifest;
NewRecord.filetype = filetype;
NewRecord.all_cpid_total_credit = all_cpid_total_credit;
NewRecord.no_records = no_records;

// Code block to lock StructScraperFileManifest during record insertion and delete because we want this atomic.
{
Expand Down Expand Up @@ -3398,7 +3381,7 @@ bool LoadScraperFileManifest(const fs::path& file)
}

// This handles startup with legacy manifest file without the all_cpid_total_credit column.
if (vline.size() >= 8) {
if (vline.size() >= 9) {
// In scraper for superblock v3 and autogreylist, we have to record total credit across all cpids, regardless
// of whether they are active beaconholders to support auto greylisting.

Expand All @@ -3412,10 +3395,23 @@ bool LoadScraperFileManifest(const fs::path& file)
}

LoadEntry.all_cpid_total_credit = all_cpid_total_credit;

uint32_t uint32_no_records = 0;

if (!ParseUInt32(vline[8], &uint32_no_records)) {
// This shouldn't happen given the conditional above, but to be thorough...
_log(logattribute::ERR, __func__, "The \"no_records\" field not parsed correctly for a manifest "
"entry. Skipping.");
continue;
}

LoadEntry.no_records = (bool) uint32_no_records;

} else {
// The default if the field is not there is user. (Because scraper ver 1 all files in the manifest are
// user.)
LoadEntry.filetype = "user";
// This defaults to zero for earlier manifests, since this data was not collected.
LoadEntry.all_cpid_total_credit = 0.0;
// This defaults to false, since the older logic was to only retain files with records.
LoadEntry.no_records = false;
}

// Lock cs_StructScraperFileManifest before updating
Expand Down Expand Up @@ -3450,7 +3446,7 @@ bool StoreScraperFileManifest(const fs::path& file)

_log(logattribute::INFO, "StoreScraperFileManifest", "Started processing " + file.string());

//Lock StructScraperFileManifest during serialize to string.
// Lock StructScraperFileManifest during serialize to string.
{
LOCK(cs_StructScraperFileManifest);

Expand All @@ -3461,8 +3457,9 @@ bool StoreScraperFileManifest(const fs::path& file)
<< "Project,"
<< "Filename,"
<< "ExcludeFromCSManifest,"
<< "Filetype"
<< "All_cpid_total_credit"
<< "Filetype,"
<< "All_cpid_total_credit,"
<< "No_records"
<< "\n";

for (auto const& entry : StructScraperFileManifest.mScraperFileManifest)
Expand All @@ -3475,8 +3472,9 @@ bool StoreScraperFileManifest(const fs::path& file)
+ entry.second.project + ","
+ entry.first + ","
+ ToString(entry.second.excludefromcsmanifest) + ","
+ entry.second.filetype
+ ToString(entry.second.all_cpid_total_credit)
+ entry.second.filetype + ","
+ ToString(entry.second.all_cpid_total_credit) + ","
+ ToString((uint32_t) entry.second.no_records)
+ "\n";
stream << sScraperFileManifestEntry;
}
Expand Down Expand Up @@ -3858,10 +3856,13 @@ ScraperStatsAndVerifiedBeacons GetScraperStatsByCurrentFileManifestState()
LOCK(cs_StructScraperFileManifest);

for (const auto& entry : StructScraperFileManifest.mScraperFileManifest) {
// Count as active if current, not marked as to be excluded, and also not greylisted.
// Count as active if current, not marked as to be excluded, not greylisted, and file has records.
if (entry.second.current
&& !entry.second.excludefromcsmanifest
&& !greylist.Contains(entry.second.project)) nActiveProjects++;
&& !greylist.Contains(entry.second.project)
&& !entry.second.no_records) {
nActiveProjects++;
}
}
}
double dMagnitudePerProject = NETWORK_MAGNITUDE / nActiveProjects;
Expand All @@ -3877,7 +3878,7 @@ ScraperStatsAndVerifiedBeacons GetScraperStatsByCurrentFileManifestState()
for (auto const& entry : StructScraperFileManifest.mScraperFileManifest)
{

if (entry.second.current && !entry.second.excludefromcsmanifest)
if (entry.second.current && !entry.second.excludefromcsmanifest && !entry.second.no_records)
{
std::string project = entry.first;
fs::path file = pathScraper / entry.second.filename;
Expand Down
1 change: 1 addition & 0 deletions src/gridcoin/scraper/scraper.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ struct ScraperFileManifestEntry
bool excludefromcsmanifest = true;
std::string filetype;
double all_cpid_total_credit = 0;
bool no_records = true;
};

/**
Expand Down

0 comments on commit 946595c

Please sign in to comment.