Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Usage metrics for 2024.3 release #1345

Merged
merged 10 commits into from
Dec 14, 2024
2 changes: 1 addition & 1 deletion config/default.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
"analytics": {
"url": "https://data.getodk.cloud/v1/key/eOZ7S4bzyUW!g1PF6dIXsnSqktRuewzLTpmc6ipBtRq$LDfIMTUKswCexvE0UwJ9/projects/1/forms/odk-analytics/submissions",
"formId": "odk-analytics",
"version": "v2024.2.0_1"
"version": "v2024.3.0_1"
},
"s3blobStore": {}
}
Expand Down
10 changes: 8 additions & 2 deletions lib/data/analytics.js
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ const metricsTemplate = {
"num_unique_collectors": {},
"database_size": {},
"uses_external_db": 0,
"uses_external_blob_store": 0,
"sso_enabled": 0,
"num_client_audit_attachments": 0,
"num_client_audit_attachments_failures": 0,
Expand All @@ -41,8 +42,14 @@ const metricsTemplate = {
"num_offline_entity_branches": 0,
"num_offline_entity_interrupted_branches": 0,
"num_offline_entity_submissions_reprocessed": 0,
"num_offline_entity_submissions_force_processed": 0,
"max_entity_submission_delay": 0,
"avg_entity_submission_delay": 0
"avg_entity_submission_delay": 0,
"max_entity_branch_delay": 0,
"num_xml_only_form_defs": 0,
"num_blob_files": 0,
"num_blob_files_on_s3": 0,
"num_reset_failed_to_pending_count": 0
},
"projects": [
{
Expand Down Expand Up @@ -151,7 +158,6 @@ const metricsTemplate = {
}
},
"other": {
"has_description": 0,
"description_length": 0
},
"datasets": [{
Expand Down
146 changes: 129 additions & 17 deletions lib/model/query/analytics.js
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@ const databaseExternal = (dbHost) => () =>
? 0
: 1);


const blobStoreExternal = (s3Settings) => () =>
(s3Settings && s3Settings.server
? 1
: 0);

////////////////////////////////////////////////////////
// SQL QUERIES

Expand Down Expand Up @@ -139,6 +145,24 @@ WHERE
const countClientAuditRows = () => ({ oneFirst }) => oneFirst(sql`
SELECT count(*) FROM client_audits`);

const countXmlOnlyFormDefs = () => ({ oneFirst }) => oneFirst(sql`
SELECT count(*)
FROM form_defs
WHERE "xlsBlobId" IS NULL
AND "keyId" IS NULL
AND "version" NOT LIKE '%[upgrade]%'`);

const countBlobFiles = () => ({ one }) => one(sql`
SELECT
COUNT(*) AS total_blobs,
SUM(CASE WHEN "s3_status" = 'uploaded' THEN 1 ELSE 0 END) AS uploaded_blobs
FROM blobs`);

const countResetFailedToPending = () => ({ oneFirst }) => oneFirst(sql`
SELECT COUNT(*)
FROM audits
WHERE action = 'blobs.s3.failed-to-pending'`);

// PER PROJECT
// Users
const countUsersPerRole = () => ({ all }) => all(sql`
Expand Down Expand Up @@ -397,7 +421,7 @@ group by f."projectId"`);
// Datasets
const getDatasets = () => ({ all }) => all(sql`
SELECT
ds.id, ds."projectId", COUNT(DISTINCT p.id) num_properties, COUNT(DISTINCT e.id) num_entities_total,
ds.id, ds."projectId", COUNT(DISTINCT e.id) num_entities_total,
COUNT(DISTINCT CASE WHEN e."createdAt" >= current_date - cast(${DAY_RANGE} as int) THEN e.id END) num_entities_recent,
COUNT(DISTINCT CASE WHEN e."updatedAt" IS NOT NULL THEN e.id END) num_entities_updated_total,
COUNT(DISTINCT CASE WHEN e."updatedAt" >= current_date - cast(${DAY_RANGE} as int) THEN e.id END) num_entities_updated_recent,
Expand All @@ -412,9 +436,14 @@ SELECT
MAX(COALESCE(updates.update_api_total, 0)) num_entity_updates_api_total,
MAX(COALESCE(updates.update_api_recent, 0)) num_entity_updates_api_recent,
MAX(COALESCE(conflict_stats.conflicts, 0)) num_entity_conflicts,
MAX(COALESCE(conflict_stats.resolved, 0)) num_entity_conflicts_resolved
MAX(COALESCE(conflict_stats.resolved, 0)) num_entity_conflicts_resolved,
MAX(COALESCE(creates.create_sub_total, 0)) num_entity_create_sub_total,
MAX(COALESCE(creates.create_sub_recent, 0)) num_entity_create_sub_recent,
MAX(COALESCE(creates.create_api_total, 0)) num_entity_create_api_total,
MAX(COALESCE(creates.create_api_recent, 0)) num_entity_create_api_recent,
MAX(COALESCE(bulk_creates.total, 0)) num_entity_create_bulk_total,
MAX(COALESCE(bulk_creates.recent, 0)) num_entity_create_bulk_recent
FROM datasets ds
LEFT JOIN ds_properties p ON p."datasetId" = ds.id AND p."publishedAt" IS NOT NULL
LEFT JOIN entities e ON e."datasetId" = ds.id
LEFT JOIN (dataset_form_defs dfd
JOIN form_defs fd ON fd.id = dfd."formDefId"
Expand Down Expand Up @@ -450,6 +479,33 @@ FROM datasets ds
WHERE a."action" = 'entity.update.version'
GROUP BY e."datasetId"
) as updates ON ds.id = updates."datasetId"
LEFT JOIN (
SELECT
COUNT (1) total,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel like we usually don't leave a space between the function name and the parentheses.

SUM (CASE WHEN a."loggedAt" >= current_date - cast(${DAY_RANGE} as int) THEN 1 ELSE 0 END) recent,
SUM (CASE WHEN a."details"->'submissionDefId' IS NOT NULL THEN 1 ELSE 0 END) create_sub_total,
SUM (CASE WHEN a."details"->'submissionDefId' IS NOT NULL AND a."loggedAt" >= current_date - cast(${DAY_RANGE} as int)
THEN 1 ELSE 0 END) create_sub_recent,
SUM (CASE WHEN a."details"->'submissionDefId' IS NULL THEN 1 ELSE 0 END) create_api_total,
SUM (CASE WHEN a."details"->'submissionDefId' IS NULL AND a."loggedAt" >= current_date - cast(${DAY_RANGE} as int)
THEN 1 ELSE 0 END) create_api_recent,
e."datasetId"
FROM audits a
JOIN entities e on CAST((a.details ->> 'entityId'::TEXT) AS integer) = e.id
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is ::TEXT needed?

WHERE a."action" = 'entity.create'
GROUP BY e."datasetId"
) as creates ON ds.id = creates."datasetId"
LEFT JOIN (
SELECT count(1) total,
SUM (CASE WHEN a."loggedAt" >= current_date - cast(${DAY_RANGE} as int) THEN 1 ELSE 0 END) recent,
e."datasetId"
FROM audits a
JOIN entity_def_sources eds on CAST((a.details ->> 'sourceId'::TEXT) AS integer) = eds."id"
JOIN entity_defs ed on ed."sourceId" = eds.id AND root=true
JOIN entities e on ed."entityId" = e.id
WHERE a."action" = 'entity.bulk.create'
GROUP BY e."datasetId"
) as bulk_creates on ds.id = bulk_creates."datasetId"
LEFT JOIN (
SELECT COUNT (1) conflicts,
SUM (CASE WHEN e."conflict" IS NULL THEN 1 ELSE 0 END) resolved,
Expand All @@ -476,6 +532,15 @@ WHERE audits.action = 'entity.bulk.create'
GROUP BY ds.id, ds."projectId"
`);

const getDatasetProperties = () => ({ all }) => all(sql`
SELECT
ds.id, ds."projectId", COUNT(DISTINCT p.id) num_properties
FROM datasets ds
LEFT JOIN ds_properties p ON p."datasetId" = ds.id AND p."publishedAt" IS NOT NULL
WHERE ds."publishedAt" IS NOT NULL
GROUP BY ds.id, ds."projectId";
`);


// Offline entities

Expand Down Expand Up @@ -523,10 +588,13 @@ FROM duplicateRuns;

// Number of submissions temporarily held in backlog but were automatically
// removed from backlog when preceeding submission came in
const countSubmissionReprocess = () => ({ oneFirst }) => oneFirst(sql`
SELECT COUNT(*)
const countSubmissionBacklogEvents = () => ({ one }) => one(sql`
SELECT
COUNT(CASE WHEN "action" = 'submission.backlog.hold' THEN 1 END) AS "submission.backlog.hold",
COUNT(CASE WHEN "action" = 'submission.backlog.reprocess' THEN 1 END) AS "submission.backlog.reprocess",
COUNT(CASE WHEN "action" = 'submission.backlog.force' THEN 1 END) AS "submission.backlog.force"
FROM audits
WHERE "action" = 'submission.backlog.reprocess'
WHERE "action" IN ('submission.backlog.hold', 'submission.backlog.reprocess', 'submission.backlog.force')
`);

// Measure how much time entities whose source is a submission.create
Expand Down Expand Up @@ -554,6 +622,21 @@ JOIN submission_defs as sd
ON eds."submissionDefId" = sd.id;
`);

const measureMaxEntityBranchTime = () => ({ oneFirst }) => oneFirst(sql`
SELECT
COALESCE(MAX(AGE(max_created_at, min_created_at)), '0 seconds'::interval) AS max_time_difference
FROM (
SELECT
"branchId",
"entityId",
MIN("createdAt") AS min_created_at,
MAX("createdAt") AS max_created_at
FROM entity_defs
GROUP BY "branchId", "entityId"
HAVING "branchId" IS NOT NULL
) AS subquery;
`);

// Other
const getProjectsWithDescriptions = () => ({ all }) => all(sql`
select id as "projectId", length(trim(description)) as description_length from projects where coalesce(trim(description),'')!=''`);
Expand All @@ -575,11 +658,12 @@ const projectMetrics = () => (({ Analytics }) => runSequentially([
Analytics.countSubmissionsByUserType,
Analytics.getProjectsWithDescriptions,
Analytics.getDatasets,
Analytics.getDatasetEvents
Analytics.getDatasetEvents,
Analytics.getDatasetProperties
]).then(([ userRoles, appUsers, deviceIds, pubLinks,
forms, formGeoRepeats, formsEncrypt, formStates, reusedIds,
subs, subStates, subEdited, subComments, subUsers,
projWithDesc, datasets, datasetEvents ]) => {
projWithDesc, datasets, datasetEvents, datasetProperties ]) => {
const projects = {};

// users
Expand Down Expand Up @@ -692,9 +776,13 @@ const projectMetrics = () => (({ Analytics }) => runSequentially([
const eventsRow = datasetEvents.find(d => (d.projectId === row.projectId && d.id === row.id)) ||
{ num_bulk_create_events_total: 0, num_bulk_create_events_recent: 0, biggest_bulk_upload: 0 };

// Properties row
const propertiesRow = datasetProperties.find(d => (d.projectId === row.projectId && d.id === row.id)) ||
{ num_properties: 0 };

project.datasets.push({
id: row.id,
num_properties: row.num_properties,
num_properties: propertiesRow.num_properties,
num_creation_forms: row.num_creation_forms,
num_followup_forms: row.num_followup_forms,
num_entities: { total: row.num_entities_total, recent: row.num_entities_recent },
Expand All @@ -710,14 +798,18 @@ const projectMetrics = () => (({ Analytics }) => runSequentially([

// 2024.1 metrics
num_bulk_create_events: { total: eventsRow.num_bulk_create_events_total, recent: eventsRow.num_bulk_create_events_recent },
biggest_bulk_upload: eventsRow.biggest_bulk_upload
biggest_bulk_upload: eventsRow.biggest_bulk_upload,

// 2024.3 metrics
num_entity_creates_sub: { total: row.num_entity_create_sub_total, recent: row.num_entity_create_sub_recent },
num_entity_creates_api: { total: row.num_entity_create_api_total, recent: row.num_entity_create_api_recent },
num_entity_creates_bulk: { total: row.num_entity_create_bulk_total, recent: row.num_entity_create_bulk_recent }
});
}

// other
for (const row of projWithDesc) {
const project = _getProject(projects, row.projectId);
project.other.has_description = 1;
project.other.description_length = row.description_length;
}

Expand All @@ -739,15 +831,19 @@ const previewMetrics = () => (({ Analytics }) => runSequentially([
Analytics.countClientAuditAttachments,
Analytics.countClientAuditProcessingFailed,
Analytics.countClientAuditRows,
Analytics.countXmlOnlyFormDefs,
Analytics.countBlobFiles,
Analytics.countResetFailedToPending,
Analytics.countOfflineBranches,
Analytics.countInterruptedBranches,
Analytics.countSubmissionReprocess,
Analytics.countSubmissionBacklogEvents,
Analytics.measureEntityProcessingTime,
Analytics.measureMaxEntityBranchTime,
Analytics.projectMetrics
]).then(([db, encrypt, bigForm, admins, audits,
archived, managers, viewers, collectors,
caAttachments, caFailures, caRows,
oeBranches, oeInterruptedBranches, oeSubReprocess, oeProcessingTime,
caAttachments, caFailures, caRows, xmlDefs, blobFiles, resetFailedToPending,
oeBranches, oeInterruptedBranches, oeBacklogEvents, oeProcessingTime, oeBranchTime,
projMetrics]) => {
const metrics = clone(metricsTemplate);
// system
Expand Down Expand Up @@ -785,10 +881,20 @@ const previewMetrics = () => (({ Analytics }) => runSequentially([
// 2024.2.0 offline entity metrics
metrics.system.num_offline_entity_branches = oeBranches;
metrics.system.num_offline_entity_interrupted_branches = oeInterruptedBranches;
metrics.system.num_offline_entity_submissions_reprocessed = oeSubReprocess;
metrics.system.num_offline_entity_submissions_reprocessed = oeBacklogEvents['submission.backlog.reprocess'];

metrics.system.max_entity_submission_delay = oeProcessingTime.max_wait;
metrics.system.avg_entity_submission_delay = oeProcessingTime.avg_wait;

// 2024.3.0 offline entity metrics
metrics.system.num_offline_entity_submissions_force_processed = oeBacklogEvents['submission.backlog.force'];
metrics.system.max_entity_branch_delay = oeBranchTime;
metrics.system.num_xml_only_form_defs = xmlDefs;
metrics.system.uses_external_blob_store = Analytics.blobStoreExternal(config.get('default.external.s3blobStore'));
metrics.system.num_blob_files = blobFiles.total_blobs;
metrics.system.num_blob_files_on_s3 = blobFiles.uploaded_blobs;
metrics.system.num_reset_failed_to_pending_count = resetFailedToPending;

return metrics;
}));

Expand All @@ -812,12 +918,16 @@ module.exports = {
biggestForm,
databaseSize,
databaseExternal,
blobStoreExternal,
countAdmins,
countAppUsers,
countBlobFiles,
countResetFailedToPending,
countDeviceIds,
countClientAuditAttachments,
countClientAuditProcessingFailed,
countClientAuditRows,
countXmlOnlyFormDefs,
countForms,
countFormsEncrypted,
countFormFieldTypes,
Expand All @@ -840,9 +950,11 @@ module.exports = {
getLatestAudit,
getDatasets,
getDatasetEvents,
getDatasetProperties,
countOfflineBranches,
countInterruptedBranches,
countSubmissionReprocess,
countSubmissionBacklogEvents,
measureEntityProcessingTime,
measureElapsedEntityTime
measureElapsedEntityTime,
measureMaxEntityBranchTime
};
Loading
Loading