Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add core__procedure table #329

Merged
merged 1 commit into from
Jan 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions cumulus_library/.sqlfluff
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,18 @@ schema =
'gender': True,
'address': True,
'birthdate': True
},
'procedure': {
'encounter': {
'reference': True,
},
'id': True,
'performedPeriod': {
'start': True, 'end': True,
},
'subject': {
'reference': True,
},
}
}
source_table = source_table
Expand Down
26 changes: 26 additions & 0 deletions cumulus_library/builders/counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,32 @@ def count_patient(
fhir_resource="patient",
)

def count_procedure(
self,
table_name: str,
source_table: str,
table_cols: list,
where_clauses: list | None = None,
min_subject: int | None = None,
) -> str:
"""wrapper method for constructing procedure counts tables

:param table_name: The name of the table to create. Must start with study prefix
:param source_table: The table to create counts data from
:param table_cols: The columns from the source table to add to the count table
:param where_clauses: An array of where clauses to use for filtering the data
:param min_subject: An integer setting the minimum bin size for inclusion
(default: 10)
"""
return self.get_count_query(
table_name,
source_table,
table_cols,
where_clauses=where_clauses,
min_subject=min_subject,
fhir_resource="procedure",
)

# End of wrapper section
# ----------------------------------------------------------------------

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,12 @@ class CountableFhirResource(Enum):
DIAGNOSTICREPORT = "diagnosticreport"
DOCUMENTREFERENCE = "documentreference"
ENCOUNTER = "encounter"
MEDICATION = "medication"
MEDICATIONREQUEST = "medicationrequest"
NONE = None
OBSERVATION = "observation"
PATIENT = "patient"
MEDICATION = "medication"
MEDICATIONREQUEST = "medicationrequest"
PROCEDURE = "procedure"


@dataclass
Expand Down
35 changes: 35 additions & 0 deletions cumulus_library/studies/core/builder_procedure.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import cumulus_library
from cumulus_library.studies.core.core_templates import core_templates
from cumulus_library.template_sql import sql_utils

# Columns of the source `procedure` table that this builder asks about.
# Outer key: source table name. Inner keys: column names on that table.
# Each inner value lists the nested field names of interest under that column;
# an empty list means no nested fields are listed for it.
# NOTE(review): sql_utils.REFERENCE is presumably the nested-field list for a
# FHIR Reference-typed column - confirm against sql_utils.
# prepare_queries() hands this mapping to sql_utils.validate_schema().
expected_table_cols = {
"procedure": {
"id": [],
"status": [],
"subject": sql_utils.REFERENCE,
"encounter": sql_utils.REFERENCE,
"performedDateTime": [],
"performedPeriod": ["start", "end"],
}
}


class CoreProcedureBuilder(cumulus_library.BaseTableBuilder):
    """Table builder that queues the queries for the core procedure tables."""

    display_text = "Creating Procedure tables..."

    def prepare_queries(self, *args, config: cumulus_library.StudyConfig, **kwargs):
        """Queue the denormalization queries, then the core procedure template.

        :param config: study configuration; only ``config.db`` is used here
        """
        # (source column, denormalized target table) for each codeable concept
        # that the core procedure template joins against.
        concept_targets = (
            ("category", "core__procedure_dn_category"),
            ("code", "core__procedure_dn_code"),
        )
        concept_configs = [
            sql_utils.CodeableConceptConfig(
                source_table="procedure",
                column_hierarchy=[(column, dict)],
                target_table=target,
            )
            for column, target in concept_targets
        ]
        self.queries += sql_utils.denormalize_complex_objects(config.db, concept_configs)

        # The validated schema is passed into the template alongside the
        # resource name.
        schema = sql_utils.validate_schema(config.db, expected_table_cols)
        procedure_query = core_templates.get_core_template("procedure", schema)
        self.queries.append(procedure_query)
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ in this table.
AND BOOL_OR(ec.table_name = 'documentreference')
AND BOOL_OR(ec.table_name = 'medicationrequest')
AND BOOL_OR(ec.table_name = 'observation')
AND BOOL_OR(ec.table_name = 'procedure')
Copy link
Contributor Author

@mikix mikix Dec 30, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So I think this deserves a tiny callout.

For context, the way completion works is that we ignore any encounter for which not all of the listed associated tables have been loaded by the ETL yet.

So adding a new resource like this to the list of "required resources" means:

  • Transition pain: if a site hasn't ETL'd procedures yet, the next time they rebuild core, all encounters will disappear.
  • Ongoing pain: core requires more and more resources (like procedures) to be exported and ETL'd before it can function as expected

However... This pain is also... the point of completion tracking?

Would we want to draw a line between "resources we really care about" and "resources we kind of care about"? If we didn't add Procedure to the encounter-completion check, but your study used core__procedure, you would now be subject to the reasons we added the completion table - engineers ETL'ing data behind the scenes can cause inconsistent/incomplete results when querying.

This "ignore encounters" trick was so that studies didn't have to know about the whole completion table feature. They just would inner join on encounters at some point and incomplete data would be ignored. But another approach is maybe we have a list of 2nd-tier resources for which studies are expected to manually check the completion tables? (the logic isn't necessarily fun, but we could write some docs with examples)

Or we just accept the pain points listed above as we add resources.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm of two minds about this:

  • Long term, I think this is ok/most correct.
  • Short term, we are getting a lot of partial drips of data, and this makes it harder for folks to spin up/participate

I kinda want to have a discussion at the product level about what's best here and then circle back on the implementation side of things? We can take this as is for now and then backsolve later if we want.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After discussion on slack, we're still OK with this for now. Will land as-is.

) AS is_complete
FROM etl__completion_encounters AS ece
INNER JOIN temp_completion_times AS tct ON tct.encounter_id = ece.encounter_id
Expand Down
93 changes: 93 additions & 0 deletions cumulus_library/studies/core/core_templates/procedure.sql.jinja
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
{% import 'core_utils.jinja' as utils %}

-- This table includes all fields of interest to the US Core Procedure profile.
-- EXCEPT FOR:
-- * the 'performedAge' and 'performedRange' fields, simply because they are annoying to
-- represent and not frequently used. They aren't even marked as Must Support by the profile
-- (heck, neither is performedPeriod, but we include that since EHRs often like to use periods)
--
-- AND ADDING:
-- * the `category` field, because it's helpful for classification
-- * the `encounter` field, because come on, why is it left out of the US Core profile
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you've got to be kidding me

--
-- There are lots of interesting possible fields to support from the base FHIR spec that aren't
-- in the US Core profile, like reasonCode, bodySite, and outcome. But EHR support seems low since
-- they aren't in the profile, so they have been left out so far.
--
-- US Core profile for reference:
-- * http://hl7.org/fhir/us/core/STU4/StructureDefinition-us-core-procedure.html

-- Creates core__procedure from the raw "procedure" table, joined to the
-- denormalized code and category tables.
CREATE TABLE core__procedure AS
-- temp_procedure: selects the id, status, subject/encounter references and the
-- performed date fields from the raw table via the core_utils macros. The date
-- fields are requested at day, week, month and year granularity.
WITH temp_procedure AS (
SELECT
{{- utils.basic_cols('procedure', 'src', ['id']) }},
{{-
utils.nullable_cols(
'procedure',
'src',
[
'status',
('subject', 'reference', 'subject_ref'),
('encounter', 'reference', 'encounter_ref'),
],
schema
)
}},
{{-
utils.truncate_date_cols(
'procedure',
'src',
[
('performedDateTime', 'day'),
('performedDateTime', 'week'),
('performedDateTime', 'month'),
('performedDateTime', 'year'),
('performedPeriod', 'start', 'performedPeriod_start_day', 'day'),
('performedPeriod', 'start', 'performedPeriod_start_week', 'week'),
('performedPeriod', 'start', 'performedPeriod_start_month', 'month'),
('performedPeriod', 'start', 'performedPeriod_start_year', 'year'),
('performedPeriod', 'end', 'performedPeriod_end_day', 'day'),
('performedPeriod', 'end', 'performedPeriod_end_week', 'week'),
('performedPeriod', 'end', 'performedPeriod_end_month', 'month'),
('performedPeriod', 'end', 'performedPeriod_end_year', 'year'),
],
schema
)
}}
FROM "procedure" AS src
)

-- Final projection: status, category and code codings, performed dates, then
-- the resource references.
SELECT
tp.id,
tp.status,

dn_category.code AS category_code,
dn_category.system AS category_system,
dn_category.display AS category_display,

dn_code.code AS code_code,
dn_code.system AS code_system,
dn_code.display AS code_display,

tp.performedDateTime_day,
tp.performedDateTime_week,
tp.performedDateTime_month,
tp.performedDateTime_year,

tp.performedPeriod_start_day,
tp.performedPeriod_start_week,
tp.performedPeriod_start_month,
tp.performedPeriod_start_year,

tp.performedPeriod_end_day,
tp.performedPeriod_end_week,
tp.performedPeriod_end_month,
tp.performedPeriod_end_year,

-- procedure_ref is the id prefixed with the resource type.
concat('Procedure/', tp.id) AS procedure_ref,
tp.subject_ref,
tp.encounter_ref

-- LEFT JOINs keep procedures that have no matching row in the denormalized
-- code/category tables; their code_* / category_* columns are then NULL.
FROM temp_procedure AS tp
LEFT JOIN core__procedure_dn_code AS dn_code ON tp.id = dn_code.id
LEFT JOIN core__procedure_dn_category AS dn_category ON tp.id = dn_category.id;
16 changes: 16 additions & 0 deletions cumulus_library/studies/core/count_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,21 @@ def count_core_patient(self):
cols = ["gender", "race_display", "ethnicity_display"]
return self.count_patient(table_name, from_table, cols)

def count_core_procedure(self, duration: str = "month"):
    """Build the procedure counts query, binned by category, code and date.

    :param duration: suffix selecting which ``performedDateTime_*`` column
        to count by; it is also passed on when naming the counts table
    :returns: the result of ``count_procedure`` for the core procedure table
    """
    # The performed date is annoyingly spread across three fields: performedDateTime,
    # performedPeriod.start, and performedPeriod.end.
    # Rather than do some fancy collation, we just use performedDateTime.
    # It's the only "must support" performed field, and period seems less common.
    # These core counts are just a rough idea of the data, not a polished final product.
    date_col = [f"performedDateTime_{duration}", "date", None]
    count_cols = [
        ["category_display", "varchar", None],
        ["code_display", "varchar", None],
        date_col,
    ]
    return self.count_procedure(
        self.get_table_name("count_procedure", duration=duration),
        self.get_table_name("procedure"),
        count_cols,
    )

def prepare_queries(self, *args, **kwargs):
super().prepare_queries(*args, **kwargs)
self.queries = [
Expand All @@ -144,6 +159,7 @@ def prepare_queries(self, *args, **kwargs):
self.count_core_encounter_priority(duration="month"),
self.count_core_medicationrequest(duration="month"),
self.count_core_observation_lab(duration="month"),
self.count_core_procedure(duration="month"),
self.count_core_patient(),
]

Expand Down
2 changes: 2 additions & 0 deletions cumulus_library/studies/core/manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ file_names = [
"builder_documentreference.py",
"builder_medicationrequest.py",
"builder_observation.py",
"builder_procedure.py",
"observation_type.sql",
"meta_date.sql",
"count_core.py"
Expand All @@ -31,6 +32,7 @@ count_list = [
"core__count_medicationrequest_month",
"core__count_observation_lab_month",
"core__count_patient",
"core__count_procedure_month",
]
meta_list = [
"core__meta_date",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -588,6 +588,7 @@ temp_encounter_completion AS (
AND BOOL_OR(ec.table_name = 'documentreference')
AND BOOL_OR(ec.table_name = 'medicationrequest')
AND BOOL_OR(ec.table_name = 'observation')
AND BOOL_OR(ec.table_name = 'procedure')
) AS is_complete
FROM etl__completion_encounters AS ece
INNER JOIN temp_completion_times AS tct ON tct.encounter_id = ece.encounter_id
Expand Down Expand Up @@ -740,6 +741,7 @@ temp_encounter_completion AS (
AND BOOL_OR(ec.table_name = 'documentreference')
AND BOOL_OR(ec.table_name = 'medicationrequest')
AND BOOL_OR(ec.table_name = 'observation')
AND BOOL_OR(ec.table_name = 'procedure')
) AS is_complete
FROM etl__completion_encounters AS ece
INNER JOIN temp_completion_times AS tct ON tct.encounter_id = ece.encounter_id
Expand Down
Loading
Loading