Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add survey fix views and adapt code for re-consenting participants #50

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
4 changes: 3 additions & 1 deletion neurobooth_terra/views/sql/rc_demographic_clean.sql
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ SELECT
dem.redcap_event_name,
(regexp_match(REPLACE(dem.redcap_event_name, 'enrollment', 'v1'), 'v(\d+)_arm_\d+'))[1]::int AS redcap_sequence_num,
(regexp_match(REPLACE(dem.redcap_event_name, 'enrollment', 'v1'), 'v\d+_arm_(\d+)'))[1]::int AS redcap_study_arm,
pinfo.test_subject_boolean,
bdata.test_subject_boolean,
CASE
WHEN dem.demographic_complete = 2 THEN TRUE
ELSE FALSE
Expand Down Expand Up @@ -152,6 +152,8 @@ SELECT
FROM rc_demographic dem
RIGHT OUTER JOIN rc_participant_and_consent_information pinfo
ON dem.subject_id = pinfo.subject_id -- Should be a many-to-one join
RIGHT OUTER JOIN rc_baseline_data bdata
ON dem.subject_id = bdata.subject_id
LEFT OUTER JOIN subject subj
ON pinfo.subject_id = subj.subject_id
ORDER BY
Expand Down
2 changes: 1 addition & 1 deletion neurobooth_terra/views/sql/v_longitudinal_summary.sql
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ WITH latest_diagnosis AS (
MAX(visit.neurobooth_visit_dates) AS last_visit,
MAX(visit.neurobooth_visit_dates) - MIN(visit.neurobooth_visit_dates) AS total_days
FROM rc_visit_dates visit
JOIN rc_participant_and_consent_information subj_info
JOIN rc_baseline_data subj_info
ON visit.subject_id = subj_info.subject_id
WHERE subj_info.test_subject_boolean = FALSE
GROUP BY
Expand Down
50 changes: 36 additions & 14 deletions scripts/redcap_metadata_to_postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@
from neurobooth_terra.views.views import create_views, drop_views
from config import ssh_args, db_args, project

# %%

# ------
# Let us first define the surveys and their survey IDs that we want to fetch.
# This information can be found on Redcap. To fetch Redcap data, you will
# also need to define the NEUROBOOTH_REDCAP_TOKEN environment variable.
Expand Down Expand Up @@ -80,17 +81,10 @@
'pgic_followup_visits': 175964, # patient_global_impression_of_change_since_last_time_point
'cortical_basal_ganglia_functional_scale': 175965,
'psp_staging': 133558,
'baseline_data': 184345,
}
# ------

# TODOs
# table column mapping
# email regardless of error/warning

# %%
# Next, we fetch the metadata table. This table is the master table
# that contains columns and their information. It can be used to infer
# information about the columns: for example, what choices are available
# for a particular question.

def _correct_response_array(metadata_df):
'''Correct response array values for field_name
Expand Down Expand Up @@ -133,6 +127,12 @@ def _correct_response_array(metadata_df):

return metadata_df


# ------
# Next, we fetch the metadata table. This table is the master table
# that contains columns and their information. It can be used to infer
# information about the columns: for example, what choices are available
# for a particular question.
print('Fetching metadata ...')
metadata = project.export_metadata(format='df')
# make any correction to redcap data dictionary as soon as it is received from redcap
Expand All @@ -143,11 +143,10 @@ def _correct_response_array(metadata_df):
'text_validation_type_or_show_slider_number']
metadata = metadata[metadata_fields]
metadata.to_csv('data_dictionary.csv')
# print(metadata.loc['prom_ataxia_54', 'select_choices_or_calculations'])

print('[Done]')
# ------

# metadata = metadata[metadata.redcap_form_name.isin(
# ['subject', 'participant_and_consent_information', 'demograph'])]

for column in ['section_header', 'field_label']:
metadata[column] = metadata[column].apply(
Expand Down Expand Up @@ -207,12 +206,28 @@ def _correct_response_array(metadata_df):

# for table_id, table_info in table_infos.items():
for table_id, _ in survey_ids.items():

table_info = table_infos[table_id]
print(f'Overwriting table {table_id}')
drop_table('rc_' + table_id, conn)

# ------
# As we re-consent participants, consent tables such as 'consent_nih_sca'
# and 'participant_and_consent_information' get multiple rows for the
# same subject_id, distinguished only by an incrementing integer in the
# redcap_repeat_instance column.
# This leads to a duplicate primary key error, because the primary key
# columns [subject_id, redcap_event_name] are identical across these rows.
#
# To resolve this, the redcap_repeat_instance column is also added to the
# primary key.
primary_keys = ['subject_id', 'redcap_event_name']
consent_tables = ['participant_and_consent_information', 'consent_nih_sca']
if table_id in consent_tables:
primary_keys = ['subject_id', 'redcap_event_name', 'redcap_repeat_instance']
# ------

table = create_table('rc_' + table_id, conn, table_info['columns']+table_info['indicator_columns'],
table_info['dtypes']+(['smallint[]']*len(table_info['indicator_columns'])),
primary_key=['subject_id', 'redcap_event_name'])
primary_key=primary_keys)
df = fetch_survey(project, survey_name=table_id,
survey_id=survey_ids[table_id])
df = df.rename(columns={'record_id': 'subject_id'})
Expand All @@ -224,6 +239,13 @@ def _correct_response_array(metadata_df):
warn(f'Skipping {table_id} because of missing complete col')
continue
df = df[df[complete_col[0]] == 2]
# For the consent tables we only want to keep rows where
# redcap_repeat_instance column has non-null values
# Additional context: the redcap_repeat_instance column is
# autogenerated by REDCap and rows with null values are
# generated by REDCap for some reason
if table_id in consent_tables:
df = df[df['redcap_repeat_instance'] >= 1]

report_cols = set([col.split('___')[0] for col in df.columns])
extra_cols = report_cols - (set(table_info['columns']) |
Expand Down
Loading