Skip to content

Commit

Permalink
Merge pull request #218 from phac-nml/update_drug_key
Browse files Browse the repository at this point in the history
Updating ARG Drug Table
  • Loading branch information
emarinier authored Dec 10, 2024
2 parents 26770da + 22197c0 commit b6bad2f
Show file tree
Hide file tree
Showing 6 changed files with 1,378 additions and 1,357 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# Unreleased

* Removed ARG drug key entries whose drug resistance value is "None" or missing.
* Changed the ARG drug key entry for salmonella acrB 717 to acrB 171.

# Version 0.10.0

* Updated the Plasmidfinder database to use the January 18th 2023 release.
Expand Down
14 changes: 11 additions & 3 deletions scripts/data-conversion/pointfinder-drug-resistance.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
{
"data": {
"text/plain": [
"dict_keys(['Salmonella', 'Shigella E. coli', 'Campylobacter'])"
"dict_keys(['Salmonella', 'Shigella E. coli', 'Campylobacter', 'NCBI AMRfinder'])"
]
},
"execution_count": 1,
Expand All @@ -28,7 +28,7 @@
"source": [
"import pandas as pd\n",
"\n",
"pointfinder_file = '../../drug-key-update/pointfinder 072621.xlsx'\n",
"pointfinder_file = '../../pointfinder.xlsx'\n",
"\n",
"pointfinder_excel = pd.ExcelFile(pointfinder_file)\n",
"sheets_df_map_orig = {n: pd.read_excel(pointfinder_excel, sheet_name=n, header=None) for n in pointfinder_excel.sheet_names}\n",
Expand Down Expand Up @@ -407,6 +407,14 @@
"source": [
"pointfinder_df_reduced.to_csv('../../staramr/databases/resistance/data/ARG_drug_key_pointfinder.tsv', sep='\\t', index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "938df9d4-d16a-4855-aee0-5208fadfad68",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand All @@ -425,7 +433,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
"version": "3.12.3"
}
},
"nbformat": 4,
Expand Down
101 changes: 56 additions & 45 deletions scripts/data-conversion/resfinder-drug-resistance.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -10,25 +10,25 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 3,
"id": "ba3db1bf-344d-40c7-a4d4-fb5dab85a822",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"dict_keys(['Aminoglycoside', 'B-lactam', 'Colistin', 'Fosfomycin', 'Fusidic acid', 'Glycopeptide', 'Macrolide', 'Nitroimidazole', 'Oxazolidinone', 'Phenicol', 'Quinolone', 'Rifampicin', 'Sulphonamide', 'Tetracycline', 'Trimethoprim', 'Non-functional'])"
"dict_keys(['Aminoglycoside', 'B-lactam', 'Colistin', 'Fosfomycin', 'Fusidic acid', 'Glycopeptide', 'Macrolide', 'Nitroimidazole', 'Oxazolidinone', 'Phenicol', 'Quinolone', 'Sulphonamide', 'Rifampicin', 'Tetracycline', 'Trimethoprim', 'Non-functional'])"
]
},
"execution_count": 1,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"resfinder_file = '../../drug-key-update/Resfinder 3.0 drug key 072621.xlsx'\n",
"resfinder_file = '../../resfinder.xlsx'\n",
"\n",
"resfinder_excel = pd.ExcelFile(resfinder_file)\n",
"sheets_df_map_orig = {n: pd.read_excel(resfinder_excel, sheet_name=n, header=None) for n in resfinder_excel.sheet_names}\n",
Expand All @@ -45,7 +45,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 4,
"id": "247c35d5-3075-4a6f-aad8-c6da5b908b7b",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -109,7 +109,7 @@
"2 aminoglycoside aac(6')-30-aac(6')-Ib'_1_AJ584652 gentamicin NaN"
]
},
"execution_count": 2,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -130,7 +130,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 5,
"id": "7ad1a1b3-e7a3-4617-94ce-cf4515f67fe1",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -211,7 +211,7 @@
"2 NaN NaN "
]
},
"execution_count": 3,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -233,40 +233,43 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 6,
"id": "fb20e101-6ba9-4d7c-be25-6f3bac08ec07",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
" 3069\n",
"Other\n",
" 3070\n",
"formerly aadB 19\n",
"formerly aadE 2\n",
"formerly strA 2\n",
"AMRfinderShig/Ecoeffluxchromosomal wildtype pump 2\n",
" formerly strA_2 1\n",
" formerly strA_3 1\n",
"formerly strA 2\n",
"ResfinderSalmonellaaminoglycosidecryptic chromosomal wildtype gene, hidden 1\n",
"AMRfinderEcocolistintwo-component system sensor histidine kinase, multiple genes involved, effects not known 1\n",
"AMRfinderEcofosfomycinglycerol-3-phosphate transporter, drug not tested 1\n",
"Resfinder BNEcomacrolidesremoved from updated Resfinder 1\n",
"Resfinder JessSal/Ecoeffluxdisinfectants 1\n",
"ResfinderShig/Ecoeffluxchromosomal wildtype pump, hidden 1\n",
"AMRfindermultipleSTREPTOGRAMINdrug not tested 1\n",
"AMRfindermultipleQUATERNARY AMMONIUMdrug not tested, biocide 1\n",
"AMRfindermultipleSTREPTOTHRICINdrug not tested 1\n",
"ResfinderSalmonellaaminoglycosidecryptic chromosomal wildtype gene, hidden 1\n",
"AMRfinderShig/Ecoeffluxchromosomal wildtype pump, similar to AR pump in Klebsiella, may be involved in biofilm formation 1\n",
"ResfinderSalmonellaquinolonescommon mutation, little or no effect on MICs, hidden 1\n",
"AMRfinderSalmonellaeffluxchromosomal wildtype pump, may be involved in virulence 1\n",
"AMRfinderShig/Ecoeffluxchromosomal wildtype pump, similar to AR pump in Klebsiella, may be involved in biofilm formation 1\n",
"formerly aadA4 1\n",
"AMRfinderShig/EcoB-lactamchromosomal wildtype gene, promoter mutations needed for function 1\n",
"databaseorganismdrug classnotes 1\n",
"formerly strB_3 1\n",
" formerly strA_4 1\n",
" formerly strA_3 1\n",
" formerly strA_2 1\n",
"formerly aadA4 1\n",
"*due to the higher breakpoints for some Enterobacteriaceae including shigella and E. coli, PMQRs are predicted to confer decreased susceptibility to fluoroquinolones but not intermediate susceptibility or resistance. 1\n",
"Name: Other, dtype: int64"
"Name: count, dtype: int64"
]
},
"execution_count": 4,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -291,7 +294,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 7,
"id": "1cbeebcf-cebb-4fb1-b063-b4bea7dc06cb",
"metadata": {},
"outputs": [
Expand All @@ -304,15 +307,15 @@
"3 aac(6')-Iaj_1_AB709942\n",
"4 aac(6')-Ian_1_AP014611\n",
" ... \n",
"3107 dfrA17_6_AF180469\n",
"3108 dfrA17_7_AB196349\n",
"3109 dfrA17_8_AM932673\n",
"3110 dfrA17_9_FJ807902\n",
"3111 dfrA17_10_AM937244\n",
"Name: gene_accession, Length: 3112, dtype: object"
"3110 dfrA17_6_AF180469\n",
"3111 dfrA17_7_AB196349\n",
"3112 dfrA17_8_AM932673\n",
"3113 dfrA17_9_FJ807902\n",
"3114 dfrA17_10_AM937244\n",
"Name: gene_accession, Length: 3115, dtype: object"
]
},
"execution_count": 5,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -323,7 +326,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 9,
"id": "52028c02-0f54-4fc0-844d-f77e971fce1f",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -398,43 +401,43 @@
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3107</th>\n",
" <th>3110</th>\n",
" <td>trimethoprim</td>\n",
" <td>dfrA17_6</td>\n",
" <td>AF180469</td>\n",
" <td>trimethoprim</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3108</th>\n",
" <th>3111</th>\n",
" <td>trimethoprim</td>\n",
" <td>dfrA17_7</td>\n",
" <td>AB196349</td>\n",
" <td>trimethoprim</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3109</th>\n",
" <th>3112</th>\n",
" <td>trimethoprim</td>\n",
" <td>dfrA17_8</td>\n",
" <td>AM932673</td>\n",
" <td>trimethoprim</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3110</th>\n",
" <th>3113</th>\n",
" <td>trimethoprim</td>\n",
" <td>dfrA17_9</td>\n",
" <td>FJ807902</td>\n",
" <td>trimethoprim</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3111</th>\n",
" <th>3114</th>\n",
" <td>trimethoprim</td>\n",
" <td>dfrA17_10</td>\n",
" <td>AM937244</td>\n",
" <td>trimethoprim</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>3098 rows × 4 columns</p>\n",
"<p>3099 rows × 4 columns</p>\n",
"</div>"
],
"text/plain": [
Expand All @@ -445,16 +448,16 @@
"3 aminoglycoside aac(6')-Iaj_1 AB709942 gentamicin\n",
"4 aminoglycoside aac(6')-Ian_1 AP014611 gentamicin\n",
"... ... ... ... ...\n",
"3107 trimethoprim dfrA17_6 AF180469 trimethoprim\n",
"3108 trimethoprim dfrA17_7 AB196349 trimethoprim\n",
"3109 trimethoprim dfrA17_8 AM932673 trimethoprim\n",
"3110 trimethoprim dfrA17_9 FJ807902 trimethoprim\n",
"3111 trimethoprim dfrA17_10 AM937244 trimethoprim\n",
"3110 trimethoprim dfrA17_6 AF180469 trimethoprim\n",
"3111 trimethoprim dfrA17_7 AB196349 trimethoprim\n",
"3112 trimethoprim dfrA17_8 AM932673 trimethoprim\n",
"3113 trimethoprim dfrA17_9 FJ807902 trimethoprim\n",
"3114 trimethoprim dfrA17_10 AM937244 trimethoprim\n",
"\n",
"[3098 rows x 4 columns]"
"[3099 rows x 4 columns]"
]
},
"execution_count": 6,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -485,7 +488,7 @@
"resfinder_df['Drug'] = resfinder_df['Drug'].str.strip()\n",
"\n",
"# Now, get rid of spaces and replace with commas\n",
"resfinder_df['Drug'] = resfinder_df['Drug'].str.replace('\\s+', ',', regex=True)\n",
"resfinder_df['Drug'] = resfinder_df['Drug'].str.replace(r'\\s+', ',', regex=True)\n",
"\n",
"# Now fix up specific cases where there should be spaces\n",
"resfinder_df['Drug'] = resfinder_df['Drug'].str.replace(',acid', ' acid')\n",
Expand All @@ -507,7 +510,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 10,
"id": "44663836-3cb9-49e4-bd78-2466a782d3d3",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -570,7 +573,7 @@
"151 NaN NaN NaN NaN aac(6')-IIc_1 NC_012555 "
]
},
"execution_count": 7,
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -590,13 +593,21 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 11,
"id": "be0dddd6-92db-4720-9632-3ccfa5bfbd7a",
"metadata": {},
"outputs": [],
"source": [
"resfinder_df.to_csv('../../staramr/databases/resistance/data/ARG_drug_key_resfinder.tsv', sep='\\t', index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "36e8944e-6acc-4fa7-9424-8d51c657644f",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand All @@ -615,7 +626,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
"version": "3.12.3"
}
},
"nbformat": 4,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,7 @@ salmonella parE 514 ciprofloxacin I/R,nalidixic acid
salmonella parE 521 ciprofloxacin I/R,nalidixic acid
salmonella 16S_rrsD 1065 spectinomycin
salmonella 16S_rrsD 1192 spectinomycin
salmonella parC 57 None
salmonella acrB 717 azithromycin
salmonella acrB 171 azithromycin
escherichia_coli gyrA 51 ciprofloxacin I/R,nalidixic acid
escherichia_coli gyrA 67 ciprofloxacin I/R,nalidixic acid
escherichia_coli gyrA 81 ciprofloxacin I/R,nalidixic acid
Expand All @@ -62,7 +61,6 @@ escherichia_coli gyrB 136 aminocoumarin
escherichia_coli gyrB 426 ciprofloxacin I/R,nalidixic acid
escherichia_coli gyrB 447 ciprofloxacin I/R,nalidixic acid
escherichia_coli parC 56 ciprofloxacin I/R,nalidixic acid
escherichia_coli parC 57 None
escherichia_coli parC 60 ciprofloxacin I/R,nalidixic acid
escherichia_coli parC 78 ciprofloxacin I/R,nalidixic acid
escherichia_coli parC 80 ciprofloxacin I/R,nalidixic acid
Expand Down
Loading

0 comments on commit b6bad2f

Please sign in to comment.