Skip to content

Commit

Permalink
Merge pull request #1153 from griffithlab/ref_match
Browse files Browse the repository at this point in the history
Fix bug where the incorrect neoantigen fasta entry was used for ref proteome search
  • Loading branch information
susannasiebert authored Dec 12, 2024
2 parents e29e082 + 7905faf commit 346b0db
Show file tree
Hide file tree
Showing 28 changed files with 13,145 additions and 13,166 deletions.
2 changes: 2 additions & 0 deletions docs/pvacseq/output_files.rst
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,8 @@ included eptiopes, selecting the best-scoring epitope, and which values are outp
- Description
* - ``ID``
- A unique identifier for the variant
* - ``Index``
- A unique identifier for the variant and Best Transcript
* - ``HLA Alleles`` (multiple)
- For each HLA allele in the run, the number of this variant's epitopes that bound well
to the HLA allele (with median/lowest mutant binding affinity < binding_threshold)
Expand Down
4 changes: 2 additions & 2 deletions pvactools/lib/aggregate_all_epitopes.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def determine_used_percentile_algorithms(self, prediction_algorithms, el_algorit

def determine_columns_used_for_aggregation(self, prediction_algorithms, el_algorithms):
used_columns = [
"Chromosome", "Start", "Stop", "Reference", "Variant",
"Index", "Chromosome", "Start", "Stop", "Reference", "Variant",
"Transcript", "Transcript Support Level", "Biotype", "Transcript Length", "Variant Type", "Mutation",
"Protein Position", "Gene Name", "HLA Allele",
"Mutation Position", "MT Epitope Seq", "WT Epitope Seq",
Expand Down Expand Up @@ -754,7 +754,7 @@ def assemble_result_line(self, best, key, vaf_clonal, hla, anno_count, included_
problematic_positions = best['Problematic Positions'] if 'Problematic Positions' in best else 'None'
tsl = best['Transcript Support Level'] if best['Transcript Support Level'] == "Not Supported" or pd.isna(best['Transcript Support Level']) else str(int(best['Transcript Support Level']))

out_dict = { 'ID': key }
out_dict = { 'ID': key, 'Index': best['Index'] }
out_dict.update({ k.replace('HLA-', ''):v for k,v in sorted(hla.items()) })
out_dict.update({
'Gene': best["Gene Name"],
Expand Down
39 changes: 8 additions & 31 deletions pvactools/lib/calculate_reference_proteome_similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,22 +259,10 @@ def metric_headers(self):


def _input_tsv_type(self, line):
if self.file_type == 'pVACseq' and 'Index' in line:
return 'full'
elif self.file_type != 'pVACseq' and 'Mutation' in line:
return 'full'
else:
if 'Best Peptide' in line:
return 'aggregated'

def _get_full_peptide(self, line, mt_records_dict, wt_records_dict):
for record_id in mt_records_dict.keys():
(rest_record_id, variant_type, aa_change) = record_id.rsplit(".", 2)
(count, gene, transcript) = rest_record_id.split(".", 2)
(parsed_aa_change, pos, wt_aa, mt_aa) = index_to_aggregate_report_aa_change(aa_change, variant_type)
if line['Best Transcript'] == transcript and line['AA Change'] == parsed_aa_change:
return (mt_records_dict[record_id], wt_records_dict[record_id], variant_type, mt_aa, wt_aa)
print("Unable to find full_peptide for variant {}".format(line['ID']))
return (None, None, variant_type, mt_aa, wt_aa)
else:
return 'full'

def _get_peptide(self, line, mt_records_dict, wt_records_dict):
## Get epitope, peptide and full_peptide
Expand All @@ -287,28 +275,17 @@ def _get_peptide(self, line, mt_records_dict, wt_records_dict):
else:
if self._input_tsv_type(line) == 'aggregated':
epitope = line['Best Peptide']
(full_peptide, wt_peptide, variant_type, mt_amino_acids, wt_amino_acids) = self._get_full_peptide(line, mt_records_dict, wt_records_dict)
if full_peptide is None:
return None, None
if variant_type != 'FS':
if line['Pos'] == 'NA':
mt_pos = None
for i,(wt_aa,mt_aa) in enumerate(zip(wt_peptide,full_peptide)):
if wt_aa != mt_aa:
mt_pos = i
break
if mt_pos is None:
return None, full_peptide
else:
mt_pos = int(line['Pos'].split('-')[0])
(rest_record_id, variant_type, aa_change) = line['Index'].rsplit(".", 2)
(_, mt_pos, wt_amino_acids, mt_amino_acids) = index_to_aggregate_report_aa_change(aa_change, variant_type)
mt_pos = int(line['Pos'].split('-')[0])
else:
epitope = line['MT Epitope Seq']
full_peptide = mt_records_dict[line['Index']]
wt_peptide = wt_records_dict[line['Index']]
variant_type = line['Variant Type']
if variant_type != 'FS':
mt_pos = int(line['Mutation Position'].split('-')[0])
(wt_amino_acids, mt_amino_acids) = line['Mutation'].split('/')
full_peptide = mt_records_dict[line['Index']]
wt_peptide = wt_records_dict[line['Index']]

# get peptide
subpeptide_position = full_peptide.index(epitope)
Expand Down
Loading

0 comments on commit 346b0db

Please sign in to comment.