From 5ea79c9b4dca37a64435e4b327b6bfa54c432e51 Mon Sep 17 00:00:00 2001 From: Heidi Holappa Date: Wed, 5 Jul 2023 16:21:15 +0300 Subject: [PATCH 01/29] Refactor unneeded variable naming --- src/services/extract_matching_cases.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/services/extract_matching_cases.py b/src/services/extract_matching_cases.py index 11324df..1763ecf 100644 --- a/src/services/extract_matching_cases.py +++ b/src/services/extract_matching_cases.py @@ -15,9 +15,9 @@ def __init__(self, offset_results: dict, offset_range: tuple, reference_db): def extract_candidates_matching_selected_offset(self) -> dict: extracted_candidates = {} - for key, value in self.offset_results.items(): + for transcript_id, value in self.offset_results.items(): strand, offsets = value["strand"], value["offsets"] - reference_id, transcript_id = value["reference_id"], key + reference_id = value["reference_id"] if strand == "-": offsets.reverse() for offset_exon_idx in range(1, len(offsets)-1): From 49e935aabf57a489cccc8702110b3974a5d033a6 Mon Sep 17 00:00:00 2001 From: Heidi Holappa Date: Wed, 5 Jul 2023 16:25:23 +0300 Subject: [PATCH 02/29] Skip exons with no number --- src/services/extract_matching_cases.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/services/extract_matching_cases.py b/src/services/extract_matching_cases.py index 1763ecf..67b8d28 100644 --- a/src/services/extract_matching_cases.py +++ b/src/services/extract_matching_cases.py @@ -28,6 +28,8 @@ def extract_candidates_matching_selected_offset(self) -> dict: str(offset_exon_number) + ".start" + \ '.offset_' + str(offsets[offset_exon_idx][0]) for exon in self.reference_db.children(reference_id, featuretype='exon'): + if 'exon_number' not in exon.attributes: + continue if int(exon['exon_number'][0]) == offset_exon_number: extracted_candidates[entry_key] = { "transcript_id": transcript_id, @@ -43,6 +45,8 @@ def extract_candidates_matching_selected_offset(self) -> dict: str(offset_exon_number) + ".end" + \ '.offset_' + str(offsets[offset_exon_idx][1]) for exon in self.reference_db.children(reference_id, featuretype='exon'): + if 'exon_number' not in exon.attributes: + continue if int(exon['exon_number'][0]) == offset_exon_number: extracted_candidates[entry_key] = { "transcript_id": transcript_id, From c8dee5778072b21fc6ee12daadbd1446ab40fdaa Mon Sep 17 00:00:00 2001 From: Heidi Holappa Date: Wed, 5 Jul 2023 16:37:06 +0300 Subject: [PATCH 03/29] skip cases with no values when making graphs --- src/services/graph_manager.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/services/graph_manager.py b/src/services/graph_manager.py index e9b00e0..519cb58 100644 --- a/src/services/graph_manager.py +++ b/src/services/graph_manager.py @@ -21,6 +21,8 @@ def construct_bar_chart_from_dict(self, title: str, x_label: str, y_label: str): + if not len(graph_values): + return normalized_graph_values = self.normalize_values(graph_values) values = sorted(normalized_graph_values.items()) x_values, y_values = zip(*values) From 1961d83b0aba871eb63b15e969d27b03b66bf68e Mon Sep 17 00:00:00 2001 From: Heidi Holappa Date: Wed, 5 Jul 2023 17:06:13 +0300 Subject: [PATCH 04/29] add log-folder --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index af94810..99738b4 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,4 @@ offsets.txt htmlcov/ logs/ dx1-logs/ +dx3-logs/ From 114d2ffcc645a591f0d552ed31b67eb1b5db4efc Mon Sep 17 00:00:00 2001 From: Heidi Holappa Date: Thu, 6 Jul 2023 
09:00:14 +0300 Subject: [PATCH 05/29] Remove the use of attributes with gffutils --- src/services/extract_matching_cases.py | 39 +++++++++++--------------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/src/services/extract_matching_cases.py b/src/services/extract_matching_cases.py index 67b8d28..ee5d8b4 100644 --- a/src/services/extract_matching_cases.py +++ b/src/services/extract_matching_cases.py @@ -2,7 +2,8 @@ class MatchingCasesExtractor: def __init__(self, offset_results: dict, offset_range: tuple, reference_db): """ - Extract candidates matching the selected offset. + Extract candidates matching the selected offset. Currently first and last exon are not considered. + These are omitted with the magic numbers in the second for loop. Args: offset_results (dict): a dictionary containing the offset results for each transcript @@ -16,42 +17,36 @@ def __init__(self, offset_results: dict, offset_range: tuple, reference_db): def extract_candidates_matching_selected_offset(self) -> dict: extracted_candidates = {} for transcript_id, value in self.offset_results.items(): - strand, offsets = value["strand"], value["offsets"] - reference_id = value["reference_id"] - if strand == "-": - offsets.reverse() + strand, offsets, reference_id = value["strand"], value["offsets"], value["reference_id"] for offset_exon_idx in range(1, len(offsets)-1): - offset_exon_number = offset_exon_idx + 1 - if abs(offsets[offset_exon_idx][0]) >= self.offset_range[0] and \ - abs(offsets[offset_exon_idx][0]) <= self.offset_range[1]: + start_is_in_range = bool(abs(offsets[offset_exon_idx][0]) >= self.offset_range[0] and + abs(offsets[offset_exon_idx][0]) <= self.offset_range[1]) + if start_is_in_range: entry_key = transcript_id + ".exon_" + \ - str(offset_exon_number) + ".start" + \ + str(offset_exon_idx) + ".start" + \ '.offset_' + str(offsets[offset_exon_idx][0]) - for exon in self.reference_db.children(reference_id, featuretype='exon'): - if 'exon_number' not in exon.attributes: - continue - if int(exon['exon_number'][0]) == offset_exon_number: + for exon_number, exon in enumerate(self.reference_db.children(reference_id, featuretype='exon', order_by='start')): + if exon_number == offset_exon_idx: extracted_candidates[entry_key] = { "transcript_id": transcript_id, "strand": strand, - "exon_number": offset_exon_number, + "exon_number": offset_exon_idx, "location_type": "start", "location": exon.start + offsets[offset_exon_idx][0], "offset": offsets[offset_exon_idx][0] } - if abs(offsets[offset_exon_idx][1]) >= self.offset_range[0] and \ - abs(offsets[offset_exon_idx][1]) <= self.offset_range[1]: + end_is_in_range = bool(abs(offsets[offset_exon_idx][1]) >= self.offset_range[0] and + abs(offsets[offset_exon_idx][1]) <= self.offset_range[1]) + if end_is_in_range: entry_key = transcript_id + ".exon_" + \ - str(offset_exon_number) + ".end" + \ + str(offset_exon_idx) + ".end" + \ '.offset_' + str(offsets[offset_exon_idx][1]) - for exon in self.reference_db.children(reference_id, featuretype='exon'): - if 'exon_number' not in exon.attributes: - continue - if int(exon['exon_number'][0]) == offset_exon_number: + for exon_number, exon in enumerate(self.reference_db.children(reference_id, featuretype='exon', order_by='start')): + if exon_number == offset_exon_idx: extracted_candidates[entry_key] = { "transcript_id": transcript_id, "strand": strand, - "exon_number": offset_exon_number, + "exon_number": offset_exon_idx, "location_type": "end", "location": exon.end + offsets[offset_exon_idx][1], "offset": 
offsets[offset_exon_idx][1] From 41f30b2a871eb63b15e969d27b03b66bf68e434 Mon Sep 17 00:00:00 2001 From: Heidi Holappa Date: Thu, 6 Jul 2023 10:00:59 +0300 Subject: [PATCH 06/29] Add min threshold for img cases --- src/config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/config.py b/src/config.py index bfb317a..72a252b 100644 --- a/src/config.py +++ b/src/config.py @@ -30,6 +30,7 @@ CIGAR_RESULTS_LOG = os.path.join(LOG_FILE_DIR, cigar_results) TITLE_FILE_LENGTH = os.getenv("TITLE_FILE_LENGTH") or 50 DEFAULT_WINDOW_SIZE = os.getenv("DEFAULT_WINDOW_SIZE") or 8 +CREATE_IMG_N_TRESHOLD = os.getenv("CREATE_IMG_N_TRESHOLD") or 100 if not os.path.exists(LOG_DIR): os.mkdir(LOG_DIR) From 58dfef7e7339f42883075b593a05e292ea3d01da Mon Sep 17 00:00:00 2001 From: Heidi Holappa Date: Thu, 6 Jul 2023 10:01:12 +0300 Subject: [PATCH 07/29] Add default values, add img-threshold --- src/services/argument_parser.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/services/argument_parser.py b/src/services/argument_parser.py index 76ee974..ad2e5c7 100644 --- a/src/services/argument_parser.py +++ b/src/services/argument_parser.py @@ -2,12 +2,13 @@ import json from config import DEFAULT_WINDOW_SIZE +from config import CREATE_IMG_N_TRESHOLD def init_argparser(): parser = argparse.ArgumentParser( description='compAna: a tool for comparing annotations', - usage='python3 compAna.py -i [-f] [-s]' + usage='python3 src/compana.py -g -r -a [-o ] [-f] [-s] [-c ] [-j ] [-b ] [-t ] [-w ] [-e]' ) parser.add_argument( '-g', '--gffcompare_gtf', @@ -56,11 +57,16 @@ def init_argparser(): parser.add_argument( '-w', '--window_size', help='window from which indels and mismatches are to be searched in interesting locations', metavar='', - metavar='') + default=DEFAULT_WINDOW_SIZE) parser.add_argument( '-e', '--extended_debug', help='enable extended debug output', action='store_true') + parser.add_argument( + '-i', '--img_n_threshold', + help='threshold for the n of cases for creating images', + default=CREATE_IMG_N_TRESHOLD) parser_args = parser.parse_args() parser_dict = vars(parser_args) @@ -90,7 +96,4 @@ max(0, offset_range[0]), max(0, offset_range[1]) ) - if not parser_args.window_size: - parser_dict["window_size"] = DEFAULT_WINDOW_SIZE - return parser_args From cde3f4f245866c4db4354472cc4dbc7032021339 Mon Sep 17 00:00:00 2001 From: Heidi Holappa Date: Thu, 6 Jul 2023 10:01:43 +0300 Subject: [PATCH 08/29] create skeleton for closest canonicals validation for imgs --- src/services/log_manager.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/services/log_manager.py b/src/services/log_manager.py index 6d8539a..a704e84 100644 --- a/src/services/log_manager.py +++ b/src/services/log_manager.py @@ -59,6 +59,11 @@ def compute_indel_results(self): }) return indel_results + def validate_img_should_be_created_for_closest_canonical_dict_entry(self, key: tuple, nucleotide_pair: tuple, cases: dict): + if sum(cases.values()) < 100: + return False + pass + def generate_closest_canonicals_graphs(self, dict_of_canonicals: dict): output_manager.output_line({ "line": "Creating graphs for closest canonicals", "is_info": True }) From 451ff899a8db775f64457e0cbab3faf2051d5a50 Mon Sep 17 00:00:00 2001 From: Heidi Holappa Date: Thu, 6 Jul 2023 10:27:26 +0300 Subject: [PATCH 09/29] Update template for args --- arguments_template.json | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arguments_template.json b/arguments_template.json index 1ed2c86..5dc65c7 100644 --- 
a/arguments_template.json +++ b/arguments_template.json @@ -4,9 +4,10 @@ "reference_gtf": "", "reference_fasta": "", "gffcompare_gtf": "", - "offset": 0, - "class_code": "j c k", - "force": false, + "offset": [0, 6], + "class_code": "j c k s x", "window_size": 8, + "min_reads_for_graph": 100, + "force": false, "extended_debugging": false } \ No newline at end of file From 8a8c8e493f5d5d4700b33e09bed575c93e31e84c Mon Sep 17 00:00:00 2001 From: Heidi Holappa Date: Thu, 6 Jul 2023 10:27:43 +0300 Subject: [PATCH 10/29] rename argument to be more descriptive --- src/services/argument_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/services/argument_parser.py b/src/services/argument_parser.py index ad2e5c7..5abed91 100644 --- a/src/services/argument_parser.py +++ b/src/services/argument_parser.py @@ -64,7 +64,7 @@ def init_argparser(): help='enable extended debug output', action='store_true') parser.add_argument( - '-i', '--img_n_threshold', + '-m', '--min_reads_for_graph', help='threshold for the n of cases for creating images', default=CREATE_IMG_N_TRESHOLD) From 5c08346447dfda3dd0bd0a4a2d9d34c7abd1d0f2 Mon Sep 17 00:00:00 2001 From: Heidi Holappa Date: Thu, 6 Jul 2023 10:28:00 +0300 Subject: [PATCH 11/29] Add validation for closest canonicals imgs --- src/services/log_manager.py | 40 +++++++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/src/services/log_manager.py b/src/services/log_manager.py index a704e84..92a1790 100644 --- a/src/services/log_manager.py +++ b/src/services/log_manager.py @@ -59,23 +59,31 @@ def compute_indel_results(self): }) return indel_results - def validate_img_should_be_created_for_closest_canonical_dict_entry(self, key: tuple, nucleotide_pair: tuple, cases: dict): - if sum(cases.values()) < 100: + def validate_img_should_be_created_for_closest_canonical_dict_entry(self, parser_args, key: tuple, nucleotide_pair: tuple, cases: dict): + if sum(cases.values()) < int(parser_args.min_reads_for_graph): return False - pass + acceptor_site_canonicals = ["AG", "AC"] + donor_site_canonicals = ["GT", "GC", "AT"] + if key[1] == 'start' and nucleotide_pair[1] not in acceptor_site_canonicals: + return False + if key[1] == 'end' and nucleotide_pair[1] not in donor_site_canonicals: + return False + return True - def generate_closest_canonicals_graphs(self, dict_of_canonicals: dict): + def generate_closest_canonicals_graphs(self, parser_args, dict_of_canonicals: dict): output_manager.output_line({ "line": "Creating graphs for closest canonicals", "is_info": True }) - minimum_case_count = 100 for key, nucleotides in dict_of_canonicals.items(): for nucleotide_pair, cases in nucleotides.items(): - case_count = sum(cases.values()) - if case_count < minimum_case_count: + if not self.validate_img_should_be_created_for_closest_canonical_dict_entry( + parser_args, key, nucleotide_pair, cases + ): continue + case_count = sum(cases.values()) + title = f"{nucleotide_pair}: strand: {key[0]}, exon location: {key[1]}, offset: {key[2]}, direction of pair: {key[3]}, n of cases: {case_count}" filename = "closest-canonicals." 
+ ".strand_" + str(key[0]) + ".exon-loc-" + \ str(key[1]) + ".offset-(" + str(key[2]) + ")" + \ @@ -148,9 +156,7 @@ def compute_closest_canonicals_dict(self): return closest_canonicals_dict - def generate_json_overview_dict_for_closest_canonicals(self): - closest_canonicals_dict = self.compute_closest_canonicals_dict() - self.generate_closest_canonicals_graphs(closest_canonicals_dict) + def generate_json_overview_dict_for_closest_canonicals(self, closest_canonicals_dict: dict): json_overview = {} for key, canonical_values in closest_canonicals_dict.items(): @@ -160,8 +166,7 @@ def generate_json_overview_dict_for_closest_canonicals(self): canonical_values[item]) return json_overview - def write_closest_canonicals_log_to_file(self, parser_args): - json_overview = self.generate_json_overview_dict_for_closest_canonicals() + def write_closest_canonicals_log_to_file(self, parser_args, json_overview: dict): with open(FASTA_OVERVIEW_FILE, "w", encoding="utf-8") as file: file.write("# Overview\n") @@ -258,6 +263,15 @@ def write_debug_files(self): "is_info": True }) + def generate_output_for_closest_canonicals(self, parser_args): + closest_canonicals_dict = self.compute_closest_canonicals_dict() + closest_canonicals_json_dict = self.generate_json_overview_dict_for_closest_canonicals( + closest_canonicals_dict) + self.generate_closest_canonicals_graphs( + parser_args, closest_canonicals_dict) + self.write_closest_canonicals_log_to_file( + parser_args, closest_canonicals_json_dict) + def execute_log_file_creation(self, matching_cases_dict: dict, parser_args): output_manager.output_line({ @@ -267,7 +281,7 @@ def execute_log_file_creation(self, matching_cases_dict: dict, parser_args): self.matching_cases_dict = matching_cases_dict - self.write_closest_canonicals_log_to_file(parser_args) + self.generate_output_for_closest_canonicals(parser_args) self.generate_indel_graphs() if parser_args.extended_debug: From 6afa7ce4bb087451e993f12379055cd31650debf Mon Sep 17 00:00:00 2001 From: Heidi Holappa Date: Thu, 6 Jul 2023 10:29:02 +0300 Subject: [PATCH 12/29] Accept lower case canonicals in validation --- src/services/log_manager.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/services/log_manager.py b/src/services/log_manager.py index 92a1790..32179b2 100644 --- a/src/services/log_manager.py +++ b/src/services/log_manager.py @@ -64,9 +64,9 @@ def validate_img_should_be_created_for_closest_canonical_dict_entry(self, parser return False acceptor_site_canonicals = ["AG", "AC"] donor_site_canonicals = ["GT", "GC", "AT"] - if key[1] == 'start' and nucleotide_pair[1] not in acceptor_site_canonicals: + if key[1] == 'start' and str(nucleotide_pair[1]).upper() not in acceptor_site_canonicals: return False - if key[1] == 'end' and nucleotide_pair[1] not in donor_site_canonicals: + if key[1] == 'end' and str(nucleotide_pair[1]).upper() not in donor_site_canonicals: return False return True From 3d0b7e698006ce10b5ca229bd4a14051db7560c4 Mon Sep 17 00:00:00 2001 From: Heidi Holappa Date: Thu, 6 Jul 2023 11:10:13 +0300 Subject: [PATCH 13/29] Rename offset-log file --- src/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/config.py b/src/config.py index 72a252b..3c524d7 100644 --- a/src/config.py +++ b/src/config.py @@ -10,7 +10,7 @@ print("Warning! 
.env file not found.") temporary_dir_path = os.getenv("TEMPORARY_DIR") or "temporary_files" -offset_log = os.getenv("OFFSET_LOG") or "offset_log.txt" +offset_log = os.getenv("OFFSET_LOG_FILE") or "debug_offsets.log" test_file_directory = os.getenv( "TEST_FILE_DIRECTORY") or "src/tests/files" From 6df55a8e45fe50d51368479362ca147efb102381 Mon Sep 17 00:00:00 2001 From: Heidi Holappa Date: Thu, 6 Jul 2023 11:10:28 +0300 Subject: [PATCH 14/29] Improve stdout. Move indel-results to file --- src/services/log_manager.py | 50 ++++++++++++++++++++++++++++++++----- 1 file changed, 44 insertions(+), 6 deletions(-) diff --git a/src/services/log_manager.py b/src/services/log_manager.py index 32179b2..9649527 100644 --- a/src/services/log_manager.py +++ b/src/services/log_manager.py @@ -75,6 +75,7 @@ def generate_closest_canonicals_graphs(self, parser_args, dict_of_canonicals: di "line": "Creating graphs for closest canonicals", "is_info": True }) + graphs_created = 0 for key, nucleotides in dict_of_canonicals.items(): for nucleotide_pair, cases in nucleotides.items(): @@ -98,19 +99,50 @@ def generate_closest_canonicals_graphs(self, parser_args, dict_of_canonicals: di x_label=f"Number of errors (n of cases: {case_count})", y_label="Portion of reads", ) + graphs_created += 1 output_manager.output_line({ - "line": f"Graphs for closest canonicals created.", + "line": f"Done. A total of {graphs_created} graphs created for closest canonicals.", "is_info": True }) - def generate_indel_graphs(self): + def write_indel_results_to_file(self, indel_results): indel_results = self.compute_indel_results() + filepath = os.path.join(LOG_FILE_DIR, 'indel_results.log') + results = [] + total_reads_in_indel_results = 0 + for key, value in indel_results.items(): + results.append( + f"in/del: {key[0]}, strand: {key[1]}, exon location: {key[2]}, offset: {key[3]}, n of cases: {sum(value.values())}: {value}\n") + output_manager.output_line({ + "line": f"in/del: {key[0]}, strand: {key[1]}, exon location: {key[2]}, offset: {key[3]}, n of cases: {sum(value.values())}: {value}", + "is_info": True + }) + total_reads_in_indel_results += sum(value.values()) + + with open(filepath, "w", encoding="utf-8") as file: + file.writelines(results) + file.write( + f"Total number of reads in indel results: {total_reads_in_indel_results}\n") + output_manager.output_line({ + "line": f"Done. Indel-results written to: {filepath}", + "is_info": True + }) + + def validate_indel_grahp_should_be_created(self, parser_args, cases: dict): + if sum(cases.values()) < int(parser_args.min_reads_for_graph): + return False + return True + + def generate_indel_graphs(self, parser_args, indel_results): output_manager.output_line({ - "line": "Insertions and deletions found at given locations", + "line": "Generating graphs for insertions and deletions found at given locations", "is_info": True }) total_reads_in_indel_results = 0 + graphs_created = 0 for key, value in indel_results.items(): + if not self.validate_indel_grahp_should_be_created(parser_args, value): + continue title = f"Type: {key[0]}, strand: {key[1]}, exon location: {key[2]}, offset: {key[3]}, n of cases: {sum(value.values())}" filename = "indel." 
+ str(key[0]) + ".strand_" + str(key[1]) + ".exon-loc-" + \ str(key[2]) + ".offset-(" + str(key[3]) + ")" @@ -126,12 +158,13 @@ def generate_indel_graphs(self): x_label=f"Number of errors (n of cases: {sum(value.values())})", y_label="Portion of reads", ) + graphs_created += 1 output_manager.output_line({ "line": f"Total number of reads in indel results: {total_reads_in_indel_results}", "is_info": True }) output_manager.output_line({ - "line": f"Graphs for {len(indel_results)} cases created.", + "line": f"Graphs for {graphs_created} cases created.", "is_info": True }) @@ -272,17 +305,22 @@ def generate_output_for_closest_canonicals(self, parser_args): self.write_closest_canonicals_log_to_file( parser_args, closest_canonicals_json_dict) + def generate_output_for_indels(self, parser_args): + indel_results = self.compute_indel_results() + self.generate_indel_graphs(parser_args, indel_results) + self.write_indel_results_to_file(indel_results) + def execute_log_file_creation(self, matching_cases_dict: dict, parser_args): output_manager.output_line({ "line": "Creating log-files", - "is_info": True + "is_title": True }) self.matching_cases_dict = matching_cases_dict self.generate_output_for_closest_canonicals(parser_args) - self.generate_indel_graphs() + self.generate_output_for_indels(parser_args) if parser_args.extended_debug: self.debug_logs["matching_cases_dict"] = self.matching_cases_dict From 7c9ebffa53aa21eb5dfc56b82ffcc14d8e5c594f Mon Sep 17 00:00:00 2001 From: Heidi Holappa Date: Thu, 6 Jul 2023 12:11:38 +0300 Subject: [PATCH 15/29] remove unneeded output --- src/services/log_manager.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/services/log_manager.py b/src/services/log_manager.py index 9649527..b768ee0 100644 --- a/src/services/log_manager.py +++ b/src/services/log_manager.py @@ -113,10 +113,6 @@ def write_indel_results_to_file(self, indel_results): for key, value in indel_results.items(): results.append( f"in/del: {key[0]}, strand: {key[1]}, exon location: {key[2]}, offset: {key[3]}, n of cases: {sum(value.values())}: {value}\n") - output_manager.output_line({ - "line": f"in/del: {key[0]}, strand: {key[1]}, exon location: {key[2]}, offset: {key[3]}, n of cases: {sum(value.values())}: {value}", - "is_info": True - }) total_reads_in_indel_results += sum(value.values()) with open(filepath, "w", encoding="utf-8") as file: From f08f2e2ba4f791ad01548ec542b75574f79cb989 Mon Sep 17 00:00:00 2001 From: Heidi Holappa Date: Thu, 6 Jul 2023 12:19:23 +0300 Subject: [PATCH 16/29] remove unneeded output --- src/services/log_manager.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/services/log_manager.py b/src/services/log_manager.py index b768ee0..a7b62c4 100644 --- a/src/services/log_manager.py +++ b/src/services/log_manager.py @@ -142,10 +142,6 @@ def generate_indel_graphs(self, parser_args, indel_results): title = f"Type: {key[0]}, strand: {key[1]}, exon location: {key[2]}, offset: {key[3]}, n of cases: {sum(value.values())}" filename = "indel." 
+ str(key[0]) + ".strand_" + str(key[1]) + ".exon-loc-" + \ str(key[2]) + ".offset-(" + str(key[3]) + ")" - output_manager.output_line({ - "line": f"in/del: {key[0]}, strand: {key[1]}, exon location: {key[2]}, offset: {key[3]}, n of cases: {sum(value.values())}: {value}", - "is_info": True - }) total_reads_in_indel_results += sum(value.values()) graph_manager.construct_bar_chart_from_dict( graph_values=value, From 0e95e47d6d3b7d82fbc026aef50d0b7aae5c707a Mon Sep 17 00:00:00 2001 From: Heidi Holappa Date: Thu, 6 Jul 2023 12:28:45 +0300 Subject: [PATCH 17/29] Remove unneeded debug stdout --- src/services/argument_parser.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/services/argument_parser.py b/src/services/argument_parser.py index 5abed91..f898ef6 100644 --- a/src/services/argument_parser.py +++ b/src/services/argument_parser.py @@ -86,7 +86,6 @@ def init_argparser(): parser_dict["offset"] = (0, float('inf')) else: offset_range = [] - print(parser_args.offset) for element in parser_args.offset: if isinstance(element, str) and len(element) == 3: offset_range.append(float('inf')) From ff4f327bf9fee123242e20a86c714f0cfcd51963 Mon Sep 17 00:00:00 2001 From: Heidi Holappa Date: Thu, 6 Jul 2023 12:28:58 +0300 Subject: [PATCH 18/29] Only print errors if there are errors --- src/services/log_manager.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/services/log_manager.py b/src/services/log_manager.py index a7b62c4..8d553c5 100644 --- a/src/services/log_manager.py +++ b/src/services/log_manager.py @@ -35,10 +35,10 @@ def write_alignment_errors_to_file(self): def compute_indel_results(self): indel_results = {} - count_no_indel_errors = 0 + count_indel_errors = 0 for matching_case in self.matching_cases_dict.values(): if 'indel_errors' not in matching_case: - count_no_indel_errors += 1 + count_indel_errors += 1 self.alignment_erros.append(str(matching_case) + "\n") continue for indel_type, indel_dict in matching_case['indel_errors'].items(): @@ -53,10 +53,12 @@ def compute_indel_results(self): if indel_dict_key not in indel_results[key]: indel_results[key][indel_dict_key] = 0 indel_results[key][indel_dict_key] += indel_event_count - output_manager.output_line({ - "line": f"Number of cases without indel errors: {count_no_indel_errors}", - "is_error": True - }) + + if count_indel_errors: + output_manager.output_line({ + "line": f"Number of cases without indel errors: {count_indel_errors}", + "is_error": True + }) return indel_results def validate_img_should_be_created_for_closest_canonical_dict_entry(self, parser_args, key: tuple, nucleotide_pair: tuple, cases: dict): @@ -106,7 +108,6 @@ def generate_closest_canonicals_graphs(self, parser_args, dict_of_canonicals: di }) def write_indel_results_to_file(self, indel_results): - indel_results = self.compute_indel_results() filepath = os.path.join(LOG_FILE_DIR, 'indel_results.log') results = [] total_reads_in_indel_results = 0 From dd98c024d518b1c337a95613d02fa0145b9adefa Mon Sep 17 00:00:00 2001 From: Heidi Holappa Date: Thu, 6 Jul 2023 12:32:11 +0300 Subject: [PATCH 19/29] Improve stdout clarity --- src/services/log_manager.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/services/log_manager.py b/src/services/log_manager.py index 8d553c5..cdf70f2 100644 --- a/src/services/log_manager.py +++ b/src/services/log_manager.py @@ -56,7 +56,7 @@ def compute_indel_results(self): if count_indel_errors: output_manager.output_line({ - "line": f"Number of cases without 
indel errors: {count_indel_errors}", + "line": f"Indel results: number of cases without indel errors: {count_indel_errors}", "is_error": True }) return indel_results @@ -74,7 +74,7 @@ def validate_img_should_be_created_for_closest_canonical_dict_entry(self, parser def generate_closest_canonicals_graphs(self, parser_args, dict_of_canonicals: dict): output_manager.output_line({ - "line": "Creating graphs for closest canonicals", + "line": "Closest canonicals: creating graphs.", "is_info": True }) graphs_created = 0 @@ -103,7 +103,7 @@ def generate_closest_canonicals_graphs(self, parser_args, dict_of_canonicals: di ) graphs_created += 1 output_manager.output_line({ - "line": f"Done. A total of {graphs_created} graphs created for closest canonicals.", + "line": f"Closest canonicals: done. A total of {graphs_created} graphs created for closest canonicals.", "is_info": True }) @@ -119,9 +119,9 @@ def write_indel_results_to_file(self, indel_results): with open(filepath, "w", encoding="utf-8") as file: file.writelines(results) file.write( - f"Total number of reads in indel results: {total_reads_in_indel_results}\n") + f"Indel results: Total number of reads in indel results: {total_reads_in_indel_results}\n") output_manager.output_line({ - "line": f"Done. Indel-results written to: {filepath}", + "line": f"Indel results: done. Results written to: {filepath}", "is_info": True }) @@ -132,7 +132,7 @@ def validate_indel_grahp_should_be_created(self, parser_args, cases: dict): def generate_indel_graphs(self, parser_args, indel_results): output_manager.output_line({ - "line": "Generating graphs for insertions and deletions found at given locations", + "line": "Indel results: generating graphs for insertions and deletions found at given locations", "is_info": True }) total_reads_in_indel_results = 0 @@ -153,11 +153,11 @@ def generate_indel_graphs(self, parser_args, indel_results): ) graphs_created += 1 output_manager.output_line({ - "line": f"Total number of reads in indel results: {total_reads_in_indel_results}", + "line": f"Indel results: total number of reads in indel results: {total_reads_in_indel_results}", "is_info": True }) output_manager.output_line({ - "line": f"Graphs for {graphs_created} cases created.", + "line": f"Indel results: graphs for {graphs_created} cases created.", "is_info": True }) @@ -255,7 +255,7 @@ def write_closest_canonicals_log_to_file(self, parser_args, json_overview: dict) file.close() output_manager.output_line({ - "line": "Results written to file: " + FASTA_OVERVIEW_FILE, + "line": "Closest canonicals: results written to file: " + FASTA_OVERVIEW_FILE, "is_info": True }) @@ -306,7 +306,7 @@ def generate_output_for_indels(self, parser_args): def execute_log_file_creation(self, matching_cases_dict: dict, parser_args): output_manager.output_line({ - "line": "Creating log-files", + "line": "CREATING LOG-FILES", "is_title": True }) From 9bbcf796ba6a45538fd03bfde81f4c251521e612 Mon Sep 17 00:00:00 2001 From: Heidi Holappa Date: Thu, 6 Jul 2023 12:41:26 +0300 Subject: [PATCH 20/29] Add warning to stdout --- src/services/output_manager.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/services/output_manager.py b/src/services/output_manager.py index 4dbce95..e66cb0d 100644 --- a/src/services/output_manager.py +++ b/src/services/output_manager.py @@ -21,6 +21,7 @@ def output_line(self, config: dict): additional_line_breaks = config.get('additional_line_breaks', 0) is_info = config.get('is_info', False) is_error = config.get('is_error', False) + is_warning = 
config.get('is_warning', False) title_line_length = config.get('title_line_length', 50) save_to_log = config.get('save_to_log', True) if is_title: @@ -29,6 +30,8 @@ def output_line(self, config: dict): line = f"INFO: [{datetime.now().strftime('%Y-%m-%d %H:%M')}]: {line}" if is_error: line = f"ERROR: [{datetime.now().strftime('%Y-%m-%d %H:%M')}]: {line}" + if is_warning: + line = f"WARNING: [{datetime.now().strftime('%Y-%m-%d %H:%M')}]: {line}" print(line, end=end_line) if additional_line_breaks: print("\n" * additional_line_breaks, end="") From 276b3d7a3ab4c565bf140ce3615765f4482cbcf6 Mon Sep 17 00:00:00 2001 From: Heidi Holappa Date: Thu, 6 Jul 2023 12:41:45 +0300 Subject: [PATCH 21/29] Change error to warning in stdout --- src/services/bam_manager.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/services/bam_manager.py b/src/services/bam_manager.py index 8271987..be55310 100644 --- a/src/services/bam_manager.py +++ b/src/services/bam_manager.py @@ -99,11 +99,11 @@ def process_bam_file(self, reads_and_references: dict): output_manager.output_line({ "line": str(prev_processed_reads_counter) + " iterations extracted an already processed read from the BAM-file", - "is_error": True, + "is_warning": True, }) output_manager.output_line({ - "line": "\nFinished", + "line": "Processing BAM-file finished.", "is_info": True }) From ac527d3e2b006f8e427a5ed3553f8d2d8b8014a4 Mon Sep 17 00:00:00 2001 From: Heidi Holappa Date: Thu, 6 Jul 2023 12:57:30 +0300 Subject: [PATCH 22/29] Improve stdout clarity --- src/services/log_manager.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/services/log_manager.py b/src/services/log_manager.py index cdf70f2..bc4ea35 100644 --- a/src/services/log_manager.py +++ b/src/services/log_manager.py @@ -116,12 +116,13 @@ def write_indel_results_to_file(self, indel_results): f"in/del: {key[0]}, strand: {key[1]}, exon location: {key[2]}, offset: {key[3]}, n of cases: {sum(value.values())}: {value}\n") total_reads_in_indel_results += sum(value.values()) + summary_line = f"Indel results: Total number of reads in indel results -count: {total_reads_in_indel_results} (Note: one read can be related to multiple matching cases, or be related to multiple transcripts) \n" + with open(filepath, "w", encoding="utf-8") as file: file.writelines(results) - file.write( - f"Indel results: Total number of reads in indel results: {total_reads_in_indel_results}\n") + file.write(summary_line) output_manager.output_line({ - "line": f"Indel results: done. 
Results written to: {filepath}", + "line": summary_line, "is_info": True }) From 0158669b206b41502691ec764ac720a24a770d7f Mon Sep 17 00:00:00 2001 From: Heidi Holappa Date: Thu, 6 Jul 2023 13:08:35 +0300 Subject: [PATCH 23/29] Refactor stdout to match more with pipeline in documentation --- src/services/bam_manager.py | 6 +++--- src/services/extract_matching_cases.py | 15 +++++++++++++++ src/services/fasta_extractor.py | 8 ++++---- src/services/log_manager.py | 2 +- src/services/offset_computation.py | 11 +++++++---- src/tests/fasta_extractor_test.py | 2 +- 6 files changed, 31 insertions(+), 13 deletions(-) diff --git a/src/services/bam_manager.py b/src/services/bam_manager.py index be55310..6078119 100644 --- a/src/services/bam_manager.py +++ b/src/services/bam_manager.py @@ -109,7 +109,7 @@ def process_bam_file(self, reads_and_references: dict): def output_heading_information(self): output_manager.output_line({ - "line": "PROCESSING BAM-FILE", + "line": "ITERATE READS", "is_title": True }) output_manager.output_line({ @@ -157,12 +157,12 @@ def generate_dictionaries(self): log_manager.debug_logs['reads_and_references'] = reads_and_references output_manager.output_line({ - "line": "NUMBER OF MATCHING CASES:" + str(len(self.matching_cases_dict)), + "line": "Number of matching cases:" + str(len(self.matching_cases_dict)), "is_info": True }) output_manager.output_line({ - "line": "NUMBER OF READS: " + str(len(reads_and_references)), + "line": "Number of reads: " + str(len(reads_and_references)), "is_info": True }) diff --git a/src/services/extract_matching_cases.py b/src/services/extract_matching_cases.py index ee5d8b4..2606bff 100644 --- a/src/services/extract_matching_cases.py +++ b/src/services/extract_matching_cases.py @@ -1,3 +1,6 @@ +from services.output_manager import default_output_manager as output_manager + + class MatchingCasesExtractor: def __init__(self, offset_results: dict, offset_range: tuple, reference_db): @@ -15,6 +18,14 @@ def __init__(self, offset_results: dict, offset_range: tuple, reference_db): self.reference_db = reference_db def extract_candidates_matching_selected_offset(self) -> dict: + output_manager.output_line({ + "line": "EXTRACTING CASES", + "is_title": True + }) + output_manager.output_line({ + "line": f"Extracting candidates in the given offset range {self.offset_range}", + "is_info": True + }) extracted_candidates = {} for transcript_id, value in self.offset_results.items(): strand, offsets, reference_id = value["strand"], value["offsets"], value["reference_id"] @@ -51,4 +62,8 @@ def extract_candidates_matching_selected_offset(self) -> dict: "location": exon.end + offsets[offset_exon_idx][1], "offset": offsets[offset_exon_idx][1] } + output_manager.output_line({ + "line": f"Found {len(extracted_candidates)} candidates in the given offset range", + "is_info": True + }) return extracted_candidates diff --git a/src/services/fasta_extractor.py b/src/services/fasta_extractor.py index 69f8a56..4748c49 100644 --- a/src/services/fasta_extractor.py +++ b/src/services/fasta_extractor.py @@ -32,11 +32,11 @@ def extract_characters_at_given_coordinates(self, coordinates: tuple): def output_section_header(self): output_manager.output_line({ - "line": "FASTA EXTRACTION", + "line": "CLOSEST CANONICALS", "is_title": True }) output_manager.output_line({ - "line": "Fethching reference fasta file...", + "line": "Fethching reference FASTA-file.", "is_info": True }) @@ -44,7 +44,7 @@ def check_errors(self) -> bool: errors = False if not self.fasta: output_manager.output_line({ - 
"line": "fasta-file not found. Please check path and try again. Moving to next section.", + "line": "FASTA-file not found. Please check path and try again. Moving to next section.", "is_error": True }) errors = True @@ -122,6 +122,6 @@ def execute_fasta_extraction(self): self.iterate_matching_cases() output_manager.output_line({ - "line": "Phase finished.", + "line": "Finished.", "is_info": True }) diff --git a/src/services/log_manager.py b/src/services/log_manager.py index bc4ea35..32571b8 100644 --- a/src/services/log_manager.py +++ b/src/services/log_manager.py @@ -307,7 +307,7 @@ def generate_output_for_indels(self, parser_args): def execute_log_file_creation(self, matching_cases_dict: dict, parser_args): output_manager.output_line({ - "line": "CREATING LOG-FILES", + "line": "CREATING LOG-FILES AND GRAPHS", "is_title": True }) diff --git a/src/services/offset_computation.py b/src/services/offset_computation.py index 4534f6a..22e6316 100644 --- a/src/services/offset_computation.py +++ b/src/services/offset_computation.py @@ -78,11 +78,11 @@ def fetch_exons(transcript, class_code, gffcompare_db, reference_db): def execute_offset_computation(class_code: str, gffcompare_db, reference_db, extended_debug: bool) -> dict: output_manager.output_line({ - "line": "ANNOTATION COMPARISON", + "line": "COMPUTE OFFSETS", "is_title": True }) output_manager.output_line({ - "line": "Counting offsets for class code instances for: " + class_code, + "line": "Computing offsets for class code instances for: " + class_code, "is_info": True }) @@ -107,10 +107,13 @@ def execute_offset_computation(class_code: str, gffcompare_db, reference_db, ext if extended_debug: log_manager.offset_results = offset_results_as_dict - + output_manager.output_line({ + "line": "Finished.", + "is_info": True + }) else: output_manager.output_line({ - "line": "offset computation finished. Hint: to output results into a file, enable extended debug.", + "line": "Offset computation finished. Hint: to output results into a file, enable extended debug.", "is_info": True }) return offset_results_as_dict diff --git a/src/tests/fasta_extractor_test.py b/src/tests/fasta_extractor_test.py index 26cefc5..8c8bde7 100644 --- a/src/tests/fasta_extractor_test.py +++ b/src/tests/fasta_extractor_test.py @@ -64,7 +64,7 @@ def test_an_empty_string_title_generates_a_line_of_selected_character(self): extractor = FastaExtractor(self.fasta_config) extractor.output_section_header() captured = self.capsys.readouterr() - assert 'FASTA EXTRACTION' in captured.out + assert 'CLOSEST CANONICALS' in captured.out def test_successful_execution_of_extraction_updates_dictionary(self): extractor = FastaExtractor(self.fasta_config) From 8106efb51b25cde45ad8f9df478a732d246ad02b Mon Sep 17 00:00:00 2001 From: Heidi Holappa Date: Thu, 6 Jul 2023 13:15:00 +0300 Subject: [PATCH 24/29] Improve stdout --- src/services/bam_manager.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/services/bam_manager.py b/src/services/bam_manager.py index 6078119..52b2ee2 100644 --- a/src/services/bam_manager.py +++ b/src/services/bam_manager.py @@ -27,6 +27,10 @@ def initialize_file(self, filename: str): self.samfile = pysam.AlignmentFile(filename, "rb") def process_bam_file(self, reads_and_references: dict): + output_manager.output_line({ + "line": "Iterating reads and counting indels. 
This may take a while.", + "is_info": True + }) count = 0 errors = [] set_of_processed_reads = set() @@ -156,18 +160,16 @@ def generate_dictionaries(self): log_manager.debug_logs['transcripts_and_reads'] = transcripts_and_reads log_manager.debug_logs['reads_and_references'] = reads_and_references - output_manager.output_line({ - "line": "Number of matching cases:" + str(len(self.matching_cases_dict)), - "is_info": True - }) + cases_line = f"Number of matching cases: {len(self.matching_cases_dict)}, " + \ + f"number of reads: {len(reads_and_references)}\n" output_manager.output_line({ - "line": "Number of reads: " + str(len(reads_and_references)), + "line": cases_line, "is_info": True }) output_manager.output_line({ - "line": "Analyzing offset of reads. This may take a while.", + "line": "Note: one read may be related to multiple matching cases.", "is_info": True }) From f77a8fb08ec2d9977b027605f5d3245a1bcc1038 Mon Sep 17 00:00:00 2001 From: Heidi Holappa Date: Thu, 6 Jul 2023 13:20:44 +0300 Subject: [PATCH 25/29] Refactor based on pylint feedback --- src/services/log_manager.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/services/log_manager.py b/src/services/log_manager.py index 32571b8..fc2308d 100644 --- a/src/services/log_manager.py +++ b/src/services/log_manager.py @@ -61,7 +61,11 @@ def compute_indel_results(self): }) return indel_results - def validate_img_should_be_created_for_closest_canonical_dict_entry(self, parser_args, key: tuple, nucleotide_pair: tuple, cases: dict): + def validate_img_should_be_created_for_closest_canonical_dict_entry(self, + parser_args, + key: tuple, + nucleotide_pair: tuple, + cases: dict): if sum(cases.values()) < int(parser_args.min_reads_for_graph): return False acceptor_site_canonicals = ["AG", "AC"] @@ -87,7 +91,8 @@ def generate_closest_canonicals_graphs(self, parser_args, dict_of_canonicals: di continue case_count = sum(cases.values()) - title = f"{nucleotide_pair}: strand: {key[0]}, exon location: {key[1]}, offset: {key[2]}, direction of pair: {key[3]}, n of cases: {case_count}" + title = f"{nucleotide_pair}: strand: {key[0]}, exon location: {key[1]}, offset: {key[2]}, " + \ + f"direction of pair: {key[3]}, n of cases: {case_count}" filename = "closest-canonicals." 
+ ".strand_" + str(key[0]) + ".exon-loc-" + \ str(key[1]) + ".offset-(" + str(key[2]) + ")" + \ ".direction-" + \ @@ -113,10 +118,13 @@ def write_indel_results_to_file(self, indel_results): total_reads_in_indel_results = 0 for key, value in indel_results.items(): results.append( - f"in/del: {key[0]}, strand: {key[1]}, exon location: {key[2]}, offset: {key[3]}, n of cases: {sum(value.values())}: {value}\n") + f"in/del: {key[0]}, strand: {key[1]}, exon location: {key[2]}, " + + f"offset: {key[3]}, n of cases: {sum(value.values())}: {value}\n") total_reads_in_indel_results += sum(value.values()) - summary_line = f"Indel results: Total number of reads in indel results -count: {total_reads_in_indel_results} (Note: one read can be related to multiple matching cases, or be related to multiple transcripts) \n" + summary_line = f"Indel results: count of reads in indel results: {total_reads_in_indel_results}" + \ + "(Note: one read can be related to multiple matching cases, " + \ + "or be related to multiple transcripts) \n" with open(filepath, "w", encoding="utf-8") as file: file.writelines(results) @@ -141,7 +149,8 @@ def generate_indel_graphs(self, parser_args, indel_results): for key, value in indel_results.items(): if not self.validate_indel_grahp_should_be_created(parser_args, value): continue - title = f"Type: {key[0]}, strand: {key[1]}, exon location: {key[2]}, offset: {key[3]}, n of cases: {sum(value.values())}" + title = f"Type: {key[0]}, strand: {key[1]}, exon location: {key[2]}, " + \ + f"offset: {key[3]}, n of cases: {sum(value.values())}" filename = "indel." + str(key[0]) + ".strand_" + str(key[1]) + ".exon-loc-" + \ str(key[2]) + ".offset-(" + str(key[3]) + ")" total_reads_in_indel_results += sum(value.values()) From bc9d34a41ab8cd6e606fc5a8defcb930240fc3e1 Mon Sep 17 00:00:00 2001 From: Heidi Holappa Date: Thu, 6 Jul 2023 13:55:07 +0300 Subject: [PATCH 26/29] refactor code based on pylint feedback --- src/compana.py | 2 +- src/services/alignment_parser.py | 27 +++++++++------------- src/services/bam_manager.py | 20 +++++++---------- src/services/db_initializer.py | 6 +++-- src/services/extract_matching_cases.py | 16 ++++++++----- src/services/fasta_extractor.py | 14 +++++++----- src/services/graph_manager.py | 3 +-- src/services/log_manager.py | 31 ++++++++++++++++---------- src/services/offset_computation.py | 9 ++++++-- src/services/output_manager.py | 3 ++- src/tests/alignment_parser_test.py | 9 ++++---- src/tests/bam_manager_test.py | 6 ++--- 12 files changed, 77 insertions(+), 69 deletions(-) diff --git a/src/compana.py b/src/compana.py index 72c9eb8..4491c2c 100644 --- a/src/compana.py +++ b/src/compana.py @@ -49,7 +49,7 @@ def run_pipeline(parser_args): parser_args.reads_bam, parser_args.reads_tsv, matching_cases_dict) - bam_manager.execute(parser_args.window_size) + bam_manager.execute(int(parser_args.window_size)) log_manager.execute_log_file_creation(matching_cases_dict, parser_args) diff --git a/src/services/alignment_parser.py b/src/services/alignment_parser.py index 369c213..6f7abca 100644 --- a/src/services/alignment_parser.py +++ b/src/services/alignment_parser.py @@ -1,10 +1,3 @@ -import pysam - -from services.output_manager import default_output_manager as output_manager -from services.log_manager import default_log_manager as log_manager -from config import DEFAULT_WINDOW_SIZE - - class AlignmentParser: def __init__(self): @@ -13,9 +6,6 @@ def __init__(self): Follows the singleton pattern. 
""" - # TODO: add to configuration or to user arguments (window size) - self.window_size = int(DEFAULT_WINDOW_SIZE) - def extract_location_from_cigar_string(self, cigar_tuples: list, reference_start: int, @@ -25,11 +15,12 @@ def extract_location_from_cigar_string(self, alignment_position = 0 ref_position = 0 - for idx, cigar_code in enumerate(cigar_tuples): + for cigar_code in cigar_tuples: if cigar_code[0] in [0, 2, 3, 7, 8]: ref_position += cigar_code[1] - if ref_position <= relative_position and not reference_start + ref_position == reference_end: + if ref_position <= relative_position and not \ + reference_start + ref_position == reference_end: alignment_position += cigar_code[1] else: return alignment_position + (cigar_code[1] - (ref_position - relative_position)) @@ -39,7 +30,8 @@ def extract_location_from_cigar_string(self, def count_indels_from_cigar_codes_in_given_window(self, cigar_tuples: list, aligned_location: int, - loc_type: str): + loc_type: str, + window_size: int): """ Get cigar codes in a given window. @@ -62,16 +54,17 @@ def count_indels_from_cigar_codes_in_given_window(self, debug_list = [] if loc_type == "end": - aligned_location = aligned_location - self.window_size + 1 + aligned_location = aligned_location - window_size + 1 for cigar_code in cigar_tuples: - if self.window_size == len(cigar_code_list): + if window_size == len(cigar_code_list): break if location + cigar_code[1] > aligned_location: overlap = location + \ cigar_code[1] - (aligned_location + len(cigar_code_list)) cigar_code_list.extend( - [cigar_code[0] for _ in range(min(self.window_size - len(cigar_code_list), overlap))]) + [cigar_code[0] for _ in range(min(window_size - + len(cigar_code_list), overlap))]) location += cigar_code[1] debug_list.append(cigar_code_list) @@ -84,7 +77,7 @@ def count_indels_from_cigar_codes_in_given_window(self, result['deletions'] = deletions result['insertions'] = insertions - if deletions >= self.window_size or insertions >= self.window_size: + if deletions >= window_size or insertions >= window_size: debug_list.append( f"deletions: {deletions}, insertions: {insertions}") return True, debug_list, result diff --git a/src/services/bam_manager.py b/src/services/bam_manager.py index 52b2ee2..80ab5e6 100644 --- a/src/services/bam_manager.py +++ b/src/services/bam_manager.py @@ -1,4 +1,3 @@ -import os import pysam from services.read_management import create_dict_of_transcripts_and_reads @@ -6,8 +5,6 @@ from services.alignment_parser import default_alignment_parser as alignment_parser from services.log_manager import default_log_manager as log_manager -from config import LOG_FILE_DIR - class BamManager: @@ -15,18 +12,16 @@ def __init__(self, bam_path: str, tsv_path: str, matching_cases_dict: dict): + # pylint: disable=no-member self.bam_path = bam_path self.tsv_path = tsv_path + self.samfile = pysam.AlignmentFile(self.bam_path, "rb") self.matching_cases_dict = matching_cases_dict self.reads_and_transcripts = {} - def initialize_file(self, filename: str): - # pylint: disable=no-member - self.samfile = pysam.AlignmentFile(filename, "rb") - - def process_bam_file(self, reads_and_references: dict): + def process_bam_file(self, reads_and_references: dict, window_size: int): output_manager.output_line({ "line": "Iterating reads and counting indels. 
This may take a while.", "is_info": True @@ -65,7 +60,8 @@ def process_bam_file(self, reads_and_references: dict): idx_corrected_location = location - 1 - if read.reference_start > idx_corrected_location or read.reference_end < idx_corrected_location: + if read.reference_start > idx_corrected_location \ + or read.reference_end < idx_corrected_location: errors.append( f"Non-matching location: {read.query_name}, {matching_case_key}\t") continue @@ -87,7 +83,8 @@ def process_bam_file(self, reads_and_references: dict): error, debug_list, result = alignment_parser.count_indels_from_cigar_codes_in_given_window( read.cigartuples, aligned_location, - loc_type) + loc_type, + window_size) for key, value in result.items(): if value not in self.matching_cases_dict[matching_case_key]['indel_errors'][key]: self.matching_cases_dict[matching_case_key]['indel_errors'][key][value] = 0 @@ -181,5 +178,4 @@ def execute(self, window_size: int): reads_and_references = self.generate_dictionaries() - self.initialize_file(self.bam_path) - self.process_bam_file(reads_and_references) + self.process_bam_file(reads_and_references, window_size) diff --git a/src/services/db_initializer.py b/src/services/db_initializer.py index 4b903e6..c90107e 100644 --- a/src/services/db_initializer.py +++ b/src/services/db_initializer.py @@ -32,13 +32,15 @@ def init_databases(gffcompare_gtf: str, reference_gtf: str, force=False) -> tupl if not force and db_exists: output_manager.output_line({ - "line": f"{key}: database file already exists. Using existing database. Use -f to force overwriting existing db-files.", + "line": f"{key}: database file already exists. Using existing database. " + + "Use -f to force overwriting existing db-files.", "is_info": True }) else: output_manager.output_line({ - "line": f"{key}: database file does not exist. Creating database. This might take a while.", + "line": f"{key}: database file does not exist. " + + "Creating database. This might take a while.", "is_info": True }) gffutils.create_db( diff --git a/src/services/extract_matching_cases.py b/src/services/extract_matching_cases.py index 2606bff..c5426b2 100644 --- a/src/services/extract_matching_cases.py +++ b/src/services/extract_matching_cases.py @@ -5,8 +5,9 @@ class MatchingCasesExtractor: def __init__(self, offset_results: dict, offset_range: tuple, reference_db): """ - Extract candidates matching the selected offset. Currently first and last exon are not considered. - These are omitted with the magic numbers in the second for loop. + Extract candidates matching the selected offset. Currently first and last + exon are not considered. These are omitted with the magic numbers in the + second for loop. 
Args: offset_results (dict): a dictionary containing the offset results for each transcript @@ -30,13 +31,15 @@ def extract_candidates_matching_selected_offset(self) -> dict: for transcript_id, value in self.offset_results.items(): strand, offsets, reference_id = value["strand"], value["offsets"], value["reference_id"] for offset_exon_idx in range(1, len(offsets)-1): - start_is_in_range = bool(abs(offsets[offset_exon_idx][0]) >= self.offset_range[0] and - abs(offsets[offset_exon_idx][0]) <= self.offset_range[1]) + start_is_in_range = bool( + abs(offsets[offset_exon_idx][0]) >= self.offset_range[0] and + abs(offsets[offset_exon_idx][0]) <= self.offset_range[1]) if start_is_in_range: entry_key = transcript_id + ".exon_" + \ str(offset_exon_idx) + ".start" + \ '.offset_' + str(offsets[offset_exon_idx][0]) - for exon_number, exon in enumerate(self.reference_db.children(reference_id, featuretype='exon', order_by='start')): + for exon_number, exon in enumerate(self.reference_db.children( + reference_id, featuretype='exon', order_by='start')): if exon_number == offset_exon_idx: extracted_candidates[entry_key] = { "transcript_id": transcript_id, @@ -52,7 +55,8 @@ def extract_candidates_matching_selected_offset(self) -> dict: entry_key = transcript_id + ".exon_" + \ str(offset_exon_idx) + ".end" + \ '.offset_' + str(offsets[offset_exon_idx][1]) - for exon_number, exon in enumerate(self.reference_db.children(reference_id, featuretype='exon', order_by='start')): + for exon_number, exon in enumerate(self.reference_db.children( + reference_id, featuretype='exon', order_by='start')): if exon_number == offset_exon_idx: extracted_candidates[entry_key] = { "transcript_id": transcript_id, diff --git a/src/services/fasta_extractor.py b/src/services/fasta_extractor.py index 4748c49..52fd131 100644 --- a/src/services/fasta_extractor.py +++ b/src/services/fasta_extractor.py @@ -1,7 +1,6 @@ -import json from pyfaidx import Fasta from services.output_manager import default_output_manager as output_manager -from config import FASTA_OVERVIEW_FILE, DEFAULT_WINDOW_SIZE +from config import DEFAULT_WINDOW_SIZE class FastaExtractor: @@ -44,7 +43,8 @@ def check_errors(self) -> bool: errors = False if not self.fasta: output_manager.output_line({ - "line": "FASTA-file not found. Please check path and try again. Moving to next section.", + "line": "FASTA-file not found. Please check path and try again. 
" + + "Moving to next section.", "is_error": True }) errors = True @@ -98,8 +98,9 @@ def iterate_matching_cases(self): splice_cite_location = value["location"] - 2 else: splice_cite_location = value["location"] + 1 - coordinates = (key.split('.')[ - 1], splice_cite_location - self.window_size, splice_cite_location + self.window_size) + coordinates = (key.split('.')[1], + splice_cite_location - self.window_size, + splice_cite_location + self.window_size) nucleotides = self.extract_characters_at_given_coordinates( coordinates) if value["location_type"] == "start": @@ -115,7 +116,8 @@ def execute_fasta_extraction(self): return output_manager.output_line({ - "line": f"Searching closest possible canonical sites for provided offset range: {self.offset}", + "line": "Searching closest possible canonical " + + f"sites for provided offset range: {self.offset}", "is_info": True }) diff --git a/src/services/graph_manager.py b/src/services/graph_manager.py index 519cb58..3c3dfe5 100644 --- a/src/services/graph_manager.py +++ b/src/services/graph_manager.py @@ -1,6 +1,5 @@ import os import matplotlib.pyplot as plt -import numpy as np from config import LOG_FILE_DIR @@ -21,7 +20,7 @@ def construct_bar_chart_from_dict(self, title: str, x_label: str, y_label: str): - if not len(graph_values): + if not graph_values: return normalized_graph_values = self.normalize_values(graph_values) values = sorted(normalized_graph_values.items()) diff --git a/src/services/log_manager.py b/src/services/log_manager.py index fc2308d..39a000e 100644 --- a/src/services/log_manager.py +++ b/src/services/log_manager.py @@ -22,7 +22,8 @@ def write_offset_results_to_file(self): "transcript_id\treference_id\tclass_code\tstrand\toffsets\n") for key, value in self.offset_results.items(): file.write( - f"{key}\t{value['reference_id']}\t{value['class_code']}\t{value['strand']}\t{value['offsets']}\n") + f"{key}\t{value['reference_id']}\t{value['class_code']}\t" + + f"{value['strand']}\t{value['offsets']}\n") def write_alignment_errors_to_file(self): @@ -43,12 +44,11 @@ def compute_indel_results(self): continue for indel_type, indel_dict in matching_case['indel_errors'].items(): key = ( - indel_type, matching_case['strand'], matching_case['location_type'], matching_case['offset']) + indel_type, matching_case['strand'], + matching_case['location_type'], + matching_case['offset']) if key not in indel_results: indel_results[key] = {} - # TODO: A more pythonic way but harder to read? - # indel_results[key] = {k: indel_results[key].get(k, 0) + indel_dict.get(k, 0) - # for k in set(list(indel_results[key].keys()) + list(indel_dict.keys()))} for indel_dict_key, indel_event_count in indel_dict.items(): if indel_dict_key not in indel_results[key]: indel_results[key][indel_dict_key] = 0 @@ -56,7 +56,8 @@ def compute_indel_results(self): if count_indel_errors: output_manager.output_line({ - "line": f"Indel results: number of cases without indel errors: {count_indel_errors}", + "line": "Indel results: number of cases without indel errors: " + + f"{count_indel_errors}", "is_error": True }) return indel_results @@ -91,7 +92,8 @@ def generate_closest_canonicals_graphs(self, parser_args, dict_of_canonicals: di continue case_count = sum(cases.values()) - title = f"{nucleotide_pair}: strand: {key[0]}, exon location: {key[1]}, offset: {key[2]}, " + \ + title = f"{nucleotide_pair}: strand: {key[0]}, exon location: {key[1]}, " + \ + f"offset: {key[2]}, " + \ f"direction of pair: {key[3]}, n of cases: {case_count}" filename = "closest-canonicals." 
+ ".strand_" + str(key[0]) + ".exon-loc-" + \ str(key[1]) + ".offset-(" + str(key[2]) + ")" + \ @@ -108,7 +110,8 @@ def generate_closest_canonicals_graphs(self, parser_args, dict_of_canonicals: di ) graphs_created += 1 output_manager.output_line({ - "line": f"Closest canonicals: done. A total of {graphs_created} graphs created for closest canonicals.", + "line": f"Closest canonicals: done. A total of {graphs_created} " + + "graphs created for closest canonicals.", "is_info": True }) @@ -122,7 +125,8 @@ def write_indel_results_to_file(self, indel_results): f"offset: {key[3]}, n of cases: {sum(value.values())}: {value}\n") total_reads_in_indel_results += sum(value.values()) - summary_line = f"Indel results: count of reads in indel results: {total_reads_in_indel_results}" + \ + summary_line = "Indel results: count of reads in indel results: " + \ + f"{total_reads_in_indel_results}" + \ "(Note: one read can be related to multiple matching cases, " + \ "or be related to multiple transcripts) \n" @@ -141,7 +145,8 @@ def validate_indel_grahp_should_be_created(self, parser_args, cases: dict): def generate_indel_graphs(self, parser_args, indel_results): output_manager.output_line({ - "line": "Indel results: generating graphs for insertions and deletions found at given locations", + "line": "Indel results: generating graphs for insertions and deletions " + + "found at given locations", "is_info": True }) total_reads_in_indel_results = 0 @@ -163,7 +168,8 @@ def generate_indel_graphs(self, parser_args, indel_results): ) graphs_created += 1 output_manager.output_line({ - "line": f"Indel results: total number of reads in indel results: {total_reads_in_indel_results}", + "line": "Indel results: total number of reads in indel results: " + + f"{total_reads_in_indel_results}", "is_info": True }) output_manager.output_line({ @@ -245,7 +251,8 @@ def write_closest_canonicals_log_to_file(self, parser_args, json_overview: dict) file.write( "This section contains the results in table-format. ") file.write( - "To disable this section, set extended debug 'false' in provided arguments. \n") + "To disable this section, set extended debug 'false' " + + "in provided arguments. \n") current_chromosome = "chr1" file.write(f'\n### {current_chromosome}\n') file.write( diff --git a/src/services/offset_computation.py b/src/services/offset_computation.py index 22e6316..7004aaa 100644 --- a/src/services/offset_computation.py +++ b/src/services/offset_computation.py @@ -76,7 +76,11 @@ def fetch_exons(transcript, class_code, gffcompare_db, reference_db): return aligned_exons, reference_exons -def execute_offset_computation(class_code: str, gffcompare_db, reference_db, extended_debug: bool) -> dict: +def execute_offset_computation( + class_code: str, + gffcompare_db, + reference_db, + extended_debug: bool) -> dict: output_manager.output_line({ "line": "COMPUTE OFFSETS", "is_title": True @@ -113,7 +117,8 @@ def execute_offset_computation(class_code: str, gffcompare_db, reference_db, ext }) else: output_manager.output_line({ - "line": "Offset computation finished. Hint: to output results into a file, enable extended debug.", + "line": "Offset computation finished. 
" + + "Hint: to output results into a file, enable extended debug.", "is_info": True }) return offset_results_as_dict diff --git a/src/services/output_manager.py b/src/services/output_manager.py index e66cb0d..445c2bf 100644 --- a/src/services/output_manager.py +++ b/src/services/output_manager.py @@ -9,7 +9,8 @@ def __init__(self): self.log_output = [] def output_line(self, config: dict): - """Outputs given line to console. Can be used to output titles, info, errors and more. A simple custom logger. + """Outputs given line to console. Can be used to output titles, + info, warnings, errors and more. A simple custom logger. Args: config (dict): Configuration options for the output line. diff --git a/src/tests/alignment_parser_test.py b/src/tests/alignment_parser_test.py index 0120478..274e467 100644 --- a/src/tests/alignment_parser_test.py +++ b/src/tests/alignment_parser_test.py @@ -127,6 +127,7 @@ def test_case_that_has_it_s_location_at_final_match_returns_correct_value(self): class TestIndelCountingFromCigarCodes(TestCase): def setUp(self): self.parser = AlignmentParser() + self.window_size = 8 def test_indel_counter_returns_false_and_an_empty_debug_list_for_given_empty_list(self): cigar_tuples = [] @@ -136,13 +137,12 @@ def test_indel_counter_returns_false_and_an_empty_debug_list_for_given_empty_lis expected_result, expected_debug_list, expected_result = False, [ []], {'deletions': 0, 'insertions': 0} result, debug_list, result = alignment_parser.count_indels_from_cigar_codes_in_given_window( - cigar_tuples, aligned_location, loc_type) + cigar_tuples, aligned_location, loc_type, self.window_size) self.assertEqual((result, debug_list, result), (expected_result, expected_debug_list, expected_result)) def test_indels_are_counted_correctly(self): cigar_tuples = [(0, 20), (2, 3), (1, 2), (0, 10)] - self.parser.window_size = 8 aligned_location = 27 loc_type = "end" strand = "+" @@ -150,13 +150,12 @@ def test_indels_are_counted_correctly(self): 2, 2, 2, 1, 1, 0, 0, 0], {'deletions': 3, 'insertions': 2} errors, debug_list, result = alignment_parser.count_indels_from_cigar_codes_in_given_window( - cigar_tuples, aligned_location, loc_type) + cigar_tuples, aligned_location, loc_type, self.window_size) self.assertEqual((errors, debug_list[0], result), (expected_errors, expected_debug_list, expected_result)) def test_full_window_of_dels_returns_true_for_errors(self): cigar_tuples = [(0, 20), (2, 8), (1, 2), (0, 10)] - self.parser.window_size = 8 aligned_location = 20 loc_type = "start" strand = "+" @@ -164,6 +163,6 @@ def test_full_window_of_dels_returns_true_for_errors(self): 2, 2, 2, 2, 2, 2, 2, 2], {'deletions': 8, 'insertions': 0} errors, debug_list, result = alignment_parser.count_indels_from_cigar_codes_in_given_window( - cigar_tuples, aligned_location, loc_type) + cigar_tuples, aligned_location, loc_type, self.window_size) self.assertEqual((errors, debug_list[0], result), (expected_errors, expected_debug_list, expected_result)) diff --git a/src/tests/bam_manager_test.py b/src/tests/bam_manager_test.py index dfb4152..7572f55 100644 --- a/src/tests/bam_manager_test.py +++ b/src/tests/bam_manager_test.py @@ -71,10 +71,9 @@ def test_bam_manager_execute_runs_without_errors(self): class TestBamReader(TestCase): def setUp(self): - self.bam_manager = BamManager("", "", {}) test_bam_file = os.path.join( TEST_FILE_DIR, "Mouse.ONT.R9.4.sim.RE.no_gtf.transcript925.ch1.nnic.bam") - self.bam_manager.initialize_file(test_bam_file) + self.bam_manager = BamManager(test_bam_file, "", {}) def 
test_bam_reader_runs_without_errors(self): reads_and_references = { @@ -89,4 +88,5 @@ def test_bam_reader_runs_without_errors(self): } } - self.bam_manager.process_bam_file(reads_and_references) + window_size = 10 + self.bam_manager.process_bam_file(reads_and_references, window_size) From d9d8b04244d3408dcf1d223f39c5611bb9a093d1 Mon Sep 17 00:00:00 2001 From: Heidi Holappa Date: Thu, 6 Jul 2023 14:01:03 +0300 Subject: [PATCH 27/29] Fix issue with extra line-break in stdout --- src/services/bam_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/services/bam_manager.py b/src/services/bam_manager.py index 80ab5e6..b7a7659 100644 --- a/src/services/bam_manager.py +++ b/src/services/bam_manager.py @@ -158,7 +158,7 @@ def generate_dictionaries(self): log_manager.debug_logs['reads_and_references'] = reads_and_references cases_line = f"Number of matching cases: {len(self.matching_cases_dict)}, " + \ - f"number of reads: {len(reads_and_references)}\n" + f"number of reads: {len(reads_and_references)}" output_manager.output_line({ "line": cases_line, From 1c4fe03df6441b8f23564addbd0e464c1501d909 Mon Sep 17 00:00:00 2001 From: Heidi Holappa Date: Thu, 6 Jul 2023 14:04:13 +0300 Subject: [PATCH 28/29] remove indel results being printed twice --- src/services/log_manager.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/src/services/log_manager.py b/src/services/log_manager.py index 39a000e..caeafb9 100644 --- a/src/services/log_manager.py +++ b/src/services/log_manager.py @@ -126,9 +126,9 @@ def write_indel_results_to_file(self, indel_results): total_reads_in_indel_results += sum(value.values()) summary_line = "Indel results: count of reads in indel results: " + \ - f"{total_reads_in_indel_results}" + \ + f"{total_reads_in_indel_results} " + \ "(Note: one read can be related to multiple matching cases, " + \ - "or be related to multiple transcripts) \n" + "or be related to multiple transcripts). \n" with open(filepath, "w", encoding="utf-8") as file: file.writelines(results) @@ -149,7 +149,6 @@ def generate_indel_graphs(self, parser_args, indel_results): "found at given locations", "is_info": True }) - total_reads_in_indel_results = 0 graphs_created = 0 for key, value in indel_results.items(): if not self.validate_indel_grahp_should_be_created(parser_args, value): @@ -158,7 +157,7 @@ def generate_indel_graphs(self, parser_args, indel_results): f"offset: {key[3]}, n of cases: {sum(value.values())}" filename = "indel." 
+ str(key[0]) + ".strand_" + str(key[1]) + ".exon-loc-" + \ str(key[2]) + ".offset-(" + str(key[3]) + ")" - total_reads_in_indel_results += sum(value.values()) + graph_manager.construct_bar_chart_from_dict( graph_values=value, filename=filename, @@ -167,11 +166,7 @@ def generate_indel_graphs(self, parser_args, indel_results): y_label="Portion of reads", ) graphs_created += 1 - output_manager.output_line({ - "line": "Indel results: total number of reads in indel results: " + - f"{total_reads_in_indel_results}", - "is_info": True - }) + output_manager.output_line({ "line": f"Indel results: graphs for {graphs_created} cases created.", "is_info": True From 1929ebac693c66f4a4d0c91d18ebd7f093a1b04e Mon Sep 17 00:00:00 2001 From: Heidi Holappa Date: Thu, 6 Jul 2023 14:21:23 +0300 Subject: [PATCH 29/29] remove extra line-break --- src/services/log_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/services/log_manager.py b/src/services/log_manager.py index caeafb9..4a55a81 100644 --- a/src/services/log_manager.py +++ b/src/services/log_manager.py @@ -128,7 +128,7 @@ def write_indel_results_to_file(self, indel_results): summary_line = "Indel results: count of reads in indel results: " + \ f"{total_reads_in_indel_results} " + \ "(Note: one read can be related to multiple matching cases, " + \ - "or be related to multiple transcripts). \n" + "or be related to multiple transcripts)." with open(filepath, "w", encoding="utf-8") as file: file.writelines(results)