From 8bff07ca1b80d9a42648a8bb07cca2e8801c560f Mon Sep 17 00:00:00 2001 From: Gregor Rot Date: Fri, 22 Dec 2023 14:29:41 +0100 Subject: [PATCH 1/4] motifs + gene_id fix --- splicekit/core/features.py | 2 +- splicekit/core/motifs.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/splicekit/core/features.py b/splicekit/core/features.py index 4d08fd6..c3f8a29 100755 --- a/splicekit/core/features.py +++ b/splicekit/core/features.py @@ -48,7 +48,7 @@ def load_genes(): if gene["gene_name"]=="": gene["gene_name"] = atts.get("gene", "") if gene["gene_name"]=="": - gene["gene_name"] = gene["gene_id"] + gene["gene_name"] = gene_id # because we have a feature called "genes" (and exons, donor_anchors, acceptor_anchors and junctions) # this stores gene counts gene_info = gene.get("genes", {}) diff --git a/splicekit/core/motifs.py b/splicekit/core/motifs.py index efb2909..c059214 100644 --- a/splicekit/core/motifs.py +++ b/splicekit/core/motifs.py @@ -734,9 +734,9 @@ def cluster(cutoff=9): f.close() def process(): - #if config.scanRBP: - # make_scanRBP() - # plot_scanRBP() + if config.scanRBP: + make_scanRBP() + plot_scanRBP() make_logos() dreme() make_distance() From 98f8934597d7477d70d7f2eb1bce61180e77c0f3 Mon Sep 17 00:00:00 2001 From: Gregor Rot Date: Fri, 22 Dec 2023 14:30:58 +0100 Subject: [PATCH 2/4] annotation fix --- splicekit/core/annotation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/splicekit/core/annotation.py b/splicekit/core/annotation.py index 85ac707..b094304 100644 --- a/splicekit/core/annotation.py +++ b/splicekit/core/annotation.py @@ -125,7 +125,7 @@ def make_comparisons(): if sep_temp!="": test_group_id = f"{test_sample}_{sep_temp}" control_group_id = f"{control_sample}_{dmso_hash[tuple(temp_control)]}_{sep_temp}" - comparison_name = f"{test_sample}_{control_sample}_{dmso_hash[tuple(temp_control)]}_{sep_temp}" + comparison_name = f"{test_sample}_{control_sample}{dmso_hash[tuple(temp_control)]}_{sep_temp}" else: test_group_id = f"{test_sample}" control_group_id = f"{control_sample}{dmso_hash[tuple(temp_control)]}" From 4cac3df87275d86655b26d41390d4cf975ad4885 Mon Sep 17 00:00:00 2001 From: Gregor Rot Date: Tue, 9 Jan 2024 12:07:19 +0100 Subject: [PATCH 3/4] splicecompare + promisc --- splicekit/clusterlogfc/__init__.py | 1 + splicekit/core/promisc.py | 7 ++++--- splicekit/splicecompare | 3 ++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/splicekit/clusterlogfc/__init__.py b/splicekit/clusterlogfc/__init__.py index ac79110..809be5c 100644 --- a/splicekit/clusterlogfc/__init__.py +++ b/splicekit/clusterlogfc/__init__.py @@ -19,6 +19,7 @@ from matplotlib import pyplot as plt import matplotlib.font_manager import seaborn as sns +import gzip def compute_distance(compound_name1, compound_name2, fname): if not os.path.exists(fname): diff --git a/splicekit/core/promisc.py b/splicekit/core/promisc.py index 323b0b5..d44b42e 100644 --- a/splicekit/core/promisc.py +++ b/splicekit/core/promisc.py @@ -2,6 +2,7 @@ import sys import splicekit.core.annotation as annotation import splicekit.config as config +import gzip def toint(temp): try: @@ -16,10 +17,10 @@ def same_junction(data1, data2): return False def process(): - annotation.read_comparisons() + annotation.make_comparisons() results = {} # populate result list - f = open("results/results_edgeR_junctions.tab", "rt") + f = gzip.open("results/edgeR/junctions_results_fdr005.tab.gz", "rt") header = f.readline().replace("\r", "").replace("\n", "").split("\t") r = f.readline() while r: @@ -40,7 +41,7 @@ def process(): results[id][rdata["comparison"]+"_junction"] = 1 reported_junction = {} - fout_junction = open("results/results_edgeR_junctions_promisc.tab", "wt") + fout_junction = gzip.open("results/junctions_promisc.tab.gz", "wt") header = ["gene_name", "gene_id", "chr", "strand", "feature_start", "feature_stop", "row_sum"] for (comparison, _, _, _, _) in annotation.comparisons: header.append(comparison) diff --git a/splicekit/splicecompare b/splicekit/splicecompare index 2fb96a9..0537f41 100755 --- a/splicekit/splicecompare +++ b/splicekit/splicecompare @@ -5,6 +5,7 @@ import sys import argparse import seaborn as sns import matplotlib.pyplot as plt +import gzip parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter) parser.add_argument('splicekit_run1', help="Main folder with splicekit run 1") @@ -36,7 +37,7 @@ for feature_type in args.features.split(","): fout = open(f"{args.fname_out}_{feature_type}.tab", "wt") results = {} for splicekit_run, comparison, comparison_fname in [(args.splicekit_run1, args.comparison1, args.comparison1_fname), (args.splicekit_run2, args.comparison2, args.comparison2_fname)]: - f = open(os.path.join(splicekit_run, "results", f"results_edgeR_{feature_type}", f"{comparison_fname}"), "rt") + f = gzip.open(os.path.join(splicekit_run, "results", "edgeR", feature_type, f"{comparison_fname}.gz"), "rt") header = f.readline().replace("\r", "").replace("\n", "").split("\t") r = f.readline() while r: From 125f2fa366c81d96c4856a54548c4211c63b8dfd Mon Sep 17 00:00:00 2001 From: Gregor Rot Date: Fri, 12 Jan 2024 09:52:41 +0100 Subject: [PATCH 4/4] Annotation, motifs, reports --- splicekit/core/annotation.py | 8 +++++--- splicekit/core/motifs.py | 7 +++++-- splicekit/core/report.py | 10 ++++++++-- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/splicekit/core/annotation.py b/splicekit/core/annotation.py index b094304..1949d80 100644 --- a/splicekit/core/annotation.py +++ b/splicekit/core/annotation.py @@ -180,7 +180,7 @@ def write_comparisons(): job_rmats_instance = job_rmats.format(container=splicekit.config.container, core_path=os.path.dirname(core.__file__), comparison_name=comparison_name, job_name="rmats_"+comparison_name, gtf_path=splicekit.config.gtf_path[:-3]) f_rmats.write(job_rmats_instance) f_rmats.close() - row = [comparison_name, ",".join(str(el) for el in test_ids), ",".join(str(el) for el in control_ids), control_group_id, test_group_id] + row = [comparison_name, ",".join(str(el) for el in control_ids), ",".join(str(el) for el in test_ids), control_group_id, test_group_id] comps_table.write("\t".join(row) + "\n") comps_table.close() write_edgeR_jobs() @@ -209,10 +209,12 @@ def write_edgeR_jobs(): sample_membership = {} for (comparison_name, control_set, test_set, control_group_id, test_group_id) in annotation.comparisons: for (sample_id, _, _, _) in control_set: - assert(sample_membership.get(sample_id, None) in [None, control_group_id]) + # in some rare cases, the same sample can be part of diverse control groups + #assert(sample_membership.get(sample_id, None) in [None, control_group_id]) sample_membership[sample_id] = control_group_id for (sample_id, _, _, _) in test_set: - assert(sample_membership.get(sample_id, None) in [None, test_group_id]) + # in some rare cases, the same sample can be part of diverse test groups + #assert(sample_membership.get(sample_id, None) in [None, test_group_id]) sample_membership[sample_id] = test_group_id sample_membership = [sample_membership[sample_id] for sample_id in annotation.samples] for (comparison_name, control_set, test_set, control_group_id, test_group_id) in annotation.comparisons: diff --git a/splicekit/core/motifs.py b/splicekit/core/motifs.py index c059214..7ce72db 100644 --- a/splicekit/core/motifs.py +++ b/splicekit/core/motifs.py @@ -605,7 +605,7 @@ def make_distance(): print(f"{module_name} make_distance | start") len_background, background = get_background() for feature_type in motif_criteria.keys(): - treatment_seq = treatment_seq_bytype[f"{feature_type}_all"] # use all to do the clustering and dendrogram + treatment_seq = treatment_seq_bytype.get(f"{feature_type}_all", {}) # use all to do the clustering and dendrogram f = open(f"results/motifs/{feature_type}_donor_pattern_dm.tab", "wt") f.write("\t".join(["treatment1", "treatment2", "donnor_pattern_distance"]) + "\n") m = len(treatment_seq.keys()) # 179 for ps180 @@ -639,7 +639,7 @@ def cluster(cutoff=9): from matplotlib import pyplot as plt for feature_type in motif_criteria.keys(): treatments = set() - for treatment, seqs in treatment_seq_bytype[f"{feature_type}_all"].items(): + for treatment, seqs in treatment_seq_bytype.get(f"{feature_type}_all", {}).items(): treatments.add(treatment) treatments = list(treatments) @@ -656,6 +656,9 @@ def cluster(cutoff=9): r = f.readline() f.close() + if len(cmd)==0: + continue + Z = linkage(cdm, 'ward') cutree = cut_tree(Z, n_clusters=cutoff) # cut dendrogram at point where we have args.cutoff clusters clusters = {} diff --git a/splicekit/core/report.py b/splicekit/core/report.py index 7ef3c19..a611aff 100644 --- a/splicekit/core/report.py +++ b/splicekit/core/report.py @@ -96,8 +96,14 @@ def edgeR_feature(feature_name, version=""): assembly = {"homo_sapiens":"hg38", "mus_musculus":"mm39"}[config.species] tracks_fixed = ["gene-refseq", "transcript-refseq", "transcript-ensembl"] tracks_control, tracks_test = comparisons[comparison] - tracks_control = [track+'_bw' for track in tracks_control] # New JBrowse2 --> bigwig files are called by id_bw - tracks_test = [track+'_bw' for track in tracks_test] # New JBrowse2 --> bigwig files are called by id_bw + + if config.jbrowse2_url.startswith("https://genomebrowser"): + tracks_control = [track.lstrip("0")+'_bw' for track in tracks_control] + tracks_test = [track.lstrip("0")+'_bw' for track in tracks_test] + else: + tracks_control = [track+'_bw' for track in tracks_control] # New JBrowse2 --> bigwig files are called by id_bw + tracks_test = [track+'_bw' for track in tracks_test] # New JBrowse2 --> bigwig files are called by id_bw + tracks = ",".join(tracks_test[:3]+tracks_control[:3]+tracks_fixed) f_from=int(data["feature_start"]) f_to=int(data["feature_stop"])