Skip to content

Commit

Permalink
Merge pull request #47 from bedapub/edgeR2
Browse files Browse the repository at this point in the history
Edge r2
  • Loading branch information
grexor authored Jan 12, 2024
2 parents d828c81 + 125f2fa commit d50eca9
Show file tree
Hide file tree
Showing 7 changed files with 30 additions and 16 deletions.
1 change: 1 addition & 0 deletions splicekit/clusterlogfc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from matplotlib import pyplot as plt
import matplotlib.font_manager
import seaborn as sns
import gzip

def compute_distance(compound_name1, compound_name2, fname):
if not os.path.exists(fname):
Expand Down
10 changes: 6 additions & 4 deletions splicekit/core/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def make_comparisons():
if sep_temp!="":
test_group_id = f"{test_sample}_{sep_temp}"
control_group_id = f"{control_sample}_{dmso_hash[tuple(temp_control)]}_{sep_temp}"
comparison_name = f"{test_sample}_{control_sample}_{dmso_hash[tuple(temp_control)]}_{sep_temp}"
comparison_name = f"{test_sample}_{control_sample}{dmso_hash[tuple(temp_control)]}_{sep_temp}"
else:
test_group_id = f"{test_sample}"
control_group_id = f"{control_sample}{dmso_hash[tuple(temp_control)]}"
Expand Down Expand Up @@ -180,7 +180,7 @@ def write_comparisons():
job_rmats_instance = job_rmats.format(container=splicekit.config.container, core_path=os.path.dirname(core.__file__), comparison_name=comparison_name, job_name="rmats_"+comparison_name, gtf_path=splicekit.config.gtf_path[:-3])
f_rmats.write(job_rmats_instance)
f_rmats.close()
row = [comparison_name, ",".join(str(el) for el in test_ids), ",".join(str(el) for el in control_ids), control_group_id, test_group_id]
row = [comparison_name, ",".join(str(el) for el in control_ids), ",".join(str(el) for el in test_ids), control_group_id, test_group_id]
comps_table.write("\t".join(row) + "\n")
comps_table.close()
write_edgeR_jobs()
Expand Down Expand Up @@ -209,10 +209,12 @@ def write_edgeR_jobs():
sample_membership = {}
for (comparison_name, control_set, test_set, control_group_id, test_group_id) in annotation.comparisons:
for (sample_id, _, _, _) in control_set:
assert(sample_membership.get(sample_id, None) in [None, control_group_id])
# in some rare cases, the same sample can be part of diverse control groups
#assert(sample_membership.get(sample_id, None) in [None, control_group_id])
sample_membership[sample_id] = control_group_id
for (sample_id, _, _, _) in test_set:
assert(sample_membership.get(sample_id, None) in [None, test_group_id])
# in some rare cases, the same sample can be part of diverse test groups
#assert(sample_membership.get(sample_id, None) in [None, test_group_id])
sample_membership[sample_id] = test_group_id
sample_membership = [sample_membership[sample_id] for sample_id in annotation.samples]
for (comparison_name, control_set, test_set, control_group_id, test_group_id) in annotation.comparisons:
Expand Down
2 changes: 1 addition & 1 deletion splicekit/core/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def load_genes():
if gene["gene_name"]=="":
gene["gene_name"] = atts.get("gene", "")
if gene["gene_name"]=="":
gene["gene_name"] = gene["gene_id"]
gene["gene_name"] = gene_id
# because we have a feature called "genes" (and exons, donor_anchors, acceptor_anchors and junctions)
# this stores gene counts
gene_info = gene.get("genes", {})
Expand Down
13 changes: 8 additions & 5 deletions splicekit/core/motifs.py
Original file line number Diff line number Diff line change
Expand Up @@ -605,7 +605,7 @@ def make_distance():
print(f"{module_name} make_distance | start")
len_background, background = get_background()
for feature_type in motif_criteria.keys():
treatment_seq = treatment_seq_bytype[f"{feature_type}_all"] # use all to do the clustering and dendrogram
treatment_seq = treatment_seq_bytype.get(f"{feature_type}_all", {}) # use all to do the clustering and dendrogram
f = open(f"results/motifs/{feature_type}_donor_pattern_dm.tab", "wt")
f.write("\t".join(["treatment1", "treatment2", "donnor_pattern_distance"]) + "\n")
m = len(treatment_seq.keys()) # 179 for ps180
Expand Down Expand Up @@ -639,7 +639,7 @@ def cluster(cutoff=9):
from matplotlib import pyplot as plt
for feature_type in motif_criteria.keys():
treatments = set()
for treatment, seqs in treatment_seq_bytype[f"{feature_type}_all"].items():
for treatment, seqs in treatment_seq_bytype.get(f"{feature_type}_all", {}).items():
treatments.add(treatment)

treatments = list(treatments)
Expand All @@ -656,6 +656,9 @@ def cluster(cutoff=9):
r = f.readline()
f.close()

if len(cmd)==0:
continue

Z = linkage(cdm, 'ward')
cutree = cut_tree(Z, n_clusters=cutoff) # cut dendrogram at point where we have args.cutoff clusters
clusters = {}
Expand Down Expand Up @@ -734,9 +737,9 @@ def cluster(cutoff=9):
f.close()

def process():
#if config.scanRBP:
# make_scanRBP()
# plot_scanRBP()
if config.scanRBP:
make_scanRBP()
plot_scanRBP()
make_logos()
dreme()
make_distance()
Expand Down
7 changes: 4 additions & 3 deletions splicekit/core/promisc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import sys
import splicekit.core.annotation as annotation
import splicekit.config as config
import gzip

def toint(temp):
try:
Expand All @@ -16,10 +17,10 @@ def same_junction(data1, data2):
return False

def process():
annotation.read_comparisons()
annotation.make_comparisons()
results = {}
# populate result list
f = open("results/results_edgeR_junctions.tab", "rt")
f = gzip.open("results/edgeR/junctions_results_fdr005.tab.gz", "rt")
header = f.readline().replace("\r", "").replace("\n", "").split("\t")
r = f.readline()
while r:
Expand All @@ -40,7 +41,7 @@ def process():
results[id][rdata["comparison"]+"_junction"] = 1

reported_junction = {}
fout_junction = open("results/results_edgeR_junctions_promisc.tab", "wt")
fout_junction = gzip.open("results/junctions_promisc.tab.gz", "wt")
header = ["gene_name", "gene_id", "chr", "strand", "feature_start", "feature_stop", "row_sum"]
for (comparison, _, _, _, _) in annotation.comparisons:
header.append(comparison)
Expand Down
10 changes: 8 additions & 2 deletions splicekit/core/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,14 @@ def edgeR_feature(feature_name, version=""):
assembly = {"homo_sapiens":"hg38", "mus_musculus":"mm39"}[config.species]
tracks_fixed = ["gene-refseq", "transcript-refseq", "transcript-ensembl"]
tracks_control, tracks_test = comparisons[comparison]
tracks_control = [track+'_bw' for track in tracks_control] # New JBrowse2 --> bigwig files are called by id_bw
tracks_test = [track+'_bw' for track in tracks_test] # New JBrowse2 --> bigwig files are called by id_bw

if config.jbrowse2_url.startswith("https://genomebrowser"):
tracks_control = [track.lstrip("0")+'_bw' for track in tracks_control]
tracks_test = [track.lstrip("0")+'_bw' for track in tracks_test]
else:
tracks_control = [track+'_bw' for track in tracks_control] # New JBrowse2 --> bigwig files are called by id_bw
tracks_test = [track+'_bw' for track in tracks_test] # New JBrowse2 --> bigwig files are called by id_bw

tracks = ",".join(tracks_test[:3]+tracks_control[:3]+tracks_fixed)
f_from=int(data["feature_start"])
f_to=int(data["feature_stop"])
Expand Down
3 changes: 2 additions & 1 deletion splicekit/splicecompare
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import sys
import argparse
import seaborn as sns
import matplotlib.pyplot as plt
import gzip

parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument('splicekit_run1', help="Main folder with splicekit run 1")
Expand Down Expand Up @@ -36,7 +37,7 @@ for feature_type in args.features.split(","):
fout = open(f"{args.fname_out}_{feature_type}.tab", "wt")
results = {}
for splicekit_run, comparison, comparison_fname in [(args.splicekit_run1, args.comparison1, args.comparison1_fname), (args.splicekit_run2, args.comparison2, args.comparison2_fname)]:
f = open(os.path.join(splicekit_run, "results", f"results_edgeR_{feature_type}", f"{comparison_fname}"), "rt")
f = gzip.open(os.path.join(splicekit_run, "results", "edgeR", feature_type, f"{comparison_fname}.gz"), "rt")
header = f.readline().replace("\r", "").replace("\n", "").split("\t")
r = f.readline()
while r:
Expand Down

0 comments on commit d50eca9

Please sign in to comment.