Skip to content

Commit

Permalink
Merge Importer and TaskCreator into one.
Browse files Browse the repository at this point in the history
Fix the only unit test we have for the Ruby sub-system.

Signed-off-by: Anurag Priyam <[email protected]>
  • Loading branch information
yeban committed Nov 20, 2014
1 parent 1995881 commit b731520
Show file tree
Hide file tree
Showing 5 changed files with 70 additions and 94 deletions.
9 changes: 0 additions & 9 deletions Rakefile
Original file line number Diff line number Diff line change
Expand Up @@ -129,15 +129,6 @@ task 'import', [:annotations_file] do |t, args|
Importer.new(annotations_file).run
end

desc 'Create tasks'
task 'tasks:create', [:annotations_file] do |t, args|
require_relative 'app'
App.init_config
App.load_services
annotations_file = args[:annotations_file]
TaskCreator.new(annotations_file).run
end

desc 'IRb.'
task 'irb' do
require_relative 'app'
Expand Down
9 changes: 9 additions & 0 deletions models/genome.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
# Sequel model for a genome; a genome has many reference sequences.
class Genome < Sequel::Model
  one_to_many :ref_seqs

  # Dataset of every Feature of type 'mRNA' whose ref_seq_id is the seq_id of
  # one of this genome's reference sequences.
  #
  # The return value is a Sequel::Dataset, not an Array: call `all` on it to
  # materialise the rows.
  def mRNAs
    mrna_features = Feature.where(type: 'mRNA')
    mrna_features.where(ref_seq_id: ref_seqs.map(&:seq_id))
  end
end
55 changes: 53 additions & 2 deletions services/importer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@ def initialize(annotations_file)
end

# Entry point of the importer.
#
# Calls `format` and, only when it returns a truthy value, `update_tracklist`.
# Then, inside a single database transaction, registers the genome, its
# reference sequences and its annotations, and finally creates the curation
# tasks.
def run
  update_tracklist if format
  Genome.db.transaction do
    register_genome
    register_ref_seqs
    register_annotations
    create_curation_tasks
  end
end

Expand Down Expand Up @@ -127,4 +127,55 @@ def nclist_to_features(ref, classes, nclist)
end
list
end

# Creates one Task per cluster of overlapping mRNAs of the genome.
#
# The difficulty of each task is the number of mRNAs in its cluster.
def create_curation_tasks
  # Aggregate the mRNA loci per reference sequence into three parallel
  # arrays, all ordered by start coordinate, so that each row looks like:
  #
  #   {
  #     ref_seq_id: ...,
  #     ids: [],
  #     start_coordinates: [],
  #     end_coordinates: []
  #   }
  ids_by_start    = Sequel.function(:array_agg, Sequel.lit('"id" ORDER BY "start"'))
  starts_by_start = Sequel.function(:array_agg, Sequel.lit('"start" ORDER BY "start"'))
  ends_by_start   = Sequel.function(:array_agg, Sequel.lit('"end" ORDER BY "start"'))

  per_ref_loci = genome.mRNAs
    .select(ids_by_start.as(:ids),
            starts_by_start.as(:start_coordinates),
            ends_by_start.as(:end_coordinates),
            :ref_seq_id)
    .group(:ref_seq_id)

  per_ref_loci.each do |ref_loci|
    call_overlaps(ref_loci).each do |cluster|
      # :ids is removed from the hash before it is handed to Task.create;
      # only the member count is kept, as the task's difficulty.
      member_ids = cluster.delete(:ids)
      task = Task.create(cluster)
      task.difficulty = member_ids.length
      task.save
    end
  end
end

# Clusters the overlapping loci of one reference sequence, ignoring strand.
#
# loci_one_ref - row-like object indexed by Symbol, providing :ref_seq_id and
#                three parallel arrays :ids, :start_coordinates and
#                :end_coordinates. The clustering only looks at the previous
#                cluster, so the arrays are expected to be sorted by start.
#
# A locus joins the current cluster when its start is strictly less than the
# cluster's end; a locus starting exactly at the cluster's end opens a new
# cluster.
#
# Returns an Array of Hashes of the form
#   {ref_seq_id: ..., start: ..., end: ..., ids: [...]}
#
# About overlapping genes: http://www.biomedcentral.com/1471-2164/9/169.
def call_overlaps(loci_one_ref)
  # Reference sequence all the loci belong to.
  ref_seq_id = loci_one_ref[:ref_seq_id]

  loci_one_ref[:ids].each_with_index.each_with_object([]) do |(id, idx), clusters|
    locus_start = loci_one_ref[:start_coordinates][idx]
    locus_end   = loci_one_ref[:end_coordinates][idx]
    current     = clusters.last

    if current && locus_start < current[:end] # overlap
      current[:ids] << id
      # A locus nested inside the cluster must not shrink the cluster's end.
      current[:end] = [current[:end], locus_end].max
    else
      clusters << {ref_seq_id: ref_seq_id, start: locus_start, end: locus_end, ids: [id]}
    end
  end
end
end
76 changes: 0 additions & 76 deletions services/task_creator.rb

This file was deleted.

15 changes: 8 additions & 7 deletions tests/rb/test_overlap_detection.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,28 +5,29 @@

def data
@data ||= {ref: 'test_ref',
feature_ids: [1, 2, 3, 4, 5, 6, 7],
feature_start_coordinates: [10, 30, 40, 50, 100, 110, 130],
feature_end_coordinates: [20, 70, 60, 80, 150, 120, 140]
ids: [1, 2, 3, 4, 5, 6, 7],
start_coordinates: [10, 30, 40, 50, 100, 110, 130],
end_coordinates: [20, 70, 60, 80, 150, 120, 140]
}
end

def overlaps
@groups ||= Importer.new(nil).call_overlaps(data)
@groups ||=
Importer.new('data/annotations/Solenopsis_invicta/Si_gnF.gff').call_overlaps(data)
end

it "should work" do
assert overlaps.length == 3

assert overlaps[0][:feature_ids].length == 1
assert overlaps[0][:ids].length == 1
assert overlaps[0][:start] == 10
assert overlaps[0][:end] == 20

assert overlaps[1][:feature_ids].length == 3
assert overlaps[1][:ids].length == 3
assert overlaps[1][:start] == 30
assert overlaps[1][:end] == 80

assert overlaps[2][:feature_ids].length == 3
assert overlaps[2][:ids].length == 3
assert overlaps[2][:start] == 100
assert overlaps[2][:end] == 150
end
Expand Down

0 comments on commit b731520

Please sign in to comment.