-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbin_markers.pl
executable file
·121 lines (104 loc) · 3.3 KB
/
bin_markers.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#!/usr/bin/perl
#A script to bin markers in RIL or CSSLs.
#This is needed for dense genotyping
#Markers should be imputed first for best results. Missing data creates bins.
#inputvariables tab delimited marker file and maximum distance of flanking markers to use for imputation
#marker file layout (tab delimited)
## marker individual1 individual2 etc.
#marker names as chr underscore position e.g., S1_10000
#markers must be coded as parent 1 (0) and partent 2 (2) and missing (-1)
#NOTE!! Bins will overlap because of recombination breakpoint uncertainty.
use strict;
use Getopt::Std;
use Data::Dumper;
#reading options
our ($opt_i);
getopts('i:');
if (!$opt_i) {
print STDERR "\nInput file (-i) required\n\n\n";
}
my @accessions;
my @markers;
my %markerchr;
my %markerpos;
my $firstline = 1;
my $secondline = 1;
my @prev_row;
my $bin_start_marker;
my $bin_end_marker;
my $prevchr;
open INFILE, "<", $opt_i or die "No such input file $opt_i";
while (<INFILE>) {
chomp $_;
if ($firstline == 1) {
my @header = split('\t',$_);
my @header = @header[1.. $#header];
print "SNP\tRange\t".join("\t",@header)."\n";
$firstline = 0;
next;
}
my @row = split('\t', $_);
my $marker = $row[0];
push @markers, $marker;
my @markerinfo = split('_',$marker);
$markerchr{$marker} = $markerinfo[0];
$markerpos{$marker} = $markerinfo[1];
@row = @row[1 .. $#row];
if ($secondline == 1) {
$bin_start_marker = $marker;
$bin_end_marker = $marker;
@prev_row = @row;
$secondline = 0;
$prevchr = $markerinfo[0];
}
if ($prevchr ne $markerinfo[0]) {
$bin_start_marker = $marker;
$bin_end_marker = $marker;
@prev_row = @row;
$prevchr = $markerinfo[0];
}
if (join(",",@row) eq join(",",@prev_row)) {
$bin_end_marker = $marker;
#note, this will misbehave if end of one chromosome has exact same marker scores as the start of the next
#above should be fixed now
}
else {
my @startmarkerinfo = split('_',$bin_start_marker);
my @endmarkerinfo = split('_',$bin_end_marker);
my $centerpos = ($markerinfo[1] - $startmarkerinfo[1])/2 + $startmarkerinfo[1];
my $closest_to_center=10000000000;
my $centermarker;
foreach my $checkmarker (@markers) {
if ($markerchr{$checkmarker} eq $markerinfo[0]) {
my $markerdist = abs($markerpos{$checkmarker} - $centerpos);
if ( $markerdist < $closest_to_center) {
$closest_to_center = $markerdist;
$centermarker = $checkmarker;
}
}
}
print $centermarker."\t".$markerinfo[0]."_".$startmarkerinfo[1]."-".$markerinfo[1]."\t";
print join("\t",@prev_row)."\n";
@prev_row = @row;
$bin_start_marker = $bin_end_marker;
$bin_end_marker = $marker;
}
}
close INFILE;
# deal with last line
my @startmarkerinfo = split('_',$bin_start_marker);
my @endmarkerinfo = split('_',$bin_end_marker);
my $centerpos = ($startmarkerinfo[1] - $endmarkerinfo[1])/2 + $startmarkerinfo[1];
my $closest_to_center=10000000000;
my $centermarker;
foreach my $checkmarker (@markers) {
if ($markerchr{$checkmarker} eq $startmarkerinfo[0]) {
my $markerdist = abs($markerpos{$checkmarker} - $centerpos);
if ( $markerdist < $closest_to_center) {
$closest_to_center = $markerdist;
$centermarker = $checkmarker;
}
}
}
print $centermarker."\t".$startmarkerinfo[0]."_".$startmarkerinfo[1]."-".$endmarkerinfo[1]."\t";
print join("\t",@prev_row)."\n";