-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgenesExpr.go
154 lines (140 loc) · 4.12 KB
/
genesExpr.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
/* Copyright (C) 2016 Philipp Benner
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gonetics
/* -------------------------------------------------------------------------- */
import "fmt"
import "bufio"
import "compress/gzip"
import "os"
import "io"
import "strconv"
import "strings"
/* i/o
* -------------------------------------------------------------------------- */
// Parse expression data from a GTF file (gene transfer format). The data
// is added as a meta column named "expr" to the gene list. Parameters:
// geneIdName: Name of the optional field containing the gene id
// exprIdName: Name of the optional field containing the expression data
// genes: List of query genes
func (genes *Genes) ReadGTFExpr(r io.Reader, geneIdName, exprIdName string) error {
granges := GRanges{}
granges.ReadGTF(r, []string{geneIdName, exprIdName}, []string{"[]string", "[]float64"}, nil)
// slice containing expression values
expr := make([]float64, genes.Length())
geneIds := granges.GetMetaStr(geneIdName)
exprVal := granges.GetMetaFloat(exprIdName)
if len(geneIds) == 0 {
return fmt.Errorf("invalid geneIdName `%s'", geneIdName)
}
if len(exprVal) == 0 {
return fmt.Errorf("invalid exprIdName `%s'", exprIdName)
}
for i := 0; i < granges.Length(); i++ {
if j, ok := genes.FindGene(geneIds[i]); ok {
expr[j] += exprVal[i]
}
}
genes.AddMeta("expr", expr)
return nil
}
func (genes *Genes) ImportGTFExpr(filename string, geneIdName, exprIdName string) error {
var r io.Reader
// open file
f, err := os.Open(filename)
if err != nil {
return err
}
defer f.Close()
// check if file is gzipped
if isGzip(filename) {
g, err := gzip.NewReader(f)
if err != nil {
return err
}
defer g.Close()
r = g
} else {
r = f
}
return genes.ReadGTFExpr(r, geneIdName, exprIdName)
}
// Import expression data from cufflinks. The data is added to the gene
// list as a meta column named "expr".
func (genes *Genes) ReadCufflinksFPKMTracking(r io.Reader, verbose bool) error {
scanner := bufio.NewScanner(r)
expr := make([]float64, genes.Length())
// parse header
scanner.Scan();
if err := scanner.Err(); err != nil {
return err
}
header := strings.Fields(scanner.Text())
if len(header) != 13 || header[0] != "tracking_id" ||
(header[9] != "FPKM" && header[9] != "RPKM") {
return fmt.Errorf("invalid header")
}
// parse data
for scanner.Scan() {
if err := scanner.Err(); err != nil {
return err
}
fields := strings.Fields(scanner.Text())
if len(fields) == 0 {
continue
}
if len(fields) != 13 {
return fmt.Errorf("file must have 13 columns")
}
if fields[12] == "OK" {
geneStr := fields[0]
exprStr := fields[9]
if i, ok := genes.FindGene(geneStr); ok {
t, err := strconv.ParseFloat(exprStr, 64)
if err != nil {
return err
}
expr[i] += t
} else {
if verbose {
fmt.Fprintf(os.Stderr, "`%s' not present in gene list!\n", geneStr)
}
}
}
}
genes.AddMeta("expr", expr)
return nil
}
func (genes *Genes) ImportCufflinksFPKMTracking(filename string, verbose bool) error {
var r io.Reader
// open file
f, err := os.Open(filename)
if err != nil {
return err
}
defer f.Close()
// check if file is gzipped
if isGzip(filename) {
g, err := gzip.NewReader(f)
if err != nil {
return err
}
defer g.Close()
r = g
} else {
r = f
}
return genes.ReadCufflinksFPKMTracking(r, verbose)
}