-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtree_prune.R
55 lines (49 loc) · 1.52 KB
/
tree_prune.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#!/usr/bin/Rscript
args = commandArgs(trailingOnly = T)
# test if there is at least one argument: if not, return an error
if (length(args) == 0) {
stop("At least one argument must be supplied (input file).n",
call. = FALSE)
} else if (length(args) == 2) {
# default output file
args[3] = "tree/retain.tsv"
}
library(biomformat )
#load the data
biom2 = read_biom(args[1])
tax = read.csv(args[2], sep = '\t', stringsAsFactors = F)[-3]
seq_prune = function(b, tax) {
#Prune the sequences keeping only one seq that correspond to each
#taxonomy assignment
#extract the otu table
columns = b$columns
rows = b$rows
data = b$data
#loop
d = data.frame(stringsAsFactors = F)
for (i in 1:length(rows)) {
otu_id = rows[[i]][[1]]
taxa = tax[tax == otu_id, 'taxonomy']
count = sum(data[[i]])
if (count < length(columns)) {
next
}
if (!(taxa %in% rownames(d))) {
d[taxa, 'id'] = otu_id
d[taxa, 'count'] = count
next
}
if (count > d[taxa, 'count']) {
d[taxa, 'id'] = otu_id
d[taxa, 'count'] = count
}
}
return(d)
}
write.table(
seq_prune(biom2, tax)$id,
file = args[3],
sep = '\t',
row.names = F,
col.names = 'id'
)