-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathedgR.R
136 lines (115 loc) · 5.23 KB
/
edgR.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# library -----------------------------------------------------------------
library(edgeR)
library(limma)
library(Glimma)
library(org.Mm.eg.db)
library(gplots)
library(RColorBrewer)
library(NMF)
# counts ------------------------------------------------------------------
mycount_77 <- "/Users/benson/Documents/project/RNA-seq1-3/myTMM.RDS" %>%
readRDS() %>% as.data.frame()
mycount_df <- mycount_77 %>% as.data.frame()%>%
select(.,
ip_Y_V_S_CON,ip_Y_V_S_DMS,ip_Y_V_S_AZA,ip_Y_V_S_DAC,
ip_Y_V_S_AS,ip_Y_V_S_CO,ip_Y_V_S_LCD,ip_Y_V_S_HCD,
ip_Y_V_S_BAP, ip_Y_V_S_AS_BAP, ip_Y_V_S_CO_BAP,
ip_Y_V_S_LCD_BAP,ip_Y_V_S_HCD_BAP) %>%
setNames(c("Y_ST_CON","Y_ST_DMS","Y_ST_AZA","Y_ST_DAC",
"Y_ST_AS","Y_ST_CO","Y_ST_LCD","Y_ST_HCD",
"Y_ST_BAP", "Y_ST_AS_BAP", "Y_ST_CO_BAP",
"Y_ST_LCD_BAP","Y_ST_HCD_BAP"))
sampleinfo <- data.frame(row = names(mycount_df),
sample = names(mycount_df),
treatment = str_sub(names(mycount_df), start=6),
culture = str_sub(names(mycount_df), start=3,end=4),
cell = str_sub(names(mycount_df), start=1,end=1)) %>%
column_to_rownames('row')
# pre load ----------------------------------------------------------------
group_names <- c("Y_ST_CON","Y_ST_DMS","Y_ST_AZA","Y_ST_DAC",
"Y_ST_AS","Y_ST_CO","Y_ST_LCD","Y_ST_HCD",
"Y_ST_BAP", "Y_ST_AS_BAP", "Y_ST_CO_BAP",
"Y_ST_LCD_BAP","Y_ST_HCD_BAP")
# anno --------------------------------------------------------------------
annotation_df <- "/Users/benson/Documents/project/RNA-seq1-3/anno_gene.RDS" %>%
readRDS() %>% as.data.frame() %>% select("ENTREZID","SYMBOL") %>% na.omit()
# load data ---------------------------------------------------------------
### create edgeR object
y <- DGEList(mycount_df)
group <- group_names
group <- factor(group)
y$samples$group <- group
y$genes <- annotation_df
head(y$samples)
### library size
y$samples$lib.size
# we can also adjust the labelling if we want
barplot(y$samples$lib.size/1e06, names=colnames(y), las=2, ann=FALSE, cex.names=0.75)
mtext(side = 1, text = "Samples", line = 4)
mtext(side = 2, text = "Library size (millions)", line = 3)
title("Barplot of library sizes")
### Get log2 counts per million
logcounts <- y %>% as.matrix() %>% log2()
# Check distributions of samples using boxplots
boxplot(logcounts, xlab="", ylab="Log2 counts per million",las=2)
# Let's add a blue horizontal line that corresponds to the median logCPM
abline(h=median(logcounts),col="blue")
title("Boxplots of logCPMs (unnormalised)")
### Multidimensional scaling plots
plotMDS(y,xlab = "PC1",ylab = "PC2",pch = 16)
# We specify the option to let us plot two plots side-by-sde
par(mfrow=c(1,2))
# Let's set up colour schemes for CellType
# How many cell types and in what order are they stored?
sampleinfo$sample <- factor(sampleinfo$sample)
sampleinfo$treatment <- factor(sampleinfo$treatment)
sampleinfo$culture <- factor(sampleinfo$culture)
sampleinfo$cell <- factor(sampleinfo$cell)
levels(sampleinfo$sample)
levels(sampleinfo$treatment)
levels(sampleinfo$culture)
levels(sampleinfo$cell)
# 'CON' = '#E0E0E0', 'DMS' = '#ADADAD', 'AZA' = '#FFD2D2', 'DAC' = '#FF9797',
# 'AS' = '#FFFF37', 'CO' = '#FF5151',
# 'LCD' = '#0080FF', 'HCD' = '#005AB5', 'BAP' = '#00DB00',
# 'AS_BAP' = '#FFDC35', 'CO_BAP' = '#EA0000',
# 'LCD_BAP' = '#0000E3', 'HCD_BAP' = '#000079'
# clone = c('WT' = '#FF2D2D', 'L858R' = '#FF9224',
# 'DEL19' = '#BE77FF', 'YAP' = '#6F00D2'))
col.sample <- c("black")[sampleinfo$sample]
col.treatment <- c('#FFFF37','#FFDC35','#FFD2D2','#00DB00','#FF5151','#EA0000',
'#E0E0E0','#FF9797','#ADADAD','#005AB5','#000079','#0080FF',
'#0000E3')[sampleinfo$treatment]
col.culture <- c("#0072E3","darkblue")[sampleinfo$culture]
col.cell <- c('#FF9224','#6F00D2')[sampleinfo$cell]
# Redo the MDS with cell type colouring
plotMDS(y,col=col.sample,xlab = "PC1",ylab = "PC2")
title("sample")
plotMDS(y,col=col.treatment,xlab = "PC1",ylab = "PC2")
legend("topleft",fill=c('#FFFF37','#FFDC35','#FFD2D2','#00DB00','#FF5151','#EA0000',
'#E0E0E0','#FF9797','#ADADAD','#005AB5','#000079','#0080FF',
'#0000E3'),
legend=levels(sampleinfo$treatment))
title("Treatment")
plotMDS(y,col=col.culture,xlab = "PC1",ylab = "PC2")
legend("topleft",fill=c("darkblue","#0072E3"),legend=levels(sampleinfo$culture))
title("Protocol(Short-term/Long-term)")
plotMDS(y,col=col.cell,xlab = "PC1",ylab = "PC2")
legend("topleft",fill=c("orange","#6F00D2"),legend=levels(sampleinfo$cell))
title("Cell type")
# 創建一個 26x26 的矩陣,初始化為全零
matrix_data <- matrix(0, nrow = 26, ncol = 26)
# 將對角線上的元素設置為 1
diag(matrix_data) <- 1
# 檢視結果
colnames(matrix_data) <- group_names
par(mfrow=c(1,1))
v <- voom(y,matrix_data,plot = TRUE,weights = 1)
fit <- lmFit(v)
names(fit)
cont.matrix <- makeContrasts(minus=ip_L_V_L_LCD_BAP-ip_L_V_L_HCD_BAP,levels=matrix_data)
fit.cont <- contrasts.fit(fit, cont.matrix)
summa.fit <- decideTests(fit.cont)
summary(summa.fit)
par(mfrow=c(1,2))
plotMD(fit.cont,coef=1,status=fit.cont[,"ip_L_V_L_LCD_BAP"], values = c(-1, 1), hl.col=c("blue","red"))