Daniel Fürth
Table of Contents:
- Create a table of raw data files
- Import the data
- Make a master data frame with all data.
- Make a plot
- Same for MS data
- Bargraph of TFP-ester labeling gels
- Absorption spectrum
The raw data text files are in the folder ./data/lc/
. Lets list the
content of that folder:
folder <- './data/lc'
files <- dir(folder)
files
[1] "Azido_PEG_tetrazine_chromatogram_280nm.txt"
[2] "Azido_PEG_tetrazine_MS.txt"
[3] "DBCO488_chromatogram_280nm.txt"
[4] "DBCO488_MS.txt"
[5] "DBCO594_chromatogram_280nm.txt"
[6] "DBCO594_MS.txt"
[7] "RXN488_chromatogram_280nm.txt"
[8] "RXN488_MS.txt"
[9] "RXN594_chromatogram_280nm.txt"
[10] "RXN594_MS.txt"
We have two types of files: MS
and chromatogram
. Lets check the
presence of string MS
and chromatogram
in the filenames using
grepl()
command and make a boolean index based on it:
ms <- grepl("MS", files)
chromatogram <- grepl("chromatogram", files)
Lets make a data frame object, myfiles
, that stores all info about our
files including the full file path:
myfiles<-data.frame(folder, files, ms, chromatogram)
#show the data frame in console
myfiles
folder files ms chromatogram
1 ./data/lc Azido_PEG_tetrazine_chromatogram_280nm.txt FALSE TRUE
2 ./data/lc Azido_PEG_tetrazine_MS.txt TRUE FALSE
3 ./data/lc DBCO488_chromatogram_280nm.txt FALSE TRUE
4 ./data/lc DBCO488_MS.txt TRUE FALSE
5 ./data/lc DBCO594_chromatogram_280nm.txt FALSE TRUE
6 ./data/lc DBCO594_MS.txt TRUE FALSE
7 ./data/lc RXN488_chromatogram_280nm.txt FALSE TRUE
8 ./data/lc RXN488_MS.txt TRUE FALSE
9 ./data/lc RXN594_chromatogram_280nm.txt FALSE TRUE
10 ./data/lc RXN594_MS.txt TRUE FALSE
#get the names of the the variables
names(myfiles)
[1] "folder" "files" "ms" "chromatogram"
add full file paths:
myfiles$path<-dir(folder, full.names=TRUE)
#get the names of the the variables
names(myfiles)
[1] "folder" "files" "ms" "chromatogram" "path"
Next lets import a chromatogram file. We can open the file manually and
find that on line 76 we have the text Raw data:
followed by the data
in TSV format.
76 Raw Data:
77 Time(min) Step(sec) Value(mAU)
78 0.000000 0.00 0.000000
79 0.001667 0.10 0.000030
80 0.003333 0.10 0.000580
81 0.005000 0.10 0.002410
We can simply then import the data into a data frame by adding
skip = 76
and the read.table()
command will skip all the line suntil
line 77. If we also add header=TRUE
we will import the variable names
into the header of the data frame.
data<-read.table(myfiles$path[1], skip=76, header=TRUE)
head(data) #just show the first 6 lines of the table, tail() command shows the last 6 lines.
Time.min. Step.sec. Value.mAU.
1 0.000000 0.0 0.00000
2 0.001667 0.1 0.00003
3 0.003333 0.1 0.00058
4 0.005000 0.1 0.00241
5 0.006667 0.1 0.00534
6 0.008333 0.1 0.00690
Lets plot the chromatogram as a line plot:
plot(data$Time.min., data$Value.mAU., type='l')
Lets examine the different files we have from the chromatogram:
myfiles[myfiles$chromatogram,]
folder files ms chromatogram
1 ./data/lc Azido_PEG_tetrazine_chromatogram_280nm.txt FALSE TRUE
3 ./data/lc DBCO488_chromatogram_280nm.txt FALSE TRUE
5 ./data/lc DBCO594_chromatogram_280nm.txt FALSE TRUE
7 ./data/lc RXN488_chromatogram_280nm.txt FALSE TRUE
9 ./data/lc RXN594_chromatogram_280nm.txt FALSE TRUE
path
1 ./data/lc/Azido_PEG_tetrazine_chromatogram_280nm.txt
3 ./data/lc/DBCO488_chromatogram_280nm.txt
5 ./data/lc/DBCO594_chromatogram_280nm.txt
7 ./data/lc/RXN488_chromatogram_280nm.txt
9 ./data/lc/RXN594_chromatogram_280nm.txt
We can see that the sample name preceeds the first _
in the file name
so lets add a variable sample
with that text string:
#split the string by _ character
underscore_split<-strsplit( myfiles$files, "_")
underscore_split
[[1]]
[1] "Azido" "PEG" "tetrazine" "chromatogram" "280nm.txt"
[[2]]
[1] "Azido" "PEG" "tetrazine" "MS.txt"
[[3]]
[1] "DBCO488" "chromatogram" "280nm.txt"
[[4]]
[1] "DBCO488" "MS.txt"
[[5]]
[1] "DBCO594" "chromatogram" "280nm.txt"
[[6]]
[1] "DBCO594" "MS.txt"
[[7]]
[1] "RXN488" "chromatogram" "280nm.txt"
[[8]]
[1] "RXN488" "MS.txt"
[[9]]
[1] "RXN594" "chromatogram" "280nm.txt"
[[10]]
[1] "RXN594" "MS.txt"
Here underscore_split
is a list object where each file name is an
entry in the list and it then has a nested list within where text
between each “_” is entered as a single entry. We can then extract the
first entry from each list using lapply()
“list apply” function:
# Create a new list 'sample.name' by applying a function to each element of 'underscore_split'
sample.name <- lapply(underscore_split, function(x) {
return(x[1]) # Return the first element (index 1) of each element in 'underscore_split'
})
# Display the resulting 'sample.name' list
sample.name
[[1]]
[1] "Azido"
[[2]]
[1] "Azido"
[[3]]
[1] "DBCO488"
[[4]]
[1] "DBCO488"
[[5]]
[1] "DBCO594"
[[6]]
[1] "DBCO594"
[[7]]
[1] "RXN488"
[[8]]
[1] "RXN488"
[[9]]
[1] "RXN594"
[[10]]
[1] "RXN594"
# Check class of the object
class(sample.name)
[1] "list"
Here sample.name
is a object with the class list. If we now want to go
from list object to a character vector to insert it into our data frame
we can do like this:
myfiles$sample<-unlist(sample.name)
myfiles
folder files ms chromatogram
1 ./data/lc Azido_PEG_tetrazine_chromatogram_280nm.txt FALSE TRUE
2 ./data/lc Azido_PEG_tetrazine_MS.txt TRUE FALSE
3 ./data/lc DBCO488_chromatogram_280nm.txt FALSE TRUE
4 ./data/lc DBCO488_MS.txt TRUE FALSE
5 ./data/lc DBCO594_chromatogram_280nm.txt FALSE TRUE
6 ./data/lc DBCO594_MS.txt TRUE FALSE
7 ./data/lc RXN488_chromatogram_280nm.txt FALSE TRUE
8 ./data/lc RXN488_MS.txt TRUE FALSE
9 ./data/lc RXN594_chromatogram_280nm.txt FALSE TRUE
10 ./data/lc RXN594_MS.txt TRUE FALSE
path sample
1 ./data/lc/Azido_PEG_tetrazine_chromatogram_280nm.txt Azido
2 ./data/lc/Azido_PEG_tetrazine_MS.txt Azido
3 ./data/lc/DBCO488_chromatogram_280nm.txt DBCO488
4 ./data/lc/DBCO488_MS.txt DBCO488
5 ./data/lc/DBCO594_chromatogram_280nm.txt DBCO594
6 ./data/lc/DBCO594_MS.txt DBCO594
7 ./data/lc/RXN488_chromatogram_280nm.txt RXN488
8 ./data/lc/RXN488_MS.txt RXN488
9 ./data/lc/RXN594_chromatogram_280nm.txt RXN594
10 ./data/lc/RXN594_MS.txt RXN594
You can see how we now have a variable myfiles$sample
with all the
sample names:
myfiles$sample
[1] "Azido" "Azido" "DBCO488" "DBCO488" "DBCO594" "DBCO594" "RXN488"
[8] "RXN488" "RXN594" "RXN594"
Some stats, showing that each sample has two files (one MS
and one
chromatogram
):
table(myfiles$sample)
Azido DBCO488 DBCO594 RXN488 RXN594
2 2 2 2 2
Lets create a new data frame called trace
where we have all the traces
from each experiment into one file:
#create an empty data frame where we will store our data
trace<-data.frame(sample = character(), time = numeric(), value = numeric())
#loop through each chromatogram file and load it in and add it to trace data frame
for(i in which(myfiles$chromatogram)){
tmp<-read.table(myfiles$path[i], skip=76, header=TRUE)
trace.tmp<-data.frame(
sample = myfiles$sample[i],
time = tmp$Time.min,
value = tmp$Value.mAU
)
#add the just loaded file into master data frame, trace
trace <- rbind(trace, trace.tmp)
}
Lets check the new data frame trace
:
table(trace$sample)
Azido DBCO488 DBCO594 RXN488 RXN594
1801 1801 1801 1801 1801
So we have each sample loaded with 1801 measurements in each.
mysamples<- rev(unique(trace$sample)) #reverse order because we want reactions RXN at bottom so one reads the graph top to bottom.
scale.factor.y <- 1.2 #adds some space between lines when we stack them on top.
text.under.y <- 0.2 #placement of the text label under the trace.
text.under.x <- 0.1 #placement of the text label under the trace.
plot(trace$time[trace$sample == mysamples[1]],
trace$value[trace$sample == mysamples[1]],
type='l',
xlab='Time',
ylab='',
ylim=c(-max(trace$value)*text.under.y, scale.factor.y*max(trace$value)*length(mysamples))
)
for(i in seq_along(mysamples)){
#k is a variable that adds some space for sample 3 and above so the two reactions are vertically seperated from the rest.
if(i > 2){
k <- 280
}else{
k <- 0
}
lines(trace$time[trace$sample == mysamples[i]],
trace$value[trace$sample == mysamples[i]]+scale.factor.y*max(trace$value)*(i-1)+k, col=i )
text(max(trace$time)*text.under.x, scale.factor.y*max(trace$value)*(i-1)-max(trace$value)*text.under.y+k, mysamples[i], col=i)
}
Lets make this graph a little bit prettier, using ColorBrewer2.org for color panels:
if(!require(RColorBrewer)){
install.packages('RColorBrewer')
}
Loading required package: RColorBrewer
library(RColorBrewer)
mylabels <- c('AZdye594-PEG1-N-bis(PEG2-Tz)',
'AZdye488-PEG1-N-bis(PEG2-Tz)',
'AZdye594-DBCO',
'AZdye488-DBCO',
'Azido-PEG1-N-bis(PEG2-Tz)')
color <- c(RColorBrewer::brewer.pal(6, 'PuOr')[c(5,1,6,2)], 'black')
scale.factor.y <- 1.2 #adds some space between lines when we stack them on top.
text.under.y <- 0.3 #placement of the text label under the trace.
text.under.x <- 0.31 #placement of the text label under the trace.
#normalize values
normalize <- function(num_vec){
maxnv <- max(num_vec, na.rm = TRUE)
md<-num_vec[1]
return((num_vec-md) / (maxnv))
}
#create a new variable 'norm' which normalizes values for each compound
trace$norm<-with(trace, ave(value, sample, FUN = normalize))
quartz(width=5.5, height=5.6)
par(mar=c(2,1,0,1))
plot(trace$time[trace$sample == mysamples[1]],
trace$norm[trace$sample == mysamples[1]],
type='l',
xlab='',
ylab='',
ylim=c(-max(trace$norm)*text.under.y, scale.factor.y^2*max(trace$norm)*length(mysamples)),
xlim = c(1, 2.5),
axes=FALSE,
col=color[1]
)
for(i in seq_along(mysamples)){
#k is a variable that adds some space for sample 3 and above so the two reactions are vertically seperated from the rest.
if(i > 2){
k <- 1.5
}else{
k <- 0
}
lines(trace$time[trace$sample == mysamples[i]],
trace$norm[trace$sample == mysamples[i]]+scale.factor.y*max(trace$norm)*(i-1)+k, col=color[i])
text(max(trace$time)*text.under.x, scale.factor.y*max(trace$norm)*(i-1)-max(trace$norm)*text.under.y+k, mylabels[i], col=color[i], pos=4, cex = 0.8)
}
Lets save the figure as a PDF:
quartz.save(file='./pdf/S02b.pdf', type='pdf')
quartz_off_screen
2
You can view the PDF here: https://github.com/furthlab/FluorPLA/blob/main/pdf/S02b.pdf
Row 44 here has the Raw data:
tag. Unfortuantely the header contains
some illegal characters for R to import it straightforward with the
read.table()
command. We will get the error:
tmp<-read.table("./data/lc/Azido_PEG_tetrazine_MS.txt", skip=44, header=TRUE)
Error in scan(file = file, what = what, sep = sep, quote = quote, dec = dec, : line 1 did not have 7 elements
So instead we will skip the header column name line and import directly from line 45 and add the variable names manually:
#create an empty data frame where we will store our data
msdata<-data.frame(mass.mz = character(), intensity = integer(), intensity.perc = numeric())
#loop through each MS file and load it in and add it to msdata data frame
for(i in which(myfiles$ms)){
tmp<-read.table(myfiles$path[i], skip=45, header=FALSE)
msdata.tmp<-data.frame(
sample = myfiles$sample[i],
mass.mz = tmp[,1],
intensity = tmp[,2],
intensity.perc = tmp[,3]
)
#add the just loaded file into master data frame, msdata
msdata <- rbind(msdata, msdata.tmp)
}
Lets make a mass spec plot function:
#' Create a Mass Spectrometry Plot
#'
#' Generates a mass spectrometry plot to visualize mass-to-charge ratio (m/z) and intensity percentage.
#'
#' @param x A data frame containing two columns: 'mass.mz' for mass-to-charge ratio and 'intensity.perc' for intensity percentage.
#' @param xlim A numeric vector of length 2, specifying the x-axis limits for the plot.
#' @param main Title of the plot.
#' @param col The color for the lines in the plot.
#'
#' @details This function plots mass spectrometry data, where 'mass.mz' represents the mass-to-charge ratio (m/z)
#' and 'intensity.perc' represents the intensity percentage.
#'
#' @examples
#' # Sample data frame 'ms_data'
#' ms_data <- data.frame(mass.mz = c(600, 700, 800, 900),
#' intensity.perc = c(10, 30, 60, 40))
#'
#' # Create a mass spectrometry plot
#' ms.plot(ms_data, xlim = c(500, 1000), col = 'blue')
#'
#' @seealso \code{\link{plot}}, \code{\link{axis}}, \code{\link{lines}}
#'
#' @return NULL (a plot is displayed)
#'
#' @author Daniel Fürth
#'
#' @export
ms.plot <- function(x, xlim=c(500, 2000), main = '', col='black'){
par(yaxs='i', xaxs='i')
plot(x$mass.mz, x$intensity.perc, type='n', xlim=xlim, xlab='m/z', ylab='%', axes=F)
title(main = main, col.main = col)
axis(1, at=seq(xlim[1], xlim[2], length.out=7))
axis(2, at=c(0,25,50,75,100), las=1)
lin<-lapply(seq_along(x$mass.mz), function(l){lines(rep(x$mass.mz[l],2), c(x$intensity.perc[l], 0), col=col )})
}
Lets try it:
ms.plot(msdata[msdata$sample=='Azido',])
Nice. Now we also need to add peak-detection to it. We will use the package MassSpecWavelet.
#check if devtools is installed. It is used to
if(!require(devtools)){
install.packages('devtools')
}
Loading required package: devtools
Loading required package: usethis
library(devtools)
#check if MassSpecWavelet is installed, it is used to call peaks in MS data.
if(!require(MassSpecWavelet)){
devtools::install_github('https://github.com/zeehio/MassSpecWavelet')
}
Loading required package: MassSpecWavelet
library(MassSpecWavelet)
peakInfo <- peakDetectionCWT(msdata[msdata$sample=='Azido',]$intensity, SNR.Th=3)
majorPeakInfo <- peakInfo$majorPeakInfo
peakIndex <- majorPeakInfo$peakIndex
ms.plot(msdata[msdata$sample=='Azido',])
xpos <-msdata[msdata$sample=='Azido',]$mass.mz[peakIndex]
ypos <- msdata[msdata$sample=='Azido',]$intensity.perc[peakIndex]
points(xpos, ypos, col='red', pch=21, xpd=TRUE)
#pos=3 will print the label above the point, 1 = under, 2 = left, 4 = right.
text(xpos, ypos, round(xpos,2), pos=3, xpd=TRUE)
S lets add this to out function:
Then lets plot all the samples on one row each:
msSamples <- unique(msdata$sample)
par(mfrow=c(ceiling(length(msSamples)/2), 2))
for(i in seq_along(msSamples) ){
ms.plot(msdata[msdata$sample==msSamples[i],], main=msSamples[i], col=i)
}
Plot a figure:
quartz(width=6.8, height=5.4)
# Set up a 2x1 layout for subplots
par(mfrow = c(2, 1), mar = c(3, 4, 1, 1))
# Plot the first sample data
ms.plot(msdata[msdata$sample == 'RXN488',], col = RColorBrewer::brewer.pal(6, 'PuOr')[2])
# Add label for the first sample
text(1300, 90, mylabels[2], col = RColorBrewer::brewer.pal(6, 'PuOr')[2])
# Reset the layout for the second subplot
par(mar = c(4, 4, 1, 1))
# Plot the second sample data
ms.plot(msdata[msdata$sample == 'RXN594',], col = RColorBrewer::brewer.pal(6, 'PuOr')[6])
# Add label for the second sample
text(1300, 90, mylabels[1], col = RColorBrewer::brewer.pal(6, 'PuOr')[6])
Lets save the figure as a PDF:
quartz.save(file='./pdf/S02c.pdf', type='pdf')
quartz_off_screen
2
gel <- read.table('./data/gels/csv/2023-09-08_GoatAlexa488.csv', header = TRUE, sep = ',')
refDOL <- 5 #ThermoFisher A21200
gel$norm <- (gel$Mean/gel$Mean[1])*refDOL
gel$norm[5:8] <- (gel$Mean[5:8]/gel$Mean[5])*refDOL
#add a character to the name to get alphabetical ordering of the groups we want in the plot
gel$sample <- rep( c('A Goat-Alexa488', 'B 1:3', 'C 1:9', 'D 1:15') , 2)
avg<-tapply(gel$norm, gel$sample, mean)
quartz(width=3.6, height=6.1)
par(mar=c(6,4,1,0))
bar <- barplot(avg, ylab='Degree of Labeling (DOL)', las=1, xaxt="n")
labs <- c('Goat-Alexa488', '1:3', '1:9', '1:15')
text(cex=1, x=c(bar[1]-.55, bar[-1]), y=c(-0.8,rep(-0.4,3)), labs, xpd=TRUE, srt=45)
text(mean(bar[-1]), -1.1, 'Goat-PEG4-TCO\nAZdye488-bis(PEG2-Tz)', xpd=TRUE)
lines(c(bar[2]-.25, bar[4]+.25), rep(-0.75, 2), xpd = TRUE)
Save as PDF:
quartz.save(file='./pdf/S02f.pdf', type='pdf')
quartz_off_screen
2
The output from the spectrophotometer is a CSV file. Values are read in as character strings so we have to recast to numeric. Data starts from column 26 and each row is a sample.
spec <- read.table('data/spectra/absorption.csv', sep=',', header=TRUE)
ex <- data.frame(sample = character(), wavelength = integer(), absorbance = numeric())
for(i in 1:nrow(spec)){
ex.tmp <- data.frame(sample = spec$Sample.Name[i], wavelength = as.integer( substr(names(spec[,26:ncol(spec)]), 2,4) ), absorbance = as.numeric(spec[i,26:ncol(spec)]) )
ex <-rbind(ex, ex.tmp)
}
# Subset the data frame and remove rows with wavelength under 450
ex <- rbind(subset(ex, sample %in% unique(ex$sample)[c(1, 2)] & wavelength >= 380 & wavelength < 675) ,
subset(ex, sample %in% unique(ex$sample)[c(3, 4)] & wavelength >= 450 & wavelength < 675) )
ex$absorbance[ex$sample %in% c(unique(ex$sample)[c(1,2)])]<-ex$absorbance[ex$sample %in% c(unique(ex$sample)[c(1,2)])]/max(ex$absorbance[ex$sample %in% c(unique(ex$sample)[c(1,2)])])
ex$absorbance[ex$sample %in% c(unique(ex$sample)[c(3,4)])]<-ex$absorbance[ex$sample %in% c(unique(ex$sample)[c(3,4)])]/max(ex$absorbance[ex$sample %in% c(unique(ex$sample)[c(3,4)])])
par(yaxs='i', xaxs='i')
plot(ex$wavelength, ex$absorbance, type='n', ylab='Absorbance (AU)', xlab = 'Wavelength', las=1, xlim=c(380, 700), ylim=c(0,1), axes=F)
color <- c('green3', 'green4', 'red', 'red4')
unique_samples <- unique(ex$sample)
# Loop through each unique sample and create a polygon plot
for (sample_name in unique_samples) {
# Subset the data for the current sample
sample_data <- ex[ex$sample == sample_name, ]
# Create a polygon plot for the current sample
polygon(c(sample_data$wavelength,
sample_data$wavelength[nrow(sample_data)],
sample_data$wavelength[1],
sample_data$wavelength[1]),
c(sample_data$absorbance, 0, 0, sample_data$absorbance[1]),
col = color[match(sample_name, unique_samples)],
border = "black")
# Add a legend for sample names
legend("topright", legend = unique_samples, fill = color)
}
Change colors and make a figure with correct dimensions to fit in the larger figure layout:
col2hex <- function(cname)
{
colMat <- col2rgb(cname)
rgb(
red=colMat[1,]/255,
green=colMat[2,]/255,
blue=colMat[3,]/255
)
}
quartz(width= 16, height=4.8)
par(yaxs='i', xaxs='i')
plot(ex$wavelength, ex$absorbance, type='n', ylab='Absorbance (AU)', xlab = 'Wavelength (nm)', las=1, xlim=c(380, 700), ylim=c(0,1), axes=F)
color <- c('green3', 'green4', 'red', 'red4')
unique_samples <- unique(ex$sample)
peak.max <- numeric()
# Loop through each unique sample and create a polygon plot
for (sample_name in unique_samples) {
# Subset the data for the current sample
sample_data <- ex[ex$sample == sample_name, ]
# Create a polygon plot for the current sample
polygon(c(sample_data$wavelength,
sample_data$wavelength[nrow(sample_data)],
sample_data$wavelength[1],
sample_data$wavelength[1]),
c(sample_data$absorbance, 0, 0, sample_data$absorbance[1]),
col = paste0( col2hex( color[match(sample_name, unique_samples)] ), '70' ),
border = color[match(sample_name, unique_samples)], lwd=2, xpd=TRUE )
# Add a legend for sample names
legend("topright", legend = unique_samples, fill = color)
peak.max <- c(peak.max, sample_data$wavelength[which.max(sample_data$absorbance)] )
}
axis(1)
axis(2, las=1, at=c(0,0.5,1))
quartz.save(file="pdf/S02b_excitation.pdf", type='pdf')
quartz_off_screen
2
peak.max
[1] 490 496 589 592