Skip to content

Commit

Permalink
update threads
Browse files Browse the repository at this point in the history
  • Loading branch information
jgranja24 committed Jan 13, 2020
1 parent c865f2e commit 3c35dc7
Show file tree
Hide file tree
Showing 21 changed files with 87 additions and 652 deletions.
Binary file modified .DS_Store
Binary file not shown.
2 changes: 1 addition & 1 deletion R/ArchRBrowser.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ ArchRRegionTrack <- function(
tileSize = 100,
minCells = 25,
normMethod = "ReadsInTSS",
threads = 1,
threads = getArchRThreads(),
ylim = NULL,
baseSize = 7,
borderWidth = 0.4,
Expand Down
2 changes: 1 addition & 1 deletion R/CoAccessibility.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ addCoAccessibility <- function(
log2Norm = TRUE,
seed = 1,
knnMethod = "nabor",
threads = 1,
threads = getArchRThreads(),
...
){

Expand Down
2 changes: 1 addition & 1 deletion R/ComputeEmbedding.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ addEmbedding <- function(
saveModel = TRUE,
seed = 1,
force = FALSE,
threads = 1,
threads = getArchRThreads(),
embeddingParams = list(),
...
){
Expand Down
2 changes: 1 addition & 1 deletion R/CreateArrow.R
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ createArrowFiles <- function(
addGeneScoreMat = TRUE,
GeneScoreMatParams = list(),
force = FALSE,
threads = 1,
threads = getArchRThreads(),
parallelParam = NULL,
verboseHeader = TRUE,
verboseAll = FALSE,
Expand Down
2 changes: 1 addition & 1 deletion R/DoubletsScores.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ addDoubletScores <- function(
UMAPParams = list(),
LSIParams = list(sampleCells = NULL),
outDir = "QualityControl",
threads = 1,
threads = getArchRThreads(),
parallelParam = NULL,
verboseHeader = TRUE,
verboseAll = FALSE,
Expand Down
2 changes: 1 addition & 1 deletion R/Footprinting.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ plotFootprints <- function(
width = 4,
addDOC = TRUE,
useSink = TRUE,
threads = 1,
threads = getArchRThreads(),
verboseHeader = TRUE,
verboseAll = FALSE,
...
Expand Down
12 changes: 8 additions & 4 deletions R/GroupCoverages.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ addGroupCoverages <- function(
maxReplicates = 5,
sampleRatio = 0.8,
kmerLength = 6,
threads = 1,
threads = getArchRThreads(),
parallelParam = "mclapply",
force = FALSE,
verboseHeader = TRUE,
Expand Down Expand Up @@ -149,7 +149,8 @@ addGroupCoverages <- function(
args$covDir <- file.path(getOutputDirectory(ArchRProj), "GroupCoverages", groupBy)
args$parallelParam <- parallelParam
args$threads <- threads
args$verbose <- verboseAll
args$verboseHeader <- verboseHeader
args$verboseAll <- verboseAll
args$tstart <- tstart
args$registryDir <- file.path(getOutputDirectory(ArchRProj), "GroupCoverages", "batchRegistry")

Expand Down Expand Up @@ -213,10 +214,13 @@ addGroupCoverages <- function(
chromLengths,
covDir,
tstart,
verbose = TRUE,
verboseHeader = TRUE,
verboseAll = FALSE,
...
){

.messageDiffTime(sprintf("Creating Group Coverage %s of %s", i, length(cellGroups)), tstart, verbose = verboseHeader)

#Cells
cellGroupi <- cellGroups[[i]]

Expand All @@ -243,7 +247,7 @@ addGroupCoverages <- function(

if(k %% 3 == 0){
.messageDiffTime(sprintf("Group %s of %s, Read Fragments %s of %s!", i,
length(cellGroups), k, length(availableChr)), tstart, verbose = verbose)
length(cellGroups), k, length(availableChr)), tstart, verbose = verboseAll)
}

it <- 0
Expand Down
2 changes: 1 addition & 1 deletion R/LatentSemanticIndexing.R
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ addIterativeLSI <- function(
clusterParams = list(),
runHarmony = FALSE,
harmonyParams = list(),
threads = 1,
threads = getArchRThreads(),
seed = 1,
verboseHeader = TRUE,
verboseAll = FALSE,
Expand Down
2 changes: 1 addition & 1 deletion R/MarkerFeatures.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ markerFeatures <- function(
testMethod = "wilcoxon",
maxCells = 500,
scaleTo = 10^4,
threads = 1,
threads = getArchRThreads(),
k = 100,
bufferRatio = 0.8,
binarize = FALSE,
Expand Down
2 changes: 1 addition & 1 deletion R/MatrixCNV.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ addCNVMatrix <- function(
windowSize = 10e6,
stepSize = 2e6,
excludeChr = c("chrM","chrY"),
threads = 1,
threads = getArchRThreads(),
parallelParam = NULL,
force = FALSE,
...
Expand Down
2 changes: 1 addition & 1 deletion R/MatrixDeviations.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ addDeviationsMatrix <- function(
matrixName = NULL,
out = c("z", "deviations"),
binarize = FALSE,
threads = 1,
threads = getArchRThreads(),
parallelParam = NULL,
force = FALSE,
...
Expand Down
4 changes: 2 additions & 2 deletions R/MatrixFeatures.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ addFeatureMatrix <- function(
matrixName = "FeatureMatrix",
ceiling = Inf,
binarize = FALSE,
threads = 1,
threads = getArchRThreads(),
parallelParam = NULL,
force = FALSE,
...
Expand Down Expand Up @@ -80,7 +80,7 @@ addPeakMatrix <- function(
ceiling = 4,
binarize = FALSE,
parallelParam = NULL,
threads = 1,
threads = getArchRThreads(),
force = FALSE,
...
){
Expand Down
2 changes: 1 addition & 1 deletion R/MatrixGeneScores.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ addGeneScoreMatrix <- function(
scaleTo = 10000,
excludeChr = c("chrY","chrM"),
blacklist = ifelse(inherits(input, "ArchRProject"), getBlacklist(input), NULL),
threads = 1,
threads = getArchRThreads(),
parallelParam = NULL,
force = FALSE,
...
Expand Down
2 changes: 1 addition & 1 deletion R/MatrixTiles.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ addTileMatrix <- function(
tileSize = 500,
binarize = TRUE,
excludeChr = c("chrM","chrY"),
threads = 1,
threads = getArchRThreads(),
parallelParam = NULL,
force = FALSE,
...
Expand Down
2 changes: 1 addition & 1 deletion R/ReproduciblePeakSet.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ addReproduciblePeakSet <- function(
genomeAnno = getGenomeAnnotation(ArchRProj),
geneAnno = getGeneAnnotation(ArchRProj),
additionalParams = "--nomodel --nolambda",
threads = 1,
threads = getArchRThreads(),
parallelParam = "mclapply",
force = FALSE,
verboseHeader = TRUE,
Expand Down
2 changes: 1 addition & 1 deletion R/Trajectory.R
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ getTrajectory <- function(
varCutOff = 0.1,
maxFeatures = 25000,
groupEvery = 2,
threads = 1,
threads = getArchRThreads(),
scaleTo = 10000,
log2Norm = TRUE,
smooth = TRUE,
Expand Down
Binary file modified vignettes/.DS_Store
Binary file not shown.
Binary file added vignettes/Articles/.DS_Store
Binary file not shown.
86 changes: 63 additions & 23 deletions vignettes/Articles/tutorial.Rmd
Original file line number Diff line number Diff line change
@@ -1,14 +1,35 @@
---
title: "Getting Started With ArchR"
output:
html_document:
toc: true # table of content true
toc_depth: 3 # up to three levels of headings (specified by #, ## and ###)
number_sections: true ## if you want number sections at each table header
#theme: cosmo # many options for theme, this one is my favorite.
highlight: tango # specifies the syntax highlighting style
code_download: true
toc_float:
collapsed: false
smooth_scroll: false
---


```{r image, include=FALSE}
knitr::include_graphics(c("../../images/ArchRProject_Schematic.png", "../../images/ArchR_FunctionSchematic.png", "../../images/tutorial_1_UMAP-Clusters.pdf", "../../images/tutorial_2_tracks.pdf", "../../images/tutorial_3_MarkerGeneScores.pdf", "../../images/tutorial_4_MarkerGeneHeatmap.pdf"))
knitr::include_graphics(
c(
"../../images/ArchRProject_Schematic.png",
"../../images/ArchR_FunctionSchematic.png",
"../../images/tutorial_1_UMAP-Clusters.pdf",
"../../images/tutorial_2_tracks.pdf",
"../../images/tutorial_3_MarkerGeneScores.pdf",
"../../images/tutorial_4_MarkerGeneHeatmap.pdf"
)
)
```

The following tutorial shows the basics of setting up and interacting with an ArchR Project using a gold-standard dataset of hematopoietic cells ( CITATION ). This tutorial and all of the accompanying vignettes assume that you are running ArchR __locally__. Once all of these steps work for you, feel free to [set up ArchR to work in a cluster environment](articles/Articles/clusterComputing.html). This tutorial does not explain every detail of every step. Please see the [Vignettes section](articles/index.html) for more details on each major analytical step and all of the major features of ArchR.

### What is an `ArrowFile` / `ArchRProject`?
# What is an `ArrowFile` / `ArchRProject`?

The base unit of an analytical project in ArchR is called an `ArrowFile`. Each `ArrowFile` stores all of the data associated with an individual sample. Here, a sample would be the most detailed unit of analysis desired (e.g., a single replicate of a particular condition). During creation and as additional analyses are performed, ArchR updates and edits each `ArrowFile` to contain additional layers of information.
Then, an `ArchRProject` allows you to associate these `ArrowFiles` together into a single analytical framework.
Expand All @@ -19,7 +40,7 @@ Certain actions can be taken directly on `ArrowFiles` while other actions are ta

![](../../images/ArchR_FunctionSchematic.png){width=400px}

### Getting Set Up
# Getting Set Up

The first thing we do is set up our working directory, load our genome annotations, and set the number of threads we would like to use. Depending on the configuration of your local environment, you may need to modify the number of `threads` used below.

Expand All @@ -44,7 +65,7 @@ genomeAnno <- genomeAnnoHg19
addArchRThreads()
```

### Creating Arrow Files
# Creating Arrow Files

For this tutorial, we will download a collection of fragment files. Fragment files are one of the base file types of the 10x Genomics analytical platform and can be easily created from any BAM file. See [the ArchR input types vignette](articles/Articles/inputFiles.html) for information on making your own fragment files. Once we have our fragment files, we provide their names as a vector to `createArrowFiles`. During creation, some basic matrices and data are added to each `ArrowFile`, including a `TileMatrix` containing insertion counts across genome-wide 500-bp bins.

Expand All @@ -64,7 +85,7 @@ ArrowFiles <- createArrowFiles(
)
```

### Tidying up our data and creating an `ArchRProject`
# Tidying up our data and creating an `ArchRProject`

One major source of trouble in single-cell data is the contribution of "doublets" to the analysis. A doublet refers to a single droplet that received a single barcoded bead and more than one nucleus. This causes the reads from more than one cell to appear as a single cell. We remove these computationally and describe this doublet removal process in more depth in the [doublet removal vignette](articles/Articles/doubletRemoval.html).

Expand All @@ -84,7 +105,7 @@ proj <- ArchRProject(
proj <- filterDoublets(proj)
```

### Dimensionality Reduction
# Dimensionality Reduction {.tabset .tabset-fade .tabset-pills}

At this point, we have an ArchR project that is ready to be used in downstream visualizations and analyses. The first thing we will do is use an iterative latent semantic indexing (LSI) approach to define clusters in our data. Once we have identified clusters in our data, we can plot a UMAP embedding. For more details, see the [dimensionality reduction vignette](articles/Articles/dimReduction.html).

Expand All @@ -98,12 +119,15 @@ proj <- addIterativeLSI(
)
#Identify Clusters from Iterative LSI
proj <- addClusters(input = proj, reducedDims = "IterativeLSI", resolution = 1.2)
#The larger the resolution, the more clusters will be called. The lower the resolution, the fewer clusters will be called.
#It is recommended to compare the results from your clusters and your embeddings and find params that best agree across
#both analyses for clarity.
proj <- addClusters(input = proj, reducedDims = "IterativeLSI", resolution = 0.6)
#Add Imputation Weights for Visualization
#Add Imputation Weights for imputing numerical values based on MAGIC (see van Dijk et al. 2018).
proj <- addImputeWeights(ArchRProj = proj)
#Compute a UMAP embedding to visualize our tiled matrix
#Compute a UMAP embedding to visualize our tiled accessibility matrix in a 2-d setting.
proj <- addEmbedding(
ArchRProj = proj,
reducedDims = "IterativeLSI",
Expand All @@ -112,19 +136,23 @@ proj <- addEmbedding(
force = TRUE
)
#Plot the UMAP Embedding with Metadata Overlayed
#Plot the UMAP Embedding with metadata overlaid, such as Experimental Sample and Clusters.
#To change plotting aesthetics see ?plotEmbedding parameters.
plotList <- list()
plotList[[1]] <- plotEmbedding(ArchRProj = proj, colorBy = "colData", name = "Sample")
plotList[[2]] <- plotEmbedding(ArchRProj = proj, colorBy = "colData", name = "Clusters", plotParams = list(labelMeans=TRUE))
plotPDF(plotList = plotList, name = "UMAP-Samples-Clusters", width = 6, height = 6, ArchRProj = proj)
```

Using our tutorial data, your UMAP plots should look like [this](../../images/tutorial_1_UMAP-Clusters.pdf). (Note if you see a blank space below try firefox or safari)
***

## UMAP w/ Clusters
This [plot](../../images/tutorial_1_UMAP-Clusters.pdf) shows the experimental samples and clusters described above overlaid onto the UMAP embedding. (Note: if you see a blank space below, try Firefox or Safari.)<br/><br/>
<center>
![Alt](../../images/tutorial_1_UMAP-Clusters.pdf){width=600 height=600}
![Alt](../../images/tutorial_1_UMAP-Clusters.pdf){width=450 height=450}
</center>

### Identifying Cluster Cell Types Using Marker Genes
# Identifying Cluster Cell Types Using Marker Genes {.tabset .tabset-fade .tabset-pills}

In order to understand which clusters correspond to which cell types, we use a supervised approach based on prior knowledge of the genes that are active in specific cell types. We determine _gene activity scores_ for each putative marker gene based on chromatin accessibility signal in the region surrounding the gene's promoter. We can then overlay these _gene activity scores_ on our UMAP embedding to visualize the relationship between gene activity and cluster. For more details, see the [marker genes vignette](articles/Articles/geneScores.html).

Expand Down Expand Up @@ -156,16 +184,31 @@ heatmapGS <- markerHeatmap(
plotPDF(heatmapGS, name = "GS-Marker-Heatmap", width = 8, height = 12, ArchRProj = proj)
```

Using our tutorial data, your gene activity score UMAP plots should look like [this](articles/Articles/Tutorial_geneScores_UMAP.pdf). (Note if you see a blank space below try firefox or safari)
***

## UMAP GeneScores
This [plot](../../images/tutorial_3_MarkerGeneScores.pdf) shows gene activity scores of the marker genes described above overlaid onto the UMAP embedding. The gene scores are imputed using MAGIC, based on the weights added by `addImputeWeights`. (Note: if you see a blank space below, try Firefox or Safari.)<br/><br/>
<center>
![Alt](../../images/tutorial_3_MarkerGeneScores.pdf){width=450 height=600} ![Alt](../../images/tutorial_2_tracks.pdf){width=450 height=600}
![Alt](../../images/tutorial_3_MarkerGeneScores.pdf){width=450 height=450}
</center>

### Creating a Reproducible Peak Set
## Track-Plots
<br/><br/>
<center>
![Alt](../../images/tutorial_2_tracks.pdf){width=450 height=600}
</center>

## Marker GeneScores Heatmap
<br/><br/>
<center>
![Alt](../../images/tutorial_4_MarkerGeneHeatmap.pdf){width=450 height=600}
</center>

# Creating a Reproducible Peak Set {.tabset .tabset-fade .tabset-pills}

One of the most complicated aspects about ATAC-seq and scATAC-seq analysis is the generation of a reproducible and robust peak set. In ArchR, we use an iterative overlap removal process that we first described in [Corces* & Granja* et al. Science 2018](https://www.ncbi.nlm.nih.gov/pubmed/30361341). This process is described in detail in the [peak calling vignette](articles/Articles/peakCalling.html).

To robustly call peaks, we first merge the sparse single-cell data into pseudo-bulk replicates by aggregating the insertions from many individual cells into a single group. We make multiple pseudo-bulk replicates for each cluster to enable an assessment of peak reproducibility. This process of pseudo-bulk generation is described in detail in the [pseudo-bulk generation vignette](articles/Articles/pseudobulkGeneration.html). We than call peaks using [MACS2 v???](https://github.com/taoliu/MACS) and perform our iterative overlap removal. Once we obtain a finalized peak set, we collect insertion counts in each peak for each single cell and associate this with the corresponding `ArrowFile` via the `ArchRProject`.
To robustly call peaks, we first merge the sparse single-cell data into pseudo-bulk replicates by aggregating the insertions from many individual cells into a single group. We make multiple pseudo-bulk replicates for each cluster to enable an assessment of peak reproducibility. This process of pseudo-bulk generation is described in detail in the [pseudo-bulk generation vignette](articles/Articles/pseudobulkGeneration.html). We then call peaks using [MACS2](https://github.com/taoliu/MACS) and perform our iterative overlap removal. Once we obtain a finalized peak set, we collect insertion counts in each peak for each single cell and associate this with the corresponding `ArrowFile` via the `ArchRProject`.

```{r eval=FALSE}
#Create Group Coverage Files that can be used for downstream analysis (~5-10 minutes)
Expand All @@ -178,7 +221,7 @@ proj <- addReproduciblePeakSet(ArchRProj = proj, groupBy = "Clusters", threads =
proj <- addPeakMatrix(ArchRProj = proj, threads = threads, force = TRUE)
```

### Identifying Marker Peaks
# Identifying Marker Peaks

Oftentimes, we are interested in knowing which peaks are unique to an individual cluster or a small group of clusters. We can do this in an unsupervised fashion in ArchR:

Expand All @@ -194,7 +237,7 @@ plotPDF(heatmapPeaks, name = "Peak-Marker-Heatmap", width = 8, height = 12, Arch

Using our tutorial data, your marker peak heatmap should look like [this](articles/Articles/Tutorial_markerPeaks_HEATMAP.pdf).

### Performing Motif Enrichments
# Performing Motif Enrichments

QQQQQQQ - I think the concept of TF deviations is still abstract to most people. Does ArchR do motif enrichment with hypergeometric test as well??

Expand Down Expand Up @@ -226,7 +269,7 @@ plotPDF(plotList = plotList, name = "Plot-UMAP-TileLSI-MotifMatrix", width = 6,

Using our tutorial data, your motif UMAP plots should look like [this](articles/Articles/Tutorial_motifEnrichment_UMAP.pdf).

### Performing TF Footprinting
# Performing TF Footprinting

Transcription factor footprinting can also be done in ArchR with a single command. We note that the footprints generated by the tutorial data are not as clean as would be desired but this is because of the small size of the tutorial dataset.

Expand All @@ -237,9 +280,6 @@ seFoot <- plotFootprints(proj, positions = getPositions(proj)[motifs])
```


click here: [link2](#tips)
### Additional Tips {#tips}




Expand Down
609 changes: 0 additions & 609 deletions vignettes/Articles/tutorial.html

This file was deleted.

0 comments on commit 3c35dc7

Please sign in to comment.