Skip to content

Commit

Permalink
Merge pull request #9 from pawelqs/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
pawelqs authored Jan 16, 2024
2 parents 6e7d841 + cbeb581 commit 74207bd
Show file tree
Hide file tree
Showing 10 changed files with 66 additions and 63 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: readthis
Title: Read results from misc genomic tools
Version: 0.5.0
Version: 0.6.0
Authors@R:
person("Paweł", "Kuś", , "[email protected]", role = c("aut", "cre"))
Description: This package is for reading output files of: variant callers,
Expand Down
2 changes: 1 addition & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ export(read_ascat_files)
export(read_clip_all)
export(read_clip_all_wide)
export(read_clip_best_lambda)
export(read_facets_cnvs)
export(read_facets_cnas)
export(read_mutect_snvs)
export(read_strelka_somatic_snvs)
import(dplyr)
Expand Down
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# readthis 0.6.0
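* Naming convention changed from CNVs to CNAs: `read_facets_cnvs()` is now `read_facets_cnas()`, and the `cnvs` element returned by `read_ascat_files()` is now `cnas`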

# readthis 0.5.0
* Reading ASCAT SNVs

Expand Down
34 changes: 17 additions & 17 deletions R/ascat.R
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
## ------------------------------- Export --------------------------------------

#' Read ASCAT CNV calls
#' Read ASCAT CNA calls
#'
#' Reads the CNV variant calls and sample statistics from
#' [ASCAT](https://github.com/VanLoo-lab/ascat) CNV caller
#' Reads the CNA calls and sample statistics produced by the
#' [ASCAT](https://github.com/VanLoo-lab/ascat) CNA caller.
#'
#' @param path Can be either:
#' 1) path to a single csv file with ASCAT CNV calls
#' 2) tibble with sample_id, cnvs, and (optionally) sample_statistics columns
#' 1) path to a single csv file with ASCAT CNA calls
#' 2) tibble with sample_id, csv, and (optionally) sample_statistics columns
#' containing sample_ids and paths.
#' 3) directory containing multiple ASCAT files with "*.csv" and
#' "*.samplestatistics.*" names.
Expand Down Expand Up @@ -62,32 +62,32 @@ read_ascat_files <- function(path,
ascat <- read_ascat_files_from_dir(path, sample_id_pattern)
}

ascat$cnvs <- use_chrom_naming_convention(ascat$cnvs, chrom_convention)
ascat$cnas <- use_chrom_naming_convention(ascat$cnas, chrom_convention)
structure(ascat, class = c("cevo_ASCAT"))
}


## ----------------------- Higher level functions -----------------------------

read_ascat_files_single <- function(path, sample_statistics, sample_id) {
cnvs <- read_ascat_cnvs(path) |>
cnas <- read_ascat_cnas(path) |>
mutate(sample_id = sample_id, .before = "chrom")
if (!is.null(sample_statistics) && is_single_file(sample_statistics)) {
stats <- read_ascat_samplestatistics(sample_statistics) |>
mutate(sample_id = sample_id, .before = "normal_contamination")
} else {
stats <- empty_ascat_samplestatistics()
}
lst(cnvs, sample_statistics = stats)
lst(cnas, sample_statistics = stats)
}



read_ascat_files_from_dataframe <- function(path) {
cnvs <- path |>
cnas <- path |>
select("sample_id", "csv") |>
deframe() |>
map(read_ascat_cnvs) |>
map(read_ascat_cnas) |>
bind_rows(.id = "sample_id")
if (is.null(path[["sample_statistics"]])) {
stats <- empty_ascat_samplestatistics()
Expand All @@ -99,37 +99,37 @@ read_ascat_files_from_dataframe <- function(path) {
map(read_ascat_samplestatistics) |>
bind_rows(.id = "sample_id")
}
lst(cnvs, sample_statistics = stats)
lst(cnas, sample_statistics = stats)
}


read_ascat_files_from_dir <- function(path, sample_id_pattern) {
csv_files <- get_files(path, ".csv", sample_id_pattern)
cnvs <- csv_files |>
map(read_ascat_cnvs) |>
cnas <- csv_files |>
map(read_ascat_cnas) |>
bind_rows(.id = "sample_id")

stat_files <- get_files(path, "samplestatistics", sample_id_pattern)
stats <- stat_files |>
map(read_ascat_samplestatistics) |>
bind_rows(.id = "sample_id")
lst(cnvs, sample_statistics = stats)
lst(cnas, sample_statistics = stats)
}


## --------------------------- Base functions ----------------------------------

read_ascat_cnvs <- function(path) {
read_ascat_cnas <- function(path) {
csv_cols <- c("i", "chrom", "start", "end", "normal_cn_total", "normal_cn_minor", "total_cn", "minor_cn")
cnvs <- read_csv(path, col_names = csv_cols, col_types = "dcdddddd") |>
cnas <- read_csv(path, col_names = csv_cols, col_types = "dcdddddd") |>
mutate(major_cn = .data$total_cn - .data$minor_cn) |>
select(
"chrom", "start", "end",
"total_cn", "major_cn", "minor_cn",
normal_cn = "normal_cn_total"
)

cnvs
cnas
}


Expand Down
26 changes: 13 additions & 13 deletions R/facets.R
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
## ------------------------------- Export --------------------------------------

#' Read FACETS CNV calls
#' Read FACETS CNA calls
#'
#' Reads the variant calls from [FACETS](https://github.com/mskcc/facets/tree/master)
#' CNV caller
#' CNA caller
#'
#' @param path Can be either:
#' 1) path to a single file, sample ID can be passed using sample_id argument
Expand All @@ -17,14 +17,14 @@
#' library(readthis)
#'
#' file1 <- system.file("extdata", "FACETS", "S1.csv", package = "readthis")
#' read_facets_cnvs(file1)
#' read_facets_cnas(file1)
#'
#' file2 <- system.file("extdata", "FACETS", "S2.csv", package = "readthis")
#' files <- c(S1 = file1, S2 = file2)
#' read_facets_cnvs(files)
#' read_facets_cnas(files)
#'
#' dir <- system.file("extdata", "FACETS", package = "readthis")
#' read_facets_cnvs(dir)
#' read_facets_cnas(dir)
#'
#' @name facets
NULL
Expand All @@ -35,14 +35,14 @@ NULL
#' read_facets_cnas() reads a single csv file, a named vector of files, or a
#' directory. If sample_id is not provided for a single file, the file path is used
#' @export
read_facets_cnvs <- function(path,
read_facets_cnas <- function(path,
sample_id = path,
chrom_convention = "UCSC") {
if (is_single_file(path)) {
cnvs <- read_facets_csv(path) |>
cnas <- read_facets_csv(path) |>
mutate(sample_id = sample_id, .before = "chrom")
} else if (is_list_of_files(path)) {
cnvs <- map(path, read_facets_csv) |>
cnas <- map(path, read_facets_csv) |>
bind_rows(.id = "sample_id")
} else if (is_single_dir(path)) {
files <- list.files(path, full.names = TRUE)
Expand All @@ -51,13 +51,13 @@ read_facets_cnvs <- function(path,
map_chr(last) |>
str_replace(".csv", "")
names(files) <- sample_ids
cnvs <- files |>
cnas <- files |>
set_names(sample_ids) |>
map(read_facets_csv) |>
bind_rows(.id = "sample_id")
}

cnvs |>
cnas |>
use_chrom_naming_convention(chrom_convention)
}

Expand All @@ -66,10 +66,10 @@ read_facets_cnvs <- function(path,


read_facets_csv <- function(file, chrom_convention = "UCSC") {
cnvs <- read_csv(file, col_types = "cddddddddiidiidd") |>
cnas <- read_csv(file, col_types = "cddddddddiidiidd") |>
prepare_FACETS_columns()
class(cnvs) <- c("cevo_FACETS", class(cnvs))
cnvs
class(cnas) <- c("cevo_FACETS", class(cnas))
cnas
}


Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ devtools::install_github("pawelqs/readthis")

- [Strelka2](https://github.com/Illumina/strelka) (somatic SNVs only): `read_strelka_somatic_snvs()`
- [Mutect2](https://github.com/broadinstitute/gatk) SNVs: `read_mutect_snvs()`
- [FACETS](https://github.com/mskcc/facets) CNVs: `read_facets_cnvs()`
- [ASCAT](https://github.com/VanLoo-lab/ascat) CNVs: `read_ascat_files()` (does not work with all the files)
- [FACETS](https://github.com/mskcc/facets) CNAs: `read_facets_cnas()`
- [ASCAT](https://github.com/VanLoo-lab/ascat) CNAs: `read_ascat_files()` (does not work with all the files)

**Other tools:**

Expand Down
14 changes: 7 additions & 7 deletions man/facets.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 5 additions & 5 deletions man/read_ascat_files.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 11 additions & 11 deletions tests/testthat/test-ascat.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
test_that("read_ascat_csv() works", {
path <- test_path("ASCAT", "S1.csv")
res <- read_ascat_cnvs(path)
res <- read_ascat_cnas(path)
expect_s3_class(res, "tbl")
expect_equal(dim(res), c(10, 7))
})
Expand All @@ -17,24 +17,24 @@ test_that("read_ascat_samplestatistics() works", {



test_that("read_ascat_files() works with cnvs only", {
test_that("read_ascat_files() works with cnas only", {
path <- test_path("ASCAT", "S1.csv")
res <- read_ascat_files(path)
expect_s3_class(res, "cevo_ASCAT")
expect_equal(dim(res$cnvs), c(10, 8))
expect_equal(unique(res$cnvs$sample_id), test_path("ASCAT", "S1.csv"))
expect_equal(dim(res$cnas), c(10, 8))
expect_equal(unique(res$cnas$sample_id), test_path("ASCAT", "S1.csv"))
})



test_that("read_ascat_files() works with cnvs and sample statistics", {
test_that("read_ascat_files() works with cnas and sample statistics", {
path <- test_path("ASCAT", "S1.csv")
sample_statistics <- test_path("ASCAT", "S1.samplestatistics.txt")
res <- read_ascat_files(path, sample_statistics)
expect_s3_class(res, "cevo_ASCAT")
expect_equal(dim(res$cnvs), c(10, 8))
expect_equal(dim(res$cnas), c(10, 8))
expect_equal(dim(res$sample_statistics), c(1, 8))
expect_equal(unique(res$cnvs$sample_id), test_path("ASCAT", "S1.csv"))
expect_equal(unique(res$cnas$sample_id), test_path("ASCAT", "S1.csv"))
expect_equal(unique(res$sample_statistics$sample_id), test_path("ASCAT", "S1.csv"))
})

Expand All @@ -50,7 +50,7 @@ test_that("read_ascat_files() works with tibble of files", {
)
res <- read_ascat_files(path)
expect_s3_class(res, "cevo_ASCAT")
expect_equal(dim(res$cnvs), c(20, 8))
expect_equal(dim(res$cnas), c(20, 8))
expect_equal(dim(res$sample_statistics), c(2, 8))
expect_equal(unique(res$sample_statistics$sample_id), c("S1", "S2"))
})
Expand All @@ -67,9 +67,9 @@ test_that("read_ascat_files() works with tibble of files with NAs", {
)
res <- read_ascat_files(path)
expect_s3_class(res, "cevo_ASCAT")
expect_equal(dim(res$cnvs), c(20, 8))
expect_equal(dim(res$cnas), c(20, 8))
expect_equal(dim(res$sample_statistics), c(1, 8))
expect_equal(unique(res$cnvs$sample_id), c("S1", "S2"))
expect_equal(unique(res$cnas$sample_id), c("S1", "S2"))
expect_equal(unique(res$sample_statistics$sample_id), "S1")
})

Expand All @@ -79,7 +79,7 @@ test_that("read_ascat_files() works with directory", {
path <- test_path("ASCAT")
res <- read_ascat_files(path)
expect_s3_class(res, "cevo_ASCAT")
expect_equal(dim(res$cnvs), c(20, 8))
expect_equal(dim(res$cnas), c(20, 8))
expect_equal(dim(res$sample_statistics), c(2, 8))
expect_equal(unique(res$sample_statistics$sample_id), c("S1", "S2"))
})
12 changes: 6 additions & 6 deletions tests/testthat/test-facets.R
Original file line number Diff line number Diff line change
@@ -1,26 +1,26 @@
test_that("read_facets_cnvs() works with single file", {
test_that("read_facets_cnas() works with single file", {
path <- test_path("FACETS", "S1.csv")
res <- read_facets_cnvs(path)
res <- read_facets_cnas(path)
expect_s3_class(res, "cevo_FACETS")
expect_equal(dim(res), c(64, 18))
expect_equal(unique(res$sample_id), path)
})


test_that("read_facets_cnvs() works with dir path file", {
test_that("read_facets_cnas() works with dir path file", {
path <- test_path("FACETS")
res <- read_facets_cnvs(path)
res <- read_facets_cnas(path)
expect_s3_class(res, "cevo_FACETS")
expect_equal(dim(res), c(128, 18))
expect_equal(unique(res$sample_id), c("S1", "S2"))
})


test_that("read_facets_cnvs() works with list of files", {
test_that("read_facets_cnas() works with list of files", {
path <- test_path("FACETS")
path <- list.files(path, full.names = TRUE) |>
set_names(c("S1", "S2"))
res <- read_facets_cnvs(path)
res <- read_facets_cnas(path)
expect_s3_class(res, "cevo_FACETS")
expect_equal(dim(res), c(128, 18))
expect_equal(unique(res$sample_id), c("S1", "S2"))
Expand Down

0 comments on commit 74207bd

Please sign in to comment.