From 3b01e8d56060fa7d44f57a077c686b405d1b156f Mon Sep 17 00:00:00 2001 From: nGit Date: Mon, 25 May 2020 12:28:59 +0200 Subject: [PATCH] Improved column matching for OHLCVA The functions to match and extract OHLCVA columns return information about OHLCVA data in the input. The implemented logic aims to find the right column also when columns with similar names exist. This change resolves issues in matching the correct column(s). Known bugs are when the column name has multiple similar matches. E.g. the name is similar to the `symbol`. The solution is first attempting an exact column name match. When this fails the fallback is to search for column name(s) ending with the matching term. This change also removes code duplication that existed between related functions (e.g. `has.Op` and `Op`), since both functions attempted to match column names with `grep`. As requested test coverage is added with tinytest. --- DESCRIPTION | 2 +- R/OHLC.transformations.R | 157 ++++++++++++++++++------------------ inst/tinytest/stock.rda | Bin 0 -> 2807 bytes inst/tinytest/test_OHLCVA.R | 121 +++++++++++++++++++++++++++ tests/tinytest.R | 5 ++ 5 files changed, 205 insertions(+), 80 deletions(-) create mode 100644 inst/tinytest/stock.rda create mode 100644 inst/tinytest/test_OHLCVA.R create mode 100644 tests/tinytest.R diff --git a/DESCRIPTION b/DESCRIPTION index 57f1c438..d98243f0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -11,7 +11,7 @@ Authors@R: c( ) Depends: R (>= 3.2.0), xts(>= 0.9-0), zoo, TTR(>= 0.2), methods Imports: curl -Suggests: DBI,RMySQL,RSQLite,timeSeries,XML,downloader,jsonlite(>= 1.1) +Suggests: DBI,RMySQL,RSQLite,timeSeries,XML,downloader,jsonlite(>= 1.1), tinytest Description: Specify, build, trade, and analyse quantitative financial trading strategies. LazyLoad: yes License: GPL-3 diff --git a/R/OHLC.transformations.R b/R/OHLC.transformations.R index 98cbf135..c4ad479b 100644 --- a/R/OHLC.transformations.R +++ b/R/OHLC.transformations.R @@ -181,128 +181,129 @@ OHLCV <- function(x) `Op` <- function(x) { - if(has.Op(x)) - return(x[,grep('Open',colnames(x),ignore.case=TRUE)]) - stop('subscript out of bounds: no column name containing "Open"') + col <- has.Op(x, which = TRUE) + if(!identical(col, integer(0))) { + return(x[, col]) + } else { + stop('subscript out of bounds: Open column not found') + } } - `has.Op` <- function(x,which=FALSE) { - colAttr <- attr(x, "Op") - if(!is.null(colAttr)) - return(if(which) colAttr else TRUE) - - loc <- grep('Open',colnames(x),ignore.case=TRUE) - if(!identical(loc,integer(0))) { - return(if(which) loc else TRUE) - } else FALSE + n <- which(colnames(x) %in% c("Op", "Open")) + + if(identical(n,integer(0))) { + n <- grep('Open$', colnames(x), ignore.case = TRUE) + } + + return(if(which) n else !identical(n,integer(0))) } - `Hi` <- function(x) { - if(has.Hi(x)) - return(x[,grep('High',colnames(x),ignore.case=TRUE)]) - stop('subscript out of bounds: no column name containing "High"') + col <- has.Hi(x, which = TRUE) + if(!identical(col, integer(0))) { + return(x[, col]) + } else { + stop('subscript out of bounds: High column not found') + } } - `has.Hi` <- function(x,which=FALSE) { - colAttr <- attr(x, "Hi") - if(!is.null(colAttr)) - return(if(which) colAttr else TRUE) - - loc <- grep('High',colnames(x),ignore.case=TRUE) - if(!identical(loc,integer(0))) { - return(if(which) loc else TRUE) - } else FALSE + n <- which(colnames(x) %in% c("Hi", "High")) + + if(identical(n,integer(0))) { + n <- grep('High$', colnames(x), ignore.case = TRUE) + } + + return(if(which) n else !identical(n,integer(0))) } - `Lo` <- function(x) { - if(has.Lo(x)) - return(x[,grep('Low',colnames(x),ignore.case=TRUE)]) - stop('subscript out of bounds: no column name containing "Low"') + col <- has.Lo(x, which = TRUE) + if(!identical(col, integer(0))) { + return(x[, col]) + } else { + stop('subscript out of bounds: Low column not found') + } } - `has.Lo` <- function(x,which=FALSE) { - colAttr <- attr(x, "Lo") - if(!is.null(colAttr)) - return(if(which) colAttr else TRUE) - - loc <- grep('Low',colnames(x),ignore.case=TRUE) - if(!identical(loc,integer(0))) { - return(if(which) loc else TRUE) - } else FALSE + n <- which(colnames(x) %in% c("Lo", "Low")) + + if(identical(n,integer(0))) { + n <- grep('Low$', colnames(x), ignore.case = TRUE) + } + + return(if(which) n else !identical(n,integer(0))) } - `Cl` <- function(x) { - if(has.Cl(x)) - return(x[,grep('Close',colnames(x),ignore.case=TRUE)]) - stop('subscript out of bounds: no column name containing "Close"') + col <- has.Cl(x, which = TRUE) + if(!identical(col, integer(0))) { + return(x[, col]) + } else { + stop('subscript out of bounds: Close column not found') + } } `has.Cl` <- function(x,which=FALSE) { - colAttr <- attr(x, "Cl") - if(!is.null(colAttr)) - return(if(which) colAttr else TRUE) - - loc <- grep('Close',colnames(x),ignore.case=TRUE) - if(!identical(loc,integer(0))) { - return(if(which) loc else TRUE) - } else FALSE + n <- which(colnames(x) %in% c("Cl", "Close")) + + if(identical(n,integer(0))) { + n <- grep('Close$', colnames(x), ignore.case = TRUE) + } + + return(if(which) n else !identical(n,integer(0))) } - `Vo` <- function(x) { - #vo <- grep('Volume',colnames(x)) - #if(!identical(vo,integer(0))) - if(has.Vo(x)) - return(x[,grep('Volume',colnames(x),ignore.case=TRUE)]) - stop('subscript out of bounds: no column name containing "Volume"') + col <- has.Vo(x, which = TRUE) + if(!identical(col, integer(0))) { + return(x[, col]) + } else { + stop('subscript out of bounds: Volume column not found') + } } `has.Vo` <- function(x,which=FALSE) { - colAttr <- attr(x, "Vo") - if(!is.null(colAttr)) - return(if(which) colAttr else TRUE) - - loc <- grep('Volume',colnames(x),ignore.case=TRUE) - if(!identical(loc,integer(0))) { - return(if(which) loc else TRUE) - } else FALSE + n <- which(colnames(x) %in% c("Vo", "Vol", "Volume")) + + if(identical(n,integer(0))) { + n <- grep('Volume$', colnames(x), ignore.case = TRUE) + } + + return(if(which) n else !identical(n,integer(0))) } - `Ad` <- function(x) { - if(has.Ad(x)) - return(x[,grep('Adjusted',colnames(x),ignore.case=TRUE)]) - stop('subscript out of bounds: no column name containing "Adjusted"') + col <- has.Ad(x, which = TRUE) + if(!identical(col, integer(0))) { + return(x[, col]) + } else { + stop('subscript out of bounds: Adjusted column not found') + } } `has.Ad` <- function(x,which=FALSE) { - colAttr <- attr(x, "Ad") - if(!is.null(colAttr)) - return(if(which) colAttr else TRUE) - - loc <- grep('Adjusted',colnames(x),ignore.case=TRUE) - if(!identical(loc,integer(0))) { - return(if(which) loc else TRUE) - } else FALSE + n <- which(colnames(x) %in% c("Ad", "Adj", "Adjusted")) + + if(identical(n,integer(0))) { + n <- grep('Adjusted$', colnames(x), ignore.case = TRUE) + } + + return(if(which) n else !identical(n,integer(0))) } - `OpCl` <- function(x) { @@ -310,7 +311,6 @@ function(x) colnames(xx) <- paste("OpCl",deparse(substitute(x)),sep='.') xx } - `OpOp` <- function(x) { @@ -318,7 +318,6 @@ function(x) colnames(xx) <- paste("OpOp",deparse(substitute(x)),sep='.') xx } - `ClCl` <- function(x) { diff --git a/inst/tinytest/stock.rda b/inst/tinytest/stock.rda new file mode 100644 index 0000000000000000000000000000000000000000..14386799c74a0f8e750fe5e27f0956ca980a96cb GIT binary patch literal 2807 zcmVe5{X(Prtt1o3&RIkBipA)B5*Nyp?K}tTt+d zGji|<;S8(rz}w}F65R8b`-)Q2`oD`kFYB{s2URT-66+UvJ<0bk!|aRXuFXspk6vRXy}*+2&_Xsd`51 zpdDcsRQ+#}c?BW;RQ(9gU)WRJF6U`g-~M!bfKRTfU*`FS+>eDH`fb<|n`h>oZ5yMC zl=Y68b9bwv7xWknVb4^6H}d>~(#G#rjgzozT!H`QVdynmVQrkDJm=1)qMEIf0}r$; zRn5fn2^Ve;Q_W8dN8O$2s_`TJzPa#J_=aFrY{$N63qSN>%!_~h;*&BQ8-y z9Q#9TfgZtp8?{*HE0jxEN5(B~F`xU8Pa_BYWqiZ>`oKT4!AF7F-8ZSmS=M_1{W(DS z+qd%!m(?du*10|O>(Anxb{BDIe@1-THOBoM>-RSN(oM>t(4$8&Z{yg{-?HwPfD^vl zzr_9H)OVSE9)>*W>rqEhhjH6|&pL|z% zW;67M8N=tvm?v`x^}U3=GaGTc*05eZ$yd`jS09m&0$86Jv`as<2-bZC^UwSW z+S8A){^uFT7n}>87hD^S$v^B@<3m2@+=ybXPm>=GuG>*O`o+vp4b4F~mJ zWnZ)I=2NJT$vHG5Q6JNZJ~uts_qpgl)9(B3%=<3pYcuw>Oq|!2k=MQk&aRjKGWokP z`=A}`)`#~%0&y#e+e^8YvJkwbPo`eRp}Tl~9sCpn;J+YWixB3c8uQi*^)+n%*vGj) z_hA1T8+i|$BwyI~KuhN9J@U*#;u^E=CiLmdx9~-uYn#|F3UNI+|L-s#YuG1@Lt{U< zSbwb+;&G9moPDr%PKRCx=jXmezMQPTgFNlX}nIb%OIk z9@ofA+86LcAHp~|clt2y_aF~2ZqW!lCgw0dcVJhXBEQ-E&i4cHG3q5gAYUw{zSnpk z#9{u7D#(MN-}XIf#IgQg5_gdI_j%a4ci-Fh&r|H5Yn&&}hjEwpb^!d<4pEZN?&ZOg z_g)vuS1E_EzuzKX2=?_h@&xY$%kM1JJhp^`S75>(&j=wQY=3WnE@64)TTQ z&HHQz=ergAjQQ2;Q4jAYCwWLafqXmoe&`~PILUv`qwwEJ{~cxU-<1x3T_00#65p%r ze6x?Vb(~u-_B;Dq>%cnOd=$aH<$IXEi1$Vjgo@_a(}IQL`*PnebLX1w6Uj=wmh zp5l4(dE($OsdxP9vGVztQJtl|_*?t&{Mi(VPaDu#K2M3fBk>7`v*2IAMk&W6wvp%J zX3xgCobRMxsZF}cb!HF8Whp!S@V;B7@2w;CRf9cQE zxOZDh+3}#<&pzB$p37PBX{20N&6V*^-g#X5k%_Qjh;Pqc zeDSt?k5Z?l>708zrorFmHH+`XG^MpSsuV2lp*(+bw4=;$XJG7Sd{Vp!=R*R;pzT|4@Q@vVPJeXMaWx`@r z&pun)cW6!P`||#Tby3??eSV1AbHsU7zmk7yL;KmPNc%}$yrGjS+9$`ibnZ~aSDf4E zmp0ujwB|XyzN1tB(bo5wMo!Ps{Z->aRQ%Wfu-;pef5~g(|E_hPDW#&Nuk}7SFRxLo zbuaw1R>|ITLpAGG57M^OxA+0~&SSp+zvO#G@h<-xkk8QJx^SQ1uSOnake-y4{V=2q z4lYRwk%L^4lrIOXH zaA{|OOK%Tc`b6L|(qT8_PjbLZCbj~8Vk+gV@UzwcKf4I_=Xe7@X8`bXRsuiQ1NgaRus<&p z_<2i!pZ6>9dG7;1zZdM!UjY34>%iyt1U^3x_yu!df5Bzo7vcgaSvV8;ML)xCK@e~S zIlvX118(t~z%6+Lc9%>7e#sf&3)=!;m;wCK39!5Ld*GL~1b!J_C`y(c2Y&fc__?Af z@GIhhUvUWdm5qU4^)l?Q8V&ra?|@(ZBJitYfG>(xq=h-7+zs$#xlQl4oYC&@ zMec!d50GbqV*fpOIC@OPa8co~SHHepE8u*i|BV$l+PjR;J#9s?(NXrtF%c0JbXIRJ zN55-GaA@em6}#cSWtBg>w^tRa`VOo#5}%4{9ulNi)CjEt%4f85$iFyVWn`po`Jj`C zJgma}IA~+R*-Eld<^A_Udwi%x+o_bx==*8AZ(ko8Zr98DAM6HK4Ag=QcZLghB?5@M zOMAX{g5HZ1!|l)S3^j+?>O&$%*_VIFu_?WLB z^YvrCR{m4I_O67-Dx0xK5LRWB_}}qxl}8Zkqe!=E<&HhV=V}!nm1kDJvi}j4RxeW? z%AYmd`<3x!wO*m!`!+8}n%F4EoK@w+&30UG6+Ur2cZtV$+dP@~%+s{u8r_ J8xArZ004f3<=p@P literal 0 HcmV?d00001 diff --git a/inst/tinytest/test_OHLCVA.R b/inst/tinytest/test_OHLCVA.R new file mode 100644 index 00000000..1f78b864 --- /dev/null +++ b/inst/tinytest/test_OHLCVA.R @@ -0,0 +1,121 @@ +## OHLC test-cases, including underlying supporting functions. + +# Load stock test data (originating from getSymbols()) +stock <- readRDS("stock.rda") +cols <- colnames(stock) + +# Test is/has OHLC functions to be true. +expect_true(has.Op(stock)) +expect_true(has.Hi(stock)) +expect_true(has.Lo(stock)) +expect_true(has.Cl(stock)) +expect_true(has.Vo(stock)) +expect_true(is.HLC(stock)) +expect_true(all(has.HLC(stock))) +expect_true(is.OHLC(stock)) +expect_true(all(has.OHLC(stock))) +expect_true(is.OHLCV(stock)) +expect_true(all(has.OHLCV(stock))) + +# Test which for has/OHLC functions. +expect_equal(has.Op(stock, which = TRUE), 1) +expect_equal(has.Hi(stock, which = TRUE), 2) +expect_equal(has.Lo(stock, which = TRUE), 3) +expect_equal(has.Cl(stock, which = TRUE), 4) +expect_equal(has.Vo(stock, which = TRUE), 5) +expect_equal(has.HLC(stock, which = TRUE), c(2,3,4)) +expect_equal(has.OHLC(stock, which = TRUE), c(1,2,3,4)) +expect_equal(has.OHLCV(stock, which = TRUE), c(1,2,3,4,5)) + +# Test return correct OHLC column(s). +expect_identical(colnames(Op(stock)), cols[1]) +expect_identical(colnames(Hi(stock)), cols[2]) +expect_identical(colnames(Lo(stock)), cols[3]) +expect_identical(colnames(Cl(stock)), cols[4]) +expect_identical(colnames(HLC(stock)), cols[c(2,3,4)]) +expect_identical(colnames(OHLC(stock)), cols[c(1,2,3,4)]) +expect_identical(colnames(OHLCV(stock)), cols[c(1,2,3,4,5)]) + +# Test sample matrix regression +data(sample_matrix, package = "xts") +sample <- as.xts(sample_matrix) +expect_equal(has.Op(sample, which = TRUE), 1) +expect_equal(has.Hi(sample, which = TRUE), 2) +expect_equal(has.Lo(sample, which = TRUE), 3) +expect_equal(has.Cl(sample, which = TRUE), 4) +expect_identical(colnames(Op(sample)), "Open") +expect_identical(colnames(Hi(sample)), "High") +expect_identical(colnames(Lo(sample)), "Low") +expect_identical(colnames(Cl(sample)), "Close") + +# Test "Open" columns with column "Op". +colnames(stock) <- gsub("MSFT.Open", "Op", cols) +expect_true(has.Op(stock)) +expect_equal(has.Op(stock, which = TRUE), 1) +expect_identical(colnames(Op(stock)), "Op") + +# Test "High" columns with column "Hi". +colnames(stock) <- gsub("MSFT.High", "Hi", cols) +expect_true(has.Hi(stock)) +expect_equal(has.Hi(stock, which = TRUE), 2) +expect_identical(colnames(Hi(stock)), "Hi") + +# Test "Low" columns with column "Lo". +colnames(stock) <- gsub("MSFT.Low", "Lo", cols) +expect_true(has.Lo(stock)) +expect_equal(has.Lo(stock, which = TRUE), 3) +expect_identical(colnames(Lo(stock)), "Lo") + +# Test "Close" columns with column "Cl". +colnames(stock) <- gsub("MSFT.Close", "Cl", cols) +expect_true(has.Cl(stock)) +expect_equal(has.Cl(stock, which = TRUE), 4) +expect_identical(colnames(Cl(stock)), "Cl") + +# Test "Volume" columns with column "Vo". +colnames(stock) <- gsub("MSFT.Volume", "Vo", cols) +expect_true(has.Vo(stock)) +expect_equal(has.Vo(stock, which = TRUE), 5) +expect_identical(colnames(Vo(stock)), "Vo") + +# Test "Adjusted" columns with column "Ad". +colnames(stock) <- gsub("MSFT.Adjusted", "Ad", cols) +expect_true(has.Ad(stock)) +expect_equal(has.Ad(stock, which = TRUE), 6) +expect_identical(colnames(Ad(stock)), "Ad") + +# Test "Open" columns with symbol "OPEN". +colnames(stock) <- gsub("MSFT", "OPEN", cols) +expect_true(has.Op(stock)) +expect_equal(has.Op(stock, which = TRUE), 1) +expect_identical(colnames(Op(stock)), colnames(stock)[1]) + +# Test "High" columns with symbol "HIGH". +colnames(stock) <- gsub("MSFT", "HIGH", cols) +expect_true(has.Hi(stock)) +expect_equal(has.Hi(stock, which = TRUE), 2) +expect_identical(colnames(Hi(stock)), colnames(stock)[2]) + +# Test "Low" columns with symbol "LOW". +colnames(stock) <- gsub("MSFT", "LOW", cols) +expect_true(has.Lo(stock)) +expect_equal(has.Lo(stock, which = TRUE), 3) +expect_identical(colnames(Lo(stock)), colnames(stock)[3]) + +# Test "Close" columns with symbol "CLOSE". +colnames(stock) <- gsub("MSFT", "CLOSE", cols) +expect_true(has.Cl(stock)) +expect_equal(has.Cl(stock, which = TRUE), 4) +expect_identical(colnames(Cl(stock)), colnames(stock)[4]) + +# Test "Volume" columns with symbol "VOLUME". +colnames(stock) <- gsub("MSFT", "VOLUME", cols) +expect_true(has.Vo(stock)) +expect_equal(has.Vo(stock, which = TRUE), 5) +expect_identical(colnames(Vo(stock)), colnames(stock)[5]) + +# Test "Adjusted" columns with symbol "ADJUSTED". +colnames(stock) <- gsub("MSFT", "ADJUSTED", cols) +expect_true(has.Ad(stock)) +expect_equal(has.Ad(stock, which = TRUE), 6) +expect_identical(colnames(Ad(stock)), colnames(stock)[6]) diff --git a/tests/tinytest.R b/tests/tinytest.R new file mode 100644 index 00000000..b79d9f48 --- /dev/null +++ b/tests/tinytest.R @@ -0,0 +1,5 @@ + +if ( requireNamespace("tinytest", quietly=TRUE) ){ + tinytest::test_package("quantmod") +} +