From df250cba6391ddd7dce8bd2683482b83759e4783 Mon Sep 17 00:00:00 2001 From: Jan Marvin Garbuszus Date: Mon, 2 Oct 2023 15:42:06 +0200 Subject: [PATCH] experimental * clone_worksheet_to() --- R/class-workbook.R | 337 +++++++++++++++++++++++++++++++++++++++++++ R/helper-functions.R | 42 ++++++ man/wbWorkbook.Rd | 26 ++++ 3 files changed, 405 insertions(+) diff --git a/R/class-workbook.R b/R/class-workbook.R index 7ec4000f9..ad25f6b1d 100644 --- a/R/class-workbook.R +++ b/R/class-workbook.R @@ -1059,6 +1059,343 @@ wbWorkbook <- R6::R6Class( invisible(self) }, + #' @description + #' Clone a workbooksheet to another workbook + #' @param old name of worksheet to clone + #' @param new name of new worksheet to add + #' @param to name of new worksheet to add + clone_worksheet_to = function(old = current_sheet(), new = next_sheet(), to = NULL) { + + assert_workbook(to) + + sheet <- new + to$.__enclos_env__$private$validate_new_sheet(sheet) + new <- sheet + + old <- private$get_sheet_index(old) + + newSheetIndex <- length(to$worksheets) + 1L + to$.__enclos_env__$private$set_current_sheet(newSheetIndex) + sheetId <- to$.__enclos_env__$private$get_sheet_id_max() # checks for length of worksheets + + if (!all(self$charts$chartEx == "")) { + warning( + "The file you have loaded contains chart extensions. At the moment,", + " cloning worksheets can damage the output." + ) + } + + # not the best but a quick fix + new_raw <- new + new <- replace_legal_chars(new) + + ## copy visibility from cloned sheet! + visible <- rbindlist(xml_attr(self$workbook$sheets[[old]], "sheet"))$state + + ## Add sheet to workbook.xml + to$append_sheets( + xml_node_create( + "sheet", + xml_attributes = c( + name = new, + sheetId = sheetId, + state = visible, + `r:id` = paste0("rId", newSheetIndex) + ) + ) + ) + + ## append to worksheets list + to$append("worksheets", self$worksheets[[old]]$clone(deep = TRUE)) + + ## TODO why do we have sheet names all over the place ... + to$.__enclos_env__$private$original_sheet_names <- c( + to$.__enclos_env__$private$original_sheet_names, + new + ) + + ## update content_tyes + ## add a drawing.xml for the worksheet + # FIXME only add what is needed. If no previous drawing is found, don't + # add a new one + to$append("Content_Types", c( + if (self$is_chartsheet[old]) { + sprintf('', newSheetIndex) + } else { + sprintf('', newSheetIndex) + } + )) + + ## Update xl/rels + to$append( + "workbook.xml.rels", + if (self$is_chartsheet[old]) { + sprintf('', newSheetIndex) + } else { + sprintf('', newSheetIndex) + } + ) + + ## create sheet.rels to simplify id assignment + to$worksheets_rels[[newSheetIndex]] <- self$worksheets_rels[[old]] + + old_drawing_sheet <- NULL + + if (length(self$worksheets_rels[[old]])) { + relship <- rbindlist(xml_attr(self$worksheets_rels[[old]], "Relationship")) + relship$typ <- basename(relship$Type) + old_drawing_sheet <- as.integer(gsub("\\D+", "", relship$Target[relship$typ == "drawing"])) + } + + if (length(old_drawing_sheet) && length(self$worksheets[[old_drawing_sheet]]$relships$drawing)) { + + drawing_id <- self$worksheets[[old_drawing_sheet]]$relships$drawing + + new_drawing_sheet <- length(self$drawings) + 1L + + to$append("drawings_rels", self$drawings_rels[[drawing_id]]) + + # give each chart its own filename (images can re-use the same file, but charts can't) + to$drawings_rels[[new_drawing_sheet]] <- + # TODO Can this be simplified? There's a bit going on here + vapply( + to$drawings_rels[[new_drawing_sheet]], + function(rl) { + # is rl here a length of 1? + stopifnot(length(rl) == 1L) # lets find out... if this fails, just remove it + chartfiles <- reg_match(rl, "(?<=charts/)chart[0-9]+\\.xml") + + for (cf in chartfiles) { + chartid <- nrow(to$charts) + 1L + newname <- stri_join("chart", chartid, ".xml") + old_chart <- as.integer(gsub("\\D+", "", cf)) + to$charts <- rbind(to$charts, self$charts[old_chart, ]) + + # Read the chartfile and adjust all formulas to point to the new + # sheet name instead of the clone source + + chart <- to$charts$chart[chartid] + to$charts$rels[chartid] <- gsub("?drawing[0-9]+.xml", paste0("drawing", chartid, ".xml"), to$charts$rels[chartid]) + + guard_ws <- function(x) { + if (grepl(" ", x)) x <- shQuote(x, type = "sh") + x + } + + old_sheet_name <- guard_ws(self$sheet_names[[old]]) + new_sheet_name <- guard_ws(new) + + ## we need to replace "'oldname'" as well as "oldname" + chart <- gsub( + old_sheet_name, + new_sheet_name, + chart, + perl = TRUE + ) + + to$charts$chart[chartid] <- chart + + # two charts can not point to the same rels + if (to$charts$rels[chartid] != "") { + to$charts$rels[chartid] <- gsub( + stri_join(old_chart, ".xml"), + stri_join(chartid, ".xml"), + to$charts$rels[chartid] + ) + } + + rl <- gsub(stri_join("(?<=charts/)", cf), newname, rl, perl = TRUE) + } + + rl + + }, + NA_character_, + USE.NAMES = FALSE + ) + + + to$append("drawings", self$drawings[[drawing_id]]) + } + + ## TODO Currently it is not possible to clone a sheet with a slicer in a + # safe way. It will always result in a broken xlsx file which is fixable + # but will not contain a slicer. + + # most likely needs to add slicerCache for each slicer with updated names + + ## SLICERS + + rid <- as.integer(sub("\\D+", "", get_relship_id(obj = to$worksheets_rels[[newSheetIndex]], "slicer"))) + if (length(rid)) { + + warning("Cloning slicers is not yet supported. It will not appear on the sheet.") + to$worksheets_rels[[newSheetIndex]] <- relship_no(obj = to$worksheets_rels[[newSheetIndex]], x = "slicer") + + newid <- length(to$slicers) + 1 + + cloned_slicers <- self$slicers[[old]] + slicer_attr <- xml_attr(cloned_slicers, "slicers") + + # Replace name with name_n. This will prevent the slicer from loading, + # but the xlsx file is not broken + slicer_child <- xml_node(cloned_slicers, "slicers", "slicer") + slicer_df <- rbindlist(xml_attr(slicer_child, "slicer"))[c("name", "cache", "caption", "rowHeight")] + slicer_df$name <- paste0(slicer_df$name, "_n") + slicer_child <- df_to_xml("slicer", slicer_df) + + to$slicers[[newid]] <- xml_node_create("slicers", slicer_child, slicer_attr[[1]]) + + to$worksheets_rels[[newSheetIndex]] <- c( + to$worksheets_rels[[newSheetIndex]], + sprintf("", + rid, + newid) + ) + + to$Content_Types <- c( + to$Content_Types, + sprintf("", newid) + ) + + } + + # The IDs in the drawings array are sheet-specific, so within the new + # cloned sheet the same IDs can be used => no need to modify drawings + vml_id <- self$worksheets[[old]]$relships$vml + cmt_id <- self$worksheets[[old]]$relships$comments + trd_id <- self$worksheets[[old]]$relships$threadedComment + + if (length(vml_id)) { + to$append("vml", self$vml[[vml_id]]) + to$append("vml_rels", self$vml_rels[[vml_id]]) + to$worksheets[[old]]$relships$vml <- length(self$vml) + } + + if (length(cmt_id)) { + to$append("comments", self$comments[cmt_id]) + to$worksheets[[old]]$relships$comments <- length(self$comments) + } + + if (length(trd_id)) { + to$append("threadComments", self$threadComments[cmt_id]) + to$worksheets[[old]]$relships$threadedComment <- length(self$threadComments) + } + + to$is_chartsheet[[newSheetIndex]] <- self$is_chartsheet[[old]] + + to$append("sheetOrder", as.integer(newSheetIndex)) + to$append("sheet_names", new) + to$clone()$.__enclos_env__$private$set_single_sheet_name(pos = newSheetIndex, clean = new, raw = new_raw) + + + ############################ + ## DRAWINGS + + # if we have drawings to clone, remove every table reference from Relationship + + rid <- as.integer(sub("\\D+", "", get_relship_id(obj = to$worksheets_rels[[newSheetIndex]], x = "drawing"))) + + if (length(rid)) { + + to$worksheets_rels[[newSheetIndex]] <- relship_no(obj = to$worksheets_rels[[newSheetIndex]], x = "drawing") + + to$worksheets_rels[[newSheetIndex]] <- c( + to$worksheets_rels[[newSheetIndex]], + sprintf( + '', + rid, + new_drawing_sheet + ) + ) + + } + + ############################ + ## TABLES + ## ... are stored in the $tables list, with the name and sheet as attr + ## and in the worksheets[]$tableParts list. We also need to adjust the + ## worksheets_rels and set the content type for the new table + + # if we have tables to clone, remove every table referece from Relationship + rid <- as.integer(sub("\\D+", "", get_relship_id(obj = to$worksheets_rels[[newSheetIndex]], x = "table"))) + + if (length(rid)) { + + to$worksheets_rels[[newSheetIndex]] <- relship_no(obj = to$worksheets_rels[[newSheetIndex]], x = "table") + + # make this the new sheets object + tbls <- self$tables[self$tables$tab_sheet == old, ] + if (NROW(tbls)) { + + # newid and rid can be different. ids must be unique + if (!is.null(to$tables$tab_xml)) + newid <- max(as.integer(rbindlist(xml_attr(to$tables$tab_xml, "table"))$id)) + seq_along(rid) + else + newid <- 1L + + # add _n to all table names found + tbls$tab_name <- stri_join(tbls$tab_name, "_n") + tbls$tab_sheet <- newSheetIndex + # modify tab_xml with updated name, displayName and id + tbls$tab_xml <- vapply(seq_len(nrow(tbls)), function(x) { + xml_attr_mod(tbls$tab_xml[x], + xml_attributes = c(name = tbls$tab_name[x], + displayName = tbls$tab_name[x], + id = newid[x]) + ) + }, + NA_character_ + ) + + # add new tables to old tables + to$tables <- rbind( + to$tables, + tbls + ) + + to$worksheets[[newSheetIndex]]$tableParts <- sprintf('', rid) + attr(to$worksheets[[newSheetIndex]]$tableParts, "tableName") <- tbls$tab_name + + ## hint: Content_Types will be created once the sheet is written. no need to add tables there + + # increase tables.xml.rels + to$append("tables.xml.rels", rep("", nrow(tbls))) + + # add table.xml to worksheet relationship + to$worksheets_rels[[newSheetIndex]] <- c( + to$worksheets_rels[[newSheetIndex]], + sprintf( + '', + rid, + newid + ) + ) + } + + } + + # TODO: The following items are currently NOT copied/duplicated for the cloned sheet: + # - Comments ??? + # - Slicers + + # update sheet styles + assign("WORKBOOK", self, globalenv()) + style <- get_cellstyle(self, sheet = old) + # only if styles are present + if (!is.null(style)) { + new_sty <- set_cellstyles(to, style = style) + new_s <- unname(new_sty[match(to$worksheets[[newSheetIndex]]$sheet_data$cc$c_s, names(new_sty))]) + to$worksheets[[newSheetIndex]]$sheet_data$cc$c_s <- new_s + } + + clone_shared_strings(self, old, to, newSheetIndex) + + message("cloned worksheet into workbook") + + invisible(self) + }, + ### add data ---- #' @description add data diff --git a/R/helper-functions.R b/R/helper-functions.R index ed72dae24..e346cf4e1 100644 --- a/R/helper-functions.R +++ b/R/helper-functions.R @@ -1351,10 +1351,14 @@ get_cellstyle <- function(wb, sheet = current_sheet(), dims) { xf_xml <- get_cell_styles(wb = wb, sheet = sheet, cell = dims) } + # returns NA if no style found + if (all(is.na(xf_xml))) return(NULL) + lst_out <- vector("list", length = length(xf_xml)) for (i in seq_along(xf_xml)) { + if (is.na(xf_xml[[i]])) next xf_df <- read_xf(read_xml(xf_xml[[i]])) border_id <- which(wb$styles_mgr$border$id == xf_df$borderId) @@ -1445,3 +1449,41 @@ set_cellstyles <- function(wb, style) { st_ids } + +clone_shared_strings <- function(wb_old, old, wb_new, new) { + + empty <- structure(list(), uniqueCount = 0) + + # old has no shared strings + if (identical(wb_old$sharedStrings, empty)) { + return(NULL) + } + + if (identical(wb_new$sharedStrings, empty)) { + + wb_new$append( + "workbook.xml.rels", + "" + ) + + } + + sheet_id <- wb_old$validate_sheet(old) + cc <- wb_old$worksheets[[sheet_id]]$sheet_data$cc + sst_ids <- as.integer(cc$v[cc$c_t == "s"]) + 1 + sst_uni <- unique(sst_ids) + sst <- wb_old$sharedStrings[sst_uni] + + old_len <- length(as.character(wb_new$sharedStrings)) + + wb_new$sharedStrings <- c(as.character(wb_new$sharedStrings), sst) + sst <- xml_node_create("sst", xml_children = wb_new$sharedStrings) + attr(wb_new$sharedStrings, "uniqueCount") <- as.character(length(sst)) + attr(wb_new$sharedStrings, "text") <- xml_si_to_txt(read_xml(sst)) + + sheet_id <- wb_new$validate_sheet(new) + cc <- wb_new$worksheets[[sheet_id]]$sheet_data$cc + cc$v[cc$c_t == "s"] <- as.character(as.integer(cc$v[cc$c_t == "s"]) + old_len) + wb_new$worksheets[[sheet_id]]$sheet_data$cc <- cc + +} diff --git a/man/wbWorkbook.Rd b/man/wbWorkbook.Rd index 77c9759c8..c5f6de5c7 100644 --- a/man/wbWorkbook.Rd +++ b/man/wbWorkbook.Rd @@ -110,6 +110,7 @@ worksheet names.} \item \href{#method-wbWorkbook-add_chartsheet}{\code{wbWorkbook$add_chartsheet()}} \item \href{#method-wbWorkbook-add_worksheet}{\code{wbWorkbook$add_worksheet()}} \item \href{#method-wbWorkbook-clone_worksheet}{\code{wbWorkbook$clone_worksheet()}} +\item \href{#method-wbWorkbook-clone_worksheet_to}{\code{wbWorkbook$clone_worksheet_to()}} \item \href{#method-wbWorkbook-add_data}{\code{wbWorkbook$add_data()}} \item \href{#method-wbWorkbook-add_data_table}{\code{wbWorkbook$add_data_table()}} \item \href{#method-wbWorkbook-add_pivot_table}{\code{wbWorkbook$add_pivot_table()}} @@ -429,6 +430,31 @@ Clone a workbooksheet } } \if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-wbWorkbook-clone_worksheet_to}{}}} +\subsection{Method \code{clone_worksheet_to()}}{ +Clone a workbooksheet to another workbook +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{wbWorkbook$clone_worksheet_to( + old = current_sheet(), + new = next_sheet(), + to = NULL +)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{old}}{name of worksheet to clone} + +\item{\code{new}}{name of new worksheet to add} + +\item{\code{to}}{name of new worksheet to add} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-wbWorkbook-add_data}{}}} \subsection{Method \code{add_data()}}{