From dd3c463891c5aa13cc92bedeea15c7dd7e6d64a4 Mon Sep 17 00:00:00 2001 From: Gunther Rademacher Date: Wed, 18 Dec 2024 19:02:41 +0100 Subject: [PATCH 1/8] fn:csv-to-arrays and fn:parse-csv (still incomplete) --- .../java/org/basex/build/csv/CsvOptions.java | 54 ++++++++++-- .../java/org/basex/build/csv/CsvParser.java | 2 +- .../org/basex/gui/dialog/DialogCsvParser.java | 2 +- .../org/basex/io/parse/csv/CsvParser.java | 60 +++++++++---- .../main/java/org/basex/query/QueryError.java | 10 +++ .../java/org/basex/query/func/Function.java | 6 ++ .../org/basex/query/func/csv/CsvParse.java | 2 +- .../basex/query/func/fn/FnCsvToArrays.java | 88 +++++++++++++++++++ .../java/org/basex/util/http/Payload.java | 2 +- .../java/org/basex/util/options/Options.java | 1 + .../org/basex/query/func/FnModuleTest.java | 7 ++ 11 files changed, 210 insertions(+), 24 deletions(-) create mode 100644 basex-core/src/main/java/org/basex/query/func/fn/FnCsvToArrays.java diff --git a/basex-core/src/main/java/org/basex/build/csv/CsvOptions.java b/basex-core/src/main/java/org/basex/build/csv/CsvOptions.java index e7a71d5d1c..bfba58730e 100644 --- a/basex-core/src/main/java/org/basex/build/csv/CsvOptions.java +++ b/basex-core/src/main/java/org/basex/build/csv/CsvOptions.java @@ -29,6 +29,14 @@ public class CsvOptions extends Options { public static final BooleanOption LAX = new BooleanOption("lax", true); /** Option: parse quotes. */ public static final BooleanOption QUOTES = new BooleanOption("quotes", true); + /** Option: row delimiter. */ + public static final StringOption ROW_DELIMITER = new StringOption("row-delimiter", "\n"); + /** Option: quote character. */ + public static final StringOption QUOTE_CHARACTER = new StringOption("quote", "\""); + /** Option: trim whitespace. */ + public static final BooleanOption TRIM_WHITSPACE = new BooleanOption("trim-whitespace", false); + /** Option: strict quoting. */ + public static final BooleanOption STRICT_QUOTING = new BooleanOption("strict-quoting", false); /** CSV formats. */ public enum CsvFormat { @@ -70,14 +78,24 @@ public String toString() { @Override public synchronized void assign(final String name, final String value) throws BaseXException { super.assign(name, value); - if(separator() == -1) throw new BaseXException("Invalid separator: '%'", get(SEPARATOR)); + final int s = separator(), r = rowDelimiter(), q = quoteCharacter(); + if(s == -1) throw new BaseXException("Invalid separator: '%'", get(SEPARATOR)); + if(r == -1) throw new BaseXException("Invalid row delimiter: '%'", get(ROW_DELIMITER)); + if(q == -1) throw new BaseXException("Invalid quote character: '%'", get(QUOTE_CHARACTER)); + if(s == q || r == s || q == r) throw new BaseXException("Duplicate CSV delimiter error: '%'", + get(s == q || r == s ? SEPARATOR : QUOTE_CHARACTER)); } @Override public synchronized void assign(final Item name, final Value value, final InputInfo info) throws QueryException { super.assign(name, value, info); - if(separator() == -1) throw OPTION_X.get(info, "Invalid separator: '%'", get(SEPARATOR)); + final int s = separator(), r = rowDelimiter(), q = quoteCharacter(); + if(s == -1) throw OPTION_X.get(info, "Invalid separator: '%'", get(SEPARATOR)); + if(r == -1) throw CSV_SINGLECHAR_X_X.get(info, ROW_DELIMITER.name(), get(ROW_DELIMITER)); + if(q == -1) throw CSV_SINGLECHAR_X_X.get(info, QUOTE_CHARACTER.name(), get(QUOTE_CHARACTER)); + if(s == q || r == s || q == r) throw CSV_DELIMITER_X.get(info, + get(s == q || r == s ? SEPARATOR : QUOTE_CHARACTER)); } /** @@ -89,9 +107,35 @@ public int separator() { for(final CsvSep s : CsvSep.values()) { if(sep.equals(s.toString())) return s.sep; } - if(sep.length() == 1) { - final char ch = sep.charAt(0); - if(XMLToken.valid(ch)) return ch; + if(sep.codePointCount(0, sep.length()) == 1) { + final int cp = sep.codePointAt(0); + if(XMLToken.valid(cp)) return cp; + } + return -1; + } + + /** + * Returns the row delimiter character or {@code -1} if character is invalid. + * @return separator + */ + public int rowDelimiter() { + final String rd = get(ROW_DELIMITER); + if(rd.codePointCount(0, rd.length()) == 1) { + final int cp = rd.codePointAt(0); + if(XMLToken.valid(cp)) return cp; + } + return -1; + } + + /** + * Returns the quote character or {@code -1} if character is invalid. + * @return separator + */ + public int quoteCharacter() { + final String q = get(QUOTE_CHARACTER); + if(q.codePointCount(0, q.length()) == 1) { + final int cp = q.codePointAt(0); + if(XMLToken.valid(cp)) return cp; } return -1; } diff --git a/basex-core/src/main/java/org/basex/build/csv/CsvParser.java b/basex-core/src/main/java/org/basex/build/csv/CsvParser.java index c126e3adbf..4419b2bd56 100644 --- a/basex-core/src/main/java/org/basex/build/csv/CsvParser.java +++ b/basex-core/src/main/java/org/basex/build/csv/CsvParser.java @@ -37,7 +37,7 @@ public CsvParser(final IO source, final MainOptions options, final CsvParserOpti protected void parse() throws IOException { csv = pushJob(new CsvBuilder(copts, builder)); try { - csv.convert(source); + csv.convert(source, null); } catch(final QueryException ex) { throw new QueryIOException(ex); } finally { diff --git a/basex-core/src/main/java/org/basex/gui/dialog/DialogCsvParser.java b/basex-core/src/main/java/org/basex/gui/dialog/DialogCsvParser.java index 96c3cbf21d..fc960ac11b 100644 --- a/basex-core/src/main/java/org/basex/gui/dialog/DialogCsvParser.java +++ b/basex-core/src/main/java/org/basex/gui/dialog/DialogCsvParser.java @@ -127,7 +127,7 @@ boolean action(final boolean active) { lax.setEnabled(head && copts.get(CsvOptions.FORMAT) == CsvFormat.DIRECT); skipEmpty.setEnabled(head); - final Item item = CsvConverter.get(copts).convert(new IOContent(EXAMPLE)); + final Item item = CsvConverter.get(copts).convert(new IOContent(EXAMPLE), null); example.setText(example(MainParser.CSV.name(), EXAMPLE, item)); } catch(final QueryException | IOException ex) { example.setText(error(ex)); diff --git a/basex-core/src/main/java/org/basex/io/parse/csv/CsvParser.java b/basex-core/src/main/java/org/basex/io/parse/csv/CsvParser.java index d70de1e33e..6ddbc98fed 100644 --- a/basex-core/src/main/java/org/basex/io/parse/csv/CsvParser.java +++ b/basex-core/src/main/java/org/basex/io/parse/csv/CsvParser.java @@ -4,6 +4,7 @@ import org.basex.build.csv.*; import org.basex.io.in.*; +import org.basex.query.*; import org.basex.util.*; /** @@ -23,8 +24,16 @@ public final class CsvParser { private final boolean backslashes; /** Column separator (see {@link CsvOptions#SEPARATOR}). */ private final int separator; + /** Row delimiter (see {@link CsvOptions#ROW_DELIMITER}). */ + private final int rowDelimiter; + /** Quote character (see {@link CsvOptions#QUOTE_CHARACTER}). */ + private final int quoteCharacter; /** Parse quotes. */ private final boolean quotes; + /** Trim whitespace. */ + private final boolean trimWhitespace; + /** Strict quoting. */ + private final boolean strictQuoting; /** First entry of a line. */ private boolean first = true; @@ -42,15 +51,21 @@ public CsvParser(final TextInput input, final CsvParserOptions opts, final CsvCo this.conv = conv; header = opts.get(CsvOptions.HEADER); separator = opts.separator(); + rowDelimiter = opts.rowDelimiter(); + quoteCharacter = opts.quoteCharacter(); quotes = opts.get(CsvOptions.QUOTES); backslashes = opts.get(CsvOptions.BACKSLASHES); + trimWhitespace = opts.get(CsvOptions.TRIM_WHITSPACE); + strictQuoting = opts.get(CsvOptions.STRICT_QUOTING); } /** * Parses a CSV expression. + * @param ii input info (can be @null) + * @throws QueryException query exception * @throws IOException query I/O exception */ - public void parse() throws IOException { + public void parse(final InputInfo ii) throws QueryException, IOException { final TokenBuilder entry = new TokenBuilder(); boolean quoted = false; data = !header; @@ -59,33 +74,40 @@ public void parse() throws IOException { while(ch != -1) { if(quoted) { // quoted state - if(ch == '"') { + if(ch == quoteCharacter) { ch = input.read(); - if(ch != '"') { + if(ch != quoteCharacter) { quoted = false; + if(strictQuoting && ch != separator && ch != rowDelimiter && ch != -1) + throw QueryError.CSV_QUOTING_X.get(ii, + new String(Character.toChars(quoteCharacter)) + entry + + new String(Character.toChars(quoteCharacter)) + + new String(Character.toChars(ch))); continue; } - if(backslashes) add(entry, '"'); + if(backslashes) add(entry, quoteCharacter); } else if(ch == '\\' && backslashes) { ch = bs(); } add(entry, ch); - } else if(ch == '"') { - if(quotes && entry.isEmpty()) { + } else if(ch == quoteCharacter) { + if(quotes) { + if(strictQuoting && !entry.isEmpty()) throw QueryError.CSV_QUOTING_X.get(ii, + entry + new String(Character.toChars(quoteCharacter))); // parse quote quoted = true; } else { ch = input.read(); - if(ch != '"' || backslashes) add(entry, '"'); + if(ch != quoteCharacter || backslashes) add(entry, quoteCharacter); continue; } } else if(ch == separator) { // parse separator - record(entry, true); + record(entry, false, false); first = false; - } else if(ch == '\n') { + } else if(ch == rowDelimiter) { // parse newline - record(entry, !entry.isEmpty()); + record(entry, false, true); first = true; data = true; } else { @@ -94,7 +116,9 @@ public void parse() throws IOException { } ch = input.read(); } - record(entry, !entry.isEmpty()); + if(quoted && strictQuoting) + throw QueryError.CSV_QUOTING_X.get(ii, new String(Character.toChars(quoteCharacter)) + entry); + record(entry, true, true); } /** @@ -122,16 +146,22 @@ private static void add(final TokenBuilder entry, final int ch) { /** * Adds a new record and entry. * @param entry entry to be added - * @param record add new record + * @param lastRow whether this is the last row + * @param lastField whether this is the last field of the row * @throws IOException I/O exception */ - private void record(final TokenBuilder entry, final boolean record) throws IOException { + private void record(final TokenBuilder entry, final boolean lastRow, final boolean lastField) + throws IOException { + byte[] field = entry.next(); + if(trimWhitespace) field = Token.trim(field); + final boolean record = !lastRow || field.length > 0; if(record && first && data) conv.record(); if(record || !first) { + if(first && lastField && field.length == 0) return; if(data) { - conv.entry(entry.next()); + conv.entry(field); } else { - conv.header(entry.next()); + conv.header(field); } } } diff --git a/basex-core/src/main/java/org/basex/query/QueryError.java b/basex-core/src/main/java/org/basex/query/QueryError.java index 40592bb731..4765e5ab87 100644 --- a/basex-core/src/main/java/org/basex/query/QueryError.java +++ b/basex-core/src/main/java/org/basex/query/QueryError.java @@ -595,6 +595,15 @@ public enum QueryError { /** Error code. */ CHARINV_X(FOCH, 5, "Invalid name, glyph or codepoint value: %."), + /** Error code. */ + CSV_ERROR_X(FOCV, 1, "CSV processing error: %."), + /** Error code. */ + CSV_QUOTING_X(FOCV, 1, "CSV field quoting error: %."), + /** Error code. */ + CSV_SINGLECHAR_X_X(FOCV, 2, "The value of % is not a single character: %."), + /** Error code. */ + CSV_DELIMITER_X(FOCV, 3, "Duplicate CSV delimiter error: %."), + /** Error code. */ IDDOC(FODC, 1, "Specified node has no document node as root."), /** Error code. */ @@ -1580,6 +1589,7 @@ public enum ErrType { /** Error type. */ FOAY, /** Error type. */ FOCA, /** Error type. */ FOCH, + /** Error type. */ FOCV, /** Error type. */ FODC, /** Error type. */ FODF, /** Error type. */ FODT, diff --git a/basex-core/src/main/java/org/basex/query/func/Function.java b/basex-core/src/main/java/org/basex/query/func/Function.java index 1336f51b17..1e9e48b864 100644 --- a/basex-core/src/main/java/org/basex/query/func/Function.java +++ b/basex-core/src/main/java/org/basex/query/func/Function.java @@ -156,6 +156,9 @@ public enum Function implements AFunction { COUNT(FnCount::new, "count(input)", params(ITEM_ZM), INTEGER_O), /** XQuery function. */ + CSV_TO_ARRAYS(FnCsvToArrays::new, "csv-to-arrays(value[,options])", + params(STRING_ZO, MAP_ZO), ARRAY_ZM, flag(CNS, HOF)), + /** XQuery function. */ CURRENT_DATE(FnCurrentDate::new, "current-date()", params(), DATE_O, flag(NDT)), /** XQuery function. */ @@ -477,6 +480,9 @@ ITEM_ZM, flag(HOF)), OUTERMOST(FnOutermost::new, "outermost(nodes)", params(NODE_ZM), NODE_ZM), /** XQuery function. */ + PARSE_CSV(FnParseCsv::new, "parse-csv(value[,options])", + params(STRING_ZO, MAP_ZO), MAP_O, flag(CNS, HOF)), + /** XQuery function. */ PARSE_IETF_DATE(FnParseIetfDate::new, "parse-ietf-date(value)", params(STRING_ZO), DATE_TIME_ZO), /** XQuery function. */ diff --git a/basex-core/src/main/java/org/basex/query/func/csv/CsvParse.java b/basex-core/src/main/java/org/basex/query/func/csv/CsvParse.java index 3a5d0c4c26..e28f805942 100644 --- a/basex-core/src/main/java/org/basex/query/func/csv/CsvParse.java +++ b/basex-core/src/main/java/org/basex/query/func/csv/CsvParse.java @@ -42,7 +42,7 @@ protected final Expr opt(final CompileContext cc) { protected final Item parse(final IO io, final QueryContext qc) throws QueryException { final CsvParserOptions options = toOptions(arg(1), new CsvParserOptions(), qc); try { - return CsvConverter.get(options).convert(io); + return CsvConverter.get(options).convert(io, info); } catch(final IOException ex) { throw CSV_PARSE_X.get(info, ex); } diff --git a/basex-core/src/main/java/org/basex/query/func/fn/FnCsvToArrays.java b/basex-core/src/main/java/org/basex/query/func/fn/FnCsvToArrays.java new file mode 100644 index 0000000000..78e9aa533a --- /dev/null +++ b/basex-core/src/main/java/org/basex/query/func/fn/FnCsvToArrays.java @@ -0,0 +1,88 @@ +package org.basex.query.func.fn; + +import static org.basex.query.QueryError.*; + +import java.io.*; +import java.util.*; + +import org.basex.build.csv.*; +import org.basex.build.csv.CsvOptions.*; +import org.basex.io.*; +import org.basex.io.parse.csv.*; +import org.basex.query.*; +import org.basex.query.value.*; +import org.basex.query.value.map.*; +import org.basex.query.value.seq.*; +import org.basex.util.*; +import org.basex.util.hash.*; +import org.basex.util.options.*; + +/** + * Function implementation. + * + * @author BaseX Team 2005-24, BSD License + * @author Gunther Rademacher + */ +public class FnCsvToArrays extends Parse { + @Override + public Value value(final QueryContext qc) throws QueryException { + final byte[] value = toTokenOrNull(arg(0), qc); + return value != null ? parse(new IOContent(value), qc) : Empty.VALUE; + } + + /** + * Parses the input and creates an XML document. + * @param io input data + * @param qc query context + * @return node + * @throws QueryException query exception + */ + protected final Value parse(final IO io, final QueryContext qc) throws QueryException { + final CsvToArraysOptions options = toOptions(arg(1), new CsvToArraysOptions(), qc); + options.validate(info); + try { + CsvParserOptions cpo = new CsvParserOptions(); + cpo.set(CsvOptions.FORMAT, CsvFormat.XQUERY); + cpo.set(CsvOptions.SEPARATOR, options.get(CsvToArraysOptions.FIELD_DELIMITER)); + cpo.set(CsvOptions.ROW_DELIMITER, options.get(CsvToArraysOptions.ROW_DELIMITER)); + cpo.set(CsvOptions.QUOTE_CHARACTER, options.get(CsvToArraysOptions.QUOTE_CHARACTER)); + cpo.set(CsvOptions.TRIM_WHITSPACE, options.get(CsvToArraysOptions.TRIM_WHITESPACE)); + cpo.set(CsvOptions.QUOTES, true); + cpo.set(CsvOptions.STRICT_QUOTING, true); + XQMap map = (XQMap) CsvConverter.get(cpo).convert(io, info); + return map.get(CsvXQueryConverter.RECORDS); + } catch(final IOException ex) { + throw CSV_ERROR_X.get(info, ex); + } + } + + /** + * Options for fn:parse-csv. + */ + public static class CsvToArraysOptions extends Options { + /** parse-csv option field-delimiter. */ + public static final StringOption FIELD_DELIMITER = new StringOption("field-delimiter", ","); + /** parse-csv option row-delimiter. */ + public static final StringOption ROW_DELIMITER = new StringOption("row-delimiter", "\n"); + /** parse-csv option quote-character. */ + public static final StringOption QUOTE_CHARACTER = new StringOption("quote-character", "\""); + /** parse-csv option trim-whitespace. */ + public static final BooleanOption TRIM_WHITESPACE = new BooleanOption("trim-whitespace", false); + + /** + * Check for error conditions in the current settings. + * @param ii input info + * @throws QueryException query exception + */ + public void validate(final InputInfo ii) throws QueryException { + final IntSet delim = new IntSet(); + for(final StringOption opt : Arrays.asList(FIELD_DELIMITER, ROW_DELIMITER, QUOTE_CHARACTER)) { + final String val = get(opt); + if(val.codePointCount(0, val.length()) != 1) + throw CSV_SINGLECHAR_X_X.get(ii, opt.name(), val); + final int cp = val.codePointAt(0); + if(!delim.add(cp)) throw CSV_DELIMITER_X.get(ii, val); + } + } + } +} diff --git a/basex-core/src/main/java/org/basex/util/http/Payload.java b/basex-core/src/main/java/org/basex/util/http/Payload.java index 88c2ed63a8..724fc65b96 100644 --- a/basex-core/src/main/java/org/basex/util/http/Payload.java +++ b/basex-core/src/main/java/org/basex/util/http/Payload.java @@ -310,7 +310,7 @@ public static Value value(final byte[] body, final MediaType type, } else if(type.isCSV()) { final CsvParserOptions opts = new CsvParserOptions(options.get(MainOptions.CSVPARSER)); opts.assign(type); - return CsvConverter.get(opts).convert(io); + return CsvConverter.get(opts).convert(io, null); } else if(type.is(MediaType.TEXT_HTML)) { final HtmlOptions opts = new HtmlOptions(options.get(MainOptions.HTMLPARSER)); opts.assign(type); diff --git a/basex-core/src/main/java/org/basex/util/options/Options.java b/basex-core/src/main/java/org/basex/util/options/Options.java index 5126a1f231..372d3a8363 100644 --- a/basex-core/src/main/java/org/basex/util/options/Options.java +++ b/basex-core/src/main/java/org/basex/util/options/Options.java @@ -820,6 +820,7 @@ private synchronized void assign(final String name, final Value value, final Inp if(item == null) throw expected.apply(AtomType.INTEGER); result = (int) item.itr(info); } else if(option instanceof StringOption) { + if(item == null) throw expected.apply(AtomType.STRING); result = serialize(value, info); } else if(option instanceof StringsOption) { final StringList list = new StringList(); diff --git a/basex-core/src/test/java/org/basex/query/func/FnModuleTest.java b/basex-core/src/test/java/org/basex/query/func/FnModuleTest.java index 46a5e87740..369757fcc9 100644 --- a/basex-core/src/test/java/org/basex/query/func/FnModuleTest.java +++ b/basex-core/src/test/java/org/basex/query/func/FnModuleTest.java @@ -1731,6 +1731,13 @@ public final class FnModuleTest extends SandboxTest { query("let $n :=
  • return " + func.args(" ($n, $n)"), "
  • "); } + /** Test method. */ + @Test public void parseCsv() { + final Function func = PARSE_CSV; + query(func.args(" ()"), "{}"); + query(func.args(""), "{}"); + } + /** Test method. */ @Test public void parseIetfDate() { final Function func = PARSE_IETF_DATE; From e3be25bfa767c408f0f3b02d45fb2b6a69ad70a5 Mon Sep 17 00:00:00 2001 From: Gunther Rademacher Date: Fri, 27 Dec 2024 14:35:38 +0100 Subject: [PATCH 2/8] add fn:parse-csv implementation --- .../java/org/basex/build/csv/CsvOptions.java | 4 + .../org/basex/io/parse/csv/CsvConverter.java | 6 +- .../org/basex/io/parse/csv/CsvParser.java | 47 +++-- .../main/java/org/basex/query/QueryError.java | 2 + .../java/org/basex/query/func/Function.java | 4 +- .../basex/query/func/fn/FnCsvToArrays.java | 33 ++-- .../org/basex/query/func/fn/FnParseCsv.java | 161 ++++++++++++++++++ .../org/basex/query/value/type/SeqType.java | 4 + .../java/org/basex/util/options/Options.java | 5 +- .../org/basex/query/func/FnModuleTest.java | 7 - 10 files changed, 230 insertions(+), 43 deletions(-) create mode 100644 basex-core/src/main/java/org/basex/query/func/fn/FnParseCsv.java diff --git a/basex-core/src/main/java/org/basex/build/csv/CsvOptions.java b/basex-core/src/main/java/org/basex/build/csv/CsvOptions.java index bfba58730e..77bdb1926d 100644 --- a/basex-core/src/main/java/org/basex/build/csv/CsvOptions.java +++ b/basex-core/src/main/java/org/basex/build/csv/CsvOptions.java @@ -37,6 +37,10 @@ public class CsvOptions extends Options { public static final BooleanOption TRIM_WHITSPACE = new BooleanOption("trim-whitespace", false); /** Option: strict quoting. */ public static final BooleanOption STRICT_QUOTING = new BooleanOption("strict-quoting", false); + /** Option: trim-rows. */ + public static final BooleanOption TRIM_ROWS = new BooleanOption("trim-rows", false); + /** Option: select-columns. */ + public static final NumbersOption SELECT_COLUMNS = new NumbersOption("select-columns"); /** CSV formats. */ public enum CsvFormat { diff --git a/basex-core/src/main/java/org/basex/io/parse/csv/CsvConverter.java b/basex-core/src/main/java/org/basex/io/parse/csv/CsvConverter.java index e690ef7f4e..9579b5ae8c 100644 --- a/basex-core/src/main/java/org/basex/io/parse/csv/CsvConverter.java +++ b/basex-core/src/main/java/org/basex/io/parse/csv/CsvConverter.java @@ -10,6 +10,7 @@ import org.basex.query.*; import org.basex.query.util.*; import org.basex.query.value.item.*; +import org.basex.util.*; import org.basex.util.list.*; /** @@ -61,15 +62,16 @@ protected CsvConverter(final CsvParserOptions copts) { /** * Converts the specified input to an XQuery value. * @param input input + * @param ii input info (can be {@code null}) * @return result * @throws QueryException query exception * @throws IOException I/O exception */ - public final Item convert(final IO input) throws QueryException, IOException { + public final Item convert(final IO input, final InputInfo ii) throws QueryException, IOException { init(input.url()); try(NewlineInput in = new NewlineInput(input)) { nli = in.encoding(copts.get(CsvParserOptions.ENCODING)); - new CsvParser(in, copts, this).parse(); + new CsvParser(in, copts, this).parse(ii); } return finish(); } diff --git a/basex-core/src/main/java/org/basex/io/parse/csv/CsvParser.java b/basex-core/src/main/java/org/basex/io/parse/csv/CsvParser.java index 6ddbc98fed..4d4470d6d7 100644 --- a/basex-core/src/main/java/org/basex/io/parse/csv/CsvParser.java +++ b/basex-core/src/main/java/org/basex/io/parse/csv/CsvParser.java @@ -5,7 +5,10 @@ import org.basex.build.csv.*; import org.basex.io.in.*; import org.basex.query.*; +import org.basex.query.value.item.*; +import org.basex.query.value.type.*; import org.basex.util.*; +import org.basex.util.list.*; /** * A CSV parser generating parse events similar to a SAX XML parser. @@ -32,21 +35,31 @@ public final class CsvParser { private final boolean quotes; /** Trim whitespace. */ private final boolean trimWhitespace; + /** Trim whitespace. */ + private final boolean trimRows; /** Strict quoting. */ private final boolean strictQuoting; + /** Select columns. */ + private final int[] selectColumns; /** First entry of a line. */ private boolean first = true; + /** Number of fields in first row. */ + private int rowSize = -1; /** Data mode. */ private boolean data; + /** Fields of the current row. */ + private final TokenList fields = new TokenList(); /** * Constructor. * @param input input * @param opts options * @param conv converter + * @throws QueryException query exception */ - public CsvParser(final TextInput input, final CsvParserOptions opts, final CsvConverter conv) { + public CsvParser(final TextInput input, final CsvParserOptions opts, final CsvConverter conv) + throws QueryException { this.input = input; this.conv = conv; header = opts.get(CsvOptions.HEADER); @@ -56,7 +69,12 @@ public CsvParser(final TextInput input, final CsvParserOptions opts, final CsvCo quotes = opts.get(CsvOptions.QUOTES); backslashes = opts.get(CsvOptions.BACKSLASHES); trimWhitespace = opts.get(CsvOptions.TRIM_WHITSPACE); + trimRows = opts.get(CsvOptions.TRIM_ROWS); strictQuoting = opts.get(CsvOptions.STRICT_QUOTING); + selectColumns = opts.get(CsvOptions.SELECT_COLUMNS); + for(final int sc : selectColumns) { + if(sc < 1) throw QueryError.typeError(Int.get(sc), SeqType.POSITIVE_INTEGER_O, null); + } } /** @@ -152,17 +170,24 @@ private static void add(final TokenBuilder entry, final int ch) { */ private void record(final TokenBuilder entry, final boolean lastRow, final boolean lastField) throws IOException { - byte[] field = entry.next(); - if(trimWhitespace) field = Token.trim(field); - final boolean record = !lastRow || field.length > 0; - if(record && first && data) conv.record(); - if(record || !first) { - if(first && lastField && field.length == 0) return; - if(data) { - conv.entry(field); - } else { - conv.header(field); + final byte[] next = entry.next(); + final byte[] field = trimWhitespace ? Token.trim(next) : next; + if(field.length > 0 || !(first && lastField)) fields.add(field); + if(lastField && !(lastRow && fields.isEmpty())) { + if(data) conv.record(); + if(rowSize == -1) rowSize = fields.size(); + final int n = selectColumns.length != 0 ? selectColumns.length + : trimRows ? rowSize : fields.size(); + for(int i = 0; i < n; ++i) { + final int index = selectColumns.length != 0 ? selectColumns[i] - 1 : i; + final byte[] f = index < fields.size() ? fields.get(index) : Token.EMPTY; + if(data) { + conv.entry(f); + } else { + conv.header(f); + } } + fields.reset(); } } } diff --git a/basex-core/src/main/java/org/basex/query/QueryError.java b/basex-core/src/main/java/org/basex/query/QueryError.java index 4765e5ab87..7ac3e1f198 100644 --- a/basex-core/src/main/java/org/basex/query/QueryError.java +++ b/basex-core/src/main/java/org/basex/query/QueryError.java @@ -603,6 +603,8 @@ public enum QueryError { CSV_SINGLECHAR_X_X(FOCV, 2, "The value of % is not a single character: %."), /** Error code. */ CSV_DELIMITER_X(FOCV, 3, "Duplicate CSV delimiter error: %."), + /** Error code. */ + CSV_COLUMNNAME_X(FOCV, 4, "Argument supplied is not a known column name: %."), /** Error code. */ IDDOC(FODC, 1, "Specified node has no document node as root."), diff --git a/basex-core/src/main/java/org/basex/query/func/Function.java b/basex-core/src/main/java/org/basex/query/func/Function.java index 1e9e48b864..a3bdeca181 100644 --- a/basex-core/src/main/java/org/basex/query/func/Function.java +++ b/basex-core/src/main/java/org/basex/query/func/Function.java @@ -157,7 +157,7 @@ public enum Function implements AFunction { params(ITEM_ZM), INTEGER_O), /** XQuery function. */ CSV_TO_ARRAYS(FnCsvToArrays::new, "csv-to-arrays(value[,options])", - params(STRING_ZO, MAP_ZO), ARRAY_ZM, flag(CNS, HOF)), + params(STRING_ZO, MAP_ZO), STRING_O.arrayType().seqType(Occ.ZERO_OR_MORE)), /** XQuery function. */ CURRENT_DATE(FnCurrentDate::new, "current-date()", params(), DATE_O, flag(NDT)), @@ -481,7 +481,7 @@ ITEM_ZM, flag(HOF)), params(NODE_ZM), NODE_ZM), /** XQuery function. */ PARSE_CSV(FnParseCsv::new, "parse-csv(value[,options])", - params(STRING_ZO, MAP_ZO), MAP_O, flag(CNS, HOF)), + params(STRING_ZO, MAP_ZO), MAP_O), /** XQuery function. */ PARSE_IETF_DATE(FnParseIetfDate::new, "parse-ietf-date(value)", params(STRING_ZO), DATE_TIME_ZO), diff --git a/basex-core/src/main/java/org/basex/query/func/fn/FnCsvToArrays.java b/basex-core/src/main/java/org/basex/query/func/fn/FnCsvToArrays.java index 78e9aa533a..d8223d4625 100644 --- a/basex-core/src/main/java/org/basex/query/func/fn/FnCsvToArrays.java +++ b/basex-core/src/main/java/org/basex/query/func/fn/FnCsvToArrays.java @@ -27,29 +27,22 @@ public class FnCsvToArrays extends Parse { @Override public Value value(final QueryContext qc) throws QueryException { final byte[] value = toTokenOrNull(arg(0), qc); - return value != null ? parse(new IOContent(value), qc) : Empty.VALUE; - } - - /** - * Parses the input and creates an XML document. - * @param io input data - * @param qc query context - * @return node - * @throws QueryException query exception - */ - protected final Value parse(final IO io, final QueryContext qc) throws QueryException { + if(value == null) return Empty.VALUE; + final IO io = new IOContent(value); final CsvToArraysOptions options = toOptions(arg(1), new CsvToArraysOptions(), qc); options.validate(info); + + final CsvParserOptions cpo = new CsvParserOptions(); + cpo.set(CsvOptions.SEPARATOR, options.get(CsvToArraysOptions.FIELD_DELIMITER)); + cpo.set(CsvOptions.ROW_DELIMITER, options.get(CsvToArraysOptions.ROW_DELIMITER)); + cpo.set(CsvOptions.QUOTE_CHARACTER, options.get(CsvToArraysOptions.QUOTE_CHARACTER)); + cpo.set(CsvOptions.TRIM_WHITSPACE, options.get(CsvToArraysOptions.TRIM_WHITESPACE)); + cpo.set(CsvOptions.FORMAT, CsvFormat.XQUERY); + cpo.set(CsvOptions.QUOTES, true); + cpo.set(CsvOptions.STRICT_QUOTING, true); + try { - CsvParserOptions cpo = new CsvParserOptions(); - cpo.set(CsvOptions.FORMAT, CsvFormat.XQUERY); - cpo.set(CsvOptions.SEPARATOR, options.get(CsvToArraysOptions.FIELD_DELIMITER)); - cpo.set(CsvOptions.ROW_DELIMITER, options.get(CsvToArraysOptions.ROW_DELIMITER)); - cpo.set(CsvOptions.QUOTE_CHARACTER, options.get(CsvToArraysOptions.QUOTE_CHARACTER)); - cpo.set(CsvOptions.TRIM_WHITSPACE, options.get(CsvToArraysOptions.TRIM_WHITESPACE)); - cpo.set(CsvOptions.QUOTES, true); - cpo.set(CsvOptions.STRICT_QUOTING, true); - XQMap map = (XQMap) CsvConverter.get(cpo).convert(io, info); + final XQMap map = (XQMap) CsvConverter.get(cpo).convert(io, info); return map.get(CsvXQueryConverter.RECORDS); } catch(final IOException ex) { throw CSV_ERROR_X.get(info, ex); diff --git a/basex-core/src/main/java/org/basex/query/func/fn/FnParseCsv.java b/basex-core/src/main/java/org/basex/query/func/fn/FnParseCsv.java new file mode 100644 index 0000000000..977bcf3c04 --- /dev/null +++ b/basex-core/src/main/java/org/basex/query/func/fn/FnParseCsv.java @@ -0,0 +1,161 @@ +package org.basex.query.func.fn; + +import static org.basex.query.QueryError.*; +import static org.basex.query.value.type.SeqType.*; + +import java.io.*; +import java.util.*; + +import org.basex.build.csv.*; +import org.basex.build.csv.CsvOptions.*; +import org.basex.io.*; +import org.basex.io.parse.csv.*; +import org.basex.query.*; +import org.basex.query.expr.*; +import org.basex.query.func.fn.FnCsvToArrays.*; +import org.basex.query.util.list.*; +import org.basex.query.value.*; +import org.basex.query.value.array.*; +import org.basex.query.value.item.*; +import org.basex.query.value.map.*; +import org.basex.query.value.type.*; +import org.basex.query.var.*; +import org.basex.util.*; +import org.basex.util.hash.*; +import org.basex.util.options.*; + +/** + * Function implementation. + * + * @author BaseX Team 2005-24, BSD License + * @author Gunther Rademacher + */ +public class FnParseCsv extends Parse { + @Override + public Item item(final QueryContext qc, final InputInfo ii) throws QueryException { + final byte[] value = toZeroToken(arg(0), qc); + final IO io = new IOContent(value); + final ParseCsvOptions options = toOptions(arg(1), new ParseCsvOptions(), qc); + options.validate(info); + + final CsvParserOptions cpo = new CsvParserOptions(); + cpo.set(CsvOptions.SEPARATOR, options.get(CsvToArraysOptions.FIELD_DELIMITER)); + cpo.set(CsvOptions.ROW_DELIMITER, options.get(CsvToArraysOptions.ROW_DELIMITER)); + cpo.set(CsvOptions.QUOTE_CHARACTER, options.get(CsvToArraysOptions.QUOTE_CHARACTER)); + cpo.set(CsvOptions.TRIM_WHITSPACE, options.get(CsvToArraysOptions.TRIM_WHITESPACE)); + cpo.set(CsvOptions.TRIM_ROWS, options.get(ParseCsvOptions.TRIM_ROWS)); + cpo.set(CsvOptions.SELECT_COLUMNS, options.get(ParseCsvOptions.SELECT_COLUMNS)); + + final Value header = options.get(ParseCsvOptions.HEADER); + Value names = null; + if(BOOLEAN_O.instance(header)) cpo.set(CsvOptions.HEADER, toBoolean((Item) header)); + else if(STRING_OM.instance(header)) names = header; + else throw EXP_FOUND_X_X_X.get(ii, STRING_OM, header.seqType(), header); + + cpo.set(CsvOptions.FORMAT, CsvFormat.XQUERY); + cpo.set(CsvOptions.QUOTES, true); + cpo.set(CsvOptions.STRICT_QUOTING, true); + + try { + final XQMap map = (XQMap) CsvConverter.get(cpo).convert(io, info); + final MapBuilder result = new MapBuilder(); + + if(names == null) names = map.get(CsvXQueryConverter.NAMES).atomValue(qc, ii); + result.put(Str.get("columns"), names); + + final MapBuilder cib = new MapBuilder(); + int i = 0; + for(Item name : names) { + ++i; + AStr str = toStr(name, qc); + if(str.length(ii) > 0 && cib.get(name) == null) cib.put(name, Int.get(i)); + } + + final XQMap columnIndex = cib.map(); + result.put(Str.get("column-index"), columnIndex); + final Value rows = map.get(CsvXQueryConverter.RECORDS); + result.put("rows", rows); + + // create get function + + final VarScope vs = new VarScope(); + final SeqType rowType = POSITIVE_INTEGER_O; + final Var row = vs.addNew(new QNm("row"), rowType, qc, ii); + final SeqType colType = new ChoiceItemType( + Arrays.asList(STRING_O, POSITIVE_INTEGER_O)).seqType(); + final Var col = vs.addNew(new QNm("column"), colType, qc, ii); + final Get get = new Get(info, rows, columnIndex, + new Expr[] { new VarRef(ii, row), new VarRef(ii, col)}); + final Var[] params = { row, col}; + final FuncType funcType = FuncType.get(STRING_O, rowType, colType); + result.put("get", + new FuncItem(ii, get, params, AnnList.EMPTY, funcType, params.length, null)); + + return result.map(); + } catch(final IOException ex) { + throw CSV_ERROR_X.get(info, ex); + } + } + + /** + * Get function. + */ + private static final class Get extends Arr { + /** Result rows. */ + final Value rows; + /** Column name to index mapping. */ + final XQMap columnIndex; + + /** + * Constructor. + * @param ii input info + * @param rows result rows + * @param columnIndex column name to index mapping + * @param args function arguments + */ + private Get(final InputInfo ii, final Value rows, final XQMap columnIndex, + final Expr... args) { + super(ii, STRING_O, args); + this.rows = rows; + this.columnIndex = columnIndex; + } + + @Override + public Value value(final QueryContext qc) throws QueryException { + final long rowIndex = toLong(arg(0), qc); + if(rowIndex > rows.size()) return Str.EMPTY; + final XQArray row = (XQArray) rows.itemAt(rowIndex - 1); + if(row == null) return Str.EMPTY; + Item colIndex = toAtomItem(arg(1), qc); + if(STRING_O.instance(colIndex)) { + final Item it = (Item) columnIndex.get(colIndex); + if(it.isEmpty()) throw CSV_COLUMNNAME_X.get(info, colIndex); + colIndex = it; + } + final Value val = row.getInternal(colIndex, qc, info, false); + return val == null ? Str.EMPTY : val; + } + + @Override + public Expr copy(final CompileContext cc, final IntObjMap vm) { + return copyType(new Get(info, rows, columnIndex, copyAll(cc, vm, args()))); + } + + @Override + public void toString(final QueryString qs) { + qs.token("csv-get").params(exprs); + } + } + + /** + * Options for fn:parse-csv. + */ + public static final class ParseCsvOptions extends FnCsvToArrays.CsvToArraysOptions { + /** parse-csv option header. */ + public static final ValueOption HEADER = new ValueOption("header", ITEM_ZM, Bln.FALSE); + /** parse-csv option select-columns. */ + public static final NumbersOption SELECT_COLUMNS = new NumbersOption("select-columns"); + /** parse-csv option trim-rows. */ + public static final BooleanOption TRIM_ROWS = new BooleanOption("trim-rows", false); + } +} diff --git a/basex-core/src/main/java/org/basex/query/value/type/SeqType.java b/basex-core/src/main/java/org/basex/query/value/type/SeqType.java index 5223808559..d620baf8e5 100644 --- a/basex-core/src/main/java/org/basex/query/value/type/SeqType.java +++ b/basex-core/src/main/java/org/basex/query/value/type/SeqType.java @@ -71,6 +71,8 @@ public final class SeqType { public static final SeqType INTEGER_ZO = INTEGER.seqType(ZERO_OR_ONE); /** Zero or more integers. */ public static final SeqType INTEGER_ZM = INTEGER.seqType(ZERO_OR_MORE); + /** Positive integer. */ + public static final SeqType POSITIVE_INTEGER_O = POSITIVE_INTEGER.seqType(); /** Zero or more bytes. */ public static final SeqType BYTE_ZM = BYTE.seqType(ZERO_OR_MORE); @@ -80,6 +82,8 @@ public final class SeqType { public static final SeqType STRING_ZO = STRING.seqType(ZERO_OR_ONE); /** Zero or more strings. */ public static final SeqType STRING_ZM = STRING.seqType(ZERO_OR_MORE); + /** One or more strings. */ + public static final SeqType STRING_OM = STRING.seqType(ONE_OR_MORE); /** Zero or one NCName. */ public static final SeqType NCNAME_ZO = NCNAME.seqType(ZERO_OR_ONE); /** Single language. */ diff --git a/basex-core/src/main/java/org/basex/util/options/Options.java b/basex-core/src/main/java/org/basex/util/options/Options.java index aa33470a29..8702ebcedd 100644 --- a/basex-core/src/main/java/org/basex/util/options/Options.java +++ b/basex-core/src/main/java/org/basex/util/options/Options.java @@ -817,12 +817,15 @@ private synchronized void assign(final String name, final Value value, final Inp if(item == null) throw expected.apply(AtomType.INTEGER); result = (int) item.itr(info); } else if(option instanceof StringOption) { - if(item == null) throw expected.apply(AtomType.STRING); result = serialize(value, info); } else if(option instanceof StringsOption) { final StringList list = new StringList(); for(final Item it : value) list.add(serialize(it, info)); result = list.finish(); + } else if(option instanceof NumbersOption) { + final IntList list = new IntList(); + for(final Item it : value) list.add(Strings.toInt(string(it.string(info)))); + result = list.finish(); } else if(option instanceof EnumOption) { final String string = normalize(serialize(value, info)); final EnumOption eo = (EnumOption) option; diff --git a/basex-core/src/test/java/org/basex/query/func/FnModuleTest.java b/basex-core/src/test/java/org/basex/query/func/FnModuleTest.java index f95f4880e4..f85f49b568 100644 --- a/basex-core/src/test/java/org/basex/query/func/FnModuleTest.java +++ b/basex-core/src/test/java/org/basex/query/func/FnModuleTest.java @@ -1737,13 +1737,6 @@ public final class FnModuleTest extends SandboxTest { query("let $n :=
  • return " + func.args(" ($n, $n)"), "
  • "); } - /** Test method. */ - @Test public void parseCsv() { - final Function func = PARSE_CSV; - query(func.args(" ()"), "{}"); - query(func.args(""), "{}"); - } - /** Test method. */ @Test public void parseIetfDate() { final Function func = PARSE_IETF_DATE; From dd5321783b7a779b0976388de977ebc5d818c892 Mon Sep 17 00:00:00 2001 From: Gunther Rademacher Date: Fri, 27 Dec 2024 18:59:12 +0100 Subject: [PATCH 3/8] add fn:csv-to-xml implementation --- .../java/org/basex/query/expr/ParseExpr.java | 12 ++ .../java/org/basex/query/func/Function.java | 3 + .../basex/query/func/fn/FnCsvToArrays.java | 33 +++-- .../org/basex/query/func/fn/FnCsvToXml.java | 46 +++++++ .../org/basex/query/func/fn/FnParseCsv.java | 117 ++++++++++-------- 5 files changed, 145 insertions(+), 66 deletions(-) create mode 100644 basex-core/src/main/java/org/basex/query/func/fn/FnCsvToXml.java diff --git a/basex-core/src/main/java/org/basex/query/expr/ParseExpr.java b/basex-core/src/main/java/org/basex/query/expr/ParseExpr.java index ea6df8f51a..56fe4fa907 100644 --- a/basex-core/src/main/java/org/basex/query/expr/ParseExpr.java +++ b/basex-core/src/main/java/org/basex/query/expr/ParseExpr.java @@ -341,6 +341,18 @@ protected final boolean toBooleanOrFalse(final Expr expr, final QueryContext qc) * @throws QueryException query exception */ protected final boolean toBoolean(final Item item) throws QueryException { + return toBoolean(item, info); + } + + /** + * Converts an item to a boolean. + * @param item item to be converted + * @param info input info + * @return boolean + * @throws QueryException query exception + */ + protected static final boolean toBoolean(final Item item, final InputInfo info) + throws QueryException { final Type type = item.type; if(type == BOOLEAN) return item.bool(info); if(type.isUntyped()) return Bln.parse(item, info); diff --git a/basex-core/src/main/java/org/basex/query/func/Function.java b/basex-core/src/main/java/org/basex/query/func/Function.java index a3bdeca181..51199fc2aa 100644 --- a/basex-core/src/main/java/org/basex/query/func/Function.java +++ b/basex-core/src/main/java/org/basex/query/func/Function.java @@ -159,6 +159,9 @@ public enum Function implements AFunction { CSV_TO_ARRAYS(FnCsvToArrays::new, "csv-to-arrays(value[,options])", params(STRING_ZO, MAP_ZO), STRING_O.arrayType().seqType(Occ.ZERO_OR_MORE)), /** XQuery function. */ + CSV_TO_XML(FnCsvToXml::new, "csv-to-xml(value[,options])", + params(STRING_ZO, MAP_ZO), DOCUMENT_NODE_O), + /** XQuery function. */ CURRENT_DATE(FnCurrentDate::new, "current-date()", params(), DATE_O, flag(NDT)), /** XQuery function. */ diff --git a/basex-core/src/main/java/org/basex/query/func/fn/FnCsvToArrays.java b/basex-core/src/main/java/org/basex/query/func/fn/FnCsvToArrays.java index d8223d4625..c6b889689d 100644 --- a/basex-core/src/main/java/org/basex/query/func/fn/FnCsvToArrays.java +++ b/basex-core/src/main/java/org/basex/query/func/fn/FnCsvToArrays.java @@ -27,22 +27,13 @@ public class FnCsvToArrays extends Parse { @Override public Value value(final QueryContext qc) throws QueryException { final byte[] value = toTokenOrNull(arg(0), qc); - if(value == null) return Empty.VALUE; - final IO io = new IOContent(value); final CsvToArraysOptions options = toOptions(arg(1), new CsvToArraysOptions(), qc); options.validate(info); - final CsvParserOptions cpo = new CsvParserOptions(); - cpo.set(CsvOptions.SEPARATOR, options.get(CsvToArraysOptions.FIELD_DELIMITER)); - cpo.set(CsvOptions.ROW_DELIMITER, options.get(CsvToArraysOptions.ROW_DELIMITER)); - cpo.set(CsvOptions.QUOTE_CHARACTER, options.get(CsvToArraysOptions.QUOTE_CHARACTER)); - cpo.set(CsvOptions.TRIM_WHITSPACE, options.get(CsvToArraysOptions.TRIM_WHITESPACE)); - cpo.set(CsvOptions.FORMAT, CsvFormat.XQUERY); - cpo.set(CsvOptions.QUOTES, true); - cpo.set(CsvOptions.STRICT_QUOTING, true); - + if(value == null) return Empty.VALUE; + final CsvParserOptions parserOpts = options.toCsvParserOptions(); try { - final XQMap map = (XQMap) CsvConverter.get(cpo).convert(io, info); + final XQMap map = (XQMap) CsvConverter.get(parserOpts).convert(new IOContent(value), info); return map.get(CsvXQueryConverter.RECORDS); } catch(final IOException ex) { throw CSV_ERROR_X.get(info, ex); @@ -67,7 +58,7 @@ public static class CsvToArraysOptions extends Options { * @param ii input info * @throws QueryException query exception */ - public void validate(final InputInfo ii) throws QueryException { + void validate(final InputInfo ii) throws QueryException { final IntSet delim = new IntSet(); for(final StringOption opt : Arrays.asList(FIELD_DELIMITER, ROW_DELIMITER, QUOTE_CHARACTER)) { final String val = get(opt); @@ -77,5 +68,21 @@ public void validate(final InputInfo ii) throws QueryException { if(!delim.add(cp)) throw CSV_DELIMITER_X.get(ii, val); } } + + /** + * Convert the options to a CsvParserOptions object. + * @return the CsvParserOptions object + */ + CsvParserOptions toCsvParserOptions() { + final CsvParserOptions parserOpts = new CsvParserOptions(); + parserOpts.set(CsvOptions.SEPARATOR, get(FIELD_DELIMITER)); + parserOpts.set(CsvOptions.ROW_DELIMITER, get(ROW_DELIMITER)); + parserOpts.set(CsvOptions.QUOTE_CHARACTER, get(QUOTE_CHARACTER)); + parserOpts.set(CsvOptions.TRIM_WHITSPACE, get(TRIM_WHITESPACE)); + parserOpts.set(CsvOptions.FORMAT, CsvFormat.XQUERY); + parserOpts.set(CsvOptions.QUOTES, true); + parserOpts.set(CsvOptions.STRICT_QUOTING, true); + return parserOpts; + } } } diff --git a/basex-core/src/main/java/org/basex/query/func/fn/FnCsvToXml.java b/basex-core/src/main/java/org/basex/query/func/fn/FnCsvToXml.java new file mode 100644 index 0000000000..d52876e424 --- /dev/null +++ b/basex-core/src/main/java/org/basex/query/func/fn/FnCsvToXml.java @@ -0,0 +1,46 @@ +package org.basex.query.func.fn; + +import static org.basex.query.QueryError.*; + +import java.io.*; + +import org.basex.build.csv.*; +import org.basex.io.*; +import org.basex.io.parse.csv.*; +import org.basex.query.*; +import org.basex.query.func.fn.FnParseCsv.*; +import org.basex.query.value.item.*; +import org.basex.query.value.seq.*; +import org.basex.util.*; +import org.basex.util.hash.*; + +/** + * Function implementation. + * + * @author BaseX Team 2005-24, BSD License + * @author Gunther Rademacher + */ +public class FnCsvToXml extends Parse { + @Override + public Item item(final QueryContext qc, final InputInfo ii) throws QueryException { + final byte[] value = toTokenOrNull(arg(0), qc); + final ParseCsvOptions options = toOptions(arg(1), new ParseCsvOptions(), qc); + options.validate(ii); + + if(value == null) return Empty.VALUE; + final CsvParserOptions parserOpts = options.toCsvParserOptions(); + try { + final CsvXmlConverter converter = new CsvXmlConverter(parserOpts); + final TokenSet names = new TokenSet(); + if(options.columnNames != null) { + for(final Item columnName : options.columnNames) { + final byte[] token = toZeroToken(columnName, qc); + converter.header(names.add(token) ? token : Token.EMPTY); + } + } + return converter.convert(new IOContent(value), ii); + } catch(final IOException ex) { + throw CSV_ERROR_X.get(ii, ex); + } + } +} diff --git a/basex-core/src/main/java/org/basex/query/func/fn/FnParseCsv.java b/basex-core/src/main/java/org/basex/query/func/fn/FnParseCsv.java index 977bcf3c04..d5cbfc7fd7 100644 --- a/basex-core/src/main/java/org/basex/query/func/fn/FnParseCsv.java +++ b/basex-core/src/main/java/org/basex/query/func/fn/FnParseCsv.java @@ -7,12 +7,10 @@ import java.util.*; import org.basex.build.csv.*; -import org.basex.build.csv.CsvOptions.*; import org.basex.io.*; import org.basex.io.parse.csv.*; import org.basex.query.*; import org.basex.query.expr.*; -import org.basex.query.func.fn.FnCsvToArrays.*; import org.basex.query.util.list.*; import org.basex.query.value.*; import org.basex.query.value.array.*; @@ -34,66 +32,33 @@ public class FnParseCsv extends Parse { @Override public Item item(final QueryContext qc, final InputInfo ii) throws QueryException { final byte[] value = toZeroToken(arg(0), qc); - final IO io = new IOContent(value); final ParseCsvOptions options = toOptions(arg(1), new ParseCsvOptions(), qc); - options.validate(info); - - final CsvParserOptions cpo = new CsvParserOptions(); - cpo.set(CsvOptions.SEPARATOR, options.get(CsvToArraysOptions.FIELD_DELIMITER)); - cpo.set(CsvOptions.ROW_DELIMITER, options.get(CsvToArraysOptions.ROW_DELIMITER)); - cpo.set(CsvOptions.QUOTE_CHARACTER, options.get(CsvToArraysOptions.QUOTE_CHARACTER)); - cpo.set(CsvOptions.TRIM_WHITSPACE, options.get(CsvToArraysOptions.TRIM_WHITESPACE)); - cpo.set(CsvOptions.TRIM_ROWS, options.get(ParseCsvOptions.TRIM_ROWS)); - cpo.set(CsvOptions.SELECT_COLUMNS, options.get(ParseCsvOptions.SELECT_COLUMNS)); - - final Value header = options.get(ParseCsvOptions.HEADER); - Value names = null; - if(BOOLEAN_O.instance(header)) cpo.set(CsvOptions.HEADER, toBoolean((Item) header)); - else if(STRING_OM.instance(header)) names = header; - else throw EXP_FOUND_X_X_X.get(ii, STRING_OM, header.seqType(), header); - - cpo.set(CsvOptions.FORMAT, CsvFormat.XQUERY); - cpo.set(CsvOptions.QUOTES, true); - cpo.set(CsvOptions.STRICT_QUOTING, true); + options.validate(ii); + final CsvParserOptions parserOpts = options.toCsvParserOptions(); try { - final XQMap map = (XQMap) CsvConverter.get(cpo).convert(io, info); - final MapBuilder result = new MapBuilder(); - - if(names == null) names = map.get(CsvXQueryConverter.NAMES).atomValue(qc, ii); - result.put(Str.get("columns"), names); - - final MapBuilder cib = new MapBuilder(); + final XQMap map = (XQMap) CsvConverter.get(parserOpts).convert(new IOContent(value), ii); + final Value columns = options.columnNames != null + ? options.columnNames : map.get(CsvXQueryConverter.NAMES).atomValue(qc, ii); + final MapBuilder columnIndexBuilder = new MapBuilder(); int i = 0; - for(Item name : names) { + for(final Item col : columns) { ++i; - AStr str = toStr(name, qc); - if(str.length(ii) > 0 && cib.get(name) == null) cib.put(name, Int.get(i)); + if(toStr(col, qc).length(ii) > 0 && columnIndexBuilder.get(col) == null) { + columnIndexBuilder.put(col, Int.get(i)); + } } + final XQMap columnIndex = columnIndexBuilder.map(); + final Value rows = map.get(CsvXQueryConverter.RECORDS); - final XQMap columnIndex = cib.map(); + final MapBuilder result = new MapBuilder(); + result.put(Str.get("columns"), columns); result.put(Str.get("column-index"), columnIndex); - final Value rows = map.get(CsvXQueryConverter.RECORDS); result.put("rows", rows); - - // create get function - - final VarScope vs = new VarScope(); - final SeqType rowType = POSITIVE_INTEGER_O; - final Var row = vs.addNew(new QNm("row"), rowType, qc, ii); - final SeqType colType = new ChoiceItemType( - Arrays.asList(STRING_O, POSITIVE_INTEGER_O)).seqType(); - final Var col = vs.addNew(new QNm("column"), colType, qc, ii); - final Get get = new Get(info, rows, columnIndex, - new Expr[] { new VarRef(ii, row), new VarRef(ii, col)}); - final Var[] params = { row, col}; - final FuncType funcType = FuncType.get(STRING_O, rowType, colType); - result.put("get", - new FuncItem(ii, get, params, AnnList.EMPTY, funcType, params.length, null)); - + result.put("get", Get.funcItem(rows, columnIndex, qc, ii)); return result.map(); } catch(final IOException ex) { - throw CSV_ERROR_X.get(info, ex); + throw CSV_ERROR_X.get(ii, ex); } } @@ -102,9 +67,9 @@ public Item item(final QueryContext qc, final InputInfo ii) throws QueryExceptio */ private static final class Get extends Arr { /** Result rows. */ - final Value rows; + private final Value rows; /** Column name to index mapping. */ - final XQMap columnIndex; + private final XQMap columnIndex; /** * Constructor. @@ -145,6 +110,29 @@ public Expr copy(final CompileContext cc, final IntObjMap vm) { public void toString(final QueryString qs) { qs.token("csv-get").params(exprs); } + + /** + * Create a function item for the get function. + * @param rows result rows + * @param columnIndex column name to index mapping + * @param qc query context + * @param ii input info + * @return function item + */ + protected static FuncItem funcItem(final Value rows, final XQMap columnIndex, + final QueryContext qc, final InputInfo ii) { + final VarScope vs = new VarScope(); + final SeqType rowType = POSITIVE_INTEGER_O; + final SeqType colType = new ChoiceItemType( + Arrays.asList(STRING_O, POSITIVE_INTEGER_O)).seqType(); + final Var row = vs.addNew(new QNm("row"), rowType, qc, ii); + final Var col = vs.addNew(new QNm("column"), colType, qc, ii); + final Get get = new Get(ii, rows, columnIndex, + new Expr[] { new VarRef(ii, row), new VarRef(ii, col) }); + final Var[] params = { row, col }; + final FuncType funcType = FuncType.get(STRING_O, rowType, colType); + return new FuncItem(ii, get, params, AnnList.EMPTY, funcType, params.length, null); + } } /** @@ -157,5 +145,28 @@ public static final class ParseCsvOptions extends FnCsvToArrays.CsvToArraysOptio public static final NumbersOption SELECT_COLUMNS = new NumbersOption("select-columns"); /** parse-csv option trim-rows. */ public static final BooleanOption TRIM_ROWS = new BooleanOption("trim-rows", false); + + /** Explicit column names. */ + public Value columnNames; + /** Whether to extract the header from the first input row. */ + private boolean extractHeader; + + @Override + void validate(final InputInfo ii) throws QueryException { + super.validate(ii); + final Value header = get(HEADER); + if(BOOLEAN_O.instance(header)) extractHeader = toBoolean((Item) header, ii); + else if(STRING_OM.instance(header)) columnNames = header; + else throw typeError(header, STRING_OM, ii); + } + + @Override + CsvParserOptions toCsvParserOptions() { + final CsvParserOptions parserOpts = super.toCsvParserOptions(); + parserOpts.set(CsvOptions.TRIM_ROWS, get(TRIM_ROWS)); + parserOpts.set(CsvOptions.SELECT_COLUMNS, get(SELECT_COLUMNS)); + parserOpts.set(CsvOptions.HEADER, extractHeader); + return parserOpts; + } } } From 424c1b8a2c9fa8c6031c350fd11e725f88697930 Mon Sep 17 00:00:00 2001 From: Gunther Rademacher Date: Fri, 3 Jan 2025 08:22:32 +0100 Subject: [PATCH 4/8] add csv-to-xml converter --- .../basex/io/parse/csv/CsvXmlConverter.java | 93 +++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 basex-core/src/main/java/org/basex/io/parse/csv/CsvXmlConverter.java diff --git a/basex-core/src/main/java/org/basex/io/parse/csv/CsvXmlConverter.java b/basex-core/src/main/java/org/basex/io/parse/csv/CsvXmlConverter.java new file mode 100644 index 0000000000..0a58acd657 --- /dev/null +++ b/basex-core/src/main/java/org/basex/io/parse/csv/CsvXmlConverter.java @@ -0,0 +1,93 @@ +package org.basex.io.parse.csv; + +import org.basex.build.csv.*; +import org.basex.query.*; +import org.basex.query.value.item.*; +import org.basex.query.value.node.*; +import org.basex.util.*; + +/** + * This class converts CSV data to XML according to the rules of fn:csv-to-xml. + * + * @author BaseX Team 2005-24, BSD License + * @author Gunther Rademacher + */ +public final class CsvXmlConverter extends CsvConverter { + /** QName. */ + protected static final QNm Q_FN_CSV = new QNm("csv", QueryText.FN_URI); + /** QName. */ + protected static final QNm Q_FN_ROWS = new QNm("rows", QueryText.FN_URI); + /** QName. */ + protected static final QNm Q_FN_ROW = new QNm("row", QueryText.FN_URI); + /** QName. */ + protected static final QNm Q_FN_FIELD = new QNm("field", QueryText.FN_URI); + /** QName. */ + protected static final QNm Q_FN_COLUMNS = new QNm("columns", QueryText.FN_URI); + /** QName. */ + protected static final QNm Q_FN_COLUMN = new QNm("column", QueryText.FN_URI); + + /** Document node. */ + private FBuilder doc; + /** Root node. */ + private FBuilder rows; + /** Record node. */ + private FBuilder record; + + /** + * Constructor. + * @param copts CSV options + * @param header optional header strings + */ + public CsvXmlConverter(final CsvParserOptions copts, final byte[]... header) { + super(copts); + for(final byte[] h : header) header(h); + } + + @Override + protected void record() { + finishRecord(); + record = FElem.build(Q_FN_ROW); + column = -1; + } + + @Override + public void header(final byte[] value) { + headers.add(shared.token(value)); + } + + @Override + protected void entry(final byte[] value) { + ++column; + if(skipEmpty && value.length == 0) return; + + final FBuilder elem = FElem.build(Q_FN_FIELD); + final byte[] name = headers.get(column); + if(name != null && name.length > 0) elem.add(Q_FN_COLUMN, name); + record.add(elem.add(shared.token(value))); + } + + @Override + protected void init(final String uri) { + doc = FDoc.build(Token.token(uri)); + rows = FElem.build(Q_FN_ROWS); + } + + @Override + protected FNode finish() { + finishRecord(); + final FBuilder root = FElem.build(Q_FN_CSV); + if(headers.size() > 0) { + final FBuilder columns = FElem.build(Q_FN_COLUMNS); + for(final byte[] h : headers) columns.add(FElem.build(Q_FN_COLUMN).add(h)); + root.add(columns); + } + return doc.add(root.add(rows)).finish(); + } + + /** + * Finishes a record. + */ + private void finishRecord() { + if(record != null) rows.add(record); + } +} From aafeec778cd400a69cbbe46e5cb01e1338856d75 Mon Sep 17 00:00:00 2001 From: Gunther Rademacher Date: Fri, 3 Jan 2025 16:25:55 +0100 Subject: [PATCH 5/8] add trimming of headers, add test cases, fix typo --- .../java/org/basex/build/csv/CsvOptions.java | 2 +- .../org/basex/io/parse/csv/CsvParser.java | 2 +- .../basex/io/parse/csv/CsvXmlConverter.java | 13 +- .../basex/query/func/fn/FnCsvToArrays.java | 4 +- .../org/basex/query/func/fn/FnCsvToXml.java | 4 +- .../org/basex/query/func/fn/FnParseCsv.java | 22 +- .../org/basex/query/func/FnModuleTest.java | 316 ++++++++++++++++++ 7 files changed, 351 insertions(+), 12 deletions(-) diff --git a/basex-core/src/main/java/org/basex/build/csv/CsvOptions.java b/basex-core/src/main/java/org/basex/build/csv/CsvOptions.java index a0ba0d4c1c..1e8a8caae7 100644 --- a/basex-core/src/main/java/org/basex/build/csv/CsvOptions.java +++ b/basex-core/src/main/java/org/basex/build/csv/CsvOptions.java @@ -34,7 +34,7 @@ public class CsvOptions extends Options { /** Option: quote character. */ public static final StringOption QUOTE_CHARACTER = new StringOption("quote", "\""); /** Option: trim whitespace. */ - public static final BooleanOption TRIM_WHITSPACE = new BooleanOption("trim-whitespace", false); + public static final BooleanOption TRIM_WHITESPACE = new BooleanOption("trim-whitespace", false); /** Option: strict quoting. */ public static final BooleanOption STRICT_QUOTING = new BooleanOption("strict-quoting", false); /** Option: trim-rows. */ diff --git a/basex-core/src/main/java/org/basex/io/parse/csv/CsvParser.java b/basex-core/src/main/java/org/basex/io/parse/csv/CsvParser.java index 2de9feb6df..428752ca97 100644 --- a/basex-core/src/main/java/org/basex/io/parse/csv/CsvParser.java +++ b/basex-core/src/main/java/org/basex/io/parse/csv/CsvParser.java @@ -68,7 +68,7 @@ public CsvParser(final TextInput input, final CsvParserOptions opts, final CsvCo quoteCharacter = opts.quoteCharacter(); quotes = opts.get(CsvOptions.QUOTES); backslashes = opts.get(CsvOptions.BACKSLASHES); - trimWhitespace = opts.get(CsvOptions.TRIM_WHITSPACE); + trimWhitespace = opts.get(CsvOptions.TRIM_WHITESPACE); trimRows = opts.get(CsvOptions.TRIM_ROWS); strictQuoting = opts.get(CsvOptions.STRICT_QUOTING); selectColumns = opts.get(CsvOptions.SELECT_COLUMNS); diff --git a/basex-core/src/main/java/org/basex/io/parse/csv/CsvXmlConverter.java b/basex-core/src/main/java/org/basex/io/parse/csv/CsvXmlConverter.java index 0a58acd657..681b2eae1d 100644 --- a/basex-core/src/main/java/org/basex/io/parse/csv/CsvXmlConverter.java +++ b/basex-core/src/main/java/org/basex/io/parse/csv/CsvXmlConverter.java @@ -9,7 +9,7 @@ /** * This class converts CSV data to XML according to the rules of fn:csv-to-xml. * - * @author BaseX Team 2005-24, BSD License + * @author BaseX Team, BSD License * @author Gunther Rademacher */ public final class CsvXmlConverter extends CsvConverter { @@ -52,7 +52,16 @@ record = FElem.build(Q_FN_ROW); @Override public void header(final byte[] value) { - headers.add(shared.token(value)); + header(value, true); + } + + /** + * Adds a new header. + * @param value header value + * @param trim whether to trim the header value + */ + public void header(final byte[] value, final boolean trim) { + headers.add(shared.token(trim ? Token.trim(value) : value)); } @Override diff --git a/basex-core/src/main/java/org/basex/query/func/fn/FnCsvToArrays.java b/basex-core/src/main/java/org/basex/query/func/fn/FnCsvToArrays.java index c6b889689d..7205c9e8dc 100644 --- a/basex-core/src/main/java/org/basex/query/func/fn/FnCsvToArrays.java +++ b/basex-core/src/main/java/org/basex/query/func/fn/FnCsvToArrays.java @@ -20,7 +20,7 @@ /** * Function implementation. * - * @author BaseX Team 2005-24, BSD License + * @author BaseX Team, BSD License * @author Gunther Rademacher */ public class FnCsvToArrays extends Parse { @@ -78,7 +78,7 @@ CsvParserOptions toCsvParserOptions() { parserOpts.set(CsvOptions.SEPARATOR, get(FIELD_DELIMITER)); parserOpts.set(CsvOptions.ROW_DELIMITER, get(ROW_DELIMITER)); parserOpts.set(CsvOptions.QUOTE_CHARACTER, get(QUOTE_CHARACTER)); - parserOpts.set(CsvOptions.TRIM_WHITSPACE, get(TRIM_WHITESPACE)); + parserOpts.set(CsvOptions.TRIM_WHITESPACE, get(TRIM_WHITESPACE)); parserOpts.set(CsvOptions.FORMAT, CsvFormat.XQUERY); parserOpts.set(CsvOptions.QUOTES, true); parserOpts.set(CsvOptions.STRICT_QUOTING, true); diff --git a/basex-core/src/main/java/org/basex/query/func/fn/FnCsvToXml.java b/basex-core/src/main/java/org/basex/query/func/fn/FnCsvToXml.java index d52876e424..b682f7637c 100644 --- a/basex-core/src/main/java/org/basex/query/func/fn/FnCsvToXml.java +++ b/basex-core/src/main/java/org/basex/query/func/fn/FnCsvToXml.java @@ -17,7 +17,7 @@ /** * Function implementation. * - * @author BaseX Team 2005-24, BSD License + * @author BaseX Team, BSD License * @author Gunther Rademacher */ public class FnCsvToXml extends Parse { @@ -35,7 +35,7 @@ public Item item(final QueryContext qc, final InputInfo ii) throws QueryExceptio if(options.columnNames != null) { for(final Item columnName : options.columnNames) { final byte[] token = toZeroToken(columnName, qc); - converter.header(names.add(token) ? token : Token.EMPTY); + converter.header(names.add(token) ? token : Token.EMPTY, false); } } return converter.convert(new IOContent(value), ii); diff --git a/basex-core/src/main/java/org/basex/query/func/fn/FnParseCsv.java b/basex-core/src/main/java/org/basex/query/func/fn/FnParseCsv.java index d5cbfc7fd7..90e98cb4f4 100644 --- a/basex-core/src/main/java/org/basex/query/func/fn/FnParseCsv.java +++ b/basex-core/src/main/java/org/basex/query/func/fn/FnParseCsv.java @@ -25,7 +25,7 @@ /** * Function implementation. * - * @author BaseX Team 2005-24, BSD License + * @author BaseX Team, BSD License * @author Gunther Rademacher */ public class FnParseCsv extends Parse { @@ -38,8 +38,22 @@ public Item item(final QueryContext qc, final InputInfo ii) throws QueryExceptio final CsvParserOptions parserOpts = options.toCsvParserOptions(); try { final XQMap map = (XQMap) CsvConverter.get(parserOpts).convert(new IOContent(value), ii); - final Value columns = options.columnNames != null - ? options.columnNames : map.get(CsvXQueryConverter.NAMES).atomValue(qc, ii); + final Value columns; + if(options.columnNames != null) { + columns = options.columnNames; + } else { + final Value names = map.get(CsvXQueryConverter.NAMES).atomValue(qc, ii); + if(parserOpts.get(CsvOptions.TRIM_WHITESPACE)) { + columns = names; + } + else { + final ValueBuilder vb = new ValueBuilder(qc); + for(final Item col : names) { + vb.add(Str.get(Token.trim(toZeroToken(col, qc)))); + }; + columns = vb.value(); + } + } final MapBuilder columnIndexBuilder = new MapBuilder(); int i = 0; for(final Item col : columns) { @@ -156,7 +170,7 @@ void validate(final InputInfo ii) throws QueryException { super.validate(ii); final Value header = get(HEADER); if(BOOLEAN_O.instance(header)) extractHeader = toBoolean((Item) header, ii); - else if(STRING_OM.instance(header)) columnNames = header; + else if(STRING_ZM.instance(header)) columnNames = header; else throw typeError(header, STRING_OM, ii); } diff --git a/basex-core/src/test/java/org/basex/query/func/FnModuleTest.java b/basex-core/src/test/java/org/basex/query/func/FnModuleTest.java index 9726b546cd..a3e0225539 100644 --- a/basex-core/src/test/java/org/basex/query/func/FnModuleTest.java +++ b/basex-core/src/test/java/org/basex/query/func/FnModuleTest.java @@ -394,6 +394,164 @@ public final class FnModuleTest extends SandboxTest { query(func.args(" data( [ (1 to 6) ! <_>{ 1 } ][. > 0 ] )"), 6); } + /** Test method. */ + @Test public void csvToArrays() { + final Function func = CSV_TO_ARRAYS; + + // Handling trivial input: + query(func.args(" ()"), ""); + query(func.args(""), ""); + query(func.args(" char('\\n')"), "[]"); + query(func.args(" ' '", " { 'trim-whitespace': true() }"), ""); + query(func.args(" ' '", " { 'trim-whitespace': false() }"), "[\" \"]"); + query(func.args(" ` {char('\\n')}`", " { 'trim-whitespace': true() }"), "[]"); + query(func.args(" ` {char('\\n')}`", " { 'trim-whitespace': false() }"), "[\" \"]"); + query(func.args(" `{char('\\n')} `", " { 'trim-whitespace': true() }"), "[]"); + query(func.args(" `{char('\\n')} `", " { 'trim-whitespace': false() }"), "[]\n[\" \"]"); + // Using newline separators: + query(func.args( + " `name,city{ char('\\n') }` ||\n" + + " `Bob,Berlin{ char('\\n') }` ||\n" + + " `Alice,Aachen{ char('\\n') }`"), + "[\"name\",\"city\"]\n" + + "[\"Bob\",\"Berlin\"]\n" + + "[\"Alice\",\"Aachen\"]"); + query( + " let $CRLF := `{ char('\\r') }{ char('\\n') }`\n" + + "return " + func.args( + " `name,city{ $CRLF }` ||\n" + + " `Bob,Berlin{ $CRLF }` ||\n" + + " `Alice,Aachen{ $CRLF }`\n"), + "[\"name\",\"city\"]\n" + + "[\"Bob\",\"Berlin\"]\n" + + "[\"Alice\",\"Aachen\"]"); + // Quote handling: + query(func.args( + " string-join(\n" + + " (`\"name\",\"city\"`, `\"Bob\",\"Berlin\"`, `\"Alice\",\"Aachen\"`),\n" + + " char('\\n')\n" + + " )"), + "[\"name\",\"city\"]\n" + + "[\"Bob\",\"Berlin\"]\n" + + "[\"Alice\",\"Aachen\"]"); + query(func.args( + " `\"name\",\"city\"{ char('\\n') }` ||\n" + + " `\"Bob \"\"The Exemplar\"\" Mustermann\",\"Berlin\"{ char('\\n') }`"), + "[\"name\",\"city\"]\n" + + "[\"Bob \"\"The Exemplar\"\" Mustermann\",\"Berlin\"]"); + // Non-default record- and field-delimiters: + query(func.args("name;city\u00A7Bob;Berlin\u00A7Alice;Aachen", + " { \"row-delimiter\": \"\u00A7\", \"field-delimiter\": \";\" }"), + "[\"name\",\"city\"]\n" + + "[\"Bob\",\"Berlin\"]\n" + + "[\"Alice\",\"Aachen\"]"); + // Non-default quote character: + query(func.args( + " string-join(\n" + + " (\"|name|,|city|\", \"|Bob|,|Berlin|\"),\n" + + " char('\\n')\n" + + " )", " { \"quote-character\": \"|\" }"), + "[\"name\",\"city\"]\n" + + "[\"Bob\",\"Berlin\"]"); + // Trimming whitespace in fields: + query(func.args( + " string-join(\n" + + " (\"name ,city \", \"Bob ,Berlin \", \"Alice ,Aachen \"),\n" + + " char('\\n')\n" + + " )", " { \"trim-whitespace\": true() }"), + "[\"name\",\"city\"]\n" + + "[\"Bob\",\"Berlin\"]\n" + + "[\"Alice\",\"Aachen\"]"); + } + + /** Test method. */ + @Test public void csvToXml() { + final Function func = CSV_TO_XML; + final String queryPrefix = + "let $crlf := char('\\r') || char('\\n')\n" + + "let $csv-string := `name,city{ $crlf }Bob,Berlin{ $crlf }Alice,Aachen{ $crlf }`\n" + + "let $csv-uneven-cols := concat(\n" + + " `date,name,city,amount,currency,original amount,note{ $crlf }`,\n" + + " `2023-07-19,Bob,Berlin,10.00,USD,13.99{ $crlf }`,\n" + + " `2023-07-20,Alice,Aachen,15.00{ $crlf }`,\n" + + " `2023-07-20,Charlie,Celle,15.00,GBP,11.99,cake,not a lie{ $crlf }`\n" + + ")\n" + + "return "; + final String resultTag = ""; + + // An empty CSV with default column extraction (false): + query(func.args(" ()"), ""); + query(func.args(""), resultTag + ""); + query(func.args(" char('\\n')"), resultTag + ""); + // An empty CSV with header extraction: + query(func.args("", " { 'header': true() }"), resultTag + ""); + // An empty CSV with explicit column names: + query(func.args("", " { \"header\": (\"name\", \"\", \"city\") }"), resultTag + "" + + "namecity"); + // With defaults for delimiters and quotes, recognizing headers: + query(queryPrefix + func.args(" $csv-string", " { 'header': true() }"), + resultTag + "namecityBobBerlinAliceAachen"); + // Filtering columns + query(queryPrefix + func.args(" $csv-uneven-cols", + " { \"header\": true(), \n" + + " \"select-columns\": (2, 1, 4)\n" + + " }"), + resultTag + "namedateamount" + + "Bob2023-07-19" + + "10.00Alice" + + "2023-07-2015.00" + + "Charlie2023-07-20" + + "15.00"); + // Ragged rows + query(queryPrefix + func.args(" $csv-uneven-cols", " { \"header\": true() }"), + resultTag + "datenamecity" + + "amountcurrencyoriginal amountnote" + + "2023-07-19BobBerlin10.00" + + "USD13.99" + + "2023-07-20Alice" + + "Aachen15.00" + + "2023-07-20CharlieCelle15.00GBP11.99cakenot a lie"); + // Trimming rows to constant width + query(queryPrefix + func.args(" $csv-uneven-cols", + " { \"header\": true(),\n" + + " \"trim-rows\": true()\n" + + " }"), + resultTag + "datenamecity" + + "amountcurrencyoriginal amountnote" + + "2023-07-19BobBerlin10.00" + + "USD13.99" + + "2023-07-20AliceAachen15.00" + + "2023-07-20CharlieCelle15.00" + + "GBP11.99" + + "cake"); + // Specifying a fixed number of columns + query(queryPrefix + func.args(" $csv-uneven-cols", + " { \"header\": true(),\n" + + " \"select-columns\": 1 to 6\n" + + " }"), + resultTag + "datenamecity" + + "amountcurrencyoriginal amount" + + "2023-07-19BobBerlin10.00USD13.992023-07-20AliceAachen15.00" + + "2023-07-20" + + "CharlieCelle15.00GBP11.99"); + } + /** Test method. */ @Test public void decodeFromUri() { final Function func = DECODE_FROM_URI; @@ -1737,6 +1895,164 @@ public final class FnModuleTest extends SandboxTest { query("let $n :=
  • return " + func.args(" ($n, $n)"), "
  • "); } + /** Test method. */ + @Test public void parseCsv() { + final Function func = PARSE_CSV; + final String display = + "let $display := fn($result) {\n" + + " (: tidy up the result for display (function items cannot be properly displayed) :) \n" + + " map:put($result, \"get\", \"(: function :)\")\n" + + "}\n"; + + // Default delimiters, no column headers: + query(display + + "let $input := string-join(\n" + + " (\"name,city\", \"Bob,Berlin\", \"Alice,Aachen\"),\n" + + " char('\\n')\n" + + ")\n" + + "let $result := " + func.args(" $input") + "\n" + + "return (\n" + + " $result => $display(),\n" + + " $result?get(1, 2),\n" + + " $result?get(2, 2)\n" + + ")", + "{\"columns\":(),\"column-index\":{},\"rows\":([\"name\",\"city\"],[\"Bob\",\"Berlin\"]," + + "[\"Alice\",\"Aachen\"]),\"get\":\"(: function :)\"}\n" + + "city\n" + + "Berlin"); + // Default delimiters, column headers: + query(display + + "let $input := string-join(\n" + + " (\"name,city\", \"Bob,Berlin\", \"Alice,Aachen\"),\n" + + " char('\\n')\n" + + ")\n" + + "let $result := " + func.args(" $input", " { \"header\": true() }") + "\n" + + "return (\n" + + " $result => $display(),\n" + + " $result?get(1, \"name\"),\n" + + " $result?get(2, \"city\")\n" + + ")", + "{\"columns\":(\"name\",\"city\"),\"column-index\":{\"name\":1,\"city\":2},\"rows\":(" + + "[\"Bob\",\"Berlin\"],[\"Alice\",\"Aachen\"]),\"get\":\"(: function :)\"}\n" + + "Bob\n" + + "Aachen"); + // Custom delimiters, no column headers: + query(display + + "let $options := {\n" + + " \"row-delimiter\": \"\u00A7\", \n" + + " \"field-delimiter\": \";\", \n" + + " \"quote-character\": \"|\"\n" + + "}\n" + + "let $input := \"|name|;|city|\u00A7|Bob|;|Berlin|\u00A7|Alice|;|Aachen|\"\n" + + "let $result := " + func.args(" $input", " $options") + "\n" + + "return (\n" + + " $result => $display(),\n" + + " $result?get(3, 1)\n" + + ")", + "{\"columns\":(),\"column-index\":{},\"rows\":([\"name\",\"city\"],[\"Bob\",\"Berlin\"]," + + "[\"Alice\",\"Aachen\"]),\"get\":\"(: function :)\"}\n" + + "Alice"); + // Supplied column names: + query(display + + "let $headers := (\"Person\", \"Location\")\n" + + "let $options := { \"header\": $headers, \"row-delimiter\": \";\" }\n" + + "let $input := \"Alice,Aachen;Bob,Berlin;\"\n" + + "let $parsed-csv := " + func.args(" $input", " $options") + "\n" + + "return (\n" + + " $parsed-csv => $display(),\n" + + " $parsed-csv?get(2, \"Location\")\n" + + ")", + "{\"columns\":(\"Person\",\"Location\"),\"column-index\":{\"Person\":1,\"Location\":2}," + + "\"rows\":([\"Alice\",\"Aachen\"],[\"Bob\",\"Berlin\"]),\"get\":\"(: function :)\"}\n" + + "Berlin"); + // Filtering columns, with ragged input and header: true() + query(display + + "let $input := string-join((\n" + + " \"date,name,city,amount,currency,original amount,note\",\n" + + " \"2023-07-19,Bob,Berlin,10.00,USD,13.99\",\n" + + " \"2023-07-20,Alice,Aachen,15.00\",\n" + + " \"2023-07-20,Charlie,Celle,15.00,GBP,11.99,cake,not a lie\"\n" + + "), char('\\n'))\n" + + "let $options := {\n" + + " \"header\": true(),\n" + + " \"select-columns\": (2, 1, 4)\n" + + "}\n" + + "let $result := " + func.args(" $input", " $options") + "\n" + + "return (\n" + + " $result => $display(),\n" + + " $result?get(2, \"amount\")\n" + + ")", + "{\"columns\":(\"name\",\"date\",\"amount\"),\"column-index\":{\"name\":1,\"date\":2," + + "\"amount\":3},\"rows\":([\"Bob\",\"2023-07-19\",\"10.00\"],[\"Alice\",\"2023-07-20\"," + + "\"15.00\"],[\"Charlie\",\"2023-07-20\",\"15.00\"]),\"get\":\"(: function :)\"}\n" + + "15.00"); + // Filtering columns, with supplied column map + query(display + + "let $input := string-join((\n" + + " \"2023-07-20,Alice,Aachen,15.00\",\n" + + " \"2023-07-19,Bob,Berlin,10.00,USD,13.99\",\n" + + " \"2023-07-20,Charlie,Celle,15.00,GBP,11.99,cake,not a lie\"\n" + + "), char('\\n'))\n" + + "let $options := { \n" + + " \"header\": ( \"Person\", \"\", \"Amount\" ),\n" + + " \"select-columns\": (2, 1, 4)\n" + + "}\n" + + "let $result := " + func.args(" $input", " $options") + "\n" + + "return (\n" + + " $result => $display(),\n" + + " $result?get(2, \"Person\"),\n" + + " $result?get(2, \"Amount\")\n" + + ")", + "{\"columns\":(\"Person\",\"\",\"Amount\"),\"column-index\":{\"Person\":1,\"Amount\":3}," + + "\"rows\":([\"Alice\",\"2023-07-20\",\"15.00\"],[\"Bob\",\"2023-07-19\",\"10.00\"]," + + "[\"Charlie\",\"2023-07-20\",\"15.00\"]),\"get\":\"(: function :)\"}\n" + + "Bob\n" + + "10.00"); + // Specifying the number of columns explicitly, with header: false() + query(display + + "let $input := string-join((\n" + + " \"date, name, amount, currency, original amount\",\n" + + " \"2023-07-19,Bob, 10.00, USD, 13.99\",\n" + + " \"2023-07-20,Alice, 15.00\",\n" + + " \"2023-07-20,Charlie, 15.00, GBP, 11.99, extra data\"\n" + + "), char('\\n'))\n" + + "let $options := {\n" + + " \"header\": false(), \n" + + " \"select-columns\": 1 to 5, \n" + + " \"trim-whitespace\" :true()\n" + + "}\n" + + "let $result := " + func.args(" $input", " $options") + "\n" + + "return (\n" + + " $result => $display(),\n" + + " $result?get(4, 3)\n" + + ")", + "{\"columns\":(),\"column-index\":{},\"rows\":([\"date\",\"name\",\"amount\",\"currency\"," + + "\"original amount\"],[\"2023-07-19\",\"Bob\",\"10.00\",\"USD\",\"13.99\"],[\"2023-07-20\"," + + "\"Alice\",\"15.00\",\"\",\"\"],[\"2023-07-20\",\"Charlie\",\"15.00\",\"GBP\",\"11.99\"])," + + "\"get\":\"(: function :)\"}\n" + + "15.00"); + // Specifying the number of columns with a number and header: true() + query(display + + "let $input := string-join((\n" + + " \"date,name,city,amount,currency,original amount,note\",\n" + + " \"2023-07-19,Bob,Berlin,10.00,USD,13.99\",\n" + + " \"2023-07-20,Alice,Aachen,15.00\",\n" + + " \"2023-07-20,Charlie,Celle,15.00,GBP,11.99,cake,not a lie\"\n" + + "), char('\\n'))\n" + + "let $options := { \"header\": true(), \"select-columns\": 1 to 6 }\n" + + "let $result := " + func.args(" $input", " $options") + "\n" + + "return (\n" + + " $result => $display(),\n" + + " $result?get(3, \"original amount\")\n" + + ")", + "{\"columns\":(\"date\",\"name\",\"city\",\"amount\",\"currency\",\"original amount\")," + + "\"column-index\":{\"date\":1,\"name\":2,\"city\":3,\"amount\":4,\"currency\":5," + + "\"original amount\":6},\"rows\":([\"2023-07-19\",\"Bob\",\"Berlin\",\"10.00\",\"USD\"," + + "\"13.99\"],[\"2023-07-20\",\"Alice\",\"Aachen\",\"15.00\",\"\",\"\"],[\"2023-07-20\"," + + "\"Charlie\",\"Celle\",\"15.00\",\"GBP\",\"11.99\"]),\"get\":\"(: function :)\"}\n" + + "11.99"); + } + /** Test method. */ @Test public void parseIetfDate() { final Function func = PARSE_IETF_DATE; From 51147344caa13262fd45ff02018a9d4e02cd8450 Mon Sep 17 00:00:00 2001 From: Gunther Rademacher Date: Fri, 3 Jan 2025 18:57:17 +0100 Subject: [PATCH 6/8] refactoring --- .../java/org/basex/build/csv/CsvOptions.java | 29 +++++++++---------- .../java/org/basex/build/csv/CsvParser.java | 2 +- .../org/basex/gui/dialog/DialogCsvParser.java | 2 +- .../org/basex/io/parse/csv/CsvConverter.java | 11 +++++++ .../org/basex/io/parse/csv/CsvParser.java | 13 +++++---- .../basex/query/func/fn/FnCsvToArrays.java | 1 - .../java/org/basex/util/http/Payload.java | 2 +- 7 files changed, 35 insertions(+), 25 deletions(-) diff --git a/basex-core/src/main/java/org/basex/build/csv/CsvOptions.java b/basex-core/src/main/java/org/basex/build/csv/CsvOptions.java index 1e8a8caae7..bcacd27c2c 100644 --- a/basex-core/src/main/java/org/basex/build/csv/CsvOptions.java +++ b/basex-core/src/main/java/org/basex/build/csv/CsvOptions.java @@ -35,7 +35,7 @@ public class CsvOptions extends Options { public static final StringOption QUOTE_CHARACTER = new StringOption("quote", "\""); /** Option: trim whitespace. */ public static final BooleanOption TRIM_WHITESPACE = new BooleanOption("trim-whitespace", false); - /** Option: strict quoting. */ + /** Option: strict quoting (implies QUOTES). */ public static final BooleanOption STRICT_QUOTING = new BooleanOption("strict-quoting", false); /** Option: trim-rows. */ public static final BooleanOption TRIM_ROWS = new BooleanOption("trim-rows", false); @@ -111,11 +111,7 @@ public int separator() { for(final CsvSep s : CsvSep.values()) { if(sep.equals(s.toString())) return s.sep; } - if(sep.codePointCount(0, sep.length()) == 1) { - final int cp = sep.codePointAt(0); - if(XMLToken.valid(cp)) return cp; - } - return -1; + return validate(sep); } /** @@ -123,12 +119,7 @@ public int separator() { * @return separator */ public int rowDelimiter() { - final String rd = get(ROW_DELIMITER); - if(rd.codePointCount(0, rd.length()) == 1) { - final int cp = rd.codePointAt(0); - if(XMLToken.valid(cp)) return cp; - } - return -1; + return validate(get(ROW_DELIMITER)); } /** @@ -136,9 +127,17 @@ public int rowDelimiter() { * @return separator */ public int quoteCharacter() { - final String q = get(QUOTE_CHARACTER); - if(q.codePointCount(0, q.length()) == 1) { - final int cp = q.codePointAt(0); + return validate(get(QUOTE_CHARACTER)); + } + + /** + * Validates a single code point passed as a string. + * @param single single character string + * @return code point or {@code -1} + */ + private int validate(final String single) { + if(single.codePointCount(0, single.length()) == 1) { + final int cp = single.codePointAt(0); if(XMLToken.valid(cp)) return cp; } return -1; diff --git a/basex-core/src/main/java/org/basex/build/csv/CsvParser.java b/basex-core/src/main/java/org/basex/build/csv/CsvParser.java index fcd708469b..661f8a9836 100644 --- a/basex-core/src/main/java/org/basex/build/csv/CsvParser.java +++ b/basex-core/src/main/java/org/basex/build/csv/CsvParser.java @@ -37,7 +37,7 @@ public CsvParser(final IO source, final MainOptions options, final CsvParserOpti protected void parse() throws IOException { csv = pushJob(new CsvBuilder(copts, builder)); try { - csv.convert(source, null); + csv.convert(source); } catch(final QueryException ex) { throw new QueryIOException(ex); } finally { diff --git a/basex-core/src/main/java/org/basex/gui/dialog/DialogCsvParser.java b/basex-core/src/main/java/org/basex/gui/dialog/DialogCsvParser.java index c037d94962..6d3208702d 100644 --- a/basex-core/src/main/java/org/basex/gui/dialog/DialogCsvParser.java +++ b/basex-core/src/main/java/org/basex/gui/dialog/DialogCsvParser.java @@ -127,7 +127,7 @@ boolean action(final boolean active) { lax.setEnabled(head && copts.get(CsvOptions.FORMAT) == CsvFormat.DIRECT); skipEmpty.setEnabled(head); - final Item item = CsvConverter.get(copts).convert(new IOContent(EXAMPLE), null); + final Item item = CsvConverter.get(copts).convert(new IOContent(EXAMPLE)); example.setText(example(MainParser.CSV.name(), EXAMPLE, item)); } catch(final QueryException | IOException ex) { example.setText(error(ex)); diff --git a/basex-core/src/main/java/org/basex/io/parse/csv/CsvConverter.java b/basex-core/src/main/java/org/basex/io/parse/csv/CsvConverter.java index 43b287ca0a..558986de50 100644 --- a/basex-core/src/main/java/org/basex/io/parse/csv/CsvConverter.java +++ b/basex-core/src/main/java/org/basex/io/parse/csv/CsvConverter.java @@ -59,6 +59,17 @@ protected CsvConverter(final CsvParserOptions copts) { skipEmpty = copts.get(CsvParserOptions.SKIP_EMPTY) && copts.get(CsvOptions.HEADER); } + /** + * Converts the specified input to an XQuery value. + * @param input input + * @return result + * @throws QueryException query exception + * @throws IOException I/O exception + */ + public final Item convert(final IO input) throws QueryException, IOException { + return convert(input, null); + } + /** * Converts the specified input to an XQuery value. * @param input input diff --git a/basex-core/src/main/java/org/basex/io/parse/csv/CsvParser.java b/basex-core/src/main/java/org/basex/io/parse/csv/CsvParser.java index 428752ca97..d66467100d 100644 --- a/basex-core/src/main/java/org/basex/io/parse/csv/CsvParser.java +++ b/basex-core/src/main/java/org/basex/io/parse/csv/CsvParser.java @@ -35,7 +35,7 @@ public final class CsvParser { private final boolean quotes; /** Trim whitespace. */ private final boolean trimWhitespace; - /** Trim whitespace. */ + /** Trim rows. */ private final boolean trimRows; /** Strict quoting. */ private final boolean strictQuoting; @@ -66,11 +66,11 @@ public CsvParser(final TextInput input, final CsvParserOptions opts, final CsvCo separator = opts.separator(); rowDelimiter = opts.rowDelimiter(); quoteCharacter = opts.quoteCharacter(); - quotes = opts.get(CsvOptions.QUOTES); + strictQuoting = opts.get(CsvOptions.STRICT_QUOTING); + quotes = strictQuoting || opts.get(CsvOptions.QUOTES); backslashes = opts.get(CsvOptions.BACKSLASHES); trimWhitespace = opts.get(CsvOptions.TRIM_WHITESPACE); trimRows = opts.get(CsvOptions.TRIM_ROWS); - strictQuoting = opts.get(CsvOptions.STRICT_QUOTING); selectColumns = opts.get(CsvOptions.SELECT_COLUMNS); for(final int sc : selectColumns) { if(sc < 1) throw QueryError.typeError(Int.get(sc), SeqType.POSITIVE_INTEGER_O, null); @@ -109,11 +109,12 @@ public void parse(final InputInfo ii) throws QueryException, IOException { } add(entry, ch); } else if(ch == quoteCharacter) { - if(quotes) { - if(strictQuoting && !entry.isEmpty()) throw QueryError.CSV_QUOTING_X.get(ii, - entry + new String(Character.toChars(quoteCharacter))); + if(quotes && entry.isEmpty()) { // parse quote quoted = true; + } else if (strictQuoting) { + throw QueryError.CSV_QUOTING_X.get(ii, + entry + new String(Character.toChars(quoteCharacter))); } else { ch = input.read(); if(ch != quoteCharacter || backslashes) add(entry, quoteCharacter); diff --git a/basex-core/src/main/java/org/basex/query/func/fn/FnCsvToArrays.java b/basex-core/src/main/java/org/basex/query/func/fn/FnCsvToArrays.java index 7205c9e8dc..48115cef80 100644 --- a/basex-core/src/main/java/org/basex/query/func/fn/FnCsvToArrays.java +++ b/basex-core/src/main/java/org/basex/query/func/fn/FnCsvToArrays.java @@ -80,7 +80,6 @@ CsvParserOptions toCsvParserOptions() { parserOpts.set(CsvOptions.QUOTE_CHARACTER, get(QUOTE_CHARACTER)); parserOpts.set(CsvOptions.TRIM_WHITESPACE, get(TRIM_WHITESPACE)); parserOpts.set(CsvOptions.FORMAT, CsvFormat.XQUERY); - parserOpts.set(CsvOptions.QUOTES, true); parserOpts.set(CsvOptions.STRICT_QUOTING, true); return parserOpts; } diff --git a/basex-core/src/main/java/org/basex/util/http/Payload.java b/basex-core/src/main/java/org/basex/util/http/Payload.java index 3e50eacf67..10b5040a01 100644 --- a/basex-core/src/main/java/org/basex/util/http/Payload.java +++ b/basex-core/src/main/java/org/basex/util/http/Payload.java @@ -310,7 +310,7 @@ public static Value value(final byte[] body, final MediaType type, } else if(type.isCSV()) { final CsvParserOptions opts = new CsvParserOptions(options.get(MainOptions.CSVPARSER)); opts.assign(type); - return CsvConverter.get(opts).convert(io, null); + return CsvConverter.get(opts).convert(io); } else if(type.is(MediaType.TEXT_HTML)) { final HtmlOptions opts = new HtmlOptions(options.get(MainOptions.HTMLPARSER)); opts.assign(type); From 423447889742c93c8f20490044af030a5bcaac32 Mon Sep 17 00:00:00 2001 From: Gunther Rademacher Date: Tue, 7 Jan 2025 13:04:48 +0100 Subject: [PATCH 7/8] keep empty lines in XML result, too; add tests for changed behaviour with respect to empty lines --- .../io/parse/csv/CsvDirectConverter.java | 2 +- .../org/basex/query/func/CsvModuleTest.java | 36 ++++++++++++++++++- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/basex-core/src/main/java/org/basex/io/parse/csv/CsvDirectConverter.java b/basex-core/src/main/java/org/basex/io/parse/csv/CsvDirectConverter.java index 11139c2ca4..f466e3fabb 100644 --- a/basex-core/src/main/java/org/basex/io/parse/csv/CsvDirectConverter.java +++ b/basex-core/src/main/java/org/basex/io/parse/csv/CsvDirectConverter.java @@ -69,6 +69,6 @@ protected FNode finish() { * Finishes a record. */ private void finishRecord() { - if(record != null && !record.isEmpty()) root.add(record); + if(record != null) root.add(record); } } diff --git a/basex-core/src/test/java/org/basex/query/func/CsvModuleTest.java b/basex-core/src/test/java/org/basex/query/func/CsvModuleTest.java index 906fa5719e..edb079d4ee 100644 --- a/basex-core/src/test/java/org/basex/query/func/CsvModuleTest.java +++ b/basex-core/src/test/java/org/basex/query/func/CsvModuleTest.java @@ -47,7 +47,23 @@ public final class CsvModuleTest extends SandboxTest { parse("X,Y\n1,", header, "1"); parse("X,Y\n1,", skipEmpty + ", " + header, "1"); parse("X,Y\n,1", skipEmpty + ", " + header, "1"); - parse("X,Y\n,", skipEmpty + ", " + header, ""); + // was: ""); + parse("X,Y\n,", skipEmpty + ", " + header, ""); + + // was: ""); + parse("\n", "", ""); + // was: ""); + parse("\n\n", "", ""); + // was: "X"); + parse("\n\nX", "", "X"); + // was: "...XY"); + parse("X\n\nY", "", "...XY"); + parse("X\n", "", "X"); + // was: "X"); + parse("X\n\n", "", "X"); + + parse(" ' \" X\"'", "'quotes': true()", " \" X\""); + parse(" '\"X \" '", "'quotes': true()", "X "); parseError("", "'x': 'y'"); parseError("", "'format': 'abc'"); @@ -60,6 +76,24 @@ public final class CsvModuleTest extends SandboxTest { parse("X\nY", "'header': false(), 'format': 'xquery'", "...[\"X\"],[\"Y\"]"); parse("X\nY", "'header': false(), 'format': 'xquery'", "...\"records\":([\"X\"],[\"Y\"])"); parse("X\nY", "'header': true(), 'format': 'xquery'", "...\"names\":[\"X\"]"); + + parse("", "'format': 'xquery'", "{\"records\":()}"); + // was: "{\"records\":()}"); + parse("\n", "'format': 'xquery'", "{\"records\":[]}"); + // was: "{\"records\":()}"); + parse("\n\n", "'format': 'xquery'", "{\"records\":([],[])}"); + // was: "{\"records\":[\"X\"]}"); + parse("\n\nX", "'format': 'xquery'", "{\"records\":([],[],[\"X\"])}"); + // was: "{\"records\":([\"X\"],[\"Y\"])}"); + parse("X\n\nY", "'format': 'xquery'", "{\"records\":([\"X\"],[],[\"Y\"])}"); + parse("X\n", "'format': 'xquery'", "{\"records\":[\"X\"]}"); + // was: "{\"records\":[\"X\"]}"); + parse("X\n\n", "'format': 'xquery'", "{\"records\":([\"X\"],[])}"); + + parse(" ' \"\"'", "'quotes': true(), 'format': 'xquery'", "{\"records\":[\" \"\"\"]}"); + parse(" ' \" X\"'", "'quotes': true(), 'format': 'xquery'", "{\"records\":[\" \"\" X\"\"\"]}"); + parse(" '\"\" '", "'quotes': true(), 'format': 'xquery'", "{\"records\":[\" \"]}"); + parse(" '\"X \" '", "'quotes': true(), 'format': 'xquery'", "{\"records\":[\"X \"]}"); } /** Test method. */ From 70167d460a42df708738556424423d5ccf80ae0b Mon Sep 17 00:00:00 2001 From: Gunther Rademacher Date: Tue, 7 Jan 2025 13:55:31 +0100 Subject: [PATCH 8/8] minor changes --- .../src/main/java/org/basex/io/parse/csv/CsvParser.java | 6 +++--- .../src/main/java/org/basex/query/func/fn/FnParseCsv.java | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/basex-core/src/main/java/org/basex/io/parse/csv/CsvParser.java b/basex-core/src/main/java/org/basex/io/parse/csv/CsvParser.java index d66467100d..b2c06794c7 100644 --- a/basex-core/src/main/java/org/basex/io/parse/csv/CsvParser.java +++ b/basex-core/src/main/java/org/basex/io/parse/csv/CsvParser.java @@ -33,11 +33,11 @@ public final class CsvParser { private final int quoteCharacter; /** Parse quotes. */ private final boolean quotes; - /** Trim whitespace. */ + /** Trim whitespace (see {@link CsvOptions#TRIM_WHITESPACE}). */ private final boolean trimWhitespace; - /** Trim rows. */ + /** Trim rows (see {@link CsvOptions#TRIM_ROWS}). */ private final boolean trimRows; - /** Strict quoting. */ + /** Disallow field content outside of quotes. */ private final boolean strictQuoting; /** Select columns. */ private final int[] selectColumns; diff --git a/basex-core/src/main/java/org/basex/query/func/fn/FnParseCsv.java b/basex-core/src/main/java/org/basex/query/func/fn/FnParseCsv.java index 90e98cb4f4..2a803722ce 100644 --- a/basex-core/src/main/java/org/basex/query/func/fn/FnParseCsv.java +++ b/basex-core/src/main/java/org/basex/query/func/fn/FnParseCsv.java @@ -66,8 +66,8 @@ public Item item(final QueryContext qc, final InputInfo ii) throws QueryExceptio final Value rows = map.get(CsvXQueryConverter.RECORDS); final MapBuilder result = new MapBuilder(); - result.put(Str.get("columns"), columns); - result.put(Str.get("column-index"), columnIndex); + result.put("columns", columns); + result.put("column-index", columnIndex); result.put("rows", rows); result.put("get", Get.funcItem(rows, columnIndex, qc, ii)); return result.map();