Skip to content

Commit

Permalink
Support short-circuiting in tsv-filter (retain command argument order…
Browse files Browse the repository at this point in the history
… via getoptInorder). (#8)

Short-circuiting is added by using getopt_inorder (in common) to process and assemble filter operators in the order on the command line. For example, if a numeric field might have a value like 'none', there was no way to run a numeric test on it without triggering a numeric conversion error when processing 'none'. Now, a test for 'none' can precede the numeric test.
  • Loading branch information
jondegenhardt authored Dec 12, 2016
1 parent 9e38a50 commit 979d02a
Show file tree
Hide file tree
Showing 6 changed files with 137 additions and 17 deletions.
107 changes: 94 additions & 13 deletions common/src/getopt_inorder.d
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ options in ways where order specified by the user is taken into account.
More details here: https://issues.dlang.org/show_bug.cgi?id=16539
This should only be used when retaining order is important. Though minimized, there are
cases that don't work as expected, the most important involving option arguments starting
with a dash. See the getoptInorder function comments for specifics.
Copyright (c) 2016, eBay Software Foundation
Initially written by Jon Degenhardt
Expand Down Expand Up @@ -54,47 +58,104 @@ private void checkForUnsupportedConfigOptions(T...)(T opts)
}
}

/* hasStopOnFirstNotOption walks the config list returns true if an one of the
/* hasStopOnFirstNotOption walks the config list returns true if one of the
* options in is std.getopt.config.stopOnFirstNonOption.
*/
private bool hasStopOnFirstNonOption(T...)(T opts)
{
bool hasStopOn = false;
static if (opts.length > 0)
{
static if (is(typeof(opts[0]) : std.getopt.config))
{
hasStopOn |= (opts[0] == std.getopt.config.stopOnFirstNonOption);
if (opts[0] == std.getopt.config.stopOnFirstNonOption) return true;
}

checkForUnsupportedConfigOptions(opts[1..$]);
return hasStopOnFirstNonOption(opts[1..$]);
}
return hasStopOn;
return false;
}

unittest
{
int[] vals;

assert(!hasStopOnFirstNonOption(
"a|aa", "aaa VAL", &vals,
"b|bb", "bbb VAL", &vals,
"c|cc", "ccc VAL", &vals,
));

assert(hasStopOnFirstNonOption(
std.getopt.config.stopOnFirstNonOption,
"a|aa", "aaa VAL", &vals,
"b|bb", "bbb VAL", &vals,
"c|cc", "ccc VAL", &vals,
));

assert(hasStopOnFirstNonOption(
"a|aa", "aaa VAL", &vals,
std.getopt.config.stopOnFirstNonOption,
"b|bb", "bbb VAL", &vals,
"c|cc", "ccc VAL", &vals,
));

assert(hasStopOnFirstNonOption(
"a|aa", "aaa VAL", &vals,
"b|bb", "bbb VAL", &vals,
std.getopt.config.stopOnFirstNonOption,
"c|cc", "ccc VAL", &vals,
));

assert(hasStopOnFirstNonOption(
"a|aa", "aaa VAL", &vals,
"b|bb", "bbb VAL", &vals,
"c|cc", "ccc VAL", &vals,
std.getopt.config.stopOnFirstNonOption,
));
}

/* getoptInorder is a cover to std.getopt that processes command line options in the
* order on the command.
*
* This is intended for command line argument processing where the order of arguments
* on the command line is important. The standard library std.getopt routine processes
* options in the order listed in call to getopt.
* options in the order listed in call to getopt. Behavioral changes involve order of
* callback processing and array filling.
*
* Processing in command line order is a change when filling arrays or using callback
* functions. The std.getopt.config.required option is not supported, otherwise this
* routine works the same as getopt.
* Other changes from std.getopt:
* - The std.getopt.config.required option is not supported.
* - Single digits cannot be used as short options. e.g. '-1' cannot be an option.
* - Non-numeric option arguments starting with a dash are not interpreted correctly,
* unless it looks like a negative number or is a single dash. Some examples,
* assuming ("--val") takes one argument:
* ["--val", "-9"] - Okay, "-9" is arg
* ["--val", "-"] - Okay, "-" is arg
* ["--val", "-a"] - Not okay, "-a" is treated as separate option.
*/
GetoptResult getoptInorder(T...)(ref string[] args, T opts)
{
import std.algorithm : min, remove;
import std.typecons : tuple;
debug import std.stdio;

debug import std.stdio;
debug writeln("\n=========================\n");
debug writeln("[getoptInorder] args: ", args, " opts: ", opts);

checkForUnsupportedConfigOptions(opts);
bool configHasStopOnFirstNonOption = hasStopOnFirstNonOption(opts);


bool isOption(string arg)
{
import std.string : isNumeric;
import std.ascii : isDigit;

return
(arg == std.getopt.endOfOptions) ||
(arg.length >= 2 &&
arg[0] == std.getopt.optionChar &&
!(arg[1].isDigit && arg.isNumeric));
}

/* Walk input args, passing one command option at a time to getopt.
* Example - Assume the args array is:
*
Expand All @@ -121,7 +182,7 @@ GetoptResult getoptInorder(T...)(ref string[] args, T opts)
bool helpWanted = false; // Need to track if help is ever requested.
size_t argStart = 1; // Start at 1, index zero is program name.
bool isLastCall = false;

while (!isLastCall)
{
/* This is the last call to getopt if:
Expand All @@ -136,7 +197,7 @@ GetoptResult getoptInorder(T...)(ref string[] args, T opts)
/* Find the next option. */
for (size_t i = argStart + 1; i < args.length; i++)
{
if (args[i].length > 0 && args[i][0] == std.getopt.optionChar)
if (isOption(args[i]))
{
argEnd = i;
break;
Expand Down Expand Up @@ -235,6 +296,26 @@ unittest // Issue 16539
assert(cmdvals == ["1", "2", "3", "4", "5", "6", "7", "8", "9"]);
}

unittest // Dashes
{
auto args = ["program", "-m", "-5", "-n", "-50", "-c", "-"];

int m;
int n;
char c;

getoptInorder(
args,
"m|mm", "integer", &m,
"n|nn", "integer", &n,
"c|cc", "character", &c,
);

assert(m == -5);
assert(n == -50);
assert(c == '-');
}


/* NOTE: The following unit tests have been adapted from unit tests in std.getopt.d
* See https://github.com/dlang/phobos/blob/master/std/getopt.d and
Expand Down
3 changes: 2 additions & 1 deletion tsv-filter/src/tsv-filter.d
Original file line number Diff line number Diff line change
Expand Up @@ -529,6 +529,7 @@ struct TsvFilterOptions {
*/
auto processArgs (ref string[] cmdArgs) {
import std.getopt;
import getopt_inorder;

/* Command option handlers - One handler for each option. These conform to the
* getopt required handler signature, and separate knowledge the specific command
Expand Down Expand Up @@ -585,7 +586,7 @@ struct TsvFilterOptions {

try {
arraySep = ","; // Use comma to separate values in command line options
auto r = getopt(
auto r = getoptInorder(
cmdArgs,
"help-brief", " Print brief help.", &helpBrief,
"header", " Treat the first line of each file as a header.", &hasHeader,
Expand Down
24 changes: 24 additions & 0 deletions tsv-filter/tests/gold/basic_tests_1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,30 @@ last line



====Short circuit tests===

====[tsv-filter --header --not-blank 1 --str-ne 1:none --eq 1:100 input_num_or_empty.tsv]====
f1 f2 f3
100 21 31
100 24 33

====[tsv-filter --header --or --blank 1 --str-eq 1:none --eq 1:100 input_num_or_empty.tsv]====
f1 f2 f3
100 21 31
22 32
23 33
100 24 33
none 25 34

====[tsv-filter --header --invert --not-blank 1 --str-ne 1:none --eq 1:100 input_num_or_empty.tsv]====
f1 f2 f3
22 32
23 33
none 25 34

====[tsv-filter --header --invert --or --blank 1 --str-eq 1:none --eq 1:100 input_num_or_empty.tsv]====
f1 f2 f3

====String tests===

====[tsv-filter --header --str-eq 3:a input1.tsv]====
Expand Down
6 changes: 3 additions & 3 deletions tsv-filter/tests/gold/error_tests_1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ Error: Cannot open file `nosuchfile.tsv' in mode `rb' (No such file or directory
Error processing command line arguments: Invalid option: '--gt 0:10'. Zero is not a valid field index.

====[tsv-filter --header --lt -1:10 input1.tsv]====
Error processing command line arguments: Invalid numeric values in option: '--lt -1:10'. Expected: '--lt <field>:<val>' where <field> and <val> are numbers.
Error processing command line arguments: Missing value for argument --lt.

====[tsv-filter --header --ne abc:15 input1.tsv]====
Error processing command line arguments: Invalid numeric values in option: '--ne abc:15'. Expected: '--ne <field>:<val>' where <field> and <val> are numbers.
Expand All @@ -27,7 +27,7 @@ Error processing command line arguments: Invalid value in option: '--empty 23g'.
Error processing command line arguments: Invalid option: '--str-gt 0:abc'. Zero is not a valid field index.

====[tsv-filter --header --str-lt -1:ABC input1.tsv]====
Error processing command line arguments: Invalid values in option: '--str-lt -1:ABC'. Expected: '--str-lt <field>:<val>' where <field> is a number and <val> a string.
Error processing command line arguments: Missing value for argument --str-lt.

====[tsv-filter --header --str-ne abc:a22 input1.tsv]====
Error processing command line arguments: Invalid values in option: '--str-ne abc:a22'. Expected: '--str-ne <field>:<val>' where <field> is a number and <val> a string.
Expand All @@ -45,7 +45,7 @@ Error processing command line arguments: Invalid values in option: '--iregex a:^
Error processing command line arguments: Invalid option: '--ff-gt 0:1'. Zero is not a valid field index.

====[tsv-filter --header --ff-lt -1:2 input1.tsv]====
Error processing command line arguments: Invalid values in option: '--ff-lt -1:2'. Expected: '--ff-lt <field1>:<field2>' where fields are 1-upped integers.
Error processing command line arguments: Missing value for argument --ff-lt.

====[tsv-filter --header --ff-ne abc:3 input1.tsv]====
Error processing command line arguments: Invalid values in option: '--ff-ne abc:3'. Expected: '--ff-ne <field1>:<field2>' where fields are 1-upped integers.
Expand Down
6 changes: 6 additions & 0 deletions tsv-filter/tests/input_num_or_empty.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
f1 f2 f3
100 21 31
22 32
23 33
100 24 33
none 25 34
8 changes: 8 additions & 0 deletions tsv-filter/tests/tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,14 @@ runtest ${prog} "--not-empty 1 input_onefield.txt" ${basic_tests_1}
runtest ${prog} "--blank 1 input_onefield.txt" ${basic_tests_1}
runtest ${prog} "--empty 1 input_onefield.txt" ${basic_tests_1}

# Short circuit by order. Ensure not blank or "none" before numeric test.
echo "" >> ${basic_tests_1}; echo "====Short circuit tests===" >> ${basic_tests_1}
runtest ${prog} "--header --not-blank 1 --str-ne 1:none --eq 1:100 input_num_or_empty.tsv" ${basic_tests_1}
runtest ${prog} "--header --or --blank 1 --str-eq 1:none --eq 1:100 input_num_or_empty.tsv" ${basic_tests_1}
runtest ${prog} "--header --invert --not-blank 1 --str-ne 1:none --eq 1:100 input_num_or_empty.tsv" ${basic_tests_1}
runtest ${prog} "--header --invert --or --blank 1 --str-eq 1:none --eq 1:100 input_num_or_empty.tsv" ${basic_tests_1}


# String field tests
echo "" >> ${basic_tests_1}; echo "====String tests===" >> ${basic_tests_1}

Expand Down

0 comments on commit 979d02a

Please sign in to comment.