Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support full regular expression on PatterMatcher.match() #4492

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2006-2021 the original author or authors.
* Copyright 2006-2023 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -19,13 +19,15 @@
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;

import org.springframework.util.Assert;

/**
* @author Dave Syer
* @author Dan Garrette
* @author Marten Deinum
* @author Injae Kim
*/
public class PatternMatcher<S> {

Expand Down Expand Up @@ -180,6 +182,19 @@ public static boolean match(String pattern, String str) {
return true;
}

/**
* Tests whether or not a string matches against a regular expression.
* @param regex regular expression to match against. Must not be {@code null}.
* @param str string which must be matched against the regular expression. Must not be {@code null}.
* @return {@code true} if the string matches against the regular expression, or {@code false} otherwise.
*/
public static boolean matchRegex(String regex, String str) {
Assert.notNull(regex, "Regex must not be null");
Assert.notNull(str, "Str must not be null");

return Pattern.matches(regex, str);
}

/**
* <p>
* This method takes a String key and a map from Strings to values of any type. During
Expand All @@ -202,12 +217,23 @@ public S match(String line) {
Assert.notNull(line, "A non-null key must be provided to match against.");

for (String key : sorted) {
if (PatternMatcher.match(key, line)) {
if (match(key, line)) {
value = map.get(key);
break;
}
}

if (value == null) {
for (String key : sorted) {
try {
if (matchRegex(key, line)) {
value = map.get(key);
break;
}
} catch (Throwable ignored) {}
}
}
Comment on lines +226 to +235
Copy link
Contributor Author

@injae-kim injae-kim Nov 19, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we can replace match() to matchRegex(), but I think it makes breaking change.

  • e.g. "*" means matching all lines on match(), but invalid regex format and throws PatternSyntaxException on matchRegex()
  • "abcde" matches "abc*" on match(), but not matches on matchRegex() ("abc*" matches only "abccc.." on regex)

(also PatternMatcherTests failed if we replace match() to matchRegex())

To avoid this breaking change, I intentionally matchRegex() only when there's nothing matched by match()~!


if (value == null) {
throw new IllegalStateException("Could not find a matching pattern for key=[" + line + "]");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
* @author Dan Garrette
* @author Dave Syer
* @author Mahmoud Ben Hassine
* @author Injae Kim
* @since 2.0
*/
class PatternMatchingCompositeLineMapperTests {
Expand All @@ -51,6 +52,7 @@ void testKeyFound() throws Exception {
Map<String, LineTokenizer> tokenizers = new HashMap<>();
tokenizers.put("foo*", line -> new DefaultFieldSet(new String[] { "a", "b" }));
tokenizers.put("bar*", line -> new DefaultFieldSet(new String[] { "c", "d" }));
tokenizers.put("regex.*", line -> new DefaultFieldSet(new String[] { "e", "f" }));
mapper.setTokenizers(tokenizers);

Map<String, FieldSetMapper<Name>> fieldSetMappers = new HashMap<>();
Expand All @@ -62,15 +64,35 @@ void testKeyFound() throws Exception {
assertEquals(new Name("d", "c", 0), name);
}

@Test
void testKeyFoundByRegex() throws Exception {
Map<String, LineTokenizer> tokenizers = new HashMap<>();
tokenizers.put("foo*", line -> new DefaultFieldSet(new String[] { "a", "b" }));
tokenizers.put("bar*", line -> new DefaultFieldSet(new String[] { "c", "d" }));
tokenizers.put("regex.*", line -> new DefaultFieldSet(new String[] { "e", "f" }));
mapper.setTokenizers(tokenizers);

Map<String, FieldSetMapper<Name>> fieldSetMappers = new HashMap<>();
fieldSetMappers.put("foo*", fs -> new Name(fs.readString(0), fs.readString(1), 0));
fieldSetMappers.put("bar*", fs -> new Name(fs.readString(1), fs.readString(0), 0));
fieldSetMappers.put("regex.*", fs -> new Name(fs.readString(0), fs.readString(1), 0));
mapper.setFieldSetMappers(fieldSetMappers);

Name name = mapper.mapLine("regex-ABC_12345", 1);
assertEquals(new Name("e", "f", 0), name);
}

@Test
void testMapperKeyNotFound() {
Map<String, LineTokenizer> tokenizers = new HashMap<>();
tokenizers.put("foo*", line -> new DefaultFieldSet(new String[] { "a", "b" }));
tokenizers.put("bar*", line -> new DefaultFieldSet(new String[] { "c", "d" }));
tokenizers.put("regex.*", line -> new DefaultFieldSet(new String[] { "e", "f" }));
mapper.setTokenizers(tokenizers);

Map<String, FieldSetMapper<Name>> fieldSetMappers = new HashMap<>();
fieldSetMappers.put("foo*", fs -> new Name(fs.readString(0), fs.readString(1), 0));
fieldSetMappers.put("regex.*", fs -> new Name(fs.readString(0), fs.readString(1), 0));
mapper.setFieldSetMappers(fieldSetMappers);

assertThrows(IllegalStateException.class, () -> mapper.mapLine("bar", 1));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
* @author Ben Hale
* @author Dan Garrette
* @author Dave Syer
* @author Injae Kim
*/
class PatternMatchingCompositeLineTokenizerTests {

Expand All @@ -45,6 +46,7 @@ void testEmptyKeyMatchesAnyLine() throws Exception {
Map<String, LineTokenizer> map = new HashMap<>();
map.put("*", new DelimitedLineTokenizer());
map.put("foo", line -> null);
map.put("regex.*", line -> null);
tokenizer.setTokenizers(map);
tokenizer.afterPropertiesSet();
FieldSet fields = tokenizer.tokenize("abc");
Expand All @@ -53,23 +55,43 @@ void testEmptyKeyMatchesAnyLine() throws Exception {

@Test
void testEmptyKeyDoesNotMatchWhenAlternativeAvailable() throws Exception {

Map<String, LineTokenizer> map = new LinkedHashMap<>();
map.put("*", line -> null);
map.put("foo*", new DelimitedLineTokenizer());
map.put("regex.*", line -> null);
tokenizer.setTokenizers(map);
tokenizer.afterPropertiesSet();
FieldSet fields = tokenizer.tokenize("foo,bar");
assertEquals("bar", fields.readString(1));
}

@Test
void testMatchRegex() throws Exception {
Map<String, LineTokenizer> map = new HashMap<>();
map.put("foo", line -> null);
map.put("regex.*", new DelimitedLineTokenizer());
tokenizer.setTokenizers(map);
tokenizer.afterPropertiesSet();
FieldSet fields = tokenizer.tokenize("regex-ABC_12345,REGEX");
assertEquals(2, fields.getFieldCount());
assertEquals("regex-ABC_12345", fields.readString(0));
assertEquals("REGEX", fields.readString(1));
}

@Test
void testNoMatch() throws Exception {
tokenizer.setTokenizers(Collections.singletonMap("foo", (LineTokenizer) new DelimitedLineTokenizer()));
tokenizer.afterPropertiesSet();
assertThrows(IllegalStateException.class, () -> tokenizer.tokenize("nomatch"));
}

@Test
void testNoMatchRegex() throws Exception {
tokenizer.setTokenizers(Collections.singletonMap("foo.*", (LineTokenizer) new DelimitedLineTokenizer()));
tokenizer.afterPropertiesSet();
assertThrows(IllegalStateException.class, () -> tokenizer.tokenize("nomatch"));
}

@Test
void testMatchWithPrefix() throws Exception {
tokenizer.setTokenizers(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2006-2022 the original author or authors.
* Copyright 2006-2023 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -22,11 +22,13 @@

import java.util.HashMap;
import java.util.Map;
import java.util.regex.PatternSyntaxException;

import org.junit.jupiter.api.Test;

/**
* @author Dan Garrette
* @author Injae Kim
* @since 2.0
*/
class PatternMatcherTests {
Expand All @@ -37,6 +39,7 @@ class PatternMatcherTests {
map.put("an*", 3);
map.put("a*", 2);
map.put("big*", 4);
map.put("bcd.*", 5);
}

private static final Map<String, Integer> defaultMap = new HashMap<>();
Expand All @@ -49,6 +52,15 @@ class PatternMatcherTests {
defaultMap.put("*", 1);
}

private static final Map<String, Integer> regexMap = new HashMap<>();

static {
regexMap.put("abc.*", 1);
regexMap.put("a...e", 2);
regexMap.put("123.[0-9][0-9]\\d", 3);
regexMap.put("*............", 100); // invalid regex format
}

@Test
void testMatchNoWildcardYes() {
assertTrue(PatternMatcher.match("abc", "abc"));
Expand Down Expand Up @@ -104,6 +116,29 @@ void testMatchStarNo() {
assertFalse(PatternMatcher.match("a*c", "abdeg"));
}

@Test
void testMatchRegex() {
assertTrue(PatternMatcher.matchRegex("abc.*", "abcde"));
}

@Test
void testMatchRegex_notMatched() {
assertFalse(PatternMatcher.matchRegex("abc.*", "cdefg"));
assertFalse(PatternMatcher.matchRegex("abc.", "abcde"));
}

@Test
void testMatchRegex_thrown_invalidRegexFormat() {
assertThrows(PatternSyntaxException.class, () -> PatternMatcher.matchRegex("*..", "abc"));
}

@Test
void testMatchRegex_thrown_notNullParam() {
assertThrows(IllegalArgumentException.class, () -> PatternMatcher.matchRegex("regex", null));
assertThrows(IllegalArgumentException.class, () -> PatternMatcher.matchRegex(null, "str"));
}


@Test
void testMatchPrefixSubsumed() {
assertEquals(2, new PatternMatcher<>(map).match("apple").intValue());
Expand All @@ -119,6 +154,11 @@ void testMatchPrefixUnrelated() {
assertEquals(4, new PatternMatcher<>(map).match("biggest").intValue());
}

@Test
void testMatchByRegex() {
assertEquals(5, new PatternMatcher<>(map).match("bcdef12345").intValue());
}

@Test
void testMatchPrefixNoMatch() {
PatternMatcher<Integer> matcher = new PatternMatcher<>(map);
Expand All @@ -140,4 +180,24 @@ void testMatchPrefixDefaultValueNoMatch() {
assertEquals(1, new PatternMatcher<>(defaultMap).match("bat").intValue());
}

@Test
void testMatchRegexPrefix() {
assertEquals(1, new PatternMatcher<>(regexMap).match("abcdefg").intValue());
}

@Test
void testMatchRegexWildCards() {
assertEquals(2, new PatternMatcher<>(regexMap).match("a12De").intValue());
}

@Test
void testMatchRegexDigits() {
assertEquals(3, new PatternMatcher<>(regexMap).match("123-789").intValue());
}

@Test
void testMatchRegexNotMatched() {
assertThrows(IllegalStateException.class, () -> new PatternMatcher<>(regexMap).match("Hello world!"));
}

}