diff --git a/src/edu/pitt/cs/nih/backend/feedback/TextFileFeedbackManager_LibSVM_WordTree.java b/src/edu/pitt/cs/nih/backend/feedback/TextFileFeedbackManager_LibSVM_WordTree.java index 1e072f5..399e1bd 100644 --- a/src/edu/pitt/cs/nih/backend/feedback/TextFileFeedbackManager_LibSVM_WordTree.java +++ b/src/edu/pitt/cs/nih/backend/feedback/TextFileFeedbackManager_LibSVM_WordTree.java @@ -834,6 +834,7 @@ protected String getStartEndPosition(String docID, Map spanMap) t String fn_pathology = Storage_Controller.getPathologyReportFn(); String docText; Pattern pattern = getSearchPatternFromSpanMap(spanMap); + System.out.println(pattern); Matcher m; // search text in colonoscopy text, remove header footer // docText = Preprocess.separateReportHeaderFooter( @@ -939,6 +940,8 @@ public String wordTreeSkippedNGramPatternString(Map spanMap) String[] matchedTokenList = TextUtil.escapeRegex(spanMap.get("matched")).split(" "); + System.out.println("Received span map: " + spanMap); + StringBuilder sb = new StringBuilder(); // matchedTokenList.length >= selectedTokenList.length int skippedN = 0; @@ -978,7 +981,7 @@ public String wordTreeSkippedNGramPatternString(Map spanMap) patternStr = sb.toString().trim().replaceAll(whiteSpaceBeforePunc, "\\\\s{0,1}"); // in case the first skipped n-gram is a punctuation // there would be no white space before the n-gram - patternStr = patternStr.replaceAll(" (?=(\\(\\\\S\\+))", "\\\\s{0,1}"); + patternStr = patternStr.replaceAll(" (?=(\\(\\\\S\\+))", "\\\\s*"); // // quote the string // patternStr = TextUtil.escapeRegex(patternStr); // reverse 's