From 81bbdbbbcf3eeba40b8a2901d274c974d5eb5fcf Mon Sep 17 00:00:00 2001 From: Dusan Varis Date: Wed, 2 Jan 2019 15:41:00 +0100 Subject: [PATCH 1/2] fix issue #782 + add test_chrf --- neuralmonkey/evaluators/chrf.py | 43 +++++++++----------- neuralmonkey/tests/test_chrf.py | 72 +++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+), 24 deletions(-) create mode 100644 neuralmonkey/tests/test_chrf.py diff --git a/neuralmonkey/evaluators/chrf.py b/neuralmonkey/evaluators/chrf.py index 904d60116..256da65c3 100644 --- a/neuralmonkey/evaluators/chrf.py +++ b/neuralmonkey/evaluators/chrf.py @@ -1,5 +1,6 @@ from typing import List, Dict from typeguard import check_argument_types +import numpy as np from neuralmonkey.evaluators.evaluator import Evaluator # pylint: disable=invalid-name @@ -25,7 +26,6 @@ def __init__(self, super().__init__(name) self.n = n - self.max_ord = n self.beta_2 = beta**2 self.ignored = [] # type: List[str] @@ -58,44 +58,39 @@ def score_instance(self, / ((self.beta_2 * precision) + recall)) def chr_r(self, hyp_ngrams: NGramDicts, ref_ngrams: NGramDicts) -> float: - recall = 0.0 + count_all = np.zeros(self.n) + count_matched = np.zeros(self.n) for m in range(1, self.n + 1): - count_all = 0 - count_matched = 0 for ngr in ref_ngrams[m - 1]: ref_count = ref_ngrams[m - 1][ngr] - count_all += ref_count + count_all[m - 1] += ref_count if ngr in hyp_ngrams[m - 1]: - count_matched += min(ref_count, hyp_ngrams[m - 1][ngr]) - # Catch division by zero - if count_all != 0.0: - recall += count_matched / count_all - return recall / float(self.max_ord) + count_matched[m - 1] += min( + ref_count, hyp_ngrams[m - 1][ngr]) + return np.mean(np.divide( + count_matched, count_all, out=np.ones_like(count_all), + where=(count_all!=0))) def chr_p(self, hyp_ngrams: NGramDicts, ref_ngrams: NGramDicts) -> float: - precision = 0.0 + count_all = np.zeros(self.n) + count_matched = np.zeros(self.n) for m in range(1, self.n + 1): - count_all = 0 - count_matched = 0 for ngr in hyp_ngrams[m - 1]: hyp_count = hyp_ngrams[m - 1][ngr] - count_all += hyp_count + count_all[m - 1] += hyp_count if ngr in ref_ngrams[m - 1]: - count_matched += min(hyp_count, ref_ngrams[m - 1][ngr]) - # Catch division by zero - if count_all != 0.0: - precision += count_matched / count_all - - return precision / float(self.max_ord) + count_matched[m - 1] += min( + hyp_count, ref_ngrams[m - 1][ngr]) + return np.mean(np.divide( + count_matched, count_all, out=np.ones_like(count_all), + where=(count_all!=0))) def _get_ngrams(self, tokens: List[str], n: int) -> NGramDicts: - if len(tokens) < n: - self.max_ord = len(tokens) - ngr_dicts = [] for m in range(1, n + 1): ngr_dict = {} # type: Dict[str, int] - for i in range(m, len(tokens)): + # if m > len(tokens), return an empty dict + for i in range(m, len(tokens) + 1): ngr = "".join(tokens[i - m:i]) ngr_dict[ngr] = ngr_dict.setdefault(ngr, 0) + 1 ngr_dicts.append(ngr_dict) diff --git a/neuralmonkey/tests/test_chrf.py b/neuralmonkey/tests/test_chrf.py new file mode 100644 index 000000000..1eac7ba69 --- /dev/null +++ b/neuralmonkey/tests/test_chrf.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python3.5 + + +import unittest + +from neuralmonkey.evaluators.chrf import ChrFEvaluator + + +CORPUS_DECODED = [ + "colorful thoughts furiously sleep", + "little piglet slept all night", + "working working working working working be be be be be be be", + "ich bin walrus", + "walrus for präsident" +] + +CORPUS_REFERENCE = [ + "the colorless ideas slept furiously", + "pooh slept all night", + "working class hero is something to be", + "I am the working class walrus", + "walrus for president" +] + +TOKENS = ["a", "b", "a"] +NGRAMS = [ + {"a": 2, "b" : 1}, + {"ab": 1, "ba" : 1}, + {"aba" : 1}, + {}] + + +DECODED = [d.split() for d in CORPUS_DECODED] +REFERENCE = [r.split() for r in CORPUS_REFERENCE] + +FUNC = ChrFEvaluator() +FUNC_P = FUNC.chr_p +FUNC_R = FUNC.chr_r +FUNC_NGRAMS = FUNC._get_ngrams + +class TestChrF(unittest.TestCase): + + def test_empty_decoded(self): + # Recall == 0.0 + self.assertEqual(FUNC([[] for _ in DECODED], REFERENCE), 0.0) + + def test_empty_reference(self): + # Precision == 0.0 + self.assertEqual(FUNC([[] for _ in REFERENCE], DECODED), 0.0) + + def test_identical(self): + self.assertEqual(FUNC(REFERENCE, REFERENCE), 1.0) + + def test_empty_sentence(self): + ref_empty = REFERENCE + [[]] + out_empty = DECODED + [["something"]] + score = FUNC(out_empty, ref_empty) + self.assertAlmostEqual(score, 0.38, delta=10) + + def test_chrf(self): + score = FUNC(DECODED, REFERENCE) + self.assertAlmostEqual(score, 0.46, delta=10) + + def test_get_ngrams(self): + tokens = ["a", "b", "a"] + ngrams_out = FUNC_NGRAMS(tokens, 4) + self.assertEqual(len(ngrams_out), 4) + for i, _ in enumerate(NGRAMS): + self.assertDictEqual(ngrams_out[i], NGRAMS[i]) + +if __name__ == "__main__": + unittest.main() From 98f3b3b55f91a4636ab40e83f99b6ab6649584a5 Mon Sep 17 00:00:00 2001 From: Dusan Varis Date: Wed, 2 Jan 2019 16:27:32 +0100 Subject: [PATCH 2/2] addressing reviews + fixing lint errors --- neuralmonkey/evaluators/chrf.py | 30 ++++++++++++++--------------- neuralmonkey/tests/test_chrf.py | 34 ++++++++------------------------- 2 files changed, 23 insertions(+), 41 deletions(-) diff --git a/neuralmonkey/evaluators/chrf.py b/neuralmonkey/evaluators/chrf.py index 256da65c3..5d99cce30 100644 --- a/neuralmonkey/evaluators/chrf.py +++ b/neuralmonkey/evaluators/chrf.py @@ -37,11 +37,11 @@ def score_instance(self, reference: List[str]) -> float: hyp_joined = " ".join(hypothesis) hyp_chars = [x for x in list(hyp_joined) if x not in self.ignored] - hyp_ngrams = self._get_ngrams(hyp_chars, self.n) + hyp_ngrams = _get_ngrams(hyp_chars, self.n) ref_joined = " ".join(reference) ref_chars = [x for x in list(ref_joined) if x not in self.ignored] - ref_ngrams = self._get_ngrams(ref_chars, self.n) + ref_ngrams = _get_ngrams(ref_chars, self.n) if not hyp_chars or not ref_chars: if "".join(hyp_chars) == "".join(ref_chars): @@ -69,7 +69,7 @@ def chr_r(self, hyp_ngrams: NGramDicts, ref_ngrams: NGramDicts) -> float: ref_count, hyp_ngrams[m - 1][ngr]) return np.mean(np.divide( count_matched, count_all, out=np.ones_like(count_all), - where=(count_all!=0))) + where=(count_all != 0))) def chr_p(self, hyp_ngrams: NGramDicts, ref_ngrams: NGramDicts) -> float: count_all = np.zeros(self.n) @@ -83,18 +83,18 @@ def chr_p(self, hyp_ngrams: NGramDicts, ref_ngrams: NGramDicts) -> float: hyp_count, ref_ngrams[m - 1][ngr]) return np.mean(np.divide( count_matched, count_all, out=np.ones_like(count_all), - where=(count_all!=0))) - - def _get_ngrams(self, tokens: List[str], n: int) -> NGramDicts: - ngr_dicts = [] - for m in range(1, n + 1): - ngr_dict = {} # type: Dict[str, int] - # if m > len(tokens), return an empty dict - for i in range(m, len(tokens) + 1): - ngr = "".join(tokens[i - m:i]) - ngr_dict[ngr] = ngr_dict.setdefault(ngr, 0) + 1 - ngr_dicts.append(ngr_dict) - return ngr_dicts + where=(count_all != 0))) + + +def _get_ngrams(tokens: List[str], n: int) -> NGramDicts: + ngr_dicts = [] + for m in range(1, n + 1): + ngr_dict = {} # type: Dict[str, int] + for i in range(m, len(tokens) + 1): + ngr = "".join(tokens[i - m:i]) + ngr_dict[ngr] = ngr_dict.setdefault(ngr, 0) + 1 + ngr_dicts.append(ngr_dict) + return ngr_dicts # pylint: disable=invalid-name diff --git a/neuralmonkey/tests/test_chrf.py b/neuralmonkey/tests/test_chrf.py index 1eac7ba69..2c148ba16 100644 --- a/neuralmonkey/tests/test_chrf.py +++ b/neuralmonkey/tests/test_chrf.py @@ -3,40 +3,21 @@ import unittest -from neuralmonkey.evaluators.chrf import ChrFEvaluator +from neuralmonkey.evaluators.chrf import ChrFEvaluator, _get_ngrams +from neuralmonkey.tests.test_bleu import DECODED, REFERENCE -CORPUS_DECODED = [ - "colorful thoughts furiously sleep", - "little piglet slept all night", - "working working working working working be be be be be be be", - "ich bin walrus", - "walrus for präsident" -] - -CORPUS_REFERENCE = [ - "the colorless ideas slept furiously", - "pooh slept all night", - "working class hero is something to be", - "I am the working class walrus", - "walrus for president" -] - TOKENS = ["a", "b", "a"] NGRAMS = [ - {"a": 2, "b" : 1}, - {"ab": 1, "ba" : 1}, - {"aba" : 1}, + {"a": 2, "b": 1}, + {"ab": 1, "ba": 1}, + {"aba": 1}, {}] - - -DECODED = [d.split() for d in CORPUS_DECODED] -REFERENCE = [r.split() for r in CORPUS_REFERENCE] FUNC = ChrFEvaluator() FUNC_P = FUNC.chr_p FUNC_R = FUNC.chr_r -FUNC_NGRAMS = FUNC._get_ngrams + class TestChrF(unittest.TestCase): @@ -63,10 +44,11 @@ def test_chrf(self): def test_get_ngrams(self): tokens = ["a", "b", "a"] - ngrams_out = FUNC_NGRAMS(tokens, 4) + ngrams_out = _get_ngrams(tokens, 4) self.assertEqual(len(ngrams_out), 4) for i, _ in enumerate(NGRAMS): self.assertDictEqual(ngrams_out[i], NGRAMS[i]) + if __name__ == "__main__": unittest.main()