Skip to content

Commit

Permalink
fix issue #782 + add test_chrf
Browse files Browse the repository at this point in the history
  • Loading branch information
varisd committed Jan 2, 2019
1 parent b4ea78c commit 81bbdbb
Show file tree
Hide file tree
Showing 2 changed files with 91 additions and 24 deletions.
43 changes: 19 additions & 24 deletions neuralmonkey/evaluators/chrf.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from typing import List, Dict
from typeguard import check_argument_types
import numpy as np
from neuralmonkey.evaluators.evaluator import Evaluator

# pylint: disable=invalid-name
Expand All @@ -25,7 +26,6 @@ def __init__(self,
super().__init__(name)

self.n = n
self.max_ord = n
self.beta_2 = beta**2

self.ignored = [] # type: List[str]
Expand Down Expand Up @@ -58,44 +58,39 @@ def score_instance(self,
/ ((self.beta_2 * precision) + recall))

def chr_r(self, hyp_ngrams: NGramDicts, ref_ngrams: NGramDicts) -> float:
    """Return character n-gram recall averaged over orders 1..self.n.

    For each order the recall is the clipped number of reference n-grams
    that also occur in the hypothesis, divided by the total number of
    reference n-grams. An order with no reference n-grams contributes
    1.0 instead of triggering a division by zero.

    Arguments:
        hyp_ngrams: One n-gram count dict per order for the hypothesis;
            index ``m - 1`` holds the counts for order ``m``.
        ref_ngrams: The same structure for the reference.

    Returns:
        The mean of the per-order recalls.
    """
    count_all = np.zeros(self.n)
    count_matched = np.zeros(self.n)

    for order in range(self.n):
        hyp_counts = hyp_ngrams[order]
        for ngr, ref_count in ref_ngrams[order].items():
            count_all[order] += ref_count
            # Clip the match count by how often the hypothesis has it.
            count_matched[order] += min(ref_count, hyp_counts.get(ngr, 0))

    # Entries where count_all == 0 keep the preset value of 1.0 from `out`.
    per_order = np.divide(
        count_matched, count_all, out=np.ones_like(count_all),
        where=(count_all != 0))
    return float(np.mean(per_order))

def chr_p(self, hyp_ngrams: NGramDicts, ref_ngrams: NGramDicts) -> float:
    """Return character n-gram precision averaged over orders 1..self.n.

    For each order the precision is the clipped number of hypothesis
    n-grams that also occur in the reference, divided by the total number
    of hypothesis n-grams. An order with no hypothesis n-grams contributes
    1.0 instead of triggering a division by zero.

    Arguments:
        hyp_ngrams: One n-gram count dict per order for the hypothesis;
            index ``m - 1`` holds the counts for order ``m``.
        ref_ngrams: The same structure for the reference.

    Returns:
        The mean of the per-order precisions.
    """
    count_all = np.zeros(self.n)
    count_matched = np.zeros(self.n)

    for order in range(self.n):
        ref_counts = ref_ngrams[order]
        for ngr, hyp_count in hyp_ngrams[order].items():
            count_all[order] += hyp_count
            # Clip the match count by how often the reference has it.
            count_matched[order] += min(hyp_count, ref_counts.get(ngr, 0))

    # Entries where count_all == 0 keep the preset value of 1.0 from `out`.
    per_order = np.divide(
        count_matched, count_all, out=np.ones_like(count_all),
        where=(count_all != 0))
    return float(np.mean(per_order))

def _get_ngrams(self, tokens: List[str], n: int) -> NGramDicts:
if len(tokens) < n:
self.max_ord = len(tokens)

ngr_dicts = []
for m in range(1, n + 1):
ngr_dict = {} # type: Dict[str, int]
for i in range(m, len(tokens)):
# if m > len(tokens), return an empty dict
for i in range(m, len(tokens) + 1):
ngr = "".join(tokens[i - m:i])
ngr_dict[ngr] = ngr_dict.setdefault(ngr, 0) + 1
ngr_dicts.append(ngr_dict)
Expand Down
72 changes: 72 additions & 0 deletions neuralmonkey/tests/test_chrf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#!/usr/bin/env python3.5


import unittest

from neuralmonkey.evaluators.chrf import ChrFEvaluator


# Hypothesis sentences; each one is paired by position with the reference
# sentence at the same index in CORPUS_REFERENCE.
CORPUS_DECODED = [
    "colorful thoughts furiously sleep",
    "little piglet slept all night",
    "working working working working working be be be be be be be",
    "ich bin walrus",
    "walrus for präsident",
]

CORPUS_REFERENCE = [
    "the colorless ideas slept furiously",
    "pooh slept all night",
    "working class hero is something to be",
    "I am the working class walrus",
    "walrus for president",
]

# Token sequence and the n-gram counts expected from it for orders 1 to 4;
# the last dict is empty because the order exceeds the number of tokens.
TOKENS = ["a", "b", "a"]
NGRAMS = [
    {"a": 2, "b": 1},
    {"ab": 1, "ba": 1},
    {"aba": 1},
    {},
]


# Whitespace-tokenized versions of the two corpora.
DECODED = [sentence.split() for sentence in CORPUS_DECODED]
REFERENCE = [sentence.split() for sentence in CORPUS_REFERENCE]

# One evaluator instance, built with its default constructor arguments, is
# shared by all tests; the names below are shortcuts to its bound methods.
FUNC = ChrFEvaluator()
# NOTE(review): FUNC_P and FUNC_R are not used by the tests visible in this
# file -- confirm whether direct precision/recall tests were intended.
FUNC_P = FUNC.chr_p
FUNC_R = FUNC.chr_r
FUNC_NGRAMS = FUNC._get_ngrams  # pylint: disable=protected-access

class TestChrF(unittest.TestCase):
    """Unit tests for the chrF evaluator and its n-gram extraction."""

    def test_empty_decoded(self):
        """Empty hypotheses score 0 against real references."""
        # Recall == 0.0
        empty_hyps = [[] for _ in REFERENCE]
        self.assertEqual(FUNC(empty_hyps, REFERENCE), 0.0)

    def test_empty_reference(self):
        """Real hypotheses score 0 against empty references."""
        # Precision == 0.0
        # The hypotheses go first, as in the other calls in this class, so
        # the empty sentences must be passed as the second argument;
        # passing them first would only repeat test_empty_decoded.
        empty_refs = [[] for _ in DECODED]
        self.assertEqual(FUNC(DECODED, empty_refs), 0.0)

    def test_identical(self):
        """Scoring a corpus against itself gives a perfect score."""
        self.assertEqual(FUNC(REFERENCE, REFERENCE), 1.0)

    def test_empty_sentence(self):
        """A corpus with one empty reference still yields a valid score."""
        ref_empty = REFERENCE + [[]]
        out_empty = DECODED + [["something"]]
        score = FUNC(out_empty, ref_empty)
        # Partially overlapping corpora must score strictly between the
        # two extremes checked by the tests above.
        self.assertTrue(0.0 < score < 1.0)
        # NOTE(review): delta=10 accepts any score within 10 of 0.38, so
        # this assertion cannot fail; it was presumably meant to be
        # places=2. Tighten once the expected value has been confirmed.
        self.assertAlmostEqual(score, 0.38, delta=10)

    def test_chrf(self):
        """The sample corpus yields a valid, non-trivial score."""
        score = FUNC(DECODED, REFERENCE)
        self.assertTrue(0.0 < score < 1.0)
        # NOTE(review): delta=10 makes this check vacuous as well; see the
        # note in test_empty_sentence.
        self.assertAlmostEqual(score, 0.46, delta=10)

    def test_get_ngrams(self):
        """N-gram extraction returns one count dict per requested order."""
        ngrams_out = FUNC_NGRAMS(TOKENS, 4)
        self.assertEqual(len(ngrams_out), 4)
        self.assertEqual(len(ngrams_out), len(NGRAMS))
        for produced, expected in zip(ngrams_out, NGRAMS):
            self.assertDictEqual(produced, expected)

# Run the test suite when this file is executed directly.
if __name__ == "__main__":
    unittest.main()

0 comments on commit 81bbdbb

Please sign in to comment.