Charcoal-SE · user12986714 · Aug 6, 2020 · Aug 6, 2020 · Aug 6, 2020 · Aug 6, 2020
diff --git a/findspam.py b/findspam.py
@@ -39,6 +39,16 @@
 PUNCTUATION_RATIO = 0.42
 REPEATED_CHARACTER_RATIO = 0.20
 IMG_TXT_R_THRES = 0.7
+
+# Statistics pasted from a REPL session over a sample named `result` (its source is not
+# shown here; presumably the entropy-per-char values that nonsense() computes):
+#   statistics.mean(result)   = 0.20483261275004847
+#   statistics.median(result) = 0.20223865427238322
+#   statistics.stdev(result)  = 0.031230117152319384
+# NOTE(review): the bounds below look like roughly mean -/+ 2 * stdev -- confirm.
+ENTROPY_TOO_LOW = 0.14
+ENTROPY_TOO_HIGH = 0.26
+
 EXCEPTION_RE = r"^Domain (.*) didn't .*!$"
 RE_COMPILE = regex.compile(EXCEPTION_RE)
 COMMON_MALFORMED_PROTOCOLS = [
@@ -617,6 +627,30 @@ def mostly_img(s, site):
     return False, ""
 
 
+@create_rule("post is likely nonsense", title=False,
+             sites=["codegolf.stackexchange.com",
+                    "stackoverflow.com", "ja.stackoverflow.com", "pt.stackoverflow.com",
+                    "es.stackoverflow.com", "islam.stackexchange.com",
+                    "japanese.stackexchange.com", "anime.stackexchange.com",
+                    "hinduism.stackexchange.com", "judaism.stackexchange.com",
+                    "buddhism.stackexchange.com", "chinese.stackexchange.com",
+                    "french.stackexchange.com", "spanish.stackexchange.com",
+                    "portuguese.stackexchange.com", "korean.stackexchange.com",
+                    "ukrainian.stackexchange.com", "italian.stackexchange.com"],
+             max_rep=10000, max_score=10000)
+def nonsense(s, site):
+    """Return (True, reason) if the entropy-per-char metric of s is out of band; site is unused."""
+    from collections import Counter  # function-scope import: only this rule needs it
+    if not s or "pytest" in sys.modules:  # empty text, or the rule is switched off under pytest
+        return False, ""
+    total = len(s)  # a char seen n times adds n * p * log2(p): one O(n) pass, not s.count per char
+    weighted = sum(n * (n / total) * math.log2(n / total) for n in Counter(s).values())
+    entropy_per_char = -weighted / total
+    if entropy_per_char < ENTROPY_TOO_LOW or entropy_per_char > ENTROPY_TOO_HIGH:
+        return True, "Entropy per char is {:.4f}".format(entropy_per_char)
+    return False, ""
+
+
 # noinspection PyUnusedLocal,PyMissingTypeHints
 @create_rule("repeating characters in {}", stripcodeblocks=True, max_rep=10000, max_score=10000)
 def has_repeating_characters(s, site):