diff --git a/cringebot.py b/cringebot.py
index 3a960c3..9082504 100644
--- a/cringebot.py
+++ b/cringebot.py
@@ -326,6 +326,24 @@ class CringeBotClient(BotClient):
             self.log(traceback.format_exc())
             self.enqueue_deletion(status_id, 300)
 
+# Bogofilter lexer uses these regular expressions:
+#   FRONT_CHAR  [^[:blank:][:cntrl:][:digit:][:punct:]]
+#   MID_CHAR    [^[:blank:][:cntrl:]:$*<>;=()&%#@+|/\\{}^\"?,\[\]]
+#   BACK_CHAR   [^[:blank:][:cntrl:]:$*<>;=()&%#@+|/\\{}^\"?,\[\]._~\'\`\-]
+#   TOKEN       {FRONT_CHAR}({MID_CHAR}*{BACK_CHAR})?
+
+# C standard library defines these character classes:
+#   ispunct()   !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~
+#   iscntrl()   Between ASCII codes 0x00 (NUL) and 0x1f (US), plus 0x7f (DEL)
+
+# Construct the token regex from the above (group 1: whole token, group 2: optional tail):
+PUNCT = r"""!"#\$%&'\(\)\*\+,\-\./:;<=>\?@\[\\\]\^_`\{\|\}~"""
+CNTRL = r"\x00-\x1f\x7f"
+FRONT_CHAR = rf"[^\s{CNTRL}\d{PUNCT}]"
+MID_CHAR = rf"""[^\s{CNTRL}:\$\*<>;=\(\)&%#@\+\|/\\\{{\}}\^"\?,\[\]]"""
+BACK_CHAR = rf"""[^\s{CNTRL}:\$\*<>;=\(\)&%#@\+\|/\\\{{\}}\^"\?,\[\]\._~'`\-]"""
+TOKEN = f"({FRONT_CHAR}({MID_CHAR}*{BACK_CHAR})?)"
+
 def toot_dict_to_mail(toot_dict):
     flags = []
     
@@ -361,8 +379,10 @@ def toot_dict_to_mail(toot_dict):
                 try:
                     with urllib.request.urlopen(media["url"]) as image:
                         ocr_text = pytesseract.image_to_string(Image.open(image))
-                        words = re.findall(r"[^\s]+", ocr_text)
-                        tokens = ["ocr_" + word.lower() for word in words]
+                        # findall() yields (whole token, optional tail) pairs; an empty
+                        # tail means a one-character token, which is dropped.
+                        words = [whole for whole, tail in re.findall(TOKEN, ocr_text) if tail]
+                        tokens = ["ocr`" + word.lower() for word in words]
                         body += "\n\n" + " ".join(tokens)
                 except Exception:
                     print("Skipping OCR on attachment due to exception")