|
|
@ -361,7 +361,7 @@ def toot_dict_to_mail(toot_dict): |
|
|
|
try: |
|
|
|
try: |
|
|
|
with urllib.request.urlopen(media["url"]) as image: |
|
|
|
with urllib.request.urlopen(media["url"]) as image: |
|
|
|
ocr_text = pytesseract.image_to_string(Image.open(image)) |
|
|
|
ocr_text = pytesseract.image_to_string(Image.open(image)) |
|
|
|
words = re.findall(r"\w+", ocr_text) |
|
|
|
words = re.findall(r"[^\s]+", ocr_text) |
|
|
|
tokens = ["ocr_" + word.lower() for word in words] |
|
|
|
tokens = ["ocr_" + word.lower() for word in words] |
|
|
|
body += "\n\n" + " ".join(tokens) |
|
|
|
body += "\n\n" + " ".join(tokens) |
|
|
|
except Exception: |
|
|
|
except Exception: |
|
|
|