You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
89 lines
2.2 KiB
89 lines
2.2 KiB
import subprocess
|
|
from email.utils import format_datetime
|
|
from datetime import datetime
|
|
import quopri
|
|
|
|
# Directory holding the bogofilter word-list database (value of -d).
DB_DIR = "."

# Score cutoffs, passed to bogofilter's -o option as "<MIN_SPAM>,<MAX_HAM>".
MAX_HAM = 0.07
MIN_SPAM = 0.25

# Tuning triple passed to bogofilter's -m option as "<MIN_DEV>,<ROBS>,<ROBX>".
# MIN_DEV is derived from the gap between the two cutoffs; ROBX is their
# midpoint.
MIN_DEV = 2 * (MIN_SPAM - MAX_HAM) * 0.375
ROBS = 0.0178
ROBX = (MAX_HAM + MIN_SPAM) / 2

# Base argument vector shared by every bogofilter invocation. Per-action
# flags are appended to this list by the caller. "-c /dev/null" points the
# config-file option at an empty file, so the settings come from this
# command line (see the bogofilter man page for the exact option semantics).
COMMAND = [
    "bogofilter",
    "-T",
    "-c", "/dev/null",
    "-d", DB_DIR,
    "-o", "{},{}".format(MIN_SPAM, MAX_HAM),
    "-m", "{},{},{}".format(MIN_DEV, ROBS, ROBX),
]
|
|
|
|
|
|
# Categories: single-letter codes naming the three classification outcomes.
SPAM = "S"
HAM = "H"
UNSURE = "U"
categories = [SPAM, HAM, UNSURE]

# Actions: integer identifiers for the operations that can be requested.
CLASSIFY = 0
REGISTER = 1
LEARN_SPAM = 2
UNLEARN_SPAM = 3
LEARN_HAM = 4
UNLEARN_HAM = 5

# Lookup table from an action id (or a category code) to the extra
# command-line flags that get appended to the base bogofilter command.
# An empty list means "no additional flags".
ACTIONS = {
    # Keyed by action id.
    CLASSIFY: [],
    REGISTER: ["-u"],
    LEARN_SPAM: ["-s"],
    UNLEARN_SPAM: ["-S"],
    LEARN_HAM: ["-n"],
    UNLEARN_HAM: ["-N"],

    # Keyed by category code: a category maps to the flag that learns a
    # message as that category (nothing for UNSURE).
    SPAM: ["-s"],
    HAM: ["-n"],
    UNSURE: [],
}
|
|
|
|
class BogofilterResult:
    """Outcome of one classification: a category code and its score."""

    def __init__(self, category, score):
        """Store the given category and score unchanged on the instance."""
        self.category, self.score = category, score
|
|
|
|
class Mail:
    """A minimal e-mail message: a header mapping plus an optional body.

    Attributes:
        headers: dict of header name -> value. Always contains "Date" and
            "Content-Type" (defaults are filled in when the caller does not
            supply them).
        body: message body text, or None.
    """

    def __init__(self, headers=None, body=None):
        """Create a message.

        Args:
            headers: optional mapping of header name -> value. Entries
                override the defaults ("Date" = current local time,
                "Content-Type" = HTML in UTF-8). The mapping is copied,
                never mutated.
            body: optional body text.
        """
        defaults = {
            "Date": datetime.now(),
            "Content-Type": "text/html; charset=\"UTF-8\"",
        }
        # `headers=None` instead of `{}` avoids a shared mutable default.
        self.headers = {**defaults, **(headers or {})}
        self.body = body

    def format(self):
        """Render the message as text: header lines, blank line, body.

        The "Subject" header is emitted as a UTF-8 quoted-printable encoded
        word. A "Date" header holding a datetime is rendered with
        email.utils.format_datetime; any other "Date" value (for example an
        already formatted string) is emitted as given.

        Returns:
            The full message text, terminated by a newline.
        """
        parts = []
        for key, value in self.headers.items():
            if key == "Subject":
                encoded = quopri.encodestring(
                    bytes(value, "utf-8"), header=True
                ).decode("utf-8")
                # The encoder wraps long output with soft line breaks
                # ("=" followed by a newline). Inside a header that would
                # split the Subject across lines, so drop them; a literal
                # "=" is always encoded as "=3D", so this only removes
                # soft breaks.
                encoded = encoded.replace("=\n", "")
                value = "=?utf-8?Q?{}?=".format(encoded)

            # Only datetime objects need formatting; pre-formatted strings
            # would otherwise make format_datetime raise.
            if key == "Date" and isinstance(value, datetime):
                value = format_datetime(value)

            parts.append("{key}: {value}\n".format(key=key, value=value))

        # Blank line separating the headers from the body.
        parts.append("\n")

        if self.body:
            parts.append(self.body)

        parts.append("\n")

        return "".join(parts)
|
|
|
|
def run(text, actions=(CLASSIFY,), category=UNSURE):
    """Pipe `text` through bogofilter and parse its answer.

    Args:
        text: message text fed to bogofilter on standard input.
        actions: iterable of keys into ACTIONS; the flags of each are
            appended, in order, to the base COMMAND.
        category: accepted for interface compatibility; not read by this
            function.

    Returns:
        A BogofilterResult when bogofilter prints exactly two
        whitespace-separated fields ("<category> <score>"), otherwise None
        (after printing any diagnostics).

    Raises:
        ValueError: if the second output field is not a valid float.
    """
    args = []
    for action in actions:
        args.extend(ACTIONS[action])

    cp = subprocess.run(
        COMMAND + args,
        capture_output=True,
        encoding="utf-8",
        input=text,
    )

    fields = cp.stdout.split()
    if len(fields) == 2:
        # A separate local keeps the `category` parameter from being rebound.
        result_category, score = fields
        return BogofilterResult(result_category, float(score))

    # No parsable classification. Exit status 3 is reported as an error
    # (bogofilter's documented error status; see its man page).
    if cp.returncode == 3:
        print("Bogofilter error")

    # Show whatever bogofilter wrote to stderr. The check is on stderr
    # itself, so diagnostics are not lost when stdout is empty.
    diagnostics = cp.stderr.strip()
    if diagnostics:
        print(diagnostics)

    return None
|
|
|