|
|
|
@ -27,6 +27,15 @@ TIME_OF_DAY = { |
|
|
|
|
|
|
|
|
|
class CringeBotClient(BotClient): |
|
|
|
|
def __init__(self, bot, config): |
|
|
|
|
config = { |
|
|
|
|
"db_dir": ".", |
|
|
|
|
"cringe_dir": "data/cringe", |
|
|
|
|
"based_dir": "data/based", |
|
|
|
|
"unsure_dir": "data/unsure", |
|
|
|
|
**config} |
|
|
|
|
|
|
|
|
|
self.db_dir = os.path.join(os.path.dirname(sys.argv[0]), config["db_dir"]) |
|
|
|
|
|
|
|
|
|
# Initialise HTML-to-Markdown converter |
|
|
|
|
self.h2t = html2text.HTML2Text() |
|
|
|
|
self.h2t.ignore_links = True |
|
|
|
@ -56,26 +65,46 @@ class CringeBotClient(BotClient): |
|
|
|
|
# Perform any scheduled deletes |
|
|
|
|
self.deletion_scheduler.run(blocking = False) |
|
|
|
|
|
|
|
|
|
def set_cringe(self, status_id):
    """Record status_id under the "cringe" entry of self.state.

    The ID is added to self.state["cringe"] and then discarded from
    self.state["based"] and self.state["unsure"], so it ends up in
    exactly one of the three collections.
    """
    buckets = self.state
    buckets["cringe"].add(status_id)
    for other in ("based", "unsure"):
        buckets[other].discard(status_id)
|
|
|
|
def get_cringe_path(self, status_id):
    """Return the file path used for status_id inside the cringe directory.

    The path is built from the directory of sys.argv[0], the configured
    "cringe_dir" and the status ID. The ID is passed through str() first
    because os.path.join raises TypeError for non-string components
    (for example an integer ID); string IDs are unaffected.
    """
    base_dir = os.path.dirname(sys.argv[0])
    return os.path.join(base_dir, self.config["cringe_dir"], str(status_id))
|
|
|
|
|
|
|
|
|
def set_based(self, status_id):
    """Record status_id under the "based" entry of self.state.

    Walks the three collections in the order cringe, based, unsure:
    the ID is added to "based" and discarded from the other two.
    """
    buckets = self.state
    for label in ("cringe", "based", "unsure"):
        if label == "based":
            buckets[label].add(status_id)
        else:
            buckets[label].discard(status_id)
|
|
|
|
|
|
|
|
|
def set_unsure(self, status_id):
    """Record status_id under the "unsure" entry of self.state.

    Walks the three collections in the order cringe, based, unsure:
    the ID is discarded from "cringe" and "based" and added to "unsure".
    """
    buckets = self.state
    for label in ("cringe", "based", "unsure"):
        if label == "unsure":
            buckets[label].add(status_id)
        else:
            buckets[label].discard(status_id)
|
|
|
|
|
|
|
|
|
def set_discard(self, status_id):
    """Forget status_id: discard it from all three state collections.

    Uses discard(), so an ID that is absent from a collection is
    silently ignored.
    """
    buckets = self.state
    for label in ("cringe", "based", "unsure"):
        buckets[label].discard(status_id)
|
|
|
|
def get_based_path(self, status_id):
    """Return the file path used for status_id inside the based directory.

    The path is built from the directory of sys.argv[0], the configured
    "based_dir" and the status ID. The ID is passed through str() first
    because os.path.join raises TypeError for non-string components
    (for example an integer ID); string IDs are unaffected.
    """
    base_dir = os.path.dirname(sys.argv[0])
    return os.path.join(base_dir, self.config["based_dir"], str(status_id))
|
|
|
|
|
|
|
|
|
def get_unsure_path(self, status_id):
    """Return the file path used for status_id inside the unsure directory.

    The path is built from the directory of sys.argv[0], the configured
    "unsure_dir" and the status ID. The ID is passed through str() first
    because os.path.join raises TypeError for non-string components
    (for example an integer ID); string IDs are unaffected.
    """
    base_dir = os.path.dirname(sys.argv[0])
    return os.path.join(base_dir, self.config["unsure_dir"], str(status_id))
|
|
|
|
|
|
|
|
|
def is_cringe(self, status_id):
    """Return the cringe file path for status_id if that file exists.

    Despite the name, the result is not a bool: it is the path from
    get_cringe_path() when os.path.isfile() confirms a regular file
    there, and None otherwise.
    """
    candidate = self.get_cringe_path(status_id)
    if not os.path.isfile(candidate):
        return None
    return candidate
|
|
|
|
|
|
|
|
|
def is_based(self, status_id):
    """Return the based file path for status_id if that file exists.

    Despite the name, the result is not a bool: it is the path from
    get_based_path() when os.path.isfile() confirms a regular file
    there, and None otherwise.
    """
    candidate = self.get_based_path(status_id)
    if not os.path.isfile(candidate):
        return None
    return candidate
|
|
|
|
|
|
|
|
|
def is_unsure(self, status_id):
    """Return the unsure file path for status_id if that file exists.

    Despite the name, the result is not a bool: it is the path from
    get_unsure_path() when os.path.isfile() confirms a regular file
    there, and None otherwise.
    """
    candidate = self.get_unsure_path(status_id)
    if not os.path.isfile(candidate):
        return None
    return candidate
|
|
|
|
|
|
|
|
|
def get_category_path(self, status_id):
    """Return the path of the category file that exists for status_id.

    The categories are probed in the order cringe, based, unsure, and
    probing stops at the first truthy result. Returns None when none
    of the three probes yields a truthy value.
    """
    hit = self.is_cringe(status_id)
    if not hit:
        hit = self.is_based(status_id)
    if not hit:
        hit = self.is_unsure(status_id)
    return hit if hit else None
|
|
|
|
|
|
|
|
|
def delete_and_write(self, delete_path, write_path, text, mode = "a+"):
    """Remove an old file (if any) and write text to write_path.

    delete_path: path of a file to remove first; skipped when falsy or
        when no regular file exists there.
    write_path: destination file; its parent directory is created when
        missing.
    text: string written to the destination.
    mode: mode passed to open(); defaults to "a+" (append).

    The file is opened with UTF-8 encoding.
    """
    if delete_path and os.path.isfile(delete_path):
        os.remove(delete_path)
    # os.makedirs("") raises FileNotFoundError, so only create the parent
    # directory when write_path actually has a directory component.
    parent_dir = os.path.dirname(write_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok = True)
    with open(write_path, mode, encoding = "utf-8") as text_file:
        text_file.write(text)
|
|
|
|
|
|
|
|
|
def make_cringe(self, status_id, text):
    """Store text for status_id as a file in the cringe directory.

    Whatever category file get_category_path() currently reports for
    the status is handed to delete_and_write() for removal, and text
    plus a trailing newline is written to the cringe path.
    """
    previous_path = self.get_category_path(status_id)
    target_path = self.get_cringe_path(status_id)
    self.delete_and_write(previous_path, target_path, text + "\n")
|
|
|
|
|
|
|
|
|
def make_based(self, status_id, text):
    """Store text for status_id as a file in the based directory.

    Whatever category file get_category_path() currently reports for
    the status is handed to delete_and_write() for removal, and text
    plus a trailing newline is written to the based path.
    """
    previous_path = self.get_category_path(status_id)
    target_path = self.get_based_path(status_id)
    self.delete_and_write(previous_path, target_path, text + "\n")
|
|
|
|
|
|
|
|
|
def make_unsure(self, status_id, text):
    """Store text for status_id as a file in the unsure directory.

    Whatever category file get_category_path() currently reports for
    the status is handed to delete_and_write() for removal, and text
    plus a trailing newline is written to the unsure path.
    """
    previous_path = self.get_category_path(status_id)
    target_path = self.get_unsure_path(status_id)
    self.delete_and_write(previous_path, target_path, text + "\n")
|
|
|
|
|
|
|
|
|
# Look for replies to the bot and return True if commands were processed |
|
|
|
|
def process_commands(self, status): |
|
|
|
|
status_id = status["id"] |
|
|
|
@ -101,11 +130,6 @@ class CringeBotClient(BotClient): |
|
|
|
|
target_status = self.api.status(target_id) |
|
|
|
|
target_mail_text = toot_dict_to_mail(target_status).format() |
|
|
|
|
|
|
|
|
|
# Check if target status was previously classified |
|
|
|
|
was_cringe = target_id in self.state["cringe"] |
|
|
|
|
was_based = target_id in self.state["based"] |
|
|
|
|
was_unsure = target_id in self.state["unsure"] |
|
|
|
|
|
|
|
|
|
tokens = deque(command.split()) |
|
|
|
|
while True: |
|
|
|
|
token = tokens.popleft() |
|
|
|
@ -113,14 +137,14 @@ class CringeBotClient(BotClient): |
|
|
|
|
if event not in ["categorise", "learn"]: |
|
|
|
|
self.respond(status, "Status is not learnable", {"event": "error", "type": "learnable", "target": target_id}) |
|
|
|
|
return True |
|
|
|
|
elif was_cringe: |
|
|
|
|
elif self.is_cringe(target_id): |
|
|
|
|
break |
|
|
|
|
elif was_based: |
|
|
|
|
bogofilter.run(target_mail_text, [bogofilter.UNLEARN_HAM, bogofilter.LEARN_SPAM]) |
|
|
|
|
elif self.is_based(target_id): |
|
|
|
|
bogofilter.run(target_mail_text, actions = [bogofilter.UNLEARN_HAM, bogofilter.LEARN_SPAM], db_dir = self.db_dir) |
|
|
|
|
else: |
|
|
|
|
bogofilter.run(target_mail_text, [bogofilter.LEARN_SPAM]) |
|
|
|
|
bogofilter.run(target_mail_text, actions = [bogofilter.LEARN_SPAM], db_dir = self.db_dir) |
|
|
|
|
|
|
|
|
|
self.set_cringe(target_id) |
|
|
|
|
self.make_cringe(target_id, target_mail_text) |
|
|
|
|
self.enqueue_deletion(target_id) |
|
|
|
|
self.respond(status, "Learned as cringe", {"event": "learn", "target": target_id}) |
|
|
|
|
|
|
|
|
@ -130,14 +154,14 @@ class CringeBotClient(BotClient): |
|
|
|
|
if event not in ["categorise", "learn"]: |
|
|
|
|
self.respond(status, "Status is not learnable", {"event": "error", "type": "learnable", "target": target_id}) |
|
|
|
|
return True |
|
|
|
|
elif was_based: |
|
|
|
|
elif self.is_based(target_id): |
|
|
|
|
break |
|
|
|
|
elif was_cringe: |
|
|
|
|
bogofilter.run(target_mail_text, [bogofilter.UNLEARN_SPAM, bogofilter.LEARN_HAM]) |
|
|
|
|
elif self.is_cringe(target_id): |
|
|
|
|
bogofilter.run(target_mail_text, actions = [bogofilter.UNLEARN_SPAM, bogofilter.LEARN_HAM], db_dir = self.db_dir) |
|
|
|
|
else: |
|
|
|
|
bogofilter.run(target_mail_text, [bogofilter.LEARN_HAM]) |
|
|
|
|
bogofilter.run(target_mail_text, actions = [bogofilter.LEARN_HAM], db_dir = self.db_dir) |
|
|
|
|
|
|
|
|
|
self.set_based(target_id) |
|
|
|
|
self.make_based(target_id, target_mail_text) |
|
|
|
|
self.unqueue_deletion(target_id) |
|
|
|
|
self.respond(status, "Learned as based", {"event": "learn", "target": target_id}) |
|
|
|
|
|
|
|
|
@ -147,14 +171,14 @@ class CringeBotClient(BotClient): |
|
|
|
|
if event not in ["categorise", "learn"]: |
|
|
|
|
self.respond(status, "Status is not learnable", {"event": "error", "type": "learnable", "target": target_id}) |
|
|
|
|
return True |
|
|
|
|
elif was_unsure: |
|
|
|
|
elif self.is_unsure(target_id): |
|
|
|
|
break |
|
|
|
|
elif was_cringe: |
|
|
|
|
bogofilter.run(target_mail_text, [bogofilter.UNLEARN_SPAM]) |
|
|
|
|
elif was_based: |
|
|
|
|
bogofilter.run(target_mail_text, [bogofilter.UNLEARN_HAM]) |
|
|
|
|
elif self.is_cringe(target_id): |
|
|
|
|
bogofilter.run(target_mail_text, actions = [bogofilter.UNLEARN_SPAM], db_dir = self.db_dir) |
|
|
|
|
elif self.is_based(target_id): |
|
|
|
|
bogofilter.run(target_mail_text, [bogofilter.UNLEARN_HAM], db_dir = self.db_dir) |
|
|
|
|
|
|
|
|
|
self.set_unsure(target_id) |
|
|
|
|
self.make_unsure(target_id, target_mail_text) |
|
|
|
|
self.unqueue_deletion(target_id) |
|
|
|
|
self.respond(status, "Unlearned", {"event": "learn", "target": target_id}) |
|
|
|
|
|
|
|
|
@ -188,7 +212,7 @@ class CringeBotClient(BotClient): |
|
|
|
|
# Format and log plain-text preview |
|
|
|
|
md_text = self.h2t.handle(status["content"]) |
|
|
|
|
preview = toot_dict_to_mail(status) |
|
|
|
|
preview.body = md_text |
|
|
|
|
preview.change_body(md_text) |
|
|
|
|
preview_text = preview.format() |
|
|
|
|
|
|
|
|
|
self.log() |
|
|
|
@ -199,23 +223,23 @@ class CringeBotClient(BotClient): |
|
|
|
|
if self.process_commands(status): |
|
|
|
|
return |
|
|
|
|
|
|
|
|
|
result = bogofilter.run(mail_text, [bogofilter.CLASSIFY, bogofilter.REGISTER] if self.config["register"] else [bogofilter.CLASSIFY]) |
|
|
|
|
result = bogofilter.run(mail_text, actions = [bogofilter.CLASSIFY, bogofilter.REGISTER] if self.config["register"] else [bogofilter.CLASSIFY], db_dir = self.db_dir) |
|
|
|
|
bogo_report = "Bogofilter: Category={}, Score={}".format(result.category, "{:.4f}".format(result.score)) |
|
|
|
|
if result.category == bogofilter.SPAM: |
|
|
|
|
self.log("CRINGE: Enqueuing status {} for deletion".format(status_id)) |
|
|
|
|
if self.config["register"]: |
|
|
|
|
self.set_cringe(status_id) |
|
|
|
|
self.make_cringe(status_id, mail_text) |
|
|
|
|
self.enqueue_deletion(status_id) |
|
|
|
|
self.respond(status, "Categorised as cringe\n{}".format(bogo_report), context = {"event": "categorise", "target": status_id}) |
|
|
|
|
elif result.category == bogofilter.HAM: |
|
|
|
|
self.log("BASED: Not enqueueing status {} for deletion".format(status_id)) |
|
|
|
|
if self.config["register"]: |
|
|
|
|
self.set_based(status_id) |
|
|
|
|
self.make_based(status_id, mail_text) |
|
|
|
|
self.respond(status, "Categorised as based\n{}".format(bogo_report), context = {"event": "categorise", "target": status_id}) |
|
|
|
|
else: |
|
|
|
|
self.log("UNSURE: Not enqueueing status {} for deletion".format(status_id)) |
|
|
|
|
if self.config["register"]: |
|
|
|
|
self.set_unsure(status_id) |
|
|
|
|
self.make_unsure(status_id, mail_text) |
|
|
|
|
self.respond(status, "Categorised as unsure\n{}".format(bogo_report), context = {"event": "categorise", "target": status_id}) |
|
|
|
|
|
|
|
|
|
def on_load_state(self): |
|
|
|
@ -269,12 +293,10 @@ class CringeBotClient(BotClient): |
|
|
|
|
try: |
|
|
|
|
self.log("Deleting status {}".format(status_id)) |
|
|
|
|
self.api.status_delete(status_id) |
|
|
|
|
self.set_discard(status_id) |
|
|
|
|
if status_id in self.state["own"]: |
|
|
|
|
del self.state["own"][status_id] |
|
|
|
|
except MastodonNotFoundError: |
|
|
|
|
self.log("Cannot find status {} on server".format(status_id)) |
|
|
|
|
self.set_discard(status_id) |
|
|
|
|
except Exception: |
|
|
|
|
self.log(traceback.format_exc()) |
|
|
|
|
self.enqueue_deletion(status_id, 300) |
|
|
|
|