|
|
|
@ -7,11 +7,25 @@ import json |
|
|
|
|
import bogofilter |
|
|
|
|
import html2text |
|
|
|
|
from collections import deque |
|
|
|
|
import toml |
|
|
|
|
|
|
|
|
|
from mastodon import Mastodon, MastodonNotFoundError |
|
|
|
|
|
|
|
|
|
from bot import Bot, BotClient |
|
|
|
|
|
|
|
|
|
SEASON = { |
|
|
|
|
**{ i : "spring" for i in range(3, 6) }, |
|
|
|
|
**{ i : "summer" for i in range(6, 9) }, |
|
|
|
|
**{ i : "autumn" for i in range(9, 12) }, |
|
|
|
|
**{ i : "winter" for i in [12, 1, 2] }} |
|
|
|
|
|
|
|
|
|
TIME_OF_DAY = { |
|
|
|
|
**{ i : "night" for i in range(0, 4) }, |
|
|
|
|
**{ i : "early" for i in range(4, 8) }, |
|
|
|
|
**{ i : "morning" for i in range(8, 12) }, |
|
|
|
|
**{ i : "afternoon" for i in range(12, 18) }, |
|
|
|
|
**{ i : "evening" for i in range(18, 24) }} |
|
|
|
|
|
|
|
|
|
class CringeBotClient(BotClient): |
|
|
|
|
def __init__(self, bot, config): |
|
|
|
|
# Initialise HTML-to-Markdown converter |
|
|
|
@ -21,15 +35,20 @@ class CringeBotClient(BotClient): |
|
|
|
|
# Create scheduler for deferred deletion of posts |
|
|
|
|
self.deletion_scheduler = sched.scheduler(time.time, time.sleep) |
|
|
|
|
|
|
|
|
|
super().__init__(bot, config) |
|
|
|
|
super().__init__(bot, {"register": False, **config}) |
|
|
|
|
|
|
|
|
|
# Send DM reply to message, appropriately tagged, and schedules it for deferred deletion |
|
|
|
|
def respond(self, status, message): |
|
|
|
|
def respond(self, status, message, context): |
|
|
|
|
self.log("Responding with:") |
|
|
|
|
self.log(message) |
|
|
|
|
reply = self.api.status_reply(status, "{}\n{}".format(message, self.config["tag"]), visibility = "direct", untag = True) |
|
|
|
|
self.log() |
|
|
|
|
|
|
|
|
|
reply = self.api.status_reply(status, message, visibility = "direct", untag = True) |
|
|
|
|
|
|
|
|
|
self.state["own"][reply["id"]] = context |
|
|
|
|
self.enqueue_deletion(reply["id"]) |
|
|
|
|
time.sleep(1) |
|
|
|
|
|
|
|
|
|
time.sleep(self.config["rate_limit"]) |
|
|
|
|
|
|
|
|
|
def on_start(self): |
|
|
|
|
self.deletion_report() |
|
|
|
@ -58,121 +77,117 @@ class CringeBotClient(BotClient): |
|
|
|
|
self.state["based"].discard(status_id) |
|
|
|
|
self.state["unsure"].discard(status_id) |
|
|
|
|
|
|
|
|
|
# Look for and process commands in the replies to the bot's notifications and return True if commands were processed |
|
|
|
|
# Look for replies to the bot and return True if commands were processed |
|
|
|
|
def process_commands(self, status): |
|
|
|
|
# Check if status is a reply to another status |
|
|
|
|
replied_id = status.get("in_reply_to_id", None) |
|
|
|
|
if not replied_id: |
|
|
|
|
return False |
|
|
|
|
status_id = status["id"] |
|
|
|
|
parent_id = status["in_reply_to_id"] |
|
|
|
|
|
|
|
|
|
try: |
|
|
|
|
# Fetch replied-to status |
|
|
|
|
replied_status = self.api.status(replied_id) |
|
|
|
|
replied_tokens = self.h2t.handle(replied_status["content"]).split() |
|
|
|
|
# Check if bot owns the parent status |
|
|
|
|
if parent_id not in self.state["own"]: |
|
|
|
|
return False |
|
|
|
|
|
|
|
|
|
# Check if it belongs to the bot |
|
|
|
|
if not self.config["tag"] in replied_tokens: |
|
|
|
|
return False |
|
|
|
|
context = self.state["own"][parent_id] |
|
|
|
|
event = context["event"] |
|
|
|
|
|
|
|
|
|
status_id = status["id"] |
|
|
|
|
target_id = context["target"] |
|
|
|
|
|
|
|
|
|
# Enqueue user command status for deletion |
|
|
|
|
self.enqueue_deletion(status_id) |
|
|
|
|
# Enqueue command status for deletion |
|
|
|
|
self.enqueue_deletion(status_id) |
|
|
|
|
|
|
|
|
|
try: |
|
|
|
|
command = self.h2t.handle(status["content"]).strip() |
|
|
|
|
self.log("Received command: {}".format(command)) |
|
|
|
|
|
|
|
|
|
# Find the intended target of the command (the status that the bot originally replied to with a classification) |
|
|
|
|
target_status_id = replied_status.get("in_reply_to_id", None) |
|
|
|
|
if not target_status_id: |
|
|
|
|
self.respond(status, "Target status is missing") |
|
|
|
|
return True |
|
|
|
|
|
|
|
|
|
try: |
|
|
|
|
command = self.h2t.handle(status["content"]).strip() |
|
|
|
|
self.log("Received command: {}".format(command)) |
|
|
|
|
|
|
|
|
|
# Fetch the target status |
|
|
|
|
target_status = self.api.status(target_status_id) |
|
|
|
|
target_mail_text = toot_dict_to_mail(target_status).format() |
|
|
|
|
|
|
|
|
|
# Check if target status was previously classified |
|
|
|
|
was_cringe = target_status_id in self.state["cringe"] |
|
|
|
|
was_based = target_status_id in self.state["based"] |
|
|
|
|
was_unsure = target_status_id in self.state["unsure"] |
|
|
|
|
|
|
|
|
|
tokens = deque(command.split()) |
|
|
|
|
while True: |
|
|
|
|
token = tokens.popleft() |
|
|
|
|
if token == "cringe": |
|
|
|
|
if was_cringe: |
|
|
|
|
break |
|
|
|
|
elif was_based: |
|
|
|
|
bogofilter.run(target_mail_text, [bogofilter.UNLEARN_HAM, bogofilter.LEARN_SPAM]) |
|
|
|
|
else: |
|
|
|
|
bogofilter.run(target_mail_text, [bogofilter.LEARN_SPAM]) |
|
|
|
|
|
|
|
|
|
self.set_cringe(target_status_id) |
|
|
|
|
self.enqueue_deletion(target_status_id) |
|
|
|
|
self.respond(status, "Learned as cringe") |
|
|
|
|
|
|
|
|
|
# Fetch the target status |
|
|
|
|
target_status = self.api.status(target_id) |
|
|
|
|
target_mail_text = toot_dict_to_mail(target_status).format() |
|
|
|
|
|
|
|
|
|
# Check if target status was previously classified |
|
|
|
|
was_cringe = target_id in self.state["cringe"] |
|
|
|
|
was_based = target_id in self.state["based"] |
|
|
|
|
was_unsure = target_id in self.state["unsure"] |
|
|
|
|
|
|
|
|
|
tokens = deque(command.split()) |
|
|
|
|
while True: |
|
|
|
|
token = tokens.popleft() |
|
|
|
|
if token == "cringe": |
|
|
|
|
if event not in ["categorise", "learn"]: |
|
|
|
|
self.respond(status, "Status is not learnable", {"event": "error", "type": "learnable", "target": target_id}) |
|
|
|
|
return True |
|
|
|
|
elif was_cringe: |
|
|
|
|
break |
|
|
|
|
|
|
|
|
|
elif token == "based": |
|
|
|
|
if was_based: |
|
|
|
|
break |
|
|
|
|
elif was_cringe: |
|
|
|
|
bogofilter.run(target_mail_text, [bogofilter.UNLEARN_SPAM, bogofilter.LEARN_HAM]) |
|
|
|
|
else: |
|
|
|
|
bogofilter.run(target_mail_text, [bogofilter.LEARN_HAM]) |
|
|
|
|
|
|
|
|
|
self.set_based(target_status_id) |
|
|
|
|
self.unqueue_deletion(target_status_id) |
|
|
|
|
self.respond(status, "Learned as based") |
|
|
|
|
|
|
|
|
|
elif was_based: |
|
|
|
|
bogofilter.run(target_mail_text, [bogofilter.UNLEARN_HAM, bogofilter.LEARN_SPAM]) |
|
|
|
|
else: |
|
|
|
|
bogofilter.run(target_mail_text, [bogofilter.LEARN_SPAM]) |
|
|
|
|
|
|
|
|
|
self.set_cringe(target_id) |
|
|
|
|
self.enqueue_deletion(target_id) |
|
|
|
|
self.respond(status, "Learned as cringe", {"event": "learn", "target": target_id}) |
|
|
|
|
|
|
|
|
|
break |
|
|
|
|
|
|
|
|
|
elif token == "based": |
|
|
|
|
if event not in ["categorise", "learn"]: |
|
|
|
|
self.respond(status, "Status is not learnable", {"event": "error", "type": "learnable", "target": target_id}) |
|
|
|
|
return True |
|
|
|
|
elif was_based: |
|
|
|
|
break |
|
|
|
|
|
|
|
|
|
elif token == "unlearn": |
|
|
|
|
if was_unsure: |
|
|
|
|
break |
|
|
|
|
elif was_cringe: |
|
|
|
|
bogofilter.run(target_mail_text, [bogofilter.UNLEARN_SPAM]) |
|
|
|
|
elif was_based: |
|
|
|
|
bogofilter.run(target_mail_text, [bogofilter.UNLEARN_HAM]) |
|
|
|
|
|
|
|
|
|
self.set_unsure(target_status_id) |
|
|
|
|
self.unqueue_deletion(target_status_id) |
|
|
|
|
self.respond(status, "Unlearned") |
|
|
|
|
|
|
|
|
|
elif was_cringe: |
|
|
|
|
bogofilter.run(target_mail_text, [bogofilter.UNLEARN_SPAM, bogofilter.LEARN_HAM]) |
|
|
|
|
else: |
|
|
|
|
bogofilter.run(target_mail_text, [bogofilter.LEARN_HAM]) |
|
|
|
|
|
|
|
|
|
self.set_based(target_id) |
|
|
|
|
self.unqueue_deletion(target_id) |
|
|
|
|
self.respond(status, "Learned as based", {"event": "learn", "target": target_id}) |
|
|
|
|
|
|
|
|
|
break |
|
|
|
|
|
|
|
|
|
elif token == "unlearn": |
|
|
|
|
if event not in ["categorise", "learn"]: |
|
|
|
|
self.respond(status, "Status is not learnable", {"event": "error", "type": "learnable", "target": target_id}) |
|
|
|
|
return True |
|
|
|
|
elif was_unsure: |
|
|
|
|
break |
|
|
|
|
elif was_cringe: |
|
|
|
|
bogofilter.run(target_mail_text, [bogofilter.UNLEARN_SPAM]) |
|
|
|
|
elif was_based: |
|
|
|
|
bogofilter.run(target_mail_text, [bogofilter.UNLEARN_HAM]) |
|
|
|
|
|
|
|
|
|
except IndexError: |
|
|
|
|
self.respond(status, "Invalid command") |
|
|
|
|
except MastodonNotFoundError: |
|
|
|
|
self.respond(status, "Target status is missing") |
|
|
|
|
self.set_unsure(target_id) |
|
|
|
|
self.unqueue_deletion(target_id) |
|
|
|
|
self.respond(status, "Unlearned", {"event": "learn", "target": target_id}) |
|
|
|
|
|
|
|
|
|
return True |
|
|
|
|
break |
|
|
|
|
|
|
|
|
|
except IndexError: |
|
|
|
|
self.respond(status, "Invalid command", {"event": "error", "type": "syntax", "target": target_id}) |
|
|
|
|
except MastodonNotFoundError: |
|
|
|
|
return False |
|
|
|
|
self.respond(status, "Could not fetch target status", {"event": "error", "type": "fetch", "target": target_id}) |
|
|
|
|
|
|
|
|
|
return True |
|
|
|
|
|
|
|
|
|
def on_status(self, status): |
|
|
|
|
# Ignore statuses from other accounts |
|
|
|
|
if status["account"]["id"] != self.api.me()["id"]: |
|
|
|
|
return |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Ignore statuses this account boosts |
|
|
|
|
if status["reblog"]: |
|
|
|
|
return |
|
|
|
|
|
|
|
|
|
status_id = status["id"] |
|
|
|
|
|
|
|
|
|
# Extract plain text |
|
|
|
|
md_text = self.h2t.handle(status["content"]) |
|
|
|
|
|
|
|
|
|
# Ignore bot's own statuses |
|
|
|
|
if self.config["tag"] in md_text.split(): |
|
|
|
|
if status_id in self.state["own"]: |
|
|
|
|
return |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Create faux HTML email of status |
|
|
|
|
mail_text = toot_dict_to_mail(status).format() |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Format and log plain-text preview |
|
|
|
|
md_text = self.h2t.handle(status["content"]) |
|
|
|
|
preview = toot_dict_to_mail(status) |
|
|
|
|
preview.body = md_text |
|
|
|
|
preview_text = preview.format() |
|
|
|
@ -185,22 +200,24 @@ class CringeBotClient(BotClient): |
|
|
|
|
if self.process_commands(status): |
|
|
|
|
return |
|
|
|
|
|
|
|
|
|
result = bogofilter.run(mail_text, [bogofilter.CLASSIFY, bogofilter.REGISTER]) |
|
|
|
|
result = bogofilter.run(mail_text, [bogofilter.CLASSIFY, bogofilter.REGISTER] if self.config["register"] else [bogofilter.CLASSIFY]) |
|
|
|
|
bogo_report = "Bogofilter: Category={}, Score={}".format(result.category, "{:.4f}".format(result.score)) |
|
|
|
|
status_id = status["id"] |
|
|
|
|
if result.category == bogofilter.SPAM: |
|
|
|
|
self.log("CRINGE: Enqueuing status {} for deletion".format(status_id)) |
|
|
|
|
self.set_cringe(status_id) |
|
|
|
|
if self.config["register"]: |
|
|
|
|
self.set_cringe(status_id) |
|
|
|
|
self.enqueue_deletion(status_id) |
|
|
|
|
self.respond(status, "Categorised as cringe\n{}".format(bogo_report)) |
|
|
|
|
self.respond(status, "Categorised as cringe\n{}".format(bogo_report), context = {"event": "categorise", "target": status_id}) |
|
|
|
|
elif result.category == bogofilter.HAM: |
|
|
|
|
self.log("BASED: Not enqueueing status {} for deletion".format(status_id)) |
|
|
|
|
self.set_based(status_id) |
|
|
|
|
self.respond(status, "Categorised as based\n{}".format(bogo_report)) |
|
|
|
|
if self.config["register"]: |
|
|
|
|
self.set_based(status_id) |
|
|
|
|
self.respond(status, "Categorised as based\n{}".format(bogo_report), context = {"event": "categorise", "target": status_id}) |
|
|
|
|
else: |
|
|
|
|
self.log("UNSURE: Not enqueueing status {} for deletion".format(status_id)) |
|
|
|
|
self.set_unsure(status_id) |
|
|
|
|
self.respond(status, "Categorised as unsure\n{}".format(bogo_report)) |
|
|
|
|
if self.config["register"]: |
|
|
|
|
self.set_unsure(status_id) |
|
|
|
|
self.respond(status, "Categorised as unsure\n{}".format(bogo_report), context = {"event": "categorise", "target": status_id}) |
|
|
|
|
|
|
|
|
|
def on_load_state(self): |
|
|
|
|
state = { |
|
|
|
@ -208,6 +225,7 @@ class CringeBotClient(BotClient): |
|
|
|
|
"cringe": [], |
|
|
|
|
"based": [], |
|
|
|
|
"unsure": [], |
|
|
|
|
"own": {}, |
|
|
|
|
**super().on_load_state()} |
|
|
|
|
|
|
|
|
|
state["cringe"] = set(state["cringe"]) |
|
|
|
@ -233,8 +251,10 @@ class CringeBotClient(BotClient): |
|
|
|
|
def deletion_report(self): |
|
|
|
|
self.log("{} status(es) queued for deletion".format(len(self.deletion_scheduler.queue))) |
|
|
|
|
|
|
|
|
|
def enqueue_deletion(self, status_id): |
|
|
|
|
self.state["deletion_queue"][status_id] = {"scheduler_event": self.deletion_scheduler.enter(60 * self.config["max_age"], 1, self.queued_delete, argument=(status_id,), kwargs={})} |
|
|
|
|
def enqueue_deletion(self, status_id, delay = None): |
|
|
|
|
if delay is None: |
|
|
|
|
delay = 60 * self.config["max_age"] |
|
|
|
|
self.state["deletion_queue"][status_id] = {"scheduler_event": self.deletion_scheduler.enter(delay, 1, self.queued_delete, argument=(status_id,), kwargs={})} |
|
|
|
|
self.deletion_report() |
|
|
|
|
|
|
|
|
|
def unqueue_deletion(self, status_id): |
|
|
|
@ -249,77 +269,51 @@ class CringeBotClient(BotClient): |
|
|
|
|
def queued_delete(self, status_id): |
|
|
|
|
try: |
|
|
|
|
self.log("Deleting status {}".format(status_id)) |
|
|
|
|
self.set_discard(status_id) |
|
|
|
|
self.api.status_delete(status_id) |
|
|
|
|
self.set_discard(status_id) |
|
|
|
|
if status_id in self.state["own"]: |
|
|
|
|
del self.state["own"][status_id] |
|
|
|
|
except MastodonNotFoundError: |
|
|
|
|
self.log("Cannot find status {} on server".format(status_id)) |
|
|
|
|
self.set_discard(status_id) |
|
|
|
|
except Exception: |
|
|
|
|
self.log(traceback.format_exc()) |
|
|
|
|
self.enqueue_deletion(status_id, 300) |
|
|
|
|
|
|
|
|
|
def toot_dict_to_mail(toot_dict): |
|
|
|
|
flags = [] |
|
|
|
|
|
|
|
|
|
if toot_dict.get("sensitive", False): |
|
|
|
|
flags.append(toot_dict["visibility"]) |
|
|
|
|
|
|
|
|
|
if toot_dict["sensitive"]: |
|
|
|
|
flags.append("sensitive") |
|
|
|
|
|
|
|
|
|
if toot_dict.get("poll", False): |
|
|
|
|
if toot_dict["poll"]: |
|
|
|
|
flags.append("poll") |
|
|
|
|
|
|
|
|
|
if toot_dict.get("reblog", False): |
|
|
|
|
flags.append("reblog") |
|
|
|
|
|
|
|
|
|
if toot_dict.get("reblogged", False): |
|
|
|
|
flags.append("reblogged") |
|
|
|
|
|
|
|
|
|
if toot_dict.get("favourited", False): |
|
|
|
|
flags.append("favourited") |
|
|
|
|
|
|
|
|
|
if toot_dict.get("bookmarked", False): |
|
|
|
|
flags.append("bookmarked") |
|
|
|
|
|
|
|
|
|
if toot_dict.get("pinned", False): |
|
|
|
|
flags.append("pinned") |
|
|
|
|
|
|
|
|
|
flags = ", ".join(flags) |
|
|
|
|
|
|
|
|
|
headers = {} |
|
|
|
|
|
|
|
|
|
if toot_dict.get("account") and toot_dict["account"].get("acct"): |
|
|
|
|
headers["From"] = toot_dict["account"]["acct"] |
|
|
|
|
|
|
|
|
|
if toot_dict.get("created_at"): |
|
|
|
|
headers["Date"] = toot_dict["created_at"] |
|
|
|
|
|
|
|
|
|
if toot_dict.get("visibility"): |
|
|
|
|
headers["X-Visibility"] = toot_dict["visibility"] |
|
|
|
|
|
|
|
|
|
if len(toot_dict["media_attachments"]) > 0: |
|
|
|
|
flags.append("attachments") |
|
|
|
|
|
|
|
|
|
if len(flags) > 0: |
|
|
|
|
headers["X-Flags"] = flags |
|
|
|
|
time = [] |
|
|
|
|
now = datetime.now() |
|
|
|
|
time.append(SEASON[now.month]) |
|
|
|
|
time.append(TIME_OF_DAY[now.hour]) |
|
|
|
|
|
|
|
|
|
if toot_dict.get("spoiler_text"): |
|
|
|
|
headers = {} |
|
|
|
|
|
|
|
|
|
headers["From"] = toot_dict["account"]["acct"] |
|
|
|
|
headers["X-Flags"] = ", ".join(flags) |
|
|
|
|
headers["X-Time"] = ", ".join(time) |
|
|
|
|
if len(toot_dict["spoiler_text"]) > 0: |
|
|
|
|
headers["Subject"] = toot_dict["spoiler_text"] |
|
|
|
|
|
|
|
|
|
if toot_dict.get("replies_count", 0) > 0: |
|
|
|
|
headers["X-Replies-Count"] = toot_dict["replies_count"] |
|
|
|
|
|
|
|
|
|
if len(toot_dict.get("media_attachments", [])) > 0: |
|
|
|
|
headers["X-Attachments-Count"] = len(toot_dict["media_attachments"]) |
|
|
|
|
|
|
|
|
|
if toot_dict.get("reblogs_count", 0) > 0: |
|
|
|
|
headers["X-Reblogs-Count"] = toot_dict["reblogs_count"] |
|
|
|
|
|
|
|
|
|
if toot_dict.get("favourites_count", 0) > 0: |
|
|
|
|
headers["X-Favourites-Count"] = toot_dict["favourites_count"] |
|
|
|
|
|
|
|
|
|
if toot_dict.get("content") and len(toot_dict["content"]) > 0: |
|
|
|
|
body = toot_dict["content"] |
|
|
|
|
else: |
|
|
|
|
body = None |
|
|
|
|
body = toot_dict["content"] |
|
|
|
|
|
|
|
|
|
return bogofilter.Mail(headers = headers, body = body) |
|
|
|
|
|
|
|
|
|
with open("config.json") as json_file: |
|
|
|
|
bot = Bot(CringeBotClient, json.load(json_file)) |
|
|
|
|
bot = Bot(CringeBotClient, toml.load("config.toml")) |
|
|
|
|
#with open("config.json") as json_file: |
|
|
|
|
# bot = Bot(CringeBotClient, json.load(json_file)) |
|
|
|
|
bot.start() |
|
|
|
|
|
|
|
|
|
while True: |
|
|
|
|