Change to TOML config format, add time context bogofilter tokens

master
Thor 3 years ago
parent 5e3f2deb75
commit 08913045b2
  1. 2
      .gitignore
  2. 40
      config.toml.example
  3. 198
      cringebot.py

2
.gitignore vendored

@ -1,4 +1,4 @@
config.json config.toml
wordlist.db wordlist.db
__pycache__ __pycache__
.* .*

@ -0,0 +1,40 @@
name = "cringebot"
[defaults]
# Name of the application as listed in the account settings on the server
app_name = "Cringebot"
# Minimum seconds between requests to the server
rate_limit = 3
# Seconds between retrying failed requests to the server
retry_rate = 60
# Seconds between polling the server for updates
poll_interval = 15
# Minutes until cringe statuses are deleted
max_age = 90
[clients]
# By default, the client name is used as the hostname of the server and
# the base name of the files associated with the client, unless overridden
# below.
[clients."mastodon.social"]
# The URL of the server that the client connects to
#base_url = "https://mastodon.social"
# Where to store the authorisation key for the client
#client_file = "secret/mastodon.social.client"
# Where to store the authorisation key for the user account
#user_file = "secret/mastodon.social.user"
# Where to store the persisted state of the client
#state_file = "state/mastodon.social.state"
# Whether or not to register (learn from) each categorised status
# Enabling this gives the bot more data to work with, but errors
# in categorisation will self-reinforce over time if not corrected.
#register = false

@ -7,11 +7,25 @@ import json
import bogofilter import bogofilter
import html2text import html2text
from collections import deque from collections import deque
import toml
from mastodon import Mastodon, MastodonNotFoundError from mastodon import Mastodon, MastodonNotFoundError
from bot import Bot, BotClient from bot import Bot, BotClient
# Calendar month number (1-12) -> name of the season that month is counted in.
SEASON = {
    month: name
    for name, months in (
        ("spring", range(3, 6)),
        ("summer", range(6, 9)),
        ("autumn", range(9, 12)),
        ("winter", (12, 1, 2)),
    )
    for month in months
}

# Hour of the day (0-23) -> coarse time-of-day label.
TIME_OF_DAY = {
    hour: label
    for label, hours in (
        ("night", range(0, 4)),
        ("early", range(4, 8)),
        ("morning", range(8, 12)),
        ("afternoon", range(12, 18)),
        ("evening", range(18, 24)),
    )
    for hour in hours
}
class CringeBotClient(BotClient): class CringeBotClient(BotClient):
def __init__(self, bot, config): def __init__(self, bot, config):
# Initialise HTML-to-Markdown converter # Initialise HTML-to-Markdown converter
@ -21,15 +35,20 @@ class CringeBotClient(BotClient):
# Create scheduler for deferred deletion of posts # Create scheduler for deferred deletion of posts
self.deletion_scheduler = sched.scheduler(time.time, time.sleep) self.deletion_scheduler = sched.scheduler(time.time, time.sleep)
super().__init__(bot, config) super().__init__(bot, {"register": False, **config})
# Send a DM reply to a status, remember why it was sent, and schedule it for deferred deletion
def respond(self, status, message, context):
    """Reply to ``status`` with ``message`` as a direct message.

    The reply is remembered in ``self.state["own"]`` under its status ID so
    that later replies to it can be recognised as commands addressed to
    the bot, and it is queued for deferred deletion.

    Args:
        status: The status dict being replied to.
        message: Text of the reply.
        context: Dict stored alongside the reply's ID. ``process_commands``
            reads its ``"event"`` and ``"target"`` keys.
    """
    self.log("Responding with:")
    self.log(message)
    self.log()

    reply = self.api.status_reply(status, message, visibility="direct", untag=True)

    # Ownership is tracked by status ID rather than by a tag in the message text
    self.state["own"][reply["id"]] = context
    self.enqueue_deletion(reply["id"])

    # Pause for the configured interval instead of a hard-coded second
    time.sleep(self.config["rate_limit"])
def on_start(self): def on_start(self):
self.deletion_report() self.deletion_report()
@ -58,101 +77,97 @@ class CringeBotClient(BotClient):
self.state["based"].discard(status_id) self.state["based"].discard(status_id)
self.state["unsure"].discard(status_id) self.state["unsure"].discard(status_id)
# Look for replies to the bot and return True if commands were processed
def process_commands(self, status):
    """Treat ``status`` as a command if it replies to one of the bot's own statuses.

    The parent status is looked up in ``self.state["own"]``; its stored
    context names the target status the command applies to and the event
    that produced the bot's message. Recognised command words are
    ``cringe``, ``based`` and ``unlearn``; unrecognised words are skipped
    until one matches or the command text runs out.

    Args:
        status: The incoming status dict.

    Returns:
        False if the status does not reply to a status owned by the bot,
        True otherwise (including when the command was invalid or the
        target could not be fetched).
    """
    status_id = status["id"]
    parent_id = status["in_reply_to_id"]

    # Check if bot owns the parent status
    if parent_id not in self.state["own"]:
        return False

    context = self.state["own"][parent_id]
    event = context["event"]
    target_id = context["target"]

    # Only replies to a categorisation or a previous learn result may retrain
    learnable = event in ["categorise", "learn"]

    # Enqueue command status for deletion
    self.enqueue_deletion(status_id)

    try:
        command = self.h2t.handle(status["content"]).strip()
        self.log("Received command: {}".format(command))

        # Fetch the target status
        target_status = self.api.status(target_id)
        target_mail_text = toot_dict_to_mail(target_status).format()

        # Check if target status was previously classified
        was_cringe = target_id in self.state["cringe"]
        was_based = target_id in self.state["based"]
        was_unsure = target_id in self.state["unsure"]

        tokens = deque(command.split())
        while True:
            # Raises IndexError once the tokens run out without a known command
            token = tokens.popleft()

            if token == "cringe":
                if not learnable:
                    self.respond(status, "Status is not learnable", {"event": "error", "type": "learnable", "target": target_id})
                    return True
                elif was_cringe:
                    break
                elif was_based:
                    bogofilter.run(target_mail_text, [bogofilter.UNLEARN_HAM, bogofilter.LEARN_SPAM])
                else:
                    bogofilter.run(target_mail_text, [bogofilter.LEARN_SPAM])
                self.set_cringe(target_id)
                self.enqueue_deletion(target_id)
                self.respond(status, "Learned as cringe", {"event": "learn", "target": target_id})
                break

            elif token == "based":
                if not learnable:
                    self.respond(status, "Status is not learnable", {"event": "error", "type": "learnable", "target": target_id})
                    return True
                elif was_based:
                    break
                elif was_cringe:
                    bogofilter.run(target_mail_text, [bogofilter.UNLEARN_SPAM, bogofilter.LEARN_HAM])
                else:
                    bogofilter.run(target_mail_text, [bogofilter.LEARN_HAM])
                self.set_based(target_id)
                self.unqueue_deletion(target_id)
                self.respond(status, "Learned as based", {"event": "learn", "target": target_id})
                break

            elif token == "unlearn":
                if not learnable:
                    self.respond(status, "Status is not learnable", {"event": "error", "type": "learnable", "target": target_id})
                    return True
                elif was_unsure:
                    break
                elif was_cringe:
                    bogofilter.run(target_mail_text, [bogofilter.UNLEARN_SPAM])
                elif was_based:
                    bogofilter.run(target_mail_text, [bogofilter.UNLEARN_HAM])
                self.set_unsure(target_id)
                self.unqueue_deletion(target_id)
                self.respond(status, "Unlearned", {"event": "learn", "target": target_id})
                break

    except IndexError:
        self.respond(status, "Invalid command", {"event": "error", "type": "syntax", "target": target_id})

    except MastodonNotFoundError:
        self.respond(status, "Could not fetch target status", {"event": "error", "type": "fetch", "target": target_id})

    return True
def on_status(self, status): def on_status(self, status):
# Ignore statuses from other accounts # Ignore statuses from other accounts
if status["account"]["id"] != self.api.me()["id"]: if status["account"]["id"] != self.api.me()["id"]:
@ -162,17 +177,17 @@ class CringeBotClient(BotClient):
if status["reblog"]: if status["reblog"]:
return return
# Extract plain text status_id = status["id"]
md_text = self.h2t.handle(status["content"])
# Ignore bot's own statuses # Ignore bot's own statuses
if self.config["tag"] in md_text.split(): if status_id in self.state["own"]:
return return
# Create faux HTML email of status # Create faux HTML email of status
mail_text = toot_dict_to_mail(status).format() mail_text = toot_dict_to_mail(status).format()
# Format and log plain-text preview # Format and log plain-text preview
md_text = self.h2t.handle(status["content"])
preview = toot_dict_to_mail(status) preview = toot_dict_to_mail(status)
preview.body = md_text preview.body = md_text
preview_text = preview.format() preview_text = preview.format()
@ -185,22 +200,24 @@ class CringeBotClient(BotClient):
if self.process_commands(status): if self.process_commands(status):
return return
result = bogofilter.run(mail_text, [bogofilter.CLASSIFY, bogofilter.REGISTER]) result = bogofilter.run(mail_text, [bogofilter.CLASSIFY, bogofilter.REGISTER] if self.config["register"] else [bogofilter.CLASSIFY])
bogo_report = "Bogofilter: Category={}, Score={}".format(result.category, "{:.4f}".format(result.score)) bogo_report = "Bogofilter: Category={}, Score={}".format(result.category, "{:.4f}".format(result.score))
status_id = status["id"]
if result.category == bogofilter.SPAM: if result.category == bogofilter.SPAM:
self.log("CRINGE: Enqueuing status {} for deletion".format(status_id)) self.log("CRINGE: Enqueuing status {} for deletion".format(status_id))
if self.config["register"]:
self.set_cringe(status_id) self.set_cringe(status_id)
self.enqueue_deletion(status_id) self.enqueue_deletion(status_id)
self.respond(status, "Categorised as cringe\n{}".format(bogo_report)) self.respond(status, "Categorised as cringe\n{}".format(bogo_report), context = {"event": "categorise", "target": status_id})
elif result.category == bogofilter.HAM: elif result.category == bogofilter.HAM:
self.log("BASED: Not enqueueing status {} for deletion".format(status_id)) self.log("BASED: Not enqueueing status {} for deletion".format(status_id))
if self.config["register"]:
self.set_based(status_id) self.set_based(status_id)
self.respond(status, "Categorised as based\n{}".format(bogo_report)) self.respond(status, "Categorised as based\n{}".format(bogo_report), context = {"event": "categorise", "target": status_id})
else: else:
self.log("UNSURE: Not enqueueing status {} for deletion".format(status_id)) self.log("UNSURE: Not enqueueing status {} for deletion".format(status_id))
if self.config["register"]:
self.set_unsure(status_id) self.set_unsure(status_id)
self.respond(status, "Categorised as unsure\n{}".format(bogo_report)) self.respond(status, "Categorised as unsure\n{}".format(bogo_report), context = {"event": "categorise", "target": status_id})
def on_load_state(self): def on_load_state(self):
state = { state = {
@ -208,6 +225,7 @@ class CringeBotClient(BotClient):
"cringe": [], "cringe": [],
"based": [], "based": [],
"unsure": [], "unsure": [],
"own": {},
**super().on_load_state()} **super().on_load_state()}
state["cringe"] = set(state["cringe"]) state["cringe"] = set(state["cringe"])
@ -233,8 +251,10 @@ class CringeBotClient(BotClient):
def deletion_report(self):
    """Log how many entries are currently held in the deletion scheduler's queue."""
    pending = len(self.deletion_scheduler.queue)
    self.log("{} status(es) queued for deletion".format(pending))
def enqueue_deletion(self, status_id, delay=None):
    """Schedule ``status_id`` for deletion and record it in the deletion queue state.

    Args:
        status_id: ID of the status to delete later.
        delay: Delay passed to the scheduler before ``queued_delete`` runs.
            When omitted, the configured ``max_age`` is used, multiplied by
            60 (the example config describes ``max_age`` as minutes).
    """
    if delay is None:
        delay = 60 * self.config["max_age"]

    scheduler_event = self.deletion_scheduler.enter(
        delay, 1, self.queued_delete, argument=(status_id,), kwargs={})

    # Keep the scheduler event with the queue entry
    self.state["deletion_queue"][status_id] = {"scheduler_event": scheduler_event}
    self.deletion_report()
def unqueue_deletion(self, status_id): def unqueue_deletion(self, status_id):
@ -249,77 +269,51 @@ class CringeBotClient(BotClient):
def queued_delete(self, status_id):
    """Delete ``status_id`` from the server and forget it locally.

    Scheduled by ``enqueue_deletion``. Local bookkeeping is cleared only
    once the server has deleted the status or reports that it no longer
    exists. Any other failure is logged and the deletion is retried after
    300 seconds.

    Args:
        status_id: ID of the status to delete.
    """
    try:
        self.log("Deleting status {}".format(status_id))
        self.api.status_delete(status_id)
        self.set_discard(status_id)
        self.state["own"].pop(status_id, None)
    except MastodonNotFoundError:
        # Already gone on the server: drop every local reference to it,
        # including the ownership record, so stale entries do not accumulate
        self.log("Cannot find status {} on server".format(status_id))
        self.set_discard(status_id)
        self.state["own"].pop(status_id, None)
    except Exception:
        # Unexpected failure: keep local state and try again in 5 minutes
        self.log(traceback.format_exc())
        self.enqueue_deletion(status_id, 300)
def toot_dict_to_mail(toot_dict):
    """Build a faux e-mail from a status dict for bogofilter.

    Headers produced:
        From:     the posting account's ``acct``
        X-Flags:  visibility, plus "sensitive", "poll" and "attachments"
                  when they apply
        X-Time:   season and time-of-day labels for the moment of the call
        Subject:  the spoiler text, only when it is non-empty

    Args:
        toot_dict: Status dict; must contain ``visibility``, ``sensitive``,
            ``poll``, ``media_attachments``, ``account``, ``spoiler_text``
            and ``content``.

    Returns:
        A ``bogofilter.Mail`` with the headers above and the status
        content as its body.
    """
    # Imported locally so the function does not depend on how (or whether)
    # the module imports datetime at file level
    from datetime import datetime

    flags = [toot_dict["visibility"]]
    if toot_dict["sensitive"]:
        flags.append("sensitive")
    if toot_dict["poll"]:
        flags.append("poll")
    if len(toot_dict["media_attachments"]) > 0:
        flags.append("attachments")

    # NOTE(review): the time context comes from the clock at call time, not
    # from the status' own timestamp, so re-learning a status later may
    # produce different X-Time tokens than its original classification.
    # Confirm this is intended.
    now = datetime.now()
    # Not named "time", to avoid shadowing the time module used elsewhere in the file
    time_tokens = [SEASON[now.month], TIME_OF_DAY[now.hour]]

    headers = {}
    headers["From"] = toot_dict["account"]["acct"]
    headers["X-Flags"] = ", ".join(flags)
    headers["X-Time"] = ", ".join(time_tokens)
    if len(toot_dict["spoiler_text"]) > 0:
        headers["Subject"] = toot_dict["spoiler_text"]

    body = toot_dict["content"]

    return bogofilter.Mail(headers=headers, body=body)
with open("config.json") as json_file: bot = Bot(CringeBotClient, toml.load("config.toml"))
bot = Bot(CringeBotClient, json.load(json_file)) #with open("config.json") as json_file:
# bot = Bot(CringeBotClient, json.load(json_file))
bot.start() bot.start()
while True: while True:

Loading…
Cancel
Save