Change to TOML config format, add time context bogofilter tokens

master
Thor 3 years ago
parent 5e3f2deb75
commit 08913045b2
  1. 2
      .gitignore
  2. 40
      config.toml.example
  3. 302
      cringebot.py

2
.gitignore vendored

@ -1,4 +1,4 @@
config.json config.toml
wordlist.db wordlist.db
__pycache__ __pycache__
.* .*

@ -0,0 +1,40 @@
name = "cringebot"
[defaults]
# Name of the application as listed in the account settings on the server
app_name = "Cringebot"
# Minimum seconds between requests to the server
rate_limit = 3
# Seconds between retrying failed requests to the server
retry_rate = 60
# Seconds between polling the server for updates
poll_interval = 15
# Minutes until cringe statuses, the bot's own replies and command statuses are deleted
max_age = 90
[clients]
# By default, the client name is used as the hostname of the server and
# the base name of the files associated with the client, unless overridden
# below.
[clients."mastodon.social"]
# The URL of the server that the client connects to
#base_url = "https://mastodon.social"
# Where to store the authorisation key for the client
#client_file = "secret/mastodon.social.client"
# Where to store the authorisation key for the user account
#user_file = "secret/mastodon.social.user"
# Where to store the persisted state of the client
#state_file = "state/mastodon.social.state"
# Whether or not to register (learn from) each categorised status.
# Enabling this gives the bot more data to work with, but errors
# in categorisation will self-reinforce over time if not corrected.
#register = false

@ -7,11 +7,25 @@ import json
import bogofilter import bogofilter
import html2text import html2text
from collections import deque from collections import deque
import toml
from mastodon import Mastodon, MastodonNotFoundError from mastodon import Mastodon, MastodonNotFoundError
from bot import Bot, BotClient from bot import Bot, BotClient
# Calendar month (1-12) -> season name; March opens spring and December
# opens winter.
SEASON = {
    month: season
    for months, season in (
        (range(3, 6), "spring"),
        (range(6, 9), "summer"),
        (range(9, 12), "autumn"),
        ((12, 1, 2), "winter"),
    )
    for month in months
}

# Hour of the day (0-23) -> coarse time-of-day label.
TIME_OF_DAY = {
    hour: label
    for hours, label in (
        (range(0, 4), "night"),
        (range(4, 8), "early"),
        (range(8, 12), "morning"),
        (range(12, 18), "afternoon"),
        (range(18, 24), "evening"),
    )
    for hour in hours
}
class CringeBotClient(BotClient): class CringeBotClient(BotClient):
def __init__(self, bot, config): def __init__(self, bot, config):
# Initialise HTML-to-Markdown converter # Initialise HTML-to-Markdown converter
@ -21,15 +35,20 @@ class CringeBotClient(BotClient):
# Create scheduler for deferred deletion of posts # Create scheduler for deferred deletion of posts
self.deletion_scheduler = sched.scheduler(time.time, time.sleep) self.deletion_scheduler = sched.scheduler(time.time, time.sleep)
super().__init__(bot, config) super().__init__(bot, {"register": False, **config})
# Send a DM reply to a status, record its context and schedule it for deferred deletion
def respond(self, status, message, context):
    """Reply privately to ``status`` and track the reply as one of the bot's own.

    Args:
        status: The status dict being replied to.
        message: Text of the reply.
        context: Dict stored under the reply's id in ``self.state["own"]``.
            Callers in this file pass at least an ``"event"`` and a
            ``"target"`` key, which ``process_commands`` reads when a
            command is later sent in reply to this message.
    """
    self.log("Responding with:")
    self.log(message)
    self.log()
    reply = self.api.status_reply(status, message, visibility = "direct", untag = True)
    # Ownership is tracked by reply id in the client state rather than by a
    # marker tag appended to the message text
    self.state["own"][reply["id"]] = context
    self.enqueue_deletion(reply["id"])
    # Pause for the configured interval before returning to the caller
    time.sleep(self.config["rate_limit"])
def on_start(self): def on_start(self):
self.deletion_report() self.deletion_report()
@ -58,121 +77,117 @@ class CringeBotClient(BotClient):
self.state["based"].discard(status_id) self.state["based"].discard(status_id)
self.state["unsure"].discard(status_id) self.state["unsure"].discard(status_id)
# Look for replies to the bot and return True if commands were processed
def process_commands(self, status):
    """Handle a learn/unlearn command sent in reply to one of the bot's messages.

    The recognised command words are ``cringe``, ``based`` and ``unlearn``.
    The first one found in the status text is applied to the target status
    recorded in the context of the replied-to bot message. Asking for the
    category the target already has is a silent no-op.

    Args:
        status: Status dict to inspect.

    Returns:
        False if ``status`` does not reply to a message tracked in
        ``self.state["own"]``; True otherwise, whether or not the command
        could be carried out.
    """
    status_id = status["id"]
    parent_id = status["in_reply_to_id"]

    # Check if bot owns the parent status
    if parent_id not in self.state["own"]:
        return False

    context = self.state["own"][parent_id]
    event = context["event"]
    target_id = context["target"]

    # Enqueue command status for deletion
    self.enqueue_deletion(status_id)

    try:
        command = self.h2t.handle(status["content"]).strip()
        self.log("Received command: {}".format(command))

        # Fetch the target status
        target_status = self.api.status(target_id)
        target_mail_text = toot_dict_to_mail(target_status).format()

        # Check if target status was previously classified
        was_cringe = target_id in self.state["cringe"]
        was_based = target_id in self.state["based"]
        was_unsure = target_id in self.state["unsure"]

        tokens = deque(command.split())
        while True:
            # popleft() raises IndexError once the words run out; that is how
            # a command without any recognised word ends up as "Invalid command"
            token = tokens.popleft()
            if token not in ("cringe", "based", "unlearn"):
                continue

            # Only replies to a categorisation or learn message have a learnable target
            if event not in ["categorise", "learn"]:
                self.respond(status, "Status is not learnable", {"event": "error", "type": "learnable", "target": target_id})
                return True

            if token == "cringe":
                if not was_cringe:
                    if was_based:
                        bogofilter.run(target_mail_text, [bogofilter.UNLEARN_HAM, bogofilter.LEARN_SPAM])
                    else:
                        bogofilter.run(target_mail_text, [bogofilter.LEARN_SPAM])
                    self.set_cringe(target_id)
                    self.enqueue_deletion(target_id)
                    self.respond(status, "Learned as cringe", {"event": "learn", "target": target_id})
            elif token == "based":
                if not was_based:
                    if was_cringe:
                        bogofilter.run(target_mail_text, [bogofilter.UNLEARN_SPAM, bogofilter.LEARN_HAM])
                    else:
                        bogofilter.run(target_mail_text, [bogofilter.LEARN_HAM])
                    self.set_based(target_id)
                    self.unqueue_deletion(target_id)
                    self.respond(status, "Learned as based", {"event": "learn", "target": target_id})
            else:
                # token == "unlearn"
                if not was_unsure:
                    if was_cringe:
                        bogofilter.run(target_mail_text, [bogofilter.UNLEARN_SPAM])
                    elif was_based:
                        bogofilter.run(target_mail_text, [bogofilter.UNLEARN_HAM])
                    self.set_unsure(target_id)
                    self.unqueue_deletion(target_id)
                    self.respond(status, "Unlearned", {"event": "learn", "target": target_id})
            break
    except IndexError:
        self.respond(status, "Invalid command", {"event": "error", "type": "syntax", "target": target_id})
    except MastodonNotFoundError:
        self.respond(status, "Could not fetch target status", {"event": "error", "type": "fetch", "target": target_id})

    return True
def on_status(self, status): def on_status(self, status):
# Ignore statuses from other accounts # Ignore statuses from other accounts
if status["account"]["id"] != self.api.me()["id"]: if status["account"]["id"] != self.api.me()["id"]:
return return
# Ignore statuses this account boosts # Ignore statuses this account boosts
if status["reblog"]: if status["reblog"]:
return return
status_id = status["id"]
# Extract plain text
md_text = self.h2t.handle(status["content"])
# Ignore bot's own statuses # Ignore bot's own statuses
if self.config["tag"] in md_text.split(): if status_id in self.state["own"]:
return return
# Create faux HTML email of status # Create faux HTML email of status
mail_text = toot_dict_to_mail(status).format() mail_text = toot_dict_to_mail(status).format()
# Format and log plain-text preview # Format and log plain-text preview
md_text = self.h2t.handle(status["content"])
preview = toot_dict_to_mail(status) preview = toot_dict_to_mail(status)
preview.body = md_text preview.body = md_text
preview_text = preview.format() preview_text = preview.format()
@ -185,22 +200,24 @@ class CringeBotClient(BotClient):
if self.process_commands(status): if self.process_commands(status):
return return
result = bogofilter.run(mail_text, [bogofilter.CLASSIFY, bogofilter.REGISTER]) result = bogofilter.run(mail_text, [bogofilter.CLASSIFY, bogofilter.REGISTER] if self.config["register"] else [bogofilter.CLASSIFY])
bogo_report = "Bogofilter: Category={}, Score={}".format(result.category, "{:.4f}".format(result.score)) bogo_report = "Bogofilter: Category={}, Score={}".format(result.category, "{:.4f}".format(result.score))
status_id = status["id"]
if result.category == bogofilter.SPAM: if result.category == bogofilter.SPAM:
self.log("CRINGE: Enqueuing status {} for deletion".format(status_id)) self.log("CRINGE: Enqueuing status {} for deletion".format(status_id))
self.set_cringe(status_id) if self.config["register"]:
self.set_cringe(status_id)
self.enqueue_deletion(status_id) self.enqueue_deletion(status_id)
self.respond(status, "Categorised as cringe\n{}".format(bogo_report)) self.respond(status, "Categorised as cringe\n{}".format(bogo_report), context = {"event": "categorise", "target": status_id})
elif result.category == bogofilter.HAM: elif result.category == bogofilter.HAM:
self.log("BASED: Not enqueueing status {} for deletion".format(status_id)) self.log("BASED: Not enqueueing status {} for deletion".format(status_id))
self.set_based(status_id) if self.config["register"]:
self.respond(status, "Categorised as based\n{}".format(bogo_report)) self.set_based(status_id)
self.respond(status, "Categorised as based\n{}".format(bogo_report), context = {"event": "categorise", "target": status_id})
else: else:
self.log("UNSURE: Not enqueueing status {} for deletion".format(status_id)) self.log("UNSURE: Not enqueueing status {} for deletion".format(status_id))
self.set_unsure(status_id) if self.config["register"]:
self.respond(status, "Categorised as unsure\n{}".format(bogo_report)) self.set_unsure(status_id)
self.respond(status, "Categorised as unsure\n{}".format(bogo_report), context = {"event": "categorise", "target": status_id})
def on_load_state(self): def on_load_state(self):
state = { state = {
@ -208,6 +225,7 @@ class CringeBotClient(BotClient):
"cringe": [], "cringe": [],
"based": [], "based": [],
"unsure": [], "unsure": [],
"own": {},
**super().on_load_state()} **super().on_load_state()}
state["cringe"] = set(state["cringe"]) state["cringe"] = set(state["cringe"])
@ -233,8 +251,10 @@ class CringeBotClient(BotClient):
def deletion_report(self):
    """Log how many entries are currently waiting in the deletion scheduler."""
    pending = len(self.deletion_scheduler.queue)
    self.log("{} status(es) queued for deletion".format(pending))
def enqueue_deletion(self, status_id, delay = None):
    """Schedule ``status_id`` for deletion and record the scheduler event.

    Args:
        status_id: Id of the status to delete.
        delay: Seconds until deletion. When omitted, ``max_age`` (minutes)
            from the client configuration is used.
    """
    if delay is None:
        delay = 60 * self.config["max_age"]

    # Re-queuing must replace the pending event instead of adding a second
    # one: only the event stored in the state can be looked up again later,
    # so an overwritten event would stay in the scheduler and still fire.
    previous = self.state["deletion_queue"].get(status_id)
    if previous is not None:
        try:
            self.deletion_scheduler.cancel(previous["scheduler_event"])
        except (KeyError, ValueError):
            # No live event to cancel: the entry carries none, or the event
            # has already run or been cancelled
            pass

    event = self.deletion_scheduler.enter(delay, 1, self.queued_delete, argument=(status_id,), kwargs={})
    self.state["deletion_queue"][status_id] = {"scheduler_event": event}
    self.deletion_report()
def unqueue_deletion(self, status_id): def unqueue_deletion(self, status_id):
@ -249,77 +269,51 @@ class CringeBotClient(BotClient):
def queued_delete(self, status_id):
    """Delete ``status_id`` on the server and drop it from the local state.

    Passed to the deletion scheduler by ``enqueue_deletion``. A status that
    no longer exists on the server is treated as already deleted. Any other
    failure is logged and the deletion is retried after 300 seconds.
    """
    try:
        self.log("Deleting status {}".format(status_id))
        try:
            self.api.status_delete(status_id)
        except MastodonNotFoundError:
            self.log("Cannot find status {} on server".format(status_id))
        # The status is gone either way, so clear every local reference to
        # it, including the reply context kept for the bot's own messages
        self.set_discard(status_id)
        self.state["own"].pop(status_id, None)
    except Exception:
        # Log the failure and try again later
        self.log(traceback.format_exc())
        self.enqueue_deletion(status_id, 300)
def toot_dict_to_mail(toot_dict):
    """Build a faux e-mail from a status dict for bogofilter to process.

    Headers written:
        From: the account's ``acct`` handle.
        X-Flags: the visibility, plus ``sensitive``, ``poll`` and
            ``attachments`` markers where they apply.
        X-Time: season and time-of-day labels.
        Subject: the spoiler text, only when there is one.

    The body is the status content as received.

    Args:
        toot_dict: Status dict; ``visibility``, ``account`` and ``content``
            are required, the remaining keys are optional.

    Returns:
        A ``bogofilter.Mail`` built from the headers and body above.
    """
    flags = [toot_dict["visibility"]]
    if toot_dict.get("sensitive"):
        flags.append("sensitive")
    if toot_dict.get("poll"):
        flags.append("poll")
    if toot_dict.get("media_attachments"):
        flags.append("attachments")

    # Take the time context from when the status was posted, so the same
    # status yields the same tokens every time it is processed (for example
    # when a learn/unlearn command arrives later). Fall back to the current
    # time when no usable timestamp is present.
    posted = toot_dict.get("created_at")
    if isinstance(posted, datetime):
        # With no argument this converts to the system's local time zone,
        # which is the frame datetime.now() reports in
        posted = posted.astimezone()
    else:
        posted = datetime.now()
    time_context = [SEASON[posted.month], TIME_OF_DAY[posted.hour]]

    headers = {
        "From": toot_dict["account"]["acct"],
        "X-Flags": ", ".join(flags),
        "X-Time": ", ".join(time_context),
    }
    spoiler_text = toot_dict.get("spoiler_text")
    if spoiler_text:
        headers["Subject"] = spoiler_text

    return bogofilter.Mail(headers = headers, body = toot_dict["content"])
with open("config.json") as json_file: bot = Bot(CringeBotClient, toml.load("config.toml"))
bot = Bot(CringeBotClient, json.load(json_file)) #with open("config.json") as json_file:
# bot = Bot(CringeBotClient, json.load(json_file))
bot.start() bot.start()
while True: while True:

Loading…
Cancel
Save