Change to TOML config format, add time context bogofilter tokens

master
Thor 3 years ago
parent 5e3f2deb75
commit 08913045b2
  1. 2
      .gitignore
  2. 40
      config.toml.example
  3. 302
      cringebot.py

2
.gitignore vendored

@ -1,4 +1,4 @@
config.json
config.toml
wordlist.db
__pycache__
.*

@ -0,0 +1,40 @@
name = "cringebot"
[defaults]
# Name of the application as listed in the account settings on the server
app_name = "Cringebot"
# Minimum seconds between requests to the server
rate_limit = 3
# Seconds between retrying failed requests to the server
retry_rate = 60
# Seconds between polling the server for updates
poll_interval = 15
# Minutes until cringe statuses are deleted
max_age = 90
[clients]
# By default, the client name is used as the hostname of the server and
# the base name of the files associated with the client, unless overridden
# below.
[clients."mastodon.social"]
# The URL of the server that the client connects to
#base_url = "https://mastodon.social"
# Where to store the authorisation key for the client
#client_file = "secret/mastodon.social.client"
# Where to store the authorisation key for the user account
#user_file = "secret/mastodon.social.user"
# Where to store the persisted state of the client
#state_file = "state/mastodon.social.state"
# Whether or not to register (learn from) each categorised status.
# Enabling this gives the bot more data to work with, but errors
# in categorisation will self-reinforce over time if not corrected.
#register = false

@ -7,11 +7,25 @@ import json
import bogofilter
import html2text
from collections import deque
import toml
from mastodon import Mastodon, MastodonNotFoundError
from bot import Bot, BotClient
# Lookup table from month number (1-12) to a season label.
# Months are grouped in threes: 3-5 spring, 6-8 summer, 9-11 autumn,
# and 12, 1, 2 winter.
SEASON = {
    **dict.fromkeys(range(3, 6), "spring"),
    **dict.fromkeys(range(6, 9), "summer"),
    **dict.fromkeys(range(9, 12), "autumn"),
    **dict.fromkeys((12, 1, 2), "winter"),
}
# Lookup table from hour of day (0-23) to a coarse time-of-day label.
# Hours 0-3 are "night", 4-7 "early", 8-11 "morning", 12-17 "afternoon"
# and 18-23 "evening".
TIME_OF_DAY = {
    **dict.fromkeys(range(0, 4), "night"),
    **dict.fromkeys(range(4, 8), "early"),
    **dict.fromkeys(range(8, 12), "morning"),
    **dict.fromkeys(range(12, 18), "afternoon"),
    **dict.fromkeys(range(18, 24), "evening"),
}
class CringeBotClient(BotClient):
def __init__(self, bot, config):
# Initialise HTML-to-Markdown converter
@ -21,15 +35,20 @@ class CringeBotClient(BotClient):
# Create scheduler for deferred deletion of posts
self.deletion_scheduler = sched.scheduler(time.time, time.sleep)
super().__init__(bot, config)
super().__init__(bot, {"register": False, **config})
# Send DM reply to message, appropriately tagged, and schedules it for deferred deletion
def respond(self, status, message):
def respond(self, status, message, context):
self.log("Responding with:")
self.log(message)
reply = self.api.status_reply(status, "{}\n{}".format(message, self.config["tag"]), visibility = "direct", untag = True)
self.log()
reply = self.api.status_reply(status, message, visibility = "direct", untag = True)
self.state["own"][reply["id"]] = context
self.enqueue_deletion(reply["id"])
time.sleep(1)
time.sleep(self.config["rate_limit"])
def on_start(self):
self.deletion_report()
@ -58,121 +77,117 @@ class CringeBotClient(BotClient):
self.state["based"].discard(status_id)
self.state["unsure"].discard(status_id)
# Look for and process commands in the replies to the bot's notifications and return True if commands were processed
# Look for replies to the bot and return True if commands were processed
def process_commands(self, status):
# Check if status is a reply to another status
replied_id = status.get("in_reply_to_id", None)
if not replied_id:
return False
status_id = status["id"]
parent_id = status["in_reply_to_id"]
try:
# Fetch replied-to status
replied_status = self.api.status(replied_id)
replied_tokens = self.h2t.handle(replied_status["content"]).split()
# Check if bot owns the parent status
if parent_id not in self.state["own"]:
return False
# Check if it belongs to the bot
if not self.config["tag"] in replied_tokens:
return False
context = self.state["own"][parent_id]
event = context["event"]
status_id = status["id"]
target_id = context["target"]
# Enqueue user command status for deletion
self.enqueue_deletion(status_id)
# Enqueue command status for deletion
self.enqueue_deletion(status_id)
try:
command = self.h2t.handle(status["content"]).strip()
self.log("Received command: {}".format(command))
# Find the intended target of the command (the status that the bot originally replied to with a classification)
target_status_id = replied_status.get("in_reply_to_id", None)
if not target_status_id:
self.respond(status, "Target status is missing")
return True
try:
command = self.h2t.handle(status["content"]).strip()
self.log("Received command: {}".format(command))
# Fetch the target status
target_status = self.api.status(target_status_id)
target_mail_text = toot_dict_to_mail(target_status).format()
# Check if target status was previously classified
was_cringe = target_status_id in self.state["cringe"]
was_based = target_status_id in self.state["based"]
was_unsure = target_status_id in self.state["unsure"]
tokens = deque(command.split())
while True:
token = tokens.popleft()
if token == "cringe":
if was_cringe:
break
elif was_based:
bogofilter.run(target_mail_text, [bogofilter.UNLEARN_HAM, bogofilter.LEARN_SPAM])
else:
bogofilter.run(target_mail_text, [bogofilter.LEARN_SPAM])
self.set_cringe(target_status_id)
self.enqueue_deletion(target_status_id)
self.respond(status, "Learned as cringe")
# Fetch the target status
target_status = self.api.status(target_id)
target_mail_text = toot_dict_to_mail(target_status).format()
# Check if target status was previously classified
was_cringe = target_id in self.state["cringe"]
was_based = target_id in self.state["based"]
was_unsure = target_id in self.state["unsure"]
tokens = deque(command.split())
while True:
token = tokens.popleft()
if token == "cringe":
if event not in ["categorise", "learn"]:
self.respond(status, "Status is not learnable", {"event": "error", "type": "learnable", "target": target_id})
return True
elif was_cringe:
break
elif token == "based":
if was_based:
break
elif was_cringe:
bogofilter.run(target_mail_text, [bogofilter.UNLEARN_SPAM, bogofilter.LEARN_HAM])
else:
bogofilter.run(target_mail_text, [bogofilter.LEARN_HAM])
self.set_based(target_status_id)
self.unqueue_deletion(target_status_id)
self.respond(status, "Learned as based")
elif was_based:
bogofilter.run(target_mail_text, [bogofilter.UNLEARN_HAM, bogofilter.LEARN_SPAM])
else:
bogofilter.run(target_mail_text, [bogofilter.LEARN_SPAM])
self.set_cringe(target_id)
self.enqueue_deletion(target_id)
self.respond(status, "Learned as cringe", {"event": "learn", "target": target_id})
break
elif token == "based":
if event not in ["categorise", "learn"]:
self.respond(status, "Status is not learnable", {"event": "error", "type": "learnable", "target": target_id})
return True
elif was_based:
break
elif token == "unlearn":
if was_unsure:
break
elif was_cringe:
bogofilter.run(target_mail_text, [bogofilter.UNLEARN_SPAM])
elif was_based:
bogofilter.run(target_mail_text, [bogofilter.UNLEARN_HAM])
self.set_unsure(target_status_id)
self.unqueue_deletion(target_status_id)
self.respond(status, "Unlearned")
elif was_cringe:
bogofilter.run(target_mail_text, [bogofilter.UNLEARN_SPAM, bogofilter.LEARN_HAM])
else:
bogofilter.run(target_mail_text, [bogofilter.LEARN_HAM])
self.set_based(target_id)
self.unqueue_deletion(target_id)
self.respond(status, "Learned as based", {"event": "learn", "target": target_id})
break
elif token == "unlearn":
if event not in ["categorise", "learn"]:
self.respond(status, "Status is not learnable", {"event": "error", "type": "learnable", "target": target_id})
return True
elif was_unsure:
break
elif was_cringe:
bogofilter.run(target_mail_text, [bogofilter.UNLEARN_SPAM])
elif was_based:
bogofilter.run(target_mail_text, [bogofilter.UNLEARN_HAM])
except IndexError:
self.respond(status, "Invalid command")
except MastodonNotFoundError:
self.respond(status, "Target status is missing")
self.set_unsure(target_id)
self.unqueue_deletion(target_id)
self.respond(status, "Unlearned", {"event": "learn", "target": target_id})
return True
break
except IndexError:
self.respond(status, "Invalid command", {"event": "error", "type": "syntax", "target": target_id})
except MastodonNotFoundError:
return False
self.respond(status, "Could not fetch target status", {"event": "error", "type": "fetch", "target": target_id})
return True
def on_status(self, status):
# Ignore statuses from other accounts
if status["account"]["id"] != self.api.me()["id"]:
return
# Ignore statuses this account boosts
if status["reblog"]:
return
status_id = status["id"]
# Extract plain text
md_text = self.h2t.handle(status["content"])
# Ignore bot's own statuses
if self.config["tag"] in md_text.split():
if status_id in self.state["own"]:
return
# Create faux HTML email of status
mail_text = toot_dict_to_mail(status).format()
# Format and log plain-text preview
md_text = self.h2t.handle(status["content"])
preview = toot_dict_to_mail(status)
preview.body = md_text
preview_text = preview.format()
@ -185,22 +200,24 @@ class CringeBotClient(BotClient):
if self.process_commands(status):
return
result = bogofilter.run(mail_text, [bogofilter.CLASSIFY, bogofilter.REGISTER])
result = bogofilter.run(mail_text, [bogofilter.CLASSIFY, bogofilter.REGISTER] if self.config["register"] else [bogofilter.CLASSIFY])
bogo_report = "Bogofilter: Category={}, Score={}".format(result.category, "{:.4f}".format(result.score))
status_id = status["id"]
if result.category == bogofilter.SPAM:
self.log("CRINGE: Enqueuing status {} for deletion".format(status_id))
self.set_cringe(status_id)
if self.config["register"]:
self.set_cringe(status_id)
self.enqueue_deletion(status_id)
self.respond(status, "Categorised as cringe\n{}".format(bogo_report))
self.respond(status, "Categorised as cringe\n{}".format(bogo_report), context = {"event": "categorise", "target": status_id})
elif result.category == bogofilter.HAM:
self.log("BASED: Not enqueueing status {} for deletion".format(status_id))
self.set_based(status_id)
self.respond(status, "Categorised as based\n{}".format(bogo_report))
if self.config["register"]:
self.set_based(status_id)
self.respond(status, "Categorised as based\n{}".format(bogo_report), context = {"event": "categorise", "target": status_id})
else:
self.log("UNSURE: Not enqueueing status {} for deletion".format(status_id))
self.set_unsure(status_id)
self.respond(status, "Categorised as unsure\n{}".format(bogo_report))
if self.config["register"]:
self.set_unsure(status_id)
self.respond(status, "Categorised as unsure\n{}".format(bogo_report), context = {"event": "categorise", "target": status_id})
def on_load_state(self):
state = {
@ -208,6 +225,7 @@ class CringeBotClient(BotClient):
"cringe": [],
"based": [],
"unsure": [],
"own": {},
**super().on_load_state()}
state["cringe"] = set(state["cringe"])
@ -233,8 +251,10 @@ class CringeBotClient(BotClient):
# Log how many entries are currently held in the deletion scheduler's queue
def deletion_report(self):
self.log("{} status(es) queued for deletion".format(len(self.deletion_scheduler.queue)))
def enqueue_deletion(self, status_id):
self.state["deletion_queue"][status_id] = {"scheduler_event": self.deletion_scheduler.enter(60 * self.config["max_age"], 1, self.queued_delete, argument=(status_id,), kwargs={})}
def enqueue_deletion(self, status_id, delay = None):
if delay is None:
delay = 60 * self.config["max_age"]
self.state["deletion_queue"][status_id] = {"scheduler_event": self.deletion_scheduler.enter(delay, 1, self.queued_delete, argument=(status_id,), kwargs={})}
self.deletion_report()
def unqueue_deletion(self, status_id):
@ -249,77 +269,51 @@ class CringeBotClient(BotClient):
def queued_delete(self, status_id):
try:
self.log("Deleting status {}".format(status_id))
self.set_discard(status_id)
self.api.status_delete(status_id)
self.set_discard(status_id)
if status_id in self.state["own"]:
del self.state["own"][status_id]
except MastodonNotFoundError:
self.log("Cannot find status {} on server".format(status_id))
self.set_discard(status_id)
except Exception:
self.log(traceback.format_exc())
self.enqueue_deletion(status_id, 300)
def toot_dict_to_mail(toot_dict):
flags = []
if toot_dict.get("sensitive", False):
flags.append(toot_dict["visibility"])
if toot_dict["sensitive"]:
flags.append("sensitive")
if toot_dict.get("poll", False):
if toot_dict["poll"]:
flags.append("poll")
if toot_dict.get("reblog", False):
flags.append("reblog")
if toot_dict.get("reblogged", False):
flags.append("reblogged")
if toot_dict.get("favourited", False):
flags.append("favourited")
if toot_dict.get("bookmarked", False):
flags.append("bookmarked")
if toot_dict.get("pinned", False):
flags.append("pinned")
flags = ", ".join(flags)
headers = {}
if toot_dict.get("account") and toot_dict["account"].get("acct"):
headers["From"] = toot_dict["account"]["acct"]
if toot_dict.get("created_at"):
headers["Date"] = toot_dict["created_at"]
if toot_dict.get("visibility"):
headers["X-Visibility"] = toot_dict["visibility"]
if len(toot_dict["media_attachments"]) > 0:
flags.append("attachments")
if len(flags) > 0:
headers["X-Flags"] = flags
time = []
now = datetime.now()
time.append(SEASON[now.month])
time.append(TIME_OF_DAY[now.hour])
if toot_dict.get("spoiler_text"):
headers = {}
headers["From"] = toot_dict["account"]["acct"]
headers["X-Flags"] = ", ".join(flags)
headers["X-Time"] = ", ".join(time)
if len(toot_dict["spoiler_text"]) > 0:
headers["Subject"] = toot_dict["spoiler_text"]
if toot_dict.get("replies_count", 0) > 0:
headers["X-Replies-Count"] = toot_dict["replies_count"]
if len(toot_dict.get("media_attachments", [])) > 0:
headers["X-Attachments-Count"] = len(toot_dict["media_attachments"])
if toot_dict.get("reblogs_count", 0) > 0:
headers["X-Reblogs-Count"] = toot_dict["reblogs_count"]
if toot_dict.get("favourites_count", 0) > 0:
headers["X-Favourites-Count"] = toot_dict["favourites_count"]
if toot_dict.get("content") and len(toot_dict["content"]) > 0:
body = toot_dict["content"]
else:
body = None
body = toot_dict["content"]
return bogofilter.Mail(headers = headers, body = body)
with open("config.json") as json_file:
bot = Bot(CringeBotClient, json.load(json_file))
bot = Bot(CringeBotClient, toml.load("config.toml"))
#with open("config.json") as json_file:
# bot = Bot(CringeBotClient, json.load(json_file))
bot.start()
while True:

Loading…
Cancel
Save