Change to TOML config format, add time context bogofilter tokens

3 years ago · 08913045b2
parent 5e3f2deb75
commit 08913045b2
3 changed files with 189 additions and 155 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,4 +1,4 @@
-config.json
+config.toml
 wordlist.db
 __pycache__
 .*
--- a/config.toml.example
+++ b/config.toml.example
@ -0,0 +1,40 @@
+name = "cringebot"
+
+[defaults]
+# Name of the application as listed in the account settings on the server
+app_name = "Cringebot"
+
+# Minimum seconds between requests to the server
+rate_limit = 3
+
+# Seconds between retrying failed requests to the server
+retry_rate = 60
+
+# Seconds between polling the server for updates
+poll_interval = 15
+
+# Minutes until cringe statuses, bot replies and command statuses are deleted
+max_age = 90
+
+[clients]
+    # By default, the client name is used as the hostname of the server and
+    # the base name of the files associated with the client, unless overridden
+    # below.
+    [clients."mastodon.social"]
+
+    # The base URL of the server that the client connects to
+    #base_url = "https://mastodon.social"
+
+    # Where to store the authorisation key for the client
+    #client_file = "secret/mastodon.social.client"
+    
+    # Where to store the authorisation key for the user account
+    #user_file = "secret/mastodon.social.user"
+
+    # Where to store the persisted state of the client
+    #state_file = "state/mastodon.social.state"
+
+    # Whether to register (learn from) each categorised status; off by default.
+    # Enabling this gives the bot more data to work with, but errors
+    # in categorisation will self-reinforce over time if not corrected.
+    #register = false
--- a/cringebot.py
+++ b/cringebot.py
@ -7,11 +7,25 @@ import json
 import bogofilter
 import html2text
 from collections import deque
+import toml

 from mastodon import Mastodon, MastodonNotFoundError

 from bot import Bot, BotClient

+SEASON = {
+    **{ i : "spring" for i in range(3, 6) },
+    **{ i : "summer" for i in range(6, 9) },
+    **{ i : "autumn" for i in range(9, 12) },
+    **{ i : "winter" for i in [12, 1, 2] }}
+
+TIME_OF_DAY = {
+    **{ i : "night" for i in range(0, 4) },
+    **{ i : "early" for i in range(4, 8) },
+    **{ i : "morning" for i in range(8, 12) },
+    **{ i : "afternoon" for i in range(12, 18) },
+    **{ i : "evening" for i in range(18, 24) }}
+
 class CringeBotClient(BotClient):
    def __init__(self, bot, config):
        # Initialise HTML-to-Markdown converter
@ -21,15 +35,20 @@ class CringeBotClient(BotClient):
        # Create scheduler for deferred deletion of posts
        self.deletion_scheduler = sched.scheduler(time.time, time.sleep)
       
-        super().__init__(bot, config)
+        super().__init__(bot, {"register": False, **config})
    
    # Send DM reply to message, appropriately tagged, and schedules it for deferred deletion
-    def respond(self, status, message):
+    def respond(self, status, message, context):
        self.log("Responding with:")
        self.log(message)
-        reply = self.api.status_reply(status, "{}\n{}".format(message, self.config["tag"]), visibility = "direct", untag = True)
+        self.log()
+
+        reply = self.api.status_reply(status, message, visibility = "direct", untag = True)
+        
+        self.state["own"][reply["id"]] = context
        self.enqueue_deletion(reply["id"])
-        time.sleep(1)
+        
+        time.sleep(self.config["rate_limit"])

    def on_start(self):
        self.deletion_report()
@ -58,121 +77,117 @@ class CringeBotClient(BotClient):
        self.state["based"].discard(status_id)
        self.state["unsure"].discard(status_id)

-    # Look for and process commands in the replies to the bot's notifications and return True if commands were processed
+    # Treat a reply to one of the bot's own statuses as a command; return True if the status was such a reply
    def process_commands(self, status):
-        # Check if status is a reply to another status
-        replied_id = status.get("in_reply_to_id", None)
-        if not replied_id:
-            return False
+        status_id = status["id"]
+        parent_id = status["in_reply_to_id"]

-        try:
-            # Fetch replied-to status
-            replied_status = self.api.status(replied_id)
-            replied_tokens = self.h2t.handle(replied_status["content"]).split()
+        # Check if bot owns the parent status
+        if parent_id not in self.state["own"]:
+            return False

-            # Check if it belongs to the bot
-            if not self.config["tag"] in replied_tokens:
-                return False
+        context = self.state["own"][parent_id]
+        event = context["event"]

-            status_id = status["id"]
+        target_id = context["target"]

-            # Enqueue user command status for deletion
-            self.enqueue_deletion(status_id) 
+        # Enqueue command status for deletion
+        self.enqueue_deletion(status_id) 
+        
+        try:
+            command = self.h2t.handle(status["content"]).strip()
+            self.log("Received command: {}".format(command))
            
-            # Find the intended target of the command (the status that the bot originally replied to with a classification)
-            target_status_id = replied_status.get("in_reply_to_id", None)
-            if not target_status_id:
-                self.respond(status, "Target status is missing")
-                return True
-
-            try:
-                command = self.h2t.handle(status["content"]).strip()
-                self.log("Received command: {}".format(command))
-                
-                # Fetch the target status
-                target_status = self.api.status(target_status_id)
-                target_mail_text = toot_dict_to_mail(target_status).format()
-                
-                # Check if target status was previously classified
-                was_cringe = target_status_id in self.state["cringe"]
-                was_based = target_status_id in self.state["based"]
-                was_unsure = target_status_id in self.state["unsure"]
- 
-                tokens = deque(command.split())
-                while True:
-                    token = tokens.popleft()
-                    if token == "cringe":
-                        if was_cringe:
-                            break
-                        elif was_based:
-                            bogofilter.run(target_mail_text, [bogofilter.UNLEARN_HAM, bogofilter.LEARN_SPAM])
-                        else:
-                            bogofilter.run(target_mail_text, [bogofilter.LEARN_SPAM])
-                        
-                        self.set_cringe(target_status_id)
-                        self.enqueue_deletion(target_status_id)
-                        self.respond(status, "Learned as cringe")
-                        
+            # Fetch the target status
+            target_status = self.api.status(target_id)
+            target_mail_text = toot_dict_to_mail(target_status).format()
+            
+            # Check if target status was previously classified
+            was_cringe = target_id in self.state["cringe"]
+            was_based = target_id in self.state["based"]
+            was_unsure = target_id in self.state["unsure"]
+
+            tokens = deque(command.split())
+            while True:
+                token = tokens.popleft()
+                if token == "cringe":
+                    if event not in ["categorise", "learn"]:
+                        self.respond(status, "Status is not learnable", {"event": "error", "type": "learnable", "target": target_id})
+                        return True
+                    elif was_cringe:
                        break
-
-                    elif token == "based":
-                        if was_based:
-                            break
-                        elif was_cringe:
-                            bogofilter.run(target_mail_text, [bogofilter.UNLEARN_SPAM, bogofilter.LEARN_HAM])
-                        else:
-                            bogofilter.run(target_mail_text, [bogofilter.LEARN_HAM])
-                        
-                        self.set_based(target_status_id)
-                        self.unqueue_deletion(target_status_id)
-                        self.respond(status, "Learned as based")
-                        
+                    elif was_based:
+                        bogofilter.run(target_mail_text, [bogofilter.UNLEARN_HAM, bogofilter.LEARN_SPAM])
+                    else:
+                        bogofilter.run(target_mail_text, [bogofilter.LEARN_SPAM])
+                    
+                    self.set_cringe(target_id)
+                    self.enqueue_deletion(target_id)
+                    self.respond(status, "Learned as cringe", {"event": "learn", "target": target_id})
+                    
+                    break
+
+                elif token == "based":
+                    if event not in ["categorise", "learn"]:
+                        self.respond(status, "Status is not learnable", {"event": "error", "type": "learnable", "target": target_id})
+                        return True
+                    elif was_based:
                        break
-
-                    elif token == "unlearn":
-                        if was_unsure:
-                            break
-                        elif was_cringe:
-                            bogofilter.run(target_mail_text, [bogofilter.UNLEARN_SPAM])
-                        elif was_based:
-                            bogofilter.run(target_mail_text, [bogofilter.UNLEARN_HAM])
-
-                        self.set_unsure(target_status_id)
-                        self.unqueue_deletion(target_status_id)
-                        self.respond(status, "Unlearned")
-
+                    elif was_cringe:
+                        bogofilter.run(target_mail_text, [bogofilter.UNLEARN_SPAM, bogofilter.LEARN_HAM])
+                    else:
+                        bogofilter.run(target_mail_text, [bogofilter.LEARN_HAM])
+                    
+                    self.set_based(target_id)
+                    self.unqueue_deletion(target_id)
+                    self.respond(status, "Learned as based", {"event": "learn", "target": target_id})
+                    
+                    break
+
+                elif token == "unlearn":
+                    if event not in ["categorise", "learn"]:
+                        self.respond(status, "Status is not learnable", {"event": "error", "type": "learnable", "target": target_id})
+                        return True
+                    elif was_unsure:
                        break
+                    elif was_cringe:
+                        bogofilter.run(target_mail_text, [bogofilter.UNLEARN_SPAM])
+                    elif was_based:
+                        bogofilter.run(target_mail_text, [bogofilter.UNLEARN_HAM])

-            except IndexError:
-                self.respond(status, "Invalid command")
-            except MastodonNotFoundError:
-                self.respond(status, "Target status is missing")
+                    self.set_unsure(target_id)
+                    self.unqueue_deletion(target_id)
+                    self.respond(status, "Unlearned", {"event": "learn", "target": target_id})

-            return True
+                    break

+        except IndexError:
+            self.respond(status, "Invalid command", {"event": "error", "type": "syntax", "target": target_id})
        except MastodonNotFoundError:
-            return False
+            self.respond(status, "Could not fetch target status", {"event": "error", "type": "fetch", "target": target_id})
+
+        return True

    def on_status(self, status):
        # Ignore statuses from other accounts
        if status["account"]["id"] != self.api.me()["id"]:
            return
-
+        
        # Ignore statuses this account boosts
        if status["reblog"]:
            return
+       
+        status_id = status["id"]

-        # Extract plain text
-        md_text = self.h2t.handle(status["content"])
-        
        # Ignore bot's own statuses
-        if self.config["tag"] in md_text.split():
+        if status_id in self.state["own"]:
            return
-        
+       
        # Create faux HTML email of status
        mail_text = toot_dict_to_mail(status).format()
-             
+ 
        # Format and log plain-text preview
+        md_text = self.h2t.handle(status["content"])
        preview = toot_dict_to_mail(status)
        preview.body = md_text
        preview_text = preview.format()
@ -185,22 +200,24 @@ class CringeBotClient(BotClient):
        if self.process_commands(status):
            return

-        result = bogofilter.run(mail_text, [bogofilter.CLASSIFY, bogofilter.REGISTER])
+        result = bogofilter.run(mail_text, [bogofilter.CLASSIFY, bogofilter.REGISTER] if self.config["register"] else [bogofilter.CLASSIFY])
        bogo_report = "Bogofilter: Category={}, Score={}".format(result.category, "{:.4f}".format(result.score))
-        status_id = status["id"]
        if result.category == bogofilter.SPAM:
            self.log("CRINGE: Enqueuing status {} for deletion".format(status_id))
-            self.set_cringe(status_id)
+            if self.config["register"]:
+                self.set_cringe(status_id)
            self.enqueue_deletion(status_id)
-            self.respond(status, "Categorised as cringe\n{}".format(bogo_report))
+            self.respond(status, "Categorised as cringe\n{}".format(bogo_report), context = {"event": "categorise", "target": status_id})
        elif result.category == bogofilter.HAM:
            self.log("BASED: Not enqueueing status {} for deletion".format(status_id))
-            self.set_based(status_id)
-            self.respond(status, "Categorised as based\n{}".format(bogo_report))
+            if self.config["register"]:
+                self.set_based(status_id)
+            self.respond(status, "Categorised as based\n{}".format(bogo_report), context = {"event": "categorise", "target": status_id})
        else:
            self.log("UNSURE: Not enqueueing status {} for deletion".format(status_id))
-            self.set_unsure(status_id)
-            self.respond(status, "Categorised as unsure\n{}".format(bogo_report))
+            if self.config["register"]:
+                self.set_unsure(status_id)
+            self.respond(status, "Categorised as unsure\n{}".format(bogo_report), context = {"event": "categorise", "target": status_id})
        
    def on_load_state(self):
        state = {
@ -208,6 +225,7 @@ class CringeBotClient(BotClient):
            "cringe": [],
            "based": [],
            "unsure": [],
+            "own": {},
            **super().on_load_state()}

        state["cringe"] = set(state["cringe"])
@ -233,8 +251,10 @@ class CringeBotClient(BotClient):
    def deletion_report(self):
        self.log("{} status(es) queued for deletion".format(len(self.deletion_scheduler.queue)))

-    def enqueue_deletion(self, status_id):
-        self.state["deletion_queue"][status_id] = {"scheduler_event": self.deletion_scheduler.enter(60 * self.config["max_age"], 1, self.queued_delete, argument=(status_id,), kwargs={})}
+    def enqueue_deletion(self, status_id, delay = None):
+        if delay is None:
+            delay = 60 * self.config["max_age"]
+        self.state["deletion_queue"][status_id] = {"scheduler_event": self.deletion_scheduler.enter(delay, 1, self.queued_delete, argument=(status_id,), kwargs={})}
        self.deletion_report()

    def unqueue_deletion(self, status_id):
@ -249,77 +269,51 @@ class CringeBotClient(BotClient):
    def queued_delete(self, status_id):
        try:
            self.log("Deleting status {}".format(status_id))
-            self.set_discard(status_id)
            self.api.status_delete(status_id)
+            self.set_discard(status_id)
+            if status_id in self.state["own"]:
+                del self.state["own"][status_id]
        except MastodonNotFoundError:
            self.log("Cannot find status {} on server".format(status_id))
+            self.set_discard(status_id)
        except Exception:
            self.log(traceback.format_exc())
+            self.enqueue_deletion(status_id, 300)

 def toot_dict_to_mail(toot_dict):
    flags = []
    
-    if toot_dict.get("sensitive", False):
+    flags.append(toot_dict["visibility"])
+
+    if toot_dict["sensitive"]:
        flags.append("sensitive")
    
-    if toot_dict.get("poll", False):
+    if toot_dict["poll"]:
        flags.append("poll")
-    
-    if toot_dict.get("reblog", False):
-        flags.append("reblog")
-    
-    if toot_dict.get("reblogged", False):
-        flags.append("reblogged")
-    
-    if toot_dict.get("favourited", False):
-        flags.append("favourited")
-    
-    if toot_dict.get("bookmarked", False):
-        flags.append("bookmarked")
-    
-    if toot_dict.get("pinned", False):
-        flags.append("pinned")
-    
-    flags = ", ".join(flags)
-  
-    headers = {}
-
-    if toot_dict.get("account") and toot_dict["account"].get("acct"):
-        headers["From"] = toot_dict["account"]["acct"]
-
-    if toot_dict.get("created_at"):
-        headers["Date"] = toot_dict["created_at"]
-
-    if toot_dict.get("visibility"):
-        headers["X-Visibility"] = toot_dict["visibility"]
+   
+    if len(toot_dict["media_attachments"]) > 0:
+        flags.append("attachments")

-    if len(flags) > 0:
-        headers["X-Flags"] = flags
+    time = []
+    now = datetime.now()
+    time.append(SEASON[now.month])
+    time.append(TIME_OF_DAY[now.hour])

-    if toot_dict.get("spoiler_text"):
+    headers = {}
+    
+    headers["From"] = toot_dict["account"]["acct"]
+    headers["X-Flags"] = ", ".join(flags)
+    headers["X-Time"] = ", ".join(time)
+    if len(toot_dict["spoiler_text"]) > 0:
        headers["Subject"] = toot_dict["spoiler_text"]

-    if toot_dict.get("replies_count", 0) > 0:
-        headers["X-Replies-Count"] = toot_dict["replies_count"]
-
-    if len(toot_dict.get("media_attachments", [])) > 0:
-        headers["X-Attachments-Count"] = len(toot_dict["media_attachments"])
-    
-    if toot_dict.get("reblogs_count", 0) > 0:
-        headers["X-Reblogs-Count"] = toot_dict["reblogs_count"]
-    
-    if toot_dict.get("favourites_count", 0) > 0:
-        headers["X-Favourites-Count"] = toot_dict["favourites_count"]
- 
-    if toot_dict.get("content") and len(toot_dict["content"]) > 0:
-        body = toot_dict["content"]
-    else:
-        body = None
+    body = toot_dict["content"]

    return bogofilter.Mail(headers = headers, body = body)

-with open("config.json") as json_file:
-    bot = Bot(CringeBotClient, json.load(json_file))
+bot = Bot(CringeBotClient, toml.load("config.toml"))
+#with open("config.json") as json_file:
+#    bot = Bot(CringeBotClient, json.load(json_file))
 bot.start()

 while True: