Refactor: Use 'sched' module, make single-threaded

master
Thor 3 years ago
parent 891ed2ac4c
commit 0b4238a347
  1. 10
      bot.py
  2. 176
      cringebot.py

@ -25,7 +25,6 @@ class BotClient:
"user_file": "secret/{}.user".format(config["name"]),
"state_file": "state/{}.state".format(config["name"])}, **config}
self.state_lock = threading.Lock()
self.load_state()
self.poll_thread = threading.Thread(
@ -92,8 +91,7 @@ class BotClient:
self.on_status(status)
with self.state_lock:
self.state["min_status_id"] = status["id"]
self.state["min_status_id"] = status["id"]
time.sleep(self.config["rate_limit"])
statuses = self.api.fetch_previous(statuses)
@ -106,13 +104,11 @@ class BotClient:
def load_state(self):
with self.state_lock:
self.state = self.on_load_state()
self.state = self.on_load_state()
self.on_state_loaded(self.state)
def save_state(self):
with self.state_lock:
self.on_save_state(copy.deepcopy(self.state))
self.on_save_state(self.state)
self.on_state_saved(self.state)
def on_start(self):

@ -1,9 +1,9 @@
import os
import sys
import time
import sched
from datetime import datetime, timezone, timedelta
import json
import threading
import bogofilter
import html2text
from collections import deque
@ -23,42 +23,26 @@ def decode_time(value):
class CringeBotClient(BotClient):
def __init__(self, bot, config):
super().__init__(bot, config)
self.h2t = html2text.HTML2Text()
self.h2t.ignore_links = True
self.spawner_thread = threading.Thread(
target = self.spawner,
name = self.config["name"] + " spawner",
args = (),
kwargs = {},
daemon = True)
self.purger_thread = threading.Thread(
target = self.purger,
name = self.config["name"] + " purger",
args = (),
kwargs = {},
daemon = True)
def on_start(self):
self.spawner_thread.start()
def spawner(self):
self.purger_thread.start()
self.deletion_scheduler = sched.scheduler(time.time, time.sleep)
super().__init__(bot, config)
while True:
self.tracker_report()
time.sleep(60)
def on_start(self):
self.deletion_report()
def respond(self, status, message):
self.log("Responded with:")
self.log(message)
reply = self.api.status_reply(status, "{}\n{}".format(message, self.config["tag"]), visibility = "direct", untag = True)
self.track_status(reply)
self.enqueue_deletion(reply["id"])
time.sleep(1)
def on_poll(self):
self.deletion_scheduler.run(blocking = False)
def on_status(self, status):
if status["account"]["id"] != self.api.me()["id"]:
return
@ -74,6 +58,10 @@ class CringeBotClient(BotClient):
preview = toot_dict_to_mail(status)
preview.body = md_text
preview_text = preview.format()
self.log()
self.log(preview_text)
self.log()
replied_id = status.get("in_reply_to_id", None)
if replied_id:
@ -82,12 +70,11 @@ class CringeBotClient(BotClient):
replied_tokens = self.h2t.handle(replied_status["content"]).split()
if self.config["tag"] in replied_tokens:
self.track_status(status)
self.enqueue_deletion(status["id"])
target_status_id = replied_status.get("in_reply_to_id", None)
if target_status_id:
try:
target_status = self.api.status(target_status_id)
target_timeslot_key = encode_time(target_status["created_at"])
target_mail_text = toot_dict_to_mail(target_status).format()
command = self.h2t.handle(status["content"]).strip()
@ -100,36 +87,36 @@ class CringeBotClient(BotClient):
token = tokens.popleft()
if token == "cringe":
bogofilter.run(target_mail_text, [bogofilter.LEARN_SPAM])
self.track_status(target_status)
self.enqueue_deletion(target_status_id)
self.respond(status, "Learned as cringe")
break
elif token == "based":
bogofilter.run(target_mail_text, [bogofilter.LEARN_HAM])
self.expire_status(target_timeslot_key, target_status_id)
self.unqueue_deletion(target_status_id)
self.respond(status, "Learned as based")
break
elif token == "unlearn":
token = tokens.popleft()
if token == "cringe":
bogofilter.run(target_mail_text, [bogofilter.UNLEARN_SPAM])
self.expire_status(target_timeslot_key, target_status_id)
self.unqueue_deletion(target_status_id)
self.respond(status, "Unlearned as cringe")
break
elif token == "based":
bogofilter.run(target_mail_text, [bogofilter.UNLEARN_HAM])
self.expire_status(target_timeslot_key, target_status_id)
self.unqueue_deletion(target_status_id)
self.respond(status, "Unlearned as cringe")
break
elif token == "relearn":
token = tokens.popleft()
if token == "cringe":
bogofilter.run(target_mail_text, [bogofilter.UNLEARN_HAM, bogofilter.LEARN_SPAM])
self.track_status(target_status)
self.enqueue_deletion(target_status_id)
self.respond(status, "Relearned as cringe")
break
elif token == "based":
bogofilter.run(target_mail_text, [bogofilter.UNLEARN_SPAM, bogofilter.LEARN_HAM])
self.expire_status(target_timeslot_key, target_status_id)
self.unqueue_deletion(target_status_id)
self.respond(status, "Relearned as as based")
break
except IndexError:
@ -145,107 +132,54 @@ class CringeBotClient(BotClient):
result = bogofilter.run(mail_text, [bogofilter.CLASSIFY, bogofilter.REGISTER])
bogo_report = "Bogofilter: Category={}, Score={}".format(result.category, "{:.4f}".format(result.score))
if result.category == bogofilter.SPAM:
self.log("CRINGE: Tracking status with ID {} as cringe".format(status["id"]))
self.log("CRINGE: Enqueuing status {} for deletion".format(status["id"]))
self.respond(status, "Categorised as cringe\n{}".format(bogo_report))
self.track_status(status)
self.enqueue_deletion(status["id"])
elif result.category == bogofilter.UNSURE:
self.log("UNSURE: Not tracking status with ID {} as cringe".format(status["id"]))
self.log("UNSURE: Not enqueueing status {} for deletion".format(status["id"]))
self.respond(status, "Categorised as unsure\n{}".format(bogo_report))
else:
self.log("BASED: Not tracking status with ID {} as cringe".format(status["id"]))
self.log("BASED: Not enqueueing status {} for deletion".format(status["id"]))
self.respond(status, "Categorised as based\n{}".format(bogo_report))
self.log()
self.log(preview_text)
self.log()
def on_load_state(self):
state = {"deletion_queue": {}, **super().on_load_state()}
def purger(self):
while True:
try:
deleted = False
timeslot_key, status_id = self.next_expired()
if not timeslot_key is None:
try:
self.log("Deleting status {} in timeslot {}".format(status_id, timeslot_key))
self.api.status_delete(status_id)
deleted = True
except MastodonNotFoundError:
self.log("Cannot find status {} on server".format(status_id))
self.expire_status(timeslot_key, status_id)
if deleted:
time.sleep(60)
else:
time.sleep(1)
except:
self.log(traceback.format_exc())
time.sleep(60)
for status_id, params in state["deletion_queue"].items():
params["scheduler_event"] = self.deletion_scheduler.enterabs(datetime.fromisoformat(params["time"]).timestamp(), 1, self.queued_delete, argument=(status_id,))
def on_load_state(self):
state = super().on_load_state()
state["timeslots"] = state.get("timeslots", {})
state["timeslots"] = dict(map(lambda kv: (int(kv[0]), set(kv[1])), state["timeslots"]))
return state
def on_save_state(self, state):
state["timeslots"] = list(map(lambda kv: [kv[0], list(kv[1])], state["timeslots"].items()))
state["deletion_queue"] = {event.argument[0]: {"time": datetime.fromtimestamp(event.time, timezone.utc).isoformat()} for event in self.deletion_scheduler.queue}
super().on_save_state(state)
def tracker_report(self):
with self.state_lock:
total_timeslots = len(self.state["timeslots"])
total_statuses = 0
for timeslot_key, status_ids in self.state["timeslots"].items():
total_statuses += len(status_ids)
self.log("Tracking {} statuses across {} timeslots".format(total_statuses, total_timeslots))
def track_status(self, status):
status_id = str(status["id"])
timeslot_key = encode_time(status["created_at"])
with self.state_lock:
if status["reblog"] is None:
timeslots = self.state["timeslots"]
if not timeslot_key in timeslots:
timeslots[timeslot_key] = set()
timeslots[timeslot_key].add(status_id)
def deletion_report(self):
self.log("{} status(es) queued for deletion".format(len(self.deletion_scheduler.queue)))
def enqueue_deletion(self, status_id):
self.state["deletion_queue"][status_id] = {"scheduler_event": self.deletion_scheduler.enter(60 * self.config["max_age"], 1, self.queued_delete, argument=(status_id,), kwargs={})}
self.save_state()
self.deletion_report()
def unqueue_deletion(self, status_id):
try:
params = state["deletion_queue"].pop(status_id)
self.deletion_scheduler.cancel(params["scheduler_event"])
except KeyError:
self.log("Cannot unqueue non-queued status {} for deletion".format(status_id))
self.deletion_report()
def next_expired(self):
now = datetime.now(timezone.utc)
min_timeslot_key = encode_time(now - timedelta(minutes = self.config["max_age"]))
with self.state_lock:
timeslot_key, status_ids = next(iter(self.state["timeslots"].items()), (None, None))
if not timeslot_key is None and timeslot_key < min_timeslot_key:
status_id = next(iter(status_ids), None)
else:
timeslot_key = None
status_id = None
return (timeslot_key, status_id)
def expire_status(self, timeslot_key, status_id):
with self.state_lock:
timeslots = self.state["timeslots"]
if timeslot_key in timeslots:
if status_id in timeslots[timeslot_key]:
self.log("Expiring status {} from timeslot {}".format(status_id, timeslot_key))
timeslots[timeslot_key].remove(status_id)
else:
self.log("Cannot expire missing status {} from timeslot {}".format(
status_id, timeslot_key))
if len(timeslots[timeslot_key]) == 0:
self.log("Removing empty timeslot {}".format(timeslot_key))
del timeslots[timeslot_key]
else:
self.log("Cannot expire status {} from missing timeslot {}".format(
status_id, timeslot_key))
def queued_delete(self, status_id):
try:
self.log("Deleting status {}".format(status_id))
self.api.status_delete(status_id)
except MastodonNotFoundError:
self.log("Cannot find status {} on server".format(status_id))
except Exception:
self.log(traceback.format_exc())
def toot_dict_to_mail(toot_dict):
flags = []

Loading…
Cancel
Save