|
|
|
@ -1,9 +1,9 @@ |
|
|
|
|
import os |
|
|
|
|
import sys |
|
|
|
|
import time |
|
|
|
|
import sched |
|
|
|
|
from datetime import datetime, timezone, timedelta |
|
|
|
|
import json |
|
|
|
|
import threading |
|
|
|
|
import bogofilter |
|
|
|
|
import html2text |
|
|
|
|
from collections import deque |
|
|
|
@ -23,42 +23,26 @@ def decode_time(value): |
|
|
|
|
|
|
|
|
|
class CringeBotClient(BotClient): |
|
|
|
|
def __init__(self, bot, config): |
|
|
|
|
super().__init__(bot, config) |
|
|
|
|
|
|
|
|
|
self.h2t = html2text.HTML2Text() |
|
|
|
|
self.h2t.ignore_links = True |
|
|
|
|
|
|
|
|
|
self.spawner_thread = threading.Thread( |
|
|
|
|
target = self.spawner, |
|
|
|
|
name = self.config["name"] + " spawner", |
|
|
|
|
args = (), |
|
|
|
|
kwargs = {}, |
|
|
|
|
daemon = True) |
|
|
|
|
|
|
|
|
|
self.purger_thread = threading.Thread( |
|
|
|
|
target = self.purger, |
|
|
|
|
name = self.config["name"] + " purger", |
|
|
|
|
args = (), |
|
|
|
|
kwargs = {}, |
|
|
|
|
daemon = True) |
|
|
|
|
|
|
|
|
|
def on_start(self): |
|
|
|
|
self.spawner_thread.start() |
|
|
|
|
|
|
|
|
|
def spawner(self): |
|
|
|
|
self.purger_thread.start() |
|
|
|
|
self.deletion_scheduler = sched.scheduler(time.time, time.sleep) |
|
|
|
|
|
|
|
|
|
super().__init__(bot, config) |
|
|
|
|
|
|
|
|
|
while True: |
|
|
|
|
self.tracker_report() |
|
|
|
|
time.sleep(60) |
|
|
|
|
def on_start(self): |
|
|
|
|
self.deletion_report() |
|
|
|
|
|
|
|
|
|
def respond(self, status, message): |
|
|
|
|
self.log("Responded with:") |
|
|
|
|
self.log(message) |
|
|
|
|
reply = self.api.status_reply(status, "{}\n{}".format(message, self.config["tag"]), visibility = "direct", untag = True) |
|
|
|
|
self.track_status(reply) |
|
|
|
|
self.enqueue_deletion(reply["id"]) |
|
|
|
|
time.sleep(1) |
|
|
|
|
|
|
|
|
|
def on_poll(self): |
|
|
|
|
self.deletion_scheduler.run(blocking = False) |
|
|
|
|
|
|
|
|
|
def on_status(self, status): |
|
|
|
|
if status["account"]["id"] != self.api.me()["id"]: |
|
|
|
|
return |
|
|
|
@ -74,6 +58,10 @@ class CringeBotClient(BotClient): |
|
|
|
|
preview = toot_dict_to_mail(status) |
|
|
|
|
preview.body = md_text |
|
|
|
|
preview_text = preview.format() |
|
|
|
|
|
|
|
|
|
self.log() |
|
|
|
|
self.log(preview_text) |
|
|
|
|
self.log() |
|
|
|
|
|
|
|
|
|
replied_id = status.get("in_reply_to_id", None) |
|
|
|
|
if replied_id: |
|
|
|
@ -82,12 +70,11 @@ class CringeBotClient(BotClient): |
|
|
|
|
replied_tokens = self.h2t.handle(replied_status["content"]).split() |
|
|
|
|
|
|
|
|
|
if self.config["tag"] in replied_tokens: |
|
|
|
|
self.track_status(status) |
|
|
|
|
self.enqueue_deletion(status["id"]) |
|
|
|
|
target_status_id = replied_status.get("in_reply_to_id", None) |
|
|
|
|
if target_status_id: |
|
|
|
|
try: |
|
|
|
|
target_status = self.api.status(target_status_id) |
|
|
|
|
target_timeslot_key = encode_time(target_status["created_at"]) |
|
|
|
|
target_mail_text = toot_dict_to_mail(target_status).format() |
|
|
|
|
|
|
|
|
|
command = self.h2t.handle(status["content"]).strip() |
|
|
|
@ -100,36 +87,36 @@ class CringeBotClient(BotClient): |
|
|
|
|
token = tokens.popleft() |
|
|
|
|
if token == "cringe": |
|
|
|
|
bogofilter.run(target_mail_text, [bogofilter.LEARN_SPAM]) |
|
|
|
|
self.track_status(target_status) |
|
|
|
|
self.enqueue_deletion(target_status_id) |
|
|
|
|
self.respond(status, "Learned as cringe") |
|
|
|
|
break |
|
|
|
|
elif token == "based": |
|
|
|
|
bogofilter.run(target_mail_text, [bogofilter.LEARN_HAM]) |
|
|
|
|
self.expire_status(target_timeslot_key, target_status_id) |
|
|
|
|
self.unqueue_deletion(target_status_id) |
|
|
|
|
self.respond(status, "Learned as based") |
|
|
|
|
break |
|
|
|
|
elif token == "unlearn": |
|
|
|
|
token = tokens.popleft() |
|
|
|
|
if token == "cringe": |
|
|
|
|
bogofilter.run(target_mail_text, [bogofilter.UNLEARN_SPAM]) |
|
|
|
|
self.expire_status(target_timeslot_key, target_status_id) |
|
|
|
|
self.unqueue_deletion(target_status_id) |
|
|
|
|
self.respond(status, "Unlearned as cringe") |
|
|
|
|
break |
|
|
|
|
elif token == "based": |
|
|
|
|
bogofilter.run(target_mail_text, [bogofilter.UNLEARN_HAM]) |
|
|
|
|
self.expire_status(target_timeslot_key, target_status_id) |
|
|
|
|
self.unqueue_deletion(target_status_id) |
|
|
|
|
self.respond(status, "Unlearned as cringe") |
|
|
|
|
break |
|
|
|
|
elif token == "relearn": |
|
|
|
|
token = tokens.popleft() |
|
|
|
|
if token == "cringe": |
|
|
|
|
bogofilter.run(target_mail_text, [bogofilter.UNLEARN_HAM, bogofilter.LEARN_SPAM]) |
|
|
|
|
self.track_status(target_status) |
|
|
|
|
self.enqueue_deletion(target_status_id) |
|
|
|
|
self.respond(status, "Relearned as cringe") |
|
|
|
|
break |
|
|
|
|
elif token == "based": |
|
|
|
|
bogofilter.run(target_mail_text, [bogofilter.UNLEARN_SPAM, bogofilter.LEARN_HAM]) |
|
|
|
|
self.expire_status(target_timeslot_key, target_status_id) |
|
|
|
|
self.unqueue_deletion(target_status_id) |
|
|
|
|
self.respond(status, "Relearned as as based") |
|
|
|
|
break |
|
|
|
|
except IndexError: |
|
|
|
@ -145,107 +132,54 @@ class CringeBotClient(BotClient): |
|
|
|
|
result = bogofilter.run(mail_text, [bogofilter.CLASSIFY, bogofilter.REGISTER]) |
|
|
|
|
bogo_report = "Bogofilter: Category={}, Score={}".format(result.category, "{:.4f}".format(result.score)) |
|
|
|
|
if result.category == bogofilter.SPAM: |
|
|
|
|
self.log("CRINGE: Tracking status with ID {} as cringe".format(status["id"])) |
|
|
|
|
self.log("CRINGE: Enqueuing status {} for deletion".format(status["id"])) |
|
|
|
|
self.respond(status, "Categorised as cringe\n{}".format(bogo_report)) |
|
|
|
|
self.track_status(status) |
|
|
|
|
self.enqueue_deletion(status["id"]) |
|
|
|
|
elif result.category == bogofilter.UNSURE: |
|
|
|
|
self.log("UNSURE: Not tracking status with ID {} as cringe".format(status["id"])) |
|
|
|
|
self.log("UNSURE: Not enqueueing status {} for deletion".format(status["id"])) |
|
|
|
|
self.respond(status, "Categorised as unsure\n{}".format(bogo_report)) |
|
|
|
|
else: |
|
|
|
|
self.log("BASED: Not tracking status with ID {} as cringe".format(status["id"])) |
|
|
|
|
self.log("BASED: Not enqueueing status {} for deletion".format(status["id"])) |
|
|
|
|
self.respond(status, "Categorised as based\n{}".format(bogo_report)) |
|
|
|
|
|
|
|
|
|
self.log() |
|
|
|
|
self.log(preview_text) |
|
|
|
|
self.log() |
|
|
|
|
def on_load_state(self): |
|
|
|
|
state = {"deletion_queue": {}, **super().on_load_state()} |
|
|
|
|
|
|
|
|
|
def purger(self): |
|
|
|
|
while True: |
|
|
|
|
try: |
|
|
|
|
deleted = False |
|
|
|
|
timeslot_key, status_id = self.next_expired() |
|
|
|
|
|
|
|
|
|
if not timeslot_key is None: |
|
|
|
|
try: |
|
|
|
|
self.log("Deleting status {} in timeslot {}".format(status_id, timeslot_key)) |
|
|
|
|
self.api.status_delete(status_id) |
|
|
|
|
deleted = True |
|
|
|
|
|
|
|
|
|
except MastodonNotFoundError: |
|
|
|
|
self.log("Cannot find status {} on server".format(status_id)) |
|
|
|
|
|
|
|
|
|
self.expire_status(timeslot_key, status_id) |
|
|
|
|
|
|
|
|
|
if deleted: |
|
|
|
|
time.sleep(60) |
|
|
|
|
else: |
|
|
|
|
time.sleep(1) |
|
|
|
|
except: |
|
|
|
|
self.log(traceback.format_exc()) |
|
|
|
|
time.sleep(60) |
|
|
|
|
for status_id, params in state["deletion_queue"].items(): |
|
|
|
|
params["scheduler_event"] = self.deletion_scheduler.enterabs(datetime.fromisoformat(params["time"]).timestamp(), 1, self.queued_delete, argument=(status_id,)) |
|
|
|
|
|
|
|
|
|
def on_load_state(self): |
|
|
|
|
state = super().on_load_state() |
|
|
|
|
state["timeslots"] = state.get("timeslots", {}) |
|
|
|
|
state["timeslots"] = dict(map(lambda kv: (int(kv[0]), set(kv[1])), state["timeslots"])) |
|
|
|
|
return state |
|
|
|
|
|
|
|
|
|
def on_save_state(self, state): |
|
|
|
|
state["timeslots"] = list(map(lambda kv: [kv[0], list(kv[1])], state["timeslots"].items())) |
|
|
|
|
state["deletion_queue"] = {event.argument[0]: {"time": datetime.fromtimestamp(event.time, timezone.utc).isoformat()} for event in self.deletion_scheduler.queue} |
|
|
|
|
|
|
|
|
|
super().on_save_state(state) |
|
|
|
|
|
|
|
|
|
def tracker_report(self): |
|
|
|
|
with self.state_lock: |
|
|
|
|
total_timeslots = len(self.state["timeslots"]) |
|
|
|
|
total_statuses = 0 |
|
|
|
|
for timeslot_key, status_ids in self.state["timeslots"].items(): |
|
|
|
|
total_statuses += len(status_ids) |
|
|
|
|
|
|
|
|
|
self.log("Tracking {} statuses across {} timeslots".format(total_statuses, total_timeslots)) |
|
|
|
|
|
|
|
|
|
def track_status(self, status): |
|
|
|
|
status_id = str(status["id"]) |
|
|
|
|
timeslot_key = encode_time(status["created_at"]) |
|
|
|
|
|
|
|
|
|
with self.state_lock: |
|
|
|
|
if status["reblog"] is None: |
|
|
|
|
timeslots = self.state["timeslots"] |
|
|
|
|
if not timeslot_key in timeslots: |
|
|
|
|
timeslots[timeslot_key] = set() |
|
|
|
|
timeslots[timeslot_key].add(status_id) |
|
|
|
|
def deletion_report(self): |
|
|
|
|
self.log("{} status(es) queued for deletion".format(len(self.deletion_scheduler.queue))) |
|
|
|
|
|
|
|
|
|
def enqueue_deletion(self, status_id): |
|
|
|
|
self.state["deletion_queue"][status_id] = {"scheduler_event": self.deletion_scheduler.enter(60 * self.config["max_age"], 1, self.queued_delete, argument=(status_id,), kwargs={})} |
|
|
|
|
self.save_state() |
|
|
|
|
self.deletion_report() |
|
|
|
|
|
|
|
|
|
def unqueue_deletion(self, status_id): |
|
|
|
|
try: |
|
|
|
|
params = state["deletion_queue"].pop(status_id) |
|
|
|
|
self.deletion_scheduler.cancel(params["scheduler_event"]) |
|
|
|
|
except KeyError: |
|
|
|
|
self.log("Cannot unqueue non-queued status {} for deletion".format(status_id)) |
|
|
|
|
|
|
|
|
|
self.deletion_report() |
|
|
|
|
|
|
|
|
|
def next_expired(self): |
|
|
|
|
now = datetime.now(timezone.utc) |
|
|
|
|
min_timeslot_key = encode_time(now - timedelta(minutes = self.config["max_age"])) |
|
|
|
|
|
|
|
|
|
with self.state_lock: |
|
|
|
|
timeslot_key, status_ids = next(iter(self.state["timeslots"].items()), (None, None)) |
|
|
|
|
|
|
|
|
|
if not timeslot_key is None and timeslot_key < min_timeslot_key: |
|
|
|
|
status_id = next(iter(status_ids), None) |
|
|
|
|
else: |
|
|
|
|
timeslot_key = None |
|
|
|
|
status_id = None |
|
|
|
|
|
|
|
|
|
return (timeslot_key, status_id) |
|
|
|
|
|
|
|
|
|
def expire_status(self, timeslot_key, status_id): |
|
|
|
|
with self.state_lock: |
|
|
|
|
timeslots = self.state["timeslots"] |
|
|
|
|
if timeslot_key in timeslots: |
|
|
|
|
if status_id in timeslots[timeslot_key]: |
|
|
|
|
self.log("Expiring status {} from timeslot {}".format(status_id, timeslot_key)) |
|
|
|
|
timeslots[timeslot_key].remove(status_id) |
|
|
|
|
else: |
|
|
|
|
self.log("Cannot expire missing status {} from timeslot {}".format( |
|
|
|
|
status_id, timeslot_key)) |
|
|
|
|
|
|
|
|
|
if len(timeslots[timeslot_key]) == 0: |
|
|
|
|
self.log("Removing empty timeslot {}".format(timeslot_key)) |
|
|
|
|
del timeslots[timeslot_key] |
|
|
|
|
else: |
|
|
|
|
self.log("Cannot expire status {} from missing timeslot {}".format( |
|
|
|
|
status_id, timeslot_key)) |
|
|
|
|
def queued_delete(self, status_id): |
|
|
|
|
try: |
|
|
|
|
self.log("Deleting status {}".format(status_id)) |
|
|
|
|
self.api.status_delete(status_id) |
|
|
|
|
except MastodonNotFoundError: |
|
|
|
|
self.log("Cannot find status {} on server".format(status_id)) |
|
|
|
|
except Exception: |
|
|
|
|
self.log(traceback.format_exc()) |
|
|
|
|
|
|
|
|
|
def toot_dict_to_mail(toot_dict): |
|
|
|
|
flags = [] |
|
|
|
|