commit 513b788d2790d064788907b54a793406e96e4bfe Author: Thor Harald Johansen Date: Sun Oct 18 13:53:31 2020 +0200 Initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d344ba6 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +config.json diff --git a/config.example.json b/config.example.json new file mode 100644 index 0000000..d01ff06 --- /dev/null +++ b/config.example.json @@ -0,0 +1,6 @@ +{ + "max_age": 48, + "instances": [ + "mastodon.social" + ] +} diff --git a/fedidel.py b/fedidel.py new file mode 100644 index 0000000..8dadf4e --- /dev/null +++ b/fedidel.py @@ -0,0 +1,270 @@ +import os +import time +from datetime import datetime, timezone, timedelta +import json +import pprint +import threading +import traceback + +from mastodon import Mastodon, MastodonNotFoundError + +def log_print(source, text): + prefix = "{}: ".format(source) + text = (prefix + text.strip()).replace("\n", "\n" + prefix) + print(text) + +def log_pprint(source, obj): + log(source, pprint.pformat(obj)) + +# Floor datetime to nearest hour +#def floor_dt(dt): +# return dt - timedelta( +# minutes = dt.minute, +# seconds = dt.second, +# microseconds = dt.microsecond) + +def encode_time(dt): + return int(dt.strftime("%Y%m%d%H")) + +def decode_time(value): + return dt.strptime(str(value), "%Y%m%d%H") + +class Instance: + def __init__(self, name, config): + self.name = name + self.config = config + + self.base_url = "https://{}".format(name) + self.client_file = "secret/{}.client".format(name) + self.user_file = "secret/{}.user".format(name) + + self.state_file = "state/{}.state".format(name) + self.state_lock = threading.Lock() + + self.spawner_thread = threading.Thread( + target = self.spawner, + name = self.name + " spawner", + args = (), + kwargs = {}, + daemon = True) + + self.tracker_thread = threading.Thread( + target = self.tracker, + name = self.name + " tracker", + args = (), + kwargs = {}, + daemon = True) + + self.purger_thread = threading.Thread( + target = self.purger, + name = self.name + " purger", + args = (), + kwargs = {}, + daemon = True) + + def setup(self): + if not os.path.exists(self.client_file): + Mastodon.create_app( + 'MastodonDeleter', + api_base_url = self.base_url, + to_file = self.client_file) + + if not os.path.exists(self.user_file): + api = Mastodon( + api_base_url = self.base_url, + client_id = self.client_file) + + auth_url = api.auth_request_url() + + print("Go to:") + print(auth_url) + print() + + auth_code = input("Enter code: ") + + print() + + api.log_in(code = auth_code, to_file = self.user_file) + + def start(self): + self.spawner_thread.start() + + def spawner(self): + self.load_state() + + self.api = Mastodon( + access_token = self.user_file, + api_base_url = self.base_url) + + self.tracker_thread.start() + self.purger_thread.start() + + while True: + self.tracker_report() + time.sleep(60) + + def tracker(self): + my_id = self.api.me()["id"] + + while True: + try: + self.state_lock.acquire() + min_id = self.state["min_id"] + self.state_lock.release() + + statuses = self.api.account_statuses(my_id, min_id = min_id) + + while not statuses is None and len(statuses) > 0: + log_print(self.name, "Found {} new status(es)".format(len(statuses))) + + for status in sorted(statuses, + key = lambda status: status["created_at"]): + self.track_status(status) + + self.save_state() + + statuses = self.api.fetch_previous(statuses) + + # Rate limit (300 requests per 5 minutes, i.e. 1 per second) + time.sleep(1) + + except: + log_print(self.name, traceback.format_exc()) + + time.sleep(60) + + def purger(self): + while True: + try: + + timeslot_key, status_id = self.next_expired() + + if not timeslot_key is None: + log_print(self.name, "Deleting status {} in timeslot {}".format(status_id, timeslot_key)) + + try: + self.api.status_delete(status_id) + + except MastodonNotFoundError: + log_print(self.name, + "Cannot delete missing status {} from server".format(status_id)) + + self.expire_status(timeslot_key, status_id) + time.sleep(60) + else: + time.sleep(1) + except: + log_print(self.name, traceback.format_exc()) + time.sleep(60) + + def load_state(self): + self.state_lock.acquire() + + if not os.path.exists(self.state_file): + self.state = dict( + min_id = "0", + timeslots = {}) + else: + with open(self.state_file) as json_file: + self.state = json.load(json_file) + + self.state["timeslots"] = dict(map(lambda kv: (int(kv[0]), set(kv[1])), self.state["timeslots"])) + + self.state_lock.release() + + def save_state(self): + self.state_lock.acquire() + + json_state = self.state.copy() + json_state["timeslots"] = list(map(lambda kv: [kv[0], list(kv[1])], json_state["timeslots"].items())) + + self.state_lock.release() + + with open(self.state_file, "w") as json_file: + json.dump(json_state, json_file, indent = 4) + + def tracker_report(self): + self.state_lock.acquire() + + total_timeslots = len(self.state["timeslots"]) + total_statuses = 0 + for timeslot_key, status_ids in self.state["timeslots"].items(): + total_statuses += len(status_ids) + + self.state_lock.release() + + log_print(self.name, "Tracking {} statuses across {} timeslots".format( + total_statuses, total_timeslots)) + + def track_status(self, status): + status_id = str(status["id"]) + timeslot_key = encode_time(status["created_at"]) + + self.state_lock.acquire() + if status["reblog"] is None: + timeslots = self.state["timeslots"] + if not timeslot_key in timeslots: + timeslots[timeslot_key] = set() + timeslots[timeslot_key].add(status_id) + + if status_id > self.state["min_id"]: + self.state["min_id"] = status_id + + self.state_lock.release() + + def next_expired(self): + now = datetime.now(timezone.utc) + min_timeslot_key = encode_time(now - timedelta(hours = self.config["max_age"])) + + self.state_lock.acquire() + + timeslot_key, status_ids = next(iter(self.state["timeslots"].items()), (None, None)) + + if not timeslot_key is None and timeslot_key < min_timeslot_key: + status_id = next(iter(status_ids), None) + else: + timeslot_key = None + status_id = None + + + self.state_lock.release() + + return (timeslot_key, status_id) + + def expire_status(self, timeslot_key, status_id): + self.state_lock.acquire() + + timeslots = self.state["timeslots"] + if timeslot_key in timeslots: + if status_id in timeslots[timeslot_key]: + log_print(self.name, "Expiring status {} from timeslot {}".format(status_id, timeslot_key)) + timeslots[timeslot_key].remove(status_id) + else: + log_print(self.name, "Cannot expire missing status {} from timeslot {}".format( + status_id, timeslot_key)) + + if len(timeslots[timeslot_key]) == 0: + log_print(self.name, "Removing empty timeslot {}".format(timeslot_key)) + del timeslots[timeslot_key] + else: + log_print(self.name, "Cannot expire status {} from missing timeslot {}".format( + status_id, timeslot_key)) + + self.state_lock.release() + self.save_state() + +with open("config.json") as json_file: + config = json.load(json_file) + +instances = {} +for name in config["instances"]: + instances[name] = Instance(name = name, config = config) + instances[name].setup() + +start_interval = 60.0 / len(config["instances"]) +for instance in instances.values(): + instance.start() + time.sleep(start_interval) + +while True: + time.sleep(1) diff --git a/fedidel.service b/fedidel.service new file mode 100644 index 0000000..2455476 --- /dev/null +++ b/fedidel.service @@ -0,0 +1,13 @@ +[Unit] +Description=Fediverse YouTube Converter + +[Service] +User=thor +WorkingDirectory=/usr/local/lib/fedidel +ExecStart=/usr/bin/python3 /usr/local/lib/fedidel/deleter.py +Environment=PYTHONUNBUFFERED=1 +SyslogIdentifier=fedidel +Restart=always + +[Install] +WantedBy=multi-user.target diff --git a/secret/.gitignore b/secret/.gitignore new file mode 100644 index 0000000..d6b7ef3 --- /dev/null +++ b/secret/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore diff --git a/state/.gitignore b/state/.gitignore new file mode 100644 index 0000000..d6b7ef3 --- /dev/null +++ b/state/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore