From 3c71d99c7c6b87833fcd60de620db41c2accde0b Mon Sep 17 00:00:00 2001 From: Thor Harald Johansen Date: Mon, 2 Aug 2021 00:20:12 +0200 Subject: [PATCH] New algorithm --- portbot.py | 184 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 103 insertions(+), 81 deletions(-) diff --git a/portbot.py b/portbot.py index d05c205..c85206d 100644 --- a/portbot.py +++ b/portbot.py @@ -4,19 +4,22 @@ import time import toml import random import re -from datetime import datetime, timedelta, timezone import sched +import math +import string +from datetime import datetime, timedelta, timezone from mastodon import Mastodon, MastodonNotFoundError from fedbot.bot import Bot, BotClient POST_INTERVAL = timedelta(seconds = 15) -TEST = False +TEST = "test" in sys.argv[1:] +PORT_PCT = 67 def next_dt(): dt = datetime.now(timezone.utc) - dt -= timedelta(hours = -1, - minutes = dt.minute, + dt -= timedelta(hours = 0, + minutes = (dt.minute % 15) - 15, seconds = dt.second, microseconds = dt.microsecond) return dt @@ -28,107 +31,126 @@ loaded_config = { **toml.load(config_path)} SUFFIXES = [ - ['ly'], - ['ing'], - ['[bdklmptw]?est$'], - ['[^ious]s$'], - ['ted'], - ['[ei]$', 'ty']] + 'ly$', + 'ing$', + '[bdklmptw]?est$', + '[^ious]s$', + 'ted$', + '[ei]ty$'] def is_suffixed(word): - for suffix in SUFFIXES: - if len(suffix) > len(word): - continue + return any(re.fullmatch(suf, word) for suf in SUFFIXES) - syllables = list(zip(suffix, word[-len(suffix):])) - - if all(re.fullmatch(suf, syl) for suf, syl in syllables): - #print(word, "matched", suffix) - return True +def overlap_words(left_word, right_word): + if left_word == right_word: + return None - return False + offset = 2 + attempts = [] + while offset + 2 <= len(left_word): + if right_word.lower().startswith(left_word.lower()[offset : offset + 2]): + attempts.append(left_word[:offset] + right_word) + #break + offset += 1 + + offset = len(right_word) - 2 + while offset >= 0: + if left_word.lower().endswith(right_word.lower()[offset : offset + 2]): + attempts.append(left_word + right_word[offset + 2:]) + #break + offset -= 1 + + attempts = sorted(attempts, key = lambda w: len(w), reverse = True) + + if len(attempts) == 0: + return None + + return pick_one_word(attempts) + +def word_weight(index, length, power = 2): + a = pow((index + 1) / length, 2) + return int(100000 * a) + +def pick_one_word(words, power = 2, max_len = 12): + words = list(filter(lambda w: len(w) <= max_len, words)) + + if len(words) == 0: + return None + + weights = [word_weight(i, len(words), power = power) for i in range(0, len(words))] + return random.choices(words, weights = weights)[0] class WordMaker: def __init__(self): print("Loading dictionaries") + illegal = set(ch for ch in (string.ascii_uppercase + string.punctuation + string.digits + string.whitespace)) with open ("mhyph.txt", "r", encoding = "mac-roman") as f: - lines = [line.strip() for line in f.readlines()] - lines = filter(lambda w: len(w) > 0 and not re.search(r'[- A-Z]', w), lines) - words = [line.split("•") for line in lines] - words = sorted(words, key = lambda w: len(w), reverse = True) - self.words = words + lines = [l.strip() for l in f.readlines()] + lines = filter(lambda w: len(w) > 0 and not any(ch in illegal for ch in w), lines) + words = [l.replace("•", "") for l in lines] + self.all_words = words + words = list(set(sorted(words, key = lambda w: len(w), reverse = True))) self.first_words = list(filter(lambda w: not is_suffixed(w), words)) - self.plain_words = ["".join(w).lower() for w in words] + self.next_words = words with open("porthyph.txt", "r") as f: lines = [line.strip() for line in f.readlines()] - lines = filter(lambda l: len(l) > 0, lines) - words = [line.split("=") for line in lines] - words = sorted(words, key = lambda w: len(w), reverse = True) - self.alt_words = words - self.plain_words.extend(["".join(w).lower() for w in words]) - - def get_one_word(self, words): - weights = [int(100.0 * (x + 1.0) / len(words)) for x in range(0, len(words))] - return random.choices(words, weights = weights)[0] - - def get_second_word(self, first_word): - first_word = list(first_word) - first_end = first_word[-1] - - if random.randint(0, 100) < 50: - second_dict = self.alt_words + words = list(filter(lambda l: len(l) > 0, lines)) + self.all_words = list(set(sorted([w.lower() for w in [*self.all_words, *words]], key = lambda w: len(w), reverse = True))) + self.port_words = list(set(sorted(words, key = lambda w: len(w), reverse = True))) + + def extend_word2(self, prev_word): + port_dict = random.randint(0, 100) < PORT_PCT + if port_dict: + next_dict = self.port_words else: - second_dict = self.words + next_dict = self.next_words - if random.randint(0, 100) < 50: - second_iter = filter(lambda w: w[0].lower().startswith(first_end.lower()) or first_end.lower().startswith(w[0].lower()), second_dict) - else: - second_iter = filter(lambda w: w[0].lower().startswith(first_end.lower()), second_dict) - second_words = list(second_iter) + new_words = [overlap_words(prev_word, w) for w in next_dict if overlap_words(prev_word, w)] + + while len(new_words) > 0: + new_word = pick_one_word(new_words, power = 2 if port_dict else 4) + if not new_word: + return None + new_words.remove(new_word) + + if new_word.lower() not in self.all_words: + return new_word - while len(second_words) > 0: - second_word_orig = self.get_one_word(second_words) - second_words.remove(second_word_orig) - second_word = [s.lower() for s in second_word_orig] - - word = [*first_word[:-1], *second_word] - if not "".join(word).lower() in self.plain_words: - return word - return None def get_portmanteau(self): target_times = 1 - if random.randint(0, 100) > 50: - words = self.alt_words + port_dict = random.randint(0, 100) < PORT_PCT + if port_dict: + words = self.port_words else: words = self.first_words while True: while True: - word = self.get_one_word(words) + word = pick_one_word(words, power = 2 if port_dict else 4) times = target_times while times > 0: - next_word = self.get_second_word(word) - if next_word is None: + ext_word = self.extend_word2(word) + if ext_word is None: break - word = next_word + word = ext_word times -= 1 if times == 0: break - word_str = "".join(word) - - if len(word_str) < 15: + if len(word) < 15: break - print(word_str) + word = word.lower() + + print(word) - return word_str + return word def get_portmanteaus(self, count = 10): return [self.get_portmanteau() for x in range(0, count)] @@ -141,7 +163,6 @@ class PortBotClient(BotClient): "retry_rate": 60, "poll_interval": 15, **config} - super().__init__(bot, config) @@ -159,28 +180,29 @@ class PortBotClient(BotClient): if status["account"]["id"] != self.my_id: return - if status["created_at"] < datetime.now(timezone.utc) - timedelta(hours = 24) and status["reblogs_count"] == 0 and status["favourites_count"] == 0: - try: - print("Deleting", status["created_at"], status["content"]) - self.api.status_delete(status["id"]) - time.sleep(2) - except MastodonNotFoundError: - pass - pass + #if status["created_at"] < datetime.now(timezone.utc) - timedelta(hours = 24) and status["reblogs_count"] == 0 and status["favourites_count"] == 0: + # try: + # print("Deleting", status["created_at"], status["content"]) + # self.api.status_delete(status["id"]) + # time.sleep(2) + # except MastodonNotFoundError: + # pass def post(): for client_name, client in bot.clients.items(): - words = wm.get_portmanteaus(1) + words = wm.get_portmanteaus(3) + print() if random.randint(0, 100) <= 100: visibility = "public" else: visibility = "unlisted" + dt = next_dt() + if not TEST: client.api.status_post("\n".join(words), visibility = visibility) - - dt = next_dt() - print("Scheduling at", dt) + print("Scheduling at", dt) + if TEST: scheduler.enter(1, 1, post) else: @@ -196,9 +218,9 @@ bot.start() print("Running") dt = next_dt() -print("Scheduling at", dt) if TEST: scheduler.enter(1, 1, post) else: + print("Scheduling at", dt) scheduler.enterabs(dt.timestamp(), 1, post) scheduler.run()