From 3c71d99c7c6b87833fcd60de620db41c2accde0b Mon Sep 17 00:00:00 2001
From: Thor Harald Johansen <thj@thj.no>
Date: Mon, 2 Aug 2021 00:20:12 +0200
Subject: [PATCH] New algorithm

---
 portbot.py | 184 ++++++++++++++++++++++++++++++-----------------------
 1 file changed, 103 insertions(+), 81 deletions(-)

diff --git a/portbot.py b/portbot.py
index d05c205..c85206d 100644
--- a/portbot.py
+++ b/portbot.py
@@ -4,19 +4,22 @@ import time
 import toml
 import random
 import re
-from datetime import datetime, timedelta, timezone
 import sched
+import math
+import string
+from datetime import datetime, timedelta, timezone
 
 from mastodon import Mastodon, MastodonNotFoundError
 from fedbot.bot import Bot, BotClient
 
 POST_INTERVAL = timedelta(seconds = 15)
-TEST = False
+TEST = "test" in sys.argv[1:]
+PORT_PCT = 67
 
 def next_dt():
     dt = datetime.now(timezone.utc)
-    dt -= timedelta(hours        = -1,
-                    minutes      = dt.minute,
+    dt -= timedelta(hours        = 0,
+                    minutes      = (dt.minute % 15) - 15,
                     seconds      = dt.second,
                     microseconds = dt.microsecond)
     return dt
@@ -28,107 +31,126 @@ loaded_config = {
     **toml.load(config_path)}
 
 SUFFIXES = [
-    ['ly'],
-    ['ing'],
-    ['[bdklmptw]?est$'],
-    ['[^ious]s$'],
-    ['ted'],
-    ['[ei]$', 'ty']]
+    'ly$',
+    'ing$',
+    '[bdklmptw]?est$',
+    '[^ious]s$',
+    'ted$',
+    '[ei]ty$']
 
 def is_suffixed(word):
-    for suffix in SUFFIXES:
-        if len(suffix) > len(word):
-            continue
+    return any(re.fullmatch(suf, word) for suf in SUFFIXES)
 
-        syllables = list(zip(suffix, word[-len(suffix):]))
-        
-        if all(re.fullmatch(suf, syl) for suf, syl in syllables):
-            #print(word, "matched", suffix)
-            return True
+def overlap_words(left_word, right_word):
+    if left_word == right_word:
+        return None
 
-    return False
+    offset = 2
+    attempts = []
+    while offset + 2 <= len(left_word):
+        if right_word.lower().startswith(left_word.lower()[offset : offset + 2]):
+            attempts.append(left_word[:offset] + right_word)
+            #break
+        offset += 1
+
+    offset = len(right_word) - 2
+    while offset >= 0:
+        if left_word.lower().endswith(right_word.lower()[offset : offset + 2]):
+            attempts.append(left_word + right_word[offset + 2:])
+            #break
+        offset -= 1
+
+    attempts = sorted(attempts, key = lambda w: len(w), reverse = True)
+
+    if len(attempts) == 0:
+        return None
+
+    return pick_one_word(attempts)
+
+def word_weight(index, length, power = 2):
+    a = pow((index + 1) / length, 2)
+    return int(100000 * a)
+
+def pick_one_word(words, power = 2, max_len = 12):
+    words = list(filter(lambda w: len(w) <= max_len, words))
+    
+    if len(words) == 0:
+        return None
+    
+    weights = [word_weight(i, len(words), power = power) for i in range(0, len(words))]
+    return random.choices(words, weights = weights)[0]
 
 class WordMaker:
     def __init__(self):
         print("Loading dictionaries")
+        illegal = set(ch for ch in (string.ascii_uppercase + string.punctuation + string.digits + string.whitespace))
         with open ("mhyph.txt", "r", encoding = "mac-roman") as f:
-            lines = [line.strip() for line in f.readlines()]
-            lines = filter(lambda w: len(w) > 0 and not re.search(r'[- A-Z]', w), lines)
-            words = [line.split("•") for line in lines]
-            words = sorted(words, key = lambda w: len(w), reverse = True)
-            self.words = words
+            lines = [l.strip() for l in f.readlines()]
+            lines = filter(lambda w: len(w) > 0 and not any(ch in illegal for ch in w), lines)
+            words = [l.replace("•", "") for l in lines]
+            self.all_words = words
+            words = list(set(sorted(words, key = lambda w: len(w), reverse = True)))
             self.first_words = list(filter(lambda w: not is_suffixed(w), words))
-            self.plain_words = ["".join(w).lower() for w in words]
+            self.next_words = words
 
         with open("porthyph.txt", "r") as f:
             lines = [line.strip() for line in f.readlines()]
-            lines = filter(lambda l: len(l) > 0, lines)
-            words = [line.split("=") for line in lines]
-            words = sorted(words, key = lambda w: len(w), reverse = True)
-            self.alt_words = words
-            self.plain_words.extend(["".join(w).lower() for w in words])
-
-    def get_one_word(self, words):
-        weights = [int(100.0 * (x + 1.0) / len(words)) for x in range(0, len(words))]
-        return random.choices(words, weights = weights)[0]
-
-    def get_second_word(self, first_word):
-        first_word = list(first_word)
-        first_end = first_word[-1]
-
-        if random.randint(0, 100) < 50:
-            second_dict = self.alt_words
+            words = list(filter(lambda l: len(l) > 0, lines))
+            self.all_words = list(set(sorted([w.lower() for w in [*self.all_words, *words]], key = lambda w: len(w), reverse = True)))
+            self.port_words = list(set(sorted(words, key = lambda w: len(w), reverse = True)))
+    
+    def extend_word2(self, prev_word):
+        port_dict = random.randint(0, 100) < PORT_PCT
+        if port_dict:
+            next_dict = self.port_words
         else:
-            second_dict = self.words
+            next_dict = self.next_words
 
-        if random.randint(0, 100) < 50:
-            second_iter = filter(lambda w: w[0].lower().startswith(first_end.lower()) or first_end.lower().startswith(w[0].lower()), second_dict)
-        else:
-            second_iter = filter(lambda w: w[0].lower().startswith(first_end.lower()), second_dict)
-        second_words = list(second_iter)
+        new_words = [overlap_words(prev_word, w) for w in next_dict if overlap_words(prev_word, w)]
+
+        while len(new_words) > 0:
+            new_word = pick_one_word(new_words, power = 2 if port_dict else 4)
+            if not new_word:
+                return None
+            new_words.remove(new_word)
+
+            if new_word.lower() not in self.all_words:
+                return new_word 
 
-        while len(second_words) > 0:
-            second_word_orig = self.get_one_word(second_words)
-            second_words.remove(second_word_orig)
-            second_word = [s.lower() for s in second_word_orig]
-            
-            word = [*first_word[:-1], *second_word]
-            if not "".join(word).lower() in self.plain_words:
-                return word
-    
         return None
 
     def get_portmanteau(self):
         target_times = 1
-        if random.randint(0, 100) > 50:
-            words = self.alt_words
+        port_dict = random.randint(0, 100) < PORT_PCT
+        if port_dict:
+            words = self.port_words
         else:
             words = self.first_words
 
         while True:
             while True:
-                word = self.get_one_word(words)
+                word = pick_one_word(words, power = 2 if port_dict else 4)
                 
                 times = target_times
                 while times > 0:
-                    next_word = self.get_second_word(word)
-                    if next_word is None:
+                    ext_word = self.extend_word2(word)
+                    if ext_word is None:
                         break
                     
-                    word = next_word
+                    word = ext_word
                     times -= 1
             
                 if times == 0:
                     break
 
-            word_str = "".join(word)
-            
-            if len(word_str) < 15:
+            if len(word) < 15:
                 break
 
-        print(word_str)
+        word = word.lower()
+
+        print(word)
         
-        return word_str
+        return word
 
     def get_portmanteaus(self, count = 10):
         return [self.get_portmanteau() for x in range(0, count)]
@@ -141,7 +163,6 @@ class PortBotClient(BotClient):
             "retry_rate": 60,
             "poll_interval": 15,
             **config} 
-        
       
         super().__init__(bot, config)
     
@@ -159,28 +180,29 @@ class PortBotClient(BotClient):
         if status["account"]["id"] != self.my_id:
             return
 
-        if status["created_at"] < datetime.now(timezone.utc) - timedelta(hours = 24) and status["reblogs_count"] == 0 and status["favourites_count"] == 0:
-            try:
-                print("Deleting", status["created_at"], status["content"])
-                self.api.status_delete(status["id"])
-                time.sleep(2)
-            except MastodonNotFoundError:
-                pass
-        pass
+        #if status["created_at"] < datetime.now(timezone.utc) - timedelta(hours = 24) and status["reblogs_count"] == 0 and status["favourites_count"] == 0:
+        #    try:
+        #        print("Deleting", status["created_at"], status["content"])
+        #        self.api.status_delete(status["id"])
+        #        time.sleep(2)
+        #    except MastodonNotFoundError:
+        #        pass
 
 def post():
     for client_name, client in bot.clients.items():
-        words = wm.get_portmanteaus(1)
+        words = wm.get_portmanteaus(3)
+        print()
         if random.randint(0, 100) <= 100:
             visibility = "public"
         else:
             visibility = "unlisted"
     
+        dt = next_dt()
+        
         if not TEST:
             client.api.status_post("\n".join(words), visibility = visibility)
-
-        dt = next_dt()
-        print("Scheduling at", dt)
+            print("Scheduling at", dt)
+        
         if TEST:
             scheduler.enter(1, 1, post)
         else:
@@ -196,9 +218,9 @@ bot.start()
 print("Running")
 
 dt = next_dt()
-print("Scheduling at", dt)
 if TEST:
     scheduler.enter(1, 1, post)
 else:
+    print("Scheduling at", dt)
     scheduler.enterabs(dt.timestamp(), 1, post)
 scheduler.run()