|
|
|
@ -1,23 +1,24 @@ |
|
|
|
|
import os |
|
|
|
|
import sys |
|
|
|
|
import time |
|
|
|
|
from datetime import datetime, timedelta, timezone |
|
|
|
|
import toml |
|
|
|
|
import random |
|
|
|
|
import re |
|
|
|
|
import sched |
|
|
|
|
import math |
|
|
|
|
import string |
|
|
|
|
import itertools |
|
|
|
|
from datetime import datetime, timedelta, timezone |
|
|
|
|
|
|
|
|
|
from mastodon import Mastodon, MastodonNotFoundError |
|
|
|
|
from fedbot.bot import Bot, BotClient |
|
|
|
|
|
|
|
|
|
# --- Runtime switches -------------------------------------------------------
# Passing "test" as any command-line argument enables test mode; in test mode
# generated words are not appended to the used-words file.
TEST = "test" in sys.argv[1:]
# NOTE(review): presumably the delay between posts; not referenced in the
# visible part of the file, confirm against the scheduler code.
POST_INTERVAL = timedelta(seconds=15)

# --- Dictionary selection ---------------------------------------------------
# Both values are compared against random.randint(0, 100) when deciding
# whether to draw from the user dictionary instead of the main one.
PORT_PCT = 30
USER_PCT = 35

# --- Length limits ----------------------------------------------------------
# Main-dictionary entries are kept only when
# MIN_MAIN_LEN <= len(word) < MAX_DICT_LEN.
MIN_MAIN_LEN = 3
MAX_DICT_LEN = 100
# Generation stops extending a word once it is shorter than these bounds.
MAX_GEN_LEN = 15
MAX_PORT_LEN = 14

# --- Dictionary files -------------------------------------------------------
MAIN_DICT_PATH = "dicts/main.csv"
USER_DICT_PATH = "dicts/user.dict"
USED_DICT_PATH = "dicts/used.dict"
|
|
|
|
|
|
|
|
|
def next_dt(): |
|
|
|
|
dt = datetime.now(timezone.utc) |
|
|
|
@ -27,7 +28,6 @@ def next_dt(): |
|
|
|
|
microseconds = dt.microsecond) |
|
|
|
|
return dt |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
config_path = os.path.join(os.path.dirname(sys.argv[0]), "config.toml") |
|
|
|
|
loaded_config = { |
|
|
|
|
"name": "portmanteaubot", |
|
|
|
@ -42,7 +42,9 @@ def overlap_words(left_word, right_word): |
|
|
|
|
if left_word == right_word: |
|
|
|
|
return set() |
|
|
|
|
|
|
|
|
|
offset = 2 |
|
|
|
|
min_shift = 2 |
|
|
|
|
|
|
|
|
|
offset = min_shift |
|
|
|
|
attempts = set() |
|
|
|
|
while offset + 2 <= len(left_word[0]): |
|
|
|
|
if right_word[0].lower().startswith(left_word[0].lower()[offset : offset + 2]): |
|
|
|
@ -52,7 +54,7 @@ def overlap_words(left_word, right_word): |
|
|
|
|
offset += 1 |
|
|
|
|
|
|
|
|
|
offset = len(right_word[0]) - 2 |
|
|
|
|
while offset >= 0: |
|
|
|
|
while offset >= 2: |
|
|
|
|
if left_word[0].lower().endswith(right_word[0].lower()[offset : offset + 2]): |
|
|
|
|
word_str = left_word[0] + right_word[0][offset + 2:] |
|
|
|
|
if len(word_str) >= 6 and not is_affixed(word_str): |
|
|
|
@ -74,26 +76,35 @@ def pick_one_word(words): |
|
|
|
|
|
|
|
|
|
return random.choices(list(words.items()), weights = (v[0] for v in words.values()))[0] |
|
|
|
|
|
|
|
|
|
def word_diff(a, b):
    """Return the similarity ratio between two sequences.

    Args:
        a: First sequence (typically a string).
        b: Second sequence (typically a string).

    Returns:
        A float in [0.0, 1.0] as computed by difflib.SequenceMatcher.ratio():
        1.0 for identical sequences, 0.0 when nothing matches.
    """
    # difflib is not part of the module-level imports, so it is imported here;
    # without this the function raised NameError on its first call.
    import difflib

    return difflib.SequenceMatcher(None, a, b).ratio()
|
|
|
|
|
|
|
|
|
class WordMaker: |
|
|
|
|
def __init__(self):
    """Load the main, user and previously used word lists from disk.

    Sets three attributes:
        main_words: word -> (int weight, tuple of "="-separated parts), read
            from the comma-separated file at MAIN_DICT_PATH. Only words with
            MIN_MAIN_LEN <= len(word) < MAX_DICT_LEN that contain no
            uppercase letter, punctuation, digit or whitespace are kept.
            NOTE(review): the meaning of the third column is not visible
            here; confirm against the dictionary builder.
        user_words: word -> (1, None), one word per line of USER_DICT_PATH.
        all_words: lower-cased union of main, user and used words; later
            sources win on key collisions.

    Raises:
        FileNotFoundError: if the main or user dictionary is missing.
        IndexError, ValueError: if a kept main-dictionary row has fewer than
            three columns or a non-integer second column.
    """
    print("Loading dictionaries")
    illegal = set(string.ascii_uppercase + string.punctuation + string.digits + string.whitespace)

    # Only the configured paths are opened. The legacy "dict.csv" and
    # "userdict.csv" handles were opened but never read.
    with open(MAIN_DICT_PATH, "r") as f:
        rows = (tuple(line.split(",")) for line in f.read().splitlines())
        self.main_words = {
            row[0]: (int(row[1]), tuple(row[2].split("=")))
            for row in rows
            # Single length check driven by the constants; the extra literal
            # ">= 3" duplicated MIN_MAIN_LEN.
            if MIN_MAIN_LEN <= len(row[0]) < MAX_DICT_LEN
            and not any(c in illegal for c in row[0])
        }

    with open(USER_DICT_PATH, "r") as f:
        self.user_words = {line: (1, None) for line in f.read().splitlines()}

    # The used-words file is optional; it only exists after a non-test run.
    if os.path.exists(USED_DICT_PATH):
        # Open the same path that was just checked, not a duplicated literal.
        with open(USED_DICT_PATH, "r") as f:
            used_words = {line: (1, None) for line in f.read().splitlines()}
    else:
        used_words = {}

    # Built once, including the used words; the earlier assignment without
    # them was immediately overwritten.
    self.all_words = {
        k.lower(): v
        for k, v in {**self.main_words, **self.user_words, **used_words}.items()
    }
|
|
|
|
|
|
|
|
|
def extend_word(self, prev_word): |
|
|
|
|
user_dict = random.randint(0, 100) < PORT_PCT |
|
|
|
|
user_dict = random.randint(0, 100) < USER_PCT |
|
|
|
|
if user_dict: |
|
|
|
|
next_dict = self.user_words |
|
|
|
|
else: |
|
|
|
@ -108,7 +119,7 @@ class WordMaker: |
|
|
|
|
|
|
|
|
|
max_len = max(len(w) for w in new_words) |
|
|
|
|
for w in new_words: |
|
|
|
|
new_words[w] = (max_len + 1 - len(w), None) |
|
|
|
|
new_words[w] = (math.pow(max_len + 1 - len(w), 3), None) |
|
|
|
|
|
|
|
|
|
while len(new_words) > 0: |
|
|
|
|
new_word = pick_one_word(new_words) |
|
|
|
@ -120,7 +131,7 @@ class WordMaker: |
|
|
|
|
return None |
|
|
|
|
|
|
|
|
|
def get_portmanteau(self, target_times = 1): |
|
|
|
|
user_dict = random.randint(0, 100) < PORT_PCT |
|
|
|
|
user_dict = random.randint(0, 100) < USER_PCT |
|
|
|
|
if user_dict: |
|
|
|
|
words = self.user_words |
|
|
|
|
else: |
|
|
|
@ -142,7 +153,7 @@ class WordMaker: |
|
|
|
|
if times == 0: |
|
|
|
|
break |
|
|
|
|
|
|
|
|
|
if len(word[0]) < MAX_GEN_LEN: |
|
|
|
|
if len(word[0]) < MAX_PORT_LEN: |
|
|
|
|
break |
|
|
|
|
|
|
|
|
|
word_str = word[0].lower() |
|
|
|
@ -154,11 +165,19 @@ class WordMaker: |
|
|
|
|
def get_portmanteaus(self, count = 10):
    """Generate `count` distinct portmanteaus and record them as used.

    Calls self.get_portmanteau() repeatedly until `count` distinct results
    have been collected. The new words are merged into self.all_words and,
    unless TEST is set, appended one per line to USED_DICT_PATH.

    Args:
        count: Number of distinct words to produce. Zero or a negative value
            produces nothing and touches no file.

    Returns:
        A set containing the generated words.
    """
    # A dict keeps generation order, so the file is written in that order.
    used_words = {}
    while len(used_words) < count:
        word_str = self.get_portmanteau()
        if word_str not in used_words:
            used_words[word_str] = (1, None)

    self.all_words.update(used_words)

    # Skip the write when nothing was generated: appending a bare newline
    # would later be loaded as an empty-string word.
    if used_words and not TEST:
        # Use the shared constant so the file written here is the one
        # __init__ reads back.
        with open(USED_DICT_PATH, "a") as f:
            f.write("\n".join(used_words) + "\n")

    return set(used_words)
|
|
|
|
|
|
|
|
|