Move dicts around and adjust algorithm

master
Thor 3 years ago
parent 3ceb937911
commit 54c31581d5
  1. 0
      dicts/main.csv
  2. 0
      dicts/mhyph.txt
  3. 0
      dicts/unigram_freq.csv
  4. 0
      dicts/user.dict
  5. 4
      mergedicts.py
  6. 55
      portbot.py

unable to load file from base commit

@ -1,9 +1,9 @@
import itertools
with open("sourcedicts/mhyph.txt", "r", encoding = "macroman", newline = "\r\n") as f:
with open("dicts/mhyph.txt", "r", encoding = "macroman", newline = "\r\n") as f:
hyph_words = set(tuple(l.split("")) for l in f.read().splitlines())
with open("sourcedicts/unigram_freq.csv", "r") as f:
with open("dicts/unigram_freq.csv", "r") as f:
goog_words = set((w, int(f)) for w, f in
itertools.islice((l.split(",") for
l in f.read().splitlines()), 1, None))

@ -1,23 +1,24 @@
import os
import sys
import time
from datetime import datetime, timedelta, timezone
import toml
import random
import re
import sched
import math
import string
import itertools
from datetime import datetime, timedelta, timezone
from mastodon import Mastodon, MastodonNotFoundError
from fedbot.bot import Bot, BotClient
POST_INTERVAL = timedelta(seconds = 15)
TEST = "test" in sys.argv[1:]
PORT_PCT = 30
MAX_DICT_LEN = 100
MAX_GEN_LEN = 15
USER_PCT = 35
MIN_MAIN_LEN = 3
MAX_PORT_LEN = 14
MAIN_DICT_PATH = "dicts/main.csv"
USER_DICT_PATH = "dicts/user.dict"
USED_DICT_PATH = "dicts/used.dict"
def next_dt():
dt = datetime.now(timezone.utc)
@ -27,7 +28,6 @@ def next_dt():
microseconds = dt.microsecond)
return dt
config_path = os.path.join(os.path.dirname(sys.argv[0]), "config.toml")
loaded_config = {
"name": "portmanteaubot",
@ -42,7 +42,9 @@ def overlap_words(left_word, right_word):
if left_word == right_word:
return set()
offset = 2
min_shift = 2
offset = min_shift
attempts = set()
while offset + 2 <= len(left_word[0]):
if right_word[0].lower().startswith(left_word[0].lower()[offset : offset + 2]):
@ -52,7 +54,7 @@ def overlap_words(left_word, right_word):
offset += 1
offset = len(right_word[0]) - 2
while offset >= 0:
while offset >= 2:
if left_word[0].lower().endswith(right_word[0].lower()[offset : offset + 2]):
word_str = left_word[0] + right_word[0][offset + 2:]
if len(word_str) >= 6 and not is_affixed(word_str):
@ -74,26 +76,35 @@ def pick_one_word(words):
return random.choices(list(words.items()), weights = (v[0] for v in words.values()))[0]
def word_diff(a, b):
    """Return how similar two sequences are, as a float in [0, 1].

    Despite the name, this is a similarity score rather than a difference:
    1.0 means the sequences are identical and 0.0 means they have nothing
    in common. The value is ``difflib.SequenceMatcher(None, a, b).ratio()``.
    """
    # difflib is not among the imports visible at the top of this file, so
    # import it here to keep the function usable on its own.
    import difflib

    return difflib.SequenceMatcher(None, a, b).ratio()
class WordMaker:
def __init__(self):
print("Loading dictionaries")
illegal = set(ch for ch in (string.ascii_uppercase + string.punctuation + string.digits + string.whitespace))
with open ("dict.csv", "r") as f:
with open (MAIN_DICT_PATH, "r") as f:
self.main_words = {
sl[0] :
(int(sl[1]), tuple(sl[2].split("="))) for sl in
(tuple(l.split(",")) for l in f.read().splitlines()) if
len(sl[0]) >= 3 and
len(sl[0]) < MAX_DICT_LEN and
len(sl[0]) >= MIN_MAIN_LEN and
not any(c in illegal for c in sl[0])}
with open("userdict.csv", "r") as f:
with open(USER_DICT_PATH, "r") as f:
self.user_words = {l : (1, None) for l in f.read().splitlines()}
self.all_words = {k.lower() : v for k, v in {**self.main_words, **self.user_words}.items()}
if os.path.exists(USED_DICT_PATH):
with open("dicts/used.dict", "r") as f:
used_words = {l : (1, None) for l in f.read().splitlines()}
else:
used_words = dict()
self.all_words = {k.lower() : v for k, v in {**self.main_words, **self.user_words, **used_words}.items()}
def extend_word(self, prev_word):
user_dict = random.randint(0, 100) < PORT_PCT
user_dict = random.randint(0, 100) < USER_PCT
if user_dict:
next_dict = self.user_words
else:
@ -108,7 +119,7 @@ class WordMaker:
max_len = max(len(w) for w in new_words)
for w in new_words:
new_words[w] = (max_len + 1 - len(w), None)
new_words[w] = (math.pow(max_len + 1 - len(w), 3), None)
while len(new_words) > 0:
new_word = pick_one_word(new_words)
@ -120,7 +131,7 @@ class WordMaker:
return None
def get_portmanteau(self, target_times = 1):
user_dict = random.randint(0, 100) < PORT_PCT
user_dict = random.randint(0, 100) < USER_PCT
if user_dict:
words = self.user_words
else:
@ -142,7 +153,7 @@ class WordMaker:
if times == 0:
break
if len(word[0]) < MAX_GEN_LEN:
if len(word[0]) < MAX_PORT_LEN:
break
word_str = word[0].lower()
@ -154,11 +165,19 @@ class WordMaker:
def get_portmanteaus(self, count = 10):
    """Generate ``count`` distinct portmanteaus and record them as used.

    Calls ``self.get_portmanteau()`` repeatedly until ``count`` different
    results have been collected. Each result is added to ``self.all_words``
    with the value ``(1, None)`` and, unless ``TEST`` is set, appended to
    the used-word file, one entry per line.

    A ``count`` of zero or less yields an empty set and leaves both
    ``self.all_words`` and the used-word file untouched, so no blank entry
    is ever appended.

    Returns the set of generated strings.
    """
    # A dict de-duplicates while keeping generation order for the file.
    used_words = {}
    while len(used_words) < count:
        used_words[self.get_portmanteau()] = (1, None)

    if used_words:
        self.all_words.update(used_words)
        if not TEST:
            # Same path the constructor checks when loading used words.
            with open(USED_DICT_PATH, "a") as f:
                f.write("\n".join(used_words) + "\n")
    return set(used_words)

Loading…
Cancel
Save