Move dicts around and adjust algorithm

Branch: master
Author: Thor, 3 years ago
Parent: 3ceb937911
Commit: 54c31581d5
Changed files:
  dicts/main.csv          (0 lines changed)
  dicts/mhyph.txt         (0 lines changed)
  dicts/unigram_freq.csv  (0 lines changed)
  dicts/user.dict         (0 lines changed)
  mergedicts.py           (4 lines changed)
  portbot.py              (55 lines changed)

--- a/mergedicts.py
+++ b/mergedicts.py
@@ -1,9 +1,9 @@
 import itertools
-with open("sourcedicts/mhyph.txt", "r", encoding = "macroman", newline = "\r\n") as f:
+with open("dicts/mhyph.txt", "r", encoding = "macroman", newline = "\r\n") as f:
     hyph_words = set(tuple(l.split("•")) for l in f.read().splitlines())
-with open("sourcedicts/unigram_freq.csv", "r") as f:
+with open("dicts/unigram_freq.csv", "r") as f:
     goog_words = set((w, int(f)) for w, f in
         itertools.islice((l.split(",") for
             l in f.read().splitlines()), 1, None))

--- a/portbot.py
+++ b/portbot.py
@@ -1,23 +1,24 @@
 import os
 import sys
 import time
-from datetime import datetime, timedelta, timezone
 import toml
 import random
 import re
 import sched
 import math
 import string
+import itertools
+from datetime import datetime, timedelta, timezone
 from mastodon import Mastodon, MastodonNotFoundError
 from fedbot.bot import Bot, BotClient
+POST_INTERVAL = timedelta(seconds = 15)
 TEST = "test" in sys.argv[1:]
-PORT_PCT = 30
-MAX_DICT_LEN = 100
-MAX_GEN_LEN = 15
+USER_PCT = 35
+MIN_MAIN_LEN = 3
+MAX_PORT_LEN = 14
+MAIN_DICT_PATH = "dicts/main.csv"
+USER_DICT_PATH = "dicts/user.dict"
+USED_DICT_PATH = "dicts/used.dict"
 def next_dt():
     dt = datetime.now(timezone.utc)
@@ -27,7 +28,6 @@ def next_dt():
         microseconds = dt.microsecond)
     return dt
 config_path = os.path.join(os.path.dirname(sys.argv[0]), "config.toml")
 loaded_config = {
     "name": "portmanteaubot",
@ -42,7 +42,9 @@ def overlap_words(left_word, right_word):
if left_word == right_word: if left_word == right_word:
return set() return set()
offset = 2 min_shift = 2
offset = min_shift
attempts = set() attempts = set()
while offset + 2 <= len(left_word[0]): while offset + 2 <= len(left_word[0]):
if right_word[0].lower().startswith(left_word[0].lower()[offset : offset + 2]): if right_word[0].lower().startswith(left_word[0].lower()[offset : offset + 2]):
@@ -52,7 +54,7 @@ def overlap_words(left_word, right_word):
         offset += 1
     offset = len(right_word[0]) - 2
-    while offset >= 0:
+    while offset >= 2:
         if left_word[0].lower().endswith(right_word[0].lower()[offset : offset + 2]):
             word_str = left_word[0] + right_word[0][offset + 2:]
             if len(word_str) >= 6 and not is_affixed(word_str):
@@ -74,26 +76,35 @@ def pick_one_word(words):
     return random.choices(list(words.items()), weights = (v[0] for v in words.values()))[0]
+def word_diff(a, b):
+    seq = difflib.SequenceMatcher(None, a, b)
+    return seq.ratio()
 class WordMaker:
     def __init__(self):
         print("Loading dictionaries")
         illegal = set(ch for ch in (string.ascii_uppercase + string.punctuation + string.digits + string.whitespace))
-        with open ("dict.csv", "r") as f:
+        with open (MAIN_DICT_PATH, "r") as f:
             self.main_words = {
                 sl[0] :
                 (int(sl[1]), tuple(sl[2].split("="))) for sl in
                 (tuple(l.split(",")) for l in f.read().splitlines()) if
-                len(sl[0]) >= 3 and
-                len(sl[0]) < MAX_DICT_LEN and
+                len(sl[0]) >= MIN_MAIN_LEN and
                 not any(c in illegal for c in sl[0])}
-        with open("userdict.csv", "r") as f:
+        with open(USER_DICT_PATH, "r") as f:
             self.user_words = {l : (1, None) for l in f.read().splitlines()}
-        self.all_words = {k.lower() : v for k, v in {**self.main_words, **self.user_words}.items()}
+        if os.path.exists(USED_DICT_PATH):
+            with open("dicts/used.dict", "r") as f:
+                used_words = {l : (1, None) for l in f.read().splitlines()}
+        else:
+            used_words = dict()
+        self.all_words = {k.lower() : v for k, v in {**self.main_words, **self.user_words, **used_words}.items()}
     def extend_word(self, prev_word):
-        user_dict = random.randint(0, 100) < PORT_PCT
+        user_dict = random.randint(0, 100) < USER_PCT
         if user_dict:
             next_dict = self.user_words
         else:
@@ -108,7 +119,7 @@ class WordMaker:
         max_len = max(len(w) for w in new_words)
         for w in new_words:
-            new_words[w] = (max_len + 1 - len(w), None)
+            new_words[w] = (math.pow(max_len + 1 - len(w), 3), None)
         while len(new_words) > 0:
             new_word = pick_one_word(new_words)
@@ -120,7 +131,7 @@ class WordMaker:
         return None
     def get_portmanteau(self, target_times = 1):
-        user_dict = random.randint(0, 100) < PORT_PCT
+        user_dict = random.randint(0, 100) < USER_PCT
         if user_dict:
             words = self.user_words
         else:
@@ -142,7 +153,7 @@ class WordMaker:
             if times == 0:
                 break
-            if len(word[0]) < MAX_GEN_LEN:
+            if len(word[0]) < MAX_PORT_LEN:
                 break
             word_str = word[0].lower()
@@ -154,11 +165,19 @@ class WordMaker:
     def get_portmanteaus(self, count = 10):
         words = set()
+        used_words = dict()
         while count > 0:
             word_str = self.get_portmanteau()
             if word_str not in words:
                 words.add(word_str)
+                used_words[word_str] = (1, None)
                 count -= 1
+        self.all_words.update(used_words)
+        if not TEST:
+            with open("dicts/used.dict", "a") as f:
+                f.write("\n".join(used_words.keys()) + "\n")
         return words
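
Note on the algorithm change: the candidate weight fed to pick_one_word goes from the linear max_len + 1 - len(w) to its cube, which skews the random pick much more strongly toward short candidate words. The following is a minimal standalone sketch of that effect, not part of the commit; the example candidates dict and the printouts are made up for illustration.

    import math
    import random

    def pick_one_word(words):
        # same shape as in portbot.py: word -> (weight, parts)
        return random.choices(list(words.items()), weights=[v[0] for v in words.values()])[0]

    candidates = {"cat": None, "catalog": None, "catastrophe": None}
    max_len = max(len(w) for w in candidates)

    linear = {w: (max_len + 1 - len(w), None) for w in candidates}               # old weighting
    cubed  = {w: (math.pow(max_len + 1 - len(w), 3), None) for w in candidates}  # new weighting

    # "cat" outweighs "catastrophe" 9:1 under the old scheme and 729:1 under the
    # new one, so the cubed weight pushes picks much harder toward short words.
    print({w: (linear[w][0], cubed[w][0]) for w in candidates})
    print(pick_one_word(cubed)[0])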
