Compare commits

...

4 Commits

6 changed files:

  1. dict.csv (51361 changed lines)
  2. mergedicts.py (20 changed lines)
  3. portbot.py (170 changed lines)
  4. sourcedicts/mhyph.txt (0 changed lines)
  5. sourcedicts/unigram_freq.csv (0 changed lines)
  6. userdict.csv (55 changed lines)

dict.csv (51361 changed lines)

File diff suppressed because it is too large.

mergedicts.py (20 changed lines)

@@ -0,0 +1,20 @@
+import itertools
+
+with open("sourcedicts/mhyph.txt", "r", encoding = "macroman", newline = "\r\n") as f:
+    hyph_words = set(tuple(l.split("•")) for l in f.read().splitlines())
+
+with open("sourcedicts/unigram_freq.csv", "r") as f:
+    goog_words = set((w, int(f)) for w, f in
+        itertools.islice((l.split(",") for
+            l in f.read().splitlines()), 1, None))
+
+hyph_map = {"".join(x) : x for x in hyph_words}
+
+words = dict()
+for w, f in sorted(goog_words, key = lambda w: w[1], reverse = True):
+    hw = hyph_map.get(w)
+    if hw:
+        words[w] = (f, hw)
+
+for w, (f, hw) in words.items():
+    print("{},{},{}".format(w, f, "=".join(hw)))

portbot.py (170 changed lines)

@@ -7,6 +7,7 @@ import re
 import sched
 import math
 import string
+import itertools
 from datetime import datetime, timedelta, timezone
 from mastodon import Mastodon, MastodonNotFoundError
@@ -14,12 +15,14 @@ from fedbot.bot import Bot, BotClient
 POST_INTERVAL = timedelta(seconds = 15)
 TEST = "test" in sys.argv[1:]
-PORT_PCT = 67
+PORT_PCT = 30
+MAX_DICT_LEN = 100
+MAX_GEN_LEN = 15
 
 def next_dt():
     dt = datetime.now(timezone.utc)
-    dt -= timedelta(hours = 0,
-        minutes = (dt.minute % 15) - 15,
+    dt -= timedelta(hours = -1,
+        minutes = dt.minute,
         seconds = dt.second,
         microseconds = dt.microsecond)
     return dt
@@ -30,110 +33,106 @@ loaded_config = {
     "name": "portmanteaubot",
     **toml.load(config_path)}
 
-SUFFIXES = [
-    'ly$',
-    'ing$',
-    '[bdklmptw]?est$',
-    '[^ious]s$',
-    'ted$',
-    '[ei]ty$']
+AFFIXES = []
 
-def is_suffixed(word):
-    return any(re.fullmatch(suf, word) for suf in SUFFIXES)
+def is_affixed(word):
+    return any(re.search(suf, word) for suf in AFFIXES)
 
 def overlap_words(left_word, right_word):
     if left_word == right_word:
-        return None
+        return set()
 
     offset = 2
-    attempts = []
-    while offset + 2 <= len(left_word):
-        if right_word.lower().startswith(left_word.lower()[offset : offset + 2]):
-            attempts.append(left_word[:offset] + right_word)
-            #break
+    attempts = set()
+    while offset + 2 <= len(left_word[0]):
+        if right_word[0].lower().startswith(left_word[0].lower()[offset : offset + 2]):
+            word_str = left_word[0][:offset] + right_word[0]
+            if len(word_str) >= 6 and not is_affixed(word_str):
+                attempts.add(word_str)
         offset += 1
 
-    offset = len(right_word) - 2
+    offset = len(right_word[0]) - 2
     while offset >= 0:
-        if left_word.lower().endswith(right_word.lower()[offset : offset + 2]):
-            attempts.append(left_word + right_word[offset + 2:])
-            #break
+        if left_word[0].lower().endswith(right_word[0].lower()[offset : offset + 2]):
+            word_str = left_word[0] + right_word[0][offset + 2:]
+            if len(word_str) >= 6 and not is_affixed(word_str):
+                attempts.add(word_str)
         offset -= 1
 
-    attempts = sorted(attempts, key = lambda w: len(w), reverse = True)
-    if len(attempts) == 0:
-        return None
-    return pick_one_word(attempts)
+    return attempts
 
-def word_weight(index, length, power = 2):
+def word_weight(index, length, power):
     a = pow((index + 1) / length, 2)
-    return int(100000 * a)
+    return int(350000 * a)
 
-def pick_one_word(words, power = 2, max_len = 12):
-    words = list(filter(lambda w: len(w) <= max_len, words))
+def weights_for(words, power):
+    return [word_weight(i, len(words), power = power) for i in range(0, len(words))]
+
+def pick_one_word(words):
     if len(words) == 0:
         return None
-    weights = [word_weight(i, len(words), power = power) for i in range(0, len(words))]
-    return random.choices(words, weights = weights)[0]
+    return random.choices(list(words.items()), weights = (v[0] for v in words.values()))[0]
 
 class WordMaker:
     def __init__(self):
         print("Loading dictionaries")
         illegal = set(ch for ch in (string.ascii_uppercase + string.punctuation + string.digits + string.whitespace))
-        with open ("mhyph.txt", "r", encoding = "mac-roman") as f:
-            lines = [l.strip() for l in f.readlines()]
-            lines = filter(lambda w: len(w) > 0 and not any(ch in illegal for ch in w), lines)
-            words = [l.replace("•", "") for l in lines]
-        self.all_words = words
-        words = list(set(sorted(words, key = lambda w: len(w), reverse = True)))
-        self.first_words = list(filter(lambda w: not is_suffixed(w), words))
-        self.next_words = words
-        with open("porthyph.txt", "r") as f:
-            lines = [line.strip() for line in f.readlines()]
-            words = list(filter(lambda l: len(l) > 0, lines))
-        self.all_words = list(set(sorted([w.lower() for w in [*self.all_words, *words]], key = lambda w: len(w), reverse = True)))
-        self.port_words = list(set(sorted(words, key = lambda w: len(w), reverse = True)))
+        with open ("dict.csv", "r") as f:
+            self.main_words = {
+                sl[0] :
+                (int(sl[1]), tuple(sl[2].split("="))) for sl in
+                (tuple(l.split(",")) for l in f.read().splitlines()) if
+                len(sl[0]) >= 3 and
+                len(sl[0]) < MAX_DICT_LEN and
+                not any(c in illegal for c in sl[0])}
+        with open("userdict.csv", "r") as f:
+            self.user_words = {l : (1, None) for l in f.read().splitlines()}
+        self.all_words = {k.lower() : v for k, v in {**self.main_words, **self.user_words}.items()}
 
-    def extend_word2(self, prev_word):
-        port_dict = random.randint(0, 100) < PORT_PCT
-        if port_dict:
-            next_dict = self.port_words
+    def extend_word(self, prev_word):
+        user_dict = random.randint(0, 100) < PORT_PCT
+        if user_dict:
+            next_dict = self.user_words
         else:
-            next_dict = self.next_words
+            next_dict = self.main_words
 
-        new_words = [overlap_words(prev_word, w) for w in next_dict if overlap_words(prev_word, w)]
+        new_words = dict()
+        for w in next_dict.items():
+            new_words.update(dict.fromkeys(overlap_words(prev_word, w)))
+
         if len(new_words) == 0:
             return None
 
+        max_len = max(len(w) for w in new_words)
+        for w in new_words:
+            new_words[w] = (max_len + 1 - len(w), None)
+
         while len(new_words) > 0:
-            new_word = pick_one_word(new_words, power = 2 if port_dict else 4)
+            new_word = pick_one_word(new_words)
             if not new_word:
                 return None
-            new_words.remove(new_word)
-            if new_word.lower() not in self.all_words:
+            del new_words[new_word[0]]
+            if new_word[0].lower() not in self.all_words:
                 return new_word
         return None
 
-    def get_portmanteau(self):
-        target_times = 1
-        port_dict = random.randint(0, 100) < PORT_PCT
-        if port_dict:
-            words = self.port_words
+    def get_portmanteau(self, target_times = 1):
+        user_dict = random.randint(0, 100) < PORT_PCT
+        if user_dict:
+            words = self.user_words
         else:
-            words = self.first_words
+            words = self.main_words
 
         while True:
             while True:
-                word = pick_one_word(words, power = 2 if port_dict else 4)
+                word = pick_one_word(words)
                 times = target_times
                 while times > 0:
-                    ext_word = self.extend_word2(word)
+                    ext_word = self.extend_word(word)
                     if ext_word is None:
                         break
@@ -143,17 +142,25 @@ class WordMaker:
                 if times == 0:
                     break
 
-            if len(word) < 15:
+            if len(word[0]) < MAX_GEN_LEN:
                 break
 
-        word = word.lower()
-        print(word)
-        return word
+        word_str = word[0].lower()
+        print(word_str)
+        return word_str
 
     def get_portmanteaus(self, count = 10):
-        return [self.get_portmanteau() for x in range(0, count)]
+        words = set()
+        while count > 0:
+            word_str = self.get_portmanteau()
+            if word_str not in words:
+                words.add(word_str)
+                count -= 1
+        return words
class PortBotClient(BotClient):
def __init__(self, bot, config):
@@ -180,14 +187,6 @@ class PortBotClient(BotClient):
         if status["account"]["id"] != self.my_id:
             return
 
-        #if status["created_at"] < datetime.now(timezone.utc) - timedelta(hours = 24) and status["reblogs_count"] == 0 and status["favourites_count"] == 0:
-        #    try:
-        #        print("Deleting", status["created_at"], status["content"])
-        #        self.api.status_delete(status["id"])
-        #        time.sleep(2)
-        #    except MastodonNotFoundError:
-        #        pass
 
 def post():
     for client_name, client in bot.clients.items():
         words = wm.get_portmanteaus(3)
@@ -204,7 +203,7 @@ def post():
     print("Scheduling at", dt)
     if TEST:
-        scheduler.enter(1, 1, post)
+        scheduler.enter(2, 1, post)
     else:
         scheduler.enterabs(dt.timestamp(), 1, post)
@@ -218,8 +217,9 @@ bot.start()
 print("Running")
 dt = next_dt()
 if TEST:
-    scheduler.enter(1, 1, post)
+    scheduler.enter(2, 1, post)
 else:
     print("Scheduling at", dt)
     scheduler.enterabs(dt.timestamp(), 1, post)
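
Two behavioral notes on the portbot.py changes above. First, the reworked next_dt now returns the top of the next hour: subtracting a timedelta with hours = -1 adds an hour, while the minutes, seconds, and microseconds terms zero out the sub-hour part (the old version rounded to the next 15-minute mark). Second, word selection now operates on dicts of the shape {word: (weight, hyphenation)} rather than sorted lists. A minimal runnable sketch of that selection scheme, reusing the word_weight and pick_one_word definitions from the diff with a hypothetical candidates dict (the weight values are made up):

    import random

    def word_weight(index, length, power):
        # As committed, `power` is accepted but the exponent stays hard-coded at 2.
        a = pow((index + 1) / length, 2)
        return int(350000 * a)

    def pick_one_word(words):
        if len(words) == 0:
            return None
        # Weight each entry by the first element of its value tuple and
        # return a single (word, (weight, hyphenation)) item.
        return random.choices(list(words.items()), weights = [v[0] for v in words.values()])[0]

    # extend_word scores candidates as (max_len + 1 - len(word)), so shorter
    # portmanteau candidates are weighted higher, as in this example:
    candidates = {"chonkster": (1, None), "smolblob": (2, None), "gaymer": (4, None)}
    print(pick_one_word(candidates)[0])  # "gaymer", most of the time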

userdict.csv (55 changed lines)

@@ -1,6 +1,5 @@
4chan
4channer
acquihire
ActivityPub
amazon
angst
@@ -9,6 +8,7 @@ anime
animu
Anon
antifa
anus
Apple
Asperger
aspie
@@ -20,6 +20,7 @@ Barbie
based
basic
Batman
belly
birb
bisexual
bitch
@@ -28,8 +29,13 @@ blåhaj
blaseball
blaze
bloatware
blob
blobcat
blockchain
blogger
blue
blueballs
Bluetooth
boner
boob
boomer
@@ -48,6 +54,7 @@ bummer
buttcrack
buttload
Buzzfeed
cancel
cancer
cat
catgirl
@@ -63,6 +70,9 @@ chill
chonk
chonker
choomer
chub
chubby
chungus
clickbait
cocaine
cock
@@ -72,8 +82,10 @@ conservative
cookie
cool
coomer
crab
crap
creeper
crew
cringe
crush
crypto
@@ -137,11 +149,16 @@ fedi
fediverse
fedora
Fedora
fiddle
fidget
fire
fired
Firefox
flame
flamewar
fluff
fluffy
fondle
freemium
fuck
fucker
@@ -150,6 +167,9 @@ futa
futurama
game
gamer
gang
gangsta
gangster
gay
gaydar
gaymer
@@ -182,6 +202,8 @@ hipster
hodl
hoodie
horny
huge
humble
hung
husbando
hyperlink
@@ -197,6 +219,7 @@ jam
JavaScript
jerk
jiggle
jumbo
kalm
Kickstarter
Kirby
@@ -221,6 +244,7 @@ Luigi
lulz
lurk
lurker
mad
mafia
malware
mama
@@ -229,6 +253,7 @@ manscape
Mario
Mastodon
masturbate
mega
Megaman
meme
metal
@@ -239,6 +264,7 @@ mista
modem
moe
mother
multiplayer
Murican
mutual
Nazi
@@ -264,12 +290,15 @@ ozone
panik
pedo
Pedobear
penis
Pepe
pet
photo
photobomb
Pikachu
pirate
Pixelfed
player
PlayStation
Pleroma
plugin
@@ -277,6 +306,7 @@ Pokémon
porn
PornHub
pornography
possum
post
potat
potet
@@ -284,13 +314,14 @@ pregnant
protecc
pspsps
psyop
pube
pubes
pussy
pwned
rageface
reboot
Reddit
redditor
RedHat
reboot
ree
reee
reeee
@@ -299,8 +330,11 @@ Republican
retweet
rickroll
Robocop
roleplay
roleplayer
sage
sapphic
satan
selfie
senpai
sex
@@ -312,14 +346,18 @@ shitton
shroom
sick
sista
skeleton
Slenderman
Slendermen
small
smartphone
smol
smug
smut
Snapchat
Snowden
snug
snuggle
software
Sonic
soyboy
@@ -327,11 +365,14 @@ spam
sperg
Spiderman
Spongebob
spook
spooky
Spotify
squad
Squidward
startup
steampunk
stim
stonk
stonks
subtweet
@@ -344,6 +385,9 @@ thread
tiger
Tiktok
Tinder
tiny
tit
tits
tlap
Tor
totes
@@ -352,6 +396,8 @@ transsex
transsexual
trap
trending
trigger
triggered
trollface
trolltard
tsundere
@@ -360,8 +406,10 @@ Uber
Ubuntu
unfappable
Unix
vagina
Valhalla
vegan
videogame
viral
virgin
virus
@@ -381,6 +429,7 @@ weeaboo
weeb
weed
WhatsApp
wiener
Wikipedia
Windows
WordPress
sourcedicts/mhyph.txt, sourcedicts/unigram_freq.csv: unable to load file from base commit.