You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
20 lines
642 B
20 lines
642 B
import itertools
|
|
|
|
# Joins a hyphenation word list with a word-frequency table and prints, for
# every word present in both, one "word,count,syl=la=bles" line, ordered from
# the most to the least frequent word.

HYPHENATION_PATH = "dicts/mhyph.txt"
FREQUENCY_PATH = "dicts/unigram_freq.csv"


def parse_hyphenations(lines):
    """Build a ``{word: syllables}`` map from hyphenation-list lines.

    Each line holds one word whose syllables are separated by "•"; the key is
    the syllables joined back together.

    Args:
        lines: Iterable of lines without their line terminators.

    Returns:
        Dict mapping each plain word to a tuple of its syllables.
    """
    hyph_map = {}
    for line in lines:
        if not line:
            continue  # a blank line carries no word
        syllables = tuple(line.split("•"))
        # First occurrence wins, so the result depends only on the input
        # order and not on hash-randomised set iteration.
        hyph_map.setdefault("".join(syllables), syllables)
    return hyph_map


def parse_frequencies(lines):
    """Parse ``word,count`` rows into a list of ``(word, count)`` pairs.

    The first line is treated as a header and skipped; blank lines are
    ignored.

    Args:
        lines: Iterable of CSV lines without their line terminators.

    Returns:
        List of ``(word, int(count))`` pairs in input order.

    Raises:
        ValueError: If a non-blank row does not have exactly two
            comma-separated fields or its count is not an integer.
    """
    pairs = []
    for line in itertools.islice(lines, 1, None):
        if not line:
            continue
        word, count = line.split(",")
        pairs.append((word, int(count)))
    return pairs


def rank_hyphenated(freq_pairs, hyph_map):
    """Select the words that have a hyphenation, most frequent first.

    Args:
        freq_pairs: Iterable of ``(word, count)`` pairs.
        hyph_map: Dict mapping a word to its syllable tuple.

    Returns:
        Dict ``{word: (count, syllables)}`` whose insertion order is
        descending by count. Equal counts keep their input order (the sort is
        stable), and a word listed more than once keeps its highest count.
    """
    ranked = {}
    by_count = sorted(freq_pairs, key=lambda pair: pair[1], reverse=True)
    for word, count in by_count:
        syllables = hyph_map.get(word)
        if syllables:
            # setdefault keeps the first (highest) count so the printed
            # counts stay in descending order.
            ranked.setdefault(word, (count, syllables))
    return ranked


def format_row(word, count, syllables):
    """Return one output line: ``word,count,`` then syllables joined by "="."""
    return "{},{},{}".format(word, count, "=".join(syllables))


def main():
    """Read both dictionaries and print the joined, frequency-ordered rows."""
    # newline="\r\n" stops the text layer from translating line endings;
    # splitlines() then removes them.
    with open(HYPHENATION_PATH, "r", encoding="macroman", newline="\r\n") as f:
        hyph_map = parse_hyphenations(f.read().splitlines())

    # Explicit encoding so decoding does not depend on the platform locale.
    with open(FREQUENCY_PATH, "r", encoding="utf-8") as f:
        freq_pairs = parse_frequencies(f.read().splitlines())

    ranked = rank_hyphenated(freq_pairs, hyph_map)
    for word, (count, syllables) in ranked.items():
        print(format_row(word, count, syllables))


if __name__ == "__main__":
    main()
|
|
|