parent
f0667b31b0
commit
3ceb937911
1 changed files with 20 additions and 0 deletions
@ -0,0 +1,20 @@ |
||||
import itertools |
||||
|
||||
def parse_hyphenations(lines):
    """Map each un-hyphenated word to its tuple of syllables.

    Args:
        lines: iterable of strings in which syllables are separated by "•",
            e.g. "ta•ble".

    Returns:
        dict mapping the joined word ("table") to its parts (("ta", "ble")).
        When the same joined word occurs more than once, the first
        occurrence wins, so the result does not depend on hash ordering.
    """
    hyph_map = {}
    for line in lines:
        syllables = tuple(line.split("•"))
        # setdefault keeps the first hyphenation seen for a given word.
        hyph_map.setdefault("".join(syllables), syllables)
    return hyph_map


def parse_frequencies(lines):
    """Parse "word,count" lines into a list of (word, int(count)) pairs.

    Args:
        lines: iterable of strings; the first one is treated as a header
            and skipped.

    Returns:
        list of (word, count) tuples in input order.

    Raises:
        ValueError: if a line does not have exactly two comma-separated
            fields, or if the count is not an integer.
    """
    rows = (line.split(",") for line in itertools.islice(lines, 1, None))
    return [(word, int(count)) for word, count in rows]


def join_by_frequency(freqs, hyph_map):
    """Keep the words that have a hyphenation, most frequent first.

    Args:
        freqs: iterable of (word, count) pairs.
        hyph_map: dict mapping word -> tuple of syllables.

    Returns:
        dict mapping word -> (count, syllables). Insertion order is by
        descending count; words with equal counts keep their input order
        because sorted() is stable.
    """
    joined = {}
    for word, count in sorted(freqs, key=lambda pair: pair[1], reverse=True):
        syllables = hyph_map.get(word)
        if syllables is not None:
            joined[word] = (count, syllables)
    return joined


def format_row(word, count, syllables):
    """Return one output line: word, count, and syllables joined by "="."""
    hyphenated = "=".join(syllables)
    return f"{word},{count},{hyphenated}"


def main():
    """Read both source files and print one "word,count,syl=la=bles" row per match."""
    # The hyphenation list is decoded as Mac Roman; newline="\r\n" turns off
    # newline translation so splitlines() sees the raw line endings.
    with open("sourcedicts/mhyph.txt", "r", encoding="macroman", newline="\r\n") as hyph_file:
        hyph_map = parse_hyphenations(hyph_file.read().splitlines())

    # NOTE(review): no encoding is given here, so the platform default is
    # used -- confirm the CSV is plain ASCII/UTF-8.
    with open("sourcedicts/unigram_freq.csv", "r") as freq_file:
        freqs = parse_frequencies(freq_file.read().splitlines())

    for word, (count, syllables) in join_by_frequency(freqs, hyph_map).items():
        print(format_row(word, count, syllables))


if __name__ == "__main__":
    main()
Loading…
Reference in new issue