parent
f0667b31b0
commit
3ceb937911
1 changed files with 20 additions and 0 deletions
@ -0,0 +1,20 @@ |
|||||||
|
"""Join a hyphenation word list with a word-frequency table.

Reads ``sourcedicts/mhyph.txt`` (one word per line, syllables separated by
"•") and ``sourcedicts/unigram_freq.csv`` (a header row followed by
``word,count`` rows), then prints ``word,count,syl=la=bles`` for every word
found in both, from the highest count to the lowest.
"""
import csv
import itertools

HYPH_PATH = "sourcedicts/mhyph.txt"
FREQ_PATH = "sourcedicts/unigram_freq.csv"
HYPH_SEP = "•"


def parse_hyphenations(lines):
    """Map each plain spelling to its tuple of syllables.

    Args:
        lines: iterable of strings, each a word with its syllables
            separated by ``HYPH_SEP``.

    Returns:
        dict mapping the joined word to the tuple of its parts.  When several
        lines join to the same spelling, the first one seen is kept so the
        result does not depend on set/hash ordering.
    """
    hyph_map = {}
    for line in lines:
        parts = tuple(line.split(HYPH_SEP))
        hyph_map.setdefault("".join(parts), parts)
    return hyph_map


def parse_frequencies(lines):
    """Parse ``word,count`` rows into a list of ``(word, int(count))`` pairs.

    Args:
        lines: iterable of CSV text lines; the first line is treated as a
            header and skipped.

    Returns:
        list of ``(word, count)`` tuples in input order.

    Raises:
        ValueError: if a non-blank row does not have exactly two fields or
            its count is not an integer.
    """
    pairs = []
    for row in itertools.islice(csv.reader(lines), 1, None):
        if not row:
            # Blank lines (e.g. a trailing newline) carry no data.
            continue
        word, count = row
        pairs.append((word, int(count)))
    return pairs


def rank_words(freqs, hyph_map):
    """Select the words that have a hyphenation, ordered by falling count.

    Args:
        freqs: iterable of ``(word, count)`` pairs.
        hyph_map: dict as returned by ``parse_hyphenations``.

    Returns:
        dict mapping word to ``(count, parts)``, inserted from the highest
        count to the lowest.  Equal counts are ordered alphabetically so the
        result is reproducible; a word listed more than once keeps its
        highest count.
    """
    words = {}
    for word, count in sorted(freqs, key=lambda pair: (-pair[1], pair[0])):
        parts = hyph_map.get(word)
        if parts is None or word in words:
            continue
        words[word] = (count, parts)
    return words


def format_rows(words):
    """Render each entry of *words* as ``word,count,part=part=...``."""
    return [
        f"{word},{count},{'='.join(parts)}"
        for word, (count, parts) in words.items()
    ]


def main():
    """Load both source files and print the joined table to stdout."""
    # newline="\r\n" disables newline translation on read; splitlines()
    # then does the actual line splitting.
    with open(HYPH_PATH, "r", encoding="macroman", newline="\r\n") as src:
        hyph_map = parse_hyphenations(src.read().splitlines())

    with open(FREQ_PATH, "r", encoding="utf-8") as src:
        freqs = parse_frequencies(src.read().splitlines())

    for row in format_rows(rank_words(freqs, hyph_map)):
        print(row)


if __name__ == "__main__":
    main()
Loading…
Reference in new issue