"""Merge a hyphenation word list with a unigram frequency table.

Reads ``sourcedicts/mhyph.txt`` (one word per line, syllables separated by
a bullet character) and ``sourcedicts/unigram_freq.csv`` (``word,count``
rows below a header line).  Every word present in both files is printed to
stdout as ``word,count,syl=la=bles``, most frequent word first.

Usage: ``python mergedicts.py > merged.csv``
"""

# Provenance: recovered from the patch for commit
# 3ceb937911f8e5312c3f406ee44b6750522c949b ("Add dictionary merger",
# Thor Harald Johansen, Mon, 2 Aug 2021 16:49:04 +0200), which creates
# this script as mergedicts.py.

import itertools

HYPH_PATH = "sourcedicts/mhyph.txt"
FREQ_PATH = "sourcedicts/unigram_freq.csv"

# The hyphenation file is decoded as MacRoman; after decoding, the syllable
# separator is the bullet character.
HYPH_SEPARATOR = "•"


def parse_hyphenations(lines):
    """Map each plain word to its tuple of syllables.

    Args:
        lines: iterable of hyphenated entries without line terminators,
            e.g. ``"dog•ma"``.

    Returns:
        dict mapping the joined word (``"dogma"``) to its parts
        (``("dog", "ma")``).  When several entries join to the same word,
        the first one in ``lines`` wins, so the result does not depend on
        hash ordering.
    """
    hyph_map = {}
    for line in lines:
        parts = tuple(line.split(HYPH_SEPARATOR))
        hyph_map.setdefault("".join(parts), parts)
    return hyph_map


def parse_frequencies(lines):
    """Parse ``word,count`` rows into a list of ``(word, int(count))`` pairs.

    The first line is treated as a header and skipped.  Blank lines after
    the header are ignored instead of failing on tuple unpacking.

    Args:
        lines: iterable of CSV lines without line terminators.

    Returns:
        list of ``(word, count)`` tuples in input order.

    Raises:
        ValueError: if a non-blank row does not have exactly two fields or
            its count is not an integer.
    """
    data_lines = itertools.islice(lines, 1, None)  # drop the header row
    pairs = []
    for line in data_lines:
        if not line:
            continue
        word, count = line.split(",")
        pairs.append((word, int(count)))
    return pairs


def merge_words(hyph_map, freq_pairs):
    """Combine frequencies with hyphenations for words known to both.

    Args:
        hyph_map: mapping of word to syllable tuple, as returned by
            ``parse_hyphenations``.
        freq_pairs: iterable of ``(word, count)`` pairs.

    Returns:
        dict mapping word to ``(count, syllables)``, inserted in order of
        descending count.  Ties are broken alphabetically so the order is
        stable from run to run.
    """
    merged = {}
    ordered = sorted(freq_pairs, key=lambda pair: (-pair[1], pair[0]))
    for word, count in ordered:
        parts = hyph_map.get(word)
        if parts is not None:
            merged[word] = (count, parts)
    return merged


def format_line(word, count, parts):
    """Return one output row: ``word,count,`` plus syllables joined by ``=``."""
    syllables = "=".join(parts)
    return f"{word},{count},{syllables}"


def main():
    """Read both source dictionaries and print the merged rows to stdout."""
    # newline="\r\n" disables newline translation on read; splitlines()
    # then does the actual line splitting.
    with open(HYPH_PATH, "r", encoding="macroman", newline="\r\n") as hyph_file:
        hyph_map = parse_hyphenations(hyph_file.read().splitlines())

    # Explicit encoding so the result does not depend on the platform
    # locale.  NOTE(review): assumes the CSV is ASCII/UTF-8 -- confirm.
    with open(FREQ_PATH, "r", encoding="utf-8") as freq_file:
        freq_pairs = parse_frequencies(freq_file.read().splitlines())

    for word, (count, parts) in merge_words(hyph_map, freq_pairs).items():
        print(format_line(word, count, parts))


if __name__ == "__main__":
    main()