"""Join a hyphenation word list with a word-frequency table.

Reads ``dicts/mhyph.txt`` (one entry per line, fragments separated by a
bullet character) and ``dicts/unigram_freq.csv`` (a header line followed by
``word,count`` rows), then prints one ``word,count,frag=frag=...`` line for
every frequency-table word that also occurs in the hyphenation list, most
frequent first.
"""
import itertools

HYPH_PATH = "dicts/mhyph.txt"
FREQ_PATH = "dicts/unigram_freq.csv"

# Separator between the fragments of an entry in the hyphenation file
# (presumably marking hyphenation points, judging by the file name).
HYPH_SEPARATOR = "•"


def parse_hyphenations(lines):
    """Map each joined word to its tuple of fragments.

    Args:
        lines: Iterable of entries with fragments separated by
            ``HYPH_SEPARATOR`` and no trailing line terminator.

    Returns:
        dict mapping ``"".join(fragments)`` to the ``fragments`` tuple.
        Blank lines are skipped.  If several entries join to the same word,
        the first one encountered is kept so the result does not depend on
        hash ordering.
    """
    hyph_map = {}
    for line in lines:
        if not line:
            continue
        parts = tuple(line.split(HYPH_SEPARATOR))
        hyph_map.setdefault("".join(parts), parts)
    return hyph_map


def parse_frequencies(lines):
    """Parse ``word,count`` rows, skipping the first (header) line.

    Args:
        lines: Iterable of CSV lines; the first one is always discarded.

    Returns:
        List of ``(word, count)`` tuples in input order, ``count`` as int.

    Raises:
        ValueError: If a non-blank row does not have exactly two
            comma-separated fields or the count is not an integer.
    """
    freqs = []
    for line in itertools.islice(lines, 1, None):
        if not line:
            continue
        word, count = line.split(",")
        freqs.append((word, int(count)))
    return freqs


def rank_hyphenated(freqs, hyph_map):
    """Select the words that have a hyphenation, most frequent first.

    Args:
        freqs: Iterable of ``(word, count)`` tuples.
        hyph_map: Mapping from word to its tuple of fragments.

    Returns:
        List of ``(word, count, fragments)`` tuples sorted by descending
        count.  The sort is stable, so equal counts keep their input order.
        A word listed more than once is reported once, with its highest
        count.
    """
    ranked = {}
    for word, count in sorted(freqs, key=lambda pair: pair[1], reverse=True):
        parts = hyph_map.get(word)
        # Rows arrive in descending order, so the first hit for a word is
        # the one with the highest count; later duplicates are ignored.
        if parts and word not in ranked:
            ranked[word] = (count, parts)
    return [(word, count, parts) for word, (count, parts) in ranked.items()]


def format_row(word, count, parts):
    """Return the output line ``word,count,frag=frag=...`` for one entry."""
    return f"{word},{count},{'='.join(parts)}"


def main():
    """Load both dictionaries and print the joined table to stdout."""
    # newline="\r\n" disables newline translation on read; splitlines()
    # then does the actual splitting.
    with open(HYPH_PATH, "r", encoding="macroman", newline="\r\n") as hyph_file:
        hyph_map = parse_hyphenations(hyph_file.read().splitlines())

    # Explicit encoding so the result does not depend on the locale default.
    # NOTE(review): assumes the CSV is ASCII/UTF-8 - confirm against the data.
    with open(FREQ_PATH, "r", encoding="utf-8") as freq_file:
        freqs = parse_frequencies(freq_file.read().splitlines())

    for word, count, parts in rank_hyphenated(freqs, hyph_map):
        print(format_row(word, count, parts))


if __name__ == "__main__":
    main()