"""Re-split a stored dictionary into per-prefix files using a new prefix length.

The script reads a complete dictionary through ``FullDictionary``, groups its
entries by the lower-cased first ``new_prefix_length`` characters of each key,
and merges every group into a ``DictFile`` named
``{prefix_dir}{prefix}{suffix}``.  Keys whose prefix is too short or contains
anything other than ASCII letters are collected in one fallback file whose
prefix consists of underscores.
"""
import string
from collections import defaultdict

from dict_dl import DictFile, FullDictionary

# CHANGE PREFIX_LENGTH...
# Target layout: directory and file-name suffix of the re-split files.
# Activate the pair that matches the source dictionary selected further down.
# prefix_dir, suffix = "en_MerriamWebster/", "_MW.json"
# prefix_dir, suffix = "en_MWThesaurus/", "_MWT.json"
prefix_dir, suffix = "de_Duden/", "_D.json"
new_prefix_length = 3

letters = string.ascii_lowercase
# File-name prefix shared by every key that has no usable letter prefix.
fallback_prefix = "_" * new_prefix_length


def unusual(prefix):
    """Return True if *prefix* cannot serve as a per-file prefix.

    A prefix is unusual when it is shorter than ``new_prefix_length`` or when
    its lower-cased form contains a character outside ``a``-``z``.
    """
    if len(prefix) < new_prefix_length:
        return True
    return any(c not in letters for c in prefix.lower())


# Optional sanity check: compare an old layout against the re-split one.
# d = FullDictionary("en_merriam_webster/", "_mw.json")
# df = FullDictionary(prefix_dir, suffix)
# print(len(df))
# print(d.keys() ^ df.keys())
# exit()
# d = FullDictionary("en_MW_thesaurus/", "_mwt.json")
d = FullDictionary("de_duden/", "_duden.json")
print(f"{d.readtime:.06f}")

# Group all entries in ONE pass over the dictionary instead of re-scanning
# the whole dictionary once per prefix.
buckets = defaultdict(dict)
unusual_count = 0
for key, value in d.items():
    head = key[:new_prefix_length]
    if unusual(head):
        unusual_count += 1
        bucket = fallback_prefix
    else:
        bucket = head.lower()
    buckets[bucket][key] = value

print(len(buckets))  # number of distinct target files
print(unusual_count)  # number of keys routed to the fallback file

for prefix, entries in buckets.items():
    if unusual(prefix):
        # Echo the fallback prefix so it is visible that such keys existed.
        print(prefix)
    partd = DictFile(f"{prefix_dir}{prefix}{suffix}")
    # Merge into whatever the DictFile already holds for this path.
    partd |= entries
    partd.save()