2022-07-10 04:31:07 +00:00
|
|
|
import string
|
|
|
|
|
|
|
|
from dict_dl import DictFile, FullDictionary
|
|
|
|
|
|
|
|
# CHANGE PREFIX_LENGTH...
|
|
|
|
# prefix_dir, suffix = "en_MerriamWebster/", "_MW.json"
|
|
|
|
# prefix_dir, suffix = "en_MWThesaurus/", "_MWT.json"
|
|
|
|
# Output files are written as f"{prefix_dir}{prefix}{suffix}".
prefix_dir, suffix = "de_Duden/", "_D.json"

# Number of leading key characters that select the output file.
new_prefix_length = 3

# Characters allowed in a regular (non-placeholder) file prefix.
letters = string.ascii_lowercase


def unusual(prefix: str) -> bool:
    """Return True when *prefix* is not a regular file prefix.

    A prefix is unusual if it is shorter than ``new_prefix_length`` or if,
    after lower-casing, it contains any character outside ASCII ``a``-``z``.
    The rest of the script maps such prefixes to a run of underscores.
    """
    if len(prefix) < new_prefix_length:
        return True
    # Generator instead of a list: all() can stop at the first bad character.
    return not all(c in letters for c in prefix.lower())
|
2022-07-10 03:16:16 +00:00
|
|
|
|
|
|
|
# d = FullDictionary("en_merriam_webster/", "_mw.json")
|
2022-07-10 04:31:07 +00:00
|
|
|
# df = FullDictionary(prefix_dir, suffix)
|
|
|
|
# print(len(df))
|
|
|
|
# print(d.keys() ^ df.keys())
|
|
|
|
# exit()
|
2022-07-10 03:16:16 +00:00
|
|
|
# d = FullDictionary("en_MW_thesaurus/", "_mwt.json")
|
|
|
|
# Source dictionary, read from the existing files.
# NOTE(review): the directory/suffix here differ from prefix_dir/suffix used
# for output -- presumably the old layout is read and the new one written;
# confirm against dict_dl.
d = FullDictionary("de_duden/", "_duden.json")
print(f"{d.readtime:.06f}")
ke = d.keys()

# Walk the keys once, collecting both the distinct target prefixes and the
# keys whose leading characters do not form a regular prefix.
placeholder = "_" * new_prefix_length
pre = set()
unusual_keys = set()
for key in ke:
    head = key[:new_prefix_length]
    if unusual(head):
        # All irregular keys share one placeholder prefix.
        pre.add(placeholder)
        unusual_keys.add(key)
    else:
        pre.add(head.lower())

# Number of distinct output prefixes, then number of irregular keys.
print(len(pre))
print(len(unusual_keys))
|
|
|
|
|
|
|
|
|
|
|
|
# Re-partition every entry of ``d`` into one output file per new prefix.
#
# Entries are bucketed in a single pass over ``d`` instead of re-scanning the
# whole dictionary once per prefix. The bucket key is derived exactly like the
# members of ``pre``: the lower-cased leading characters, or a run of
# underscores when those characters are unusual.
placeholder = "_" * new_prefix_length
buckets = {}
for k, v in d.items():
    head = k[:new_prefix_length]
    target = placeholder if unusual(head) else head.lower()
    buckets.setdefault(target, {})[k] = v

for prefix, entries in buckets.items():
    if unusual(prefix):
        # Only the placeholder bucket is reported.
        print(prefix)
    # NOTE(review): DictFile presumably loads any existing file at this path,
    # so ``|=`` merges into what is already there -- confirm against dict_dl.
    partd = DictFile(f"{prefix_dir}{prefix}{suffix}")
    partd |= entries
    partd.save()
|