dict_dl/maintenance.py

47 lines
1.4 KiB
Python
Raw Normal View History

import string
from dict_dl import DictFile, FullDictionary
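# Maintenance script: re-partition a stored dictionary into files keyed by a
# new prefix length (see new_prefix_length below).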
# prefix_dir, suffix = "en_MerriamWebster/", "_MW.json"
# prefix_dir, suffix = "en_MWThesaurus/", "_MWT.json"
# Path prefix and filename suffix used to build each partition file path.
prefix_dir, suffix = "de_Duden/", "_D.json"
# Number of leading key characters that select a partition file.
new_prefix_length = 3
letters = string.ascii_lowercase


def unusual(prefix):
    """Return True if *prefix* is not a regular partition name.

    A prefix is unusual when it is shorter than ``new_prefix_length`` or when
    its lowercased form contains any character outside ASCII ``a``-``z``.
    """
    if len(prefix) < new_prefix_length:
        return True
    # Generator instead of a list so all() can stop at the first bad character.
    return not all(c in letters for c in prefix.lower())
2022-07-10 03:16:16 +00:00
# d = FullDictionary("en_merriam_webster/", "_mw.json")
# df = FullDictionary(prefix_dir, suffix)
# print(len(df))
# print(d.keys() ^ df.keys())
# exit()
2022-07-10 03:16:16 +00:00
# d = FullDictionary("en_MW_thesaurus/", "_mwt.json")
# Load the complete dictionary from its existing partition files.
d = FullDictionary("de_duden/", "_duden.json")
print(f"{d.readtime:.06f}")

# Partition name that collects every entry whose key prefix is unusual.
fallback = "_" * new_prefix_length

# Bucket all entries by their new partition name in a single pass instead of
# rescanning the whole dictionary once per prefix.
buckets = {}
for k, v in d.items():
    head = k[:new_prefix_length]
    name = fallback if unusual(head) else head.lower()
    buckets.setdefault(name, {})[k] = v

# Number of distinct partitions, then number of keys with an unusual prefix
# (all of those keys live in the fallback bucket).
print(len(buckets))
print(len(buckets.get(fallback, {})))

for prefix, entries in buckets.items():
    if unusual(prefix):
        # Only the fallback name itself fails the check; report it.
        print(prefix)
    partd = DictFile(f"{prefix_dir}{prefix}{suffix}")
    partd |= entries
    partd.save()