# dict_dl/analysis.py
# 2022-07-08 14:36:55 +00:00

# 76 lines
# 2.1 KiB
# Python

# import matplotlib.pyplot as plt
# from PIL import Image
# from wordcloud import STOPWORDS, WordCloud
from dict_dl import Dictionary, uq, uqall
# Load the dictionary stored under "en_merriam_webster/"; swap in one of the
# commented constructors below to run the same checks on another data set.
d = Dictionary("en_merriam_webster/", "_mw.json")
# d = Dictionary("en_MW_thesaurus/", "_mwt.json")
# d = Dictionary("de_duden/", "_duden.json")
print(f"{d.readtime:.06f}")

# Keys whose entry contains a "%" anywhere in its repr.
perc = {k for k, v in d.items() if "%" in repr(v)}
print(len(perc))

# The "redo" file is opened in append mode, so every key is written with its
# own trailing newline; without it the last key of one run and the first key
# of the next run would be glued together on a single line. Sorting makes the
# appended chunk deterministic instead of following set iteration order.
# NOTE(review): presumably "redo" is a list of entries to fetch again —
# confirm that its reader tolerates the trailing newline.
with open(f"{d.dir_prefix}redo", "at") as f:
    f.writelines(f"{k}\n" for k in sorted(perc))

# Stop here: nothing below this point is executed. SystemExit is raised
# directly because the `exit` helper is only installed by the `site` module
# for interactive use.
raise SystemExit
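# Entries retrieved before a cutoff date (first 8 characters of
# "time_of_retrieval" compared as an integer) -- currently disabled.
# old = {k for k,v in d.items() if int(v["time_of_retrieval"][:8]) < 20220705}
# with open(f"{d.dir_prefix}redo", "at") as f:
#     f.write('\n'.join(list(old)))
# print(len(old))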
# Fraction of entries that have no "synonym_discussion" key.
new = set()
for key, entry in d.items():
    if "synonym_discussion" not in entry:
        new.add(key)
print(len(new) / len(d))
exit()
def grey_color_func(
    word, font_size, position, orientation, random_state=None, **kwargs
):
    """Return a random light-grey colour as an HSL string.

    The signature follows the keyword arguments a colour callback receives
    (see the commented-out ``wc.recolor(color_func=grey_color_func, ...)``
    call in this file); only ``random_state`` influences the result.

    Args:
        word: Ignored.
        font_size: Ignored.
        position: Ignored.
        orientation: Ignored.
        random_state: Optional object exposing ``randint(a, b)`` (for example
            a ``random.Random`` instance). When given, it supplies the
            lightness so that seeded calls are reproducible; when ``None``
            the module-level ``random`` generator is used.
        **kwargs: Ignored; accepted so extra keyword arguments do not fail.

    Returns:
        A string of the form ``"hsl(0, 0%, N%)"`` with ``N`` drawn uniformly
        from 60 to 100 inclusive.
    """
    # Imported here because the file has no module-level ``import random``;
    # without it every call raised NameError.
    import random

    rng = random if random_state is None else random_state
    return "hsl(0, 0%%, %d%%)" % rng.randint(60, 100)
# mask = np.array(Image.open(os.path.join(d, "stormtrooper_mask.png")))
# Render a word cloud built from one headword plus its "synonyms" list.
# NOTE(review): `mw`, `WordCloud` and `plt` are not bound in the part of the
# file shown here (the matching imports are commented out) -- confirm before
# re-enabling this section.
word = "abhorrent"
terms = mw[word]["synonyms"] + [word]
text = " ".join(terms)

cloud_options = {
    "max_words": 200,
    "width": 1920,
    "height": 1080,
    "margin": 10,
    "min_font_size": 40,
    "max_font_size": 100,
    "random_state": 1,
}
wc = WordCloud(**cloud_options).generate(text)
default_colors = wc.to_array()

# plt.title("Custom colors")
# plt.imshow(wc.recolor(color_func=grey_color_func, random_state=3),
#            interpolation="bilinear")

# The pyplot calls act on the current figure, so their order is kept as is.
plt.figure()
plt.title(word)
plt.imshow(default_colors, interpolation="bilinear")
plt.axis("off")
plt.show()
exit()
# Split the database into one JSON file per first character of the key
# (currently disabled).
# letters = {k[0] for k in mw}
# start = time.time()
# for c in letters:
#     c_db = {k:v for k,v in mw.items() if k[0] == c}
#     with open(f"{c}_mw.json", "w") as f:  # save DB
#         json.dump(c_db, f, separators=(",", ":"), indent=2)
# print(time.time() - start)
# exit()
# types = {w["word"] for w in mw if not w["history_and_etymology"]}
# Re-key the records in `mw` by their "word" field and save the result.
# NOTE(review): `mw` and `json` are not bound in the part of the file shown
# here -- confirm before re-enabling this section.
types = [record["type"] for record in mw]
print(len(types))

# Each value is a copy of the record without its "word" field; a later record
# with the same word replaces the earlier one.
new_mw = {}
for record in mw:
    fields = dict(record)
    headword = fields.pop("word")
    new_mw[headword] = fields
print(new_mw)
print(len(new_mw))

with open("new_mw.json", "w") as f:
    json.dump(new_mw, f, separators=(",", ":"), indent=2)