From f5081d174da1b6ea019684ea70b5994db17a8f59 Mon Sep 17 00:00:00 2001 From: julius Date: Sun, 10 Jul 2022 04:35:09 +0000 Subject: [PATCH] changed dirs, clean code --- MW_thesaurus.py | 48 +++++++++++++++++++++++++++++++++++++++------- analysis.py | 8 ++++---- dict_dl.py | 2 +- duden.py | 29 +++++++++++++++------------- merriam_webster.py | 30 +++++++++++++++-------------- t.py | 23 ++++++++++++++++------ 6 files changed, 95 insertions(+), 45 deletions(-) diff --git a/MW_thesaurus.py b/MW_thesaurus.py index 9362285f..d070598f 100644 --- a/MW_thesaurus.py +++ b/MW_thesaurus.py @@ -22,12 +22,46 @@ class MWThesaurusParser(WordParser): [e.remove(ul) for ul in e.findall(".//ul")] d = ot(e) thes[d] = {"examples": examples} - thes[d]["synonyms"] = [ ot(li) for li in se.findall( ".//span[@class='thes-list syn-list']/div[@class='thes-list-content synonyms_list']//li//a") ] - thes[d]["near synonyms"] = [ ot(li) for li in se.findall( ".//span[@class='thes-list rel-list']/div[@class='thes-list-content synonyms_list']//li//a") ] - thes[d]["near synonyms"].extend([ ot(li) for li in se.findall( ".//span[@class='thes-list sim-list']/div[@class='thes-list-content synonyms_list']//li//a") ]) - thes[d]["near antonyms"] = [ ot(li) for li in se.findall( ".//span[@class='thes-list near-list']/div[@class='thes-list-content synonyms_list']//li//a") ] - thes[d]["near antonyms"].extend([ ot(li) for li in se.findall( ".//span[@class='thes-list opp-list']/div[@class='thes-list-content synonyms_list']//li//a") ]) - thes[d]["antonyms"] = [ ot(li) for li in se.findall( ".//span[@class='thes-list ant-list']/div[@class='thes-list-content synonyms_list']//li//a") ] + thes[d]["synonyms"] = [ + ot(li) + for li in se.findall( + ".//span[@class='thes-list syn-list']/div[@class='thes-list-content synonyms_list']//li//a" + ) + ] + thes[d]["near synonyms"] = [ + ot(li) + for li in se.findall( + ".//span[@class='thes-list rel-list']/div[@class='thes-list-content synonyms_list']//li//a" + ) + ] + thes[d]["near synonyms"].extend( + [ + ot(li) + for li in se.findall( + ".//span[@class='thes-list sim-list']/div[@class='thes-list-content synonyms_list']//li//a" + ) + ] + ) + thes[d]["near antonyms"] = [ + ot(li) + for li in se.findall( + ".//span[@class='thes-list near-list']/div[@class='thes-list-content synonyms_list']//li//a" + ) + ] + thes[d]["near antonyms"].extend( + [ + ot(li) + for li in se.findall( + ".//span[@class='thes-list opp-list']/div[@class='thes-list-content synonyms_list']//li//a" + ) + ] + ) + thes[d]["antonyms"] = [ + ot(li) + for li in se.findall( + ".//span[@class='thes-list ant-list']/div[@class='thes-list-content synonyms_list']//li//a" + ) + ] return thes @@ -60,7 +94,7 @@ class MWThesaurusParser(WordParser): # print(w.todict()) # exit() -q = Queue(MWThesaurusParser, "en_MW_thesaurus/", "_mwt.json", prefix_length=2) +q = Queue(MWThesaurusParser, "en_MWThesaurus/", "_MWT.json") q.loadDB() while True: diff --git a/analysis.py b/analysis.py index 06894878..875c2bac 100644 --- a/analysis.py +++ b/analysis.py @@ -1,7 +1,6 @@ # import matplotlib.pyplot as plt # from PIL import Image # from wordcloud import STOPWORDS, WordCloud -from dict_dl import fulldictionary d = FullDictionary("en_merriam_webster/", "_mw.json") # d = Dictionary("en_MW_thesaurus/", "_mwt.json") @@ -13,9 +12,9 @@ print([k for k in d if "?" in k]) exit() again = set() -for k,v in d.items(): - for ke,di in v.items(): - if ke != "type": +for k, v in d.items(): + for ke, di in v.items(): + if ke != "type": if "related" in di: again.add(k) print(again, len(again)) @@ -23,6 +22,7 @@ with open(f"{d.dir_prefix}redo", "at") as f: f.write("\n".join(list(again))) exit() + def grey_color_func( word, font_size, position, orientation, random_state=None, **kwargs ): diff --git a/dict_dl.py b/dict_dl.py index caf3f452..f2db3b24 100644 --- a/dict_dl.py +++ b/dict_dl.py @@ -14,7 +14,7 @@ from bs4 import BeautifulSoup from requests.exceptions import ConnectionError letters = string.ascii_lowercase -unusual = lambda prefix: not all( [c in letters for c in prefix.lower()]) +unusual = lambda prefix: not all([c in letters for c in prefix.lower()]) # def uq(s): # return unquote(s).split("?")[0] uq = unquote diff --git a/duden.py b/duden.py index a991fb09..f51f2742 100644 --- a/duden.py +++ b/duden.py @@ -3,9 +3,9 @@ from dict_dl import Queue, WordParser, cw, ot, uqall class DudenParser(WordParser): def __init__(self, word): - url_prefix= "https://www.duden.de/rechtschreibung/" + url_prefix = "https://www.duden.de/rechtschreibung/" super().__init__(word, url_prefix) - + @property def definitions(self): defs = {} @@ -81,24 +81,27 @@ class DudenParser(WordParser): assert ( self.type or self.definitions ), f"{self.time} {self.word}: type or definitions came back empty..." - return uqall({ - self.word: { - "type": self.type, - "definitions": self.definitions, - "pronounciation": self.pronounciation, - "synonyms": self.synonyms, - "history_and_etymology": self.history_and_etymology, - "wendungen": self.wendungen, - "time_of_retrieval": self.time, + return uqall( + { + self.word: { + "type": self.type, + "definitions": self.definitions, + "pronounciation": self.pronounciation, + "synonyms": self.synonyms, + "history_and_etymology": self.history_and_etymology, + "wendungen": self.wendungen, + "time_of_retrieval": self.time, + } } - }) + ) + # d = DudenParser("hinfallen") # print(d.neighbours) # print(d.todict()) # exit() -q = Queue(DudenParser, "de_duden/", "_duden.json") +q = Queue(DudenParser, "de_Duden/", "_D.json") q.loadDB() while True: diff --git a/merriam_webster.py b/merriam_webster.py index ad5a745b..722d9b18 100644 --- a/merriam_webster.py +++ b/merriam_webster.py @@ -108,20 +108,22 @@ class MerriamWebsterParser(WordParser): assert ( self.type or self.definitions ), f"{self.time} {self.word}: type or definitions came back empty..." - return uqall({ - self.word: { - "type": self.type, - "definitions": self.definitions, - "pronounciation": self.pronounciation, - "synonyms": self.synonyms, - "antonyms": self.antonyms, - "synonym_discussion": self.synonym_discussion, - "examples": self.examples, - "history_and_etymology": self.history_and_etymology, - "first_known_use": self.first_known_use, - "time_of_retrieval": self.time, + return uqall( + { + self.word: { + "type": self.type, + "definitions": self.definitions, + "pronounciation": self.pronounciation, + "synonyms": self.synonyms, + "antonyms": self.antonyms, + "synonym_discussion": self.synonym_discussion, + "examples": self.examples, + "history_and_etymology": self.history_and_etymology, + "first_known_use": self.first_known_use, + "time_of_retrieval": self.time, + } } - }) + ) # testword = "revivalist" @@ -132,7 +134,7 @@ class MerriamWebsterParser(WordParser): # print(f"### {k} ###\n", v) # exit() -q = Queue(MerriamWebsterParser, "en_merriam_webster/", "_mw.json", prefix_length=2) +q = Queue(MerriamWebsterParser, "en_MerriamWebster/", "_MW.json") q.loadDB() while True: diff --git a/t.py b/t.py index df148c55..72299a61 100755 --- a/t.py +++ b/t.py @@ -10,11 +10,22 @@ prefix = query[:2] d = Dictionary("en_MW_thesaurus/", f"{prefix}_mwt.json") print(f"### {query:<70}###") -print("================================================================================") -for k,v in d[query].items(): +print( + "================================================================================" +) +for k, v in d[query].items(): if k != "type": print(f" {k}") - print("--------------------------------------------------------------------------------") - for ka in ["synonyms", "related" if "related" in v else "near synonyms", "near antonyms", "antonyms"]: - print(f"{ka:^13}: {' | '.join(v[ka])}") - print("--------------------------------------------------------------------------------") + print( + "--------------------------------------------------------------------------------" + ) + for ka in [ + "synonyms", + "related" if "related" in v else "near synonyms", + "near antonyms", + "antonyms", + ]: + print(f"{ka:^13}: {' | '.join(v[ka])}") + print( + "--------------------------------------------------------------------------------" + )