Rename output directories and file suffixes; reformat code

2022-07-10 04:35:09 +00:00 · 2022-07-10 04:35:09 +00:00 · f5081d174d
commit f5081d174d
parent 5080ad4f63
6 changed files with 95 additions and 45 deletions
--- a/MW_thesaurus.py
+++ b/MW_thesaurus.py
@@ -22,12 +22,46 @@ class MWThesaurusParser(WordParser):
                        [e.remove(ul) for ul in e.findall(".//ul")]
                        d = ot(e)
                        thes[d] = {"examples": examples}
-                    thes[d]["synonyms"] = [ ot(li) for li in se.findall( ".//span[@class='thes-list syn-list']/div[@class='thes-list-content synonyms_list']//li//a") ]
+                    thes[d]["synonyms"] = [
-                    thes[d]["near synonyms"] = [ ot(li) for li in se.findall( ".//span[@class='thes-list rel-list']/div[@class='thes-list-content synonyms_list']//li//a") ]
+                        ot(li)
-                    thes[d]["near synonyms"].extend([ ot(li) for li in se.findall( ".//span[@class='thes-list sim-list']/div[@class='thes-list-content synonyms_list']//li//a") ])
+                        for li in se.findall(
-                    thes[d]["near antonyms"] = [ ot(li) for li in se.findall( ".//span[@class='thes-list near-list']/div[@class='thes-list-content synonyms_list']//li//a") ]
+                            ".//span[@class='thes-list syn-list']/div[@class='thes-list-content synonyms_list']//li//a"
-                    thes[d]["near antonyms"].extend([ ot(li) for li in se.findall( ".//span[@class='thes-list opp-list']/div[@class='thes-list-content synonyms_list']//li//a") ])
+                        )
-                    thes[d]["antonyms"] = [ ot(li) for li in se.findall( ".//span[@class='thes-list ant-list']/div[@class='thes-list-content synonyms_list']//li//a") ]
+                    ]
                    thes[d]["near synonyms"] = [
                        ot(li)
                        for li in se.findall(
                            ".//span[@class='thes-list rel-list']/div[@class='thes-list-content synonyms_list']//li//a"
                        )
                    ]
                    thes[d]["near synonyms"].extend(
                        [
                            ot(li)
                            for li in se.findall(
                                ".//span[@class='thes-list sim-list']/div[@class='thes-list-content synonyms_list']//li//a"
                            )
                        ]
                    )
                    thes[d]["near antonyms"] = [
                        ot(li)
                        for li in se.findall(
                            ".//span[@class='thes-list near-list']/div[@class='thes-list-content synonyms_list']//li//a"
                        )
                    ]
                    thes[d]["near antonyms"].extend(
                        [
                            ot(li)
                            for li in se.findall(
                                ".//span[@class='thes-list opp-list']/div[@class='thes-list-content synonyms_list']//li//a"
                            )
                        ]
                    )
                    thes[d]["antonyms"] = [
                        ot(li)
                        for li in se.findall(
                            ".//span[@class='thes-list ant-list']/div[@class='thes-list-content synonyms_list']//li//a"
                        )
                    ]
        return thes
@@ -60,7 +94,7 @@ class MWThesaurusParser(WordParser):
 # print(w.todict())
 # exit()
-q = Queue(MWThesaurusParser, "en_MW_thesaurus/", "_mwt.json", prefix_length=2)
+q = Queue(MWThesaurusParser, "en_MWThesaurus/", "_MWT.json")
 q.loadDB()
 while True:
--- a/analysis.py
+++ b/analysis.py
@@ -1,7 +1,6 @@
 # import matplotlib.pyplot as plt
 # from PIL import Image
 # from wordcloud import STOPWORDS, WordCloud
 from dict_dl import fulldictionary
 d = FullDictionary("en_merriam_webster/", "_mw.json")
 # d = Dictionary("en_MW_thesaurus/", "_mwt.json")
@@ -13,9 +12,9 @@ print([k for k in d if "?" in k])
 exit()
 again = set()
-for k,v in d.items():
+for k, v in d.items():
-    for ke,di in v.items():
+    for ke, di in v.items():
-        if ke != "type": 
+        if ke != "type":
            if "related" in di:
                again.add(k)
 print(again, len(again))
@@ -23,6 +22,7 @@ with open(f"{d.dir_prefix}redo", "at") as f:
    f.write("\n".join(list(again)))
 exit()
 def grey_color_func(
    word, font_size, position, orientation, random_state=None, **kwargs
 ):
--- a/dict_dl.py
+++ b/dict_dl.py
@@ -14,7 +14,7 @@ from bs4 import BeautifulSoup
 from requests.exceptions import ConnectionError
 letters = string.ascii_lowercase
-unusual = lambda prefix: not all( [c in letters for c in prefix.lower()])
+unusual = lambda prefix: not all([c in letters for c in prefix.lower()])
 # def uq(s):
 #     return unquote(s).split("?")[0]
 uq = unquote
--- a/duden.py
+++ b/duden.py
@@ -3,9 +3,9 @@ from dict_dl import Queue, WordParser, cw, ot, uqall
 class DudenParser(WordParser):
    def __init__(self, word):
-        url_prefix= "https://www.duden.de/rechtschreibung/"
+        url_prefix = "https://www.duden.de/rechtschreibung/"
        super().__init__(word, url_prefix)
-        
+
    @property
    def definitions(self):
        defs = {}
@@ -81,24 +81,27 @@ class DudenParser(WordParser):
        assert (
            self.type or self.definitions
        ), f"{self.time} {self.word}: type or definitions came back empty..."
-        return uqall({
+        return uqall(
-            self.word: {
+            {
-                "type": self.type,
+                self.word: {
-                "definitions": self.definitions,
+                    "type": self.type,
-                "pronounciation": self.pronounciation,
+                    "definitions": self.definitions,
-                "synonyms": self.synonyms,
+                    "pronounciation": self.pronounciation,
-                "history_and_etymology": self.history_and_etymology,
+                    "synonyms": self.synonyms,
-                "wendungen": self.wendungen,
+                    "history_and_etymology": self.history_and_etymology,
-                "time_of_retrieval": self.time,
+                    "wendungen": self.wendungen,
                    "time_of_retrieval": self.time,
                }
            }
-        })
+        )
 # d = DudenParser("hinfallen")
 # print(d.neighbours)
 # print(d.todict())
 # exit()
-q = Queue(DudenParser, "de_duden/", "_duden.json")
+q = Queue(DudenParser, "de_Duden/", "_D.json")
 q.loadDB()
 while True:
--- a/merriam_webster.py
+++ b/merriam_webster.py
@@ -108,20 +108,22 @@ class MerriamWebsterParser(WordParser):
        assert (
            self.type or self.definitions
        ), f"{self.time} {self.word}: type or definitions came back empty..."
-        return uqall({
+        return uqall(
-            self.word: {
+            {
-                "type": self.type,
+                self.word: {
-                "definitions": self.definitions,
+                    "type": self.type,
-                "pronounciation": self.pronounciation,
+                    "definitions": self.definitions,
-                "synonyms": self.synonyms,
+                    "pronounciation": self.pronounciation,
-                "antonyms": self.antonyms,
+                    "synonyms": self.synonyms,
-                "synonym_discussion": self.synonym_discussion,
+                    "antonyms": self.antonyms,
-                "examples": self.examples,
+                    "synonym_discussion": self.synonym_discussion,
-                "history_and_etymology": self.history_and_etymology,
+                    "examples": self.examples,
-                "first_known_use": self.first_known_use,
+                    "history_and_etymology": self.history_and_etymology,
-                "time_of_retrieval": self.time,
+                    "first_known_use": self.first_known_use,
                    "time_of_retrieval": self.time,
                }
            }
-        })
+        )
 # testword = "revivalist"
@@ -132,7 +134,7 @@ class MerriamWebsterParser(WordParser):
 #     print(f"### {k} ###\n", v)
 # exit()
-q = Queue(MerriamWebsterParser, "en_merriam_webster/", "_mw.json", prefix_length=2)
+q = Queue(MerriamWebsterParser, "en_MerriamWebster/", "_MW.json")
 q.loadDB()
 while True:
--- a/t.py
+++ b/t.py
@@ -10,11 +10,22 @@ prefix = query[:2]
 d = Dictionary("en_MW_thesaurus/", f"{prefix}_mwt.json")
 print(f"###    {query:<70}###")
-print("================================================================================")
+print(
-for k,v in d[query].items():
+    "================================================================================"
 )
 for k, v in d[query].items():
    if k != "type":
        print(f"  {k}")
-        print("--------------------------------------------------------------------------------")
+        print(
-        for ka in ["synonyms", "related" if "related" in v else "near synonyms", "near antonyms", "antonyms"]:
+            "--------------------------------------------------------------------------------"
-                print(f"{ka:^13}: {' | '.join(v[ka])}")
+        )
-        print("--------------------------------------------------------------------------------")
+        for ka in [
            "synonyms",
            "related" if "related" in v else "near synonyms",
            "near antonyms",
            "antonyms",
        ]:
            print(f"{ka:^13}: {' | '.join(v[ka])}")
        print(
            "--------------------------------------------------------------------------------"
        )