Change output directories and file suffixes, clean up code formatting

This commit is contained in:
julius 2022-07-10 04:35:09 +00:00
parent 5080ad4f63
commit f5081d174d
6 changed files with 95 additions and 45 deletions

View File

@@ -22,12 +22,46 @@ class MWThesaurusParser(WordParser):
[e.remove(ul) for ul in e.findall(".//ul")] [e.remove(ul) for ul in e.findall(".//ul")]
d = ot(e) d = ot(e)
thes[d] = {"examples": examples} thes[d] = {"examples": examples}
thes[d]["synonyms"] = [ ot(li) for li in se.findall( ".//span[@class='thes-list syn-list']/div[@class='thes-list-content synonyms_list']//li//a") ] thes[d]["synonyms"] = [
thes[d]["near synonyms"] = [ ot(li) for li in se.findall( ".//span[@class='thes-list rel-list']/div[@class='thes-list-content synonyms_list']//li//a") ] ot(li)
thes[d]["near synonyms"].extend([ ot(li) for li in se.findall( ".//span[@class='thes-list sim-list']/div[@class='thes-list-content synonyms_list']//li//a") ]) for li in se.findall(
thes[d]["near antonyms"] = [ ot(li) for li in se.findall( ".//span[@class='thes-list near-list']/div[@class='thes-list-content synonyms_list']//li//a") ] ".//span[@class='thes-list syn-list']/div[@class='thes-list-content synonyms_list']//li//a"
thes[d]["near antonyms"].extend([ ot(li) for li in se.findall( ".//span[@class='thes-list opp-list']/div[@class='thes-list-content synonyms_list']//li//a") ]) )
thes[d]["antonyms"] = [ ot(li) for li in se.findall( ".//span[@class='thes-list ant-list']/div[@class='thes-list-content synonyms_list']//li//a") ] ]
thes[d]["near synonyms"] = [
ot(li)
for li in se.findall(
".//span[@class='thes-list rel-list']/div[@class='thes-list-content synonyms_list']//li//a"
)
]
thes[d]["near synonyms"].extend(
[
ot(li)
for li in se.findall(
".//span[@class='thes-list sim-list']/div[@class='thes-list-content synonyms_list']//li//a"
)
]
)
thes[d]["near antonyms"] = [
ot(li)
for li in se.findall(
".//span[@class='thes-list near-list']/div[@class='thes-list-content synonyms_list']//li//a"
)
]
thes[d]["near antonyms"].extend(
[
ot(li)
for li in se.findall(
".//span[@class='thes-list opp-list']/div[@class='thes-list-content synonyms_list']//li//a"
)
]
)
thes[d]["antonyms"] = [
ot(li)
for li in se.findall(
".//span[@class='thes-list ant-list']/div[@class='thes-list-content synonyms_list']//li//a"
)
]
return thes return thes
@@ -60,7 +94,7 @@ class MWThesaurusParser(WordParser):
# print(w.todict()) # print(w.todict())
# exit() # exit()
q = Queue(MWThesaurusParser, "en_MW_thesaurus/", "_mwt.json", prefix_length=2) q = Queue(MWThesaurusParser, "en_MWThesaurus/", "_MWT.json")
q.loadDB() q.loadDB()
while True: while True:

View File

@@ -1,7 +1,6 @@
# import matplotlib.pyplot as plt # import matplotlib.pyplot as plt
# from PIL import Image # from PIL import Image
# from wordcloud import STOPWORDS, WordCloud # from wordcloud import STOPWORDS, WordCloud
from dict_dl import fulldictionary
d = FullDictionary("en_merriam_webster/", "_mw.json") d = FullDictionary("en_merriam_webster/", "_mw.json")
# d = Dictionary("en_MW_thesaurus/", "_mwt.json") # d = Dictionary("en_MW_thesaurus/", "_mwt.json")
@@ -13,9 +12,9 @@ print([k for k in d if "?" in k])
exit() exit()
again = set() again = set()
for k,v in d.items(): for k, v in d.items():
for ke,di in v.items(): for ke, di in v.items():
if ke != "type": if ke != "type":
if "related" in di: if "related" in di:
again.add(k) again.add(k)
print(again, len(again)) print(again, len(again))
@@ -23,6 +22,7 @@ with open(f"{d.dir_prefix}redo", "at") as f:
f.write("\n".join(list(again))) f.write("\n".join(list(again)))
exit() exit()
def grey_color_func( def grey_color_func(
word, font_size, position, orientation, random_state=None, **kwargs word, font_size, position, orientation, random_state=None, **kwargs
): ):

View File

@@ -14,7 +14,7 @@ from bs4 import BeautifulSoup
from requests.exceptions import ConnectionError from requests.exceptions import ConnectionError
letters = string.ascii_lowercase letters = string.ascii_lowercase
unusual = lambda prefix: not all( [c in letters for c in prefix.lower()]) unusual = lambda prefix: not all([c in letters for c in prefix.lower()])
# def uq(s): # def uq(s):
# return unquote(s).split("?")[0] # return unquote(s).split("?")[0]
uq = unquote uq = unquote

View File

@@ -3,9 +3,9 @@ from dict_dl import Queue, WordParser, cw, ot, uqall
class DudenParser(WordParser): class DudenParser(WordParser):
def __init__(self, word): def __init__(self, word):
url_prefix= "https://www.duden.de/rechtschreibung/" url_prefix = "https://www.duden.de/rechtschreibung/"
super().__init__(word, url_prefix) super().__init__(word, url_prefix)
@property @property
def definitions(self): def definitions(self):
defs = {} defs = {}
@@ -81,24 +81,27 @@ class DudenParser(WordParser):
assert ( assert (
self.type or self.definitions self.type or self.definitions
), f"{self.time} {self.word}: type or definitions came back empty..." ), f"{self.time} {self.word}: type or definitions came back empty..."
return uqall({ return uqall(
self.word: { {
"type": self.type, self.word: {
"definitions": self.definitions, "type": self.type,
"pronounciation": self.pronounciation, "definitions": self.definitions,
"synonyms": self.synonyms, "pronounciation": self.pronounciation,
"history_and_etymology": self.history_and_etymology, "synonyms": self.synonyms,
"wendungen": self.wendungen, "history_and_etymology": self.history_and_etymology,
"time_of_retrieval": self.time, "wendungen": self.wendungen,
"time_of_retrieval": self.time,
}
} }
}) )
# d = DudenParser("hinfallen") # d = DudenParser("hinfallen")
# print(d.neighbours) # print(d.neighbours)
# print(d.todict()) # print(d.todict())
# exit() # exit()
q = Queue(DudenParser, "de_duden/", "_duden.json") q = Queue(DudenParser, "de_Duden/", "_D.json")
q.loadDB() q.loadDB()
while True: while True:

View File

@@ -108,20 +108,22 @@ class MerriamWebsterParser(WordParser):
assert ( assert (
self.type or self.definitions self.type or self.definitions
), f"{self.time} {self.word}: type or definitions came back empty..." ), f"{self.time} {self.word}: type or definitions came back empty..."
return uqall({ return uqall(
self.word: { {
"type": self.type, self.word: {
"definitions": self.definitions, "type": self.type,
"pronounciation": self.pronounciation, "definitions": self.definitions,
"synonyms": self.synonyms, "pronounciation": self.pronounciation,
"antonyms": self.antonyms, "synonyms": self.synonyms,
"synonym_discussion": self.synonym_discussion, "antonyms": self.antonyms,
"examples": self.examples, "synonym_discussion": self.synonym_discussion,
"history_and_etymology": self.history_and_etymology, "examples": self.examples,
"first_known_use": self.first_known_use, "history_and_etymology": self.history_and_etymology,
"time_of_retrieval": self.time, "first_known_use": self.first_known_use,
"time_of_retrieval": self.time,
}
} }
}) )
# testword = "revivalist" # testword = "revivalist"
@@ -132,7 +134,7 @@ class MerriamWebsterParser(WordParser):
# print(f"### {k} ###\n", v) # print(f"### {k} ###\n", v)
# exit() # exit()
q = Queue(MerriamWebsterParser, "en_merriam_webster/", "_mw.json", prefix_length=2) q = Queue(MerriamWebsterParser, "en_MerriamWebster/", "_MW.json")
q.loadDB() q.loadDB()
while True: while True:

23
t.py
View File

@@ -10,11 +10,22 @@ prefix = query[:2]
d = Dictionary("en_MW_thesaurus/", f"{prefix}_mwt.json") d = Dictionary("en_MW_thesaurus/", f"{prefix}_mwt.json")
print(f"### {query:<70}###") print(f"### {query:<70}###")
print("================================================================================") print(
for k,v in d[query].items(): "================================================================================"
)
for k, v in d[query].items():
if k != "type": if k != "type":
print(f" {k}") print(f" {k}")
print("--------------------------------------------------------------------------------") print(
for ka in ["synonyms", "related" if "related" in v else "near synonyms", "near antonyms", "antonyms"]: "--------------------------------------------------------------------------------"
print(f"{ka:^13}: {' | '.join(v[ka])}") )
print("--------------------------------------------------------------------------------") for ka in [
"synonyms",
"related" if "related" in v else "near synonyms",
"near antonyms",
"antonyms",
]:
print(f"{ka:^13}: {' | '.join(v[ka])}")
print(
"--------------------------------------------------------------------------------"
)