changed dirs, clean code
This commit is contained in:
parent
5080ad4f63
commit
f5081d174d
@ -22,12 +22,46 @@ class MWThesaurusParser(WordParser):
|
|||||||
[e.remove(ul) for ul in e.findall(".//ul")]
|
[e.remove(ul) for ul in e.findall(".//ul")]
|
||||||
d = ot(e)
|
d = ot(e)
|
||||||
thes[d] = {"examples": examples}
|
thes[d] = {"examples": examples}
|
||||||
thes[d]["synonyms"] = [ ot(li) for li in se.findall( ".//span[@class='thes-list syn-list']/div[@class='thes-list-content synonyms_list']//li//a") ]
|
thes[d]["synonyms"] = [
|
||||||
thes[d]["near synonyms"] = [ ot(li) for li in se.findall( ".//span[@class='thes-list rel-list']/div[@class='thes-list-content synonyms_list']//li//a") ]
|
ot(li)
|
||||||
thes[d]["near synonyms"].extend([ ot(li) for li in se.findall( ".//span[@class='thes-list sim-list']/div[@class='thes-list-content synonyms_list']//li//a") ])
|
for li in se.findall(
|
||||||
thes[d]["near antonyms"] = [ ot(li) for li in se.findall( ".//span[@class='thes-list near-list']/div[@class='thes-list-content synonyms_list']//li//a") ]
|
".//span[@class='thes-list syn-list']/div[@class='thes-list-content synonyms_list']//li//a"
|
||||||
thes[d]["near antonyms"].extend([ ot(li) for li in se.findall( ".//span[@class='thes-list opp-list']/div[@class='thes-list-content synonyms_list']//li//a") ])
|
)
|
||||||
thes[d]["antonyms"] = [ ot(li) for li in se.findall( ".//span[@class='thes-list ant-list']/div[@class='thes-list-content synonyms_list']//li//a") ]
|
]
|
||||||
|
thes[d]["near synonyms"] = [
|
||||||
|
ot(li)
|
||||||
|
for li in se.findall(
|
||||||
|
".//span[@class='thes-list rel-list']/div[@class='thes-list-content synonyms_list']//li//a"
|
||||||
|
)
|
||||||
|
]
|
||||||
|
thes[d]["near synonyms"].extend(
|
||||||
|
[
|
||||||
|
ot(li)
|
||||||
|
for li in se.findall(
|
||||||
|
".//span[@class='thes-list sim-list']/div[@class='thes-list-content synonyms_list']//li//a"
|
||||||
|
)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
thes[d]["near antonyms"] = [
|
||||||
|
ot(li)
|
||||||
|
for li in se.findall(
|
||||||
|
".//span[@class='thes-list near-list']/div[@class='thes-list-content synonyms_list']//li//a"
|
||||||
|
)
|
||||||
|
]
|
||||||
|
thes[d]["near antonyms"].extend(
|
||||||
|
[
|
||||||
|
ot(li)
|
||||||
|
for li in se.findall(
|
||||||
|
".//span[@class='thes-list opp-list']/div[@class='thes-list-content synonyms_list']//li//a"
|
||||||
|
)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
thes[d]["antonyms"] = [
|
||||||
|
ot(li)
|
||||||
|
for li in se.findall(
|
||||||
|
".//span[@class='thes-list ant-list']/div[@class='thes-list-content synonyms_list']//li//a"
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
return thes
|
return thes
|
||||||
|
|
||||||
@ -60,7 +94,7 @@ class MWThesaurusParser(WordParser):
|
|||||||
# print(w.todict())
|
# print(w.todict())
|
||||||
# exit()
|
# exit()
|
||||||
|
|
||||||
q = Queue(MWThesaurusParser, "en_MW_thesaurus/", "_mwt.json", prefix_length=2)
|
q = Queue(MWThesaurusParser, "en_MWThesaurus/", "_MWT.json")
|
||||||
q.loadDB()
|
q.loadDB()
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
|
@ -1,7 +1,6 @@
|
|||||||
# import matplotlib.pyplot as plt
|
# import matplotlib.pyplot as plt
|
||||||
# from PIL import Image
|
# from PIL import Image
|
||||||
# from wordcloud import STOPWORDS, WordCloud
|
# from wordcloud import STOPWORDS, WordCloud
|
||||||
from dict_dl import fulldictionary
|
|
||||||
|
|
||||||
d = FullDictionary("en_merriam_webster/", "_mw.json")
|
d = FullDictionary("en_merriam_webster/", "_mw.json")
|
||||||
# d = Dictionary("en_MW_thesaurus/", "_mwt.json")
|
# d = Dictionary("en_MW_thesaurus/", "_mwt.json")
|
||||||
@ -13,9 +12,9 @@ print([k for k in d if "?" in k])
|
|||||||
exit()
|
exit()
|
||||||
|
|
||||||
again = set()
|
again = set()
|
||||||
for k,v in d.items():
|
for k, v in d.items():
|
||||||
for ke,di in v.items():
|
for ke, di in v.items():
|
||||||
if ke != "type":
|
if ke != "type":
|
||||||
if "related" in di:
|
if "related" in di:
|
||||||
again.add(k)
|
again.add(k)
|
||||||
print(again, len(again))
|
print(again, len(again))
|
||||||
@ -23,6 +22,7 @@ with open(f"{d.dir_prefix}redo", "at") as f:
|
|||||||
f.write("\n".join(list(again)))
|
f.write("\n".join(list(again)))
|
||||||
exit()
|
exit()
|
||||||
|
|
||||||
|
|
||||||
def grey_color_func(
|
def grey_color_func(
|
||||||
word, font_size, position, orientation, random_state=None, **kwargs
|
word, font_size, position, orientation, random_state=None, **kwargs
|
||||||
):
|
):
|
||||||
|
@ -14,7 +14,7 @@ from bs4 import BeautifulSoup
|
|||||||
from requests.exceptions import ConnectionError
|
from requests.exceptions import ConnectionError
|
||||||
|
|
||||||
letters = string.ascii_lowercase
|
letters = string.ascii_lowercase
|
||||||
unusual = lambda prefix: not all( [c in letters for c in prefix.lower()])
|
unusual = lambda prefix: not all([c in letters for c in prefix.lower()])
|
||||||
# def uq(s):
|
# def uq(s):
|
||||||
# return unquote(s).split("?")[0]
|
# return unquote(s).split("?")[0]
|
||||||
uq = unquote
|
uq = unquote
|
||||||
|
29
duden.py
29
duden.py
@ -3,9 +3,9 @@ from dict_dl import Queue, WordParser, cw, ot, uqall
|
|||||||
|
|
||||||
class DudenParser(WordParser):
|
class DudenParser(WordParser):
|
||||||
def __init__(self, word):
|
def __init__(self, word):
|
||||||
url_prefix= "https://www.duden.de/rechtschreibung/"
|
url_prefix = "https://www.duden.de/rechtschreibung/"
|
||||||
super().__init__(word, url_prefix)
|
super().__init__(word, url_prefix)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def definitions(self):
|
def definitions(self):
|
||||||
defs = {}
|
defs = {}
|
||||||
@ -81,24 +81,27 @@ class DudenParser(WordParser):
|
|||||||
assert (
|
assert (
|
||||||
self.type or self.definitions
|
self.type or self.definitions
|
||||||
), f"{self.time} {self.word}: type or definitions came back empty..."
|
), f"{self.time} {self.word}: type or definitions came back empty..."
|
||||||
return uqall({
|
return uqall(
|
||||||
self.word: {
|
{
|
||||||
"type": self.type,
|
self.word: {
|
||||||
"definitions": self.definitions,
|
"type": self.type,
|
||||||
"pronounciation": self.pronounciation,
|
"definitions": self.definitions,
|
||||||
"synonyms": self.synonyms,
|
"pronounciation": self.pronounciation,
|
||||||
"history_and_etymology": self.history_and_etymology,
|
"synonyms": self.synonyms,
|
||||||
"wendungen": self.wendungen,
|
"history_and_etymology": self.history_and_etymology,
|
||||||
"time_of_retrieval": self.time,
|
"wendungen": self.wendungen,
|
||||||
|
"time_of_retrieval": self.time,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
})
|
)
|
||||||
|
|
||||||
|
|
||||||
# d = DudenParser("hinfallen")
|
# d = DudenParser("hinfallen")
|
||||||
# print(d.neighbours)
|
# print(d.neighbours)
|
||||||
# print(d.todict())
|
# print(d.todict())
|
||||||
# exit()
|
# exit()
|
||||||
|
|
||||||
q = Queue(DudenParser, "de_duden/", "_duden.json")
|
q = Queue(DudenParser, "de_Duden/", "_D.json")
|
||||||
q.loadDB()
|
q.loadDB()
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
|
@ -108,20 +108,22 @@ class MerriamWebsterParser(WordParser):
|
|||||||
assert (
|
assert (
|
||||||
self.type or self.definitions
|
self.type or self.definitions
|
||||||
), f"{self.time} {self.word}: type or definitions came back empty..."
|
), f"{self.time} {self.word}: type or definitions came back empty..."
|
||||||
return uqall({
|
return uqall(
|
||||||
self.word: {
|
{
|
||||||
"type": self.type,
|
self.word: {
|
||||||
"definitions": self.definitions,
|
"type": self.type,
|
||||||
"pronounciation": self.pronounciation,
|
"definitions": self.definitions,
|
||||||
"synonyms": self.synonyms,
|
"pronounciation": self.pronounciation,
|
||||||
"antonyms": self.antonyms,
|
"synonyms": self.synonyms,
|
||||||
"synonym_discussion": self.synonym_discussion,
|
"antonyms": self.antonyms,
|
||||||
"examples": self.examples,
|
"synonym_discussion": self.synonym_discussion,
|
||||||
"history_and_etymology": self.history_and_etymology,
|
"examples": self.examples,
|
||||||
"first_known_use": self.first_known_use,
|
"history_and_etymology": self.history_and_etymology,
|
||||||
"time_of_retrieval": self.time,
|
"first_known_use": self.first_known_use,
|
||||||
|
"time_of_retrieval": self.time,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
})
|
)
|
||||||
|
|
||||||
|
|
||||||
# testword = "revivalist"
|
# testword = "revivalist"
|
||||||
@ -132,7 +134,7 @@ class MerriamWebsterParser(WordParser):
|
|||||||
# print(f"### {k} ###\n", v)
|
# print(f"### {k} ###\n", v)
|
||||||
# exit()
|
# exit()
|
||||||
|
|
||||||
q = Queue(MerriamWebsterParser, "en_merriam_webster/", "_mw.json", prefix_length=2)
|
q = Queue(MerriamWebsterParser, "en_MerriamWebster/", "_MW.json")
|
||||||
q.loadDB()
|
q.loadDB()
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
|
23
t.py
23
t.py
@ -10,11 +10,22 @@ prefix = query[:2]
|
|||||||
|
|
||||||
d = Dictionary("en_MW_thesaurus/", f"{prefix}_mwt.json")
|
d = Dictionary("en_MW_thesaurus/", f"{prefix}_mwt.json")
|
||||||
print(f"### {query:<70}###")
|
print(f"### {query:<70}###")
|
||||||
print("================================================================================")
|
print(
|
||||||
for k,v in d[query].items():
|
"================================================================================"
|
||||||
|
)
|
||||||
|
for k, v in d[query].items():
|
||||||
if k != "type":
|
if k != "type":
|
||||||
print(f" {k}")
|
print(f" {k}")
|
||||||
print("--------------------------------------------------------------------------------")
|
print(
|
||||||
for ka in ["synonyms", "related" if "related" in v else "near synonyms", "near antonyms", "antonyms"]:
|
"--------------------------------------------------------------------------------"
|
||||||
print(f"{ka:^13}: {' | '.join(v[ka])}")
|
)
|
||||||
print("--------------------------------------------------------------------------------")
|
for ka in [
|
||||||
|
"synonyms",
|
||||||
|
"related" if "related" in v else "near synonyms",
|
||||||
|
"near antonyms",
|
||||||
|
"antonyms",
|
||||||
|
]:
|
||||||
|
print(f"{ka:^13}: {' | '.join(v[ka])}")
|
||||||
|
print(
|
||||||
|
"--------------------------------------------------------------------------------"
|
||||||
|
)
|
||||||
|
Loading…
Reference in New Issue
Block a user