Compare commits

..

2 Commits

Author SHA1 Message Date
julius
b8b8e42b8e
autoflake 2023-11-20 17:49:47 +01:00
julius
52238ad9ae
browse through the MW dictionary 2023-11-20 17:48:21 +01:00
6 changed files with 38 additions and 8 deletions

View File

@ -1,4 +1,3 @@
from itertools import chain
from dict_dl import Queue, WordParser, ot, rb, uq, uqall, only_first_text

3
d.py
View File

@ -1,10 +1,7 @@
#!/bin/python
import os
import sys
from itertools import zip_longest
from rich.console import Console
from rich.table import Table
from dict_dl import DictFile

View File

@ -1,8 +1,7 @@
import random
from pathlib import Path
import uvicorn
from fastapi import FastAPI, Response
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from sqlmodel import Session, select, text

View File

@ -1,5 +1,4 @@
from dict_dl import Queue, WordParser, cw, ot, rb, uq, uqall
import itertools
class MerriamWebsterParser(WordParser):

36
mw_browser.py Normal file
View File

@ -0,0 +1,36 @@
import random
from pathlib import Path
from time import sleep
from string import ascii_lowercase
from urllib.parse import urlparse
from requests_html import HTMLSession
# Crawl the Merriam-Webster "browse" index and collect the last path segment
# of every dictionary link into a newline-separated word list (``db_file``).
#
# NOTE(review): the loop nesting below was reconstructed from a view of this
# file that had lost its indentation - confirm that the link-crawling loop is
# meant to run after (not inside) the per-letter loop.

# Sections of the browse index: a-z plus the three special sections.
letters = list(ascii_lowercase) + ["BIO", "GEO", "0-9"]
db_file = "ALL.db"

# Make sure the word list exists so it can be opened for reading below.
if not Path(db_file).is_file():
    Path(db_file).touch()

html_session = HTMLSession()
url_stem = "https://www.merriam-webster.com"
links = set()

# Start from the words collected by previous runs. The context manager closes
# the handle again before the same file is reopened for writing further down.
with open(db_file, "rt", encoding="utf-8") as f:
    words = {line.strip() for line in f}
# A blank line in the file is not a word; do not carry it over.
words.discard("")

# Pass 1: gather the links to the paginated browse pages of every section.
for letter in letters:
    url = f"{url_stem}/browse/dictionary/{letter}"
    r = html_session.get(url)
    # r.html.render()
    links |= {
        p.attrs["href"] for p in r.html.find(f'a[href*="browse/dictionary/{letter}/"]')
    }

# Pass 2: visit every browse page and harvest the linked dictionary entries.
for link in links:
    print(link)
    # assumes the collected hrefs are site-relative paths - TODO confirm
    r = html_session.get(url_stem + link)
    # NOTE(review): ``href*=`` is a substring match, so hrefs containing
    # "/browse/dictionary/..." (if present on the page) match as well and
    # their last path segment ends up in ``words`` - confirm whether those
    # should be filtered out.
    words |= {
        Path(urlparse(word.attrs["href"]).path).name
        for word in r.html.find('a[href*="/dictionary/"]')
    }
    # Random pause of up to two seconds between requests.
    sleep(random.random() * 2)
    # Rewrite the whole list after every page so the file on disk always
    # reflects everything collected so far.
    with open(db_file, "wt", encoding="utf-8") as f:
        f.write("\n".join(words))

View File

@ -6,7 +6,7 @@ from time import sleep
from urllib.parse import urlparse
from requests_html import HTMLSession
from sqlmodel import Field, Relationship, Session, SQLModel, create_engine, select, Column, ARRAY, String
from sqlmodel import Field, Session, SQLModel, create_engine, select, Column, ARRAY, String
with open("db.secrets", "r") as f:
db_pass = f.readline().strip()