Compare commits

..

No commits in common. "b8b8e42b8e4b673b2844682e8d9ad74ae59efef5" and "47130f30d7c3f6fac4b5a8c6144568c0a3379c43" have entirely different histories.

6 changed files with 8 additions and 38 deletions

View File

@ -1,3 +1,4 @@
from itertools import chain
from dict_dl import Queue, WordParser, ot, rb, uq, uqall, only_first_text from dict_dl import Queue, WordParser, ot, rb, uq, uqall, only_first_text

3
d.py
View File

@ -1,7 +1,10 @@
#!/bin/python #!/bin/python
import os import os
import sys import sys
from itertools import zip_longest
from rich.console import Console
from rich.table import Table
from dict_dl import DictFile from dict_dl import DictFile

View File

@ -1,7 +1,8 @@
import random import random
from pathlib import Path
import uvicorn import uvicorn
from fastapi import FastAPI from fastapi import FastAPI, Response
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
from sqlmodel import Session, select, text from sqlmodel import Session, select, text

View File

@ -1,4 +1,5 @@
from dict_dl import Queue, WordParser, cw, ot, rb, uq, uqall from dict_dl import Queue, WordParser, cw, ot, rb, uq, uqall
import itertools
class MerriamWebsterParser(WordParser): class MerriamWebsterParser(WordParser):

View File

@ -1,36 +0,0 @@
"""Collect Merriam-Webster dictionary entry names into a local word list.

The script reads the word list already stored in ``db_file``, walks the
site's ``browse/dictionary/<letter>`` index pages to discover listing pages,
pulls the entry name out of every ``/dictionary/`` link found on those
listing pages, and writes the merged set back to ``db_file`` (one name per
line).
"""
import random
from pathlib import Path
from string import ascii_lowercase
from time import sleep
from urllib.parse import urlparse

from requests_html import HTMLSession

# NOTE(review): the block nesting below was reconstructed from a view in
# which indentation had been flattened -- confirm the loop structure against
# the original file before relying on it.

# Index sections to crawl: a-z plus the three extra sections named here.
letters = list(ascii_lowercase) + ["BIO", "GEO", "0-9"]
db_file = "ALL.db"
url_stem = "https://www.merriam-webster.com"

# Make sure the word list exists so the read below cannot fail on first run.
if not Path(db_file).is_file():
    Path(db_file).touch()

# Start from the previously saved words; the context manager closes the
# handle, which the bare open() inside a comprehension never did.
with open(db_file, "rt") as f:
    words = {line.strip() for line in f}

html_session = HTMLSession()

# Pass 1: gather the listing-page links for every index section.
links = set()
for letter in letters:
    url = f"{url_stem}/browse/dictionary/{letter}"
    r = html_session.get(url)
    # r.html.render()
    links |= {
        p.attrs["href"]
        for p in r.html.find(f'a[href*="browse/dictionary/{letter}/"]')
    }

# Pass 2: visit each listing page and keep the last path component of every
# dictionary link. The word list is written in ``finally`` so that whatever
# was collected survives a failed request or an interrupted run; since
# ``words`` starts from the file's contents, the write never loses entries.
try:
    for link in links:
        print(link)
        r = html_session.get(url_stem + link)
        words |= {
            Path(urlparse(word.attrs["href"]).path).name
            for word in r.html.find('a[href*="/dictionary/"]')
        }
        # Random pause of up to two seconds between page requests.
        sleep(random.random() * 2)
finally:
    with open(db_file, "wt") as f:
        # Sorted so repeated runs produce a stable, diff-friendly file.
        f.write("\n".join(sorted(words)))

View File

@ -6,7 +6,7 @@ from time import sleep
from urllib.parse import urlparse from urllib.parse import urlparse
from requests_html import HTMLSession from requests_html import HTMLSession
from sqlmodel import Field, Session, SQLModel, create_engine, select, Column, ARRAY, String from sqlmodel import Field, Relationship, Session, SQLModel, create_engine, select, Column, ARRAY, String
with open("db.secrets", "r") as f: with open("db.secrets", "r") as f:
db_pass = f.readline().strip() db_pass = f.readline().strip()