Compare commits
No commits in common. "b8b8e42b8e4b673b2844682e8d9ad74ae59efef5" and "47130f30d7c3f6fac4b5a8c6144568c0a3379c43" have entirely different histories.
b8b8e42b8e
...
47130f30d7
@ -1,3 +1,4 @@
|
|||||||
|
from itertools import chain
|
||||||
|
|
||||||
from dict_dl import Queue, WordParser, ot, rb, uq, uqall, only_first_text
|
from dict_dl import Queue, WordParser, ot, rb, uq, uqall, only_first_text
|
||||||
|
|
||||||
|
3
d.py
3
d.py
@ -1,7 +1,10 @@
|
|||||||
#!/bin/python
|
#!/bin/python
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
from itertools import zip_longest
|
||||||
|
|
||||||
|
from rich.console import Console
|
||||||
|
from rich.table import Table
|
||||||
|
|
||||||
from dict_dl import DictFile
|
from dict_dl import DictFile
|
||||||
|
|
||||||
|
3
main.py
3
main.py
@ -1,7 +1,8 @@
|
|||||||
import random
|
import random
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
import uvicorn
|
import uvicorn
|
||||||
from fastapi import FastAPI
|
from fastapi import FastAPI, Response
|
||||||
from fastapi.middleware.cors import CORSMiddleware
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
from sqlmodel import Session, select, text
|
from sqlmodel import Session, select, text
|
||||||
|
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
from dict_dl import Queue, WordParser, cw, ot, rb, uq, uqall
|
from dict_dl import Queue, WordParser, cw, ot, rb, uq, uqall
|
||||||
|
import itertools
|
||||||
|
|
||||||
|
|
||||||
class MerriamWebsterParser(WordParser):
|
class MerriamWebsterParser(WordParser):
|
||||||
|
@ -1,36 +0,0 @@
|
|||||||
import random
|
|
||||||
from pathlib import Path
|
|
||||||
from time import sleep
|
|
||||||
from string import ascii_lowercase
|
|
||||||
from urllib.parse import urlparse
|
|
||||||
|
|
||||||
from requests_html import HTMLSession
|
|
||||||
|
|
||||||
letters = list(ascii_lowercase) + ["BIO", "GEO", "0-9"]
|
|
||||||
db_file = "ALL.db"
|
|
||||||
if not Path(db_file).is_file():
|
|
||||||
Path(db_file).touch()
|
|
||||||
|
|
||||||
html_session = HTMLSession()
|
|
||||||
url_stem = "https://www.merriam-webster.com"
|
|
||||||
|
|
||||||
links = set()
|
|
||||||
words = {line.strip() for line in open(db_file, "rt")}
|
|
||||||
|
|
||||||
for letter in letters:
|
|
||||||
url = f"https://www.merriam-webster.com/browse/dictionary/{letter}"
|
|
||||||
r = html_session.get(url)
|
|
||||||
# r.html.render()
|
|
||||||
links |= {
|
|
||||||
p.attrs["href"] for p in r.html.find(f'a[href*="browse/dictionary/{letter}/"]')
|
|
||||||
}
|
|
||||||
for link in links:
|
|
||||||
print(link)
|
|
||||||
r = html_session.get(url_stem + link)
|
|
||||||
words |= {
|
|
||||||
Path(urlparse(word.attrs["href"]).path).name
|
|
||||||
for word in r.html.find('a[href*="/dictionary/"]')
|
|
||||||
}
|
|
||||||
sleep(random.random() * 2)
|
|
||||||
with open(db_file, "wt") as f:
|
|
||||||
f.write("\n".join(list(words)))
|
|
@ -6,7 +6,7 @@ from time import sleep
|
|||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
from requests_html import HTMLSession
|
from requests_html import HTMLSession
|
||||||
from sqlmodel import Field, Session, SQLModel, create_engine, select, Column, ARRAY, String
|
from sqlmodel import Field, Relationship, Session, SQLModel, create_engine, select, Column, ARRAY, String
|
||||||
|
|
||||||
with open("db.secrets", "r") as f:
|
with open("db.secrets", "r") as f:
|
||||||
db_pass = f.readline().strip()
|
db_pass = f.readline().strip()
|
||||||
|
Loading…
Reference in New Issue
Block a user