dict_dl/de_duden/dq.py

84 lines
2.4 KiB
Python
Raw Normal View History

2022-07-06 11:06:37 +00:00
import json
import random
import time
from pathlib import Path
from dp import WordParser
# Names of the plain-text state files (one word per line).
q = "queue"  # words still waiting to be fetched
sn = "snafus"  # words whose fetch failed
rd = "redo"  # words that are merged back into the queue each round

# In-memory dictionary, merged from every "*duden.json" file found in the
# current working directory.
duden = {}
for db_file in Path("./").glob("*duden.json"):
    with db_file.open("r") as f:
        duden.update(json.load(f))
def updatedb(picks):
    """Persist the dictionary shards touched by *picks* and the queue files.

    For every distinct initial letter among *picks*, the entries of the
    module-level ``duden`` dict whose key starts with that letter
    (case-insensitively) are written to ``<letter>_duden.json``.  Afterwards
    the module-level ``queue`` set is written to the file named by ``q`` and
    ``redo`` minus *picks* to the file named by ``rd``.  Finally a one-line
    progress summary is printed.

    Args:
        picks: Sized collection of the words handled in the current round.
    """
    start = time.time()
    # Distinct initials in first-seen order: several picks sharing a letter
    # need only one rewrite of that shard.  Empty strings have no initial and
    # are skipped instead of raising IndexError.
    initials = dict.fromkeys(w[0].lower() for w in picks if w)
    for c in initials:
        # k[:1] (rather than k[0]) tolerates an empty key.
        c_db = {k: v for k, v in duden.items() if k[:1].lower() == c}
        with open(f"{c}_duden.json", "w") as f:  # save DB shard
            json.dump(c_db, f, separators=(",", ":"), indent=2)
    with open(q, "w") as f:  # save queue
        f.write("\n".join(queue))
    with open(rd, "w") as f:  # save redo list
        f.write("\n".join(redo - set(picks)))
    print(
        f"{len(duden)} words collected, {len(queue)-len(picks)} words waiting in queue, {time.time() - start:.06f}s"
    )
# Parameters of the randomized delays: the main loop draws its sleep times
# (in seconds) from ranges bounded by TIME_BASE ** (TIME_EXPONENT * n).
TIME_BASE = 1.01
TIME_EXPONENT = 70  # incremented by the main loop (presumably as back-off)
def randtime(a, b, k=0):
    """Draw uniformly distributed random numbers between *a* and *b*.

    Args:
        a: One bound of the interval passed to ``random.uniform``.
        b: The other bound of the interval.
        k: Number of samples.  A falsy value (the default ``0``) requests a
            single bare float instead of a list.

    Returns:
        A single float when *k* is falsy, otherwise a list of *k* floats.
    """
    if not k:
        return random.uniform(a, b)
    return [random.uniform(a, b) for _ in range(k)]
# Main crawl loop.  Each round: reload the state files, pick 1-4 random words
# from the queue, fetch them one by one with a random pause after each, persist
# the results, then sleep for a longer random interval.
while True:
    start = time.time()
    # The state files are re-read every round; ``with`` closes each handle
    # right away instead of leaking it until garbage collection.
    with open(q, "r") as f:
        queue = {line.strip() for line in f}  # read queue
    with open(sn, "r") as f:
        snafus = {line.strip() for line in f}  # read snafus
    with open(rd, "r") as f:
        redo = {line.strip() for line in f}
    queue -= snafus
    # queue -= duden.keys()  # clean queue
    queue |= redo
    # A blank line in a state file strips to "", which is not a word.
    queue.discard("")
    redo.discard("")
    assert (
        time.time() - start < 1
    ), "WARNING: queue maintainance takes more than a second...."

    if not queue:
        # random.sample() would raise ValueError on an empty population.
        print("queue is empty, nothing left to fetch")
        break

    # Never ask for more words than the queue holds (ValueError otherwise).
    picks = random.sample(list(queue), k=min(len(queue), random.randint(1, 4)))
    wait_times = randtime(
        TIME_BASE**TIME_EXPONENT, TIME_BASE ** (TIME_EXPONENT * 3), k=len(picks)
    )
    wait_time_after = randtime(
        TIME_BASE ** (TIME_EXPONENT * 2), TIME_BASE ** (TIME_EXPONENT * 4)
    )
    for t, p in zip(wait_times, picks):
        w = None  # stays None when the WordParser constructor itself fails
        try:
            w = WordParser(p)  # fetch new word
            duden |= w.todict()
            queue |= set(w.neighbors)
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that Ctrl-C and
            # SystemExit still stop the crawler instead of being logged as a
            # failed word.
            if w is not None:
                # Keep the offending page for inspection; only possible when
                # the parser object for *this* word was actually created.
                with open("test.html", "w") as f:
                    f.write(w.xml_string)
            print("...try anew...")
            with open(sn, "a") as f:
                f.write(f"{p}\n")
            queue -= {p}
            updatedb(picks)
            TIME_EXPONENT += 1  # lengthen the waits of subsequent rounds
        time.sleep(t)
    updatedb(picks)
    print(
        f"---wait---{wait_time_after:.03f}s-------{TIME_BASE:.03f}^{TIME_EXPONENT:.03f}={TIME_BASE**TIME_EXPONENT:.03f}s------max {TIME_BASE**(TIME_EXPONENT*4):.03f}s--------------"
    )
    time.sleep(wait_time_after)