"""Word crawler: fetch queued words with ``WordParser`` and persist the results.

State is kept in plain files in the working directory:

* ``queue``  - words still to be fetched, one per line
* ``snafus`` - words whose fetch failed, one per line
* ``redo``   - words that should be fetched again, one per line
* ``<letter>_duden.json`` - collected entries, one shard per first letter

The script runs until the queue is empty (or it is interrupted), sleeping a
random amount of time between fetches.
"""
import json
import random
import time
from pathlib import Path

from dp import WordParser

# File names of the persistent word lists (relative to the working directory).
q = "queue"
sn = "snafus"
rd = "redo"

# All collected entries, merged from every "*duden.json" shard found on disk.
duden = {}
for db_file in Path("./").glob("*duden.json"):
    with open(db_file, "r") as f:
        duden |= json.load(f)


def _read_words(path):
    """Return the set of stripped, non-blank lines of the file at *path*.

    Blank lines are dropped so that an empty string can never end up as a
    "word" in the queue. The file is closed before returning.
    """
    with open(path, "r") as f:
        stripped = (line.strip() for line in f)
        return {word for word in stripped if word}


def updatedb(picks):
    """Persist the database shards touched by *picks* plus the word lists.

    For every distinct first letter among *picks* the matching shard
    ``<letter>_duden.json`` is rewritten with all entries of ``duden`` whose
    key starts with that letter (case-insensitively). Afterwards the global
    ``queue`` is written to the queue file and ``redo`` minus *picks* to the
    redo file, and a one-line summary is printed.

    Relies on the module-level ``duden``, ``queue`` and ``redo``; the latter
    two are (re)bound by the main loop before this function is called.

    Args:
        picks: iterable of words processed in the current round.
    """
    start = time.time()
    # Deduplicate the letters so each shard is written at most once per call.
    letters = {word[0].lower() for word in picks if word}
    for c in letters:
        c_db = {k: v for k, v in duden.items() if k[:1].lower() == c}
        with open(f"{c}_duden.json", "w") as f:  # save DB
            json.dump(c_db, f, separators=(",", ":"), indent=2)
    with open(q, "w") as f:  # save queue
        f.write("\n".join(queue))
    with open(rd, "w") as f:  # save redo list without the current picks
        f.write("\n".join(redo - set(picks)))
    print(
        f"{len(duden)} words collected, {len(queue)-len(picks)} words waiting in queue, {time.time() - start:.06f}s"
    )


# Waiting times are drawn between powers of TIME_BASE; TIME_EXPONENT is
# increased by one after every failed fetch, so the waits grow over time.
TIME_BASE = 1.01
TIME_EXPONENT = 70


def randtime(a, b, k=0):
    """Draw uniformly distributed random numbers between *a* and *b*.

    Args:
        a: one bound of the interval.
        b: the other bound of the interval.
        k: number of values to draw; ``0`` (the default) requests a scalar.

    Returns:
        A list of *k* floats if *k* is truthy, otherwise a single float.
    """
    if k:
        return [random.uniform(a, b) for _ in range(k)]
    return random.uniform(a, b)


while True:
    start = time.time()
    queue = _read_words(q)  # read queue
    snafus = _read_words(sn)  # read snafus
    redo = _read_words(rd)
    queue -= snafus
    # queue -= duden.keys()  # clean queue
    queue |= redo  # added after removing snafus, so redo words are always queued
    # NOTE(review): this aborts the crawler (unless run with -O) although the
    # message is worded as a warning - confirm that stopping is intended.
    assert (
        time.time() - start < 1
    ), "WARNING: queue maintainance takes more than a second...."

    if not queue:
        print("queue is empty, nothing left to fetch")
        break

    # Never ask for more words than the queue holds; random.sample() raises
    # ValueError otherwise.
    picks = random.sample(list(queue), k=random.randint(1, min(4, len(queue))))
    # One pause per picked word, plus a longer pause after the whole round.
    wait_times = randtime(
        TIME_BASE**TIME_EXPONENT, TIME_BASE ** (TIME_EXPONENT * 3), k=len(picks)
    )
    wait_time_after = randtime(
        TIME_BASE ** (TIME_EXPONENT * 2), TIME_BASE ** (TIME_EXPONENT * 4)
    )

    # NOTE(review): the original indentation of this loop was lost; the
    # nesting below (failure bookkeeping inside the handler, pause after every
    # word) is reconstructed from statement order - confirm. Under this
    # reading successfully fetched words stay in the queue file.
    for t, p in zip(wait_times, picks):
        w = None  # reset so the handler never sees the previous word's parser
        try:
            w = WordParser(p)  # fetch new word
            duden |= w.todict()
            queue |= set(w.neighbors)
        except Exception as err:  # let KeyboardInterrupt/SystemExit propagate
            # Keep the offending page for inspection, if one was retrieved.
            page = getattr(w, "xml_string", None)
            if page is not None:
                with open("test.html", "w") as f:
                    f.write(page)
            print(f"fetching {p!r} failed: {err!r}")
            print("...try anew...")
            with open(sn, "a") as f:  # remember the word as a snafu
                f.write(f"{p}\n")
            queue -= {p}
            updatedb(picks)
            TIME_EXPONENT += 1  # back off: later rounds wait longer
        time.sleep(t)

    updatedb(picks)
    print(
        f"---wait---{wait_time_after:.03f}s-------{TIME_BASE:.03f}^{TIME_EXPONENT:.03f}={TIME_BASE**TIME_EXPONENT:.03f}s------max {TIME_BASE**(TIME_EXPONENT*4):.03f}s--------------"
    )
    time.sleep(wait_time_after)