diff --git a/server/inference.py b/server/inference.py
index 1b48751..46e75d9 100644
--- a/server/inference.py
+++ b/server/inference.py
@@ -1,7 +1,7 @@
 import llm, llm.cli, sqlite_utils
 from .http import Request, JSONResponse, WebSocket
 from .tid import tid_now
-import json
+from json import dumps as json
 
 db = sqlite_utils.Database(llm.cli.logs_db_path())
 girlypop_prompt = llm.cli.load_template("girlypop").system
@@ -39,17 +39,17 @@ async def connect_to_conversation(ws: WebSocket):
         if not response._done:
             continue
         if response.prompt.system:
             system_prompt = None
-        await ws.send_text(json.dumps({"u": response.prompt.prompt})) # user
-        await ws.send_text(json.dumps({"f": response.text_or_raise()})) # full
+        await ws.send_text(json({"u": response.prompt.prompt})) # user
+        await ws.send_text(json({"f": response.text_or_raise()})) # full
 
     async for message in ws.iter_text():
         response = conversation.prompt(message, system=system_prompt, stream=True)
         system_prompt = None
         response_tid = tid_now()
-        await ws.send_text(json.dumps({"u": message}))
-        await ws.send_text(json.dumps({"s": response_tid})) # start
+        await ws.send_text(json({"u": message}))
+        await ws.send_text(json({"s": response_tid})) # start
         async for chunk in response:
-            await ws.send_text(json.dumps({"r": response_tid, "c": chunk}))
-        await ws.send_text(json.dumps({"d": response_tid})) # done
+            await ws.send_text(json({"r": response_tid, "c": chunk}))
+        await ws.send_text(json({"d": response_tid})) # done
         (await response.to_sync_response()).log_to_db(db)
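
For context, the wire protocol this handler speaks is unchanged by the patch: every frame is a small JSON object keyed by message type, with "u" carrying the echoed user prompt, "f" a full replayed response from conversation history, "s" the TID that opens a stream, "r"/"c" a chunk of that stream, and "d" the TID that closes it. A minimal client sketch follows, assuming the handler is mounted at ws://localhost:8000/conversation (a hypothetical URL, not taken from this repo) and using the third-party websockets package:

import asyncio
import json

import websockets


async def chat(prompt: str) -> None:
    # Hypothetical endpoint; substitute wherever connect_to_conversation is routed.
    async with websockets.connect("ws://localhost:8000/conversation") as ws:
        await ws.send(prompt)
        async for raw in ws:
            msg = json.loads(raw)
            if "u" in msg:
                print(f"> {msg['u']}")               # echoed user prompt
            elif "f" in msg:
                print(msg["f"])                      # full prior response (history replay)
            elif "s" in msg:
                pass                                 # stream opening; TID is msg["s"]
            elif "r" in msg:
                print(msg["c"], end="", flush=True)  # streamed chunk for TID msg["r"]
            elif "d" in msg:
                print()                              # stream done
                break

asyncio.run(chat("hello"))

The break after "d" is only for this one-shot example; a long-lived client would keep iterating and correlate chunks to their stream via the TID, which is what the "s"/"r"/"d" framing exists to support.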