# llm-py-web/server/inference.py

import json

import llm
import llm.cli
import sqlite_utils

from .http import JSONResponse, Request, WebSocket

# Open the shared llm logs database (the same one the llm CLI writes to).
db = sqlite_utils.Database(llm.cli.logs_db_path())
async def list_conversations(request: Request):
    # Return the id and name of every logged conversation.
    conversations = [
        {"id": row["id"], "name": row["name"]}
        for row in db["conversations"].rows
    ]
    return JSONResponse(conversations)
# System prompt applied to every message, loaded from the llm template
# named "girlypop".
girlypop_prompt = llm.cli.load_template("girlypop").system
async def connect_to_conversation(ws: WebSocket):
    conversation_id = ws.path_params["conversation"]
    if conversation_id == "new":
        # Start a fresh conversation against the default async model.
        conversation = llm.AsyncConversation(llm.get_async_model())
    else:
        try:
            conversation: llm.AsyncConversation = llm.cli.load_conversation(
                conversation_id, async_=True
            )
        except Exception:
            # Refuse the handshake with a JSON body rather than accepting
            # and immediately closing.
            await ws.send_denial_response(JSONResponse({
                "error": "unable to load conversation {}".format(conversation_id)
            }))
            return
    await ws.accept()
    async for message in ws.iter_text():
        response = conversation.prompt(message, system=girlypop_prompt)
        # Stream each completion chunk as {"c": ...}; a final {"d": true}
        # frame tells the client the response is complete.
        async for chunk in response:
            await ws.send_text(json.dumps({"c": chunk}))
        await ws.send_text(json.dumps({"d": True}))  # done
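
# A minimal wiring sketch, not part of the original handlers: they follow
# Starlette's Request/WebSocket interface (path_params, iter_text,
# send_denial_response), so assuming the local .http module wraps Starlette,
# an application could mount them like this. The route paths and the `app`
# name are illustrative assumptions, not confirmed by this file.
from starlette.applications import Starlette
from starlette.routing import Route, WebSocketRoute

app = Starlette(routes=[
    Route("/conversations", list_conversations),
    # connect_to_conversation reads ws.path_params["conversation"], so the
    # route must declare a {conversation} path parameter.
    WebSocketRoute("/conversations/{conversation}", connect_to_conversation),
])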
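
# A hypothetical client for the streaming protocol above, assuming the
# websocket is mounted at /conversations/{conversation} as in the wiring
# sketch; it uses the third-party `websockets` package. The URL and the
# demo_client name are illustrative only.
import asyncio
import websockets

async def demo_client(url="ws://localhost:8000/conversations/new"):
    async with websockets.connect(url) as client:
        await client.send("hello!")
        async for frame in client:
            msg = json.loads(frame)
            if msg.get("d"):  # server's done marker
                break
            print(msg["c"], end="", flush=True)

if __name__ == "__main__":
    asyncio.run(demo_client())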