# llm-py-web/server/inference.py

import json

import llm
import llm.cli
import sqlite_utils

from .http import JSONResponse, Request, WebSocket

# Open the shared llm logs database (the same one the llm CLI writes to).
db = sqlite_utils.Database(llm.cli.logs_db_path())
async def list_conversations(request: Request):
    # Return the id and name of every logged conversation.
    conversations = [
        {"id": row["id"], "name": row["name"]}
        for row in db["conversations"].rows
    ]
    return JSONResponse(conversations)
# System prompt applied to every message, loaded from the llm template
# named "girlypop".
girlypop_prompt = llm.cli.load_template("girlypop").system
async def connect_to_conversation(ws: WebSocket):
    conversation_id = ws.path_params["conversation"]
    if conversation_id == "new":
        # Start a fresh conversation against the default async model.
        conversation = llm.AsyncConversation(llm.get_async_model())
    else:
        try:
            conversation: llm.AsyncConversation = llm.cli.load_conversation(
                conversation_id, async_=True
            )
        except Exception:
            # Refuse the handshake with a JSON body rather than accepting
            # and immediately closing.
            await ws.send_denial_response(JSONResponse({
                "error": "unable to load conversation {}".format(conversation_id)
            }))
            return
    await ws.accept()
    async for message in ws.iter_text():
        response = conversation.prompt(message, system=girlypop_prompt)
        # Stream each completion chunk as {"c": ...}; a final {"d": true}
        # frame tells the client the response is complete.
        async for chunk in response:
            await ws.send_text(json.dumps({"c": chunk}))
        await ws.send_text(json.dumps({"d": True}))  # done
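
# A minimal wiring sketch, not part of the original handlers: they follow
# Starlette's Request/WebSocket interface (path_params, iter_text,
# send_denial_response), so assuming the local .http module wraps Starlette,
# an application could mount them like this. The route paths and the `app`
# name are illustrative assumptions, not confirmed by this file.
from starlette.applications import Starlette
from starlette.routing import Route, WebSocketRoute

app = Starlette(routes=[
    Route("/conversations", list_conversations),
    # connect_to_conversation reads ws.path_params["conversation"], so the
    # route must declare a {conversation} path parameter.
    WebSocketRoute("/conversations/{conversation}", connect_to_conversation),
])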
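
# A hypothetical client for the streaming protocol above, assuming the
# websocket is mounted at /conversations/{conversation} as in the wiring
# sketch; it uses the third-party `websockets` package. The URL and the
# demo_client name are illustrative only.
import asyncio
import websockets

async def demo_client(url="ws://localhost:8000/conversations/new"):
    async with websockets.connect(url) as client:
        await client.send("hello!")
        async for frame in client:
            msg = json.loads(frame)
            if msg.get("d"):  # server's done marker
                break
            print(msg["c"], end="", flush=True)

if __name__ == "__main__":
    asyncio.run(demo_client())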