from json import dumps as json

import llm
import llm.cli
import sqlite_utils

from .http import Request, JSONResponse, WebSocket
from .tid import tid_now
# Handle to llm's shared logs database (the same SQLite file the `llm` CLI
# writes to), which holds the `conversations` and `responses` tables used below.
db = sqlite_utils.Database(llm.cli.logs_db_path())
async def list_conversations(request: Request):
    """Return every stored conversation as a JSON list.

    Reads all rows from the ``conversations`` table of the llm logs
    database and responds with ``[{"id": ..., "name": ...}, ...]``.
    """
    # Comprehension instead of a manual append loop (ruff PERF401).
    conversations = [
        {"id": row["id"], "name": row["name"]}
        for row in db["conversations"].rows
    ]
    return JSONResponse(conversations)
async def delete_conversation(request: Request):
    """Delete one conversation and all of its logged responses.

    The conversation id is taken from the ``conversation`` path parameter.
    Responses are removed first so no orphaned rows keep referencing the
    conversation being deleted.
    """
    target_id = request.path_params["conversation"]
    # Dependent rows first, then the conversation row itself.
    db["responses"].delete_where("conversation_id = ?", [target_id])
    db["conversations"].delete(target_id)
    return JSONResponse({"status": "ok"})
async def connect_to_conversation(ws: WebSocket):
    """WebSocket endpoint: replay a conversation's history, then stream
    new prompts and model responses over the socket.

    Path param ``conversation`` is an existing conversation id, or the
    literal ``"new"`` to start a fresh one (model picked via the
    ``model`` query param).  A truthy ``continue`` query param marks a
    resumed session whose client already holds the transcript.

    Wire protocol — one JSON object per text frame, single-letter keys:
    ``m`` model id, ``sys`` system prompt, ``u`` user prompt, ``f`` full
    (already-complete) response text, ``n`` conversation name, ``i`` new
    conversation id, ``s``/``r``/``d`` stream start / chunk / done
    (all tagged with a tid), ``err`` streaming error.
    """
    continuing = bool(ws.query_params.get("continue"))
    conversation_id = ws.path_params["conversation"]
    if conversation_id == "new":
        conversation = llm.AsyncConversation(llm.get_async_model(ws.query_params.get("model")))
    else:
        try:
            conversation: llm.AsyncConversation = llm.cli.load_conversation(conversation_id, async_=True)
        except Exception:
            # Narrowed from a bare ``except:`` so KeyboardInterrupt and
            # task cancellation are no longer swallowed here.
            await ws.send_denial_response(JSONResponse({
                "error": "unable to load conversation {}".format(conversation_id)
            }, status_code=404))
            return

    await ws.accept()

    # only send the system prompt at the start of a conversation
    system_prompt = llm.cli.load_template("girlypop").system

    if not continuing:
        await ws.send_text(json({"m": conversation.model.model_id}))

        # Replay the logged history so the client can render the transcript.
        # NOTE(review): on a resumed session this replay is skipped, so
        # system_prompt stays set and is re-sent with the next prompt —
        # confirm that is intended.
        for response in conversation.responses:
            response: llm.AsyncResponse = response
            if not response._done: continue
            if response.prompt.system:
                # History already carries a system prompt; don't attach it
                # to the next model call again.
                system_prompt = None
                await ws.send_text(json({"sys": response.prompt.system}))
            await ws.send_text(json({"u": response.prompt.prompt})) # user
            await ws.send_text(json({"f": response.text_or_raise()})) # full

        if conversation.name:
            await ws.send_text(json({"n": conversation.name}))

    if conversation_id == "new":
        await ws.send_text(json({"i": conversation.id}))

    # Main loop: every incoming text frame is a user prompt to stream back.
    async for message in ws.iter_text():
        if system_prompt:
            await ws.send_text(json({"sys": system_prompt}))
        response = conversation.prompt(message, system=system_prompt, stream=True)
        system_prompt = None  # attach the system prompt only once

        response_tid = tid_now()
        await ws.send_text(json({"u": message}))
        await ws.send_text(json({"s": response_tid})) # start
        try:
            async for chunk in response:
                await ws.send_text(json({"r": response_tid, "c": chunk}))
        except BaseException as e:
            # NOTE(review): BaseException also catches task cancellation —
            # presumably so a partial response is still reported and logged
            # below; confirm before narrowing this to Exception.
            await ws.send_text(json({"err": str(e), "r": response_tid}))
        await ws.send_text(json({"d": response_tid})) # done
        (await response.to_sync_response()).log_to_db(db)

        # Logging may have auto-named the conversation; push the name to
        # the client the first time it appears.
        if not conversation.name:
            new_conversation_name = llm.cli.load_conversation(conversation.id).name
            if new_conversation_name:
                conversation.name = new_conversation_name
                await ws.send_text(json({"n": new_conversation_name}))