//! A small search-engine backend: `update_index` turns crawled pages into a
//! SQLite search index, and `/api/search` fuzzy-matches queries against it.

use axum::{extract::State, routing::get, Json, Router};
use chrono::{DateTime, NaiveDateTime, Utc};
use fuzzy_matcher::skim::SkimMatcherV2;
use fuzzy_matcher::FuzzyMatcher;
use scraper::{Html, Selector};
use serde::{Deserialize, Serialize};
use sqlx::sqlite::SqlitePool;
use std::env;
use std::net::SocketAddr;
use std::sync::Arc;
use url::Url;
use whatlang::detect_lang;

/// Shared application state handed to every request handler.
struct AppState {
    pool: SqlitePool,
}

#[tokio::main]
async fn main() {
    tracing_subscriber::fmt::init();

    // Connect to the SQLite database named by DATABASE_URL.
    let pool = SqlitePool::connect(&env::var("DATABASE_URL").unwrap())
        .await
        .unwrap();

    // update_index(&pool).await;

    let shared_state = Arc::new(AppState { pool });

    let app = Router::new()
        // `GET /` goes to `root`
        .route("/", get(root))
        .route("/api/search", get(search))
        .with_state(shared_state);

    let addr = SocketAddr::from(([127, 0, 0, 1], 3000));
    tracing::debug!("listening on {}", addr);
    axum::Server::bind(&addr)
        .serve(app.into_make_service())
        .await
        .unwrap();
}

async fn root() -> &'static str {
    "Hello, World!"
}

#[derive(Deserialize)]
struct SearchQuery {
    language: String,
    include: String,
    // Not used by the handler yet.
    ignore: Option<Vec<String>>,
}

#[derive(Serialize)]
struct SearchResult {
    url: Url,
    size: i64,
    title: String,
    summary: String,
    last_updated: DateTime<Utc>,
}

/// Fuzzy-matches the query against every indexed page in the requested
/// language and returns the matching pages.
async fn search(
    State(state): State<Arc<AppState>>,
    Json(query): Json<SearchQuery>,
) -> Json<Vec<SearchResult>> {
    let mut conn = state.pool.acquire().await.unwrap();

    let list = sqlx::query!(
        r#"
SELECT title, summary, url, content, last_updated, clicks, size
FROM search_index
WHERE language = ?1
ORDER BY last_updated
        "#,
        query.language
    )
    .fetch_all(&mut *conn)
    .await
    .unwrap();

    let mut results = Vec::new();
    let matcher = SkimMatcherV2::default();
    for res in list {
        // A row matches if the query fuzzy-matches its title, summary, or URL.
        let is_match = matcher.fuzzy_match(&res.title, &query.include).is_some()
            || matcher.fuzzy_match(&res.summary, &query.include).is_some()
            || matcher.fuzzy_match(&res.url, &query.include).is_some();

        if is_match {
            // `last_updated` is stored as a Unix timestamp (seconds).
            let timestamp = DateTime::<Utc>::from_utc(
                NaiveDateTime::from_timestamp_opt(res.last_updated, 0).unwrap(),
                Utc,
            );
            results.push(SearchResult {
                url: Url::parse(&res.url).unwrap(),
                size: res.size,
                title: res.title,
                summary: res.summary,
                last_updated: timestamp,
            });
        }
    }
    Json(results)
}

/// Rebuilds the search index from the raw crawled pages.
async fn update_index(pool: &SqlitePool) {
    let mut conn = pool.acquire().await.unwrap();

    let crawled = sqlx::query!(
        r#"
SELECT last_fetched, url, body
FROM crawled_urls
ORDER BY last_fetched
        "#
    )
    .fetch_all(&mut *conn)
    .await
    .unwrap();

    for res in crawled {
        // Size of the page body in bytes.
        let size = res.body.len() as u32;
        let lang = detect_lang(&res.body).unwrap().code();

        let document = Html::parse_document(&res.body);

        // Use the <title> element as the title, falling back to the URL.
        let title_selector = Selector::parse("title").unwrap();
        let title = match document.select(&title_selector).next() {
            Some(v) => v.inner_html(),
            None => res.url.clone(),
        };

        // Use the first paragraph as the summary, if the page has one.
        let desc_selector = Selector::parse("p").unwrap();
        let summary = match document.select(&desc_selector).next() {
            Some(v) => v.inner_html(),
            None => String::new(),
        };

        sqlx::query!(
            r#"
REPLACE INTO search_index ( url, size, language, title, summary, content, last_updated )
VALUES ( ?1, ?2, ?3, ?4, ?5, ?6, ?7 )
            "#,
            res.url,
            size,
            lang,
            title,
            summary,
            res.body,
            res.last_fetched,
        )
        .execute(&mut *conn)
        .await
        .unwrap();
    }
}
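
// A sketch of exercising the search endpoint once the server is running and
// `search_index` has rows. The language code and search term below are
// illustrative examples, not values from the original source; `-X GET` is
// needed because the route is registered with `get` but still reads a JSON
// body, and `whatlang` produces three-letter codes such as "eng":
//
//   curl -X GET http://127.0.0.1:3000/api/search \
//        -H 'Content-Type: application/json' \
//        -d '{"language":"eng","include":"rust"}'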