diff --git a/Cargo.lock b/Cargo.lock
index 8c01eaa..40b3a6a 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -395,13 +395,16 @@ dependencies = [
  "config",
  "dirs",
  "env_logger",
+ "fuzzy-matcher",
  "log",
  "scraper",
  "serde",
+ "serde_json",
  "sqlx",
  "tokio",
  "tracing",
  "tracing-subscriber",
+ "url",
  "whatlang",
 ]
 
@@ -835,6 +838,15 @@ dependencies = [
  "slab",
 ]
 
+[[package]]
+name = "fuzzy-matcher"
+version = "0.3.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "54614a3312934d066701a80f20f15fa3b56d67ac7722b39eea5b4c9dd1d66c94"
+dependencies = [
+ "thread_local",
+]
+
 [[package]]
 name = "fxhash"
 version = "0.2.1"
diff --git a/core/Cargo.toml b/core/Cargo.toml
index 018ba30..28d6f1e 100644
--- a/core/Cargo.toml
+++ b/core/Cargo.toml
@@ -16,8 +16,11 @@ config.workspace = true
 dirs.workspace = true
 scraper.workspace = true
 axum.workspace = true
+url.workspace = true
 whatlang = "0.16.2"
 ammonia = "3"
 tracing-subscriber = "0.3.17"
 tracing = "0.1.37"
 serde = "1.0.175"
+serde_json = "1.0.103"
+fuzzy-matcher = "0.3.7"
diff --git a/core/src/main.rs b/core/src/main.rs
index 2757ea2..acf9f62 100644
--- a/core/src/main.rs
+++ b/core/src/main.rs
@@ -3,18 +3,29 @@ extern crate log;
 use ammonia::clean;
 use axum::{
+    body::Bytes,
+    extract::State,
     http::StatusCode,
     response::IntoResponse,
     routing::{get, post},
     Json, Router,
 };
+use chrono::{DateTime, NaiveDateTime, Utc};
+use fuzzy_matcher::skim::SkimMatcherV2;
+use fuzzy_matcher::FuzzyMatcher;
 use scraper::{Html, Selector};
 use serde::{Deserialize, Serialize};
 use sqlx::sqlite::SqlitePool;
 use std::env;
 use std::net::SocketAddr;
+use std::sync::Arc;
+use url::Url;
 use whatlang::{detect_lang, Lang};
 
+struct AppState {
+    pool: SqlitePool,
+}
+
 #[tokio::main]
 async fn main() {
     tracing_subscriber::fmt::init();
 
@@ -22,12 +33,17 @@ async fn main() {
     let pool = SqlitePool::connect(&env::var("DATABASE_URL").unwrap())
         .await
         .unwrap();
+    // update_index(&pool).await;
+    let shared_state = Arc::new(AppState { pool: pool });
 
     let app = Router::new()
         // `GET /` goes to `root`
-        .route("/", get(root));
+        .route("/", get(root))
+        .route("/api/search", get(search))
+        .with_state(shared_state);
 
     let addr = SocketAddr::from(([127, 0, 0, 1], 3000));
     tracing::debug!("listening on {}", addr);
+
     axum::Server::bind(&addr)
         .serve(app.into_make_service())
         .await
@@ -38,6 +54,68 @@ async fn root() -> &'static str {
     "Hello, World!"
 }
 
+#[derive(Deserialize)]
+struct SearchQuery {
+    language: String,
+    include: String,
+    ignore: Option<Vec<String>>,
+}
+
+#[derive(Serialize)]
+struct SearchResult {
+    url: Url,
+    size: i64,
+    title: String,
+    summary: String,
+    last_updated: DateTime<Utc>,
+}
+
+async fn search(
+    State(state): State<Arc<AppState>>,
+    Json(query): Json<SearchQuery>,
+) -> Json<Vec<SearchResult>> {
+    let mut conn = state.pool.acquire().await.unwrap();
+    let list = sqlx::query!(
+        r#"
+        SELECT title, summary, url, content, last_updated, clicks, size
+        FROM search_index
+        WHERE language = ?1
+        ORDER BY last_updated
+        "#,
+        query.language
+    )
+    .fetch_all(&mut *conn)
+    .await
+    .unwrap();
+
+    let mut results = Vec::new();
+    let matcher = SkimMatcherV2::default();
+    for res in list {
+        let mut is_match = false;
+        if matcher.fuzzy_match(&res.title, &query.include).is_some() {
+            is_match = true;
+        } else if matcher.fuzzy_match(&res.summary, &query.include).is_some() {
+            is_match = true;
+        } else if matcher.fuzzy_match(&res.url, &query.include).is_some() {
+            is_match = true;
+        }
+        if is_match {
+            let timestamp = DateTime::<Utc>::from_utc(
+                NaiveDateTime::from_timestamp_opt(res.last_updated, 0).unwrap(),
+                Utc,
+            );
+            results.push(SearchResult {
+                url: Url::parse(&res.url).unwrap(),
+                size: res.size,
+                title: res.title,
+                summary: res.summary,
+                last_updated: timestamp,
+            });
+        }
+    }
+    return Json(results);
+}
+
 async fn update_index(pool: &SqlitePool) {
     let mut conn = pool.acquire().await.unwrap();
     let crawled = sqlx::query!(
@@ -47,7 +125,7 @@ async fn update_index(pool: &SqlitePool) {
         r#"
         SELECT url, last_fetched
         FROM crawled_urls
         ORDER BY last_fetched
         "#
     )
-    .fetch_all(pool)
+    .fetch_all(&mut *conn)
     .await
     .unwrap();
diff --git a/migrations/20230726004317_index.sql b/migrations/20230726004317_index.sql
index 800b221..b826f64 100644
--- a/migrations/20230726004317_index.sql
+++ b/migrations/20230726004317_index.sql
@@ -1,7 +1,6 @@
 CREATE TABLE IF NOT EXISTS search_index (
-    id INTEGER PRIMARY KEY NOT NULL,
-    url TEXT NOT NULL,
+    url TEXT PRIMARY KEY NOT NULL,
     clicks INTEGER NOT NULL DEFAULT 0,
     size INTEGER NOT NULL,
     language TEXT NOT NULL,