main
~erin 2023-07-25 19:47:39 -04:00
parent 561ef2dfb4
commit 758c16b78b
Signed by: erin
GPG Key ID: 0FEDEAFF1C14847E
5 changed files with 32 additions and 14 deletions

2
.gitignore vendored
View File

@ -1,2 +1,2 @@
/target
-test.db
+test.db*

6
Justfile Normal file
View File

@ -0,0 +1,6 @@
db:
rm test.db*
sqlx database create
sqlx migrate run
crawl: db
RUST_LOG=info cargo run -p crawler

View File

@ -17,4 +17,4 @@
2. Clone this repository
3. Install [sqlx-cli](https://lib.rs/crates/sqlx-cli)
4. Set the `DATABASE_URL` environment variable to `"sqlite:todos.db"`
-5. Run `sqlx database create` and `sqlx migrate run`
+5. Install the [just](https://just.systems/) command runner

View File

@ -24,6 +24,9 @@ struct Page {
async fn main() {
env_logger::init();
let settings = Settings::new().unwrap();
let pool = SqlitePool::connect(&env::var("DATABASE_URL").unwrap())
.await
.unwrap();
let mut to_crawl = settings.sitemap;
let mut crawled = 0;
@ -55,8 +58,6 @@ async fn main() {
info!("Succesfully crawled {} pages!", crawled);
insert_db().await.unwrap();
let mut cache_dir = dirs::cache_dir().unwrap();
cache_dir.push("ferret");
for i in cacache::list_sync(cache_dir.as_path()) {
@ -64,6 +65,7 @@ async fn main() {
.await
.unwrap();
let decoded_page: Page = bincode::deserialize(&data).unwrap();
insert_db(&pool, &decoded_page).await;
info!(
"Found page: {} {}",
&decoded_page.url.as_str(),
@ -150,11 +152,21 @@ async fn find_links(html: &str, base: &Url, allow: Vec<Url>) -> Vec<Url> {
return links;
}
async fn insert_db() -> Result<(), sqlx::Error> {
let pool = SqlitePool::connect(&env::var("DATABASE_URL").unwrap()).await?;
let row: (i64,) = sqlx::query_as("SELECT $1")
.bind(150_i64)
.fetch_one(&pool)
.await?;
Ok(())
/// Insert (or overwrite) one crawled page in the `cached_urls` table.
///
/// `REPLACE INTO` deletes any conflicting row first, so re-crawling a URL
/// simply refreshes its cached body and fetch time.
///
/// # Panics
/// Panics if the database write fails — this crawler treats a failed
/// cache insert as fatal (same behavior as the original `unwrap`).
async fn insert_db(pool: &SqlitePool, page: &Page) {
    // `Url` implements `Display`; `to_string()` replaces the deprecated
    // `into_string()` and drops the clone the old code paid to call it.
    let url = page.url.to_string();
    // NOTE(review): assumes `last_fetched` is a chrono-style timestamp
    // whose `timestamp()` takes `&self`, so no clone is required — confirm.
    let timestamp = page.last_fetched.timestamp();
    // `&SqlitePool` is itself an `Executor`, so a single statement does
    // not need a manually acquired connection.
    sqlx::query!(
        r#"
REPLACE INTO cached_urls ( last_fetched, url, body )
VALUES ( ?1, ?2, ?3 )
        "#,
        timestamp,
        url,
        page.body
    )
    .execute(pool)
    .await
    .unwrap();
    // The previous version bound `last_insert_rowid()` to an unused `id`;
    // the rowid is never read, so it is no longer fetched.
}

View File

@ -1,7 +1,7 @@
-- Add migration script here
CREATE TABLE IF NOT EXISTS cached_urls
(
-last_fetched INTEGER PRIMARY KEY NOT NULL,
-url TEXT NOT NULL,
-body TEXT NOT NULL
+last_fetched INTEGER NOT NULL,
+url TEXT NOT NULL,
+body TEXT NOT NULL
);