Justfile
parent
561ef2dfb4
commit
758c16b78b
|
@ -1,2 +1,2 @@
|
|||
/target
|
||||
test.db
|
||||
test.db*
|
||||
|
|
|
@ -0,0 +1,6 @@
|
|||
# Recreate the database from scratch: remove any existing database files,
# create a fresh database and apply all migrations.
# `-f` keeps the recipe from failing when no test.db* file exists yet
# (e.g. on a fresh checkout).
db:
    rm -f test.db*
    sqlx database create
    sqlx migrate run

# Rebuild the database via `db`, then run the crawler with info-level logging.
crawl: db
    RUST_LOG=info cargo run -p crawler
|
|
@ -17,4 +17,4 @@
|
|||
2. Clone this repository
|
||||
3. Install [sqlx-cli](https://lib.rs/crates/sqlx-cli)
|
||||
4. Set the `DATABASE_URL` environment variable to `"sqlite:todos.db"`
|
||||
5. Run `sqlx database create` and `sqlx migrate run`
|
||||
5. Install the [just](https://just.systems/) command runner
|
||||
|
|
|
@ -24,6 +24,9 @@ struct Page {
|
|||
async fn main() {
|
||||
env_logger::init();
|
||||
let settings = Settings::new().unwrap();
|
||||
let pool = SqlitePool::connect(&env::var("DATABASE_URL").unwrap())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let mut to_crawl = settings.sitemap;
|
||||
let mut crawled = 0;
|
||||
|
@ -55,8 +58,6 @@ async fn main() {
|
|||
|
||||
info!("Succesfully crawled {} pages!", crawled);
|
||||
|
||||
insert_db().await.unwrap();
|
||||
|
||||
let mut cache_dir = dirs::cache_dir().unwrap();
|
||||
cache_dir.push("ferret");
|
||||
for i in cacache::list_sync(cache_dir.as_path()) {
|
||||
|
@ -64,6 +65,7 @@ async fn main() {
|
|||
.await
|
||||
.unwrap();
|
||||
let decoded_page: Page = bincode::deserialize(&data).unwrap();
|
||||
insert_db(&pool, &decoded_page).await;
|
||||
info!(
|
||||
"Found page: {} {}",
|
||||
&decoded_page.url.as_str(),
|
||||
|
@ -150,11 +152,21 @@ async fn find_links(html: &str, base: &Url, allow: Vec<Url>) -> Vec<Url> {
|
|||
return links;
|
||||
}
|
||||
|
||||
async fn insert_db() -> Result<(), sqlx::Error> {
|
||||
let pool = SqlitePool::connect(&env::var("DATABASE_URL").unwrap()).await?;
|
||||
let row: (i64,) = sqlx::query_as("SELECT $1")
|
||||
.bind(150_i64)
|
||||
.fetch_one(&pool)
|
||||
.await?;
|
||||
Ok(())
|
||||
async fn insert_db(pool: &SqlitePool, page: &Page) {
|
||||
let mut conn = pool.acquire().await.unwrap();
|
||||
let url = page.url.clone().into_string();
|
||||
let timestamp = page.last_fetched.clone().timestamp();
|
||||
let id = sqlx::query!(
|
||||
r#"
|
||||
REPLACE INTO cached_urls ( last_fetched, url, body )
|
||||
VALUES ( ?1, ?2, ?3 )
|
||||
"#,
|
||||
timestamp,
|
||||
url,
|
||||
page.body
|
||||
)
|
||||
.execute(&mut *conn)
|
||||
.await
|
||||
.unwrap()
|
||||
.last_insert_rowid();
|
||||
}
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
-- Add migration script here
|
||||
CREATE TABLE IF NOT EXISTS cached_urls
|
||||
(
|
||||
last_fetched INTEGER PRIMARY KEY NOT NULL,
|
||||
url TEXT NOT NULL,
|
||||
body TEXT NOT NULL
|
||||
last_fetched INTEGER NOT NULL,
|
||||
url TEXT NOT NULL,
|
||||
body TEXT NOT NULL
|
||||
);
|
||||
|
|
Loading…
Reference in New Issue