Should crawl pages after a certain age

main
~erin 2023-07-25 18:33:38 -04:00
parent 57684c037e
commit 326a6b8042
Signed by: erin
GPG Key ID: 0FEDEAFF1C14847E
1 changed file with 12 additions and 3 deletions

@@ -3,6 +3,7 @@ mod settings;
 #[macro_use]
 extern crate log;
 use chrono::prelude::*;
+use chrono::Duration;
 use scraper::{Html, Selector};
 use serde::{Deserialize, Serialize};
 use settings::Settings;
@@ -72,9 +73,17 @@ async fn crawl_url(url: Url, allow: Vec<Url>) -> Vec<Url> {
     for i in cacache::list_sync(cache_dir.as_path()) {
         match i {
             Ok(_) => {
-                if i.unwrap().key == url.clone().into_string() {
-                    error!("Already crawled {}", &url.as_str());
-                    return vec![];
+                if i.as_ref().unwrap().key == url.clone().into_string() {
+                    let now = Utc::now();
+                    let timestamp = DateTime::<Utc>::from_utc(
+                        NaiveDateTime::from_timestamp_opt(i.unwrap().time as i64, 0).unwrap(),
+                        Utc,
+                    );
+                    let diff = now - timestamp;
+                    if diff <= Duration::hours(1) {
+                        error!("Already crawled {}", &url.as_str());
+                        return vec![];
+                    }
                 }
             }
             Err(e) => error!("{}", e),
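
For readers outside the diff: below is a minimal standalone sketch of the freshness check this commit adds, using chrono 0.4. It follows the committed code in treating the cache entry's `time` value as whole seconds since the Unix epoch (the unit cacache actually records is an assumption here), and `is_fresh`, `entry_secs`, and `max_age` are hypothetical names for illustration, not part of the crawler.

use chrono::{DateTime, Duration, NaiveDateTime, Utc};

// Hypothetical helper: true if an entry created at `entry_secs` (seconds
// since the Unix epoch, mirroring how the commit reads cacache's `time`
// field) is still younger than `max_age`, i.e. the URL can be skipped.
fn is_fresh(entry_secs: i64, max_age: Duration) -> bool {
    let created = DateTime::<Utc>::from_utc(
        NaiveDateTime::from_timestamp_opt(entry_secs, 0).expect("timestamp in range"),
        Utc,
    );
    // Same comparison as the commit: once the entry ages past the cutoff,
    // the guard stops matching and the page is crawled again.
    Utc::now() - created <= max_age
}

fn main() {
    // An entry cached 30 minutes ago is fresh under the one-hour cutoff,
    let recent = (Utc::now() - Duration::minutes(30)).timestamp();
    assert!(is_fresh(recent, Duration::hours(1)));
    // while one cached two hours ago is stale and gets re-crawled.
    let stale = (Utc::now() - Duration::hours(2)).timestamp();
    assert!(!is_fresh(stale, Duration::hours(1)));
}

The one-hour cutoff mirrors the `Duration::hours(1)` hardcoded in the commit; lifting it into the existing Settings struct would be a natural extension, though nothing in this change does so.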