Should re-crawl pages once their cached copy is older than a certain age
parent
57684c037e
commit
326a6b8042
|
@ -3,6 +3,7 @@ mod settings;
|
|||
#[macro_use]
|
||||
extern crate log;
|
||||
use chrono::prelude::*;
|
||||
use chrono::Duration;
|
||||
use scraper::{Html, Selector};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use settings::Settings;
|
||||
|
@ -72,9 +73,17 @@ async fn crawl_url(url: Url, allow: Vec<Url>) -> Vec<Url> {
|
|||
for i in cacache::list_sync(cache_dir.as_path()) {
|
||||
match i {
|
||||
Ok(_) => {
|
||||
if i.unwrap().key == url.clone().into_string() {
|
||||
error!("Already crawled {}", &url.as_str());
|
||||
return vec![];
|
||||
if i.as_ref().unwrap().key == url.clone().into_string() {
|
||||
let now = Utc::now();
|
||||
let timestamp = DateTime::<Utc>::from_utc(
|
||||
NaiveDateTime::from_timestamp_opt(i.unwrap().time as i64, 0).unwrap(),
|
||||
Utc,
|
||||
);
|
||||
let diff = now - timestamp;
|
||||
if diff <= Duration::hours(1) {
|
||||
error!("Already crawled {}", &url.as_str());
|
||||
return vec![];
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => error!("{}", e),
|
||||
|
|
Loading…
Reference in New Issue