diff --git a/Cargo.lock b/Cargo.lock index b59d7d5..e217dad 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -136,7 +136,10 @@ name = "crawler" version = "0.1.0" dependencies = [ "config", + "dirs", + "serde", "tokio", + "url", ] [[package]] @@ -159,12 +162,42 @@ dependencies = [ "crypto-common", ] +[[package]] +name = "dirs" +version = "5.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225" +dependencies = [ + "dirs-sys", +] + +[[package]] +name = "dirs-sys" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "520f05a5cbd335fae5a99ff7a6ab8627577660ee5cfd6a94a6a929b52ff0321c" +dependencies = [ + "libc", + "option-ext", + "redox_users", + "windows-sys", +] + [[package]] name = "dlv-list" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0688c2a7f92e427f44895cd63841bff7b29f8d7a1648b9e7e07a4a365b2e1257" +[[package]] +name = "form_urlencoded" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652" +dependencies = [ + "percent-encoding", +] + [[package]] name = "frontend" version = "0.1.0" @@ -211,6 +244,16 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b" +[[package]] +name = "idna" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c" +dependencies = [ + "unicode-bidi", + "unicode-normalization", +] + [[package]] name = "itoa" version = "1.0.9" @@ -323,6 +366,12 @@ version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" +[[package]] +name = "option-ext" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" + [[package]] name = "ordered-multimap" version = "0.4.3" @@ -351,7 +400,7 @@ checksum = "93f00c865fe7cabf650081affecd3871070f26767e7b2070a3ffae14c654b447" dependencies = [ "cfg-if", "libc", - "redox_syscall", + "redox_syscall 0.3.5", "smallvec", "windows-targets", ] @@ -362,6 +411,12 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8835116a5c179084a830efb3adc117ab007512b535bc1a21c991d3b32a6b44dd" +[[package]] +name = "percent-encoding" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" + [[package]] name = "pest" version = "2.7.1" @@ -430,6 +485,15 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "redox_syscall" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" +dependencies = [ + "bitflags", +] + [[package]] name = "redox_syscall" version = "0.3.5" @@ -439,6 +503,17 @@ dependencies = [ "bitflags", ] +[[package]] +name = "redox_users" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" +dependencies = [ + "getrandom", + "redox_syscall 0.2.16", + "thiserror", +] + [[package]] name = "ron" version = "0.7.1" @@ -576,6 +651,21 @@ dependencies = [ "syn", ] +[[package]] +name = "tinyvec" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + [[package]] name = "tokio" version = "1.29.1" @@ -628,12 +718,39 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed646292ffc8188ef8ea4d1e0e0150fb15a5c2e12ad9b8fc191ae7a8a7f3c4b9" +[[package]] +name = "unicode-bidi" +version = "0.3.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" + [[package]] name = "unicode-ident" version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" +[[package]] +name = "unicode-normalization" +version = "0.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "url" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50bff7831e19200a85b17131d085c25d7811bc4e186efdaf54bbd132994a88cb" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", + "serde", +] + [[package]] name = "version_check" version = "0.9.4" diff --git a/Cargo.toml b/Cargo.toml index c88d74c..a1480f4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,8 +9,10 @@ license = "AGPL-3.0-or-later" [workspace.dependencies] config = "0.13.3" +dirs = "5.0.1" chrono = "0.4.26" tokio = { version = "1.29.1", features = ["full"] } +url = { version = "2.4.0", features = [ "serde" ] } [profile.release] strip = true diff --git a/crawler/Cargo.toml b/crawler/Cargo.toml index 03c5d6c..1fcc54f 100644 --- a/crawler/Cargo.toml +++ b/crawler/Cargo.toml @@ -9,3 +9,6 @@ license.workspace = true [dependencies] tokio.workspace = true config.workspace = true +dirs.workspace = true +url.workspace = true +serde = { version = "1.0.175", features = [ "derive" ] } diff --git a/crawler/src/main.rs b/crawler/src/main.rs index 94f4cc1..818db0e 100644 --- a/crawler/src/main.rs +++ b/crawler/src/main.rs @@ -1,4 +1,10 @@ +mod settings; + +use settings::Settings; + #[tokio::main] async fn main() { - println!("meow!"); + let settings = Settings::new(); + + println!("{:?}", settings); } diff --git a/crawler/src/settings.rs b/crawler/src/settings.rs new file mode 100644 index 0000000..d768b84 --- /dev/null +++ b/crawler/src/settings.rs @@ -0,0 +1,28 @@ +use config::{Config, ConfigError, Environment, File}; +use serde::{Deserialize, Serialize}; +use std::path::PathBuf; +use url::Url; + +#[derive(Debug, Deserialize)] +#[allow(unused)] +pub struct Settings { + strict: bool, + timeout: u8, + sitemap: Vec, +} + +impl Settings { + pub fn new() -> Result { + let mut config_dir = match dirs::config_dir() { + Some(v) => v, + None => PathBuf::new(), + }; + config_dir.push("ferret/crawler"); + + let s = Config::builder() + .add_source(File::from(config_dir)) + .build()?; + + s.try_deserialize() + } +}