From 3ff819444d64038d8c1e6bd1e357c2875d9dc1df Mon Sep 17 00:00:00 2001 From: videogame hacker Date: Sun, 12 Sep 2021 13:07:20 +0100 Subject: [PATCH] Refactor & prepare for matrix HTML parsing --- Cargo.lock | 147 +++++++++++++++++++++++++++++ Cargo.toml | 1 + src/matrix.rs | 8 +- src/message_ast/convert_discord.rs | 32 +++---- src/message_ast/convert_matrix.rs | 34 ++++--- src/message_ast/convert_plain.rs | 4 +- src/message_ast/mod.rs | 4 +- 7 files changed, 191 insertions(+), 39 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7895146..7d48893 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -584,6 +584,16 @@ dependencies = [ "winapi", ] +[[package]] +name = "futf" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c9c1ce3fa9336301af935ab852c437817d14cd33690446569392e65170aac3b" +dependencies = [ + "mac", + "new_debug_unreachable", +] + [[package]] name = "futures" version = "0.3.16" @@ -857,6 +867,20 @@ dependencies = [ "utf8-width", ] +[[package]] +name = "html5ever" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aafcf38a1a36118242d29b92e1b08ef84e67e4a5ed06e0a80be20e6a32bfed6b" +dependencies = [ + "log", + "mac", + "markup5ever", + "proc-macro2 1.0.28", + "quote 1.0.9", + "syn 1.0.74", +] + [[package]] name = "http" version = "0.2.4" @@ -1108,12 +1132,32 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "mac" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" + [[package]] name = "maplit" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" +[[package]] +name = "markup5ever" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a24f40fb03852d1cdd84330cddcaf98e9ec08a7b7768e952fad3b4cf048ec8fd" +dependencies = [ + "log", + "phf", + "phf_codegen", + "string_cache", + "string_cache_codegen", + "tendril", +] + [[package]] name = "matches" version = "0.1.8" @@ -1318,6 +1362,12 @@ dependencies = [ "tempfile", ] +[[package]] +name = "new_debug_unreachable" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54" + [[package]] name = "ntapi" version = "0.3.6" @@ -1489,6 +1539,44 @@ version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" +[[package]] +name = "phf" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12" +dependencies = [ + "phf_shared", +] + +[[package]] +name = "phf_codegen" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbffee61585b0411840d3ece935cce9cb6321f01c45477d30066498cd5e1a815" +dependencies = [ + "phf_generator", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17367f0cc86f2d25802b2c26ee58a7b23faeccf78a396094c13dced0d0182526" +dependencies = [ + "phf_shared", + "rand 0.7.3", +] + +[[package]] +name = "phf_shared" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c00cf8b9eafe68dde5e9eaa2cef8ee84a9336a47d566ec55ca16589633b65af7" +dependencies = [ + "siphasher", +] + [[package]] name = "phoebe" version = "0.1.0" @@ -1497,6 +1585,7 @@ dependencies = [ "discord_message_format", "env_logger", "html-escape", + "html5ever", "log", "matrix-sdk", "serde", @@ -1596,6 +1685,12 @@ version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857" +[[package]] +name = "precomputed-hash" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" + [[package]] name = "proc-macro-crate" version = "1.0.0" @@ -1675,6 +1770,7 @@ dependencies = [ "rand_chacha 0.2.2", "rand_core 0.5.1", "rand_hc 0.2.0", + "rand_pcg", ] [[package]] @@ -1745,6 +1841,15 @@ dependencies = [ "rand_core 0.6.3", ] +[[package]] +name = "rand_pcg" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16abd0c1b639e9eb4d7c50c0b8100b0d0f849be2349829c740fe8e6eb4816429" +dependencies = [ + "rand_core 0.5.1", +] + [[package]] name = "rayon" version = "1.5.1" @@ -2313,6 +2418,12 @@ version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c19772be3c4dd2ceaacf03cb41d5885f2a02c4d8804884918e3a258480803335" +[[package]] +name = "siphasher" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "533494a8f9b724d33625ab53c6c4800f7cc445895924a8ef649222dcb76e938b" + [[package]] name = "slab" version = "0.4.4" @@ -2424,6 +2535,31 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "213701ba3370744dcd1a12960caa4843b3d68b4d1c0a5d575e0d65b2ee9d16c0" +[[package]] +name = "string_cache" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ddb1139b5353f96e429e1a5e19fbaf663bddedaa06d1dbd49f82e352601209a" +dependencies = [ + "lazy_static", + "new_debug_unreachable", + "phf_shared", + "precomputed-hash", + "serde", +] + +[[package]] +name = "string_cache_codegen" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f24c8e5e19d22a726626f1a5e16fe15b132dcf21d10177fa5a45ce7962996b97" +dependencies = [ + "phf_generator", + "phf_shared", + "proc-macro2 1.0.28", + "quote 1.0.9", +] + [[package]] name = "subtle" version = "2.4.1" @@ -2478,6 +2614,17 @@ dependencies = [ "winapi", ] +[[package]] +name = "tendril" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9ef557cb397a4f0a5a3a628f06515f78563f2209e64d47055d9dc6052bf5e33" +dependencies = [ + "futf", + "mac", + "utf-8", +] + [[package]] name = "termcolor" version = "1.1.2" diff --git a/Cargo.toml b/Cargo.toml index d0701be..ef4dac7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,6 +14,7 @@ url = "2.2.2" log = "0.4.14" env_logger = "0.9.0" html-escape = "0.2.9" +html5ever = "0.25.1" [dependencies.serenity] version = "0.10.9" diff --git a/src/matrix.rs b/src/matrix.rs index 2363513..cdec691 100644 --- a/src/matrix.rs +++ b/src/matrix.rs @@ -20,7 +20,7 @@ use tokio::sync::mpsc; use url::Url; use crate::{ - message_ast::{convert_plain, format_discord, format_matrix}, + message_ast::{convert_matrix, convert_plain, format_discord, format_matrix}, messages::{MessageAuthor, MessageReference, SentMessage}, }; @@ -87,14 +87,14 @@ impl EventHandler for MatrixHandler { match message_type { MessageType::Text(text) => { - let content = if let Some(_html) = text + let content = if let Some(html) = text .formatted .as_ref() .filter(|f| f.format == MessageFormat::Html) .map(|f| &f.body) { // TODO: Parse html_body into MessageContent AST - convert_plain(&text.body) + convert_matrix(html) } else { convert_plain(&text.body) }; @@ -106,7 +106,7 @@ impl EventHandler for MatrixHandler { author: MessageAuthor { display_name: sender .display_name() - .unwrap_or(sender.name()) + .unwrap_or_else(|| sender.name()) .to_string(), }, }); diff --git a/src/message_ast/convert_discord.rs b/src/message_ast/convert_discord.rs index 1c6378d..9a75805 100644 --- a/src/message_ast/convert_discord.rs +++ b/src/message_ast/convert_discord.rs @@ -1,7 +1,7 @@ -use super::{MessageContent, Styled}; +use super::{MessageComponent, MessageContent}; use discord_message_format::DiscordComponent; -impl<'a> From<&DiscordComponent<'a>> for Styled { +impl<'a> From<&DiscordComponent<'a>> for MessageComponent { fn from(discord: &DiscordComponent<'a>) -> Self { match discord { DiscordComponent::Plain(text) => Self::Plain(text.to_string()), @@ -38,38 +38,38 @@ impl<'a> From<&DiscordComponent<'a>> for Styled { pub fn convert_discord(discord_message: &[DiscordComponent<'_>]) -> MessageContent { discord_message .iter() - .map(Styled::from) + .map(MessageComponent::from) .collect::() } -pub fn format_discord(message_content: &MessageContent) -> String { +pub fn format_discord(message_content: &[MessageComponent]) -> String { message_content .iter() .map(|component| match component { - Styled::Plain(text) => text.to_string(), // TODO: Escape + MessageComponent::Plain(text) => text.to_string(), // TODO: Escape - Styled::Link { target, .. } => target.to_string(), // TODO: Link text + MessageComponent::Link { target, .. } => target.to_string(), // TODO: Link text - Styled::Italic(inner) => format!("*{}*", format_discord(inner)), - Styled::Bold(inner) => format!("**{}**", format_discord(inner)), - Styled::Strikethrough(inner) => format!("~~{}~~", format_discord(inner)), - Styled::Underline(inner) => format!("__{}__", format_discord(inner)), + MessageComponent::Italic(inner) => format!("*{}*", format_discord(inner)), + MessageComponent::Bold(inner) => format!("**{}**", format_discord(inner)), + MessageComponent::Strikethrough(inner) => format!("~~{}~~", format_discord(inner)), + MessageComponent::Underline(inner) => format!("__{}__", format_discord(inner)), - Styled::Code(code) => format!("`{}`", code), // TODO: Double-grave delimiting when code contains '`' - Styled::CodeBlock { lang, source } => { + MessageComponent::Code(code) => format!("`{}`", code), // TODO: Double-grave delimiting when code contains '`' + MessageComponent::CodeBlock { lang, source } => { format!( "```{}\n{}\n```", lang.as_ref() .map(|s| s.to_string()) - .unwrap_or("".to_string()), + .unwrap_or_else(|| "".to_string()), source.to_string() ) } - Styled::Spoiler { content, .. } => format!("||{}||", format_discord(content)), // TODO: Spoiler reason + MessageComponent::Spoiler { content, .. } => format!("||{}||", format_discord(content)), // TODO: Spoiler reason - Styled::HardBreak => "\n".to_string(), - Styled::BlockQuote(inner) => format!("> {}", format_discord(inner)), + MessageComponent::HardBreak => "\n".to_string(), + MessageComponent::BlockQuote(inner) => format!("> {}", format_discord(inner)), }) .collect() } diff --git a/src/message_ast/convert_matrix.rs b/src/message_ast/convert_matrix.rs index 3bbddcb..3de03f2 100644 --- a/src/message_ast/convert_matrix.rs +++ b/src/message_ast/convert_matrix.rs @@ -1,27 +1,31 @@ -use super::{MessageContent, Styled}; +use super::{MessageComponent, MessageContent}; pub fn convert_matrix(message: &str) -> MessageContent { todo!(); } -pub fn format_matrix(message_content: &MessageContent) -> String { +pub fn format_matrix(message_content: &[MessageComponent]) -> String { message_content .iter() .map(|component| match component { - Styled::Plain(text) => html_escape::encode_text(text).to_string(), - Styled::Link { target, text } => format!( + MessageComponent::Plain(text) => html_escape::encode_text(text).to_string(), + MessageComponent::Link { target, text } => format!( r#"{}"#, html_escape::encode_quoted_attribute(target), format_matrix(text) ), - Styled::Italic(inner) => format!("{}", format_matrix(inner)), - Styled::Bold(inner) => format!("{}", format_matrix(inner)), - Styled::Strikethrough(inner) => format!("{}", format_matrix(inner)), - Styled::Underline(inner) => format!("{}", format_matrix(inner)), + MessageComponent::Italic(inner) => format!("{}", format_matrix(inner)), + MessageComponent::Bold(inner) => format!("{}", format_matrix(inner)), + MessageComponent::Strikethrough(inner) => { + format!("{}", format_matrix(inner)) + } + MessageComponent::Underline(inner) => format!("{}", format_matrix(inner)), - Styled::Code(code) => format!("{}", html_escape::encode_text(code)), - Styled::CodeBlock { lang, source } => { + MessageComponent::Code(code) => { + format!("{}", html_escape::encode_text(code)) + } + MessageComponent::CodeBlock { lang, source } => { format!( r#"
{}
"#, lang.as_ref() @@ -29,21 +33,21 @@ pub fn format_matrix(message_content: &MessageContent) -> String { r#" class="language-{}""#, html_escape::encode_quoted_attribute(lang) )) - .unwrap_or("".to_string()), + .unwrap_or_else(|| "".to_string()), source, ) } - Styled::Spoiler { reason, content } => format!( + MessageComponent::Spoiler { reason, content } => format!( "{}", reason .as_ref() .map(|reason| format!(r#"="{}""#, html_escape::encode_quoted_attribute(reason))) - .unwrap_or("".to_string()), + .unwrap_or_else(|| "".to_string()), format_matrix(content) ), - Styled::HardBreak => "
".to_string(), - Styled::BlockQuote(inner) => { + MessageComponent::HardBreak => "
".to_string(), + MessageComponent::BlockQuote(inner) => { format!("
{}
", format_matrix(inner)) } }) diff --git a/src/message_ast/convert_plain.rs b/src/message_ast/convert_plain.rs index 1c38551..003d9ed 100644 --- a/src/message_ast/convert_plain.rs +++ b/src/message_ast/convert_plain.rs @@ -1,5 +1,5 @@ -use super::{MessageContent, Styled}; +use super::{MessageComponent, MessageContent}; pub fn convert_plain(message: &str) -> MessageContent { - vec![Styled::Plain(message.to_string())] + vec![MessageComponent::Plain(message.to_string())] } diff --git a/src/message_ast/mod.rs b/src/message_ast/mod.rs index 7ae9660..c7878c3 100644 --- a/src/message_ast/mod.rs +++ b/src/message_ast/mod.rs @@ -2,9 +2,9 @@ mod convert_discord; mod convert_matrix; mod convert_plain; -pub type MessageContent = Vec; +pub type MessageContent = Vec; -pub enum Styled { +pub enum MessageComponent { Plain(String), Link { target: String,