Refactor & prepare for matrix HTML parsing

legacy
Charlotte Som 2021-09-12 13:07:20 +01:00
parent e483064ee1
commit 3ff819444d
7 changed files with 191 additions and 39 deletions

147
Cargo.lock generated
View File

@ -584,6 +584,16 @@ dependencies = [
"winapi",
]
[[package]]
name = "futf"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c9c1ce3fa9336301af935ab852c437817d14cd33690446569392e65170aac3b"
dependencies = [
"mac",
"new_debug_unreachable",
]
[[package]]
name = "futures"
version = "0.3.16"
@ -857,6 +867,20 @@ dependencies = [
"utf8-width",
]
[[package]]
name = "html5ever"
version = "0.25.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aafcf38a1a36118242d29b92e1b08ef84e67e4a5ed06e0a80be20e6a32bfed6b"
dependencies = [
"log",
"mac",
"markup5ever",
"proc-macro2 1.0.28",
"quote 1.0.9",
"syn 1.0.74",
]
[[package]]
name = "http"
version = "0.2.4"
@ -1108,12 +1132,32 @@ dependencies = [
"hashbrown",
]
[[package]]
name = "mac"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
[[package]]
name = "maplit"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d"
[[package]]
name = "markup5ever"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a24f40fb03852d1cdd84330cddcaf98e9ec08a7b7768e952fad3b4cf048ec8fd"
dependencies = [
"log",
"phf",
"phf_codegen",
"string_cache",
"string_cache_codegen",
"tendril",
]
[[package]]
name = "matches"
version = "0.1.8"
@ -1318,6 +1362,12 @@ dependencies = [
"tempfile",
]
[[package]]
name = "new_debug_unreachable"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54"
[[package]]
name = "ntapi"
version = "0.3.6"
@ -1489,6 +1539,44 @@ version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e"
[[package]]
name = "phf"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12"
dependencies = [
"phf_shared",
]
[[package]]
name = "phf_codegen"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cbffee61585b0411840d3ece935cce9cb6321f01c45477d30066498cd5e1a815"
dependencies = [
"phf_generator",
"phf_shared",
]
[[package]]
name = "phf_generator"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "17367f0cc86f2d25802b2c26ee58a7b23faeccf78a396094c13dced0d0182526"
dependencies = [
"phf_shared",
"rand 0.7.3",
]
[[package]]
name = "phf_shared"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c00cf8b9eafe68dde5e9eaa2cef8ee84a9336a47d566ec55ca16589633b65af7"
dependencies = [
"siphasher",
]
[[package]]
name = "phoebe"
version = "0.1.0"
@ -1497,6 +1585,7 @@ dependencies = [
"discord_message_format",
"env_logger",
"html-escape",
"html5ever",
"log",
"matrix-sdk",
"serde",
@ -1596,6 +1685,12 @@ version = "0.2.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857"
[[package]]
name = "precomputed-hash"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
[[package]]
name = "proc-macro-crate"
version = "1.0.0"
@ -1675,6 +1770,7 @@ dependencies = [
"rand_chacha 0.2.2",
"rand_core 0.5.1",
"rand_hc 0.2.0",
"rand_pcg",
]
[[package]]
@ -1745,6 +1841,15 @@ dependencies = [
"rand_core 0.6.3",
]
[[package]]
name = "rand_pcg"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "16abd0c1b639e9eb4d7c50c0b8100b0d0f849be2349829c740fe8e6eb4816429"
dependencies = [
"rand_core 0.5.1",
]
[[package]]
name = "rayon"
version = "1.5.1"
@ -2313,6 +2418,12 @@ version = "1.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c19772be3c4dd2ceaacf03cb41d5885f2a02c4d8804884918e3a258480803335"
[[package]]
name = "siphasher"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "533494a8f9b724d33625ab53c6c4800f7cc445895924a8ef649222dcb76e938b"
[[package]]
name = "slab"
version = "0.4.4"
@ -2424,6 +2535,31 @@ version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "213701ba3370744dcd1a12960caa4843b3d68b4d1c0a5d575e0d65b2ee9d16c0"
[[package]]
name = "string_cache"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ddb1139b5353f96e429e1a5e19fbaf663bddedaa06d1dbd49f82e352601209a"
dependencies = [
"lazy_static",
"new_debug_unreachable",
"phf_shared",
"precomputed-hash",
"serde",
]
[[package]]
name = "string_cache_codegen"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f24c8e5e19d22a726626f1a5e16fe15b132dcf21d10177fa5a45ce7962996b97"
dependencies = [
"phf_generator",
"phf_shared",
"proc-macro2 1.0.28",
"quote 1.0.9",
]
[[package]]
name = "subtle"
version = "2.4.1"
@ -2478,6 +2614,17 @@ dependencies = [
"winapi",
]
[[package]]
name = "tendril"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a9ef557cb397a4f0a5a3a628f06515f78563f2209e64d47055d9dc6052bf5e33"
dependencies = [
"futf",
"mac",
"utf-8",
]
[[package]]
name = "termcolor"
version = "1.1.2"

View File

@ -14,6 +14,7 @@ url = "2.2.2"
log = "0.4.14"
env_logger = "0.9.0"
html-escape = "0.2.9"
html5ever = "0.25.1"
[dependencies.serenity]
version = "0.10.9"

View File

@ -20,7 +20,7 @@ use tokio::sync::mpsc;
use url::Url;
use crate::{
message_ast::{convert_plain, format_discord, format_matrix},
message_ast::{convert_matrix, convert_plain, format_discord, format_matrix},
messages::{MessageAuthor, MessageReference, SentMessage},
};
@ -87,14 +87,14 @@ impl EventHandler for MatrixHandler {
match message_type {
MessageType::Text(text) => {
let content = if let Some(_html) = text
let content = if let Some(html) = text
.formatted
.as_ref()
.filter(|f| f.format == MessageFormat::Html)
.map(|f| &f.body)
{
// TODO: Parse html_body into MessageContent AST
convert_plain(&text.body)
convert_matrix(html)
} else {
convert_plain(&text.body)
};
@ -106,7 +106,7 @@ impl EventHandler for MatrixHandler {
author: MessageAuthor {
display_name: sender
.display_name()
.unwrap_or(sender.name())
.unwrap_or_else(|| sender.name())
.to_string(),
},
});

View File

@ -1,7 +1,7 @@
use super::{MessageContent, Styled};
use super::{MessageComponent, MessageContent};
use discord_message_format::DiscordComponent;
impl<'a> From<&DiscordComponent<'a>> for Styled {
impl<'a> From<&DiscordComponent<'a>> for MessageComponent {
fn from(discord: &DiscordComponent<'a>) -> Self {
match discord {
DiscordComponent::Plain(text) => Self::Plain(text.to_string()),
@ -38,38 +38,38 @@ impl<'a> From<&DiscordComponent<'a>> for Styled {
pub fn convert_discord(discord_message: &[DiscordComponent<'_>]) -> MessageContent {
discord_message
.iter()
.map(Styled::from)
.map(MessageComponent::from)
.collect::<MessageContent>()
}
pub fn format_discord(message_content: &MessageContent) -> String {
pub fn format_discord(message_content: &[MessageComponent]) -> String {
message_content
.iter()
.map(|component| match component {
Styled::Plain(text) => text.to_string(), // TODO: Escape
MessageComponent::Plain(text) => text.to_string(), // TODO: Escape
Styled::Link { target, .. } => target.to_string(), // TODO: Link text
MessageComponent::Link { target, .. } => target.to_string(), // TODO: Link text
Styled::Italic(inner) => format!("*{}*", format_discord(inner)),
Styled::Bold(inner) => format!("**{}**", format_discord(inner)),
Styled::Strikethrough(inner) => format!("~~{}~~", format_discord(inner)),
Styled::Underline(inner) => format!("__{}__", format_discord(inner)),
MessageComponent::Italic(inner) => format!("*{}*", format_discord(inner)),
MessageComponent::Bold(inner) => format!("**{}**", format_discord(inner)),
MessageComponent::Strikethrough(inner) => format!("~~{}~~", format_discord(inner)),
MessageComponent::Underline(inner) => format!("__{}__", format_discord(inner)),
Styled::Code(code) => format!("`{}`", code), // TODO: Double-grave delimiting when code contains '`'
Styled::CodeBlock { lang, source } => {
MessageComponent::Code(code) => format!("`{}`", code), // TODO: Double-grave delimiting when code contains '`'
MessageComponent::CodeBlock { lang, source } => {
format!(
"```{}\n{}\n```",
lang.as_ref()
.map(|s| s.to_string())
.unwrap_or("".to_string()),
.unwrap_or_else(|| "".to_string()),
source.to_string()
)
}
Styled::Spoiler { content, .. } => format!("||{}||", format_discord(content)), // TODO: Spoiler reason
MessageComponent::Spoiler { content, .. } => format!("||{}||", format_discord(content)), // TODO: Spoiler reason
Styled::HardBreak => "\n".to_string(),
Styled::BlockQuote(inner) => format!("> {}", format_discord(inner)),
MessageComponent::HardBreak => "\n".to_string(),
MessageComponent::BlockQuote(inner) => format!("> {}", format_discord(inner)),
})
.collect()
}

View File

@ -1,27 +1,31 @@
use super::{MessageContent, Styled};
use super::{MessageComponent, MessageContent};
pub fn convert_matrix(message: &str) -> MessageContent {
todo!();
}
pub fn format_matrix(message_content: &MessageContent) -> String {
pub fn format_matrix(message_content: &[MessageComponent]) -> String {
message_content
.iter()
.map(|component| match component {
Styled::Plain(text) => html_escape::encode_text(text).to_string(),
Styled::Link { target, text } => format!(
MessageComponent::Plain(text) => html_escape::encode_text(text).to_string(),
MessageComponent::Link { target, text } => format!(
r#"<a href="{}">{}</a>"#,
html_escape::encode_quoted_attribute(target),
format_matrix(text)
),
Styled::Italic(inner) => format!("<em>{}</em>", format_matrix(inner)),
Styled::Bold(inner) => format!("<strong>{}</strong>", format_matrix(inner)),
Styled::Strikethrough(inner) => format!("<del>{}</del>", format_matrix(inner)),
Styled::Underline(inner) => format!("<u>{}</u>", format_matrix(inner)),
MessageComponent::Italic(inner) => format!("<em>{}</em>", format_matrix(inner)),
MessageComponent::Bold(inner) => format!("<strong>{}</strong>", format_matrix(inner)),
MessageComponent::Strikethrough(inner) => {
format!("<del>{}</del>", format_matrix(inner))
}
MessageComponent::Underline(inner) => format!("<u>{}</u>", format_matrix(inner)),
Styled::Code(code) => format!("<code>{}</code>", html_escape::encode_text(code)),
Styled::CodeBlock { lang, source } => {
MessageComponent::Code(code) => {
format!("<code>{}</code>", html_escape::encode_text(code))
}
MessageComponent::CodeBlock { lang, source } => {
format!(
r#"<pre><code{}>{}</code></pre>"#,
lang.as_ref()
@ -29,21 +33,21 @@ pub fn format_matrix(message_content: &MessageContent) -> String {
r#" class="language-{}""#,
html_escape::encode_quoted_attribute(lang)
))
.unwrap_or("".to_string()),
.unwrap_or_else(|| "".to_string()),
source,
)
}
Styled::Spoiler { reason, content } => format!(
MessageComponent::Spoiler { reason, content } => format!(
"<span data-mx-spoiler{}>{}</span>",
reason
.as_ref()
.map(|reason| format!(r#"="{}""#, html_escape::encode_quoted_attribute(reason)))
.unwrap_or("".to_string()),
.unwrap_or_else(|| "".to_string()),
format_matrix(content)
),
Styled::HardBreak => "<br>".to_string(),
Styled::BlockQuote(inner) => {
MessageComponent::HardBreak => "<br>".to_string(),
MessageComponent::BlockQuote(inner) => {
format!("<blockquote>{}</blockquote>", format_matrix(inner))
}
})

View File

@ -1,5 +1,5 @@
use super::{MessageContent, Styled};
use super::{MessageComponent, MessageContent};
pub fn convert_plain(message: &str) -> MessageContent {
vec![Styled::Plain(message.to_string())]
vec![MessageComponent::Plain(message.to_string())]
}

View File

@ -2,9 +2,9 @@ mod convert_discord;
mod convert_matrix;
mod convert_plain;
pub type MessageContent = Vec<Styled>;
pub type MessageContent = Vec<MessageComponent>;
pub enum Styled {
pub enum MessageComponent {
Plain(String),
Link {
target: String,