2021-09-12 20:42:09 +00:00
|
|
|
use html5ever::{local_name, namespace_url, ns, LocalName, QualName};
|
|
|
|
use kuchiki::{iter::NodeEdge, traits::*, NodeData};
|
2021-09-12 12:16:45 +00:00
|
|
|
|
2021-09-12 12:07:20 +00:00
|
|
|
use super::{MessageComponent, MessageContent};
|
2021-09-12 11:59:18 +00:00
|
|
|
|
|
|
|
pub fn convert_matrix(message: &str) -> MessageContent {
|
2021-09-12 12:16:45 +00:00
|
|
|
let dom = kuchiki::parse_fragment(
|
|
|
|
QualName::new(None, ns!(html), LocalName::from("div")),
|
|
|
|
vec![],
|
|
|
|
)
|
|
|
|
.one(message);
|
|
|
|
|
2021-09-12 20:42:09 +00:00
|
|
|
let mut parents = vec![];
|
|
|
|
let mut components = vec![];
|
2021-09-12 22:24:51 +00:00
|
|
|
let mut skip_text = false;
|
2021-09-12 20:42:09 +00:00
|
|
|
|
2021-09-12 12:16:45 +00:00
|
|
|
for edge in dom.traverse() {
|
|
|
|
match edge {
|
2021-09-12 20:42:09 +00:00
|
|
|
NodeEdge::Start(node) => {
|
|
|
|
if let NodeData::Element(element) = node.data() {
|
|
|
|
if element.name.ns == ns!(html) {
|
|
|
|
match element.name.local {
|
|
|
|
local_name!("strong")
|
|
|
|
| local_name!("b")
|
|
|
|
| local_name!("em")
|
2021-09-12 21:01:01 +00:00
|
|
|
| local_name!("i")
|
2021-09-12 21:25:27 +00:00
|
|
|
| local_name!("s")
|
|
|
|
| local_name!("u")
|
|
|
|
| local_name!("a")
|
|
|
|
| local_name!("blockquote") => {
|
2021-09-12 20:42:09 +00:00
|
|
|
parents.push(components);
|
|
|
|
components = vec![];
|
|
|
|
}
|
2021-09-12 21:25:27 +00:00
|
|
|
|
|
|
|
local_name!("span") => {
|
|
|
|
let attrs = element.attributes.borrow();
|
|
|
|
if attrs.get("data-mx-spoiler").is_some() {
|
|
|
|
parents.push(components);
|
|
|
|
components = vec![];
|
|
|
|
}
|
|
|
|
}
|
2021-09-12 21:25:49 +00:00
|
|
|
|
2021-09-12 22:24:51 +00:00
|
|
|
local_name!("code") => {
|
|
|
|
skip_text = true;
|
|
|
|
}
|
|
|
|
|
2021-09-12 20:42:09 +00:00
|
|
|
_ => {}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-09-12 21:35:48 +00:00
|
|
|
NodeEdge::End(node) => match node.data() {
|
|
|
|
NodeData::Text(text) => {
|
2021-09-12 22:24:51 +00:00
|
|
|
if !skip_text {
|
2021-09-12 22:39:15 +00:00
|
|
|
let text = text.borrow().lines().collect::<Vec<_>>().join(" ");
|
|
|
|
components.push(MessageComponent::Plain(text));
|
2021-09-12 22:24:51 +00:00
|
|
|
}
|
2021-09-12 21:35:48 +00:00
|
|
|
}
|
|
|
|
NodeData::Element(element) => {
|
|
|
|
macro_rules! construct_component {
|
|
|
|
($f:expr) => {{
|
|
|
|
let component_type = $f;
|
|
|
|
if let Some(mut parent_components) = parents.pop() {
|
|
|
|
parent_components.push((component_type)(components));
|
|
|
|
components = parent_components;
|
|
|
|
}
|
|
|
|
}};
|
2021-09-12 20:42:09 +00:00
|
|
|
}
|
2021-09-12 21:01:01 +00:00
|
|
|
|
2021-09-12 21:35:48 +00:00
|
|
|
if element.name.ns == ns!(html) {
|
|
|
|
match element.name.local {
|
|
|
|
local_name!("strong") | local_name!("b") => {
|
|
|
|
construct_component!(MessageComponent::Bold)
|
|
|
|
}
|
|
|
|
local_name!("em") | local_name!("i") => {
|
|
|
|
construct_component!(MessageComponent::Italic)
|
|
|
|
}
|
|
|
|
local_name!("s") => {
|
|
|
|
construct_component!(MessageComponent::Strikethrough)
|
|
|
|
}
|
|
|
|
local_name!("u") => {
|
|
|
|
construct_component!(MessageComponent::Underline)
|
|
|
|
}
|
|
|
|
local_name!("a") => {
|
|
|
|
if let Some(mut parent_components) = parents.pop() {
|
2021-09-12 21:25:27 +00:00
|
|
|
let attrs = element.attributes.borrow();
|
2021-09-12 21:35:48 +00:00
|
|
|
if let Some(href) = attrs.get(local_name!("href")) {
|
|
|
|
parent_components.push(MessageComponent::Link {
|
|
|
|
target: href.to_string(),
|
|
|
|
text: components,
|
|
|
|
});
|
|
|
|
} else {
|
|
|
|
parent_components.append(&mut components);
|
2021-09-12 21:25:27 +00:00
|
|
|
}
|
2021-09-12 21:35:48 +00:00
|
|
|
|
|
|
|
components = parent_components;
|
2021-09-12 21:25:27 +00:00
|
|
|
}
|
2021-09-12 20:42:09 +00:00
|
|
|
}
|
2021-09-12 22:39:15 +00:00
|
|
|
local_name!("br") | local_name!("p") => {
|
2021-09-12 21:35:48 +00:00
|
|
|
components.push(MessageComponent::HardBreak);
|
|
|
|
}
|
|
|
|
local_name!("blockquote") => {
|
|
|
|
construct_component!(MessageComponent::BlockQuote)
|
|
|
|
}
|
|
|
|
local_name!("span") => {
|
|
|
|
let attrs = element.attributes.borrow();
|
|
|
|
if let Some(spoiler_reason) = attrs.get("data-mx-spoiler") {
|
|
|
|
construct_component!(|inner| MessageComponent::Spoiler {
|
|
|
|
reason: (!spoiler_reason.is_empty())
|
|
|
|
.then(|| spoiler_reason.to_string()),
|
|
|
|
content: inner,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
2021-09-12 22:24:51 +00:00
|
|
|
local_name!("code") => {
|
|
|
|
// is_code_block = whether we are the child of a <pre> tag
|
|
|
|
let is_code_block = node
|
|
|
|
.parent()
|
|
|
|
.as_ref()
|
|
|
|
.map(|p| p.data())
|
|
|
|
.and_then(|d| match d {
|
|
|
|
NodeData::Element(e) => {
|
|
|
|
Some(e.name.local == local_name!("pre"))
|
|
|
|
}
|
|
|
|
_ => None,
|
|
|
|
})
|
|
|
|
.unwrap_or(false);
|
|
|
|
|
|
|
|
components.push(if is_code_block {
|
|
|
|
let attrs = element.attributes.borrow();
|
|
|
|
let lang = attrs
|
|
|
|
.get(local_name!("class"))
|
|
|
|
.and_then(|lang| lang.strip_prefix("language-"))
|
|
|
|
.map(|s| s.to_string());
|
|
|
|
|
|
|
|
MessageComponent::CodeBlock {
|
|
|
|
lang,
|
|
|
|
source: node.text_contents(),
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
MessageComponent::Code(node.text_contents())
|
|
|
|
});
|
|
|
|
|
|
|
|
skip_text = false;
|
|
|
|
}
|
2021-09-12 21:35:48 +00:00
|
|
|
_ => {}
|
2021-09-12 20:42:09 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2021-09-12 21:35:48 +00:00
|
|
|
_ => {}
|
|
|
|
},
|
2021-09-12 12:16:45 +00:00
|
|
|
};
|
|
|
|
}
|
|
|
|
|
2021-09-12 20:42:09 +00:00
|
|
|
components
|
2021-09-12 11:59:18 +00:00
|
|
|
}
|
|
|
|
|
2021-09-12 12:07:20 +00:00
|
|
|
pub fn format_matrix(message_content: &[MessageComponent]) -> String {
|
2021-09-12 11:59:18 +00:00
|
|
|
message_content
|
|
|
|
.iter()
|
|
|
|
.map(|component| match component {
|
2021-09-12 12:07:20 +00:00
|
|
|
MessageComponent::Plain(text) => html_escape::encode_text(text).to_string(),
|
|
|
|
MessageComponent::Link { target, text } => format!(
|
2021-09-12 11:59:18 +00:00
|
|
|
r#"<a href="{}">{}</a>"#,
|
|
|
|
html_escape::encode_quoted_attribute(target),
|
|
|
|
format_matrix(text)
|
|
|
|
),
|
|
|
|
|
2021-09-12 12:07:20 +00:00
|
|
|
MessageComponent::Italic(inner) => format!("<em>{}</em>", format_matrix(inner)),
|
|
|
|
MessageComponent::Bold(inner) => format!("<strong>{}</strong>", format_matrix(inner)),
|
|
|
|
MessageComponent::Strikethrough(inner) => {
|
|
|
|
format!("<del>{}</del>", format_matrix(inner))
|
|
|
|
}
|
|
|
|
MessageComponent::Underline(inner) => format!("<u>{}</u>", format_matrix(inner)),
|
2021-09-12 11:59:18 +00:00
|
|
|
|
2021-09-12 12:07:20 +00:00
|
|
|
MessageComponent::Code(code) => {
|
|
|
|
format!("<code>{}</code>", html_escape::encode_text(code))
|
|
|
|
}
|
|
|
|
MessageComponent::CodeBlock { lang, source } => {
|
2021-09-12 11:59:18 +00:00
|
|
|
format!(
|
|
|
|
r#"<pre><code{}>{}</code></pre>"#,
|
|
|
|
lang.as_ref()
|
|
|
|
.map(|lang| format!(
|
|
|
|
r#" class="language-{}""#,
|
|
|
|
html_escape::encode_quoted_attribute(lang)
|
|
|
|
))
|
2021-09-12 12:07:20 +00:00
|
|
|
.unwrap_or_else(|| "".to_string()),
|
2021-09-12 11:59:18 +00:00
|
|
|
source,
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
2021-09-12 12:07:20 +00:00
|
|
|
MessageComponent::Spoiler { reason, content } => format!(
|
2021-09-12 11:59:18 +00:00
|
|
|
"<span data-mx-spoiler{}>{}</span>",
|
|
|
|
reason
|
|
|
|
.as_ref()
|
|
|
|
.map(|reason| format!(r#"="{}""#, html_escape::encode_quoted_attribute(reason)))
|
2021-09-12 12:07:20 +00:00
|
|
|
.unwrap_or_else(|| "".to_string()),
|
2021-09-12 11:59:18 +00:00
|
|
|
format_matrix(content)
|
|
|
|
),
|
2021-09-12 12:07:20 +00:00
|
|
|
MessageComponent::HardBreak => "<br>".to_string(),
|
|
|
|
MessageComponent::BlockQuote(inner) => {
|
2021-09-12 11:59:18 +00:00
|
|
|
format!("<blockquote>{}</blockquote>", format_matrix(inner))
|
|
|
|
}
|
|
|
|
})
|
|
|
|
.collect()
|
|
|
|
}
|
2021-09-12 20:42:09 +00:00
|
|
|
|
|
|
|
#[test]
fn simple_parsing() {
    use MessageComponent::*;

    // Nested inline formatting, a bare text node between two elements, and a
    // link with an `href`.
    let input =
        r#"<strong>hello! <i><></i></strong> <a href="https://example.com/">example</a>"#;

    let bold = Bold(vec![
        Plain(String::from("hello! ")),
        Italic(vec![Plain(String::from("<>"))]),
    ]);
    let separator = Plain(String::from(" "));
    let link = Link {
        target: String::from("https://example.com/"),
        text: vec![Plain(String::from("example"))],
    };

    let parsed = convert_matrix(input);
    assert_eq!(parsed, vec![bold, separator, link])
}
|
2021-09-12 21:25:27 +00:00
|
|
|
|
|
|
|
#[test]
fn spoiler_parsing() {
    use MessageComponent::*;

    // A spoiler span whose attribute has no value must parse with no reason,
    // and formatting nested inside it must be preserved.
    let input = r#"<span data-mx-spoiler>the <em>whole</em> island is populated by lesbians</span>"#;

    let hidden = vec![
        Plain(String::from("the ")),
        Italic(vec![Plain(String::from("whole"))]),
        Plain(String::from(" island is populated by lesbians")),
    ];
    let expected = vec![Spoiler {
        reason: None,
        content: hidden,
    }];

    let parsed = convert_matrix(input);
    assert_eq!(parsed, expected);
}
|
2021-09-12 22:24:51 +00:00
|
|
|
|
|
|
|
#[test]
fn code_parsing() {
    use MessageComponent::*;

    // A bare `<code>` element is inline code.
    let inline = r#"<code>hello_world();</code>"#;
    let inline_expected = vec![Code(String::from("hello_world();"))];
    assert_eq!(convert_matrix(inline), inline_expected);

    // `<code>` directly inside `<pre>` is a code block; the language comes
    // from the `language-…` class and newlines in the source are kept.
    let block = r#"<pre><code class="language-javascript">console.log("hello, world!");
console.table({ a: 1, b: 2, c: 3 });
</code></pre>"#;
    let block_expected = vec![CodeBlock {
        lang: Some(String::from("javascript")),
        source: String::from(
            "console.log(\"hello, world!\");\nconsole.table({ a: 1, b: 2, c: 3 });\n",
        ),
    }];
    assert_eq!(convert_matrix(block), block_expected);
}
|