phoebe/src/message_ast/convert_matrix.rs

292 lines
12 KiB
Rust

use html5ever::{local_name, namespace_url, ns, LocalName, QualName};
use kuchiki::{iter::NodeEdge, traits::*, NodeData};
use super::{MessageComponent, MessageContent};
pub fn convert_matrix(message: &str) -> MessageContent {
let dom = kuchiki::parse_fragment(
QualName::new(None, ns!(html), LocalName::from("div")),
vec![],
)
.one(message);
let mut parents = vec![];
let mut components = vec![];
let mut skip_text = 0;
let mut skip_all = 0;
for edge in dom.traverse() {
match edge {
NodeEdge::Start(node) => {
if let NodeData::Element(element) = node.data() {
if element.name.local == *"mx-reply" {
skip_all += 1;
}
if skip_all > 0 {
continue;
}
if element.name.ns == ns!(html) {
match element.name.local {
local_name!("strong")
| local_name!("b")
| local_name!("em")
| local_name!("i")
| local_name!("s")
| local_name!("u")
| local_name!("a")
| local_name!("blockquote") => {
parents.push(components);
components = vec![];
}
local_name!("span") => {
let attrs = element.attributes.borrow();
if attrs.get("data-mx-spoiler").is_some() {
parents.push(components);
components = vec![];
}
}
local_name!("code") => {
skip_text += 1;
}
_ => {}
}
}
}
}
NodeEdge::End(node) => match node.data() {
NodeData::Text(text) => {
if skip_text <= 0 && skip_all <= 0 {
let text = text.borrow().lines().collect::<Vec<_>>().join(" ");
components.push(MessageComponent::Plain(text));
}
}
NodeData::Element(element) => {
if element.name.local == *"mx-reply" {
skip_all -= 1;
}
if skip_all > 0 {
continue;
}
macro_rules! construct_component {
($f:expr) => {{
let component_type = $f;
if let Some(mut parent_components) = parents.pop() {
parent_components.push((component_type)(components));
components = parent_components;
}
}};
}
if element.name.ns == ns!(html) {
match element.name.local {
local_name!("strong") | local_name!("b") => {
construct_component!(MessageComponent::Bold)
}
local_name!("em") | local_name!("i") => {
construct_component!(MessageComponent::Italic)
}
local_name!("s") => {
construct_component!(MessageComponent::Strikethrough)
}
local_name!("u") => {
construct_component!(MessageComponent::Underline)
}
local_name!("a") => {
if let Some(mut parent_components) = parents.pop() {
let attrs = element.attributes.borrow();
if let Some(href) = attrs.get(local_name!("href")) {
parent_components.push(MessageComponent::Link {
target: href.to_string(),
text: components,
});
} else {
parent_components.append(&mut components);
}
components = parent_components;
}
}
local_name!("br") | local_name!("p") => {
components.push(MessageComponent::HardBreak);
}
local_name!("blockquote") => {
construct_component!(MessageComponent::BlockQuote)
}
local_name!("span") => {
let attrs = element.attributes.borrow();
if let Some(spoiler_reason) = attrs.get("data-mx-spoiler") {
construct_component!(|inner| MessageComponent::Spoiler {
reason: (!spoiler_reason.is_empty())
.then(|| spoiler_reason.to_string()),
content: inner,
})
}
}
local_name!("code") => {
// is_code_block = whether we are the child of a <pre> tag
let is_code_block = node
.parent()
.as_ref()
.map(|p| p.data())
.and_then(|d| match d {
NodeData::Element(e) => {
Some(e.name.local == local_name!("pre"))
}
_ => None,
})
.unwrap_or(false);
components.push(if is_code_block {
let attrs = element.attributes.borrow();
let lang = attrs
.get(local_name!("class"))
.and_then(|lang| lang.strip_prefix("language-"))
.map(|s| s.to_string());
MessageComponent::CodeBlock {
lang,
source: node.text_contents(),
}
} else {
MessageComponent::Code(node.text_contents())
});
skip_text -= 1;
}
_ => {}
}
}
}
_ => {}
},
};
}
components
}
/// Renders a message AST as Matrix-flavoured HTML.
///
/// Each component is serialised in order and the results are concatenated.
/// All text taken from the AST is HTML-escaped: plain text, inline code and
/// code-block source with `html_escape::encode_text`, attribute values (link
/// targets, code-block language, spoiler reason) with
/// `html_escape::encode_quoted_attribute`. Nested components are rendered
/// recursively.
///
/// Strikethrough is emitted as `<del>`, hard breaks as `<br>`, and spoilers
/// as `<span data-mx-spoiler>` (with `="reason"` appended when a reason is
/// present).
pub fn format_matrix(message_content: &[MessageComponent]) -> String {
    message_content
        .iter()
        .map(|component| match component {
            MessageComponent::Plain(text) => html_escape::encode_text(text).to_string(),
            MessageComponent::Link { target, text } => format!(
                r#"<a href="{}">{}</a>"#,
                html_escape::encode_quoted_attribute(target),
                format_matrix(text)
            ),
            MessageComponent::Italic(inner) => format!("<em>{}</em>", format_matrix(inner)),
            MessageComponent::Bold(inner) => {
                format!("<strong>{}</strong>", format_matrix(inner))
            }
            MessageComponent::Strikethrough(inner) => {
                format!("<del>{}</del>", format_matrix(inner))
            }
            MessageComponent::Underline(inner) => format!("<u>{}</u>", format_matrix(inner)),
            MessageComponent::Code(code) => {
                format!("<code>{}</code>", html_escape::encode_text(code))
            }
            MessageComponent::CodeBlock { lang, source } => {
                let class_attr = lang
                    .as_ref()
                    .map(|lang| {
                        format!(
                            r#" class="language-{}""#,
                            html_escape::encode_quoted_attribute(lang)
                        )
                    })
                    .unwrap_or_default();
                // The source is arbitrary text and must be escaped just like
                // inline code, otherwise `<`, `>` and `&` in the code would be
                // interpreted as markup by the receiving client.
                format!(
                    r#"<pre><code{}>{}</code></pre>"#,
                    class_attr,
                    html_escape::encode_text(source),
                )
            }
            MessageComponent::Spoiler { reason, content } => {
                let reason_attr = reason
                    .as_ref()
                    .map(|reason| {
                        format!(r#"="{}""#, html_escape::encode_quoted_attribute(reason))
                    })
                    .unwrap_or_default();
                format!(
                    "<span data-mx-spoiler{}>{}</span>",
                    reason_attr,
                    format_matrix(content)
                )
            }
            MessageComponent::HardBreak => "<br>".to_string(),
            MessageComponent::BlockQuote(inner) => {
                format!("<blockquote>{}</blockquote>", format_matrix(inner))
            }
        })
        .collect()
}
/// Nested inline formatting, entity decoding and links are parsed into the
/// expected component tree.
#[test]
fn simple_parsing() {
    use MessageComponent::{Bold, Italic, Link, Plain};

    let input =
        r#"<strong>hello! <i>&lt;&gt;</i></strong> <a href="https://example.com/">example</a>"#;

    let expected = vec![
        Bold(vec![
            Plain(String::from("hello! ")),
            Italic(vec![Plain(String::from("<>"))]),
        ]),
        Plain(String::from(" ")),
        Link {
            target: String::from("https://example.com/"),
            text: vec![Plain(String::from("example"))],
        },
    ];

    assert_eq!(convert_matrix(input), expected);
}
/// A spoiler span without a reason wraps its (possibly formatted) children
/// and yields `reason: None`.
#[test]
fn spoiler_parsing() {
    use MessageComponent::{Italic, Plain, Spoiler};

    let input = r#"<span data-mx-spoiler>the <em>whole</em> island is populated by lesbians</span>"#;

    let inner = vec![
        Plain(String::from("the ")),
        Italic(vec![Plain(String::from("whole"))]),
        Plain(String::from(" island is populated by lesbians")),
    ];
    let expected = vec![Spoiler {
        reason: None,
        content: inner,
    }];

    assert_eq!(convert_matrix(input), expected);
}
/// Inline `<code>` becomes `Code`; `<pre><code class="language-…">` becomes a
/// `CodeBlock` carrying the language and the untouched source text.
#[test]
fn code_parsing() {
    use MessageComponent::{Code, CodeBlock};

    // Inline code.
    let inline = r#"<code>hello_world();</code>"#;
    let expected_inline = vec![Code(String::from("hello_world();"))];
    assert_eq!(convert_matrix(inline), expected_inline);

    // Fenced code block with a language class; newlines must be preserved.
    let block = r#"<pre><code class="language-javascript">console.log("hello, world!");
console.table({ a: 1, b: 2, c: 3 });
</code></pre>"#;
    let expected_block = vec![CodeBlock {
        lang: Some(String::from("javascript")),
        source: String::from(
            "console.log(\"hello, world!\");\nconsole.table({ a: 1, b: 2, c: 3 });\n",
        ),
    }];
    assert_eq!(convert_matrix(block), expected_block);
}