phoebe/src/message_ast/convert_matrix.rs

136 lines
5.2 KiB
Rust

use html5ever::{local_name, namespace_url, ns, LocalName, QualName};
use kuchiki::{iter::NodeEdge, traits::*, NodeData};
use super::{MessageComponent, MessageContent};
pub fn convert_matrix(message: &str) -> MessageContent {
let dom = kuchiki::parse_fragment(
QualName::new(None, ns!(html), LocalName::from("div")),
vec![],
)
.one(message);
let mut parents = vec![];
let mut components = vec![];
for edge in dom.traverse() {
match edge {
NodeEdge::Start(node) => {
if let NodeData::Element(element) = node.data() {
if element.name.ns == ns!(html) {
match element.name.local {
local_name!("strong")
| local_name!("b")
| local_name!("em")
| local_name!("i") => {
parents.push(components);
components = vec![];
}
_ => {}
}
}
}
}
NodeEdge::End(node) => {
match node.data() {
NodeData::Text(text) => {
// TODO: Does this need to get HTML-decoded or something
components.push(MessageComponent::Plain(text.borrow().clone()));
}
NodeData::Element(element) => {
macro_rules! construct_component {
($f:expr) => {{
let component_type = $f;
if let Some(mut parent_components) = parents.pop() {
parent_components.push((component_type)(components));
components = parent_components;
}
}};
}
if element.name.ns == ns!(html) {
match element.name.local {
local_name!("strong") | local_name!("b") => {
construct_component!(MessageComponent::Bold)
}
local_name!("i") | local_name!("i") => {
construct_component!(MessageComponent::Italic)
}
_ => {}
}
}
}
_ => {}
}
}
};
}
components
}
/// Renders message components as an HTML string.
///
/// Each component is converted to its HTML element and the results are
/// concatenated in order. Container components (links, emphasis, spoilers,
/// block quotes, ...) render their children recursively. All literal text,
/// including inline code and code-block source, is HTML-escaped; attribute
/// values (link targets, code language, spoiler reason) are escaped for use
/// inside double quotes.
pub fn format_matrix(message_content: &[MessageComponent]) -> String {
    message_content
        .iter()
        .map(|component| match component {
            MessageComponent::Plain(text) => html_escape::encode_text(text).into_owned(),
            MessageComponent::Link { target, text } => format!(
                r#"<a href="{}">{}</a>"#,
                html_escape::encode_quoted_attribute(target),
                format_matrix(text)
            ),
            MessageComponent::Italic(inner) => format!("<em>{}</em>", format_matrix(inner)),
            MessageComponent::Bold(inner) => format!("<strong>{}</strong>", format_matrix(inner)),
            MessageComponent::Strikethrough(inner) => {
                format!("<del>{}</del>", format_matrix(inner))
            }
            MessageComponent::Underline(inner) => format!("<u>{}</u>", format_matrix(inner)),
            MessageComponent::Code(code) => {
                format!("<code>{}</code>", html_escape::encode_text(code))
            }
            MessageComponent::CodeBlock { lang, source } => {
                // Optional ` class="language-…"` attribute; empty when no
                // language was given.
                let class_attr = lang
                    .as_ref()
                    .map(|lang| {
                        format!(
                            r#" class="language-{}""#,
                            html_escape::encode_quoted_attribute(lang)
                        )
                    })
                    .unwrap_or_default();
                // The source is literal text, so it must be escaped just like
                // inline code; otherwise `<`/`&` in the code would be
                // interpreted as markup.
                format!(
                    r#"<pre><code{}>{}</code></pre>"#,
                    class_attr,
                    html_escape::encode_text(source),
                )
            }
            MessageComponent::Spoiler { reason, content } => {
                // `data-mx-spoiler` carries the reason as its value when one
                // is present and is a bare attribute otherwise.
                let reason_attr = reason
                    .as_ref()
                    .map(|reason| {
                        format!(r#"="{}""#, html_escape::encode_quoted_attribute(reason))
                    })
                    .unwrap_or_default();
                format!(
                    "<span data-mx-spoiler{}>{}</span>",
                    reason_attr,
                    format_matrix(content)
                )
            }
            MessageComponent::HardBreak => "<br>".to_string(),
            MessageComponent::BlockQuote(inner) => {
                format!("<blockquote>{}</blockquote>", format_matrix(inner))
            }
        })
        .collect()
}
/// Nested `<strong>`/`<i>` markup converts to nested `Bold`/`Italic`
/// components, and character references in text arrive decoded.
#[test]
fn simple_matrix_parsing() {
    let expected = vec![MessageComponent::Bold(vec![
        MessageComponent::Plain(String::from("hello! ")),
        MessageComponent::Italic(vec![MessageComponent::Plain(String::from("<>"))]),
    ])];

    let parsed = convert_matrix("<strong>hello! <i>&lt;&gt;</i></strong>");

    assert_eq!(parsed, expected);
}