From aff1f8b59e60a05cbeb2ce9e5e04841eb590d0ef Mon Sep 17 00:00:00 2001
From: videogame hacker
Date: Sun, 12 Sep 2021 21:42:09 +0100
Subject: [PATCH] Begin actual parsing of matrix messages

---
Review notes (text in this section is not part of the commit message):
- The closing-tag arm matched `i` twice; it now matches `em` and `i`, so every
  level pushed when a formatting element opens is popped when it closes.
- Generic parameters (`Vec<MessageContent>`, `Vec<MessageComponent>`) and the
  markup in the test input were missing from this copy and are restored.
- Explanatory comments were added inside the new traversal code; hunk line
  counts and the diffstat below account for them.

 src/message_ast/convert_matrix.rs | 91 ++++++++++++++++++++++++++++---
 src/message_ast/mod.rs            |  1 +
 2 files changed, 86 insertions(+), 6 deletions(-)

diff --git a/src/message_ast/convert_matrix.rs b/src/message_ast/convert_matrix.rs
index d88dda4..a8701a7 100644
--- a/src/message_ast/convert_matrix.rs
+++ b/src/message_ast/convert_matrix.rs
@@ -1,5 +1,5 @@
-use html5ever::{namespace_url, ns, LocalName, QualName};
-use kuchiki::{iter::NodeEdge, traits::*};
+use html5ever::{local_name, namespace_url, ns, LocalName, QualName};
+use kuchiki::{iter::NodeEdge, traits::*, NodeData};
 
 use super::{MessageComponent, MessageContent};
 
@@ -10,15 +10,80 @@ pub fn convert_matrix(message: &str) -> MessageContent {
     )
     .one(message);
 
-    // TODO: Work out how to convert the DOM into a message syntax tree
+    // `components` is the list being built for the innermost open formatting element
+    // (or the top level); `parents` stacks the unfinished lists of the enclosing levels.
+    let mut parents = vec![];
+    let mut components = vec![];
+
     for edge in dom.traverse() {
         match edge {
-            NodeEdge::Start(_) => {}
-            NodeEdge::End(_) => {}
+            NodeEdge::Start(node) => {
+                if let NodeData::Element(element) = node.data() {
+                    if element.name.ns == ns!(html) {
+                        match element.name.local {
+                            local_name!("strong")
+                            | local_name!("b")
+                            | local_name!("em")
+                            | local_name!("i") => {
+                                // Park the siblings collected so far and start a fresh
+                                // list for this element's children.
+                                parents.push(components);
+                                components = vec![];
+                            }
+                            _ => {}
+                        }
+                    }
+                }
+            }
+
+            NodeEdge::End(node) => {
+                match node.data() {
+                    NodeData::Text(text) => {
+                        // TODO: Does this need to get HTML-decoded or something
+                        components.push(MessageComponent::Plain(text.borrow().clone()));
+                    }
+                    NodeData::Element(element) => {
+                        // Wraps the finished child list via `component_type` and appends it
+                        // to the popped parent list, which becomes the current list again.
+                        fn __construct_component(
+                            component_type: fn(MessageContent) -> MessageComponent,
+                            components: MessageContent,
+                            parents: &mut Vec<MessageContent>,
+                        ) -> MessageContent {
+                            if let Some(mut parent_components) = parents.pop() {
+                                parent_components.push((component_type)(components));
+                                parent_components
+                            } else {
+                                // Nothing on the stack: the collected children are dropped.
+                                vec![]
+                            }
+                        }
+
+                        macro_rules! construct_component {
+                            ($f:expr) => {
+                                components = __construct_component($f, components, &mut parents);
+                            };
+                        }
+
+                        if element.name.ns == ns!(html) {
+                            match element.name.local {
+                                local_name!("strong") | local_name!("b") => {
+                                    construct_component!(MessageComponent::Bold)
+                                }
+                                local_name!("em") | local_name!("i") => {
+                                    construct_component!(MessageComponent::Italic)
+                                }
+                                _ => {}
+                            }
+                        }
+                    }
+                    _ => {}
+                }
+            }
         };
     }
 
-    todo!();
+    components
 }
 
 pub fn format_matrix(message_content: &[MessageComponent]) -> String {
@@ -70,3 +135,17 @@ pub fn format_matrix(message_content: &[MessageComponent]) -> String {
         })
         .collect()
 }
+
+#[test]
+fn simple_matrix_parsing() {
+    let html = r"<strong>hello! <em>&lt;&gt;</em></strong>";
+    let ast = convert_matrix(html);
+
+    assert_eq!(
+        ast,
+        vec![MessageComponent::Bold(vec![
+            MessageComponent::Plain("hello! ".to_string(),),
+            MessageComponent::Italic(vec![MessageComponent::Plain("<>".to_string())])
+        ])]
+    )
+}
diff --git a/src/message_ast/mod.rs b/src/message_ast/mod.rs
index c7878c3..b08c3f3 100644
--- a/src/message_ast/mod.rs
+++ b/src/message_ast/mod.rs
@@ -4,6 +4,7 @@ mod convert_plain;
 
 pub type MessageContent = Vec<MessageComponent>;
 
+#[derive(Debug, PartialEq, Eq)]
 pub enum MessageComponent {
     Plain(String),
     Link {