Begin actual parsing of Matrix messages

This commit is contained in:
Charlotte Som 2021-09-12 21:42:09 +01:00
parent a3e35b7c0c
commit aff1f8b59e
2 changed files with 79 additions and 6 deletions

View file

@ -1,5 +1,5 @@
use html5ever::{namespace_url, ns, LocalName, QualName};
use kuchiki::{iter::NodeEdge, traits::*};
use html5ever::{local_name, namespace_url, ns, LocalName, QualName};
use kuchiki::{iter::NodeEdge, traits::*, NodeData};
use super::{MessageComponent, MessageContent};
@ -10,15 +10,73 @@ pub fn convert_matrix(message: &str) -> MessageContent {
)
.one(message);
// TODO: Work out how to convert the DOM into a message syntax tree
let mut parents = vec![];
let mut components = vec![];
for edge in dom.traverse() {
match edge {
NodeEdge::Start(_) => {}
NodeEdge::End(_) => {}
NodeEdge::Start(node) => {
if let NodeData::Element(element) = node.data() {
if element.name.ns == ns!(html) {
match element.name.local {
local_name!("strong")
| local_name!("b")
| local_name!("em")
| local_name!("i") => {
parents.push(components);
components = vec![];
}
_ => {}
}
}
}
}
NodeEdge::End(node) => {
match node.data() {
NodeData::Text(text) => {
// TODO: Does this need to get HTML-decoded or something
components.push(MessageComponent::Plain(text.borrow().clone()));
}
NodeData::Element(element) => {
/// Wraps the children collected for an element that has just closed and
/// appends the resulting component to the enclosing sibling list.
///
/// * `component_type` - constructor that turns the collected children into a
///   single component (for example `MessageComponent::Bold`).
/// * `components` - the children gathered while inside the element.
/// * `parents` - stack of sibling lists saved when enclosing elements were
///   opened; the top entry is popped and extended with the new component.
///
/// Returns the sibling list the caller should keep appending to. When the
/// stack is empty the wrapped component becomes the sole entry of a fresh
/// list, so the collected children are never silently discarded.
fn __construct_component(
    component_type: fn(MessageContent) -> MessageComponent,
    components: MessageContent,
    parents: &mut Vec<MessageContent>,
) -> MessageContent {
    let wrapped = component_type(components);
    // An empty stack means no sibling list was saved for this element; start
    // a new list rather than dropping the content.
    let mut siblings = parents.pop().unwrap_or_default();
    siblings.push(wrapped);
    siblings
}
// Shorthand for closing the current element: hands the in-scope `components`
// and `parents` to `__construct_component` together with the given
// constructor, then rebinds `components` to the list it returns.
macro_rules! construct_component {
    ($wrap:expr) => {
        components = __construct_component($wrap, components, &mut parents);
    };
}
todo!();
if element.name.ns == ns!(html) {
match element.name.local {
local_name!("strong") | local_name!("b") => {
construct_component!(MessageComponent::Bold)
}
local_name!("i") | local_name!("i") => {
construct_component!(MessageComponent::Italic)
}
_ => {}
}
}
}
_ => {}
}
}
};
}
components
}
pub fn format_matrix(message_content: &[MessageComponent]) -> String {
@ -70,3 +128,17 @@ pub fn format_matrix(message_content: &[MessageComponent]) -> String {
})
.collect()
}
/// Parses a bold element containing plain text and a nested italic element,
/// and checks the resulting component tree.
#[test]
fn simple_matrix_parsing() {
    // Expected tree: Bold[ Plain("hello! "), Italic[ Plain("<>") ] ].
    let italic_part = MessageComponent::Italic(vec![MessageComponent::Plain(String::from("<>"))]);
    let expected = vec![MessageComponent::Bold(vec![
        MessageComponent::Plain(String::from("hello! ")),
        italic_part,
    ])];

    let ast = convert_matrix(r"<strong>hello! <i>&lt;&gt;</i></strong>");

    assert_eq!(ast, expected);
}

View file

@ -4,6 +4,7 @@ mod convert_plain;
pub type MessageContent = Vec<MessageComponent>;
#[derive(Debug, PartialEq, Eq)]
pub enum MessageComponent {
Plain(String),
Link {