Initial commit

main
Charlotte Som 2021-08-07 10:34:53 +01:00
commit bbf120f753
12 changed files with 448 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/target

49
Cargo.lock generated Normal file
View File

@ -0,0 +1,49 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "aho-corasick"
version = "0.7.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f"
dependencies = [
"memchr",
]
[[package]]
name = "discord_message_format"
version = "0.1.0"
dependencies = [
"lazy_static",
"regex",
]
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "memchr"
version = "2.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc"
[[package]]
name = "regex"
version = "1.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.6.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b"

8
Cargo.toml Normal file
View File

@ -0,0 +1,8 @@
[package]
name = "discord_message_format"
version = "0.1.0"
edition = "2018"
[dependencies]
lazy_static = "1.4.0"
regex = "1.5.4"

10
README.md Normal file
View File

@ -0,0 +1,10 @@
# discord_message_format
AST construction for the format that Discord uses for messages.
This lets you be formatting-aware when parsing user messages in your bots.
## To Do
- User/role mentions
- Emoji

22
src/ast.rs Normal file
View File

@ -0,0 +1,22 @@
/// A node of the syntax tree built from a Discord-formatted message.
///
/// String-carrying variants borrow from the message text that was parsed, so
/// a component cannot outlive the `&'a str` it was built from.
///
/// `Clone`, `PartialEq` and `Eq` are derived so that trees can be duplicated
/// and compared (for example in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DiscordComponent<'a> {
    /// A run of text that no formatting rule matched.
    Plain(&'a str),
    /// A single character that was written with a preceding backslash.
    Literal(char),
    /// A bare URL; the slice is the link target.
    Link(&'a str),
    /// Children wrapped in `**…**`.
    Bold(Vec<DiscordComponent<'a>>),
    /// Children wrapped in `_…_` or `*…*`.
    Italic(Vec<DiscordComponent<'a>>),
    /// Children wrapped in `~~…~~`.
    Strikethrough(Vec<DiscordComponent<'a>>),
    /// Children wrapped in `__…__`.
    Underline(Vec<DiscordComponent<'a>>),
    /// Inline code wrapped in single backticks; the content is kept verbatim.
    Code(&'a str),
    /// A fenced code block delimited by triple backticks.
    CodeBlock {
        /// Text following the opening fence on the same line, if any.
        lang: Option<&'a str>,
        /// The content between the fences, kept verbatim.
        source: &'a str,
    },
    /// Children wrapped in `||…||`.
    Spoiler(Vec<DiscordComponent<'a>>),
    /// A newline character in the message.
    LineBreak,
    /// Quoted content introduced by `> ` or `>>> `.
    Quote(Vec<DiscordComponent<'a>>),
}

52
src/convert.rs Normal file
View File

@ -0,0 +1,52 @@
use super::DiscordComponent;
/// Rendering of parsed message components as HTML markup.
pub trait ToHtml {
/// Returns the HTML representation of `self` as a newly allocated string.
fn to_html(&self) -> String;
}
impl<'a> ToHtml for DiscordComponent<'a> {
    /// Renders this component, and recursively its children, as HTML.
    ///
    /// Every piece of text taken from the message (plain text, literals, link
    /// targets, code and code-block language names) is HTML-escaped before it
    /// is written, so message content cannot inject tags or attributes.
    fn to_html(&self) -> String {
        match self {
            DiscordComponent::Plain(s) => escape_html(s),
            DiscordComponent::Literal(c) => escape_html(&c.to_string()),
            DiscordComponent::Link(target) => {
                // The target is used both as an attribute value and as text.
                let target = escape_html(target);
                format!(r#"<a href="{0}">{0}</a>"#, target)
            }
            DiscordComponent::Bold(children) => {
                format!("<strong>{}</strong>", children.to_html())
            }
            DiscordComponent::Italic(children) => {
                format!("<em>{}</em>", children.to_html())
            }
            DiscordComponent::Strikethrough(children) => {
                format!("<del>{}</del>", children.to_html())
            }
            DiscordComponent::Underline(children) => {
                format!("<u>{}</u>", children.to_html())
            }
            DiscordComponent::Code(source) => {
                format!("<code>{}</code>", escape_html(source))
            }
            DiscordComponent::CodeBlock { lang, source } => {
                // Only emit a class attribute when a language was given.
                let language_class = lang
                    .map(|l| format!(" class=\"language-{}\"", escape_html(l)))
                    .unwrap_or_default();
                format!(
                    "<pre><code{}>{}</code></pre>",
                    language_class,
                    escape_html(source)
                )
            }
            DiscordComponent::Spoiler(children) => {
                format!("<span data-mx-spoiler>{}</span>", children.to_html())
            }
            DiscordComponent::LineBreak => "<br>".to_string(),
            DiscordComponent::Quote(children) => {
                format!("<blockquote>{}</blockquote>", children.to_html())
            }
        }
    }
}

/// Replaces the characters that are significant in HTML text and quoted
/// attribute values (`&`, `<`, `>`, `"`, `'`) with character references.
fn escape_html(raw: &str) -> String {
    let mut escaped = String::with_capacity(raw.len());
    for c in raw.chars() {
        match c {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&#39;"),
            _ => escaped.push(c),
        }
    }
    escaped
}
impl<'a> ToHtml for Vec<DiscordComponent<'a>> {
    /// Renders each component in order and concatenates the results.
    fn to_html(&self) -> String {
        let mut html = String::new();
        for component in self {
            html.push_str(&component.to_html());
        }
        html
    }
}

13
src/lib.rs Normal file
View File

@ -0,0 +1,13 @@
//! AST construction for the message formatting syntax used by Discord.
//!
//! [`parse`] turns message text into a list of [`DiscordComponent`]s, and
//! [`ToHtml`] renders components as HTML.
mod ast;
mod convert;
mod parse_basic;
mod parse_code;
mod parse_inline_style;
mod parse_link;
mod parse_quotes;
pub use ast::DiscordComponent;
pub use convert::ToHtml;
pub use parse_basic::parse;

87
src/parse_basic.rs Normal file
View File

@ -0,0 +1,87 @@
use super::{
ast::DiscordComponent, parse_code::*, parse_inline_style::*, parse_link::*, parse_quotes::*,
};
/// Parses `text` into a flat list of components.
///
/// The text is scanned left to right. At each position [`parse_token`] is
/// tried; when it matches, the token is emitted and the scan skips the bytes
/// it consumed. Positions where nothing matches are gathered into
/// [`DiscordComponent::Plain`] runs that are emitted just before the next
/// token, or at the end of the text.
pub fn parse(text: &'_ str) -> Vec<DiscordComponent<'_>> {
    let mut tokens = Vec::new();
    // Byte offset at which the current run of plain text began, if one is open.
    let mut plain_start: Option<usize> = None;
    let mut i = 0;

    while i < text.len() {
        // A position is at the start of a line only when nothing precedes it
        // on that line: no pending plain text, and the previous token is
        // either absent or a line break. Without the pending-text check,
        // input such as "a > b" would be treated as a quote in mid-line.
        let is_line_start = plain_start.is_none()
            && matches!(tokens.last(), None | Some(DiscordComponent::LineBreak));

        match parse_token(&text[i..], is_line_start) {
            Some((token, consumed)) => {
                // Flush the plain text that led up to this token.
                if let Some(start) = plain_start.take() {
                    tokens.push(DiscordComponent::Plain(&text[start..i]));
                }
                tokens.push(token);
                i += consumed;
            }
            None => {
                if plain_start.is_none() {
                    plain_start = Some(i);
                }
                // Step over one whole character so `i` stays on a UTF-8
                // character boundary.
                i += text[i..].chars().next().map_or(1, char::len_utf8);
            }
        }
    }

    if let Some(start) = plain_start {
        tokens.push(DiscordComponent::Plain(&text[start..]));
    }
    tokens
}
/// Tries each token parser at the start of `text` and returns the first match
/// together with the number of bytes it consumed.
///
/// Quotes are only attempted when `is_line_start` is true.
///
/// The order matters where one delimiter is a prefix of another: code blocks
/// are tried before inline code, bold before italic, and underline before
/// italic. The italic pattern `_…_` also matches the first four bytes of
/// `__x__`, so trying italic first would make underline unreachable.
pub fn parse_token(text: &'_ str, is_line_start: bool) -> Option<(DiscordComponent<'_>, usize)> {
    parse_escaped_literal(text)
        .or_else(|| {
            if is_line_start {
                parse_quotes(text)
            } else {
                None
            }
        })
        .or_else(|| parse_code_block(text))
        .or_else(|| parse_code(text))
        .or_else(|| parse_link(text))
        .or_else(|| parse_bold(text))
        .or_else(|| parse_underline(text))
        .or_else(|| parse_italic(text))
        .or_else(|| parse_strikethrough(text))
        .or_else(|| parse_spoiler(text))
        .or_else(|| parse_line_break(text))
}
/// Parses a backslash followed by an escapable punctuation character.
///
/// On success returns the escaped character as a
/// [`DiscordComponent::Literal`] and the number of bytes consumed (the
/// backslash plus the character). A backslash followed by anything else is
/// not an escape and yields `None`.
///
/// Besides the usual Markdown punctuation, the set includes `~`, `|` and `>`,
/// because this parser treats them as the start of strikethrough, spoiler and
/// quote markup.
pub fn parse_escaped_literal(text: &'_ str) -> Option<(DiscordComponent<'_>, usize)> {
    const ESCAPABLE: &str = "\\`*_{}[]()#+-.!~|>";

    let escaped = text.strip_prefix('\\')?.chars().next()?;
    if ESCAPABLE.contains(escaped) {
        Some((DiscordComponent::Literal(escaped), 1 + escaped.len_utf8()))
    } else {
        None
    }
}
/// Recognises a single `\n` at the start of `text` as a line break that
/// consumes one byte; anything else yields `None`.
pub fn parse_line_break(text: &'_ str) -> Option<(DiscordComponent<'_>, usize)> {
    text.strip_prefix('\n')
        .map(|_| (DiscordComponent::LineBreak, 1))
}

36
src/parse_code.rs Normal file
View File

@ -0,0 +1,36 @@
use lazy_static::lazy_static;
use regex::Regex;
use super::ast::DiscordComponent;
/// Parses a fenced code block at the start of `text`.
///
/// The block opens with three backticks, optionally followed by a language
/// name on the same line, and closes at the first line that begins with three
/// backticks. Returns the block and the number of bytes consumed, including
/// both fences.
pub fn parse_code_block(text: &'_ str) -> Option<(DiscordComponent<'_>, usize)> {
    lazy_static! {
        // `.` does not match `\n` by default, so the `s` flag is enabled for
        // the source group only; otherwise a block whose body spans several
        // lines could never match. The language stays limited to one line.
        static ref CODE_BLOCK: Regex =
            Regex::new(r"^```(?P<lang>.+)?\n(?P<source>(?s:.*?))\n```").unwrap();
    }

    let caps = CODE_BLOCK.captures(text)?;
    let lang = caps.name("lang").map(|m| m.as_str());
    let source = caps.name("source")?.as_str();
    // The pattern is anchored at the start, so the end offset of the whole
    // match is the number of bytes consumed.
    let whole_len = caps.get(0)?.end();
    Some((DiscordComponent::CodeBlock { lang, source }, whole_len))
}
/// Parses inline code at the start of `text`: a backtick, the shortest
/// non-empty run of characters on the same line, and a closing backtick.
///
/// Returns the verbatim content and the number of bytes consumed, including
/// both backticks.
pub fn parse_code(text: &'_ str) -> Option<(DiscordComponent<'_>, usize)> {
    lazy_static! {
        static ref CODE: Regex = Regex::new(r"^`(?P<inner>.+?)`").unwrap();
    }

    let caps = CODE.captures(text)?;
    // The match is anchored at offset 0, so its end is its length.
    let consumed = caps.get(0)?.end();
    let content = caps.name("inner")?.as_str();
    Some((DiscordComponent::Code(content), consumed))
}

84
src/parse_inline_style.rs Normal file
View File

@ -0,0 +1,84 @@
use lazy_static::lazy_static;
use regex::Regex;
use super::ast::DiscordComponent;
use super::parse_basic::parse;
/// Parses `**…**` at the start of `text`.
///
/// The content between the markers is the shortest non-empty single-line
/// match and is itself parsed for nested formatting. Returns the bold
/// component and the number of bytes consumed, markers included.
pub fn parse_bold(text: &'_ str) -> Option<(DiscordComponent<'_>, usize)> {
    lazy_static! {
        static ref BOLD: Regex = Regex::new(r"^\*\*(?P<inner>.+?)\*\*").unwrap();
    }

    let caps = BOLD.captures(text)?;
    // Anchored at offset 0, so the end offset equals the matched length.
    let consumed = caps.get(0)?.end();
    let children = parse(caps.name("inner")?.as_str());
    Some((DiscordComponent::Bold(children), consumed))
}
/// Parses `_…_` or `*…*` at the start of `text`.
///
/// The underscore form is tried first and the asterisk form only when that
/// fails. The content between the markers is the shortest non-empty
/// single-line match and is itself parsed for nested formatting. Returns the
/// italic component and the number of bytes consumed, markers included.
pub fn parse_italic(text: &'_ str) -> Option<(DiscordComponent<'_>, usize)> {
    lazy_static! {
        static ref ITALIC_UNDERSCORE: Regex = Regex::new(r"^_(?P<inner>.+?)_").unwrap();
        static ref ITALIC_ASTERISK: Regex = Regex::new(r"^\*(?P<inner>.+?)\*").unwrap();
    }

    let caps = match ITALIC_UNDERSCORE.captures(text) {
        Some(found) => found,
        None => ITALIC_ASTERISK.captures(text)?,
    };
    // Anchored at offset 0, so the end offset equals the matched length.
    let consumed = caps.get(0)?.end();
    let children = parse(caps.name("inner")?.as_str());
    Some((DiscordComponent::Italic(children), consumed))
}
/// Parses `~~…~~` at the start of `text`.
///
/// The content between the markers is the shortest non-empty single-line
/// match and is itself parsed for nested formatting. Returns the
/// strikethrough component and the number of bytes consumed, markers
/// included.
pub fn parse_strikethrough(text: &'_ str) -> Option<(DiscordComponent<'_>, usize)> {
    lazy_static! {
        static ref STRIKETHROUGH: Regex = Regex::new(r"^~~(?P<inner>.+?)~~").unwrap();
    }

    STRIKETHROUGH.captures(text).map(|caps| {
        let struck = caps.name("inner").unwrap().as_str();
        // Anchored at offset 0, so the end offset equals the matched length.
        let consumed = caps.get(0).unwrap().end();
        (DiscordComponent::Strikethrough(parse(struck)), consumed)
    })
}
/// Parses `__…__` at the start of `text`.
///
/// The content between the markers is the shortest non-empty single-line
/// match and is itself parsed for nested formatting. Returns the underline
/// component and the number of bytes consumed, markers included.
pub fn parse_underline(text: &'_ str) -> Option<(DiscordComponent<'_>, usize)> {
    lazy_static! {
        static ref UNDERLINE: Regex = Regex::new(r"^__(?P<inner>.+?)__").unwrap();
    }

    match UNDERLINE.captures(text) {
        Some(caps) => {
            let underlined = caps.name("inner").unwrap().as_str();
            // Anchored at offset 0, so the end offset equals the matched length.
            let consumed = caps.get(0).unwrap().end();
            Some((DiscordComponent::Underline(parse(underlined)), consumed))
        }
        None => None,
    }
}
/// Parses `||…||` at the start of `text`.
///
/// The content between the markers is the shortest non-empty single-line
/// match and is itself parsed for nested formatting. Returns the spoiler
/// component and the number of bytes consumed, markers included.
pub fn parse_spoiler(text: &'_ str) -> Option<(DiscordComponent<'_>, usize)> {
    lazy_static! {
        static ref SPOILER: Regex = Regex::new(r"^\|\|(?P<inner>.+?)\|\|").unwrap();
    }

    let caps = SPOILER.captures(text)?;
    let hidden = caps.name("inner")?.as_str();
    // Anchored at offset 0, so the end offset equals the matched length.
    let consumed = caps.get(0)?.end();
    Some((DiscordComponent::Spoiler(parse(hidden)), consumed))
}

37
src/parse_link.rs Normal file
View File

@ -0,0 +1,37 @@
use lazy_static::lazy_static;
use regex::Regex;
use super::ast::DiscordComponent;
/// Parses a bare `http://`, `https://` or `steam://` URL at the start of
/// `text`.
///
/// A URL never ends in whitespace, `<`, or the trailing punctuation excluded
/// by the pattern's final character class. If the matched URL ends with `)`
/// and contains more closing than opening parentheses, that final `)` is
/// treated as belonging to the surrounding text and is left out.
///
/// Returns the link and the number of bytes consumed, which is the length of
/// the returned URL.
pub fn parse_link(text: &'_ str) -> Option<(DiscordComponent<'_>, usize)> {
    lazy_static! {
        static ref LINK: Regex =
            Regex::new(r#"^((?:https?|steam)://[^\s<]+[^<.,:;"'\]\s])"#).unwrap();
    }

    let matched = LINK.find(text)?.as_str();

    // Count parentheses as `usize` so that no input length can overflow the
    // counters.
    let has_unbalanced_close =
        |url: &str| url.matches(')').count() > url.matches('(').count();

    let link = match matched.strip_suffix(')') {
        Some(trimmed) if has_unbalanced_close(matched) => trimmed,
        _ => matched,
    };

    Some((DiscordComponent::Link(link), link.len()))
}

49
src/parse_quotes.rs Normal file
View File

@ -0,0 +1,49 @@
use lazy_static::lazy_static;
use regex::Regex;
use super::ast::DiscordComponent;
use super::parse_basic::parse;
/// Parses a quote at the start of `text`.
///
/// Two forms are recognised:
///
/// * `>>> ` quotes everything up to the end of `text`.
/// * `> ` quotes the rest of the line. Directly following lines that also
///   start with `> ` are merged into the same quote, separated by
///   [`DiscordComponent::LineBreak`].
///
/// The quoted content is itself parsed for nested formatting. For the
/// line-based form the newline that ends the last quoted line is not
/// consumed, so the caller still sees it after the quote.
pub fn parse_quotes(text: &'_ str) -> Option<(DiscordComponent<'_>, usize)> {
    lazy_static! {
        static ref QUOTE: Regex = Regex::new(r"^> (?P<body>.*?)(?P<endl>\n|$)").unwrap();
    }
    const MULTIQUOTE_PREFIX: &str = ">>> ";

    if let Some(rest) = text.strip_prefix(MULTIQUOTE_PREFIX) {
        return Some((DiscordComponent::Quote(parse(rest)), text.len()));
    }

    let first_line = QUOTE.captures(text)?;
    let mut body_components = parse(first_line.name("body")?.as_str());
    // Bytes matched so far, including each line's terminating newline.
    let mut whole_len = first_line.get(0)?.end();
    // Length of the line ending matched by the most recent quoted line.
    let mut endl_len = first_line.name("endl").map_or(0, |m| m.as_str().len());

    while let Some(next_line) = QUOTE.captures(&text[whole_len..]) {
        let next_body = next_line.name("body")?.as_str();
        body_components.push(DiscordComponent::LineBreak);
        body_components.extend(parse(next_body));
        endl_len = next_line.name("endl").map_or(0, |m| m.as_str().len());
        whole_len += next_line.get(0)?.end();
    }

    // Hand the final newline back to the caller.
    Some((
        DiscordComponent::Quote(body_components),
        whole_len - endl_len,
    ))
}