Initial commit
commit
bbf120f753
|
@ -0,0 +1 @@
|
|||
/target
|
|
@ -0,0 +1,49 @@
|
|||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "aho-corasick"
|
||||
version = "0.7.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "discord_message_format"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"lazy_static",
|
||||
"regex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lazy_static"
|
||||
version = "1.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc"
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.5.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.6.25"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b"
|
|
@ -0,0 +1,8 @@
|
|||
[package]
|
||||
name = "discord_message_format"
|
||||
version = "0.1.0"
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
lazy_static = "1.4.0"
|
||||
regex = "1.5.4"
|
|
@ -0,0 +1,10 @@
|
|||
# discord_message_format
|
||||
|
||||
AST construction for the format that Discord uses for messages.
|
||||
|
||||
This lets your bot be formatting-aware when it parses user messages.
|
||||
|
||||
## To Do
|
||||
|
||||
- User/role mentions
|
||||
- Emoji
|
|
@ -0,0 +1,22 @@
|
|||
/// A single node in the syntax tree of a Discord-formatted message.
///
/// Variants holding `&'a str` borrow directly from the text that was parsed, so a component
/// cannot outlive that text. Variants holding a `Vec` contain their already-parsed children.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DiscordComponent<'a> {
    /// A run of text carrying no formatting of its own.
    Plain(&'a str),
    /// A single character that was written with a backslash escape.
    Literal(char),
    /// A bare URL.
    Link(&'a str),

    /// Content wrapped in `**`.
    Bold(Vec<DiscordComponent<'a>>),
    /// Content wrapped in a single `_` or `*`.
    Italic(Vec<DiscordComponent<'a>>),
    /// Content wrapped in `~~`.
    Strikethrough(Vec<DiscordComponent<'a>>),
    /// Content wrapped in `__`.
    Underline(Vec<DiscordComponent<'a>>),

    /// Inline code wrapped in single backticks; the content is kept verbatim.
    Code(&'a str),
    /// A fenced code block.
    CodeBlock {
        /// The language tag written after the opening fence, if any.
        lang: Option<&'a str>,
        /// The verbatim content between the fences.
        source: &'a str,
    },

    /// Content wrapped in `||`.
    Spoiler(Vec<DiscordComponent<'a>>),

    /// A newline in the message.
    LineBreak,
    /// Quoted content introduced by `> ` or `>>> `.
    Quote(Vec<DiscordComponent<'a>>),
}
|
|
@ -0,0 +1,52 @@
|
|||
use super::DiscordComponent;
|
||||
|
||||
/// Conversion of a value into an HTML string.
pub trait ToHtml {
    /// Renders `self` as HTML markup and returns it as a newly allocated `String`.
    fn to_html(&self) -> String;
}
|
||||
|
||||
impl<'a> ToHtml for DiscordComponent<'a> {
|
||||
fn to_html(&self) -> String {
|
||||
match self {
|
||||
DiscordComponent::Plain(s) => s.to_string(), // TODO: Escape
|
||||
DiscordComponent::Literal(c) => c.to_string(),
|
||||
DiscordComponent::Link(target) => format!(r#"<a href="{0}">{0}</a>"#, target),
|
||||
|
||||
DiscordComponent::Bold(children) => {
|
||||
format!("<strong>{}</strong>", children.to_html())
|
||||
}
|
||||
DiscordComponent::Italic(children) => {
|
||||
format!("<em>{}</em>", children.to_html())
|
||||
}
|
||||
DiscordComponent::Strikethrough(children) => {
|
||||
format!("<del>{}</del>", children.to_html())
|
||||
}
|
||||
DiscordComponent::Underline(children) => {
|
||||
format!("<u>{}</u>", children.to_html())
|
||||
}
|
||||
|
||||
DiscordComponent::Code(source) => format!("<code>{}</code>", source),
|
||||
DiscordComponent::CodeBlock { lang, source } => {
|
||||
let language_class = lang
|
||||
.map(|l| " class=\"language-".to_owned() + l + "\"")
|
||||
.unwrap_or_else(String::new);
|
||||
|
||||
format!("<pre><code{}>{}</code></pre>", language_class, source)
|
||||
}
|
||||
|
||||
DiscordComponent::Spoiler(children) => {
|
||||
format!("<span data-mx-spoiler>{}</span>", children.to_html())
|
||||
}
|
||||
|
||||
DiscordComponent::LineBreak => "<br>".to_string(),
|
||||
DiscordComponent::Quote(children) => {
|
||||
format!("<blockquote>{}</blockquote>", children.to_html())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> ToHtml for Vec<DiscordComponent<'a>> {
    /// Renders each component in order and concatenates the results into one string.
    fn to_html(&self) -> String {
        let mut html = String::new();
        for component in self {
            html.push_str(&component.to_html());
        }
        html
    }
}
|
|
@ -0,0 +1,13 @@
|
|||
mod ast;
|
||||
|
||||
mod convert;
|
||||
|
||||
mod parse_basic;
|
||||
mod parse_code;
|
||||
mod parse_inline_style;
|
||||
mod parse_link;
|
||||
mod parse_quotes;
|
||||
|
||||
pub use ast::DiscordComponent;
|
||||
pub use convert::ToHtml;
|
||||
pub use parse_basic::parse;
|
|
@ -0,0 +1,87 @@
|
|||
use super::{
|
||||
ast::DiscordComponent, parse_code::*, parse_inline_style::*, parse_link::*, parse_quotes::*,
|
||||
};
|
||||
|
||||
pub fn parse(text: &'_ str) -> Vec<DiscordComponent<'_>> {
|
||||
let mut tokens = Vec::new();
|
||||
let mut working_plain_start: isize = -1;
|
||||
let mut i = 0;
|
||||
while i < text.len() {
|
||||
let is_line_start = matches!(tokens.last(), None | Some(DiscordComponent::LineBreak));
|
||||
|
||||
match parse_token(&text[i..], is_line_start) {
|
||||
Some((token, consumed)) => {
|
||||
if working_plain_start >= 0 {
|
||||
let plain_start = working_plain_start as usize;
|
||||
tokens.push(DiscordComponent::Plain(&text[plain_start..i]));
|
||||
working_plain_start = -1;
|
||||
}
|
||||
|
||||
tokens.push(token);
|
||||
i += consumed;
|
||||
}
|
||||
|
||||
None => {
|
||||
if working_plain_start < 0 {
|
||||
working_plain_start = i as isize;
|
||||
}
|
||||
|
||||
let mut next_char = i + 1;
|
||||
while !text.is_char_boundary(next_char) {
|
||||
next_char += 1;
|
||||
}
|
||||
i = next_char;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if working_plain_start >= 0 {
|
||||
let plain_start = working_plain_start as usize;
|
||||
tokens.push(DiscordComponent::Plain(&text[plain_start..]));
|
||||
}
|
||||
|
||||
tokens
|
||||
}
|
||||
|
||||
pub fn parse_token(text: &'_ str, is_line_start: bool) -> Option<(DiscordComponent<'_>, usize)> {
|
||||
parse_escaped_literal(text)
|
||||
.or_else(|| {
|
||||
if is_line_start {
|
||||
parse_quotes(text)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.or_else(|| parse_code_block(text))
|
||||
.or_else(|| parse_code(text))
|
||||
.or_else(|| parse_link(text))
|
||||
.or_else(|| parse_bold(text))
|
||||
.or_else(|| parse_italic(text))
|
||||
.or_else(|| parse_strikethrough(text))
|
||||
.or_else(|| parse_underline(text))
|
||||
.or_else(|| parse_spoiler(text))
|
||||
.or_else(|| parse_line_break(text))
|
||||
}
|
||||
|
||||
pub fn parse_escaped_literal(text: &'_ str) -> Option<(DiscordComponent<'_>, usize)> {
|
||||
let mut chars = text.chars();
|
||||
if let Some('\\') = chars.next() {
|
||||
return match chars.next() {
|
||||
Some(x @ '\\') | Some(x @ '`') | Some(x @ '*') | Some(x @ '_') | Some(x @ '{')
|
||||
| Some(x @ '}') | Some(x @ '[') | Some(x @ ']') | Some(x @ '(') | Some(x @ ')')
|
||||
| Some(x @ '#') | Some(x @ '+') | Some(x @ '-') | Some(x @ '.') | Some(x @ '!') => {
|
||||
Some((DiscordComponent::Literal(x), 2))
|
||||
}
|
||||
_ => None,
|
||||
};
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
pub fn parse_line_break(text: &'_ str) -> Option<(DiscordComponent<'_>, usize)> {
|
||||
if text.starts_with('\n') {
|
||||
return Some((DiscordComponent::LineBreak, 1));
|
||||
}
|
||||
|
||||
None
|
||||
}
|
|
@ -0,0 +1,36 @@
|
|||
use lazy_static::lazy_static;
|
||||
use regex::Regex;
|
||||
|
||||
use super::ast::DiscordComponent;
|
||||
|
||||
pub fn parse_code_block(text: &'_ str) -> Option<(DiscordComponent<'_>, usize)> {
|
||||
lazy_static! {
|
||||
static ref CODE_BLOCK: Regex =
|
||||
Regex::new(r"^```(?P<lang>.+)?\n(?P<source>.*?)\n```").unwrap();
|
||||
}
|
||||
|
||||
if let Some(caps) = CODE_BLOCK.captures(text) {
|
||||
let lang = caps.name("lang").map(|m| m.as_str());
|
||||
let source = caps.name("source").unwrap().as_str();
|
||||
let whole_len = caps.get(0).unwrap().as_str().len();
|
||||
|
||||
return Some((DiscordComponent::CodeBlock { lang, source }, whole_len));
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
pub fn parse_code(text: &'_ str) -> Option<(DiscordComponent<'_>, usize)> {
|
||||
lazy_static! {
|
||||
static ref CODE: Regex = Regex::new(r"^`(?P<inner>.+?)`").unwrap();
|
||||
}
|
||||
|
||||
if let Some(caps) = CODE.captures(text) {
|
||||
let inner = caps.name("inner").unwrap().as_str();
|
||||
let whole_len = caps.get(0).unwrap().as_str().len();
|
||||
|
||||
return Some((DiscordComponent::Code(inner), whole_len));
|
||||
}
|
||||
|
||||
None
|
||||
}
|
|
@ -0,0 +1,84 @@
|
|||
use lazy_static::lazy_static;
|
||||
use regex::Regex;
|
||||
|
||||
use super::ast::DiscordComponent;
|
||||
use super::parse_basic::parse;
|
||||
|
||||
pub fn parse_bold(text: &'_ str) -> Option<(DiscordComponent<'_>, usize)> {
|
||||
lazy_static! {
|
||||
static ref BOLD: Regex = Regex::new(r"^\*\*(?P<inner>.+?)\*\*").unwrap();
|
||||
}
|
||||
|
||||
if let Some(caps) = BOLD.captures(text) {
|
||||
let inner = caps.name("inner").unwrap().as_str();
|
||||
let whole_len = caps.get(0).unwrap().as_str().len();
|
||||
|
||||
return Some((DiscordComponent::Bold(parse(inner)), whole_len));
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
pub fn parse_italic(text: &'_ str) -> Option<(DiscordComponent<'_>, usize)> {
|
||||
lazy_static! {
|
||||
static ref ITALIC_UNDERSCORE: Regex = Regex::new(r"^_(?P<inner>.+?)_").unwrap();
|
||||
static ref ITALIC_ASTERISK: Regex = Regex::new(r"^\*(?P<inner>.+?)\*").unwrap();
|
||||
}
|
||||
|
||||
if let Some(caps) = ITALIC_UNDERSCORE
|
||||
.captures(text)
|
||||
.or_else(|| ITALIC_ASTERISK.captures(text))
|
||||
{
|
||||
let inner = caps.name("inner").unwrap().as_str();
|
||||
let whole_len = caps.get(0).unwrap().as_str().len();
|
||||
|
||||
return Some((DiscordComponent::Italic(parse(inner)), whole_len));
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
pub fn parse_strikethrough(text: &'_ str) -> Option<(DiscordComponent<'_>, usize)> {
|
||||
lazy_static! {
|
||||
static ref STRIKETHROUGH: Regex = Regex::new(r"^~~(?P<inner>.+?)~~").unwrap();
|
||||
}
|
||||
|
||||
if let Some(caps) = STRIKETHROUGH.captures(text) {
|
||||
let inner = caps.name("inner").unwrap().as_str();
|
||||
let whole_len = caps.get(0).unwrap().as_str().len();
|
||||
|
||||
return Some((DiscordComponent::Strikethrough(parse(inner)), whole_len));
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
pub fn parse_underline(text: &'_ str) -> Option<(DiscordComponent<'_>, usize)> {
|
||||
lazy_static! {
|
||||
static ref UNDERLINE: Regex = Regex::new(r"^__(?P<inner>.+?)__").unwrap();
|
||||
}
|
||||
|
||||
if let Some(caps) = UNDERLINE.captures(text) {
|
||||
let inner = caps.name("inner").unwrap().as_str();
|
||||
let whole_len = caps.get(0).unwrap().as_str().len();
|
||||
|
||||
return Some((DiscordComponent::Underline(parse(inner)), whole_len));
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
pub fn parse_spoiler(text: &'_ str) -> Option<(DiscordComponent<'_>, usize)> {
|
||||
lazy_static! {
|
||||
static ref SPOILER: Regex = Regex::new(r"^\|\|(?P<inner>.+?)\|\|").unwrap();
|
||||
}
|
||||
|
||||
if let Some(caps) = SPOILER.captures(text) {
|
||||
let inner = caps.name("inner").unwrap().as_str();
|
||||
let whole_len = caps.get(0).unwrap().as_str().len();
|
||||
|
||||
return Some((DiscordComponent::Spoiler(parse(inner)), whole_len));
|
||||
}
|
||||
|
||||
None
|
||||
}
|
|
@ -0,0 +1,37 @@
|
|||
use lazy_static::lazy_static;
|
||||
use regex::Regex;
|
||||
|
||||
use super::ast::DiscordComponent;
|
||||
|
||||
pub fn parse_link(text: &'_ str) -> Option<(DiscordComponent<'_>, usize)> {
|
||||
lazy_static! {
|
||||
static ref LINK: Regex =
|
||||
Regex::new(r#"^((?:https?|steam)://[^\s<]+[^<.,:;"'\]\s])"#).unwrap();
|
||||
}
|
||||
|
||||
if let Some(caps) = LINK.captures(text) {
|
||||
let mut link_range = caps.get(0).unwrap().range();
|
||||
let orig_link = &text[link_range.clone()];
|
||||
|
||||
if orig_link.ends_with(')') {
|
||||
let mut bracket_balance: i16 = 0;
|
||||
for c in orig_link.chars() {
|
||||
if c == '(' {
|
||||
bracket_balance -= 1;
|
||||
} else if c == ')' {
|
||||
bracket_balance += 1;
|
||||
}
|
||||
}
|
||||
|
||||
if bracket_balance > 0 {
|
||||
link_range = link_range.start..(link_range.end - 1);
|
||||
}
|
||||
}
|
||||
|
||||
let link = &text[link_range];
|
||||
return Some((DiscordComponent::Link(link), link.len()));
|
||||
}
|
||||
|
||||
// TODO(Charlotte): Parse links
|
||||
None
|
||||
}
|
|
@ -0,0 +1,49 @@
|
|||
use lazy_static::lazy_static;
|
||||
use regex::Regex;
|
||||
|
||||
use super::ast::DiscordComponent;
|
||||
use super::parse_basic::parse;
|
||||
|
||||
pub fn parse_quotes(text: &'_ str) -> Option<(DiscordComponent<'_>, usize)> {
|
||||
lazy_static! {
|
||||
static ref QUOTE: Regex = Regex::new(r"^> (?P<body>.*?)(?P<endl>\n|$)").unwrap();
|
||||
}
|
||||
static MULTIQUOTE_PREFIX: &str = ">>> ";
|
||||
|
||||
if text.starts_with(MULTIQUOTE_PREFIX) {
|
||||
return Some((
|
||||
DiscordComponent::Quote(parse(text.strip_prefix(MULTIQUOTE_PREFIX).unwrap())),
|
||||
text.len(),
|
||||
));
|
||||
}
|
||||
|
||||
if let Some(caps) = QUOTE.captures(text) {
|
||||
let body = caps.name("body").unwrap().as_str();
|
||||
let mut endl = caps.name("endl");
|
||||
|
||||
let mut whole_len = caps.get(0).unwrap().as_str().len();
|
||||
let mut body_components = parse(body);
|
||||
|
||||
while let Some(next_line_caps) = QUOTE.captures(&text[whole_len..]) {
|
||||
dbg!(&next_line_caps);
|
||||
|
||||
let next_line_body = next_line_caps.name("body").unwrap().as_str();
|
||||
endl = next_line_caps.name("endl");
|
||||
|
||||
let mut next_body_components = parse(next_line_body);
|
||||
body_components.push(DiscordComponent::LineBreak);
|
||||
body_components.append(&mut next_body_components);
|
||||
|
||||
whole_len += next_line_caps.get(0).unwrap().as_str().len();
|
||||
}
|
||||
|
||||
let endl_len = endl.map(|m| m.as_str().len()).unwrap_or(0);
|
||||
|
||||
return Some((
|
||||
DiscordComponent::Quote(body_components),
|
||||
whole_len - endl_len,
|
||||
));
|
||||
}
|
||||
|
||||
None
|
||||
}
|
Loading…
Reference in New Issue