From 57a7c79ca17ddaa02c85b391fc9c09cba4b8154d Mon Sep 17 00:00:00 2001
From: videogame hacker
Date: Thu, 28 Apr 2022 13:45:47 +0100
Subject: [PATCH] Initial commit: Rust support only

---
 .gitignore       |   2 +
 Cargo.toml       |  14 ++++++
 src/languages.rs |  31 +++++++++++++
 src/lib.rs       | 110 +++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 157 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 Cargo.toml
 create mode 100644 src/languages.rs
 create mode 100644 src/lib.rs

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..96ef6c0
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+/target
+Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..813f9de
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,14 @@
+[package]
+name = "chroma-syntaxis"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+html-escape = "0.2.11"
+once_cell = "1.10.0"
+thiserror = "1.0.30"
+tree-sitter = "0.20.6"
+tree-sitter-highlight = "0.20.1"
+tree-sitter-rust = "0.20.1"
diff --git a/src/languages.rs b/src/languages.rs
new file mode 100644
index 0000000..ca8c2dd
--- /dev/null
+++ b/src/languages.rs
@@ -0,0 +1,31 @@
+use tree_sitter_highlight::HighlightConfiguration;
+
+/// Configures `config` to recognise every capture name declared by its own
+/// highlight query, and returns the configuration together with those names.
+///
+/// The position of a name in the returned list is the index that highlight
+/// events use to refer to it.
+fn also_configure(mut config: HighlightConfiguration) -> (HighlightConfiguration, Vec<String>) {
+    let capture_names = config.query.capture_names().to_vec();
+    config.configure(&capture_names);
+
+    (config, capture_names)
+}
+
+/// Builds the highlight configuration for `lang`.
+///
+/// Returns `None` when `lang` is not supported (only `"rust"` is at present)
+/// or when the highlight configuration cannot be constructed.
+pub fn get_highlight_config(lang: &str) -> Option<(HighlightConfiguration, Vec<String>)> {
+    match lang {
+        "rust" => HighlightConfiguration::new(
+            tree_sitter_rust::language(),
+            tree_sitter_rust::HIGHLIGHT_QUERY,
+            "", // no injections query
+            "", // no locals query
+        )
+        .map(also_configure)
+        .ok(),
+        _ => None,
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..9163f84
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,110 @@
+use html_escape::{encode_double_quoted_attribute, encode_text};
+use tree_sitter_highlight::{HighlightEvent, Highlighter};
+
+mod languages;
+
+/// Highlights `source` as Rust and returns it as an HTML fragment.
+///
+/// Source text is HTML-escaped. Text covered by at least one highlight capture
+/// is wrapped in `<span class="...">`, where the class list holds the active
+/// capture names, outermost first, separated by spaces. Consecutive pieces of
+/// text with the same active captures share a single span.
+///
+/// # Panics
+///
+/// Panics if the Rust highlight configuration cannot be built or if the
+/// highlighter reports an error.
+pub fn highlight(source: &str) -> String {
+    let source = source.as_bytes();
+
+    let mut highlighter = Highlighter::new();
+    let (highlight_config, capture_names) =
+        languages::get_highlight_config("rust").expect("Could not get Rust language config");
+
+    let items = highlighter
+        .highlight(&highlight_config, source, None, |lang| {
+            // `Box::leak` turns the freshly built configuration into a reference the
+            // highlighter can keep; the allocation is never freed.
+            languages::get_highlight_config(lang).map(|x| /* ugh */ &*Box::leak(Box::new(x.0)))
+        })
+        .unwrap();
+
+    let mut highlight_attrs: Vec<&str> = Vec::new();
+    let mut out = String::new();
+
+    // Collapse adjacent identical attribute sets
+    let mut last_attrs: u64 = 0;
+    let mut span_is_open = false;
+
+    for item in items {
+        match item.unwrap() {
+            HighlightEvent::Source { start, end } => {
+                let source_section_bytes = &source[start..end];
+                let source_section = String::from_utf8_lossy(source_section_bytes);
+
+                let attr_hash = {
+                    use std::{
+                        collections::hash_map::DefaultHasher,
+                        hash::{Hash, Hasher},
+                    };
+
+                    let mut hasher = DefaultHasher::new();
+                    highlight_attrs.hash(&mut hasher);
+                    hasher.finish()
+                };
+
+                if last_attrs != attr_hash {
+                    if span_is_open {
+                        out.push_str("</span>");
+                        span_is_open = false;
+                    }
+
+                    if !highlight_attrs.is_empty() {
+                        let classes = highlight_attrs.join(" ");
+                        out.push_str("<span class=\"");
+                        out.push_str(&encode_double_quoted_attribute(&classes));
+                        out.push_str("\">");
+                        span_is_open = true;
+                    }
+
+                    // Record the state even when no span was opened; otherwise text
+                    // that returns to the previous attribute set after a plain gap
+                    // would compare equal and be emitted without its span.
+                    last_attrs = attr_hash;
+                }
+
+                out.push_str(&encode_text(&source_section));
+            }
+            HighlightEvent::HighlightStart(highlight) => {
+                let capture_name = &capture_names[highlight.0];
+                highlight_attrs.push(capture_name);
+            }
+            HighlightEvent::HighlightEnd => {
+                highlight_attrs.pop();
+            }
+        }
+    }
+
+    if span_is_open {
+        out.push_str("</span>");
+    }
+
+    out
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::highlight;
+
+    #[test]
+    fn highlight_rust() {
+        println!(
+            "{}",
+            highlight(
+                r#"fn main() {
+    println!("Hello, world!");
+}"#
+            )
+        )
+    }
+}