From 2b62e2e81027d25f6f0a1a8ede21d4b555c6412c Mon Sep 17 00:00:00 2001 From: videogame hacker Date: Thu, 26 May 2022 20:07:23 +0200 Subject: [PATCH] Create a struct to hold highlight configurations instead of leaking memory --- Cargo.toml | 22 ++-- samples/fizzbuzz.js.html | 10 +- samples/fizzbuzz.py.html | 4 +- samples/fizzbuzz.rs.html | 12 +- src/{languages/built_in.rs => languages.rs} | 37 +++--- src/languages/mod.rs | 32 ------ src/lib.rs | 119 +++++++++++++------- 7 files changed, 120 insertions(+), 116 deletions(-) rename src/{languages/built_in.rs => languages.rs} (82%) delete mode 100644 src/languages/mod.rs diff --git a/Cargo.toml b/Cargo.toml index bcb561d..1fe096a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,14 +3,6 @@ name = "chroma-syntaxis" version = "0.1.0" edition = "2021" -[features] -default = ["built-in"] -built-in = [ - "tree-sitter-rust", "tree-sitter-javascript", "tree-sitter-regex", - "tree-sitter-python", "tree-sitter-c", "tree-sitter-cpp", - "tree-sitter-typescript", -] - [dependencies] html-escape = "0.2.11" once_cell = "1.10.0" @@ -18,10 +10,10 @@ thiserror = "1.0.30" tree-sitter = "0.20.6" tree-sitter-highlight = "0.20.1" -tree-sitter-rust = { version = "0.20.1", optional = true } -tree-sitter-javascript = { version = "0.20.0", optional = true } -tree-sitter-regex = { git = "https://github.com/tree-sitter/tree-sitter-regex.git", rev = "e1cfca", optional = true } -tree-sitter-python = { version = "0.19.1", optional = true } -tree-sitter-c = { version = "0.20.1", optional = true } -tree-sitter-cpp = { version = "0.20.0", optional = true } -tree-sitter-typescript = { version = "0.20.1", optional = true } +tree-sitter-rust = { version = "0.20.1" } +tree-sitter-javascript = { version = "0.20.0" } +tree-sitter-regex = { git = "https://github.com/tree-sitter/tree-sitter-regex.git", rev = "e1cfca" } +tree-sitter-python = { version = "0.19.1" } +tree-sitter-c = { version = "0.20.1" } +tree-sitter-cpp = { version = "0.20.0" } +tree-sitter-typescript = { version = "0.20.1" } diff --git a/samples/fizzbuzz.js.html b/samples/fizzbuzz.js.html index 5f9009f..7bfdbe1 100644 --- a/samples/fizzbuzz.js.html +++ b/samples/fizzbuzz.js.html @@ -1,16 +1,16 @@ const fizzbuzz = n => { - if (n % 15 == 0) + if (n % 15 == 0) return "Fizz buzz"; - if (n % 3 == 0) + if (n % 3 == 0) return "Fizz"; - if (n % 5 == 0) + if (n % 5 == 0) return "Buzz"; return n; } const main = () => { - for (let n = 1; n <= 100; n++) { - console.log(fizzbuzz(n)); + for (let n = 1; n <= 100; n++) { + console.log(fizzbuzz(n)); } }; diff --git a/samples/fizzbuzz.py.html b/samples/fizzbuzz.py.html index b0d089b..2ea2634 100644 --- a/samples/fizzbuzz.py.html +++ b/samples/fizzbuzz.py.html @@ -1,7 +1,7 @@ def main(): - for n in range(1, 101): + for n in range(1, 101): lut = [n, "Fizz", "Buzz", "Fizz Buzz"] - idx = (n % 3 == 0) + 2 * (n % 5 == 0) + idx = (n % 3 == 0) + 2 * (n % 5 == 0) print(lut[idx]) if __name__ == "__main__": diff --git a/samples/fizzbuzz.rs.html b/samples/fizzbuzz.rs.html index 2d6d019..072a1c0 100644 --- a/samples/fizzbuzz.rs.html +++ b/samples/fizzbuzz.rs.html @@ -1,10 +1,10 @@ fn main() { - for n in 1..=100 { - match (n % 3, n % 5) { - (0, 0) => println!("Fizz buzz"), - (0, _) => println!("Fizz"), - (_, 0) => println!("Buzz"), - _ => println!("{}", n), + for n in 1..=100 { + match (n % 3, n % 5) { + (0, 0) => println!("Fizz buzz"), + (0, _) => println!("Fizz"), + (_, 0) => println!("Buzz"), + _ => println!("{}", n), } } } diff --git a/src/languages/built_in.rs b/src/languages.rs similarity index 82% rename from src/languages/built_in.rs rename to src/languages.rs index 4f111d7..768131b 100644 --- a/src/languages/built_in.rs +++ b/src/languages.rs @@ -1,7 +1,7 @@ use tree_sitter::QueryError; use tree_sitter_highlight::HighlightConfiguration; -use crate::register_language; +use crate::SyntaxHighlighter; pub static COMMON_HIGHLIGHT_NAMES: &[&str] = &[ "attribute", @@ -40,26 +40,33 @@ pub static COMMON_HIGHLIGHT_NAMES: &[&str] = &[ "variable.parameter", ]; -pub fn register_builtin_languages() { - register_language("rust", rust_hl_factory); +pub fn register_builtin_languages(highlighter: &mut SyntaxHighlighter) { + let mut add = |lang: &str, factory: fn() -> Result| { + if let Ok(mut config) = factory() { + config.configure(highlighter.highlight_names); + highlighter.register(lang.to_string(), config); + } + }; - register_language("javascript", javascript_hl_factory); - register_language("js", javascript_hl_factory); - register_language("jsx", jsx_hl_factory); + add("rust", rust_hl_factory); - register_language("regex", regex_hl_factory); + add("javascript", javascript_hl_factory); + add("js", javascript_hl_factory); + add("jsx", jsx_hl_factory); - register_language("python", python_hl_factory); - register_language("py", python_hl_factory); + add("regex", regex_hl_factory); - register_language("c", c_hl_factory); + add("python", python_hl_factory); + add("py", python_hl_factory); - register_language("cpp", cpp_hl_factory); - register_language("c++", cpp_hl_factory); + add("c", c_hl_factory); - register_language("typescript", typescript_hl_factory); - register_language("ts", typescript_hl_factory); - register_language("tsx", tsx_hl_factory); + add("cpp", cpp_hl_factory); + add("c++", cpp_hl_factory); + + add("typescript", typescript_hl_factory); + add("ts", typescript_hl_factory); + add("tsx", tsx_hl_factory); } pub fn rust_hl_factory() -> Result { diff --git a/src/languages/mod.rs b/src/languages/mod.rs deleted file mode 100644 index 331b3cb..0000000 --- a/src/languages/mod.rs +++ /dev/null @@ -1,32 +0,0 @@ -use std::{collections::HashMap, sync::Mutex}; - -use once_cell::sync::Lazy; -use tree_sitter::QueryError; -use tree_sitter_highlight::HighlightConfiguration; - -pub type HighlightConfigFactory = fn() -> Result; - -static LANGUAGES: Lazy>> = - Lazy::new(|| Mutex::new(HashMap::new())); - -#[cfg(feature = "built-in")] -pub mod built_in; - -pub fn register_language(lang: impl Into, factory: HighlightConfigFactory) { - let mut languages = LANGUAGES.lock().unwrap(); - languages.insert(lang.into(), factory); -} - -pub fn get_highlight_config( - lang: &str, - highlight_names: &[&str], -) -> Option { - let languages = LANGUAGES.lock().unwrap(); - languages - .get(lang) - .and_then(|factory| factory().ok()) - .map(|mut config| { - config.configure(highlight_names); - config - }) -} diff --git a/src/lib.rs b/src/lib.rs index d325de0..4f7a29f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,11 +1,9 @@ +use std::collections::HashMap; + use html_escape::encode_text; -use tree_sitter_highlight::{HighlightEvent, Highlighter}; +use tree_sitter_highlight::{HighlightConfiguration, HighlightEvent, Highlighter}; -mod languages; - -#[cfg(feature = "built-in")] -pub use languages::built_in; -pub use languages::register_language; +pub mod languages; fn get_hash_for_attrs(attrs: &[&str]) -> u64 { use std::{ @@ -30,22 +28,11 @@ fn write_opening_tag(out: &mut String, attrs: &[&str]) { out.push_str("\">"); } -pub fn highlight(lang: &str, source: &str, highlight_names: &[&str]) -> String { - let highlight_config = match languages::get_highlight_config(lang, highlight_names) { - Some(conf) => conf, - None => return source.to_string(), - }; - - let source_bytes = source.as_bytes(); - - let mut highlighter = Highlighter::new(); - let items = highlighter - .highlight(&highlight_config, source_bytes, None, |injected_lang| { - languages::get_highlight_config(injected_lang, highlight_names) - .map(|x| /* ugh */ &*Box::leak(Box::new(x))) - }) - .unwrap(); - +fn build_highlighted_html<'a>( + source: &'a str, + events: impl Iterator + 'a, + highlight_names: &[&str], +) -> String { let mut highlight_attrs: Vec<&str> = Vec::new(); let mut out = String::new(); @@ -53,8 +40,8 @@ pub fn highlight(lang: &str, source: &str, highlight_names: &[&str]) -> String { let mut last_attrs: u64 = 0; let mut tag_is_open = false; - for item in items { - match item.unwrap() { + for event in events { + match event { HighlightEvent::Source { start, end } => { let source_section = &source[start..end]; @@ -74,7 +61,7 @@ pub fn highlight(lang: &str, source: &str, highlight_names: &[&str]) -> String { } HighlightEvent::HighlightStart(highlight) => { - let capture_name = &highlight_names[highlight.0]; + let capture_name = highlight_names[highlight.0]; highlight_attrs.push(capture_name); } @@ -91,47 +78,97 @@ pub fn highlight(lang: &str, source: &str, highlight_names: &[&str]) -> String { out } +pub struct SyntaxHighlighter<'a> { + languages: HashMap, + highlight_names: &'a [&'a str], +} + +impl SyntaxHighlighter<'_> { + pub fn new() -> Self { + let mut highlighter = Self { + languages: HashMap::new(), + highlight_names: languages::COMMON_HIGHLIGHT_NAMES, + }; + + languages::register_builtin_languages(&mut highlighter); + + highlighter + } +} + +impl Default for SyntaxHighlighter<'_> { + fn default() -> Self { + Self::new() + } +} + +impl<'a> SyntaxHighlighter<'a> { + pub fn new_empty(highlight_names: &'a [&'a str]) -> Self { + Self { + languages: HashMap::new(), + highlight_names, + } + } + + pub fn register(&mut self, lang: String, config: HighlightConfiguration) { + self.languages.insert(lang, config); + } + + pub fn highlight(&self, lang: &str, source: &str) -> String { + let highlight_config = match self.languages.get(lang) { + Some(config) => config, + None => return source.to_string(), + }; + + let source_bytes = source.as_bytes(); + let mut highlighter = Highlighter::new(); + + let highlight_result = + highlighter.highlight(highlight_config, source_bytes, None, |injected_lang| { + self.languages.get(injected_lang) + }); + + let events = match highlight_result { + Ok(events) => events, + Err(_) => return source.to_string(), + } + .filter_map(|e| e.ok()); + + build_highlighted_html(source, events, self.highlight_names) + } +} + #[cfg(test)] -#[cfg(feature = "built-in")] mod tests { use crate::*; #[test] fn highlight_rust() { - built_in::register_builtin_languages(); + let highlighter = SyntaxHighlighter::new(); let source = include_str!("../samples/fizzbuzz.rs"); let expected_result = include_str!("../samples/fizzbuzz.rs.html"); - assert_eq!( - highlight("rust", source, built_in::COMMON_HIGHLIGHT_NAMES), - expected_result - ); + assert_eq!(highlighter.highlight("rust", source), expected_result); } #[test] fn highlight_js() { - built_in::register_builtin_languages(); + let highlighter = SyntaxHighlighter::new(); let source = include_str!("../samples/fizzbuzz.js"); let expected_result = include_str!("../samples/fizzbuzz.js.html"); - assert_eq!( - highlight("js", source, built_in::COMMON_HIGHLIGHT_NAMES), - expected_result - ); + assert_eq!(highlighter.highlight("js", source), expected_result); } #[test] fn highlight_python() { - built_in::register_builtin_languages(); + let highlighter = SyntaxHighlighter::new(); let source = include_str!("../samples/fizzbuzz.py"); let expected_result = include_str!("../samples/fizzbuzz.py.html"); - assert_eq!( - highlight("python", source, built_in::COMMON_HIGHLIGHT_NAMES), - expected_result - ); + assert_eq!(highlighter.highlight("python", source), expected_result); } }