From 2cc38dba2c6739ce1c61a7f065aea54c6413e712 Mon Sep 17 00:00:00 2001 From: videogame hacker Date: Thu, 28 Apr 2022 17:00:06 +0100 Subject: [PATCH] Refactor: Add built-in support for Rust, JS, Python --- .editorconfig | 9 +++ Cargo.toml | 10 ++- samples/fizzbuzz.js | 17 +++++ samples/fizzbuzz.js.html | 17 +++++ samples/fizzbuzz.py | 8 +++ samples/fizzbuzz.py.html | 8 +++ samples/fizzbuzz.rs | 10 +++ samples/fizzbuzz.rs.html | 10 +++ src/languages.rs | 27 -------- src/languages/built_in.rs | 83 ++++++++++++++++++++++ src/languages/mod.rs | 32 +++++++++ src/lib.rs | 142 ++++++++++++++++++++++++-------------- 12 files changed, 293 insertions(+), 80 deletions(-) create mode 100644 .editorconfig create mode 100644 samples/fizzbuzz.js create mode 100644 samples/fizzbuzz.js.html create mode 100644 samples/fizzbuzz.py create mode 100644 samples/fizzbuzz.py.html create mode 100644 samples/fizzbuzz.rs create mode 100644 samples/fizzbuzz.rs.html delete mode 100644 src/languages.rs create mode 100644 src/languages/built_in.rs create mode 100644 src/languages/mod.rs diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..73390a1 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,9 @@ +root = true + +[*] +indent_style = space +indent_size = 4 +end_of_line = lf +charset = utf-8 +trim_trailing_whitespace = false +insert_final_newline = true diff --git a/Cargo.toml b/Cargo.toml index 813f9de..ce8b73f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,7 +3,9 @@ name = "chroma-syntaxis" version = "0.1.0" edition = "2021" -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[features] +default = ["built-in"] +built-in = ["tree-sitter-rust", "tree-sitter-javascript", "tree-sitter-regex", "tree-sitter-python"] [dependencies] html-escape = "0.2.11" @@ -11,4 +13,8 @@ once_cell = "1.10.0" thiserror = "1.0.30" tree-sitter = "0.20.6" tree-sitter-highlight = "0.20.1" -tree-sitter-rust = "0.20.1" + +tree-sitter-rust = { version = "0.20.1", optional = true } +tree-sitter-javascript = { version = "0.20.0", optional = true } +tree-sitter-regex = { git = "https://github.com/tree-sitter/tree-sitter-regex.git", rev = "e1cfca", optional = true } +tree-sitter-python = { version = "0.19.1", optional = true } diff --git a/samples/fizzbuzz.js b/samples/fizzbuzz.js new file mode 100644 index 0000000..784be54 --- /dev/null +++ b/samples/fizzbuzz.js @@ -0,0 +1,17 @@ +const fizzbuzz = n => { + if (n % 15 == 0) + return "Fizz buzz"; + if (n % 3 == 0) + return "Fizz"; + if (n % 5 == 0) + return "Buzz"; + return n; +} + +const main = () => { + for (let n = 1; n <= 100; n++) { + console.log(fizzbuzz(n)); + } +}; + +main(); diff --git a/samples/fizzbuzz.js.html b/samples/fizzbuzz.js.html new file mode 100644 index 0000000..5f9009f --- /dev/null +++ b/samples/fizzbuzz.js.html @@ -0,0 +1,17 @@ +const fizzbuzz = n => { + if (n % 15 == 0) + return "Fizz buzz"; + if (n % 3 == 0) + return "Fizz"; + if (n % 5 == 0) + return "Buzz"; + return n; +} + +const main = () => { + for (let n = 1; n <= 100; n++) { + console.log(fizzbuzz(n)); + } +}; + +main(); diff --git a/samples/fizzbuzz.py b/samples/fizzbuzz.py new file mode 100644 index 0000000..04b47a3 --- /dev/null +++ b/samples/fizzbuzz.py @@ -0,0 +1,8 @@ +def main(): + for n in range(1, 101): + lut = [n, "Fizz", "Buzz", "Fizz Buzz"] + idx = (n % 3 == 0) + 2 * (n % 5 == 0) + print(lut[idx]) + +if __name__ == "__main__": + main() diff --git a/samples/fizzbuzz.py.html b/samples/fizzbuzz.py.html new file mode 100644 index 0000000..b0d089b --- /dev/null +++ b/samples/fizzbuzz.py.html @@ -0,0 +1,8 @@ +def main(): + for n in range(1, 101): + lut = [n, "Fizz", "Buzz", "Fizz Buzz"] + idx = (n % 3 == 0) + 2 * (n % 5 == 0) + print(lut[idx]) + +if __name__ == "__main__": + main() diff --git a/samples/fizzbuzz.rs b/samples/fizzbuzz.rs new file mode 100644 index 0000000..7043a03 --- /dev/null +++ b/samples/fizzbuzz.rs @@ -0,0 +1,10 @@ +fn main() { + for n in 1..=100 { + match (n % 3, n % 5) { + (0, 0) => println!("Fizz buzz"), + (0, _) => println!("Fizz"), + (_, 0) => println!("Buzz"), + _ => println!("{}", n), + } + } +} diff --git a/samples/fizzbuzz.rs.html b/samples/fizzbuzz.rs.html new file mode 100644 index 0000000..2d6d019 --- /dev/null +++ b/samples/fizzbuzz.rs.html @@ -0,0 +1,10 @@ +fn main() { + for n in 1..=100 { + match (n % 3, n % 5) { + (0, 0) => println!("Fizz buzz"), + (0, _) => println!("Fizz"), + (_, 0) => println!("Buzz"), + _ => println!("{}", n), + } + } +} diff --git a/src/languages.rs b/src/languages.rs deleted file mode 100644 index ca8c2dd..0000000 --- a/src/languages.rs +++ /dev/null @@ -1,27 +0,0 @@ -use tree_sitter_highlight::HighlightConfiguration; - -fn also_configure(mut config: HighlightConfiguration) -> (HighlightConfiguration, Vec) { - let capture_names: Vec<_> = config - .query - .capture_names() - .iter() - .map(String::clone) - .collect(); - config.configure(&capture_names); - - (config, capture_names) -} - -pub fn get_highlight_config(lang: &str) -> Option<(HighlightConfiguration, Vec)> { - match lang { - "rust" => HighlightConfiguration::new( - tree_sitter_rust::language(), - tree_sitter_rust::HIGHLIGHT_QUERY, - "", - "", - ) - .map(also_configure) - .ok(), - _ => None, - } -} diff --git a/src/languages/built_in.rs b/src/languages/built_in.rs new file mode 100644 index 0000000..e45e217 --- /dev/null +++ b/src/languages/built_in.rs @@ -0,0 +1,83 @@ +use tree_sitter::QueryError; +use tree_sitter_highlight::HighlightConfiguration; + +use crate::register_language; + +pub static COMMON_HIGHLIGHT_NAMES: &[&str] = &[ + "attribute", + "constant", + "function.builtin", + "function", + "keyword", + "operator", + "property", + "punctuation", + "punctuation.bracket", + "punctuation.delimiter", + "string", + "string.special", + "tag", + "type", + "type.builtin", + "variable", + "variable.builtin", + "variable.parameter", +]; + +pub fn register_builtin_languages() { + register_language("rust", rust_hl_factory); + + register_language("javascript", javascript_hl_factory); + register_language("js", javascript_hl_factory); + register_language("jsx", jsx_hl_factory); + + register_language("regex", regex_hl_factory); + + register_language("python", tree_sitter_python); + register_language("py", tree_sitter_python); +} + +pub fn rust_hl_factory() -> Result { + HighlightConfiguration::new( + tree_sitter_rust::language(), + tree_sitter_rust::HIGHLIGHT_QUERY, + "", + "", + ) +} + +pub fn javascript_hl_factory() -> Result { + HighlightConfiguration::new( + tree_sitter_javascript::language(), + tree_sitter_javascript::HIGHLIGHT_QUERY, + tree_sitter_javascript::INJECTION_QUERY, + tree_sitter_javascript::LOCALS_QUERY, + ) +} + +pub fn jsx_hl_factory() -> Result { + HighlightConfiguration::new( + tree_sitter_javascript::language(), + tree_sitter_javascript::JSX_HIGHLIGHT_QUERY, + tree_sitter_javascript::INJECTION_QUERY, + tree_sitter_javascript::LOCALS_QUERY, + ) +} + +pub fn regex_hl_factory() -> Result { + HighlightConfiguration::new( + tree_sitter_regex::language(), + tree_sitter_regex::HIGHLIGHTS_QUERY, + "", + "", + ) +} + +pub fn tree_sitter_python() -> Result { + HighlightConfiguration::new( + tree_sitter_python::language(), + tree_sitter_python::HIGHLIGHT_QUERY, + "", + "", + ) +} diff --git a/src/languages/mod.rs b/src/languages/mod.rs new file mode 100644 index 0000000..331b3cb --- /dev/null +++ b/src/languages/mod.rs @@ -0,0 +1,32 @@ +use std::{collections::HashMap, sync::Mutex}; + +use once_cell::sync::Lazy; +use tree_sitter::QueryError; +use tree_sitter_highlight::HighlightConfiguration; + +pub type HighlightConfigFactory = fn() -> Result; + +static LANGUAGES: Lazy>> = + Lazy::new(|| Mutex::new(HashMap::new())); + +#[cfg(feature = "built-in")] +pub mod built_in; + +pub fn register_language(lang: impl Into, factory: HighlightConfigFactory) { + let mut languages = LANGUAGES.lock().unwrap(); + languages.insert(lang.into(), factory); +} + +pub fn get_highlight_config( + lang: &str, + highlight_names: &[&str], +) -> Option { + let languages = LANGUAGES.lock().unwrap(); + languages + .get(lang) + .and_then(|factory| factory().ok()) + .map(|mut config| { + config.configure(highlight_names); + config + }) +} diff --git a/src/lib.rs b/src/lib.rs index 9163f84..d325de0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,16 +3,46 @@ use tree_sitter_highlight::{HighlightEvent, Highlighter}; mod languages; -pub fn highlight(source: &str) -> String { - let source = source.as_bytes(); +#[cfg(feature = "built-in")] +pub use languages::built_in; +pub use languages::register_language; + +fn get_hash_for_attrs(attrs: &[&str]) -> u64 { + use std::{ + collections::hash_map::DefaultHasher, + hash::{Hash, Hasher}, + }; + + let mut hasher = DefaultHasher::new(); + attrs.hash(&mut hasher); + hasher.finish() +} + +fn write_opening_tag(out: &mut String, attrs: &[&str]) { + out.push_str(""); +} + +pub fn highlight(lang: &str, source: &str, highlight_names: &[&str]) -> String { + let highlight_config = match languages::get_highlight_config(lang, highlight_names) { + Some(conf) => conf, + None => return source.to_string(), + }; + + let source_bytes = source.as_bytes(); let mut highlighter = Highlighter::new(); - let (highlight_config, capture_names) = - languages::get_highlight_config("rust").expect("Could not get Rust language config"); - let items = highlighter - .highlight(&highlight_config, source, None, |lang| { - languages::get_highlight_config(lang).map(|x| /* ugh */ &*Box::leak(Box::new(x.0))) + .highlight(&highlight_config, source_bytes, None, |injected_lang| { + languages::get_highlight_config(injected_lang, highlight_names) + .map(|x| /* ugh */ &*Box::leak(Box::new(x))) }) .unwrap(); @@ -21,58 +51,40 @@ pub fn highlight(source: &str) -> String { // Collapse adjacent identical attribute sets let mut last_attrs: u64 = 0; - let mut span_is_open = false; + let mut tag_is_open = false; for item in items { match item.unwrap() { HighlightEvent::Source { start, end } => { - let source_section_bytes = &source[start..end]; - let source_section = String::from_utf8_lossy(source_section_bytes); + let source_section = &source[start..end]; - let attr_hash = { - use std::{ - collections::hash_map::DefaultHasher, - hash::{Hash, Hasher}, - }; - - let mut hasher = DefaultHasher::new(); - highlight_attrs.hash(&mut hasher); - hasher.finish() - }; - - if last_attrs != attr_hash { - if span_is_open { - out.push_str(""); - span_is_open = false; - } - - if !highlight_attrs.is_empty() { - out.push_str(""); - span_is_open = true; - last_attrs = attr_hash; - } + let attr_hash = get_hash_for_attrs(&highlight_attrs); + if last_attrs != attr_hash && tag_is_open { + out.push_str(""); + tag_is_open = false; } - out.push_str(&encode_text(&source_section)); + if !highlight_attrs.is_empty() && (!tag_is_open || last_attrs != attr_hash) { + write_opening_tag(&mut out, &highlight_attrs); + tag_is_open = true; + last_attrs = attr_hash; + } + + out.push_str(&encode_text(source_section)); } + HighlightEvent::HighlightStart(highlight) => { - let capture_name = &capture_names[highlight.0]; + let capture_name = &highlight_names[highlight.0]; highlight_attrs.push(capture_name); } + HighlightEvent::HighlightEnd => { highlight_attrs.pop(); } } } - if span_is_open { + if tag_is_open { out.push_str(""); } @@ -80,18 +92,46 @@ pub fn highlight(source: &str) -> String { } #[cfg(test)] +#[cfg(feature = "built-in")] mod tests { - use crate::highlight; + use crate::*; #[test] fn highlight_rust() { - println!( - "{}", - highlight( - r#"fn main() { - println!("Hello, world!"); -}"# - ) - ) + built_in::register_builtin_languages(); + + let source = include_str!("../samples/fizzbuzz.rs"); + let expected_result = include_str!("../samples/fizzbuzz.rs.html"); + + assert_eq!( + highlight("rust", source, built_in::COMMON_HIGHLIGHT_NAMES), + expected_result + ); + } + + #[test] + fn highlight_js() { + built_in::register_builtin_languages(); + + let source = include_str!("../samples/fizzbuzz.js"); + let expected_result = include_str!("../samples/fizzbuzz.js.html"); + + assert_eq!( + highlight("js", source, built_in::COMMON_HIGHLIGHT_NAMES), + expected_result + ); + } + + #[test] + fn highlight_python() { + built_in::register_builtin_languages(); + + let source = include_str!("../samples/fizzbuzz.py"); + let expected_result = include_str!("../samples/fizzbuzz.py.html"); + + assert_eq!( + highlight("python", source, built_in::COMMON_HIGHLIGHT_NAMES), + expected_result + ); } }