diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 0000000..73390a1
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,9 @@
+root = true
+
+[*]
+indent_style = space
+indent_size = 4
+end_of_line = lf
+charset = utf-8
+trim_trailing_whitespace = false
+insert_final_newline = true
diff --git a/Cargo.toml b/Cargo.toml
index 813f9de..ce8b73f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -3,7 +3,9 @@ name = "chroma-syntaxis"
version = "0.1.0"
edition = "2021"
-# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+[features]
+default = ["built-in"]
+built-in = ["tree-sitter-rust", "tree-sitter-javascript", "tree-sitter-regex", "tree-sitter-python"]
[dependencies]
html-escape = "0.2.11"
@@ -11,4 +13,8 @@ once_cell = "1.10.0"
thiserror = "1.0.30"
tree-sitter = "0.20.6"
tree-sitter-highlight = "0.20.1"
-tree-sitter-rust = "0.20.1"
+
+tree-sitter-rust = { version = "0.20.1", optional = true }
+tree-sitter-javascript = { version = "0.20.0", optional = true }
+tree-sitter-regex = { git = "https://github.com/tree-sitter/tree-sitter-regex.git", rev = "e1cfca", optional = true }
+tree-sitter-python = { version = "0.19.1", optional = true }
diff --git a/samples/fizzbuzz.js b/samples/fizzbuzz.js
new file mode 100644
index 0000000..784be54
--- /dev/null
+++ b/samples/fizzbuzz.js
@@ -0,0 +1,17 @@
+const fizzbuzz = n => {
+ if (n % 15 == 0)
+ return "Fizz buzz";
+ if (n % 3 == 0)
+ return "Fizz";
+ if (n % 5 == 0)
+ return "Buzz";
+ return n;
+}
+
+const main = () => {
+ for (let n = 1; n <= 100; n++) {
+ console.log(fizzbuzz(n));
+ }
+};
+
+main();
diff --git a/samples/fizzbuzz.js.html b/samples/fizzbuzz.js.html
new file mode 100644
index 0000000..5f9009f
--- /dev/null
+++ b/samples/fizzbuzz.js.html
@@ -0,0 +1,17 @@
+const fizzbuzz = n => {
+ if (n % 15 == 0)
+ return "Fizz buzz";
+ if (n % 3 == 0)
+ return "Fizz";
+ if (n % 5 == 0)
+ return "Buzz";
+ return n;
+}
+
+const main = () => {
+ for (let n = 1; n <= 100; n++) {
+ console.log(fizzbuzz(n));
+ }
+};
+
+main();
diff --git a/samples/fizzbuzz.py b/samples/fizzbuzz.py
new file mode 100644
index 0000000..04b47a3
--- /dev/null
+++ b/samples/fizzbuzz.py
@@ -0,0 +1,8 @@
+def main():
+ for n in range(1, 101):
+ lut = [n, "Fizz", "Buzz", "Fizz Buzz"]
+ idx = (n % 3 == 0) + 2 * (n % 5 == 0)
+ print(lut[idx])
+
+if __name__ == "__main__":
+ main()
diff --git a/samples/fizzbuzz.py.html b/samples/fizzbuzz.py.html
new file mode 100644
index 0000000..b0d089b
--- /dev/null
+++ b/samples/fizzbuzz.py.html
@@ -0,0 +1,8 @@
+def main():
+ for n in range(1, 101):
+ lut = [n, "Fizz", "Buzz", "Fizz Buzz"]
+ idx = (n % 3 == 0) + 2 * (n % 5 == 0)
+ print(lut[idx])
+
+if __name__ == "__main__":
+ main()
diff --git a/samples/fizzbuzz.rs b/samples/fizzbuzz.rs
new file mode 100644
index 0000000..7043a03
--- /dev/null
+++ b/samples/fizzbuzz.rs
@@ -0,0 +1,10 @@
+fn main() {
+ for n in 1..=100 {
+ match (n % 3, n % 5) {
+ (0, 0) => println!("Fizz buzz"),
+ (0, _) => println!("Fizz"),
+ (_, 0) => println!("Buzz"),
+ _ => println!("{}", n),
+ }
+ }
+}
diff --git a/samples/fizzbuzz.rs.html b/samples/fizzbuzz.rs.html
new file mode 100644
index 0000000..2d6d019
--- /dev/null
+++ b/samples/fizzbuzz.rs.html
@@ -0,0 +1,10 @@
+fn main() {
+ for n in 1..=100 {
+ match (n % 3, n % 5) {
+ (0, 0) => println!("Fizz buzz"),
+ (0, _) => println!("Fizz"),
+ (_, 0) => println!("Buzz"),
+ _ => println!("{}", n),
+ }
+ }
+}
diff --git a/src/languages.rs b/src/languages.rs
deleted file mode 100644
index ca8c2dd..0000000
--- a/src/languages.rs
+++ /dev/null
@@ -1,27 +0,0 @@
-use tree_sitter_highlight::HighlightConfiguration;
-
-fn also_configure(mut config: HighlightConfiguration) -> (HighlightConfiguration, Vec) {
- let capture_names: Vec<_> = config
- .query
- .capture_names()
- .iter()
- .map(String::clone)
- .collect();
- config.configure(&capture_names);
-
- (config, capture_names)
-}
-
-pub fn get_highlight_config(lang: &str) -> Option<(HighlightConfiguration, Vec)> {
- match lang {
- "rust" => HighlightConfiguration::new(
- tree_sitter_rust::language(),
- tree_sitter_rust::HIGHLIGHT_QUERY,
- "",
- "",
- )
- .map(also_configure)
- .ok(),
- _ => None,
- }
-}
diff --git a/src/languages/built_in.rs b/src/languages/built_in.rs
new file mode 100644
index 0000000..e45e217
--- /dev/null
+++ b/src/languages/built_in.rs
@@ -0,0 +1,99 @@
+//! Factories for the grammars bundled behind the `built-in` feature.
+
+use tree_sitter::QueryError;
+use tree_sitter_highlight::HighlightConfiguration;
+
+use crate::register_language;
+
+/// A general-purpose list of highlight names to pass as `highlight_names`.
+pub static COMMON_HIGHLIGHT_NAMES: &[&str] = &[
+    "attribute",
+    "constant",
+    "function.builtin",
+    "function",
+    "keyword",
+    "operator",
+    "property",
+    "punctuation",
+    "punctuation.bracket",
+    "punctuation.delimiter",
+    "string",
+    "string.special",
+    "tag",
+    "type",
+    "type.builtin",
+    "variable",
+    "variable.builtin",
+    "variable.parameter",
+];
+
+/// Registers every bundled grammar under its name and short aliases.
+///
+/// Names registered: `rust`, `javascript`, `js`, `jsx`, `regex`, `python`, `py`.
+pub fn register_builtin_languages() {
+    register_language("rust", rust_hl_factory);
+
+    register_language("javascript", javascript_hl_factory);
+    register_language("js", javascript_hl_factory);
+    register_language("jsx", jsx_hl_factory);
+
+    register_language("regex", regex_hl_factory);
+
+    register_language("python", python_hl_factory);
+    register_language("py", python_hl_factory);
+}
+
+/// Builds the Rust configuration; no injection or locals query is supplied.
+pub fn rust_hl_factory() -> Result<HighlightConfiguration, QueryError> {
+    HighlightConfiguration::new(
+        tree_sitter_rust::language(),
+        tree_sitter_rust::HIGHLIGHT_QUERY,
+        "",
+        "",
+    )
+}
+
+/// Builds the JavaScript configuration with the grammar's injection and locals queries.
+pub fn javascript_hl_factory() -> Result<HighlightConfiguration, QueryError> {
+    HighlightConfiguration::new(
+        tree_sitter_javascript::language(),
+        tree_sitter_javascript::HIGHLIGHT_QUERY,
+        tree_sitter_javascript::INJECTION_QUERY,
+        tree_sitter_javascript::LOCALS_QUERY,
+    )
+}
+
+/// Same as [`javascript_hl_factory`] but uses the JSX highlight query.
+pub fn jsx_hl_factory() -> Result<HighlightConfiguration, QueryError> {
+    HighlightConfiguration::new(
+        tree_sitter_javascript::language(),
+        tree_sitter_javascript::JSX_HIGHLIGHT_QUERY,
+        tree_sitter_javascript::INJECTION_QUERY,
+        tree_sitter_javascript::LOCALS_QUERY,
+    )
+}
+
+/// Builds the regex configuration; no injection or locals query is supplied.
+pub fn regex_hl_factory() -> Result<HighlightConfiguration, QueryError> {
+    HighlightConfiguration::new(
+        tree_sitter_regex::language(),
+        tree_sitter_regex::HIGHLIGHTS_QUERY,
+        "",
+        "",
+    )
+}
+
+/// Builds the Python configuration; no injection or locals query is supplied.
+pub fn python_hl_factory() -> Result<HighlightConfiguration, QueryError> {
+    HighlightConfiguration::new(
+        tree_sitter_python::language(),
+        tree_sitter_python::HIGHLIGHT_QUERY,
+        "",
+        "",
+    )
+}
+
+/// Earlier name of [`python_hl_factory`], kept so existing callers still compile.
+pub fn tree_sitter_python() -> Result<HighlightConfiguration, QueryError> {
+    python_hl_factory()
+}
diff --git a/src/languages/mod.rs b/src/languages/mod.rs
new file mode 100644
index 0000000..331b3cb
--- /dev/null
+++ b/src/languages/mod.rs
@@ -0,0 +1,47 @@
+//! Registry mapping language names to highlight-configuration factories.
+
+use std::{collections::HashMap, sync::Mutex};
+
+use once_cell::sync::Lazy;
+use tree_sitter::QueryError;
+use tree_sitter_highlight::HighlightConfiguration;
+
+/// Function that builds a [`HighlightConfiguration`] for one language.
+pub type HighlightConfigFactory = fn() -> Result<HighlightConfiguration, QueryError>;
+
+/// Registered factories, keyed by language name.
+static LANGUAGES: Lazy<Mutex<HashMap<String, HighlightConfigFactory>>> =
+    Lazy::new(|| Mutex::new(HashMap::new()));
+
+#[cfg(feature = "built-in")]
+pub mod built_in;
+
+/// Registers `factory` under `lang`, replacing any factory already stored for that name.
+///
+/// # Panics
+///
+/// Panics if the registry mutex is poisoned.
+pub fn register_language(lang: impl Into<String>, factory: HighlightConfigFactory) {
+    let mut languages = LANGUAGES.lock().unwrap();
+    languages.insert(lang.into(), factory);
+}
+
+/// Builds the configuration registered for `lang` and configures it with `highlight_names`.
+///
+/// Returns `None` when nothing is registered for `lang` or the factory returns an error.
+///
+/// # Panics
+///
+/// Panics if the registry mutex is poisoned.
+pub fn get_highlight_config(
+    lang: &str,
+    highlight_names: &[&str],
+) -> Option<HighlightConfiguration> {
+    // Copy the fn pointer out so the guard is dropped before the factory runs;
+    // a factory that touched the registry while it was locked would deadlock or panic.
+    let factory = LANGUAGES.lock().unwrap().get(lang).copied()?;
+
+    let mut config = factory().ok()?;
+    config.configure(highlight_names);
+    Some(config)
+}
diff --git a/src/lib.rs b/src/lib.rs
index 9163f84..d325de0 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -3,16 +3,46 @@ use tree_sitter_highlight::{HighlightEvent, Highlighter};
mod languages;
-pub fn highlight(source: &str) -> String {
- let source = source.as_bytes();
+#[cfg(feature = "built-in")]
+pub use languages::built_in;
+pub use languages::register_language;
+
+/// Returns a `DefaultHasher` digest of the whole `attrs` slice.
+///
+/// The caller compares successive digests to detect a change in the attribute stack.
+fn get_hash_for_attrs(attrs: &[&str]) -> u64 {
+    use std::collections::hash_map::DefaultHasher;
+    use std::hash::{Hash, Hasher};
+
+    let mut state = DefaultHasher::new();
+    Hash::hash(attrs, &mut state);
+    state.finish()
+}
+
+/// Presumably appends the opening tag for a highlighted run to `out`.
+///
+/// NOTE(review): as shown here the pushed literal is empty and `attrs` is never read.
+/// The markup was probably lost when this patch was extracted; confirm against the repository.
+fn write_opening_tag(out: &mut String, attrs: &[&str]) {
+ out.push_str("");
+}
+
+pub fn highlight(lang: &str, source: &str, highlight_names: &[&str]) -> String {
+ let highlight_config = match languages::get_highlight_config(lang, highlight_names) {
+ Some(conf) => conf,
+ None => return source.to_string(),
+ };
+
+ let source_bytes = source.as_bytes();
let mut highlighter = Highlighter::new();
- let (highlight_config, capture_names) =
- languages::get_highlight_config("rust").expect("Could not get Rust language config");
-
let items = highlighter
- .highlight(&highlight_config, source, None, |lang| {
- languages::get_highlight_config(lang).map(|x| /* ugh */ &*Box::leak(Box::new(x.0)))
+ .highlight(&highlight_config, source_bytes, None, |injected_lang| {
+ languages::get_highlight_config(injected_lang, highlight_names)
+ .map(|x| /* ugh */ &*Box::leak(Box::new(x)))
})
.unwrap();
@@ -21,58 +51,40 @@ pub fn highlight(source: &str) -> String {
// Collapse adjacent identical attribute sets
let mut last_attrs: u64 = 0;
- let mut span_is_open = false;
+ let mut tag_is_open = false;
for item in items {
match item.unwrap() {
HighlightEvent::Source { start, end } => {
- let source_section_bytes = &source[start..end];
- let source_section = String::from_utf8_lossy(source_section_bytes);
+ let source_section = &source[start..end];
- let attr_hash = {
- use std::{
- collections::hash_map::DefaultHasher,
- hash::{Hash, Hasher},
- };
-
- let mut hasher = DefaultHasher::new();
- highlight_attrs.hash(&mut hasher);
- hasher.finish()
- };
-
- if last_attrs != attr_hash {
- if span_is_open {
- out.push_str("");
- span_is_open = false;
- }
-
- if !highlight_attrs.is_empty() {
- out.push_str("");
- span_is_open = true;
- last_attrs = attr_hash;
- }
+ let attr_hash = get_hash_for_attrs(&highlight_attrs);
+ if last_attrs != attr_hash && tag_is_open {
+ out.push_str("");
+ tag_is_open = false;
}
- out.push_str(&encode_text(&source_section));
+ if !highlight_attrs.is_empty() && (!tag_is_open || last_attrs != attr_hash) {
+ write_opening_tag(&mut out, &highlight_attrs);
+ tag_is_open = true;
+ last_attrs = attr_hash;
+ }
+
+ out.push_str(&encode_text(source_section));
}
+
HighlightEvent::HighlightStart(highlight) => {
- let capture_name = &capture_names[highlight.0];
+ let capture_name = &highlight_names[highlight.0];
highlight_attrs.push(capture_name);
}
+
HighlightEvent::HighlightEnd => {
highlight_attrs.pop();
}
}
}
- if span_is_open {
+ if tag_is_open {
out.push_str("");
}
@@ -80,18 +92,46 @@ pub fn highlight(source: &str) -> String {
}
#[cfg(test)]
+#[cfg(feature = "built-in")]
mod tests {
- use crate::highlight;
+ use crate::*;
#[test]
fn highlight_rust() {
- println!(
- "{}",
- highlight(
- r#"fn main() {
- println!("Hello, world!");
-}"#
- )
- )
+ // Populate the registry so the "rust" lookup below can succeed.
+ built_in::register_builtin_languages();
+
+ // Input sample and the output it is compared against, both embedded at compile time.
+ let source = include_str!("../samples/fizzbuzz.rs");
+ let expected_result = include_str!("../samples/fizzbuzz.rs.html");
+
+ assert_eq!(
+ highlight("rust", source, built_in::COMMON_HIGHLIGHT_NAMES),
+ expected_result
+ );
+ }
+
+    /// The JavaScript sample must render exactly to its stored fixture.
+    #[test]
+    fn highlight_js() {
+        built_in::register_builtin_languages();
+
+        let rendered = highlight(
+            "js",
+            include_str!("../samples/fizzbuzz.js"),
+            built_in::COMMON_HIGHLIGHT_NAMES,
+        );
+        assert_eq!(rendered, include_str!("../samples/fizzbuzz.js.html"));
+    }
+
+    /// The Python sample must render exactly to its stored fixture.
+    #[test]
+    fn highlight_python() {
+        built_in::register_builtin_languages();
+
+        let rendered = highlight(
+            "python",
+            include_str!("../samples/fizzbuzz.py"),
+            built_in::COMMON_HIGHLIGHT_NAMES,
+        );
+        assert_eq!(rendered, include_str!("../samples/fizzbuzz.py.html"));
     }
}