Refactor: Add built-in support for Rust, JS, Python
This commit is contained in:
parent
57a7c79ca1
commit
2cc38dba2c
12 changed files with 293 additions and 80 deletions
9
.editorconfig
Normal file
9
.editorconfig
Normal file
|
@ -0,0 +1,9 @@
|
|||
root = true
|
||||
|
||||
[*]
|
||||
indent_style = space
|
||||
indent_size = 4
|
||||
end_of_line = lf
|
||||
charset = utf-8
|
||||
trim_trailing_whitespace = false
|
||||
insert_final_newline = true
|
10
Cargo.toml
10
Cargo.toml
|
@ -3,7 +3,9 @@ name = "chroma-syntaxis"
|
|||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
[features]
|
||||
default = ["built-in"]
|
||||
built-in = ["tree-sitter-rust", "tree-sitter-javascript", "tree-sitter-regex", "tree-sitter-python"]
|
||||
|
||||
[dependencies]
|
||||
html-escape = "0.2.11"
|
||||
|
@ -11,4 +13,8 @@ once_cell = "1.10.0"
|
|||
thiserror = "1.0.30"
|
||||
tree-sitter = "0.20.6"
|
||||
tree-sitter-highlight = "0.20.1"
|
||||
tree-sitter-rust = "0.20.1"
|
||||
|
||||
tree-sitter-rust = { version = "0.20.1", optional = true }
|
||||
tree-sitter-javascript = { version = "0.20.0", optional = true }
|
||||
tree-sitter-regex = { git = "https://github.com/tree-sitter/tree-sitter-regex.git", rev = "e1cfca", optional = true }
|
||||
tree-sitter-python = { version = "0.19.1", optional = true }
|
||||
|
|
17
samples/fizzbuzz.js
Normal file
17
samples/fizzbuzz.js
Normal file
|
@ -0,0 +1,17 @@
|
|||
const fizzbuzz = n => {
|
||||
if (n % 15 == 0)
|
||||
return "Fizz buzz";
|
||||
if (n % 3 == 0)
|
||||
return "Fizz";
|
||||
if (n % 5 == 0)
|
||||
return "Buzz";
|
||||
return n;
|
||||
}
|
||||
|
||||
const main = () => {
|
||||
for (let n = 1; n <= 100; n++) {
|
||||
console.log(fizzbuzz(n));
|
||||
}
|
||||
};
|
||||
|
||||
main();
|
17
samples/fizzbuzz.js.html
Normal file
17
samples/fizzbuzz.js.html
Normal file
|
@ -0,0 +1,17 @@
|
|||
<span class="keyword">const</span> <span class="function">fizzbuzz</span> <span class="operator">=</span> <span class="variable">n</span> <span class="operator">=></span> <span class="punctuation bracket">{</span>
|
||||
<span class="keyword">if</span> <span class="punctuation bracket">(</span><span class="variable">n</span> <span class="operator">%</span> 15 <span class="operator">==</span> 0<span class="punctuation bracket">)</span>
|
||||
<span class="keyword">return</span> <span class="string">"Fizz buzz"</span><span class="punctuation delimiter">;</span>
|
||||
<span class="keyword">if</span> <span class="punctuation bracket">(</span><span class="variable">n</span> <span class="operator">%</span> 3 <span class="operator">==</span> 0<span class="punctuation bracket">)</span>
|
||||
<span class="keyword">return</span> <span class="string">"Fizz"</span><span class="punctuation delimiter">;</span>
|
||||
<span class="keyword">if</span> <span class="punctuation bracket">(</span><span class="variable">n</span> <span class="operator">%</span> 5 <span class="operator">==</span> 0<span class="punctuation bracket">)</span>
|
||||
<span class="keyword">return</span> <span class="string">"Buzz"</span><span class="punctuation delimiter">;</span>
|
||||
<span class="keyword">return</span> <span class="variable">n</span><span class="punctuation delimiter">;</span>
|
||||
<span class="punctuation bracket">}</span>
|
||||
|
||||
<span class="keyword">const</span> <span class="function">main</span> <span class="operator">=</span> <span class="punctuation bracket">()</span> <span class="operator">=></span> <span class="punctuation bracket">{</span>
|
||||
<span class="keyword">for</span> <span class="punctuation bracket">(</span><span class="keyword">let</span> <span class="variable">n</span> <span class="operator">=</span> 1<span class="punctuation delimiter">;</span> <span class="variable">n</span> <span class="operator"><=</span> 100<span class="punctuation delimiter">;</span> <span class="variable">n</span><span class="operator">++</span><span class="punctuation bracket">)</span> <span class="punctuation bracket">{</span>
|
||||
<span class="variable builtin">console</span><span class="punctuation delimiter">.</span><span class="function">log</span><span class="punctuation bracket">(</span><span class="function">fizzbuzz</span><span class="punctuation bracket">(</span><span class="variable">n</span><span class="punctuation bracket">))</span><span class="punctuation delimiter">;</span>
|
||||
<span class="punctuation bracket">}</span>
|
||||
<span class="punctuation bracket">}</span><span class="punctuation delimiter">;</span>
|
||||
|
||||
<span class="function">main</span><span class="punctuation bracket">()</span><span class="punctuation delimiter">;</span>
|
8
samples/fizzbuzz.py
Normal file
8
samples/fizzbuzz.py
Normal file
|
@ -0,0 +1,8 @@
|
|||
def main():
|
||||
for n in range(1, 101):
|
||||
lut = [n, "Fizz", "Buzz", "Fizz Buzz"]
|
||||
idx = (n % 3 == 0) + 2 * (n % 5 == 0)
|
||||
print(lut[idx])
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
8
samples/fizzbuzz.py.html
Normal file
8
samples/fizzbuzz.py.html
Normal file
|
@ -0,0 +1,8 @@
|
|||
<span class="keyword">def</span> <span class="function">main</span>():
|
||||
<span class="keyword">for</span> <span class="variable">n</span> <span class="operator">in</span> <span class="function builtin">range</span>(1, 101):
|
||||
<span class="variable">lut</span> <span class="operator">=</span> [<span class="variable">n</span>, <span class="string">"Fizz"</span>, <span class="string">"Buzz"</span>, <span class="string">"Fizz Buzz"</span>]
|
||||
<span class="variable">idx</span> <span class="operator">=</span> (<span class="variable">n</span> <span class="operator">%</span> 3 <span class="operator">==</span> 0) <span class="operator">+</span> 2 <span class="operator">*</span> (<span class="variable">n</span> <span class="operator">%</span> 5 <span class="operator">==</span> 0)
|
||||
<span class="function builtin">print</span>(<span class="variable">lut</span>[<span class="variable">idx</span>])
|
||||
|
||||
<span class="keyword">if</span> <span class="variable">__name__</span> <span class="operator">==</span> <span class="string">"__main__"</span>:
|
||||
<span class="function">main</span>()
|
10
samples/fizzbuzz.rs
Normal file
10
samples/fizzbuzz.rs
Normal file
|
@ -0,0 +1,10 @@
|
|||
fn main() {
|
||||
for n in 1..=100 {
|
||||
match (n % 3, n % 5) {
|
||||
(0, 0) => println!("Fizz buzz"),
|
||||
(0, _) => println!("Fizz"),
|
||||
(_, 0) => println!("Buzz"),
|
||||
_ => println!("{}", n),
|
||||
}
|
||||
}
|
||||
}
|
10
samples/fizzbuzz.rs.html
Normal file
10
samples/fizzbuzz.rs.html
Normal file
|
@ -0,0 +1,10 @@
|
|||
<span class="keyword">fn</span> <span class="function">main</span><span class="punctuation bracket">()</span> {
|
||||
<span class="keyword">for</span> n <span class="keyword">in</span> <span class="constant">1</span>..=<span class="constant">100</span> {
|
||||
<span class="keyword">match</span> <span class="punctuation bracket">(</span>n % <span class="constant">3</span>, n % <span class="constant">5</span><span class="punctuation bracket">)</span> {
|
||||
<span class="punctuation bracket">(</span><span class="constant">0</span>, <span class="constant">0</span><span class="punctuation bracket">)</span> => <span class="function">println!</span><span class="punctuation bracket">(</span><span class="string">"Fizz buzz"</span><span class="punctuation bracket">)</span>,
|
||||
<span class="punctuation bracket">(</span><span class="constant">0</span>, _<span class="punctuation bracket">)</span> => <span class="function">println!</span><span class="punctuation bracket">(</span><span class="string">"Fizz"</span><span class="punctuation bracket">)</span>,
|
||||
<span class="punctuation bracket">(</span>_, <span class="constant">0</span><span class="punctuation bracket">)</span> => <span class="function">println!</span><span class="punctuation bracket">(</span><span class="string">"Buzz"</span><span class="punctuation bracket">)</span>,
|
||||
_ => <span class="function">println!</span><span class="punctuation bracket">(</span><span class="string">"{}"</span>, n<span class="punctuation bracket">)</span>,
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,27 +0,0 @@
|
|||
use tree_sitter_highlight::HighlightConfiguration;
|
||||
|
||||
/// Configures `config` to recognise every capture name that appears in its own
/// highlight query, then returns the configuration together with that list of
/// names.
fn also_configure(mut config: HighlightConfiguration) -> (HighlightConfiguration, Vec<String>) {
    // Copy the names out first so the shared borrow of `config` has ended
    // before `configure` needs a mutable one.
    let names = config.query.capture_names().to_vec();
    config.configure(&names);

    (config, names)
}
|
||||
|
||||
pub fn get_highlight_config(lang: &str) -> Option<(HighlightConfiguration, Vec<String>)> {
|
||||
match lang {
|
||||
"rust" => HighlightConfiguration::new(
|
||||
tree_sitter_rust::language(),
|
||||
tree_sitter_rust::HIGHLIGHT_QUERY,
|
||||
"",
|
||||
"",
|
||||
)
|
||||
.map(also_configure)
|
||||
.ok(),
|
||||
_ => None,
|
||||
}
|
||||
}
|
83
src/languages/built_in.rs
Normal file
83
src/languages/built_in.rs
Normal file
|
@ -0,0 +1,83 @@
|
|||
use tree_sitter::QueryError;
|
||||
use tree_sitter_highlight::HighlightConfiguration;
|
||||
|
||||
use crate::register_language;
|
||||
|
||||
/// Highlight names shared by the built-in languages.
///
/// A dotted name such as `punctuation.bracket` is a more specific variant of
/// the name before the dot.
// NOTE(review): callers appear to index into this slice by highlight id, so
// the order of the entries should be treated as significant — confirm before
// reordering.
pub static COMMON_HIGHLIGHT_NAMES: &[&str] = &[
    "attribute",
    "constant",
    // functions
    "function.builtin",
    "function",
    "keyword",
    "operator",
    "property",
    // punctuation
    "punctuation",
    "punctuation.bracket",
    "punctuation.delimiter",
    // strings
    "string",
    "string.special",
    "tag",
    // types
    "type",
    "type.builtin",
    // variables
    "variable",
    "variable.builtin",
    "variable.parameter",
];
|
||||
|
||||
pub fn register_builtin_languages() {
|
||||
register_language("rust", rust_hl_factory);
|
||||
|
||||
register_language("javascript", javascript_hl_factory);
|
||||
register_language("js", javascript_hl_factory);
|
||||
register_language("jsx", jsx_hl_factory);
|
||||
|
||||
register_language("regex", regex_hl_factory);
|
||||
|
||||
register_language("python", tree_sitter_python);
|
||||
register_language("py", tree_sitter_python);
|
||||
}
|
||||
|
||||
pub fn rust_hl_factory() -> Result<HighlightConfiguration, QueryError> {
|
||||
HighlightConfiguration::new(
|
||||
tree_sitter_rust::language(),
|
||||
tree_sitter_rust::HIGHLIGHT_QUERY,
|
||||
"",
|
||||
"",
|
||||
)
|
||||
}
|
||||
|
||||
pub fn javascript_hl_factory() -> Result<HighlightConfiguration, QueryError> {
|
||||
HighlightConfiguration::new(
|
||||
tree_sitter_javascript::language(),
|
||||
tree_sitter_javascript::HIGHLIGHT_QUERY,
|
||||
tree_sitter_javascript::INJECTION_QUERY,
|
||||
tree_sitter_javascript::LOCALS_QUERY,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn jsx_hl_factory() -> Result<HighlightConfiguration, QueryError> {
|
||||
HighlightConfiguration::new(
|
||||
tree_sitter_javascript::language(),
|
||||
tree_sitter_javascript::JSX_HIGHLIGHT_QUERY,
|
||||
tree_sitter_javascript::INJECTION_QUERY,
|
||||
tree_sitter_javascript::LOCALS_QUERY,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn regex_hl_factory() -> Result<HighlightConfiguration, QueryError> {
|
||||
HighlightConfiguration::new(
|
||||
tree_sitter_regex::language(),
|
||||
tree_sitter_regex::HIGHLIGHTS_QUERY,
|
||||
"",
|
||||
"",
|
||||
)
|
||||
}
|
||||
|
||||
pub fn tree_sitter_python() -> Result<HighlightConfiguration, QueryError> {
|
||||
HighlightConfiguration::new(
|
||||
tree_sitter_python::language(),
|
||||
tree_sitter_python::HIGHLIGHT_QUERY,
|
||||
"",
|
||||
"",
|
||||
)
|
||||
}
|
32
src/languages/mod.rs
Normal file
32
src/languages/mod.rs
Normal file
|
@ -0,0 +1,32 @@
|
|||
use std::{collections::HashMap, sync::Mutex};
|
||||
|
||||
use once_cell::sync::Lazy;
|
||||
use tree_sitter::QueryError;
|
||||
use tree_sitter_highlight::HighlightConfiguration;
|
||||
|
||||
/// Plain function pointer that takes no arguments and either builds a
/// [`HighlightConfiguration`] or reports the [`QueryError`] that prevented it.
pub type HighlightConfigFactory = fn() -> Result<HighlightConfiguration, QueryError>;
|
||||
|
||||
static LANGUAGES: Lazy<Mutex<HashMap<String, HighlightConfigFactory>>> =
|
||||
Lazy::new(|| Mutex::new(HashMap::new()));
|
||||
|
||||
#[cfg(feature = "built-in")]
|
||||
pub mod built_in;
|
||||
|
||||
pub fn register_language(lang: impl Into<String>, factory: HighlightConfigFactory) {
|
||||
let mut languages = LANGUAGES.lock().unwrap();
|
||||
languages.insert(lang.into(), factory);
|
||||
}
|
||||
|
||||
pub fn get_highlight_config(
|
||||
lang: &str,
|
||||
highlight_names: &[&str],
|
||||
) -> Option<HighlightConfiguration> {
|
||||
let languages = LANGUAGES.lock().unwrap();
|
||||
languages
|
||||
.get(lang)
|
||||
.and_then(|factory| factory().ok())
|
||||
.map(|mut config| {
|
||||
config.configure(highlight_names);
|
||||
config
|
||||
})
|
||||
}
|
142
src/lib.rs
142
src/lib.rs
|
@ -3,16 +3,46 @@ use tree_sitter_highlight::{HighlightEvent, Highlighter};
|
|||
|
||||
mod languages;
|
||||
|
||||
pub fn highlight(source: &str) -> String {
|
||||
let source = source.as_bytes();
|
||||
#[cfg(feature = "built-in")]
|
||||
pub use languages::built_in;
|
||||
pub use languages::register_language;
|
||||
|
||||
/// Hashes a stack of highlight attribute names into a single `u64`, so two
/// stacks can be compared cheaply for equality.
///
/// Equal slices always produce equal hashes.
fn get_hash_for_attrs(attrs: &[&str]) -> u64 {
    let mut state = std::collections::hash_map::DefaultHasher::new();
    std::hash::Hash::hash(attrs, &mut state);
    std::hash::Hasher::finish(&state)
}
|
||||
|
||||
/// Appends an opening `<span class="…">` tag to `out`.
///
/// Each entry of `attrs` becomes one or more CSS classes: entries are
/// separated by a single space, and every `.` inside an entry is turned into
/// a space (so `punctuation.bracket` yields the classes `punctuation` and
/// `bracket`).
fn write_opening_tag(out: &mut String, attrs: &[&str]) {
    out.push_str("<span class=\"");
    for (i, attr) in attrs.iter().enumerate() {
        if i != 0 {
            out.push(' ');
        }

        // Write the mapped characters straight into `out` instead of building
        // a temporary `String` for every attribute.
        out.extend(attr.chars().map(|c| if c == '.' { ' ' } else { c }));
    }
    out.push_str("\">");
}
|
||||
|
||||
pub fn highlight(lang: &str, source: &str, highlight_names: &[&str]) -> String {
|
||||
let highlight_config = match languages::get_highlight_config(lang, highlight_names) {
|
||||
Some(conf) => conf,
|
||||
None => return source.to_string(),
|
||||
};
|
||||
|
||||
let source_bytes = source.as_bytes();
|
||||
|
||||
let mut highlighter = Highlighter::new();
|
||||
let (highlight_config, capture_names) =
|
||||
languages::get_highlight_config("rust").expect("Could not get Rust language config");
|
||||
|
||||
let items = highlighter
|
||||
.highlight(&highlight_config, source, None, |lang| {
|
||||
languages::get_highlight_config(lang).map(|x| /* ugh */ &*Box::leak(Box::new(x.0)))
|
||||
.highlight(&highlight_config, source_bytes, None, |injected_lang| {
|
||||
languages::get_highlight_config(injected_lang, highlight_names)
|
||||
.map(|x| /* ugh */ &*Box::leak(Box::new(x)))
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
|
@ -21,58 +51,40 @@ pub fn highlight(source: &str) -> String {
|
|||
|
||||
// Collapse adjacent identical attribute sets
|
||||
let mut last_attrs: u64 = 0;
|
||||
let mut span_is_open = false;
|
||||
let mut tag_is_open = false;
|
||||
|
||||
for item in items {
|
||||
match item.unwrap() {
|
||||
HighlightEvent::Source { start, end } => {
|
||||
let source_section_bytes = &source[start..end];
|
||||
let source_section = String::from_utf8_lossy(source_section_bytes);
|
||||
let source_section = &source[start..end];
|
||||
|
||||
let attr_hash = {
|
||||
use std::{
|
||||
collections::hash_map::DefaultHasher,
|
||||
hash::{Hash, Hasher},
|
||||
};
|
||||
|
||||
let mut hasher = DefaultHasher::new();
|
||||
highlight_attrs.hash(&mut hasher);
|
||||
hasher.finish()
|
||||
};
|
||||
|
||||
if last_attrs != attr_hash {
|
||||
if span_is_open {
|
||||
out.push_str("</span>");
|
||||
span_is_open = false;
|
||||
}
|
||||
|
||||
if !highlight_attrs.is_empty() {
|
||||
out.push_str("<span class=\"");
|
||||
for (i, attr) in highlight_attrs.iter().enumerate() {
|
||||
if i != 0 {
|
||||
out.push(' ');
|
||||
}
|
||||
out.push_str(&attr.replace('.', " "));
|
||||
}
|
||||
out.push_str("\">");
|
||||
span_is_open = true;
|
||||
last_attrs = attr_hash;
|
||||
}
|
||||
let attr_hash = get_hash_for_attrs(&highlight_attrs);
|
||||
if last_attrs != attr_hash && tag_is_open {
|
||||
out.push_str("</span>");
|
||||
tag_is_open = false;
|
||||
}
|
||||
|
||||
out.push_str(&encode_text(&source_section));
|
||||
if !highlight_attrs.is_empty() && (!tag_is_open || last_attrs != attr_hash) {
|
||||
write_opening_tag(&mut out, &highlight_attrs);
|
||||
tag_is_open = true;
|
||||
last_attrs = attr_hash;
|
||||
}
|
||||
|
||||
out.push_str(&encode_text(source_section));
|
||||
}
|
||||
|
||||
HighlightEvent::HighlightStart(highlight) => {
|
||||
let capture_name = &capture_names[highlight.0];
|
||||
let capture_name = &highlight_names[highlight.0];
|
||||
highlight_attrs.push(capture_name);
|
||||
}
|
||||
|
||||
HighlightEvent::HighlightEnd => {
|
||||
highlight_attrs.pop();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if span_is_open {
|
||||
if tag_is_open {
|
||||
out.push_str("</span>");
|
||||
}
|
||||
|
||||
|
@ -80,18 +92,46 @@ pub fn highlight(source: &str) -> String {
|
|||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[cfg(feature = "built-in")]
|
||||
mod tests {
|
||||
use crate::highlight;
|
||||
use crate::*;
|
||||
|
||||
#[test]
|
||||
fn highlight_rust() {
|
||||
println!(
|
||||
"{}",
|
||||
highlight(
|
||||
r#"fn main() {
|
||||
println!("Hello, world!");
|
||||
}"#
|
||||
)
|
||||
)
|
||||
built_in::register_builtin_languages();
|
||||
|
||||
let source = include_str!("../samples/fizzbuzz.rs");
|
||||
let expected_result = include_str!("../samples/fizzbuzz.rs.html");
|
||||
|
||||
assert_eq!(
|
||||
highlight("rust", source, built_in::COMMON_HIGHLIGHT_NAMES),
|
||||
expected_result
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn highlight_js() {
|
||||
built_in::register_builtin_languages();
|
||||
|
||||
let source = include_str!("../samples/fizzbuzz.js");
|
||||
let expected_result = include_str!("../samples/fizzbuzz.js.html");
|
||||
|
||||
assert_eq!(
|
||||
highlight("js", source, built_in::COMMON_HIGHLIGHT_NAMES),
|
||||
expected_result
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn highlight_python() {
|
||||
built_in::register_builtin_languages();
|
||||
|
||||
let source = include_str!("../samples/fizzbuzz.py");
|
||||
let expected_result = include_str!("../samples/fizzbuzz.py.html");
|
||||
|
||||
assert_eq!(
|
||||
highlight("python", source, built_in::COMMON_HIGHLIGHT_NAMES),
|
||||
expected_result
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue