Refactor: Add built-in support for Rust, JS, Python

This commit is contained in:
Charlotte Som 2022-04-28 17:00:06 +01:00
parent 57a7c79ca1
commit 2cc38dba2c
12 changed files with 293 additions and 80 deletions

9
.editorconfig Normal file
View file

@ -0,0 +1,9 @@
root = true
[*]
indent_style = space
indent_size = 4
end_of_line = lf
charset = utf-8
trim_trailing_whitespace = false
insert_final_newline = true

View file

@ -3,7 +3,9 @@ name = "chroma-syntaxis"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[features]
default = ["built-in"]
built-in = ["tree-sitter-rust", "tree-sitter-javascript", "tree-sitter-regex", "tree-sitter-python"]
[dependencies]
html-escape = "0.2.11"
@ -11,4 +13,8 @@ once_cell = "1.10.0"
thiserror = "1.0.30"
tree-sitter = "0.20.6"
tree-sitter-highlight = "0.20.1"
tree-sitter-rust = "0.20.1"
tree-sitter-rust = { version = "0.20.1", optional = true }
tree-sitter-javascript = { version = "0.20.0", optional = true }
tree-sitter-regex = { git = "https://github.com/tree-sitter/tree-sitter-regex.git", rev = "e1cfca", optional = true }
tree-sitter-python = { version = "0.19.1", optional = true }

17
samples/fizzbuzz.js Normal file
View file

@ -0,0 +1,17 @@
const fizzbuzz = n => {
if (n % 15 == 0)
return "Fizz buzz";
if (n % 3 == 0)
return "Fizz";
if (n % 5 == 0)
return "Buzz";
return n;
}
const main = () => {
for (let n = 1; n <= 100; n++) {
console.log(fizzbuzz(n));
}
};
main();

17
samples/fizzbuzz.js.html Normal file
View file

@ -0,0 +1,17 @@
<span class="keyword">const</span> <span class="function">fizzbuzz</span> <span class="operator">=</span> <span class="variable">n</span> <span class="operator">=&gt;</span> <span class="punctuation bracket">{</span>
<span class="keyword">if</span> <span class="punctuation bracket">(</span><span class="variable">n</span> <span class="operator">%</span> 15 <span class="operator">==</span> 0<span class="punctuation bracket">)</span>
<span class="keyword">return</span> <span class="string">"Fizz buzz"</span><span class="punctuation delimiter">;</span>
<span class="keyword">if</span> <span class="punctuation bracket">(</span><span class="variable">n</span> <span class="operator">%</span> 3 <span class="operator">==</span> 0<span class="punctuation bracket">)</span>
<span class="keyword">return</span> <span class="string">"Fizz"</span><span class="punctuation delimiter">;</span>
<span class="keyword">if</span> <span class="punctuation bracket">(</span><span class="variable">n</span> <span class="operator">%</span> 5 <span class="operator">==</span> 0<span class="punctuation bracket">)</span>
<span class="keyword">return</span> <span class="string">"Buzz"</span><span class="punctuation delimiter">;</span>
<span class="keyword">return</span> <span class="variable">n</span><span class="punctuation delimiter">;</span>
<span class="punctuation bracket">}</span>
<span class="keyword">const</span> <span class="function">main</span> <span class="operator">=</span> <span class="punctuation bracket">()</span> <span class="operator">=&gt;</span> <span class="punctuation bracket">{</span>
<span class="keyword">for</span> <span class="punctuation bracket">(</span><span class="keyword">let</span> <span class="variable">n</span> <span class="operator">=</span> 1<span class="punctuation delimiter">;</span> <span class="variable">n</span> <span class="operator">&lt;=</span> 100<span class="punctuation delimiter">;</span> <span class="variable">n</span><span class="operator">++</span><span class="punctuation bracket">)</span> <span class="punctuation bracket">{</span>
<span class="variable builtin">console</span><span class="punctuation delimiter">.</span><span class="function">log</span><span class="punctuation bracket">(</span><span class="function">fizzbuzz</span><span class="punctuation bracket">(</span><span class="variable">n</span><span class="punctuation bracket">))</span><span class="punctuation delimiter">;</span>
<span class="punctuation bracket">}</span>
<span class="punctuation bracket">}</span><span class="punctuation delimiter">;</span>
<span class="function">main</span><span class="punctuation bracket">()</span><span class="punctuation delimiter">;</span>

8
samples/fizzbuzz.py Normal file
View file

@ -0,0 +1,8 @@
def main():
for n in range(1, 101):
lut = [n, "Fizz", "Buzz", "Fizz Buzz"]
idx = (n % 3 == 0) + 2 * (n % 5 == 0)
print(lut[idx])
if __name__ == "__main__":
main()

8
samples/fizzbuzz.py.html Normal file
View file

@ -0,0 +1,8 @@
<span class="keyword">def</span> <span class="function">main</span>():
<span class="keyword">for</span> <span class="variable">n</span> <span class="operator">in</span> <span class="function builtin">range</span>(1, 101):
<span class="variable">lut</span> <span class="operator">=</span> [<span class="variable">n</span>, <span class="string">"Fizz"</span>, <span class="string">"Buzz"</span>, <span class="string">"Fizz Buzz"</span>]
<span class="variable">idx</span> <span class="operator">=</span> (<span class="variable">n</span> <span class="operator">%</span> 3 <span class="operator">==</span> 0) <span class="operator">+</span> 2 <span class="operator">*</span> (<span class="variable">n</span> <span class="operator">%</span> 5 <span class="operator">==</span> 0)
<span class="function builtin">print</span>(<span class="variable">lut</span>[<span class="variable">idx</span>])
<span class="keyword">if</span> <span class="variable">__name__</span> <span class="operator">==</span> <span class="string">"__main__"</span>:
<span class="function">main</span>()

10
samples/fizzbuzz.rs Normal file
View file

@ -0,0 +1,10 @@
fn main() {
for n in 1..=100 {
match (n % 3, n % 5) {
(0, 0) => println!("Fizz buzz"),
(0, _) => println!("Fizz"),
(_, 0) => println!("Buzz"),
_ => println!("{}", n),
}
}
}

10
samples/fizzbuzz.rs.html Normal file
View file

@ -0,0 +1,10 @@
<span class="keyword">fn</span> <span class="function">main</span><span class="punctuation bracket">()</span> {
<span class="keyword">for</span> n <span class="keyword">in</span> <span class="constant">1</span>..=<span class="constant">100</span> {
<span class="keyword">match</span> <span class="punctuation bracket">(</span>n % <span class="constant">3</span>, n % <span class="constant">5</span><span class="punctuation bracket">)</span> {
<span class="punctuation bracket">(</span><span class="constant">0</span>, <span class="constant">0</span><span class="punctuation bracket">)</span> =&gt; <span class="function">println!</span><span class="punctuation bracket">(</span><span class="string">"Fizz buzz"</span><span class="punctuation bracket">)</span>,
<span class="punctuation bracket">(</span><span class="constant">0</span>, _<span class="punctuation bracket">)</span> =&gt; <span class="function">println!</span><span class="punctuation bracket">(</span><span class="string">"Fizz"</span><span class="punctuation bracket">)</span>,
<span class="punctuation bracket">(</span>_, <span class="constant">0</span><span class="punctuation bracket">)</span> =&gt; <span class="function">println!</span><span class="punctuation bracket">(</span><span class="string">"Buzz"</span><span class="punctuation bracket">)</span>,
_ =&gt; <span class="function">println!</span><span class="punctuation bracket">(</span><span class="string">"{}"</span>, n<span class="punctuation bracket">)</span>,
}
}
}

View file

@ -1,27 +0,0 @@
use tree_sitter_highlight::HighlightConfiguration;
/// Configures `config` to recognise every capture name that appears in its
/// own highlight query.
///
/// Returns the configured value together with the owned list of capture
/// names that was passed to `configure`.
fn also_configure(mut config: HighlightConfiguration) -> (HighlightConfiguration, Vec<String>) {
    // Clone the names first: `configure` needs `&mut config`, so the borrow
    // of `config.query` has to end before it is called.
    let names: Vec<String> = config.query.capture_names().iter().cloned().collect();
    config.configure(&names);
    (config, names)
}
pub fn get_highlight_config(lang: &str) -> Option<(HighlightConfiguration, Vec<String>)> {
match lang {
"rust" => HighlightConfiguration::new(
tree_sitter_rust::language(),
tree_sitter_rust::HIGHLIGHT_QUERY,
"",
"",
)
.map(also_configure)
.ok(),
_ => None,
}
}

83
src/languages/built_in.rs Normal file
View file

@ -0,0 +1,83 @@
use tree_sitter::QueryError;
use tree_sitter_highlight::HighlightConfiguration;
use crate::register_language;
/// A ready-made list of highlight names that can be passed as the
/// `highlight_names` argument when highlighting with the built-in languages.
///
/// Entries are capture names; dotted entries such as `punctuation.bracket`
/// sit alongside their shorter parent (`punctuation`).
pub static COMMON_HIGHLIGHT_NAMES: &[&str] = &[
"attribute",
"constant",
"function.builtin",
"function",
"keyword",
"operator",
"property",
"punctuation",
"punctuation.bracket",
"punctuation.delimiter",
"string",
"string.special",
"tag",
"type",
"type.builtin",
"variable",
"variable.builtin",
"variable.parameter",
];
pub fn register_builtin_languages() {
register_language("rust", rust_hl_factory);
register_language("javascript", javascript_hl_factory);
register_language("js", javascript_hl_factory);
register_language("jsx", jsx_hl_factory);
register_language("regex", regex_hl_factory);
register_language("python", tree_sitter_python);
register_language("py", tree_sitter_python);
}
/// Builds the highlight configuration for Rust.
///
/// Uses the grammar and highlight query exported by `tree_sitter_rust`; the
/// injection and locals queries are left empty.
///
/// # Errors
///
/// Returns the `QueryError` produced by `HighlightConfiguration::new`.
pub fn rust_hl_factory() -> Result<HighlightConfiguration, QueryError> {
    let grammar = tree_sitter_rust::language();
    let highlights = tree_sitter_rust::HIGHLIGHT_QUERY;
    let (injections, locals) = ("", "");

    HighlightConfiguration::new(grammar, highlights, injections, locals)
}
/// Builds the highlight configuration for JavaScript.
///
/// Uses the grammar together with the highlight, injection and locals
/// queries exported by `tree_sitter_javascript`.
///
/// # Errors
///
/// Returns the `QueryError` produced by `HighlightConfiguration::new`.
pub fn javascript_hl_factory() -> Result<HighlightConfiguration, QueryError> {
    let grammar = tree_sitter_javascript::language();
    let highlights = tree_sitter_javascript::HIGHLIGHT_QUERY;
    let injections = tree_sitter_javascript::INJECTION_QUERY;
    let locals = tree_sitter_javascript::LOCALS_QUERY;

    HighlightConfiguration::new(grammar, highlights, injections, locals)
}
/// Builds the highlight configuration for JSX.
///
/// JSX documents are parsed with the JavaScript grammar. The JSX highlight
/// query is combined with the base JavaScript highlight query so that
/// ordinary JavaScript constructs (keywords, strings, functions, ...) are
/// highlighted in addition to the JSX-specific ones. Injection and locals
/// queries are the same as for plain JavaScript.
///
/// # Errors
///
/// Returns the `QueryError` produced by `HighlightConfiguration::new`.
pub fn jsx_hl_factory() -> Result<HighlightConfiguration, QueryError> {
    // The JSX query goes first, followed by the base query.
    // NOTE(review): this ordering is meant to mirror the upstream grammar's
    // own highlight configuration — confirm against tree-sitter-javascript.
    let highlights = [
        tree_sitter_javascript::JSX_HIGHLIGHT_QUERY,
        tree_sitter_javascript::HIGHLIGHT_QUERY,
    ]
    .join("\n");

    HighlightConfiguration::new(
        tree_sitter_javascript::language(),
        &highlights,
        tree_sitter_javascript::INJECTION_QUERY,
        tree_sitter_javascript::LOCALS_QUERY,
    )
}
/// Builds the highlight configuration for regular expressions.
///
/// Uses the grammar and highlights query exported by `tree_sitter_regex`;
/// the injection and locals queries are left empty.
///
/// # Errors
///
/// Returns the `QueryError` produced by `HighlightConfiguration::new`.
pub fn regex_hl_factory() -> Result<HighlightConfiguration, QueryError> {
    let grammar = tree_sitter_regex::language();
    let highlights = tree_sitter_regex::HIGHLIGHTS_QUERY;
    let (injections, locals) = ("", "");

    HighlightConfiguration::new(grammar, highlights, injections, locals)
}
/// Builds the highlight configuration for Python.
///
/// Uses the grammar and highlight query exported by the `tree_sitter_python`
/// crate; the injection and locals queries are left empty.
///
/// # Errors
///
/// Returns the `QueryError` produced by `HighlightConfiguration::new`.
pub fn python_hl_factory() -> Result<HighlightConfiguration, QueryError> {
    HighlightConfiguration::new(
        tree_sitter_python::language(),
        tree_sitter_python::HIGHLIGHT_QUERY,
        "",
        "",
    )
}

/// Backward-compatible alias for [`python_hl_factory`].
///
/// This function shares its name with the `tree_sitter_python` crate, which
/// is easy to misread; prefer `python_hl_factory`, which follows the
/// `<language>_hl_factory` naming used by the other built-in factories.
///
/// # Errors
///
/// Returns whatever `python_hl_factory` returns.
pub fn tree_sitter_python() -> Result<HighlightConfiguration, QueryError> {
    python_hl_factory()
}

32
src/languages/mod.rs Normal file
View file

@ -0,0 +1,32 @@
use std::{collections::HashMap, sync::Mutex};
use once_cell::sync::Lazy;
use tree_sitter::QueryError;
use tree_sitter_highlight::HighlightConfiguration;
/// A plain function pointer that builds a fresh `HighlightConfiguration`
/// for one language, or reports the `QueryError` that prevented it.
pub type HighlightConfigFactory = fn() -> Result<HighlightConfiguration, QueryError>;
/// Process-wide registry mapping a language name to the factory that builds
/// its highlight configuration. Starts out empty and is created lazily on
/// first access; all access goes through the mutex.
static LANGUAGES: Lazy<Mutex<HashMap<String, HighlightConfigFactory>>> =
Lazy::new(|| Mutex::new(HashMap::new()));
#[cfg(feature = "built-in")]
pub mod built_in;
/// Associates `lang` with `factory` in the global language registry.
///
/// Registering a name that is already present replaces the earlier factory.
///
/// # Panics
///
/// Panics if the registry mutex is poisoned.
pub fn register_language(lang: impl Into<String>, factory: HighlightConfigFactory) {
    // The previous factory (if any) returned by `insert` is intentionally
    // discarded.
    LANGUAGES.lock().unwrap().insert(lang.into(), factory);
}
/// Looks up the factory registered for `lang`, builds its configuration and
/// configures it with `highlight_names`.
///
/// Returns `None` when no factory is registered under `lang` or when the
/// factory returns an error.
///
/// # Panics
///
/// Panics if the registry mutex is poisoned.
pub fn get_highlight_config(
    lang: &str,
    highlight_names: &[&str],
) -> Option<HighlightConfiguration> {
    // Copy the fn pointer out of the map; the guard is a temporary that is
    // dropped at the end of this statement. The factory therefore never runs
    // while the global registry is locked, so a slow factory does not block
    // other callers, a panicking factory cannot poison the registry, and a
    // factory that itself touches the registry cannot deadlock.
    let factory = LANGUAGES.lock().unwrap().get(lang).copied()?;

    let mut config = factory().ok()?;
    config.configure(highlight_names);
    Some(config)
}

View file

@ -3,16 +3,46 @@ use tree_sitter_highlight::{HighlightEvent, Highlighter};
mod languages;
pub fn highlight(source: &str) -> String {
let source = source.as_bytes();
#[cfg(feature = "built-in")]
pub use languages::built_in;
pub use languages::register_language;
/// Reduces a list of highlight attribute names to a single `u64` fingerprint
/// so that two attribute lists can be compared with one integer comparison.
///
/// The value is the `DefaultHasher` hash of the slice.
fn get_hash_for_attrs(attrs: &[&str]) -> u64 {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let mut state = DefaultHasher::new();
    Hash::hash(attrs, &mut state);
    state.finish()
}
/// Appends an opening `<span class="...">` tag to `out`.
///
/// Each entry of `attrs` contributes to the class list, with entries
/// separated by a single space and every `.` inside an entry turned into a
/// space (so `punctuation.bracket` becomes the two classes `punctuation` and
/// `bracket`). The names are written without any HTML escaping.
fn write_opening_tag(out: &mut String, attrs: &[&str]) {
    let class_list = attrs
        .iter()
        .map(|name| name.replace('.', " "))
        .collect::<Vec<_>>()
        .join(" ");

    out.push_str("<span class=\"");
    out.push_str(&class_list);
    out.push_str("\">");
}
pub fn highlight(lang: &str, source: &str, highlight_names: &[&str]) -> String {
let highlight_config = match languages::get_highlight_config(lang, highlight_names) {
Some(conf) => conf,
None => return source.to_string(),
};
let source_bytes = source.as_bytes();
let mut highlighter = Highlighter::new();
let (highlight_config, capture_names) =
languages::get_highlight_config("rust").expect("Could not get Rust language config");
let items = highlighter
.highlight(&highlight_config, source, None, |lang| {
languages::get_highlight_config(lang).map(|x| /* ugh */ &*Box::leak(Box::new(x.0)))
.highlight(&highlight_config, source_bytes, None, |injected_lang| {
languages::get_highlight_config(injected_lang, highlight_names)
.map(|x| /* ugh */ &*Box::leak(Box::new(x)))
})
.unwrap();
@ -21,58 +51,40 @@ pub fn highlight(source: &str) -> String {
// Collapse adjacent identical attribute sets
let mut last_attrs: u64 = 0;
let mut span_is_open = false;
let mut tag_is_open = false;
for item in items {
match item.unwrap() {
HighlightEvent::Source { start, end } => {
let source_section_bytes = &source[start..end];
let source_section = String::from_utf8_lossy(source_section_bytes);
let source_section = &source[start..end];
let attr_hash = {
use std::{
collections::hash_map::DefaultHasher,
hash::{Hash, Hasher},
};
let mut hasher = DefaultHasher::new();
highlight_attrs.hash(&mut hasher);
hasher.finish()
};
if last_attrs != attr_hash {
if span_is_open {
out.push_str("</span>");
span_is_open = false;
}
if !highlight_attrs.is_empty() {
out.push_str("<span class=\"");
for (i, attr) in highlight_attrs.iter().enumerate() {
if i != 0 {
out.push(' ');
}
out.push_str(&attr.replace('.', " "));
}
out.push_str("\">");
span_is_open = true;
last_attrs = attr_hash;
}
let attr_hash = get_hash_for_attrs(&highlight_attrs);
if last_attrs != attr_hash && tag_is_open {
out.push_str("</span>");
tag_is_open = false;
}
out.push_str(&encode_text(&source_section));
if !highlight_attrs.is_empty() && (!tag_is_open || last_attrs != attr_hash) {
write_opening_tag(&mut out, &highlight_attrs);
tag_is_open = true;
last_attrs = attr_hash;
}
out.push_str(&encode_text(source_section));
}
HighlightEvent::HighlightStart(highlight) => {
let capture_name = &capture_names[highlight.0];
let capture_name = &highlight_names[highlight.0];
highlight_attrs.push(capture_name);
}
HighlightEvent::HighlightEnd => {
highlight_attrs.pop();
}
}
}
if span_is_open {
if tag_is_open {
out.push_str("</span>");
}
@ -80,18 +92,46 @@ pub fn highlight(source: &str) -> String {
}
#[cfg(test)]
#[cfg(feature = "built-in")]
mod tests {
use crate::highlight;
use crate::*;
#[test]
fn highlight_rust() {
println!(
"{}",
highlight(
r#"fn main() {
println!("Hello, world!");
}"#
)
)
built_in::register_builtin_languages();
let source = include_str!("../samples/fizzbuzz.rs");
let expected_result = include_str!("../samples/fizzbuzz.rs.html");
assert_eq!(
highlight("rust", source, built_in::COMMON_HIGHLIGHT_NAMES),
expected_result
);
}
#[test]
fn highlight_js() {
built_in::register_builtin_languages();
let source = include_str!("../samples/fizzbuzz.js");
let expected_result = include_str!("../samples/fizzbuzz.js.html");
assert_eq!(
highlight("js", source, built_in::COMMON_HIGHLIGHT_NAMES),
expected_result
);
}
#[test]
fn highlight_python() {
built_in::register_builtin_languages();
let source = include_str!("../samples/fizzbuzz.py");
let expected_result = include_str!("../samples/fizzbuzz.py.html");
assert_eq!(
highlight("python", source, built_in::COMMON_HIGHLIGHT_NAMES),
expected_result
);
}
}