Create a struct to hold highlight configurations instead of leaking memory

main
Charlotte Som 2022-05-26 20:07:23 +02:00
parent d36feb184f
commit 2b62e2e810
7 changed files with 120 additions and 116 deletions

View File

@ -3,14 +3,6 @@ name = "chroma-syntaxis"
version = "0.1.0" version = "0.1.0"
edition = "2021" edition = "2021"
[features]
default = ["built-in"]
built-in = [
"tree-sitter-rust", "tree-sitter-javascript", "tree-sitter-regex",
"tree-sitter-python", "tree-sitter-c", "tree-sitter-cpp",
"tree-sitter-typescript",
]
[dependencies] [dependencies]
html-escape = "0.2.11" html-escape = "0.2.11"
once_cell = "1.10.0" once_cell = "1.10.0"
@ -18,10 +10,10 @@ thiserror = "1.0.30"
tree-sitter = "0.20.6" tree-sitter = "0.20.6"
tree-sitter-highlight = "0.20.1" tree-sitter-highlight = "0.20.1"
tree-sitter-rust = { version = "0.20.1", optional = true } tree-sitter-rust = { version = "0.20.1" }
tree-sitter-javascript = { version = "0.20.0", optional = true } tree-sitter-javascript = { version = "0.20.0" }
tree-sitter-regex = { git = "https://github.com/tree-sitter/tree-sitter-regex.git", rev = "e1cfca", optional = true } tree-sitter-regex = { git = "https://github.com/tree-sitter/tree-sitter-regex.git", rev = "e1cfca" }
tree-sitter-python = { version = "0.19.1", optional = true } tree-sitter-python = { version = "0.19.1" }
tree-sitter-c = { version = "0.20.1", optional = true } tree-sitter-c = { version = "0.20.1" }
tree-sitter-cpp = { version = "0.20.0", optional = true } tree-sitter-cpp = { version = "0.20.0" }
tree-sitter-typescript = { version = "0.20.1", optional = true } tree-sitter-typescript = { version = "0.20.1" }

View File

@ -1,16 +1,16 @@
<span class="keyword">const</span> <span class="function">fizzbuzz</span> <span class="operator">=</span> <span class="variable">n</span> <span class="operator">=&gt;</span> <span class="punctuation bracket">{</span> <span class="keyword">const</span> <span class="function">fizzbuzz</span> <span class="operator">=</span> <span class="variable">n</span> <span class="operator">=&gt;</span> <span class="punctuation bracket">{</span>
<span class="keyword">if</span> <span class="punctuation bracket">(</span><span class="variable">n</span> <span class="operator">%</span> 15 <span class="operator">==</span> 0<span class="punctuation bracket">)</span> <span class="keyword">if</span> <span class="punctuation bracket">(</span><span class="variable">n</span> <span class="operator">%</span> <span class="number">15</span> <span class="operator">==</span> <span class="number">0</span><span class="punctuation bracket">)</span>
<span class="keyword">return</span> <span class="string">"Fizz buzz"</span><span class="punctuation delimiter">;</span> <span class="keyword">return</span> <span class="string">"Fizz buzz"</span><span class="punctuation delimiter">;</span>
<span class="keyword">if</span> <span class="punctuation bracket">(</span><span class="variable">n</span> <span class="operator">%</span> 3 <span class="operator">==</span> 0<span class="punctuation bracket">)</span> <span class="keyword">if</span> <span class="punctuation bracket">(</span><span class="variable">n</span> <span class="operator">%</span> <span class="number">3</span> <span class="operator">==</span> <span class="number">0</span><span class="punctuation bracket">)</span>
<span class="keyword">return</span> <span class="string">"Fizz"</span><span class="punctuation delimiter">;</span> <span class="keyword">return</span> <span class="string">"Fizz"</span><span class="punctuation delimiter">;</span>
<span class="keyword">if</span> <span class="punctuation bracket">(</span><span class="variable">n</span> <span class="operator">%</span> 5 <span class="operator">==</span> 0<span class="punctuation bracket">)</span> <span class="keyword">if</span> <span class="punctuation bracket">(</span><span class="variable">n</span> <span class="operator">%</span> <span class="number">5</span> <span class="operator">==</span> <span class="number">0</span><span class="punctuation bracket">)</span>
<span class="keyword">return</span> <span class="string">"Buzz"</span><span class="punctuation delimiter">;</span> <span class="keyword">return</span> <span class="string">"Buzz"</span><span class="punctuation delimiter">;</span>
<span class="keyword">return</span> <span class="variable">n</span><span class="punctuation delimiter">;</span> <span class="keyword">return</span> <span class="variable">n</span><span class="punctuation delimiter">;</span>
<span class="punctuation bracket">}</span> <span class="punctuation bracket">}</span>
<span class="keyword">const</span> <span class="function">main</span> <span class="operator">=</span> <span class="punctuation bracket">()</span> <span class="operator">=&gt;</span> <span class="punctuation bracket">{</span> <span class="keyword">const</span> <span class="function">main</span> <span class="operator">=</span> <span class="punctuation bracket">()</span> <span class="operator">=&gt;</span> <span class="punctuation bracket">{</span>
<span class="keyword">for</span> <span class="punctuation bracket">(</span><span class="keyword">let</span> <span class="variable">n</span> <span class="operator">=</span> 1<span class="punctuation delimiter">;</span> <span class="variable">n</span> <span class="operator">&lt;=</span> 100<span class="punctuation delimiter">;</span> <span class="variable">n</span><span class="operator">++</span><span class="punctuation bracket">)</span> <span class="punctuation bracket">{</span> <span class="keyword">for</span> <span class="punctuation bracket">(</span><span class="keyword">let</span> <span class="variable">n</span> <span class="operator">=</span> <span class="number">1</span><span class="punctuation delimiter">;</span> <span class="variable">n</span> <span class="operator">&lt;=</span> <span class="number">100</span><span class="punctuation delimiter">;</span> <span class="variable">n</span><span class="operator">++</span><span class="punctuation bracket">)</span> <span class="punctuation bracket">{</span>
<span class="variable builtin">console</span><span class="punctuation delimiter">.</span><span class="function">log</span><span class="punctuation bracket">(</span><span class="function">fizzbuzz</span><span class="punctuation bracket">(</span><span class="variable">n</span><span class="punctuation bracket">))</span><span class="punctuation delimiter">;</span> <span class="variable builtin">console</span><span class="punctuation delimiter">.</span><span class="function method">log</span><span class="punctuation bracket">(</span><span class="function">fizzbuzz</span><span class="punctuation bracket">(</span><span class="variable">n</span><span class="punctuation bracket">))</span><span class="punctuation delimiter">;</span>
<span class="punctuation bracket">}</span> <span class="punctuation bracket">}</span>
<span class="punctuation bracket">}</span><span class="punctuation delimiter">;</span> <span class="punctuation bracket">}</span><span class="punctuation delimiter">;</span>

View File

@ -1,7 +1,7 @@
<span class="keyword">def</span> <span class="function">main</span>(): <span class="keyword">def</span> <span class="function">main</span>():
<span class="keyword">for</span> <span class="variable">n</span> <span class="operator">in</span> <span class="function builtin">range</span>(1, 101): <span class="keyword">for</span> <span class="variable">n</span> <span class="operator">in</span> <span class="function builtin">range</span>(<span class="number">1</span>, <span class="number">101</span>):
<span class="variable">lut</span> <span class="operator">=</span> [<span class="variable">n</span>, <span class="string">"Fizz"</span>, <span class="string">"Buzz"</span>, <span class="string">"Fizz Buzz"</span>] <span class="variable">lut</span> <span class="operator">=</span> [<span class="variable">n</span>, <span class="string">"Fizz"</span>, <span class="string">"Buzz"</span>, <span class="string">"Fizz Buzz"</span>]
<span class="variable">idx</span> <span class="operator">=</span> (<span class="variable">n</span> <span class="operator">%</span> 3 <span class="operator">==</span> 0) <span class="operator">+</span> 2 <span class="operator">*</span> (<span class="variable">n</span> <span class="operator">%</span> 5 <span class="operator">==</span> 0) <span class="variable">idx</span> <span class="operator">=</span> (<span class="variable">n</span> <span class="operator">%</span> <span class="number">3</span> <span class="operator">==</span> <span class="number">0</span>) <span class="operator">+</span> <span class="number">2</span> <span class="operator">*</span> (<span class="variable">n</span> <span class="operator">%</span> <span class="number">5</span> <span class="operator">==</span> <span class="number">0</span>)
<span class="function builtin">print</span>(<span class="variable">lut</span>[<span class="variable">idx</span>]) <span class="function builtin">print</span>(<span class="variable">lut</span>[<span class="variable">idx</span>])
<span class="keyword">if</span> <span class="variable">__name__</span> <span class="operator">==</span> <span class="string">"__main__"</span>: <span class="keyword">if</span> <span class="variable">__name__</span> <span class="operator">==</span> <span class="string">"__main__"</span>:

View File

@ -1,10 +1,10 @@
<span class="keyword">fn</span> <span class="function">main</span><span class="punctuation bracket">()</span> { <span class="keyword">fn</span> <span class="function">main</span><span class="punctuation bracket">()</span> {
<span class="keyword">for</span> n <span class="keyword">in</span> <span class="constant">1</span>..=<span class="constant">100</span> { <span class="keyword">for</span> n <span class="keyword">in</span> <span class="constant builtin">1</span>..=<span class="constant builtin">100</span> {
<span class="keyword">match</span> <span class="punctuation bracket">(</span>n % <span class="constant">3</span>, n % <span class="constant">5</span><span class="punctuation bracket">)</span> { <span class="keyword">match</span> <span class="punctuation bracket">(</span>n % <span class="constant builtin">3</span>, n % <span class="constant builtin">5</span><span class="punctuation bracket">)</span> {
<span class="punctuation bracket">(</span><span class="constant">0</span>, <span class="constant">0</span><span class="punctuation bracket">)</span> =&gt; <span class="function">println!</span><span class="punctuation bracket">(</span><span class="string">"Fizz buzz"</span><span class="punctuation bracket">)</span>, <span class="punctuation bracket">(</span><span class="constant builtin">0</span>, <span class="constant builtin">0</span><span class="punctuation bracket">)</span> =&gt; <span class="function macro">println!</span><span class="punctuation bracket">(</span><span class="string">"Fizz buzz"</span><span class="punctuation bracket">)</span>,
<span class="punctuation bracket">(</span><span class="constant">0</span>, _<span class="punctuation bracket">)</span> =&gt; <span class="function">println!</span><span class="punctuation bracket">(</span><span class="string">"Fizz"</span><span class="punctuation bracket">)</span>, <span class="punctuation bracket">(</span><span class="constant builtin">0</span>, _<span class="punctuation bracket">)</span> =&gt; <span class="function macro">println!</span><span class="punctuation bracket">(</span><span class="string">"Fizz"</span><span class="punctuation bracket">)</span>,
<span class="punctuation bracket">(</span>_, <span class="constant">0</span><span class="punctuation bracket">)</span> =&gt; <span class="function">println!</span><span class="punctuation bracket">(</span><span class="string">"Buzz"</span><span class="punctuation bracket">)</span>, <span class="punctuation bracket">(</span>_, <span class="constant builtin">0</span><span class="punctuation bracket">)</span> =&gt; <span class="function macro">println!</span><span class="punctuation bracket">(</span><span class="string">"Buzz"</span><span class="punctuation bracket">)</span>,
_ =&gt; <span class="function">println!</span><span class="punctuation bracket">(</span><span class="string">"{}"</span>, n<span class="punctuation bracket">)</span>, _ =&gt; <span class="function macro">println!</span><span class="punctuation bracket">(</span><span class="string">"{}"</span>, n<span class="punctuation bracket">)</span>,
} }
} }
} }

View File

@ -1,7 +1,7 @@
use tree_sitter::QueryError; use tree_sitter::QueryError;
use tree_sitter_highlight::HighlightConfiguration; use tree_sitter_highlight::HighlightConfiguration;
use crate::register_language; use crate::SyntaxHighlighter;
pub static COMMON_HIGHLIGHT_NAMES: &[&str] = &[ pub static COMMON_HIGHLIGHT_NAMES: &[&str] = &[
"attribute", "attribute",
@ -40,26 +40,33 @@ pub static COMMON_HIGHLIGHT_NAMES: &[&str] = &[
"variable.parameter", "variable.parameter",
]; ];
pub fn register_builtin_languages() { pub fn register_builtin_languages(highlighter: &mut SyntaxHighlighter) {
register_language("rust", rust_hl_factory); let mut add = |lang: &str, factory: fn() -> Result<HighlightConfiguration, QueryError>| {
if let Ok(mut config) = factory() {
config.configure(highlighter.highlight_names);
highlighter.register(lang.to_string(), config);
}
};
register_language("javascript", javascript_hl_factory); add("rust", rust_hl_factory);
register_language("js", javascript_hl_factory);
register_language("jsx", jsx_hl_factory);
register_language("regex", regex_hl_factory); add("javascript", javascript_hl_factory);
add("js", javascript_hl_factory);
add("jsx", jsx_hl_factory);
register_language("python", python_hl_factory); add("regex", regex_hl_factory);
register_language("py", python_hl_factory);
register_language("c", c_hl_factory); add("python", python_hl_factory);
add("py", python_hl_factory);
register_language("cpp", cpp_hl_factory); add("c", c_hl_factory);
register_language("c++", cpp_hl_factory);
register_language("typescript", typescript_hl_factory); add("cpp", cpp_hl_factory);
register_language("ts", typescript_hl_factory); add("c++", cpp_hl_factory);
register_language("tsx", tsx_hl_factory);
add("typescript", typescript_hl_factory);
add("ts", typescript_hl_factory);
add("tsx", tsx_hl_factory);
} }
pub fn rust_hl_factory() -> Result<HighlightConfiguration, QueryError> { pub fn rust_hl_factory() -> Result<HighlightConfiguration, QueryError> {

View File

@ -1,32 +0,0 @@
use std::{collections::HashMap, sync::Mutex};
use once_cell::sync::Lazy;
use tree_sitter::QueryError;
use tree_sitter_highlight::HighlightConfiguration;
/// Signature of a function that builds a fresh [`HighlightConfiguration`] for one
/// language, or reports the [`QueryError`] raised while building it.
pub type HighlightConfigFactory = fn() -> Result<HighlightConfiguration, QueryError>;
// Process-wide registry mapping a language name to its configuration factory.
// It is created lazily on first access and guarded by a mutex; `register_language`
// writes to it and `get_highlight_config` reads from it.
static LANGUAGES: Lazy<Mutex<HashMap<String, HighlightConfigFactory>>> =
Lazy::new(|| Mutex::new(HashMap::new()));
#[cfg(feature = "built-in")]
pub mod built_in;
/// Adds `factory` to the global language registry under the name `lang`.
///
/// An entry already stored under the same name is overwritten.
///
/// # Panics
///
/// Panics if the registry mutex has been poisoned.
pub fn register_language(lang: impl Into<String>, factory: HighlightConfigFactory) {
    // The guard is a temporary, so the lock is released at the end of the statement.
    LANGUAGES.lock().unwrap().insert(lang.into(), factory);
}
/// Builds a highlight configuration for `lang`, configured with `highlight_names`.
///
/// Returns `None` when no factory is registered under `lang` or when the
/// registered factory returns an error.
///
/// # Panics
///
/// Panics if the registry mutex has been poisoned.
pub fn get_highlight_config(
    lang: &str,
    highlight_names: &[&str],
) -> Option<HighlightConfiguration> {
    // The registry stays locked for the whole call, including the factory invocation.
    let registry = LANGUAGES.lock().unwrap();
    let build = registry.get(lang)?;
    let mut configuration = build().ok()?;
    configuration.configure(highlight_names);
    Some(configuration)
}

View File

@ -1,11 +1,9 @@
use std::collections::HashMap;
use html_escape::encode_text; use html_escape::encode_text;
use tree_sitter_highlight::{HighlightEvent, Highlighter}; use tree_sitter_highlight::{HighlightConfiguration, HighlightEvent, Highlighter};
mod languages; pub mod languages;
#[cfg(feature = "built-in")]
pub use languages::built_in;
pub use languages::register_language;
fn get_hash_for_attrs(attrs: &[&str]) -> u64 { fn get_hash_for_attrs(attrs: &[&str]) -> u64 {
use std::{ use std::{
@ -30,22 +28,11 @@ fn write_opening_tag(out: &mut String, attrs: &[&str]) {
out.push_str("\">"); out.push_str("\">");
} }
pub fn highlight(lang: &str, source: &str, highlight_names: &[&str]) -> String { fn build_highlighted_html<'a>(
let highlight_config = match languages::get_highlight_config(lang, highlight_names) { source: &'a str,
Some(conf) => conf, events: impl Iterator<Item = HighlightEvent> + 'a,
None => return source.to_string(), highlight_names: &[&str],
}; ) -> String {
let source_bytes = source.as_bytes();
let mut highlighter = Highlighter::new();
let items = highlighter
.highlight(&highlight_config, source_bytes, None, |injected_lang| {
languages::get_highlight_config(injected_lang, highlight_names)
.map(|x| /* ugh */ &*Box::leak(Box::new(x)))
})
.unwrap();
let mut highlight_attrs: Vec<&str> = Vec::new(); let mut highlight_attrs: Vec<&str> = Vec::new();
let mut out = String::new(); let mut out = String::new();
@ -53,8 +40,8 @@ pub fn highlight(lang: &str, source: &str, highlight_names: &[&str]) -> String {
let mut last_attrs: u64 = 0; let mut last_attrs: u64 = 0;
let mut tag_is_open = false; let mut tag_is_open = false;
for item in items { for event in events {
match item.unwrap() { match event {
HighlightEvent::Source { start, end } => { HighlightEvent::Source { start, end } => {
let source_section = &source[start..end]; let source_section = &source[start..end];
@ -74,7 +61,7 @@ pub fn highlight(lang: &str, source: &str, highlight_names: &[&str]) -> String {
} }
HighlightEvent::HighlightStart(highlight) => { HighlightEvent::HighlightStart(highlight) => {
let capture_name = &highlight_names[highlight.0]; let capture_name = highlight_names[highlight.0];
highlight_attrs.push(capture_name); highlight_attrs.push(capture_name);
} }
@ -91,47 +78,97 @@ pub fn highlight(lang: &str, source: &str, highlight_names: &[&str]) -> String {
out out
} }
/// Owns a set of per-language highlight configurations and renders source
/// text to highlighted HTML with them.
pub struct SyntaxHighlighter<'a> {
/// Registered configurations, keyed by the language name passed to `register`.
languages: HashMap<String, HighlightConfiguration>,
/// Highlight names handed to the HTML builder, which indexes into this slice
/// with the highlight index carried by each highlight-start event.
highlight_names: &'a [&'a str],
}
impl SyntaxHighlighter<'_> {
    /// Creates a highlighter that uses `languages::COMMON_HIGHLIGHT_NAMES` and has
    /// the built-in languages registered.
    pub fn new() -> Self {
        let mut instance = Self {
            highlight_names: languages::COMMON_HIGHLIGHT_NAMES,
            languages: HashMap::new(),
        };

        // Populate the empty registry before handing the highlighter out.
        languages::register_builtin_languages(&mut instance);
        instance
    }
}
impl Default for SyntaxHighlighter<'_> {
fn default() -> Self {
Self::new()
}
}
impl<'a> SyntaxHighlighter<'a> {
pub fn new_empty(highlight_names: &'a [&'a str]) -> Self {
Self {
languages: HashMap::new(),
highlight_names,
}
}
pub fn register(&mut self, lang: String, config: HighlightConfiguration) {
self.languages.insert(lang, config);
}
pub fn highlight(&self, lang: &str, source: &str) -> String {
let highlight_config = match self.languages.get(lang) {
Some(config) => config,
None => return source.to_string(),
};
let source_bytes = source.as_bytes();
let mut highlighter = Highlighter::new();
let highlight_result =
highlighter.highlight(highlight_config, source_bytes, None, |injected_lang| {
self.languages.get(injected_lang)
});
let events = match highlight_result {
Ok(events) => events,
Err(_) => return source.to_string(),
}
.filter_map(|e| e.ok());
build_highlighted_html(source, events, self.highlight_names)
}
}
#[cfg(test)] #[cfg(test)]
#[cfg(feature = "built-in")]
mod tests { mod tests {
use crate::*; use crate::*;
#[test] #[test]
fn highlight_rust() { fn highlight_rust() {
built_in::register_builtin_languages(); let highlighter = SyntaxHighlighter::new();
let source = include_str!("../samples/fizzbuzz.rs"); let source = include_str!("../samples/fizzbuzz.rs");
let expected_result = include_str!("../samples/fizzbuzz.rs.html"); let expected_result = include_str!("../samples/fizzbuzz.rs.html");
assert_eq!( assert_eq!(highlighter.highlight("rust", source), expected_result);
highlight("rust", source, built_in::COMMON_HIGHLIGHT_NAMES),
expected_result
);
} }
#[test] #[test]
fn highlight_js() { fn highlight_js() {
built_in::register_builtin_languages(); let highlighter = SyntaxHighlighter::new();
let source = include_str!("../samples/fizzbuzz.js"); let source = include_str!("../samples/fizzbuzz.js");
let expected_result = include_str!("../samples/fizzbuzz.js.html"); let expected_result = include_str!("../samples/fizzbuzz.js.html");
assert_eq!( assert_eq!(highlighter.highlight("js", source), expected_result);
highlight("js", source, built_in::COMMON_HIGHLIGHT_NAMES),
expected_result
);
} }
#[test] #[test]
fn highlight_python() { fn highlight_python() {
built_in::register_builtin_languages(); let highlighter = SyntaxHighlighter::new();
let source = include_str!("../samples/fizzbuzz.py"); let source = include_str!("../samples/fizzbuzz.py");
let expected_result = include_str!("../samples/fizzbuzz.py.html"); let expected_result = include_str!("../samples/fizzbuzz.py.html");
assert_eq!( assert_eq!(highlighter.highlight("python", source), expected_result);
highlight("python", source, built_in::COMMON_HIGHLIGHT_NAMES),
expected_result
);
} }
} }