Add the machinery to load syntax config from TOML.

It's embedded into the binary at build time for now, but it's progress.
This commit is contained in:
Blaž Hrastnik 2021-03-25 15:26:25 +09:00
parent a900159a86
commit e3c4edae32
12 changed files with 86 additions and 55 deletions

12
Cargo.lock generated
View File

@ -489,6 +489,7 @@ dependencies = [
"once_cell",
"regex",
"ropey",
"serde",
"smallvec",
"tendril",
"tree-sitter",
@ -523,6 +524,7 @@ name = "helix-syntax"
version = "0.1.0"
dependencies = [
"cc",
"serde",
"threadpool",
"tree-sitter",
]
@ -549,6 +551,7 @@ dependencies = [
"pulldown-cmark",
"smol",
"smol-timeout",
"toml",
"tui",
]
@ -1207,6 +1210,15 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"
[[package]]
name = "toml"
version = "0.5.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a31142970826733df8241ef35dc040ef98c679ab14d7c3e54d827099b3acecaa"
dependencies = [
"serde",
]
[[package]]
name = "tree-sitter"
version = "0.17.1"

View File

@ -19,3 +19,5 @@ unicode-width = "0.1"
tree-sitter = "0.17"
once_cell = "1.4"
regex = "1"
serde = { version = "1.0", features = ["derive"] }

View File

@ -10,9 +10,19 @@
};
use once_cell::sync::{Lazy, OnceCell};
use serde::{Deserialize, Serialize};
/// Top-level syntax configuration, (de)serialized with serde.
///
/// The binary parses `languages.toml` into this type with `toml::from_str`
/// and hands the result to `Loader::new`.
#[derive(Serialize, Deserialize)]
pub struct Configuration {
/// One entry per `[[language]]` table of the TOML document; iterated by
/// `Loader::new` to register each language.
language: Vec<LanguageConfiguration>,
}
// largely based on tree-sitter/cli/src/loader.rs
#[derive(Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct LanguageConfiguration {
#[serde(rename = "name")]
pub(crate) language_id: Lang,
pub scope: String, // source.rust
pub file_types: Vec<String>, // filename ends_with? <Gemfile, rb, etc>
pub roots: Vec<String>, // these indicate project roots <.git, Cargo.toml>
@ -24,22 +34,29 @@ pub struct LanguageConfiguration {
// injection_regex
// first_line_regex
//
//
pub(crate) language_id: Lang,
#[serde(skip)]
pub(crate) highlight_config: OnceCell<Option<Arc<HighlightConfiguration>>>,
// tags_config OnceCell<> https://github.com/tree-sitter/tree-sitter/pull/583
pub language_server_config: Option<LanguageServerConfiguration>,
pub indent_config: Option<IndentationConfiguration>,
#[serde(skip_serializing_if = "Option::is_none")]
pub language_server: Option<LanguageServerConfiguration>,
#[serde(skip_serializing_if = "Option::is_none")]
pub indent: Option<IndentationConfiguration>,
}
/// Language server settings attached to a `LanguageConfiguration`.
///
/// Keys are (de)serialized in kebab-case. In `languages.toml` this is the
/// value of the `language-server` key, e.g. `{ command = "rust-analyzer" }`.
#[derive(Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct LanguageServerConfiguration {
/// Language server command (presumably the executable that is spawned for
/// the server; the consumer is not visible here, so confirm against it).
pub command: String,
/// Arguments accompanying `command`. The key may be omitted in the config,
/// in which case this is an empty list; an empty list is also left out
/// when serializing.
#[serde(default)]
#[serde(skip_serializing_if = "Vec::is_empty")]
pub args: Vec<String>,
}
#[derive(Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub struct IndentationConfiguration {
pub tab_width: usize,
pub indent_unit: String,
pub unit: String,
}
impl LanguageConfiguration {
@ -81,7 +98,7 @@ pub fn scope(&self) -> &str {
}
}
pub static LOADER: Lazy<Loader> = Lazy::new(Loader::init);
pub static LOADER: OnceCell<Loader> = OnceCell::new();
pub struct Loader {
// highlight_names ?
@ -90,48 +107,13 @@ pub struct Loader {
}
impl Loader {
fn init() -> Self {
pub fn new(config: Configuration) -> Self {
let mut loader = Self {
language_configs: Vec::new(),
language_config_ids_by_file_type: HashMap::new(),
};
// hardcoded from now, might load from toml
let configs = vec![
LanguageConfiguration {
scope: "source.rust".to_string(),
file_types: vec!["rs".to_string()],
language_id: Lang::Rust,
highlight_config: OnceCell::new(),
//
path: "../helix-syntax/languages/tree-sitter-rust".into(),
roots: vec![],
language_server_config: Some(LanguageServerConfiguration {
command: "rust-analyzer".to_string(),
args: vec![],
}),
indent_config: Some(IndentationConfiguration {
tab_width: 4,
indent_unit: String::from(" "),
}),
},
LanguageConfiguration {
scope: "source.toml".to_string(),
file_types: vec!["toml".to_string()],
language_id: Lang::Toml,
highlight_config: OnceCell::new(),
//
path: "../helix-syntax/languages/tree-sitter-toml".into(),
roots: vec![],
language_server_config: None,
indent_config: Some(IndentationConfiguration {
tab_width: 2,
indent_unit: String::from(" "),
}),
},
];
for config in configs {
for config in config.language {
// get the next id
let language_id = loader.language_configs.len();

View File

@ -127,7 +127,7 @@ pub fn get(
ex: &smol::Executor,
) -> Option<Arc<Client>> {
// TODO: propagate the error
if let Some(config) = &language_config.language_server_config {
if let Some(config) = &language_config.language_server {
// avoid borrow issues
let inner = &mut self.inner;
let s_incoming = &self.incoming;

View File

@ -8,6 +8,7 @@ edition = "2018"
[dependencies]
tree-sitter = "0.17"
serde = { version = "1.0", features = ["derive"] }
[build-dependencies]
cc = { version = "1", features = ["parallel"] }

View File

@ -1,5 +0,0 @@
[[language]]
name = "rust"
scope = "source.rust"
injection-regex = "rust"
file-types = ["rs"]

View File

@ -1,3 +1,4 @@
use serde::{Deserialize, Serialize};
use tree_sitter::Language;
#[macro_export]
@ -12,7 +13,8 @@ macro_rules! mk_extern {
#[macro_export]
macro_rules! mk_enum {
( $( $camel:ident ),* ) => {
#[derive(Clone, Copy, Debug, PartialEq)]
#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum Lang {
$(
$camel,

View File

@ -41,3 +41,6 @@ ignore = "0.4"
dirs-next = "2.0"
# markdown doc rendering
pulldown-cmark = { version = "0.8", default-features = false }
# config
toml = "0.5"

View File

@ -73,6 +73,14 @@ fn main() {
setup_logging(verbosity).expect("failed to initialize logging.");
// initialize language registry
use helix_core::syntax::{Loader, LOADER};
let toml = include_str!("../../languages.toml");
LOADER.get_or_init(|| {
let config = toml::from_str(&toml).expect("Could not parse languages.toml");
Loader::new(config)
});
for _ in 0..num_cpus::get() {
std::thread::spawn(move || smol::block_on(EX.run(smol::future::pending::<()>())));
}

View File

@ -80,6 +80,8 @@ fn to_span(text: pulldown_cmark::CowStr) -> Span {
let rope = Rope::from(text.as_ref());
let syntax = syntax::LOADER
.get()
.unwrap()
.language_config_for_scope(&format!("source.{}", language))
.and_then(|config| config.highlight_config(theme.scopes()))
.map(|config| Syntax::new(&rope, config));

View File

@ -97,7 +97,10 @@ pub fn load(path: PathBuf, scopes: &[String]) -> Result<Self, Error> {
let mut doc = Self::new(doc);
let language_config = LOADER.language_config_for_file_name(path.as_path());
let language_config = LOADER
.get()
.unwrap()
.language_config_for_file_name(path.as_path());
doc.set_language(language_config, scopes);
// canonicalize path to absolute value
@ -161,7 +164,8 @@ pub fn set_language(
}
pub fn set_language2(&mut self, scope: &str, scopes: &[String]) {
let language_config = LOADER.language_config_for_scope(scope);
let language_config = LOADER.get().unwrap().language_config_for_scope(scope);
self.set_language(language_config, scopes);
}
@ -304,7 +308,7 @@ pub fn syntax(&self) -> Option<&Syntax> {
pub fn tab_width(&self) -> usize {
self.language
.as_ref()
.and_then(|config| config.indent_config.as_ref())
.and_then(|config| config.indent.as_ref())
.map(|config| config.tab_width)
.unwrap_or(4) // fallback to 4 columns
}
@ -313,8 +317,8 @@ pub fn tab_width(&self) -> usize {
pub fn indent_unit(&self) -> &str {
self.language
.as_ref()
.and_then(|config| config.indent_config.as_ref())
.map(|config| config.indent_unit.as_str())
.and_then(|config| config.indent.as_ref())
.map(|config| config.unit.as_str())
.unwrap_or(" ") // fallback to 2 spaces
// " ".repeat(TAB_WIDTH)

20
languages.toml Normal file
View File

@ -0,0 +1,20 @@
# Rust language definition. Each `[[language]]` table is deserialized into one
# `LanguageConfiguration`.
[[language]]
# Deserialized into the `Lang` enum, whose variants are spelled in lowercase.
name = "rust"
# Scope string; configurations are looked up by it via
# `language_config_for_scope` (e.g. "source.<language>").
scope = "source.rust"
# NOTE(review): no struct field for this key is visible in this change;
# confirm that it is consumed.
injection-regex = "rust"
# Presumably matched against the end of the file name; verify in the loader.
file-types = ["rs"]
# Entries that indicate a project root (such as .git or Cargo.toml); none are
# configured yet.
roots = []
path = "../helix-syntax/languages/tree-sitter-rust"
# `args` may be omitted; it defaults to an empty list.
language-server = { command = "rust-analyzer" }
# `tab-width` is the width reported by `Document::tab_width`; `unit` is the
# string returned by `Document::indent_unit`.
indent = { tab-width = 4, unit = " " }
# TOML language definition. Each `[[language]]` table is deserialized into one
# `LanguageConfiguration`.
[[language]]
# Deserialized into the `Lang` enum, whose variants are spelled in lowercase.
name = "toml"
# Scope string; configurations are looked up by it via
# `language_config_for_scope` (e.g. "source.<language>").
scope = "source.toml"
# NOTE(review): no struct field for this key is visible in this change;
# confirm that it is consumed.
injection-regex = "toml"
# Presumably matched against the end of the file name; verify in the loader.
file-types = ["toml"]
# Entries that indicate a project root (such as .git or Cargo.toml); none are
# configured yet.
roots = []
path = "../helix-syntax/languages/tree-sitter-toml"
# No `language-server` key: the field is optional, so this language has no
# language server configured.
# `tab-width` is the width reported by `Document::tab_width`; `unit` is the
# string returned by `Document::indent_unit`.
indent = { tab-width = 2, unit = " " }