helix-mirror/helix-core/src/indent.rs

493 lines
14 KiB
Rust
Raw Normal View History

2020-10-09 11:58:43 +04:00
use crate::{
chars::{char_is_line_ending, char_is_whitespace},
find_first_non_whitespace_char,
syntax::{IndentQuery, LanguageConfiguration, Syntax},
tree_sitter::Node,
Rope, RopeSlice,
2020-10-09 11:58:43 +04:00
};
/// Enum representing indentation style.
///
/// Only values 1-8 are valid for the `Spaces` variant.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum IndentStyle {
Tabs,
Spaces(u8),
}
impl IndentStyle {
/// Creates an `IndentStyle` from an indentation string.
///
/// For example, passing `" "` (four spaces) will create `IndentStyle::Spaces(4)`.
#[allow(clippy::should_implement_trait)]
#[inline]
pub fn from_str(indent: &str) -> Self {
// XXX: do we care about validating the input more than this? Probably not...?
debug_assert!(!indent.is_empty() && indent.len() <= 8);
if indent.starts_with(' ') {
IndentStyle::Spaces(indent.len() as u8)
} else {
IndentStyle::Tabs
}
}
#[inline]
pub fn as_str(&self) -> &'static str {
match *self {
IndentStyle::Tabs => "\t",
IndentStyle::Spaces(1) => " ",
IndentStyle::Spaces(2) => " ",
IndentStyle::Spaces(3) => " ",
IndentStyle::Spaces(4) => " ",
IndentStyle::Spaces(5) => " ",
IndentStyle::Spaces(6) => " ",
IndentStyle::Spaces(7) => " ",
IndentStyle::Spaces(8) => " ",
// Unsupported indentation style. This should never happen,
// but just in case fall back to two spaces.
IndentStyle::Spaces(n) => {
debug_assert!(n > 0 && n <= 8); // Always triggers. `debug_panic!()` wanted.
" "
}
}
}
}
/// Attempts to detect the indentation style used in a document.
///
/// Returns the indentation style if the auto-detect confidence is
/// reasonably high, otherwise returns `None`.
pub fn auto_detect_indent_style(document_text: &Rope) -> Option<IndentStyle> {
// Build a histogram of the indentation *increases* between
// subsequent lines, ignoring lines that are all whitespace.
//
// Index 0 is for tabs, the rest are 1-8 spaces.
let histogram: [usize; 9] = {
let mut histogram = [0; 9];
let mut prev_line_is_tabs = false;
let mut prev_line_leading_count = 0usize;
// Loop through the lines, checking for and recording indentation
// increases as we go.
'outer: for line in document_text.lines().take(1000) {
let mut c_iter = line.chars();
// Is first character a tab or space?
let is_tabs = match c_iter.next() {
Some('\t') => true,
Some(' ') => false,
// Ignore blank lines.
Some(c) if char_is_line_ending(c) => continue,
_ => {
prev_line_is_tabs = false;
prev_line_leading_count = 0;
continue;
}
};
// Count the line's total leading tab/space characters.
let mut leading_count = 1;
let mut count_is_done = false;
for c in c_iter {
match c {
'\t' if is_tabs && !count_is_done => leading_count += 1,
' ' if !is_tabs && !count_is_done => leading_count += 1,
// We stop counting if we hit whitespace that doesn't
// qualify as indent or doesn't match the leading
// whitespace, but we don't exit the loop yet because
// we still want to determine if the line is blank.
c if char_is_whitespace(c) => count_is_done = true,
// Ignore blank lines.
c if char_is_line_ending(c) => continue 'outer,
_ => break,
}
// Bound the worst-case execution time for weird text files.
if leading_count > 256 {
continue 'outer;
}
}
// If there was an increase in indentation over the previous
// line, update the histogram with that increase.
if (prev_line_is_tabs == is_tabs || prev_line_leading_count == 0)
&& prev_line_leading_count < leading_count
{
if is_tabs {
histogram[0] += 1;
} else {
let amount = leading_count - prev_line_leading_count;
if amount <= 8 {
histogram[amount] += 1;
}
}
}
// Store this line's leading whitespace info for use with
// the next line.
prev_line_is_tabs = is_tabs;
prev_line_leading_count = leading_count;
}
// Give more weight to tabs, because their presence is a very
// strong indicator.
histogram[0] *= 2;
histogram
};
// Find the most frequent indent, its frequency, and the frequency of
// the next-most frequent indent.
let indent = histogram
.iter()
.enumerate()
.max_by_key(|kv| kv.1)
.unwrap()
.0;
let indent_freq = histogram[indent];
let indent_freq_2 = *histogram
.iter()
.enumerate()
.filter(|kv| kv.0 != indent)
.map(|kv| kv.1)
.max()
.unwrap();
// Return the the auto-detected result if we're confident enough in its
// accuracy, based on some heuristics.
if indent_freq >= 1 && (indent_freq_2 as f64 / indent_freq as f64) < 0.66 {
Some(match indent {
0 => IndentStyle::Tabs,
_ => IndentStyle::Spaces(indent as u8),
})
} else {
None
}
}
/// To determine indentation of a newly inserted line, figure out the indentation at the last col
/// of the previous line.
#[allow(dead_code)]
2021-03-22 08:47:39 +04:00
fn indent_level_for_line(line: RopeSlice, tab_width: usize) -> usize {
2020-10-09 11:58:43 +04:00
let mut len = 0;
for ch in line.chars() {
match ch {
2021-03-22 08:47:39 +04:00
'\t' => len += tab_width,
2020-10-09 11:58:43 +04:00
' ' => len += 1,
_ => break,
}
}
2021-03-22 08:47:39 +04:00
len / tab_width
2020-10-09 11:58:43 +04:00
}
/// Find the highest syntax node at position.
/// This is to identify the column where this node (e.g., an HTML closing tag) ends.
fn get_highest_syntax_node_at_bytepos(syntax: &Syntax, pos: usize) -> Option<Node> {
2021-03-26 06:02:32 +04:00
let tree = syntax.tree();
2020-10-09 11:58:43 +04:00
// named_descendant
let mut node = match tree.root_node().descendant_for_byte_range(pos, pos) {
2020-10-09 11:58:43 +04:00
Some(node) => node,
None => return None,
};
while let Some(parent) = node.parent() {
if parent.start_byte() == node.start_byte() {
node = parent
} else {
break;
}
}
Some(node)
}
fn calculate_indentation(query: &IndentQuery, node: Option<Node>, newline: bool) -> usize {
// NOTE: can't use contains() on query because of comparing Vec<String> and &str
// https://doc.rust-lang.org/std/vec/struct.Vec.html#method.contains
2021-07-24 18:37:33 +04:00
let mut increment: isize = 0;
let mut node = match node {
Some(node) => node,
None => return 0,
};
2021-01-10 18:46:18 +04:00
let mut prev_start = node.start_position().row;
// if we're calculating indentation for a brand new line then the current node will become the
// parent node. We need to take it's indentation level into account too.
let node_kind = node.kind();
if newline && query.indent.contains(node_kind) {
increment += 1;
2020-10-09 11:58:43 +04:00
}
while let Some(parent) = node.parent() {
let parent_kind = parent.kind();
2021-01-10 18:46:18 +04:00
let start = parent.start_position().row;
// detect deeply nested indents in the same line
2021-04-14 09:28:31 +04:00
// .map(|a| { <-- ({ is two scopes
// let len = 1; <-- indents one level
// }) <-- }) is two scopes
let starts_same_line = start == prev_start;
if query.outdent.contains(node.kind()) && !starts_same_line {
// we outdent by skipping the rules for the current level and jumping up
2021-01-10 18:46:18 +04:00
// node = parent;
increment -= 1;
// continue;
}
if query.indent.contains(parent_kind) // && not_first_or_last_sibling
2021-01-10 18:46:18 +04:00
&& !starts_same_line
{
// println!("is_scope {}", parent_kind);
2021-01-10 18:46:18 +04:00
prev_start = start;
increment += 1
}
// if last_scope && increment > 0 && ...{ ignore }
node = parent;
}
2021-07-24 18:37:33 +04:00
increment.max(0) as usize
2020-10-09 11:58:43 +04:00
}
#[allow(dead_code)]
2021-03-22 08:47:39 +04:00
fn suggested_indent_for_line(
language_config: &LanguageConfiguration,
2021-03-22 08:47:39 +04:00
syntax: Option<&Syntax>,
text: RopeSlice,
line_num: usize,
_tab_width: usize,
2021-03-22 08:47:39 +04:00
) -> usize {
if let Some(start) = find_first_non_whitespace_char(text.line(line_num)) {
return suggested_indent_for_pos(
Some(language_config),
syntax,
text,
start + text.line_to_char(line_num),
false,
);
};
2020-10-09 11:58:43 +04:00
// if the line is blank, indent should be zero
0
}
// TODO: two usecases: if we are triggering this for a new, blank line:
// - it should return 0 when mass indenting stuff
// - it should look up the wrapper node and count it too when we press o/O
pub fn suggested_indent_for_pos(
language_config: Option<&LanguageConfiguration>,
syntax: Option<&Syntax>,
text: RopeSlice,
pos: usize,
new_line: bool,
) -> usize {
if let (Some(query), Some(syntax)) = (
language_config.and_then(|config| config.indent_query()),
syntax,
) {
let byte_start = text.char_to_byte(pos);
let node = get_highest_syntax_node_at_bytepos(syntax, byte_start);
2020-10-09 11:58:43 +04:00
// let config = load indentation query config from Syntax(should contain language_config)
2021-01-08 11:31:19 +04:00
// TODO: special case for comments
// TODO: if preserve_leading_whitespace
calculate_indentation(query, node, new_line)
2020-10-09 11:58:43 +04:00
} else {
2020-10-15 18:31:37 +04:00
// TODO: heuristics for non-tree sitter grammars
2020-10-09 11:58:43 +04:00
0
}
}
pub fn get_scopes(syntax: Option<&Syntax>, text: RopeSlice, pos: usize) -> Vec<&'static str> {
let mut scopes = Vec::new();
if let Some(syntax) = syntax {
let pos = text.char_to_byte(pos);
let mut node = match syntax
.tree()
.root_node()
.descendant_for_byte_range(pos, pos)
{
Some(node) => node,
None => return scopes,
};
scopes.push(node.kind());
while let Some(parent) = node.parent() {
scopes.push(parent.kind());
node = parent;
}
}
scopes.reverse();
2021-08-13 08:16:31 +04:00
scopes
}
2020-10-09 11:58:43 +04:00
#[cfg(test)]
mod test {
use super::*;
use crate::Rope;
2020-10-09 11:58:43 +04:00
#[test]
fn test_indent_level() {
2021-03-22 08:47:39 +04:00
let tab_width = 4;
2020-10-09 11:58:43 +04:00
let line = Rope::from(" fn new"); // 8 spaces
2021-03-22 08:47:39 +04:00
assert_eq!(indent_level_for_line(line.slice(..), tab_width), 2);
2020-10-09 11:58:43 +04:00
let line = Rope::from("\t\t\tfn new"); // 3 tabs
2021-03-22 08:47:39 +04:00
assert_eq!(indent_level_for_line(line.slice(..), tab_width), 3);
2020-10-09 11:58:43 +04:00
// mixed indentation
let line = Rope::from("\t \tfn new"); // 1 tab, 4 spaces, tab
2021-03-22 08:47:39 +04:00
assert_eq!(indent_level_for_line(line.slice(..), tab_width), 3);
2020-10-09 11:58:43 +04:00
}
#[test]
fn test_suggested_indent_for_line() {
let doc = Rope::from(
"
use std::{
io::{self, stdout, Stdout, Write},
path::PathBuf,
sync::Arc,
time::Duration,
}
mod test {
fn hello_world() {
1 + 1;
let does_indentation_work = 1;
let test_function = function_with_param(this_param,
that_param
);
let test_function = function_with_param(
this_param,
that_param
);
let test_function = function_with_proper_indent(param1,
param2,
);
let selection = Selection::new(
changes
.clone()
.map(|(start, end, text): (usize, usize, Option<Tendril>)| {
let len = text.map(|text| text.len()).unwrap() - 1; // minus newline
let pos = start + len;
Range::new(pos, pos)
})
.collect(),
0,
);
return;
}
}
impl<A, D> MyTrait<A, D> for YourType
where
A: TraitB + TraitC,
D: TraitE + TraitF,
{
}
#[test]
//
2021-01-10 18:46:18 +04:00
match test {
Some(a) => 1,
None => {
unimplemented!()
}
}
std::panic::set_hook(Box::new(move |info| {
hook(info);
}));
{ { {
1
}}}
pub fn change<I>(document: &Document, changes: I) -> Self
2021-01-10 18:46:18 +04:00
where
I: IntoIterator<Item = Change> + ExactSizeIterator,
{
2021-05-09 14:02:35 +04:00
[
1,
2,
3,
];
(
1,
2
);
2021-01-10 18:46:18 +04:00
true
}
",
);
let doc = Rope::from(doc);
2021-03-25 11:53:32 +04:00
use crate::syntax::{
Configuration, IndentationConfiguration, LanguageConfiguration, Loader,
2021-03-25 11:53:32 +04:00
};
use once_cell::sync::OnceCell;
2021-06-19 15:26:52 +04:00
let loader = Loader::new(Configuration {
language: vec![LanguageConfiguration {
scope: "source.rust".to_string(),
file_types: vec!["rs".to_string()],
language_id: "Rust".to_string(),
2021-06-19 15:26:52 +04:00
highlight_config: OnceCell::new(),
config: None,
2021-06-19 15:26:52 +04:00
//
roots: vec![],
comment_token: None,
2021-06-19 15:26:52 +04:00
auto_format: false,
language_server: None,
indent: Some(IndentationConfiguration {
tab_width: 4,
unit: String::from(" "),
}),
indent_query: OnceCell::new(),
}],
});
2021-03-25 11:53:32 +04:00
// set runtime path so we can find the queries
let mut runtime = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"));
runtime.push("../runtime");
std::env::set_var("HELIX_RUNTIME", runtime.to_str().unwrap());
2021-03-25 11:53:32 +04:00
let language_config = loader.language_config_for_scope("source.rust").unwrap();
2021-02-24 11:07:39 +04:00
let highlight_config = language_config.highlight_config(&[]).unwrap();
let syntax = Syntax::new(&doc, highlight_config.clone());
let text = doc.slice(..);
2021-03-22 08:47:39 +04:00
let tab_width = 4;
2021-01-10 18:46:18 +04:00
for i in 0..doc.len_lines() {
2021-01-10 18:46:18 +04:00
let line = text.line(i);
2021-03-22 08:47:39 +04:00
let indent = indent_level_for_line(line, tab_width);
2021-01-10 18:46:18 +04:00
assert_eq!(
suggested_indent_for_line(&language_config, Some(&syntax), text, i, tab_width),
2021-01-10 18:46:18 +04:00
indent,
"line {}: {}",
i,
line
);
}
}
2020-10-09 11:58:43 +04:00
}