2022-03-30 19:08:07 +04:00
use std ::collections ::HashMap ;
use tree_sitter ::{ Query , QueryCursor , QueryPredicateArg } ;
2020-10-09 11:58:43 +04:00
use crate ::{
2021-07-29 23:10:59 +04:00
chars ::{ char_is_line_ending , char_is_whitespace } ,
2022-03-30 19:08:07 +04:00
syntax ::{ LanguageConfiguration , RopeProvider , Syntax } ,
2021-07-01 23:24:22 +04:00
tree_sitter ::Node ,
2021-07-29 23:10:59 +04:00
Rope , RopeSlice ,
2020-10-09 11:58:43 +04:00
} ;
2021-07-29 23:10:59 +04:00
/// Enum representing indentation style.
///
/// Only values 1-8 are valid for the `Spaces` variant.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum IndentStyle {
    /// One tab character per indentation level.
    Tabs,
    /// The given number of spaces (1-8) per indentation level.
    Spaces(u8),
}

impl IndentStyle {
    /// Creates an `IndentStyle` from an indentation string.
    ///
    /// For example, passing `"    "` (four spaces) will create `IndentStyle::Spaces(4)`.
    /// Any string that does not start with a space yields `IndentStyle::Tabs`.
    ///
    /// Debug builds assert that `indent` is between 1 and 8 bytes long.
    // Named like `FromStr::from_str` but infallible, hence the lint allowance.
    #[allow(clippy::should_implement_trait)]
    #[inline]
    pub fn from_str(indent: &str) -> Self {
        // XXX: do we care about validating the input more than this?  Probably not...?
        debug_assert!(!indent.is_empty() && indent.len() <= 8);

        if indent.starts_with(' ') {
            // Saturate rather than wrap: a wrapping cast could turn an absurdly
            // long string (e.g. 260 bytes) into a seemingly valid width.
            IndentStyle::Spaces(indent.len().min(usize::from(u8::MAX)) as u8)
        } else {
            IndentStyle::Tabs
        }
    }

    /// Returns the string that represents one level of this indentation style.
    ///
    /// `Tabs` yields a single tab character and `Spaces(n)` yields `n` spaces
    /// for `n` in 1-8. Any other `Spaces` value is unsupported: it trips a
    /// debug assertion and falls back to two spaces in release builds.
    #[inline]
    pub fn as_str(&self) -> &'static str {
        // Eight spaces; every valid `Spaces(n)` is a prefix of this.
        const SPACES: &str = "        ";

        match *self {
            IndentStyle::Tabs => "\t",
            IndentStyle::Spaces(n @ 1..=8) => &SPACES[..usize::from(n)],

            // Unsupported indentation style.  This should never happen,
            // but just in case fall back to two spaces.
            IndentStyle::Spaces(n) => {
                debug_assert!(n > 0 && n <= 8); // Always triggers. `debug_panic!()` wanted.
                &SPACES[..2]
            }
        }
    }
}
/// Attempts to detect the indentation style used in a document.
///
/// Returns the indentation style if the auto-detect confidence is
/// reasonably high, otherwise returns `None`.
pub fn auto_detect_indent_style ( document_text : & Rope ) -> Option < IndentStyle > {
// Build a histogram of the indentation *increases* between
// subsequent lines, ignoring lines that are all whitespace.
//
// Index 0 is for tabs, the rest are 1-8 spaces.
let histogram : [ usize ; 9 ] = {
let mut histogram = [ 0 ; 9 ] ;
let mut prev_line_is_tabs = false ;
let mut prev_line_leading_count = 0 usize ;
// Loop through the lines, checking for and recording indentation
// increases as we go.
' outer : for line in document_text . lines ( ) . take ( 1000 ) {
let mut c_iter = line . chars ( ) ;
// Is first character a tab or space?
let is_tabs = match c_iter . next ( ) {
Some ( '\t' ) = > true ,
Some ( ' ' ) = > false ,
// Ignore blank lines.
Some ( c ) if char_is_line_ending ( c ) = > continue ,
_ = > {
prev_line_is_tabs = false ;
prev_line_leading_count = 0 ;
continue ;
}
} ;
// Count the line's total leading tab/space characters.
let mut leading_count = 1 ;
let mut count_is_done = false ;
for c in c_iter {
match c {
'\t' if is_tabs & & ! count_is_done = > leading_count + = 1 ,
' ' if ! is_tabs & & ! count_is_done = > leading_count + = 1 ,
// We stop counting if we hit whitespace that doesn't
// qualify as indent or doesn't match the leading
// whitespace, but we don't exit the loop yet because
// we still want to determine if the line is blank.
c if char_is_whitespace ( c ) = > count_is_done = true ,
// Ignore blank lines.
c if char_is_line_ending ( c ) = > continue 'outer ,
_ = > break ,
}
// Bound the worst-case execution time for weird text files.
if leading_count > 256 {
continue 'outer ;
}
}
// If there was an increase in indentation over the previous
// line, update the histogram with that increase.
if ( prev_line_is_tabs = = is_tabs | | prev_line_leading_count = = 0 )
& & prev_line_leading_count < leading_count
{
if is_tabs {
histogram [ 0 ] + = 1 ;
} else {
let amount = leading_count - prev_line_leading_count ;
if amount < = 8 {
histogram [ amount ] + = 1 ;
}
}
}
// Store this line's leading whitespace info for use with
// the next line.
prev_line_is_tabs = is_tabs ;
prev_line_leading_count = leading_count ;
}
// Give more weight to tabs, because their presence is a very
// strong indicator.
histogram [ 0 ] * = 2 ;
histogram
} ;
// Find the most frequent indent, its frequency, and the frequency of
// the next-most frequent indent.
let indent = histogram
. iter ( )
. enumerate ( )
. max_by_key ( | kv | kv . 1 )
. unwrap ( )
. 0 ;
let indent_freq = histogram [ indent ] ;
let indent_freq_2 = * histogram
. iter ( )
. enumerate ( )
. filter ( | kv | kv . 0 ! = indent )
. map ( | kv | kv . 1 )
. max ( )
. unwrap ( ) ;
// Return the the auto-detected result if we're confident enough in its
// accuracy, based on some heuristics.
if indent_freq > = 1 & & ( indent_freq_2 as f64 / indent_freq as f64 ) < 0.66 {
Some ( match indent {
0 = > IndentStyle ::Tabs ,
_ = > IndentStyle ::Spaces ( indent as u8 ) ,
} )
} else {
None
}
}
2020-10-14 13:07:42 +04:00
/// To determine indentation of a newly inserted line, figure out the indentation at the last col
/// of the previous line.
2022-01-03 06:03:57 +04:00
pub fn indent_level_for_line ( line : RopeSlice , tab_width : usize ) -> usize {
2020-10-09 11:58:43 +04:00
let mut len = 0 ;
for ch in line . chars ( ) {
match ch {
2021-03-22 08:47:39 +04:00
'\t' = > len + = tab_width ,
2020-10-09 11:58:43 +04:00
' ' = > len + = 1 ,
_ = > break ,
}
}
2021-03-22 08:47:39 +04:00
len / tab_width
2020-10-09 11:58:43 +04:00
}
2022-03-30 19:08:07 +04:00
/// Computes for node and all ancestors whether they are the first node on their line.
/// The first entry in the return value represents the root node, the last one the node itself
fn get_first_in_line ( mut node : Node , byte_pos : usize , new_line : bool ) -> Vec < bool > {
let mut first_in_line = Vec ::new ( ) ;
loop {
if let Some ( prev ) = node . prev_sibling ( ) {
// If we insert a new line, the first node at/after the cursor is considered to be the first in its line
let first = prev . end_position ( ) . row ! = node . start_position ( ) . row
| | ( new_line & & node . start_byte ( ) > = byte_pos & & prev . start_byte ( ) < byte_pos ) ;
first_in_line . push ( Some ( first ) ) ;
} else {
// Nodes that have no previous siblings are first in their line if and only if their parent is
// (which we don't know yet)
first_in_line . push ( None ) ;
}
if let Some ( parent ) = node . parent ( ) {
node = parent ;
2020-10-09 11:58:43 +04:00
} else {
break ;
}
}
2022-03-30 19:08:07 +04:00
let mut result = Vec ::with_capacity ( first_in_line . len ( ) ) ;
let mut parent_is_first = true ; // The root node is by definition the first node in its line
for first in first_in_line . into_iter ( ) . rev ( ) {
if let Some ( first ) = first {
result . push ( first ) ;
parent_is_first = first ;
} else {
result . push ( parent_is_first ) ;
}
}
result
2020-10-09 11:58:43 +04:00
}
2022-03-30 19:08:07 +04:00
/// The total indent for some line of code.
/// This is usually constructed in one of 2 ways:
/// - Successively add indent captures to get the (added) indent from a single line
/// - Successively add the indent results for each line
#[ derive(Default) ]
struct Indentation {
/// The total indent (the number of indent levels) is defined as max(0, indent-outdent).
/// The string that this results in depends on the indent style (spaces or tabs, etc.)
indent : usize ,
outdent : usize ,
}
impl Indentation {
/// Add some other [IndentResult] to this.
/// The added indent should be the total added indent from one line
fn add_line ( & mut self , added : & Indentation ) {
if added . indent > 0 & & added . outdent = = 0 {
self . indent + = 1 ;
} else if added . outdent > 0 & & added . indent = = 0 {
self . outdent + = 1 ;
}
}
/// Add an indent capture to this indent.
/// All the captures that are added in this way should be on the same line.
fn add_capture ( & mut self , added : IndentCaptureType ) {
match added {
IndentCaptureType ::Indent = > {
self . indent = 1 ;
}
IndentCaptureType ::Outdent = > {
self . outdent = 1 ;
}
}
}
fn as_string ( & self , indent_style : & IndentStyle ) -> String {
let indent_level = if self . indent > = self . outdent {
self . indent - self . outdent
} else {
log ::warn! ( " Encountered more outdent than indent nodes while calculating indentation: {} outdent, {} indent " , self . outdent , self . indent ) ;
0
} ;
indent_style . as_str ( ) . repeat ( indent_level )
}
}
2020-10-14 13:07:42 +04:00
2022-03-30 19:08:07 +04:00
/// An indent definition which corresponds to a capture from the indent query
#[derive(Debug)]
struct IndentCapture {
    /// Whether this capture adds or removes an indent level.
    capture_type: IndentCaptureType,
    /// Which part of the captured node the capture applies to.
    scope: IndentScope,
}

/// The kind of an [IndentCapture].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum IndentCaptureType {
    Indent,
    Outdent,
}

impl IndentCaptureType {
    /// The scope a capture of this type has unless the query overrides it:
    /// `Tail` for indents and `All` for outdents.
    fn default_scope(&self) -> IndentScope {
        match self {
            IndentCaptureType::Indent => IndentScope::Tail,
            IndentCaptureType::Outdent => IndentScope::All,
        }
    }
}

/// This defines which part of a node an [IndentCapture] applies to.
/// Each [IndentCaptureType] has a default scope, but the scope can be changed
/// with `#set!` property declarations.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum IndentScope {
    /// The indent applies to the whole node
    All,
    /// The indent applies to everything except for the first line of the node
    Tail,
}
2022-01-03 06:03:57 +04:00
2022-03-30 19:08:07 +04:00
/// Execute the indent query.
/// Returns for each node (identified by its id) a list of indent captures for that node.
fn query_indents (
query : & Query ,
syntax : & Syntax ,
cursor : & mut QueryCursor ,
text : RopeSlice ,
range : std ::ops ::Range < usize > ,
// Position of the (optional) newly inserted line break.
// Given as (line, byte_pos)
new_line_break : Option < ( usize , usize ) > ,
) -> HashMap < usize , Vec < IndentCapture > > {
let mut indent_captures : HashMap < usize , Vec < IndentCapture > > = HashMap ::new ( ) ;
cursor . set_byte_range ( range ) ;
// Iterate over all captures from the query
for m in cursor . matches ( query , syntax . tree ( ) . root_node ( ) , RopeProvider ( text ) ) {
// Skip matches where not all custom predicates are fulfilled
if ! query . general_predicates ( m . pattern_index ) . iter ( ) . all ( | pred | {
match pred . operator . as_ref ( ) {
" not-kind-eq? " = > match ( pred . args . get ( 0 ) , pred . args . get ( 1 ) ) {
(
Some ( QueryPredicateArg ::Capture ( capture_idx ) ) ,
Some ( QueryPredicateArg ::String ( kind ) ) ,
) = > {
let node = m . nodes_for_capture_index ( * capture_idx ) . next ( ) ;
match node {
Some ( node ) = > node . kind ( ) ! = kind . as_ref ( ) ,
_ = > true ,
}
}
_ = > {
panic! ( " Invalid indent query: Arguments to \" not-kind-eq? \" must be a capture and a string " ) ;
}
} ,
" same-line? " | " not-same-line? " = > {
match ( pred . args . get ( 0 ) , pred . args . get ( 1 ) ) {
(
Some ( QueryPredicateArg ::Capture ( capt1 ) ) ,
Some ( QueryPredicateArg ::Capture ( capt2 ) )
) = > {
let get_line_num = | node : Node | {
let mut node_line = node . start_position ( ) . row ;
// Adjust for the new line that will be inserted
if let Some ( ( line , byte ) ) = new_line_break {
if node_line = = line & & node . start_byte ( ) > = byte {
node_line + = 1 ;
}
}
node_line
} ;
let n1 = m . nodes_for_capture_index ( * capt1 ) . next ( ) ;
let n2 = m . nodes_for_capture_index ( * capt2 ) . next ( ) ;
match ( n1 , n2 ) {
( Some ( n1 ) , Some ( n2 ) ) = > {
let same_line = get_line_num ( n1 ) = = get_line_num ( n2 ) ;
same_line = = ( pred . operator . as_ref ( ) = = " same-line? " )
}
_ = > true ,
}
}
_ = > {
panic! ( " Invalid indent query: Arguments to \" {} \" must be 2 captures " , pred . operator ) ;
}
}
}
_ = > {
panic! (
" Invalid indent query: Unknown predicate ( \" {} \" ) " ,
pred . operator
) ;
}
2022-01-03 06:03:57 +04:00
}
2022-03-30 19:08:07 +04:00
} ) {
continue ;
2021-01-08 11:15:12 +04:00
}
2022-03-30 19:08:07 +04:00
for capture in m . captures {
let capture_type = query . capture_names ( ) [ capture . index as usize ] . as_str ( ) ;
let capture_type = match capture_type {
" indent " = > IndentCaptureType ::Indent ,
" outdent " = > IndentCaptureType ::Outdent ,
_ = > {
// Ignore any unknown captures (these may be needed for predicates such as #match?)
continue ;
}
} ;
let scope = capture_type . default_scope ( ) ;
let mut indent_capture = IndentCapture {
capture_type ,
scope ,
} ;
// Apply additional settings for this capture
for property in query . property_settings ( m . pattern_index ) {
match property . key . as_ref ( ) {
" scope " = > {
indent_capture . scope = match property . value . as_deref ( ) {
Some ( " all " ) = > IndentScope ::All ,
Some ( " tail " ) = > IndentScope ::Tail ,
Some ( s ) = > {
panic! ( " Invalid indent query: Unknown value for \" scope \" property ( \" {} \" ) " , s ) ;
}
None = > {
panic! (
" Invalid indent query: Missing value for \" scope \" property "
) ;
}
}
}
_ = > {
panic! (
" Invalid indent query: Unknown property \" {} \" " ,
property . key
) ;
}
}
}
indent_captures
. entry ( capture . node . id ( ) )
// Most entries only need to contain a single IndentCapture
. or_insert_with ( | | Vec ::with_capacity ( 1 ) )
. push ( indent_capture ) ;
2022-01-03 06:03:57 +04:00
}
2022-03-30 19:08:07 +04:00
}
indent_captures
}
/// Use the syntax tree to determine the indentation for a given position.
/// This can be used in 2 ways:
///
/// - To get the correct indentation for an existing line (new_line=false), not necessarily equal to the current indentation.
/// - In this case, pos should be inside the first tree-sitter node on that line.
/// In most cases, this can just be the first non-whitespace on that line.
/// - To get the indentation for a new line (new_line=true). This behaves like the first usecase if the part of the current line
/// after pos were moved to a new line.
///
/// The indentation is determined by traversing all the tree-sitter nodes containing the position.
/// Each of these nodes produces some [AddedIndent] for:
///
/// - The line of the (beginning of the) node. This is defined by the scope `all` if this is the first node on its line.
/// - The line after the node. This is defined by:
/// - The scope `tail`.
/// - The scope `all` if this node is not the first node on its line.
/// Intuitively, `all` applies to everything contained in this node while `tail` applies to everything except for the first line of the node.
/// The indents from different nodes for the same line are then combined.
/// The [IndentResult] is simply the sum of the [AddedIndent] for all lines.
///
/// Specifying which line exactly an [AddedIndent] applies to is important because indents on the same line combine differently than indents on different lines:
/// ```ignore
/// some_function(|| {
/// // Both the function parameters as well as the contained block should be indented.
/// // Because they are on the same line, this only yields one indent level
/// });
/// ```
///
/// ```ignore
/// some_function(
2022-07-06 06:49:54 +04:00
/// param1,
2022-03-30 19:08:07 +04:00
/// || {
/// // Here we get 2 indent levels because the 'parameters' and the 'block' node begin on different lines
/// },
/// );
/// ```
pub fn treesitter_indent_for_pos (
query : & Query ,
syntax : & Syntax ,
indent_style : & IndentStyle ,
text : RopeSlice ,
line : usize ,
pos : usize ,
new_line : bool ,
) -> Option < String > {
let byte_pos = text . char_to_byte ( pos ) ;
let mut node = syntax
. tree ( )
. root_node ( )
. descendant_for_byte_range ( byte_pos , byte_pos ) ? ;
let mut first_in_line = get_first_in_line ( node , byte_pos , new_line ) ;
let new_line_break = if new_line {
Some ( ( line , byte_pos ) )
} else {
None
} ;
let query_result = crate ::syntax ::PARSER . with ( | ts_parser | {
let mut ts_parser = ts_parser . borrow_mut ( ) ;
let mut cursor = ts_parser . cursors . pop ( ) . unwrap_or_else ( QueryCursor ::new ) ;
let query_result = query_indents (
query ,
syntax ,
& mut cursor ,
text ,
byte_pos .. byte_pos + 1 ,
new_line_break ,
) ;
ts_parser . cursors . push ( cursor ) ;
query_result
} ) ;
let mut result = Indentation ::default ( ) ;
// We always keep track of all the indent changes on one line, in order to only indent once
// even if there are multiple "indent" nodes on the same line
let mut indent_for_line = Indentation ::default ( ) ;
let mut indent_for_line_below = Indentation ::default ( ) ;
loop {
// This can safely be unwrapped because `first_in_line` contains
// one entry for each ancestor of the node (which is what we iterate over)
let is_first = * first_in_line . last ( ) . unwrap ( ) ;
// Apply all indent definitions for this node
if let Some ( definitions ) = query_result . get ( & node . id ( ) ) {
for definition in definitions {
match definition . scope {
IndentScope ::All = > {
if is_first {
indent_for_line . add_capture ( definition . capture_type ) ;
} else {
indent_for_line_below . add_capture ( definition . capture_type ) ;
}
}
IndentScope ::Tail = > {
indent_for_line_below . add_capture ( definition . capture_type ) ;
}
}
}
2021-01-08 11:15:12 +04:00
}
2022-01-03 06:03:57 +04:00
if let Some ( parent ) = node . parent ( ) {
2022-03-30 19:08:07 +04:00
let mut node_line = node . start_position ( ) . row ;
let mut parent_line = parent . start_position ( ) . row ;
if node_line = = line & & new_line {
// Also consider the line that will be inserted
if node . start_byte ( ) > = byte_pos {
node_line + = 1 ;
}
if parent . start_byte ( ) > = byte_pos {
parent_line + = 1 ;
}
} ;
if node_line ! = parent_line {
if node_line < line + ( new_line as usize ) {
// Don't add indent for the line below the line of the query
result . add_line ( & indent_for_line_below ) ;
}
if node_line = = parent_line + 1 {
indent_for_line_below = indent_for_line ;
} else {
result . add_line ( & indent_for_line ) ;
indent_for_line_below = Indentation ::default ( ) ;
}
indent_for_line = Indentation ::default ( ) ;
}
2022-01-03 06:03:57 +04:00
node = parent ;
2022-03-30 19:08:07 +04:00
first_in_line . pop ( ) ;
2022-01-03 06:03:57 +04:00
} else {
2022-03-30 19:08:07 +04:00
result . add_line ( & indent_for_line_below ) ;
result . add_line ( & indent_for_line ) ;
2022-01-03 06:03:57 +04:00
break ;
}
}
2022-03-30 19:08:07 +04:00
Some ( result . as_string ( indent_style ) )
2020-10-09 11:58:43 +04:00
}
2022-03-30 19:08:07 +04:00
/// Returns the indentation for a new line.
/// This is done either using treesitter, or if that's not available by copying the indentation from the current line
#[ allow(clippy::too_many_arguments) ]
pub fn indent_for_newline (
2021-05-14 14:21:46 +04:00
language_config : Option < & LanguageConfiguration > ,
2021-01-08 11:15:12 +04:00
syntax : Option < & Syntax > ,
2022-03-30 19:08:07 +04:00
indent_style : & IndentStyle ,
tab_width : usize ,
2021-03-18 08:45:57 +04:00
text : RopeSlice ,
2022-03-30 19:08:07 +04:00
line_before : usize ,
line_before_end_pos : usize ,
current_line : usize ,
) -> String {
2021-05-14 14:21:46 +04:00
if let ( Some ( query ) , Some ( syntax ) ) = (
language_config . and_then ( | config | config . indent_query ( ) ) ,
syntax ,
) {
2022-03-30 19:08:07 +04:00
if let Some ( indent ) = treesitter_indent_for_pos (
query ,
syntax ,
indent_style ,
text ,
line_before ,
line_before_end_pos ,
true ,
) {
return indent ;
} ;
2020-10-09 11:58:43 +04:00
}
2022-03-30 19:08:07 +04:00
let indent_level = indent_level_for_line ( text . line ( current_line ) , tab_width ) ;
indent_style . as_str ( ) . repeat ( indent_level )
2020-10-09 11:58:43 +04:00
}
2021-08-13 08:15:36 +04:00
pub fn get_scopes ( syntax : Option < & Syntax > , text : RopeSlice , pos : usize ) -> Vec < & 'static str > {
let mut scopes = Vec ::new ( ) ;
if let Some ( syntax ) = syntax {
2021-09-02 10:11:27 +04:00
let pos = text . char_to_byte ( pos ) ;
let mut node = match syntax
. tree ( )
. root_node ( )
. descendant_for_byte_range ( pos , pos )
{
2021-08-13 08:15:36 +04:00
Some ( node ) = > node ,
None = > return scopes ,
} ;
scopes . push ( node . kind ( ) ) ;
while let Some ( parent ) = node . parent ( ) {
2021-09-02 10:11:27 +04:00
scopes . push ( parent . kind ( ) ) ;
node = parent ;
2021-08-13 08:15:36 +04:00
}
}
scopes . reverse ( ) ;
2021-08-13 08:16:31 +04:00
scopes
2021-08-13 08:15:36 +04:00
}
2020-10-09 11:58:43 +04:00
#[cfg(test)]
mod test {
    use super::*;
    use crate::Rope;

    /// Checks `indent_level_for_line` for space-only, tab-only and mixed
    /// leading whitespace with a tab width of 4.
    #[test]
    fn test_indent_level() {
        let tab_width = 4;

        let line = Rope::from("        fn new"); // 8 spaces
        assert_eq!(indent_level_for_line(line.slice(..), tab_width), 2);

        let line = Rope::from("\t\t\tfn new"); // 3 tabs
        assert_eq!(indent_level_for_line(line.slice(..), tab_width), 3);

        // mixed indentation
        let line = Rope::from("\t    \tfn new"); // 1 tab, 4 spaces, tab
        assert_eq!(indent_level_for_line(line.slice(..), tab_width), 3);
    }
}