helix-mirror/helix-core/src/state.rs

400 lines
12 KiB
Rust
Raw Normal View History

use crate::graphemes::{nth_next_grapheme_boundary, nth_prev_grapheme_boundary, RopeGraphemes};
use crate::syntax::LOADER;
use crate::{ChangeSet, Diagnostic, Position, Range, Rope, RopeSlice, Selection, Syntax};
use anyhow::Error;
/// A state represents the current editor state of a single buffer.
2020-10-23 06:36:46 +04:00
#[derive(Clone)]
pub struct State {
// TODO: fields should be private but we need to refactor commands.rs first
/// The text of the buffer.
pub doc: Rope,
/// The selection held alongside `doc`; `State::new` initialises it to `Selection::single(0, 0)`.
pub selection: Selection,
}
2020-06-01 12:42:28 +04:00
/// Which way a movement travels through the text.
#[derive(Copy, Clone, PartialEq, Eq)]
pub enum Direction {
/// Toward later positions (next grapheme boundary / higher line numbers).
Forward,
/// Toward earlier positions (previous grapheme boundary / lower line numbers).
Backward,
}
/// The unit a movement is measured in.
#[derive(Copy, Clone, PartialEq, Eq)]
pub enum Granularity {
/// Move by grapheme boundaries, staying on the current line.
Character,
/// Move by whole lines (handled by `move_vertically`).
Line,
}
impl State {
2020-05-28 09:45:44 +04:00
/// Builds a state for `doc` whose selection is `Selection::single(0, 0)`.
#[must_use]
pub fn new(doc: Rope) -> Self {
    let selection = Selection::single(0, 0);
    Self { doc, selection }
}
// TODO: doc/selection accessors
// TODO: be able to take either Rope or RopeSlice
/// Returns a shared reference to the buffer text.
#[inline]
pub fn doc(&self) -> &Rope {
&self.doc
}
/// Returns a shared reference to the current selection.
#[inline]
pub fn selection(&self) -> &Selection {
&self.selection
}
// pub fn doc<R>(&self, range: R) -> RopeSlice
// where
// R: std::ops::RangeBounds<usize>,
// {
// self.doc.slice(range)
// }
// update/transact:
// update(desc) => transaction ? transaction.doc() for applied doc
// transaction.apply(doc)
// doc.transact(fn -> ... end)
// replaceSelection (transaction that replaces selection)
// changeByRange
// changes
// slice
//
// getters:
// tabSize
// indentUnit
// languageDataAt()
//
// config:
// indentation
// tabSize
// lineUnit
// syntax
// foldable
// changeFilter/transactionFilter
2020-06-01 12:42:28 +04:00
2020-09-09 10:48:25 +04:00
// TODO: move that accepts a boundary matcher fn/list, we keep incrementing until we hit
// a boundary
// TODO: edits, does each keypress trigger a full command? I guess it's adding to the same
// transaction
// There should be three pieces of the state: current transaction, the original doc, "preview"
// of the new state.
// 1. apply the newly generated keypress as a transaction
// 2. compose onto an ongoing transaction
// 3. on insert mode leave, that transaction gets stored into undo history
pub fn move_range(
2020-06-01 12:42:28 +04:00
&self,
range: Range,
2020-06-01 12:42:28 +04:00
dir: Direction,
granularity: Granularity,
2020-06-07 19:15:39 +04:00
count: usize,
extend: bool,
) -> Range {
let text = &self.doc;
let pos = range.head;
2020-06-01 12:42:28 +04:00
match (dir, granularity) {
(Direction::Backward, Granularity::Character) => {
2020-09-28 20:11:17 +04:00
// Clamp to line
let line = text.char_to_line(pos);
let start = text.line_to_char(line);
let pos = std::cmp::max(
nth_prev_grapheme_boundary(text.slice(..), pos, count),
2020-09-28 20:11:17 +04:00
start,
);
Range::new(if extend { range.anchor } else { pos }, pos)
2020-06-01 12:42:28 +04:00
}
(Direction::Forward, Granularity::Character) => {
2020-09-28 20:11:17 +04:00
// Clamp to line
let line = text.char_to_line(pos);
2020-09-29 12:49:19 +04:00
// Line end is pos at the start of next line - 1
// subtract another 1 because the line ends with \n
let end = text.line_to_char(line + 1).saturating_sub(2);
let pos =
std::cmp::min(nth_next_grapheme_boundary(text.slice(..), pos, count), end);
Range::new(if extend { range.anchor } else { pos }, pos)
2020-09-24 14:16:35 +04:00
}
(_, Granularity::Line) => move_vertically(text.slice(..), dir, range, count, extend),
2020-06-01 12:42:28 +04:00
}
}
pub fn move_next_word_start(slice: RopeSlice, mut pos: usize, count: usize) -> usize {
2020-09-24 14:16:35 +04:00
// TODO: confirm it's fine without using graphemes, I think it should be
for _ in 0..count {
let ch = slice.char(pos);
let next = slice.char(pos.saturating_add(1));
if categorize(ch) != categorize(next) {
pos += 1;
}
2020-09-24 14:16:35 +04:00
// refetch
let ch = slice.char(pos);
2020-09-24 14:16:35 +04:00
if is_word(ch) {
skip_over_next(slice, &mut pos, is_word);
} else if ch.is_ascii_punctuation() {
skip_over_next(slice, &mut pos, |ch| ch.is_ascii_punctuation());
}
2020-09-24 14:16:35 +04:00
// TODO: don't include newline?
skip_over_next(slice, &mut pos, |ch| ch.is_ascii_whitespace());
}
2020-09-24 14:16:35 +04:00
pos
}
pub fn move_prev_word_start(slice: RopeSlice, mut pos: usize, count: usize) -> usize {
2020-09-24 14:16:35 +04:00
// TODO: confirm it's fine without using graphemes, I think it should be
for _ in 0..count {
let ch = slice.char(pos);
let prev = slice.char(pos.saturating_sub(1)); // TODO: just return original pos if at start
2020-09-24 14:16:35 +04:00
if categorize(ch) != categorize(prev) {
pos -= 1;
}
2020-09-24 14:16:35 +04:00
// TODO: skip while eol
2020-09-24 14:16:35 +04:00
// TODO: don't include newline?
skip_over_prev(slice, &mut pos, |ch| ch.is_ascii_whitespace());
2020-09-24 14:16:35 +04:00
// refetch
let ch = slice.char(pos);
2020-09-24 14:16:35 +04:00
if is_word(ch) {
skip_over_prev(slice, &mut pos, is_word);
} else if ch.is_ascii_punctuation() {
skip_over_prev(slice, &mut pos, |ch| ch.is_ascii_punctuation());
}
pos = pos.saturating_add(1)
2020-09-24 14:16:35 +04:00
}
pos
2020-09-24 14:16:35 +04:00
}
pub fn move_next_word_end(slice: RopeSlice, mut pos: usize, count: usize) -> usize {
for _ in 0..count {
// TODO: confirm it's fine without using graphemes, I think it should be
let ch = slice.char(pos);
let next = slice.char(pos.saturating_add(1));
if categorize(ch) != categorize(next) {
pos += 1;
}
2020-09-24 14:16:35 +04:00
// TODO: don't include newline?
skip_over_next(slice, &mut pos, |ch| ch.is_ascii_whitespace());
2020-09-24 14:16:35 +04:00
// refetch
let ch = slice.char(pos);
2020-09-24 14:16:35 +04:00
if is_word(ch) {
skip_over_next(slice, &mut pos, is_word);
} else if ch.is_ascii_punctuation() {
skip_over_next(slice, &mut pos, |ch| ch.is_ascii_punctuation());
}
pos = pos.saturating_sub(1)
2020-09-24 14:16:35 +04:00
}
pos
2020-09-24 14:16:35 +04:00
}
2020-06-01 12:42:28 +04:00
pub fn move_selection(
&self,
dir: Direction,
granularity: Granularity,
2020-06-07 19:15:39 +04:00
count: usize,
2020-06-01 12:42:28 +04:00
) -> Selection {
self.selection
.transform(|range| self.move_range(range, dir, granularity, count, false))
2020-06-01 12:42:28 +04:00
}
pub fn extend_selection(
&self,
dir: Direction,
granularity: Granularity,
2020-06-07 19:15:39 +04:00
count: usize,
2020-06-01 12:42:28 +04:00
) -> Selection {
self.selection
.transform(|range| self.move_range(range, dir, granularity, count, true))
2020-06-01 12:42:28 +04:00
}
}
/// Convert a character index to (line, column) coordinates.
pub fn coords_at_pos(text: RopeSlice, pos: usize) -> Position {
let line = text.char_to_line(pos);
let line_start = text.line_to_char(line);
let col = RopeGraphemes::new(text.slice(line_start..pos)).count();
2020-09-17 09:57:49 +04:00
Position::new(line, col)
}
/// Convert (line, column) coordinates to a character index.
pub fn pos_at_coords(text: RopeSlice, coords: Position) -> usize {
2020-09-17 09:57:49 +04:00
let Position { row, col } = coords;
let line_start = text.line_to_char(row);
// line_start + col
nth_next_grapheme_boundary(text, line_start, col)
}
fn move_vertically(
text: RopeSlice,
dir: Direction,
range: Range,
count: usize,
extend: bool,
) -> Range {
let Position { row, col } = coords_at_pos(text, range.head);
let horiz = range.horiz.unwrap_or(col as u32);
2020-06-07 19:08:21 +04:00
let new_line = match dir {
2020-09-17 09:57:49 +04:00
Direction::Backward => row.saturating_sub(count),
Direction::Forward => std::cmp::min(row.saturating_add(count), text.len_lines() - 1),
2020-06-07 19:08:21 +04:00
};
2020-09-04 13:18:59 +04:00
// convert to 0-indexed, subtract another 1 because len_chars() counts \n
let new_line_len = text.line(new_line).len_chars().saturating_sub(2);
2020-06-07 19:08:21 +04:00
let new_col = std::cmp::min(horiz as usize, new_line_len);
let pos = pos_at_coords(text, Position::new(new_line, new_col));
2020-06-07 19:08:21 +04:00
let mut range = Range::new(if extend { range.anchor } else { pos }, pos);
range.horiz = Some(horiz);
range
2020-06-07 19:08:21 +04:00
}
2020-09-24 14:16:35 +04:00
// Word-character predicate for by-word movement: any alphanumeric char or `_`.
fn is_word(ch: char) -> bool {
    matches!(ch, '_') || ch.is_alphanumeric()
}
/// Character classes used to detect boundaries during by-word movement.
#[derive(Debug, Eq, PartialEq)]
enum Category {
    /// Whitespace other than `\n`.
    Whitespace,
    /// The line feed character `\n`.
    EOL,
    /// Alphanumeric characters.
    Word,
    /// ASCII punctuation (this includes `_`).
    Punctuation,
    /// Anything that fits none of the classes above (control chars, symbols, ...).
    Unknown,
}

/// Classifies `ch` for word-boundary detection.
///
/// ASCII input is classified as before. Chars that used to hit
/// `unreachable!()` (every non-ASCII char, plus ASCII control chars such as
/// `\x0b`) are now classified with the Unicode-aware `char` predicates and
/// fall back to `Category::Unknown`, so this function never panics.
fn categorize(ch: char) -> Category {
    if ch == '\n' {
        Category::EOL
    } else if ch.is_whitespace() {
        Category::Whitespace
    } else if ch.is_ascii_punctuation() {
        // Checked before the alphanumeric test so `_` stays punctuation here.
        Category::Punctuation
    } else if ch.is_alphanumeric() {
        Category::Word
    } else {
        Category::Unknown
    }
}
2021-02-26 12:21:59 +04:00
#[inline]
pub fn skip_over_next<F>(slice: RopeSlice, pos: &mut usize, fun: F)
2020-09-24 14:16:35 +04:00
where
F: Fn(char) -> bool,
{
let mut chars = slice.chars_at(*pos);
2020-09-29 13:02:27 +04:00
for ch in chars {
2020-09-24 14:16:35 +04:00
if !fun(ch) {
break;
}
*pos += 1;
}
}
2021-02-26 12:21:59 +04:00
#[inline]
pub fn skip_over_prev<F>(slice: RopeSlice, pos: &mut usize, fun: F)
2020-09-24 14:16:35 +04:00
where
F: Fn(char) -> bool,
{
// need to +1 so that prev() includes current char
let mut chars = slice.chars_at(*pos + 1);
while let Some(ch) = chars.prev() {
if !fun(ch) {
break;
}
*pos -= 1;
}
}
#[cfg(test)]
mod test {
use super::*;
// Checks that `coords_at_pos` reports columns in grapheme clusters rather than chars.
#[test]
fn test_coords_at_pos() {
// NOTE(review): the assertions for this first text are commented out, so
// `text` and `slice` below are unused until they are shadowed.
let text = Rope::from("ḧëḷḷö\nẅöṛḷḋ");
let slice = text.slice(..);
// assert_eq!(coords_at_pos(slice, 0), (0, 0).into());
// assert_eq!(coords_at_pos(slice, 5), (0, 5).into()); // position on \n
// assert_eq!(coords_at_pos(slice, 6), (1, 0).into()); // position on w
// assert_eq!(coords_at_pos(slice, 7), (1, 1).into()); // position on o
// assert_eq!(coords_at_pos(slice, 10), (1, 4).into()); // position on d
// test with grapheme clusters
// (char indices 0, 2, 4 and 7 are expected to be columns 0, 1, 2 and 3)
let text = Rope::from("a̐éö̲\r\n");
let slice = text.slice(..);
assert_eq!(coords_at_pos(slice, 0), (0, 0).into());
assert_eq!(coords_at_pos(slice, 2), (0, 1).into());
assert_eq!(coords_at_pos(slice, 4), (0, 2).into());
assert_eq!(coords_at_pos(slice, 7), (0, 3).into());
// Devanagari text: char indices 0, 2, 3 and 5 are expected to be columns 0, 1, 2 and 3
let text = Rope::from("किमपि");
let slice = text.slice(..);
assert_eq!(coords_at_pos(slice, 0), (0, 0).into());
assert_eq!(coords_at_pos(slice, 2), (0, 1).into());
assert_eq!(coords_at_pos(slice, 3), (0, 2).into());
assert_eq!(coords_at_pos(slice, 5), (0, 3).into());
}
// Checks that `pos_at_coords` maps (row, col) back to char indices, counting
// columns in grapheme clusters.
#[test]
fn test_pos_at_coords() {
let text = Rope::from("ḧëḷḷö\nẅöṛḷḋ");
let slice = text.slice(..);
assert_eq!(pos_at_coords(slice, (0, 0).into()), 0);
assert_eq!(pos_at_coords(slice, (0, 5).into()), 5); // position on \n
assert_eq!(pos_at_coords(slice, (1, 0).into()), 6); // position on w
assert_eq!(pos_at_coords(slice, (1, 1).into()), 7); // position on o
assert_eq!(pos_at_coords(slice, (1, 4).into()), 10); // position on d
// test with grapheme clusters
let text = Rope::from("a̐éö̲\r\n");
let slice = text.slice(..);
assert_eq!(pos_at_coords(slice, (0, 0).into()), 0);
assert_eq!(pos_at_coords(slice, (0, 1).into()), 2);
assert_eq!(pos_at_coords(slice, (0, 2).into()), 4);
assert_eq!(pos_at_coords(slice, (0, 3).into()), 7); // column 3 is the \r\n pair, which starts at char 7
assert_eq!(pos_at_coords(slice, (0, 4).into()), 9); // one column further skips both chars of \r\n
let text = Rope::from("किमपि");
// 2 - 1 - 2 codepoints
// TODO: delete handling as per https://news.ycombinator.com/item?id=20058454
let slice = text.slice(..);
assert_eq!(pos_at_coords(slice, (0, 0).into()), 0);
assert_eq!(pos_at_coords(slice, (0, 1).into()), 2);
assert_eq!(pos_at_coords(slice, (0, 2).into()), 3);
assert_eq!(pos_at_coords(slice, (0, 3).into()), 5); // eol
}
2020-06-07 19:08:21 +04:00
// Moving down from column 4 of "abcd" must clamp to the last column of the
// shorter line "efg".
#[test]
fn test_vertical_move() {
    let text = Rope::from("abcd\nefg\nwrs");
    let slice = text.slice(..);

    let start = pos_at_coords(slice, (0, 4).into());
    let moved = move_vertically(
        slice,
        Direction::Forward,
        Range::new(start, start),
        1,
        false,
    );

    assert_eq!(coords_at_pos(slice, moved.head), (1, 2).into());
}
}