diff --git a/Cargo.lock b/Cargo.lock index 24c277e12..a1de71385 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -331,6 +331,7 @@ dependencies = [ "bitflags", "cassowary", "crossterm", + "helix-core", "serde", "unicode-segmentation", "unicode-width", diff --git a/helix-core/src/auto_pairs.rs b/helix-core/src/auto_pairs.rs index 74e25ac90..746f201a4 100644 --- a/helix-core/src/auto_pairs.rs +++ b/helix-core/src/auto_pairs.rs @@ -12,7 +12,7 @@ ('`', '`'), ]; -const CLOSE_BEFORE: &str = ")]}'\":;> \n"; // includes space and newline +const CLOSE_BEFORE: &str = ")]}'\":;> \n\r\u{000B}\u{000C}\u{0085}\u{2028}\u{2029}"; // includes space and newlines // insert hook: // Fn(doc, selection, char) => Option diff --git a/helix-core/src/chars.rs b/helix-core/src/chars.rs index 243a13743..24133dd33 100644 --- a/helix-core/src/chars.rs +++ b/helix-core/src/chars.rs @@ -1,25 +1,44 @@ -/// Determine whether a character is a line break. -pub fn char_is_linebreak(c: char) -> bool { - matches!( - c, - '\u{000A}' | // LineFeed - '\u{000B}' | // VerticalTab - '\u{000C}' | // FormFeed - '\u{000D}' | // CarriageReturn - '\u{0085}' | // NextLine - '\u{2028}' | // Line Separator - '\u{2029}' // ParagraphSeparator - ) +use crate::LineEnding; + +#[derive(Debug, Eq, PartialEq)] +pub enum CharCategory { + Whitespace, + Eol, + Word, + Punctuation, + Unknown, +} + +#[inline] +pub fn categorize_char(ch: char) -> CharCategory { + if char_is_line_ending(ch) { + CharCategory::Eol + } else if ch.is_whitespace() { + CharCategory::Whitespace + } else if char_is_word(ch) { + CharCategory::Word + } else if char_is_punctuation(ch) { + CharCategory::Punctuation + } else { + CharCategory::Unknown + } +} + +/// Determine whether a character is a line ending. +#[inline] +pub fn char_is_line_ending(ch: char) -> bool { + LineEnding::from_char(ch).is_some() } /// Determine whether a character qualifies as (non-line-break) /// whitespace. 
-pub fn char_is_whitespace(c: char) -> bool { +#[inline] +pub fn char_is_whitespace(ch: char) -> bool { // TODO: this is a naive binary categorization of whitespace // characters. For display, word wrapping, etc. we'll need a better // categorization based on e.g. breaking vs non-breaking spaces // and whether they're zero-width or not. - match c { + match ch { //'\u{1680}' | // Ogham Space Mark (here for completeness, but usually displayed as a dash, not as whitespace) '\u{0009}' | // Character Tabulation '\u{0020}' | // Space @@ -34,8 +53,81 @@ pub fn char_is_whitespace(c: char) -> bool { // En Quad, Em Quad, En Space, Em Space, Three-per-em Space, // Four-per-em Space, Six-per-em Space, Figure Space, // Punctuation Space, Thin Space, Hair Space, Zero Width Space. - c if ('\u{2000}' ..= '\u{200B}').contains(&c) => true, + ch if ('\u{2000}' ..= '\u{200B}').contains(&ch) => true, _ => false, } } + +#[inline] +pub fn char_is_punctuation(ch: char) -> bool { + use unicode_general_category::{get_general_category, GeneralCategory}; + + matches!( + get_general_category(ch), + GeneralCategory::OtherPunctuation + | GeneralCategory::OpenPunctuation + | GeneralCategory::ClosePunctuation + | GeneralCategory::InitialPunctuation + | GeneralCategory::FinalPunctuation + | GeneralCategory::ConnectorPunctuation + | GeneralCategory::DashPunctuation + | GeneralCategory::MathSymbol + | GeneralCategory::CurrencySymbol + | GeneralCategory::ModifierSymbol + ) +} + +#[inline] +pub fn char_is_word(ch: char) -> bool { + ch.is_alphanumeric() || ch == '_' +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_categorize() { + const EOL_TEST_CASE: &'static str = "\n\r\u{000B}\u{000C}\u{0085}\u{2028}\u{2029}"; + const WORD_TEST_CASE: &'static str = + "_hello_world_あいうえおー12345678901234567890"; + const PUNCTUATION_TEST_CASE: &'static str = + "!\"#$%&\'()*+,-./:;<=>?@[\\]^`{|}~!”#$%&’()*+、。:;<=>?@「」^`{|}~"; + const WHITESPACE_TEST_CASE: &'static str = "      "; + + for ch in 
EOL_TEST_CASE.chars() { + assert_eq!(CharCategory::Eol, categorize_char(ch)); + } + + for ch in WHITESPACE_TEST_CASE.chars() { + assert_eq!( + CharCategory::Whitespace, + categorize_char(ch), + "Testing '{}', but got `{:?}` instead of `CharCategory::Whitespace`", + ch, + categorize_char(ch) + ); + } + + for ch in WORD_TEST_CASE.chars() { + assert_eq!( + CharCategory::Word, + categorize_char(ch), + "Testing '{}', but got `{:?}` instead of `CharCategory::Word`", + ch, + categorize_char(ch) + ); + } + + for ch in PUNCTUATION_TEST_CASE.chars() { + assert_eq!( + CharCategory::Punctuation, + categorize_char(ch), + "Testing '{}', but got `{:?}` instead of `CharCategory::Punctuation`", + ch, + categorize_char(ch) + ); + } + } +} diff --git a/helix-core/src/lib.rs b/helix-core/src/lib.rs index e00e56be6..183b9f0a6 100644 --- a/helix-core/src/lib.rs +++ b/helix-core/src/lib.rs @@ -113,6 +113,6 @@ pub fn cache_dir() -> std::path::PathBuf { pub use state::State; pub use line_ending::{ - auto_detect_line_ending, get_line_ending, line_end, LineEnding, DEFAULT_LINE_ENDING, + auto_detect_line_ending, get_line_ending, line_end_char_index, LineEnding, DEFAULT_LINE_ENDING, }; pub use transaction::{Assoc, Change, ChangeSet, Operation, Transaction}; diff --git a/helix-core/src/line_ending.rs b/helix-core/src/line_ending.rs index 45e20c888..c4636c632 100644 --- a/helix-core/src/line_ending.rs +++ b/helix-core/src/line_ending.rs @@ -1,5 +1,10 @@ use crate::{Rope, RopeGraphemes, RopeSlice}; +#[cfg(target_os = "windows")] +pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::Crlf; +#[cfg(not(target_os = "windows"))] +pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::LF; + /// Represents one of the valid Unicode line endings. 
#[derive(PartialEq, Copy, Clone, Debug)] pub enum LineEnding { @@ -14,6 +19,7 @@ pub enum LineEnding { } impl LineEnding { + #[inline] pub fn len_chars(&self) -> usize { match self { Self::Crlf => 2, @@ -21,6 +27,7 @@ pub fn len_chars(&self) -> usize { } } + #[inline] pub fn as_str(&self) -> &'static str { match self { Self::Crlf => "\u{000D}\u{000A}", @@ -34,6 +41,22 @@ pub fn as_str(&self) -> &'static str { } } + #[inline] + pub fn from_char(ch: char) -> Option<LineEnding> { + match ch { + '\u{000A}' => Some(LineEnding::LF), + '\u{000B}' => Some(LineEnding::VT), + '\u{000C}' => Some(LineEnding::FF), + '\u{000D}' => Some(LineEnding::CR), + '\u{0085}' => Some(LineEnding::Nel), + '\u{2028}' => Some(LineEnding::LS), + '\u{2029}' => Some(LineEnding::PS), + // Not a line ending + _ => None, + } + } + + #[inline] pub fn from_str(g: &str) -> Option<LineEnding> { match g { "\u{000D}\u{000A}" => Some(LineEnding::Crlf), @@ -49,6 +72,7 @@ pub fn from_str(g: &str) -> Option<LineEnding> { } } + #[inline] pub fn from_rope_slice(g: &RopeSlice) -> Option<LineEnding> { if let Some(text) = g.as_str() { LineEnding::from_str(text) @@ -62,6 +86,11 @@ pub fn from_rope_slice(g: &RopeSlice) -> Option<LineEnding> { } } +#[inline] +pub fn str_is_line_ending(s: &str) -> bool { + LineEnding::from_str(s).is_some() +} + /// Attempts to detect what line ending the passed document uses. pub fn auto_detect_line_ending(doc: &Rope) -> Option<LineEnding> { // Return first matched line ending. Not all possible line endings @@ -96,19 +125,13 @@ pub fn get_line_ending(line: &RopeSlice) -> Option<LineEnding> { } /// Returns the char index of the end of the given line, not including its line ending. 
-pub fn line_end(slice: &RopeSlice, line: usize) -> usize { - slice.line_to_char(line + 1).saturating_sub( - get_line_ending(&slice.line(line)) +pub fn line_end_char_index(slice: &RopeSlice, line: usize) -> usize { + slice.line_to_char(line + 1) + - get_line_ending(&slice.line(line)) .map(|le| le.len_chars()) - .unwrap_or(0), - ) + .unwrap_or(0) } -#[cfg(target_os = "windows")] -pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::Crlf; -#[cfg(not(target_os = "windows"))] -pub const DEFAULT_LINE_ENDING: LineEnding = LineEnding::LF; - #[cfg(test)] mod line_ending_tests { use super::*; @@ -150,11 +173,11 @@ fn test_autodetect() { fn test_rope_slice_to_line_ending() { let r = Rope::from_str("\r\n"); assert_eq!( - rope_slice_to_line_ending(&r.slice(1..2)), + LineEnding::from_rope_slice(&r.slice(1..2)), Some(LineEnding::LF) ); assert_eq!( - rope_slice_to_line_ending(&r.slice(0..2)), + LineEnding::from_rope_slice(&r.slice(0..2)), Some(LineEnding::Crlf) ); } diff --git a/helix-core/src/movement.rs b/helix-core/src/movement.rs index 7f47e6625..d0023e9f8 100644 --- a/helix-core/src/movement.rs +++ b/helix-core/src/movement.rs @@ -3,9 +3,13 @@ use ropey::iter::Chars; use crate::{ + chars::{ + categorize_char, char_is_line_ending, char_is_punctuation, char_is_whitespace, + char_is_word, CharCategory, + }, coords_at_pos, get_line_ending, graphemes::{nth_next_grapheme_boundary, nth_prev_grapheme_boundary}, - line_end, pos_at_coords, Position, Range, RopeSlice, + line_end_char_index, pos_at_coords, Position, Range, RopeSlice, }; #[derive(Debug, Copy, Clone, PartialEq, Eq)] @@ -37,9 +41,8 @@ pub fn move_horizontally( nth_prev_grapheme_boundary(slice, pos, count).max(start) } Direction::Forward => { - // Line end is pos at the start of next line - 1 - let end = line_end(&slice, line); - nth_next_grapheme_boundary(slice, pos, count).min(end) + let end_char_idx = line_end_char_index(&slice, line); + nth_next_grapheme_boundary(slice, pos, count).min(end_char_idx) } }; let anchor = 
match behaviour { @@ -68,8 +71,11 @@ pub fn move_vertically( ), }; - // convert to 0-indexed, subtract another 1 because len_chars() counts \n - let new_line_len = slice.line(new_line).len_chars().saturating_sub(2); + // Length of the line sans line-ending. + let new_line_len = { + let line = slice.line(new_line); + line.len_chars() - get_line_ending(&line).map(|le| le.len_chars()).unwrap_or(0) + }; let new_col = std::cmp::min(horiz as usize, new_line_len); @@ -104,64 +110,6 @@ fn word_move(slice: RopeSlice, mut range: Range, count: usize, target: WordMotio } // ---- util ------------ -#[inline] -pub(crate) fn is_word(ch: char) -> bool { - ch.is_alphanumeric() || ch == '_' -} - -#[inline] -pub(crate) fn is_end_of_line(ch: char) -> bool { - ch == '\n' -} - -#[inline] -// Whitespace, but not end of line -pub(crate) fn is_strict_whitespace(ch: char) -> bool { - ch.is_whitespace() && !is_end_of_line(ch) -} - -#[inline] -pub(crate) fn is_punctuation(ch: char) -> bool { - use unicode_general_category::{get_general_category, GeneralCategory}; - - matches!( - get_general_category(ch), - GeneralCategory::OtherPunctuation - | GeneralCategory::OpenPunctuation - | GeneralCategory::ClosePunctuation - | GeneralCategory::InitialPunctuation - | GeneralCategory::FinalPunctuation - | GeneralCategory::ConnectorPunctuation - | GeneralCategory::DashPunctuation - | GeneralCategory::MathSymbol - | GeneralCategory::CurrencySymbol - | GeneralCategory::ModifierSymbol - ) -} - -#[derive(Debug, Eq, PartialEq)] -pub enum Category { - Whitespace, - Eol, - Word, - Punctuation, - Unknown, -} - -#[inline] -pub(crate) fn categorize(ch: char) -> Category { - if is_end_of_line(ch) { - Category::Eol - } else if ch.is_whitespace() { - Category::Whitespace - } else if is_word(ch) { - Category::Word - } else if is_punctuation(ch) { - Category::Punctuation - } else { - Category::Unknown - } -} #[inline] /// Returns first index that doesn't satisfy a given predicate when @@ -235,7 +183,8 @@ fn 
range_to_target(&mut self, target: WordMotionTarget, origin: Range) -> Range let mut phase = WordMotionPhase::Start; let mut head = origin.head; let mut anchor: Option<usize> = None; - let is_boundary = |a: char, b: Option<char>| categorize(a) != categorize(b.unwrap_or(a)); + let is_boundary = + |a: char, b: Option<char>| categorize_char(a) != categorize_char(b.unwrap_or(a)); while let Some(peek) = characters.peek().copied() { phase = match phase { WordMotionPhase::Start => { @@ -244,7 +193,8 @@ fn range_to_target(&mut self, target: WordMotionTarget, origin: Range) -> Range break; // We're at the end, so there's nothing to do. } // Anchor may remain here if the head wasn't at a boundary - if !is_boundary(peek, characters.peek().copied()) && !is_end_of_line(peek) { + if !is_boundary(peek, characters.peek().copied()) && !char_is_line_ending(peek) + { anchor = Some(head); } // First character is always skipped by the head @@ -252,7 +202,7 @@ fn range_to_target(&mut self, target: WordMotionTarget, origin: Range) -> Range WordMotionPhase::SkipNewlines } WordMotionPhase::SkipNewlines => { - if is_end_of_line(peek) { + if char_is_line_ending(peek) { characters.next(); if characters.peek().is_some() { advance(&mut head); @@ -286,12 +236,12 @@ fn reached_target(target: WordMotionTarget, peek: char, next_peek: Option<&char> match target { WordMotionTarget::NextWordStart => { - ((categorize(peek) != categorize(*next_peek)) - && (is_end_of_line(*next_peek) || !next_peek.is_whitespace())) + ((categorize_char(peek) != categorize_char(*next_peek)) + && (char_is_line_ending(*next_peek) || !next_peek.is_whitespace())) } WordMotionTarget::NextWordEnd | WordMotionTarget::PrevWordStart => { - ((categorize(peek) != categorize(*next_peek)) - && (!peek.is_whitespace() || is_end_of_line(*next_peek))) + ((categorize_char(peek) != categorize_char(*next_peek)) + && (!peek.is_whitespace() || char_is_line_ending(*next_peek))) } } } @@ -330,7 +280,7 @@ fn test_vertical_move() { slice, move_vertically(slice, 
range, Direction::Forward, 1, Movement::Move).head ), - (1, 2).into() + (1, 3).into() ); } @@ -343,12 +293,12 @@ fn horizontal_moves_through_single_line_in_single_line_text() { let mut range = Range::point(position); let moves_and_expected_coordinates = [ - ((Direction::Forward, 1usize), (0, 1)), - ((Direction::Forward, 2usize), (0, 3)), - ((Direction::Forward, 0usize), (0, 3)), - ((Direction::Forward, 999usize), (0, 31)), - ((Direction::Forward, 999usize), (0, 31)), - ((Direction::Backward, 999usize), (0, 0)), + ((Direction::Forward, 1usize), (0, 1)), // T|his is a simple alphabetic line + ((Direction::Forward, 2usize), (0, 3)), // Thi|s is a simple alphabetic line + ((Direction::Forward, 0usize), (0, 3)), // Thi|s is a simple alphabetic line + ((Direction::Forward, 999usize), (0, 32)), // This is a simple alphabetic line| + ((Direction::Forward, 999usize), (0, 32)), // This is a simple alphabetic line| + ((Direction::Backward, 999usize), (0, 0)), // |This is a simple alphabetic line ]; for ((direction, amount), coordinates) in IntoIter::new(moves_and_expected_coordinates) { @@ -366,15 +316,15 @@ fn horizontal_moves_through_single_line_in_multiline_text() { let mut range = Range::point(position); let moves_and_expected_coordinates = IntoIter::new([ - ((Direction::Forward, 1usize), (0, 1)), // M_ltiline - ((Direction::Forward, 2usize), (0, 3)), // Mul_iline - ((Direction::Backward, 6usize), (0, 0)), // _ultiline - ((Direction::Backward, 999usize), (0, 0)), // _ultiline - ((Direction::Forward, 3usize), (0, 3)), // Mul_iline - ((Direction::Forward, 0usize), (0, 3)), // Mul_iline - ((Direction::Backward, 0usize), (0, 3)), // Mul_iline - ((Direction::Forward, 999usize), (0, 9)), // Multilin_ - ((Direction::Forward, 999usize), (0, 9)), // Multilin_ + ((Direction::Forward, 1usize), (0, 1)), // M|ultiline\n + ((Direction::Forward, 2usize), (0, 3)), // Mul|tiline\n + ((Direction::Backward, 6usize), (0, 0)), // |Multiline\n + ((Direction::Backward, 999usize), (0, 0)), // 
|Multiline\n + ((Direction::Forward, 3usize), (0, 3)), // Mul|tiline\n + ((Direction::Forward, 0usize), (0, 3)), // Mul|tiline\n + ((Direction::Backward, 0usize), (0, 3)), // Mul|tiline\n + ((Direction::Forward, 999usize), (0, 9)), // Multiline|\n + ((Direction::Forward, 999usize), (0, 9)), // Multiline|\n ]); for ((direction, amount), coordinates) in moves_and_expected_coordinates { @@ -446,7 +396,7 @@ enum Axis { // First descent preserves column as the target line is wider ((Axis::V, Direction::Forward, 1usize), (1, 8)), // Second descent clamps column as the target line is shorter - ((Axis::V, Direction::Forward, 1usize), (2, 4)), + ((Axis::V, Direction::Forward, 1usize), (2, 5)), // Third descent restores the original column ((Axis::V, Direction::Forward, 1usize), (3, 8)), // Behaviour is preserved even through long jumps @@ -760,45 +710,4 @@ fn test_behaviour_when_moving_to_end_of_next_words() { } } } - - #[test] - fn test_categorize() { - const WORD_TEST_CASE: &'static str = - "_hello_world_あいうえおー12345678901234567890"; - const PUNCTUATION_TEST_CASE: &'static str = - "!\"#$%&\'()*+,-./:;<=>?@[\\]^`{|}~!”#$%&’()*+、。:;<=>?@「」^`{|}~"; - const WHITESPACE_TEST_CASE: &'static str = "      "; - - assert_eq!(Category::Eol, categorize('\n')); - - for ch in WHITESPACE_TEST_CASE.chars() { - assert_eq!( - Category::Whitespace, - categorize(ch), - "Testing '{}', but got `{:?}` instead of `Category::Whitespace`", - ch, - categorize(ch) - ); - } - - for ch in WORD_TEST_CASE.chars() { - assert_eq!( - Category::Word, - categorize(ch), - "Testing '{}', but got `{:?}` instead of `Category::Word`", - ch, - categorize(ch) - ); - } - - for ch in PUNCTUATION_TEST_CASE.chars() { - assert_eq!( - Category::Punctuation, - categorize(ch), - "Testing '{}', but got `{:?}` instead of `Category::Punctuation`", - ch, - categorize(ch) - ); - } - } } diff --git a/helix-core/src/position.rs b/helix-core/src/position.rs index 3d85ff2f4..392eee9cc 100644 --- a/helix-core/src/position.rs +++ 
b/helix-core/src/position.rs @@ -1,4 +1,5 @@ use crate::{ + chars::char_is_line_ending, graphemes::{nth_next_grapheme_boundary, RopeGraphemes}, Rope, RopeSlice, }; @@ -23,8 +24,9 @@ pub const fn is_zero(self) -> bool { pub fn traverse(self, text: &crate::Tendril) -> Self { let Self { mut row, mut col } = self; // TODO: there should be a better way here - for ch in text.chars() { - if ch == '\n' { + let mut chars = text.chars().peekable(); + while let Some(ch) = chars.next() { + if char_is_line_ending(ch) && !(ch == '\r' && chars.peek() == Some(&'\n')) { row += 1; col = 0; } else { diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs index ae058eb18..92e52d73b 100644 --- a/helix-core/src/syntax.rs +++ b/helix-core/src/syntax.rs @@ -1,4 +1,4 @@ -use crate::{regex::Regex, Change, Rope, RopeSlice, Transaction}; +use crate::{chars::char_is_line_ending, regex::Regex, Change, Rope, RopeSlice, Transaction}; pub use helix_syntax::{get_language, get_language_name, Lang}; use std::{ @@ -579,9 +579,10 @@ fn traverse(point: Point, text: &Tendril) -> Point { mut column, } = point; - // TODO: there should be a better way here - for ch in text.bytes() { - if ch == b'\n' { + // TODO: there should be a better way here. 
+ let mut chars = text.chars().peekable(); + while let Some(ch) = chars.next() { + if char_is_line_ending(ch) && !(ch == '\r' && chars.peek() == Some(&'\n')) { row += 1; column = 0; } else { diff --git a/helix-lsp/src/client.rs b/helix-lsp/src/client.rs index 101d2f9b1..7f136fe84 100644 --- a/helix-lsp/src/client.rs +++ b/helix-lsp/src/client.rs @@ -3,7 +3,7 @@ Call, Error, OffsetEncoding, Result, }; -use helix_core::{find_root, ChangeSet, Rope}; +use helix_core::{chars::char_is_line_ending, find_root, ChangeSet, Rope}; use jsonrpc_core as jsonrpc; use lsp_types as lsp; use serde_json::Value; @@ -337,8 +337,9 @@ fn traverse(pos: lsp::Position, text: RopeSlice) -> lsp::Position { mut character, } = pos; - for ch in text.chars() { - if ch == '\n' { + let mut chars = text.chars().peekable(); + while let Some(ch) = chars.next() { + if char_is_line_ending(ch) && !(ch == '\r' && chars.peek() == Some(&'\n')) { line += 1; character = 0; } else { diff --git a/helix-term/src/commands.rs b/helix-term/src/commands.rs index 8124c17af..b006504b1 100644 --- a/helix-term/src/commands.rs +++ b/helix-term/src/commands.rs @@ -1,6 +1,6 @@ use helix_core::{ comment, coords_at_pos, find_first_non_whitespace_char, find_root, get_line_ending, graphemes, - indent, line_end, match_brackets, + indent, line_end_char_index, match_brackets, movement::{self, Direction}, object, pos_at_coords, regex::{self, Regex}, @@ -342,7 +342,7 @@ fn move_line_end(cx: &mut Context) { let text = doc.text(); let line = text.char_to_line(range.head); - let pos = line_end(&text.slice(..), line); + let pos = line_end_char_index(&text.slice(..), line); Range::new(pos, pos) }); @@ -490,6 +490,8 @@ fn find_char_impl(cx: &mut Context, search_fn: F, inclusive: bool, extend: bo let count = cx.count(); // need to wait for next key + // TODO: should this be done by grapheme rather than char? For example, + // we can't properly handle the line-ending case here in terms of char. 
cx.on_next_key(move |cx, event| { let ch = match event { KeyEvent { @@ -623,7 +625,7 @@ fn replace(cx: &mut Context) { KeyEvent { code: KeyCode::Enter, .. - } => Some('\n'), // TODO: replace this with DEFAULT_LINE_ENDING + } => Some('\n'), // TODO: use the document's default line ending. _ => None, }; @@ -763,7 +765,7 @@ fn extend_line_end(cx: &mut Context) { let text = doc.text(); let line = text.char_to_line(range.head); - let pos = line_end(&text.slice(..), line); + let pos = line_end_char_index(&text.slice(..), line); Range::new(range.anchor, pos) }); @@ -1642,7 +1644,7 @@ fn append_to_line(cx: &mut Context) { let selection = doc.selection(view.id).transform(|range| { let text = doc.text(); let line = text.char_to_line(range.head); - let pos = line_end(&text.slice(..), line); + let pos = line_end_char_index(&text.slice(..), line); Range::new(pos, pos) }); doc.set_selection(view.id, selection); diff --git a/helix-term/src/ui/markdown.rs b/helix-term/src/ui/markdown.rs index be113747d..3ce3a5b80 100644 --- a/helix-term/src/ui/markdown.rs +++ b/helix-term/src/ui/markdown.rs @@ -110,6 +110,8 @@ fn to_span(text: pulldown_cmark::CowStr) -> Span { // TODO: replace tabs with indentation let mut slice = &text[start..end]; + // TODO: do we need to handle all unicode line endings + // here, or is just '\n' okay? while let Some(end) = slice.find('\n') { // emit span up to newline let text = &slice[..end]; diff --git a/helix-tui/Cargo.toml b/helix-tui/Cargo.toml index 89fa755d3..30e2374dd 100644 --- a/helix-tui/Cargo.toml +++ b/helix-tui/Cargo.toml @@ -22,3 +22,4 @@ unicode-segmentation = "1.2" unicode-width = "0.1" crossterm = { version = "0.20", optional = true } serde = { version = "1", "optional" = true, features = ["derive"]} +helix-core = { version = "0.2", path = "../helix-core" } diff --git a/helix-tui/src/text.rs b/helix-tui/src/text.rs index c671e918e..b23bfd81d 100644 --- a/helix-tui/src/text.rs +++ b/helix-tui/src/text.rs @@ -47,6 +47,7 @@ //! ]); //! 
``` use crate::style::Style; +use helix_core::line_ending::str_is_line_ending; use std::borrow::Cow; use unicode_segmentation::UnicodeSegmentation; use unicode_width::UnicodeWidthStr; @@ -177,7 +178,7 @@ pub fn styled_graphemes( symbol: g, style: base_style.patch(self.style), }) - .filter(|s| s.symbol != "\n") + .filter(|s| !str_is_line_ending(s.symbol)) } } diff --git a/helix-tui/src/widgets/reflow.rs b/helix-tui/src/widgets/reflow.rs index 94ff7330d..ae561a4f9 100644 --- a/helix-tui/src/widgets/reflow.rs +++ b/helix-tui/src/widgets/reflow.rs @@ -1,4 +1,5 @@ use crate::text::StyledGrapheme; +use helix_core::line_ending::str_is_line_ending; use unicode_segmentation::UnicodeSegmentation; use unicode_width::UnicodeWidthStr; @@ -62,13 +63,13 @@ fn next_line(&mut self) -> Option<(&[StyledGrapheme<'a>], u16)> { // Ignore characters wider that the total max width. if symbol.width() as u16 > self.max_line_width // Skip leading whitespace when trim is enabled. - || self.trim && symbol_whitespace && symbol != "\n" && current_line_width == 0 + || self.trim && symbol_whitespace && !str_is_line_ending(symbol) && current_line_width == 0 { continue; } // Break on newline and discard it. - if symbol == "\n" { + if str_is_line_ending(symbol) { if prev_whitespace { current_line_width = width_to_last_word_end; self.current_line.truncate(symbols_to_last_word_end); @@ -170,7 +171,7 @@ fn next_line(&mut self) -> Option<(&[StyledGrapheme<'a>], u16)> { } // Break on newline and discard it. - if symbol == "\n" { + if str_is_line_ending(symbol) { break; } @@ -199,7 +200,7 @@ fn next_line(&mut self) -> Option<(&[StyledGrapheme<'a>], u16)> { if skip_rest { for StyledGrapheme { symbol, .. 
} in &mut self.symbols { - if symbol == "\n" { + if str_is_line_ending(symbol) { break; } } diff --git a/helix-view/src/document.rs b/helix-view/src/document.rs index 80be1ed25..3e38c24d6 100644 --- a/helix-view/src/document.rs +++ b/helix-view/src/document.rs @@ -10,7 +10,7 @@ use helix_core::{ auto_detect_line_ending, - chars::{char_is_linebreak, char_is_whitespace}, + chars::{char_is_line_ending, char_is_whitespace}, history::History, syntax::{LanguageConfiguration, LOADER}, ChangeSet, Diagnostic, LineEnding, Rope, Selection, State, Syntax, Transaction, @@ -81,6 +81,9 @@ pub struct Document { /// Current indent style. pub indent_style: IndentStyle, + /// The document's default line ending. + pub line_ending: LineEnding, + syntax: Option, // /// Corresponding language scope name. Usually `source.`. pub(crate) language: Option>, @@ -99,7 +102,6 @@ pub struct Document { diagnostics: Vec, language_server: Option>, - line_ending: LineEnding, } use std::fmt; @@ -254,21 +256,21 @@ pub fn new(text: Rope) -> Self { pub fn load(path: PathBuf) -> Result { use std::{fs::File, io::BufReader}; - let doc = if !path.exists() { + let mut doc = if !path.exists() { Rope::from(DEFAULT_LINE_ENDING.as_str()) } else { let file = File::open(&path).context(format!("unable to open {:?}", path))?; - let mut doc = Rope::from_reader(BufReader::new(file))?; - // add missing newline at the end of file - if doc.len_bytes() == 0 || doc.byte(doc.len_bytes() - 1) != b'\n' { - doc.insert_char(doc.len_chars(), '\n'); - } - doc + Rope::from_reader(BufReader::new(file))? 
}; // search for line endings let line_ending = auto_detect_line_ending(&doc).unwrap_or(DEFAULT_LINE_ENDING); + // add the detected line ending if the file is empty or does not already end with one + if doc.len_bytes() == 0 || !char_is_line_ending(doc.char(doc.len_chars() - 1)) { + doc.insert(doc.len_chars(), line_ending.as_str()); + } + let mut doc = Self::new(doc); // set the path and try detecting the language doc.set_path(&path)?; @@ -379,7 +381,7 @@ fn detect_indent_style(&mut self) { Some(' ') => false, // Ignore blank lines. - Some(c) if char_is_linebreak(c) => continue, + Some(c) if char_is_line_ending(c) => continue, _ => { prev_line_is_tabs = false; @@ -403,7 +405,7 @@ fn detect_indent_style(&mut self) { c if char_is_whitespace(c) => count_is_done = true, // Ignore blank lines. - c if char_is_linebreak(c) => continue 'outer, + c if char_is_line_ending(c) => continue 'outer, _ => break, } diff --git a/helix-view/src/editor.rs b/helix-view/src/editor.rs index db8ae87ab..fb2eb36d6 100644 --- a/helix-view/src/editor.rs +++ b/helix-view/src/editor.rs @@ -12,7 +12,7 @@ pub use helix_core::diagnostic::Severity; pub use helix_core::register::Registers; -use helix_core::Position; +use helix_core::{Position, DEFAULT_LINE_ENDING}; #[derive(Debug)] pub struct Editor { @@ -150,7 +150,7 @@ pub fn switch(&mut self, id: DocumentId, action: Action) { pub fn new_file(&mut self, action: Action) -> DocumentId { use helix_core::Rope; - let doc = Document::new(Rope::from("\n")); + let doc = Document::new(Rope::from(DEFAULT_LINE_ENDING.as_str())); let id = self.documents.insert(doc); self.documents[id].id = id; self.switch(id, action);