helix-mirror/helix-core/src/chars.rs

/// Returns `true` when `c` is one of the characters treated as a line break.
///
/// The recognized set is U+000A through U+000D (line feed, vertical tab,
/// form feed, carriage return), U+0085 (next line), U+2028 (line separator)
/// and U+2029 (paragraph separator). Every other character yields `false`.
pub fn char_is_linebreak(c: char) -> bool {
    matches!(
        c,
        // LineFeed, VerticalTab, FormFeed, CarriageReturn (contiguous).
        '\u{000A}'..='\u{000D}'
            // NextLine
            | '\u{0085}'
            // Line Separator, ParagraphSeparator (contiguous).
            | '\u{2028}'..='\u{2029}'
    )
}

/// Determine whether a character qualifies as (non-line-break)
/// whitespace.
///
/// Returns `true` for tab, space, and the Unicode spacing characters
/// enumerated in the body (including the zero-width ones), and `false`
/// for everything else.
///
/// Ogham Space Mark (U+1680) is deliberately left out: it is usually
/// displayed as a dash, not as whitespace.
pub fn char_is_whitespace(c: char) -> bool {
    // TODO: this is a naive binary categorization of whitespace
    // characters.  For display, word wrapping, etc. we'll need a better
    // categorization based on e.g. breaking vs non-breaking spaces
    // and whether they're zero-width or not.
    matches!(
        c,
        '\u{0009}' // Character Tabulation
            | '\u{0020}' // Space
            | '\u{00A0}' // No-break Space
            | '\u{180E}' // Mongolian Vowel Separator
            // En Quad, Em Quad, En Space, Em Space, Three-per-em Space,
            // Four-per-em Space, Six-per-em Space, Figure Space,
            // Punctuation Space, Thin Space, Hair Space, Zero Width Space.
            | '\u{2000}'..='\u{200B}'
            | '\u{202F}' // Narrow No-break Space
            | '\u{205F}' // Medium Mathematical Space
            | '\u{3000}' // Ideographic Space
            | '\u{FEFF}' // Zero Width No-break Space
    )
}
Address PR comments.
- Move char functions into their own module under helix_core.
- Use matches!() macro where appropriate.
- Use a static lifetime on indent_unit() now that we can.
2021-06-14 06:13:31 +04:00
			`/// Determine whether a character is a line break.`
			`pub fn char_is_linebreak(c: char) -> bool {`
			`matches!(`
			`c,`
			`'\u{000A}' \| // LineFeed`
			`'\u{000B}' \| // VerticalTab`
			`'\u{000C}' \| // FormFeed`
			`'\u{000D}' \| // CarriageReturn`
			`'\u{0085}' \| // NextLine`
			`'\u{2028}' \| // Line Separator`
			`'\u{2029}' // ParagraphSeparator`
			`)`
			`}`

			`/// Determine whether a character qualifies as (non-line-break)`
			`/// whitespace.`
			`pub fn char_is_whitespace(c: char) -> bool {`
			`// TODO: this is a naive binary categorization of whitespace`
			`// characters. For display, word wrapping, etc. we'll need a better`
			`// categorization based on e.g. breaking vs non-breaking spaces`
			`// and whether they're zero-width or not.`
			`match c {`
			`//'\u{1680}' \| // Ogham Space Mark (here for completeness, but usually displayed as a dash, not as whitespace)`
			`'\u{0009}' \| // Character Tabulation`
			`'\u{0020}' \| // Space`
			`'\u{00A0}' \| // No-break Space`
			`'\u{180E}' \| // Mongolian Vowel Separator`
			`'\u{202F}' \| // Narrow No-break Space`
			`'\u{205F}' \| // Medium Mathematical Space`
			`'\u{3000}' \| // Ideographic Space`
			`'\u{FEFF}' // Zero Width No-break Space`
			`=> true,`

			`// En Quad, Em Quad, En Space, Em Space, Three-per-em Space,`
			`// Four-per-em Space, Six-per-em Space, Figure Space,`
			`// Punctuation Space, Thin Space, Hair Space, Zero Width Space.`
			`c if ('\u{2000}' ..= '\u{200B}').contains(&c) => true,`

			`_ => false,`
			`}`
			`}`