// Provenance: added by commit 080f1697e97e13461ec6df4d31c8924d01257a1b ("MERGE",
// Roy Ben Shabat, Tue, 9 Apr 2019 01:47:48 +0300) as
// Software/Visual_Studio/Scripting/Tango.Scripting.Editors/Document/TextUtilities.cs

// Copyright (c) AlphaSierraPapa for the SharpDevelop Team (for details please see \doc\copyright.txt)
// This code is distributed under the GNU LGPL (for details please see \doc\license.txt)

using System;
using System.Globalization;
using System.Windows.Documents;

namespace Tango.Scripting.Editors.Document
{
    /// <summary>
    /// Specifies the mode for getting the next caret position.
    /// </summary>
    public enum CaretPositioningMode
    {
        /// <summary>
        /// Normal positioning (stop at every caret position)
        /// </summary>
        Normal,
        /// <summary>
        /// Stop only on word borders.
        /// </summary>
        WordBorder,
        /// <summary>
        /// Stop only at the beginning of words. This is used for Ctrl+Left/Ctrl+Right.
        /// </summary>
        WordStart,
        /// <summary>
        /// Stop only at the beginning of words, and anywhere in the middle of symbols.
        /// </summary>
        WordStartOrSymbol,
        /// <summary>
        /// Stop only on word borders, and anywhere in the middle of symbols.
        /// </summary>
        WordBorderOrSymbol
    }

    /// <summary>
    /// Static helper methods for working with text.
    /// </summary>
    public static partial class TextUtilities
    {
        #region GetControlCharacterName
        // the names of the first 32 ASCII characters = Unicode C0 block
        static readonly string[] c0Table = {
            "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL", "BS", "HT",
            "LF", "VT", "FF", "CR", "SO", "SI", "DLE", "DC1", "DC2", "DC3",
            "DC4", "NAK", "SYN", "ETB", "CAN", "EM", "SUB", "ESC", "FS", "GS",
            "RS", "US"
        };

        // DEL (ASCII 127) and
        // the names of the control characters in the C1 block (Unicode 128 to 159)
        static readonly string[] delAndC1Table = {
            "DEL",
            "PAD", "HOP", "BPH", "NBH", "IND", "NEL", "SSA", "ESA", "HTS", "HTJ",
            "VTS", "PLD", "PLU", "RI", "SS2", "SS3", "DCS", "PU1", "PU2", "STS",
            "CCH", "MW", "SPA", "EPA", "SOS", "SGCI", "SCI", "CSI", "ST", "OSC",
            "PM", "APC"
        };

        /// <summary>
        /// Gets the name of the control character.
        /// For unknown characters, the unicode codepoint is returned as 4-digit hexadecimal value.
        /// </summary>
        /// <param name="controlCharacter">The character to look up.</param>
        /// <returns>
        /// The short name from the C0 table (code points 0 to 31) or from the DEL/C1 table
        /// (code points 127 to 159); otherwise the code point formatted as lower-case "x4" hex.
        /// </returns>
        public static string GetControlCharacterName(char controlCharacter)
        {
            int num = (int)controlCharacter;
            if (num < c0Table.Length)
                return c0Table[num];
            else if (num >= 127 && num <= 159)
                return delAndC1Table[num - 127]; // table starts at DEL (127)
            else
                return num.ToString("x4", CultureInfo.InvariantCulture);
        }
        #endregion

        #region GetWhitespace
        /// <summary>
        /// Gets all whitespace (' ' and '\t', but no newlines) after offset.
        /// </summary>
        /// <param name="textSource">The text source.</param>
        /// <param name="offset">The offset where the whitespace starts.</param>
        /// <returns>The segment containing the whitespace.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="textSource"/> is null.</exception>
        [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1702:CompoundWordsShouldBeCasedCorrectly", MessageId = "Whitespace",
                                                         Justification = "WPF uses 'Whitespace'")]
        public static ISegment GetWhitespaceAfter(ITextSource textSource, int offset)
        {
            if (textSource == null)
                throw new ArgumentNullException("textSource");
            int pos;
            // scan forward until the first character that is neither space nor tab
            for (pos = offset; pos < textSource.TextLength; pos++) {
                char c = textSource.GetCharAt(pos);
                if (c != ' ' && c != '\t')
                    break;
            }
            return new SimpleSegment(offset, pos - offset);
        }

        /// <summary>
        /// Gets all whitespace (' ' and '\t', but no newlines) before offset.
        /// </summary>
        /// <param name="textSource">The text source.</param>
        /// <param name="offset">The offset where the whitespace ends.</param>
        /// <returns>The segment containing the whitespace.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="textSource"/> is null.</exception>
        [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1702:CompoundWordsShouldBeCasedCorrectly", MessageId = "Whitespace",
                                                         Justification = "WPF uses 'Whitespace'")]
        public static ISegment GetWhitespaceBefore(ITextSource textSource, int offset)
        {
            if (textSource == null)
                throw new ArgumentNullException("textSource");
            int pos;
            // scan backward until the first character that is neither space nor tab
            for (pos = offset - 1; pos >= 0; pos--) {
                char c = textSource.GetCharAt(pos);
                if (c != ' ' && c != '\t')
                    break;
            }
            pos++; // go back the one character that isn't whitespace
            return new SimpleSegment(pos, offset - pos);
        }

        /// <summary>
        /// Gets the leading whitespace segment on the document line.
        /// </summary>
        /// <param name="document">The document that is scanned for whitespace.</param>
        /// <param name="documentLine">The line whose start offset is used as the scan start.</param>
        /// <returns>The segment of spaces and tabs starting at the line's offset.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="documentLine"/> is null.</exception>
        [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1702:CompoundWordsShouldBeCasedCorrectly", MessageId = "Whitespace",
                                                         Justification = "WPF uses 'Whitespace'")]
        [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1011:ConsiderPassingBaseTypesAsParameters",
                                                         Justification = "Parameter cannot be ITextSource because it must belong to the DocumentLine")]
        public static ISegment GetLeadingWhitespace(TextDocument document, DocumentLine documentLine)
        {
            if (documentLine == null)
                throw new ArgumentNullException("documentLine");
            return GetWhitespaceAfter(document, documentLine.Offset);
        }

        /// <summary>
        /// Gets the trailing whitespace segment on the document line.
        /// </summary>
        /// <param name="document">The document that is scanned for whitespace.</param>
        /// <param name="documentLine">The line whose end offset is used as the scan start.</param>
        /// <returns>
        /// The segment of spaces and tabs ending at the line's end offset, or an empty segment
        /// at the end offset when the whitespace run reaches back to the line's start offset.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="documentLine"/> is null.</exception>
        [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1702:CompoundWordsShouldBeCasedCorrectly", MessageId = "Whitespace",
                                                         Justification = "WPF uses 'Whitespace'")]
        [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1011:ConsiderPassingBaseTypesAsParameters",
                                                         Justification = "Parameter cannot be ITextSource because it must belong to the DocumentLine")]
        public static ISegment GetTrailingWhitespace(TextDocument document, DocumentLine documentLine)
        {
            if (documentLine == null)
                throw new ArgumentNullException("documentLine");
            ISegment segment = GetWhitespaceBefore(document, documentLine.EndOffset);
            // If the whole line consists of whitespace, we consider all of it as leading whitespace,
            // so return an empty segment as trailing whitespace.
            if (segment.Offset == documentLine.Offset)
                return new SimpleSegment(documentLine.EndOffset, 0);
            else
                return segment;
        }
        #endregion

        #region GetSingleIndentationSegment
        /// <summary>
        /// Gets a single indentation segment starting at <paramref name="offset"/> - at most one tab
        /// or <paramref name="indentationSize"/> spaces.
        /// </summary>
        /// <param name="textSource">The text source.</param>
        /// <param name="offset">The offset where the indentation segment starts.</param>
        /// <param name="indentationSize">The size of an indentation unit, i.e. the maximum number
        /// of spaces that form one segment.</param>
        /// <returns>The indentation segment.
        /// If there is no indentation character at the specified <paramref name="offset"/>,
        /// an empty segment is returned.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="textSource"/> is null.</exception>
        public static ISegment GetSingleIndentationSegment(ITextSource textSource, int offset, int indentationSize)
        {
            if (textSource == null)
                throw new ArgumentNullException("textSource");
            int pos = offset;
            while (pos < textSource.TextLength) {
                char c = textSource.GetCharAt(pos);
                if (c == '\t') {
                    // a tab counts as a whole unit, but only when it is the first character;
                    // a tab after some spaces ends the (shorter) run of spaces
                    if (pos == offset)
                        return new SimpleSegment(offset, 1);
                    else
                        break;
                } else if (c == ' ') {
                    if (pos - offset >= indentationSize)
                        break;
                } else {
                    break;
                }
                // continue only if c==' ' and (pos-offset)<indentationSize
                pos++;
            }
            return new SimpleSegment(offset, pos - offset);
        }
        #endregion

        #region GetCharacterClass
        /// <summary>
        /// Gets whether the character is whitespace, part of an identifier, or line terminator.
        /// </summary>
        /// <param name="c">The character to classify.</param>
        /// <returns>
        /// <see cref="CharacterClass.LineTerminator"/> for '\r' and '\n',
        /// <see cref="CharacterClass.Whitespace"/> for other whitespace,
        /// <see cref="CharacterClass.IdentifierPart"/> for letters, digits and '_',
        /// and <see cref="CharacterClass.Other"/> for everything else.
        /// </returns>
        [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1704:IdentifiersShouldBeSpelledCorrectly", MessageId = "c")]
        public static CharacterClass GetCharacterClass(char c)
        {
            // line terminators must be tested first because char.IsWhiteSpace also matches them
            if (c == '\r' || c == '\n')
                return CharacterClass.LineTerminator;
            else if (char.IsWhiteSpace(c))
                return CharacterClass.Whitespace;
            else if (char.IsLetterOrDigit(c) || c == '_')
                return CharacterClass.IdentifierPart;
            else
                return CharacterClass.Other;
        }
        #endregion

        #region GetNextCaretPosition
        /// <summary>
        /// Gets the next caret position.
        /// </summary>
        /// <param name="textSource">The text source.</param>
        /// <param name="offset">The start offset inside the text source.</param>
        /// <param name="direction">The search direction (forwards or backwards).</param>
        /// <param name="mode">The mode for caret positioning.</param>
        /// <returns>The offset of the next caret position, or -1 if there is no further caret position
        /// in the text source.</returns>
        /// <remarks>
        /// This method is NOT equivalent to the actual caret movement when using VisualLine.GetNextCaretPosition.
        /// In real caret movement, there are additional caret stops at line starts and ends. This method
        /// treats linefeeds as simple whitespace.
        /// </remarks>
        /// <exception cref="ArgumentNullException"><paramref name="textSource"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="mode"/> or <paramref name="direction"/>
        /// is not one of the supported enum values.</exception>
        public static int GetNextCaretPosition(ITextSource textSource, int offset, LogicalDirection direction, CaretPositioningMode mode)
        {
            if (textSource == null)
                throw new ArgumentNullException("textSource");
            if (mode != CaretPositioningMode.Normal
                && mode != CaretPositioningMode.WordBorder
                && mode != CaretPositioningMode.WordStart
                && mode != CaretPositioningMode.WordBorderOrSymbol
                && mode != CaretPositioningMode.WordStartOrSymbol)
            {
                throw new ArgumentException("Unsupported CaretPositioningMode: " + mode, "mode");
            }
            if (direction != LogicalDirection.Backward
                && direction != LogicalDirection.Forward)
            {
                throw new ArgumentException("Invalid LogicalDirection: " + direction, "direction");
            }
            int textLength = textSource.TextLength;
            if (textLength <= 0) {
                // empty document? has a normal caret position at 0, though no word borders
                if (mode == CaretPositioningMode.Normal) {
                    if (offset > 0 && direction == LogicalDirection.Backward) return 0;
                    if (offset < 0 && direction == LogicalDirection.Forward) return 0;
                }
                return -1;
            }
            while (true) {
                int nextPos = (direction == LogicalDirection.Backward) ? offset - 1 : offset + 1;

                // return -1 if there is no further caret position in the text source
                // we also need this to handle offset values outside the valid range
                if (nextPos < 0 || nextPos > textLength)
                    return -1;

                // stop at every caret position? we can stop immediately.
                if (mode == CaretPositioningMode.Normal)
                    return nextPos;
                // not normal mode? we're looking for word borders...

                // check if we've run against the textSource borders.
                // a 'textSource' usually isn't the whole document, but a single VisualLineElement.
                if (nextPos == 0) {
                    // at the document start, there's only a word border
                    // if the first character is not whitespace
                    if (!char.IsWhiteSpace(textSource.GetCharAt(0)))
                        return nextPos;
                } else if (nextPos == textLength) {
                    // at the document end, there's never a word start
                    if (mode != CaretPositioningMode.WordStart && mode != CaretPositioningMode.WordStartOrSymbol) {
                        // at the document end, there's only a word border
                        // if the last character is not whitespace
                        if (!char.IsWhiteSpace(textSource.GetCharAt(textLength - 1)))
                            return nextPos;
                    }
                } else {
                    CharacterClass charBefore = GetCharacterClass(textSource.GetCharAt(nextPos - 1));
                    CharacterClass charAfter = GetCharacterClass(textSource.GetCharAt(nextPos));
                    if (charBefore == charAfter) {
                        if (charBefore == CharacterClass.Other &&
                            (mode == CaretPositioningMode.WordBorderOrSymbol || mode == CaretPositioningMode.WordStartOrSymbol))
                        {
                            // With the "OrSymbol" modes, there's a word border and start between any two unknown characters
                            return nextPos;
                        }
                    } else {
                        // this looks like a possible border

                        // if we're looking for word starts, check that this is a word start (and not a word end)
                        // if we're just checking for word borders, accept unconditionally
                        if (!((mode == CaretPositioningMode.WordStart || mode == CaretPositioningMode.WordStartOrSymbol)
                              && (charAfter == CharacterClass.Whitespace || charAfter == CharacterClass.LineTerminator)))
                        {
                            return nextPos;
                        }
                    }
                }
                // we'll have to continue searching...
                offset = nextPos;
            }
        }
        #endregion
    }

    /// <summary>
    /// Classifies a character as whitespace, line terminator, part of an identifier, or other.
    /// </summary>
    public enum CharacterClass
    {
        /// <summary>
        /// The character is not whitespace, line terminator or part of an identifier.
        /// </summary>
        Other,
        /// <summary>
        /// The character is whitespace (but not line terminator).
        /// </summary>
        [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1702:CompoundWordsShouldBeCasedCorrectly", MessageId = "Whitespace",
                                                         Justification = "WPF uses 'Whitespace'")]
        Whitespace,
        /// <summary>
        /// The character can be part of an identifier (Letter, digit or underscore).
        /// </summary>
        IdentifierPart,
        /// <summary>
        /// The character is line terminator (\r or \n).
        /// </summary>
        LineTerminator
    }
}