aboutsummaryrefslogtreecommitdiffstats
path: root/Software/Visual_Studio/SideChains/ICSharpCode.AvalonEdit/Document/TextUtilities.cs
blob: 8f07596b8d18606dff9e7e0c781e87fbb0defdb3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
// Copyright (c) AlphaSierraPapa for the SharpDevelop Team (for details please see \doc\copyright.txt)
// This code is distributed under the GNU LGPL (for details please see \doc\license.txt)

using System;
using System.Globalization;
using System.Windows.Documents;

namespace ICSharpCode.AvalonEdit.Document
{
	/// <summary>
	/// Specifies the mode for getting the next caret position.
	/// </summary>
	public enum CaretPositioningMode
	{
		/// <summary>
		/// Normal positioning (stop at every caret position)
		/// </summary>
		Normal,
		/// <summary>
		/// Stop only on word borders.
		/// </summary>
		WordBorder,
		/// <summary>
		/// Stop only at the beginning of words. This is used for Ctrl+Left/Ctrl+Right.
		/// </summary>
		WordStart,
		/// <summary>
		/// Stop only at the beginning of words, and anywhere in the middle of symbols.
		/// </summary>
		WordStartOrSymbol,
		/// <summary>
		/// Stop only on word borders, and anywhere in the middle of symbols.
		/// </summary>
		WordBorderOrSymbol
	}
	
	/// <summary>
	/// Static helper methods for working with text.
	/// </summary>
	public static partial class TextUtilities
	{
		#region GetControlCharacterName
		// the names of the first 32 ASCII characters = Unicode C0 block
		static readonly string[] c0Table = {
			"NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL", "BS", "HT",
			"LF", "VT", "FF", "CR", "SO", "SI", "DLE", "DC1", "DC2", "DC3",
			"DC4", "NAK", "SYN", "ETB", "CAN", "EM", "SUB", "ESC", "FS", "GS",
			"RS", "US"
		};
		
		// DEL (ASCII 127) and
		// the names of the control characters in the C1 block (Unicode 128 to 159)
		static readonly string[] delAndC1Table = {
			"DEL",
			"PAD", "HOP", "BPH", "NBH", "IND", "NEL", "SSA", "ESA", "HTS", "HTJ",
			"VTS", "PLD", "PLU", "RI", "SS2", "SS3", "DCS", "PU1", "PU2", "STS",
			"CCH", "MW", "SPA", "EPA", "SOS", "SGCI", "SCI", "CSI", "ST", "OSC",
			"PM", "APC"
		};
		
		/// <summary>
		/// Gets the name of the control character.
		/// For unknown characters, the unicode codepoint is returned as 4-digit hexadecimal value.
		/// </summary>
		public static string GetControlCharacterName(char controlCharacter)
		{
			int num = (int)controlCharacter;
			if (num < c0Table.Length)
				return c0Table[num];
			else if (num >= 127 && num <= 159)
				return delAndC1Table[num - 127];
			else
				return num.ToString("x4", CultureInfo.InvariantCulture);
		}
		#endregion
		
		#region GetWhitespace
		/// <summary>
		/// Gets all whitespace (' ' and '\t', but no newlines) after offset.
		/// </summary>
		/// <param name="textSource">The text source.</param>
		/// <param name="offset">The offset where the whitespace starts.</param>
		/// <returns>The segment containing the whitespace.</returns>
		[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1702:CompoundWordsShouldBeCasedCorrectly", MessageId = "Whitespace",
		                                                 Justification = "WPF uses 'Whitespace'")]
		public static ISegment GetWhitespaceAfter(ITextSource textSource, int offset)
		{
			if (textSource == null)
				throw new ArgumentNullException("textSource");
			int pos;
			for (pos = offset; pos < textSource.TextLength; pos++) {
				char c = textSource.GetCharAt(pos);
				if (c != ' ' && c != '\t')
					break;
			}
			return new SimpleSegment(offset, pos - offset);
		}
		
		/// <summary>
		/// Gets all whitespace (' ' and '\t', but no newlines) before offset.
		/// </summary>
		/// <param name="textSource">The text source.</param>
		/// <param name="offset">The offset where the whitespace ends.</param>
		/// <returns>The segment containing the whitespace.</returns>
		[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1702:CompoundWordsShouldBeCasedCorrectly", MessageId = "Whitespace",
		                                                 Justification = "WPF uses 'Whitespace'")]
		public static ISegment GetWhitespaceBefore(ITextSource textSource, int offset)
		{
			if (textSource == null)
				throw new ArgumentNullException("textSource");
			int pos;
			for (pos = offset - 1; pos >= 0; pos--) {
				char c = textSource.GetCharAt(pos);
				if (c != ' ' && c != '\t')
					break;
			}
			pos++; // go back the one character that isn't whitespace
			return new SimpleSegment(pos, offset - pos);
		}
		
		/// <summary>
		/// Gets the leading whitespace segment on the document line.
		/// </summary>
		[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1702:CompoundWordsShouldBeCasedCorrectly", MessageId = "Whitespace",
		                                                 Justification = "WPF uses 'Whitespace'")]
		[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1011:ConsiderPassingBaseTypesAsParameters",
		                                                 Justification = "Parameter cannot be ITextSource because it must belong to the DocumentLine")]
		public static ISegment GetLeadingWhitespace(TextDocument document, DocumentLine documentLine)
		{
			if (documentLine == null)
				throw new ArgumentNullException("documentLine");
			return GetWhitespaceAfter(document, documentLine.Offset);
		}
		
		/// <summary>
		/// Gets the trailing whitespace segment on the document line.
		/// </summary>
		[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1702:CompoundWordsShouldBeCasedCorrectly", MessageId = "Whitespace",
		                                                 Justification = "WPF uses 'Whitespace'")]
		[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1011:ConsiderPassingBaseTypesAsParameters",
		                                                 Justification = "Parameter cannot be ITextSource because it must belong to the DocumentLine")]
		public static ISegment GetTrailingWhitespace(TextDocument document, DocumentLine documentLine)
		{
			if (documentLine == null)
				throw new ArgumentNullException("documentLine");
			ISegment segment = GetWhitespaceBefore(document, documentLine.EndOffset);
			// If the whole line consists of whitespace, we consider all of it as leading whitespace,
			// so return an empty segment as trailing whitespace.
			if (segment.Offset == documentLine.Offset)
				return new SimpleSegment(documentLine.EndOffset, 0);
			else
				return segment;
		}
		#endregion
		
		#region GetSingleIndentationSegment
		/// <summary>
		/// Gets a single indentation segment starting at <paramref name="offset"/> - at most one tab
		/// or <paramref name="indentationSize"/> spaces.
		/// </summary>
		/// <param name="textSource">The text source.</param>
		/// <param name="offset">The offset where the indentation segment starts.</param>
		/// <param name="indentationSize">The size of an indentation unit. See <see cref="TextEditorOptions.IndentationSize"/>.</param>
		/// <returns>The indentation segment.
		/// If there is no indentation character at the specified <paramref name="offset"/>,
		/// an empty segment is returned.</returns>
		public static ISegment GetSingleIndentationSegment(ITextSource textSource, int offset, int indentationSize)
		{
			if (textSource == null)
				throw new ArgumentNullException("textSource");
			int pos = offset;
			while (pos < textSource.TextLength) {
				char c = textSource.GetCharAt(pos);
				if (c == '\t') {
					if (pos == offset)
						return new SimpleSegment(offset, 1);
					else
						break;
				} else if (c == ' ') {
					if (pos - offset >= indentationSize)
						break;
				} else {
					break;
				}
				// continue only if c==' ' and (pos-offset)<tabSize
				pos++;
			}
			return new SimpleSegment(offset, pos - offset);
		}
		#endregion
		
		#region GetCharacterClass
		/// <summary>
		/// Gets whether the character is whitespace, part of an identifier, or line terminator.
		/// </summary>
		[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1704:IdentifiersShouldBeSpelledCorrectly", MessageId = "c")]
		public static CharacterClass GetCharacterClass(char c)
		{
			if (c == '\r' || c == '\n')
				return CharacterClass.LineTerminator;
			else if (char.IsWhiteSpace(c))
				return CharacterClass.Whitespace;
			else if (char.IsLetterOrDigit(c) || c == '_')
				return CharacterClass.IdentifierPart;
			else
				return CharacterClass.Other;
		}
		#endregion
		
		#region GetNextCaretPosition
		/// <summary>
		/// Gets the next caret position.
		/// </summary>
		/// <param name="textSource">The text source.</param>
		/// <param name="offset">The start offset inside the text source.</param>
		/// <param name="direction">The search direction (forwards or backwards).</param>
		/// <param name="mode">The mode for caret positioning.</param>
		/// <returns>The offset of the next caret position, or -1 if there is no further caret position
		/// in the text source.</returns>
		/// <remarks>
		/// This method is NOT equivalent to the actual caret movement when using VisualLine.GetNextCaretPosition.
		/// In real caret movement, there are additional caret stops at line starts and ends. This method
		/// treats linefeeds as simple whitespace.
		/// </remarks>
		public static int GetNextCaretPosition(ITextSource textSource, int offset, LogicalDirection direction, CaretPositioningMode mode)
		{
			if (textSource == null)
				throw new ArgumentNullException("textSource");
			if (mode != CaretPositioningMode.Normal
			    && mode != CaretPositioningMode.WordBorder
			    && mode != CaretPositioningMode.WordStart
			    && mode != CaretPositioningMode.WordBorderOrSymbol
			    && mode != CaretPositioningMode.WordStartOrSymbol)
			{
				throw new ArgumentException("Unsupported CaretPositioningMode: " + mode, "mode");
			}
			if (direction != LogicalDirection.Backward
			    && direction != LogicalDirection.Forward)
			{
				throw new ArgumentException("Invalid LogicalDirection: " + direction, "direction");
			}
			int textLength = textSource.TextLength;
			if (textLength <= 0) {
				// empty document? has a normal caret position at 0, though no word borders
				if (mode == CaretPositioningMode.Normal) {
					if (offset > 0 && direction == LogicalDirection.Backward) return 0;
					if (offset < 0 && direction == LogicalDirection.Forward) return 0;
				}
				return -1;
			}
			while (true) {
				int nextPos = (direction == LogicalDirection.Backward) ? offset - 1 : offset + 1;
				
				// return -1 if there is no further caret position in the text source
				// we also need this to handle offset values outside the valid range
				if (nextPos < 0 || nextPos > textLength)
					return -1;
				
				// stop at every caret position? we can stop immediately.
				if (mode == CaretPositioningMode.Normal)
					return nextPos;
				// not normal mode? we're looking for word borders...
				
				// check if we've run against the textSource borders.
				// a 'textSource' usually isn't the whole document, but a single VisualLineElement.
				if (nextPos == 0) {
					// at the document start, there's only a word border
					// if the first character is not whitespace
					if (!char.IsWhiteSpace(textSource.GetCharAt(0)))
						return nextPos;
				} else if (nextPos == textLength) {
					// at the document end, there's never a word start
					if (mode != CaretPositioningMode.WordStart && mode != CaretPositioningMode.WordStartOrSymbol) {
						// at the document end, there's only a word border
						// if the last character is not whitespace
						if (!char.IsWhiteSpace(textSource.GetCharAt(textLength - 1)))
							return nextPos;
					}
				} else {
					CharacterClass charBefore = GetCharacterClass(textSource.GetCharAt(nextPos - 1));
					CharacterClass charAfter = GetCharacterClass(textSource.GetCharAt(nextPos));
					if (charBefore == charAfter) {
						if (charBefore == CharacterClass.Other &&
						    (mode == CaretPositioningMode.WordBorderOrSymbol || mode == CaretPositioningMode.WordStartOrSymbol))
						{
							// With the "OrSymbol" modes, there's a word border and start between any two unknown characters
							return nextPos;
						}
					} else {
						// this looks like a possible border
						
						// if we're looking for word starts, check that this is a word start (and not a word end)
						// if we're just checking for word borders, accept unconditionally
						if (!((mode == CaretPositioningMode.WordStart || mode == CaretPositioningMode.WordStartOrSymbol)
						      && (charAfter == CharacterClass.Whitespace || charAfter == CharacterClass.LineTerminator)))
						{
							return nextPos;
						}
					}
				}
				// we'll have to continue searching...
				offset = nextPos;
			}
		}
		#endregion
	}
	
	/// <summary>
	/// Classifies a character as whitespace, line terminator, part of an identifier, or other.
	/// </summary>
	public enum CharacterClass
	{
		/// <summary>
		/// The character is not whitespace, line terminator or part of an identifier.
		/// </summary>
		Other,
		/// <summary>
		/// The character is whitespace (but not line terminator).
		/// </summary>
		[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1702:CompoundWordsShouldBeCasedCorrectly", MessageId = "Whitespace",
		                                                 Justification = "WPF uses 'Whitespace'")]
		Whitespace,
		/// <summary>
		/// The character can be part of an identifier (Letter, digit or underscore).
		/// </summary>
		IdentifierPart,
		/// <summary>
		/// The character is line terminator (\r or \n).
		/// </summary>
		LineTerminator
	}
}