1 | // Copyright (c) 2014 AlphaSierraPapa for the SharpDevelop Team |
---|
2 | // |
---|
3 | // Permission is hereby granted, free of charge, to any person obtaining a copy of this |
---|
4 | // software and associated documentation files (the "Software"), to deal in the Software |
---|
5 | // without restriction, including without limitation the rights to use, copy, modify, merge, |
---|
6 | // publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons |
---|
7 | // to whom the Software is furnished to do so, subject to the following conditions: |
---|
8 | // |
---|
9 | // The above copyright notice and this permission notice shall be included in all copies or |
---|
10 | // substantial portions of the Software. |
---|
11 | // |
---|
12 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, |
---|
13 | // INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR |
---|
14 | // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE |
---|
15 | // FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
---|
16 | // OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
---|
17 | // DEALINGS IN THE SOFTWARE. |
---|
18 | |
---|
19 | using System; |
---|
20 | using System.Globalization; |
---|
21 | using System.Windows.Documents; |
---|
22 | #if NREFACTORY |
---|
23 | using ICSharpCode.NRefactory.Editor; |
---|
24 | #endif |
---|
25 | |
---|
26 | namespace ICSharpCode.AvalonEdit.Document |
---|
27 | { |
---|
/// <summary>
/// Selects which positions count as caret stops when searching for the next caret position.
/// </summary>
public enum CaretPositioningMode
{
    /// <summary>
    /// Normal positioning: the caret stops after every grapheme.
    /// </summary>
    Normal = 0,

    /// <summary>
    /// The caret stops only on word borders.
    /// </summary>
    WordBorder = 1,

    /// <summary>
    /// The caret stops only at the beginning of words. This is used for Ctrl+Left/Ctrl+Right.
    /// </summary>
    WordStart = 2,

    /// <summary>
    /// The caret stops only at the beginning of words, and anywhere in the middle of symbols.
    /// </summary>
    WordStartOrSymbol = 3,

    /// <summary>
    /// The caret stops only on word borders, and anywhere in the middle of symbols.
    /// </summary>
    WordBorderOrSymbol = 4,

    /// <summary>
    /// The caret stops between every Unicode codepoint, even within the same grapheme.
    /// This is used to implement deleting the previous grapheme when Backspace is pressed.
    /// </summary>
    EveryCodepoint = 5
}
---|
59 | |
---|
60 | /// <summary> |
---|
61 | /// Static helper methods for working with text. |
---|
62 | /// </summary> |
---|
63 | public static partial class TextUtilities |
---|
64 | { |
---|
65 | #region GetControlCharacterName |
---|
// Abbreviations of the Unicode C0 control characters (codepoints 0 to 31), indexed by codepoint.
static readonly string[] c0Table = {
    "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL",
    "BS", "HT", "LF", "VT", "FF", "CR", "SO", "SI",
    "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
    "CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US"
};

// Abbreviations for DEL (codepoint 127) followed by the Unicode C1 control characters
// (codepoints 128 to 159), indexed by (codepoint - 127).
static readonly string[] delAndC1Table = {
    "DEL",
    "PAD", "HOP", "BPH", "NBH", "IND", "NEL", "SSA", "ESA",
    "HTS", "HTJ", "VTS", "PLD", "PLU", "RI", "SS2", "SS3",
    "DCS", "PU1", "PU2", "STS", "CCH", "MW", "SPA", "EPA",
    "SOS", "SGCI", "SCI", "CSI", "ST", "OSC", "PM", "APC"
};

/// <summary>
/// Gets the abbreviated name of a control character.
/// Characters that are neither in the C0 block, DEL, nor in the C1 block are reported
/// as their codepoint, formatted as a 4-digit lower-case hexadecimal value.
/// </summary>
/// <param name="controlCharacter">The character to look up.</param>
/// <returns>The abbreviation (for example "NUL" or "DEL"), or the hexadecimal codepoint.</returns>
public static string GetControlCharacterName(char controlCharacter)
{
    int codepoint = controlCharacter;
    if (codepoint < c0Table.Length) {
        return c0Table[codepoint];
    }

    // The second table starts at DEL (127) and covers everything up to codepoint 159.
    int delRelativeIndex = codepoint - 127;
    if (delRelativeIndex >= 0 && delRelativeIndex < delAndC1Table.Length) {
        return delAndC1Table[delRelativeIndex];
    }

    return codepoint.ToString("x4", CultureInfo.InvariantCulture);
}
---|
98 | #endregion |
---|
99 | |
---|
100 | #region GetWhitespace |
---|
/// <summary>
/// Gets the run of spaces and tabs that starts at <paramref name="offset"/>.
/// Any other character, including a newline, ends the run.
/// </summary>
/// <param name="textSource">The text source.</param>
/// <param name="offset">The offset where the whitespace starts.</param>
/// <returns>The segment containing the whitespace; it is empty if there is none.</returns>
/// <exception cref="ArgumentNullException"><paramref name="textSource"/> is null.</exception>
[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1702:CompoundWordsShouldBeCasedCorrectly", MessageId = "Whitespace",
    Justification = "WPF uses 'Whitespace'")]
public static ISegment GetWhitespaceAfter(ITextSource textSource, int offset)
{
    if (textSource == null) {
        throw new ArgumentNullException("textSource");
    }

    int end = offset;
    while (end < textSource.TextLength) {
        char current = textSource.GetCharAt(end);
        bool isBlank = current == ' ' || current == '\t';
        if (!isBlank) {
            break;
        }
        end++;
    }
    return new SimpleSegment(offset, end - offset);
}
---|
121 | |
---|
/// <summary>
/// Gets the run of spaces and tabs that ends at <paramref name="offset"/>.
/// Any other character, including a newline, ends the run.
/// </summary>
/// <param name="textSource">The text source.</param>
/// <param name="offset">The offset where the whitespace ends.</param>
/// <returns>The segment containing the whitespace; it is empty if there is none.</returns>
/// <exception cref="ArgumentNullException"><paramref name="textSource"/> is null.</exception>
[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1702:CompoundWordsShouldBeCasedCorrectly", MessageId = "Whitespace",
    Justification = "WPF uses 'Whitespace'")]
public static ISegment GetWhitespaceBefore(ITextSource textSource, int offset)
{
    if (textSource == null) {
        throw new ArgumentNullException("textSource");
    }

    // 'start' is the offset of the first character known to belong to the run;
    // extend it to the left for as long as the preceding character is blank.
    int start = offset;
    while (start > 0) {
        char previous = textSource.GetCharAt(start - 1);
        bool isBlank = previous == ' ' || previous == '\t';
        if (!isBlank) {
            break;
        }
        start--;
    }
    return new SimpleSegment(start, offset - start);
}
---|
143 | |
---|
/// <summary>
/// Gets the leading whitespace segment on the document line:
/// the run of spaces and tabs that starts at the line's offset.
/// </summary>
/// <param name="document">The document that <paramref name="documentLine"/> belongs to.</param>
/// <param name="documentLine">The line whose leading whitespace is requested.</param>
/// <returns>The segment containing the leading whitespace; it is empty if the line does not
/// start with a space or tab.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="document"/> or <paramref name="documentLine"/> is null.
/// </exception>
[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1702:CompoundWordsShouldBeCasedCorrectly", MessageId = "Whitespace",
    Justification = "WPF uses 'Whitespace'")]
[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1011:ConsiderPassingBaseTypesAsParameters",
    Justification = "Parameter cannot be ITextSource because it must belong to the DocumentLine")]
public static ISegment GetLeadingWhitespace(TextDocument document, DocumentLine documentLine)
{
    if (documentLine == null)
        throw new ArgumentNullException("documentLine");
    // Check the document here: otherwise the helper below reports the failure under its own
    // parameter name ("textSource"), which is not a parameter of this method.
    if (document == null)
        throw new ArgumentNullException("document");
    return GetWhitespaceAfter(document, documentLine.Offset);
}
---|
157 | |
---|
/// <summary>
/// Gets the trailing whitespace segment on the document line:
/// the run of spaces and tabs that ends at the line's end offset.
/// </summary>
/// <param name="document">The document that <paramref name="documentLine"/> belongs to.</param>
/// <param name="documentLine">The line whose trailing whitespace is requested.</param>
/// <returns>The segment containing the trailing whitespace. If the whole line consists of
/// whitespace, an empty segment positioned at the line's end offset is returned.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="document"/> or <paramref name="documentLine"/> is null.
/// </exception>
[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1702:CompoundWordsShouldBeCasedCorrectly", MessageId = "Whitespace",
    Justification = "WPF uses 'Whitespace'")]
[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1011:ConsiderPassingBaseTypesAsParameters",
    Justification = "Parameter cannot be ITextSource because it must belong to the DocumentLine")]
public static ISegment GetTrailingWhitespace(TextDocument document, DocumentLine documentLine)
{
    if (documentLine == null)
        throw new ArgumentNullException("documentLine");
    // Check the document here: otherwise the helper below reports the failure under its own
    // parameter name ("textSource"), which is not a parameter of this method.
    if (document == null)
        throw new ArgumentNullException("document");
    ISegment segment = GetWhitespaceBefore(document, documentLine.EndOffset);
    // If the whole line consists of whitespace, we consider all of it as leading whitespace,
    // so return an empty segment as trailing whitespace.
    if (segment.Offset == documentLine.Offset)
        return new SimpleSegment(documentLine.EndOffset, 0);
    else
        return segment;
}
---|
177 | #endregion |
---|
178 | |
---|
179 | #region GetSingleIndentationSegment |
---|
/// <summary>
/// Gets a single indentation segment starting at <paramref name="offset"/>: either exactly one tab,
/// or a run of at most <paramref name="indentationSize"/> spaces.
/// </summary>
/// <param name="textSource">The text source.</param>
/// <param name="offset">The offset where the indentation segment starts.</param>
/// <param name="indentationSize">The size of an indentation unit. See <see cref="TextEditorOptions.IndentationSize"/>.</param>
/// <returns>The indentation segment.
/// If there is no indentation character at the specified <paramref name="offset"/>,
/// an empty segment is returned.</returns>
/// <exception cref="ArgumentNullException"><paramref name="textSource"/> is null.</exception>
public static ISegment GetSingleIndentationSegment(ITextSource textSource, int offset, int indentationSize)
{
    if (textSource == null) {
        throw new ArgumentNullException("textSource");
    }

    int length = textSource.TextLength;

    // A tab directly at the start offset is a complete indentation unit on its own.
    if (offset < length && textSource.GetCharAt(offset) == '\t') {
        return new SimpleSegment(offset, 1);
    }

    // Otherwise collect spaces until the unit is full or a non-space character
    // (including a tab following some spaces) is reached.
    int end = offset;
    while (end < length
           && end - offset < indentationSize
           && textSource.GetCharAt(end) == ' ')
    {
        end++;
    }
    return new SimpleSegment(offset, end - offset);
}
---|
213 | #endregion |
---|
214 | |
---|
215 | #region GetCharacterClass |
---|
/// <summary>
/// Classifies a single UTF-16 character.
/// '\r' and '\n' are line terminators and '_' is an identifier part;
/// every other character is classified by its Unicode category.
/// </summary>
/// <param name="c">The character to classify.</param>
/// <returns>The character class of <paramref name="c"/>.</returns>
[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1704:IdentifiersShouldBeSpelledCorrectly", MessageId = "c")]
public static CharacterClass GetCharacterClass(char c)
{
    switch (c) {
        case '\r':
        case '\n':
            return CharacterClass.LineTerminator;
        case '_':
            return CharacterClass.IdentifierPart;
        default:
            return GetCharacterClass(char.GetUnicodeCategory(c));
    }
}
---|
228 | |
---|
// Classifies the codepoint encoded by a UTF-16 surrogate pair via its Unicode category.
// If the two characters do not form a valid pair, the result is CharacterClass.Other.
static CharacterClass GetCharacterClass(char highSurrogate, char lowSurrogate)
{
    if (!char.IsSurrogatePair(highSurrogate, lowSurrogate)) {
        // malformed surrogate pair
        return CharacterClass.Other;
    }

    string pair = new string(new char[] { highSurrogate, lowSurrogate });
    return GetCharacterClass(char.GetUnicodeCategory(pair, 0));
}
---|
238 | |
---|
// Maps a Unicode general category to the coarser CharacterClass:
// separators and control characters count as whitespace, letters and decimal digits
// as identifier parts, the three mark categories as combining marks, and everything else as Other.
static CharacterClass GetCharacterClass(UnicodeCategory c)
{
    if (c == UnicodeCategory.SpaceSeparator
        || c == UnicodeCategory.LineSeparator
        || c == UnicodeCategory.ParagraphSeparator
        || c == UnicodeCategory.Control)
    {
        return CharacterClass.Whitespace;
    }

    if (c == UnicodeCategory.UppercaseLetter
        || c == UnicodeCategory.LowercaseLetter
        || c == UnicodeCategory.TitlecaseLetter
        || c == UnicodeCategory.ModifierLetter
        || c == UnicodeCategory.OtherLetter
        || c == UnicodeCategory.DecimalDigitNumber)
    {
        return CharacterClass.IdentifierPart;
    }

    if (c == UnicodeCategory.NonSpacingMark
        || c == UnicodeCategory.SpacingCombiningMark
        || c == UnicodeCategory.EnclosingMark)
    {
        return CharacterClass.CombiningMark;
    }

    return CharacterClass.Other;
}
---|
262 | #endregion |
---|
263 | |
---|
264 | #region GetNextCaretPosition |
---|
/// <summary>
/// Gets the next caret position.
/// </summary>
/// <param name="textSource">The text source.</param>
/// <param name="offset">The start offset inside the text source.</param>
/// <param name="direction">The search direction (forwards or backwards).</param>
/// <param name="mode">The mode for caret positioning.</param>
/// <returns>The offset of the next caret position, or -1 if there is no further caret position
/// in the text source.</returns>
/// <exception cref="ArgumentNullException"><paramref name="textSource"/> is null.</exception>
/// <exception cref="ArgumentException">
/// <paramref name="mode"/> is not one of the supported modes, or
/// <paramref name="direction"/> is neither forward nor backward.
/// </exception>
/// <remarks>
/// This method is NOT equivalent to the actual caret movement when using VisualLine.GetNextCaretPosition.
/// In real caret movement, there are additional caret stops at line starts and ends. This method
/// treats linefeeds as simple whitespace.
/// </remarks>
public static int GetNextCaretPosition(ITextSource textSource, int offset, LogicalDirection direction, CaretPositioningMode mode)
{
    if (textSource == null) {
        throw new ArgumentNullException("textSource");
    }

    // Every supported mode belongs to exactly one of these three groups.
    bool stopsAtEveryPosition = IsNormal(mode);
    bool wordStartsOnly = mode == CaretPositioningMode.WordStart
        || mode == CaretPositioningMode.WordStartOrSymbol;
    bool wordBorders = mode == CaretPositioningMode.WordBorder
        || mode == CaretPositioningMode.WordBorderOrSymbol;
    if (!stopsAtEveryPosition && !wordStartsOnly && !wordBorders) {
        throw new ArgumentException("Unsupported CaretPositioningMode: " + mode, "mode");
    }

    bool backward = direction == LogicalDirection.Backward;
    if (!backward && direction != LogicalDirection.Forward) {
        throw new ArgumentException("Invalid LogicalDirection: " + direction, "direction");
    }

    int textLength = textSource.TextLength;
    if (textLength <= 0) {
        // An empty text source has a single normal caret position at 0 and no word borders.
        // That position is only reported when moving towards it from outside.
        if (stopsAtEveryPosition) {
            bool movingTowardsZero = backward ? offset > 0 : offset < 0;
            if (movingTowardsZero) {
                return 0;
            }
        }
        return -1;
    }

    int step = backward ? -1 : 1;
    // Walk one position at a time. Leaving the range [0, textLength] means there is no
    // further caret position; this also covers start offsets outside the valid range.
    for (int candidate = offset + step; candidate >= 0 && candidate <= textLength; candidate += step) {
        // The borders of the text source need special treatment:
        // a 'textSource' usually isn't the whole document, but a single VisualLineElement.
        if (candidate == 0) {
            // At the start there is only a word border if the first character is not whitespace.
            if (stopsAtEveryPosition || !char.IsWhiteSpace(textSource.GetCharAt(0))) {
                return candidate;
            }
            continue;
        }

        if (candidate == textLength) {
            // At the end there is never a word start, and only a word border
            // if the last character is not whitespace.
            if (!wordStartsOnly
                && (stopsAtEveryPosition || !char.IsWhiteSpace(textSource.GetCharAt(textLength - 1))))
            {
                return candidate;
            }
            continue;
        }

        char charBefore = textSource.GetCharAt(candidate - 1);
        char charAfter = textSource.GetCharAt(candidate);
        // Don't stop in the middle of a surrogate pair.
        if (char.IsSurrogatePair(charBefore, charAfter)) {
            continue;
        }

        // Characters outside the BMP are classified from the complete surrogate pair
        // whenever the other half is available inside the text source.
        CharacterClass classBefore = (char.IsLowSurrogate(charBefore) && candidate >= 2)
            ? GetCharacterClass(textSource.GetCharAt(candidate - 2), charBefore)
            : GetCharacterClass(charBefore);
        CharacterClass classAfter = (char.IsHighSurrogate(charAfter) && candidate + 1 < textLength)
            ? GetCharacterClass(charAfter, textSource.GetCharAt(candidate + 1))
            : GetCharacterClass(charAfter);

        if (StopBetweenCharacters(mode, classBefore, classAfter)) {
            return candidate;
        }
    }
    return -1;
}
---|
354 | |
---|
// Returns true for the two modes that are not word-based:
// Normal and EveryCodepoint.
static bool IsNormal(CaretPositioningMode mode)
{
    switch (mode) {
        case CaretPositioningMode.Normal:
        case CaretPositioningMode.EveryCodepoint:
            return true;
        default:
            return false;
    }
}
---|
359 | |
---|
// Decides whether the position between two adjacent characters, given by their character
// classes, is a caret stop in the given mode.
static bool StopBetweenCharacters(CaretPositioningMode mode, CharacterClass charBefore, CharacterClass charAfter)
{
    if (mode == CaretPositioningMode.EveryCodepoint) {
        return true;
    }
    // Don't stop in the middle of a grapheme.
    if (charAfter == CharacterClass.CombiningMark) {
        return false;
    }
    // Normal mode stops after every grapheme.
    if (mode == CaretPositioningMode.Normal) {
        return true;
    }

    bool symbolMode = mode == CaretPositioningMode.WordBorderOrSymbol
        || mode == CaretPositioningMode.WordStartOrSymbol;
    bool wordStartMode = mode == CaretPositioningMode.WordStart
        || mode == CaretPositioningMode.WordStartOrSymbol;

    if (charBefore == charAfter) {
        // Same class on both sides: only the "OrSymbol" modes stop here, and only
        // between two characters of class Other.
        return symbolMode && charBefore == CharacterClass.Other;
    }

    // The classes differ, so this is a word border. When looking for word starts it must
    // not be a word end, i.e. the following character must not be whitespace or a line terminator.
    bool followedByBlank = charAfter == CharacterClass.Whitespace
        || charAfter == CharacterClass.LineTerminator;
    return !wordStartMode || !followedByBlank;
}
---|
390 | #endregion |
---|
391 | } |
---|
392 | |
---|
/// <summary>
/// Classifies a character as whitespace, line terminator, part of an identifier,
/// combining mark, or other.
/// </summary>
public enum CharacterClass
{
    /// <summary>
    /// The character is none of the following: whitespace, line terminator,
    /// part of an identifier, combining mark.
    /// </summary>
    Other = 0,

    /// <summary>
    /// The character is whitespace (but not a line terminator).
    /// </summary>
    [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1702:CompoundWordsShouldBeCasedCorrectly", MessageId = "Whitespace",
        Justification = "WPF uses 'Whitespace'")]
    Whitespace = 1,

    /// <summary>
    /// The character can be part of an identifier (letter, digit or underscore).
    /// </summary>
    IdentifierPart = 2,

    /// <summary>
    /// The character is a line terminator (\r or \n).
    /// </summary>
    LineTerminator = 3,

    /// <summary>
    /// The character is a Unicode combining mark that modifies the previous character.
    /// Corresponds to the Unicode designations "Mn", "Mc" and "Me".
    /// </summary>
    CombiningMark = 4
}
---|
422 | } |
---|