Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2695_dataset-ids/HeuristicLab.ExtLibs/HeuristicLab.AvalonEdit/5.0.1/AvalonEdit-5.0.1/Document/TextUtilities.cs

Last change on this file was 11700, checked in by jkarder, 10 years ago

#2077: created branch and added first version

File size: 16.6 KB
Line 
1// Copyright (c) 2014 AlphaSierraPapa for the SharpDevelop Team
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy of this
4// software and associated documentation files (the "Software"), to deal in the Software
5// without restriction, including without limitation the rights to use, copy, modify, merge,
6// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
7// to whom the Software is furnished to do so, subject to the following conditions:
8//
9// The above copyright notice and this permission notice shall be included in all copies or
10// substantial portions of the Software.
11//
12// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
13// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
14// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE
15// FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
16// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
17// DEALINGS IN THE SOFTWARE.
18
19using System;
20using System.Globalization;
21using System.Windows.Documents;
22#if NREFACTORY
23using ICSharpCode.NRefactory.Editor;
24#endif
25
26namespace ICSharpCode.AvalonEdit.Document
27{
/// <summary>
/// Selects which offsets count as caret stops when searching for the next caret position.
/// </summary>
public enum CaretPositioningMode
{
  /// <summary>
  /// Regular positioning: the caret stops after every grapheme.
  /// </summary>
  Normal = 0,
  /// <summary>
  /// The caret stops only on word borders.
  /// </summary>
  WordBorder = 1,
  /// <summary>
  /// The caret stops only where a word begins. This is used for Ctrl+Left/Ctrl+Right.
  /// </summary>
  WordStart = 2,
  /// <summary>
  /// The caret stops where a word begins, and additionally anywhere in the middle of symbols.
  /// </summary>
  WordStartOrSymbol = 3,
  /// <summary>
  /// The caret stops on word borders, and additionally anywhere in the middle of symbols.
  /// </summary>
  WordBorderOrSymbol = 4,
  /// <summary>
  /// The caret stops between every pair of Unicode codepoints, even inside a single grapheme.
  /// This is used to implement deleting the previous grapheme when Backspace is pressed.
  /// </summary>
  EveryCodepoint = 5
}
59 
60  /// <summary>
61  /// Static helper methods for working with text.
62  /// </summary>
63  public static partial class TextUtilities
64  {
65    #region GetControlCharacterName
// Abbreviations of the 32 characters in the Unicode C0 block (codepoints 0 to 31),
// indexed directly by codepoint.
static readonly string[] c0Table = {
  "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL",
  "BS", "HT", "LF", "VT", "FF", "CR", "SO", "SI",
  "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
  "CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US"
};

// Abbreviation of DEL (codepoint 127) followed by those of the C1 block
// (codepoints 128 to 159); indexed by (codepoint - 127).
static readonly string[] delAndC1Table = {
  "DEL",
  "PAD", "HOP", "BPH", "NBH", "IND", "NEL", "SSA", "ESA",
  "HTS", "HTJ", "VTS", "PLD", "PLU", "RI", "SS2", "SS3",
  "DCS", "PU1", "PU2", "STS", "CCH", "MW", "SPA", "EPA",
  "SOS", "SGCI", "SCI", "CSI", "ST", "OSC", "PM", "APC"
};

/// <summary>
/// Looks up the abbreviated name of a control character.
/// </summary>
/// <param name="controlCharacter">The character to name.</param>
/// <returns>
/// The abbreviation for characters in the C0 block (0 to 31), for DEL (127) and for the
/// C1 block (128 to 159). For any other character, the codepoint formatted as a
/// lower-case hexadecimal number with at least four digits.
/// </returns>
public static string GetControlCharacterName(char controlCharacter)
{
  const int firstDelOrC1 = 127;
  const int lastDelOrC1 = 159;

  int codepoint = controlCharacter;
  if (codepoint < c0Table.Length) {
    return c0Table[codepoint];
  }
  if (codepoint >= firstDelOrC1 && codepoint <= lastDelOrC1) {
    return delAndC1Table[codepoint - firstDelOrC1];
  }
  // no known name: fall back to the hexadecimal codepoint
  return codepoint.ToString("x4", CultureInfo.InvariantCulture);
}
98    #endregion
99   
100    #region GetWhitespace
/// <summary>
/// Finds the run of spaces and tabs that begins at <paramref name="offset"/>.
/// Newline characters are not treated as whitespace and end the run.
/// </summary>
/// <param name="textSource">The text source to read from.</param>
/// <param name="offset">The offset at which the whitespace run starts.</param>
/// <returns>A segment that starts at <paramref name="offset"/> and covers the consecutive
/// ' ' and '\t' characters found there; its length is 0 if there are none.</returns>
/// <exception cref="ArgumentNullException"><paramref name="textSource"/> is null.</exception>
[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1702:CompoundWordsShouldBeCasedCorrectly", MessageId = "Whitespace",
                                                 Justification = "WPF uses 'Whitespace'")]
public static ISegment GetWhitespaceAfter(ITextSource textSource, int offset)
{
  if (textSource == null)
    throw new ArgumentNullException("textSource");

  // advance 'end' until it reaches the end of the text or a character that is neither space nor tab
  int end = offset;
  while (end < textSource.TextLength) {
    char current = textSource.GetCharAt(end);
    bool isSpaceOrTab = current == ' ' || current == '\t';
    if (!isSpaceOrTab)
      break;
    end++;
  }
  return new SimpleSegment(offset, end - offset);
}
121   
/// <summary>
/// Finds the run of spaces and tabs that ends at <paramref name="offset"/>.
/// Newline characters are not treated as whitespace and end the run.
/// </summary>
/// <param name="textSource">The text source to read from.</param>
/// <param name="offset">The offset at which the whitespace run ends.</param>
/// <returns>A segment that ends at <paramref name="offset"/> and covers the consecutive
/// ' ' and '\t' characters directly in front of it; its length is 0 if there are none.</returns>
/// <exception cref="ArgumentNullException"><paramref name="textSource"/> is null.</exception>
[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1702:CompoundWordsShouldBeCasedCorrectly", MessageId = "Whitespace",
                                                 Justification = "WPF uses 'Whitespace'")]
public static ISegment GetWhitespaceBefore(ITextSource textSource, int offset)
{
  if (textSource == null)
    throw new ArgumentNullException("textSource");

  // 'start' always points at the first character known to belong to the run;
  // move it backwards while the character in front of it is a space or a tab
  int start = offset;
  while (start > 0) {
    char previous = textSource.GetCharAt(start - 1);
    bool isSpaceOrTab = previous == ' ' || previous == '\t';
    if (!isSpaceOrTab)
      break;
    start--;
  }
  return new SimpleSegment(start, offset - start);
}
143   
/// <summary>
/// Gets the leading whitespace segment on the document line.
/// </summary>
/// <param name="document">The document to read the text from; per the CA1011 justification
/// below it must be the document that <paramref name="documentLine"/> belongs to.</param>
/// <param name="documentLine">The line whose leading whitespace is requested.</param>
/// <returns>The segment of consecutive ' ' and '\t' characters that starts at the line's
/// offset; its length is 0 if the line does not start with whitespace.</returns>
/// <exception cref="ArgumentNullException"><paramref name="documentLine"/> or
/// <paramref name="document"/> is null.</exception>
[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1702:CompoundWordsShouldBeCasedCorrectly", MessageId = "Whitespace",
                                                 Justification = "WPF uses 'Whitespace'")]
[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1011:ConsiderPassingBaseTypesAsParameters",
                                                 Justification = "Parameter cannot be ITextSource because it must belong to the DocumentLine")]
public static ISegment GetLeadingWhitespace(TextDocument document, DocumentLine documentLine)
{
  if (documentLine == null)
    throw new ArgumentNullException("documentLine");
  // Validate here so that the exception names the parameter the caller actually passed,
  // instead of the callee's "textSource".
  if (document == null)
    throw new ArgumentNullException("document");
  return GetWhitespaceAfter(document, documentLine.Offset);
}
157   
/// <summary>
/// Gets the trailing whitespace segment on the document line.
/// </summary>
/// <param name="document">The document to read the text from; per the CA1011 justification
/// below it must be the document that <paramref name="documentLine"/> belongs to.</param>
/// <param name="documentLine">The line whose trailing whitespace is requested.</param>
/// <returns>The segment of consecutive ' ' and '\t' characters that ends at the line's
/// end offset. If that run reaches back to the start of the line, an empty segment
/// located at the line's end offset is returned instead.</returns>
/// <exception cref="ArgumentNullException"><paramref name="documentLine"/> or
/// <paramref name="document"/> is null.</exception>
[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1702:CompoundWordsShouldBeCasedCorrectly", MessageId = "Whitespace",
                                                 Justification = "WPF uses 'Whitespace'")]
[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1011:ConsiderPassingBaseTypesAsParameters",
                                                 Justification = "Parameter cannot be ITextSource because it must belong to the DocumentLine")]
public static ISegment GetTrailingWhitespace(TextDocument document, DocumentLine documentLine)
{
  if (documentLine == null)
    throw new ArgumentNullException("documentLine");
  // Validate here so that the exception names the parameter the caller actually passed,
  // instead of the callee's "textSource".
  if (document == null)
    throw new ArgumentNullException("document");
  ISegment segment = GetWhitespaceBefore(document, documentLine.EndOffset);
  // If the whole line consists of whitespace, we consider all of it as leading whitespace,
  // so return an empty segment as trailing whitespace.
  if (segment.Offset == documentLine.Offset)
    return new SimpleSegment(documentLine.EndOffset, 0);
  else
    return segment;
}
177    #endregion
178   
179    #region GetSingleIndentationSegment
/// <summary>
/// Gets one unit of indentation starting at <paramref name="offset"/>: either a single tab,
/// or up to <paramref name="indentationSize"/> consecutive spaces.
/// </summary>
/// <param name="textSource">The text source to read from.</param>
/// <param name="offset">The offset at which the indentation segment starts.</param>
/// <param name="indentationSize">The size of an indentation unit. See <see cref="TextEditorOptions.IndentationSize"/>.</param>
/// <returns>The indentation segment, starting at <paramref name="offset"/>.
/// If the character at <paramref name="offset"/> is neither a tab nor a space,
/// the returned segment is empty.</returns>
/// <exception cref="ArgumentNullException"><paramref name="textSource"/> is null.</exception>
public static ISegment GetSingleIndentationSegment(ITextSource textSource, int offset, int indentationSize)
{
  if (textSource == null)
    throw new ArgumentNullException("textSource");

  // a tab right at the start is a complete indentation unit on its own
  if (offset < textSource.TextLength && textSource.GetCharAt(offset) == '\t')
    return new SimpleSegment(offset, 1);

  // otherwise collect spaces until the unit is full; a tab that follows
  // some spaces is not part of this unit and ends the run like any other character
  int end = offset;
  while (end < textSource.TextLength
         && textSource.GetCharAt(end) == ' '
         && end - offset < indentationSize)
  {
    end++;
  }
  return new SimpleSegment(offset, end - offset);
}
213    #endregion
214   
215    #region GetCharacterClass
/// <summary>
/// Classifies a single UTF-16 code unit.
/// </summary>
/// <param name="c">The character to classify.</param>
/// <returns>
/// <see cref="CharacterClass.LineTerminator"/> for '\r' and '\n',
/// <see cref="CharacterClass.IdentifierPart"/> for '_'; every other character is
/// classified according to its Unicode category.
/// </returns>
[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1704:IdentifiersShouldBeSpelledCorrectly", MessageId = "c")]
public static CharacterClass GetCharacterClass(char c)
{
  switch (c) {
    case '\r':
    case '\n':
      return CharacterClass.LineTerminator;
    case '_':
      // the underscore counts as part of an identifier although it is not a letter or digit
      return CharacterClass.IdentifierPart;
    default:
      return GetCharacterClass(char.GetUnicodeCategory(c));
  }
}
228   
// Classifies a character outside the BMP that is given as its two UTF-16 code units.
// Returns CharacterClass.Other when the two units do not form a valid surrogate pair.
static CharacterClass GetCharacterClass(char highSurrogate, char lowSurrogate)
{
  if (!char.IsSurrogatePair(highSurrogate, lowSurrogate)) {
    // malformed surrogate pair
    return CharacterClass.Other;
  }
  // the category lookup for a surrogate pair needs both code units in one string
  string pair = new string(new[] { highSurrogate, lowSurrogate });
  return GetCharacterClass(char.GetUnicodeCategory(pair, 0));
}
238   
// Maps a Unicode general category to the coarser CharacterClass used for caret positioning.
static CharacterClass GetCharacterClass(UnicodeCategory c)
{
  // separators and control characters are all treated as whitespace
  bool isWhitespace = c == UnicodeCategory.SpaceSeparator
    || c == UnicodeCategory.LineSeparator
    || c == UnicodeCategory.ParagraphSeparator
    || c == UnicodeCategory.Control;
  if (isWhitespace)
    return CharacterClass.Whitespace;

  // letters of any kind and decimal digits can be part of an identifier
  bool isIdentifierPart = c == UnicodeCategory.UppercaseLetter
    || c == UnicodeCategory.LowercaseLetter
    || c == UnicodeCategory.TitlecaseLetter
    || c == UnicodeCategory.ModifierLetter
    || c == UnicodeCategory.OtherLetter
    || c == UnicodeCategory.DecimalDigitNumber;
  if (isIdentifierPart)
    return CharacterClass.IdentifierPart;

  // the three mark categories
  bool isCombiningMark = c == UnicodeCategory.NonSpacingMark
    || c == UnicodeCategory.SpacingCombiningMark
    || c == UnicodeCategory.EnclosingMark;
  if (isCombiningMark)
    return CharacterClass.CombiningMark;

  return CharacterClass.Other;
}
262    #endregion
263   
264    #region GetNextCaretPosition
/// <summary>
/// Searches for the closest caret stop after (or before) the given offset.
/// </summary>
/// <param name="textSource">The text source to search in.</param>
/// <param name="offset">The offset inside the text source at which the search starts.</param>
/// <param name="direction">Whether to search forwards or backwards.</param>
/// <param name="mode">The caret positioning mode that decides which offsets are stops.</param>
/// <returns>The offset of the next caret position, or -1 if the text source contains
/// no further caret position in the requested direction.</returns>
/// <exception cref="ArgumentNullException"><paramref name="textSource"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="mode"/> or <paramref name="direction"/>
/// is not one of the supported values.</exception>
/// <remarks>
/// This method is NOT equivalent to the actual caret movement when using VisualLine.GetNextCaretPosition.
/// In real caret movement, there are additional caret stops at line starts and ends. This method
/// treats linefeeds as simple whitespace.
/// </remarks>
public static int GetNextCaretPosition(ITextSource textSource, int offset, LogicalDirection direction, CaretPositioningMode mode)
{
  if (textSource == null)
    throw new ArgumentNullException("textSource");

  bool modeIsSupported =
    mode == CaretPositioningMode.Normal
    || mode == CaretPositioningMode.EveryCodepoint
    || mode == CaretPositioningMode.WordBorder
    || mode == CaretPositioningMode.WordBorderOrSymbol
    || mode == CaretPositioningMode.WordStart
    || mode == CaretPositioningMode.WordStartOrSymbol;
  if (!modeIsSupported)
    throw new ArgumentException("Unsupported CaretPositioningMode: " + mode, "mode");

  bool backwards = direction == LogicalDirection.Backward;
  if (!backwards && direction != LogicalDirection.Forward)
    throw new ArgumentException("Invalid LogicalDirection: " + direction, "direction");

  bool stopsEverywhere = IsNormal(mode);
  int length = textSource.TextLength;
  if (length <= 0) {
    // An empty document has a normal caret position at 0, but no word borders.
    // Position 0 is only "next" when the search moves towards it.
    bool movesTowardsZero = backwards ? offset > 0 : offset < 0;
    return (stopsEverywhere && movesTowardsZero) ? 0 : -1;
  }

  bool wordStartsOnly = mode == CaretPositioningMode.WordStart
    || mode == CaretPositioningMode.WordStartOrSymbol;
  int step = backwards ? -1 : 1;

  for (int candidate = offset + step; ; candidate += step) {
    // No further caret position in the text source; this also covers
    // start offsets that lie outside of the valid range.
    if (candidate < 0 || candidate > length)
      return -1;

    // The borders of the text source need special treatment:
    // a 'textSource' usually isn't the whole document, but a single VisualLineElement.
    if (candidate == 0) {
      // at the start there is only a word border if the first character is not whitespace
      if (stopsEverywhere || !char.IsWhiteSpace(textSource.GetCharAt(0)))
        return candidate;
      continue;
    }
    if (candidate == length) {
      // The end is never a word start; it is a word border only
      // if the last character is not whitespace.
      if (!wordStartsOnly
          && (stopsEverywhere || !char.IsWhiteSpace(textSource.GetCharAt(length - 1))))
      {
        return candidate;
      }
      continue;
    }

    char previous = textSource.GetCharAt(candidate - 1);
    char following = textSource.GetCharAt(candidate);
    // never stop in the middle of a surrogate pair
    if (char.IsSurrogatePair(previous, following))
      continue;

    // Characters outside the BMP have to be classified using both of their code units.
    CharacterClass classBefore = (char.IsLowSurrogate(previous) && candidate >= 2)
      ? GetCharacterClass(textSource.GetCharAt(candidate - 2), previous)
      : GetCharacterClass(previous);
    CharacterClass classAfter = (char.IsHighSurrogate(following) && candidate + 1 < length)
      ? GetCharacterClass(following, textSource.GetCharAt(candidate + 1))
      : GetCharacterClass(following);

    if (StopBetweenCharacters(mode, classBefore, classAfter))
      return candidate;
    // otherwise keep searching from this candidate
  }
}
354   
// Returns true for the two modes that do not look for words at all
// (Normal and EveryCodepoint), false for every other mode.
static bool IsNormal(CaretPositioningMode mode)
{
  switch (mode) {
    case CaretPositioningMode.Normal:
    case CaretPositioningMode.EveryCodepoint:
      return true;
    default:
      return false;
  }
}
359   
// Decides whether the position between two adjacent characters is a caret stop in the
// given mode. charBefore/charAfter are the classes of the characters on either side.
static bool StopBetweenCharacters(CaretPositioningMode mode, CharacterClass charBefore, CharacterClass charAfter)
{
  // EveryCodepoint stops everywhere, even inside a grapheme
  if (mode == CaretPositioningMode.EveryCodepoint)
    return true;
  // all other modes never split a grapheme: a combining mark belongs to the character before it
  if (charAfter == CharacterClass.CombiningMark)
    return false;
  // Normal mode stops after every grapheme
  if (mode == CaretPositioningMode.Normal)
    return true;

  if (charBefore == charAfter) {
    // Inside a run of equally classified characters only the "OrSymbol" modes stop,
    // and only between two characters of class Other.
    bool symbolMode = mode == CaretPositioningMode.WordBorderOrSymbol
      || mode == CaretPositioningMode.WordStartOrSymbol;
    return symbolMode && charBefore == CharacterClass.Other;
  }

  // The class changes here, so this is a word border. The word-start modes
  // additionally reject it when it is a word end, i.e. when whitespace or
  // a line terminator follows.
  bool wordStartMode = mode == CaretPositioningMode.WordStart
    || mode == CaretPositioningMode.WordStartOrSymbol;
  bool blankFollows = charAfter == CharacterClass.Whitespace
    || charAfter == CharacterClass.LineTerminator;
  return !(wordStartMode && blankFollows);
}
390    #endregion
391  }
392 
/// <summary>
/// The coarse classes a character can fall into: whitespace, line terminator,
/// identifier part, combining mark, or other.
/// </summary>
public enum CharacterClass
{
  /// <summary>
  /// The character is none of whitespace, line terminator or identifier part.
  /// </summary>
  Other = 0,
  /// <summary>
  /// The character is whitespace, but not a line terminator.
  /// </summary>
  [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1702:CompoundWordsShouldBeCasedCorrectly", MessageId = "Whitespace",
                                                   Justification = "WPF uses 'Whitespace'")]
  Whitespace = 1,
  /// <summary>
  /// The character can be part of an identifier (letter, digit or underscore).
  /// </summary>
  IdentifierPart = 2,
  /// <summary>
  /// The character is a line terminator (\r or \n).
  /// </summary>
  LineTerminator = 3,
  /// <summary>
  /// The character is a Unicode combining mark that modifies the previous character.
  /// Corresponds to the Unicode designations "Mn", "Mc" and "Me".
  /// </summary>
  CombiningMark = 4
}
422}
Note: See TracBrowser for help on using the repository browser.