Context Navigation

source: branches/2929_PrioritizedGrammarEnumeration/HeuristicLab.ExtLibs/HeuristicLab.AvalonEdit/5.0.1/AvalonEdit-5.0.1/Document/TextUtilities.cs @ 15990

Visit:

Last change on this file since 15990 was 11700, checked in by jkarder, 10 years ago
#2077: created branch and added first version
File size: 16.6 KB

Line
1	// Copyright (c) 2014 AlphaSierraPapa for the SharpDevelop Team
2	//
3	// Permission is hereby granted, free of charge, to any person obtaining a copy of this
4	// software and associated documentation files (the "Software"), to deal in the Software
5	// without restriction, including without limitation the rights to use, copy, modify, merge,
6	// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
7	// to whom the Software is furnished to do so, subject to the following conditions:
8	//
9	// The above copyright notice and this permission notice shall be included in all copies or
10	// substantial portions of the Software.
11	//
12	// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
13	// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
14	// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE
15	// FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
16	// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
17	// DEALINGS IN THE SOFTWARE.
18
19	using System;
20	using System.Globalization;
21	using System.Windows.Documents;
22	#if NREFACTORY
23	using ICSharpCode.NRefactory.Editor;
24	#endif
25
26	namespace ICSharpCode.AvalonEdit.Document
27	{
/// <summary>
/// Selects which offsets are accepted as caret stops when searching for the next caret position.
/// </summary>
public enum CaretPositioningMode
{
	/// <summary>
	/// Regular caret movement: a stop after every grapheme.
	/// </summary>
	Normal = 0,
	/// <summary>
	/// Stops are restricted to word borders.
	/// </summary>
	WordBorder = 1,
	/// <summary>
	/// Stops are restricted to the beginning of words. This is used for Ctrl+Left/Ctrl+Right.
	/// </summary>
	WordStart = 2,
	/// <summary>
	/// Stops at the beginning of words and, additionally, anywhere in the middle of symbols.
	/// </summary>
	WordStartOrSymbol = 3,
	/// <summary>
	/// Stops on word borders and, additionally, anywhere in the middle of symbols.
	/// </summary>
	WordBorderOrSymbol = 4,
	/// <summary>
	/// A stop between every Unicode codepoint, even inside a single grapheme.
	/// This is used to implement deleting the previous grapheme when Backspace is pressed.
	/// </summary>
	EveryCodepoint = 5
}
59
60	/// <summary>
61	/// Static helper methods for working with text.
62	/// </summary>
63	public static partial class TextUtilities
64	{
65	#region GetControlCharacterName
// Names of the C0 control characters, indexed directly by codepoint (0 to 31).
static readonly string[] c0Table = {
	"NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL",
	"BS", "HT", "LF", "VT", "FF", "CR", "SO", "SI",
	"DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
	"CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US"
};

// Entry 0 is DEL (codepoint 127); entries 1 to 32 are the C1 control
// characters (codepoints 128 to 159). Index with (codepoint - 127).
static readonly string[] delAndC1Table = {
	"DEL",
	"PAD", "HOP", "BPH", "NBH", "IND", "NEL", "SSA", "ESA",
	"HTS", "HTJ", "VTS", "PLD", "PLU", "RI", "SS2", "SS3",
	"DCS", "PU1", "PU2", "STS", "CCH", "MW", "SPA", "EPA",
	"SOS", "SGCI", "SCI", "CSI", "ST", "OSC", "PM", "APC"
};

/// <summary>
/// Looks up the short name of a control character.
/// Characters that are in neither name table are rendered as their codepoint,
/// formatted as a lowercase hexadecimal number padded to at least 4 digits.
/// </summary>
/// <param name="controlCharacter">The character to name.</param>
/// <returns>The table name for codepoints 0-31 and 127-159; otherwise the hexadecimal codepoint.</returns>
public static string GetControlCharacterName(char controlCharacter)
{
	int codepoint = controlCharacter;
	if (codepoint < c0Table.Length) {
		return c0Table[codepoint];
	}
	if (codepoint < 127 || codepoint > 159) {
		// not covered by either table
		return codepoint.ToString("x4", CultureInfo.InvariantCulture);
	}
	return delAndC1Table[codepoint - 127];
}
98	#endregion
99
100	#region GetWhitespace
/// <summary>
/// Collects the run of spaces and tabs that begins at <paramref name="offset"/>.
/// Newlines are not treated as whitespace here, so the run ends at the first
/// character that is neither ' ' nor '\t', or at the end of the text source.
/// </summary>
/// <param name="textSource">The text source.</param>
/// <param name="offset">The offset where the whitespace starts.</param>
/// <returns>The segment containing the whitespace (empty if there is none at the offset).</returns>
[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1702:CompoundWordsShouldBeCasedCorrectly", MessageId = "Whitespace",
	Justification = "WPF uses 'Whitespace'")]
public static ISegment GetWhitespaceAfter(ITextSource textSource, int offset)
{
	if (textSource == null)
		throw new ArgumentNullException("textSource");
	int end = offset;
	while (end < textSource.TextLength) {
		char ch = textSource.GetCharAt(end);
		bool isBlank = ch == ' ' || ch == '\t';
		if (!isBlank)
			break;
		end++;
	}
	return new SimpleSegment(offset, end - offset);
}
121
/// <summary>
/// Collects the run of spaces and tabs that ends directly before <paramref name="offset"/>.
/// Newlines are not treated as whitespace here, so the backwards scan ends at the first
/// character that is neither ' ' nor '\t', or at the start of the text source.
/// </summary>
/// <param name="textSource">The text source.</param>
/// <param name="offset">The offset where the whitespace ends.</param>
/// <returns>The segment containing the whitespace (empty if there is none before the offset).</returns>
[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1702:CompoundWordsShouldBeCasedCorrectly", MessageId = "Whitespace",
	Justification = "WPF uses 'Whitespace'")]
public static ISegment GetWhitespaceBefore(ITextSource textSource, int offset)
{
	if (textSource == null)
		throw new ArgumentNullException("textSource");
	// Walk backwards until we hit a non-blank character or fall off the start (-1).
	int lastNonBlank = offset - 1;
	while (lastNonBlank >= 0) {
		char ch = textSource.GetCharAt(lastNonBlank);
		bool isBlank = ch == ' ' || ch == '\t';
		if (!isBlank)
			break;
		lastNonBlank--;
	}
	// The whitespace run starts one position after the character that stopped the scan.
	int start = lastNonBlank + 1;
	return new SimpleSegment(start, offset - start);
}
143
/// <summary>
/// Gets the leading whitespace segment on the document line.
/// </summary>
/// <param name="document">The document that <paramref name="documentLine"/> belongs to.</param>
/// <param name="documentLine">The line whose leading spaces and tabs are requested.</param>
/// <returns>The segment of spaces and tabs starting at the line's offset.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="document"/> or <paramref name="documentLine"/> is null.
/// </exception>
[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1702:CompoundWordsShouldBeCasedCorrectly", MessageId = "Whitespace",
	Justification = "WPF uses 'Whitespace'")]
[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1011:ConsiderPassingBaseTypesAsParameters",
	Justification = "Parameter cannot be ITextSource because it must belong to the DocumentLine")]
public static ISegment GetLeadingWhitespace(TextDocument document, DocumentLine documentLine)
{
	if (documentLine == null)
		throw new ArgumentNullException("documentLine");
	// Check the document here so the exception names this method's own parameter
	// instead of the "textSource" parameter of GetWhitespaceAfter.
	if (document == null)
		throw new ArgumentNullException("document");
	return GetWhitespaceAfter(document, documentLine.Offset);
}
157
/// <summary>
/// Gets the trailing whitespace segment on the document line.
/// </summary>
/// <param name="document">The document that <paramref name="documentLine"/> belongs to.</param>
/// <param name="documentLine">The line whose trailing spaces and tabs are requested.</param>
/// <returns>
/// The segment of spaces and tabs ending at the line's end offset. If that segment starts
/// at the line's offset (the line is empty or whitespace-only), an empty segment located
/// at the line's end offset is returned instead.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="document"/> or <paramref name="documentLine"/> is null.
/// </exception>
[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1702:CompoundWordsShouldBeCasedCorrectly", MessageId = "Whitespace",
	Justification = "WPF uses 'Whitespace'")]
[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Design", "CA1011:ConsiderPassingBaseTypesAsParameters",
	Justification = "Parameter cannot be ITextSource because it must belong to the DocumentLine")]
public static ISegment GetTrailingWhitespace(TextDocument document, DocumentLine documentLine)
{
	if (documentLine == null)
		throw new ArgumentNullException("documentLine");
	// Check the document here so the exception names this method's own parameter
	// instead of the "textSource" parameter of GetWhitespaceBefore.
	if (document == null)
		throw new ArgumentNullException("document");
	ISegment segment = GetWhitespaceBefore(document, documentLine.EndOffset);
	// If the whole line consists of whitespace, we consider all of it as leading whitespace,
	// so return an empty segment as trailing whitespace.
	if (segment.Offset == documentLine.Offset)
		return new SimpleSegment(documentLine.EndOffset, 0);
	else
		return segment;
}
177	#endregion
178
179	#region GetSingleIndentationSegment
/// <summary>
/// Gets one unit of indentation starting at <paramref name="offset"/>: either a single tab,
/// or a run of at most <paramref name="indentationSize"/> spaces.
/// </summary>
/// <param name="textSource">The text source.</param>
/// <param name="offset">The offset where the indentation segment starts.</param>
/// <param name="indentationSize">The size of an indentation unit. See <see cref="TextEditorOptions.IndentationSize"/>.</param>
/// <returns>The indentation segment.
/// If there is no indentation character at the specified <paramref name="offset"/>,
/// an empty segment is returned.</returns>
public static ISegment GetSingleIndentationSegment(ITextSource textSource, int offset, int indentationSize)
{
	if (textSource == null)
		throw new ArgumentNullException("textSource");
	int end = offset;
	for (; end < textSource.TextLength; end++) {
		char ch = textSource.GetCharAt(end);
		if (ch == '\t') {
			// A tab counts as a whole unit only when it is the very first character;
			// a tab following spaces terminates the run of spaces.
			if (end == offset)
				return new SimpleSegment(offset, 1);
			break;
		}
		// Keep going only for spaces, and only while fewer than indentationSize were consumed.
		if (ch != ' ' || end - offset >= indentationSize)
			break;
	}
	return new SimpleSegment(offset, end - offset);
}
213	#endregion
214
215	#region GetCharacterClass
/// <summary>
/// Classifies a single UTF-16 code unit.
/// '\r' and '\n' are line terminators and '_' is an identifier part; every other
/// character is classified according to its Unicode category.
/// </summary>
[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1704:IdentifiersShouldBeSpelledCorrectly", MessageId = "c")]
public static CharacterClass GetCharacterClass(char c)
{
	switch (c) {
		case '\r':
		case '\n':
			return CharacterClass.LineTerminator;
		case '_':
			return CharacterClass.IdentifierPart;
		default:
			return GetCharacterClass(char.GetUnicodeCategory(c));
	}
}
228
// Classifies a character given as two UTF-16 code units. When the two units form
// a surrogate pair, the Unicode category is looked up with char.GetUnicodeCategory
// on a two-character string built from them; otherwise the result is Other.
static CharacterClass GetCharacterClass(char highSurrogate, char lowSurrogate)
{
	if (!char.IsSurrogatePair(highSurrogate, lowSurrogate)) {
		// malformed surrogate pair
		return CharacterClass.Other;
	}
	string pair = new string(new[] { highSurrogate, lowSurrogate });
	return GetCharacterClass(char.GetUnicodeCategory(pair, 0));
}
238
// Maps a Unicode category to the coarser CharacterClass used for caret positioning.
static CharacterClass GetCharacterClass(UnicodeCategory c)
{
	// Separators and control characters all count as whitespace.
	bool isSeparatorOrControl =
		c == UnicodeCategory.SpaceSeparator
		|| c == UnicodeCategory.LineSeparator
		|| c == UnicodeCategory.ParagraphSeparator
		|| c == UnicodeCategory.Control;
	if (isSeparatorOrControl)
		return CharacterClass.Whitespace;

	// Letters of any kind and decimal digits can be part of an identifier.
	bool isLetterOrDigit =
		c == UnicodeCategory.UppercaseLetter
		|| c == UnicodeCategory.LowercaseLetter
		|| c == UnicodeCategory.TitlecaseLetter
		|| c == UnicodeCategory.ModifierLetter
		|| c == UnicodeCategory.OtherLetter
		|| c == UnicodeCategory.DecimalDigitNumber;
	if (isLetterOrDigit)
		return CharacterClass.IdentifierPart;

	bool isMark =
		c == UnicodeCategory.NonSpacingMark
		|| c == UnicodeCategory.SpacingCombiningMark
		|| c == UnicodeCategory.EnclosingMark;
	if (isMark)
		return CharacterClass.CombiningMark;

	return CharacterClass.Other;
}
262	#endregion
263
264	#region GetNextCaretPosition
/// <summary>
/// Searches from <paramref name="offset"/> in the given direction for the nearest offset
/// that is a caret stop under <paramref name="mode"/>.
/// </summary>
/// <param name="textSource">The text source.</param>
/// <param name="offset">The start offset inside the text source.</param>
/// <param name="direction">The search direction (forwards or backwards).</param>
/// <param name="mode">The mode for caret positioning.</param>
/// <returns>The offset of the next caret position, or -1 if there is no further caret position
/// in the text source.</returns>
/// <remarks>
/// This method is NOT equivalent to the actual caret movement when using VisualLine.GetNextCaretPosition.
/// In real caret movement, there are additional caret stops at line starts and ends. This method
/// treats linefeeds as simple whitespace.
/// </remarks>
public static int GetNextCaretPosition(ITextSource textSource, int offset, LogicalDirection direction, CaretPositioningMode mode)
{
	if (textSource == null)
		throw new ArgumentNullException("textSource");

	// Every supported mode falls into exactly one of these three groups.
	bool graphemeMode = IsNormal(mode);
	bool wordBorderMode = mode == CaretPositioningMode.WordBorder || mode == CaretPositioningMode.WordBorderOrSymbol;
	bool wordStartMode = mode == CaretPositioningMode.WordStart || mode == CaretPositioningMode.WordStartOrSymbol;
	if (!graphemeMode && !wordBorderMode && !wordStartMode)
		throw new ArgumentException("Unsupported CaretPositioningMode: " + mode, "mode");

	bool backward = direction == LogicalDirection.Backward;
	if (!backward && direction != LogicalDirection.Forward)
		throw new ArgumentException("Invalid LogicalDirection: " + direction, "direction");

	int length = textSource.TextLength;
	if (length <= 0) {
		// empty document? has a normal caret position at 0, though no word borders
		bool movesTowardsZero = backward ? offset > 0 : offset < 0;
		return (graphemeMode && movesTowardsZero) ? 0 : -1;
	}

	int step = backward ? -1 : 1;
	// Leaving the range [0, length] means there is no further caret position;
	// this also covers start offsets that are outside the valid range.
	for (int candidate = offset + step; candidate >= 0 && candidate <= length; candidate += step) {
		// The borders of the text source need special treatment:
		// a 'textSource' usually isn't the whole document, but a single VisualLineElement.
		if (candidate == 0) {
			// at the start, there's only a word border if the first character is not whitespace
			if (graphemeMode || !char.IsWhiteSpace(textSource.GetCharAt(0)))
				return candidate;
			continue;
		}
		if (candidate == length) {
			// at the end, there's never a word start, and only a word border
			// if the last character is not whitespace
			if (!wordStartMode && (graphemeMode || !char.IsWhiteSpace(textSource.GetCharAt(length - 1))))
				return candidate;
			continue;
		}

		char before = textSource.GetCharAt(candidate - 1);
		char after = textSource.GetCharAt(candidate);
		// Don't stop in the middle of a surrogate pair
		if (char.IsSurrogatePair(before, after))
			continue;

		// For characters outside the BMP, classify the pair that the adjacent
		// surrogate belongs to rather than the lone surrogate.
		CharacterClass classBefore = (char.IsLowSurrogate(before) && candidate >= 2)
			? GetCharacterClass(textSource.GetCharAt(candidate - 2), before)
			: GetCharacterClass(before);
		CharacterClass classAfter = (char.IsHighSurrogate(after) && candidate + 1 < length)
			? GetCharacterClass(after, textSource.GetCharAt(candidate + 1))
			: GetCharacterClass(after);

		if (StopBetweenCharacters(mode, classBefore, classAfter))
			return candidate;
	}
	return -1;
}
354
// True for the two modes that are not word-based: Normal and EveryCodepoint.
static bool IsNormal(CaretPositioningMode mode)
{
	switch (mode) {
		case CaretPositioningMode.Normal:
		case CaretPositioningMode.EveryCodepoint:
			return true;
		default:
			return false;
	}
}
359
// Decides whether the position between a character of class charBefore and a
// character of class charAfter is a caret stop in the given mode.
static bool StopBetweenCharacters(CaretPositioningMode mode, CharacterClass charBefore, CharacterClass charAfter)
{
	if (mode == CaretPositioningMode.EveryCodepoint)
		return true;
	// Don't stop in the middle of a grapheme
	if (charAfter == CharacterClass.CombiningMark)
		return false;
	// Stop after every grapheme in normal mode
	if (mode == CaretPositioningMode.Normal)
		return true;

	bool symbolMode = mode == CaretPositioningMode.WordBorderOrSymbol || mode == CaretPositioningMode.WordStartOrSymbol;
	bool wordStartMode = mode == CaretPositioningMode.WordStart || mode == CaretPositioningMode.WordStartOrSymbol;

	if (charBefore == charAfter) {
		// Same class on both sides: no border, except that the "OrSymbol" modes
		// put a word border and start between any two unknown characters.
		return symbolMode && charBefore == CharacterClass.Other;
	}

	// The class changes here, so this is a border. When only looking for borders, accept it.
	if (!wordStartMode)
		return true;
	// When looking for word starts, reject word ends (a border followed by whitespace or a line terminator).
	return charAfter != CharacterClass.Whitespace && charAfter != CharacterClass.LineTerminator;
}
390	#endregion
391	}
392
/// <summary>
/// The coarse category of a character: whitespace, line terminator, identifier part,
/// combining mark, or other.
/// </summary>
public enum CharacterClass
{
	/// <summary>
	/// None of the other classes applies to the character.
	/// </summary>
	Other = 0,
	/// <summary>
	/// Whitespace that is not a line terminator.
	/// </summary>
	[System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1702:CompoundWordsShouldBeCasedCorrectly", MessageId = "Whitespace",
		Justification = "WPF uses 'Whitespace'")]
	Whitespace = 1,
	/// <summary>
	/// A character that can appear in an identifier (letter, digit or underscore).
	/// </summary>
	IdentifierPart = 2,
	/// <summary>
	/// A line terminator (\r or \n).
	/// </summary>
	LineTerminator = 3,
	/// <summary>
	/// A Unicode combining mark that modifies the previous character.
	/// Corresponds to the Unicode designations "Mn", "Mc" and "Me".
	/// </summary>
	CombiningMark = 4
}
422	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences