//
// UnicodeNewline.cs
//
// Author:
//       Mike Krüger <mkrueger@xamarin.com>
//
// Copyright (c) 2013 Xamarin Inc. (http://xamarin.com)
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
using System;

namespace ICSharpCode.NRefactory
{
	/// <summary>
	/// The kinds of new line delimiters recognized by <see cref="NewLine"/>.
	/// The numeric value of each single-char member is the code point of its character;
	/// <see cref="CRLF"/> is encoded as 0x0D0A (CR followed by LF).
	/// </summary>
	public enum UnicodeNewline {
		/// <summary>
		/// Not a new line delimiter (value 0).
		/// </summary>
		Unknown,

		/// <summary>
		/// Line Feed, U+000A
		/// </summary>
		LF = 0x0A,

		/// <summary>
		/// Carriage Return followed by Line Feed, U+000D U+000A.
		/// This is the only delimiter that is two chars wide.
		/// </summary>
		CRLF = 0x0D0A,

		/// <summary>
		/// Carriage Return, U+000D
		/// </summary>
		CR = 0x0D,

		/// <summary>
		/// Next Line, U+0085
		/// </summary>
		NEL = 0x85,

		/// <summary>
		/// Vertical Tab, U+000B
		/// </summary>
		VT = 0x0B,

		/// <summary>
		/// Form Feed, U+000C
		/// </summary>
		FF = 0x0C,

		/// <summary>
		/// Line Separator, U+2028
		/// </summary>
		LS = 0x2028,

		/// <summary>
		/// Paragraph Separator, U+2029
		/// </summary>
		PS = 0x2029
	}


	/// <summary>
	/// Defines unicode new lines according to Unicode Technical Report #13
	/// http://www.unicode.org/standard/reports/tr13/tr13-5.html
	/// </summary>
	public static class NewLine
	{
		/// <summary>
		/// Carriage Return, U+000D
		/// </summary>
		public const char CR = (char)0x0D;

		/// <summary>
		/// Line Feed, U+000A
		/// </summary>
		public const char LF = (char)0x0A;

		/// <summary>
		/// Next Line, U+0085
		/// </summary>
		public const char NEL = (char)0x85;

		/// <summary>
		/// Vertical Tab, U+000B
		/// </summary>
		public const char VT = (char)0x0B;

		/// <summary>
		/// Form Feed, U+000C
		/// </summary>
		public const char FF = (char)0x0C;

		/// <summary>
		/// Line Separator, U+2028
		/// </summary>
		public const char LS = (char)0x2028;

		/// <summary>
		/// Paragraph Separator, U+2029
		/// </summary>
		public const char PS = (char)0x2029;

		/// <summary>
		/// Resolves the optional look-ahead callback to a plain char so that the callback based
		/// overloads can share the implementation of the char based ones.
		/// </summary>
		/// <returns>
		/// The result of <paramref name="nextChar"/> when <paramref name="curChar"/> is CR and a callback
		/// was supplied; otherwise '\0', which is never treated as part of a delimiter.
		/// </returns>
		private static char PeekNext (char curChar, Func<char> nextChar)
		{
			// Only CR can start the two char delimiter, so the callback must not be
			// invoked for any other character (it may be expensive or have side effects).
			if (curChar == CR && nextChar != null)
				return nextChar ();
			return '\0';
		}

		/// <summary>
		/// Determines the length of the new line delimiter starting at a char.
		/// </summary>
		/// <returns>0 == no new line, otherwise it returns either 1 or 2 depending on the length of the delimiter.</returns>
		/// <param name="curChar">The current character.</param>
		/// <param name="nextChar">A callback getting the next character (may be null). It is only invoked when <paramref name="curChar"/> is CR.</param>
		public static int GetDelimiterLength (char curChar, Func<char> nextChar = null)
		{
			return GetDelimiterLength (curChar, PeekNext (curChar, nextChar));
		}

		/// <summary>
		/// Determines the length of the new line delimiter starting at a char.
		/// </summary>
		/// <returns>0 == no new line, otherwise it returns either 1 or 2 depending on the length of the delimiter.</returns>
		/// <param name="curChar">The current character.</param>
		/// <param name="nextChar">The next character (if != LF then length will always be 0 or 1).</param>
		public static int GetDelimiterLength (char curChar, char nextChar)
		{
			// CR is the only char that can be followed by a second delimiter char.
			if (curChar == CR)
				return nextChar == LF ? 2 : 1;
			return IsNewLine (curChar) ? 1 : 0;
		}

		/// <summary>
		/// Determines if a char starts a new line delimiter and reports the delimiter's length and type.
		/// </summary>
		/// <returns><c>true</c> if <paramref name="curChar"/> starts a new line delimiter; otherwise <c>false</c>.</returns>
		/// <param name="curChar">The current character.</param>
		/// <param name="length">The length of the delimiter (1 or 2); -1 if <paramref name="curChar"/> is no delimiter.</param>
		/// <param name="type">The type of the delimiter; <see cref="UnicodeNewline.Unknown"/> if <paramref name="curChar"/> is no delimiter.</param>
		/// <param name="nextChar">A callback getting the next character (may be null). It is only invoked when <paramref name="curChar"/> is CR.</param>
		public static bool TryGetDelimiterLengthAndType (char curChar, out int length, out UnicodeNewline type, Func<char> nextChar = null)
		{
			return TryGetDelimiterLengthAndType (curChar, out length, out type, PeekNext (curChar, nextChar));
		}

		/// <summary>
		/// Determines if a char starts a new line delimiter and reports the delimiter's length and type.
		/// </summary>
		/// <returns><c>true</c> if <paramref name="curChar"/> starts a new line delimiter; otherwise <c>false</c>.</returns>
		/// <param name="curChar">The current character.</param>
		/// <param name="length">The length of the delimiter (1 or 2); -1 if <paramref name="curChar"/> is no delimiter.</param>
		/// <param name="type">The type of the delimiter; <see cref="UnicodeNewline.Unknown"/> if <paramref name="curChar"/> is no delimiter.</param>
		/// <param name="nextChar">The next character (if != LF then length will never be 2).</param>
		public static bool TryGetDelimiterLengthAndType (char curChar, out int length, out UnicodeNewline type, char nextChar)
		{
			type = GetDelimiterType (curChar, nextChar);
			if (type == UnicodeNewline.Unknown) {
				// Note: failure is reported as -1 here, not as 0 like GetDelimiterLength does.
				length = -1;
				return false;
			}

			// CRLF is the only delimiter that is two chars wide.
			length = type == UnicodeNewline.CRLF ? 2 : 1;
			return true;
		}

		/// <summary>
		/// Gets the new line type of a given char/next char.
		/// </summary>
		/// <returns>The type of the delimiter starting at <paramref name="curChar"/>, or <see cref="UnicodeNewline.Unknown"/> if it is no new line delimiter.</returns>
		/// <param name="curChar">The current character.</param>
		/// <param name="nextChar">A callback getting the next character (may be null). It is only invoked when <paramref name="curChar"/> is CR.</param>
		public static UnicodeNewline GetDelimiterType (char curChar, Func<char> nextChar = null)
		{
			return GetDelimiterType (curChar, PeekNext (curChar, nextChar));
		}

		/// <summary>
		/// Gets the new line type of a given char/next char.
		/// </summary>
		/// <returns>The type of the delimiter starting at <paramref name="curChar"/>, or <see cref="UnicodeNewline.Unknown"/> if it is no new line delimiter.</returns>
		/// <param name="curChar">The current character.</param>
		/// <param name="nextChar">The next character (only relevant if <paramref name="curChar"/> is CR).</param>
		public static UnicodeNewline GetDelimiterType (char curChar, char nextChar)
		{
			switch (curChar) {
			case CR:
				if (nextChar == LF)
					return UnicodeNewline.CRLF;
				return UnicodeNewline.CR;
			case LF:
				return UnicodeNewline.LF;
			case NEL:
				return UnicodeNewline.NEL;
			case VT:
				return UnicodeNewline.VT;
			case FF:
				return UnicodeNewline.FF;
			case LS:
				return UnicodeNewline.LS;
			case PS:
				return UnicodeNewline.PS;
			}
			return UnicodeNewline.Unknown;
		}

		/// <summary>
		/// Determines if a char is a new line delimiter.
		///
		/// Note that the only 2 char wide new line is CR LF and both chars are new line
		/// chars on their own. For most cases GetDelimiterLength is the better choice.
		/// </summary>
		/// <returns><c>true</c> if <paramref name="ch"/> is one of CR, LF, NEL, VT, FF, LS or PS; otherwise <c>false</c>.</returns>
		/// <param name="ch">The character to test.</param>
		public static bool IsNewLine (char ch)
		{
			return
				ch == NewLine.CR ||
				ch == NewLine.LF ||
				ch == NewLine.NEL ||
				ch == NewLine.VT ||
				ch == NewLine.FF ||
				ch == NewLine.LS ||
				ch == NewLine.PS;
		}

		/// <summary>
		/// Gets the new line as a string.
		/// </summary>
		/// <returns>The delimiter text; the empty string for <see cref="UnicodeNewline.Unknown"/>.</returns>
		/// <param name="newLine">The new line type to convert.</param>
		/// <exception cref="ArgumentOutOfRangeException"><paramref name="newLine"/> is not a defined <see cref="UnicodeNewline"/> value.</exception>
		public static string GetString (UnicodeNewline newLine)
		{
			switch (newLine) {
			case UnicodeNewline.Unknown:
				return "";
			case UnicodeNewline.LF:
				return "\n";
			case UnicodeNewline.CRLF:
				return "\r\n";
			case UnicodeNewline.CR:
				return "\r";
			case UnicodeNewline.NEL:
				return "\u0085";
			case UnicodeNewline.VT:
				return "\u000B";
			case UnicodeNewline.FF:
				return "\u000C";
			case UnicodeNewline.LS:
				return "\u2028";
			case UnicodeNewline.PS:
				return "\u2029";
			default:
				// Name the offending argument so the failure is diagnosable at the call site.
				throw new ArgumentOutOfRangeException ("newLine", newLine, "Not a defined UnicodeNewline value.");
			}
		}
	}
}