// // UnicodeNewline.cs // // Author: // Mike Krüger // // Copyright (c) 2013 Xamarin Inc. (http://xamarin.com) // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
using System;

namespace ICSharpCode.NRefactory
{
	/// <summary>
	/// Identifies a new line delimiter. Except for <see cref="UnicodeNewline.Unknown"/>, the numeric value of
	/// each member is the code point of the delimiter (for <see cref="UnicodeNewline.CRLF"/>: both code points).
	/// </summary>
	public enum UnicodeNewline
	{
		/// <summary>
		/// Not a (known) new line delimiter.
		/// </summary>
		Unknown,

		/// <summary>
		/// Line Feed, U+000A
		/// </summary>
		LF = 0x0A,

		/// <summary>
		/// Carriage Return followed by Line Feed, U+000D U+000A
		/// </summary>
		CRLF = 0x0D0A,

		/// <summary>
		/// Carriage Return, U+000D
		/// </summary>
		CR = 0x0D,

		/// <summary>
		/// Next Line, U+0085
		/// </summary>
		NEL = 0x85,

		/// <summary>
		/// Vertical Tab, U+000B
		/// </summary>
		VT = 0x0B,

		/// <summary>
		/// Form Feed, U+000C
		/// </summary>
		FF = 0x0C,

		/// <summary>
		/// Line Separator, U+2028
		/// </summary>
		LS = 0x2028,

		/// <summary>
		/// Paragraph Separator, U+2029
		/// </summary>
		PS = 0x2029
	}

	/// <summary>
	/// Defines unicode new lines according to Unicode Technical Report #13
	/// http://www.unicode.org/standard/reports/tr13/tr13-5.html
	/// </summary>
	public static class NewLine
	{
		/// <summary>
		/// Carriage Return, U+000D
		/// </summary>
		public const char CR = '\u000D';

		/// <summary>
		/// Line Feed, U+000A
		/// </summary>
		public const char LF = '\u000A';

		/// <summary>
		/// Next Line, U+0085
		/// </summary>
		public const char NEL = '\u0085';

		/// <summary>
		/// Vertical Tab, U+000B
		/// </summary>
		public const char VT = '\u000B';

		/// <summary>
		/// Form Feed, U+000C
		/// </summary>
		public const char FF = '\u000C';

		/// <summary>
		/// Line Separator, U+2028
		/// </summary>
		public const char LS = '\u2028';

		/// <summary>
		/// Paragraph Separator, U+2029
		/// </summary>
		public const char PS = '\u2029';

		/// <summary>
		/// Determines if a char is a new line delimiter.
		/// </summary>
		/// <returns>0 == no new line, otherwise it returns either 1 or 2 depending of the length of the delimiter.</returns>
		/// <param name="curChar">The current character.</param>
		/// <param name="nextChar">A callback getting the next character (may be null). It is only invoked when <paramref name="curChar"/> is CR.</param>
		public static int GetDelimiterLength (char curChar, Func<char> nextChar = null)
		{
			if (curChar == CR) {
				// The callback is consulted for CR only, so callers may pass a reader that is
				// not safe to query after other characters.
				return nextChar != null && nextChar () == LF ? 2 : 1;
			}
			return IsNewLine (curChar) ? 1 : 0;
		}

		/// <summary>
		/// Determines if a char is a new line delimiter.
		/// </summary>
		/// <returns>0 == no new line, otherwise it returns either 1 or 2 depending of the length of the delimiter.</returns>
		/// <param name="curChar">The current character.</param>
		/// <param name="nextChar">The next character (if != LF then length will always be 0 or 1).</param>
		public static int GetDelimiterLength (char curChar, char nextChar)
		{
			if (curChar == CR)
				return nextChar == LF ? 2 : 1;
			return IsNewLine (curChar) ? 1 : 0;
		}

		/// <summary>
		/// Determines if a char is a new line delimiter and reports the delimiter's length and type.
		/// </summary>
		/// <returns><c>true</c> if <paramref name="curChar"/> starts a new line delimiter; otherwise <c>false</c>.</returns>
		/// <param name="curChar">The current character.</param>
		/// <param name="length">The length of the delimiter (1 or 2); -1 if no delimiter was found.</param>
		/// <param name="type">The type of the delimiter; <see cref="UnicodeNewline.Unknown"/> if no delimiter was found.</param>
		/// <param name="nextChar">A callback getting the next character (may be null). It is only invoked when <paramref name="curChar"/> is CR.</param>
		public static bool TryGetDelimiterLengthAndType (char curChar, out int length, out UnicodeNewline type, Func<char> nextChar = null)
		{
			type = GetDelimiterType (curChar, nextChar);
			length = LengthOfType (type);
			return type != UnicodeNewline.Unknown;
		}

		/// <summary>
		/// Determines if a char is a new line delimiter and reports the delimiter's length and type.
		/// </summary>
		/// <returns><c>true</c> if <paramref name="curChar"/> starts a new line delimiter; otherwise <c>false</c>.</returns>
		/// <param name="curChar">The current character.</param>
		/// <param name="length">The length of the delimiter (1 or 2); -1 if no delimiter was found.</param>
		/// <param name="type">The type of the delimiter; <see cref="UnicodeNewline.Unknown"/> if no delimiter was found.</param>
		/// <param name="nextChar">The next character (if != LF then length will never be 2).</param>
		public static bool TryGetDelimiterLengthAndType (char curChar, out int length, out UnicodeNewline type, char nextChar)
		{
			type = GetDelimiterType (curChar, nextChar);
			length = LengthOfType (type);
			return type != UnicodeNewline.Unknown;
		}

		/// <summary>
		/// Gets the new line type of a given char/next char.
		/// </summary>
		/// <returns>The type of the delimiter starting at <paramref name="curChar"/>, or <see cref="UnicodeNewline.Unknown"/> if it is no new line char.</returns>
		/// <param name="curChar">The current character.</param>
		/// <param name="nextChar">A callback getting the next character (may be null). It is only invoked when <paramref name="curChar"/> is CR.</param>
		public static UnicodeNewline GetDelimiterType (char curChar, Func<char> nextChar = null)
		{
			if (curChar == CR && nextChar != null && nextChar () == LF)
				return UnicodeNewline.CRLF;
			return SingleCharType (curChar);
		}

		/// <summary>
		/// Gets the new line type of a given char/next char.
		/// </summary>
		/// <returns>The type of the delimiter starting at <paramref name="curChar"/>, or <see cref="UnicodeNewline.Unknown"/> if it is no new line char.</returns>
		/// <param name="curChar">The current character.</param>
		/// <param name="nextChar">The next character (only relevant when <paramref name="curChar"/> is CR).</param>
		public static UnicodeNewline GetDelimiterType (char curChar, char nextChar)
		{
			if (curChar == CR && nextChar == LF)
				return UnicodeNewline.CRLF;
			return SingleCharType (curChar);
		}

		/// <summary>
		/// Determines if a char is a new line delimiter.
		///
		/// Note that the only 2 char wide new line is CR LF and both chars are new line
		/// chars on their own. For most cases GetDelimiterLength is the better choice.
		/// </summary>
		/// <returns><c>true</c> if <paramref name="ch"/> is one of CR, LF, NEL, VT, FF, LS or PS; otherwise <c>false</c>.</returns>
		/// <param name="ch">The character to test.</param>
		public static bool IsNewLine (char ch)
		{
			return SingleCharType (ch) != UnicodeNewline.Unknown;
		}

		/// <summary>
		/// Gets the new line as a string.
		/// </summary>
		/// <returns>The delimiter text; the empty string for <see cref="UnicodeNewline.Unknown"/>.</returns>
		/// <param name="newLine">The new line type to convert.</param>
		/// <exception cref="ArgumentOutOfRangeException"><paramref name="newLine"/> is not a defined <see cref="UnicodeNewline"/> value.</exception>
		public static string GetString (UnicodeNewline newLine)
		{
			switch (newLine) {
			case UnicodeNewline.Unknown:
				return "";
			case UnicodeNewline.LF:
				return "\n";
			case UnicodeNewline.CRLF:
				return "\r\n";
			case UnicodeNewline.CR:
				return "\r";
			case UnicodeNewline.NEL:
				return "\u0085";
			case UnicodeNewline.VT:
				return "\u000B";
			case UnicodeNewline.FF:
				return "\u000C";
			case UnicodeNewline.LS:
				return "\u2028";
			case UnicodeNewline.PS:
				return "\u2029";
			default:
				// Report which argument was invalid and what it contained.
				throw new ArgumentOutOfRangeException ("newLine", newLine, "Unknown new line type.");
			}
		}

		// Maps a single character to its delimiter type, never looking at the following character
		// (so CR is always reported as CR here, not CRLF).
		private static UnicodeNewline SingleCharType (char ch)
		{
			switch (ch) {
			case CR:
				return UnicodeNewline.CR;
			case LF:
				return UnicodeNewline.LF;
			case NEL:
				return UnicodeNewline.NEL;
			case VT:
				return UnicodeNewline.VT;
			case FF:
				return UnicodeNewline.FF;
			case LS:
				return UnicodeNewline.LS;
			case PS:
				return UnicodeNewline.PS;
			default:
				return UnicodeNewline.Unknown;
			}
		}

		// Length in chars of a delimiter type as reported by TryGetDelimiterLengthAndType:
		// -1 for Unknown, 2 for CRLF, 1 for every other delimiter.
		private static int LengthOfType (UnicodeNewline type)
		{
			switch (type) {
			case UnicodeNewline.Unknown:
				return -1;
			case UnicodeNewline.CRLF:
				return 2;
			default:
				return 1;
			}
		}
	}
}