#region Copyright notice and license
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// http://github.com/jskeet/dotnet-protobufs/
// Original C++/Java/Python code:
// http://code.google.com/p/protobuf/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endregion
34 |
|
---|
35 | using System;
|
---|
36 | using System.Globalization;
|
---|
37 | using System.Text.RegularExpressions;
|
---|
38 |
|
---|
namespace Google.ProtocolBuffers {
  /// <summary>
  /// Represents a stream of tokens parsed from a string.
  /// </summary>
  /// <remarks>
  /// The constructor skips any leading whitespace/comments and reads the first
  /// token, so there is always a "current token". Once the input is exhausted
  /// the current token is the empty string (see <see cref="AtEnd"/>).
  /// </remarks>
  internal sealed class TextTokenizer {
    /// <summary>
    /// The complete input being tokenized.
    /// </summary>
    private readonly string text;

    /// <summary>
    /// The token most recently read by <see cref="NextToken"/>; empty at end of input.
    /// </summary>
    private string currentToken;

    /// <summary>
    /// The character index within the text to perform the next regex match at.
    /// </summary>
    private int matchPos = 0;

    /// <summary>
    /// The character index within the text at which the current token begins.
    /// </summary>
    private int pos = 0;

    /// <summary>
    /// The (zero-based) line number of the current token.
    /// </summary>
    private int line = 0;
    /// <summary>
    /// The (zero-based) column number of the current token.
    /// </summary>
    private int column = 0;

    /// <summary>
    /// The (zero-based) line number of the previous token.
    /// </summary>
    private int previousLine = 0;
    /// <summary>
    /// The (zero-based) column number of the previous token.
    /// </summary>
    private int previousColumn = 0;

    // Note: atomic groups used to mimic possessive quantifiers in Java in both of these regexes.
    // Every alternative is anchored with \G so a match can only start exactly at the
    // position passed to Regex.Match, never further along in the text.
    internal static readonly Regex WhitespaceAndCommentPattern = new Regex("\\G(?>(\\s|(#.*$))+)",
        SilverlightCompatibility.CompiledRegexWhereAvailable | RegexOptions.Multiline);

    // The body of a quoted string is any run of characters other than the string's own
    // quote character, a newline or a backslash, or a backslash followed by any character.
    // The body class of the single-quoted alternative must therefore exclude the single
    // quote: if it excluded the double quote instead, the atomic group would swallow the
    // closing quote and keep consuming up to the end of the line.
    // A string may also end at end-of-line (optionally after a lone backslash) so that an
    // unterminated string still forms one token; ConsumeByteString reports it as an error.
    private static readonly Regex TokenPattern = new Regex(
        "\\G[a-zA-Z_](?>[0-9a-zA-Z_+-]*)|" + // an identifier
        "\\G[0-9+-](?>[0-9a-zA-Z_.+-]*)|" + // a number
        "\\G\"(?>([^\"\\\n\\\\]|\\\\.)*)(\"|\\\\?$)|" + // a double-quoted string
        "\\G\'(?>([^\'\\\n\\\\]|\\\\.)*)(\'|\\\\?$)", // a single-quoted string
        SilverlightCompatibility.CompiledRegexWhereAvailable | RegexOptions.Multiline);

    private static readonly Regex DoubleInfinity = new Regex("^-?inf(inity)?$",
        SilverlightCompatibility.CompiledRegexWhereAvailable | RegexOptions.IgnoreCase);
    private static readonly Regex FloatInfinity = new Regex("^-?inf(inity)?f?$",
        SilverlightCompatibility.CompiledRegexWhereAvailable | RegexOptions.IgnoreCase);
    private static readonly Regex FloatNan = new Regex("^nanf?$",
        SilverlightCompatibility.CompiledRegexWhereAvailable | RegexOptions.IgnoreCase);

    /// <summary>
    /// Constructs a tokenizer that parses tokens from the given text.
    /// Leading whitespace and comments are skipped and the first token is read.
    /// </summary>
    /// <param name="text">The text to tokenize.</param>
    public TextTokenizer(string text) {
      this.text = text;
      SkipWhitespace();
      NextToken();
    }

    /// <summary>
    /// Are we at the end of the input?
    /// </summary>
    public bool AtEnd {
      get { return currentToken.Length == 0; }
    }

    /// <summary>
    /// Advances to the next token.
    /// </summary>
    /// <remarks>
    /// The position of the token being left is remembered as the "previous" position.
    /// If no token pattern matches at the current position, a single character is
    /// taken as the token. Trailing whitespace and comments are skipped afterwards.
    /// </remarks>
    public void NextToken() {
      previousLine = line;
      previousColumn = column;

      // Advance the line counter to the current position.
      while (pos < matchPos) {
        if (text[pos] == '\n') {
          ++line;
          column = 0;
        } else {
          ++column;
        }
        ++pos;
      }

      // Match the next token.
      if (matchPos == text.Length) {
        // EOF
        currentToken = "";
      } else {
        Match match = TokenPattern.Match(text, matchPos);
        if (match.Success) {
          currentToken = match.Value;
          matchPos += match.Length;
        } else {
          // Take one character.
          currentToken = text[matchPos].ToString();
          matchPos++;
        }

        SkipWhitespace();
      }
    }

    /// <summary>
    /// Skip over any whitespace so that matchPos starts at the next token.
    /// </summary>
    private void SkipWhitespace() {
      Match match = WhitespaceAndCommentPattern.Match(text, matchPos);
      if (match.Success) {
        matchPos += match.Length;
      }
    }

    /// <summary>
    /// If the next token exactly matches the given token, consume it and return
    /// true. Otherwise, return false without doing anything.
    /// </summary>
    /// <param name="token">The exact token text expected.</param>
    /// <returns>true if the token was matched and consumed; otherwise false.</returns>
    public bool TryConsume(string token) {
      if (currentToken == token) {
        NextToken();
        return true;
      }
      return false;
    }

    /// <summary>
    /// If the next token exactly matches the specified one, consume it.
    /// Otherwise, throw a FormatException.
    /// </summary>
    /// <param name="token">The exact token text expected.</param>
    /// <exception cref="FormatException">The current token is not <paramref name="token"/>.</exception>
    public void Consume(string token) {
      if (!TryConsume(token)) {
        throw CreateFormatException("Expected \"" + token + "\".");
      }
    }

    /// <summary>
    /// Returns true if the next token is an integer, but does not consume it.
    /// </summary>
    /// <remarks>
    /// Only the first character of the token is inspected: it must be a decimal
    /// digit, '-' or '+'.
    /// </remarks>
    public bool LookingAtInteger() {
      if (currentToken.Length == 0) {
        return false;
      }

      char c = currentToken[0];
      return ('0' <= c && c <= '9') || c == '-' || c == '+';
    }

    /// <summary>
    /// If the next token is an identifier, consume it and return its value.
    /// Otherwise, throw a FormatException.
    /// </summary>
    /// <remarks>
    /// The token is accepted when every character is an ASCII letter, a digit,
    /// '_' or '.'. Note that an empty token (end of input) contains no offending
    /// character and is therefore returned as an empty string.
    /// </remarks>
    /// <exception cref="FormatException">The token contains any other character.</exception>
    public string ConsumeIdentifier() {
      foreach (char c in currentToken) {
        if (('a' <= c && c <= 'z') ||
            ('A' <= c && c <= 'Z') ||
            ('0' <= c && c <= '9') ||
            (c == '_') || (c == '.')) {
          // OK
        } else {
          throw CreateFormatException("Expected identifier.");
        }
      }

      string result = currentToken;
      NextToken();
      return result;
    }

    /// <summary>
    /// If the next token is a 32-bit signed integer, consume it and return its
    /// value. Otherwise, throw a FormatException.
    /// </summary>
    /// <exception cref="FormatException">The token could not be parsed; the message
    /// carries the current line and column.</exception>
    public int ConsumeInt32() {
      try {
        int result = TextFormat.ParseInt32(currentToken);
        NextToken();
        return result;
      } catch (FormatException e) {
        throw CreateIntegerParseException(e);
      }
    }

    /// <summary>
    /// If the next token is a 32-bit unsigned integer, consume it and return its
    /// value. Otherwise, throw a FormatException.
    /// </summary>
    /// <exception cref="FormatException">The token could not be parsed; the message
    /// carries the current line and column.</exception>
    public uint ConsumeUInt32() {
      try {
        uint result = TextFormat.ParseUInt32(currentToken);
        NextToken();
        return result;
      } catch (FormatException e) {
        throw CreateIntegerParseException(e);
      }
    }

    /// <summary>
    /// If the next token is a 64-bit signed integer, consume it and return its
    /// value. Otherwise, throw a FormatException.
    /// </summary>
    /// <exception cref="FormatException">The token could not be parsed; the message
    /// carries the current line and column.</exception>
    public long ConsumeInt64() {
      try {
        long result = TextFormat.ParseInt64(currentToken);
        NextToken();
        return result;
      } catch (FormatException e) {
        throw CreateIntegerParseException(e);
      }
    }

    /// <summary>
    /// If the next token is a 64-bit unsigned integer, consume it and return its
    /// value. Otherwise, throw a FormatException.
    /// </summary>
    /// <exception cref="FormatException">The token could not be parsed; the message
    /// carries the current line and column.</exception>
    public ulong ConsumeUInt64() {
      try {
        ulong result = TextFormat.ParseUInt64(currentToken);
        NextToken();
        return result;
      } catch (FormatException e) {
        throw CreateIntegerParseException(e);
      }
    }

    /// <summary>
    /// If the next token is a double, consume it and return its value.
    /// Otherwise, throw a FormatException.
    /// </summary>
    /// <exception cref="FormatException">The token could not be parsed as a number; the
    /// message carries the current line and column.</exception>
    public double ConsumeDouble() {
      // Infinity and NaN are handled explicitly so that the "inf", "infinity" and
      // "nan" spellings (in any letter case) are recognised independently of what
      // double.Parse() itself accepts.
      if (DoubleInfinity.IsMatch(currentToken)) {
        // Tokens are machine-readable text, so compare ordinally rather than by culture.
        bool negative = currentToken.StartsWith("-", StringComparison.Ordinal);
        NextToken();
        return negative ? double.NegativeInfinity : double.PositiveInfinity;
      }
      if (currentToken.Equals("nan", StringComparison.OrdinalIgnoreCase)) {
        NextToken();
        return Double.NaN;
      }

      try {
        double result = double.Parse(currentToken, CultureInfo.InvariantCulture);
        NextToken();
        return result;
      } catch (FormatException e) {
        throw CreateFloatParseException(e);
      } catch (OverflowException e) {
        throw CreateFloatParseException(e);
      }
    }

    /// <summary>
    /// If the next token is a float, consume it and return its value.
    /// Otherwise, throw a FormatException.
    /// </summary>
    /// <remarks>
    /// A trailing lower-case "f" suffix on the token is ignored. When parsing fails
    /// the current token is left untouched.
    /// </remarks>
    /// <exception cref="FormatException">The token could not be parsed as a number; the
    /// message carries the current line and column.</exception>
    public float ConsumeFloat() {
      // Infinity and NaN are handled explicitly so that the "inf", "infinity" and
      // "nan" spellings (in any letter case, with an optional "f" suffix) are
      // recognised independently of what float.Parse() itself accepts.
      if (FloatInfinity.IsMatch(currentToken)) {
        // Tokens are machine-readable text, so compare ordinally rather than by culture.
        bool negative = currentToken.StartsWith("-", StringComparison.Ordinal);
        NextToken();
        return negative ? float.NegativeInfinity : float.PositiveInfinity;
      }
      if (FloatNan.IsMatch(currentToken)) {
        NextToken();
        return float.NaN;
      }

      // Strip the suffix from a local copy: the tokenizer's own state must not change
      // unless the token is actually consumed, otherwise a failed parse would leave a
      // truncated (possibly empty, i.e. "end of input") current token behind.
      string number = currentToken;
      if (number.EndsWith("f", StringComparison.Ordinal)) {
        number = number.TrimEnd('f');
      }

      try {
        float result = float.Parse(number, CultureInfo.InvariantCulture);
        NextToken();
        return result;
      } catch (FormatException e) {
        throw CreateFloatParseException(e);
      } catch (OverflowException e) {
        throw CreateFloatParseException(e);
      }
    }

    /// <summary>
    /// If the next token is a Boolean, consume it and return its value.
    /// Otherwise, throw a FormatException.
    /// </summary>
    /// <exception cref="FormatException">The token is neither "true" nor "false".</exception>
    public bool ConsumeBoolean() {
      if (currentToken == "true") {
        NextToken();
        return true;
      }
      if (currentToken == "false") {
        NextToken();
        return false;
      }
      throw CreateFormatException("Expected \"true\" or \"false\".");
    }

    /// <summary>
    /// If the next token is a string, consume it and return its (unescaped) value.
    /// Otherwise, throw a FormatException.
    /// </summary>
    public string ConsumeString() {
      return ConsumeByteString().ToStringUtf8();
    }

    /// <summary>
    /// If the next token is a string, consume it, unescape it as a
    /// ByteString and return it. Otherwise, throw a FormatException.
    /// </summary>
    /// <exception cref="FormatException">The token does not start with a quote, is not
    /// terminated by the same quote, or its contents cannot be unescaped.</exception>
    public ByteString ConsumeByteString() {
      char quote = currentToken.Length > 0 ? currentToken[0] : '\0';
      if (quote != '\"' && quote != '\'') {
        throw CreateFormatException("Expected string.");
      }

      // A lone quote character is both the first and the last character of the token,
      // hence the explicit length check.
      if (currentToken.Length < 2 ||
          currentToken[currentToken.Length-1] != quote) {
        throw CreateFormatException("String missing ending quote.");
      }

      try {
        string escaped = currentToken.Substring(1, currentToken.Length - 2);
        ByteString result = TextFormat.UnescapeBytes(escaped);
        NextToken();
        return result;
      } catch (FormatException e) {
        // Re-create the exception so that the message carries the token position.
        throw CreateFormatException(e.Message);
      }
    }

    /// <summary>
    /// Returns a format exception with the current line and column numbers
    /// in the description, suitable for throwing.
    /// </summary>
    /// <param name="description">Text appended after the "line:column: " prefix.</param>
    public FormatException CreateFormatException(string description) {
      // Note: People generally prefer one-based line and column numbers.
      return new FormatException((line + 1) + ":" + (column + 1) + ": " + description);
    }

    /// <summary>
    /// Returns a format exception with the line and column numbers of the
    /// previous token in the description, suitable for throwing.
    /// </summary>
    /// <param name="description">Text appended after the "line:column: " prefix.</param>
    public FormatException CreateFormatExceptionPreviousToken(string description) {
      // Note: People generally prefer one-based line and column numbers.
      return new FormatException((previousLine + 1) + ":" + (previousColumn + 1) + ": " + description);
    }

    /// <summary>
    /// Constructs an appropriate FormatException for the given existing exception
    /// when trying to parse an integer.
    /// </summary>
    private FormatException CreateIntegerParseException(FormatException e) {
      return CreateFormatException("Couldn't parse integer: " + e.Message);
    }

    /// <summary>
    /// Constructs an appropriate FormatException for the given existing exception
    /// when trying to parse a float or double.
    /// </summary>
    private FormatException CreateFloatParseException(Exception e) {
      return CreateFormatException("Couldn't parse number: " + e.Message);
    }
  }
}