1 | #region Copyright notice and license
|
---|
2 |
|
---|
3 | // Protocol Buffers - Google's data interchange format
|
---|
4 | // Copyright 2008 Google Inc. All rights reserved.
|
---|
5 | // http://github.com/jskeet/dotnet-protobufs/
|
---|
6 | // Original C++/Java/Python code:
|
---|
7 | // http://code.google.com/p/protobuf/
|
---|
8 | //
|
---|
9 | // Redistribution and use in source and binary forms, with or without
|
---|
10 | // modification, are permitted provided that the following conditions are
|
---|
11 | // met:
|
---|
12 | //
|
---|
13 | // * Redistributions of source code must retain the above copyright
|
---|
14 | // notice, this list of conditions and the following disclaimer.
|
---|
15 | // * Redistributions in binary form must reproduce the above
|
---|
16 | // copyright notice, this list of conditions and the following disclaimer
|
---|
17 | // in the documentation and/or other materials provided with the
|
---|
18 | // distribution.
|
---|
19 | // * Neither the name of Google Inc. nor the names of its
|
---|
20 | // contributors may be used to endorse or promote products derived from
|
---|
21 | // this software without specific prior written permission.
|
---|
22 | //
|
---|
23 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
---|
24 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
---|
25 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
---|
26 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
---|
27 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
---|
28 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
---|
29 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
---|
30 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
---|
31 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
---|
32 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
---|
33 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
---|
34 |
|
---|
35 | #endregion
|
---|
36 |
|
---|
37 | using System;
|
---|
38 | using System.Globalization;
|
---|
39 | using System.Text.RegularExpressions;
|
---|
40 |
|
---|
41 | namespace Google.ProtocolBuffers
|
---|
42 | {
|
---|
43 | /// <summary>
|
---|
44 | /// Represents a stream of tokens parsed from a string.
|
---|
45 | /// </summary>
|
---|
46 | internal sealed class TextTokenizer
|
---|
47 | {
|
---|
/// <summary>
/// The complete input string being tokenized.
/// </summary>
private readonly string text;

/// <summary>
/// The token most recently produced by NextToken; the empty string once the
/// end of the input has been reached.
/// </summary>
private string currentToken;

/// <summary>
/// Index into the text at which the next regex match will be attempted.
/// </summary>
private int matchPos;

/// <summary>
/// Index into the text at which the current token starts.
/// </summary>
private int pos;

/// <summary>
/// Zero-based line number of the current token.
/// </summary>
private int line;

/// <summary>
/// Zero-based column number of the current token.
/// </summary>
private int column;

/// <summary>
/// Zero-based line number of the token before the current one.
/// </summary>
private int previousLine;

/// <summary>
/// Zero-based column number of the token before the current one.
/// </summary>
private int previousColumn;
80 |
|
---|
// Note: atomic groups "(?>...)" are used to mimic Java's possessive quantifiers in the
// first two regexes below. Every alternative starts with \G so that a match can only
// begin exactly at the start index handed to Regex.Match.

/// <summary>
/// Matches one or more whitespace characters and/or '#' comments, where a comment
/// extends to the end of its line.
/// </summary>
internal static readonly Regex WhitespaceAndCommentPattern = new Regex(
    "\\G(?>(\\s|(#.*$))+)",
    SilverlightCompatibility.CompiledRegexWhereAvailable | RegexOptions.Multiline);

/// <summary>
/// Matches a single token: an identifier, a number, or a quoted string. A string may be
/// left unterminated at the end of a line; ConsumeByteString reports that case.
/// </summary>
/// <remarks>
/// The character class inside each string alternative must exclude that alternative's
/// own quote character. Because the group is atomic, consuming the closing quote inside
/// the group can never be undone, so the token would run on to the end of the line.
/// </remarks>
private static readonly Regex TokenPattern = new Regex(
    "\\G[a-zA-Z_](?>[0-9a-zA-Z_+-]*)|" +               // an identifier
    "\\G[0-9+-](?>[0-9a-zA-Z_.+-]*)|" +                // a number
    "\\G\"(?>([^\"\\n\\\\]|\\\\.)*)(\"|\\\\?$)|" +     // a double-quoted string
    "\\G'(?>([^'\\n\\\\]|\\\\.)*)('|\\\\?$)",          // a single-quoted string
    SilverlightCompatibility.CompiledRegexWhereAvailable | RegexOptions.Multiline);

/// <summary>
/// Matches "inf" or "infinity", optionally preceded by '-', ignoring case.
/// </summary>
private static readonly Regex DoubleInfinity = new Regex(
    "^-?inf(inity)?$",
    SilverlightCompatibility.CompiledRegexWhereAvailable | RegexOptions.IgnoreCase);

/// <summary>
/// As DoubleInfinity, but also allows a trailing 'f' suffix.
/// </summary>
private static readonly Regex FloatInfinity = new Regex(
    "^-?inf(inity)?f?$",
    SilverlightCompatibility.CompiledRegexWhereAvailable | RegexOptions.IgnoreCase);

/// <summary>
/// Matches "nan" with an optional trailing 'f' suffix, ignoring case.
/// </summary>
private static readonly Regex FloatNan = new Regex(
    "^nanf?$",
    SilverlightCompatibility.CompiledRegexWhereAvailable | RegexOptions.IgnoreCase);
105 |
|
---|
/// <summary>
/// Creates a tokenizer over the given text and positions it on the first token
/// by skipping any leading whitespace and comments.
/// </summary>
/// <param name="text">The text to split into tokens.</param>
public TextTokenizer(string text)
{
    this.text = text;

    // Move matchPos past leading whitespace/comments, then load the first token.
    SkipWhitespace();
    NextToken();
}
114 |
|
---|
/// <summary>
/// True when the current token is empty, which is how NextToken signals
/// that the input has been exhausted.
/// </summary>
public bool AtEnd
{
    get
    {
        return 0 == currentToken.Length;
    }
}
122 |
|
---|
/// <summary>
/// Moves on to the next token, updating the line/column bookkeeping on the way.
/// At the end of the input the current token becomes the empty string.
/// </summary>
public void NextToken()
{
    // Remember where the outgoing token started so errors can still refer to it.
    previousLine = line;
    previousColumn = column;

    // Walk pos up to matchPos, counting the lines and columns that are crossed.
    for (; pos < matchPos; pos++)
    {
        if (text[pos] != '\n')
        {
            column++;
        }
        else
        {
            line++;
            column = 0;
        }
    }

    if (matchPos == text.Length)
    {
        // End of input.
        currentToken = "";
        return;
    }

    Match tokenMatch = TokenPattern.Match(text, matchPos);
    if (tokenMatch.Success)
    {
        currentToken = tokenMatch.Value;
        matchPos += tokenMatch.Length;
    }
    else
    {
        // Nothing recognisable here: the token is a single character.
        currentToken = text[matchPos].ToString();
        matchPos++;
    }

    SkipWhitespace();
}
170 |
|
---|
/// <summary>
/// Advances matchPos past any whitespace and comments so that it sits
/// at the start of the next token.
/// </summary>
private void SkipWhitespace()
{
    Match skipped = WhitespaceAndCommentPattern.Match(text, matchPos);
    if (!skipped.Success)
    {
        return;
    }

    matchPos += skipped.Length;
}
182 |
|
---|
/// <summary>
/// Consumes the current token if it is exactly equal to the given one.
/// </summary>
/// <param name="token">The token text to compare against.</param>
/// <returns>
/// True if the token matched and was consumed; false if it did not match,
/// in which case the tokenizer is left untouched.
/// </returns>
public bool TryConsume(string token)
{
    if (currentToken != token)
    {
        return false;
    }

    NextToken();
    return true;
}
196 |
|
---|
/// <summary>
/// Consumes the current token, which must be exactly equal to the given one.
/// </summary>
/// <param name="token">The token text that is required at this position.</param>
/// <exception cref="FormatException">The current token is something else.</exception>
public void Consume(string token)
{
    if (TryConsume(token))
    {
        return;
    }

    throw CreateFormatException("Expected \"" + token + "\".");
}
214 |
|
---|
/// <summary>
/// Reports whether the current token looks like an integer, without consuming it.
/// Only the first character is examined: it must be a decimal digit, '-' or '+'.
/// </summary>
/// <returns>True if the token starts like an integer; false otherwise, including at end of input.</returns>
public bool LookingAtInteger()
{
    if (currentToken.Length == 0)
    {
        return false;
    }

    char first = currentToken[0];
    bool isDigit = first >= '0' && first <= '9';
    return isDigit || first == '-' || first == '+';
}
228 |
|
---|
/// <summary>
/// Consumes the current token as an identifier and returns its text.
/// Every character must be an ASCII letter, an ASCII digit, '_' or '.'.
/// </summary>
/// <remarks>
/// An empty current token (end of input) contains no offending character,
/// so it is returned as an empty identifier rather than rejected.
/// </remarks>
/// <returns>The identifier text.</returns>
/// <exception cref="FormatException">The token contains any other character.</exception>
public string ConsumeIdentifier()
{
    for (int i = 0; i < currentToken.Length; i++)
    {
        char c = currentToken[i];
        bool allowed = (c >= 'a' && c <= 'z')
                       || (c >= 'A' && c <= 'Z')
                       || (c >= '0' && c <= '9')
                       || c == '_'
                       || c == '.';
        if (!allowed)
        {
            throw CreateFormatException("Expected identifier.");
        }
    }

    string identifier = currentToken;
    NextToken();
    return identifier;
}
254 |
|
---|
/// <summary>
/// Consumes the current token as a 32-bit signed integer.
/// </summary>
/// <returns>The value produced by TextFormat.ParseInt32 for the token.</returns>
/// <exception cref="FormatException">The token could not be parsed.</exception>
public int ConsumeInt32()
{
    int parsed;
    try
    {
        parsed = TextFormat.ParseInt32(currentToken);
    }
    catch (FormatException parseError)
    {
        // Re-report with the position of the offending token.
        throw CreateIntegerParseException(parseError);
    }

    NextToken();
    return parsed;
}
272 |
|
---|
/// <summary>
/// Consumes the current token as a 32-bit unsigned integer.
/// </summary>
/// <returns>The value produced by TextFormat.ParseUInt32 for the token.</returns>
/// <exception cref="FormatException">The token could not be parsed.</exception>
public uint ConsumeUInt32()
{
    uint parsed;
    try
    {
        parsed = TextFormat.ParseUInt32(currentToken);
    }
    catch (FormatException parseError)
    {
        // Re-report with the position of the offending token.
        throw CreateIntegerParseException(parseError);
    }

    NextToken();
    return parsed;
}
290 |
|
---|
/// <summary>
/// Consumes the current token as a 64-bit signed integer.
/// </summary>
/// <returns>The value produced by TextFormat.ParseInt64 for the token.</returns>
/// <exception cref="FormatException">The token could not be parsed.</exception>
public long ConsumeInt64()
{
    long parsed;
    try
    {
        parsed = TextFormat.ParseInt64(currentToken);
    }
    catch (FormatException parseError)
    {
        // Re-report with the position of the offending token.
        throw CreateIntegerParseException(parseError);
    }

    NextToken();
    return parsed;
}
308 |
|
---|
/// <summary>
/// Consumes the current token as a 64-bit unsigned integer.
/// </summary>
/// <returns>The value produced by TextFormat.ParseUInt64 for the token.</returns>
/// <exception cref="FormatException">The token could not be parsed.</exception>
public ulong ConsumeUInt64()
{
    ulong parsed;
    try
    {
        parsed = TextFormat.ParseUInt64(currentToken);
    }
    catch (FormatException parseError)
    {
        // Re-report with the position of the offending token.
        throw CreateIntegerParseException(parseError);
    }

    NextToken();
    return parsed;
}
326 |
|
---|
/// <summary>
/// Consumes the current token as a double and returns its value.
/// </summary>
/// <remarks>
/// "inf", "infinity" (optionally negated) and "nan" are recognised case-insensitively
/// and handled here, because double.Parse does not accept those spellings. All
/// comparisons are ordinal: the text format is machine-readable, so the result must
/// not depend on culture-specific comparison rules.
/// </remarks>
/// <returns>The parsed value.</returns>
/// <exception cref="FormatException">The token is not a valid double.</exception>
public double ConsumeDouble()
{
    if (DoubleInfinity.IsMatch(currentToken))
    {
        // The regex matched, so the token is non-empty and a sign can only be at index 0.
        bool negative = currentToken[0] == '-';
        NextToken();
        return negative ? double.NegativeInfinity : double.PositiveInfinity;
    }
    if (currentToken.Equals("nan", StringComparison.OrdinalIgnoreCase))
    {
        NextToken();
        return double.NaN;
    }

    try
    {
        double result = double.Parse(currentToken, CultureInfo.InvariantCulture);
        NextToken();
        return result;
    }
    catch (FormatException e)
    {
        throw CreateFloatParseException(e);
    }
    catch (OverflowException e)
    {
        throw CreateFloatParseException(e);
    }
}
362 |
|
---|
/// <summary>
/// Consumes the current token as a float and returns its value.
/// </summary>
/// <remarks>
/// "inf", "infinity" (optionally negated) and "nan", each with an optional 'f' suffix,
/// are recognised case-insensitively and handled here, because float.Parse does not
/// accept those spellings. For ordinary numbers exactly one trailing lowercase 'f'
/// is stripped before parsing. The current token is never modified, so a failed parse
/// leaves the tokenizer exactly as it was.
/// </remarks>
/// <returns>The parsed value.</returns>
/// <exception cref="FormatException">The token is not a valid float.</exception>
public float ConsumeFloat()
{
    if (FloatInfinity.IsMatch(currentToken))
    {
        // The regex matched, so the token is non-empty and a sign can only be at index 0.
        bool negative = currentToken[0] == '-';
        NextToken();
        return negative ? float.NegativeInfinity : float.PositiveInfinity;
    }
    if (FloatNan.IsMatch(currentToken))
    {
        NextToken();
        return float.NaN;
    }

    // Work on a copy: overwriting currentToken would corrupt the tokenizer state if
    // parsing fails. Only a single suffix character is removed, so "1ff" is rejected.
    string number = currentToken;
    if (number.EndsWith("f", StringComparison.Ordinal))
    {
        number = number.Substring(0, number.Length - 1);
    }

    try
    {
        float result = float.Parse(number, CultureInfo.InvariantCulture);
        NextToken();
        return result;
    }
    catch (FormatException e)
    {
        throw CreateFloatParseException(e);
    }
    catch (OverflowException e)
    {
        throw CreateFloatParseException(e);
    }
}
403 |
|
---|
/// <summary>
/// Consumes the current token as a Boolean. Only the exact, case-sensitive
/// spellings "true" and "false" are accepted.
/// </summary>
/// <returns>The Boolean value of the token.</returns>
/// <exception cref="FormatException">The token is neither "true" nor "false".</exception>
public bool ConsumeBoolean()
{
    bool value;
    switch (currentToken)
    {
        case "true":
            value = true;
            break;
        case "false":
            value = false;
            break;
        default:
            throw CreateFormatException("Expected \"true\" or \"false\".");
    }

    NextToken();
    return value;
}
422 |
|
---|
/// <summary>
/// Consumes the current token as a quoted string and returns its unescaped
/// value, obtained by converting the result of ConsumeByteString with ToStringUtf8.
/// </summary>
/// <returns>The unescaped string value.</returns>
/// <exception cref="FormatException">The token is not a well-formed string.</exception>
public string ConsumeString()
{
    ByteString bytes = ConsumeByteString();
    return bytes.ToStringUtf8();
}
431 |
|
---|
/// <summary>
/// Consumes the current token as a quoted string and returns its contents,
/// unescaped by TextFormat.UnescapeBytes, as a ByteString.
/// </summary>
/// <returns>The unescaped contents between the quotes.</returns>
/// <exception cref="FormatException">
/// The token does not start with a single or double quote, does not end with
/// the same quote, or contains an invalid escape sequence.
/// </exception>
public ByteString ConsumeByteString()
{
    int length = currentToken.Length;

    // '\0' stands in for "no first character" when we are at the end of the input.
    char quote = length == 0 ? '\0' : currentToken[0];
    bool opensWithQuote = quote == '\"' || quote == '\'';
    if (!opensWithQuote)
    {
        throw CreateFormatException("Expected string.");
    }

    // A lone quote character, or a different final character, means the string was never closed.
    bool closed = length >= 2 && currentToken[length - 1] == quote;
    if (!closed)
    {
        throw CreateFormatException("String missing ending quote.");
    }

    ByteString unescaped;
    try
    {
        unescaped = TextFormat.UnescapeBytes(currentToken.Substring(1, length - 2));
    }
    catch (FormatException unescapeError)
    {
        // Re-report with the position of the offending token.
        throw CreateFormatException(unescapeError.Message);
    }

    NextToken();
    return unescaped;
}
462 |
|
---|
/// <summary>
/// Builds (but does not throw) a FormatException whose message is prefixed with
/// the position of the current token, in the form "line:column: description".
/// </summary>
/// <param name="description">The text describing the problem.</param>
/// <returns>The exception, ready to be thrown by the caller.</returns>
public FormatException CreateFormatException(string description)
{
    // Positions are tracked zero-based but reported one-based.
    int reportedLine = line + 1;
    int reportedColumn = column + 1;
    return new FormatException(reportedLine + ":" + reportedColumn + ": " + description);
}
472 |
|
---|
/// <summary>
/// Builds (but does not throw) a FormatException whose message is prefixed with
/// the position of the previous token, in the form "line:column: description".
/// </summary>
/// <param name="description">The text describing the problem.</param>
/// <returns>The exception, ready to be thrown by the caller.</returns>
public FormatException CreateFormatExceptionPreviousToken(string description)
{
    // Positions are tracked zero-based but reported one-based.
    int reportedLine = previousLine + 1;
    int reportedColumn = previousColumn + 1;
    return new FormatException(reportedLine + ":" + reportedColumn + ": " + description);
}
482 |
|
---|
/// <summary>
/// Wraps the message of an integer parsing failure in a FormatException that
/// carries the position of the current token.
/// </summary>
/// <param name="e">The exception raised while parsing the integer.</param>
/// <returns>The exception, ready to be thrown by the caller.</returns>
private FormatException CreateIntegerParseException(FormatException e)
{
    string description = "Couldn't parse integer: " + e.Message;
    return CreateFormatException(description);
}
491 |
|
---|
/// <summary>
/// Wraps the message of a float or double parsing failure in a FormatException
/// that carries the position of the current token.
/// </summary>
/// <param name="e">The exception raised while parsing the number.</param>
/// <returns>The exception, ready to be thrown by the caller.</returns>
private FormatException CreateFloatParseException(Exception e)
{
    string description = "Couldn't parse number: " + e.Message;
    return CreateFormatException(description);
}
500 | }
|
---|
501 | } |
---|