Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2922-DataCompletenessChartPerf/HeuristicLab.ExtLibs/HeuristicLab.ProtobufCS/2.4.1/ProtobufCS/src/ProtocolBuffers/TextTokenizer.cs @ 17511

Last change on this file since 17511 was 8295, checked in by abeham, 12 years ago

#1897:

  • Removed protocol buffers 0.9.1
  • Added protocol buffers 2.4.1
  • Updated proto processing command
File size: 17.4 KB
Line 
1#region Copyright notice and license
2
3// Protocol Buffers - Google's data interchange format
4// Copyright 2008 Google Inc.  All rights reserved.
5// http://github.com/jskeet/dotnet-protobufs/
6// Original C++/Java/Python code:
7// http://code.google.com/p/protobuf/
8//
9// Redistribution and use in source and binary forms, with or without
10// modification, are permitted provided that the following conditions are
11// met:
12//
13//     * Redistributions of source code must retain the above copyright
14// notice, this list of conditions and the following disclaimer.
15//     * Redistributions in binary form must reproduce the above
16// copyright notice, this list of conditions and the following disclaimer
17// in the documentation and/or other materials provided with the
18// distribution.
19//     * Neither the name of Google Inc. nor the names of its
20// contributors may be used to endorse or promote products derived from
21// this software without specific prior written permission.
22//
23// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34
35#endregion
36
37using System;
38using System.Globalization;
39using System.Text.RegularExpressions;
40
41namespace Google.ProtocolBuffers
42{
43    /// <summary>
44    /// Represents a stream of tokens parsed from a string.
45    /// </summary>
46    internal sealed class TextTokenizer
47    {
48        private readonly string text;
49        private string currentToken;
50
51        /// <summary>
52        /// The character index within the text to perform the next regex match at.
53        /// </summary>
54        private int matchPos = 0;
55
56        /// <summary>
57        /// The character index within the text at which the current token begins.
58        /// </summary>
59        private int pos = 0;
60
61        /// <summary>
62        /// The line number of the current token.
63        /// </summary>
64        private int line = 0;
65
66        /// <summary>
67        /// The column number of the current token.
68        /// </summary>
69        private int column = 0;
70
71        /// <summary>
72        /// The line number of the previous token.
73        /// </summary>
74        private int previousLine = 0;
75
76        /// <summary>
77        /// The column number of the previous token.
78        /// </summary>
79        private int previousColumn = 0;
80
81        // Note: atomic groups used to mimic possessive quantifiers in Java in both of these regexes
82        internal static readonly Regex WhitespaceAndCommentPattern = new Regex("\\G(?>(\\s|(#.*$))+)",
83                                                                               SilverlightCompatibility.
84                                                                                   CompiledRegexWhereAvailable |
85                                                                               RegexOptions.Multiline);
86
87        private static readonly Regex TokenPattern = new Regex(
88            "\\G[a-zA-Z_](?>[0-9a-zA-Z_+-]*)|" + // an identifier
89            "\\G[0-9+-](?>[0-9a-zA-Z_.+-]*)|" + // a number
90            "\\G\"(?>([^\"\\\n\\\\]|\\\\.)*)(\"|\\\\?$)|" + // a double-quoted string
91            "\\G\'(?>([^\"\\\n\\\\]|\\\\.)*)(\'|\\\\?$)", // a single-quoted string
92            SilverlightCompatibility.CompiledRegexWhereAvailable | RegexOptions.Multiline);
93
94        private static readonly Regex DoubleInfinity = new Regex("^-?inf(inity)?$",
95                                                                 SilverlightCompatibility.CompiledRegexWhereAvailable |
96                                                                 RegexOptions.IgnoreCase);
97
98        private static readonly Regex FloatInfinity = new Regex("^-?inf(inity)?f?$",
99                                                                SilverlightCompatibility.CompiledRegexWhereAvailable |
100                                                                RegexOptions.IgnoreCase);
101
102        private static readonly Regex FloatNan = new Regex("^nanf?$",
103                                                           SilverlightCompatibility.CompiledRegexWhereAvailable |
104                                                           RegexOptions.IgnoreCase);
105
106        /** Construct a tokenizer that parses tokens from the given text. */
107
108        public TextTokenizer(string text)
109        {
110            this.text = text;
111            SkipWhitespace();
112            NextToken();
113        }
114
115        /// <summary>
116        /// Are we at the end of the input?
117        /// </summary>
118        public bool AtEnd
119        {
120            get { return currentToken.Length == 0; }
121        }
122
123        /// <summary>
124        /// Advances to the next token.
125        /// </summary>
126        public void NextToken()
127        {
128            previousLine = line;
129            previousColumn = column;
130
131            // Advance the line counter to the current position.
132            while (pos < matchPos)
133            {
134                if (text[pos] == '\n')
135                {
136                    ++line;
137                    column = 0;
138                }
139                else
140                {
141                    ++column;
142                }
143                ++pos;
144            }
145
146            // Match the next token.
147            if (matchPos == text.Length)
148            {
149                // EOF
150                currentToken = "";
151            }
152            else
153            {
154                Match match = TokenPattern.Match(text, matchPos);
155                if (match.Success)
156                {
157                    currentToken = match.Value;
158                    matchPos += match.Length;
159                }
160                else
161                {
162                    // Take one character.
163                    currentToken = text[matchPos].ToString();
164                    matchPos++;
165                }
166
167                SkipWhitespace();
168            }
169        }
170
171        /// <summary>
172        /// Skip over any whitespace so that matchPos starts at the next token.
173        /// </summary>
174        private void SkipWhitespace()
175        {
176            Match match = WhitespaceAndCommentPattern.Match(text, matchPos);
177            if (match.Success)
178            {
179                matchPos += match.Length;
180            }
181        }
182
183        /// <summary>
184        /// If the next token exactly matches the given token, consume it and return
185        /// true. Otherwise, return false without doing anything.
186        /// </summary>
187        public bool TryConsume(string token)
188        {
189            if (currentToken == token)
190            {
191                NextToken();
192                return true;
193            }
194            return false;
195        }
196
197        /*
198     * If the next token exactly matches {@code token}, consume it.  Otherwise,
199     * throw a {@link ParseException}.
200     */
201
202        /// <summary>
203        /// If the next token exactly matches the specified one, consume it.
204        /// Otherwise, throw a FormatException.
205        /// </summary>
206        /// <param name="token"></param>
207        public void Consume(string token)
208        {
209            if (!TryConsume(token))
210            {
211                throw CreateFormatException("Expected \"" + token + "\".");
212            }
213        }
214
215        /// <summary>
216        /// Returns true if the next token is an integer, but does not consume it.
217        /// </summary>
218        public bool LookingAtInteger()
219        {
220            if (currentToken.Length == 0)
221            {
222                return false;
223            }
224
225            char c = currentToken[0];
226            return ('0' <= c && c <= '9') || c == '-' || c == '+';
227        }
228
229        /// <summary>
230        /// If the next token is an identifier, consume it and return its value.
231        /// Otherwise, throw a FormatException.
232        /// </summary>
233        public string ConsumeIdentifier()
234        {
235            foreach (char c in currentToken)
236            {
237                if (('a' <= c && c <= 'z') ||
238                    ('A' <= c && c <= 'Z') ||
239                    ('0' <= c && c <= '9') ||
240                    (c == '_') || (c == '.'))
241                {
242                    // OK
243                }
244                else
245                {
246                    throw CreateFormatException("Expected identifier.");
247                }
248            }
249
250            string result = currentToken;
251            NextToken();
252            return result;
253        }
254
255        /// <summary>
256        /// If the next token is a 32-bit signed integer, consume it and return its
257        /// value. Otherwise, throw a FormatException.
258        /// </summary>
259        public int ConsumeInt32()
260        {
261            try
262            {
263                int result = TextFormat.ParseInt32(currentToken);
264                NextToken();
265                return result;
266            }
267            catch (FormatException e)
268            {
269                throw CreateIntegerParseException(e);
270            }
271        }
272
273        /// <summary>
274        /// If the next token is a 32-bit unsigned integer, consume it and return its
275        /// value. Otherwise, throw a FormatException.
276        /// </summary>
277        public uint ConsumeUInt32()
278        {
279            try
280            {
281                uint result = TextFormat.ParseUInt32(currentToken);
282                NextToken();
283                return result;
284            }
285            catch (FormatException e)
286            {
287                throw CreateIntegerParseException(e);
288            }
289        }
290
291        /// <summary>
292        /// If the next token is a 64-bit signed integer, consume it and return its
293        /// value. Otherwise, throw a FormatException.
294        /// </summary>
295        public long ConsumeInt64()
296        {
297            try
298            {
299                long result = TextFormat.ParseInt64(currentToken);
300                NextToken();
301                return result;
302            }
303            catch (FormatException e)
304            {
305                throw CreateIntegerParseException(e);
306            }
307        }
308
309        /// <summary>
310        /// If the next token is a 64-bit unsigned integer, consume it and return its
311        /// value. Otherwise, throw a FormatException.
312        /// </summary>
313        public ulong ConsumeUInt64()
314        {
315            try
316            {
317                ulong result = TextFormat.ParseUInt64(currentToken);
318                NextToken();
319                return result;
320            }
321            catch (FormatException e)
322            {
323                throw CreateIntegerParseException(e);
324            }
325        }
326
327        /// <summary>
328        /// If the next token is a double, consume it and return its value.
329        /// Otherwise, throw a FormatException.
330        /// </summary>
331        public double ConsumeDouble()
332        {
333            // We need to parse infinity and nan separately because
334            // double.Parse() does not accept "inf", "infinity", or "nan".
335            if (DoubleInfinity.IsMatch(currentToken))
336            {
337                bool negative = currentToken.StartsWith("-");
338                NextToken();
339                return negative ? double.NegativeInfinity : double.PositiveInfinity;
340            }
341            if (currentToken.Equals("nan", StringComparison.InvariantCultureIgnoreCase))
342            {
343                NextToken();
344                return Double.NaN;
345            }
346
347            try
348            {
349                double result = double.Parse(currentToken, CultureInfo.InvariantCulture);
350                NextToken();
351                return result;
352            }
353            catch (FormatException e)
354            {
355                throw CreateFloatParseException(e);
356            }
357            catch (OverflowException e)
358            {
359                throw CreateFloatParseException(e);
360            }
361        }
362
363        /// <summary>
364        /// If the next token is a float, consume it and return its value.
365        /// Otherwise, throw a FormatException.
366        /// </summary>
367        public float ConsumeFloat()
368        {
369            // We need to parse infinity and nan separately because
370            // Float.parseFloat() does not accept "inf", "infinity", or "nan".
371            if (FloatInfinity.IsMatch(currentToken))
372            {
373                bool negative = currentToken.StartsWith("-");
374                NextToken();
375                return negative ? float.NegativeInfinity : float.PositiveInfinity;
376            }
377            if (FloatNan.IsMatch(currentToken))
378            {
379                NextToken();
380                return float.NaN;
381            }
382
383            if (currentToken.EndsWith("f"))
384            {
385                currentToken = currentToken.TrimEnd('f');
386            }
387
388            try
389            {
390                float result = float.Parse(currentToken, CultureInfo.InvariantCulture);
391                NextToken();
392                return result;
393            }
394            catch (FormatException e)
395            {
396                throw CreateFloatParseException(e);
397            }
398            catch (OverflowException e)
399            {
400                throw CreateFloatParseException(e);
401            }
402        }
403
404        /// <summary>
405        /// If the next token is a Boolean, consume it and return its value.
406        /// Otherwise, throw a FormatException.   
407        /// </summary>
408        public bool ConsumeBoolean()
409        {
410            if (currentToken == "true")
411            {
412                NextToken();
413                return true;
414            }
415            if (currentToken == "false")
416            {
417                NextToken();
418                return false;
419            }
420            throw CreateFormatException("Expected \"true\" or \"false\".");
421        }
422
423        /// <summary>
424        /// If the next token is a string, consume it and return its (unescaped) value.
425        /// Otherwise, throw a FormatException.
426        /// </summary>
427        public string ConsumeString()
428        {
429            return ConsumeByteString().ToStringUtf8();
430        }
431
432        /// <summary>
433        /// If the next token is a string, consume it, unescape it as a
434        /// ByteString and return it. Otherwise, throw a FormatException.
435        /// </summary>
436        public ByteString ConsumeByteString()
437        {
438            char quote = currentToken.Length > 0 ? currentToken[0] : '\0';
439            if (quote != '\"' && quote != '\'')
440            {
441                throw CreateFormatException("Expected string.");
442            }
443
444            if (currentToken.Length < 2 ||
445                currentToken[currentToken.Length - 1] != quote)
446            {
447                throw CreateFormatException("String missing ending quote.");
448            }
449
450            try
451            {
452                string escaped = currentToken.Substring(1, currentToken.Length - 2);
453                ByteString result = TextFormat.UnescapeBytes(escaped);
454                NextToken();
455                return result;
456            }
457            catch (FormatException e)
458            {
459                throw CreateFormatException(e.Message);
460            }
461        }
462
463        /// <summary>
464        /// Returns a format exception with the current line and column numbers
465        /// in the description, suitable for throwing.
466        /// </summary>
467        public FormatException CreateFormatException(string description)
468        {
469            // Note:  People generally prefer one-based line and column numbers.
470            return new FormatException((line + 1) + ":" + (column + 1) + ": " + description);
471        }
472
473        /// <summary>
474        /// Returns a format exception with the line and column numbers of the
475        /// previous token in the description, suitable for throwing.
476        /// </summary>
477        public FormatException CreateFormatExceptionPreviousToken(string description)
478        {
479            // Note:  People generally prefer one-based line and column numbers.
480            return new FormatException((previousLine + 1) + ":" + (previousColumn + 1) + ": " + description);
481        }
482
483        /// <summary>
484        /// Constructs an appropriate FormatException for the given existing exception
485        /// when trying to parse an integer.
486        /// </summary>
487        private FormatException CreateIntegerParseException(FormatException e)
488        {
489            return CreateFormatException("Couldn't parse integer: " + e.Message);
490        }
491
492        /// <summary>
493        /// Constructs an appropriate FormatException for the given existing exception
494        /// when trying to parse a float or double.
495        /// </summary>
496        private FormatException CreateFloatParseException(Exception e)
497        {
498            return CreateFormatException("Couldn't parse number: " + e.Message);
499        }
500    }
501}
Note: See TracBrowser for help on using the repository browser.