Context Navigation

← Previous Revision
Next Revision →
Blame
Revision Log

source: branches/3107_LearningALPS/HeuristicLab.ExtLibs/HeuristicLab.ProtobufCS/2.4.1/ProtobufCS/src/ProtocolBuffers/TextTokenizer.cs

Visit:

Last change on this file was 8295, checked in by abeham, 12 years ago

Removed protocol buffers 0.9.1
Added protocol buffers 2.4.1
Updated proto processing command

File size: 17.4 KB

Line
#region Copyright notice and license

// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// http://github.com/jskeet/dotnet-protobufs/
// Original C++/Java/Python code:
// http://code.google.com/p/protobuf/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#endregion
36
37	using System;
38	using System.Globalization;
39	using System.Text.RegularExpressions;
40
namespace Google.ProtocolBuffers
{
    /// <summary>
    /// Represents a stream of tokens parsed from a string.
    /// </summary>
    /// <remarks>
    /// The tokenizer always holds one look-ahead token (the "current" token).
    /// The <c>Consume*</c> and <c>TryConsume</c> members inspect that token and,
    /// on success, advance to the following one. An empty current token marks
    /// the end of the input (see <see cref="AtEnd"/>).
    /// </remarks>
    internal sealed class TextTokenizer
    {
        /// <summary>
        /// The complete input being tokenized.
        /// </summary>
        private readonly string text;

        /// <summary>
        /// The look-ahead token; the empty string once the input is exhausted.
        /// </summary>
        private string currentToken;

        /// <summary>
        /// The character index within the text to perform the next regex match at.
        /// </summary>
        private int matchPos = 0;

        /// <summary>
        /// The character index within the text at which the current token begins.
        /// </summary>
        private int pos = 0;

        /// <summary>
        /// The (zero-based) line number of the current token.
        /// </summary>
        private int line = 0;

        /// <summary>
        /// The (zero-based) column number of the current token.
        /// </summary>
        private int column = 0;

        /// <summary>
        /// The (zero-based) line number of the previous token.
        /// </summary>
        private int previousLine = 0;

        /// <summary>
        /// The (zero-based) column number of the previous token.
        /// </summary>
        private int previousColumn = 0;

        // Note: atomic groups used to mimic possessive quantifiers in Java in both of these regexes.
        // \G anchors each match at the start index passed to Regex.Match, so a
        // match can never skip ahead over unrecognised characters.

        /// <summary>
        /// Matches a run of whitespace and/or '#' comments (a comment extends to
        /// the end of its line).
        /// </summary>
        internal static readonly Regex WhitespaceAndCommentPattern = new Regex(
            "\\G(?>(\\s|(#.*$))+)",
            SilverlightCompatibility.CompiledRegexWhereAvailable | RegexOptions.Multiline);

        /// <summary>
        /// Matches a single identifier, number or quoted string token. A string
        /// that is not closed before the end of its line is still matched (up to
        /// the end of the line) so that <see cref="ConsumeByteString"/> can report
        /// the missing quote.
        /// </summary>
        private static readonly Regex TokenPattern = new Regex(
            "\\G[a-zA-Z_](?>[0-9a-zA-Z_+-]*)|" + // an identifier
            "\\G[0-9+-](?>[0-9a-zA-Z_.+-]*)|" + // a number
            "\\G\"(?>([^\"\\\n\\\\]|\\\\.)*)(\"|\\\\?$)|" + // a double-quoted string
            // The body of a single-quoted string must exclude the single quote
            // itself (not the double quote); otherwise the atomic group runs
            // past the closing quote to the end of the line.
            "\\G\'(?>([^\'\\\n\\\\]|\\\\.)*)(\'|\\\\?$)", // a single-quoted string
            SilverlightCompatibility.CompiledRegexWhereAvailable | RegexOptions.Multiline);

        // The three patterns below recognise keywords of a machine-readable
        // format, so case folding must not depend on the current culture
        // (e.g. 'I' does not fold to 'i' under Turkish casing rules).

        private static readonly Regex DoubleInfinity = new Regex(
            "^-?inf(inity)?$",
            SilverlightCompatibility.CompiledRegexWhereAvailable |
            RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

        private static readonly Regex FloatInfinity = new Regex(
            "^-?inf(inity)?f?$",
            SilverlightCompatibility.CompiledRegexWhereAvailable |
            RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

        private static readonly Regex FloatNan = new Regex(
            "^nanf?$",
            SilverlightCompatibility.CompiledRegexWhereAvailable |
            RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

        /// <summary>
        /// Constructs a tokenizer that parses tokens from the given text and
        /// positions it on the first token.
        /// </summary>
        /// <param name="text">The text to tokenize.</param>
        /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
        public TextTokenizer(string text)
        {
            if (text == null)
            {
                throw new ArgumentNullException("text");
            }

            this.text = text;
            SkipWhitespace();
            NextToken();
        }

        /// <summary>
        /// Are we at the end of the input?
        /// </summary>
        public bool AtEnd
        {
            get { return currentToken.Length == 0; }
        }

        /// <summary>
        /// Advances to the next token.
        /// </summary>
        /// <remarks>
        /// The position of the token being left is remembered as the "previous"
        /// position. If no token pattern matches at the current position, a
        /// single character is taken as the token.
        /// </remarks>
        public void NextToken()
        {
            previousLine = line;
            previousColumn = column;

            // Advance the line counter to the current position.
            while (pos < matchPos)
            {
                if (text[pos] == '\n')
                {
                    ++line;
                    column = 0;
                }
                else
                {
                    ++column;
                }
                ++pos;
            }

            // Match the next token.
            if (matchPos == text.Length)
            {
                // EOF
                currentToken = "";
            }
            else
            {
                Match match = TokenPattern.Match(text, matchPos);
                if (match.Success)
                {
                    currentToken = match.Value;
                    matchPos += match.Length;
                }
                else
                {
                    // Take one character.
                    currentToken = text[matchPos].ToString();
                    matchPos++;
                }

                SkipWhitespace();
            }
        }

        /// <summary>
        /// Skip over any whitespace so that matchPos starts at the next token.
        /// </summary>
        private void SkipWhitespace()
        {
            Match match = WhitespaceAndCommentPattern.Match(text, matchPos);
            if (match.Success)
            {
                matchPos += match.Length;
            }
        }

        /// <summary>
        /// If the next token exactly matches the given token, consume it and return
        /// true. Otherwise, return false without doing anything.
        /// </summary>
        /// <param name="token">The exact token text to look for.</param>
        /// <returns>true if the token was matched and consumed; otherwise false.</returns>
        public bool TryConsume(string token)
        {
            if (currentToken == token)
            {
                NextToken();
                return true;
            }
            return false;
        }

        /// <summary>
        /// If the next token exactly matches the specified one, consume it.
        /// Otherwise, throw a FormatException.
        /// </summary>
        /// <param name="token">The exact token text that is expected.</param>
        /// <exception cref="FormatException">The next token is not <paramref name="token"/>.</exception>
        public void Consume(string token)
        {
            if (!TryConsume(token))
            {
                throw CreateFormatException("Expected \"" + token + "\".");
            }
        }

        /// <summary>
        /// Returns true if the next token is an integer, but does not consume it.
        /// </summary>
        /// <remarks>
        /// Only the first character is examined: a digit, '-' or '+' counts as
        /// the start of an integer.
        /// </remarks>
        public bool LookingAtInteger()
        {
            if (currentToken.Length == 0)
            {
                return false;
            }

            char c = currentToken[0];
            return ('0' <= c && c <= '9') || c == '-' || c == '+';
        }

        /// <summary>
        /// If the next token is an identifier, consume it and return its value.
        /// Otherwise, throw a FormatException.
        /// </summary>
        /// <remarks>
        /// A token is accepted when every character is an ASCII letter, an ASCII
        /// digit, '_' or '.'. Note that the empty token seen at the end of the
        /// input contains no offending character, so it is returned as an empty
        /// string rather than rejected.
        /// </remarks>
        /// <returns>The text of the consumed token.</returns>
        /// <exception cref="FormatException">The next token contains a character
        /// that is not allowed in an identifier.</exception>
        public string ConsumeIdentifier()
        {
            foreach (char c in currentToken)
            {
                bool isIdentifierChar =
                    ('a' <= c && c <= 'z') ||
                    ('A' <= c && c <= 'Z') ||
                    ('0' <= c && c <= '9') ||
                    (c == '_') || (c == '.');

                if (!isIdentifierChar)
                {
                    throw CreateFormatException("Expected identifier.");
                }
            }

            string result = currentToken;
            NextToken();
            return result;
        }

        /// <summary>
        /// If the next token is a 32-bit signed integer, consume it and return its
        /// value. Otherwise, throw a FormatException.
        /// </summary>
        /// <exception cref="FormatException">TextFormat.ParseInt32 rejected the token;
        /// the message carries the position of the token.</exception>
        public int ConsumeInt32()
        {
            try
            {
                int result = TextFormat.ParseInt32(currentToken);
                NextToken();
                return result;
            }
            catch (FormatException e)
            {
                throw CreateIntegerParseException(e);
            }
        }

        /// <summary>
        /// If the next token is a 32-bit unsigned integer, consume it and return its
        /// value. Otherwise, throw a FormatException.
        /// </summary>
        /// <exception cref="FormatException">TextFormat.ParseUInt32 rejected the token;
        /// the message carries the position of the token.</exception>
        public uint ConsumeUInt32()
        {
            try
            {
                uint result = TextFormat.ParseUInt32(currentToken);
                NextToken();
                return result;
            }
            catch (FormatException e)
            {
                throw CreateIntegerParseException(e);
            }
        }

        /// <summary>
        /// If the next token is a 64-bit signed integer, consume it and return its
        /// value. Otherwise, throw a FormatException.
        /// </summary>
        /// <exception cref="FormatException">TextFormat.ParseInt64 rejected the token;
        /// the message carries the position of the token.</exception>
        public long ConsumeInt64()
        {
            try
            {
                long result = TextFormat.ParseInt64(currentToken);
                NextToken();
                return result;
            }
            catch (FormatException e)
            {
                throw CreateIntegerParseException(e);
            }
        }

        /// <summary>
        /// If the next token is a 64-bit unsigned integer, consume it and return its
        /// value. Otherwise, throw a FormatException.
        /// </summary>
        /// <exception cref="FormatException">TextFormat.ParseUInt64 rejected the token;
        /// the message carries the position of the token.</exception>
        public ulong ConsumeUInt64()
        {
            try
            {
                ulong result = TextFormat.ParseUInt64(currentToken);
                NextToken();
                return result;
            }
            catch (FormatException e)
            {
                throw CreateIntegerParseException(e);
            }
        }

        /// <summary>
        /// If the next token is a double, consume it and return its value.
        /// Otherwise, throw a FormatException.
        /// </summary>
        /// <exception cref="FormatException">The next token could not be parsed
        /// as a double.</exception>
        public double ConsumeDouble()
        {
            // We need to parse infinity and nan separately because
            // double.Parse() does not accept "inf", "infinity", or "nan".
            if (DoubleInfinity.IsMatch(currentToken))
            {
                bool negative = currentToken.StartsWith("-", StringComparison.Ordinal);
                NextToken();
                return negative ? double.NegativeInfinity : double.PositiveInfinity;
            }
            if (currentToken.Equals("nan", StringComparison.OrdinalIgnoreCase))
            {
                NextToken();
                return Double.NaN;
            }

            try
            {
                double result = double.Parse(currentToken, CultureInfo.InvariantCulture);
                NextToken();
                return result;
            }
            catch (FormatException e)
            {
                throw CreateFloatParseException(e);
            }
            catch (OverflowException e)
            {
                throw CreateFloatParseException(e);
            }
        }

        /// <summary>
        /// If the next token is a float, consume it and return its value.
        /// Otherwise, throw a FormatException.
        /// </summary>
        /// <remarks>
        /// A single trailing 'f' suffix (as in "1.5f") is accepted and ignored.
        /// </remarks>
        /// <exception cref="FormatException">The next token could not be parsed
        /// as a float.</exception>
        public float ConsumeFloat()
        {
            // We need to parse infinity and nan separately because
            // float.Parse() does not accept "inf", "infinity", or "nan".
            if (FloatInfinity.IsMatch(currentToken))
            {
                bool negative = currentToken.StartsWith("-", StringComparison.Ordinal);
                NextToken();
                return negative ? float.NegativeInfinity : float.PositiveInfinity;
            }
            if (FloatNan.IsMatch(currentToken))
            {
                NextToken();
                return float.NaN;
            }

            // Strip the optional suffix from a local copy so that the current
            // token is left untouched when parsing fails, and strip only one
            // 'f' so that malformed input such as "1.0ff" is still rejected.
            string number = currentToken;
            if (number.EndsWith("f", StringComparison.Ordinal))
            {
                number = number.Substring(0, number.Length - 1);
            }

            try
            {
                float result = float.Parse(number, CultureInfo.InvariantCulture);
                NextToken();
                return result;
            }
            catch (FormatException e)
            {
                throw CreateFloatParseException(e);
            }
            catch (OverflowException e)
            {
                throw CreateFloatParseException(e);
            }
        }

        /// <summary>
        /// If the next token is a Boolean, consume it and return its value.
        /// Otherwise, throw a FormatException.
        /// </summary>
        /// <exception cref="FormatException">The next token is neither "true"
        /// nor "false".</exception>
        public bool ConsumeBoolean()
        {
            if (currentToken == "true")
            {
                NextToken();
                return true;
            }
            if (currentToken == "false")
            {
                NextToken();
                return false;
            }
            throw CreateFormatException("Expected \"true\" or \"false\".");
        }

        /// <summary>
        /// If the next token is a string, consume it and return its (unescaped) value.
        /// Otherwise, throw a FormatException.
        /// </summary>
        /// <exception cref="FormatException">The next token is not a well-formed
        /// quoted string.</exception>
        public string ConsumeString()
        {
            return ConsumeByteString().ToStringUtf8();
        }

        /// <summary>
        /// If the next token is a string, consume it, unescape it as a
        /// ByteString and return it. Otherwise, throw a FormatException.
        /// </summary>
        /// <exception cref="FormatException">The next token does not start with a
        /// quote, does not end with the matching quote, or could not be
        /// unescaped by TextFormat.UnescapeBytes.</exception>
        public ByteString ConsumeByteString()
        {
            char quote = currentToken.Length > 0 ? currentToken[0] : '\0';
            if (quote != '\"' && quote != '\'')
            {
                throw CreateFormatException("Expected string.");
            }

            // A lone quote character is both the first and the last character
            // of the token, hence the explicit length check.
            if (currentToken.Length < 2 ||
                currentToken[currentToken.Length - 1] != quote)
            {
                throw CreateFormatException("String missing ending quote.");
            }

            try
            {
                string escaped = currentToken.Substring(1, currentToken.Length - 2);
                ByteString result = TextFormat.UnescapeBytes(escaped);
                NextToken();
                return result;
            }
            catch (FormatException e)
            {
                throw CreateFormatException(e.Message);
            }
        }

        /// <summary>
        /// Returns a format exception with the current line and column numbers
        /// in the description, suitable for throwing.
        /// </summary>
        /// <param name="description">The message to append after the position.</param>
        /// <returns>The exception; it is created but not thrown.</returns>
        public FormatException CreateFormatException(string description)
        {
            // Note: People generally prefer one-based line and column numbers.
            return new FormatException((line + 1) + ":" + (column + 1) + ": " + description);
        }

        /// <summary>
        /// Returns a format exception with the line and column numbers of the
        /// previous token in the description, suitable for throwing.
        /// </summary>
        /// <param name="description">The message to append after the position.</param>
        /// <returns>The exception; it is created but not thrown.</returns>
        public FormatException CreateFormatExceptionPreviousToken(string description)
        {
            // Note: People generally prefer one-based line and column numbers.
            return new FormatException((previousLine + 1) + ":" + (previousColumn + 1) + ": " + description);
        }

        /// <summary>
        /// Constructs an appropriate FormatException for the given existing exception
        /// when trying to parse an integer.
        /// </summary>
        private FormatException CreateIntegerParseException(FormatException e)
        {
            return CreateFormatException("Couldn't parse integer: " + e.Message);
        }

        /// <summary>
        /// Constructs an appropriate FormatException for the given existing exception
        /// when trying to parse a float or double.
        /// </summary>
        private FormatException CreateFloatParseException(Exception e)
        {
            return CreateFormatException("Couldn't parse number: " + e.Message);
        }
    }
}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences