Context Navigation

source: trunk/sources/HeuristicLab.ExtLibs/HeuristicLab.ProtobufCS/0.9.1/ProtobufCS/src/ProtocolBuffers/TextTokenizer.cs @ 4095

Visit:

Last change on this file since 4095 was 3857, checked in by abeham, 14 years ago

Added protobuf-csharp-port project source to ExtLibs

File size: 14.1 KB

Line
1	#region Copyright notice and license
2	// Protocol Buffers - Google's data interchange format
3	// Copyright 2008 Google Inc. All rights reserved.
4	// http://github.com/jskeet/dotnet-protobufs/
5	// Original C++/Java/Python code:
6	// http://code.google.com/p/protobuf/
7	//
8	// Redistribution and use in source and binary forms, with or without
9	// modification, are permitted provided that the following conditions are
10	// met:
11	//
12	// * Redistributions of source code must retain the above copyright
13	// notice, this list of conditions and the following disclaimer.
14	// * Redistributions in binary form must reproduce the above
15	// copyright notice, this list of conditions and the following disclaimer
16	// in the documentation and/or other materials provided with the
17	// distribution.
18	// * Neither the name of Google Inc. nor the names of its
19	// contributors may be used to endorse or promote products derived from
20	// this software without specific prior written permission.
21	//
22	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23	// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24	// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25	// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26	// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27	// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28	// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29	// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30	// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31	// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32	// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33	#endregion
34
35	using System;
36	using System.Globalization;
37	using System.Text.RegularExpressions;
38
39	namespace Google.ProtocolBuffers {
40	/// <summary>
41	/// Represents a stream of tokens parsed from a string.
42	/// </summary>
43	internal sealed class TextTokenizer {
/// <summary>
/// The complete input text that tokens are read from.
/// </summary>
private readonly string text;

/// <summary>
/// The token most recently read; the empty string once the end of the
/// input has been reached.
/// </summary>
private string currentToken;

/// <summary>
/// Index into the text at which the next regex match will be attempted.
/// </summary>
private int matchPos;

/// <summary>
/// Index into the text at which the current token starts.
/// </summary>
private int pos;

/// <summary>
/// Zero-based line number of the current token.
/// </summary>
private int line;

/// <summary>
/// Zero-based column number of the current token.
/// </summary>
private int column;

/// <summary>
/// Zero-based line number of the previous token.
/// </summary>
private int previousLine;

/// <summary>
/// Zero-based column number of the previous token.
/// </summary>
private int previousColumn;
74
// Note: atomic groups "(?>...)" are used in the tokenizing regexes to mimic Java's
// possessive quantifiers. Every alternative starts with \G so that a match can only
// begin exactly at the start position handed to Regex.Match.

/// <summary>
/// Matches a run of whitespace and/or '#' comments (each comment extends to the end of its line).
/// </summary>
internal static readonly Regex WhitespaceAndCommentPattern = new Regex("\\G(?>(\\s|(#.*$))+)",
    SilverlightCompatibility.CompiledRegexWhereAvailable | RegexOptions.Multiline);

/// <summary>
/// Matches a single identifier, number or quoted string token.
/// </summary>
/// <remarks>
/// The body of each string alternative must exclude that alternative's own delimiter:
/// the body is an atomic group, so if it were allowed to consume the closing quote it
/// could never give it back and the token would run on to the end of the line.
/// </remarks>
private static readonly Regex TokenPattern = new Regex(
    "\\G[a-zA-Z_](?>[0-9a-zA-Z_+-]*)|" +              // an identifier
    "\\G[0-9+-](?>[0-9a-zA-Z_.+-]*)|" +               // a number
    "\\G\"(?>([^\"\\\n\\\\]|\\\\.)*)(\"|\\\\?$)|" +   // a double-quoted string
    "\\G\'(?>([^\'\\\n\\\\]|\\\\.)*)(\'|\\\\?$)",     // a single-quoted string
    SilverlightCompatibility.CompiledRegexWhereAvailable | RegexOptions.Multiline);

// The keyword patterns below are matched case-insensitively. CultureInvariant keeps that
// matching independent of the current culture (e.g. Turkish casing of 'I'/'i'), because
// the text format is machine-readable rather than user-facing.

/// <summary>
/// Matches the spellings of (optionally negative) infinity accepted for doubles.
/// </summary>
private static readonly Regex DoubleInfinity = new Regex("^-?inf(inity)?$",
    SilverlightCompatibility.CompiledRegexWhereAvailable | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

/// <summary>
/// Matches the spellings of (optionally negative) infinity accepted for floats,
/// including an optional trailing 'f'.
/// </summary>
private static readonly Regex FloatInfinity = new Regex("^-?inf(inity)?f?$",
    SilverlightCompatibility.CompiledRegexWhereAvailable | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

/// <summary>
/// Matches "nan" with an optional trailing 'f'.
/// </summary>
private static readonly Regex FloatNan = new Regex("^nanf?$",
    SilverlightCompatibility.CompiledRegexWhereAvailable | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
91
/// <summary>
/// Constructs a tokenizer that parses tokens from the given text. Leading
/// whitespace and comments are skipped and the first token is read immediately.
/// </summary>
/// <param name="text">The text to split into tokens.</param>
public TextTokenizer(string text) {
  this.text = text;

  // Position matchPos on the first real token, then load it as the current token.
  SkipWhitespace();
  NextToken();
}
98
/// <summary>
/// True once the input is exhausted, i.e. when the current token is empty.
/// </summary>
public bool AtEnd {
  get {
    bool hasToken = currentToken.Length > 0;
    return !hasToken;
  }
}
105
/// <summary>
/// Moves on to the next token, updating the current and previous
/// line/column positions along the way.
/// </summary>
public void NextToken() {
  // What was "current" until now becomes "previous".
  previousLine = line;
  previousColumn = column;

  // Walk over everything consumed since the last call so that line and
  // column describe the position at which the new token starts.
  for (; pos < matchPos; ++pos) {
    if (text[pos] != '\n') {
      ++column;
      continue;
    }
    ++line;
    column = 0;
  }

  // End of input is represented by an empty token.
  if (matchPos == text.Length) {
    currentToken = "";
    return;
  }

  // Either a full token matches here, or the single character at this
  // position becomes a token of its own.
  Match match = TokenPattern.Match(text, matchPos);
  currentToken = match.Success ? match.Value : text[matchPos].ToString();
  matchPos += currentToken.Length;

  SkipWhitespace();
}
142
/// <summary>
/// Advances matchPos past any whitespace and comments found at the
/// current match position, so that it points at the start of the next token.
/// </summary>
private void SkipWhitespace() {
  Match blanks = WhitespaceAndCommentPattern.Match(text, matchPos);
  if (!blanks.Success) {
    // Nothing to skip at this position.
    return;
  }
  matchPos += blanks.Length;
}
152
/// <summary>
/// Consumes the current token if it is exactly equal to the given one.
/// </summary>
/// <param name="token">The token text to compare against.</param>
/// <returns>
/// true if the token matched and was consumed; false if it did not match,
/// in which case the tokenizer is left unchanged.
/// </returns>
public bool TryConsume(string token) {
  if (currentToken != token) {
    return false;
  }

  NextToken();
  return true;
}
164
/// <summary>
/// Consumes the current token if it is exactly equal to the given one;
/// otherwise throws a FormatException naming the expected token.
/// </summary>
/// <param name="token">The token text that is required at this point.</param>
/// <exception cref="FormatException">The current token differs from <paramref name="token"/>.</exception>
public void Consume(string token) {
  if (TryConsume(token)) {
    return;
  }

  throw CreateFormatException("Expected \"" + token + "\".");
}
179
/// <summary>
/// Reports whether the current token looks like an integer, without consuming it.
/// Only the first character is inspected: it must be an ASCII digit, '-' or '+'.
/// </summary>
/// <returns>true if the current token starts like an integer; false otherwise or at end of input.</returns>
public bool LookingAtInteger() {
  if (currentToken.Length == 0) {
    return false;
  }

  char first = currentToken[0];
  bool isSign = first == '-' || first == '+';
  bool isDigit = first >= '0' && first <= '9';
  return isDigit || isSign;
}
191
/// <summary>
/// If the next token is an identifier, consume it and return its value.
/// Otherwise, throw a FormatException.
/// </summary>
/// <remarks>
/// A token is accepted when it is non-empty and consists solely of ASCII
/// letters, ASCII digits, '_' and '.'.
/// </remarks>
/// <returns>The text of the consumed identifier token.</returns>
/// <exception cref="FormatException">
/// The current token is empty (end of input) or contains a character that is
/// not allowed in an identifier.
/// </exception>
public string ConsumeIdentifier() {
  // An empty token marks the end of the input. The character check below
  // would trivially pass for it, so reject it explicitly.
  if (currentToken.Length == 0) {
    throw CreateFormatException("Expected identifier.");
  }

  foreach (char c in currentToken) {
    bool allowed =
        ('a' <= c && c <= 'z') ||
        ('A' <= c && c <= 'Z') ||
        ('0' <= c && c <= '9') ||
        (c == '_') || (c == '.');
    if (!allowed) {
      throw CreateFormatException("Expected identifier.");
    }
  }

  string result = currentToken;
  NextToken();
  return result;
}
212
/// <summary>
/// Parses the current token as a 32-bit signed integer, consumes it and
/// returns the value. Throws a FormatException if the token cannot be parsed.
/// </summary>
/// <returns>The parsed value.</returns>
public int ConsumeInt32() {
  int parsed;
  try {
    parsed = TextFormat.ParseInt32(currentToken);
  } catch (FormatException e) {
    // Re-throw with the position of the offending token in the message.
    throw CreateIntegerParseException(e);
  }

  NextToken();
  return parsed;
}
226
/// <summary>
/// Parses the current token as a 32-bit unsigned integer, consumes it and
/// returns the value. Throws a FormatException if the token cannot be parsed.
/// </summary>
/// <returns>The parsed value.</returns>
public uint ConsumeUInt32() {
  uint parsed;
  try {
    parsed = TextFormat.ParseUInt32(currentToken);
  } catch (FormatException e) {
    // Re-throw with the position of the offending token in the message.
    throw CreateIntegerParseException(e);
  }

  NextToken();
  return parsed;
}
240
/// <summary>
/// Parses the current token as a 64-bit signed integer, consumes it and
/// returns the value. Throws a FormatException if the token cannot be parsed.
/// </summary>
/// <returns>The parsed value.</returns>
public long ConsumeInt64() {
  long parsed;
  try {
    parsed = TextFormat.ParseInt64(currentToken);
  } catch (FormatException e) {
    // Re-throw with the position of the offending token in the message.
    throw CreateIntegerParseException(e);
  }

  NextToken();
  return parsed;
}
254
/// <summary>
/// Parses the current token as a 64-bit unsigned integer, consumes it and
/// returns the value. Throws a FormatException if the token cannot be parsed.
/// </summary>
/// <returns>The parsed value.</returns>
public ulong ConsumeUInt64() {
  ulong parsed;
  try {
    parsed = TextFormat.ParseUInt64(currentToken);
  } catch (FormatException e) {
    // Re-throw with the position of the offending token in the message.
    throw CreateIntegerParseException(e);
  }

  NextToken();
  return parsed;
}
268
/// <summary>
/// If the next token is a double, consume it and return its value.
/// Otherwise, throw a FormatException.
/// </summary>
/// <returns>
/// The parsed value; positive or negative infinity for the "inf"/"infinity"
/// spellings and NaN for "nan" (all matched case-insensitively).
/// </returns>
/// <exception cref="FormatException">The current token cannot be parsed as a double.</exception>
public double ConsumeDouble() {
  // The "inf", "infinity" and "nan" spellings are recognised here rather
  // than being left to double.Parse().
  if (DoubleInfinity.IsMatch(currentToken)) {
    // The pattern guarantees a non-empty token, so the sign (if any) is
    // simply the first character. No culture-sensitive comparison is needed.
    bool negative = currentToken[0] == '-';
    NextToken();
    return negative ? double.NegativeInfinity : double.PositiveInfinity;
  }
  // Tokens are machine-readable text, so compare ordinally rather than
  // with culture-aware rules.
  if (currentToken.Equals("nan", StringComparison.OrdinalIgnoreCase)) {
    NextToken();
    return Double.NaN;
  }

  try {
    double result = double.Parse(currentToken, CultureInfo.InvariantCulture);
    NextToken();
    return result;
  } catch (FormatException e) {
    throw CreateFloatParseException(e);
  } catch (OverflowException e) {
    throw CreateFloatParseException(e);
  }
}
296
/// <summary>
/// If the next token is a float, consume it and return its value.
/// Otherwise, throw a FormatException.
/// </summary>
/// <remarks>
/// A single trailing 'f' or 'F' suffix on a numeric token is ignored. The
/// current token is left untouched when parsing fails.
/// </remarks>
/// <returns>
/// The parsed value; positive or negative infinity for the "inf"/"infinity"
/// spellings and NaN for "nan" (each optionally followed by 'f', matched
/// case-insensitively).
/// </returns>
/// <exception cref="FormatException">The current token cannot be parsed as a float.</exception>
public float ConsumeFloat() {
  // The "inf", "infinity" and "nan" spellings are recognised here rather
  // than being left to float.Parse().
  if (FloatInfinity.IsMatch(currentToken)) {
    // The pattern guarantees a non-empty token, so the sign (if any) is
    // simply the first character.
    bool negative = currentToken[0] == '-';
    NextToken();
    return negative ? float.NegativeInfinity : float.PositiveInfinity;
  }
  if (FloatNan.IsMatch(currentToken)) {
    NextToken();
    return float.NaN;
  }

  // float.Parse() does not understand the float suffix, so remove it first.
  // Work on a local copy so that a failed parse does not alter the current
  // token, and strip one suffix character only so that malformed input such
  // as "1ff" is rejected instead of being read as 1.
  string number = currentToken;
  if (number.Length > 0) {
    char last = number[number.Length - 1];
    if (last == 'f' || last == 'F') {
      number = number.Substring(0, number.Length - 1);
    }
  }

  try {
    float result = float.Parse(number, CultureInfo.InvariantCulture);
    NextToken();
    return result;
  } catch (FormatException e) {
    throw CreateFloatParseException(e);
  } catch (OverflowException e) {
    throw CreateFloatParseException(e);
  }
}
328
/// <summary>
/// Consumes the current token if it is exactly "true" or "false" and
/// returns the corresponding value; otherwise throws a FormatException.
/// </summary>
/// <returns>The Boolean value spelled by the consumed token.</returns>
public bool ConsumeBoolean() {
  bool value;
  switch (currentToken) {
    case "true":
      value = true;
      break;
    case "false":
      value = false;
      break;
    default:
      throw CreateFormatException("Expected \"true\" or \"false\".");
  }

  NextToken();
  return value;
}
344
/// <summary>
/// Consumes the current token as a quoted string and returns its unescaped
/// bytes converted to a string via ToStringUtf8. Throws a FormatException
/// if the token is not a valid string.
/// </summary>
/// <returns>The unescaped string value.</returns>
public string ConsumeString() {
  ByteString bytes = ConsumeByteString();
  return bytes.ToStringUtf8();
}
352
/// <summary>
/// Consumes the current token as a quoted string and returns its contents,
/// unescaped, as a ByteString. Throws a FormatException if the token does
/// not start with a quote, lacks the matching closing quote, or cannot be
/// unescaped.
/// </summary>
/// <returns>The unescaped bytes between the quotes.</returns>
public ByteString ConsumeByteString() {
  // An empty token (end of input) cannot be a string.
  if (currentToken.Length == 0) {
    throw CreateFormatException("Expected string.");
  }

  char quote = currentToken[0];
  if (quote != '\"' && quote != '\'') {
    throw CreateFormatException("Expected string.");
  }

  // The token must be at least two characters long and end with the same
  // quote character it started with.
  int lastIndex = currentToken.Length - 1;
  if (lastIndex < 1 || currentToken[lastIndex] != quote) {
    throw CreateFormatException("String missing ending quote.");
  }

  ByteString bytes;
  try {
    // Drop the surrounding quotes before unescaping.
    bytes = TextFormat.UnescapeBytes(currentToken.Substring(1, lastIndex - 1));
  } catch (FormatException e) {
    // Re-throw with the position of the offending token in the message.
    throw CreateFormatException(e.Message);
  }

  NextToken();
  return bytes;
}
377
/// <summary>
/// Builds (but does not throw) a FormatException whose message is the given
/// description prefixed with the position of the current token as
/// "line:column: ".
/// </summary>
/// <param name="description">The text describing the problem.</param>
/// <returns>The exception, ready to be thrown by the caller.</returns>
public FormatException CreateFormatException(string description) {
  // Positions are tracked zero-based but reported one-based, which is what
  // people generally expect.
  int reportedLine = line + 1;
  int reportedColumn = column + 1;
  string message = reportedLine + ":" + reportedColumn + ": " + description;
  return new FormatException(message);
}
386
/// <summary>
/// Builds (but does not throw) a FormatException whose message is the given
/// description prefixed with the position of the previous token as
/// "line:column: ".
/// </summary>
/// <param name="description">The text describing the problem.</param>
/// <returns>The exception, ready to be thrown by the caller.</returns>
public FormatException CreateFormatExceptionPreviousToken(string description) {
  // Positions are tracked zero-based but reported one-based, which is what
  // people generally expect.
  int reportedLine = previousLine + 1;
  int reportedColumn = previousColumn + 1;
  string message = reportedLine + ":" + reportedColumn + ": " + description;
  return new FormatException(message);
}
395
/// <summary>
/// Wraps the message of an integer-parsing failure in a FormatException
/// that also carries the position of the current token.
/// </summary>
/// <param name="e">The exception raised while parsing the integer.</param>
/// <returns>The exception, ready to be thrown by the caller.</returns>
private FormatException CreateIntegerParseException(FormatException e) {
  string description = "Couldn't parse integer: " + e.Message;
  return CreateFormatException(description);
}
403
/// <summary>
/// Wraps the message of a float- or double-parsing failure in a
/// FormatException that also carries the position of the current token.
/// </summary>
/// <param name="e">The exception raised while parsing the number.</param>
/// <returns>The exception, ready to be thrown by the caller.</returns>
private FormatException CreateFloatParseException(Exception e) {
  string description = "Couldn't parse number: " + e.Message;
  return CreateFormatException(description);
}
411	}
412	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences