1 | // Protocol Buffers - Google's data interchange format |
---|
2 | // Copyright 2008 Google Inc. All rights reserved. |
---|
3 | // http://code.google.com/p/protobuf/ |
---|
4 | // |
---|
5 | // Redistribution and use in source and binary forms, with or without |
---|
6 | // modification, are permitted provided that the following conditions are |
---|
7 | // met: |
---|
8 | // |
---|
9 | // * Redistributions of source code must retain the above copyright |
---|
10 | // notice, this list of conditions and the following disclaimer. |
---|
11 | // * Redistributions in binary form must reproduce the above |
---|
12 | // copyright notice, this list of conditions and the following disclaimer |
---|
13 | // in the documentation and/or other materials provided with the |
---|
14 | // distribution. |
---|
15 | // * Neither the name of Google Inc. nor the names of its |
---|
16 | // contributors may be used to endorse or promote products derived from |
---|
17 | // this software without specific prior written permission. |
---|
18 | // |
---|
19 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
---|
20 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
---|
21 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
---|
22 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
---|
23 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
---|
24 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
---|
25 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
---|
26 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
---|
27 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
---|
28 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
---|
29 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
---|
30 | |
---|
31 | package com.google.protobuf; |
---|
32 | |
---|
33 | import com.google.protobuf.Descriptors.Descriptor; |
---|
34 | import com.google.protobuf.Descriptors.FieldDescriptor; |
---|
35 | import com.google.protobuf.Descriptors.EnumDescriptor; |
---|
36 | import com.google.protobuf.Descriptors.EnumValueDescriptor; |
---|
37 | |
---|
38 | import java.io.IOException; |
---|
39 | import java.nio.CharBuffer; |
---|
40 | import java.math.BigInteger; |
---|
41 | import java.util.ArrayList; |
---|
42 | import java.util.List; |
---|
43 | import java.util.Locale; |
---|
44 | import java.util.Map; |
---|
45 | import java.util.regex.Matcher; |
---|
46 | import java.util.regex.Pattern; |
---|
47 | |
---|
48 | /** |
---|
49 | * Provide text parsing and formatting support for proto2 instances. |
---|
50 | * The implementation largely follows google/protobuf/text_format.cc. |
---|
51 | * |
---|
52 | * @author wenboz@google.com Wenbo Zhu |
---|
53 | * @author kenton@google.com Kenton Varda |
---|
54 | */ |
---|
55 | public final class TextFormat { |
---|
/** Private constructor; the members of this class visible here are all static. */
private TextFormat() {
}

/** Shared printer created with {@code singleLineMode == false} (multi-line output). */
private static final Printer DEFAULT_PRINTER = new Printer(false);

/** Shared printer created with {@code singleLineMode == true} (no newlines). */
private static final Printer SINGLE_LINE_PRINTER = new Printer(true);
---|
60 | |
---|
61 | /** |
---|
62 | * Outputs a textual representation of the Protocol Message supplied into |
---|
63 | * the parameter output. (This representation is the new version of the |
---|
64 | * classic "ProtocolPrinter" output from the original Protocol Buffer system) |
---|
65 | */ |
---|
66 | public static void print(final Message message, final Appendable output) |
---|
67 | throws IOException { |
---|
68 | DEFAULT_PRINTER.print(message, new TextGenerator(output)); |
---|
69 | } |
---|
70 | |
---|
71 | /** Outputs a textual representation of {@code fields} to {@code output}. */ |
---|
72 | public static void print(final UnknownFieldSet fields, |
---|
73 | final Appendable output) |
---|
74 | throws IOException { |
---|
75 | DEFAULT_PRINTER.printUnknownFields(fields, new TextGenerator(output)); |
---|
76 | } |
---|
77 | |
---|
78 | /** |
---|
79 | * Generates a human readable form of this message, useful for debugging and |
---|
80 | * other purposes, with no newline characters. |
---|
81 | */ |
---|
82 | public static String shortDebugString(final Message message) { |
---|
83 | try { |
---|
84 | final StringBuilder sb = new StringBuilder(); |
---|
85 | SINGLE_LINE_PRINTER.print(message, new TextGenerator(sb)); |
---|
86 | // Single line mode currently might have an extra space at the end. |
---|
87 | return sb.toString().trim(); |
---|
88 | } catch (IOException e) { |
---|
89 | throw new IllegalStateException(e); |
---|
90 | } |
---|
91 | } |
---|
92 | |
---|
93 | /** |
---|
94 | * Generates a human readable form of the unknown fields, useful for debugging |
---|
95 | * and other purposes, with no newline characters. |
---|
96 | */ |
---|
97 | public static String shortDebugString(final UnknownFieldSet fields) { |
---|
98 | try { |
---|
99 | final StringBuilder sb = new StringBuilder(); |
---|
100 | SINGLE_LINE_PRINTER.printUnknownFields(fields, new TextGenerator(sb)); |
---|
101 | // Single line mode currently might have an extra space at the end. |
---|
102 | return sb.toString().trim(); |
---|
103 | } catch (IOException e) { |
---|
104 | throw new IllegalStateException(e); |
---|
105 | } |
---|
106 | } |
---|
107 | |
---|
108 | /** |
---|
109 | * Like {@code print()}, but writes directly to a {@code String} and |
---|
110 | * returns it. |
---|
111 | */ |
---|
112 | public static String printToString(final Message message) { |
---|
113 | try { |
---|
114 | final StringBuilder text = new StringBuilder(); |
---|
115 | print(message, text); |
---|
116 | return text.toString(); |
---|
117 | } catch (IOException e) { |
---|
118 | throw new IllegalStateException(e); |
---|
119 | } |
---|
120 | } |
---|
121 | |
---|
122 | /** |
---|
123 | * Like {@code print()}, but writes directly to a {@code String} and |
---|
124 | * returns it. |
---|
125 | */ |
---|
126 | public static String printToString(final UnknownFieldSet fields) { |
---|
127 | try { |
---|
128 | final StringBuilder text = new StringBuilder(); |
---|
129 | print(fields, text); |
---|
130 | return text.toString(); |
---|
131 | } catch (IOException e) { |
---|
132 | throw new IllegalStateException(e); |
---|
133 | } |
---|
134 | } |
---|
135 | |
---|
136 | public static void printField(final FieldDescriptor field, |
---|
137 | final Object value, |
---|
138 | final Appendable output) |
---|
139 | throws IOException { |
---|
140 | DEFAULT_PRINTER.printField(field, value, new TextGenerator(output)); |
---|
141 | } |
---|
142 | |
---|
143 | public static String printFieldToString(final FieldDescriptor field, |
---|
144 | final Object value) { |
---|
145 | try { |
---|
146 | final StringBuilder text = new StringBuilder(); |
---|
147 | printField(field, value, text); |
---|
148 | return text.toString(); |
---|
149 | } catch (IOException e) { |
---|
150 | throw new IllegalStateException(e); |
---|
151 | } |
---|
152 | } |
---|
153 | |
---|
154 | /** |
---|
155 | * Outputs a textual representation of the value of given field value. |
---|
156 | * |
---|
157 | * @param field the descriptor of the field |
---|
158 | * @param value the value of the field |
---|
159 | * @param output the output to which to append the formatted value |
---|
160 | * @throws ClassCastException if the value is not appropriate for the |
---|
161 | * given field descriptor |
---|
162 | * @throws IOException if there is an exception writing to the output |
---|
163 | */ |
---|
164 | public static void printFieldValue(final FieldDescriptor field, |
---|
165 | final Object value, |
---|
166 | final Appendable output) |
---|
167 | throws IOException { |
---|
168 | DEFAULT_PRINTER.printFieldValue(field, value, new TextGenerator(output)); |
---|
169 | } |
---|
170 | |
---|
171 | /** |
---|
172 | * Outputs a textual representation of the value of an unknown field. |
---|
173 | * |
---|
174 | * @param tag the field's tag number |
---|
175 | * @param value the value of the field |
---|
176 | * @param output the output to which to append the formatted value |
---|
177 | * @throws ClassCastException if the value is not appropriate for the |
---|
178 | * given field descriptor |
---|
179 | * @throws IOException if there is an exception writing to the output |
---|
180 | */ |
---|
181 | public static void printUnknownFieldValue(final int tag, |
---|
182 | final Object value, |
---|
183 | final Appendable output) |
---|
184 | throws IOException { |
---|
185 | printUnknownFieldValue(tag, value, new TextGenerator(output)); |
---|
186 | } |
---|
187 | |
---|
188 | private static void printUnknownFieldValue(final int tag, |
---|
189 | final Object value, |
---|
190 | final TextGenerator generator) |
---|
191 | throws IOException { |
---|
192 | switch (WireFormat.getTagWireType(tag)) { |
---|
193 | case WireFormat.WIRETYPE_VARINT: |
---|
194 | generator.print(unsignedToString((Long) value)); |
---|
195 | break; |
---|
196 | case WireFormat.WIRETYPE_FIXED32: |
---|
197 | generator.print( |
---|
198 | String.format((Locale) null, "0x%08x", (Integer) value)); |
---|
199 | break; |
---|
200 | case WireFormat.WIRETYPE_FIXED64: |
---|
201 | generator.print(String.format((Locale) null, "0x%016x", (Long) value)); |
---|
202 | break; |
---|
203 | case WireFormat.WIRETYPE_LENGTH_DELIMITED: |
---|
204 | generator.print("\""); |
---|
205 | generator.print(escapeBytes((ByteString) value)); |
---|
206 | generator.print("\""); |
---|
207 | break; |
---|
208 | case WireFormat.WIRETYPE_START_GROUP: |
---|
209 | DEFAULT_PRINTER.printUnknownFields((UnknownFieldSet) value, generator); |
---|
210 | break; |
---|
211 | default: |
---|
212 | throw new IllegalArgumentException("Bad tag: " + tag); |
---|
213 | } |
---|
214 | } |
---|
215 | |
---|
216 | /** Helper class for converting protobufs to text. */ |
---|
217 | private static final class Printer { |
---|
218 | /** Whether to omit newlines from the output. */ |
---|
219 | final boolean singleLineMode; |
---|
220 | |
---|
221 | private Printer(final boolean singleLineMode) { |
---|
222 | this.singleLineMode = singleLineMode; |
---|
223 | } |
---|
224 | |
---|
225 | private void print(final Message message, final TextGenerator generator) |
---|
226 | throws IOException { |
---|
227 | for (Map.Entry<FieldDescriptor, Object> field |
---|
228 | : message.getAllFields().entrySet()) { |
---|
229 | printField(field.getKey(), field.getValue(), generator); |
---|
230 | } |
---|
231 | printUnknownFields(message.getUnknownFields(), generator); |
---|
232 | } |
---|
233 | |
---|
234 | private void printField(final FieldDescriptor field, final Object value, |
---|
235 | final TextGenerator generator) throws IOException { |
---|
236 | if (field.isRepeated()) { |
---|
237 | // Repeated field. Print each element. |
---|
238 | for (Object element : (List<?>) value) { |
---|
239 | printSingleField(field, element, generator); |
---|
240 | } |
---|
241 | } else { |
---|
242 | printSingleField(field, value, generator); |
---|
243 | } |
---|
244 | } |
---|
245 | |
---|
246 | private void printSingleField(final FieldDescriptor field, |
---|
247 | final Object value, |
---|
248 | final TextGenerator generator) |
---|
249 | throws IOException { |
---|
250 | if (field.isExtension()) { |
---|
251 | generator.print("["); |
---|
252 | // We special-case MessageSet elements for compatibility with proto1. |
---|
253 | if (field.getContainingType().getOptions().getMessageSetWireFormat() |
---|
254 | && (field.getType() == FieldDescriptor.Type.MESSAGE) |
---|
255 | && (field.isOptional()) |
---|
256 | // object equality |
---|
257 | && (field.getExtensionScope() == field.getMessageType())) { |
---|
258 | generator.print(field.getMessageType().getFullName()); |
---|
259 | } else { |
---|
260 | generator.print(field.getFullName()); |
---|
261 | } |
---|
262 | generator.print("]"); |
---|
263 | } else { |
---|
264 | if (field.getType() == FieldDescriptor.Type.GROUP) { |
---|
265 | // Groups must be serialized with their original capitalization. |
---|
266 | generator.print(field.getMessageType().getName()); |
---|
267 | } else { |
---|
268 | generator.print(field.getName()); |
---|
269 | } |
---|
270 | } |
---|
271 | |
---|
272 | if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { |
---|
273 | if (singleLineMode) { |
---|
274 | generator.print(" { "); |
---|
275 | } else { |
---|
276 | generator.print(" {\n"); |
---|
277 | generator.indent(); |
---|
278 | } |
---|
279 | } else { |
---|
280 | generator.print(": "); |
---|
281 | } |
---|
282 | |
---|
283 | printFieldValue(field, value, generator); |
---|
284 | |
---|
285 | if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { |
---|
286 | if (singleLineMode) { |
---|
287 | generator.print("} "); |
---|
288 | } else { |
---|
289 | generator.outdent(); |
---|
290 | generator.print("}\n"); |
---|
291 | } |
---|
292 | } else { |
---|
293 | if (singleLineMode) { |
---|
294 | generator.print(" "); |
---|
295 | } else { |
---|
296 | generator.print("\n"); |
---|
297 | } |
---|
298 | } |
---|
299 | } |
---|
300 | |
---|
301 | private void printFieldValue(final FieldDescriptor field, |
---|
302 | final Object value, |
---|
303 | final TextGenerator generator) |
---|
304 | throws IOException { |
---|
305 | switch (field.getType()) { |
---|
306 | case INT32: |
---|
307 | case SINT32: |
---|
308 | case SFIXED32: |
---|
309 | generator.print(((Integer) value).toString()); |
---|
310 | break; |
---|
311 | |
---|
312 | case INT64: |
---|
313 | case SINT64: |
---|
314 | case SFIXED64: |
---|
315 | generator.print(((Long) value).toString()); |
---|
316 | break; |
---|
317 | |
---|
318 | case BOOL: |
---|
319 | generator.print(((Boolean) value).toString()); |
---|
320 | break; |
---|
321 | |
---|
322 | case FLOAT: |
---|
323 | generator.print(((Float) value).toString()); |
---|
324 | break; |
---|
325 | |
---|
326 | case DOUBLE: |
---|
327 | generator.print(((Double) value).toString()); |
---|
328 | break; |
---|
329 | |
---|
330 | case UINT32: |
---|
331 | case FIXED32: |
---|
332 | generator.print(unsignedToString((Integer) value)); |
---|
333 | break; |
---|
334 | |
---|
335 | case UINT64: |
---|
336 | case FIXED64: |
---|
337 | generator.print(unsignedToString((Long) value)); |
---|
338 | break; |
---|
339 | |
---|
340 | case STRING: |
---|
341 | generator.print("\""); |
---|
342 | generator.print(escapeText((String) value)); |
---|
343 | generator.print("\""); |
---|
344 | break; |
---|
345 | |
---|
346 | case BYTES: |
---|
347 | generator.print("\""); |
---|
348 | generator.print(escapeBytes((ByteString) value)); |
---|
349 | generator.print("\""); |
---|
350 | break; |
---|
351 | |
---|
352 | case ENUM: |
---|
353 | generator.print(((EnumValueDescriptor) value).getName()); |
---|
354 | break; |
---|
355 | |
---|
356 | case MESSAGE: |
---|
357 | case GROUP: |
---|
358 | print((Message) value, generator); |
---|
359 | break; |
---|
360 | } |
---|
361 | } |
---|
362 | |
---|
363 | private void printUnknownFields(final UnknownFieldSet unknownFields, |
---|
364 | final TextGenerator generator) |
---|
365 | throws IOException { |
---|
366 | for (Map.Entry<Integer, UnknownFieldSet.Field> entry : |
---|
367 | unknownFields.asMap().entrySet()) { |
---|
368 | final int number = entry.getKey(); |
---|
369 | final UnknownFieldSet.Field field = entry.getValue(); |
---|
370 | printUnknownField(number, WireFormat.WIRETYPE_VARINT, |
---|
371 | field.getVarintList(), generator); |
---|
372 | printUnknownField(number, WireFormat.WIRETYPE_FIXED32, |
---|
373 | field.getFixed32List(), generator); |
---|
374 | printUnknownField(number, WireFormat.WIRETYPE_FIXED64, |
---|
375 | field.getFixed64List(), generator); |
---|
376 | printUnknownField(number, WireFormat.WIRETYPE_LENGTH_DELIMITED, |
---|
377 | field.getLengthDelimitedList(), generator); |
---|
378 | for (final UnknownFieldSet value : field.getGroupList()) { |
---|
379 | generator.print(entry.getKey().toString()); |
---|
380 | if (singleLineMode) { |
---|
381 | generator.print(" { "); |
---|
382 | } else { |
---|
383 | generator.print(" {\n"); |
---|
384 | generator.indent(); |
---|
385 | } |
---|
386 | printUnknownFields(value, generator); |
---|
387 | if (singleLineMode) { |
---|
388 | generator.print("} "); |
---|
389 | } else { |
---|
390 | generator.outdent(); |
---|
391 | generator.print("}\n"); |
---|
392 | } |
---|
393 | } |
---|
394 | } |
---|
395 | } |
---|
396 | |
---|
397 | private void printUnknownField(final int number, |
---|
398 | final int wireType, |
---|
399 | final List<?> values, |
---|
400 | final TextGenerator generator) |
---|
401 | throws IOException { |
---|
402 | for (final Object value : values) { |
---|
403 | generator.print(String.valueOf(number)); |
---|
404 | generator.print(": "); |
---|
405 | printUnknownFieldValue(wireType, value, generator); |
---|
406 | generator.print(singleLineMode ? " " : "\n"); |
---|
407 | } |
---|
408 | } |
---|
409 | } |
---|
410 | |
---|
411 | /** Convert an unsigned 32-bit integer to a string. */ |
---|
412 | private static String unsignedToString(final int value) { |
---|
413 | if (value >= 0) { |
---|
414 | return Integer.toString(value); |
---|
415 | } else { |
---|
416 | return Long.toString(((long) value) & 0x00000000FFFFFFFFL); |
---|
417 | } |
---|
418 | } |
---|
419 | |
---|
420 | /** Convert an unsigned 64-bit integer to a string. */ |
---|
421 | private static String unsignedToString(final long value) { |
---|
422 | if (value >= 0) { |
---|
423 | return Long.toString(value); |
---|
424 | } else { |
---|
425 | // Pull off the most-significant bit so that BigInteger doesn't think |
---|
426 | // the number is negative, then set it again using setBit(). |
---|
427 | return BigInteger.valueOf(value & 0x7FFFFFFFFFFFFFFFL) |
---|
428 | .setBit(63).toString(); |
---|
429 | } |
---|
430 | } |
---|
431 | |
---|
432 | /** |
---|
433 | * An inner class for writing text to the output stream. |
---|
434 | */ |
---|
435 | private static final class TextGenerator { |
---|
436 | private final Appendable output; |
---|
437 | private final StringBuilder indent = new StringBuilder(); |
---|
438 | private boolean atStartOfLine = true; |
---|
439 | |
---|
440 | private TextGenerator(final Appendable output) { |
---|
441 | this.output = output; |
---|
442 | } |
---|
443 | |
---|
444 | /** |
---|
445 | * Indent text by two spaces. After calling Indent(), two spaces will be |
---|
446 | * inserted at the beginning of each line of text. Indent() may be called |
---|
447 | * multiple times to produce deeper indents. |
---|
448 | */ |
---|
449 | public void indent() { |
---|
450 | indent.append(" "); |
---|
451 | } |
---|
452 | |
---|
453 | /** |
---|
454 | * Reduces the current indent level by two spaces, or crashes if the indent |
---|
455 | * level is zero. |
---|
456 | */ |
---|
457 | public void outdent() { |
---|
458 | final int length = indent.length(); |
---|
459 | if (length == 0) { |
---|
460 | throw new IllegalArgumentException( |
---|
461 | " Outdent() without matching Indent()."); |
---|
462 | } |
---|
463 | indent.delete(length - 2, length); |
---|
464 | } |
---|
465 | |
---|
466 | /** |
---|
467 | * Print text to the output stream. |
---|
468 | */ |
---|
469 | public void print(final CharSequence text) throws IOException { |
---|
470 | final int size = text.length(); |
---|
471 | int pos = 0; |
---|
472 | |
---|
473 | for (int i = 0; i < size; i++) { |
---|
474 | if (text.charAt(i) == '\n') { |
---|
475 | write(text.subSequence(pos, size), i - pos + 1); |
---|
476 | pos = i + 1; |
---|
477 | atStartOfLine = true; |
---|
478 | } |
---|
479 | } |
---|
480 | write(text.subSequence(pos, size), size - pos); |
---|
481 | } |
---|
482 | |
---|
483 | private void write(final CharSequence data, final int size) |
---|
484 | throws IOException { |
---|
485 | if (size == 0) { |
---|
486 | return; |
---|
487 | } |
---|
488 | if (atStartOfLine) { |
---|
489 | atStartOfLine = false; |
---|
490 | output.append(indent); |
---|
491 | } |
---|
492 | output.append(data); |
---|
493 | } |
---|
494 | } |
---|
495 | |
---|
496 | // ================================================================= |
---|
497 | // Parsing |
---|
498 | |
---|
499 | /** |
---|
500 | * Represents a stream of tokens parsed from a {@code String}. |
---|
501 | * |
---|
502 | * <p>The Java standard library provides many classes that you might think |
---|
503 | * would be useful for implementing this, but aren't. For example: |
---|
504 | * |
---|
505 | * <ul> |
---|
506 | * <li>{@code java.io.StreamTokenizer}: This almost does what we want -- or, |
---|
507 | * at least, something that would get us close to what we want -- except |
---|
508 | * for one fatal flaw: It automatically un-escapes strings using Java |
---|
509 | * escape sequences, which do not include all the escape sequences we |
---|
510 | * need to support (e.g. '\x'). |
---|
511 | * <li>{@code java.util.Scanner}: This seems like a great way at least to |
---|
512 | * parse regular expressions out of a stream (so we wouldn't have to load |
---|
513 | * the entire input into a single string before parsing). Sadly, |
---|
514 | * {@code Scanner} requires that tokens be delimited with some delimiter. |
---|
515 | * Thus, although the text "foo:" should parse to two tokens ("foo" and |
---|
516 | * ":"), {@code Scanner} would recognize it only as a single token. |
---|
517 | * Furthermore, {@code Scanner} provides no way to inspect the contents |
---|
518 | * of delimiters, making it impossible to keep track of line and column |
---|
519 | * numbers. |
---|
520 | * </ul> |
---|
521 | * |
---|
522 | * <p>Luckily, Java's regular expression support does manage to be useful to |
---|
523 | * us. (Barely: We need {@code Matcher.usePattern()}, which is new in |
---|
524 | * Java 1.5.) So, we can use that, at least. Unfortunately, this implies |
---|
525 | * that we need to have the entire input in one contiguous string. |
---|
526 | */ |
---|
527 | private static final class Tokenizer { |
---|
528 | private final CharSequence text; |
---|
529 | private final Matcher matcher; |
---|
530 | private String currentToken; |
---|
531 | |
---|
532 | // The character index within this.text at which the current token begins. |
---|
533 | private int pos = 0; |
---|
534 | |
---|
535 | // The line and column numbers of the current token. |
---|
536 | private int line = 0; |
---|
537 | private int column = 0; |
---|
538 | |
---|
539 | // The line and column numbers of the previous token (allows throwing |
---|
540 | // errors *after* consuming). |
---|
541 | private int previousLine = 0; |
---|
542 | private int previousColumn = 0; |
---|
543 | |
---|
544 | // We use possesive quantifiers (*+ and ++) because otherwise the Java |
---|
545 | // regex matcher has stack overflows on large inputs. |
---|
546 | private static final Pattern WHITESPACE = |
---|
547 | Pattern.compile("(\\s|(#.*$))++", Pattern.MULTILINE); |
---|
548 | private static final Pattern TOKEN = Pattern.compile( |
---|
549 | "[a-zA-Z_][0-9a-zA-Z_+-]*+|" + // an identifier |
---|
550 | "[.]?[0-9+-][0-9a-zA-Z_.+-]*+|" + // a number |
---|
551 | "\"([^\"\n\\\\]|\\\\.)*+(\"|\\\\?$)|" + // a double-quoted string |
---|
552 | "\'([^\'\n\\\\]|\\\\.)*+(\'|\\\\?$)", // a single-quoted string |
---|
553 | Pattern.MULTILINE); |
---|
554 | |
---|
555 | private static final Pattern DOUBLE_INFINITY = Pattern.compile( |
---|
556 | "-?inf(inity)?", |
---|
557 | Pattern.CASE_INSENSITIVE); |
---|
558 | private static final Pattern FLOAT_INFINITY = Pattern.compile( |
---|
559 | "-?inf(inity)?f?", |
---|
560 | Pattern.CASE_INSENSITIVE); |
---|
561 | private static final Pattern FLOAT_NAN = Pattern.compile( |
---|
562 | "nanf?", |
---|
563 | Pattern.CASE_INSENSITIVE); |
---|
564 | |
---|
/**
 * Construct a tokenizer that parses tokens from the given text. Leading
 * whitespace is skipped and the first token is read immediately.
 *
 * @param text the complete input to tokenize
 */
private Tokenizer(final CharSequence text) {
  this.text = text;
  // The matcher starts out on the whitespace pattern; nextToken() and
  // skipWhitespace() switch patterns as needed.
  matcher = WHITESPACE.matcher(text);

  skipWhitespace();
  nextToken();
}
---|
572 | |
---|
573 | /** Are we at the end of the input? */ |
---|
574 | public boolean atEnd() { |
---|
575 | return currentToken.length() == 0; |
---|
576 | } |
---|
577 | |
---|
578 | /** Advance to the next token. */ |
---|
579 | public void nextToken() { |
---|
580 | previousLine = line; |
---|
581 | previousColumn = column; |
---|
582 | |
---|
583 | // Advance the line counter to the current position. |
---|
584 | while (pos < matcher.regionStart()) { |
---|
585 | if (text.charAt(pos) == '\n') { |
---|
586 | ++line; |
---|
587 | column = 0; |
---|
588 | } else { |
---|
589 | ++column; |
---|
590 | } |
---|
591 | ++pos; |
---|
592 | } |
---|
593 | |
---|
594 | // Match the next token. |
---|
595 | if (matcher.regionStart() == matcher.regionEnd()) { |
---|
596 | // EOF |
---|
597 | currentToken = ""; |
---|
598 | } else { |
---|
599 | matcher.usePattern(TOKEN); |
---|
600 | if (matcher.lookingAt()) { |
---|
601 | currentToken = matcher.group(); |
---|
602 | matcher.region(matcher.end(), matcher.regionEnd()); |
---|
603 | } else { |
---|
604 | // Take one character. |
---|
605 | currentToken = String.valueOf(text.charAt(pos)); |
---|
606 | matcher.region(pos + 1, matcher.regionEnd()); |
---|
607 | } |
---|
608 | |
---|
609 | skipWhitespace(); |
---|
610 | } |
---|
611 | } |
---|
612 | |
---|
613 | /** |
---|
614 | * Skip over any whitespace so that the matcher region starts at the next |
---|
615 | * token. |
---|
616 | */ |
---|
617 | private void skipWhitespace() { |
---|
618 | matcher.usePattern(WHITESPACE); |
---|
619 | if (matcher.lookingAt()) { |
---|
620 | matcher.region(matcher.end(), matcher.regionEnd()); |
---|
621 | } |
---|
622 | } |
---|
623 | |
---|
624 | /** |
---|
625 | * If the next token exactly matches {@code token}, consume it and return |
---|
626 | * {@code true}. Otherwise, return {@code false} without doing anything. |
---|
627 | */ |
---|
628 | public boolean tryConsume(final String token) { |
---|
629 | if (currentToken.equals(token)) { |
---|
630 | nextToken(); |
---|
631 | return true; |
---|
632 | } else { |
---|
633 | return false; |
---|
634 | } |
---|
635 | } |
---|
636 | |
---|
637 | /** |
---|
638 | * If the next token exactly matches {@code token}, consume it. Otherwise, |
---|
639 | * throw a {@link ParseException}. |
---|
640 | */ |
---|
641 | public void consume(final String token) throws ParseException { |
---|
642 | if (!tryConsume(token)) { |
---|
643 | throw parseException("Expected \"" + token + "\"."); |
---|
644 | } |
---|
645 | } |
---|
646 | |
---|
647 | /** |
---|
648 | * Returns {@code true} if the next token is an integer, but does |
---|
649 | * not consume it. |
---|
650 | */ |
---|
651 | public boolean lookingAtInteger() { |
---|
652 | if (currentToken.length() == 0) { |
---|
653 | return false; |
---|
654 | } |
---|
655 | |
---|
656 | final char c = currentToken.charAt(0); |
---|
657 | return ('0' <= c && c <= '9') || |
---|
658 | c == '-' || c == '+'; |
---|
659 | } |
---|
660 | |
---|
661 | /** |
---|
662 | * If the next token is an identifier, consume it and return its value. |
---|
663 | * Otherwise, throw a {@link ParseException}. |
---|
664 | */ |
---|
665 | public String consumeIdentifier() throws ParseException { |
---|
666 | for (int i = 0; i < currentToken.length(); i++) { |
---|
667 | final char c = currentToken.charAt(i); |
---|
668 | if (('a' <= c && c <= 'z') || |
---|
669 | ('A' <= c && c <= 'Z') || |
---|
670 | ('0' <= c && c <= '9') || |
---|
671 | (c == '_') || (c == '.')) { |
---|
672 | // OK |
---|
673 | } else { |
---|
674 | throw parseException("Expected identifier."); |
---|
675 | } |
---|
676 | } |
---|
677 | |
---|
678 | final String result = currentToken; |
---|
679 | nextToken(); |
---|
680 | return result; |
---|
681 | } |
---|
682 | |
---|
683 | /** |
---|
684 | * If the next token is a 32-bit signed integer, consume it and return its |
---|
685 | * value. Otherwise, throw a {@link ParseException}. |
---|
686 | */ |
---|
687 | public int consumeInt32() throws ParseException { |
---|
688 | try { |
---|
689 | final int result = parseInt32(currentToken); |
---|
690 | nextToken(); |
---|
691 | return result; |
---|
692 | } catch (NumberFormatException e) { |
---|
693 | throw integerParseException(e); |
---|
694 | } |
---|
695 | } |
---|
696 | |
---|
697 | /** |
---|
698 | * If the next token is a 32-bit unsigned integer, consume it and return its |
---|
699 | * value. Otherwise, throw a {@link ParseException}. |
---|
700 | */ |
---|
701 | public int consumeUInt32() throws ParseException { |
---|
702 | try { |
---|
703 | final int result = parseUInt32(currentToken); |
---|
704 | nextToken(); |
---|
705 | return result; |
---|
706 | } catch (NumberFormatException e) { |
---|
707 | throw integerParseException(e); |
---|
708 | } |
---|
709 | } |
---|
710 | |
---|
711 | /** |
---|
712 | * If the next token is a 64-bit signed integer, consume it and return its |
---|
713 | * value. Otherwise, throw a {@link ParseException}. |
---|
714 | */ |
---|
715 | public long consumeInt64() throws ParseException { |
---|
716 | try { |
---|
717 | final long result = parseInt64(currentToken); |
---|
718 | nextToken(); |
---|
719 | return result; |
---|
720 | } catch (NumberFormatException e) { |
---|
721 | throw integerParseException(e); |
---|
722 | } |
---|
723 | } |
---|
724 | |
---|
725 | /** |
---|
726 | * If the next token is a 64-bit unsigned integer, consume it and return its |
---|
727 | * value. Otherwise, throw a {@link ParseException}. |
---|
728 | */ |
---|
729 | public long consumeUInt64() throws ParseException { |
---|
730 | try { |
---|
731 | final long result = parseUInt64(currentToken); |
---|
732 | nextToken(); |
---|
733 | return result; |
---|
734 | } catch (NumberFormatException e) { |
---|
735 | throw integerParseException(e); |
---|
736 | } |
---|
737 | } |
---|
738 | |
---|
739 | /** |
---|
740 | * If the next token is a double, consume it and return its value. |
---|
741 | * Otherwise, throw a {@link ParseException}. |
---|
742 | */ |
---|
743 | public double consumeDouble() throws ParseException { |
---|
744 | // We need to parse infinity and nan separately because |
---|
745 | // Double.parseDouble() does not accept "inf", "infinity", or "nan". |
---|
746 | if (DOUBLE_INFINITY.matcher(currentToken).matches()) { |
---|
747 | final boolean negative = currentToken.startsWith("-"); |
---|
748 | nextToken(); |
---|
749 | return negative ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY; |
---|
750 | } |
---|
751 | if (currentToken.equalsIgnoreCase("nan")) { |
---|
752 | nextToken(); |
---|
753 | return Double.NaN; |
---|
754 | } |
---|
755 | try { |
---|
756 | final double result = Double.parseDouble(currentToken); |
---|
757 | nextToken(); |
---|
758 | return result; |
---|
759 | } catch (NumberFormatException e) { |
---|
760 | throw floatParseException(e); |
---|
761 | } |
---|
762 | } |
---|
763 | |
---|
764 | /** |
---|
765 | * If the next token is a float, consume it and return its value. |
---|
766 | * Otherwise, throw a {@link ParseException}. |
---|
767 | */ |
---|
768 | public float consumeFloat() throws ParseException { |
---|
769 | // We need to parse infinity and nan separately because |
---|
770 | // Float.parseFloat() does not accept "inf", "infinity", or "nan". |
---|
771 | if (FLOAT_INFINITY.matcher(currentToken).matches()) { |
---|
772 | final boolean negative = currentToken.startsWith("-"); |
---|
773 | nextToken(); |
---|
774 | return negative ? Float.NEGATIVE_INFINITY : Float.POSITIVE_INFINITY; |
---|
775 | } |
---|
776 | if (FLOAT_NAN.matcher(currentToken).matches()) { |
---|
777 | nextToken(); |
---|
778 | return Float.NaN; |
---|
779 | } |
---|
780 | try { |
---|
781 | final float result = Float.parseFloat(currentToken); |
---|
782 | nextToken(); |
---|
783 | return result; |
---|
784 | } catch (NumberFormatException e) { |
---|
785 | throw floatParseException(e); |
---|
786 | } |
---|
787 | } |
---|
788 | |
---|
789 | /** |
---|
790 | * If the next token is a boolean, consume it and return its value. |
---|
791 | * Otherwise, throw a {@link ParseException}. |
---|
792 | */ |
---|
793 | public boolean consumeBoolean() throws ParseException { |
---|
794 | if (currentToken.equals("true") || |
---|
795 | currentToken.equals("t") || |
---|
796 | currentToken.equals("1")) { |
---|
797 | nextToken(); |
---|
798 | return true; |
---|
799 | } else if (currentToken.equals("false") || |
---|
800 | currentToken.equals("f") || |
---|
801 | currentToken.equals("0")) { |
---|
802 | nextToken(); |
---|
803 | return false; |
---|
804 | } else { |
---|
805 | throw parseException("Expected \"true\" or \"false\"."); |
---|
806 | } |
---|
807 | } |
---|
808 | |
---|
809 | /** |
---|
810 | * If the next token is a string, consume it and return its (unescaped) |
---|
811 | * value. Otherwise, throw a {@link ParseException}. |
---|
812 | */ |
---|
813 | public String consumeString() throws ParseException { |
---|
814 | return consumeByteString().toStringUtf8(); |
---|
815 | } |
---|
816 | |
---|
817 | /** |
---|
818 | * If the next token is a string, consume it, unescape it as a |
---|
819 | * {@link ByteString}, and return it. Otherwise, throw a |
---|
820 | * {@link ParseException}. |
---|
821 | */ |
---|
822 | public ByteString consumeByteString() throws ParseException { |
---|
823 | List<ByteString> list = new ArrayList<ByteString>(); |
---|
824 | consumeByteString(list); |
---|
825 | while (currentToken.startsWith("'") || currentToken.startsWith("\"")) { |
---|
826 | consumeByteString(list); |
---|
827 | } |
---|
828 | return ByteString.copyFrom(list); |
---|
829 | } |
---|
830 | |
---|
831 | /** |
---|
832 | * Like {@link #consumeByteString()} but adds each token of the string to |
---|
833 | * the given list. String literals (whether bytes or text) may come in |
---|
834 | * multiple adjacent tokens which are automatically concatenated, like in |
---|
835 | * C or Python. |
---|
836 | */ |
---|
837 | private void consumeByteString(List<ByteString> list) throws ParseException { |
---|
838 | final char quote = currentToken.length() > 0 ? currentToken.charAt(0) |
---|
839 | : '\0'; |
---|
840 | if (quote != '\"' && quote != '\'') { |
---|
841 | throw parseException("Expected string."); |
---|
842 | } |
---|
843 | |
---|
844 | if (currentToken.length() < 2 || |
---|
845 | currentToken.charAt(currentToken.length() - 1) != quote) { |
---|
846 | throw parseException("String missing ending quote."); |
---|
847 | } |
---|
848 | |
---|
849 | try { |
---|
850 | final String escaped = |
---|
851 | currentToken.substring(1, currentToken.length() - 1); |
---|
852 | final ByteString result = unescapeBytes(escaped); |
---|
853 | nextToken(); |
---|
854 | list.add(result); |
---|
855 | } catch (InvalidEscapeSequenceException e) { |
---|
856 | throw parseException(e.getMessage()); |
---|
857 | } |
---|
858 | } |
---|
859 | |
---|
860 | /** |
---|
861 | * Returns a {@link ParseException} with the current line and column |
---|
862 | * numbers in the description, suitable for throwing. |
---|
863 | */ |
---|
864 | public ParseException parseException(final String description) { |
---|
865 | // Note: People generally prefer one-based line and column numbers. |
---|
866 | return new ParseException( |
---|
867 | (line + 1) + ":" + (column + 1) + ": " + description); |
---|
868 | } |
---|
869 | |
---|
870 | /** |
---|
871 | * Returns a {@link ParseException} with the line and column numbers of |
---|
872 | * the previous token in the description, suitable for throwing. |
---|
873 | */ |
---|
874 | public ParseException parseExceptionPreviousToken( |
---|
875 | final String description) { |
---|
876 | // Note: People generally prefer one-based line and column numbers. |
---|
877 | return new ParseException( |
---|
878 | (previousLine + 1) + ":" + (previousColumn + 1) + ": " + description); |
---|
879 | } |
---|
880 | |
---|
881 | /** |
---|
882 | * Constructs an appropriate {@link ParseException} for the given |
---|
883 | * {@code NumberFormatException} when trying to parse an integer. |
---|
884 | */ |
---|
885 | private ParseException integerParseException( |
---|
886 | final NumberFormatException e) { |
---|
887 | return parseException("Couldn't parse integer: " + e.getMessage()); |
---|
888 | } |
---|
889 | |
---|
890 | /** |
---|
891 | * Constructs an appropriate {@link ParseException} for the given |
---|
892 | * {@code NumberFormatException} when trying to parse a float or double. |
---|
893 | */ |
---|
894 | private ParseException floatParseException(final NumberFormatException e) { |
---|
895 | return parseException("Couldn't parse number: " + e.getMessage()); |
---|
896 | } |
---|
897 | } |
---|
898 | |
---|
899 | /** Thrown when parsing an invalid text format message. */ |
---|
900 | public static class ParseException extends IOException { |
---|
901 | private static final long serialVersionUID = 3196188060225107702L; |
---|
902 | |
---|
903 | public ParseException(final String message) { |
---|
904 | super(message); |
---|
905 | } |
---|
906 | } |
---|
907 | |
---|
908 | /** |
---|
909 | * Parse a text-format message from {@code input} and merge the contents |
---|
910 | * into {@code builder}. |
---|
911 | */ |
---|
912 | public static void merge(final Readable input, |
---|
913 | final Message.Builder builder) |
---|
914 | throws IOException { |
---|
915 | merge(input, ExtensionRegistry.getEmptyRegistry(), builder); |
---|
916 | } |
---|
917 | |
---|
918 | /** |
---|
919 | * Parse a text-format message from {@code input} and merge the contents |
---|
920 | * into {@code builder}. |
---|
921 | */ |
---|
922 | public static void merge(final CharSequence input, |
---|
923 | final Message.Builder builder) |
---|
924 | throws ParseException { |
---|
925 | merge(input, ExtensionRegistry.getEmptyRegistry(), builder); |
---|
926 | } |
---|
927 | |
---|
928 | /** |
---|
929 | * Parse a text-format message from {@code input} and merge the contents |
---|
930 | * into {@code builder}. Extensions will be recognized if they are |
---|
931 | * registered in {@code extensionRegistry}. |
---|
932 | */ |
---|
933 | public static void merge(final Readable input, |
---|
934 | final ExtensionRegistry extensionRegistry, |
---|
935 | final Message.Builder builder) |
---|
936 | throws IOException { |
---|
937 | // Read the entire input to a String then parse that. |
---|
938 | |
---|
939 | // If StreamTokenizer were not quite so crippled, or if there were a kind |
---|
940 | // of Reader that could read in chunks that match some particular regex, |
---|
941 | // or if we wanted to write a custom Reader to tokenize our stream, then |
---|
942 | // we would not have to read to one big String. Alas, none of these is |
---|
943 | // the case. Oh well. |
---|
944 | |
---|
945 | merge(toStringBuilder(input), extensionRegistry, builder); |
---|
946 | } |
---|
947 | |
---|
948 | private static final int BUFFER_SIZE = 4096; |
---|
949 | |
---|
950 | // TODO(chrisn): See if working around java.io.Reader#read(CharBuffer) |
---|
951 | // overhead is worthwhile |
---|
952 | private static StringBuilder toStringBuilder(final Readable input) |
---|
953 | throws IOException { |
---|
954 | final StringBuilder text = new StringBuilder(); |
---|
955 | final CharBuffer buffer = CharBuffer.allocate(BUFFER_SIZE); |
---|
956 | while (true) { |
---|
957 | final int n = input.read(buffer); |
---|
958 | if (n == -1) { |
---|
959 | break; |
---|
960 | } |
---|
961 | buffer.flip(); |
---|
962 | text.append(buffer, 0, n); |
---|
963 | } |
---|
964 | return text; |
---|
965 | } |
---|
966 | |
---|
967 | /** |
---|
968 | * Parse a text-format message from {@code input} and merge the contents |
---|
969 | * into {@code builder}. Extensions will be recognized if they are |
---|
970 | * registered in {@code extensionRegistry}. |
---|
971 | */ |
---|
972 | public static void merge(final CharSequence input, |
---|
973 | final ExtensionRegistry extensionRegistry, |
---|
974 | final Message.Builder builder) |
---|
975 | throws ParseException { |
---|
976 | final Tokenizer tokenizer = new Tokenizer(input); |
---|
977 | |
---|
978 | while (!tokenizer.atEnd()) { |
---|
979 | mergeField(tokenizer, extensionRegistry, builder); |
---|
980 | } |
---|
981 | } |
---|
982 | |
---|
983 | /** |
---|
984 | * Parse a single field from {@code tokenizer} and merge it into |
---|
985 | * {@code builder}. |
---|
986 | */ |
---|
987 | private static void mergeField(final Tokenizer tokenizer, |
---|
988 | final ExtensionRegistry extensionRegistry, |
---|
989 | final Message.Builder builder) |
---|
990 | throws ParseException { |
---|
991 | FieldDescriptor field; |
---|
992 | final Descriptor type = builder.getDescriptorForType(); |
---|
993 | ExtensionRegistry.ExtensionInfo extension = null; |
---|
994 | |
---|
995 | if (tokenizer.tryConsume("[")) { |
---|
996 | // An extension. |
---|
997 | final StringBuilder name = |
---|
998 | new StringBuilder(tokenizer.consumeIdentifier()); |
---|
999 | while (tokenizer.tryConsume(".")) { |
---|
1000 | name.append('.'); |
---|
1001 | name.append(tokenizer.consumeIdentifier()); |
---|
1002 | } |
---|
1003 | |
---|
1004 | extension = extensionRegistry.findExtensionByName(name.toString()); |
---|
1005 | |
---|
1006 | if (extension == null) { |
---|
1007 | throw tokenizer.parseExceptionPreviousToken( |
---|
1008 | "Extension \"" + name + "\" not found in the ExtensionRegistry."); |
---|
1009 | } else if (extension.descriptor.getContainingType() != type) { |
---|
1010 | throw tokenizer.parseExceptionPreviousToken( |
---|
1011 | "Extension \"" + name + "\" does not extend message type \"" + |
---|
1012 | type.getFullName() + "\"."); |
---|
1013 | } |
---|
1014 | |
---|
1015 | tokenizer.consume("]"); |
---|
1016 | |
---|
1017 | field = extension.descriptor; |
---|
1018 | } else { |
---|
1019 | final String name = tokenizer.consumeIdentifier(); |
---|
1020 | field = type.findFieldByName(name); |
---|
1021 | |
---|
1022 | // Group names are expected to be capitalized as they appear in the |
---|
1023 | // .proto file, which actually matches their type names, not their field |
---|
1024 | // names. |
---|
1025 | if (field == null) { |
---|
1026 | // Explicitly specify US locale so that this code does not break when |
---|
1027 | // executing in Turkey. |
---|
1028 | final String lowerName = name.toLowerCase(Locale.US); |
---|
1029 | field = type.findFieldByName(lowerName); |
---|
1030 | // If the case-insensitive match worked but the field is NOT a group, |
---|
1031 | if (field != null && field.getType() != FieldDescriptor.Type.GROUP) { |
---|
1032 | field = null; |
---|
1033 | } |
---|
1034 | } |
---|
1035 | // Again, special-case group names as described above. |
---|
1036 | if (field != null && field.getType() == FieldDescriptor.Type.GROUP && |
---|
1037 | !field.getMessageType().getName().equals(name)) { |
---|
1038 | field = null; |
---|
1039 | } |
---|
1040 | |
---|
1041 | if (field == null) { |
---|
1042 | throw tokenizer.parseExceptionPreviousToken( |
---|
1043 | "Message type \"" + type.getFullName() + |
---|
1044 | "\" has no field named \"" + name + "\"."); |
---|
1045 | } |
---|
1046 | } |
---|
1047 | |
---|
1048 | Object value = null; |
---|
1049 | |
---|
1050 | if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { |
---|
1051 | tokenizer.tryConsume(":"); // optional |
---|
1052 | |
---|
1053 | final String endToken; |
---|
1054 | if (tokenizer.tryConsume("<")) { |
---|
1055 | endToken = ">"; |
---|
1056 | } else { |
---|
1057 | tokenizer.consume("{"); |
---|
1058 | endToken = "}"; |
---|
1059 | } |
---|
1060 | |
---|
1061 | final Message.Builder subBuilder; |
---|
1062 | if (extension == null) { |
---|
1063 | subBuilder = builder.newBuilderForField(field); |
---|
1064 | } else { |
---|
1065 | subBuilder = extension.defaultInstance.newBuilderForType(); |
---|
1066 | } |
---|
1067 | |
---|
1068 | while (!tokenizer.tryConsume(endToken)) { |
---|
1069 | if (tokenizer.atEnd()) { |
---|
1070 | throw tokenizer.parseException( |
---|
1071 | "Expected \"" + endToken + "\"."); |
---|
1072 | } |
---|
1073 | mergeField(tokenizer, extensionRegistry, subBuilder); |
---|
1074 | } |
---|
1075 | |
---|
1076 | value = subBuilder.build(); |
---|
1077 | |
---|
1078 | } else { |
---|
1079 | tokenizer.consume(":"); |
---|
1080 | |
---|
1081 | switch (field.getType()) { |
---|
1082 | case INT32: |
---|
1083 | case SINT32: |
---|
1084 | case SFIXED32: |
---|
1085 | value = tokenizer.consumeInt32(); |
---|
1086 | break; |
---|
1087 | |
---|
1088 | case INT64: |
---|
1089 | case SINT64: |
---|
1090 | case SFIXED64: |
---|
1091 | value = tokenizer.consumeInt64(); |
---|
1092 | break; |
---|
1093 | |
---|
1094 | case UINT32: |
---|
1095 | case FIXED32: |
---|
1096 | value = tokenizer.consumeUInt32(); |
---|
1097 | break; |
---|
1098 | |
---|
1099 | case UINT64: |
---|
1100 | case FIXED64: |
---|
1101 | value = tokenizer.consumeUInt64(); |
---|
1102 | break; |
---|
1103 | |
---|
1104 | case FLOAT: |
---|
1105 | value = tokenizer.consumeFloat(); |
---|
1106 | break; |
---|
1107 | |
---|
1108 | case DOUBLE: |
---|
1109 | value = tokenizer.consumeDouble(); |
---|
1110 | break; |
---|
1111 | |
---|
1112 | case BOOL: |
---|
1113 | value = tokenizer.consumeBoolean(); |
---|
1114 | break; |
---|
1115 | |
---|
1116 | case STRING: |
---|
1117 | value = tokenizer.consumeString(); |
---|
1118 | break; |
---|
1119 | |
---|
1120 | case BYTES: |
---|
1121 | value = tokenizer.consumeByteString(); |
---|
1122 | break; |
---|
1123 | |
---|
1124 | case ENUM: |
---|
1125 | final EnumDescriptor enumType = field.getEnumType(); |
---|
1126 | |
---|
1127 | if (tokenizer.lookingAtInteger()) { |
---|
1128 | final int number = tokenizer.consumeInt32(); |
---|
1129 | value = enumType.findValueByNumber(number); |
---|
1130 | if (value == null) { |
---|
1131 | throw tokenizer.parseExceptionPreviousToken( |
---|
1132 | "Enum type \"" + enumType.getFullName() + |
---|
1133 | "\" has no value with number " + number + '.'); |
---|
1134 | } |
---|
1135 | } else { |
---|
1136 | final String id = tokenizer.consumeIdentifier(); |
---|
1137 | value = enumType.findValueByName(id); |
---|
1138 | if (value == null) { |
---|
1139 | throw tokenizer.parseExceptionPreviousToken( |
---|
1140 | "Enum type \"" + enumType.getFullName() + |
---|
1141 | "\" has no value named \"" + id + "\"."); |
---|
1142 | } |
---|
1143 | } |
---|
1144 | |
---|
1145 | break; |
---|
1146 | |
---|
1147 | case MESSAGE: |
---|
1148 | case GROUP: |
---|
1149 | throw new RuntimeException("Can't get here."); |
---|
1150 | } |
---|
1151 | } |
---|
1152 | |
---|
1153 | if (field.isRepeated()) { |
---|
1154 | builder.addRepeatedField(field, value); |
---|
1155 | } else { |
---|
1156 | builder.setField(field, value); |
---|
1157 | } |
---|
1158 | } |
---|
1159 | |
---|
1160 | // ================================================================= |
---|
1161 | // Utility functions |
---|
1162 | // |
---|
1163 | // Some of these methods are package-private because Descriptors.java uses |
---|
1164 | // them. |
---|
1165 | |
---|
1166 | /** |
---|
1167 | * Escapes bytes in the format used in protocol buffer text format, which |
---|
1168 | * is the same as the format used for C string literals. All bytes |
---|
1169 | * that are not printable 7-bit ASCII characters are escaped, as well as |
---|
1170 | * backslash, single-quote, and double-quote characters. Characters for |
---|
1171 | * which no defined short-hand escape sequence is defined will be escaped |
---|
1172 | * using 3-digit octal sequences. |
---|
1173 | */ |
---|
1174 | static String escapeBytes(final ByteString input) { |
---|
1175 | final StringBuilder builder = new StringBuilder(input.size()); |
---|
1176 | for (int i = 0; i < input.size(); i++) { |
---|
1177 | final byte b = input.byteAt(i); |
---|
1178 | switch (b) { |
---|
1179 | // Java does not recognize \a or \v, apparently. |
---|
1180 | case 0x07: builder.append("\\a" ); break; |
---|
1181 | case '\b': builder.append("\\b" ); break; |
---|
1182 | case '\f': builder.append("\\f" ); break; |
---|
1183 | case '\n': builder.append("\\n" ); break; |
---|
1184 | case '\r': builder.append("\\r" ); break; |
---|
1185 | case '\t': builder.append("\\t" ); break; |
---|
1186 | case 0x0b: builder.append("\\v" ); break; |
---|
1187 | case '\\': builder.append("\\\\"); break; |
---|
1188 | case '\'': builder.append("\\\'"); break; |
---|
1189 | case '"' : builder.append("\\\""); break; |
---|
1190 | default: |
---|
1191 | // Note: Bytes with the high-order bit set should be escaped. Since |
---|
1192 | // bytes are signed, such bytes will compare less than 0x20, hence |
---|
1193 | // the following line is correct. |
---|
1194 | if (b >= 0x20) { |
---|
1195 | builder.append((char) b); |
---|
1196 | } else { |
---|
1197 | builder.append('\\'); |
---|
1198 | builder.append((char) ('0' + ((b >>> 6) & 3))); |
---|
1199 | builder.append((char) ('0' + ((b >>> 3) & 7))); |
---|
1200 | builder.append((char) ('0' + (b & 7))); |
---|
1201 | } |
---|
1202 | break; |
---|
1203 | } |
---|
1204 | } |
---|
1205 | return builder.toString(); |
---|
1206 | } |
---|
1207 | |
---|
1208 | /** |
---|
1209 | * Un-escape a byte sequence as escaped using |
---|
1210 | * {@link #escapeBytes(ByteString)}. Two-digit hex escapes (starting with |
---|
1211 | * "\x") are also recognized. |
---|
1212 | */ |
---|
1213 | static ByteString unescapeBytes(final CharSequence charString) |
---|
1214 | throws InvalidEscapeSequenceException { |
---|
1215 | // First convert the Java characater sequence to UTF-8 bytes. |
---|
1216 | ByteString input = ByteString.copyFromUtf8(charString.toString()); |
---|
1217 | // Then unescape certain byte sequences introduced by ASCII '\\'. The valid |
---|
1218 | // escapes can all be expressed with ASCII characters, so it is safe to |
---|
1219 | // operate on bytes here. |
---|
1220 | // |
---|
1221 | // Unescaping the input byte array will result in a byte sequence that's no |
---|
1222 | // longer than the input. That's because each escape sequence is between |
---|
1223 | // two and four bytes long and stands for a single byte. |
---|
1224 | final byte[] result = new byte[input.size()]; |
---|
1225 | int pos = 0; |
---|
1226 | for (int i = 0; i < input.size(); i++) { |
---|
1227 | byte c = input.byteAt(i); |
---|
1228 | if (c == '\\') { |
---|
1229 | if (i + 1 < input.size()) { |
---|
1230 | ++i; |
---|
1231 | c = input.byteAt(i); |
---|
1232 | if (isOctal(c)) { |
---|
1233 | // Octal escape. |
---|
1234 | int code = digitValue(c); |
---|
1235 | if (i + 1 < input.size() && isOctal(input.byteAt(i + 1))) { |
---|
1236 | ++i; |
---|
1237 | code = code * 8 + digitValue(input.byteAt(i)); |
---|
1238 | } |
---|
1239 | if (i + 1 < input.size() && isOctal(input.byteAt(i + 1))) { |
---|
1240 | ++i; |
---|
1241 | code = code * 8 + digitValue(input.byteAt(i)); |
---|
1242 | } |
---|
1243 | // TODO: Check that 0 <= code && code <= 0xFF. |
---|
1244 | result[pos++] = (byte)code; |
---|
1245 | } else { |
---|
1246 | switch (c) { |
---|
1247 | case 'a' : result[pos++] = 0x07; break; |
---|
1248 | case 'b' : result[pos++] = '\b'; break; |
---|
1249 | case 'f' : result[pos++] = '\f'; break; |
---|
1250 | case 'n' : result[pos++] = '\n'; break; |
---|
1251 | case 'r' : result[pos++] = '\r'; break; |
---|
1252 | case 't' : result[pos++] = '\t'; break; |
---|
1253 | case 'v' : result[pos++] = 0x0b; break; |
---|
1254 | case '\\': result[pos++] = '\\'; break; |
---|
1255 | case '\'': result[pos++] = '\''; break; |
---|
1256 | case '"' : result[pos++] = '\"'; break; |
---|
1257 | |
---|
1258 | case 'x': |
---|
1259 | // hex escape |
---|
1260 | int code = 0; |
---|
1261 | if (i + 1 < input.size() && isHex(input.byteAt(i + 1))) { |
---|
1262 | ++i; |
---|
1263 | code = digitValue(input.byteAt(i)); |
---|
1264 | } else { |
---|
1265 | throw new InvalidEscapeSequenceException( |
---|
1266 | "Invalid escape sequence: '\\x' with no digits"); |
---|
1267 | } |
---|
1268 | if (i + 1 < input.size() && isHex(input.byteAt(i + 1))) { |
---|
1269 | ++i; |
---|
1270 | code = code * 16 + digitValue(input.byteAt(i)); |
---|
1271 | } |
---|
1272 | result[pos++] = (byte)code; |
---|
1273 | break; |
---|
1274 | |
---|
1275 | default: |
---|
1276 | throw new InvalidEscapeSequenceException( |
---|
1277 | "Invalid escape sequence: '\\" + (char)c + '\''); |
---|
1278 | } |
---|
1279 | } |
---|
1280 | } else { |
---|
1281 | throw new InvalidEscapeSequenceException( |
---|
1282 | "Invalid escape sequence: '\\' at end of string."); |
---|
1283 | } |
---|
1284 | } else { |
---|
1285 | result[pos++] = c; |
---|
1286 | } |
---|
1287 | } |
---|
1288 | |
---|
1289 | return ByteString.copyFrom(result, 0, pos); |
---|
1290 | } |
---|
1291 | |
---|
1292 | /** |
---|
1293 | * Thrown by {@link TextFormat#unescapeBytes} and |
---|
1294 | * {@link TextFormat#unescapeText} when an invalid escape sequence is seen. |
---|
1295 | */ |
---|
1296 | static class InvalidEscapeSequenceException extends IOException { |
---|
1297 | private static final long serialVersionUID = -8164033650142593304L; |
---|
1298 | |
---|
1299 | InvalidEscapeSequenceException(final String description) { |
---|
1300 | super(description); |
---|
1301 | } |
---|
1302 | } |
---|
1303 | |
---|
1304 | /** |
---|
1305 | * Like {@link #escapeBytes(ByteString)}, but escapes a text string. |
---|
1306 | * Non-ASCII characters are first encoded as UTF-8, then each byte is escaped |
---|
1307 | * individually as a 3-digit octal escape. Yes, it's weird. |
---|
1308 | */ |
---|
1309 | static String escapeText(final String input) { |
---|
1310 | return escapeBytes(ByteString.copyFromUtf8(input)); |
---|
1311 | } |
---|
1312 | |
---|
1313 | /** |
---|
1314 | * Un-escape a text string as escaped using {@link #escapeText(String)}. |
---|
1315 | * Two-digit hex escapes (starting with "\x") are also recognized. |
---|
1316 | */ |
---|
1317 | static String unescapeText(final String input) |
---|
1318 | throws InvalidEscapeSequenceException { |
---|
1319 | return unescapeBytes(input).toStringUtf8(); |
---|
1320 | } |
---|
1321 | |
---|
1322 | /** Is this an octal digit? */ |
---|
1323 | private static boolean isOctal(final byte c) { |
---|
1324 | return '0' <= c && c <= '7'; |
---|
1325 | } |
---|
1326 | |
---|
1327 | /** Is this a hex digit? */ |
---|
1328 | private static boolean isHex(final byte c) { |
---|
1329 | return ('0' <= c && c <= '9') || |
---|
1330 | ('a' <= c && c <= 'f') || |
---|
1331 | ('A' <= c && c <= 'F'); |
---|
1332 | } |
---|
1333 | |
---|
1334 | /** |
---|
1335 | * Interpret a character as a digit (in any base up to 36) and return the |
---|
1336 | * numeric value. This is like {@code Character.digit()} but we don't accept |
---|
1337 | * non-ASCII digits. |
---|
1338 | */ |
---|
1339 | private static int digitValue(final byte c) { |
---|
1340 | if ('0' <= c && c <= '9') { |
---|
1341 | return c - '0'; |
---|
1342 | } else if ('a' <= c && c <= 'z') { |
---|
1343 | return c - 'a' + 10; |
---|
1344 | } else { |
---|
1345 | return c - 'A' + 10; |
---|
1346 | } |
---|
1347 | } |
---|
1348 | |
---|
1349 | /** |
---|
1350 | * Parse a 32-bit signed integer from the text. Unlike the Java standard |
---|
1351 | * {@code Integer.parseInt()}, this function recognizes the prefixes "0x" |
---|
1352 | * and "0" to signify hexidecimal and octal numbers, respectively. |
---|
1353 | */ |
---|
1354 | static int parseInt32(final String text) throws NumberFormatException { |
---|
1355 | return (int) parseInteger(text, true, false); |
---|
1356 | } |
---|
1357 | |
---|
1358 | /** |
---|
1359 | * Parse a 32-bit unsigned integer from the text. Unlike the Java standard |
---|
1360 | * {@code Integer.parseInt()}, this function recognizes the prefixes "0x" |
---|
1361 | * and "0" to signify hexidecimal and octal numbers, respectively. The |
---|
1362 | * result is coerced to a (signed) {@code int} when returned since Java has |
---|
1363 | * no unsigned integer type. |
---|
1364 | */ |
---|
1365 | static int parseUInt32(final String text) throws NumberFormatException { |
---|
1366 | return (int) parseInteger(text, false, false); |
---|
1367 | } |
---|
1368 | |
---|
1369 | /** |
---|
1370 | * Parse a 64-bit signed integer from the text. Unlike the Java standard |
---|
1371 | * {@code Integer.parseInt()}, this function recognizes the prefixes "0x" |
---|
1372 | * and "0" to signify hexidecimal and octal numbers, respectively. |
---|
1373 | */ |
---|
1374 | static long parseInt64(final String text) throws NumberFormatException { |
---|
1375 | return parseInteger(text, true, true); |
---|
1376 | } |
---|
1377 | |
---|
1378 | /** |
---|
1379 | * Parse a 64-bit unsigned integer from the text. Unlike the Java standard |
---|
1380 | * {@code Integer.parseInt()}, this function recognizes the prefixes "0x" |
---|
1381 | * and "0" to signify hexidecimal and octal numbers, respectively. The |
---|
1382 | * result is coerced to a (signed) {@code long} when returned since Java has |
---|
1383 | * no unsigned long type. |
---|
1384 | */ |
---|
1385 | static long parseUInt64(final String text) throws NumberFormatException { |
---|
1386 | return parseInteger(text, false, true); |
---|
1387 | } |
---|
1388 | |
---|
1389 | private static long parseInteger(final String text, |
---|
1390 | final boolean isSigned, |
---|
1391 | final boolean isLong) |
---|
1392 | throws NumberFormatException { |
---|
1393 | int pos = 0; |
---|
1394 | |
---|
1395 | boolean negative = false; |
---|
1396 | if (text.startsWith("-", pos)) { |
---|
1397 | if (!isSigned) { |
---|
1398 | throw new NumberFormatException("Number must be positive: " + text); |
---|
1399 | } |
---|
1400 | ++pos; |
---|
1401 | negative = true; |
---|
1402 | } |
---|
1403 | |
---|
1404 | int radix = 10; |
---|
1405 | if (text.startsWith("0x", pos)) { |
---|
1406 | pos += 2; |
---|
1407 | radix = 16; |
---|
1408 | } else if (text.startsWith("0", pos)) { |
---|
1409 | radix = 8; |
---|
1410 | } |
---|
1411 | |
---|
1412 | final String numberText = text.substring(pos); |
---|
1413 | |
---|
1414 | long result = 0; |
---|
1415 | if (numberText.length() < 16) { |
---|
1416 | // Can safely assume no overflow. |
---|
1417 | result = Long.parseLong(numberText, radix); |
---|
1418 | if (negative) { |
---|
1419 | result = -result; |
---|
1420 | } |
---|
1421 | |
---|
1422 | // Check bounds. |
---|
1423 | // No need to check for 64-bit numbers since they'd have to be 16 chars |
---|
1424 | // or longer to overflow. |
---|
1425 | if (!isLong) { |
---|
1426 | if (isSigned) { |
---|
1427 | if (result > Integer.MAX_VALUE || result < Integer.MIN_VALUE) { |
---|
1428 | throw new NumberFormatException( |
---|
1429 | "Number out of range for 32-bit signed integer: " + text); |
---|
1430 | } |
---|
1431 | } else { |
---|
1432 | if (result >= (1L << 32) || result < 0) { |
---|
1433 | throw new NumberFormatException( |
---|
1434 | "Number out of range for 32-bit unsigned integer: " + text); |
---|
1435 | } |
---|
1436 | } |
---|
1437 | } |
---|
1438 | } else { |
---|
1439 | BigInteger bigValue = new BigInteger(numberText, radix); |
---|
1440 | if (negative) { |
---|
1441 | bigValue = bigValue.negate(); |
---|
1442 | } |
---|
1443 | |
---|
1444 | // Check bounds. |
---|
1445 | if (!isLong) { |
---|
1446 | if (isSigned) { |
---|
1447 | if (bigValue.bitLength() > 31) { |
---|
1448 | throw new NumberFormatException( |
---|
1449 | "Number out of range for 32-bit signed integer: " + text); |
---|
1450 | } |
---|
1451 | } else { |
---|
1452 | if (bigValue.bitLength() > 32) { |
---|
1453 | throw new NumberFormatException( |
---|
1454 | "Number out of range for 32-bit unsigned integer: " + text); |
---|
1455 | } |
---|
1456 | } |
---|
1457 | } else { |
---|
1458 | if (isSigned) { |
---|
1459 | if (bigValue.bitLength() > 63) { |
---|
1460 | throw new NumberFormatException( |
---|
1461 | "Number out of range for 64-bit signed integer: " + text); |
---|
1462 | } |
---|
1463 | } else { |
---|
1464 | if (bigValue.bitLength() > 64) { |
---|
1465 | throw new NumberFormatException( |
---|
1466 | "Number out of range for 64-bit unsigned integer: " + text); |
---|
1467 | } |
---|
1468 | } |
---|
1469 | } |
---|
1470 | |
---|
1471 | result = bigValue.longValue(); |
---|
1472 | } |
---|
1473 | |
---|
1474 | return result; |
---|
1475 | } |
---|
1476 | } |
---|