Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.ExtLibs/HeuristicLab.ProtobufCS/0.9.1/ProtobufCS/src/ProtocolBuffers/TextFormat.cs @ 3857

Last change on this file since 3857 was 3857, checked in by abeham, 14 years ago

#866

  • Added protobuf-csharp-port project source to ExtLibs
File size: 23.8 KB
Line 
1#region Copyright notice and license
2// Protocol Buffers - Google's data interchange format
3// Copyright 2008 Google Inc.  All rights reserved.
4// http://github.com/jskeet/dotnet-protobufs/
5// Original C++/Java/Python code:
6// http://code.google.com/p/protobuf/
7//
8// Redistribution and use in source and binary forms, with or without
9// modification, are permitted provided that the following conditions are
10// met:
11//
12//     * Redistributions of source code must retain the above copyright
13// notice, this list of conditions and the following disclaimer.
14//     * Redistributions in binary form must reproduce the above
15// copyright notice, this list of conditions and the following disclaimer
16// in the documentation and/or other materials provided with the
17// distribution.
18//     * Neither the name of Google Inc. nor the names of its
19// contributors may be used to endorse or promote products derived from
20// this software without specific prior written permission.
21//
22// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33#endregion
34
35using System;
36using System.Collections.Generic;
37using System.Globalization;
38using System.IO;
39using System.Text;
40using Google.ProtocolBuffers.Descriptors;
41using System.Collections;
42
43namespace Google.ProtocolBuffers {
44  /// <summary>
45  /// Provides ASCII text formatting support for messages.
46  /// Text parsing is provided by the Merge overloads.
47  /// </summary>
48  public static class TextFormat {
49
/// <summary>
/// Writes the text-format rendering of <paramref name="message"/> to
/// <paramref name="output"/>.
/// </summary>
/// <param name="message">The message whose known and unknown fields are printed.</param>
/// <param name="output">The writer that receives the generated text.</param>
public static void Print(IMessage message, TextWriter output) {
  // Wrap the writer in a TextGenerator and delegate to the generator-based overload.
  Print(message, new TextGenerator(output));
}
58
/// <summary>
/// Writes the text-format rendering of the unknown field set
/// <paramref name="fields" /> to <paramref name="output"/>.
/// </summary>
/// <param name="fields">The unknown fields to print.</param>
/// <param name="output">The writer that receives the generated text.</param>
public static void Print(UnknownFieldSet fields, TextWriter output) {
  PrintUnknownFields(fields, new TextGenerator(output));
}
66
/// <summary>
/// Renders <paramref name="message"/> in text format and returns the result as a string.
/// </summary>
/// <param name="message">The message to print.</param>
/// <returns>Everything that printing the message wrote to an in-memory writer.</returns>
public static string PrintToString(IMessage message) {
  StringWriter buffer = new StringWriter();
  Print(message, buffer);
  string rendered = buffer.ToString();
  return rendered;
}
72
/// <summary>
/// Renders the unknown field set <paramref name="fields"/> in text format and
/// returns the result as a string.
/// </summary>
/// <param name="fields">The unknown fields to print.</param>
/// <returns>Everything that printing the field set wrote to an in-memory writer.</returns>
public static string PrintToString(UnknownFieldSet fields) {
  StringWriter buffer = new StringWriter();
  Print(fields, buffer);
  string rendered = buffer.ToString();
  return rendered;
}
78
/// <summary>
/// Prints every entry of <c>message.AllFields</c>, followed by the message's
/// unknown fields, through <paramref name="generator"/>.
/// </summary>
private static void Print(IMessage message, TextGenerator generator) {
  foreach (KeyValuePair<FieldDescriptor, object> pair in message.AllFields) {
    FieldDescriptor descriptor = pair.Key;
    object fieldValue = pair.Value;
    PrintField(descriptor, fieldValue, generator);
  }

  // Unknown fields always come after the known ones.
  PrintUnknownFields(message.UnknownFields, generator);
}
85
/// <summary>
/// Prints one field. A repeated field's value is enumerated and each element
/// is printed as its own entry; any other field is printed once.
/// </summary>
internal static void PrintField(FieldDescriptor field, object value, TextGenerator generator) {
  if (!field.IsRepeated) {
    PrintSingleField(field, value, generator);
    return;
  }

  // Repeated field: the value is a sequence, one printed entry per element.
  foreach (object item in (IEnumerable) value) {
    PrintSingleField(field, item, generator);
  }
}
96
/// <summary>
/// Prints a single (non-repeated, or one element of a repeated) field:
/// the field label, then either <c>": value"</c> or an indented
/// <c>" { ... }"</c> body for message-typed fields, then a newline.
/// </summary>
private static void PrintSingleField(FieldDescriptor field, Object value, TextGenerator generator) {
  if (!field.IsExtension) {
    // Groups must be serialized with their original capitalization,
    // so the message type's name is printed instead of the field name.
    bool isGroup = field.FieldType == FieldType.Group;
    generator.Print(isGroup ? field.MessageType.Name : field.Name);
  } else {
    generator.Print("[");
    // MessageSet elements are special-cased for compatibility with proto1:
    // an optional message extension scoped to its own message type is
    // labelled with the message type's full name.
    bool messageSetElement = field.ContainingType.Options.MessageSetWireFormat
        && field.FieldType == FieldType.Message
        && field.IsOptional
        && field.ExtensionScope == field.MessageType;
    if (messageSetElement) {
      generator.Print(field.MessageType.FullName);
    } else {
      generator.Print(field.FullName);
    }
    generator.Print("]");
  }

  bool nested = field.MappedType == MappedType.Message;
  if (nested) {
    generator.Print(" {\n");
    generator.Indent();
  } else {
    generator.Print(": ");
  }

  PrintFieldValue(field, value, generator);

  if (nested) {
    generator.Outdent();
    generator.Print("}");
  }
  generator.Print("\n");
}
135
/// <summary>
/// Prints only the value of a field, chosen by <c>field.FieldType</c>:
/// numbers via the invariant culture, booleans as <c>true</c>/<c>false</c>,
/// strings and bytes as quoted escaped text, enums by value name, and
/// messages/groups by printing their fields. Field types not listed print nothing.
/// </summary>
private static void PrintFieldValue(FieldDescriptor field, object value, TextGenerator generator) {
  switch (field.FieldType) {
    case FieldType.Message:
    case FieldType.Group:
      Print((IMessage) value, generator);
      return;

    case FieldType.Enum:
      generator.Print(((EnumValueDescriptor) value).Name);
      return;

    case FieldType.Bytes:
      generator.Print("\"");
      generator.Print(EscapeBytes((ByteString) value));
      generator.Print("\"");
      return;

    case FieldType.String:
      generator.Print("\"");
      generator.Print(EscapeText((string) value));
      generator.Print("\"");
      return;

    case FieldType.Bool:
      // Explicitly use the lower-case Java spelling of true/false.
      if ((bool) value) {
        generator.Print("true");
      } else {
        generator.Print("false");
      }
      return;

    case FieldType.Double:
    case FieldType.Float:
    case FieldType.Int32:
    case FieldType.Int64:
    case FieldType.UInt32:
    case FieldType.UInt64:
    case FieldType.SInt32:
    case FieldType.SInt64:
    case FieldType.Fixed32:
    case FieldType.Fixed64:
    case FieldType.SFixed32:
    case FieldType.SFixed64: {
      // Object.ToString would format with the current culture; the invariant
      // culture keeps the output predictable.
      IConvertible number = (IConvertible) value;
      generator.Print(number.ToString(CultureInfo.InvariantCulture));
      return;
    }
  }
}
183
/// <summary>
/// Prints every entry of <paramref name="unknownFields"/>, labelled by its
/// field number. For each field the varint values are printed in decimal,
/// fixed32/fixed64 values as zero-padded hex, length-delimited values as
/// quoted escaped bytes, and groups as nested, indented blocks.
/// </summary>
/// <remarks>
/// All numbers are formatted with the invariant culture, matching how known
/// numeric fields are printed elsewhere in this class.
/// </remarks>
private static void PrintUnknownFields(UnknownFieldSet unknownFields, TextGenerator generator) {
  foreach (KeyValuePair<int, UnknownField> entry in unknownFields.FieldDictionary) {
    // Format the field number once, culture-independently, and reuse it below.
    string number = entry.Key.ToString(CultureInfo.InvariantCulture);
    string prefix = number + ": ";
    UnknownField field = entry.Value;

    foreach (ulong value in field.VarintList) {
      generator.Print(prefix);
      generator.Print(value.ToString(CultureInfo.InvariantCulture));
      generator.Print("\n");
    }
    foreach (uint value in field.Fixed32List) {
      generator.Print(prefix);
      generator.Print(string.Format(CultureInfo.InvariantCulture, "0x{0:x8}", value));
      generator.Print("\n");
    }
    foreach (ulong value in field.Fixed64List) {
      generator.Print(prefix);
      generator.Print(string.Format(CultureInfo.InvariantCulture, "0x{0:x16}", value));
      generator.Print("\n");
    }
    foreach (ByteString value in field.LengthDelimitedList) {
      generator.Print(number);
      generator.Print(": \"");
      generator.Print(EscapeBytes(value));
      generator.Print("\"\n");
    }
    foreach (UnknownFieldSet value in field.GroupList) {
      generator.Print(number);
      generator.Print(" {\n");
      generator.Indent();
      // Groups nest: print the inner field set one indentation level deeper.
      PrintUnknownFields(value, generator);
      generator.Outdent();
      generator.Print("}\n");
    }
  }
}
220
/// <summary>
/// Parses <paramref name="text"/> as an unsigned 64-bit integer literal.
/// </summary>
internal static ulong ParseUInt64(string text) {
  // ParseInteger hands the value back as a long; cast it back to ulong.
  long bits = ParseInteger(text, false, true);
  return (ulong) bits;
}
224
/// <summary>
/// Parses <paramref name="text"/> as a signed 64-bit integer literal.
/// </summary>
internal static long ParseInt64(string text) {
  long parsed = ParseInteger(text, true, true);
  return parsed;
}
228
/// <summary>
/// Parses <paramref name="text"/> as an unsigned 32-bit integer literal.
/// </summary>
internal static uint ParseUInt32(string text) {
  long parsed = ParseInteger(text, false, false);
  return (uint) parsed;
}
232
/// <summary>
/// Parses <paramref name="text"/> as a signed 32-bit integer literal.
/// </summary>
internal static int ParseInt32(string text) {
  long parsed = ParseInteger(text, true, false);
  return (int) parsed;
}
236
/// <summary>
/// Parses a single-precision literal. The spellings <c>inf</c>/<c>infinity</c>
/// (optionally negated, optionally with an <c>f</c> suffix) and
/// <c>nan</c>/<c>nanf</c> map to the corresponding special values; anything
/// else is parsed with the invariant culture.
/// </summary>
internal static float ParseFloat(string text) {
  if (text == "nan" || text == "nanf") {
    return float.NaN;
  }
  if (text == "inf" || text == "infinity" || text == "inff" || text == "infinityf") {
    return float.PositiveInfinity;
  }
  if (text == "-inf" || text == "-infinity" || text == "-inff" || text == "-infinityf") {
    return float.NegativeInfinity;
  }
  return float.Parse(text, CultureInfo.InvariantCulture);
}
256
/// <summary>
/// Parses a double-precision literal. The spellings <c>inf</c>/<c>infinity</c>
/// (optionally negated) and <c>nan</c> map to the corresponding special
/// values; anything else is parsed with the invariant culture.
/// </summary>
internal static double ParseDouble(string text) {
  if (text == "nan") {
    return double.NaN;
  }
  if (text == "inf" || text == "infinity") {
    return double.PositiveInfinity;
  }
  if (text == "-inf" || text == "-infinity") {
    return double.NegativeInfinity;
  }
  return double.Parse(text, CultureInfo.InvariantCulture);
}
271   
/// <summary>
/// Parses an integer in hex (leading 0x), decimal (no prefix) or octal (leading 0).
/// Only a negative sign is permitted, and it must come before the radix indicator.
/// </summary>
/// <param name="text">The literal to parse.</param>
/// <param name="isSigned">
/// True to allow a leading '-' and apply the signed range; false to reject
/// negative numbers and apply the unsigned range.
/// </param>
/// <param name="isLong">True for the 64-bit range, false for the 32-bit range.</param>
/// <returns>
/// The parsed value as a <c>long</c>. Unsigned 64-bit values above
/// <c>long.MaxValue</c> are returned as the same bit pattern, so callers
/// cast the result back to <c>ulong</c>.
/// </returns>
/// <exception cref="FormatException">
/// The text is not a valid literal, is negative when <paramref name="isSigned"/>
/// is false, or is out of range. This is the only exception type thrown for bad input.
/// </exception>
private static long ParseInteger(string text, bool isSigned, bool isLong) {
  string original = text;
  bool negative = false;
  // Ordinal comparisons: these are syntax markers, not linguistic text.
  if (text.StartsWith("-", StringComparison.Ordinal)) {
    if (!isSigned) {
      throw new FormatException("Number must be positive: " + original);
    }
    negative = true;
    text = text.Substring(1);
  }

  int radix = 10;
  if (text.StartsWith("0x", StringComparison.Ordinal)) {
    radix = 16;
    text = text.Substring(2);
  } else if (text.StartsWith("0", StringComparison.Ordinal)) {
    radix = 8;
  }

  ulong result;
  try {
    // Workaround for https://connect.microsoft.com/VisualStudio/feedback/ViewFeedback.aspx?FeedbackID=278448
    // We should be able to use Convert.ToUInt64 for all cases.
    // NumberStyles.None + invariant culture: digits only, so a second sign,
    // a '+' or surrounding white space is rejected as the summary promises.
    result = radix == 10
        ? ulong.Parse(text, NumberStyles.None, CultureInfo.InvariantCulture)
        : Convert.ToUInt64(text, radix);
  } catch (OverflowException) {
    // Convert OverflowException to FormatException so there's a single exception type this method can throw.
    throw NumberOutOfRange(original, isSigned, isLong);
  } catch (ArgumentException) {
    // Convert.ToUInt64(string, int) reports an empty digit string (e.g. "0x")
    // as ArgumentOutOfRangeException and an embedded '-' (e.g. "0x-1") as
    // ArgumentException; fold both into the single documented exception type.
    throw new FormatException("Invalid integer literal: " + original);
  }

  if (negative) {
    // The magnitude of the most negative value is one more than the largest positive value.
    ulong max = isLong ? 0x8000000000000000UL : 0x80000000UL;
    if (result > max) {
      throw NumberOutOfRange(original, true, isLong);
    }
    // result may be exactly 2^63, whose negation is long.MinValue; unchecked
    // makes the wrap-around explicit even when overflow checking is enabled.
    return unchecked(-((long) result));
  } else {
    ulong max = isSigned
        ? (isLong ? (ulong) long.MaxValue : int.MaxValue)
        : (isLong ? ulong.MaxValue : uint.MaxValue);
    if (result > max) {
      throw NumberOutOfRange(original, isSigned, isLong);
    }
    // Unsigned 64-bit values above long.MaxValue keep their bit pattern.
    return unchecked((long) result);
  }
}

/// <summary>
/// Builds the FormatException reported when a literal does not fit the requested integer type.
/// </summary>
private static FormatException NumberOutOfRange(string original, bool isSigned, bool isLong) {
  string numberDescription = string.Format("{0}-bit {1}signed integer", isLong ? 64 : 32, isSigned ? "" : "un");
  return new FormatException("Number out of range for " + numberDescription + ": " + original);
}
324
/// <summary>
/// Returns true when <paramref name="c"/> is one of the octal digits '0' through '7'.
/// </summary>
private static bool IsOctal(char c) {
  if (c < '0') {
    return false;
  }
  return c <= '7';
}
331
/// <summary>
/// Returns true when <paramref name="c"/> is a hexadecimal digit:
/// '0'-'9', 'a'-'f' or 'A'-'F'.
/// </summary>
private static bool IsHex(char c) {
  if (c >= '0' && c <= '9') {
    return true;
  }
  if (c >= 'a' && c <= 'f') {
    return true;
  }
  return c >= 'A' && c <= 'F';
}
340
/// <summary>
/// Interprets a character as a digit (in any base up to 36) and returns the
/// numeric value. Decimal digits map to 0-9 and lower-case letters to 10-35;
/// every other character is treated as an upper-case letter, without validation.
/// </summary>
private static int ParseDigit(char c) {
  if (c >= 'a' && c <= 'z') {
    return 10 + (c - 'a');
  }
  if (c >= '0' && c <= '9') {
    return c - '0';
  }
  // No validation here: callers are expected to have checked the character already.
  return 10 + (c - 'A');
}
354
/// <summary>
/// Unescapes a text string as escaped using <see cref="EscapeText(string)" />.
/// Two-digit hex escapes (starting with "\x") are also recognised.
/// The unescaped bytes are decoded as UTF-8.
/// </summary>
internal static string UnescapeText(string input) {
  ByteString raw = UnescapeBytes(input);
  return raw.ToStringUtf8();
}
362
/// <summary>
/// Text counterpart of <see cref="EscapeBytes" />: the string is first
/// encoded as UTF-8 and the resulting bytes are then escaped one by one.
/// The returned value is guaranteed to be entirely ASCII.
/// </summary>
internal static string EscapeText(string input) {
  ByteString utf8 = ByteString.CopyFromUtf8(input);
  return EscapeBytes(utf8);
}
371
/// <summary>
/// Escapes bytes in the format used in protocol buffer text format, which
/// is the same as the format used for C string literals.  All bytes
/// that are not printable 7-bit ASCII characters are escaped, as well as
/// backslash, single-quote, and double-quote characters.  Characters for
/// which no defined short-hand escape sequence is defined will be escaped
/// using 3-digit octal sequences.
/// The returned value is guaranteed to be entirely ASCII.
/// </summary>
/// <remarks>
/// Printable means 0x20 through 0x7E. DEL (0x7F) is a control character and
/// is therefore written as the octal escape <c>\177</c>, which
/// UnescapeBytes decodes back to the same byte.
/// </remarks>
internal static String EscapeBytes(ByteString input) {
  StringBuilder builder = new StringBuilder(input.Length);
  foreach (byte b in input) {
    switch (b) {
      // C# does not use \a or \v
      case 0x07: builder.Append("\\a" ); break;
      case (byte)'\b': builder.Append("\\b" ); break;
      case (byte)'\f': builder.Append("\\f" ); break;
      case (byte)'\n': builder.Append("\\n" ); break;
      case (byte)'\r': builder.Append("\\r" ); break;
      case (byte)'\t': builder.Append("\\t" ); break;
      case 0x0b: builder.Append("\\v" ); break;
      case (byte)'\\': builder.Append("\\\\"); break;
      case (byte)'\'': builder.Append("\\\'"); break;
      case (byte)'"' : builder.Append("\\\""); break;
      default:
        if (b >= 0x20 && b < 0x7f) {
          // Printable ASCII is copied through unchanged.
          builder.Append((char) b);
        } else {
          // Everything else becomes a backslash followed by exactly three
          // octal digits (2 + 3 + 3 bits of the byte).
          builder.Append('\\');
          builder.Append((char) ('0' + ((b >> 6) & 3)));
          builder.Append((char) ('0' + ((b >> 3) & 7)));
          builder.Append((char) ('0' + (b & 7)));
        }
        break;
    }
  }
  return builder.ToString();
}
410
/// <summary>
/// Performs string unescaping from C style (octal, hex, form feeds, tab etc) into a byte string.
/// </summary>
/// <remarks>
/// Recognised escapes: one to three octal digits, <c>\x</c> followed by one or
/// two hex digits, and the single-character escapes a, b, f, n, r, t, v,
/// backslash, single quote and double quote.
/// </remarks>
/// <exception cref="FormatException">
/// An unescaped character lies outside the range 32-127, the input ends with
/// a lone backslash, <c>\x</c> has no hex digit, or the escape character is not recognised.
/// </exception>
internal static ByteString UnescapeBytes(string input) {
  // Each input character yields at most one byte, so this buffer is always large enough.
  byte[] buffer = new byte[input.Length];
  int written = 0;
  int index = 0;  // position of the next unread character

  while (index < input.Length) {
    char current = input[index++];
    if (current < 32 || current > 127) {
      throw new FormatException("Escaped string must only contain ASCII");
    }
    if (current != '\\') {
      buffer[written++] = (byte) current;
      continue;
    }
    if (index >= input.Length) {
      throw new FormatException("Invalid escape sequence: '\\' at end of string.");
    }

    char escape = input[index++];
    int code;
    if (IsOctal(escape)) {
      // Octal escape: the first digit plus up to two further octal digits.
      code = ParseDigit(escape);
      for (int extra = 0; extra < 2 && index < input.Length && IsOctal(input[index]); extra++) {
        code = code * 8 + ParseDigit(input[index++]);
      }
    } else if (escape == 'x') {
      // Hex escape: one mandatory digit, one optional digit.
      if (index >= input.Length || !IsHex(input[index])) {
        throw new FormatException("Invalid escape sequence: '\\x' with no digits");
      }
      code = ParseDigit(input[index++]);
      if (index < input.Length && IsHex(input[index])) {
        code = code * 16 + ParseDigit(input[index++]);
      }
    } else {
      switch (escape) {
        case 'a': code = 0x07; break;
        case 'b': code = '\b'; break;
        case 'f': code = '\f'; break;
        case 'n': code = '\n'; break;
        case 'r': code = '\r'; break;
        case 't': code = '\t'; break;
        case 'v': code = 0x0b; break;
        case '\\': code = '\\'; break;
        case '\'': code = '\''; break;
        case '"': code = '"'; break;
        default:
          throw new FormatException("Invalid escape sequence: '\\" + escape + "'");
      }
    }
    // Only the low eight bits are kept (an octal escape can exceed 255).
    buffer[written++] = (byte) code;
  }

  return ByteString.CopyFrom(buffer, 0, written);
}
481
/// <summary>
/// Parses text-format <paramref name="text"/> and merges it into
/// <paramref name="builder"/>, using the empty extension registry.
/// </summary>
public static void Merge(string text, IBuilder builder) {
  ExtensionRegistry noExtensions = ExtensionRegistry.Empty;
  Merge(text, noExtensions, builder);
}
485
/// <summary>
/// Parses text-format input from <paramref name="reader"/> and merges it into
/// <paramref name="builder"/>, using the empty extension registry.
/// </summary>
public static void Merge(TextReader reader, IBuilder builder) {
  ExtensionRegistry noExtensions = ExtensionRegistry.Empty;
  Merge(reader, noExtensions, builder);
}
489
/// <summary>
/// Reads <paramref name="reader"/> to its end and merges the resulting text
/// into <paramref name="builder"/>, resolving extensions through
/// <paramref name="registry"/>.
/// </summary>
public static void Merge(TextReader reader, ExtensionRegistry registry, IBuilder builder) {
  string contents = reader.ReadToEnd();
  Merge(contents, registry, builder);
}
493
/// <summary>
/// Tokenizes <paramref name="text"/> and merges one field after another into
/// <paramref name="builder"/> until the tokenizer reports the end of input.
/// Extensions are resolved through <paramref name="registry"/>.
/// </summary>
public static void Merge(string text, ExtensionRegistry registry, IBuilder builder) {
  for (TextTokenizer tokenizer = new TextTokenizer(text); !tokenizer.AtEnd; ) {
    MergeField(tokenizer, registry, builder);
  }
}
501
/// <summary>
/// Parses a single field from the specified tokenizer and merges it into
/// the builder.
/// </summary>
/// <remarks>
/// The field is named either by a bracketed, dot-separated extension name
/// looked up in <paramref name="extensionRegistry"/>, or by a plain
/// identifier looked up in the builder's message descriptor. Message-typed
/// fields are parsed recursively between <c>&lt; &gt;</c> or <c>{ }</c>;
/// all other fields are parsed as <c>: value</c>. The value is appended for
/// repeated fields and set otherwise.
/// </remarks>
private static void MergeField(TextTokenizer tokenizer, ExtensionRegistry extensionRegistry,
    IBuilder builder) {

  MessageDescriptor messageType = builder.DescriptorForType;
  ExtensionInfo extensionInfo = null;
  FieldDescriptor field;

  if (!tokenizer.TryConsume("[")) {
    // An ordinary field, named by a single identifier.
    String fieldName = tokenizer.ConsumeIdentifier();
    field = messageType.FindDescriptor<FieldDescriptor>(fieldName);

    if (field == null) {
      // Group names are expected to be capitalized as they appear in the
      // .proto file, which matches their type names rather than their field
      // names, so retry with the lower-cased name. The invariant culture is
      // specified explicitly so that this does not break when executing in Turkey.
      field = messageType.FindDescriptor<FieldDescriptor>(fieldName.ToLower(CultureInfo.InvariantCulture));
      // A lower-cased match is only accepted when it found a group.
      if (field != null && field.FieldType != FieldType.Group) {
        field = null;
      }
    }

    // A group must be written exactly as its message type name.
    if (field != null && field.FieldType == FieldType.Group && field.MessageType.Name != fieldName) {
      field = null;
    }

    if (field == null) {
      throw tokenizer.CreateFormatExceptionPreviousToken(
          "Message type \"" + messageType.FullName + "\" has no field named \"" + fieldName + "\".");
    }
  } else {
    // An extension: "[" identifier ("." identifier)* "]".
    StringBuilder qualifiedName = new StringBuilder(tokenizer.ConsumeIdentifier());
    while (tokenizer.TryConsume(".")) {
      qualifiedName.Append(".");
      qualifiedName.Append(tokenizer.ConsumeIdentifier());
    }
    string extensionName = qualifiedName.ToString();

    extensionInfo = extensionRegistry[extensionName];

    // Both checks happen before the closing bracket is consumed.
    if (extensionInfo == null) {
      throw tokenizer.CreateFormatExceptionPreviousToken("Extension \"" + extensionName + "\" not found in the ExtensionRegistry.");
    }
    if (extensionInfo.Descriptor.ContainingType != messageType) {
      throw tokenizer.CreateFormatExceptionPreviousToken("Extension \"" + extensionName + "\" does not extend message type \"" +
        messageType.FullName + "\".");
    }

    tokenizer.Consume("]");

    field = extensionInfo.Descriptor;
  }

  object value = null;

  if (field.MappedType != MappedType.Message) {
    // Scalar and enum values require the colon.
    tokenizer.Consume(":");

    switch (field.FieldType) {
      case FieldType.Bool:
        value = tokenizer.ConsumeBoolean();
        break;

      case FieldType.String:
        value = tokenizer.ConsumeString();
        break;

      case FieldType.Bytes:
        value = tokenizer.ConsumeByteString();
        break;

      case FieldType.Float:
        value = tokenizer.ConsumeFloat();
        break;

      case FieldType.Double:
        value = tokenizer.ConsumeDouble();
        break;

      case FieldType.Int32:
      case FieldType.SInt32:
      case FieldType.SFixed32:
        value = tokenizer.ConsumeInt32();
        break;

      case FieldType.UInt32:
      case FieldType.Fixed32:
        value = tokenizer.ConsumeUInt32();
        break;

      case FieldType.Int64:
      case FieldType.SInt64:
      case FieldType.SFixed64:
        value = tokenizer.ConsumeInt64();
        break;

      case FieldType.UInt64:
      case FieldType.Fixed64:
        value = tokenizer.ConsumeUInt64();
        break;

      case FieldType.Enum: {
        // An enum value may be given either by number or by name.
        EnumDescriptor enumType = field.EnumType;

        if (!tokenizer.LookingAtInteger()) {
          String id = tokenizer.ConsumeIdentifier();
          value = enumType.FindValueByName(id);
          if (value == null) {
            throw tokenizer.CreateFormatExceptionPreviousToken(
              "Enum type \"" + enumType.FullName +
              "\" has no value named \"" + id + "\".");
          }
        } else {
          int number = tokenizer.ConsumeInt32();
          value = enumType.FindValueByNumber(number);
          if (value == null) {
            throw tokenizer.CreateFormatExceptionPreviousToken(
              "Enum type \"" + enumType.FullName +
              "\" has no value with number " + number + ".");
          }
        }

        break;
      }

      case FieldType.Message:
      case FieldType.Group:
        // Message-mapped fields take the other branch of the enclosing if.
        throw new InvalidOperationException("Can't get here.");
    }
  } else {
    tokenizer.TryConsume(":");  // optional before a nested message

    // The body is delimited by either <...> or {...}.
    String closer;
    if (tokenizer.TryConsume("<")) {
      closer = ">";
    } else {
      tokenizer.Consume("{");
      closer = "}";
    }

    IBuilder nested;
    if (extensionInfo != null) {
      nested = extensionInfo.DefaultInstance.WeakCreateBuilderForType();
    } else {
      nested = builder.CreateBuilderForField(field);
    }

    // Merge nested fields until the closing token is consumed.
    for (;;) {
      if (tokenizer.TryConsume(closer)) {
        break;
      }
      if (tokenizer.AtEnd) {
        throw tokenizer.CreateFormatException("Expected \"" + closer + "\".");
      }
      MergeField(tokenizer, extensionRegistry, nested);
    }

    value = nested.WeakBuild();
  }

  if (!field.IsRepeated) {
    builder.SetField(field, value);
  } else {
    builder.WeakAddRepeatedField(field, value);
  }
}
673  }
674}
Note: See TracBrowser for help on using the repository browser.