Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.ExtLibs/HeuristicLab.ProtobufCS/0.9.1/ProtobufCS/src/ProtocolBuffers/TextFormat.cs @ 4095

Last change on this file since 4095 was 4068, checked in by swagner, 14 years ago

Sorted usings and removed unused usings in entire solution (#1094)

File size: 23.9 KB
Line 
1#region Copyright notice and license
2// Protocol Buffers - Google's data interchange format
3// Copyright 2008 Google Inc.  All rights reserved.
4// http://github.com/jskeet/dotnet-protobufs/
5// Original C++/Java/Python code:
6// http://code.google.com/p/protobuf/
7//
8// Redistribution and use in source and binary forms, with or without
9// modification, are permitted provided that the following conditions are
10// met:
11//
12//     * Redistributions of source code must retain the above copyright
13// notice, this list of conditions and the following disclaimer.
14//     * Redistributions in binary form must reproduce the above
15// copyright notice, this list of conditions and the following disclaimer
16// in the documentation and/or other materials provided with the
17// distribution.
18//     * Neither the name of Google Inc. nor the names of its
19// contributors may be used to endorse or promote products derived from
20// this software without specific prior written permission.
21//
22// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33#endregion
34
35using System;
36using System.Collections;
37using System.Collections.Generic;
38using System.Globalization;
39using System.IO;
40using System.Text;
41using Google.ProtocolBuffers.Descriptors;
42
43namespace Google.ProtocolBuffers {
44  /// <summary>
45  /// Provides ASCII text formatting support for messages.
46  /// Text-format parsing is provided by the <c>Merge</c> methods.
47  /// </summary>
48  public static class TextFormat {
49
/// <summary>
/// Writes the text-format representation of <paramref name="message"/>
/// to <paramref name="output"/>.
/// </summary>
/// <param name="message">The message whose fields are printed.</param>
/// <param name="output">The writer that receives the text.</param>
public static void Print(IMessage message, TextWriter output) {
  // Wrap the writer in a TextGenerator and delegate to the private overload.
  Print(message, new TextGenerator(output));
}
58
/// <summary>
/// Writes the text-format representation of the unknown fields in
/// <paramref name="fields" /> to <paramref name="output"/>.
/// </summary>
/// <param name="fields">The unknown field set to print.</param>
/// <param name="output">The writer that receives the text.</param>
public static void Print(UnknownFieldSet fields, TextWriter output) {
  PrintUnknownFields(fields, new TextGenerator(output));
}
66
/// <summary>
/// Returns the text-format representation of <paramref name="message"/> as a string.
/// </summary>
public static string PrintToString(IMessage message) {
  StringWriter buffer = new StringWriter();
  Print(message, buffer);
  string rendered = buffer.ToString();
  return rendered;
}
72
/// <summary>
/// Returns the text-format representation of the unknown fields in
/// <paramref name="fields"/> as a string.
/// </summary>
public static string PrintToString(UnknownFieldSet fields) {
  StringWriter buffer = new StringWriter();
  Print(fields, buffer);
  string rendered = buffer.ToString();
  return rendered;
}
78
/// <summary>
/// Prints every entry of <c>message.AllFields</c>, followed by the message's
/// unknown fields, to <paramref name="generator"/>.
/// </summary>
private static void Print(IMessage message, TextGenerator generator) {
  foreach (KeyValuePair<FieldDescriptor, object> pair in message.AllFields) {
    FieldDescriptor descriptor = pair.Key;
    object fieldValue = pair.Value;
    PrintField(descriptor, fieldValue, generator);
  }

  UnknownFieldSet unknown = message.UnknownFields;
  PrintUnknownFields(unknown, generator);
}
85
/// <summary>
/// Prints a field to <paramref name="generator"/>. For a repeated field,
/// <paramref name="value"/> is enumerated and each element is printed as a
/// separate entry; otherwise the value is printed once.
/// </summary>
internal static void PrintField(FieldDescriptor field, object value, TextGenerator generator) {
  if (!field.IsRepeated) {
    PrintSingleField(field, value, generator);
    return;
  }

  // Repeated field: one printed entry per element.
  foreach (object item in (IEnumerable)value) {
    PrintSingleField(field, item, generator);
  }
}
96
/// <summary>
/// Prints one field entry: the field label, then either <c>": value"</c> for
/// non-message values or an indented <c>" { ... }"</c> block for message
/// values, terminated by a newline.
/// </summary>
private static void PrintSingleField(FieldDescriptor field, Object value, TextGenerator generator) {
  if (!field.IsExtension) {
    // Groups must be serialized with their original capitalization, which is
    // the name of the group's message type rather than the field name.
    bool isGroup = field.FieldType == FieldType.Group;
    generator.Print(isGroup ? field.MessageType.Name : field.Name);
  } else {
    generator.Print("[");
    // MessageSet elements are special-cased for compatibility with proto1:
    // an optional message extension whose extension scope is its own message
    // type is labelled with the message type's full name.
    bool isMessageSetElement = field.ContainingType.Options.MessageSetWireFormat
        && field.FieldType == FieldType.Message
        && field.IsOptional
        && field.ExtensionScope == field.MessageType;
    generator.Print(isMessageSetElement ? field.MessageType.FullName : field.FullName);
    generator.Print("]");
  }

  bool isMessage = field.MappedType == MappedType.Message;
  if (isMessage) {
    generator.Print(" {\n");
    generator.Indent();
  } else {
    generator.Print(": ");
  }

  PrintFieldValue(field, value, generator);

  if (isMessage) {
    generator.Outdent();
    generator.Print("}");
  }
  generator.Print("\n");
}
135
/// <summary>
/// Prints just the value of a field (no label or separators), choosing the
/// representation from <c>field.FieldType</c>. Field types not listed in the
/// switch produce no output.
/// </summary>
private static void PrintFieldValue(FieldDescriptor field, object value, TextGenerator generator) {
  switch (field.FieldType) {
    case FieldType.Message:
    case FieldType.Group:
      // Nested messages are printed recursively.
      Print((IMessage)value, generator);
      return;

    case FieldType.Enum:
      generator.Print(((EnumValueDescriptor)value).Name);
      return;

    case FieldType.Bytes:
      generator.Print("\"");
      generator.Print(EscapeBytes((ByteString)value));
      generator.Print("\"");
      return;

    case FieldType.String:
      generator.Print("\"");
      generator.Print(EscapeText((string)value));
      generator.Print("\"");
      return;

    case FieldType.Bool:
      // Always lower-case true/false rather than the .NET "True"/"False".
      if ((bool)value) {
        generator.Print("true");
      } else {
        generator.Print("false");
      }
      return;

    case FieldType.Double:
    case FieldType.Float:
    case FieldType.Int32:
    case FieldType.Int64:
    case FieldType.SInt32:
    case FieldType.SInt64:
    case FieldType.SFixed32:
    case FieldType.SFixed64:
    case FieldType.UInt32:
    case FieldType.UInt64:
    case FieldType.Fixed32:
    case FieldType.Fixed64: {
        // Object.ToString would use the current culture; the invariant
        // culture keeps the output predictable.
        IConvertible number = (IConvertible)value;
        generator.Print(number.ToString(CultureInfo.InvariantCulture));
        return;
      }
  }
}
183
/// <summary>
/// Prints every entry of <c>unknownFields.FieldDictionary</c>, keyed by field
/// number. For each field the varint values are printed in decimal, the
/// fixed32/fixed64 values as zero-padded hex, the length-delimited values as
/// escaped quoted strings, and the groups as nested, indented blocks.
/// </summary>
/// <remarks>
/// All numbers are formatted with the invariant culture so the output does
/// not depend on the thread's current culture, matching the numeric
/// formatting used for known fields.
/// </remarks>
private static void PrintUnknownFields(UnknownFieldSet unknownFields, TextGenerator generator) {
  foreach (KeyValuePair<int, UnknownField> entry in unknownFields.FieldDictionary) {
    // Format the field number once and reuse it for every value of the field.
    string number = entry.Key.ToString(CultureInfo.InvariantCulture);
    string prefix = number + ": ";
    UnknownField field = entry.Value;

    foreach (ulong value in field.VarintList) {
      generator.Print(prefix);
      generator.Print(value.ToString(CultureInfo.InvariantCulture));
      generator.Print("\n");
    }
    foreach (uint value in field.Fixed32List) {
      generator.Print(prefix);
      generator.Print(string.Format(CultureInfo.InvariantCulture, "0x{0:x8}", value));
      generator.Print("\n");
    }
    foreach (ulong value in field.Fixed64List) {
      generator.Print(prefix);
      generator.Print(string.Format(CultureInfo.InvariantCulture, "0x{0:x16}", value));
      generator.Print("\n");
    }
    foreach (ByteString value in field.LengthDelimitedList) {
      generator.Print(number);
      generator.Print(": \"");
      generator.Print(EscapeBytes(value));
      generator.Print("\"\n");
    }
    foreach (UnknownFieldSet value in field.GroupList) {
      generator.Print(number);
      generator.Print(" {\n");
      generator.Indent();
      PrintUnknownFields(value, generator);
      generator.Outdent();
      generator.Print("}\n");
    }
  }
}
220
/// <summary>
/// Parses <paramref name="text"/> as an unsigned 64-bit integer
/// (hex, octal or decimal, as accepted by ParseInteger).
/// </summary>
internal static ulong ParseUInt64(string text) {
  long bits = ParseInteger(text, false, true);
  return (ulong)bits;
}
224
/// <summary>
/// Parses <paramref name="text"/> as a signed 64-bit integer
/// (hex, octal or decimal, as accepted by ParseInteger).
/// </summary>
internal static long ParseInt64(string text) {
  long parsed = ParseInteger(text, true, true);
  return parsed;
}
228
/// <summary>
/// Parses <paramref name="text"/> as an unsigned 32-bit integer
/// (hex, octal or decimal, as accepted by ParseInteger).
/// </summary>
internal static uint ParseUInt32(string text) {
  long parsed = ParseInteger(text, false, false);
  return (uint)parsed;
}
232
/// <summary>
/// Parses <paramref name="text"/> as a signed 32-bit integer
/// (hex, octal or decimal, as accepted by ParseInteger).
/// </summary>
internal static int ParseInt32(string text) {
  long parsed = ParseInteger(text, true, false);
  return (int)parsed;
}
236
/// <summary>
/// Parses a single-precision value. The exact, case-sensitive tokens
/// "inf"/"infinity" (optionally negated and/or suffixed with "f") and
/// "nan"/"nanf" map to the corresponding special values; anything else is
/// parsed with <see cref="float.Parse(string, IFormatProvider)"/> using the
/// invariant culture.
/// </summary>
internal static float ParseFloat(string text) {
  if (text == "nan" || text == "nanf") {
    return float.NaN;
  }
  if (text == "inf" || text == "infinity" || text == "inff" || text == "infinityf") {
    return float.PositiveInfinity;
  }
  if (text == "-inf" || text == "-infinity" || text == "-inff" || text == "-infinityf") {
    return float.NegativeInfinity;
  }
  return float.Parse(text, CultureInfo.InvariantCulture);
}
256
/// <summary>
/// Parses a double-precision value. The exact, case-sensitive tokens
/// "inf"/"infinity" (optionally negated) and "nan" map to the corresponding
/// special values; anything else is parsed with
/// <see cref="double.Parse(string, IFormatProvider)"/> using the invariant culture.
/// </summary>
internal static double ParseDouble(string text) {
  if (text == "nan") {
    return double.NaN;
  }
  if (text == "inf" || text == "infinity") {
    return double.PositiveInfinity;
  }
  if (text == "-inf" || text == "-infinity") {
    return double.NegativeInfinity;
  }
  return double.Parse(text, CultureInfo.InvariantCulture);
}
271
/// <summary>
/// Parses an integer in hex (leading 0x), decimal (no prefix) or octal (leading 0).
/// Only a negative sign is permitted, and it must come before the radix indicator.
/// </summary>
/// <param name="text">The token to parse.</param>
/// <param name="isSigned">Whether the target type is signed; a leading '-' is rejected otherwise.</param>
/// <param name="isLong">True to range-check against 64 bits, false for 32 bits.</param>
/// <returns>
/// The parsed value. For unsigned 64-bit targets the result is the value's
/// bit pattern reinterpreted as a <c>long</c>; callers cast it back.
/// </returns>
/// <exception cref="FormatException">
/// The text is not a well-formed number or is out of range for the target
/// type. This is the only exception type thrown for bad input.
/// </exception>
private static long ParseInteger(string text, bool isSigned, bool isLong) {
  string original = text;
  bool negative = false;
  // Ordinal comparisons: these are syntax markers, not linguistic text.
  if (text.StartsWith("-", StringComparison.Ordinal)) {
    if (!isSigned) {
      throw new FormatException("Number must be positive: " + original);
    }
    negative = true;
    text = text.Substring(1);
  }

  int radix = 10;
  if (text.StartsWith("0x", StringComparison.Ordinal)) {
    radix = 16;
    text = text.Substring(2);
  } else if (text.StartsWith("0", StringComparison.Ordinal)) {
    radix = 8;
  }

  // Convert.ToUInt64("", radix) throws ArgumentOutOfRangeException, so reject
  // inputs such as "0x" or "-" up front with the documented exception type.
  if (text.Length == 0) {
    throw new FormatException("Number has no digits: " + original);
  }

  ulong result;
  try {
    // Workaround for https://connect.microsoft.com/VisualStudio/feedback/ViewFeedback.aspx?FeedbackID=278448
    // We should be able to use Convert.ToUInt64 for all cases.
    // NumberStyles.None accepts digits only: no whitespace, no second sign.
    result = radix == 10
        ? ulong.Parse(text, NumberStyles.None, CultureInfo.InvariantCulture)
        : Convert.ToUInt64(text, radix);
  }
  catch (OverflowException) {
    // Convert OverflowException to FormatException so there's a single exception type this method can throw.
    throw CreateIntegerRangeException(original, isSigned, isLong);
  }
  catch (ArgumentException) {
    // Convert.ToUInt64 reports a '-' inside a non-decimal number this way.
    throw new FormatException("Invalid number: " + original);
  }

  if (negative) {
    // The magnitude of the most negative value is one more than the maximum.
    ulong max = isLong ? 0x8000000000000000UL : 0x80000000UL;
    if (result > max) {
      throw CreateIntegerRangeException(original, true, isLong);
    }
    // unchecked: negating the bit pattern of 2^63 must yield long.MinValue
    // regardless of the project's checked/unchecked compiler setting.
    return unchecked(-(long)result);
  } else {
    ulong max = isSigned
        ? (isLong ? (ulong)long.MaxValue : int.MaxValue)
        : (isLong ? ulong.MaxValue : uint.MaxValue);
    if (result > max) {
      throw CreateIntegerRangeException(original, isSigned, isLong);
    }
    // unchecked: unsigned 64-bit values above long.MaxValue are returned as
    // their bit pattern.
    return unchecked((long)result);
  }
}

/// <summary>
/// Builds the FormatException reported when a number does not fit the target integer type.
/// </summary>
private static FormatException CreateIntegerRangeException(string original, bool isSigned, bool isLong) {
  string numberDescription = string.Format(CultureInfo.InvariantCulture,
      "{0}-bit {1}signed integer", isLong ? 64 : 32, isSigned ? "" : "un");
  return new FormatException("Number out of range for " + numberDescription + ": " + original);
}
325
/// <summary>
/// Returns true when <paramref name="c"/> is one of the octal digits '0' through '7'.
/// </summary>
private static bool IsOctal(char c) {
  if (c < '0') {
    return false;
  }
  return c <= '7';
}
332
/// <summary>
/// Returns true when <paramref name="c"/> is an ASCII hexadecimal digit
/// (0-9, a-f or A-F).
/// </summary>
private static bool IsHex(char c) {
  if (c >= '0' && c <= '9') {
    return true;
  }
  if (c >= 'a' && c <= 'f') {
    return true;
  }
  return c >= 'A' && c <= 'F';
}
341
/// <summary>
/// Converts a digit character (in any base up to 36) to its numeric value:
/// '0'-'9' map to 0-9 and letters of either case map to 10-35.
/// The character is not validated; anything that is not a decimal digit or a
/// lower-case letter is treated as an offset from 'A'.
/// </summary>
private static int ParseDigit(char c) {
  if (c >= '0' && c <= '9') {
    return c - '0';
  }
  char letterBase = (c >= 'a' && c <= 'z') ? 'a' : 'A';
  return c - letterBase + 10;
}
355
/// <summary>
/// Reverses <see cref="EscapeText(string)" />: the input is unescaped to bytes
/// and the bytes are decoded as UTF-8.
/// Two-digit hex escapes (starting with "\x") are also recognised.
/// </summary>
internal static string UnescapeText(string input) {
  ByteString raw = UnescapeBytes(input);
  return raw.ToStringUtf8();
}
363
/// <summary>
/// Escapes a text string the same way <see cref="EscapeBytes" /> escapes bytes.
/// The string is first encoded as UTF-8, then each byte is escaped individually,
/// so the returned value is entirely ASCII.
/// </summary>
internal static string EscapeText(string input) {
  ByteString utf8 = ByteString.CopyFromUtf8(input);
  return EscapeBytes(utf8);
}
372
/// <summary>
/// Escapes bytes in the format used in protocol buffer text format, which
/// is the same as the format used for C string literals. Backslash,
/// single-quote and double-quote are escaped, the control bytes with a C
/// short-hand (\a \b \f \n \r \t \v) use it, other printable 7-bit ASCII
/// bytes are copied unchanged, and every remaining byte is written as a
/// 3-digit octal escape.
/// The returned value is guaranteed to be entirely ASCII.
/// </summary>
internal static String EscapeBytes(ByteString input) {
  StringBuilder escaped = new StringBuilder(input.Length);
  foreach (byte current in input) {
    string shorthand = null;
    switch (current) {
      // C# has no \a or \v character escapes, hence the numeric labels.
      case 0x07: shorthand = "\\a"; break;
      case 0x0b: shorthand = "\\v"; break;
      case (byte)'\b': shorthand = "\\b"; break;
      case (byte)'\f': shorthand = "\\f"; break;
      case (byte)'\n': shorthand = "\\n"; break;
      case (byte)'\r': shorthand = "\\r"; break;
      case (byte)'\t': shorthand = "\\t"; break;
      case (byte)'\\': shorthand = "\\\\"; break;
      case (byte)'\'': shorthand = "\\\'"; break;
      case (byte)'"': shorthand = "\\\""; break;
    }

    if (shorthand != null) {
      escaped.Append(shorthand);
      continue;
    }

    if (current >= 0x20 && current < 128) {
      // Printable ASCII passes through unchanged.
      escaped.Append((char)current);
      continue;
    }

    // Everything else: backslash followed by three octal digits.
    int high = (current >> 6) & 3;
    int middle = (current >> 3) & 7;
    int low = current & 7;
    escaped.Append('\\');
    escaped.Append((char)('0' + high));
    escaped.Append((char)('0' + middle));
    escaped.Append((char)('0' + low));
  }
  return escaped.ToString();
}
411
/// <summary>
/// Performs string unescaping from C style (octal, hex, form feeds, tab etc) into a byte string.
/// </summary>
/// <remarks>
/// Recognised escapes are one to three octal digits, <c>\x</c> followed by one
/// or two hex digits, and \a \b \f \n \r \t \v \\ \' \". A FormatException is
/// thrown for characters outside the range 32-127, for a trailing backslash,
/// for <c>\x</c> without digits and for any other escape character.
/// </remarks>
internal static ByteString UnescapeBytes(string input) {
  // Every escape consumes at least as many characters as the bytes it
  // yields, so the input length is an upper bound for the output.
  byte[] buffer = new byte[input.Length];
  int written = 0;
  int index = 0;
  while (index < input.Length) {
    char current = input[index];
    if (current < 32 || current > 127) {
      throw new FormatException("Escaped string must only contain ASCII");
    }
    if (current != '\\') {
      buffer[written++] = (byte)current;
      index++;
      continue;
    }
    if (index + 1 >= input.Length) {
      throw new FormatException("Invalid escape sequence: '\\' at end of string.");
    }

    // Step onto the character that follows the backslash.
    index++;
    current = input[index];
    if (IsOctal(current)) {
      // Octal escape: the first digit plus up to two more.
      int value = ParseDigit(current);
      for (int extra = 0; extra < 2; extra++) {
        if (index + 1 >= input.Length || !IsOctal(input[index + 1])) {
          break;
        }
        index++;
        value = value * 8 + ParseDigit(input[index]);
      }
      buffer[written++] = (byte)value;
    } else if (current == 'x') {
      // Hex escape: one mandatory digit, one optional digit.
      if (index + 1 >= input.Length || !IsHex(input[index + 1])) {
        throw new FormatException("Invalid escape sequence: '\\x' with no digits");
      }
      index++;
      int value = ParseDigit(input[index]);
      if (index + 1 < input.Length && IsHex(input[index + 1])) {
        index++;
        value = value * 16 + ParseDigit(input[index]);
      }
      buffer[written++] = (byte)value;
    } else {
      byte decoded;
      switch (current) {
        case 'a': decoded = 0x07; break;
        case 'b': decoded = (byte)'\b'; break;
        case 'f': decoded = (byte)'\f'; break;
        case 'n': decoded = (byte)'\n'; break;
        case 'r': decoded = (byte)'\r'; break;
        case 't': decoded = (byte)'\t'; break;
        case 'v': decoded = 0x0b; break;
        case '\\': decoded = (byte)'\\'; break;
        case '\'': decoded = (byte)'\''; break;
        case '"': decoded = (byte)'\"'; break;
        default:
          throw new FormatException("Invalid escape sequence: '\\" + current + "'");
      }
      buffer[written++] = decoded;
    }
    index++;
  }

  return ByteString.CopyFrom(buffer, 0, written);
}
482
/// <summary>
/// Parses text-format <paramref name="text"/> and merges the fields into
/// <paramref name="builder"/>, using an empty extension registry.
/// </summary>
public static void Merge(string text, IBuilder builder) {
  ExtensionRegistry noExtensions = ExtensionRegistry.Empty;
  Merge(text, noExtensions, builder);
}
486
/// <summary>
/// Parses text-format input read from <paramref name="reader"/> and merges
/// the fields into <paramref name="builder"/>, using an empty extension registry.
/// </summary>
public static void Merge(TextReader reader, IBuilder builder) {
  ExtensionRegistry noExtensions = ExtensionRegistry.Empty;
  Merge(reader, noExtensions, builder);
}
490
/// <summary>
/// Reads <paramref name="reader"/> to its end and merges the resulting
/// text-format input into <paramref name="builder"/>, resolving extensions
/// through <paramref name="registry"/>.
/// </summary>
public static void Merge(TextReader reader, ExtensionRegistry registry, IBuilder builder) {
  string text = reader.ReadToEnd();
  Merge(text, registry, builder);
}
494
/// <summary>
/// Tokenizes <paramref name="text"/> and merges one field at a time into
/// <paramref name="builder"/> until the tokenizer reports the end of input.
/// Extensions are resolved through <paramref name="registry"/>.
/// </summary>
public static void Merge(string text, ExtensionRegistry registry, IBuilder builder) {
  for (TextTokenizer tokens = new TextTokenizer(text); !tokens.AtEnd; ) {
    MergeField(tokens, registry, builder);
  }
}
502
/// <summary>
/// Parses a single field from the specified tokenizer and merges it into
/// the builder.
/// </summary>
/// <remarks>
/// The field is named either by an identifier or by a bracketed, dotted
/// extension name looked up in <paramref name="extensionRegistry"/>. Message
/// and group values are parsed recursively from a <c>{ }</c> or
/// <c>&lt; &gt;</c> block; every other value follows a mandatory colon.
/// Problems are reported through the tokenizer's format exceptions.
/// </remarks>
private static void MergeField(TextTokenizer tokenizer, ExtensionRegistry extensionRegistry,
    IBuilder builder) {

  MessageDescriptor containingType = builder.DescriptorForType;
  ExtensionInfo extension = null;
  FieldDescriptor field;

  if (!tokenizer.TryConsume("[")) {
    // A regular field, identified by a bare name.
    String fieldName = tokenizer.ConsumeIdentifier();
    field = containingType.FindDescriptor<FieldDescriptor>(fieldName);

    // Group names are expected to be capitalized as they appear in the
    // .proto file, which actually matches their type names, not their field
    // names.
    if (field == null) {
      // Explicitly specify the invariant culture so that lower-casing does
      // not depend on the current culture (e.g. the Turkish 'I').
      String lowered = fieldName.ToLower(CultureInfo.InvariantCulture);
      field = containingType.FindDescriptor<FieldDescriptor>(lowered);
      // A lower-cased match only counts when the field is a group.
      if (field != null && field.FieldType != FieldType.Group) {
        field = null;
      }
    }
    // A group must be referred to by exactly its message type name.
    if (field != null && field.FieldType == FieldType.Group && field.MessageType.Name != fieldName) {
      field = null;
    }

    if (field == null) {
      throw tokenizer.CreateFormatExceptionPreviousToken(
          "Message type \"" + containingType.FullName + "\" has no field named \"" + fieldName + "\".");
    }
  } else {
    // An extension: "[" identifier ("." identifier)* "]".
    StringBuilder nameBuilder = new StringBuilder(tokenizer.ConsumeIdentifier());
    while (tokenizer.TryConsume(".")) {
      nameBuilder.Append(".");
      nameBuilder.Append(tokenizer.ConsumeIdentifier());
    }
    string extensionName = nameBuilder.ToString();

    extension = extensionRegistry[extensionName];

    if (extension == null) {
      throw tokenizer.CreateFormatExceptionPreviousToken("Extension \"" + extensionName + "\" not found in the ExtensionRegistry.");
    }
    if (extension.Descriptor.ContainingType != containingType) {
      throw tokenizer.CreateFormatExceptionPreviousToken("Extension \"" + extensionName + "\" does not extend message type \"" +
        containingType.FullName + "\".");
    }

    tokenizer.Consume("]");

    field = extension.Descriptor;
  }

  object value = null;

  if (field.MappedType != MappedType.Message) {
    // Scalar or enum value: the colon is mandatory.
    tokenizer.Consume(":");

    switch (field.FieldType) {
      case FieldType.Message:
      case FieldType.Group:
        throw new InvalidOperationException("Can't get here.");

      case FieldType.Bool:
        value = tokenizer.ConsumeBoolean();
        break;

      case FieldType.String:
        value = tokenizer.ConsumeString();
        break;

      case FieldType.Bytes:
        value = tokenizer.ConsumeByteString();
        break;

      case FieldType.Float:
        value = tokenizer.ConsumeFloat();
        break;

      case FieldType.Double:
        value = tokenizer.ConsumeDouble();
        break;

      case FieldType.Int32:
      case FieldType.SInt32:
      case FieldType.SFixed32:
        value = tokenizer.ConsumeInt32();
        break;

      case FieldType.Int64:
      case FieldType.SInt64:
      case FieldType.SFixed64:
        value = tokenizer.ConsumeInt64();
        break;

      case FieldType.UInt32:
      case FieldType.Fixed32:
        value = tokenizer.ConsumeUInt32();
        break;

      case FieldType.UInt64:
      case FieldType.Fixed64:
        value = tokenizer.ConsumeUInt64();
        break;

      case FieldType.Enum: {
          // Enum values may be given by number or by name.
          EnumDescriptor enumType = field.EnumType;

          if (!tokenizer.LookingAtInteger()) {
            String id = tokenizer.ConsumeIdentifier();
            value = enumType.FindValueByName(id);
            if (value == null) {
              throw tokenizer.CreateFormatExceptionPreviousToken(
                "Enum type \"" + enumType.FullName +
                "\" has no value named \"" + id + "\".");
            }
          } else {
            int number = tokenizer.ConsumeInt32();
            value = enumType.FindValueByNumber(number);
            if (value == null) {
              throw tokenizer.CreateFormatExceptionPreviousToken(
                "Enum type \"" + enumType.FullName +
                "\" has no value with number " + number + ".");
            }
          }

          break;
        }
    }
  } else {
    // Message or group value: the colon is optional and the body is
    // delimited by either <...> or {...}.
    tokenizer.TryConsume(":");

    String closer;
    if (tokenizer.TryConsume("<")) {
      closer = ">";
    } else {
      tokenizer.Consume("{");
      closer = "}";
    }

    IBuilder nested;
    if (extension != null) {
      nested = extension.DefaultInstance.WeakCreateBuilderForType();
    } else {
      nested = builder.CreateBuilderForField(field);
    }

    while (true) {
      if (tokenizer.TryConsume(closer)) {
        break;
      }
      if (tokenizer.AtEnd) {
        throw tokenizer.CreateFormatException("Expected \"" + closer + "\".");
      }
      MergeField(tokenizer, extensionRegistry, nested);
    }

    value = nested.WeakBuild();
  }

  if (!field.IsRepeated) {
    builder.SetField(field, value);
  } else {
    builder.WeakAddRepeatedField(field, value);
  }
}
674  }
675}
Note: See TracBrowser for help on using the repository browser.