#region Copyright notice and license
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// http://github.com/jskeet/dotnet-protobufs/
// Original C++/Java/Python code:
// http://code.google.com/p/protobuf/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endregion
using System;
using System.Collections;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Text;
using Google.ProtocolBuffers.Descriptors;
namespace Google.ProtocolBuffers
{
/// <summary>
/// Provides ASCII text formatting support for messages.
/// TODO(jonskeet): Support for alternative line endings.
/// (Easy to print, via TextGenerator. Not sure about parsing.)
/// </summary>
public static class TextFormat
{
/// <summary>
/// Writes a textual representation of <paramref name="message"/> to
/// <paramref name="output"/>, through a TextGenerator constructed with "\n".
/// </summary>
/// <param name="message">The message to print.</param>
/// <param name="output">The writer that receives the text.</param>
public static void Print(IMessage message, TextWriter output)
{
    Print(message, new TextGenerator(output, "\n"));
}
/// <summary>
/// Outputs a textual representation of <paramref name="fields"/> to <paramref name="output"/>.
/// </summary>
/// <param name="fields">The unknown fields to print.</param>
/// <param name="output">The writer that receives the text.</param>
public static void Print(UnknownFieldSet fields, TextWriter output)
{
    PrintUnknownFields(fields, new TextGenerator(output, "\n"));
}
/// <summary>
/// Prints <paramref name="message"/> into an in-memory writer and returns the
/// accumulated text.
/// </summary>
/// <param name="message">The message to print.</param>
/// <returns>The text written by <c>Print</c> for the message.</returns>
public static string PrintToString(IMessage message)
{
    StringWriter buffer = new StringWriter();
    Print(message, buffer);
    string rendered = buffer.ToString();
    return rendered;
}
/// <summary>
/// Prints <paramref name="fields"/> into an in-memory writer and returns the
/// accumulated text.
/// </summary>
/// <param name="fields">The unknown fields to print.</param>
/// <returns>The text written by <c>Print</c> for the field set.</returns>
public static string PrintToString(UnknownFieldSet fields)
{
    StringWriter buffer = new StringWriter();
    Print(fields, buffer);
    string rendered = buffer.ToString();
    return rendered;
}
/// <summary>
/// Prints each entry of <c>message.AllFields</c> via <c>PrintField</c>, then prints
/// the message's unknown fields.
/// </summary>
/// <param name="message">The message whose fields are printed.</param>
/// <param name="generator">The generator that receives the text.</param>
private static void Print(IMessage message, TextGenerator generator)
{
    // The key/value types mirror PrintField's (FieldDescriptor, object) parameters.
    foreach (KeyValuePair<FieldDescriptor, object> entry in message.AllFields)
    {
        PrintField(entry.Key, entry.Value, generator);
    }
    PrintUnknownFields(message.UnknownFields, generator);
}
/// <summary>
/// Prints a field. A repeated field's value is enumerated and every element is
/// printed as its own single field; any other value is printed once.
/// </summary>
/// <param name="field">Descriptor of the field being printed.</param>
/// <param name="value">The field value; an <see cref="IEnumerable"/> when the field is repeated.</param>
/// <param name="generator">The generator that receives the text.</param>
internal static void PrintField(FieldDescriptor field, object value, TextGenerator generator)
{
    if (!field.IsRepeated)
    {
        PrintSingleField(field, value, generator);
        return;
    }

    // Repeated field: one printed entry per element.
    foreach (object item in (IEnumerable) value)
    {
        PrintSingleField(field, item, generator);
    }
}
/// <summary>
/// Prints one field entry: the field name (bracketed for extensions), then either
/// <c>" {\n"</c> + indented value + <c>"}"</c> for message-mapped fields or
/// <c>": "</c> + value for everything else, followed by a newline.
/// </summary>
/// <param name="field">Descriptor of the field being printed.</param>
/// <param name="value">The single (non-repeated) value to print.</param>
/// <param name="generator">The generator that receives the text.</param>
private static void PrintSingleField(FieldDescriptor field, Object value, TextGenerator generator)
{
    if (!field.IsExtension)
    {
        // Groups must be serialized with their original capitalization, which is
        // carried by the message type name rather than the field name.
        string plainName = field.FieldType == FieldType.Group
                               ? field.MessageType.Name
                               : field.Name;
        generator.Print(plainName);
    }
    else
    {
        generator.Print("[");
        // MessageSet elements are special-cased for compatibility with proto1:
        // they are printed under the message type's full name.
        // NOTE(review): the original comment describes the last comparison as
        // "object equality" - presumably a reference comparison of descriptors; confirm.
        bool messageSetElement = field.ContainingType.Options.MessageSetWireFormat
                                 && field.FieldType == FieldType.Message
                                 && field.IsOptional
                                 && field.ExtensionScope == field.MessageType;
        generator.Print(messageSetElement ? field.MessageType.FullName : field.FullName);
        generator.Print("]");
    }

    bool nested = field.MappedType == MappedType.Message;
    if (nested)
    {
        generator.Print(" {\n");
        generator.Indent();
    }
    else
    {
        generator.Print(": ");
    }

    PrintFieldValue(field, value, generator);

    if (nested)
    {
        generator.Outdent();
        generator.Print("}");
    }
    generator.Print("\n");
}
/// <summary>
/// Prints the value part of a field according to the field's declared type.
/// Numeric values are always formatted with the invariant culture so the output
/// is predictable; strings and bytes are escaped and wrapped in double quotes.
/// </summary>
/// <param name="field">Descriptor that selects the formatting.</param>
/// <param name="value">The value to print; cast according to the field type.</param>
/// <param name="generator">The generator that receives the text.</param>
/// <exception cref="NotSupportedException">
/// The value is a lite enum or lite message rather than a full one.
/// </exception>
private static void PrintFieldValue(FieldDescriptor field, object value, TextGenerator generator)
{
    FieldType kind = field.FieldType;
    switch (kind)
    {
        // Float and Double use the "r" (round-trip) format so the printed text
        // preserves their precision.
        case FieldType.Float:
        {
            float single = (float) value;
            generator.Print(single.ToString("r", CultureInfo.InvariantCulture));
            break;
        }
        case FieldType.Double:
        {
            double wide = (double) value;
            generator.Print(wide.ToString("r", CultureInfo.InvariantCulture));
            break;
        }
        case FieldType.Int32:
        case FieldType.Int64:
        case FieldType.SInt32:
        case FieldType.SInt64:
        case FieldType.SFixed32:
        case FieldType.SFixed64:
        case FieldType.UInt32:
        case FieldType.UInt64:
        case FieldType.Fixed32:
        case FieldType.Fixed64:
        {
            // Plain Object.ToString would convert using the current culture.
            IConvertible number = (IConvertible) value;
            generator.Print(number.ToString(CultureInfo.InvariantCulture));
            break;
        }
        case FieldType.Bool:
            // Lower-case literals, as used by the Java implementation.
            if ((bool) value)
            {
                generator.Print("true");
            }
            else
            {
                generator.Print("false");
            }
            break;
        case FieldType.String:
        {
            generator.Print("\"");
            string escapedText = EscapeText((string) value);
            generator.Print(escapedText);
            generator.Print("\"");
            break;
        }
        case FieldType.Bytes:
        {
            generator.Print("\"");
            string escapedBytes = EscapeBytes((ByteString) value);
            generator.Print(escapedBytes);
            generator.Print("\"");
            break;
        }
        case FieldType.Enum:
        {
            if (!(value is EnumValueDescriptor) && value is IEnumLite)
            {
                throw new NotSupportedException("Lite enumerations are not supported.");
            }
            EnumValueDescriptor enumValue = (EnumValueDescriptor) value;
            generator.Print(enumValue.Name);
            break;
        }
        case FieldType.Message:
        case FieldType.Group:
        {
            if (!(value is IMessage) && value is IMessageLite)
            {
                throw new NotSupportedException("Lite messages are not supported.");
            }
            Print((IMessage) value, generator);
            break;
        }
    }
}
/// <summary>
/// Prints every entry of <paramref name="unknownFields"/>, keyed by field number.
/// Varints are printed in decimal, fixed32/fixed64 values as zero-padded hex,
/// length-delimited values as escaped quoted strings, and groups recursively
/// inside an indented <c>{ }</c> block.
/// </summary>
/// <param name="unknownFields">The unknown fields to print.</param>
/// <param name="generator">The generator that receives the text.</param>
private static void PrintUnknownFields(UnknownFieldSet unknownFields, TextGenerator generator)
{
    // NOTE(review): the key type is taken to be the int field number - confirm
    // against UnknownFieldSet.FieldDictionary.
    foreach (KeyValuePair<int, UnknownField> entry in unknownFields.FieldDictionary)
    {
        // Numbers are formatted with the invariant culture, like the known-field
        // printing path, so the output does not depend on the current thread culture.
        string number = entry.Key.ToString(CultureInfo.InvariantCulture);
        string prefix = number + ": ";
        UnknownField field = entry.Value;

        foreach (ulong value in field.VarintList)
        {
            generator.Print(prefix);
            generator.Print(value.ToString(CultureInfo.InvariantCulture));
            generator.Print("\n");
        }
        foreach (uint value in field.Fixed32List)
        {
            generator.Print(prefix);
            generator.Print(string.Format(CultureInfo.InvariantCulture, "0x{0:x8}", value));
            generator.Print("\n");
        }
        foreach (ulong value in field.Fixed64List)
        {
            generator.Print(prefix);
            generator.Print(string.Format(CultureInfo.InvariantCulture, "0x{0:x16}", value));
            generator.Print("\n");
        }
        foreach (ByteString value in field.LengthDelimitedList)
        {
            generator.Print(number);
            generator.Print(": \"");
            generator.Print(EscapeBytes(value));
            generator.Print("\"\n");
        }
        foreach (UnknownFieldSet value in field.GroupList)
        {
            generator.Print(number);
            generator.Print(" {\n");
            generator.Indent();
            PrintUnknownFields(value, generator);
            generator.Outdent();
            generator.Print("}\n");
        }
    }
}
/// <summary>
/// Parses <paramref name="text"/> as an unsigned 64-bit integer by delegating to
/// <c>ParseInteger</c> (unsigned, long) and casting the result to <c>ulong</c>.
/// </summary>
/// <param name="text">The number to parse.</param>
/// <returns>The parsed value.</returns>
[CLSCompliant(false)]
public static ulong ParseUInt64(string text)
{
    const bool isSigned = false;
    const bool isLong = true;
    long raw = ParseInteger(text, isSigned, isLong);
    return (ulong) raw;
}
/// <summary>
/// Parses <paramref name="text"/> as a signed 64-bit integer by delegating to
/// <c>ParseInteger</c> (signed, long).
/// </summary>
/// <param name="text">The number to parse.</param>
/// <returns>The parsed value.</returns>
public static long ParseInt64(string text)
{
    const bool isSigned = true;
    const bool isLong = true;
    long parsed = ParseInteger(text, isSigned, isLong);
    return parsed;
}
/// <summary>
/// Parses <paramref name="text"/> as an unsigned 32-bit integer by delegating to
/// <c>ParseInteger</c> (unsigned, not long) and casting the result to <c>uint</c>.
/// </summary>
/// <param name="text">The number to parse.</param>
/// <returns>The parsed value.</returns>
[CLSCompliant(false)]
public static uint ParseUInt32(string text)
{
    const bool isSigned = false;
    const bool isLong = false;
    long raw = ParseInteger(text, isSigned, isLong);
    return (uint) raw;
}
/// <summary>
/// Parses <paramref name="text"/> as a signed 32-bit integer by delegating to
/// <c>ParseInteger</c> (signed, not long) and casting the result to <c>int</c>.
/// </summary>
/// <param name="text">The number to parse.</param>
/// <returns>The parsed value.</returns>
public static int ParseInt32(string text)
{
    const bool isSigned = true;
    const bool isLong = false;
    long raw = ParseInteger(text, isSigned, isLong);
    return (int) raw;
}
/// <summary>
/// Parses a single-precision value. The literals "inf"/"infinity" (optionally
/// prefixed with "-" and/or suffixed with "f") and "nan"/"nanf" map to the
/// corresponding special values; anything else is parsed as an invariant-culture
/// floating-point number.
/// </summary>
/// <param name="text">The text to parse.</param>
/// <returns>The parsed value.</returns>
/// <exception cref="FormatException"><paramref name="text"/> is not a valid number.</exception>
public static float ParseFloat(string text)
{
    switch (text)
    {
        case "-inf":
        case "-infinity":
        case "-inff":
        case "-infinityf":
            return float.NegativeInfinity;
        case "inf":
        case "infinity":
        case "inff":
        case "infinityf":
            return float.PositiveInfinity;
        case "nan":
        case "nanf":
            return float.NaN;
        default:
            // NumberStyles.Float is passed explicitly: the Parse(string, IFormatProvider)
            // overload also allows thousands separators, under which the invariant
            // culture's "," is skipped and "1,5" would silently parse as 15.
            return float.Parse(text, NumberStyles.Float, CultureInfo.InvariantCulture);
    }
}
/// <summary>
/// Parses a double-precision value. The literals "inf"/"infinity" (optionally
/// prefixed with "-") and "nan" map to the corresponding special values; anything
/// else is parsed as an invariant-culture floating-point number.
/// </summary>
/// <param name="text">The text to parse.</param>
/// <returns>The parsed value.</returns>
/// <exception cref="FormatException"><paramref name="text"/> is not a valid number.</exception>
public static double ParseDouble(string text)
{
    switch (text)
    {
        case "-inf":
        case "-infinity":
            return double.NegativeInfinity;
        case "inf":
        case "infinity":
            return double.PositiveInfinity;
        case "nan":
            return double.NaN;
        default:
            // NumberStyles.Float is passed explicitly: the Parse(string, IFormatProvider)
            // overload also allows thousands separators, under which the invariant
            // culture's "," is skipped and "1,5" would silently parse as 15.
            return double.Parse(text, NumberStyles.Float, CultureInfo.InvariantCulture);
    }
}
/// <summary>
/// Parses an integer in hex (leading 0x), decimal (no prefix) or octal (leading 0).
/// Only a negative sign is permitted, and it must come before the radix indicator.
/// </summary>
/// <param name="text">The number to parse.</param>
/// <param name="isSigned">Whether a leading "-" is allowed and the signed range applies.</param>
/// <param name="isLong">True to range-check against 64 bits, false for 32 bits.</param>
/// <returns>
/// The parsed value. Unsigned 64-bit values above <c>long.MaxValue</c> are returned
/// as the same bit pattern reinterpreted as <c>long</c>.
/// </returns>
/// <exception cref="FormatException">
/// The text is not a valid number, carries a sign that is not permitted, or is out
/// of range for the requested integer type.
/// </exception>
private static long ParseInteger(string text, bool isSigned, bool isLong)
{
    string original = text;
    bool negative = false;
    // Ordinal comparisons: these are syntax markers, not linguistic text.
    if (text.StartsWith("-", StringComparison.Ordinal))
    {
        if (!isSigned)
        {
            throw new FormatException("Number must be positive: " + original);
        }
        negative = true;
        text = text.Substring(1);
    }

    int radix = 10;
    if (text.StartsWith("0x", StringComparison.Ordinal))
    {
        radix = 16;
        text = text.Substring(2);
    }
    else if (text.StartsWith("0", StringComparison.Ordinal))
    {
        radix = 8;
    }

    ulong result;
    try
    {
        // Workaround for https://connect.microsoft.com/VisualStudio/feedback/ViewFeedback.aspx?FeedbackID=278448
        // We should be able to use Convert.ToUInt64 for all cases.
        // NumberStyles.None accepts digits only, so a second sign or surrounding
        // whitespace is rejected; the invariant culture keeps parsing predictable.
        result = radix == 10
                     ? ulong.Parse(text, NumberStyles.None, CultureInfo.InvariantCulture)
                     : Convert.ToUInt64(text, radix);
    }
    catch (OverflowException)
    {
        // Convert OverflowException to FormatException so there's a single exception type this method can throw.
        throw CreateOutOfRangeException(original, isSigned, isLong);
    }
    catch (ArgumentException)
    {
        // Convert.ToUInt64(string, int) reports an empty digit string ("0x") and a
        // sign after the radix prefix ("0x-1") with ArgumentException-derived types.
        throw new FormatException("Invalid number: " + original);
    }

    if (negative)
    {
        // The magnitude of the most negative value is one more than the positive maximum.
        ulong max = isLong ? 0x8000000000000000UL : 0x80000000UL;
        if (result > max)
        {
            throw CreateOutOfRangeException(original, true, isLong);
        }
        // unchecked: negating 2^63 must wrap to long.MinValue even in a checked build.
        return unchecked(-((long) result));
    }
    else
    {
        ulong max = isSigned
                        ? (isLong ? (ulong) long.MaxValue : int.MaxValue)
                        : (isLong ? ulong.MaxValue : uint.MaxValue);
        if (result > max)
        {
            throw CreateOutOfRangeException(original, isSigned, isLong);
        }
        // unchecked: unsigned 64-bit values above long.MaxValue are returned as their
        // bit pattern and cast back to ulong by the caller.
        return unchecked((long) result);
    }
}

/// <summary>
/// Builds the exception thrown when a number does not fit the requested integer type.
/// </summary>
private static FormatException CreateOutOfRangeException(string original, bool isSigned, bool isLong)
{
    string numberDescription = (isLong ? "64" : "32") + "-bit " + (isSigned ? "" : "un") + "signed integer";
    return new FormatException("Number out of range for " + numberDescription + ": " + original);
}
/// <summary>
/// Returns true when <paramref name="c"/> is one of the characters '0' through '7'.
/// </summary>
private static bool IsOctal(char c)
{
    bool belowRange = c < '0';
    bool aboveRange = c > '7';
    return !(belowRange || aboveRange);
}
/// <summary>
/// Returns true when <paramref name="c"/> is a decimal digit or a letter in the
/// range a-f / A-F.
/// </summary>
private static bool IsHex(char c)
{
    if (c >= '0' && c <= '9')
    {
        return true;
    }
    if (c >= 'a' && c <= 'f')
    {
        return true;
    }
    return c >= 'A' && c <= 'F';
}
/// <summary>
/// Interprets a character as a digit (in any base up to 36) and returns the
/// numeric value. No validation is performed: any character that is neither a
/// decimal digit nor a lower-case letter is treated as an upper-case letter.
/// </summary>
private static int ParseDigit(char c)
{
    if (c >= '0' && c <= '9')
    {
        return c - '0';
    }
    if (c >= 'a' && c <= 'z')
    {
        return 10 + (c - 'a');
    }
    return 10 + (c - 'A');
}
/// <summary>
/// Unescapes a text string as escaped using <see cref="EscapeText"/>.
/// Two-digit hex escapes (starting with "\x") are also recognised.
/// The unescaped bytes are decoded as UTF-8.
/// </summary>
/// <param name="input">The escaped text.</param>
/// <returns>The unescaped string.</returns>
public static string UnescapeText(string input)
{
    ByteString raw = UnescapeBytes(input);
    return raw.ToStringUtf8();
}
/// <summary>
/// Like <see cref="EscapeBytes"/> but escapes a text string.
/// The string is first encoded as UTF-8, then each byte escaped individually.
/// The returned value is guaranteed to be entirely ASCII.
/// </summary>
/// <param name="input">The text to escape.</param>
/// <returns>The escaped, ASCII-only text.</returns>
public static string EscapeText(string input)
{
    ByteString utf8 = ByteString.CopyFromUtf8(input);
    return EscapeBytes(utf8);
}
/// <summary>
/// Escapes bytes in the format used in protocol buffer text format, which
/// is the same as the format used for C string literals. All bytes
/// that are not printable 7-bit ASCII characters are escaped, as well as
/// backslash, single-quote, and double-quote characters. Bytes for which
/// no short-hand escape sequence is defined are escaped using 3-digit
/// octal sequences.
/// The returned value is guaranteed to be entirely ASCII.
/// </summary>
/// <param name="input">The bytes to escape.</param>
/// <returns>The escaped, ASCII-only text.</returns>
public static String EscapeBytes(ByteString input)
{
    StringBuilder escaped = new StringBuilder(input.Length);
    foreach (byte current in input)
    {
        // Short-hand escape for this byte, or null when there is none.
        string shortForm = null;
        switch (current)
        {
            // C# has no \a or \v character escapes, hence the numeric labels.
            case 0x07:
                shortForm = "\\a";
                break;
            case (byte) '\b':
                shortForm = "\\b";
                break;
            case (byte) '\f':
                shortForm = "\\f";
                break;
            case (byte) '\n':
                shortForm = "\\n";
                break;
            case (byte) '\r':
                shortForm = "\\r";
                break;
            case (byte) '\t':
                shortForm = "\\t";
                break;
            case 0x0b:
                shortForm = "\\v";
                break;
            case (byte) '\\':
                shortForm = "\\\\";
                break;
            case (byte) '\'':
                shortForm = "\\\'";
                break;
            case (byte) '"':
                shortForm = "\\\"";
                break;
        }

        if (shortForm != null)
        {
            escaped.Append(shortForm);
            continue;
        }

        if (current >= 0x20 && current < 128)
        {
            // Printable ASCII passes through unchanged.
            escaped.Append((char) current);
            continue;
        }

        // Everything else: backslash followed by exactly three octal digits.
        escaped.Append('\\');
        escaped.Append((char) ('0' + ((current >> 6) & 3)));
        escaped.Append((char) ('0' + ((current >> 3) & 7)));
        escaped.Append((char) ('0' + (current & 7)));
    }
    return escaped.ToString();
}
/// <summary>
/// Performs string unescaping from C style (octal, hex, form feeds, tab etc) into a byte string.
/// </summary>
/// <param name="input">
/// The escaped text. Every character must be in the range 32..127.
/// </param>
/// <returns>A byte string holding the unescaped bytes.</returns>
/// <exception cref="FormatException">
/// The input contains a character outside 32..127, ends with a lone backslash,
/// contains "\x" without a hex digit, or contains an unrecognised escape.
/// </exception>
public static ByteString UnescapeBytes(string input)
{
// Each loop iteration consumes at least one character and emits at most one
// byte, so the output can never be longer than the input.
byte[] result = new byte[input.Length];
int pos = 0;
for (int i = 0; i < input.Length; i++)
{
char c = input[i];
if (c > 127 || c < 32)
{
throw new FormatException("Escaped string must only contain ASCII");
}
if (c != '\\')
{
// Ordinary character: copied through as a single byte.
result[pos++] = (byte) c;
continue;
}
if (i + 1 >= input.Length)
{
throw new FormatException("Invalid escape sequence: '\\' at end of string.");
}
// Step onto the character that follows the backslash.
i++;
c = input[i];
if (c >= '0' && c <= '7')
{
// Octal escape.
// One to three octal digits; the second and third are consumed only if present.
int code = ParseDigit(c);
if (i + 1 < input.Length && IsOctal(input[i + 1]))
{
i++;
code = code*8 + ParseDigit(input[i]);
}
if (i + 1 < input.Length && IsOctal(input[i + 1]))
{
i++;
code = code*8 + ParseDigit(input[i]);
}
// Three digits can encode up to 511; the cast keeps only the low 8 bits
// (assuming the default unchecked context).
result[pos++] = (byte) code;
}
else
{
switch (c)
{
// 0x07 and 0x0b are written numerically because C# has no \a or \v escapes.
case 'a':
result[pos++] = 0x07;
break;
case 'b':
result[pos++] = (byte) '\b';
break;
case 'f':
result[pos++] = (byte) '\f';
break;
case 'n':
result[pos++] = (byte) '\n';
break;
case 'r':
result[pos++] = (byte) '\r';
break;
case 't':
result[pos++] = (byte) '\t';
break;
case 'v':
result[pos++] = 0x0b;
break;
case '\\':
result[pos++] = (byte) '\\';
break;
case '\'':
result[pos++] = (byte) '\'';
break;
case '"':
result[pos++] = (byte) '\"';
break;
case 'x':
// hex escape
// One hex digit is mandatory; a second is consumed only if present.
int code;
if (i + 1 < input.Length && IsHex(input[i + 1]))
{
i++;
code = ParseDigit(input[i]);
}
else
{
throw new FormatException("Invalid escape sequence: '\\x' with no digits");
}
if (i + 1 < input.Length && IsHex(input[i + 1]))
{
++i;
code = code*16 + ParseDigit(input[i]);
}
result[pos++] = (byte) code;
break;
default:
throw new FormatException("Invalid escape sequence: '\\" + c + "'");
}
}
}
// Only the first pos bytes of the buffer were written.
return ByteString.CopyFrom(result, 0, pos);
}
/// <summary>
/// Parses <paramref name="text"/> and merges the fields it describes into
/// <paramref name="builder"/>, using <c>ExtensionRegistry.Empty</c>.
/// </summary>
/// <param name="text">The text-format input.</param>
/// <param name="builder">The builder that receives the parsed fields.</param>
public static void Merge(string text, IBuilder builder)
{
    ExtensionRegistry noExtensions = ExtensionRegistry.Empty;
    Merge(text, noExtensions, builder);
}
/// <summary>
/// Reads text-format input from <paramref name="reader"/> and merges the fields
/// it describes into <paramref name="builder"/>, using <c>ExtensionRegistry.Empty</c>.
/// </summary>
/// <param name="reader">The source of the text-format input.</param>
/// <param name="builder">The builder that receives the parsed fields.</param>
public static void Merge(TextReader reader, IBuilder builder)
{
    ExtensionRegistry noExtensions = ExtensionRegistry.Empty;
    Merge(reader, noExtensions, builder);
}
/// <summary>
/// Reads <paramref name="reader"/> to its end and merges the resulting text into
/// <paramref name="builder"/>, resolving extensions through <paramref name="registry"/>.
/// </summary>
/// <param name="reader">The source of the text-format input; it is read completely.</param>
/// <param name="registry">The registry used to look up extensions by name.</param>
/// <param name="builder">The builder that receives the parsed fields.</param>
public static void Merge(TextReader reader, ExtensionRegistry registry, IBuilder builder)
{
    string wholeText = reader.ReadToEnd();
    Merge(wholeText, registry, builder);
}
/// <summary>
/// Tokenizes <paramref name="text"/> and merges one field at a time into
/// <paramref name="builder"/> until the tokenizer reports the end of input.
/// </summary>
/// <param name="text">The text-format input.</param>
/// <param name="registry">The registry used to look up extensions by name.</param>
/// <param name="builder">The builder that receives the parsed fields.</param>
public static void Merge(string text, ExtensionRegistry registry, IBuilder builder)
{
    TextTokenizer tokens = new TextTokenizer(text);
    while (true)
    {
        if (tokens.AtEnd)
        {
            return;
        }
        MergeField(tokens, registry, builder);
    }
}
/// <summary>
/// Parses a single field from the specified tokenizer and merges it into
/// the builder.
/// </summary>
/// <param name="tokenizer">The tokenizer positioned at the start of a field.</param>
/// <param name="extensionRegistry">The registry used to resolve bracketed extension names.</param>
/// <param name="builder">
/// The builder that receives the value: added for repeated fields, set otherwise.
/// </param>
/// <exception cref="NotSupportedException">
/// An extension's default instance does not yield an <c>IBuilder</c>.
/// </exception>
private static void MergeField(TextTokenizer tokenizer, ExtensionRegistry extensionRegistry,
IBuilder builder)
{
FieldDescriptor field;
MessageDescriptor type = builder.DescriptorForType;
ExtensionInfo extension = null;
// Step 1: resolve the field descriptor, either from a bracketed extension name
// or from a plain identifier.
if (tokenizer.TryConsume("["))
{
// An extension.
// The name is a dot-separated sequence of identifiers.
StringBuilder name = new StringBuilder(tokenizer.ConsumeIdentifier());
while (tokenizer.TryConsume("."))
{
name.Append(".");
name.Append(tokenizer.ConsumeIdentifier());
}
extension = extensionRegistry.FindByName(type, name.ToString());
if (extension == null)
{
throw tokenizer.CreateFormatExceptionPreviousToken("Extension \"" + name +
"\" not found in the ExtensionRegistry.");
}
else if (extension.Descriptor.ContainingType != type)
{
throw tokenizer.CreateFormatExceptionPreviousToken("Extension \"" + name +
"\" does not extend message type \"" +
type.FullName + "\".");
}
tokenizer.Consume("]");
field = extension.Descriptor;
}
else
{
String name = tokenizer.ConsumeIdentifier();
field = type.FindDescriptor(name);
// Group names are expected to be capitalized as they appear in the
// .proto file, which actually matches their type names, not their field
// names.
if (field == null)
{
// Explicitly specify the invariant culture so that this code does not break when
// executing in Turkey.
String lowerName = name.ToLower(CultureInfo.InvariantCulture);
field = type.FindDescriptor(lowerName);
// If the lower-cased lookup matched a field that is NOT a group, discard the
// match: only group fields are resolved through the lower-cased name here.
if (field != null && field.FieldType != FieldType.Group)
{
field = null;
}
}
// Again, special-case group names as described above.
// A group is accepted only when the identifier equals its message type name exactly.
if (field != null && field.FieldType == FieldType.Group && field.MessageType.Name != name)
{
field = null;
}
if (field == null)
{
throw tokenizer.CreateFormatExceptionPreviousToken(
"Message type \"" + type.FullName + "\" has no field named \"" + name + "\".");
}
}
// Step 2: parse the value.
object value = null;
if (field.MappedType == MappedType.Message)
{
tokenizer.TryConsume(":"); // optional
// A nested message is delimited by either < ... > or { ... }.
String endToken;
if (tokenizer.TryConsume("<"))
{
endToken = ">";
}
else
{
tokenizer.Consume("{");
endToken = "}";
}
IBuilder subBuilder;
if (extension == null)
{
subBuilder = builder.CreateBuilderForField(field);
}
else
{
// For extensions the builder comes from the registered default instance;
// the "as" cast yields null when that builder is not an IBuilder.
subBuilder = extension.DefaultInstance.WeakCreateBuilderForType() as IBuilder;
if (subBuilder == null)
{
throw new NotSupportedException("Lite messages are not supported.");
}
}
// Recursively merge nested fields until the closing delimiter is consumed.
while (!tokenizer.TryConsume(endToken))
{
if (tokenizer.AtEnd)
{
throw tokenizer.CreateFormatException("Expected \"" + endToken + "\".");
}
MergeField(tokenizer, extensionRegistry, subBuilder);
}
value = subBuilder.WeakBuild();
}
else
{
// Non-message values require the ":" separator.
tokenizer.Consume(":");
switch (field.FieldType)
{
case FieldType.Int32:
case FieldType.SInt32:
case FieldType.SFixed32:
value = tokenizer.ConsumeInt32();
break;
case FieldType.Int64:
case FieldType.SInt64:
case FieldType.SFixed64:
value = tokenizer.ConsumeInt64();
break;
case FieldType.UInt32:
case FieldType.Fixed32:
value = tokenizer.ConsumeUInt32();
break;
case FieldType.UInt64:
case FieldType.Fixed64:
value = tokenizer.ConsumeUInt64();
break;
case FieldType.Float:
value = tokenizer.ConsumeFloat();
break;
case FieldType.Double:
value = tokenizer.ConsumeDouble();
break;
case FieldType.Bool:
value = tokenizer.ConsumeBoolean();
break;
case FieldType.String:
value = tokenizer.ConsumeString();
break;
case FieldType.Bytes:
value = tokenizer.ConsumeByteString();
break;
case FieldType.Enum:
{
// Enum values may be given either by number or by name.
EnumDescriptor enumType = field.EnumType;
if (tokenizer.LookingAtInteger())
{
int number = tokenizer.ConsumeInt32();
value = enumType.FindValueByNumber(number);
if (value == null)
{
throw tokenizer.CreateFormatExceptionPreviousToken(
"Enum type \"" + enumType.FullName +
"\" has no value with number " + number + ".");
}
}
else
{
String id = tokenizer.ConsumeIdentifier();
value = enumType.FindValueByName(id);
if (value == null)
{
throw tokenizer.CreateFormatExceptionPreviousToken(
"Enum type \"" + enumType.FullName +
"\" has no value named \"" + id + "\".");
}
}
break;
}
case FieldType.Message:
case FieldType.Group:
// Expected to be handled by the MappedType.Message branch above.
throw new InvalidOperationException("Can't get here.");
}
}
// Step 3: store the value in the builder.
if (field.IsRepeated)
{
builder.WeakAddRepeatedField(field, value);
}
else
{
builder.SetField(field, value);
}
}
}
}