namespace HeuristicLab.Problems.ProgramSynthesis.Push.Parser {
  using System;
  using System.Collections.Generic;
  using System.Globalization;
  using System.Linq;
  using System.Text.RegularExpressions;

  using HeuristicLab.Problems.ProgramSynthesis.Push.Constants;
  using HeuristicLab.Problems.ProgramSynthesis.Push.Expressions;

  /// <summary>
  /// Turns the textual form of a Push program into <see cref="Expression"/> objects.
  /// The text is first split into symbols (see <c>GetSymbols</c>) and the symbols are
  /// then converted into literals, known expressions, nested programs or name definitions.
  /// </summary>
  public static class PushParser {
    private const char delimiter = ' ';
    private static readonly CultureInfo cultureInfo = CultureInfo.CreateSpecificCulture("en-US");
    private static readonly char[] symbolTrim = { '\r', '\n' };

    // Matches a backslash followed by one of the supported single-character escape codes.
    private static readonly Regex escapeSequence = new Regex(@"\\(?<simple>['""\\0abfnrtv])");

    /// <summary>
    /// Parses <paramref name="source"/> into a single expression.
    /// </summary>
    /// <param name="source">Program text; symbols are separated by a single space.</param>
    /// <param name="startIndex">
    /// Index of the first symbol (not character) that is parsed; earlier symbols are ignored.
    /// </param>
    /// <returns>
    /// <c>PushProgram.Empty</c> if no expression was found, the expression itself if exactly one
    /// was found, otherwise a <see cref="PushProgram"/> containing all parsed expressions.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is <c>null</c>.</exception>
    public static Expression Parse(string source, int startIndex = 0) {
      if (source == null) {
        throw new ArgumentNullException("source");
      }

      // Tokenization is best effort: when a literal is not terminated, the symbols found so far
      // are still parsed. The error flag is therefore deliberately not acted upon here.
      bool error;
      var symbols = GetSymbols(source, out error);

      int endIndex;
      return Parse(symbols, startIndex, out endIndex);
    }

    /// <summary>
    /// Parses <paramref name="source"/> and always returns a <see cref="PushProgram"/>:
    /// a single non-program expression is wrapped into a program of its own.
    /// </summary>
    /// <param name="source">Program text; symbols are separated by a single space.</param>
    /// <param name="startIndex">Index of the first symbol (not character) that is parsed.</param>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is <c>null</c>.</exception>
    public static PushProgram ParseProgram(string source, int startIndex = 0) {
      var expression = Parse(source, startIndex);
      return expression as PushProgram ?? new PushProgram(new[] { expression });
    }

    /// <summary>
    /// Splits <paramref name="source"/> into symbols. String, char and vector literals are kept
    /// as one symbol including their enclosing symbols (so they may contain spaces); everything
    /// else is split at the delimiter. Escape sequences inside char literals are resolved.
    /// </summary>
    /// <param name="source">Program text.</param>
    /// <param name="error">
    /// <c>true</c> if a string, char or vector literal was not terminated; in that case only the
    /// symbols found before that literal are returned. <c>false</c> otherwise.
    /// </param>
    private static IReadOnlyList<string> GetSymbols(string source, out bool error) {
      var symbols = new List<string>();

      for (var i = 0; i < source.Length; i++) {
        var cur = source[i];
        int endIndex;

        switch (cur) {
          case delimiter:
            break;

          case PushEnvironment.StringSymbol:
          case PushEnvironment.CharSymbol:
          case PushEnvironment.VectorStartSymbol:
            // Strings and chars are closed by the symbol that opened them, vectors by their end symbol.
            var closing = cur == PushEnvironment.VectorStartSymbol ? PushEnvironment.VectorEndSymbol : cur;

            endIndex = source.IndexOf(closing, i + 1);
            if (endIndex < 0) {
              error = true;
              return symbols;
            }

            var literal = source.Substring(i, endIndex - i + 1);
            if (cur == PushEnvironment.CharSymbol) {
              literal = EscapeLikeALiteral(literal);
            }

            symbols.Add(literal);

            // Stop on the closing symbol. The loop increment moves to the next character, which is
            // skipped by the delimiter case if it is a space and tokenized normally otherwise
            // (previously it was dropped unconditionally).
            i = endIndex;
            break;

          default:
            endIndex = source.IndexOf(delimiter, i + 1);
            if (endIndex < 0) {
              endIndex = source.Length;
            }

            symbols.Add(source.Substring(i, endIndex - i));

            // i now points at the delimiter (or the end); the loop increment steps past it.
            i = endIndex;
            break;
        }
      }

      error = false;
      return symbols;
    }

    /// <summary>
    /// Converts the symbols starting at <paramref name="startIndex"/> into an expression.
    /// A program start symbol opens a nested program that is parsed recursively; a program end
    /// symbol closes the current one.
    /// </summary>
    /// <param name="symbols">Symbols as produced by <c>GetSymbols</c>.</param>
    /// <param name="startIndex">Index of the first symbol to look at.</param>
    /// <param name="endIndex">
    /// Index of the last symbol consumed: the program end symbol if one was reached, otherwise
    /// the index of the last symbol.
    /// </param>
    private static Expression Parse(IReadOnlyList<string> symbols, int startIndex, out int endIndex) {
      // Expressions are collected in source order and reversed once before being returned, which
      // yields the same order as inserting every expression at the front, without the O(n^2) cost.
      var expressions = new List<Expression>();

      for (var i = startIndex; i < symbols.Count; i++) {
        // Line breaks next to a symbol are not part of it.
        var symbol = symbols[i].Trim(symbolTrim);

        if (string.IsNullOrWhiteSpace(symbol)) {
          continue;
        }

        switch (symbol) {
          case PushEnvironment.ProgramStartSymbolStr:
            var subExpression = Parse(symbols, i + 1, out endIndex);
            expressions.Add(subExpression);
            // Continue after the symbol that ended the nested program.
            i = endIndex;
            continue;

          case PushEnvironment.ProgramEndSymbolStr:
            endIndex = i;
            expressions.Reverse();
            return new PushProgram(expressions);
        }

        Expression expression;

        // literal
        if (TryParseLiteral(symbol, out expression)) {
          expressions.Add(expression);
          continue;
        }

        // known expression
        if (ExpressionTable.TryGetStatelessExpression(symbol, out expression) ||
            ExpressionTable.TryGetStatefulExpression(symbol, out expression)) {
          expressions.Add(expression);
          continue;
        }

        // identifier - custom expression or named literal
        expressions.Add(new NameDefineXExecExpression(symbol));
      }

      endIndex = symbols.Count - 1;
      expressions.Reverse();

      switch (expressions.Count) {
        case 0:
          return PushProgram.Empty;
        case 1:
          return expressions[0];
        default:
          return new PushProgram(expressions);
      }
    }

    /// <summary>
    /// Tries to interpret <paramref name="word"/> as a char, string, vector, integer, float or
    /// boolean literal (checked in that order). Never throws for malformed input.
    /// </summary>
    /// <param name="word">Symbol to interpret.</param>
    /// <param name="expression">The literal expression on success, otherwise <c>null</c>.</param>
    /// <returns><c>true</c> if <paramref name="word"/> is a literal.</returns>
    private static bool TryParseLiteral(string word, out Expression expression) {
      if (word.Length == 3 &&
          IsEnclosedBy(word, PushEnvironment.CharSymbolStr, PushEnvironment.CharSymbolStr)) {
        expression = new CharPushExpression(word[1]);
        return true;
      }

      // The length check rejects a lone string symbol, which starts and ends with the symbol
      // but has no content to extract.
      if (word.Length >= 2 &&
          IsEnclosedBy(word, PushEnvironment.StringSymbolStr, PushEnvironment.StringSymbolStr)) {
        expression = new StringPushExpression(word.Substring(1, word.Length - 2));
        return true;
      }

      // "[]" has no values and can therefore not be parsed as a vector of a specific type
      if (word.Length > 2 &&
          IsEnclosedBy(word, PushEnvironment.VectorStartSymbolStr, PushEnvironment.VectorEndSymbolStr)) {
        return TryParseVector(word.Substring(1, word.Length - 2), out expression);
      }

      long longValue;
      if (long.TryParse(word, NumberStyles.Integer, cultureInfo, out longValue)) {
        expression = new IntegerPushExpression(longValue);
        return true;
      }

      double floatValue;
      if (double.TryParse(word, NumberStyles.Float, cultureInfo, out floatValue)) {
        expression = new FloatPushExpression(floatValue);
        return true;
      }

      bool booleanValue;
      if (bool.TryParse(word, out booleanValue)) {
        expression = new BooleanPushExpression(booleanValue);
        return true;
      }

      expression = null;
      return false;
    }

    /// <summary>
    /// Tries to parse the content of a vector literal (the text between the vector symbols).
    /// The type of the first entry decides the vector type; every entry has to be a valid
    /// literal of that type, otherwise parsing fails.
    /// </summary>
    /// <param name="content">Vector entries separated by the vector separator symbol.</param>
    /// <param name="expression">The vector expression on success, otherwise <c>null</c>.</param>
    private static bool TryParseVector(string content, out Expression expression) {
      expression = null;

      var entries = content.Split(PushEnvironment.VectorSeparatorSymbol);
      for (var i = 0; i < entries.Length; i++) {
        // Whitespace around an entry is not part of its value.
        entries[i] = entries[i].Trim();
      }

      Expression first;
      if (!TryParseLiteral(entries[0], out first)) {
        return false;
      }

      var elementType = first.GetType();

      if (elementType == typeof(IntegerPushExpression)) {
        var integerValues = new List<long>(entries.Length);
        foreach (var entry in entries) {
          long value;
          if (!long.TryParse(entry, NumberStyles.Integer, cultureInfo, out value)) {
            return false;
          }
          integerValues.Add(value);
        }

        expression = new IntegerVectorPushExpression(integerValues);
        return true;
      }

      if (elementType == typeof(FloatPushExpression)) {
        var doubleValues = new List<double>(entries.Length);
        foreach (var entry in entries) {
          double value;
          if (!double.TryParse(entry, NumberStyles.Float, cultureInfo, out value)) {
            return false;
          }
          doubleValues.Add(value);
        }

        expression = new FloatVectorPushExpression(doubleValues);
        return true;
      }

      if (elementType == typeof(StringPushExpression)) {
        var stringValues = new List<string>(entries.Length);
        foreach (var entry in entries) {
          var isQuoted = entry.Length >= 2 &&
            IsEnclosedBy(entry, PushEnvironment.StringSymbolStr, PushEnvironment.StringSymbolStr);
          if (!isQuoted) {
            return false;
          }
          stringValues.Add(entry.Substring(1, entry.Length - 2));
        }

        expression = new StringVectorPushExpression(stringValues);
        return true;
      }

      if (elementType == typeof(BooleanPushExpression)) {
        var booleanValues = new List<bool>(entries.Length);
        foreach (var entry in entries) {
          bool value;
          if (!bool.TryParse(entry, out value)) {
            return false;
          }
          booleanValues.Add(value);
        }

        expression = new BooleanVectorPushExpression(booleanValues);
        return true;
      }

      return false;
    }

    /// <summary>
    /// Checks with ordinal comparison whether <paramref name="word"/> starts with
    /// <paramref name="open"/> and ends with <paramref name="close"/>.
    /// </summary>
    private static bool IsEnclosedBy(string word, string open, string close) {
      return word.StartsWith(open, StringComparison.Ordinal)
          && word.EndsWith(close, StringComparison.Ordinal);
    }

    /// <summary>
    /// Replaces the escape sequences \', \", \\, \0, \a, \b, \f, \n, \r, \t and \v in
    /// <paramref name="src"/> with the character they stand for. Other text is left untouched.
    /// </summary>
    private static string EscapeLikeALiteral(string src) {
      return escapeSequence.Replace(src, m => {
        var s = m.Groups["simple"].Value;
        switch (s) {
          case "'": return "'";
          case "\"": return "\"";
          case "\\": return "\\";
          case "0": return "\0";
          case "a": return "\a";
          case "b": return "\b";
          case "f": return "\f";
          case "n": return "\n";
          case "r": return "\r";
          case "t": return "\t";
          case "v": return "\v";
          // Unreachable as long as the cases above cover the character class of the pattern.
          default: throw new InvalidOperationException();
        }
      });
    }
  }
}