namespace HeuristicLab.Problems.ProgramSynthesis.Push.Evaluator { using System; using System.Collections.Generic; using System.Globalization; using System.Linq; using HeuristicLab.BenchmarkSuite; using HeuristicLab.Common; using HeuristicLab.Core; using HeuristicLab.Parameters; using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; using HeuristicLab.Problems.ProgramSynthesis.Base.Extensions; using HeuristicLab.Problems.ProgramSynthesis.Push.Configuration; using HeuristicLab.Problems.ProgramSynthesis.Push.Expressions; using HeuristicLab.Problems.ProgramSynthesis.Push.Interpreter; using HeuristicLab.Problems.ProgramSynthesis.Push.Problem; using HeuristicLab.Problems.ProgramSynthesis.Push.Stack; [StorableClass] public class PushBenchmarkSuiteEvaluator : ParameterizedNamedItem, IPushEvaluator { private const string DATA_BOUNDS_PARAMETER_NAME = "DataBounds"; private const string DATA_PARAMETER_NAME = "Data"; private const string DATA_PARAMETER_DESCRIPTION = "Program Synthesis"; public PushBenchmarkSuiteEvaluator() { Parameters.Add(new FixedValueParameter(DATA_BOUNDS_PARAMETER_NAME)); Parameters.Add(new ValueParameter( DATA_PARAMETER_NAME, DATA_PARAMETER_DESCRIPTION)); } public PushBenchmarkSuiteEvaluator(ProblemData data) : this() { LoadData(data); } [StorableConstructor] public PushBenchmarkSuiteEvaluator(bool deserializing) : base(deserializing) { } public PushBenchmarkSuiteEvaluator(PushBenchmarkSuiteEvaluator origin, Cloner cloner) : base(origin, cloner) { Data = cloner.Clone(origin.Data); } public override IDeepCloneable Clone(Cloner cloner) { return new PushBenchmarkSuiteEvaluator(this, cloner); } public IValueParameter DataParameter { get { return (IValueParameter)Parameters[DATA_PARAMETER_NAME]; } } public ProblemData Data { get { return DataParameter.Value; } set { DataParameter.Value = value; } } public IValueParameter DataBoundsParameter { get { return (IValueParameter)Parameters[DATA_BOUNDS_PARAMETER_NAME]; } } public DataBounds DataBounds { get { return DataBoundsParameter.Value; } set { DataBoundsParameter.Value = value; } } public void LoadData(ProblemData data) { Data = data; DataBounds.TrainingRange.Start = 0; DataBounds.TrainingRange.End = Data.TrainingCount; DataBounds.TestRange.Start = Data.TrainingCount; DataBounds.TestRange.End = Data.TrainingCount + Data.TestCount; } public EvaluationResult Evaluate(IPushInterpreter interpreter, PushProgram program, int start, int end) { var length = end - start; if (length <= 0) return null; var evaluationResult = new EvaluationResult(length); for (var i = start; i < end; i++) { var result = Evaluate(interpreter, program, i); evaluationResult.ExampleQualities[i - start] = result; evaluationResult.AvgQuality += result; interpreter.Reset(); } evaluationResult.AvgQuality /= length; return evaluationResult; } public EvaluationResult EvaluateTest(IPushInterpreter interpreter, PushProgram program) { return Evaluate(interpreter, program, DataBounds.TestRange.Start, DataBounds.TestRange.End); } public EvaluationResult EvaluateTest(IReadOnlyPushConfiguration config, PushProgram program, IRandom random) { var interpreter = new PushInterpreter(config, random); return EvaluateTest(interpreter, program); } public EvaluationResult EvaluateTest(PushInterpreterPool pool, PushProgram program, IRandom random) { using (var interpreter = pool.Create(random)) { return EvaluateTest(interpreter, program); } } public EvaluationResult EvaluateTraining(IPushInterpreter interpreter, PushProgram program) { return Evaluate(interpreter, program, DataBounds.TrainingRange.Start, DataBounds.TrainingRange.End); } public EvaluationResult EvaluateTraining(IReadOnlyPushConfiguration config, PushProgram program, IRandom random) { var interpreter = new PushInterpreter(config, random); return EvaluateTraining(interpreter, program); } public EvaluationResult EvaluateTraining(PushInterpreterPool pool, PushProgram program, IRandom random) { using (var interpreter = pool.Create(random)) { return EvaluateTraining(interpreter, program); } } public double Evaluate(IPushInterpreter interpreter, PushProgram program, int exampleIndex) { var example = Data.Examples[exampleIndex]; interpreter.InitExample(example); interpreter.Run(program); switch (Data.ProblemType) { // special case ProblemType.NumberIO: return NumberIo(interpreter, example); case ProblemType.StringDifferences: return StringDifferences(interpreter, example); case ProblemType.EvenSquares: return EvenSquares(interpreter, example); case ProblemType.WallisPi: return WallisPi(interpreter, example); case ProblemType.VectorSummed: return VectorsSummed(interpreter, example); case ProblemType.XWordLines: return XWordLines(interpreter, example); case ProblemType.NegativeToZero: return NegativeToZero(interpreter, example); case ProblemType.WordStats: return WordStats(interpreter, example); case ProblemType.Checksum: return Checksum(interpreter, example); case ProblemType.Grades: return Grade(interpreter, example); case ProblemType.Syllables: return Syllables(interpreter, example); // printed string right/wrong case ProblemType.Smallest: case ProblemType.Median: return PrintedStringRight(interpreter, example); case ProblemType.MedianIntegerError: return MedianIntegerError(interpreter, example); // boolean error case ProblemType.MirrorImage: case ProblemType.SuperAnagrams: case ProblemType.CompareStringLengths: return GetDiff(example.OutputBoolean, interpreter.BooleanStack, Data.WorstResult, BooleanDiffer); // float error case ProblemType.VectorAverage: return GetDiff(example.OutputFloat, interpreter.FloatStack, Data.WorstResult, (a, b) => FloatDiffer(a, b, example.OutputFloatPrecision, Data.WorstResult)); // integer error case ProblemType.LastIndexOfZero: case ProblemType.CountOdds: case ProblemType.SumOfSquares: case ProblemType.ScrabbleScore: case ProblemType.CollatzNumbers: return GetDiff(example.OutputInteger, interpreter.IntegerStack, Data.WorstResult, IntegerDiffer); // levenshtein distance + integer error case ProblemType.ReplaceSpaceWithNewline: return GetPrintDiffer(example.OutputPrint, interpreter.PrintStack, Data.WorstResult) + GetDiff(example.OutputInteger, interpreter.IntegerStack, Data.WorstResult, IntegerDiffer); // printed levenshtein distance case ProblemType.ForLoopIndex: case ProblemType.DoubleLetters: case ProblemType.StringLengthsBackwards: case ProblemType.PigLatin: case ProblemType.Digits: case ProblemType.SmallOrLarge: return GetPrintDiffer(example.OutputPrint, interpreter.PrintStack, Data.WorstResult); } return Default(interpreter, example); } private double Default(IPushInterpreter interpreter, Example example) { var integerDiff = GetDiff(example.OutputInteger, interpreter.IntegerStack, Data.WorstResult, IntegerDiffer); var floatDiff = GetDiff(example.OutputFloat, interpreter.FloatStack, Data.WorstResult, (a, b) => FloatDiffer(a, b, example.OutputFloatPrecision, Data.WorstResult)); var booleanDiff = GetDiff(example.OutputBoolean, interpreter.BooleanStack, Data.WorstResult, BooleanDiffer); var stringDiff = GetDiff(example.OutputString, interpreter.StringStack, Data.WorstResult, StringDiffer); var charDiff = GetDiff(example.OutputChar, interpreter.CharStack, Data.WorstResult, CharDiffer); var printDiff = GetPrintDiffer(example.OutputPrint, interpreter.PrintStack, Data.WorstResult); var integerVectorDiff = GetVectorDiff(example.OutputIntegerVector, interpreter.IntegerVectorStack, Data.WorstResult, (a, b) => VectorDiffer(a, b, IntegerDiffer)); var floatVectorDiff = GetVectorDiff(example.OutputFloatVector, interpreter.FloatVectorStack, Data.WorstResult, (a, b) => VectorDiffer(a, b, (x, y) => FloatDiffer(x, y, example.OutputFloatVectorPrecision, Data.WorstResult))); var stringVectorDiff = GetVectorDiff(example.OutputStringVector, interpreter.StringVectorStack, Data.WorstResult, (a, b) => VectorDiffer(a, b, StringDiffer)); var result = integerDiff + floatDiff + booleanDiff + stringDiff + charDiff + printDiff + integerVectorDiff + floatVectorDiff + stringVectorDiff; return result; } private double PrintedStringRight(IPushInterpreter interpreter, Example example) { return interpreter.PrintStack.IsEmpty || !interpreter.PrintStack.ToString().Equals(example.OutputPrint) ? 1 : 0; } private double MedianIntegerError(IPushInterpreter interpreter, Example example) { var printResult = interpreter.PrintStack.ToString(); long result; if (long.TryParse(printResult, out result)) { var diff = result.AbsoluteDiff(example.OutputInteger[0]); return Math.Min(Data.WorstResult, diff); } return Data.WorstResult; } private double NumberIo(IPushInterpreter interpreter, Example example) { if (!string.IsNullOrEmpty(example.OutputPrint) && interpreter.PrintStack.IsEmpty) return Data.WorstResult; var printResult = interpreter.PrintStack.ToString(); double value; if (double.TryParse(printResult, NumberStyles.Number | NumberStyles.AllowDecimalPoint, CultureInfo.InvariantCulture, out value)) { var diff = example.OutputFloat[0].AbsoluteDiff(value); diff = Math.Min(diff, Data.WorstResult); return diff; } var penaltyError = Data.WorstResult / 4.0d; var levenshteinDistance = GetPrintDiffer( example.OutputPrint, printResult, Data.WorstResult); var result = levenshteinDistance + penaltyError; return Math.Min(result, Data.WorstResult); } private double StringDifferences(IPushInterpreter interpreter, Example example) { if (string.IsNullOrEmpty(example.OutputPrint) && interpreter.PrintStack.IsEmpty) { return 0.0; } if (!string.IsNullOrEmpty(example.OutputPrint) && interpreter.PrintStack.IsEmpty) { return Data.WorstResult; } var printResult = interpreter.PrintStack.ToString(); var distance = example.OutputPrint.LevenshteinDistance(printResult); var lineCountWithCorrectFormat = interpreter.PrintStack .AsStrings() .Count(line => { var parts = line.Split(' '); if (parts.Length != 3) return false; var part0 = parts[0].Trim(); if (part0.Length == 0 || !part0.IsNumeric()) return false; var part1 = parts[1].Trim(); if (part1.Length != 1) return false; var part2 = parts[2].Trim(); if (part2.Length != 1) return false; return true; }); var lineCountWithInvalidFormat = example.OutputPrintLineCount - lineCountWithCorrectFormat; lineCountWithInvalidFormat = Math.Min(lineCountWithInvalidFormat, example.OutputPrintLineCount); var result = distance + lineCountWithInvalidFormat; return Math.Min(result, Data.WorstResult); } private double EvenSquares(IPushInterpreter interpreter, Example example) { if (string.IsNullOrEmpty(example.OutputPrint) && interpreter.PrintStack.IsEmpty) { return 0.0; } if (!string.IsNullOrEmpty(example.OutputPrint) && interpreter.PrintStack.IsEmpty) { return Data.WorstResult; } var printResult = interpreter.PrintStack.ToString(); var distance = example.OutputPrint.LevenshteinDistance(printResult); var printLines = interpreter.PrintStack.AsStrings().ToArray(); var lineCountWithInvalidFormat = printLines .Take(example.OutputPrintLineCount) .Count(line => !line.IsNumeric()); var compareLength = Math.Min(printLines.Length, example.OutputPrintLineCount); var integerError = 0L; for (var i = 0; i < compareLength; i++) { long value; if (long.TryParse(printLines[i], out value)) { integerError += value.AbsoluteDiff(example.OutputIntegerVector[0][i]); } } var result = distance + lineCountWithInvalidFormat + integerError / compareLength; return Math.Min(result, Data.WorstResult); } private double WallisPi(IPushInterpreter interpreter, Example example) { if (interpreter.FloatStack.IsEmpty) return Data.WorstResult; var floatDiff = GetDiff(example.OutputFloat, interpreter.FloatStack, Data.WorstResult, (a, b) => FloatDiffer(a, b, example.OutputFloatPrecision, Data.WorstResult)); var expectedStr = example.OutputFloat[0].ToString(Data.FloatStringFormat, CultureInfo.CurrentCulture); var resultStr = interpreter.FloatStack.TopOrDefault.ToString(Data.FloatStringFormat, CultureInfo.CurrentCulture); var distance = expectedStr.LevenshteinDistance(resultStr); var result = floatDiff + distance; return Math.Min(result, Data.WorstResult); } private double VectorsSummed(IPushInterpreter interpreter, Example example) { if (interpreter.IntegerVectorStack.IsEmpty && example.OutputIntegerVector.Length > 0) return Data.WorstResult; var resultVector = interpreter.IntegerVectorStack[0]; var expectedVector = example.OutputIntegerVector[0]; var comparableLength = Math.Min(resultVector.Count, expectedVector.Length); // add penalty if length does not match //var result = expectedVector.Length > 0 // ? (expectedVector.Length - comparableLength) * (Data.WorstResult / expectedVector.Length) // : 0; if (comparableLength == 0) { return Data.WorstResult; } var result = 0d; for (var i = 0; i < comparableLength; i++) { result += resultVector[i].AbsoluteDiff(expectedVector[i]); } for (var i = comparableLength; i < expectedVector.Length; i++) { result += expectedVector[i]; } return Math.Min(result, Data.WorstResult); } private double XWordLines(IPushInterpreter interpreter, Example example) { if (string.IsNullOrEmpty(example.OutputPrint) && interpreter.PrintStack.IsEmpty) { return 0.0; } if (!string.IsNullOrEmpty(example.OutputPrint) && interpreter.PrintStack.IsEmpty) return Data.WorstResult; var estimatedWordsPerLineCount = example.InputInteger[0]; var estimatedWordCount = example.OutputInteger[0]; var estimatedLastWordCount = estimatedWordCount - estimatedWordsPerLineCount * Math.Max(0, example.OutputPrintLineCount - 1); var printResult = interpreter.PrintStack.ToString(); var distance = example.OutputPrint.LevenshteinDistance(printResult); var lineCount = interpreter.PrintStack.Count; var lineCountError = example.OutputPrintLineCount.AbsoluteDiff(lineCount); var totalWordsPerLineError = 0L; for (var i = 0; i < interpreter.PrintStack.Lines.Count - 1; i++) { totalWordsPerLineError += interpreter.PrintStack.Lines[i].Split(' ').Length.AbsoluteDiff(estimatedWordsPerLineCount); } // last line var lastLine = interpreter.PrintStack.Lines[interpreter.PrintStack.Lines.Count - 1]; totalWordsPerLineError += lastLine.Split(' ').Length.AbsoluteDiff(estimatedLastWordCount); var result = distance + lineCountError + totalWordsPerLineError; return Math.Min(result, Data.WorstResult); } private double NegativeToZero(IPushInterpreter interpreter, Example example) { if (interpreter.IntegerVectorStack.IsEmpty && example.OutputIntegerVector.Length > 0) return Data.WorstResult; var resultVector = interpreter.IntegerVectorStack.Top; var expectedVector = example.OutputIntegerVector[0]; var comparableLength = Math.Min(resultVector.Count, expectedVector.Length); // add penalty if length does not match //var result = expectedVector.Length > 0 // ? (expectedVector.Length - comparableLength) * (Data.WorstResult / expectedVector.Length) // : 0; if (comparableLength == 0) { return Data.WorstResult; } var result = 0d; for (var i = 0; i < comparableLength; i++) { var expectedStr = expectedVector[i].ToString(); var resultStr = resultVector[i].ToString(); result += expectedStr.LevenshteinDistance(resultStr); } for (var i = comparableLength; i < expectedVector.Length; i++) { result += expectedVector[i].ToString().Length; } return Math.Min(result, Data.WorstResult); } private double WordStats(IPushInterpreter interpreter, Example example) { if (string.IsNullOrEmpty(example.OutputPrint) && interpreter.PrintStack.IsEmpty) { return 0.0; } if (!string.IsNullOrEmpty(example.OutputPrint) && interpreter.PrintStack.IsEmpty) return Data.WorstResult; var printStr = interpreter.PrintStack.ToString(); var distance = example.OutputPrint.LevenshteinDistance(printStr); var statsError = 0.0; long numberOfSentences; var expectedNumberOfSentences = example.OutputInteger[0]; statsError += BenchmarkSuite.Problems.WordStats.GetNumberOfSentences(printStr, out numberOfSentences) ? expectedNumberOfSentences.AbsoluteDiff(numberOfSentences) : Data.WorstResult / 3.0; double averageSentenceLength; var expectedAverageSentenceLength = example.OutputFloat[0]; statsError += BenchmarkSuite.Problems.WordStats.GetAverageSentenceLength(printStr, out averageSentenceLength) ? expectedAverageSentenceLength.AbsoluteDiff(averageSentenceLength) : Data.WorstResult / 3.0; var result = distance + statsError; return Math.Min(result, Data.WorstResult); } private double Checksum(IPushInterpreter interpreter, Example example) { if (!string.IsNullOrEmpty(example.OutputPrint) && interpreter.PrintStack.IsEmpty) return Data.WorstResult; var printStr = interpreter.PrintStack.ToString(); var distance = example.OutputPrint.LevenshteinDistance(printStr); var expectedLastCharIndex = example.OutputPrint.Length - 1; var resultLastCharIndex = printStr.Length - 1; var result = distance; if (expectedLastCharIndex >= 0 && resultLastCharIndex >= 0) { var expectedLastChar = example.OutputPrint[expectedLastCharIndex]; var resultLastChar = printStr[resultLastCharIndex]; result += expectedLastChar.AbsoluteDiff(resultLastChar); } return Math.Min(result, Data.WorstResult); } private double Syllables(IPushInterpreter interpreter, Example example) { if (!string.IsNullOrEmpty(example.OutputPrint) && interpreter.PrintStack.IsEmpty) return Data.WorstResult; var printStr = interpreter.PrintStack.ToString(); var distance = example.OutputPrint.LevenshteinDistance(printStr); long numberOfSyllables; var expectedNumberOfSyllables = example.OutputInteger[0]; var numberOfSyllablesError = BenchmarkSuite.Problems.Syllables.GetNumberOfSyllables(printStr, out numberOfSyllables) ? expectedNumberOfSyllables.AbsoluteDiff(numberOfSyllables) : Data.WorstResult / 2.0; var result = distance + numberOfSyllablesError; return Math.Min(result, Data.WorstResult); } private double Grade(IPushInterpreter interpreter, Example example) { if (!string.IsNullOrEmpty(example.OutputPrint) && interpreter.PrintStack.IsEmpty) return Data.WorstResult; var printStr = interpreter.PrintStack.ToString(); var distance = example.OutputPrint.LevenshteinDistance(printStr); char grade; var expectedGrade = example.OutputChar[0]; var gradeError = BenchmarkSuite.Problems.Grades.GetGrade(printStr, out grade) ? expectedGrade.AbsoluteDiff(grade) : char.MaxValue; var result = distance + gradeError; return Math.Min(result, Data.WorstResult); } private static double FloatDiffer(double a, double b, int digits, double worst) { var result = Math.Round(a, digits) - Math.Round(b, digits); // ReSharper disable once CompareOfFloatsByEqualityOperator if (result == double.MinValue || double.IsPositiveInfinity(result) || double.IsNaN(result) || // ReSharper disable once CompareOfFloatsByEqualityOperator result == double.MaxValue || double.IsNegativeInfinity(result)) return worst; return Math.Abs(result); } private static double IntegerDiffer(long a, long b) { var result = a - b; return result == long.MinValue ? long.MaxValue : Math.Abs(result); } private static double BooleanDiffer(bool a, bool b) { return a == b ? 0 : 1; } private static double StringDiffer(string a, string b) { return a.LevenshteinDistance(b); } private static double CharDiffer(char a, char b) { return IntegerDiffer(a, b); } private static double VectorDiffer(IReadOnlyList estimated, IReadOnlyList outcome, Func differ) { var length = Math.Min(estimated.Count, outcome.Count); var result = 0d; for (var i = 0; i < length; i++) { result += differ(estimated[i], outcome[i]); } if (estimated.Count > outcome.Count) { var remainingLength = estimated.Count - length; for (var i = 0; i < remainingLength; i++) result += differ(estimated[i], default(T)); } return result; } private static double GetPrintDiffer(string estimated, string printResult, double worstResult) { var distance = estimated.LevenshteinDistance(printResult); return Math.Min(distance, worstResult); } private static double GetPrintDiffer(string estimated, IPrintStack printStack, double worstResult) { var printResult = printStack.ToString(); var distance = estimated.LevenshteinDistance(printResult); return Math.Min(distance, worstResult); } private static double GetVectorDiff(IReadOnlyList> estimated, IPushStack> resultStack, double worstResult, Func, IReadOnlyList, double> differ) where T : IComparable { if (estimated.Count == 0) return 0d; if (resultStack.IsEmpty && estimated.Count > 0) return worstResult; var diff = 0d; var comparableLength = Math.Min(estimated.Count, resultStack.Count); if (!resultStack.IsEmpty) { for (var i = 0; i < comparableLength && diff < worstResult; i++) { diff += differ(estimated[i], resultStack[i]); } } var emptyTArray = new T[0]; for (var i = comparableLength; i < estimated.Count - comparableLength && diff < worstResult; i++) { diff += differ(estimated[i], emptyTArray); } return double.IsNaN(diff) || double.IsInfinity(diff) ? worstResult : Math.Min(diff, worstResult); } private static double GetDiff(IReadOnlyList estimated, IPushStack resultStack, double worstResult, Func differ) where T : IComparable { if (estimated.Count == 0) return 0d; if (resultStack.IsEmpty && estimated.Count > 0) return worstResult; var diff = 0d; var comparableLength = Math.Min(estimated.Count, resultStack.Count); if (!resultStack.IsEmpty) { for (var i = 0; i < comparableLength && diff < worstResult; i++) { diff += differ(estimated[i], resultStack[i]); } } for (var i = comparableLength; i < estimated.Count - comparableLength && diff < worstResult; i++) { diff += differ(estimated[i], default(T)); } return double.IsNaN(diff) || double.IsInfinity(diff) ? worstResult : Math.Min(diff, worstResult); } } }