Changeset 16088


Ignore:
Timestamp:
08/27/18 19:03:10 (12 months ago)
Author:
lkammere
Message:

#2886: Store Pareto-optimal sentences (quality/complexity) in the grammar enumeration.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/Analysis/BestSolutionAnalyzer.cs

    r16053 r16088  
    2222using System;
    2323using System.Diagnostics;
     24using System.Linq;
    2425using HeuristicLab.Analysis;
    2526using HeuristicLab.Common;
     
    2728using HeuristicLab.Data;
    2829using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
     30using HeuristicLab.Optimization;
    2931using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    3032using HeuristicLab.Problems.DataAnalysis.Symbolic;
     
    4143    public static readonly string BestComplexityResultName = "Best solution complexity";
    4244    public static readonly string BestSolutions = "Best solutions";
     45    public static readonly string ParetoFrontResultName = "Pareto Front";
     46    public static readonly string ParetoFrontAnalysisResultName = "Pareto Front Analysis";
    4347
    4448    private static readonly ISymbolicDataAnalysisExpressionTreeInterpreter expressionTreeLinearInterpreter = new SymbolicDataAnalysisExpressionTreeLinearInterpreter();
     
    6973
    7074      var results = algorithm.Results;
    71       var grammar = algorithm.Grammar;
    7275      var problemData = algorithm.Problem.ProblemData;
    7376
     
    7679
    7780      double r2 = algorithm.Evaluator.Evaluate(problemData, tree);
    78       double bestR2 = results.ContainsKey(BestTrainingQualityResultName) ? GetValue<double>(results[BestTrainingQualityResultName].Value) : 0.0;
    79       if (r2 < bestR2)
    80         return;
     81      int rank = GetRank(sentence);
    8182
    82       var bestComplexity = results.ContainsKey(BestComplexityResultName) ? GetValue<int>(results[BestComplexityResultName].Value) : int.MaxValue;
    83       var complexity = sentence.Complexity;
    84 
    85       if (algorithm.BestTrainingSentence == null || r2 > bestR2 || (r2.IsAlmost(bestR2) && complexity < bestComplexity)) {
    86         algorithm.BestTrainingSentence = sentence;
    87 
     83      // Store solution in pareto front
     84      if (IsParetoOptimal(algorithm, rank, r2)) {
    8885        var model = new SymbolicRegressionModel(problemData.TargetVariable, tree, expressionTreeLinearInterpreter);
    8986        model.Scale(problemData);
    9087        var bestSolution = model.CreateRegressionSolution(problemData);
    9188
    92         results.AddOrUpdateResult(BestTrainingQualityResultName, new DoubleValue(bestSolution.TrainingRSquared));
    93         results.AddOrUpdateResult(BestTestQualityResultName, new DoubleValue(bestSolution.TestRSquared));
    94         results.AddOrUpdateResult(BestTrainingModelResultName, bestSolution.Model);
    95         results.AddOrUpdateResult(BestTrainingSolutionResultName, bestSolution);
    96         results.AddOrUpdateResult(BestComplexityResultName, new IntValue(complexity));
     89        AddToParetoFront(algorithm, rank, r2, bestSolution);
    9790
    98         // record best sentence quality & length
    99         DataTable dt;
    100         if (!results.ContainsKey(BestSolutions)) {
    101           var names = new[] { "Quality", "Length", "Complexity", "Timestamp" };
    102           dt = new DataTable();
    103           foreach (var name in names) {
    104             dt.Rows.Add(new DataRow(name) { VisualProperties = { StartIndexZero = true } });
     91        // Store overall best solution
     92        double bestR2 = results.ContainsKey(BestTrainingQualityResultName)
     93          ? GetValue<double>(results[BestTrainingQualityResultName].Value)
     94          : 0.0;
     95        var bestComplexity = results.ContainsKey(BestComplexityResultName) ? GetValue<int>(results[BestComplexityResultName].Value) : int.MaxValue;
     96        var complexity = sentence.Complexity;
     97
     98        if (algorithm.BestTrainingSentence == null || r2 > bestR2 || (r2.IsAlmost(bestR2) && complexity < bestComplexity)) {
     99          algorithm.BestTrainingSentence = sentence;
     100
     101          results.AddOrUpdateResult(BestTrainingQualityResultName, new DoubleValue(bestSolution.TrainingRSquared));
     102          results.AddOrUpdateResult(BestTestQualityResultName, new DoubleValue(bestSolution.TestRSquared));
     103          results.AddOrUpdateResult(BestTrainingModelResultName, bestSolution.Model);
     104          results.AddOrUpdateResult(BestTrainingSolutionResultName, bestSolution);
     105          results.AddOrUpdateResult(BestComplexityResultName, new IntValue(complexity));
     106
     107          // record best sentence quality & length
     108          DataTable dt;
     109          if (!results.ContainsKey(BestSolutions)) {
     110            var names = new[] { "Quality", "Length", "Complexity", "Timestamp" };
     111            dt = new DataTable();
     112            foreach (var name in names) {
     113              dt.Rows.Add(new DataRow(name) { VisualProperties = { StartIndexZero = true } });
     114            }
     115            results.AddOrUpdateResult(BestSolutions, dt);
    105116          }
    106           results.AddOrUpdateResult(BestSolutions, dt);
     117          dt = (DataTable)results[BestSolutions].Value;
     118          dt.Rows["Quality"].Values.Add(r2);
     119          dt.Rows["Length"].Values.Add((double)sentence.Count);
     120          dt.Rows["Complexity"].Values.Add(complexity);
     121          dt.Rows["Timestamp"].Values.Add(algorithm.ExecutionTime.TotalMilliseconds / 1000d);
    107122        }
    108         dt = (DataTable)results[BestSolutions].Value;
    109         dt.Rows["Quality"].Values.Add(r2);
    110         dt.Rows["Length"].Values.Add((double)sentence.Count);
    111         dt.Rows["Complexity"].Values.Add(complexity);
    112         dt.Rows["Timestamp"].Values.Add(algorithm.ExecutionTime.TotalMilliseconds / 1000d);
    113123      }
    114124
     
    125135      return v.Value;
    126136    }
     137
     138    private int GetRank(SymbolList s) {
     139      return s.Complexity;
     140    }
     141
     142    private bool IsParetoOptimal(GrammarEnumerationAlgorithm algorithm, int currRank, double currQuality) {
     143      if (!algorithm.Results.ContainsKey(ParetoFrontResultName)) return true;
     144
     145      ItemList<DoubleArray> paretoFront = (ItemList<DoubleArray>)algorithm.Results[ParetoFrontResultName].Value;
     146
     147      DoubleArray precedingRank = paretoFront.OrderByDescending(v => v[0]).FirstOrDefault(v => v[0] <= currRank);
     148
     149      return precedingRank == null || currQuality > precedingRank[1];
     150    }
     151
     152    private void AddToParetoFront(GrammarEnumerationAlgorithm algorithm, int currRank, double currQuality, ISymbolicRegressionSolution solution) {
     153      if (!algorithm.Results.ContainsKey(ParetoFrontResultName)) {
     154        algorithm.Results.Add(new Result(ParetoFrontResultName, new ItemList<DoubleArray>()));
     155
     156        var scatterPlot = new ScatterPlot(ParetoFrontAnalysisResultName, ParetoFrontAnalysisResultName);
     157        algorithm.Results.Add(new Result(ParetoFrontAnalysisResultName, scatterPlot));
     158        scatterPlot.Rows.Add(new ScatterPlotDataRow());
     159
     160        scatterPlot.VisualProperties.XAxisTitle = "Complexity";
     161        scatterPlot.VisualProperties.YAxisTitle = "R²";
     162        scatterPlot.Rows.First().VisualProperties.PointSize = 10;
     163      }
     164
     165      ItemList<DoubleArray> paretoFront = (ItemList<DoubleArray>)algorithm.Results[ParetoFrontResultName].Value;
     166      ScatterPlotDataRow plot = ((ScatterPlot)algorithm.Results[ParetoFrontAnalysisResultName].Value).Rows.First();
     167
     168
     169      // Delete solutions with higher rank, which are now dominated.
     170      foreach (DoubleArray succeedingRank in paretoFront.Where(k => k[0] >= currRank).ToArray()) {
     171        if (succeedingRank[1] < currQuality) {
     172          paretoFront.Remove(succeedingRank);
     173          RemovePoint(plot, succeedingRank[0]);
     174        }
     175      }
     176
     177      paretoFront.Add(new DoubleArray(new double[] { currRank, currQuality }));
     178      plot.Points.Add(new Point2D<double>(currRank, currQuality, solution));
     179    }
     180
     181    private void RemovePoint(ScatterPlotDataRow plot, double rank) {
     182      plot.Points.RemoveAll(p => p.X.IsAlmost(rank));
     183    }
    127184  }
    128185}
Note: See TracChangeset for help on using the changeset viewer.