#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System.Linq;
using HeuristicLab.Analysis;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Optimization;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.Problems.DataAnalysis.Symbolic;

namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers {
  /// <summary>
  /// Analyzer that keeps track of the best symbolic regression solution (the one with the
  /// smallest quality value) and publishes the number of input variables it uses, the averaged
  /// variable frequencies and a size/height/variable-count history in the result collection.
  /// </summary>
  [Item("BestSymbolicRegressionSolutionAnalyzer", "An operator for analyzing the best solution of symbolic regression problems given in symbolic expression tree encoding.")]
  [StorableClass]
  public sealed class BestSymbolicRegressionSolutionAnalyzer : RegressionSolutionAnalyzer, ISymbolicRegressionAnalyzer {
    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
    private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
    private const string BestSolutionInputvariableCountResultName = "Variables used by best solution";
    private const string VariableFrequenciesParameterName = "VariableFrequencies";
    private const string VariableImpactsResultName = "Integrated variable frequencies";
    private const string BestSolutionParameterName = "BestSolution";
    private const string BestSolutionComplexity = "Best solution complexity";

    // Row names of the data table stored under BestSolutionComplexity.
    private const string BestSolutionSizeRowName = "Best solution size";
    private const string BestSolutionHeightRowName = "Best solution height";
    private const string BestSolutionVariablesRowName = "Best solution variables";

    // Shared by the constructor and the deserialization hook so both register the same text.
    private const string VariableFrequenciesParameterDescription = "The variable frequencies table to use for the calculation of variable impacts";

    // NOTE(review): the parameter and collection types below carry no generic type arguments in
    // this copy of the file; they may have been lost during extraction - confirm against the
    // repository version before compiling.

    #region parameter properties
    /// <summary>Parameter that looks up the symbolic expression trees to analyze.</summary>
    public ScopeTreeLookupParameter SymbolicExpressionTreeParameter {
      get { return (ScopeTreeLookupParameter)Parameters[SymbolicExpressionTreeParameterName]; }
    }
    /// <summary>Parameter providing the interpreter used for the analysis of the trees.</summary>
    public IValueLookupParameter SymbolicExpressionTreeInterpreterParameter {
      get { return (IValueLookupParameter)Parameters[SymbolicExpressionTreeInterpreterParameterName]; }
    }
    /// <summary>Parameter holding the best symbolic regression solution.</summary>
    public ILookupParameter BestSolutionParameter {
      get { return (ILookupParameter)Parameters[BestSolutionParameterName]; }
    }
    /// <summary>Parameter that looks up the variable frequencies table.</summary>
    public ILookupParameter VariableFrequenciesParameter {
      get { return (ILookupParameter)Parameters[VariableFrequenciesParameterName]; }
    }
    #endregion

    #region properties
    /// <summary>Actual value of <see cref="SymbolicExpressionTreeInterpreterParameter"/>.</summary>
    public ISymbolicExpressionTreeInterpreter SymbolicExpressionTreeInterpreter {
      get { return SymbolicExpressionTreeInterpreterParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="SymbolicExpressionTreeParameter"/>.</summary>
    public ItemArray SymbolicExpressionTree {
      get { return SymbolicExpressionTreeParameter.ActualValue; }
    }
    /// <summary>Actual value of <see cref="VariableFrequenciesParameter"/>.</summary>
    public DataTable VariableFrequencies {
      get { return VariableFrequenciesParameter.ActualValue; }
    }
    #endregion

    /// <summary>
    /// Creates the analyzer and registers its tree, interpreter, variable frequencies and
    /// best solution parameters.
    /// </summary>
    public BestSymbolicRegressionSolutionAnalyzer()
      : base() {
      Parameters.Add(new ScopeTreeLookupParameter(SymbolicExpressionTreeParameterName, "The symbolic expression trees to analyze."));
      Parameters.Add(new ValueLookupParameter(SymbolicExpressionTreeInterpreterParameterName, "The interpreter that should be used for the analysis of symbolic expression trees."));
      Parameters.Add(new LookupParameter(VariableFrequenciesParameterName, VariableFrequenciesParameterDescription));
      Parameters.Add(new LookupParameter(BestSolutionParameterName, "The best symbolic regression solution."));
    }

    /// <summary>
    /// Runs after deserialization and adds the variable frequencies parameter when the
    /// restored instance does not contain it.
    /// </summary>
    [StorableHook(HookType.AfterDeserialization)]
    private void Initialize() {
      // Presumably needed for instances persisted before this parameter was introduced.
      if (!Parameters.ContainsKey(VariableFrequenciesParameterName)) {
        Parameters.Add(new LookupParameter(VariableFrequenciesParameterName, VariableFrequenciesParameterDescription));
      }
    }

    /// <summary>
    /// Finds the tree with the smallest quality value and, when no best quality is stored yet or
    /// the stored one is larger, replaces the best solution and refreshes the related results.
    /// </summary>
    /// <returns>The best solution stored in <see cref="BestSolutionParameter"/> after the update.</returns>
    protected override DataAnalysisSolution UpdateBestSolution() {
      // Missing estimation limits mean "unbounded".
      double upperEstimationLimit = UpperEstimationLimit != null ? UpperEstimationLimit.Value : double.PositiveInfinity;
      double lowerEstimationLimit = LowerEstimationLimit != null ? LowerEstimationLimit.Value : double.NegativeInfinity;

      // OrderBy is a stable sort, so ties resolve to the lowest index.
      int i = Quality.Select((x, index) => new { index, x.Value }).OrderBy(x => x.Value).First().index;

      if (BestSolutionQualityParameter.ActualValue == null || BestSolutionQualityParameter.ActualValue.Value > Quality[i].Value) {
        // Interpreter and problem data are cloned so the stored solution gets its own copies.
        var model = new SymbolicRegressionModel((ISymbolicExpressionTreeInterpreter)SymbolicExpressionTreeInterpreter.Clone(), SymbolicExpressionTree[i]);
        var solution = new SymbolicRegressionSolution((DataAnalysisProblemData)ProblemData.Clone(), model, lowerEstimationLimit, upperEstimationLimit);
        solution.Name = BestSolutionParameterName;
        solution.Description = "Best solution on validation partition found over the whole run.";
        BestSolutionParameter.ActualValue = solution;
        BestSolutionQualityParameter.ActualValue = Quality[i];
        BestSymbolicRegressionSolutionAnalyzer.UpdateSymbolicRegressionBestSolutionResults(solution, ProblemData, Results, VariableFrequencies);
      }
      return BestSolutionParameter.ActualValue;
    }

    /// <summary>
    /// Updates the general regression results through
    /// <c>RegressionSolutionAnalyzer.UpdateBestSolutionResults</c> and then the symbolic
    /// regression specific results for the given best solution.
    /// </summary>
    /// <param name="bestSolution">The solution whose model is described in the results.</param>
    /// <param name="problemData">The problem data forwarded to the regression result update.</param>
    /// <param name="results">The result collection to add to or update.</param>
    /// <param name="currentGeneration">The generation value forwarded to the regression result update.</param>
    /// <param name="variableFrequencies">The variable frequencies table; may be null.</param>
    public static void UpdateBestSolutionResults(SymbolicRegressionSolution bestSolution, DataAnalysisProblemData problemData, ResultCollection results, IntValue currentGeneration, DataTable variableFrequencies) {
      RegressionSolutionAnalyzer.UpdateBestSolutionResults(bestSolution, problemData, results, currentGeneration);
      UpdateSymbolicRegressionBestSolutionResults(bestSolution, problemData, results, variableFrequencies);
    }

    /// <summary>
    /// Writes the input variable count, the variable impacts and the complexity history of the
    /// best solution to <paramref name="results"/>. Every entry is created when missing and
    /// updated otherwise, independently of the other entries.
    /// </summary>
    private static void UpdateSymbolicRegressionBestSolutionResults(SymbolicRegressionSolution bestSolution, DataAnalysisProblemData problemData, ResultCollection results, DataTable variableFrequencies) {
      int inputVariableCount = bestSolution.Model.InputVariables.Count();

      if (results.ContainsKey(BestSolutionInputvariableCountResultName)) {
        results[BestSolutionInputvariableCountResultName].Value = new IntValue(inputVariableCount);
      } else {
        results.Add(new Result(BestSolutionInputvariableCountResultName, new IntValue(inputVariableCount)));
      }

      DoubleMatrix variableImpacts = CalculateVariableImpacts(variableFrequencies);
      if (results.ContainsKey(VariableImpactsResultName)) {
        results[VariableImpactsResultName].Value = variableImpacts;
      } else {
        results.Add(new Result(VariableImpactsResultName, variableImpacts));
      }

      bool isNewSizeTable = !results.ContainsKey(BestSolutionComplexity);
      DataTable sizeTable;
      if (isNewSizeTable) {
        sizeTable = new DataTable(BestSolutionComplexity);
        sizeTable.Rows.Add(new DataRow(BestSolutionSizeRowName));
        sizeTable.Rows.Add(new DataRow(BestSolutionHeightRowName));
        sizeTable.Rows.Add(new DataRow(BestSolutionVariablesRowName));
      } else {
        sizeTable = (DataTable)results[BestSolutionComplexity].Value;
      }

      // One new data point per row for the current best solution.
      sizeTable.Rows[BestSolutionSizeRowName].Values.Add(bestSolution.Model.SymbolicExpressionTree.Size);
      sizeTable.Rows[BestSolutionHeightRowName].Values.Add(bestSolution.Model.SymbolicExpressionTree.Height);
      sizeTable.Rows[BestSolutionVariablesRowName].Values.Add(inputVariableCount);

      // A freshly created table is published only after it received its first values.
      if (isNewSizeTable) {
        results.Add(new Result(BestSolutionComplexity, sizeTable));
      }
    }

    /// <summary>
    /// Builds a single-column matrix ("Impact") with one row per data row of
    /// <paramref name="variableFrequencies"/>, containing the average of that row's values.
    /// </summary>
    /// <param name="variableFrequencies">The frequencies table; may be null.</param>
    /// <returns>
    /// The impacts matrix, or a 1x1 matrix when <paramref name="variableFrequencies"/> is null.
    /// </returns>
    private static DoubleMatrix CalculateVariableImpacts(DataTable variableFrequencies) {
      if (variableFrequencies == null) {
        return new DoubleMatrix(1, 1);
      }

      var impacts = new DoubleMatrix(variableFrequencies.Rows.Count, 1, new string[] { "Impact" }, variableFrequencies.Rows.Select(x => x.Name));
      impacts.SortableView = true;
      int rowIndex = 0;
      foreach (var dataRow in variableFrequencies.Rows) {
        // Average() throws on an empty sequence; a row without values counts as impact 0.
        impacts[rowIndex++, 0] = dataRow.Values.DefaultIfEmpty(0.0).Average();
      }
      return impacts;
    }
  }
}