#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Linq;
using alglib;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Operators;
using HeuristicLab.Optimization;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using System.Collections.Generic;
using HeuristicLab.Problems.DataAnalysis.Evaluators;
using HeuristicLab.Analysis;

namespace HeuristicLab.Problems.DataAnalysis.Operators {
  /// <summary>
  /// Operator that writes a length-penalized copy of every quality value to "AdjustedQuality".
  /// The penalty for a tree of length l is c * l^K, where the coefficient c is recomputed on every
  /// application from the covariance of length and fitness, the covariance of length and l^K, and a
  /// target change of the mean length ("ComplexityAdaption"). When the pressure is not active the
  /// adjusted qualities are a plain clone of the qualities.
  /// </summary>
  // NOTE(review): the parameter, ItemArray and IEnumerable types in this file carry no generic type
  // arguments; they look like they were lost somewhere upstream - confirm against the repository copy.
  [Item("Covariant Parsimony Pressure", "Covariant Parsimony Pressure.")]
  [StorableClass]
  public class CovariantParsimonyPressure : SingleSuccessorOperator {
    #region parameter properties
    // Each property is a typed view on the entry with the given key in the Parameters collection.
    public IScopeTreeLookupParameter SymbolicExpressionTreeParameter {
      get { return (IScopeTreeLookupParameter)Parameters["SymbolicExpressionTree"]; }
    }
    public IScopeTreeLookupParameter QualityParameter {
      get { return (IScopeTreeLookupParameter)Parameters["Quality"]; }
    }
    public IScopeTreeLookupParameter AdjustedQualityParameter {
      get { return (IScopeTreeLookupParameter)Parameters["AdjustedQuality"]; }
    }
    public ILookupParameter MaximizationParameter {
      get { return (ILookupParameter)Parameters["Maximization"]; }
    }
    public IValueLookupParameter KParameter {
      get { return (IValueLookupParameter)Parameters["K"]; }
    }
    public ILookupParameter GenerationsParameter {
      get { return (ILookupParameter)Parameters["Generations"]; }
    }
    // The key really is "FirstGenerationParameter"; it must not be renamed because it is also the
    // key that AfterDeserialization looks for.
    public IValueLookupParameter FirstGenerationParameter {
      get { return (IValueLookupParameter)Parameters["FirstGenerationParameter"]; }
    }
    public IValueLookupParameter AntiOverfitParameter {
      get { return (IValueLookupParameter)Parameters["AntiOverfit"]; }
    }
    public ILookupParameter ValidationQualityParameter {
      get { return (ILookupParameter)Parameters["Validation Quality"]; }
    }
    // NOTE(review): neither the constructor nor AfterDeserialization registers
    // "Current best validation quality" or "Best solution quality (validation)" (the registration
    // is commented out), and nothing in this class reads the two properties below. Presumably they
    // fail on access unless the parameter was added from outside - confirm before using them.
    public ILookupParameter CurrentBestValidationQualityParameter {
      get { return (ILookupParameter)Parameters["Current best validation quality"]; }
    }
    public ILookupParameter BestValidationQualityParameter {
      get { return (ILookupParameter)Parameters["Best solution quality (validation)"]; }
    }
    public ILookupParameter LengthCorrelationParameter {
      get { return (ILookupParameter)Parameters["Correlation(Length, AdjustedFitness)"]; }
    }
    public ILookupParameter FitnessCorrelationParameter {
      get { return (ILookupParameter)Parameters["Correlation(Fitness, AdjustedFitness)"]; }
    }
    public IValueLookupParameter GenerationSpanParameter {
      get { return (IValueLookupParameter)Parameters["GenerationSpan"]; }
    }
    public IValueLookupParameter OverfittingLimitParameter {
      get { return (IValueLookupParameter)Parameters["OverfittingLimit"]; }
    }
    public IValueLookupParameter ComplexityAdaptionParameter {
      get { return (IValueLookupParameter)Parameters["ComplexityAdaption"]; }
    }
    public ILookupParameter QualitiesParameter {
      get { return (ILookupParameter)Parameters["Qualities"]; }
    }
    #endregion

    /// <summary>Constructor used by the persistence layer; registers no parameters itself.</summary>
    public CovariantParsimonyPressure(bool deserializing) : base(deserializing) { }

    /// <summary>Creates the operator and registers all of its parameters with their default values.</summary>
    public CovariantParsimonyPressure()
      : base() {
      Parameters.Add(new ScopeTreeLookupParameter("SymbolicExpressionTree"));
      Parameters.Add(new ScopeTreeLookupParameter("Quality"));
      Parameters.Add(new ScopeTreeLookupParameter("AdjustedQuality"));
      Parameters.Add(new LookupParameter("Maximization"));
      Parameters.Add(new ValueLookupParameter("K", new DoubleValue(1.0)));
      Parameters.Add(new LookupParameter("Generations"));
      Parameters.Add(new ValueLookupParameter("FirstGenerationParameter", new IntValue(5)));
      Parameters.Add(new ValueLookupParameter("AntiOverfit", new BoolValue(false)));
      //Parameters.Add(new LookupParameter("Current best validation quality"));
      //Parameters.Add(new LookupParameter("Best solution quality (validation)"));
      Parameters.Add(new LookupParameter("Validation Quality"));
      Parameters.Add(new LookupParameter("Qualities"));
      Parameters.Add(new ValueLookupParameter("GenerationSpan", new IntValue(5)));
      Parameters.Add(new ValueLookupParameter("OverfittingLimit", new PercentValue(5)));
      Parameters.Add(new ValueLookupParameter("ComplexityAdaption", new PercentValue(-5)));
      Parameters.Add(new LookupParameter("Correlation(Length, AdjustedFitness)"));
      Parameters.Add(new LookupParameter("Correlation(Fitness, AdjustedFitness)"));
    }

    /// <summary>
    /// Adds every parameter that is missing after deserialization, using the same keys and default
    /// values as the parameterless constructor.
    /// </summary>
    [StorableHook(Persistence.Default.CompositeSerializers.Storable.HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      if (!Parameters.ContainsKey("Maximization")) {
        Parameters.Add(new LookupParameter("Maximization"));
      }
      if (!Parameters.ContainsKey("K")) {
        Parameters.Add(new ValueLookupParameter("K", new DoubleValue(1.0)));
      }
      if (!Parameters.ContainsKey("AdjustedQuality")) {
        Parameters.Add(new ScopeTreeLookupParameter("AdjustedQuality"));
      }
      if (!Parameters.ContainsKey("Generations")) {
        Parameters.Add(new LookupParameter("Generations"));
      }
      if (!Parameters.ContainsKey("FirstGenerationParameter")) {
        Parameters.Add(new ValueLookupParameter("FirstGenerationParameter", new IntValue(5)));
      }
      if (!Parameters.ContainsKey("AntiOverfit")) {
        Parameters.Add(new ValueLookupParameter("AntiOverfit", new BoolValue(false)));
      }
      //if (!Parameters.ContainsKey("Current best validation quality")) {
      //  Parameters.Add(new LookupParameter("Current best validation quality"));
      //}
      //if (!Parameters.ContainsKey("Best solution quality (validation)")) {
      //  Parameters.Add(new LookupParameter("Best solution quality (validation)"));
      //}
      if (!Parameters.ContainsKey("Correlation(Length, AdjustedFitness)")) {
        Parameters.Add(new LookupParameter("Correlation(Length, AdjustedFitness)"));
      }
      if (!Parameters.ContainsKey("Correlation(Fitness, AdjustedFitness)")) {
        Parameters.Add(new LookupParameter("Correlation(Fitness, AdjustedFitness)"));
      }
      if (!Parameters.ContainsKey("Validation Quality")) {
        Parameters.Add(new LookupParameter("Validation Quality"));
      }
      if (!Parameters.ContainsKey("Qualities")) {
        Parameters.Add(new LookupParameter("Qualities"));
      }
      if (!Parameters.ContainsKey("GenerationSpan")) {
        Parameters.Add(new ValueLookupParameter("GenerationSpan", new IntValue(5)));
      }
      if (!Parameters.ContainsKey("OverfittingLimit")) {
        Parameters.Add(new ValueLookupParameter("OverfittingLimit", new PercentValue(5)));
      }
      if (!Parameters.ContainsKey("ComplexityAdaption")) {
        Parameters.Add(new ValueLookupParameter("ComplexityAdaption", new PercentValue(-5)));
      }
    }

    /// <summary>
    /// Computes the adjusted qualities and the two rank-correlation results.
    /// </summary>
    /// <remarks>
    /// The pressure is active when "Generations" is available and has reached the first-generation
    /// threshold, and either "AntiOverfit" is false or <see cref="IsOverfitting"/> returns true.
    /// Otherwise the adjusted qualities are a clone of the qualities and the fitness correlation is 1.
    /// </remarks>
    /// <returns>The operation returned by the base implementation.</returns>
    public override IOperation Apply() {
      ItemArray trees = SymbolicExpressionTreeParameter.ActualValue;
      ItemArray qualities = QualityParameter.ActualValue;

      // always apply parsimony pressure if anti-overfit is false,
      // otherwise apply it only while we are currently overfitting
      if (GenerationsParameter.ActualValue != null &&
          GenerationsParameter.ActualValue.Value >= FirstGenerationParameter.ActualValue.Value &&
          (AntiOverfitParameter.ActualValue.Value == false || IsOverfitting())) {
        double k = KParameter.ActualValue.Value;
        bool maximization = MaximizationParameter.ActualValue.Value;

        // Materialize the lengths once: the LINQ query is deferred and would otherwise re-evaluate
        // tree.Size on each of the three passes below.
        var lengths = (from tree in trees
                       select tree.Size).ToArray();
        // Same pairing as advancing two enumerators in lock-step: stop at the shorter sequence.
        int count = Math.Min(lengths.Length, qualities.Length);
        double[] adjLengths = new double[count]; // l^k, reused when the penalty is applied

        // calculate cov(f, l) and cov(l, l^k)
        OnlineCovarianceEvaluator lengthFitnessCovEvaluator = new OnlineCovarianceEvaluator();
        OnlineCovarianceEvaluator lengthAdjLengthCovEvaluator = new OnlineCovarianceEvaluator();
        OnlineMeanAndVarianceCalculator lengthMeanCalculator = new OnlineMeanAndVarianceCalculator();
        OnlineMeanAndVarianceCalculator fitnessMeanCalculator = new OnlineMeanAndVarianceCalculator();
        OnlineMeanAndVarianceCalculator adjLengthMeanCalculator = new OnlineMeanAndVarianceCalculator();
        for (int i = 0; i < count; i++) {
          double fitness = qualities[i].Value;
          if (!maximization) {
            // use f = 1 / (1 + quality) for minimization problems
            fitness = 1.0 / (1.0 + fitness);
          }
          double adjLength = Math.Pow(lengths[i], k);
          adjLengths[i] = adjLength;

          lengthFitnessCovEvaluator.Add(lengths[i], fitness);
          lengthAdjLengthCovEvaluator.Add(lengths[i], adjLength);
          lengthMeanCalculator.Add(lengths[i]);
          fitnessMeanCalculator.Add(fitness);
          adjLengthMeanCalculator.Add(adjLength);
        }

        // Target mean length g = mu + (mu * ComplexityAdaption), with the change rounded away from zero.
        double sizeAdaption = lengthMeanCalculator.Mean * ComplexityAdaptionParameter.ActualValue.Value;
        if (sizeAdaption < 0) sizeAdaption = Math.Floor(sizeAdaption);
        else sizeAdaption = Math.Ceiling(sizeAdaption);
        double g = lengthMeanCalculator.Mean + sizeAdaption;

        //            cov(l, f) - (g(t+1) - mu(t)) avgF
        // c(t) = --------------------------------------------
        //           cov(l, l^k) - (g(t+1) - mu(t)) E[l^k]
        double c = lengthFitnessCovEvaluator.Covariance - (g - lengthMeanCalculator.Mean) * fitnessMeanCalculator.Mean;
        c /= lengthAdjLengthCovEvaluator.Covariance - (g - lengthMeanCalculator.Mean) * adjLengthMeanCalculator.Mean;
        if (double.IsNaN(c) || double.IsInfinity(c)) {
          // Degenerate statistics (e.g. zero denominator): apply no pressure instead of turning
          // every adjusted quality into NaN or infinity.
          c = 0.0;
        }

        // adjust fitness
        // NOTE(review): for minimization c is derived from the transformed fitness 1 / (1 + quality)
        // but is subtracted from the raw quality here - confirm this is intended.
        ItemArray adjQualities = new ItemArray(qualities.Length);
        for (int i = 0; i < count; i++) {
          adjQualities[i] = new DoubleValue(qualities[i].Value - c * adjLengths[i]);
        }
        AdjustedQualityParameter.ActualValue = adjQualities;

        double[] lengthArr = lengths.Select(x => (double)x).ToArray();
        double[] adjFitness = (from f in adjQualities
                               select f.Value).ToArray();
        double[] fitnessArr = (from f in qualities
                               let normFit = maximization ? f.Value : 1.0 / (1.0 + f.Value)
                               select normFit).ToArray();

        LengthCorrelationParameter.ActualValue = new DoubleValue(alglib.correlation.spearmanrankcorrelation(lengthArr, adjFitness, lengthArr.Length));
        FitnessCorrelationParameter.ActualValue = new DoubleValue(alglib.correlation.spearmanrankcorrelation(fitnessArr, adjFitness, lengthArr.Length));
      } else {
        // adjusted fitness is equal to fitness
        AdjustedQualityParameter.ActualValue = (ItemArray)QualityParameter.ActualValue.Clone();
        FitnessCorrelationParameter.ActualValue = new DoubleValue(1.0);

        double[] treeLengths = (from tree in trees
                                select (double)tree.Size).ToArray();
        double[] fitness = (from f in AdjustedQualityParameter.ActualValue
                            select f.Value).ToArray();
        LengthCorrelationParameter.ActualValue = new DoubleValue(alglib.correlation.spearmanrankcorrelation(treeLengths, fitness, treeLengths.Length));
      }
      return base.Apply();
    }

    /// <summary>
    /// Compares the averages of the most recent "GenerationSpan" best training and best validation
    /// qualities.
    /// </summary>
    /// <returns>
    /// True when the relative gap between the two averages exceeds "OverfittingLimit"; false when
    /// either data table is unavailable or there are not enough recorded values to average.
    /// </returns>
    private bool IsOverfitting() {
      bool maximization = MaximizationParameter.ActualValue.Value;
      DataTable trainingQualities = QualitiesParameter.ActualValue;
      DataTable validationQualities = ValidationQualityParameter.ActualValue;
      int genSpan = GenerationSpanParameter.ActualValue.Value;

      if (validationQualities == null || trainingQualities == null) return false;
      // A non-positive span leaves nothing to average (Average() throws on an empty sequence).
      if (genSpan <= 0) return false;
      // NOTE(review): this guard inspects "Best solution quality (validation)" while the values
      // averaged below come from "Current best validation quality" - confirm that both rows always
      // hold the same number of values.
      if (validationQualities.Rows["Best solution quality (validation)"].Values.Count < genSpan) return false;

      IEnumerable bestTrainingQualities = trainingQualities.Rows["CurrentBestQuality"].Values;
      IEnumerable bestValidationQualities = validationQualities.Rows["Current best validation quality"].Values;

      // Most recent values first, limited to the configured window.
      double[] trainingWindow = bestTrainingQualities.Reverse().Take(genSpan).ToArray();
      double[] validationWindow = bestValidationQualities.Reverse().Take(genSpan).ToArray();
      if (trainingWindow.Length == 0 || validationWindow.Length == 0) return false;

      double trainingAvg = trainingWindow.Average();
      double validationAvg = validationWindow.Average();
      double maxPercentDiff = OverfittingLimitParameter.ActualValue.Value;
      // Relative amount by which training is better than validation, in the problem's direction.
      double percentDiff = maximization ? trainingAvg / validationAvg - 1 : validationAvg / trainingAvg - 1;
      return percentDiff > maxPercentDiff;
    }
  }
}