Changeset 4255
- Timestamp: 08/18/10 19:46:02 (14 years ago)
- Location: branches/DataAnalysis
- Files: 3 edited
Legend:
- Unmodified
- Added
- Removed
branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer.cs
r4244 r4255 146 146 get { return (ILookupParameter<DoubleValue>)Parameters[BestSolutionQualityParameterName]; } 147 147 } 148 public ILookupParameter<DataTable> BestSolutionQualityValuesParameter { 149 get { return (ILookupParameter<DataTable>)Parameters[BestSolutionQualityValuesParameterName]; } 150 } 148 151 public ILookupParameter<ResultCollection> ResultsParameter { 149 152 get { return (ILookupParameter<ResultCollection>)Parameters[ResultsParameterName]; } … … 230 233 Parameters.Add(new LookupParameter<DoubleValue>(BestKnownQualityParameterName, "The best known (validation) quality achieved on the data set.")); 231 234 Parameters.Add(new LookupParameter<DoubleValue>(CurrentBestValidationQualityParameterName, "The quality of the best solution (on the validation set) of the current generation.")); 235 Parameters.Add(new LookupParameter<DataTable>(BestSolutionQualityValuesParameterName)); 232 236 Parameters.Add(new LookupParameter<DataTable>(VariableFrequenciesParameterName, "The variable frequencies table to use for the calculation of variable impacts")); 233 237 } … … 244 248 if (!Parameters.ContainsKey(MaximizationParameterName)) { 245 249 Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization.")); 250 } 251 if (!Parameters.ContainsKey(BestSolutionQualityValuesParameterName)) { 252 Parameters.Add(new LookupParameter<DataTable>(BestSolutionQualityValuesParameterName)); 246 253 } 247 254 #endregion … … 323 330 AddValue(validationValues, BestSolutionQualityParameter.ActualValue.Value, BestSolutionQualityParameterName, BestSolutionQualityParameterName); 324 331 AddValue(validationValues, bestQuality, CurrentBestValidationQualityParameterName, CurrentBestValidationQualityParameterName); 332 333 BestSolutionQualityValuesParameter.ActualValue = validationValues; 334 325 335 return base.Apply(); 326 336 } -
branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis/3.3/HeuristicLab.Problems.DataAnalysis-3.3.csproj
r4233 r4255 182 182 </ItemGroup> 183 183 <ItemGroup> 184 <ProjectReference Include="..\..\HeuristicLab.Analysis\3.3\HeuristicLab.Analysis-3.3.csproj"> 185 <Project>{887425B4-4348-49ED-A457-B7D2C26DDBF9}</Project> 186 <Name>HeuristicLab.Analysis-3.3</Name> 187 </ProjectReference> 184 188 <ProjectReference Include="..\..\HeuristicLab.Collections\3.3\HeuristicLab.Collections-3.3.csproj"> 185 189 <Project>{958B43BC-CC5C-4FA2-8628-2B3B01D890B6}</Project> -
branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis/3.3/Operators/CovariantParsimonyPressure.cs
r4233 r4255 32 32 using System.Collections.Generic; 33 33 using HeuristicLab.Problems.DataAnalysis.Evaluators; 34 using HeuristicLab.Analysis; 34 35 35 36 namespace HeuristicLab.Problems.DataAnalysis.Operators { … … 43 44 get { return (IScopeTreeLookupParameter<DoubleValue>)Parameters["Quality"]; } 44 45 } 46 public IScopeTreeLookupParameter<DoubleValue> AdjustedQualityParameter { 47 get { return (IScopeTreeLookupParameter<DoubleValue>)Parameters["AdjustedQuality"]; } 48 } 49 45 50 public ILookupParameter<BoolValue> MaximizationParameter { 46 51 get { return (ILookupParameter<BoolValue>)Parameters["Maximization"]; } … … 49 54 get { return (IValueLookupParameter<DoubleValue>)Parameters["K"]; } 50 55 } 51 56 public ILookupParameter<IntValue> GenerationsParameter { 57 get { return (ILookupParameter<IntValue>)Parameters["Generations"]; } 58 } 59 public IValueLookupParameter<IntValue> FirstGenerationParameter { 60 get { return (IValueLookupParameter<IntValue>)Parameters["FirstGenerationParameter"]; } 61 } 62 public IValueLookupParameter<BoolValue> AntiOverfitParameter { 63 get { return (IValueLookupParameter<BoolValue>)Parameters["AntiOverfit"]; } 64 } 65 public ILookupParameter<DataTable> ValidationQualityParameter { 66 get { return (ILookupParameter<DataTable>)Parameters["Validation Quality"]; } 67 } 68 public ILookupParameter<DoubleValue> CurrentBestValidationQualityParameter { 69 get { return (ILookupParameter<DoubleValue>)Parameters["Current best validation quality"]; } 70 } 71 public ILookupParameter<DoubleValue> BestValidationQualityParameter { 72 get { return (ILookupParameter<DoubleValue>)Parameters["Best solution quality (validation)"]; } 73 } 74 public ILookupParameter<DoubleValue> LengthCorrelationParameter { 75 get { return (ILookupParameter<DoubleValue>)Parameters["Correlation(Length, AdjustedFitness)"]; } 76 } 77 public ILookupParameter<DoubleValue> FitnessCorrelationParameter { 78 get { return 
(ILookupParameter<DoubleValue>)Parameters["Correlation(Fitness, AdjustedFitness)"]; } 79 } 80 public IValueLookupParameter<IntValue> GenerationSpanParameter { 81 get { return (IValueLookupParameter<IntValue>)Parameters["GenerationSpan"]; } 82 } 83 public IValueLookupParameter<PercentValue> OverfittingLimitParameter { 84 get { return (IValueLookupParameter<PercentValue>)Parameters["OverfittingLimit"]; } 85 } 86 public IValueLookupParameter<PercentValue> ComplexityAdaptionParameter { 87 get { return (IValueLookupParameter<PercentValue>)Parameters["ComplexityAdaption"]; } 88 } 89 public ILookupParameter<DataTable> QualitiesParameter { 90 get { return (ILookupParameter<DataTable>)Parameters["Qualities"]; } 91 } 52 92 53 93 public CovariantParsimonyPressure(bool deserializing) : base(deserializing) { } … … 56 96 Parameters.Add(new ScopeTreeLookupParameter<SymbolicExpressionTree>("SymbolicExpressionTree")); 57 97 Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>("Quality")); 98 Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>("AdjustedQuality")); 58 99 Parameters.Add(new LookupParameter<BoolValue>("Maximization")); 59 100 Parameters.Add(new ValueLookupParameter<DoubleValue>("K", new DoubleValue(1.0))); 101 Parameters.Add(new LookupParameter<IntValue>("Generations")); 102 Parameters.Add(new ValueLookupParameter<IntValue>("FirstGenerationParameter", new IntValue(5))); 103 Parameters.Add(new ValueLookupParameter<BoolValue>("AntiOverfit", new BoolValue(false))); 104 //Parameters.Add(new LookupParameter<DoubleValue>("Current best validation quality")); 105 //Parameters.Add(new LookupParameter<DoubleValue>("Best solution quality (validation)")); 106 Parameters.Add(new LookupParameter<DataTable>("Validation Quality")); 107 Parameters.Add(new LookupParameter<DataTable>("Qualities")); 108 Parameters.Add(new ValueLookupParameter<IntValue>("GenerationSpan", new IntValue(5))); 109 Parameters.Add(new ValueLookupParameter<PercentValue>("OverfittingLimit", new 
PercentValue(5))); 110 Parameters.Add(new ValueLookupParameter<PercentValue>("ComplexityAdaption", new PercentValue(-5))); 111 Parameters.Add(new LookupParameter<DoubleValue>("Correlation(Length, AdjustedFitness)")); 112 Parameters.Add(new LookupParameter<DoubleValue>("Correlation(Fitness, AdjustedFitness)")); 60 113 } 61 114 … … 66 119 if (!Parameters.ContainsKey("K")) 67 120 Parameters.Add(new ValueLookupParameter<DoubleValue>("K", new DoubleValue(1.0))); 121 if (!Parameters.ContainsKey("AdjustedQuality")) { 122 Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>("AdjustedQuality")); 123 } 124 if (!Parameters.ContainsKey("Generations")) { 125 Parameters.Add(new LookupParameter<IntValue>("Generations")); 126 } 127 if (!Parameters.ContainsKey("FirstGenerationParameter")) { 128 Parameters.Add(new ValueLookupParameter<IntValue>("FirstGenerationParameter", new IntValue(5))); 129 } 130 if (!Parameters.ContainsKey("AntiOverfit")) { 131 Parameters.Add(new ValueLookupParameter<BoolValue>("AntiOverfit", new BoolValue(false))); 132 } 133 //if (!Parameters.ContainsKey("Current best validation quality")) { 134 // Parameters.Add(new LookupParameter<DoubleValue>("Current best validation quality")); 135 //} 136 //if (!Parameters.ContainsKey("Best solution quality (validation)")) { 137 // Parameters.Add(new LookupParameter<DoubleValue>("Best solution quality (validation)")); 138 //} 139 if (!Parameters.ContainsKey("Correlation(Length, AdjustedFitness)")) { 140 Parameters.Add(new LookupParameter<DoubleValue>("Correlation(Length, AdjustedFitness)")); 141 } 142 if (!Parameters.ContainsKey("Correlation(Fitness, AdjustedFitness)")) { 143 Parameters.Add(new LookupParameter<DoubleValue>("Correlation(Fitness, AdjustedFitness)")); 144 } 145 if (!Parameters.ContainsKey("Validation Quality")) { 146 Parameters.Add(new LookupParameter<DataTable>("Validation Quality")); 147 } 148 if (!Parameters.ContainsKey("Qualities")) { 149 Parameters.Add(new LookupParameter<DataTable>("Qualities")); 
150 } 151 if (!Parameters.ContainsKey("GenerationSpan")) { 152 Parameters.Add(new ValueLookupParameter<IntValue>("GenerationSpan", new IntValue(5))); 153 } 154 if (!Parameters.ContainsKey("OverfittingLimit")) { 155 Parameters.Add(new ValueLookupParameter<PercentValue>("OverfittingLimit", new PercentValue(5))); 156 } 157 if (!Parameters.ContainsKey("ComplexityAdaption")) { 158 Parameters.Add(new ValueLookupParameter<PercentValue>("ComplexityAdaption", new PercentValue(-5))); 159 } 68 160 } 69 161 70 162 public override IOperation Apply() { 71 var trees = SymbolicExpressionTreeParameter.ActualValue; 72 var qualities = QualityParameter.ActualValue; 73 var lengths = from tree in trees 74 select tree.Size; 75 double k = KParameter.ActualValue.Value; 76 77 // calculate cov(f, l) and cov(l, l^k) 78 OnlineCovarianceEvaluator lengthFitnessCovEvaluator = new OnlineCovarianceEvaluator(); 79 OnlineCovarianceEvaluator lengthAdjLengthCovEvaluator = new OnlineCovarianceEvaluator(); 80 var lengthEnumerator = lengths.GetEnumerator(); 81 var qualityEnumerator = qualities.GetEnumerator(); 82 while (lengthEnumerator.MoveNext() & qualityEnumerator.MoveNext()) { 83 double fitness = qualityEnumerator.Current.Value; 84 if (!MaximizationParameter.ActualValue.Value) { 85 // use f = 1 / (1 + quality) for minimization problems 86 fitness = 1.0 / (1.0 + fitness); 163 ItemArray<SymbolicExpressionTree> trees = SymbolicExpressionTreeParameter.ActualValue; 164 ItemArray<DoubleValue> qualities = QualityParameter.ActualValue; 165 // always apply Parsimony pressure if anti-overfit is false 166 // otherwise appliy PP only when we are currently overfitting 167 if (GenerationsParameter.ActualValue != null && GenerationsParameter.ActualValue.Value >= FirstGenerationParameter.ActualValue.Value && 168 (AntiOverfitParameter.ActualValue.Value == false || IsOverfitting())) { 169 var lengths = from tree in trees 170 select tree.Size; 171 double k = KParameter.ActualValue.Value; 172 173 // calculate cov(f, l) 
and cov(l, l^k) 174 OnlineCovarianceEvaluator lengthFitnessCovEvaluator = new OnlineCovarianceEvaluator(); 175 OnlineCovarianceEvaluator lengthAdjLengthCovEvaluator = new OnlineCovarianceEvaluator(); 176 OnlineMeanAndVarianceCalculator lengthMeanCalculator = new OnlineMeanAndVarianceCalculator(); 177 OnlineMeanAndVarianceCalculator fitnessMeanCalculator = new OnlineMeanAndVarianceCalculator(); 178 OnlineMeanAndVarianceCalculator adjLengthMeanCalculator = new OnlineMeanAndVarianceCalculator(); 179 var lengthEnumerator = lengths.GetEnumerator(); 180 var qualityEnumerator = qualities.GetEnumerator(); 181 while (lengthEnumerator.MoveNext() & qualityEnumerator.MoveNext()) { 182 double fitness = qualityEnumerator.Current.Value; 183 if (!MaximizationParameter.ActualValue.Value) { 184 // use f = 1 / (1 + quality) for minimization problems 185 fitness = 1.0 / (1.0 + fitness); 186 } 187 lengthFitnessCovEvaluator.Add(lengthEnumerator.Current, fitness); 188 lengthAdjLengthCovEvaluator.Add(lengthEnumerator.Current, Math.Pow(lengthEnumerator.Current, k)); 189 lengthMeanCalculator.Add(lengthEnumerator.Current); 190 fitnessMeanCalculator.Add(fitness); 191 adjLengthMeanCalculator.Add(Math.Pow(lengthEnumerator.Current, k)); 87 192 } 88 lengthFitnessCovEvaluator.Add(lengthEnumerator.Current, fitness); 89 lengthAdjLengthCovEvaluator.Add(lengthEnumerator.Current, Math.Pow(lengthEnumerator.Current, k)); 90 } 91 92 // c = cov(l, f) / cov(l, l^k) 93 double c = lengthFitnessCovEvaluator.Covariance / lengthAdjLengthCovEvaluator.Covariance; 94 95 // adjust fitness 193 194 double sizeAdaption = lengthMeanCalculator.Mean * ComplexityAdaptionParameter.ActualValue.Value; 195 if (sizeAdaption < 0) sizeAdaption = Math.Floor(sizeAdaption); 196 else sizeAdaption = Math.Ceiling(sizeAdaption); 197 double g = lengthMeanCalculator.Mean + sizeAdaption; 198 199 // cov(l, f) - (g(t+1) - mu(t)) avgF 200 // c(t) = -------------------------------------------- 201 // cov(l, l^k) - (g(t+1) - mu(t)) E[l^k] 202 
double c = lengthFitnessCovEvaluator.Covariance - (g - lengthMeanCalculator.Mean) * fitnessMeanCalculator.Mean; 203 c /= lengthAdjLengthCovEvaluator.Covariance - (g - lengthMeanCalculator.Mean) * adjLengthMeanCalculator.Mean; 204 205 // adjust fitness 206 bool maximization = MaximizationParameter.ActualValue.Value; 207 208 lengthEnumerator = lengths.GetEnumerator(); 209 qualityEnumerator = qualities.GetEnumerator(); 210 int i = 0; 211 ItemArray<DoubleValue> adjQualities = new ItemArray<DoubleValue>(qualities.Length); 212 213 while (lengthEnumerator.MoveNext() & qualityEnumerator.MoveNext()) { 214 adjQualities[i++] = new DoubleValue(qualityEnumerator.Current.Value - c * Math.Pow(lengthEnumerator.Current, k)); 215 } 216 AdjustedQualityParameter.ActualValue = adjQualities; 217 double[] lengthArr = lengths.Select(x => (double)x).ToArray<double>(); 218 219 double[] adjFitess = (from f in AdjustedQualityParameter.ActualValue 220 select f.Value).ToArray<double>(); 221 double[] fitnessArr = (from f in QualityParameter.ActualValue 222 let normFit = maximization ? 
f.Value : 1.0 / (1.0 + f.Value) 223 select normFit).ToArray<double>(); 224 225 LengthCorrelationParameter.ActualValue = new DoubleValue(alglib.correlation.spearmanrankcorrelation(lengthArr, adjFitess, lengthArr.Length)); 226 FitnessCorrelationParameter.ActualValue = new DoubleValue(alglib.correlation.spearmanrankcorrelation(fitnessArr, adjFitess, lengthArr.Length)); 227 228 } else { 229 // adjusted fitness is equal to fitness 230 AdjustedQualityParameter.ActualValue = (ItemArray<DoubleValue>)QualityParameter.ActualValue.Clone(); 231 FitnessCorrelationParameter.ActualValue = new DoubleValue(1.0); 232 233 double[] lengths = (from tree in trees 234 select (double)tree.Size).ToArray<double>(); 235 236 double[] fitess = (from f in AdjustedQualityParameter.ActualValue 237 select f.Value).ToArray<double>(); 238 239 LengthCorrelationParameter.ActualValue = new DoubleValue(alglib.correlation.spearmanrankcorrelation(lengths, fitess, lengths.Length)); 240 } 241 return base.Apply(); 242 } 243 244 private bool IsOverfitting() { 96 245 bool maximization = MaximizationParameter.ActualValue.Value; 97 98 lengthEnumerator = lengths.GetEnumerator(); 99 qualityEnumerator = qualities.GetEnumerator(); 100 while (lengthEnumerator.MoveNext() & qualityEnumerator.MoveNext()) { 101 qualityEnumerator.Current.Value = qualityEnumerator.Current.Value - c * Math.Pow(lengthEnumerator.Current, k); 102 } 103 104 return base.Apply(); 246 DataTable trainingQualities = QualitiesParameter.ActualValue; 247 DataTable validationQualities = ValidationQualityParameter.ActualValue; 248 int genSpan = GenerationSpanParameter.ActualValue.Value; 249 if (validationQualities == null || trainingQualities == null) return false; 250 if (validationQualities.Rows["Best solution quality (validation)"].Values.Count < genSpan) return false; 251 252 IEnumerable<double> bestTrainingQualities = trainingQualities.Rows["CurrentBestQuality"].Values; 253 IEnumerable<double> bestValidationQualities = 
validationQualities.Rows["Current best validation quality"].Values; 254 255 double trainingAvg = bestTrainingQualities.Reverse().Take(genSpan).Average(); 256 double validationAvg = bestValidationQualities.Reverse().Take(genSpan).Average(); 257 258 double maxPercentDiff = OverfittingLimitParameter.ActualValue.Value; 259 260 double percentDiff = maximization ? trainingAvg / validationAvg - 1 : validationAvg / trainingAvg - 1; 261 return percentDiff > maxPercentDiff; 105 262 } 106 263 }
Note: See TracChangeset for help on using the changeset viewer.