- Timestamp: 08/24/10 19:25:11 (14 years ago)
- Location: branches/DataAnalysis
- Files:
  - 1 added
  - 3 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/OverfittingAnalyzer.cs
r4297 r4309
 218  218    ItemArray<DoubleValue> validationQualities = ValidationQualityParameter.ActualValue;
 219  219
 220       - string targetVariable = ProblemData.TargetVariable.Value;
 221       -
 222       - // select a random subset of rows in the validation set
 223       - int validationStart = ValidiationSamplesStart.Value;
 224       - int validationEnd = ValidationSamplesEnd.Value;
 225       - int seed = Random.Next();
 226       - int count = (int)((validationEnd - validationStart) * RelativeNumberOfEvaluatedSamples.Value);
 227       - if (count == 0) count = 1;
 228       - IEnumerable<int> rows = RandomEnumerable.SampleRandomNumbers(seed, validationStart, validationEnd, count);
 229       -
 230       - double upperEstimationLimit = UpperEstimationLimit != null ? UpperEstimationLimit.Value : double.PositiveInfinity;
 231       - double lowerEstimationLimit = LowerEstimationLimit != null ? LowerEstimationLimit.Value : double.NegativeInfinity;
      220  + //string targetVariable = ProblemData.TargetVariable.Value;
      221  +
      222  + //// select a random subset of rows in the validation set
      223  + //int validationStart = ValidiationSamplesStart.Value;
      224  + //int validationEnd = ValidationSamplesEnd.Value;
      225  + //int seed = Random.Next();
      226  + //int count = (int)((validationEnd - validationStart) * RelativeNumberOfEvaluatedSamples.Value);
      227  + //if (count == 0) count = 1;
      228  + //IEnumerable<int> rows = RandomEnumerable.SampleRandomNumbers(seed, validationStart, validationEnd, count);
      229  +
      230  + //double upperEstimationLimit = UpperEstimationLimit != null ? UpperEstimationLimit.Value : double.PositiveInfinity;
      231  + //double lowerEstimationLimit = LowerEstimationLimit != null ? LowerEstimationLimit.Value : double.NegativeInfinity;
 232  232
 233  233    //double bestQuality = Maximization.Value ? double.NegativeInfinity : double.PositiveInfinity;
 … …
 250  250    //if (RelativeValidationQualityParameter.ActualValue == null) {
 251  251    // first call initialize the relative quality using the difference between average training and validation quality
 252       - double avgTrainingQuality = qualities.Select(x => x.Value).Median();
 253       - double avgValidationQuality = validationQualities.Select(x => x.Value).Median();
      252  + double avgTrainingQuality = qualities.Select(x => x.Value).Average();
      253  + double avgValidationQuality = validationQualities.Select(x => x.Value).Average();
 254  254
 255  255    if (Maximization.Value)
 … …
 284  284    bool overfitting =
 285  285    avgTrainingQuality > InitialTrainingQualityParameter.ActualValue.Value && // better on training than in initial generation
      286  + // RelativeValidationQualityParameter.ActualValue.Value < 0.0 && // validation quality is worse than training quality
 286  287    r < CorrelationLimitParameter.ActualValue.Value; // low correlation between training and validation quality
 287  288
 288       - //// if validation quality is within a certain margin of percentage deviation (default -5% .. 5%) then there is no overfitting
 289       - //// correlation is also bad when underfitting but validation quality cannot be a lot larger than training quality if overfitting
 290       - //(RelativeValidationQualityParameter.ActualValue.Value > RelativeValidationQualityUpperLimitParameter.ActualValue.Value || // better on training than on validation
 291       - // RelativeValidationQualityParameter.ActualValue.Value < RelativeValidationQualityLowerLimitParameter.ActualValue.Value); // better on training than on validation
 292  289
 293  290    OverfittingParameter.ActualValue = new BoolValue(overfitting);
-
branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis/3.3/HeuristicLab.Problems.DataAnalysis-3.3.csproj
r4255 r4309
 133  133    <Compile Include="Interfaces\IOnlineEvaluator.cs" />
 134  134    <Compile Include="MatrixExtensions.cs" />
      135  + <Compile Include="Operators\CovariantParsimonyPressureAdder.cs" />
 135  136    <Compile Include="Operators\CovariantParsimonyPressure.cs" />
 136  137    <Compile Include="Operators\DynamicDepthLimitInitializer.cs" />
-
branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis/3.3/Operators/CovariantParsimonyPressure.cs
r4272 r4309
  47   47    get { return (IScopeTreeLookupParameter<DoubleValue>)Parameters["AdjustedQuality"]; }
  48   48    }
  49       -
  50   49    public ILookupParameter<BoolValue> MaximizationParameter {
  51   50    get { return (ILookupParameter<BoolValue>)Parameters["Maximization"]; }
 … …
  54   53    get { return (IValueLookupParameter<DoubleValue>)Parameters["K"]; }
  55   54    }
       55  + public ILookupParameter<DoubleValue> CParameter {
       56  + get { return (ILookupParameter<DoubleValue>)Parameters["C"]; }
       57  + }
  56   58    public ILookupParameter<IntValue> GenerationsParameter {
  57   59    get { return (ILookupParameter<IntValue>)Parameters["Generations"]; }
 … …
  63   65    get { return (IValueLookupParameter<BoolValue>)Parameters["ApplyParsimonyPressure"]; }
  64   66    }
  65       - public ILookupParameter<DataTable> ValidationQualityParameter {
  66       - get { return (ILookupParameter<DataTable>)Parameters["Validation Quality"]; }
  67       - }
  68       - //public ILookupParameter<DoubleValue> CurrentBestValidationQualityParameter {
  69       - // get { return (ILookupParameter<DoubleValue>)Parameters["Current best validation quality"]; }
  70       - //}
  71       - //public ILookupParameter<DoubleValue> BestValidationQualityParameter {
  72       - // get { return (ILookupParameter<DoubleValue>)Parameters["Best solution quality (validation)"]; }
  73       - //}
  74   67    public ILookupParameter<DoubleValue> LengthCorrelationParameter {
  75   68    get { return (ILookupParameter<DoubleValue>)Parameters["Correlation(Length, AdjustedFitness)"]; }
 … …
  78   71    get { return (ILookupParameter<DoubleValue>)Parameters["Correlation(Fitness, AdjustedFitness)"]; }
  79   72    }
  80       - //public IValueLookupParameter<IntValue> GenerationSpanParameter {
  81       - // get { return (IValueLookupParameter<IntValue>)Parameters["GenerationSpan"]; }
  82       - //}
  83       - //public IValueLookupParameter<PercentValue> OverfittingLimitParameter {
  84       - // get { return (IValueLookupParameter<PercentValue>)Parameters["OverfittingLimit"]; }
  85       - //}
  86   73    public IValueLookupParameter<PercentValue> ComplexityAdaptionParameter {
  87   74    get { return (IValueLookupParameter<PercentValue>)Parameters["ComplexityAdaption"]; }
  88       - }
  89       - public ILookupParameter<DataTable> QualitiesParameter {
  90       - get { return (ILookupParameter<DataTable>)Parameters["Qualities"]; }
  91   75    }
  92   76    public IValueLookupParameter<DoubleValue> MinAverageSizeParameter {
 … …
 103   87    Parameters.Add(new ValueLookupParameter<DoubleValue>("K", new DoubleValue(1.0)));
 104   88    Parameters.Add(new LookupParameter<IntValue>("Generations"));
 105       - Parameters.Add(new ValueLookupParameter<IntValue>("FirstGenerationParameter", new IntValue(5)));
       89  + Parameters.Add(new ValueLookupParameter<IntValue>("FirstGenerationParameter", new IntValue(1)));
 106   90    Parameters.Add(new ValueLookupParameter<BoolValue>("ApplyParsimonyPressure"));
 107       - //Parameters.Add(new LookupParameter<DoubleValue>("Current best validation quality"));
 108       - //Parameters.Add(new LookupParameter<DoubleValue>("Best solution quality (validation)"));
 109       - Parameters.Add(new LookupParameter<DataTable>("Validation Quality"));
 110       - Parameters.Add(new LookupParameter<DataTable>("Qualities"));
 111       - //Parameters.Add(new ValueLookupParameter<IntValue>("GenerationSpan", new IntValue(5)));
 112       - //Parameters.Add(new ValueLookupParameter<PercentValue>("OverfittingLimit", new PercentValue(5)));
 113       - Parameters.Add(new ValueLookupParameter<PercentValue>("ComplexityAdaption", new PercentValue(-5)));
       91  + Parameters.Add(new ValueLookupParameter<PercentValue>("ComplexityAdaption", new PercentValue(-0.01)));
 114   92    Parameters.Add(new LookupParameter<DoubleValue>("Correlation(Length, AdjustedFitness)"));
 115   93    Parameters.Add(new LookupParameter<DoubleValue>("Correlation(Fitness, AdjustedFitness)"));
 116   94    Parameters.Add(new ValueLookupParameter<DoubleValue>("MinAverageSize", new DoubleValue(15)));
       95  + Parameters.Add(new LookupParameter<DoubleValue>("C"));
 117   96    }
 118   97
 … …
 130  109    }
 131  110    if (!Parameters.ContainsKey("FirstGenerationParameter")) {
 132       - Parameters.Add(new ValueLookupParameter<IntValue>("FirstGenerationParameter", new IntValue(5)));
      111  + Parameters.Add(new ValueLookupParameter<IntValue>("FirstGenerationParameter", new IntValue(1)));
 133  112    }
 134  113    if (!Parameters.ContainsKey("ApplyParsimonyPressure")) {
 135  114    Parameters.Add(new ValueLookupParameter<BoolValue>("ApplyParsimonyPressure"));
 136  115    }
 137       - //if (!Parameters.ContainsKey("Current best validation quality")) {
 138       - // Parameters.Add(new LookupParameter<DoubleValue>("Current best validation quality"));
 139       - //}
 140       - //if (!Parameters.ContainsKey("Best solution quality (validation)")) {
 141       - // Parameters.Add(new LookupParameter<DoubleValue>("Best solution quality (validation)"));
 142       - //}
 143       - if (!Parameters.ContainsKey("Correlation(Length, AdjustedFitness)")) {
 144       - Parameters.Add(new LookupParameter<DoubleValue>("Correlation(Length, AdjustedFitness)"));
 145       - }
 146       - if (!Parameters.ContainsKey("Correlation(Fitness, AdjustedFitness)")) {
 147       - Parameters.Add(new LookupParameter<DoubleValue>("Correlation(Fitness, AdjustedFitness)"));
 148       - }
 149       - if (!Parameters.ContainsKey("Validation Quality")) {
 150       - Parameters.Add(new LookupParameter<DataTable>("Validation Quality"));
 151       - }
 152       - if (!Parameters.ContainsKey("Qualities")) {
 153       - Parameters.Add(new LookupParameter<DataTable>("Qualities"));
 154       - }
 155  116    if (!Parameters.ContainsKey("ComplexityAdaption")) {
 156       - Parameters.Add(new ValueLookupParameter<PercentValue>("ComplexityAdaption", new PercentValue(-5)));
      117  + Parameters.Add(new ValueLookupParameter<PercentValue>("ComplexityAdaption", new PercentValue(-0.01)));
 157  118    }
 158  119    if (!Parameters.ContainsKey("MinAverageSize")) {
 159  120    Parameters.Add(new ValueLookupParameter<DoubleValue>("MinAverageSize", new DoubleValue(15)));
      121  + }
      122  + if (!Parameters.ContainsKey("C")) {
      123  + Parameters.Add(new LookupParameter<DoubleValue>("C"));
 160  124    }
 161  125    }
 … …
 194  158
 195  159    double sizeAdaption = lengthMeanCalculator.Mean * ComplexityAdaptionParameter.ActualValue.Value;
 196       - if (sizeAdaption < 0) sizeAdaption = Math.Floor(sizeAdaption);
 197       - else sizeAdaption = Math.Ceiling(sizeAdaption);
 198       - double g = lengthMeanCalculator.Mean + sizeAdaption;
 199       - if (g < MinAverageSizeParameter.ActualValue.Value)
 200       - g = MinAverageSizeParameter.ActualValue.Value;
      160  + if (lengthMeanCalculator.Mean + sizeAdaption < MinAverageSizeParameter.ActualValue.Value)
      161  + sizeAdaption = 0.0;
 201  162
 202  163    // cov(l, f) - (g(t+1) - mu(t)) avgF
 203  164    // c(t) = --------------------------------------------
 204  165    // cov(l, l^k) - (g(t+1) - mu(t)) E[l^k]
 205       - double c = lengthFitnessCovEvaluator.Covariance - (g - lengthMeanCalculator.Mean) * fitnessMeanCalculator.Mean;
 206       - c /= lengthAdjLengthCovEvaluator.Covariance - (g - lengthMeanCalculator.Mean) * adjLengthMeanCalculator.Mean;
      166  + double c = lengthFitnessCovEvaluator.Covariance - sizeAdaption * fitnessMeanCalculator.Mean;
      167  + c /= lengthAdjLengthCovEvaluator.Covariance - sizeAdaption * adjLengthMeanCalculator.Mean;
      168  +
      169  + CParameter.ActualValue = new DoubleValue(c);
 207  170
 208  171    // adjust fitness
 … …
 230  193
 231  194    } else {
      195  + CParameter.ActualValue = new DoubleValue(0.0);
 232  196    // adjusted fitness is equal to fitness
 233  197    AdjustedQualityParameter.ActualValue = (ItemArray<DoubleValue>)QualityParameter.ActualValue.Clone();
Note: See TracChangeset for help on using the changeset viewer.