- Timestamp:
- 01/03/11 14:48:11 (14 years ago)
- Location:
- trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers
- Files:
- 1 added
- 1 edited
Legend:
- Unmodified
- Added
- Removed
trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/SymbolicRegressionOverfittingAnalyzer.cs
r5192 r5197 38 38 [Item("SymbolicRegressionOverfittingAnalyzer", "Calculates and tracks correlation of training and validation fitness of symbolic regression models.")] 39 39 [StorableClass] 40 public sealed class SymbolicRegressionOverfittingAnalyzer : SingleSuccessorOperator, ISymbolicRegressionAnalyzer { 41 private const string RandomParameterName = "Random"; 42 private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree"; 40 public sealed class SymbolicRegressionOverfittingAnalyzer : SymbolicRegressionValidationAnalyzer, ISymbolicRegressionAnalyzer { 43 41 private const string MaximizationParameterName = "Maximization"; 44 42 private const string QualityParameterName = "Quality"; 45 private const string ValidationQualityParameterName = "ValidationQuality";46 43 private const string TrainingValidationCorrelationParameterName = "TrainingValidationCorrelation"; 47 44 private const string TrainingValidationCorrelationTableParameterName = "TrainingValidationCorrelationTable"; … … 50 47 private const string OverfittingParameterName = "IsOverfitting"; 51 48 private const string ResultsParameterName = "Results"; 52 private const string EvaluatorParameterName = "Evaluator";53 private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";54 private const string ProblemDataParameterName = "ProblemData";55 private const string ValidationSamplesStartParameterName = "ValidationSamplesStart";56 private const string ValidationSamplesEndParameterName = "ValidationSamplesEnd";57 private const string RelativeNumberOfEvaluatedSamplesParameterName = "RelativeNumberOfEvaluatedSamples";58 private const string UpperEstimationLimitParameterName = "UpperEstimationLimit";59 private const string LowerEstimationLimitParameterName = "LowerEstimationLimit";60 49 61 50 #region parameter properties 62 public ILookupParameter<IRandom> RandomParameter {63 get { return (ILookupParameter<IRandom>)Parameters[RandomParameterName]; 
}64 }65 public ScopeTreeLookupParameter<SymbolicExpressionTree> SymbolicExpressionTreeParameter {66 get { return (ScopeTreeLookupParameter<SymbolicExpressionTree>)Parameters[SymbolicExpressionTreeParameterName]; }67 }68 51 public ScopeTreeLookupParameter<DoubleValue> QualityParameter { 69 52 get { return (ScopeTreeLookupParameter<DoubleValue>)Parameters[QualityParameterName]; } 70 53 } 71 public ScopeTreeLookupParameter<DoubleValue> ValidationQualityParameter {72 get { return (ScopeTreeLookupParameter<DoubleValue>)Parameters[ValidationQualityParameterName]; }73 }74 54 public ILookupParameter<BoolValue> MaximizationParameter { 75 55 get { return (ILookupParameter<BoolValue>)Parameters[MaximizationParameterName]; } 76 }77 public IValueLookupParameter<ISymbolicExpressionTreeInterpreter> SymbolicExpressionTreeInterpreterParameter {78 get { return (IValueLookupParameter<ISymbolicExpressionTreeInterpreter>)Parameters[SymbolicExpressionTreeInterpreterParameterName]; }79 }80 public ILookupParameter<ISymbolicRegressionEvaluator> EvaluatorParameter {81 get { return (ILookupParameter<ISymbolicRegressionEvaluator>)Parameters[EvaluatorParameterName]; }82 }83 public IValueLookupParameter<DataAnalysisProblemData> ProblemDataParameter {84 get { return (IValueLookupParameter<DataAnalysisProblemData>)Parameters[ProblemDataParameterName]; }85 }86 public IValueLookupParameter<IntValue> ValidationSamplesStartParameter {87 get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesStartParameterName]; }88 }89 public IValueLookupParameter<IntValue> ValidationSamplesEndParameter {90 get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesEndParameterName]; }91 }92 public IValueParameter<PercentValue> RelativeNumberOfEvaluatedSamplesParameter {93 get { return (IValueParameter<PercentValue>)Parameters[RelativeNumberOfEvaluatedSamplesParameterName]; }94 }95 public IValueLookupParameter<DoubleValue> UpperEstimationLimitParameter {96 get { return 
(IValueLookupParameter<DoubleValue>)Parameters[UpperEstimationLimitParameterName]; }97 }98 public IValueLookupParameter<DoubleValue> LowerEstimationLimitParameter {99 get { return (IValueLookupParameter<DoubleValue>)Parameters[LowerEstimationLimitParameterName]; }100 56 } 101 57 public ILookupParameter<DoubleValue> TrainingValidationQualityCorrelationParameter { … … 119 75 #endregion 120 76 #region properties 121 public IRandom Random {122 get { return RandomParameter.ActualValue; }123 }124 77 public BoolValue Maximization { 125 78 get { return MaximizationParameter.ActualValue; } 126 }127 public ISymbolicExpressionTreeInterpreter SymbolicExpressionTreeInterpreter {128 get { return SymbolicExpressionTreeInterpreterParameter.ActualValue; }129 }130 public ISymbolicRegressionEvaluator Evaluator {131 get { return EvaluatorParameter.ActualValue; }132 }133 public DataAnalysisProblemData ProblemData {134 get { return ProblemDataParameter.ActualValue; }135 }136 public IntValue ValidiationSamplesStart {137 get { return ValidationSamplesStartParameter.ActualValue; }138 }139 public IntValue ValidationSamplesEnd {140 get { return ValidationSamplesEndParameter.ActualValue; }141 }142 public PercentValue RelativeNumberOfEvaluatedSamples {143 get { return RelativeNumberOfEvaluatedSamplesParameter.Value; }144 }145 146 public DoubleValue UpperEstimationLimit {147 get { return UpperEstimationLimitParameter.ActualValue; }148 }149 public DoubleValue LowerEstimationLimit {150 get { return LowerEstimationLimitParameter.ActualValue; }151 79 } 152 80 #endregion … … 157 85 public SymbolicRegressionOverfittingAnalyzer() 158 86 : base() { 159 Parameters.Add(new LookupParameter<IRandom>(RandomParameterName, "The random generator to use."));160 87 Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>(QualityParameterName, "Training fitness")); 161 88 Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization.")); 162 163 Parameters.Add(new 
ScopeTreeLookupParameter<SymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees to analyze."));164 Parameters.Add(new LookupParameter<ISymbolicRegressionEvaluator>(EvaluatorParameterName, "The evaluator which should be used to evaluate the solution on the validation set."));165 Parameters.Add(new ValueLookupParameter<ISymbolicExpressionTreeInterpreter>(SymbolicExpressionTreeInterpreterParameterName, "The interpreter that should be used for the analysis of symbolic expression trees."));166 Parameters.Add(new ValueLookupParameter<DataAnalysisProblemData>(ProblemDataParameterName, "The problem data for which the symbolic expression tree is a solution."));167 Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesStartParameterName, "The first index of the validation partition of the data set."));168 Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesEndParameterName, "The last index of the validation partition of the data set."));169 Parameters.Add(new ValueParameter<PercentValue>(RelativeNumberOfEvaluatedSamplesParameterName, "The relative number of samples of the dataset partition, which should be randomly chosen for evaluation between the start and end index.", new PercentValue(1)));170 Parameters.Add(new ValueLookupParameter<DoubleValue>(UpperEstimationLimitParameterName, "The upper estimation limit that was set for the evaluation of the symbolic expression trees."));171 Parameters.Add(new ValueLookupParameter<DoubleValue>(LowerEstimationLimitParameterName, "The lower estimation limit that was set for the evaluation of the symbolic expression trees."));172 173 89 Parameters.Add(new LookupParameter<DoubleValue>(TrainingValidationCorrelationParameterName, "Correlation of training and validation fitnesses")); 174 90 Parameters.Add(new LookupParameter<DataTable>(TrainingValidationCorrelationTableParameterName, "Data table of training and validation fitness correlation values over the whole run.")); … … 
187 103 } 188 104 189 public override IOperation Apply() { 190 ItemArray<DoubleValue> qualities = QualityParameter.ActualValue; 191 double[] trainingArr = qualities.Select(x => x.Value).ToArray(); 192 double[] validationArr = new double[trainingArr.Length]; 105 protected override void Analyze(SymbolicExpressionTree[] trees, double[] validationQuality) { 106 double[] trainingQuality = QualityParameter.ActualValue.Select(x => x.Value).ToArray(); 193 107 194 #region calculate validation fitness 195 string targetVariable = ProblemData.TargetVariable.Value; 196 197 // select a random subset of rows in the validation set 198 int validationStart = ValidiationSamplesStart.Value; 199 int validationEnd = ValidationSamplesEnd.Value; 200 int seed = Random.Next(); 201 int count = (int)((validationEnd - validationStart) * RelativeNumberOfEvaluatedSamples.Value); 202 if (count == 0) count = 1; 203 IEnumerable<int> rows = RandomEnumerable.SampleRandomNumbers(seed, validationStart, validationEnd, count) 204 .Where(row => row < ProblemData.TestSamplesStart.Value || ProblemData.TestSamplesEnd.Value <= row); 205 206 double upperEstimationLimit = UpperEstimationLimit != null ? UpperEstimationLimit.Value : double.PositiveInfinity; 207 double lowerEstimationLimit = LowerEstimationLimit != null ? 
LowerEstimationLimit.Value : double.NegativeInfinity; 208 209 var trees = SymbolicExpressionTreeParameter.ActualValue; 210 211 for (int i = 0; i < validationArr.Length; i++) { 212 var tree = trees[i]; 213 double quality = Evaluator.Evaluate(SymbolicExpressionTreeInterpreter, tree, 214 lowerEstimationLimit, upperEstimationLimit, 215 ProblemData.Dataset, targetVariable, 216 rows); 217 validationArr[i] = quality; 218 } 219 220 #endregion 221 222 223 double r = alglib.spearmancorr2(trainingArr, validationArr); 108 double r = alglib.spearmancorr2(trainingQuality, validationQuality); 224 109 225 110 TrainingValidationQualityCorrelationParameter.ActualValue = new DoubleValue(r); … … 245 130 246 131 OverfittingParameter.ActualValue = new BoolValue(overfitting); 247 248 return base.Apply();249 132 } 250 133 }
Note: See TracChangeset for help on using the changeset viewer.