Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp: 01/03/11 14:48:11 (14 years ago)
Author: gkronber
Message: Introduced base class for operators that evaluate symbolic regression models on a validation set. #1356

Location: trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers
Files: 1 added, 1 edited

Legend: Unmodified, Added, Removed
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/SymbolicRegressionOverfittingAnalyzer.cs

    r5192 r5197  
    3838  [Item("SymbolicRegressionOverfittingAnalyzer", "Calculates and tracks correlation of training and validation fitness of symbolic regression models.")]
    3939  [StorableClass]
    40   public sealed class SymbolicRegressionOverfittingAnalyzer : SingleSuccessorOperator, ISymbolicRegressionAnalyzer {
    41     private const string RandomParameterName = "Random";
    42     private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
     40  public sealed class SymbolicRegressionOverfittingAnalyzer : SymbolicRegressionValidationAnalyzer, ISymbolicRegressionAnalyzer {
    4341    private const string MaximizationParameterName = "Maximization";
    4442    private const string QualityParameterName = "Quality";
    45     private const string ValidationQualityParameterName = "ValidationQuality";
    4643    private const string TrainingValidationCorrelationParameterName = "TrainingValidationCorrelation";
    4744    private const string TrainingValidationCorrelationTableParameterName = "TrainingValidationCorrelationTable";
     
    5047    private const string OverfittingParameterName = "IsOverfitting";
    5148    private const string ResultsParameterName = "Results";
    52     private const string EvaluatorParameterName = "Evaluator";
    53     private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
    54     private const string ProblemDataParameterName = "ProblemData";
    55     private const string ValidationSamplesStartParameterName = "ValidationSamplesStart";
    56     private const string ValidationSamplesEndParameterName = "ValidationSamplesEnd";
    57     private const string RelativeNumberOfEvaluatedSamplesParameterName = "RelativeNumberOfEvaluatedSamples";
    58     private const string UpperEstimationLimitParameterName = "UpperEstimationLimit";
    59     private const string LowerEstimationLimitParameterName = "LowerEstimationLimit";
    6049
    6150    #region parameter properties
    62     public ILookupParameter<IRandom> RandomParameter {
    63       get { return (ILookupParameter<IRandom>)Parameters[RandomParameterName]; }
    64     }
    65     public ScopeTreeLookupParameter<SymbolicExpressionTree> SymbolicExpressionTreeParameter {
    66       get { return (ScopeTreeLookupParameter<SymbolicExpressionTree>)Parameters[SymbolicExpressionTreeParameterName]; }
    67     }
    6851    public ScopeTreeLookupParameter<DoubleValue> QualityParameter {
    6952      get { return (ScopeTreeLookupParameter<DoubleValue>)Parameters[QualityParameterName]; }
    7053    }
    71     public ScopeTreeLookupParameter<DoubleValue> ValidationQualityParameter {
    72       get { return (ScopeTreeLookupParameter<DoubleValue>)Parameters[ValidationQualityParameterName]; }
    73     }
    7454    public ILookupParameter<BoolValue> MaximizationParameter {
    7555      get { return (ILookupParameter<BoolValue>)Parameters[MaximizationParameterName]; }
    76     }
    77     public IValueLookupParameter<ISymbolicExpressionTreeInterpreter> SymbolicExpressionTreeInterpreterParameter {
    78       get { return (IValueLookupParameter<ISymbolicExpressionTreeInterpreter>)Parameters[SymbolicExpressionTreeInterpreterParameterName]; }
    79     }
    80     public ILookupParameter<ISymbolicRegressionEvaluator> EvaluatorParameter {
    81       get { return (ILookupParameter<ISymbolicRegressionEvaluator>)Parameters[EvaluatorParameterName]; }
    82     }
    83     public IValueLookupParameter<DataAnalysisProblemData> ProblemDataParameter {
    84       get { return (IValueLookupParameter<DataAnalysisProblemData>)Parameters[ProblemDataParameterName]; }
    85     }
    86     public IValueLookupParameter<IntValue> ValidationSamplesStartParameter {
    87       get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesStartParameterName]; }
    88     }
    89     public IValueLookupParameter<IntValue> ValidationSamplesEndParameter {
    90       get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesEndParameterName]; }
    91     }
    92     public IValueParameter<PercentValue> RelativeNumberOfEvaluatedSamplesParameter {
    93       get { return (IValueParameter<PercentValue>)Parameters[RelativeNumberOfEvaluatedSamplesParameterName]; }
    94     }
    95     public IValueLookupParameter<DoubleValue> UpperEstimationLimitParameter {
    96       get { return (IValueLookupParameter<DoubleValue>)Parameters[UpperEstimationLimitParameterName]; }
    97     }
    98     public IValueLookupParameter<DoubleValue> LowerEstimationLimitParameter {
    99       get { return (IValueLookupParameter<DoubleValue>)Parameters[LowerEstimationLimitParameterName]; }
    10056    }
    10157    public ILookupParameter<DoubleValue> TrainingValidationQualityCorrelationParameter {
     
    11975    #endregion
    12076    #region properties
    121     public IRandom Random {
    122       get { return RandomParameter.ActualValue; }
    123     }
    12477    public BoolValue Maximization {
    12578      get { return MaximizationParameter.ActualValue; }
    126     }
    127     public ISymbolicExpressionTreeInterpreter SymbolicExpressionTreeInterpreter {
    128       get { return SymbolicExpressionTreeInterpreterParameter.ActualValue; }
    129     }
    130     public ISymbolicRegressionEvaluator Evaluator {
    131       get { return EvaluatorParameter.ActualValue; }
    132     }
    133     public DataAnalysisProblemData ProblemData {
    134       get { return ProblemDataParameter.ActualValue; }
    135     }
    136     public IntValue ValidiationSamplesStart {
    137       get { return ValidationSamplesStartParameter.ActualValue; }
    138     }
    139     public IntValue ValidationSamplesEnd {
    140       get { return ValidationSamplesEndParameter.ActualValue; }
    141     }
    142     public PercentValue RelativeNumberOfEvaluatedSamples {
    143       get { return RelativeNumberOfEvaluatedSamplesParameter.Value; }
    144     }
    145 
    146     public DoubleValue UpperEstimationLimit {
    147       get { return UpperEstimationLimitParameter.ActualValue; }
    148     }
    149     public DoubleValue LowerEstimationLimit {
    150       get { return LowerEstimationLimitParameter.ActualValue; }
    15179    }
    15280    #endregion
     
    15785    public SymbolicRegressionOverfittingAnalyzer()
    15886      : base() {
    159       Parameters.Add(new LookupParameter<IRandom>(RandomParameterName, "The random generator to use."));
    16087      Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>(QualityParameterName, "Training fitness"));
    16188      Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization."));
    162 
    163       Parameters.Add(new ScopeTreeLookupParameter<SymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees to analyze."));
    164       Parameters.Add(new LookupParameter<ISymbolicRegressionEvaluator>(EvaluatorParameterName, "The evaluator which should be used to evaluate the solution on the validation set."));
    165       Parameters.Add(new ValueLookupParameter<ISymbolicExpressionTreeInterpreter>(SymbolicExpressionTreeInterpreterParameterName, "The interpreter that should be used for the analysis of symbolic expression trees."));
    166       Parameters.Add(new ValueLookupParameter<DataAnalysisProblemData>(ProblemDataParameterName, "The problem data for which the symbolic expression tree is a solution."));
    167       Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesStartParameterName, "The first index of the validation partition of the data set."));
    168       Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesEndParameterName, "The last index of the validation partition of the data set."));
    169       Parameters.Add(new ValueParameter<PercentValue>(RelativeNumberOfEvaluatedSamplesParameterName, "The relative number of samples of the dataset partition, which should be randomly chosen for evaluation between the start and end index.", new PercentValue(1)));
    170       Parameters.Add(new ValueLookupParameter<DoubleValue>(UpperEstimationLimitParameterName, "The upper estimation limit that was set for the evaluation of the symbolic expression trees."));
    171       Parameters.Add(new ValueLookupParameter<DoubleValue>(LowerEstimationLimitParameterName, "The lower estimation limit that was set for the evaluation of the symbolic expression trees."));
    172 
    17389      Parameters.Add(new LookupParameter<DoubleValue>(TrainingValidationCorrelationParameterName, "Correlation of training and validation fitnesses"));
    17490      Parameters.Add(new LookupParameter<DataTable>(TrainingValidationCorrelationTableParameterName, "Data table of training and validation fitness correlation values over the whole run."));
     
    187103    }
    188104
    189     public override IOperation Apply() {
    190       ItemArray<DoubleValue> qualities = QualityParameter.ActualValue;
    191       double[] trainingArr = qualities.Select(x => x.Value).ToArray();
    192       double[] validationArr = new double[trainingArr.Length];
     105    protected override void Analyze(SymbolicExpressionTree[] trees, double[] validationQuality) {
     106      double[] trainingQuality = QualityParameter.ActualValue.Select(x => x.Value).ToArray();
    193107
    194       #region calculate validation fitness
    195       string targetVariable = ProblemData.TargetVariable.Value;
    196 
    197       // select a random subset of rows in the validation set
    198       int validationStart = ValidiationSamplesStart.Value;
    199       int validationEnd = ValidationSamplesEnd.Value;
    200       int seed = Random.Next();
    201       int count = (int)((validationEnd - validationStart) * RelativeNumberOfEvaluatedSamples.Value);
    202       if (count == 0) count = 1;
    203       IEnumerable<int> rows = RandomEnumerable.SampleRandomNumbers(seed, validationStart, validationEnd, count)
    204         .Where(row => row < ProblemData.TestSamplesStart.Value || ProblemData.TestSamplesEnd.Value <= row);
    205 
    206       double upperEstimationLimit = UpperEstimationLimit != null ? UpperEstimationLimit.Value : double.PositiveInfinity;
    207       double lowerEstimationLimit = LowerEstimationLimit != null ? LowerEstimationLimit.Value : double.NegativeInfinity;
    208 
    209       var trees = SymbolicExpressionTreeParameter.ActualValue;
    210 
    211       for (int i = 0; i < validationArr.Length; i++) {
    212         var tree = trees[i];
    213         double quality = Evaluator.Evaluate(SymbolicExpressionTreeInterpreter, tree,
    214             lowerEstimationLimit, upperEstimationLimit,
    215             ProblemData.Dataset, targetVariable,
    216            rows);
    217         validationArr[i] = quality;
    218       }
    219      
    220       #endregion
    221 
    222 
    223       double r = alglib.spearmancorr2(trainingArr, validationArr);
     108      double r = alglib.spearmancorr2(trainingQuality, validationQuality);
    224109
    225110      TrainingValidationQualityCorrelationParameter.ActualValue = new DoubleValue(r);
     
    245130
    246131      OverfittingParameter.ActualValue = new BoolValue(overfitting);
    247 
    248       return base.Apply();
    249132    }
    250133  }
Note: See TracChangeset for help on using the changeset viewer.