Changeset 4127


Ignore:
Timestamp: 08/01/10 18:13:42 (9 years ago)
Author: gkronber
Message: Added correlation coefficient evaluator for symbolic regression problems. #1117

Location: trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3
Files: 1 added, 3 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/HeuristicLab.Problems.DataAnalysis.Regression-3.3.csproj

    r4112 r4127  
    129129    <Compile Include="Symbolic\Analyzers\SymbolicRegressionVariableFrequencyAnalyzer.cs" />
    130130    <Compile Include="Symbolic\Analyzers\ValidationBestScaledSymbolicRegressionSolutionAnalyzer.cs" />
     131    <Compile Include="Symbolic\SymbolicRegressionPearsonsRSquaredEvaluator.cs" />
    131132    <Compile Include="Symbolic\SymbolicRegressionScaledMeanAndVarianceSquaredErrorEvaluator.cs" />
    132133    <Compile Include="Symbolic\SimpleSymbolicRegressionEvaluator.cs" />
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer.cs

    r4068 r4127  
    4040  [StorableClass]
    4141  public sealed class FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer : SingleSuccessorOperator, ISymbolicRegressionAnalyzer {
     42    private const string RandomParameterName = "Random";
    4243    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
    4344    private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
     
    4647    private const string ValidationSamplesEndParameterName = "SamplesEnd";
    4748    private const string QualityParameterName = "Quality";
    48     private const string ScaledQualityParameterName = "ScaledQuality";
    4949    private const string UpperEstimationLimitParameterName = "UpperEstimationLimit";
    5050    private const string LowerEstimationLimitParameterName = "LowerEstimationLimit";
    51     private const string AlphaParameterName = "Alpha";
    52     private const string BetaParameterName = "Beta";
    5351    private const string BestSolutionParameterName = "Best solution (validation)";
    5452    private const string BestSolutionQualityParameterName = "Best solution quality (validation)";
     
    5957    private const string BestKnownQualityParameterName = "BestKnownQuality";
    6058    private const string GenerationsParameterName = "Generations";
     59    private const string RelativeNumberOfEvaluatedSamplesParameterName = "RelativeNumberOfEvaluatedSamples";
    6160
    6261    private const string TrainingMeanSquaredErrorQualityParameterName = "Mean squared error (training)";
     
    101100
    102101    #region parameter properties
     102    public ILookupParameter<IRandom> RandomParameter {
     103      get { return (ILookupParameter<IRandom>)Parameters[RandomParameterName]; }
     104    }
    103105    public ScopeTreeLookupParameter<SymbolicExpressionTree> SymbolicExpressionTreeParameter {
    104106      get { return (ScopeTreeLookupParameter<SymbolicExpressionTree>)Parameters[SymbolicExpressionTreeParameterName]; }
    105107    }
    106     public ScopeTreeLookupParameter<DoubleValue> QualityParameter {
    107       get { return (ScopeTreeLookupParameter<DoubleValue>)Parameters[QualityParameterName]; }
    108     }
    109     public ScopeTreeLookupParameter<DoubleValue> AlphaParameter {
    110       get { return (ScopeTreeLookupParameter<DoubleValue>)Parameters[AlphaParameterName]; }
    111     }
    112     public ScopeTreeLookupParameter<DoubleValue> BetaParameter {
    113       get { return (ScopeTreeLookupParameter<DoubleValue>)Parameters[BetaParameterName]; }
    114     }
    115108    public IValueLookupParameter<ISymbolicExpressionTreeInterpreter> SymbolicExpressionTreeInterpreterParameter {
    116109      get { return (IValueLookupParameter<ISymbolicExpressionTreeInterpreter>)Parameters[SymbolicExpressionTreeInterpreterParameterName]; }
     
    125118      get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesEndParameterName]; }
    126119    }
     120    public IValueParameter<PercentValue> RelativeNumberOfEvaluatedSamplesParameter {
     121      get { return (IValueParameter<PercentValue>)Parameters[RelativeNumberOfEvaluatedSamplesParameterName]; }
     122    }
     123
    127124    public IValueLookupParameter<DoubleValue> UpperEstimationLimitParameter {
    128125      get { return (IValueLookupParameter<DoubleValue>)Parameters[UpperEstimationLimitParameterName]; }
     
    152149    #endregion
    153150    #region properties
     151    public IRandom Random {
     152      get { return RandomParameter.ActualValue; }
     153    }
    154154    public ItemArray<SymbolicExpressionTree> SymbolicExpressionTree {
    155155      get { return SymbolicExpressionTreeParameter.ActualValue; }
    156156    }
    157     public ItemArray<DoubleValue> Quality {
    158       get { return QualityParameter.ActualValue; }
    159     }
    160     public ItemArray<DoubleValue> Alpha {
    161       get { return AlphaParameter.ActualValue; }
    162     }
    163     public ItemArray<DoubleValue> Beta {
    164       get { return BetaParameter.ActualValue; }
    165     }
    166157    public ISymbolicExpressionTreeInterpreter SymbolicExpressionTreeInterpreter {
    167158      get { return SymbolicExpressionTreeInterpreterParameter.ActualValue; }
     
    176167      get { return ValidationSamplesEndParameter.ActualValue; }
    177168    }
     169    public PercentValue RelativeNumberOfEvaluatedSamples {
     170      get { return RelativeNumberOfEvaluatedSamplesParameter.Value; }
     171    }
     172
    178173    public DoubleValue UpperEstimationLimit {
    179174      get { return UpperEstimationLimitParameter.ActualValue; }
     
    196191    public FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer()
    197192      : base() {
     193      Parameters.Add(new LookupParameter<IRandom>(RandomParameterName, "The random generator to use."));
    198194      Parameters.Add(new ScopeTreeLookupParameter<SymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees to analyze."));
    199195      Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>(QualityParameterName, "The quality of the symbolic expression trees to analyze."));
    200       Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>(AlphaParameterName, "The alpha parameter for linear scaling."));
    201       Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>(BetaParameterName, "The beta parameter for linear scaling."));
    202196      Parameters.Add(new ValueLookupParameter<ISymbolicExpressionTreeInterpreter>(SymbolicExpressionTreeInterpreterParameterName, "The interpreter that should be used for the analysis of symbolic expression trees."));
    203197      Parameters.Add(new ValueLookupParameter<DataAnalysisProblemData>(ProblemDataParameterName, "The problem data for which the symbolic expression tree is a solution."));
    204198      Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesStartParameterName, "The first index of the validation partition of the data set."));
    205199      Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesEndParameterName, "The last index of the validation partition of the data set."));
     200      Parameters.Add(new ValueParameter<PercentValue>(RelativeNumberOfEvaluatedSamplesParameterName, "The relative number of samples of the dataset partition, which should be randomly chosen for evaluation between the start and end index.", new PercentValue(1)));
    206201      Parameters.Add(new ValueLookupParameter<DoubleValue>(UpperEstimationLimitParameterName, "The upper estimation limit that was set for the evaluation of the symbolic expression trees."));
    207202      Parameters.Add(new ValueLookupParameter<DoubleValue>(LowerEstimationLimitParameterName, "The lower estimation limit that was set for the evaluation of the symbolic expression trees."));
     
    218213
    219214    public override IOperation Apply() {
    220       var alphas = Alpha;
    221       var betas = Beta;
    222215      var trees = SymbolicExpressionTree;
    223216
    224       IEnumerable<SymbolicExpressionTree> scaledTrees;
    225       if (alphas.Length == trees.Length) {
    226         scaledTrees = from i in Enumerable.Range(0, trees.Length)
    227                       select SymbolicRegressionSolutionLinearScaler.Scale(trees[i], alphas[i].Value, betas[i].Value);
    228       } else {
    229         scaledTrees = trees;
    230       }
    231 
    232217      string targetVariable = ProblemData.TargetVariable.Value;
     218
     219      // select a random subset of rows in the validation set
    233220      int validationStart = ValidiationSamplesStart.Value;
    234221      int validationEnd = ValidationSamplesEnd.Value;
     222      uint seed = (uint)Random.Next();
     223      int count = (int)((validationEnd - validationStart) * RelativeNumberOfEvaluatedSamples.Value);
     224      if (count == 0) count = 1;
     225      IEnumerable<int> rows = RandomEnumerable.SampleRandomNumbers(seed, validationStart, validationEnd, count);
     226
    235227      double upperEstimationLimit = UpperEstimationLimit != null ? UpperEstimationLimit.Value : double.PositiveInfinity;
    236228      double lowerEstimationLimit = LowerEstimationLimit != null ? LowerEstimationLimit.Value : double.NegativeInfinity;
    237229
    238       double bestValidationMse = double.MaxValue;
     230      double bestValidationRSquared = -1.0;
    239231      SymbolicExpressionTree bestTree = null;
    240232
    241       OnlineMeanSquaredErrorEvaluator mseEvaluator = new OnlineMeanSquaredErrorEvaluator();
    242       foreach (var scaledTree in scaledTrees) {
    243         double validationMse = SymbolicRegressionMeanSquaredErrorEvaluator.Calculate(SymbolicExpressionTreeInterpreter, scaledTree,
     233      foreach (var tree in trees) {
     234        double validationRSquared = SymbolicRegressionPearsonsRSquaredEvaluator.Calculate(SymbolicExpressionTreeInterpreter, tree,
    244235          lowerEstimationLimit, upperEstimationLimit,
    245236          ProblemData.Dataset, targetVariable,
    246          Enumerable.Range(validationStart, validationEnd - validationStart));
    247 
    248         if (validationMse < bestValidationMse) {
    249           bestValidationMse = validationMse;
    250           bestTree = scaledTree;
     237         rows);
     238
     239        if (validationRSquared > bestValidationRSquared) {
     240          bestValidationRSquared = validationRSquared;
     241          bestTree = tree;
    251242        }
    252243      }
    253244
    254       if (BestSolutionQualityParameter.ActualValue == null || BestSolutionQualityParameter.ActualValue.Value > bestValidationMse) {
     245
     246      // if the best validation tree is better than the current best solution => update
     247      if (BestSolutionQualityParameter.ActualValue == null || BestSolutionQualityParameter.ActualValue.Value < bestValidationRSquared) {
     248        // calculate scaling parameters and validation MSE only for the best tree
     249        // scale tree for solution
     250        double alpha, beta;
     251        double validationMSE = SymbolicRegressionScaledMeanSquaredErrorEvaluator.Calculate(SymbolicExpressionTreeInterpreter, bestTree,
     252          lowerEstimationLimit, upperEstimationLimit,
     253          ProblemData.Dataset, targetVariable,
     254          rows, out beta, out alpha);
     255
     256        var scaledTree = SymbolicRegressionSolutionLinearScaler.Scale(bestTree, alpha, beta);
    255257        var model = new SymbolicRegressionModel((ISymbolicExpressionTreeInterpreter)SymbolicExpressionTreeInterpreter.Clone(),
    256           bestTree);
     258          scaledTree);
    257259        var solution = new SymbolicRegressionSolution(ProblemData, model, lowerEstimationLimit, upperEstimationLimit);
    258260        solution.Name = BestSolutionParameterName;
     
    260262
    261263        BestSolutionParameter.ActualValue = solution;
    262         BestSolutionQualityParameter.ActualValue = new DoubleValue(bestValidationMse);
     264        BestSolutionQualityParameter.ActualValue = new DoubleValue(bestValidationRSquared);
    263265
    264266        BestSymbolicRegressionSolutionAnalyzer.UpdateBestSolutionResults(solution, ProblemData, Results, Generations, VariableFrequencies);
     
    271273      }
    272274      Results[BestSolutionQualityParameterName].Value = new DoubleValue(BestSolutionQualityParameter.ActualValue.Value);
    273       Results[CurrentBestValidationQualityParameterName].Value = new DoubleValue(bestValidationMse);
     275      Results[CurrentBestValidationQualityParameterName].Value = new DoubleValue(bestValidationRSquared);
    274276
    275277      DataTable validationValues = (DataTable)Results[BestSolutionQualityValuesParameterName].Value;
    276278      AddValue(validationValues, BestSolutionQualityParameter.ActualValue.Value, BestSolutionQualityParameterName, BestSolutionQualityParameterName);
    277       AddValue(validationValues, bestValidationMse, CurrentBestValidationQualityParameterName, CurrentBestValidationQualityParameterName);
     279      AddValue(validationValues, bestValidationRSquared, CurrentBestValidationQualityParameterName, CurrentBestValidationQualityParameterName);
    278280      return base.Apply();
    279281    }
    280282
    281283    [StorableHook(HookType.AfterDeserialization)]
    282     private void Initialize() {
    283       if (!Parameters.ContainsKey(AlphaParameterName)) {
    284         Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>(AlphaParameterName, "The alpha parameter for linear scaling."));
    285       }
    286       if (!Parameters.ContainsKey(BetaParameterName)) {
    287         Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>(BetaParameterName, "The beta parameter for linear scaling."));
    288       }
    289       if (!Parameters.ContainsKey(VariableFrequenciesParameterName)) {
    290         Parameters.Add(new LookupParameter<DataTable>(VariableFrequenciesParameterName, "The variable frequencies table to use for the calculation of variable impacts"));
    291       }
    292       if (!Parameters.ContainsKey(GenerationsParameterName)) {
    293         Parameters.Add(new LookupParameter<IntValue>(GenerationsParameterName, "The number of generations calculated so far."));
    294       }
    295     }
     284    private void Initialize() { }
    296285
    297286    private static void AddValue(DataTable table, double data, string name, string description) {
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/SymbolicRegressionProblem.cs

    r4118 r4127  
    376376          fixedBestValidationSolutionAnalyzer.ValidationSamplesEndParameter.Value = ValidationSamplesEnd;
    377377          fixedBestValidationSolutionAnalyzer.BestKnownQualityParameter.ActualName = BestKnownQualityParameter.Name;
    378           fixedBestValidationSolutionAnalyzer.QualityParameter.ActualName = Evaluator.QualityParameter.ActualName;
    379378        }
    380         var bestValidationSolutionAnalyzer = analyzer as FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer;
     379        var bestValidationSolutionAnalyzer = analyzer as ValidationBestScaledSymbolicRegressionSolutionAnalyzer;
    381380        if (bestValidationSolutionAnalyzer != null) {
    382381          bestValidationSolutionAnalyzer.ProblemDataParameter.ActualName = DataAnalysisProblemDataParameter.Name;
Note: See TracChangeset for help on using the changeset viewer.