Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis.FeatureSelection/3.3/Evaluators/LinearRegressionFeatureSelectionEvaluator.cs @ 11883

Last change on this file since 11883 was 5275, checked in by gkronber, 14 years ago

Merged changes from trunk to data analysis exploration branch and added fractional distance metric evaluator. #1142

File size: 4.8 KB
RevLine 
[4082]1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22
23using HeuristicLab.Optimization;
24using HeuristicLab.Core;
25using HeuristicLab.Common;
26using HeuristicLab.Encodings.BinaryVectorEncoding;
27using HeuristicLab.Operators;
28using HeuristicLab.Parameters;
29using HeuristicLab.Data;
30using HeuristicLab.Problems.DataAnalysis.Regression.LinearRegression;
31using System.Linq;
32using System.Collections.Generic;
33using HeuristicLab.Problems.DataAnalysis.Regression.Symbolic;
34using HeuristicLab.Problems.DataAnalysis.Evaluators;
[5275]35using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
[4082]36namespace HeuristicLab.Problems.DataAnalysis.FeatureSelection {
37  public class LinearRegressionFeatureSelectionEvaluator : SingleSuccessorOperator, IFeatureSelectionEvaluator {
38    #region parameter properties
39    public ILookupParameter<DataAnalysisProblemData> DataAnalysisProblemDataParameter {
40      get { return (ILookupParameter<DataAnalysisProblemData>)Parameters["DataAnalysisProblemData"]; }
41    }
42
43    public ILookupParameter<BinaryVector> SolutionParameter {
44      get { return (ILookupParameter<BinaryVector>)Parameters["FeatureArray"]; }
45    }
46    public ILookupParameter<DoubleArray> QualitiesParameter {
47      get { return (ILookupParameter<DoubleArray>)Parameters["Qualities"]; }
48    }
49
50    #endregion
51    #region properties
52    public DataAnalysisProblemData DataAnalysisProblemData {
53      get { return DataAnalysisProblemDataParameter.ActualValue; }
54    }
55    public BinaryVector FeatureArray {
56      get { return SolutionParameter.ActualValue; }
57    }
58    #endregion
[5275]59    [StorableConstructor]
60    protected LinearRegressionFeatureSelectionEvaluator(bool deserializing) : base(deserializing) { }
61    protected LinearRegressionFeatureSelectionEvaluator(LinearRegressionFeatureSelectionEvaluator original, Cloner cloner)
62      : base(original, cloner) {
63    }
[4082]64    public LinearRegressionFeatureSelectionEvaluator()
65      : base() {
66        Parameters.Add(new LookupParameter<DataAnalysisProblemData>("DataAnalysisProblemData", "The data for the data analysis problem."));
67      Parameters.Add(new LookupParameter<BinaryVector>("FeatureArray", "The binary array of features to use for linear regression."));
68      Parameters.Add(new LookupParameter<DoubleArray>("Qualities", "The qualities of the linear regression solution (MSE, size)."));
69    }
[5275]70    public override IDeepCloneable Clone(Cloner cloner) {
71      return new LinearRegressionFeatureSelectionEvaluator(this, cloner);
72    }
[4082]73    public override IOperation Apply() {
74      var dataset = DataAnalysisProblemData.Dataset;
75      string targetVariable = DataAnalysisProblemData.TargetVariable.Value;
76
77      int start = DataAnalysisProblemData.TrainingSamplesStart.Value;
78      int end = DataAnalysisProblemData.TrainingSamplesEnd.Value;
79      List<string> allowedInputVariables = new List<string>();
80      int c = 0;
81      foreach (var indexedItem in DataAnalysisProblemData.InputVariables.CheckedItems) {
82        if (FeatureArray[c]) {
83          allowedInputVariables.Add(indexedItem.Value.Value);
84        }
85        c++;
86      }
87      int featureCount;
88      double mse;
89      if (allowedInputVariables.Count > 0) {
90        double rmsError, cvRmsError;
91        var tree = LinearRegressionSolutionCreator.CreateSymbolicExpressionTree(dataset, targetVariable, allowedInputVariables, start, end, out rmsError, out cvRmsError);
92        featureCount = allowedInputVariables.Count;
[4109]93        mse = cvRmsError * cvRmsError;
[4082]94      } else {
95        featureCount = 0;
96        // when zero features are selected the linear regression should produce a constant (the mean)
97        // the mse is then the variance of the target variable values
98        mse = dataset.GetEnumeratedVariableValues(targetVariable, start, end).Variance();
99      }
100      DoubleArray qualities = new DoubleArray(2);
101      qualities[0] = featureCount;
102      qualities[1] = mse;
103
104      QualitiesParameter.ActualValue = qualities;
105      return base.Apply();
106    }
107  }
108}
Note: See TracBrowser for help on using the repository browser.