Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
05/11/11 15:56:17 (14 years ago)
Author:
gkronber
Message:

#1450: merged r5816 from the branch and implemented a first version of ensemble solutions for regression. The ensembles are only produced by cross-validation.

Location:
trunk/sources
Files:
7 edited
4 copied

Legend:

Unmodified
Added
Removed
  • trunk/sources

  • trunk/sources/HeuristicLab.Problems.DataAnalysis

  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/HeuristicLab.Problems.DataAnalysis-3.4.csproj

    r5943 r6184  
    109109  <ItemGroup>
    110110    <Compile Include="DoubleLimit.cs" />
     111    <Compile Include="Implementation\Classification\ClassificationEnsembleModel.cs">
     112      <SubType>Code</SubType>
     113    </Compile>
    111114    <Compile Include="Implementation\Classification\ClassificationProblemData.cs" />
    112115    <Compile Include="Implementation\Classification\ClassificationProblem.cs" />
     
    115118    <Compile Include="Implementation\Clustering\ClusteringProblemData.cs" />
    116119    <Compile Include="Implementation\Clustering\ClusteringSolution.cs" />
    117     <Compile Include="Implementation\Classification\ClassificationEnsembleModel.cs" />
     120    <Compile Include="Implementation\Regression\RegressionEnsembleModel.cs">
     121      <SubType>Code</SubType>
     122    </Compile>
     123    <Compile Include="Implementation\Regression\RegressionEnsembleSolution.cs" />
     124    <Compile Include="Interfaces\Classification\IClassificationEnsembleModel.cs">
     125      <SubType>Code</SubType>
     126    </Compile>
     127    <Compile Include="Interfaces\Classification\IClassificationEnsembleSolution.cs" />
    118128    <Compile Include="Interfaces\Classification\IDiscriminantFunctionThresholdCalculator.cs" />
    119     <Compile Include="Interfaces\Classification\IClassificationEnsembleModel.cs" />
    120     <Compile Include="Interfaces\Regression\IRegressionEnsembleModel.cs" />
     129    <Compile Include="Interfaces\Regression\IRegressionEnsembleModel.cs">
     130      <SubType>Code</SubType>
     131    </Compile>
     132    <Compile Include="Interfaces\Regression\IRegressionEnsembleSolution.cs" />
    121133    <Compile Include="OnlineCalculators\OnlineLinearScalingParameterCalculator.cs" />
    122     <Compile Include="Implementation\Regression\RegressionEnsembleModel.cs" />
    123134    <Compile Include="Implementation\Classification\DiscriminantFunctionClassificationModel.cs" />
    124135    <Compile Include="Implementation\Classification\DiscriminantFunctionClassificationSolution.cs" />
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationSolution.cs

    r5942 r6184  
    3030namespace HeuristicLab.Problems.DataAnalysis {
    3131  /// <summary>
    32   /// Abstract base class for classification data analysis solutions
     32  /// Represents a classification data analysis solution
    3333  /// </summary>
    3434  [StorableClass]
    35   public abstract class ClassificationSolution : DataAnalysisSolution, IClassificationSolution {
     35  public class ClassificationSolution : DataAnalysisSolution, IClassificationSolution {
    3636    private const string TrainingAccuracyResultName = "Accuracy (training)";
    3737    private const string TestAccuracyResultName = "Accuracy (test)";
     
    6767      Add(new Result(TestAccuracyResultName, "Accuracy of the model on the test partition (percentage of correctly classified instances).", new PercentValue()));
    6868      RecalculateResults();
     69    }
     70
     71    public override IDeepCloneable Clone(Cloner cloner) {
     72      return new ClassificationSolution(this, cloner);
    6973    }
    7074
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Clustering/ClusteringSolution.cs

    r5809 r6184  
    2727namespace HeuristicLab.Problems.DataAnalysis {
    2828  /// <summary>
    29   /// Abstract base class for clustering data analysis solutions
     29  /// Represents a clustering data analysis solution
    3030  /// </summary>
    3131  [StorableClass]
    32   public abstract class ClusteringSolution : DataAnalysisSolution, IClusteringSolution {
     32  public class ClusteringSolution : DataAnalysisSolution, IClusteringSolution {
    3333
    3434    [StorableConstructor]
     
    3939    public ClusteringSolution(IClusteringModel model, IClusteringProblemData problemData)
    4040      : base(model, problemData) {
     41    }
     42
     43    public override IDeepCloneable Clone(Cloner cloner) {
     44      return new ClusteringSolution(this, cloner);
    4145    }
    4246
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionEnsembleSolution.cs

    r6180 r6184  
    7575        throw new ArgumentException();
    7676      }
     77
     78      RecalculateResults();
     79    }
     80
     81    private void RecalculateResults() {
     82      double[] estimatedTrainingValues = EstimatedTrainingValues.ToArray(); // cache values
     83      var trainingIndizes = Enumerable.Range(ProblemData.TrainingPartition.Start,
     84        ProblemData.TrainingPartition.End - ProblemData.TrainingPartition.Start);
     85      IEnumerable<double> originalTrainingValues = ProblemData.Dataset.GetEnumeratedVariableValues(ProblemData.TargetVariable, trainingIndizes);
     86      double[] estimatedTestValues = EstimatedTestValues.ToArray(); // cache values
     87      IEnumerable<double> originalTestValues = ProblemData.Dataset.GetEnumeratedVariableValues(ProblemData.TargetVariable, ProblemData.TestIndizes);
     88
     89      OnlineCalculatorError errorState;
     90      double trainingMSE = OnlineMeanSquaredErrorCalculator.Calculate(estimatedTrainingValues, originalTrainingValues, out errorState);
     91      TrainingMeanSquaredError = errorState == OnlineCalculatorError.None ? trainingMSE : double.NaN;
     92      double testMSE = OnlineMeanSquaredErrorCalculator.Calculate(estimatedTestValues, originalTestValues, out errorState);
     93      TestMeanSquaredError = errorState == OnlineCalculatorError.None ? testMSE : double.NaN;
     94
     95      double trainingR2 = OnlinePearsonsRSquaredCalculator.Calculate(estimatedTrainingValues, originalTrainingValues, out errorState);
     96      TrainingRSquared = errorState == OnlineCalculatorError.None ? trainingR2 : double.NaN;
     97      double testR2 = OnlinePearsonsRSquaredCalculator.Calculate(estimatedTestValues, originalTestValues, out errorState);
     98      TestRSquared = errorState == OnlineCalculatorError.None ? testR2 : double.NaN;
     99
     100      double trainingRelError = OnlineMeanAbsolutePercentageErrorCalculator.Calculate(estimatedTrainingValues, originalTrainingValues, out errorState);
     101      TrainingRelativeError = errorState == OnlineCalculatorError.None ? trainingRelError : double.NaN;
     102      double testRelError = OnlineMeanAbsolutePercentageErrorCalculator.Calculate(estimatedTestValues, originalTestValues, out errorState);
     103      TestRelativeError = errorState == OnlineCalculatorError.None ? testRelError : double.NaN;
     104
     105      double trainingNMSE = OnlineNormalizedMeanSquaredErrorCalculator.Calculate(estimatedTrainingValues, originalTrainingValues, out errorState);
     106      TrainingNormalizedMeanSquaredError = errorState == OnlineCalculatorError.None ? trainingNMSE : double.NaN;
     107      double testNMSE = OnlineNormalizedMeanSquaredErrorCalculator.Calculate(estimatedTestValues, originalTestValues, out errorState);
     108      TestNormalizedMeanSquaredError = errorState == OnlineCalculatorError.None ? testNMSE : double.NaN;
    77109    }
    78110
     
    83115    public override IEnumerable<double> EstimatedTrainingValues {
    84116      get {
     117        var rows = Enumerable.Range(ProblemData.TrainingPartition.Start, ProblemData.TrainingPartition.End - ProblemData.TrainingPartition.Start);
    85118        var estimatedValuesEnumerators = (from model in Model.Models
    86                                           select new { Model = model, EstimatedValuesEnumerator = model.GetEstimatedValues(ProblemData.Dataset, ProblemData.TestIndizes).GetEnumerator() })
     119                                          select new { Model = model, EstimatedValuesEnumerator = model.GetEstimatedValues(ProblemData.Dataset, rows).GetEnumerator() })
    87120                                         .ToList();
    88         var rowsEnumerator = ProblemData.TestIndizes.GetEnumerator();
    89         while (rowsEnumerator.MoveNext() & estimatedValuesEnumerators.All(en => en.EstimatedValuesEnumerator.MoveNext())) {
     121        var rowsEnumerator = rows.GetEnumerator();
     122        while (rowsEnumerator.MoveNext() & estimatedValuesEnumerators.Select(en => en.EstimatedValuesEnumerator.MoveNext()).Aggregate(true, (acc, b) => acc & b)) {
    90123          int currentRow = rowsEnumerator.Current;
    91124
    92125          var selectedEnumerators = from pair in estimatedValuesEnumerators
    93126                                    where trainingPartitions == null || !trainingPartitions.ContainsKey(pair.Model) ||
    94                                          (trainingPartitions[pair.Model].Start >= currentRow && trainingPartitions[pair.Model].End < currentRow)
     127                                         (trainingPartitions[pair.Model].Start <= currentRow && currentRow < trainingPartitions[pair.Model].End)
    95128                                    select pair.EstimatedValuesEnumerator;
    96129          yield return AggregateEstimatedValues(selectedEnumerators.Select(x => x.Current));
     
    105138                                         .ToList();
    106139        var rowsEnumerator = ProblemData.TestIndizes.GetEnumerator();
    107         while (rowsEnumerator.MoveNext() & estimatedValuesEnumerators.All(en => en.EstimatedValuesEnumerator.MoveNext())) {
     140        while (rowsEnumerator.MoveNext() & estimatedValuesEnumerators.Select(en => en.EstimatedValuesEnumerator.MoveNext()).Aggregate(true, (acc, b) => acc & b)) {
    108141          int currentRow = rowsEnumerator.Current;
    109142
    110143          var selectedEnumerators = from pair in estimatedValuesEnumerators
    111144                                    where testPartitions == null || !testPartitions.ContainsKey(pair.Model) ||
    112                                       (testPartitions[pair.Model].Start >= currentRow && testPartitions[pair.Model].End < currentRow)
     145                                      (testPartitions[pair.Model].Start <= currentRow && currentRow < testPartitions[pair.Model].End)
    113146                                    select pair.EstimatedValuesEnumerator;
    114147
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionSolution.cs

    r5962 r6184  
    3030namespace HeuristicLab.Problems.DataAnalysis {
    3131  /// <summary>
    32   /// Abstract base class for regression data analysis solutions
     32  /// Represents a regression data analysis solution
    3333  /// </summary>
    3434  [StorableClass]
    35   public abstract class RegressionSolution : DataAnalysisSolution, IRegressionSolution {
     35  public class RegressionSolution : DataAnalysisSolution, IRegressionSolution {
    3636    private const string TrainingMeanSquaredErrorResultName = "Mean squared error (training)";
    3737    private const string TestMeanSquaredErrorResultName = "Mean squared error (test)";
     
    5555    public double TrainingMeanSquaredError {
    5656      get { return ((DoubleValue)this[TrainingMeanSquaredErrorResultName].Value).Value; }
    57       private set { ((DoubleValue)this[TrainingMeanSquaredErrorResultName].Value).Value = value; }
     57      protected set { ((DoubleValue)this[TrainingMeanSquaredErrorResultName].Value).Value = value; }
    5858    }
    5959
    6060    public double TestMeanSquaredError {
    6161      get { return ((DoubleValue)this[TestMeanSquaredErrorResultName].Value).Value; }
    62       private set { ((DoubleValue)this[TestMeanSquaredErrorResultName].Value).Value = value; }
     62      protected set { ((DoubleValue)this[TestMeanSquaredErrorResultName].Value).Value = value; }
    6363    }
    6464
    6565    public double TrainingRSquared {
    6666      get { return ((DoubleValue)this[TrainingSquaredCorrelationResultName].Value).Value; }
    67       private set { ((DoubleValue)this[TrainingSquaredCorrelationResultName].Value).Value = value; }
     67      protected set { ((DoubleValue)this[TrainingSquaredCorrelationResultName].Value).Value = value; }
    6868    }
    6969
    7070    public double TestRSquared {
    7171      get { return ((DoubleValue)this[TestSquaredCorrelationResultName].Value).Value; }
    72       private set { ((DoubleValue)this[TestSquaredCorrelationResultName].Value).Value = value; }
     72      protected set { ((DoubleValue)this[TestSquaredCorrelationResultName].Value).Value = value; }
    7373    }
    7474
    7575    public double TrainingRelativeError {
    7676      get { return ((DoubleValue)this[TrainingRelativeErrorResultName].Value).Value; }
    77       private set { ((DoubleValue)this[TrainingRelativeErrorResultName].Value).Value = value; }
     77      protected set { ((DoubleValue)this[TrainingRelativeErrorResultName].Value).Value = value; }
    7878    }
    7979
    8080    public double TestRelativeError {
    8181      get { return ((DoubleValue)this[TestRelativeErrorResultName].Value).Value; }
    82       private set { ((DoubleValue)this[TestRelativeErrorResultName].Value).Value = value; }
     82      protected set { ((DoubleValue)this[TestRelativeErrorResultName].Value).Value = value; }
    8383    }
    8484
    8585    public double TrainingNormalizedMeanSquaredError {
    8686      get { return ((DoubleValue)this[TrainingNormalizedMeanSquaredErrorResultName].Value).Value; }
    87       private set { ((DoubleValue)this[TrainingNormalizedMeanSquaredErrorResultName].Value).Value = value; }
     87      protected set { ((DoubleValue)this[TrainingNormalizedMeanSquaredErrorResultName].Value).Value = value; }
    8888    }
    8989
    9090    public double TestNormalizedMeanSquaredError {
    9191      get { return ((DoubleValue)this[TestNormalizedMeanSquaredErrorResultName].Value).Value; }
    92       private set { ((DoubleValue)this[TestNormalizedMeanSquaredErrorResultName].Value).Value = value; }
     92      protected set { ((DoubleValue)this[TestNormalizedMeanSquaredErrorResultName].Value).Value = value; }
    9393    }
    9494
     
    113113    }
    114114
     115    public override IDeepCloneable Clone(Cloner cloner) {
     116      return new RegressionSolution(this, cloner);
     117    }
     118
    115119    protected override void OnProblemDataChanged(EventArgs e) {
    116120      base.OnProblemDataChanged(e);
     
    122126    }
    123127
    124     protected void RecalculateResults() {
     128    private void RecalculateResults() {
    125129      double[] estimatedTrainingValues = EstimatedTrainingValues.ToArray(); // cache values
    126130      IEnumerable<double> originalTrainingValues = ProblemData.Dataset.GetEnumeratedVariableValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes);
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Tests/StatisticCalculatorsTest.cs

    r5944 r6184  
    7878          double variance = calculator.Variance;
    7979
    80           Assert.AreEqual(mean_alglib, mean, 1E-6 * scale);
    81           Assert.AreEqual(variance_alglib, variance, 1E-6 * scale);
     80          Assert.AreEqual(mean_alglib.ToString(), mean.ToString());
     81          Assert.AreEqual(variance_alglib.ToString(), variance.ToString());
    8282        }
    8383      }
     
    109109            double r2 = r2Calculator.RSquared;
    110110
    111             Assert.AreEqual(r2_alglib, r2, 1E-6 * Math.Max(c1Scale, c2Scale));
     111            Assert.AreEqual(r2_alglib.ToString(), r2.ToString());
    112112          }
    113113        }
    114114      }
    115115    }
     116    [TestMethod]
     117    public void CalculatePearsonsRSquaredOfConstantTest() {
     118      System.Random random = new System.Random(31415);
     119      int n = 12;
     120      int cols = testData.GetLength(1);
     121      for (int c1 = 0; c1 < cols; c1++) {
     122        double c1Scale = random.NextDouble() * 1E7;
     123        double c2Scale = 1.0;
     124        IEnumerable<double> x = from rows in Enumerable.Range(0, n)
     125                                select testData[rows, c1] * c1Scale;
     126        IEnumerable<double> y = (new List<double>() { 150494407424305.44 })
     127          .Concat(Enumerable.Repeat(150494407424305.47, n - 1));
     128        double[] xs = x.ToArray();
     129        double[] ys = y.ToArray();
     130        double r2_alglib = alglib.pearsoncorrelation(xs, ys, n);
     131        r2_alglib *= r2_alglib;
     132
     133        var r2Calculator = new OnlinePearsonsRSquaredCalculator();
     134        for (int i = 0; i < n; i++) {
     135          r2Calculator.Add(xs[i], ys[i]);
     136        }
     137        double r2 = r2Calculator.RSquared;
     138
     139        Assert.AreEqual(r2_alglib.ToString(), r2.ToString());
     140      }
     141    }
    116142  }
    117143}
Note: See TracChangeset for help on using the changeset viewer.