Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
07/10/12 15:26:13 (12 years ago)
Author:
sforsten
Message:

#1292:

  • merged r8034:8179 from trunk
  • added BackgroundWorker
  • added ProgressBar
  • added SpearmansRankCorrelationCoefficientCalculator
  • corrected a bug in HoeffdingsDependenceCalculator
  • made some changes to the GUI
Location:
branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis
Files:
2 added
19 edited

Legend:

Unmodified
Added
Removed
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis

  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/HeuristicLab.Problems.DataAnalysis-3.4.csproj

    r8038 r8276  
    155155    <Compile Include="Implementation\Clustering\ClusteringProblemData.cs" />
    156156    <Compile Include="Implementation\Clustering\ClusteringSolution.cs" />
     157    <Compile Include="Implementation\ExtendedHeatMap.cs" />
    157158    <Compile Include="Implementation\Regression\ConstantRegressionModel.cs" />
    158159    <Compile Include="Implementation\Regression\ConstantRegressionSolution.cs" />
     
    213214    <Compile Include="OnlineCalculators\OnlinePearsonsRSquaredCalculator.cs" />
    214215    <Compile Include="Implementation\Regression\RegressionSolution.cs" />
     216    <Compile Include="OnlineCalculators\SpearmansRankCorrelationCoefficientCalculator.cs" />
    215217    <Compile Include="Plugin.cs" />
    216218    <Compile Include="Implementation\Classification\ThresholdCalculators\AccuracyMaximizationThresholdCalculator.cs" />
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationEnsembleSolution.cs

    r7259 r8276  
    3737  [Creatable("Data Analysis - Ensembles")]
    3838  public sealed class ClassificationEnsembleSolution : ClassificationSolution, IClassificationEnsembleSolution {
     39    private readonly Dictionary<int, double> trainingEvaluationCache = new Dictionary<int, double>();
     40    private readonly Dictionary<int, double> testEvaluationCache = new Dictionary<int, double>();
     41
    3942    public new IClassificationEnsembleModel Model {
    4043      get { return (IClassificationEnsembleModel)base.Model; }
     
    8588      }
    8689
     90      trainingEvaluationCache = new Dictionary<int, double>(original.ProblemData.TrainingIndices.Count());
     91      testEvaluationCache = new Dictionary<int, double>(original.ProblemData.TestIndices.Count());
     92
    8793      classificationSolutions = cloner.Clone(original.classificationSolutions);
    8894      RegisterClassificationSolutionsEventHandler();
     
    128134      }
    129135
     136      trainingEvaluationCache = new Dictionary<int, double>(problemData.TrainingIndices.Count());
     137      testEvaluationCache = new Dictionary<int, double>(problemData.TestIndices.Count());
     138
    130139      RegisterClassificationSolutionsEventHandler();
    131140      classificationSolutions.AddRange(solutions);
     
    148157    public override IEnumerable<double> EstimatedTrainingClassValues {
    149158      get {
    150         var rows = ProblemData.TrainingIndizes;
    151         var estimatedValuesEnumerators = (from model in Model.Models
    152                                           select new { Model = model, EstimatedValuesEnumerator = model.GetEstimatedClassValues(ProblemData.Dataset, rows).GetEnumerator() })
    153                                          .ToList();
    154         var rowsEnumerator = rows.GetEnumerator();
    155         // aggregate to make sure that MoveNext is called for all enumerators
    156         while (rowsEnumerator.MoveNext() & estimatedValuesEnumerators.Select(en => en.EstimatedValuesEnumerator.MoveNext()).Aggregate(true, (acc, b) => acc & b)) {
    157           int currentRow = rowsEnumerator.Current;
    158 
    159           var selectedEnumerators = from pair in estimatedValuesEnumerators
    160                                     where RowIsTrainingForModel(currentRow, pair.Model) && !RowIsTestForModel(currentRow, pair.Model)
    161                                     select pair.EstimatedValuesEnumerator;
    162           yield return AggregateEstimatedClassValues(selectedEnumerators.Select(x => x.Current));
     159        var rows = ProblemData.TrainingIndices;
     160        var rowsToEvaluate = rows.Except(trainingEvaluationCache.Keys);
     161        var rowsEnumerator = rowsToEvaluate.GetEnumerator();
     162        var valuesEnumerator = GetEstimatedValues(rowsToEvaluate, (r, m) => RowIsTrainingForModel(r, m) && !RowIsTestForModel(r, m)).GetEnumerator();
     163
     164        while (rowsEnumerator.MoveNext() & valuesEnumerator.MoveNext()) {
     165          trainingEvaluationCache.Add(rowsEnumerator.Current, valuesEnumerator.Current);
    163166        }
     167
     168        return rows.Select(row => trainingEvaluationCache[row]);
    164169      }
    165170    }
     
    167172    public override IEnumerable<double> EstimatedTestClassValues {
    168173      get {
    169         var rows = ProblemData.TestIndizes;
    170         var estimatedValuesEnumerators = (from model in Model.Models
    171                                           select new { Model = model, EstimatedValuesEnumerator = model.GetEstimatedClassValues(ProblemData.Dataset, rows).GetEnumerator() })
    172                                          .ToList();
    173         var rowsEnumerator = ProblemData.TestIndizes.GetEnumerator();
    174         // aggregate to make sure that MoveNext is called for all enumerators
    175         while (rowsEnumerator.MoveNext() & estimatedValuesEnumerators.Select(en => en.EstimatedValuesEnumerator.MoveNext()).Aggregate(true, (acc, b) => acc & b)) {
    176           int currentRow = rowsEnumerator.Current;
    177 
    178           var selectedEnumerators = from pair in estimatedValuesEnumerators
    179                                     where RowIsTestForModel(currentRow, pair.Model)
    180                                     select pair.EstimatedValuesEnumerator;
    181 
    182           yield return AggregateEstimatedClassValues(selectedEnumerators.Select(x => x.Current));
     174        var rows = ProblemData.TestIndices;
     175        var rowsToEvaluate = rows.Except(testEvaluationCache.Keys);
     176        var rowsEnumerator = rowsToEvaluate.GetEnumerator();
     177        var valuesEnumerator = GetEstimatedValues(rowsToEvaluate, RowIsTestForModel).GetEnumerator();
     178
     179        while (rowsEnumerator.MoveNext() & valuesEnumerator.MoveNext()) {
     180          testEvaluationCache.Add(rowsEnumerator.Current, valuesEnumerator.Current);
    183181        }
     182
     183        return rows.Select(row => testEvaluationCache[row]);
     184      }
     185    }
     186
     187    private IEnumerable<double> GetEstimatedValues(IEnumerable<int> rows, Func<int, IClassificationModel, bool> modelSelectionPredicate) {
     188      var estimatedValuesEnumerators = (from model in Model.Models
     189                                        select new { Model = model, EstimatedValuesEnumerator = model.GetEstimatedClassValues(ProblemData.Dataset, rows).GetEnumerator() })
     190                                       .ToList();
     191      var rowsEnumerator = rows.GetEnumerator();
     192      // aggregate to make sure that MoveNext is called for all enumerators
     193      while (rowsEnumerator.MoveNext() & estimatedValuesEnumerators.Select(en => en.EstimatedValuesEnumerator.MoveNext()).Aggregate(true, (acc, b) => acc & b)) {
     194        int currentRow = rowsEnumerator.Current;
     195
     196        var selectedEnumerators = from pair in estimatedValuesEnumerators
     197                                  where modelSelectionPredicate(currentRow, pair.Model)
     198                                  select pair.EstimatedValuesEnumerator;
     199
     200        yield return AggregateEstimatedClassValues(selectedEnumerators.Select(x => x.Current));
    184201      }
    185202    }
     
    196213
    197214    public override IEnumerable<double> GetEstimatedClassValues(IEnumerable<int> rows) {
    198       return from xs in GetEstimatedClassValueVectors(ProblemData.Dataset, rows)
    199              select AggregateEstimatedClassValues(xs);
     215      var rowsToEvaluate = rows.Except(evaluationCache.Keys);
     216      var rowsEnumerator = rowsToEvaluate.GetEnumerator();
     217      var valuesEnumerator = (from xs in GetEstimatedClassValueVectors(ProblemData.Dataset, rowsToEvaluate)
     218                              select AggregateEstimatedClassValues(xs))
     219                             .GetEnumerator();
     220
     221      while (rowsEnumerator.MoveNext() & valuesEnumerator.MoveNext()) {
     222        evaluationCache.Add(rowsEnumerator.Current, valuesEnumerator.Current);
     223      }
     224
     225      return rows.Select(row => evaluationCache[row]);
    200226    }
    201227
     
    223249
    224250    protected override void OnProblemDataChanged() {
     251      trainingEvaluationCache.Clear();
     252      testEvaluationCache.Clear();
     253      evaluationCache.Clear();
     254
    225255      IClassificationProblemData problemData = new ClassificationProblemData(ProblemData.Dataset,
    226256                                                                     ProblemData.AllowedInputVariables,
     
    251281    public void AddClassificationSolutions(IEnumerable<IClassificationSolution> solutions) {
    252282      classificationSolutions.AddRange(solutions);
     283
     284      trainingEvaluationCache.Clear();
     285      testEvaluationCache.Clear();
     286      evaluationCache.Clear();
    253287    }
    254288    public void RemoveClassificationSolutions(IEnumerable<IClassificationSolution> solutions) {
    255289      classificationSolutions.RemoveRange(solutions);
     290
     291      trainingEvaluationCache.Clear();
     292      testEvaluationCache.Clear();
     293      evaluationCache.Clear();
    256294    }
    257295
     
    275313      trainingPartitions[solution.Model] = solution.ProblemData.TrainingPartition;
    276314      testPartitions[solution.Model] = solution.ProblemData.TestPartition;
     315
     316      trainingEvaluationCache.Clear();
     317      testEvaluationCache.Clear();
     318      evaluationCache.Clear();
    277319    }
    278320
     
    282324      trainingPartitions.Remove(solution.Model);
    283325      testPartitions.Remove(solution.Model);
     326
     327      trainingEvaluationCache.Clear();
     328      testEvaluationCache.Clear();
     329      evaluationCache.Clear();
    284330    }
    285331  }
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationProblemData.cs

    r7823 r8276  
    207207
    208208    #region parameter properties
    209     public ConstrainedValueParameter<StringValue> TargetVariableParameter {
    210       get { return (ConstrainedValueParameter<StringValue>)Parameters[TargetVariableParameterName]; }
     209    public IConstrainedValueParameter<StringValue> TargetVariableParameter {
     210      get { return (IConstrainedValueParameter<StringValue>)Parameters[TargetVariableParameterName]; }
    211211    }
    212212    public IFixedValueParameter<StringMatrix> ClassNamesParameter {
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationSolution.cs

    r7259 r8276  
    4444    public ClassificationSolution(IClassificationModel model, IClassificationProblemData problemData)
    4545      : base(model, problemData) {
    46       evaluationCache = new Dictionary<int, double>();
     46      evaluationCache = new Dictionary<int, double>(problemData.Dataset.Rows);
    4747    }
    4848
     
    5151    }
    5252    public override IEnumerable<double> EstimatedTrainingClassValues {
    53       get { return GetEstimatedClassValues(ProblemData.TrainingIndizes); }
     53      get { return GetEstimatedClassValues(ProblemData.TrainingIndices); }
    5454    }
    5555    public override IEnumerable<double> EstimatedTestClassValues {
    56       get { return GetEstimatedClassValues(ProblemData.TestIndizes); }
     56      get { return GetEstimatedClassValues(ProblemData.TestIndices); }
    5757    }
    5858
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationSolutionBase.cs

    r7259 r8276  
    8787    protected void CalculateResults() {
    8888      double[] estimatedTrainingClassValues = EstimatedTrainingClassValues.ToArray(); // cache values
    89       double[] originalTrainingClassValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes).ToArray();
     89      double[] originalTrainingClassValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndices).ToArray();
    9090      double[] estimatedTestClassValues = EstimatedTestClassValues.ToArray(); // cache values
    91       double[] originalTestClassValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndizes).ToArray();
     91      double[] originalTestClassValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndices).ToArray();
    9292
    9393      OnlineCalculatorError errorState;
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/DiscriminantFunctionClassificationSolution.cs

    r7259 r8276  
    5959    }
    6060    public override IEnumerable<double> EstimatedTrainingClassValues {
    61       get { return GetEstimatedClassValues(ProblemData.TrainingIndizes); }
     61      get { return GetEstimatedClassValues(ProblemData.TrainingIndices); }
    6262    }
    6363    public override IEnumerable<double> EstimatedTestClassValues {
    64       get { return GetEstimatedClassValues(ProblemData.TestIndizes); }
     64      get { return GetEstimatedClassValues(ProblemData.TestIndices); }
    6565    }
    6666
     
    8282    }
    8383    public override IEnumerable<double> EstimatedTrainingValues {
    84       get { return GetEstimatedValues(ProblemData.TrainingIndizes); }
     84      get { return GetEstimatedValues(ProblemData.TrainingIndices); }
    8585    }
    8686    public override IEnumerable<double> EstimatedTestValues {
    87       get { return GetEstimatedValues(ProblemData.TestIndizes); }
     87      get { return GetEstimatedValues(ProblemData.TestIndices); }
    8888    }
    8989
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/DiscriminantFunctionClassificationSolutionBase.cs

    r7259 r8276  
    103103    protected void CalculateRegressionResults() {
    104104      double[] estimatedTrainingValues = EstimatedTrainingValues.ToArray(); // cache values
    105       double[] originalTrainingValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes).ToArray();
     105      double[] originalTrainingValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndices).ToArray();
    106106      double[] estimatedTestValues = EstimatedTestValues.ToArray(); // cache values
    107       double[] originalTestValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndizes).ToArray();
     107      double[] originalTestValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndices).ToArray();
    108108
    109109      OnlineCalculatorError errorState;
     
    140140      double[] classValues;
    141141      double[] thresholds;
    142       var targetClassValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes);
     142      var targetClassValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndices);
    143143      AccuracyMaximizationThresholdCalculator.CalculateThresholds(ProblemData, EstimatedTrainingValues, targetClassValues, out classValues, out thresholds);
    144144
     
    149149      double[] classValues;
    150150      double[] thresholds;
    151       var targetClassValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes);
     151      var targetClassValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndices);
    152152      NormalDistributionCutPointsThresholdCalculator.CalculateThresholds(ProblemData, EstimatedTrainingValues, targetClassValues, out classValues, out thresholds);
    153153
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ThresholdCalculators/AccuracyMaximizationThresholdCalculator.cs

    r7259 r8276  
    5454    public static void CalculateThresholds(IClassificationProblemData problemData, IEnumerable<double> estimatedValues, IEnumerable<double> targetClassValues, out double[] classValues, out double[] thresholds) {
    5555      int slices = 100;
     56      double minThresholdInc = 10e-5; // necessary to prevent infinite loop when maxEstimated - minEstimated is effectively zero (constant model)
    5657      List<double> estimatedValuesList = estimatedValues.ToList();
    5758      double maxEstimatedValue = estimatedValuesList.Max();
    5859      double minEstimatedValue = estimatedValuesList.Min();
    59       double thresholdIncrement = (maxEstimatedValue - minEstimatedValue) / slices;
     60      double thresholdIncrement = Math.Max((maxEstimatedValue - minEstimatedValue) / slices, minThresholdInc);
    6061      var estimatedAndTargetValuePairs =
    6162        estimatedValuesList.Zip(targetClassValues, (x, y) => new { EstimatedValue = x, TargetClassValue = y })
     
    7071
    7172      // incrementally calculate accuracy of all possible thresholds
    72       int[,] confusionMatrix = new int[nClasses, nClasses];
    73 
    7473      for (int i = 1; i < thresholds.Length; i++) {
    7574        double lowerThreshold = thresholds[i - 1];
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Clustering/ClusteringSolution.cs

    r7259 r8276  
    6868    public virtual IEnumerable<int> TrainingClusterValues {
    6969      get {
    70         return GetClusterValues(ProblemData.TrainingIndizes);
     70        return GetClusterValues(ProblemData.TrainingIndices);
    7171      }
    7272    }
     
    7474    public virtual IEnumerable<int> TestClusterValues {
    7575      get {
    76         return GetClusterValues(ProblemData.TestIndizes);
     76        return GetClusterValues(ProblemData.TestIndices);
    7777      }
    7878    }
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/DataAnalysisProblemData.cs

    r8038 r8276  
    2323using System.Collections.Generic;
    2424using System.Linq;
    25 using HeuristicLab.Analysis;
    2625using HeuristicLab.Collections;
    2726using HeuristicLab.Common;
     
    5352      get { return (IFixedValueParameter<IntRange>)Parameters[TestPartitionParameterName]; }
    5453    }
    55     public IFixedValueParameter<HeatMap> DatasetHeatMapParameter {
    56       get { return (IFixedValueParameter<HeatMap>)Parameters[DatasetHeatMapParameterName]; }
     54    public IFixedValueParameter<ExtendedHeatMap> DatasetHeatMapParameter {
     55      get { return (IFixedValueParameter<ExtendedHeatMap>)Parameters[DatasetHeatMapParameterName]; }
    5756    }
    5857    #endregion
     
    7978      get { return TestPartitionParameter.Value; }
    8079    }
    81     public HeatMap DatasetHeatMap {
     80    public ExtendedHeatMap DatasetHeatMap {
    8281      get { return DatasetHeatMapParameter.Value; }
    8382    }
    8483
    85     public virtual IEnumerable<int> TrainingIndizes {
     84    public virtual IEnumerable<int> TrainingIndices {
    8685      get {
    8786        return Enumerable.Range(TrainingPartition.Start, Math.Max(0, TrainingPartition.End - TrainingPartition.Start))
     
    8988      }
    9089    }
    91     public virtual IEnumerable<int> TestIndizes {
     90    public virtual IEnumerable<int> TestIndices {
    9291      get {
    9392        return Enumerable.Range(TestPartition.Start, Math.Max(0, TestPartition.End - TestPartition.Start))
     
    140139      Parameters.Add(new FixedValueParameter<IntRange>(TrainingPartitionParameterName, "", new IntRange(trainingPartitionStart, trainingPartitionEnd)));
    141140      Parameters.Add(new FixedValueParameter<IntRange>(TestPartitionParameterName, "", new IntRange(testPartitionStart, testPartitionEnd)));
    142       Parameters.Add(new FixedValueParameter<HeatMap>(DatasetHeatMapParameterName, "", CalculateHeatMap(dataset)));
     141      Parameters.Add(new FixedValueParameter<ExtendedHeatMap>(DatasetHeatMapParameterName, "", new ExtendedHeatMap(this)));
    143142
    144143      ((ValueParameter<Dataset>)DatasetParameter).ReactOnValueToStringChangedAndValueItemImageChanged = false;
    145144      RegisterEventHandlers();
    146     }
    147 
    148     private HeatMap CalculateHeatMap(Dataset dataset) {
    149       IList<string> doubleVariableNames = dataset.DoubleVariables.ToList();
    150       OnlineCalculatorError error;
    151       int length = doubleVariableNames.Count;
    152       double[,] elements = new double[length, length];
    153 
    154       for (int i = 0; i < length; i++) {
    155         for (int j = 0; j < i + 1; j++) {
    156           elements[i, j] = OnlinePearsonsRSquaredCalculator.Calculate(dataset.GetDoubleValues(doubleVariableNames[length - 1 - i]), dataset.GetDoubleValues(doubleVariableNames[j]), out error);
    157           elements[j, i] = elements[i, j];
    158           if (!error.Equals(OnlineCalculatorError.None)) {
    159             throw new ArgumentException("Calculator returned " + error);
    160           }
    161         }
    162       }
    163       return new HeatMap(elements, "Hoeffdings Dependence");
    164145    }
    165146
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionEnsembleSolution.cs

    r7738 r8276  
    3737  [Creatable("Data Analysis - Ensembles")]
    3838  public sealed class RegressionEnsembleSolution : RegressionSolution, IRegressionEnsembleSolution {
     39    private readonly Dictionary<int, double> trainingEvaluationCache = new Dictionary<int, double>();
     40    private readonly Dictionary<int, double> testEvaluationCache = new Dictionary<int, double>();
     41
    3942    public new IRegressionEnsembleModel Model {
    4043      get { return (IRegressionEnsembleModel)base.Model; }
     
    5255
    5356    [Storable]
    54     private Dictionary<IRegressionModel, IntRange> trainingPartitions;
     57    private readonly Dictionary<IRegressionModel, IntRange> trainingPartitions;
    5558    [Storable]
    56     private Dictionary<IRegressionModel, IntRange> testPartitions;
     59    private readonly Dictionary<IRegressionModel, IntRange> testPartitions;
    5760
    5861    [StorableConstructor]
     
    8689      }
    8790
     91      trainingEvaluationCache = new Dictionary<int, double>(original.ProblemData.TrainingIndices.Count());
     92      testEvaluationCache = new Dictionary<int, double>(original.ProblemData.TestIndices.Count());
     93
    8894      regressionSolutions = cloner.Clone(original.regressionSolutions);
    8995      RegisterRegressionSolutionsEventHandler();
     
    133139      }
    134140
     141      trainingEvaluationCache = new Dictionary<int, double>(problemData.TrainingIndices.Count());
     142      testEvaluationCache = new Dictionary<int, double>(problemData.TestIndices.Count());
     143
    135144      RegisterRegressionSolutionsEventHandler();
    136145      regressionSolutions.AddRange(solutions);
     
    153162    public override IEnumerable<double> EstimatedTrainingValues {
    154163      get {
    155         var rows = ProblemData.TrainingIndizes;
    156         var estimatedValuesEnumerators = (from model in Model.Models
    157                                           select new { Model = model, EstimatedValuesEnumerator = model.GetEstimatedValues(ProblemData.Dataset, rows).GetEnumerator() })
    158                                          .ToList();
    159         var rowsEnumerator = rows.GetEnumerator();
    160         // aggregate to make sure that MoveNext is called for all enumerators
    161         while (rowsEnumerator.MoveNext() & estimatedValuesEnumerators.Select(en => en.EstimatedValuesEnumerator.MoveNext()).Aggregate(true, (acc, b) => acc & b)) {
    162           int currentRow = rowsEnumerator.Current;
    163 
    164           var selectedEnumerators = from pair in estimatedValuesEnumerators
    165                                     where RowIsTrainingForModel(currentRow, pair.Model) && !RowIsTestForModel(currentRow, pair.Model)
    166                                     select pair.EstimatedValuesEnumerator;
    167           yield return AggregateEstimatedValues(selectedEnumerators.Select(x => x.Current));
     164        var rows = ProblemData.TrainingIndices;
     165        var rowsToEvaluate = rows.Except(trainingEvaluationCache.Keys);
     166        var rowsEnumerator = rowsToEvaluate.GetEnumerator();
     167        var valuesEnumerator = GetEstimatedValues(rowsToEvaluate, (r, m) => RowIsTrainingForModel(r, m) && !RowIsTestForModel(r, m)).GetEnumerator();
     168
     169        while (rowsEnumerator.MoveNext() & valuesEnumerator.MoveNext()) {
     170          trainingEvaluationCache.Add(rowsEnumerator.Current, valuesEnumerator.Current);
    168171        }
     172
     173        return rows.Select(row => trainingEvaluationCache[row]);
    169174      }
    170175    }
     
    172177    public override IEnumerable<double> EstimatedTestValues {
    173178      get {
    174         var rows = ProblemData.TestIndizes;
    175         var estimatedValuesEnumerators = (from model in Model.Models
    176                                           select new { Model = model, EstimatedValuesEnumerator = model.GetEstimatedValues(ProblemData.Dataset, rows).GetEnumerator() })
    177                                          .ToList();
    178         var rowsEnumerator = ProblemData.TestIndizes.GetEnumerator();
    179         // aggregate to make sure that MoveNext is called for all enumerators
    180         while (rowsEnumerator.MoveNext() & estimatedValuesEnumerators.Select(en => en.EstimatedValuesEnumerator.MoveNext()).Aggregate(true, (acc, b) => acc & b)) {
    181           int currentRow = rowsEnumerator.Current;
    182 
    183           var selectedEnumerators = from pair in estimatedValuesEnumerators
    184                                     where RowIsTestForModel(currentRow, pair.Model)
    185                                     select pair.EstimatedValuesEnumerator;
    186 
    187           yield return AggregateEstimatedValues(selectedEnumerators.Select(x => x.Current));
     179        var rows = ProblemData.TestIndices;
     180        var rowsToEvaluate = rows.Except(testEvaluationCache.Keys);
     181        var rowsEnumerator = rowsToEvaluate.GetEnumerator();
     182        var valuesEnumerator = GetEstimatedValues(rowsToEvaluate, RowIsTestForModel).GetEnumerator();
     183
     184        while (rowsEnumerator.MoveNext() & valuesEnumerator.MoveNext()) {
     185          testEvaluationCache.Add(rowsEnumerator.Current, valuesEnumerator.Current);
    188186        }
     187
     188        return rows.Select(row => testEvaluationCache[row]);
     189      }
     190    }
     191
     192    private IEnumerable<double> GetEstimatedValues(IEnumerable<int> rows, Func<int, IRegressionModel, bool> modelSelectionPredicate) {
     193      var estimatedValuesEnumerators = (from model in Model.Models
     194                                        select new { Model = model, EstimatedValuesEnumerator = model.GetEstimatedValues(ProblemData.Dataset, rows).GetEnumerator() })
     195                                       .ToList();
     196      var rowsEnumerator = rows.GetEnumerator();
     197      // aggregate to make sure that MoveNext is called for all enumerators
     198      while (rowsEnumerator.MoveNext() & estimatedValuesEnumerators.Select(en => en.EstimatedValuesEnumerator.MoveNext()).Aggregate(true, (acc, b) => acc & b)) {
     199        int currentRow = rowsEnumerator.Current;
     200
     201        var selectedEnumerators = from pair in estimatedValuesEnumerators
     202                                  where modelSelectionPredicate(currentRow, pair.Model)
     203                                  select pair.EstimatedValuesEnumerator;
     204
     205        yield return AggregateEstimatedValues(selectedEnumerators.Select(x => x.Current));
    189206      }
    190207    }
     
    201218
    202219    public override IEnumerable<double> GetEstimatedValues(IEnumerable<int> rows) {
    203       return from xs in GetEstimatedValueVectors(ProblemData.Dataset, rows)
    204              select AggregateEstimatedValues(xs);
     220      var rowsToEvaluate = rows.Except(evaluationCache.Keys);
     221      var rowsEnumerator = rowsToEvaluate.GetEnumerator();
     222      var valuesEnumerator = (from xs in GetEstimatedValueVectors(ProblemData.Dataset, rowsToEvaluate)
     223                              select AggregateEstimatedValues(xs))
     224                             .GetEnumerator();
     225
     226      while (rowsEnumerator.MoveNext() & valuesEnumerator.MoveNext()) {
     227        evaluationCache.Add(rowsEnumerator.Current, valuesEnumerator.Current);
     228      }
     229
     230      return rows.Select(row => evaluationCache[row]);
    205231    }
    206232
     
    223249
    224250    protected override void OnProblemDataChanged() {
     251      trainingEvaluationCache.Clear();
     252      testEvaluationCache.Clear();
     253      evaluationCache.Clear();
    225254      IRegressionProblemData problemData = new RegressionProblemData(ProblemData.Dataset,
    226255                                                                     ProblemData.AllowedInputVariables,
     
    251280    public void AddRegressionSolutions(IEnumerable<IRegressionSolution> solutions) {
    252281      regressionSolutions.AddRange(solutions);
     282
     283      trainingEvaluationCache.Clear();
     284      testEvaluationCache.Clear();
     285      evaluationCache.Clear();
    253286    }
    254287    public void RemoveRegressionSolutions(IEnumerable<IRegressionSolution> solutions) {
    255288      regressionSolutions.RemoveRange(solutions);
     289
     290      trainingEvaluationCache.Clear();
     291      testEvaluationCache.Clear();
     292      evaluationCache.Clear();
    256293    }
    257294
     
    275312      trainingPartitions[solution.Model] = solution.ProblemData.TrainingPartition;
    276313      testPartitions[solution.Model] = solution.ProblemData.TestPartition;
     314
     315      trainingEvaluationCache.Clear();
     316      testEvaluationCache.Clear();
     317      evaluationCache.Clear();
    277318    }
    278319
     
    282323      trainingPartitions.Remove(solution.Model);
    283324      testPartitions.Remove(solution.Model);
     325
     326      trainingEvaluationCache.Clear();
     327      testEvaluationCache.Clear();
     328      evaluationCache.Clear();
    284329    }
    285330  }
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionProblemData.cs

    r7823 r8276  
    9595    #endregion
    9696
    97     public ConstrainedValueParameter<StringValue> TargetVariableParameter {
    98       get { return (ConstrainedValueParameter<StringValue>)Parameters[TargetVariableParameterName]; }
     97    public IConstrainedValueParameter<StringValue> TargetVariableParameter {
     98      get { return (IConstrainedValueParameter<StringValue>)Parameters[TargetVariableParameterName]; }
    9999    }
    100100    public string TargetVariable {
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionSolution.cs

    r7735 r8276  
    5555    }
    5656    public override IEnumerable<double> EstimatedTrainingValues {
    57       get { return GetEstimatedValues(ProblemData.TrainingIndizes); }
     57      get { return GetEstimatedValues(ProblemData.TrainingIndices); }
    5858    }
    5959    public override IEnumerable<double> EstimatedTestValues {
    60       get { return GetEstimatedValues(ProblemData.TestIndizes); }
     60      get { return GetEstimatedValues(ProblemData.TestIndices); }
    6161    }
    6262
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionSolutionBase.cs

    r7735 r8276  
    138138        OnlineCalculatorError errorState;
    139139        Add(new Result(TrainingMeanAbsoluteErrorResultName, "Mean of absolute errors of the model on the training partition", new DoubleValue()));
    140         double trainingMAE = OnlineMeanAbsoluteErrorCalculator.Calculate(EstimatedTrainingValues, ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes), out errorState);
     140        double trainingMAE = OnlineMeanAbsoluteErrorCalculator.Calculate(EstimatedTrainingValues, ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndices), out errorState);
    141141        TrainingMeanAbsoluteError = errorState == OnlineCalculatorError.None ? trainingMAE : double.NaN;
    142142      }
     
    145145        OnlineCalculatorError errorState;
    146146        Add(new Result(TestMeanAbsoluteErrorResultName, "Mean of absolute errors of the model on the test partition", new DoubleValue()));
    147         double testMAE = OnlineMeanAbsoluteErrorCalculator.Calculate(EstimatedTestValues, ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndizes), out errorState);
     147        double testMAE = OnlineMeanAbsoluteErrorCalculator.Calculate(EstimatedTestValues, ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndices), out errorState);
    148148        TestMeanAbsoluteError = errorState == OnlineCalculatorError.None ? testMAE : double.NaN;
    149149      }
     
    152152        OnlineCalculatorError errorState;
    153153        Add(new Result(TrainingMeanErrorResultName, "Mean of errors of the model on the training partition", new DoubleValue()));
    154         double trainingME = OnlineMeanErrorCalculator.Calculate(EstimatedTrainingValues, ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes), out errorState);
     154        double trainingME = OnlineMeanErrorCalculator.Calculate(EstimatedTrainingValues, ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndices), out errorState);
    155155        TrainingMeanError = errorState == OnlineCalculatorError.None ? trainingME : double.NaN;
    156156      }
     
    158158        OnlineCalculatorError errorState;
    159159        Add(new Result(TestMeanErrorResultName, "Mean of errors of the model on the test partition", new DoubleValue()));
    160         double testME = OnlineMeanErrorCalculator.Calculate(EstimatedTestValues, ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndizes), out errorState);
     160        double testME = OnlineMeanErrorCalculator.Calculate(EstimatedTestValues, ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndices), out errorState);
    161161        TestMeanError = errorState == OnlineCalculatorError.None ? testME : double.NaN;
    162162      }
     
    166166    protected void CalculateResults() {
    167167      IEnumerable<double> estimatedTrainingValues = EstimatedTrainingValues; // cache values
    168       IEnumerable<double> originalTrainingValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes);
     168      IEnumerable<double> originalTrainingValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndices);
    169169      IEnumerable<double> estimatedTestValues = EstimatedTestValues; // cache values
    170       IEnumerable<double> originalTestValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndizes);
     170      IEnumerable<double> originalTestValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndices);
    171171
    172172      OnlineCalculatorError errorState;
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Interfaces/IDataAnalysisProblemData.cs

    r7259 r8276  
    3636    IntRange TestPartition { get; }
    3737
    38     IEnumerable<int> TrainingIndizes { get; }
    39     IEnumerable<int> TestIndizes { get; }
     38    IEnumerable<int> TrainingIndices { get; }
     39    IEnumerable<int> TestIndices { get; }
    4040
    4141    bool IsTrainingSample(int index);
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/Interfaces/IOnlineCalculator.cs

    r7259 r8276  
    2424namespace HeuristicLab.Problems.DataAnalysis {
    2525  [Flags]
    26   public enum OnlineCalculatorError { 
     26  public enum OnlineCalculatorError {
    2727    /// <summary>
    2828    /// No error occurred
    2929    /// </summary>
    30     None = 0, 
     30    None = 0,
    3131    /// <summary>
    3232    /// An invalid value has been added (often +/- Infinity and NaN are invalid values)
    3333    /// </summary>
    34     InvalidValueAdded = 1, 
     34    InvalidValueAdded = 1,
    3535    /// <summary>
    3636    /// The number of elements added to the evaluator is not sufficient to calculate the result value
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/OnlineCalculators/HoeffdingsDependenceCalculator.cs

    r7969 r8276  
    2323using System.Collections.Generic;
    2424using System.Linq;
    25 using HeuristicLab.Common;
    2625
    2726namespace HeuristicLab.Problems.DataAnalysis {
  • branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4/OnlineCalculators/OnlineLinearScalingParameterCalculator.cs

    r7259 r8276  
    5555    }
    5656
    57     private int cnt;
    5857    private OnlineMeanAndVarianceCalculator targetMeanCalculator;
    5958    private OnlineMeanAndVarianceCalculator originalMeanAndVarianceCalculator;
     
    6867
    6968    public void Reset() {
    70       cnt = 0;
    7169      targetMeanCalculator.Reset();
    7270      originalMeanAndVarianceCalculator.Reset();
     
    8583      originalTargetCovarianceCalculator.Add(original, target);
    8684
    87       cnt++;
    8885    }
    8986
Note: See TracChangeset for help on using the changeset viewer.