Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
07/26/12 09:51:13 (12 years ago)
Author:
jkarder
Message:

#1331: merged r8086:8330 from trunk

Location:
branches/ScatterSearch (trunk integration)
Files:
6 edited

Legend:

Unmodified
Added
Removed
  • branches/ScatterSearch (trunk integration)

  • branches/ScatterSearch (trunk integration)/HeuristicLab.Problems.DataAnalysis

  • branches/ScatterSearch (trunk integration)/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionEnsembleSolution.cs

    r7738 r8331  
    3737  [Creatable("Data Analysis - Ensembles")]
    3838  public sealed class RegressionEnsembleSolution : RegressionSolution, IRegressionEnsembleSolution {
     39    private readonly Dictionary<int, double> trainingEvaluationCache = new Dictionary<int, double>();
     40    private readonly Dictionary<int, double> testEvaluationCache = new Dictionary<int, double>();
     41
    3942    public new IRegressionEnsembleModel Model {
    4043      get { return (IRegressionEnsembleModel)base.Model; }
     
    5255
    5356    [Storable]
    54     private Dictionary<IRegressionModel, IntRange> trainingPartitions;
     57    private readonly Dictionary<IRegressionModel, IntRange> trainingPartitions;
    5558    [Storable]
    56     private Dictionary<IRegressionModel, IntRange> testPartitions;
     59    private readonly Dictionary<IRegressionModel, IntRange> testPartitions;
    5760
    5861    [StorableConstructor]
     
    8689      }
    8790
     91      trainingEvaluationCache = new Dictionary<int, double>(original.ProblemData.TrainingIndices.Count());
     92      testEvaluationCache = new Dictionary<int, double>(original.ProblemData.TestIndices.Count());
     93
    8894      regressionSolutions = cloner.Clone(original.regressionSolutions);
    8995      RegisterRegressionSolutionsEventHandler();
     
    133139      }
    134140
     141      trainingEvaluationCache = new Dictionary<int, double>(problemData.TrainingIndices.Count());
     142      testEvaluationCache = new Dictionary<int, double>(problemData.TestIndices.Count());
     143
    135144      RegisterRegressionSolutionsEventHandler();
    136145      regressionSolutions.AddRange(solutions);
     
    153162    public override IEnumerable<double> EstimatedTrainingValues {
    154163      get {
    155         var rows = ProblemData.TrainingIndizes;
    156         var estimatedValuesEnumerators = (from model in Model.Models
    157                                           select new { Model = model, EstimatedValuesEnumerator = model.GetEstimatedValues(ProblemData.Dataset, rows).GetEnumerator() })
    158                                          .ToList();
    159         var rowsEnumerator = rows.GetEnumerator();
    160         // aggregate to make sure that MoveNext is called for all enumerators
    161         while (rowsEnumerator.MoveNext() & estimatedValuesEnumerators.Select(en => en.EstimatedValuesEnumerator.MoveNext()).Aggregate(true, (acc, b) => acc & b)) {
    162           int currentRow = rowsEnumerator.Current;
    163 
    164           var selectedEnumerators = from pair in estimatedValuesEnumerators
    165                                     where RowIsTrainingForModel(currentRow, pair.Model) && !RowIsTestForModel(currentRow, pair.Model)
    166                                     select pair.EstimatedValuesEnumerator;
    167           yield return AggregateEstimatedValues(selectedEnumerators.Select(x => x.Current));
     164        var rows = ProblemData.TrainingIndices;
     165        var rowsToEvaluate = rows.Except(trainingEvaluationCache.Keys);
     166        var rowsEnumerator = rowsToEvaluate.GetEnumerator();
     167        var valuesEnumerator = GetEstimatedValues(rowsToEvaluate, (r, m) => RowIsTrainingForModel(r, m) && !RowIsTestForModel(r, m)).GetEnumerator();
     168
     169        while (rowsEnumerator.MoveNext() & valuesEnumerator.MoveNext()) {
     170          trainingEvaluationCache.Add(rowsEnumerator.Current, valuesEnumerator.Current);
    168171        }
     172
     173        return rows.Select(row => trainingEvaluationCache[row]);
    169174      }
    170175    }
     
    172177    public override IEnumerable<double> EstimatedTestValues {
    173178      get {
    174         var rows = ProblemData.TestIndizes;
    175         var estimatedValuesEnumerators = (from model in Model.Models
    176                                           select new { Model = model, EstimatedValuesEnumerator = model.GetEstimatedValues(ProblemData.Dataset, rows).GetEnumerator() })
    177                                          .ToList();
    178         var rowsEnumerator = ProblemData.TestIndizes.GetEnumerator();
    179         // aggregate to make sure that MoveNext is called for all enumerators
    180         while (rowsEnumerator.MoveNext() & estimatedValuesEnumerators.Select(en => en.EstimatedValuesEnumerator.MoveNext()).Aggregate(true, (acc, b) => acc & b)) {
    181           int currentRow = rowsEnumerator.Current;
    182 
    183           var selectedEnumerators = from pair in estimatedValuesEnumerators
    184                                     where RowIsTestForModel(currentRow, pair.Model)
    185                                     select pair.EstimatedValuesEnumerator;
    186 
    187           yield return AggregateEstimatedValues(selectedEnumerators.Select(x => x.Current));
     179        var rows = ProblemData.TestIndices;
     180        var rowsToEvaluate = rows.Except(testEvaluationCache.Keys);
     181        var rowsEnumerator = rowsToEvaluate.GetEnumerator();
     182        var valuesEnumerator = GetEstimatedValues(rowsToEvaluate, RowIsTestForModel).GetEnumerator();
     183
     184        while (rowsEnumerator.MoveNext() & valuesEnumerator.MoveNext()) {
     185          testEvaluationCache.Add(rowsEnumerator.Current, valuesEnumerator.Current);
    188186        }
     187
     188        return rows.Select(row => testEvaluationCache[row]);
     189      }
     190    }
     191
     192    private IEnumerable<double> GetEstimatedValues(IEnumerable<int> rows, Func<int, IRegressionModel, bool> modelSelectionPredicate) {
     193      var estimatedValuesEnumerators = (from model in Model.Models
     194                                        select new { Model = model, EstimatedValuesEnumerator = model.GetEstimatedValues(ProblemData.Dataset, rows).GetEnumerator() })
     195                                       .ToList();
     196      var rowsEnumerator = rows.GetEnumerator();
     197      // aggregate to make sure that MoveNext is called for all enumerators
     198      while (rowsEnumerator.MoveNext() & estimatedValuesEnumerators.Select(en => en.EstimatedValuesEnumerator.MoveNext()).Aggregate(true, (acc, b) => acc & b)) {
     199        int currentRow = rowsEnumerator.Current;
     200
     201        var selectedEnumerators = from pair in estimatedValuesEnumerators
     202                                  where modelSelectionPredicate(currentRow, pair.Model)
     203                                  select pair.EstimatedValuesEnumerator;
     204
     205        yield return AggregateEstimatedValues(selectedEnumerators.Select(x => x.Current));
    189206      }
    190207    }
     
    201218
    202219    public override IEnumerable<double> GetEstimatedValues(IEnumerable<int> rows) {
    203       return from xs in GetEstimatedValueVectors(ProblemData.Dataset, rows)
    204              select AggregateEstimatedValues(xs);
     220      var rowsToEvaluate = rows.Except(evaluationCache.Keys);
     221      var rowsEnumerator = rowsToEvaluate.GetEnumerator();
     222      var valuesEnumerator = (from xs in GetEstimatedValueVectors(ProblemData.Dataset, rowsToEvaluate)
     223                              select AggregateEstimatedValues(xs))
     224                             .GetEnumerator();
     225
     226      while (rowsEnumerator.MoveNext() & valuesEnumerator.MoveNext()) {
     227        evaluationCache.Add(rowsEnumerator.Current, valuesEnumerator.Current);
     228      }
     229
     230      return rows.Select(row => evaluationCache[row]);
    205231    }
    206232
     
    223249
    224250    protected override void OnProblemDataChanged() {
     251      trainingEvaluationCache.Clear();
     252      testEvaluationCache.Clear();
     253      evaluationCache.Clear();
    225254      IRegressionProblemData problemData = new RegressionProblemData(ProblemData.Dataset,
    226255                                                                     ProblemData.AllowedInputVariables,
     
    251280    public void AddRegressionSolutions(IEnumerable<IRegressionSolution> solutions) {
    252281      regressionSolutions.AddRange(solutions);
     282
     283      trainingEvaluationCache.Clear();
     284      testEvaluationCache.Clear();
     285      evaluationCache.Clear();
    253286    }
    254287    public void RemoveRegressionSolutions(IEnumerable<IRegressionSolution> solutions) {
    255288      regressionSolutions.RemoveRange(solutions);
     289
     290      trainingEvaluationCache.Clear();
     291      testEvaluationCache.Clear();
     292      evaluationCache.Clear();
    256293    }
    257294
     
    275312      trainingPartitions[solution.Model] = solution.ProblemData.TrainingPartition;
    276313      testPartitions[solution.Model] = solution.ProblemData.TestPartition;
     314
     315      trainingEvaluationCache.Clear();
     316      testEvaluationCache.Clear();
     317      evaluationCache.Clear();
    277318    }
    278319
     
    282323      trainingPartitions.Remove(solution.Model);
    283324      testPartitions.Remove(solution.Model);
     325
     326      trainingEvaluationCache.Clear();
     327      testEvaluationCache.Clear();
     328      evaluationCache.Clear();
    284329    }
    285330  }
  • branches/ScatterSearch (trunk integration)/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionProblemData.cs

    r8086 r8331  
    9595    #endregion
    9696
    97     public ConstrainedValueParameter<StringValue> TargetVariableParameter {
    98       get { return (ConstrainedValueParameter<StringValue>)Parameters[TargetVariableParameterName]; }
     97    public IConstrainedValueParameter<StringValue> TargetVariableParameter {
     98      get { return (IConstrainedValueParameter<StringValue>)Parameters[TargetVariableParameterName]; }
    9999    }
    100100    public string TargetVariable {
  • branches/ScatterSearch (trunk integration)/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionSolution.cs

    r7735 r8331  
    5555    }
    5656    public override IEnumerable<double> EstimatedTrainingValues {
    57       get { return GetEstimatedValues(ProblemData.TrainingIndizes); }
     57      get { return GetEstimatedValues(ProblemData.TrainingIndices); }
    5858    }
    5959    public override IEnumerable<double> EstimatedTestValues {
    60       get { return GetEstimatedValues(ProblemData.TestIndizes); }
     60      get { return GetEstimatedValues(ProblemData.TestIndices); }
    6161    }
    6262
  • branches/ScatterSearch (trunk integration)/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionSolutionBase.cs

    r7735 r8331  
    138138        OnlineCalculatorError errorState;
    139139        Add(new Result(TrainingMeanAbsoluteErrorResultName, "Mean of absolute errors of the model on the training partition", new DoubleValue()));
    140         double trainingMAE = OnlineMeanAbsoluteErrorCalculator.Calculate(EstimatedTrainingValues, ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes), out errorState);
     140        double trainingMAE = OnlineMeanAbsoluteErrorCalculator.Calculate(EstimatedTrainingValues, ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndices), out errorState);
    141141        TrainingMeanAbsoluteError = errorState == OnlineCalculatorError.None ? trainingMAE : double.NaN;
    142142      }
     
    145145        OnlineCalculatorError errorState;
    146146        Add(new Result(TestMeanAbsoluteErrorResultName, "Mean of absolute errors of the model on the test partition", new DoubleValue()));
    147         double testMAE = OnlineMeanAbsoluteErrorCalculator.Calculate(EstimatedTestValues, ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndizes), out errorState);
     147        double testMAE = OnlineMeanAbsoluteErrorCalculator.Calculate(EstimatedTestValues, ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndices), out errorState);
    148148        TestMeanAbsoluteError = errorState == OnlineCalculatorError.None ? testMAE : double.NaN;
    149149      }
     
    152152        OnlineCalculatorError errorState;
    153153        Add(new Result(TrainingMeanErrorResultName, "Mean of errors of the model on the training partition", new DoubleValue()));
    154         double trainingME = OnlineMeanErrorCalculator.Calculate(EstimatedTrainingValues, ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes), out errorState);
     154        double trainingME = OnlineMeanErrorCalculator.Calculate(EstimatedTrainingValues, ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndices), out errorState);
    155155        TrainingMeanError = errorState == OnlineCalculatorError.None ? trainingME : double.NaN;
    156156      }
     
    158158        OnlineCalculatorError errorState;
    159159        Add(new Result(TestMeanErrorResultName, "Mean of errors of the model on the test partition", new DoubleValue()));
    160         double testME = OnlineMeanErrorCalculator.Calculate(EstimatedTestValues, ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndizes), out errorState);
     160        double testME = OnlineMeanErrorCalculator.Calculate(EstimatedTestValues, ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndices), out errorState);
    161161        TestMeanError = errorState == OnlineCalculatorError.None ? testME : double.NaN;
    162162      }
     
    166166    protected void CalculateResults() {
    167167      IEnumerable<double> estimatedTrainingValues = EstimatedTrainingValues; // cache values
    168       IEnumerable<double> originalTrainingValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndizes);
     168      IEnumerable<double> originalTrainingValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndices);
    169169      IEnumerable<double> estimatedTestValues = EstimatedTestValues; // cache values
    170       IEnumerable<double> originalTestValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndizes);
     170      IEnumerable<double> originalTestValues = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TestIndices);
    171171
    172172      OnlineCalculatorError errorState;
Note: See TracChangeset for help on using the changeset viewer.