Free cookie consent management tool by TermsFeed Policy Generator

source: branches/GP-MoveOperators/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationEnsembleSolution.cs @ 11297

Last change on this file since 11297 was 8660, checked in by gkronber, 12 years ago

#1847 merged r8205:8635 from trunk into branch

File size: 15.9 KB
RevLine 
[5816]1#region License Information
2/* HeuristicLab
[7259]3 * Copyright (C) 2002-2012 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
[5816]4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
[6589]22using System;
[5816]23using System.Collections.Generic;
24using System.Linq;
[6613]25using HeuristicLab.Collections;
[5816]26using HeuristicLab.Common;
27using HeuristicLab.Core;
[6589]28using HeuristicLab.Data;
[5816]29using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
30
namespace HeuristicLab.Problems.DataAnalysis {
  /// <summary>
  /// Represents classification solutions that contain an ensemble of multiple classification models
  /// </summary>
  /// <remarks>
  /// The estimated class value of a row is the value predicted by most of the selected member models
  /// (see <see cref="AggregateEstimatedClassValues"/>). Aggregated estimates are cached per row index and
  /// the caches are cleared whenever the problem data or the set of member solutions changes.
  /// </remarks>
  [StorableClass]
  [Item("Classification Ensemble Solution", "A classification solution that contains an ensemble of multiple classification models")]
  [Creatable("Data Analysis - Ensembles")]
  public sealed class ClassificationEnsembleSolution : ClassificationSolution, IClassificationEnsembleSolution {
    // row index -> aggregated estimate, filled lazily by the Estimated*ClassValues properties
    private readonly Dictionary<int, double> trainingEvaluationCache = new Dictionary<int, double>();
    private readonly Dictionary<int, double> testEvaluationCache = new Dictionary<int, double>();
    // NOTE(review): 'evaluationCache' (used by GetEstimatedClassValues) is not declared in this file;
    // presumably it is inherited from ClassificationSolution - confirm.

    /// <summary>The ensemble model of this solution (the base model cast to the ensemble interface).</summary>
    public new IClassificationEnsembleModel Model {
      get { return (IClassificationEnsembleModel)base.Model; }
    }

    /// <summary>The problem data of this solution (the base problem data cast to the ensemble type).</summary>
    public new ClassificationEnsembleProblemData ProblemData {
      get { return (ClassificationEnsembleProblemData)base.ProblemData; }
      set { base.ProblemData = value; }
    }

    private readonly ItemCollection<IClassificationSolution> classificationSolutions;
    /// <summary>
    /// The member solutions of the ensemble. Changes to this collection are propagated to
    /// <see cref="Model"/> and to the stored partitions through the collection's events.
    /// </summary>
    public IItemCollection<IClassificationSolution> ClassificationSolutions {
      get { return classificationSolutions; }
    }

    // training / test partition of each member model, keyed by the model instance
    [Storable]
    private Dictionary<IClassificationModel, IntRange> trainingPartitions;
    [Storable]
    private Dictionary<IClassificationModel, IntRange> testPartitions;

    [StorableConstructor]
    private ClassificationEnsembleSolution(bool deserializing)
      : base(deserializing) {
      classificationSolutions = new ItemCollection<IClassificationSolution>();
    }

    /// <summary>
    /// Recreates one member solution per model in <see cref="Model"/> from a clone of the problem data
    /// and the stored partitions (the solution collection itself is not marked as storable).
    /// </summary>
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      foreach (var model in Model.Models) {
        IClassificationProblemData problemData = (IClassificationProblemData)ProblemData.Clone();
        problemData.TrainingPartition.Start = trainingPartitions[model].Start;
        problemData.TrainingPartition.End = trainingPartitions[model].End;
        problemData.TestPartition.Start = testPartitions[model].Start;
        problemData.TestPartition.End = testPartitions[model].End;

        classificationSolutions.Add(model.CreateClassificationSolution(problemData));
      }
      // handlers are registered only after the collection is filled, so the Add calls above raise no handler
      RegisterClassificationSolutionsEventHandler();
    }

    private ClassificationEnsembleSolution(ClassificationEnsembleSolution original, Cloner cloner)
      : base(original, cloner) {
      trainingPartitions = new Dictionary<IClassificationModel, IntRange>();
      testPartitions = new Dictionary<IClassificationModel, IntRange>();
      foreach (var pair in original.trainingPartitions) {
        trainingPartitions[cloner.Clone(pair.Key)] = cloner.Clone(pair.Value);
      }
      foreach (var pair in original.testPartitions) {
        testPartitions[cloner.Clone(pair.Key)] = cloner.Clone(pair.Value);
      }

      // the caches start empty; only their capacity is taken from the original
      trainingEvaluationCache = new Dictionary<int, double>(original.ProblemData.TrainingIndices.Count());
      testEvaluationCache = new Dictionary<int, double>(original.ProblemData.TestIndices.Count());

      classificationSolutions = cloner.Clone(original.classificationSolutions);
      RegisterClassificationSolutionsEventHandler();
    }

    /// <summary>Creates an ensemble solution without member models on the empty problem data.</summary>
    public ClassificationEnsembleSolution()
      : base(new ClassificationEnsembleModel(), ClassificationEnsembleProblemData.EmptyProblemData) {
      trainingPartitions = new Dictionary<IClassificationModel, IntRange>();
      testPartitions = new Dictionary<IClassificationModel, IntRange>();
      classificationSolutions = new ItemCollection<IClassificationSolution>();

      RegisterClassificationSolutionsEventHandler();
    }

    /// <summary>Creates an ensemble solution without member models for the given problem data.</summary>
    public ClassificationEnsembleSolution(IClassificationProblemData problemData) :
      this(Enumerable.Empty<IClassificationModel>(), problemData) { }

    /// <summary>
    /// Creates an ensemble solution in which every model uses a clone of the training and test
    /// partition of <paramref name="problemData"/>.
    /// </summary>
    public ClassificationEnsembleSolution(IEnumerable<IClassificationModel> models, IClassificationProblemData problemData)
      : this(models, problemData,
             models.Select(m => (IntRange)problemData.TrainingPartition.Clone()),
             models.Select(m => (IntRange)problemData.TestPartition.Clone())
      ) { }

    /// <summary>
    /// Creates an ensemble solution from models and their individual training and test partitions.
    /// The three sequences are consumed in lockstep; the i-th partitions belong to the i-th model.
    /// </summary>
    /// <param name="models">The member models.</param>
    /// <param name="problemData">The problem data; it is cloned once per model.</param>
    /// <param name="trainingPartitions">One training partition per model.</param>
    /// <param name="testPartitions">One test partition per model.</param>
    /// <exception cref="ArgumentException">
    /// Thrown if the three sequences do not contain the same number of elements.
    /// </exception>
    public ClassificationEnsembleSolution(IEnumerable<IClassificationModel> models, IClassificationProblemData problemData, IEnumerable<IntRange> trainingPartitions, IEnumerable<IntRange> testPartitions)
      : base(new ClassificationEnsembleModel(Enumerable.Empty<IClassificationModel>()), new ClassificationEnsembleProblemData(problemData)) {
      this.trainingPartitions = new Dictionary<IClassificationModel, IntRange>();
      this.testPartitions = new Dictionary<IClassificationModel, IntRange>();
      this.classificationSolutions = new ItemCollection<IClassificationSolution>();

      var solutions = new List<IClassificationSolution>();
      using (var modelEnumerator = models.GetEnumerator())
      using (var trainingPartitionEnumerator = trainingPartitions.GetEnumerator())
      using (var testPartitionEnumerator = testPartitions.GetEnumerator()) {
        while (true) {
          // Advance all three enumerators and remember each result. Re-checking with a second round of
          // MoveNext calls would miss a sequence that is longer by exactly one element.
          bool hasModel = modelEnumerator.MoveNext();
          bool hasTrainingPartition = trainingPartitionEnumerator.MoveNext();
          bool hasTestPartition = testPartitionEnumerator.MoveNext();

          if (!hasModel && !hasTrainingPartition && !hasTestPartition) break;
          if (!(hasModel && hasTrainingPartition && hasTestPartition))
            throw new ArgumentException("The number of models, training partitions and test partitions must be equal.");

          var p = (IClassificationProblemData)problemData.Clone();
          p.TrainingPartition.Start = trainingPartitionEnumerator.Current.Start;
          p.TrainingPartition.End = trainingPartitionEnumerator.Current.End;
          p.TestPartition.Start = testPartitionEnumerator.Current.Start;
          p.TestPartition.End = testPartitionEnumerator.Current.End;

          solutions.Add(modelEnumerator.Current.CreateClassificationSolution(p));
        }
      }

      trainingEvaluationCache = new Dictionary<int, double>(problemData.TrainingIndices.Count());
      testEvaluationCache = new Dictionary<int, double>(problemData.TestIndices.Count());

      // register first: the ItemsAdded handler is what adds the models and partitions for the solutions
      RegisterClassificationSolutionsEventHandler();
      classificationSolutions.AddRange(solutions);
    }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new ClassificationEnsembleSolution(this, cloner);
    }

    // Subscribes to the change events of the solution collection.
    private void RegisterClassificationSolutionsEventHandler() {
      classificationSolutions.ItemsAdded += classificationSolutions_ItemsAdded;
      classificationSolutions.ItemsRemoved += classificationSolutions_ItemsRemoved;
      classificationSolutions.CollectionReset += classificationSolutions_CollectionReset;
    }

    protected override void RecalculateResults() {
      CalculateResults();
    }

    #region Evaluation
    /// <summary>
    /// Aggregated estimates for the training rows. For each row only models are consulted for which the
    /// row lies in the model's training partition and not in its test partition.
    /// </summary>
    public override IEnumerable<double> EstimatedTrainingClassValues {
      get {
        var rows = ProblemData.TrainingIndices;
        // materialized once: the set difference depends on the cache that is filled below
        var rowsToEvaluate = rows.Except(trainingEvaluationCache.Keys).ToList();
        var values = GetEstimatedValues(rowsToEvaluate, (r, m) => RowIsTrainingForModel(r, m) && !RowIsTestForModel(r, m));
        CacheEstimatedValues(trainingEvaluationCache, rowsToEvaluate, values);

        return rows.Select(row => trainingEvaluationCache[row]);
      }
    }

    /// <summary>
    /// Aggregated estimates for the test rows. For each row only models are consulted for which the
    /// row lies in the model's test partition.
    /// </summary>
    public override IEnumerable<double> EstimatedTestClassValues {
      get {
        var rows = ProblemData.TestIndices;
        // materialized once: the set difference depends on the cache that is filled below
        var rowsToEvaluate = rows.Except(testEvaluationCache.Keys).ToList();
        var values = GetEstimatedValues(rowsToEvaluate, RowIsTestForModel);
        CacheEstimatedValues(testEvaluationCache, rowsToEvaluate, values);

        return rows.Select(row => testEvaluationCache[row]);
      }
    }

    // Stores values[i] under rows[i] in the cache until either sequence ends.
    private static void CacheEstimatedValues(IDictionary<int, double> cache, IEnumerable<int> rows, IEnumerable<double> values) {
      using (var rowsEnumerator = rows.GetEnumerator())
      using (var valuesEnumerator = values.GetEnumerator()) {
        // non-short-circuit '&' keeps both enumerators advancing in lockstep
        while (rowsEnumerator.MoveNext() & valuesEnumerator.MoveNext()) {
          cache.Add(rowsEnumerator.Current, valuesEnumerator.Current);
        }
      }
    }

    // Yields one aggregated estimate per row, using only the models selected by the predicate for that row.
    private IEnumerable<double> GetEstimatedValues(IEnumerable<int> rows, Func<int, IClassificationModel, bool> modelSelectionPredicate) {
      var estimatedValuesEnumerators = (from model in Model.Models
                                        select new { Model = model, EstimatedValuesEnumerator = model.GetEstimatedClassValues(ProblemData.Dataset, rows).GetEnumerator() })
                                       .ToList();
      try {
        using (var rowsEnumerator = rows.GetEnumerator()) {
          // aggregate to make sure that MoveNext is called for all enumerators
          while (rowsEnumerator.MoveNext() & estimatedValuesEnumerators.Select(en => en.EstimatedValuesEnumerator.MoveNext()).Aggregate(true, (acc, b) => acc & b)) {
            int currentRow = rowsEnumerator.Current;

            var selectedEnumerators = from pair in estimatedValuesEnumerators
                                      where modelSelectionPredicate(currentRow, pair.Model)
                                      select pair.EstimatedValuesEnumerator;

            yield return AggregateEstimatedClassValues(selectedEnumerators.Select(x => x.Current));
          }
        }
      } finally {
        foreach (var entry in estimatedValuesEnumerators) entry.EstimatedValuesEnumerator.Dispose();
      }
    }

    private bool RowIsTrainingForModel(int currentRow, IClassificationModel model) {
      return RowIsInPartition(trainingPartitions, currentRow, model);
    }

    private bool RowIsTestForModel(int currentRow, IClassificationModel model) {
      return RowIsInPartition(testPartitions, currentRow, model);
    }

    // True if no partition is known for the model, or if row lies in the model's partition [Start, End).
    private static bool RowIsInPartition(Dictionary<IClassificationModel, IntRange> partitions, int row, IClassificationModel model) {
      IntRange partition;
      if (partitions == null || !partitions.TryGetValue(model, out partition)) return true;
      return partition.Start <= row && row < partition.End;
    }

    /// <summary>
    /// Returns the aggregated estimates of all member models for the given rows of the solution's dataset.
    /// </summary>
    /// <param name="rows">The row indices to evaluate; the sequence is enumerated more than once.</param>
    /// <returns>A lazily evaluated sequence with one estimate per element of <paramref name="rows"/>.</returns>
    public override IEnumerable<double> GetEstimatedClassValues(IEnumerable<int> rows) {
      // materialized once: the set difference depends on the cache that is filled below
      var rowsToEvaluate = rows.Except(evaluationCache.Keys).ToList();
      var values = from xs in GetEstimatedClassValueVectors(ProblemData.Dataset, rowsToEvaluate)
                   select AggregateEstimatedClassValues(xs);
      CacheEstimatedValues(evaluationCache, rowsToEvaluate, values);

      // NOTE(review): without member models nothing is cached above, so enumerating the result
      // fails with a KeyNotFoundException - confirm whether that is intended.
      return rows.Select(row => evaluationCache[row]);
    }

    /// <summary>
    /// Returns, for each row, the class values estimated by the individual member models.
    /// </summary>
    /// <param name="dataset">The dataset passed to every model.</param>
    /// <param name="rows">The row indices passed to every model.</param>
    /// <returns>
    /// One vector per row with one entry per model; empty if the ensemble contains no models.
    /// The sequence ends as soon as any model yields no further value.
    /// </returns>
    public IEnumerable<IEnumerable<double>> GetEstimatedClassValueVectors(Dataset dataset, IEnumerable<int> rows) {
      // without this guard All() on an empty list would be true forever
      if (!Model.Models.Any()) yield break;
      var estimatedValuesEnumerators = (from model in Model.Models
                                        select model.GetEstimatedClassValues(dataset, rows).GetEnumerator())
                                       .ToList();
      try {
        while (estimatedValuesEnumerators.All(en => en.MoveNext())) {
          // snapshot the current values; a lazy query over the enumerators would show the values
          // of a later row if the caller does not consume each vector immediately
          yield return estimatedValuesEnumerators.Select(en => en.Current).ToArray();
        }
      } finally {
        foreach (var enumerator in estimatedValuesEnumerators) enumerator.Dispose();
      }
    }

    // Majority vote: returns the most frequent value, or NaN for an empty sequence.
    // GroupBy yields groups in order of first occurrence and the sort is stable,
    // so on a tie the value that occurred first wins.
    private double AggregateEstimatedClassValues(IEnumerable<double> estimatedClassValues) {
      return estimatedClassValues
      .GroupBy(x => x)
      .OrderByDescending(g => g.Count())
      .Select(g => g.Key)
      .DefaultIfEmpty(double.NaN)
      .First();
    }
    #endregion

    /// <summary>
    /// Clears all caches and pushes the new problem data to the member solutions and stored partitions.
    /// Nested ensemble solutions receive the ensemble problem data itself, all other solutions share one
    /// newly created <see cref="ClassificationProblemData"/> with the same dataset, inputs, target and partitions.
    /// </summary>
    protected override void OnProblemDataChanged() {
      ClearEvaluationCaches();

      IClassificationProblemData problemData = new ClassificationProblemData(ProblemData.Dataset,
                                                                     ProblemData.AllowedInputVariables,
                                                                     ProblemData.TargetVariable);
      problemData.TrainingPartition.Start = ProblemData.TrainingPartition.Start;
      problemData.TrainingPartition.End = ProblemData.TrainingPartition.End;
      problemData.TestPartition.Start = ProblemData.TestPartition.Start;
      problemData.TestPartition.End = ProblemData.TestPartition.End;

      foreach (var solution in ClassificationSolutions) {
        if (solution is ClassificationEnsembleSolution)
          solution.ProblemData = ProblemData;
        else
          solution.ProblemData = problemData;
      }
      foreach (var trainingPartition in trainingPartitions.Values) {
        trainingPartition.Start = ProblemData.TrainingPartition.Start;
        trainingPartition.End = ProblemData.TrainingPartition.End;
      }
      foreach (var testPartition in testPartitions.Values) {
        testPartition.Start = ProblemData.TestPartition.Start;
        testPartition.End = ProblemData.TestPartition.End;
      }

      base.OnProblemDataChanged();
    }

    /// <summary>Adds the given solutions to the ensemble and clears all caches.</summary>
    public void AddClassificationSolutions(IEnumerable<IClassificationSolution> solutions) {
      classificationSolutions.AddRange(solutions);

      ClearEvaluationCaches();
    }

    /// <summary>Removes the given solutions from the ensemble and clears all caches.</summary>
    public void RemoveClassificationSolutions(IEnumerable<IClassificationSolution> solutions) {
      classificationSolutions.RemoveRange(solutions);

      ClearEvaluationCaches();
    }

    private void classificationSolutions_ItemsAdded(object sender, CollectionItemsChangedEventArgs<IClassificationSolution> e) {
      foreach (var solution in e.Items) AddClassificationSolution(solution);
      RecalculateResults();
    }
    private void classificationSolutions_ItemsRemoved(object sender, CollectionItemsChangedEventArgs<IClassificationSolution> e) {
      foreach (var solution in e.Items) RemoveClassificationSolution(solution);
      RecalculateResults();
    }
    private void classificationSolutions_CollectionReset(object sender, CollectionItemsChangedEventArgs<IClassificationSolution> e) {
      foreach (var solution in e.OldItems) RemoveClassificationSolution(solution);
      foreach (var solution in e.Items) AddClassificationSolution(solution);
      RecalculateResults();
    }

    // Adds the solution's model to the ensemble model and remembers the solution's partitions for it.
    private void AddClassificationSolution(IClassificationSolution solution) {
      if (Model.Models.Contains(solution.Model))
        throw new ArgumentException("The model of the solution is already part of the ensemble.");
      Model.Add(solution.Model);
      trainingPartitions[solution.Model] = solution.ProblemData.TrainingPartition;
      testPartitions[solution.Model] = solution.ProblemData.TestPartition;

      ClearEvaluationCaches();
    }

    // Removes the solution's model from the ensemble model and forgets its partitions.
    private void RemoveClassificationSolution(IClassificationSolution solution) {
      if (!Model.Models.Contains(solution.Model))
        throw new ArgumentException("The model of the solution is not part of the ensemble.");
      Model.Remove(solution.Model);
      trainingPartitions.Remove(solution.Model);
      testPartitions.Remove(solution.Model);

      ClearEvaluationCaches();
    }

    // Discards all cached estimates; they are recomputed on the next access.
    private void ClearEvaluationCaches() {
      trainingEvaluationCache.Clear();
      testEvaluationCache.Clear();
      evaluationCache.Clear();
    }
  }
}
Note: See TracBrowser for help on using the repository browser.