Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
05/20/11 16:10:07 (14 years ago)
Author:
gkronber
Message:

#1450: implemented support for ensemble solutions for classification.

Location:
trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation
Files:
1 added
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationEnsembleModel.cs

    r5809 r6239  
    3939      get { return new List<IClassificationModel>(models); }
    4040    }
     41
    4142    [StorableConstructor]
    4243    protected ClassificationEnsembleModel(bool deserializing) : base(deserializing) { }
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationEnsembleSolution.cs

    r6184 r6239  
    2525using HeuristicLab.Core;
    2626using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
     27using HeuristicLab.Data;
     28using System;
    2729
    2830namespace HeuristicLab.Problems.DataAnalysis {
     
    3335  [Item("Classification Ensemble Solution", "A classification solution that contains an ensemble of multiple classification models")]
    3436  // [Creatable("Data Analysis")]
    35   public class ClassificationEnsembleSolution : NamedItem, IClassificationEnsembleSolution {
     37  public class ClassificationEnsembleSolution : ClassificationSolution, IClassificationEnsembleSolution {
     38
     39    public new IClassificationEnsembleModel Model {
     40      set { base.Model = value; }
     41      get { return (IClassificationEnsembleModel)base.Model; }
     42    }
    3643
    3744    [Storable]
    38     private List<IClassificationModel> models;
    39     public IEnumerable<IClassificationModel> Models {
    40       get { return new List<IClassificationModel>(models); }
    41     }
     45    private Dictionary<IClassificationModel, IntRange> trainingPartitions;
     46    [Storable]
     47    private Dictionary<IClassificationModel, IntRange> testPartitions;
     48
     49
    4250    [StorableConstructor]
    4351    protected ClassificationEnsembleSolution(bool deserializing) : base(deserializing) { }
    4452    protected ClassificationEnsembleSolution(ClassificationEnsembleSolution original, Cloner cloner)
    4553      : base(original, cloner) {
    46       this.models = original.Models.Select(m => cloner.Clone(m)).ToList();
     54      trainingPartitions = new Dictionary<IClassificationModel, IntRange>();
     55      testPartitions = new Dictionary<IClassificationModel, IntRange>();
     56      foreach (var model in Model.Models) {
     57        trainingPartitions[model] = (IntRange)ProblemData.TrainingPartition.Clone();
     58        testPartitions[model] = (IntRange)ProblemData.TestPartition.Clone();
     59      }
     60      RecalculateResults();
    4761    }
    48     public ClassificationEnsembleSolution(IEnumerable<IClassificationModel> models)
    49       : base() {
     62    public ClassificationEnsembleSolution(IEnumerable<IClassificationModel> models, IClassificationProblemData problemData)
     63      : base(new ClassificationEnsembleModel(models), new ClassificationEnsembleProblemData(problemData)) {
    5064      this.name = ItemName;
    5165      this.description = ItemDescription;
    52       this.models = new List<IClassificationModel>(models);
     66      trainingPartitions = new Dictionary<IClassificationModel, IntRange>();
     67      testPartitions = new Dictionary<IClassificationModel, IntRange>();
     68      foreach (var model in models) {
     69        trainingPartitions[model] = (IntRange)problemData.TrainingPartition.Clone();
     70        testPartitions[model] = (IntRange)problemData.TestPartition.Clone();
     71      }
     72      RecalculateResults();
     73    }
     74
     75    public ClassificationEnsembleSolution(IEnumerable<IClassificationModel> models, IClassificationProblemData problemData, IEnumerable<IntRange> trainingPartitions, IEnumerable<IntRange> testPartitions)
     76      : base(new ClassificationEnsembleModel(models), new ClassificationEnsembleProblemData(problemData)) {
     77      this.trainingPartitions = new Dictionary<IClassificationModel, IntRange>();
     78      this.testPartitions = new Dictionary<IClassificationModel, IntRange>();
     79      var modelEnumerator = models.GetEnumerator();
     80      var trainingPartitionEnumerator = trainingPartitions.GetEnumerator();
     81      var testPartitionEnumerator = testPartitions.GetEnumerator();
     82      while (modelEnumerator.MoveNext() & trainingPartitionEnumerator.MoveNext() & testPartitionEnumerator.MoveNext()) {
     83        this.trainingPartitions[modelEnumerator.Current] = (IntRange)trainingPartitionEnumerator.Current.Clone();
     84        this.testPartitions[modelEnumerator.Current] = (IntRange)testPartitionEnumerator.Current.Clone();
     85      }
     86      if (modelEnumerator.MoveNext() | trainingPartitionEnumerator.MoveNext() | testPartitionEnumerator.MoveNext()) {
     87        throw new ArgumentException();
     88      }
     89      RecalculateResults();
    5390    }
    5491
     
    5794    }
    5895
    59     #region IClassificationEnsembleModel Members
     96    public override IEnumerable<double> EstimatedTrainingClassValues {
     97      get {
     98        var rows = ProblemData.TrainingIndizes;
     99        var estimatedValuesEnumerators = (from model in Model.Models
     100                                          select new { Model = model, EstimatedValuesEnumerator = model.GetEstimatedClassValues(ProblemData.Dataset, rows).GetEnumerator() })
     101                                         .ToList();
     102        var rowsEnumerator = rows.GetEnumerator();
     103        // aggregate to make sure that MoveNext is called for all enumerators
     104        while (rowsEnumerator.MoveNext() & estimatedValuesEnumerators.Select(en => en.EstimatedValuesEnumerator.MoveNext()).Aggregate(true, (acc, b) => acc & b)) {
     105          int currentRow = rowsEnumerator.Current;
     106
     107          var selectedEnumerators = from pair in estimatedValuesEnumerators
     108                                    where trainingPartitions == null || !trainingPartitions.ContainsKey(pair.Model) ||
     109                                         (trainingPartitions[pair.Model].Start <= currentRow && currentRow < trainingPartitions[pair.Model].End)
     110                                    select pair.EstimatedValuesEnumerator;
     111          yield return AggregateEstimatedClassValues(selectedEnumerators.Select(x => x.Current));
     112        }
     113      }
     114    }
     115
     116    public override IEnumerable<double> EstimatedTestClassValues {
     117      get {
     118        var rows = ProblemData.TestIndizes;
     119        var estimatedValuesEnumerators = (from model in Model.Models
     120                                          select new { Model = model, EstimatedValuesEnumerator = model.GetEstimatedClassValues(ProblemData.Dataset, rows).GetEnumerator() })
     121                                         .ToList();
     122        var rowsEnumerator = ProblemData.TestIndizes.GetEnumerator();
     123        // aggregate to make sure that MoveNext is called for all enumerators
     124        while (rowsEnumerator.MoveNext() & estimatedValuesEnumerators.Select(en => en.EstimatedValuesEnumerator.MoveNext()).Aggregate(true, (acc, b) => acc & b)) {
     125          int currentRow = rowsEnumerator.Current;
     126
     127          var selectedEnumerators = from pair in estimatedValuesEnumerators
     128                                    where testPartitions == null || !testPartitions.ContainsKey(pair.Model) ||
     129                                      (testPartitions[pair.Model].Start <= currentRow && currentRow < testPartitions[pair.Model].End)
     130                                    select pair.EstimatedValuesEnumerator;
     131
     132          yield return AggregateEstimatedClassValues(selectedEnumerators.Select(x => x.Current));
     133        }
     134      }
     135    }
     136
     137    public override IEnumerable<double> GetEstimatedClassValues(IEnumerable<int> rows) {
     138      return from xs in GetEstimatedClassValueVectors(ProblemData.Dataset, rows)
     139             select AggregateEstimatedClassValues(xs);
     140    }
    60141
    61142    public IEnumerable<IEnumerable<double>> GetEstimatedClassValueVectors(Dataset dataset, IEnumerable<int> rows) {
    62       var estimatedValuesEnumerators = (from model in models
     143      var estimatedValuesEnumerators = (from model in Model.Models
    63144                                        select model.GetEstimatedClassValues(dataset, rows).GetEnumerator())
    64145                                       .ToList();
     
    70151    }
    71152
    72     #endregion
    73 
    74     #region IClassificationModel Members
    75 
    76     public IEnumerable<double> GetEstimatedClassValues(Dataset dataset, IEnumerable<int> rows) {
    77       foreach (var estimatedValuesVector in GetEstimatedClassValueVectors(dataset, rows)) {
    78         // return the class which is most often occuring
    79         yield return
    80           estimatedValuesVector
    81           .GroupBy(x => x)
    82           .OrderBy(g => -g.Count())
    83           .Select(g => g.Key)
    84           .First();
    85       }
     153    private double AggregateEstimatedClassValues(IEnumerable<double> estimatedClassValues) {
     154      return estimatedClassValues
     155      .GroupBy(x => x)
     156      .OrderBy(g => -g.Count())
     157      .Select(g => g.Key)
     158      .First();
    86159    }
    87 
    88     #endregion
    89160  }
    90161}
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionEnsembleSolution.cs

    r6238 r6239  
    4949    protected RegressionEnsembleSolution(RegressionEnsembleSolution original, Cloner cloner)
    5050      : base(original, cloner) {
     51      trainingPartitions = new Dictionary<IRegressionModel, IntRange>();
     52      testPartitions = new Dictionary<IRegressionModel, IntRange>();
     53      foreach (var model in Model.Models) {
     54        trainingPartitions[model] = (IntRange)ProblemData.TrainingPartition.Clone();
     55        testPartitions[model] = (IntRange)ProblemData.TestPartition.Clone();
     56      }
    5157    }
     58
    5259    public RegressionEnsembleSolution(IEnumerable<IRegressionModel> models, IRegressionProblemData problemData)
    5360      : base(new RegressionEnsembleModel(models), new RegressionEnsembleProblemData(problemData)) {
     
    141148    private double AggregateEstimatedValues(IEnumerable<double> estimatedValues) {
    142149      return estimatedValues.DefaultIfEmpty(double.NaN).Average();
    143     }
    144 
    145     //[Storable]
    146     //private string name;
    147     //public string Name {
    148     //  get {
    149     //    return name;
    150     //  }
    151     //  set {
    152     //    if (value != null && value != name) {
    153     //      var cancelEventArgs = new CancelEventArgs<string>(value);
    154     //      OnNameChanging(cancelEventArgs);
    155     //      if (cancelEventArgs.Cancel == false) {
    156     //        name = value;
    157     //        OnNamedChanged(EventArgs.Empty);
    158     //      }
    159     //    }
    160     //  }
    161     //}
    162 
    163     //public bool CanChangeName {
    164     //  get { return true; }
    165     //}
    166 
    167     //[Storable]
    168     //private string description;
    169     //public string Description {
    170     //  get {
    171     //    return description;
    172     //  }
    173     //  set {
    174     //    if (value != null && value != description) {
    175     //      description = value;
    176     //      OnDescriptionChanged(EventArgs.Empty);
    177     //    }
    178     //  }
    179     //}
    180 
    181     //public bool CanChangeDescription {
    182     //  get { return true; }
    183     //}
    184 
    185     //#region events
    186     //public event EventHandler<CancelEventArgs<string>> NameChanging;
    187     //private void OnNameChanging(CancelEventArgs<string> cancelEventArgs) {
    188     //  var listener = NameChanging;
    189     //  if (listener != null) listener(this, cancelEventArgs);
    190     //}
    191 
    192     //public event EventHandler NameChanged;
    193     //private void OnNamedChanged(EventArgs e) {
    194     //  var listener = NameChanged;
    195     //  if (listener != null) listener(this, e);
    196     //}
    197 
    198     //public event EventHandler DescriptionChanged;
    199     //private void OnDescriptionChanged(EventArgs e) {
    200     //  var listener = DescriptionChanged;
    201     //  if (listener != null) listener(this, e);
    202     //}
    203     // #endregion
     150    }   
    204151  }
    205152}
Note: See TracChangeset for help on using the changeset viewer.