Changeset 6239
- Timestamp:
- 05/20/11 16:10:07 (14 years ago)
- Location:
- trunk/sources
- Files:
-
- 1 added
- 8 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/CrossValidation.cs
r6184 r6239 376 376 results.Add(result.Name, result.Value); 377 377 } 378 foreach (IResult result in ExtractAndAggregateClassificationSolutions(resultCollections)) { 379 results.Add(result.Name, result.Value); 380 } 378 381 results.Add("Execution Time", new TimeSpanValue(this.ExecutionTime)); 379 382 results.Add("CrossValidation Folds", new RunCollection(runs)); … … 406 409 } 407 410 411 private IEnumerable<IResult> ExtractAndAggregateClassificationSolutions(IEnumerable<KeyValuePair<string, IItem>> resultCollections) { 412 Dictionary<string, List<IClassificationSolution>> resultSolutions = new Dictionary<string, List<IClassificationSolution>>(); 413 foreach (var result in resultCollections) { 414 var classificationSolution = result.Value as IClassificationSolution; 415 if (classificationSolution != null) { 416 if (resultSolutions.ContainsKey(result.Key)) { 417 resultSolutions[result.Key].Add(classificationSolution); 418 } else { 419 resultSolutions.Add(result.Key, new List<IClassificationSolution>() { classificationSolution }); 420 } 421 } 422 } 423 List<IResult> aggregatedResults = new List<IResult>(); 424 foreach (KeyValuePair<string, List<IClassificationSolution>> solutions in resultSolutions) { 425 var problemDataClone = (IClassificationProblemData)Problem.ProblemData.Clone(); 426 problemDataClone.TrainingPartition.Start = SamplesStart.Value; problemDataClone.TrainingPartition.End = SamplesEnd.Value; 427 problemDataClone.TestPartition.Start = SamplesStart.Value; problemDataClone.TestPartition.End = SamplesEnd.Value; 428 var ensembleSolution = new ClassificationEnsembleSolution(solutions.Value.Select(x => x.Model), problemDataClone, 429 solutions.Value.Select(x => x.ProblemData.TrainingPartition), 430 solutions.Value.Select(x => x.ProblemData.TestPartition)); 431 432 aggregatedResults.Add(new Result(solutions.Key, ensembleSolution)); 433 } 434 return aggregatedResults; 435 } 436 408 437 private static IEnumerable<IResult> ExtractAndAggregateResults<T>(IEnumerable<KeyValuePair<string, IItem>> results) 409 438 where T : class, IItem, new() { -
trunk/sources/HeuristicLab.Problems.DataAnalysis.Views/3.4/Classification/ClassificationSolutionConfusionMatrixView.cs
r5975 r6239 103 103 IEnumerable<int> rows; 104 104 105 double[] predictedValues; 105 106 if (cmbSamples.SelectedItem.ToString() == TrainingSamples) { 106 107 rows = Content.ProblemData.TrainingIndizes; 108 predictedValues = Content.EstimatedTrainingClassValues.ToArray(); 107 109 } else if (cmbSamples.SelectedItem.ToString() == TestSamples) { 108 110 rows = Content.ProblemData.TestIndizes; 111 predictedValues = Content.EstimatedTestClassValues.ToArray(); 109 112 } else throw new InvalidOperationException(); 113 114 double[] targetValues = Content.ProblemData.Dataset.GetEnumeratedVariableValues(Content.ProblemData.TargetVariable, rows).ToArray(); 110 115 111 116 Dictionary<double, int> classValueIndexMapping = new Dictionary<double, int>(); … … 115 120 index++; 116 121 } 117 118 double[] targetValues = Content.ProblemData.Dataset.GetEnumeratedVariableValues(Content.ProblemData.TargetVariable, rows).ToArray();119 double[] predictedValues = Content.GetEstimatedClassValues(rows).ToArray();120 122 121 123 for (int i = 0; i < targetValues.Length; i++) { -
trunk/sources/HeuristicLab.Problems.DataAnalysis.Views/3.4/Classification/ClassificationSolutionEstimatedClassValuesView.cs
r5975 r6239 32 32 [Content(typeof(IClassificationSolution))] 33 33 public partial class ClassificationSolutionEstimatedClassValuesView : ItemView, IClassificationSolutionEvaluationView { 34 private const string TARGETVARIABLE_SERIES_NAME = "TargetVariable"; 35 private const string ESTIMATEDVALUES_SERIES_NAME = "EstimatedClassValues"; 34 private const string TARGETVARIABLE_SERIES_NAME = "Target Variable"; 35 private const string ESTIMATEDVALUES_TRAINING_SERIES_NAME = "Estimated Class Values (training)"; 36 private const string ESTIMATEDVALUES_TEST_SERIES_NAME = "Estimated Class Values (test)"; 36 37 37 38 public new IClassificationSolution Content { … … 85 86 DoubleMatrix matrix = null; 86 87 if (Content != null) { 87 double[,] values = new double[Content.ProblemData.Dataset.Rows, 2]; 88 double[,] values = new double[Content.ProblemData.Dataset.Rows, 3]; 89 // fill with NaN 90 for (int row = 0; row < Content.ProblemData.Dataset.Rows; row++) 91 for (int column = 0; column < 3; column++) 92 values[row, column] = double.NaN; 88 93 89 94 double[] target = Content.ProblemData.Dataset.GetVariableValues(Content.ProblemData.TargetVariable); 90 double[] estimated = Content.EstimatedClassValues.ToArray();91 95 for (int row = 0; row < target.Length; row++) { 92 96 values[row, 0] = target[row]; 93 values[row, 1] = estimated[row]; 97 } 98 var estimatedTraining = Content.EstimatedTrainingClassValues.GetEnumerator(); 99 estimatedTraining.MoveNext(); 100 foreach (var trainingRow in Content.ProblemData.TrainingIndizes) { 101 values[trainingRow, 1] = estimatedTraining.Current; 102 estimatedTraining.MoveNext(); 103 } 104 var estimatedTest = Content.EstimatedTestClassValues.GetEnumerator(); 105 estimatedTest.MoveNext(); 106 foreach (var testRow in Content.ProblemData.TestIndizes) { 107 values[testRow, 2] = estimatedTest.Current; 108 estimatedTest.MoveNext(); 94 109 } 95 110 96 111 matrix = new DoubleMatrix(values); 97 matrix.ColumnNames = new string[] { TARGETVARIABLE_SERIES_NAME, ESTIMATEDVALUES_ SERIES_NAME };112 matrix.ColumnNames = new string[] { TARGETVARIABLE_SERIES_NAME, ESTIMATEDVALUES_TRAINING_SERIES_NAME, ESTIMATEDVALUES_TEST_SERIES_NAME }; 98 113 } 99 114 matrixView.Content = matrix; -
trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/HeuristicLab.Problems.DataAnalysis-3.4.csproj
r6238 r6239 109 109 <ItemGroup> 110 110 <Compile Include="DoubleLimit.cs" /> 111 <Compile Include="Implementation\Classification\ClassificationEnsembleModel.cs"> 112 <SubType>Code</SubType> 113 </Compile> 114 <Compile Include="Implementation\Classification\ClassificationEnsembleSolution.cs" /> 111 115 <Compile Include="Implementation\Classification\ClassificationProblemData.cs" /> 112 116 <Compile Include="Implementation\Classification\ClassificationProblem.cs" /> 113 117 <Compile Include="Implementation\Classification\ClassificationSolution.cs" /> 118 <Compile Include="Implementation\Classification\ClassificationEnsembleProblemData.cs" /> 114 119 <Compile Include="Implementation\Clustering\ClusteringProblem.cs" /> 115 120 <Compile Include="Implementation\Clustering\ClusteringProblemData.cs" /> … … 120 125 </Compile> 121 126 <Compile Include="Implementation\Regression\RegressionEnsembleSolution.cs" /> 127 <Compile Include="Interfaces\Classification\IClassificationEnsembleModel.cs"> 128 <SubType>Code</SubType> 129 </Compile> 130 <Compile Include="Interfaces\Classification\IClassificationEnsembleSolution.cs"> 131 <SubType>Code</SubType> 132 </Compile> 122 133 <Compile Include="Interfaces\Classification\IDiscriminantFunctionThresholdCalculator.cs" /> 123 134 <Compile Include="Interfaces\Regression\IRegressionEnsembleModel.cs"> -
trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationEnsembleModel.cs
r5809 r6239 39 39 get { return new List<IClassificationModel>(models); } 40 40 } 41 41 42 [StorableConstructor] 42 43 protected ClassificationEnsembleModel(bool deserializing) : base(deserializing) { } -
trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationEnsembleSolution.cs
r6184 r6239 25 25 using HeuristicLab.Core; 26 26 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; 27 using HeuristicLab.Data; 28 using System; 27 29 28 30 namespace HeuristicLab.Problems.DataAnalysis { … … 33 35 [Item("Classification Ensemble Solution", "A classification solution that contains an ensemble of multiple classification models")] 34 36 // [Creatable("Data Analysis")] 35 public class ClassificationEnsembleSolution : NamedItem, IClassificationEnsembleSolution { 37 public class ClassificationEnsembleSolution : ClassificationSolution, IClassificationEnsembleSolution { 38 39 public new IClassificationEnsembleModel Model { 40 set { base.Model = value; } 41 get { return (IClassificationEnsembleModel)base.Model; } 42 } 36 43 37 44 [Storable] 38 private List<IClassificationModel> models; 39 public IEnumerable<IClassificationModel> Models { 40 get { return new List<IClassificationModel>(models); } 41 } 45 private Dictionary<IClassificationModel, IntRange> trainingPartitions; 46 [Storable] 47 private Dictionary<IClassificationModel, IntRange> testPartitions; 48 49 42 50 [StorableConstructor] 43 51 protected ClassificationEnsembleSolution(bool deserializing) : base(deserializing) { } 44 52 protected ClassificationEnsembleSolution(ClassificationEnsembleSolution original, Cloner cloner) 45 53 : base(original, cloner) { 46 this.models = original.Models.Select(m => cloner.Clone(m)).ToList(); 54 trainingPartitions = new Dictionary<IClassificationModel, IntRange>(); 55 testPartitions = new Dictionary<IClassificationModel, IntRange>(); 56 foreach (var model in Model.Models) { 57 trainingPartitions[model] = (IntRange)ProblemData.TrainingPartition.Clone(); 58 testPartitions[model] = (IntRange)ProblemData.TestPartition.Clone(); 59 } 60 RecalculateResults(); 47 61 } 48 public ClassificationEnsembleSolution(IEnumerable<IClassificationModel> models )49 : base( ) {62 public ClassificationEnsembleSolution(IEnumerable<IClassificationModel> models, IClassificationProblemData problemData) 63 : base(new ClassificationEnsembleModel(models), new ClassificationEnsembleProblemData(problemData)) { 50 64 this.name = ItemName; 51 65 this.description = ItemDescription; 52 this.models = new List<IClassificationModel>(models); 66 trainingPartitions = new Dictionary<IClassificationModel, IntRange>(); 67 testPartitions = new Dictionary<IClassificationModel, IntRange>(); 68 foreach (var model in models) { 69 trainingPartitions[model] = (IntRange)problemData.TrainingPartition.Clone(); 70 testPartitions[model] = (IntRange)problemData.TestPartition.Clone(); 71 } 72 RecalculateResults(); 73 } 74 75 public ClassificationEnsembleSolution(IEnumerable<IClassificationModel> models, IClassificationProblemData problemData, IEnumerable<IntRange> trainingPartitions, IEnumerable<IntRange> testPartitions) 76 : base(new ClassificationEnsembleModel(models), new ClassificationEnsembleProblemData(problemData)) { 77 this.trainingPartitions = new Dictionary<IClassificationModel, IntRange>(); 78 this.testPartitions = new Dictionary<IClassificationModel, IntRange>(); 79 var modelEnumerator = models.GetEnumerator(); 80 var trainingPartitionEnumerator = trainingPartitions.GetEnumerator(); 81 var testPartitionEnumerator = testPartitions.GetEnumerator(); 82 while (modelEnumerator.MoveNext() & trainingPartitionEnumerator.MoveNext() & testPartitionEnumerator.MoveNext()) { 83 this.trainingPartitions[modelEnumerator.Current] = (IntRange)trainingPartitionEnumerator.Current.Clone(); 84 this.testPartitions[modelEnumerator.Current] = (IntRange)testPartitionEnumerator.Current.Clone(); 85 } 86 if (modelEnumerator.MoveNext() | trainingPartitionEnumerator.MoveNext() | testPartitionEnumerator.MoveNext()) { 87 throw new ArgumentException(); 88 } 89 RecalculateResults(); 53 90 } 54 91 … … 57 94 } 58 95 59 #region IClassificationEnsembleModel Members 96 public override IEnumerable<double> EstimatedTrainingClassValues { 97 get { 98 var rows = ProblemData.TrainingIndizes; 99 var estimatedValuesEnumerators = (from model in Model.Models 100 select new { Model = model, EstimatedValuesEnumerator = model.GetEstimatedClassValues(ProblemData.Dataset, rows).GetEnumerator() }) 101 .ToList(); 102 var rowsEnumerator = rows.GetEnumerator(); 103 // aggregate to make sure that MoveNext is called for all enumerators 104 while (rowsEnumerator.MoveNext() & estimatedValuesEnumerators.Select(en => en.EstimatedValuesEnumerator.MoveNext()).Aggregate(true, (acc, b) => acc & b)) { 105 int currentRow = rowsEnumerator.Current; 106 107 var selectedEnumerators = from pair in estimatedValuesEnumerators 108 where trainingPartitions == null || !trainingPartitions.ContainsKey(pair.Model) || 109 (trainingPartitions[pair.Model].Start <= currentRow && currentRow < trainingPartitions[pair.Model].End) 110 select pair.EstimatedValuesEnumerator; 111 yield return AggregateEstimatedClassValues(selectedEnumerators.Select(x => x.Current)); 112 } 113 } 114 } 115 116 public override IEnumerable<double> EstimatedTestClassValues { 117 get { 118 var rows = ProblemData.TestIndizes; 119 var estimatedValuesEnumerators = (from model in Model.Models 120 select new { Model = model, EstimatedValuesEnumerator = model.GetEstimatedClassValues(ProblemData.Dataset, rows).GetEnumerator() }) 121 .ToList(); 122 var rowsEnumerator = ProblemData.TestIndizes.GetEnumerator(); 123 // aggregate to make sure that MoveNext is called for all enumerators 124 while (rowsEnumerator.MoveNext() & estimatedValuesEnumerators.Select(en => en.EstimatedValuesEnumerator.MoveNext()).Aggregate(true, (acc, b) => acc & b)) { 125 int currentRow = rowsEnumerator.Current; 126 127 var selectedEnumerators = from pair in estimatedValuesEnumerators 128 where testPartitions == null || !testPartitions.ContainsKey(pair.Model) || 129 (testPartitions[pair.Model].Start <= currentRow && currentRow < testPartitions[pair.Model].End) 130 select pair.EstimatedValuesEnumerator; 131 132 yield return AggregateEstimatedClassValues(selectedEnumerators.Select(x => x.Current)); 133 } 134 } 135 } 136 137 public override IEnumerable<double> GetEstimatedClassValues(IEnumerable<int> rows) { 138 return from xs in GetEstimatedClassValueVectors(ProblemData.Dataset, rows) 139 select AggregateEstimatedClassValues(xs); 140 } 60 141 61 142 public IEnumerable<IEnumerable<double>> GetEstimatedClassValueVectors(Dataset dataset, IEnumerable<int> rows) { 62 var estimatedValuesEnumerators = (from model in models143 var estimatedValuesEnumerators = (from model in Model.Models 63 144 select model.GetEstimatedClassValues(dataset, rows).GetEnumerator()) 64 145 .ToList(); … … 70 151 } 71 152 72 #endregion 73 74 #region IClassificationModel Members 75 76 public IEnumerable<double> GetEstimatedClassValues(Dataset dataset, IEnumerable<int> rows) { 77 foreach (var estimatedValuesVector in GetEstimatedClassValueVectors(dataset, rows)) { 78 // return the class which is most often occuring 79 yield return 80 estimatedValuesVector 81 .GroupBy(x => x) 82 .OrderBy(g => -g.Count()) 83 .Select(g => g.Key) 84 .First(); 85 } 153 private double AggregateEstimatedClassValues(IEnumerable<double> estimatedClassValues) { 154 return estimatedClassValues 155 .GroupBy(x => x) 156 .OrderBy(g => -g.Count()) 157 .Select(g => g.Key) 158 .First(); 86 159 } 87 88 #endregion89 160 } 90 161 } -
trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionEnsembleSolution.cs
r6238 r6239 49 49 protected RegressionEnsembleSolution(RegressionEnsembleSolution original, Cloner cloner) 50 50 : base(original, cloner) { 51 trainingPartitions = new Dictionary<IRegressionModel, IntRange>(); 52 testPartitions = new Dictionary<IRegressionModel, IntRange>(); 53 foreach (var model in Model.Models) { 54 trainingPartitions[model] = (IntRange)ProblemData.TrainingPartition.Clone(); 55 testPartitions[model] = (IntRange)ProblemData.TestPartition.Clone(); 56 } 51 57 } 58 52 59 public RegressionEnsembleSolution(IEnumerable<IRegressionModel> models, IRegressionProblemData problemData) 53 60 : base(new RegressionEnsembleModel(models), new RegressionEnsembleProblemData(problemData)) { … … 141 148 private double AggregateEstimatedValues(IEnumerable<double> estimatedValues) { 142 149 return estimatedValues.DefaultIfEmpty(double.NaN).Average(); 143 } 144 145 //[Storable] 146 //private string name; 147 //public string Name { 148 // get { 149 // return name; 150 // } 151 // set { 152 // if (value != null && value != name) { 153 // var cancelEventArgs = new CancelEventArgs<string>(value); 154 // OnNameChanging(cancelEventArgs); 155 // if (cancelEventArgs.Cancel == false) { 156 // name = value; 157 // OnNamedChanged(EventArgs.Empty); 158 // } 159 // } 160 // } 161 //} 162 163 //public bool CanChangeName { 164 // get { return true; } 165 //} 166 167 //[Storable] 168 //private string description; 169 //public string Description { 170 // get { 171 // return description; 172 // } 173 // set { 174 // if (value != null && value != description) { 175 // description = value; 176 // OnDescriptionChanged(EventArgs.Empty); 177 // } 178 // } 179 //} 180 181 //public bool CanChangeDescription { 182 // get { return true; } 183 //} 184 185 //#region events 186 //public event EventHandler<CancelEventArgs<string>> NameChanging; 187 //private void OnNameChanging(CancelEventArgs<string> cancelEventArgs) { 188 // var listener = NameChanging; 189 // if (listener != null) listener(this, cancelEventArgs); 190 //} 191 192 //public event EventHandler NameChanged; 193 //private void OnNamedChanged(EventArgs e) { 194 // var listener = NameChanged; 195 // if (listener != null) listener(this, e); 196 //} 197 198 //public event EventHandler DescriptionChanged; 199 //private void OnDescriptionChanged(EventArgs e) { 200 // var listener = DescriptionChanged; 201 // if (listener != null) listener(this, e); 202 //} 203 // #endregion 150 } 204 151 } 205 152 } -
trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Interfaces/Classification/IClassificationEnsembleSolution.cs
r6184 r6239 23 23 namespace HeuristicLab.Problems.DataAnalysis { 24 24 public interface IClassificationEnsembleSolution : IClassificationSolution { 25 IEnumerable<IClassificationModel> Models{ get; }25 new IClassificationEnsembleModel Model { get; } 26 26 IEnumerable<IEnumerable<double>> GetEstimatedClassValueVectors(Dataset dataset, IEnumerable<int> rows); 27 27 }
Note: See TracChangeset
for help on using the changeset viewer.