Changeset 14869 for branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/BaselineClassifiers
- Timestamp: 04/14/17 08:58:45 (8 years ago)
- Location: branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4
- Files:
  - 5 edited
  - 2 copied
Legend:
- Unmodified
- Added
- Removed
-
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4
-
Property
svn:mergeinfo
set to
(toggle deleted branches)
/stable/HeuristicLab.Algorithms.DataAnalysis/3.4 merged eligible /trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4 merged eligible /branches/1721-RandomForestPersistence/HeuristicLab.Algorithms.DataAnalysis/3.4 10321-10322 /branches/Benchmarking/sources/HeuristicLab.Algorithms.DataAnalysis/3.4 6917-7005 /branches/ClassificationModelComparison/HeuristicLab.Algorithms.DataAnalysis/3.4 9070-13099 /branches/CloningRefactoring/HeuristicLab.Algorithms.DataAnalysis/3.4 4656-4721 /branches/DataAnalysis Refactoring/HeuristicLab.Algorithms.DataAnalysis/3.4 5471-5808 /branches/DataAnalysis SolutionEnsembles/HeuristicLab.Algorithms.DataAnalysis/3.4 5815-6180 /branches/DataAnalysis/HeuristicLab.Algorithms.DataAnalysis/3.4 4458-4459,4462,4464 /branches/DataPreprocessing/HeuristicLab.Algorithms.DataAnalysis/3.4 10085-11101 /branches/GP.Grammar.Editor/HeuristicLab.Algorithms.DataAnalysis/3.4 6284-6795 /branches/GP.Symbols (TimeLag, Diff, Integral)/HeuristicLab.Algorithms.DataAnalysis/3.4 5060 /branches/HeuristicLab.DatasetRefactor/sources/HeuristicLab.Algorithms.DataAnalysis/3.4 11570-12508 /branches/HeuristicLab.Problems.Orienteering/HeuristicLab.Algorithms.DataAnalysis/3.4 11130-12721 /branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4 13819-14091 /branches/HeuristicLab.TimeSeries/HeuristicLab.Algorithms.DataAnalysis/3.4 8116-8789 /branches/LogResidualEvaluator/HeuristicLab.Algorithms.DataAnalysis/3.4 10202-10483 /branches/NET40/sources/HeuristicLab.Algorithms.DataAnalysis/3.4 5138-5162 /branches/ParallelEngine/HeuristicLab.Algorithms.DataAnalysis/3.4 5175-5192 /branches/ProblemInstancesRegressionAndClassification/HeuristicLab.Algorithms.DataAnalysis/3.4 7773-7810 /branches/QAPAlgorithms/HeuristicLab.Algorithms.DataAnalysis/3.4 6350-6627 /branches/Restructure trunk solution/HeuristicLab.Algorithms.DataAnalysis/3.4 6828 /branches/SpectralKernelForGaussianProcesses/HeuristicLab.Algorithms.DataAnalysis/3.4 10204-10479 
/branches/SuccessProgressAnalysis/HeuristicLab.Algorithms.DataAnalysis/3.4 5370-5682 /branches/Trunk/HeuristicLab.Algorithms.DataAnalysis/3.4 6829-6865 /branches/VNS/HeuristicLab.Algorithms.DataAnalysis/3.4 5594-5752 /branches/histogram/HeuristicLab.Algorithms.DataAnalysis/3.4 5959-6341
-
Property
svn:mergeinfo
set to
(toggle deleted branches)
-
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/BaselineClassifiers/OneR.cs
r14185 r14869
 20  20   #endregion
 21  21
     22 + using System;
 22  23   using System.Collections.Generic;
 23  24   using System.Linq;
     25 + using System.Threading;
 24  26   using HeuristicLab.Common;
 25  27   using HeuristicLab.Core;
… …
 58  60       }
 59  61
 60     -     protected override void Run() {
     62 +     protected override void Run(CancellationToken cancellationToken) {
 61  63         var solution = CreateOneRSolution(Problem.ProblemData, MinBucketSizeParameter.Value.Value);
 62  64         Results.Add(new Result("OneR solution", "The 1R classifier.", solution));
… …
 64  66
 65  67       public static IClassificationSolution CreateOneRSolution(IClassificationProblemData problemData, int minBucketSize = 6) {
     68 +       var classValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices);
     69 +       var model1 = FindBestDoubleVariableModel(problemData, minBucketSize);
     70 +       var model2 = FindBestFactorModel(problemData);
     71 +
     72 +       if (model1 == null && model2 == null) throw new InvalidProgramException("Could not create OneR solution");
     73 +       else if (model1 == null) return new OneFactorClassificationSolution(model2, (IClassificationProblemData)problemData.Clone());
     74 +       else if (model2 == null) return new OneRClassificationSolution(model1, (IClassificationProblemData)problemData.Clone());
     75 +       else {
     76 +         var model1EstimatedValues = model1.GetEstimatedClassValues(problemData.Dataset, problemData.TrainingIndices);
     77 +         var model1NumCorrect = classValues.Zip(model1EstimatedValues, (a, b) => a.IsAlmost(b)).Count(e => e);
     78 +
     79 +         var model2EstimatedValues = model2.GetEstimatedClassValues(problemData.Dataset, problemData.TrainingIndices);
     80 +         var model2NumCorrect = classValues.Zip(model2EstimatedValues, (a, b) => a.IsAlmost(b)).Count(e => e);
     81 +
     82 +         if (model1NumCorrect > model2NumCorrect) {
     83 +           return new OneRClassificationSolution(model1, (IClassificationProblemData)problemData.Clone());
     84 +         } else {
     85 +           return new OneFactorClassificationSolution(model2, (IClassificationProblemData)problemData.Clone());
     86 +         }
     87 +       }
     88 +     }
     89 +
     90 +     private static OneRClassificationModel FindBestDoubleVariableModel(IClassificationProblemData problemData, int minBucketSize = 6) {
 66  91         var bestClassified = 0;
 67  92         List<Split> bestSplits = null;
… …
 70  95         var classValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices);
 71  96
 72     -       foreach (var variable in problemData.AllowedInputVariables) {
     97 +       var allowedInputVariables = problemData.AllowedInputVariables.Where(problemData.Dataset.VariableHasType<double>);
     98 +
     99 +       if (!allowedInputVariables.Any()) return null;
    100 +
    101 +       foreach (var variable in allowedInputVariables) {
 73 102           var inputValues = problemData.Dataset.GetDoubleValues(variable, problemData.TrainingIndices);
 74 103           var samples = inputValues.Zip(classValues, (i, v) => new Sample(i, v)).OrderBy(s => s.inputValue);
 75 104
 76     -         var missingValuesDistribution = samples.Where(s => double.IsNaN(s.inputValue)).GroupBy(s => s.classValue).ToDictionary(s => s.Key, s => s.Count()).MaxItems(s => s.Value).FirstOrDefault();
    105 +         var missingValuesDistribution = samples
    106 +           .Where(s => double.IsNaN(s.inputValue)).GroupBy(s => s.classValue)
    107 +           .ToDictionary(s => s.Key, s => s.Count())
    108 +           .MaxItems(s => s.Value)
    109 +           .FirstOrDefault();
 77 110
 78 111           //calculate class distributions for all distinct inputValues
… …
119 152             while (sample.inputValue >= splits[splitIndex].thresholdValue)
120 153               splitIndex++;
121     -           correctClassified += sample.classValue == splits[splitIndex].classValue ? 1 : 0;
    154 +           correctClassified += sample.classValue.IsAlmost(splits[splitIndex].classValue) ? 1 : 0;
122 155           }
123 156           correctClassified += missingValuesDistribution.Value;
… …
133 166         //remove neighboring splits with the same class value
134 167         for (int i = 0; i < bestSplits.Count - 1; i++) {
135     -         if (bestSplits[i].classValue == bestSplits[i + 1].classValue) {
    168 +         if (bestSplits[i].classValue.IsAlmost(bestSplits[i + 1].classValue)) {
136 169             bestSplits.Remove(bestSplits[i]);
137 170             i--;
… …
139 172         }
140 173
141     -       var model = new OneRClassificationModel(problemData.TargetVariable, bestVariable, bestSplits.Select(s => s.thresholdValue).ToArray(), bestSplits.Select(s => s.classValue).ToArray(), bestMissingValuesClass);
142     -       var solution = new OneRClassificationSolution(model, (IClassificationProblemData)problemData.Clone());
143     -
144     -       return solution;
    174 +       var model = new OneRClassificationModel(problemData.TargetVariable, bestVariable,
    175 +         bestSplits.Select(s => s.thresholdValue).ToArray(),
    176 +         bestSplits.Select(s => s.classValue).ToArray(), bestMissingValuesClass);
    177 +
    178 +       return model;
    179 +     }
    180 +     private static OneFactorClassificationModel FindBestFactorModel(IClassificationProblemData problemData) {
    181 +       var classValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices);
    182 +       var defaultClass = FindMostFrequentClassValue(classValues);
    183 +       // only select string variables
    184 +       var allowedInputVariables = problemData.AllowedInputVariables.Where(problemData.Dataset.VariableHasType<string>);
    185 +
    186 +       if (!allowedInputVariables.Any()) return null;
    187 +
    188 +       OneFactorClassificationModel bestModel = null;
    189 +       var bestModelNumCorrect = 0;
    190 +
    191 +       foreach (var variable in allowedInputVariables) {
    192 +         var variableValues = problemData.Dataset.GetStringValues(variable, problemData.TrainingIndices);
    193 +         var groupedClassValues = variableValues
    194 +           .Zip(classValues, (v, c) => new KeyValuePair<string, double>(v, c))
    195 +           .GroupBy(kvp => kvp.Key)
    196 +           .ToDictionary(g => g.Key, g => FindMostFrequentClassValue(g.Select(kvp => kvp.Value)));
    197 +
    198 +         var model = new OneFactorClassificationModel(problemData.TargetVariable, variable,
    199 +           groupedClassValues.Select(kvp => kvp.Key).ToArray(), groupedClassValues.Select(kvp => kvp.Value).ToArray(), defaultClass);
    200 +
    201 +         var modelEstimatedValues = model.GetEstimatedClassValues(problemData.Dataset, problemData.TrainingIndices);
    202 +         var modelNumCorrect = classValues.Zip(modelEstimatedValues, (a, b) => a.IsAlmost(b)).Count(e => e);
    203 +         if (modelNumCorrect > bestModelNumCorrect) {
    204 +           bestModelNumCorrect = modelNumCorrect;
    205 +           bestModel = model;
    206 +         }
    207 +       }
    208 +
    209 +       return bestModel;
    210 +     }
    211 +
    212 +     private static double FindMostFrequentClassValue(IEnumerable<double> classValues) {
    213 +       return classValues.GroupBy(c => c).OrderByDescending(g => g.Count()).Select(g => g.Key).First();
145 214       }
146 215
-
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/BaselineClassifiers/OneRClassificationModel.cs
r14185 r14869
 31  31     [StorableClass]
 32  32     [Item("OneR Classification Model", "A model that uses intervals for one variable to determine the class.")]
 33     -   public class OneRClassificationModel : ClassificationModel {
     33 +   public sealed class OneRClassificationModel : ClassificationModel {
 34  34       public override IEnumerable<string> VariablesUsedForPrediction {
 35  35         get { return new[] { Variable }; }
… …
 37  37
 38  38       [Storable]
 39     -     protected string variable;
     39 +     private string variable;
 40  40       public string Variable {
 41  41         get { return variable; }
… …
 43  43
 44  44       [Storable]
 45     -     protected double[] splits;
     45 +     private double[] splits;
 46  46       public double[] Splits {
 47  47         get { return splits; }
… …
 49  49
 50  50       [Storable]
 51     -     protected double[] classes;
     51 +     private double[] classes;
 52  52       public double[] Classes {
 53  53         get { return classes; }
… …
 55  55
 56  56       [Storable]
 57     -     protected double missingValuesClass;
     57 +     private double missingValuesClass;
 58  58       public double MissingValuesClass {
 59  59         get { return missingValuesClass; }
… …
 61  61
 62  62       [StorableConstructor]
 63     -     protected OneRClassificationModel(bool deserializing) : base(deserializing) { }
 64     -     protected OneRClassificationModel(OneRClassificationModel original, Cloner cloner)
     63 +     private OneRClassificationModel(bool deserializing) : base(deserializing) { }
     64 +     private OneRClassificationModel(OneRClassificationModel original, Cloner cloner)
 65  65         : base(original, cloner) {
 66  66         this.variable = (string)original.variable;
 67  67         this.splits = (double[])original.splits.Clone();
 68  68         this.classes = (double[])original.classes.Clone();
     69 +       this.missingValuesClass = original.missingValuesClass;
 69  70       }
 70  71       public override IDeepCloneable Clone(Cloner cloner) { return new OneRClassificationModel(this, cloner); }
-
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/BaselineClassifiers/OneRClassificationSolution.cs
r14185 r14869
 28  28     [StorableClass]
 29  29     [Item(Name = "OneR Classification Solution", Description = "Represents a OneR classification solution which uses only a single feature with potentially multiple thresholds for class prediction.")]
 30     -   public class OneRClassificationSolution : ClassificationSolution {
     30 +   public sealed class OneRClassificationSolution : ClassificationSolution {
 31  31       public new OneRClassificationModel Model {
 32  32         get { return (OneRClassificationModel)base.Model; }
… …
 35  35
 36  36       [StorableConstructor]
 37     -     protected OneRClassificationSolution(bool deserializing) : base(deserializing) { }
 38     -     protected OneRClassificationSolution(OneRClassificationSolution original, Cloner cloner) : base(original, cloner) { }
     37 +     private OneRClassificationSolution(bool deserializing) : base(deserializing) { }
     38 +     private OneRClassificationSolution(OneRClassificationSolution original, Cloner cloner) : base(original, cloner) { }
 39  39       public OneRClassificationSolution(OneRClassificationModel model, IClassificationProblemData problemData)
 40  40         : base(model, problemData) {
-
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/BaselineClassifiers/ZeroR.cs
r14185 r14869
 21  21
 22  22   using System.Linq;
     23 + using System.Threading;
 23  24   using HeuristicLab.Common;
 24  25   using HeuristicLab.Core;
… …
 49  50       }
 50  51
 51     -     protected override void Run() {
     52 +     protected override void Run(CancellationToken cancellationToken) {
 52  53         var solution = CreateZeroRSolution(Problem.ProblemData);
 53  54         Results.Add(new Result("ZeroR solution", "The simplest possible classifier, ZeroR always predicts the majority class.", solution));
Note: See TracChangeset for help on using the changeset viewer.