Changeset 9078 for branches/RuntimeOptimizer/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification
- Timestamp: 12/19/12 11:16:51 (12 years ago)
- Location: branches/RuntimeOptimizer
- Files: 13 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/RuntimeOptimizer
- Property svn:mergeinfo changed
/trunk/sources merged: 8972-8974,8976,8978-8994,8999-9019,9021-9031,9033-9039,9043,9049,9052,9055-9057,9063,9068,9072,9075-9076
- Property svn:mergeinfo changed
-
branches/RuntimeOptimizer/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification
- Property svn:mergeinfo changed
/trunk/sources/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification merged: 8972,8978-8979,9002-9003
- Property svn:mergeinfo changed
-
branches/RuntimeOptimizer/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/Interfaces/ISymbolicClassificationModel.cs
r8594 r9078 27 27 void RecalculateModelParameters(IClassificationProblemData problemData, IEnumerable<int> rows); 28 28 new ISymbolicClassificationSolution CreateClassificationSolution(IClassificationProblemData problemData); 29 30 void Scale(IClassificationProblemData problemData); 29 31 } 30 32 } -
branches/RuntimeOptimizer/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/ModelCreators/NearestNeighborModelCreator.cs
r8606 r9078 48 48 public NearestNeighborModelCreator() 49 49 : base() { 50 Parameters.Add(new FixedValueParameter<IntValue>("K", "The number of neighbours to use to determine the class.", new IntValue( 3))); 50 Parameters.Add(new FixedValueParameter<IntValue>("K", "The number of neighbours to use to determine the class.", new IntValue(11))); 51 51 } 52 52 -
branches/RuntimeOptimizer/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/MultiObjective/SymbolicClassificationMultiObjectiveTrainingBestSolutionAnalyzer.cs
r8883 r9078 82 82 protected override ISymbolicClassificationSolution CreateSolution(ISymbolicExpressionTree bestTree, double[] bestQuality) { 83 83 var model = ModelCreatorParameter.ActualValue.CreateSymbolicClassificationModel((ISymbolicExpressionTree)bestTree.Clone(), SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper); 84 if (ApplyLinearScalingParameter.ActualValue.Value) SymbolicClassificationModel.Scale(model, ProblemDataParameter.ActualValue, ProblemDataParameter.ActualValue.TargetVariable); 84 if (ApplyLinearScalingParameter.ActualValue.Value) model.Scale(ProblemDataParameter.ActualValue); 85 85 86 86 model.RecalculateModelParameters(ProblemDataParameter.ActualValue, ProblemDataParameter.ActualValue.TrainingIndices); -
branches/RuntimeOptimizer/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/MultiObjective/SymbolicClassificationMultiObjectiveValidationBestSolutionAnalyzer.cs
r8883 r9078 72 72 protected override ISymbolicClassificationSolution CreateSolution(ISymbolicExpressionTree bestTree, double[] bestQualities) { 73 73 var model = ModelCreatorParameter.ActualValue.CreateSymbolicClassificationModel((ISymbolicExpressionTree)bestTree.Clone(), SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper); 74 if (ApplyLinearScalingParameter.ActualValue.Value) SymbolicClassificationModel.Scale(model, ProblemDataParameter.ActualValue, ProblemDataParameter.ActualValue.TargetVariable); 74 if (ApplyLinearScalingParameter.ActualValue.Value) model.Scale(ProblemDataParameter.ActualValue); 75 75 76 76 model.RecalculateModelParameters(ProblemDataParameter.ActualValue, ProblemDataParameter.ActualValue.TrainingIndices); -
branches/RuntimeOptimizer/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/SingleObjective/SymbolicClassificationSingleObjectivePenaltyScoreEvaluator.cs
r8883 r9078 92 92 93 93 var model = ModelCreatorParameter.ActualValue.CreateSymbolicClassificationModel(tree, SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper); 94 if (ApplyLinearScalingParameter.ActualValue.Value) SymbolicClassificationModel.Scale(model, problemData, problemData.TargetVariable); 94 if (ApplyLinearScalingParameter.ActualValue.Value) model.Scale(problemData); 95 95 model.RecalculateModelParameters(problemData, rows); 96 96 double penalty = Calculate(model, problemData, rows); -
branches/RuntimeOptimizer/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/SingleObjective/SymbolicClassificationSingleObjectiveTrainingBestSolutionAnalyzer.cs
r8883 r9078 82 82 protected override ISymbolicClassificationSolution CreateSolution(ISymbolicExpressionTree bestTree, double bestQuality) { 83 83 var model = ModelCreatorParameter.ActualValue.CreateSymbolicClassificationModel((ISymbolicExpressionTree)bestTree.Clone(), SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper); 84 if (ApplyLinearScalingParameter.ActualValue.Value) SymbolicClassificationModel.Scale(model, ProblemDataParameter.ActualValue, ProblemDataParameter.ActualValue.TargetVariable); 84 if (ApplyLinearScalingParameter.ActualValue.Value) model.Scale(ProblemDataParameter.ActualValue); 85 85 86 86 model.RecalculateModelParameters(ProblemDataParameter.ActualValue, ProblemDataParameter.ActualValue.TrainingIndices); -
branches/RuntimeOptimizer/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/SingleObjective/SymbolicClassificationSingleObjectiveTrainingParetoBestSolutionAnalyzer.cs
r8883 r9078 65 65 protected override ISymbolicClassificationSolution CreateSolution(ISymbolicExpressionTree bestTree) { 66 66 var model = ModelCreatorParameter.ActualValue.CreateSymbolicClassificationModel((ISymbolicExpressionTree)bestTree.Clone(), SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper); 67 if (ApplyLinearScalingParameter.ActualValue.Value) SymbolicClassificationModel.Scale(model, ProblemDataParameter.ActualValue, ProblemDataParameter.ActualValue.TargetVariable); 67 if (ApplyLinearScalingParameter.ActualValue.Value) model.Scale(ProblemDataParameter.ActualValue); 68 68 69 69 model.RecalculateModelParameters(ProblemDataParameter.ActualValue, ProblemDataParameter.ActualValue.TrainingIndices); -
branches/RuntimeOptimizer/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/SingleObjective/SymbolicClassificationSingleObjectiveValidationBestSolutionAnalyzer.cs
r8883 r9078 72 72 protected override ISymbolicClassificationSolution CreateSolution(ISymbolicExpressionTree bestTree, double bestQuality) { 73 73 var model = ModelCreatorParameter.ActualValue.CreateSymbolicClassificationModel((ISymbolicExpressionTree)bestTree.Clone(), SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper); 74 if (ApplyLinearScalingParameter.ActualValue.Value) SymbolicClassificationModel.Scale(model, ProblemDataParameter.ActualValue, ProblemDataParameter.ActualValue.TargetVariable); 74 if (ApplyLinearScalingParameter.ActualValue.Value) model.Scale(ProblemDataParameter.ActualValue); 75 75 76 76 model.RecalculateModelParameters(ProblemDataParameter.ActualValue, ProblemDataParameter.ActualValue.TrainingIndices); -
branches/RuntimeOptimizer/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/SingleObjective/SymbolicClassificationSingleObjectiveValidationParetoBestSolutionAnalyzer.cs
r8883 r9078 65 65 protected override ISymbolicClassificationSolution CreateSolution(ISymbolicExpressionTree bestTree) { 66 66 var model = ModelCreatorParameter.ActualValue.CreateSymbolicClassificationModel((ISymbolicExpressionTree)bestTree.Clone(), SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper); 67 if (ApplyLinearScalingParameter.ActualValue.Value) SymbolicClassificationModel.Scale(model, ProblemDataParameter.ActualValue, ProblemDataParameter.ActualValue.TargetVariable); 67 if (ApplyLinearScalingParameter.ActualValue.Value) model.Scale(ProblemDataParameter.ActualValue); 68 68 69 69 model.RecalculateModelParameters(ProblemDataParameter.ActualValue, ProblemDataParameter.ActualValue.TrainingIndices); -
branches/RuntimeOptimizer/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/SymbolicClassificationModel.cs
r8664 r9078 62 62 return CreateClassificationSolution(problemData); 63 63 } 64 65 public void Scale(IClassificationProblemData problemData) { 66 Scale(problemData, problemData.TargetVariable); 67 } 64 68 } 65 69 } -
branches/RuntimeOptimizer/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/SymbolicNearestNeighbourClassificationModel.cs
r8606 r9078 39 39 private int k; 40 40 [Storable] 41 private List<KeyValuePair<double, double>> trainedTargetPair; 41 private List<double> trainedClasses; 42 [Storable] 43 private List<double> trainedEstimatedValues; 44 45 [Storable] 46 private ClassFrequencyComparer frequencyComparer; 42 47 43 48 [StorableConstructor] … … 46 51 : base(original, cloner) { 47 52 k = original.k; 48 trainedTargetPair = new List<KeyValuePair<double, double>>(original.trainedTargetPair); 53 frequencyComparer = new ClassFrequencyComparer(original.frequencyComparer); 54 trainedEstimatedValues = new List<double>(original.trainedEstimatedValues); 55 trainedClasses = new List<double>(original.trainedClasses); 49 56 } 50 57 public SymbolicNearestNeighbourClassificationModel(int k, ISymbolicExpressionTree tree, ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, double lowerEstimationLimit = double.MinValue, double upperEstimationLimit = double.MaxValue) 51 58 : base(tree, interpreter, lowerEstimationLimit, upperEstimationLimit) { 52 59 this.k = k; 53 this.trainedTargetPair = new List<KeyValuePair<double, double>>(); 60 frequencyComparer = new ClassFrequencyComparer(); 61 54 62 } 55 63 … … 59 67 60 68 public override IEnumerable<double> GetEstimatedClassValues(Dataset dataset, IEnumerable<int> rows) { 61 var estimatedValues = Interpreter.GetSymbolicExpressionTreeValues(SymbolicExpressionTree, dataset, rows) ;62 var neighbors = new Dictionary<double, int>();69 var estimatedValues = Interpreter.GetSymbolicExpressionTreeValues(SymbolicExpressionTree, dataset, rows) 70 .LimitToRange(LowerEstimationLimit, UpperEstimationLimit); 63 71 foreach (var ev in estimatedValues) { 64 int lower = 0, upper = 1; 65 double sdist = Math.Abs(ev - trainedTargetPair[0].Key); 66 for (int i = 1; i < trainedTargetPair.Count; i++) { 67 double d = Math.Abs(ev - trainedTargetPair[i].Key); 68 if (d > sdist) break; 69 lower = i; 70 upper = i + 1; 71 sdist = d; 72 } 73 neighbors.Clear(); 74 
neighbors[trainedTargetPair[lower].Value] = 1; 75 lower--; 76 for (int i = 1; i < Math.Min(k, trainedTargetPair.Count); i++) { 77 if (upper >= trainedTargetPair.Count || (lower > 0 && ev - trainedTargetPair[lower].Key < trainedTargetPair[upper].Key - ev)) { 78 if (!neighbors.ContainsKey(trainedTargetPair[lower].Value)) 79 neighbors[trainedTargetPair[lower].Value] = 1; 80 else neighbors[trainedTargetPair[lower].Value]++; 81 lower--; 82 } else { 83 if (!neighbors.ContainsKey(trainedTargetPair[upper].Value)) 84 neighbors[trainedTargetPair[upper].Value] = 1; 85 else neighbors[trainedTargetPair[upper].Value]++; 86 upper++; 72 // find the range [lower, upper[ of trainedTargetValues that contains the k closest neighbours 73 // the range can span more than k elements when there are equal estimated values 74 75 // find the index of the training-point to which distance is shortest 76 int lower = trainedEstimatedValues.BinarySearch(ev); 77 int upper; 78 // if the element was not found exactly, BinarySearch returns the complement of the index of the next larger item 79 if (lower < 0) { 80 lower = ~lower; 81 // lower is not necessarily the closer one 82 // determine which element is closer to ev (lower - 1) or (lower) 83 if (lower == trainedEstimatedValues.Count || 84 (lower > 0 && Math.Abs(ev - trainedEstimatedValues[lower - 1]) < Math.Abs(ev - trainedEstimatedValues[lower]))) { 85 lower = lower - 1; 87 86 } 88 87 } 89 yield return neighbors.MaxItems(x => x.Value).First().Key; 88 upper = lower + 1; 89 // at this point we have a range [lower, upper[ that includes only the closest element to ev 90 91 // expand the range to left or right looking for the nearest neighbors 92 while (upper - lower < Math.Min(k, trainedEstimatedValues.Count)) { 93 bool lowerIsCloser = upper >= trainedEstimatedValues.Count || 94 (lower > 0 && ev - trainedEstimatedValues[lower] <= trainedEstimatedValues[upper] - ev); 95 bool upperIsCloser = lower <= 0 || 96 (upper < trainedEstimatedValues.Count && 97 
ev - trainedEstimatedValues[lower] >= trainedEstimatedValues[upper] - ev); 98 if (!lowerIsCloser && !upperIsCloser) break; 99 if (lowerIsCloser) { 100 lower--; 101 // eat up all equal values 102 while (lower > 0 && trainedEstimatedValues[lower - 1].IsAlmost(trainedEstimatedValues[lower])) 103 lower--; 104 } 105 if (upperIsCloser) { 106 upper++; 107 while (upper < trainedEstimatedValues.Count && 108 trainedEstimatedValues[upper - 1].IsAlmost(trainedEstimatedValues[upper])) 109 upper++; 110 } 111 } 112 // majority voting with preference for bigger class in case of tie 113 yield return Enumerable.Range(lower, upper - lower) 114 .Select(i => trainedClasses[i]) 115 .GroupBy(c => c) 116 .Select(g => new { Class = g.Key, Votes = g.Count() }) 117 .MaxItems(p => p.Votes) 118 .OrderByDescending(m => m.Class, frequencyComparer) 119 .First().Class; 90 120 } 91 121 } 92 122 93 123 public override void RecalculateModelParameters(IClassificationProblemData problemData, IEnumerable<int> rows) { 94 var estimatedValues = Interpreter.GetSymbolicExpressionTreeValues(SymbolicExpressionTree, problemData.Dataset, rows); 124 var estimatedValues = Interpreter.GetSymbolicExpressionTreeValues(SymbolicExpressionTree, problemData.Dataset, rows) 125 .LimitToRange(LowerEstimationLimit, UpperEstimationLimit); 95 126 var targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows); 96 var pair = estimatedValues.Zip(targetValues, (e, t) => new { Estimated = e, Target = t }); 127 var trainedClasses = targetValues.ToArray(); 128 var trainedEstimatedValues = estimatedValues.ToArray(); 97 129 98 // there could be more than one target value per estimated value 99 var dict = new Dictionary<double, Dictionary<double, int>>(); 100 foreach (var p in pair) { 101 if (!dict.ContainsKey(p.Estimated)) dict[p.Estimated] = new Dictionary<double, int>(); 102 if (!dict[p.Estimated].ContainsKey(p.Target)) dict[p.Estimated][p.Target] = 0; 103 dict[p.Estimated][p.Target]++; 104 } 130 
Array.Sort(trainedEstimatedValues, trainedClasses); 131 this.trainedClasses = new List<double>(trainedClasses); 132 this.trainedEstimatedValues = new List<double>(trainedEstimatedValues); 105 133 106 trainedTargetPair = new List<KeyValuePair<double, double>>(); 107 foreach (var ev in dict) { 108 var target = ev.Value.MaxItems(x => x.Value).First().Key; 109 trainedTargetPair.Add(new KeyValuePair<double, double>(ev.Key, target)); 110 } 111 trainedTargetPair = trainedTargetPair.OrderBy(x => x.Key).ToList(); 134 var freq = trainedClasses 135 .GroupBy(c => c) 136 .ToDictionary(g => g.Key, g => g.Count()); 137 this.frequencyComparer = new ClassFrequencyComparer(freq); 112 138 } 113 139 114 140 public override ISymbolicClassificationSolution CreateClassificationSolution(IClassificationProblemData problemData) { 115 return new SymbolicClassificationSolution((ISymbolicClassificationModel)this.Clone(), problemData); 141 return new SymbolicClassificationSolution((ISymbolicClassificationModel)Clone(), problemData); 142 } 143 } 144 145 [StorableClass] 146 internal sealed class ClassFrequencyComparer : IComparer<double> { 147 [Storable] 148 private readonly Dictionary<double, int> classFrequencies; 149 150 [StorableConstructor] 151 private ClassFrequencyComparer(bool deserializing) { } 152 public ClassFrequencyComparer() { 153 classFrequencies = new Dictionary<double, int>(); 154 } 155 public ClassFrequencyComparer(Dictionary<double, int> frequencies) { 156 classFrequencies = frequencies; 157 } 158 public ClassFrequencyComparer(ClassFrequencyComparer original) { 159 classFrequencies = new Dictionary<double, int>(original.classFrequencies); 160 } 161 162 public int Compare(double x, double y) { 163 bool cx = classFrequencies.ContainsKey(x), cy = classFrequencies.ContainsKey(y); 164 if (cx && cy) 165 return classFrequencies[x].CompareTo(classFrequencies[y]); 166 if (cx) return 1; 167 return -1; 116 168 } 117 169 }
Note: See TracChangeset for help on using the changeset viewer.