Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
12/19/12 11:16:51 (12 years ago)
Author:
abeham
Message:

#1985: Updated branch from trunk

Location:
branches/RuntimeOptimizer
Files:
13 edited

Legend:

Unmodified
Added
Removed
  • branches/RuntimeOptimizer

  • branches/RuntimeOptimizer/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification

  • branches/RuntimeOptimizer/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/Interfaces/ISymbolicClassificationModel.cs

    r8594 r9078  
    2727    void RecalculateModelParameters(IClassificationProblemData problemData, IEnumerable<int> rows);
    2828    new ISymbolicClassificationSolution CreateClassificationSolution(IClassificationProblemData problemData);
     29
     30    void Scale(IClassificationProblemData problemData);
    2931  }
    3032}
  • branches/RuntimeOptimizer/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/ModelCreators/NearestNeighborModelCreator.cs

    r8606 r9078  
    4848    public NearestNeighborModelCreator()
    4949      : base() {
    50       Parameters.Add(new FixedValueParameter<IntValue>("K", "The number of neighbours to use to determine the class.", new IntValue(3)));
     50      Parameters.Add(new FixedValueParameter<IntValue>("K", "The number of neighbours to use to determine the class.", new IntValue(11)));
    5151    }
    5252
  • branches/RuntimeOptimizer/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/MultiObjective/SymbolicClassificationMultiObjectiveTrainingBestSolutionAnalyzer.cs

    r8883 r9078  
    8282    protected override ISymbolicClassificationSolution CreateSolution(ISymbolicExpressionTree bestTree, double[] bestQuality) {
    8383      var model = ModelCreatorParameter.ActualValue.CreateSymbolicClassificationModel((ISymbolicExpressionTree)bestTree.Clone(), SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper);
    84       if (ApplyLinearScalingParameter.ActualValue.Value) SymbolicClassificationModel.Scale(model, ProblemDataParameter.ActualValue, ProblemDataParameter.ActualValue.TargetVariable);
     84      if (ApplyLinearScalingParameter.ActualValue.Value) model.Scale(ProblemDataParameter.ActualValue);
    8585
    8686      model.RecalculateModelParameters(ProblemDataParameter.ActualValue, ProblemDataParameter.ActualValue.TrainingIndices);
  • branches/RuntimeOptimizer/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/MultiObjective/SymbolicClassificationMultiObjectiveValidationBestSolutionAnalyzer.cs

    r8883 r9078  
    7272    protected override ISymbolicClassificationSolution CreateSolution(ISymbolicExpressionTree bestTree, double[] bestQualities) {
    7373      var model = ModelCreatorParameter.ActualValue.CreateSymbolicClassificationModel((ISymbolicExpressionTree)bestTree.Clone(), SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper);
    74       if (ApplyLinearScalingParameter.ActualValue.Value) SymbolicClassificationModel.Scale(model, ProblemDataParameter.ActualValue, ProblemDataParameter.ActualValue.TargetVariable);
     74      if (ApplyLinearScalingParameter.ActualValue.Value) model.Scale(ProblemDataParameter.ActualValue);
    7575
    7676      model.RecalculateModelParameters(ProblemDataParameter.ActualValue, ProblemDataParameter.ActualValue.TrainingIndices);
  • branches/RuntimeOptimizer/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/SingleObjective/SymbolicClassificationSingleObjectivePenaltyScoreEvaluator.cs

    r8883 r9078  
    9292
    9393      var model = ModelCreatorParameter.ActualValue.CreateSymbolicClassificationModel(tree, SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper);
    94       if (ApplyLinearScalingParameter.ActualValue.Value) SymbolicClassificationModel.Scale(model, problemData, problemData.TargetVariable);
     94      if (ApplyLinearScalingParameter.ActualValue.Value) model.Scale(problemData);
    9595      model.RecalculateModelParameters(problemData, rows);
    9696      double penalty = Calculate(model, problemData, rows);
  • branches/RuntimeOptimizer/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/SingleObjective/SymbolicClassificationSingleObjectiveTrainingBestSolutionAnalyzer.cs

    r8883 r9078  
    8282    protected override ISymbolicClassificationSolution CreateSolution(ISymbolicExpressionTree bestTree, double bestQuality) {
    8383      var model = ModelCreatorParameter.ActualValue.CreateSymbolicClassificationModel((ISymbolicExpressionTree)bestTree.Clone(), SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper);
    84       if (ApplyLinearScalingParameter.ActualValue.Value) SymbolicClassificationModel.Scale(model, ProblemDataParameter.ActualValue, ProblemDataParameter.ActualValue.TargetVariable);
     84      if (ApplyLinearScalingParameter.ActualValue.Value) model.Scale(ProblemDataParameter.ActualValue);
    8585
    8686      model.RecalculateModelParameters(ProblemDataParameter.ActualValue, ProblemDataParameter.ActualValue.TrainingIndices);
  • branches/RuntimeOptimizer/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/SingleObjective/SymbolicClassificationSingleObjectiveTrainingParetoBestSolutionAnalyzer.cs

    r8883 r9078  
    6565    protected override ISymbolicClassificationSolution CreateSolution(ISymbolicExpressionTree bestTree) {
    6666      var model = ModelCreatorParameter.ActualValue.CreateSymbolicClassificationModel((ISymbolicExpressionTree)bestTree.Clone(), SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper);
    67       if (ApplyLinearScalingParameter.ActualValue.Value) SymbolicClassificationModel.Scale(model, ProblemDataParameter.ActualValue, ProblemDataParameter.ActualValue.TargetVariable);
     67      if (ApplyLinearScalingParameter.ActualValue.Value) model.Scale(ProblemDataParameter.ActualValue);
    6868
    6969      model.RecalculateModelParameters(ProblemDataParameter.ActualValue, ProblemDataParameter.ActualValue.TrainingIndices);
  • branches/RuntimeOptimizer/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/SingleObjective/SymbolicClassificationSingleObjectiveValidationBestSolutionAnalyzer.cs

    r8883 r9078  
    7272    protected override ISymbolicClassificationSolution CreateSolution(ISymbolicExpressionTree bestTree, double bestQuality) {
    7373      var model = ModelCreatorParameter.ActualValue.CreateSymbolicClassificationModel((ISymbolicExpressionTree)bestTree.Clone(), SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper);
    74       if (ApplyLinearScalingParameter.ActualValue.Value) SymbolicClassificationModel.Scale(model, ProblemDataParameter.ActualValue, ProblemDataParameter.ActualValue.TargetVariable);
     74      if (ApplyLinearScalingParameter.ActualValue.Value) model.Scale(ProblemDataParameter.ActualValue);
    7575
    7676      model.RecalculateModelParameters(ProblemDataParameter.ActualValue, ProblemDataParameter.ActualValue.TrainingIndices);
  • branches/RuntimeOptimizer/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/SingleObjective/SymbolicClassificationSingleObjectiveValidationParetoBestSolutionAnalyzer.cs

    r8883 r9078  
    6565    protected override ISymbolicClassificationSolution CreateSolution(ISymbolicExpressionTree bestTree) {
    6666      var model = ModelCreatorParameter.ActualValue.CreateSymbolicClassificationModel((ISymbolicExpressionTree)bestTree.Clone(), SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper);
    67       if (ApplyLinearScalingParameter.ActualValue.Value) SymbolicClassificationModel.Scale(model, ProblemDataParameter.ActualValue, ProblemDataParameter.ActualValue.TargetVariable);
     67      if (ApplyLinearScalingParameter.ActualValue.Value) model.Scale(ProblemDataParameter.ActualValue);
    6868
    6969      model.RecalculateModelParameters(ProblemDataParameter.ActualValue, ProblemDataParameter.ActualValue.TrainingIndices);
  • branches/RuntimeOptimizer/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/SymbolicClassificationModel.cs

    r8664 r9078  
    6262      return CreateClassificationSolution(problemData);
    6363    }
     64
     65    public void Scale(IClassificationProblemData problemData) {
     66      Scale(problemData, problemData.TargetVariable);
     67    }
    6468  }
    6569}
  • branches/RuntimeOptimizer/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/SymbolicNearestNeighbourClassificationModel.cs

    r8606 r9078  
    3939    private int k;
    4040    [Storable]
    41     private List<KeyValuePair<double, double>> trainedTargetPair;
     41    private List<double> trainedClasses;
     42    [Storable]
     43    private List<double> trainedEstimatedValues;
     44
     45    [Storable]
     46    private ClassFrequencyComparer frequencyComparer;
    4247
    4348    [StorableConstructor]
     
    4651      : base(original, cloner) {
    4752      k = original.k;
    48       trainedTargetPair = new List<KeyValuePair<double, double>>(original.trainedTargetPair);
     53      frequencyComparer = new ClassFrequencyComparer(original.frequencyComparer);
     54      trainedEstimatedValues = new List<double>(original.trainedEstimatedValues);
     55      trainedClasses = new List<double>(original.trainedClasses);
    4956    }
    5057    public SymbolicNearestNeighbourClassificationModel(int k, ISymbolicExpressionTree tree, ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, double lowerEstimationLimit = double.MinValue, double upperEstimationLimit = double.MaxValue)
    5158      : base(tree, interpreter, lowerEstimationLimit, upperEstimationLimit) {
    5259      this.k = k;
    53       this.trainedTargetPair = new List<KeyValuePair<double, double>>();
     60      frequencyComparer = new ClassFrequencyComparer();
     61
    5462    }
    5563
     
    5967
    6068    public override IEnumerable<double> GetEstimatedClassValues(Dataset dataset, IEnumerable<int> rows) {
    61       var estimatedValues = Interpreter.GetSymbolicExpressionTreeValues(SymbolicExpressionTree, dataset, rows);
    62       var neighbors = new Dictionary<double, int>();
     69      var estimatedValues = Interpreter.GetSymbolicExpressionTreeValues(SymbolicExpressionTree, dataset, rows)
     70                                       .LimitToRange(LowerEstimationLimit, UpperEstimationLimit);
    6371      foreach (var ev in estimatedValues) {
    64         int lower = 0, upper = 1;
    65         double sdist = Math.Abs(ev - trainedTargetPair[0].Key);
    66         for (int i = 1; i < trainedTargetPair.Count; i++) {
    67           double d = Math.Abs(ev - trainedTargetPair[i].Key);
    68           if (d > sdist) break;
    69           lower = i;
    70           upper = i + 1;
    71           sdist = d;
    72         }
    73         neighbors.Clear();
    74         neighbors[trainedTargetPair[lower].Value] = 1;
    75         lower--;
    76         for (int i = 1; i < Math.Min(k, trainedTargetPair.Count); i++) {
    77           if (upper >= trainedTargetPair.Count || (lower > 0 && ev - trainedTargetPair[lower].Key < trainedTargetPair[upper].Key - ev)) {
    78             if (!neighbors.ContainsKey(trainedTargetPair[lower].Value))
    79               neighbors[trainedTargetPair[lower].Value] = 1;
    80             else neighbors[trainedTargetPair[lower].Value]++;
    81             lower--;
    82           } else {
    83             if (!neighbors.ContainsKey(trainedTargetPair[upper].Value))
    84               neighbors[trainedTargetPair[upper].Value] = 1;
    85             else neighbors[trainedTargetPair[upper].Value]++;
    86             upper++;
     72        // find the range [lower, upper[ of trainedTargetValues that contains the k closest neighbours
     73        // the range can span more than k elements when there are equal estimated values
     74
     75        // find the index of the training-point to which distance is shortest
     76        int lower = trainedEstimatedValues.BinarySearch(ev);
     77        int upper;
     78        // if the element was not found exactly, BinarySearch returns the complement of the index of the next larger item
     79        if (lower < 0) {
     80          lower = ~lower;
     81          // lower is not necessarily the closer one
     82          // determine which element is closer to ev (lower - 1) or (lower)
     83          if (lower == trainedEstimatedValues.Count ||
     84            (lower > 0 && Math.Abs(ev - trainedEstimatedValues[lower - 1]) < Math.Abs(ev - trainedEstimatedValues[lower]))) {
     85            lower = lower - 1;
    8786          }
    8887        }
    89         yield return neighbors.MaxItems(x => x.Value).First().Key;
     88        upper = lower + 1;
     89        // at this point we have a range [lower, upper[ that includes only the closest element to ev
     90
     91        // expand the range to left or right looking for the nearest neighbors
     92        while (upper - lower < Math.Min(k, trainedEstimatedValues.Count)) {
     93          bool lowerIsCloser = upper >= trainedEstimatedValues.Count ||
     94                               (lower > 0 && ev - trainedEstimatedValues[lower] <= trainedEstimatedValues[upper] - ev);
     95          bool upperIsCloser = lower <= 0 ||
     96                               (upper < trainedEstimatedValues.Count &&
     97                                ev - trainedEstimatedValues[lower] >= trainedEstimatedValues[upper] - ev);
     98          if (!lowerIsCloser && !upperIsCloser) break;
     99          if (lowerIsCloser) {
     100            lower--;
     101            // eat up all equal values
     102            while (lower > 0 && trainedEstimatedValues[lower - 1].IsAlmost(trainedEstimatedValues[lower]))
     103              lower--;
     104          }
     105          if (upperIsCloser) {
     106            upper++;
     107            while (upper < trainedEstimatedValues.Count &&
     108                   trainedEstimatedValues[upper - 1].IsAlmost(trainedEstimatedValues[upper]))
     109              upper++;
     110          }
     111        }
     112        // majority voting with preference for bigger class in case of tie
     113        yield return Enumerable.Range(lower, upper - lower)
     114          .Select(i => trainedClasses[i])
     115          .GroupBy(c => c)
     116          .Select(g => new { Class = g.Key, Votes = g.Count() })
     117          .MaxItems(p => p.Votes)
     118          .OrderByDescending(m => m.Class, frequencyComparer)
     119          .First().Class;
    90120      }
    91121    }
    92122
    93123    public override void RecalculateModelParameters(IClassificationProblemData problemData, IEnumerable<int> rows) {
    94       var estimatedValues = Interpreter.GetSymbolicExpressionTreeValues(SymbolicExpressionTree, problemData.Dataset, rows);
     124      var estimatedValues = Interpreter.GetSymbolicExpressionTreeValues(SymbolicExpressionTree, problemData.Dataset, rows)
     125                                       .LimitToRange(LowerEstimationLimit, UpperEstimationLimit);
    95126      var targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);
    96       var pair = estimatedValues.Zip(targetValues, (e, t) => new { Estimated = e, Target = t });
     127      var trainedClasses = targetValues.ToArray();
     128      var trainedEstimatedValues = estimatedValues.ToArray();
    97129
    98       // there could be more than one target value per estimated value
    99       var dict = new Dictionary<double, Dictionary<double, int>>();
    100       foreach (var p in pair) {
    101         if (!dict.ContainsKey(p.Estimated)) dict[p.Estimated] = new Dictionary<double, int>();
    102         if (!dict[p.Estimated].ContainsKey(p.Target)) dict[p.Estimated][p.Target] = 0;
    103         dict[p.Estimated][p.Target]++;
    104       }
     130      Array.Sort(trainedEstimatedValues, trainedClasses);
     131      this.trainedClasses = new List<double>(trainedClasses);
     132      this.trainedEstimatedValues = new List<double>(trainedEstimatedValues);
    105133
    106       trainedTargetPair = new List<KeyValuePair<double, double>>();
    107       foreach (var ev in dict) {
    108         var target = ev.Value.MaxItems(x => x.Value).First().Key;
    109         trainedTargetPair.Add(new KeyValuePair<double, double>(ev.Key, target));
    110       }
    111       trainedTargetPair = trainedTargetPair.OrderBy(x => x.Key).ToList();
     134      var freq = trainedClasses
     135        .GroupBy(c => c)
     136        .ToDictionary(g => g.Key, g => g.Count());
     137      this.frequencyComparer = new ClassFrequencyComparer(freq);
    112138    }
    113139
    114140    public override ISymbolicClassificationSolution CreateClassificationSolution(IClassificationProblemData problemData) {
    115       return new SymbolicClassificationSolution((ISymbolicClassificationModel)this.Clone(), problemData);
     141      return new SymbolicClassificationSolution((ISymbolicClassificationModel)Clone(), problemData);
     142    }
     143  }
     144
     145  [StorableClass]
     146  internal sealed class ClassFrequencyComparer : IComparer<double> {
     147    [Storable]
     148    private readonly Dictionary<double, int> classFrequencies;
     149
     150    [StorableConstructor]
     151    private ClassFrequencyComparer(bool deserializing) { }
     152    public ClassFrequencyComparer() {
     153      classFrequencies = new Dictionary<double, int>();
     154    }
     155    public ClassFrequencyComparer(Dictionary<double, int> frequencies) {
     156      classFrequencies = frequencies;
     157    }
     158    public ClassFrequencyComparer(ClassFrequencyComparer original) {
     159      classFrequencies = new Dictionary<double, int>(original.classFrequencies);
     160    }
     161
     162    public int Compare(double x, double y) {
     163      bool cx = classFrequencies.ContainsKey(x), cy = classFrequencies.ContainsKey(y);
     164      if (cx && cy)
     165        return classFrequencies[x].CompareTo(classFrequencies[y]);
     166      if (cx) return 1;
     167      return -1;
    116168    }
    117169  }
Note: See TracChangeset for help on using the changeset viewer.