Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
09/14/12 18:58:15 (12 years ago)
Author:
gkronber
Message:

#1847 merged r8205:8635 from trunk into branch

Location:
branches/GP-MoveOperators
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • branches/GP-MoveOperators

  • branches/GP-MoveOperators/HeuristicLab.Algorithms.DataAnalysis/3.4

    • Property svn:ignore
      •  

        old new  
        5 5 *.vs10x
        6 6 Plugin.cs
          7 *.user
  • branches/GP-MoveOperators/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourClassification.cs

    r8206 r8660  
    2121
    2222using System;
    23 using System.Collections.Generic;
    2423using System.Linq;
    2524using HeuristicLab.Common;
    2625using HeuristicLab.Core;
    2726using HeuristicLab.Data;
    28 using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
    2927using HeuristicLab.Optimization;
     28using HeuristicLab.Parameters;
    3029using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    3130using HeuristicLab.Problems.DataAnalysis;
    32 using HeuristicLab.Problems.DataAnalysis.Symbolic;
    33 using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;
    34 using HeuristicLab.Parameters;
    3531
    3632namespace HeuristicLab.Algorithms.DataAnalysis {
     
    8480
    8581    public static IClassificationSolution CreateNearestNeighbourClassificationSolution(IClassificationProblemData problemData, int k) {
    86       Dataset dataset = problemData.Dataset;
    87       string targetVariable = problemData.TargetVariable;
    88       IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
    89       IEnumerable<int> rows = problemData.TrainingIndices;
    90       double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
    91       if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
    92         throw new NotSupportedException("Nearest neighbour classification does not support NaN or infinity values in the input dataset.");
     82      var problemDataClone = (IClassificationProblemData)problemData.Clone();
     83      return new NearestNeighbourClassificationSolution(problemDataClone, Train(problemDataClone, k));
     84    }
    9385
    94       alglib.nearestneighbor.kdtree kdtree = new alglib.nearestneighbor.kdtree();
    95 
    96       int nRows = inputMatrix.GetLength(0);
    97       int nFeatures = inputMatrix.GetLength(1) - 1;
    98       double[] classValues = dataset.GetDoubleValues(targetVariable).Distinct().OrderBy(x => x).ToArray();
    99       int nClasses = classValues.Count();
    100       // map original class values to values [0..nClasses-1]
    101       Dictionary<double, double> classIndices = new Dictionary<double, double>();
    102       for (int i = 0; i < nClasses; i++) {
    103         classIndices[classValues[i]] = i;
    104       }
    105       for (int row = 0; row < nRows; row++) {
    106         inputMatrix[row, nFeatures] = classIndices[inputMatrix[row, nFeatures]];
    107       }
    108       alglib.nearestneighbor.kdtreebuild(inputMatrix, nRows, inputMatrix.GetLength(1) - 1, 1, 2, kdtree);
    109       var problemDataClone = (IClassificationProblemData) problemData.Clone();
    110       return new NearestNeighbourClassificationSolution(problemDataClone, new NearestNeighbourModel(kdtree, k, targetVariable, allowedInputVariables, problemDataClone.ClassValues.ToArray()));
     86    public static INearestNeighbourModel Train(IClassificationProblemData problemData, int k) {
     87      return new NearestNeighbourModel(problemData.Dataset,
     88        problemData.TrainingIndices,
     89        k,
     90        problemData.TargetVariable,
     91        problemData.AllowedInputVariables,
     92        problemData.ClassValues.ToArray());
    11193    }
    11294    #endregion
  • branches/GP-MoveOperators/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourModel.cs

    r7294 r8660  
    3333  /// </summary>
    3434  [StorableClass]
    35   [Item("NearestNeighbourModel", "Represents a neural network for regression and classification.")]
     35  [Item("NearestNeighbourModel", "Represents a nearest neighbour model for regression and classification.")]
    3636  public sealed class NearestNeighbourModel : NamedItem, INearestNeighbourModel {
    3737
     
    5656    [Storable]
    5757    private int k;
     58
    5859    [StorableConstructor]
    5960    private NearestNeighbourModel(bool deserializing)
     
    9596        this.classValues = (double[])original.classValues.Clone();
    9697    }
    97     public NearestNeighbourModel(alglib.nearestneighbor.kdtree kdTree, int k, string targetVariable, IEnumerable<string> allowedInputVariables, double[] classValues = null)
    98       : base() {
    99       this.name = ItemName;
    100       this.description = ItemDescription;
    101       this.kdTree = kdTree;
     98    public NearestNeighbourModel(Dataset dataset, IEnumerable<int> rows, int k, string targetVariable, IEnumerable<string> allowedInputVariables, double[] classValues = null) {
     99      Name = ItemName;
     100      Description = ItemDescription;
    102101      this.k = k;
    103102      this.targetVariable = targetVariable;
    104103      this.allowedInputVariables = allowedInputVariables.ToArray();
    105       if (classValues != null)
     104
     105      var inputMatrix = AlglibUtil.PrepareInputMatrix(dataset,
     106                                   allowedInputVariables.Concat(new string[] { targetVariable }),
     107                                   rows);
     108
     109      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
     110        throw new NotSupportedException(
     111          "Nearest neighbour classification does not support NaN or infinity values in the input dataset.");
     112
     113      this.kdTree = new alglib.nearestneighbor.kdtree();
     114
     115      var nRows = inputMatrix.GetLength(0);
     116      var nFeatures = inputMatrix.GetLength(1) - 1;
     117
     118      if (classValues != null) {
    106119        this.classValues = (double[])classValues.Clone();
     120        int nClasses = classValues.Length;
     121        // map original class values to values [0..nClasses-1]
     122        var classIndices = new Dictionary<double, double>();
     123        for (int i = 0; i < nClasses; i++)
     124          classIndices[classValues[i]] = i;
     125
     126        for (int row = 0; row < nRows; row++) {
     127          inputMatrix[row, nFeatures] = classIndices[inputMatrix[row, nFeatures]];
     128        }
     129      }
     130      alglib.nearestneighbor.kdtreebuild(inputMatrix, nRows, inputMatrix.GetLength(1) - 1, 1, 2, kdTree);
    107131    }
    108132
     
    140164
    141165    public IEnumerable<double> GetEstimatedClassValues(Dataset dataset, IEnumerable<int> rows) {
     166      if (classValues == null) throw new InvalidOperationException("No class values are defined.");
    142167      double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
    143168
     
    177202
    178203    public INearestNeighbourRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
    179       return new NearestNeighbourRegressionSolution(problemData, this);
     204      return new NearestNeighbourRegressionSolution(new RegressionProblemData(problemData), this);
    180205    }
    181206    IRegressionSolution IRegressionModel.CreateRegressionSolution(IRegressionProblemData problemData) {
     
    183208    }
    184209    public INearestNeighbourClassificationSolution CreateClassificationSolution(IClassificationProblemData problemData) {
    185       return new NearestNeighbourClassificationSolution(problemData, this);
     210      return new NearestNeighbourClassificationSolution(new ClassificationProblemData(problemData), this);
    186211    }
    187212    IClassificationSolution IClassificationModel.CreateClassificationSolution(IClassificationProblemData problemData) {
  • branches/GP-MoveOperators/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourRegression.cs

    r8206 r8660  
    2121
    2222using System;
    23 using System.Collections.Generic;
    24 using System.Linq;
    2523using HeuristicLab.Common;
    2624using HeuristicLab.Core;
    2725using HeuristicLab.Data;
    28 using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
    2926using HeuristicLab.Optimization;
     27using HeuristicLab.Parameters;
    3028using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    3129using HeuristicLab.Problems.DataAnalysis;
    32 using HeuristicLab.Problems.DataAnalysis.Symbolic;
    33 using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;
    34 using HeuristicLab.Parameters;
    3530
    3631namespace HeuristicLab.Algorithms.DataAnalysis {
     
    8479
    8580    public static IRegressionSolution CreateNearestNeighbourRegressionSolution(IRegressionProblemData problemData, int k) {
    86       Dataset dataset = problemData.Dataset;
    87       string targetVariable = problemData.TargetVariable;
    88       IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
    89       IEnumerable<int> rows = problemData.TrainingIndices;
    90       double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
    91       if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
    92         throw new NotSupportedException("Nearest neighbour regression does not support NaN or infinity values in the input dataset.");
     81      var clonedProblemData = (IRegressionProblemData)problemData.Clone();
     82      return new NearestNeighbourRegressionSolution(clonedProblemData, Train(problemData, k));
     83    }
    9384
    94       alglib.nearestneighbor.kdtree kdtree = new alglib.nearestneighbor.kdtree();
    95 
    96       int nRows = inputMatrix.GetLength(0);
    97 
    98       alglib.nearestneighbor.kdtreebuild(inputMatrix, nRows, inputMatrix.GetLength(1) - 1, 1, 2, kdtree);
    99 
    100       return new NearestNeighbourRegressionSolution((IRegressionProblemData)problemData.Clone(), new NearestNeighbourModel(kdtree, k, targetVariable, allowedInputVariables));
     85    public static INearestNeighbourModel Train(IRegressionProblemData problemData, int k) {
     86      return new NearestNeighbourModel(problemData.Dataset,
     87        problemData.TrainingIndices,
     88        k,
     89        problemData.TargetVariable,
     90        problemData.AllowedInputVariables);
    10191    }
    10292    #endregion
Note: See TracChangeset for help on using the changeset viewer.