Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
01/18/19 14:39:01 (6 years ago)
Author:
bwerth
Message:

#2847 renamed branch to include ticket number; merged current trunk version into branch; updated Build.cmd and Build.ps1

Location:
branches/2847_M5Regression
Files:
7 edited
1 moved

Legend:

Unmodified
Added
Removed
  • branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis

  • branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4

  • branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourClassification.cs

    r14523 r16538  
    1 #region License Information
     1#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
     
    4242    private const string NearestNeighbourClassificationModelResultName = "Nearest neighbour classification solution";
    4343    private const string WeightsParameterName = "Weights";
    44 
     44    private const string SelfMatchParameterName = "SelfMatch";
    4545
    4646    #region parameter properties
    4747    public IFixedValueParameter<IntValue> KParameter {
    4848      get { return (IFixedValueParameter<IntValue>)Parameters[KParameterName]; }
     49    }
     50    public IFixedValueParameter<BoolValue> SelfMatchParameter {
     51      get { return (IFixedValueParameter<BoolValue>)Parameters[SelfMatchParameterName]; }
    4952    }
    5053    public IValueParameter<DoubleArray> WeightsParameter {
     
    5356    #endregion
    5457    #region properties
     58    public bool SelfMatch {
     59      get { return SelfMatchParameter.Value.Value; }
     60      set { SelfMatchParameter.Value.Value = value; }
     61    }
    5562    public int K {
    5663      get { return KParameter.Value.Value; }
     
    7380    public NearestNeighbourClassification()
    7481      : base() {
     82      Parameters.Add(new FixedValueParameter<BoolValue>(SelfMatchParameterName, "Should we use equal points for classification?", new BoolValue(false)));
    7583      Parameters.Add(new FixedValueParameter<IntValue>(KParameterName, "The number of nearest neighbours to consider for regression.", new IntValue(3)));
    7684      Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, "Optional: use weights to specify individual scaling values for all features. If not set the weights are calculated automatically (each feature is scaled to unit variance)"));
     
    8391      if (!Parameters.ContainsKey(WeightsParameterName)) {
    8492        Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, "Optional: use weights to specify individual scaling values for all features. If not set the weights are calculated automatically (each feature is scaled to unit variance)"));
     93      }
     94      if (!Parameters.ContainsKey(SelfMatchParameterName)) {
     95        Parameters.Add(new FixedValueParameter<BoolValue>(SelfMatchParameterName, "Should we use equal points for classification?", new BoolValue(false)));
    8596      }
    8697      #endregion
     
    95106      double[] weights = null;
    96107      if (Weights != null) weights = Weights.CloneAsArray();
    97       var solution = CreateNearestNeighbourClassificationSolution(Problem.ProblemData, K, weights);
     108      var solution = CreateNearestNeighbourClassificationSolution(Problem.ProblemData, K, SelfMatch, weights);
    98109      Results.Add(new Result(NearestNeighbourClassificationModelResultName, "The nearest neighbour classification solution.", solution));
    99110    }
    100111
    101     public static IClassificationSolution CreateNearestNeighbourClassificationSolution(IClassificationProblemData problemData, int k, double[] weights = null) {
     112    public static IClassificationSolution CreateNearestNeighbourClassificationSolution(IClassificationProblemData problemData, int k, bool selfMatch = false, double[] weights = null) {
    102113      var problemDataClone = (IClassificationProblemData)problemData.Clone();
    103       return new NearestNeighbourClassificationSolution(Train(problemDataClone, k, weights), problemDataClone);
     114      return new NearestNeighbourClassificationSolution(Train(problemDataClone, k, selfMatch, weights), problemDataClone);
    104115    }
    105116
    106     public static INearestNeighbourModel Train(IClassificationProblemData problemData, int k, double[] weights = null) {
     117    public static INearestNeighbourModel Train(IClassificationProblemData problemData, int k, bool selfMatch = false, double[] weights = null) {
    107118      return new NearestNeighbourModel(problemData.Dataset,
    108119        problemData.TrainingIndices,
    109120        k,
     121        selfMatch,
    110122        problemData.TargetVariable,
    111123        problemData.AllowedInputVariables,
  • branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourClassificationSolution.cs

    r14185 r16538  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
  • branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourModel.cs

    r14843 r16538  
    1 #region License Information
     1#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
     
    3737
    3838    private readonly object kdTreeLockObject = new object();
     39
    3940    private alglib.nearestneighbor.kdtree kdTree;
    4041    public alglib.nearestneighbor.kdtree KDTree {
     
    4950    }
    5051
    51 
    5252    public override IEnumerable<string> VariablesUsedForPrediction {
    5353      get { return allowedInputVariables; }
     
    6060    [Storable]
    6161    private int k;
     62    [Storable(DefaultValue = false)]
     63    private bool selfMatch;
    6264    [Storable(DefaultValue = null)]
    6365    private double[] weights; // not set for old versions loaded from disk
     
    9799      kdTree.x = (double[])original.kdTree.x.Clone();
    98100      kdTree.xy = (double[,])original.kdTree.xy.Clone();
    99 
     101      selfMatch = original.selfMatch;
    100102      k = original.k;
    101103      isCompatibilityLoaded = original.IsCompatibilityLoaded;
     
    110112        this.classValues = (double[])original.classValues.Clone();
    111113    }
    112     public NearestNeighbourModel(IDataset dataset, IEnumerable<int> rows, int k, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<double> weights = null, double[] classValues = null)
     114    public NearestNeighbourModel(IDataset dataset, IEnumerable<int> rows, int k, bool selfMatch, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<double> weights = null, double[] classValues = null)
    113115      : base(targetVariable) {
    114116      Name = ItemName;
    115117      Description = ItemDescription;
     118      this.selfMatch = selfMatch;
    116119      this.k = k;
    117120      this.allowedInputVariables = allowedInputVariables.ToArray();
     
    130133          // automatic determination of weights (all features should have variance = 1)
    131134          this.weights = this.allowedInputVariables
    132             .Select(name => 1.0 / dataset.GetDoubleValues(name, rows).StandardDeviationPop())
     135            .Select(name => {
     136              var pop = dataset.GetDoubleValues(name, rows).StandardDeviationPop();
     137              return pop.IsAlmost(0) ? 1.0 : 1.0 / pop;
     138            })
    133139            .Concat(new double[] { 1.0 }) // no scaling for target variable
    134140            .ToArray();
     
    142148      }
    143149
    144       if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
     150      if (inputMatrix.ContainsNanOrInfinity())
    145151        throw new NotSupportedException(
    146152          "Nearest neighbour model does not support NaN or infinity values in the input dataset.");
     
    198204        int numNeighbours;
    199205        lock (kdTreeLockObject) { // gkronber: the following calls change the kdTree data structure
    200           numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, false);
     206          numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, selfMatch);
    201207          alglib.nearestneighbor.kdtreequeryresultsdistances(kdTree, ref dists);
    202208          alglib.nearestneighbor.kdtreequeryresultsxy(kdTree, ref neighbours);
    203209        }
    204 
     210        if (selfMatch) {
     211          // weights for neighbours are 1/d.
     212          // override distances (=0) of exact matches using 1% of the distance of the next closest non-self-match neighbour -> selfmatches weight 100x more than the next closest neighbor.
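     213          // if all k neighbours are selfmatches then they are all assigned the distance 0.01 (minDist starts at dists[0] + 1 = 1 and is divided by 100), so they all receive the same weight.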
     214          double minDist = dists[0] + 1;
     215          for (int i = 0; i < numNeighbours; i++) {
     216            if ((minDist > dists[i]) && (dists[i] != 0)) {
     217              minDist = dists[i];
     218            }
     219          }
     220          minDist /= 100.0;
     221          for (int i = 0; i < numNeighbours; i++) {
     222            if (dists[i] == 0) {
     223              dists[i] = minDist;
     224            }
     225          }
     226        }
    205227        double distanceWeightedValue = 0.0;
    206228        double distsSum = 0.0;
     
    235257        lock (kdTreeLockObject) {
    236258          // gkronber: the following calls change the kdTree data structure
    237           numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, false);
     259          numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, selfMatch);
    238260          alglib.nearestneighbor.kdtreequeryresultsdistances(kdTree, ref dists);
    239261          alglib.nearestneighbor.kdtreequeryresultsxy(kdTree, ref neighbours);
     
    259281
    260282
     283    public bool IsProblemDataCompatible(IRegressionProblemData problemData, out string errorMessage) {
     284      return RegressionModel.IsProblemDataCompatible(this, problemData, out errorMessage);
     285    }
     286
     287    public override bool IsProblemDataCompatible(IDataAnalysisProblemData problemData, out string errorMessage) {
     288      if (problemData == null) throw new ArgumentNullException("problemData", "The provided problemData is null.");
     289
     290      var regressionProblemData = problemData as IRegressionProblemData;
     291      if (regressionProblemData != null)
     292        return IsProblemDataCompatible(regressionProblemData, out errorMessage);
     293
     294      var classificationProblemData = problemData as IClassificationProblemData;
     295      if (classificationProblemData != null)
     296        return IsProblemDataCompatible(classificationProblemData, out errorMessage);
     297
     298      throw new ArgumentException("The problem data is not a regression nor a classification problem data. Instead a " + problemData.GetType().GetPrettyName() + " was provided.", "problemData");
     299    }
     300
    261301    IRegressionSolution IRegressionModel.CreateRegressionSolution(IRegressionProblemData problemData) {
    262302      return new NearestNeighbourRegressionSolution(this, new RegressionProblemData(problemData));
  • branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourRegression.cs

    r14523 r16538  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
     
    4141    private const string NearestNeighbourRegressionModelResultName = "Nearest neighbour regression solution";
    4242    private const string WeightsParameterName = "Weights";
     43    private const string SelfMatchParameterName = "SelfMatch";
    4344
    4445    #region parameter properties
     
    4647      get { return (IFixedValueParameter<IntValue>)Parameters[KParameterName]; }
    4748    }
    48 
     49    public IFixedValueParameter<BoolValue> SelfMatchParameter {
     50      get { return (IFixedValueParameter<BoolValue>)Parameters[SelfMatchParameterName]; }
     51    }
    4952    public IValueParameter<DoubleArray> WeightsParameter {
    5053      get { return (IValueParameter<DoubleArray>)Parameters[WeightsParameterName]; }
     
    5962      }
    6063    }
    61 
     64    public bool SelfMatch {
     65      get { return SelfMatchParameter.Value.Value; }
     66      set { SelfMatchParameter.Value.Value = value; }
     67    }
    6268    public DoubleArray Weights {
    6369      get { return WeightsParameter.Value; }
     
    7581      Parameters.Add(new FixedValueParameter<IntValue>(KParameterName, "The number of nearest neighbours to consider for regression.", new IntValue(3)));
    7682      Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, "Optional: use weights to specify individual scaling values for all features. If not set the weights are calculated automatically (each feature is scaled to unit variance)"));
     83      Parameters.Add(new FixedValueParameter<BoolValue>(SelfMatchParameterName, "Should we use equal points for classification?", new BoolValue(false)));
    7784      Problem = new RegressionProblem();
    7885    }
     
    8491      if (!Parameters.ContainsKey(WeightsParameterName)) {
    8592        Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, "Optional: use weights to specify individual scaling values for all features. If not set the weights are calculated automatically (each feature is scaled to unit variance)"));
     93      }
     94      if (!Parameters.ContainsKey(SelfMatchParameterName)) {
     95        Parameters.Add(new FixedValueParameter<BoolValue>(SelfMatchParameterName, "Should we use equal points for classification?", new BoolValue(false)));
    8696      }
    8797      #endregion
     
    96106      double[] weights = null;
    97107      if (Weights != null) weights = Weights.CloneAsArray();
    98       var solution = CreateNearestNeighbourRegressionSolution(Problem.ProblemData, K, weights);
     108      var solution = CreateNearestNeighbourRegressionSolution(Problem.ProblemData, K, SelfMatch, weights);
    99109      Results.Add(new Result(NearestNeighbourRegressionModelResultName, "The nearest neighbour regression solution.", solution));
    100110    }
    101111
    102     public static IRegressionSolution CreateNearestNeighbourRegressionSolution(IRegressionProblemData problemData, int k, double[] weights = null) {
     112    public static IRegressionSolution CreateNearestNeighbourRegressionSolution(IRegressionProblemData problemData, int k, bool selfMatch = false, double[] weights = null) {
    103113      var clonedProblemData = (IRegressionProblemData)problemData.Clone();
    104       return new NearestNeighbourRegressionSolution(Train(problemData, k, weights), clonedProblemData);
     114      return new NearestNeighbourRegressionSolution(Train(problemData, k, selfMatch, weights), clonedProblemData);
    105115    }
    106116
    107     public static INearestNeighbourModel Train(IRegressionProblemData problemData, int k, double[] weights = null) {
     117    public static INearestNeighbourModel Train(IRegressionProblemData problemData, int k, bool selfMatch = false, double[] weights = null) {
    108118      return new NearestNeighbourModel(problemData.Dataset,
    109119        problemData.TrainingIndices,
    110120        k,
     121        selfMatch,
    111122        problemData.TargetVariable,
    112123        problemData.AllowedInputVariables,
  • branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourRegressionSolution.cs

    r14185 r16538  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
Note: See TracChangeset for help on using the changeset viewer.