Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
12/19/18 14:56:54 (6 years ago)
Author:
msemenki
Message:

#2942: Add the ability for KNN regression/classification to use data points with zero distance to the query point. Change the way weights are assigned to neighboring points to avoid division by zero.

Location:
branches/2942_KNNRegressionClassification
Files:
1 added
1 edited
1 copied

Legend:

Unmodified
Added
Removed
  • branches/2942_KNNRegressionClassification/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourModel.cs

    r16243 r16408  
    1 #region License Information
     1#region License Information
    22/* HeuristicLab
    33 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     
    3737
    3838    private readonly object kdTreeLockObject = new object();
     39
    3940    private alglib.nearestneighbor.kdtree kdTree;
    4041    public alglib.nearestneighbor.kdtree KDTree {
     
    6061    [Storable]
    6162    private int k;
     63    [Storable]
     64    private bool selfMatch;
    6265    [Storable(DefaultValue = null)]
    6366    private double[] weights; // not set for old versions loaded from disk
     
    97100      kdTree.x = (double[])original.kdTree.x.Clone();
    98101      kdTree.xy = (double[,])original.kdTree.xy.Clone();
    99 
     102      selfMatch = original.selfMatch;
    100103      k = original.k;
    101104      isCompatibilityLoaded = original.IsCompatibilityLoaded;
     
    110113        this.classValues = (double[])original.classValues.Clone();
    111114    }
    112     public NearestNeighbourModel(IDataset dataset, IEnumerable<int> rows, int k, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<double> weights = null, double[] classValues = null)
     115    public NearestNeighbourModel(IDataset dataset, IEnumerable<int> rows, int k, bool selfMatch, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<double> weights = null, double[] classValues = null)
    113116      : base(targetVariable) {
    114117      Name = ItemName;
    115118      Description = ItemDescription;
     119      this.selfMatch = selfMatch;
    116120      this.k = k;
    117121      this.allowedInputVariables = allowedInputVariables.ToArray();
     
    132136            .Select(name => {
    133137              var pop = dataset.GetDoubleValues(name, rows).StandardDeviationPop();
    134               return  pop.IsAlmost(0) ? 1.0 : 1.0/pop;
     138              return pop.IsAlmost(0) ? 1.0 : 1.0 / pop;
    135139            })
    136140            .Concat(new double[] { 1.0 }) // no scaling for target variable
     
    201205        int numNeighbours;
    202206        lock (kdTreeLockObject) { // gkronber: the following calls change the kdTree data structure
    203           numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, false);
     207          numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, selfMatch);
    204208          alglib.nearestneighbor.kdtreequeryresultsdistances(kdTree, ref dists);
    205209          alglib.nearestneighbor.kdtreequeryresultsxy(kdTree, ref neighbours);
    206210        }
    207 
     211        if (selfMatch) {
     212          double minDist = dists[0] + 1;
     213          for (int i = 0; i < numNeighbours; i++) {
     214            if ((minDist > dists[i]) && (dists[i] != 0)) {
     215              minDist = dists[i];
     216            }
     217          }
     218          minDist /= 100.0;
     219          for (int i = 0; i < numNeighbours; i++) {
     220            if (dists[i] == 0) {
     221              dists[i] = minDist;
     222            }
     223          }
     224        }
    208225        double distanceWeightedValue = 0.0;
    209226        double distsSum = 0.0;
     
    238255        lock (kdTreeLockObject) {
    239256          // gkronber: the following calls change the kdTree data structure
    240           numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, false);
     257          numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, selfMatch);
    241258          alglib.nearestneighbor.kdtreequeryresultsdistances(kdTree, ref dists);
    242259          alglib.nearestneighbor.kdtreequeryresultsxy(kdTree, ref neighbours);
Note: See TracChangeset for help on using the changeset viewer.