Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
01/10/19 15:40:44 (5 years ago)
Author:
gkronber
Message:

#2520: merged r16491:16528 from trunk to persistence branch

Location:
branches/2520_PersistenceReintegration
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • branches/2520_PersistenceReintegration

  • branches/2520_PersistenceReintegration/HeuristicLab.Algorithms.DataAnalysis

  • branches/2520_PersistenceReintegration/HeuristicLab.Algorithms.DataAnalysis/3.4

  • branches/2520_PersistenceReintegration/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourModel.cs

    r16462 r16529  
    1 #region License Information
     1#region License Information
    22/* HeuristicLab
    33 * Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     
    3737
    3838    private readonly object kdTreeLockObject = new object();
     39
    3940    private alglib.nearestneighbor.kdtree kdTree;
    4041    public alglib.nearestneighbor.kdtree KDTree {
     
    4950    }
    5051
    51 
    5252    public override IEnumerable<string> VariablesUsedForPrediction {
    5353      get { return allowedInputVariables; }
     
    6060    [Storable]
    6161    private int k;
     62    [Storable(DefaultValue = false)]
     63    private bool selfMatch;
    6264    [Storable(DefaultValue = null)]
    6365    private double[] weights; // not set for old versions loaded from disk
     
    9597      kdTree.x = (double[])original.kdTree.x.Clone();
    9698      kdTree.xy = (double[,])original.kdTree.xy.Clone();
    97 
     99      selfMatch = original.selfMatch;
    98100      k = original.k;
    99101      isCompatibilityLoaded = original.IsCompatibilityLoaded;
     
    108110        this.classValues = (double[])original.classValues.Clone();
    109111    }
    110     public NearestNeighbourModel(IDataset dataset, IEnumerable<int> rows, int k, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<double> weights = null, double[] classValues = null)
     112    public NearestNeighbourModel(IDataset dataset, IEnumerable<int> rows, int k, bool selfMatch, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<double> weights = null, double[] classValues = null)
    111113      : base(targetVariable) {
    112114      Name = ItemName;
    113115      Description = ItemDescription;
     116      this.selfMatch = selfMatch;
    114117      this.k = k;
    115118      this.allowedInputVariables = allowedInputVariables.ToArray();
     
    130133            .Select(name => {
    131134              var pop = dataset.GetDoubleValues(name, rows).StandardDeviationPop();
    132               return  pop.IsAlmost(0) ? 1.0 : 1.0/pop;
     135              return pop.IsAlmost(0) ? 1.0 : 1.0 / pop;
    133136            })
    134137            .Concat(new double[] { 1.0 }) // no scaling for target variable
     
    199202        int numNeighbours;
    200203        lock (kdTreeLockObject) { // gkronber: the following calls change the kdTree data structure
    201           numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, false);
     204          numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, selfMatch);
    202205          alglib.nearestneighbor.kdtreequeryresultsdistances(kdTree, ref dists);
    203206          alglib.nearestneighbor.kdtreequeryresultsxy(kdTree, ref neighbours);
    204207        }
    205 
     208        if (selfMatch) {
     209          // weights for neighbours are 1/d.
     210          // override distances (=0) of exact matches using 1% of the distance of the closest non-self-match neighbour -> self-matches weigh 100x more than the closest non-self-match neighbour.
     211          // if all k neighbours are self-matches then they are all assigned distance 0.01 (minDist starts at dists[0] + 1 = 1) and therefore weigh equally.
     212          double minDist = dists[0] + 1;
     213          for (int i = 0; i < numNeighbours; i++) {
     214            if ((minDist > dists[i]) && (dists[i] != 0)) {
     215              minDist = dists[i];
     216            }
     217          }
     218          minDist /= 100.0;
     219          for (int i = 0; i < numNeighbours; i++) {
     220            if (dists[i] == 0) {
     221              dists[i] = minDist;
     222            }
     223          }
     224        }
    206225        double distanceWeightedValue = 0.0;
    207226        double distsSum = 0.0;
     
    236255        lock (kdTreeLockObject) {
    237256          // gkronber: the following calls change the kdTree data structure
    238           numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, false);
     257          numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, selfMatch);
    239258          alglib.nearestneighbor.kdtreequeryresultsdistances(kdTree, ref dists);
    240259          alglib.nearestneighbor.kdtreequeryresultsxy(kdTree, ref neighbours);
Note: See TracChangeset for help on using the changeset viewer.