Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
01/03/19 18:11:05 (6 years ago)
Author:
gkronber
Message:

#2942: manually merged the changes from r16408, r16488, and r16490 from the branch into trunk

Location:
trunk/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourClassification.cs

    r15583 r16491  
    1 #region License Information
     1#region License Information
    22/* HeuristicLab
    33 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     
    4242    private const string NearestNeighbourClassificationModelResultName = "Nearest neighbour classification solution";
    4343    private const string WeightsParameterName = "Weights";
    44 
     44    private const string SelfMatchParameterName = "SelfMatch";
    4545
    4646    #region parameter properties
    4747    public IFixedValueParameter<IntValue> KParameter {
    4848      get { return (IFixedValueParameter<IntValue>)Parameters[KParameterName]; }
     49    }
     50    public IFixedValueParameter<BoolValue> SelfMatchParameter {
     51      get { return (IFixedValueParameter<BoolValue>)Parameters[SelfMatchParameterName]; }
    4952    }
    5053    public IValueParameter<DoubleArray> WeightsParameter {
     
    5356    #endregion
    5457    #region properties
     58    public bool SelfMatch {
     59      get { return SelfMatchParameter.Value.Value; }
     60      set { SelfMatchParameter.Value.Value = value; }
     61    }
    5562    public int K {
    5663      get { return KParameter.Value.Value; }
     
    7380    public NearestNeighbourClassification()
    7481      : base() {
     82      Parameters.Add(new FixedValueParameter<BoolValue>(SelfMatchParameterName, "Should we use equal points for classification?", new BoolValue(false)));
    7583      Parameters.Add(new FixedValueParameter<IntValue>(KParameterName, "The number of nearest neighbours to consider for regression.", new IntValue(3)));
    7684      Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, "Optional: use weights to specify individual scaling values for all features. If not set the weights are calculated automatically (each feature is scaled to unit variance)"));
     
    8391      if (!Parameters.ContainsKey(WeightsParameterName)) {
    8492        Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, "Optional: use weights to specify individual scaling values for all features. If not set the weights are calculated automatically (each feature is scaled to unit variance)"));
     93      }
     94      if (!Parameters.ContainsKey(SelfMatchParameterName)) {
     95        Parameters.Add(new FixedValueParameter<BoolValue>(SelfMatchParameterName, "Should we use equal points for classification?", new BoolValue(false)));
    8596      }
    8697      #endregion
     
    95106      double[] weights = null;
    96107      if (Weights != null) weights = Weights.CloneAsArray();
    97       var solution = CreateNearestNeighbourClassificationSolution(Problem.ProblemData, K, weights);
     108      var solution = CreateNearestNeighbourClassificationSolution(Problem.ProblemData, K, SelfMatch, weights);
    98109      Results.Add(new Result(NearestNeighbourClassificationModelResultName, "The nearest neighbour classification solution.", solution));
    99110    }
    100111
    101     public static IClassificationSolution CreateNearestNeighbourClassificationSolution(IClassificationProblemData problemData, int k, double[] weights = null) {
     112    public static IClassificationSolution CreateNearestNeighbourClassificationSolution(IClassificationProblemData problemData, int k, bool selfMatch = false, double[] weights = null) {
    102113      var problemDataClone = (IClassificationProblemData)problemData.Clone();
    103       return new NearestNeighbourClassificationSolution(Train(problemDataClone, k, weights), problemDataClone);
     114      return new NearestNeighbourClassificationSolution(Train(problemDataClone, k, selfMatch, weights), problemDataClone);
    104115    }
    105116
    106     public static INearestNeighbourModel Train(IClassificationProblemData problemData, int k, double[] weights = null) {
     117    public static INearestNeighbourModel Train(IClassificationProblemData problemData, int k, bool selfMatch = false, double[] weights = null) {
    107118      return new NearestNeighbourModel(problemData.Dataset,
    108119        problemData.TrainingIndices,
    109120        k,
     121        selfMatch,
    110122        problemData.TargetVariable,
    111123        problemData.AllowedInputVariables,
  • trunk/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourModel.cs

    r16243 r16491  
    1 #region License Information
     1#region License Information
    22/* HeuristicLab
    33 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     
    3737
    3838    private readonly object kdTreeLockObject = new object();
     39
    3940    private alglib.nearestneighbor.kdtree kdTree;
    4041    public alglib.nearestneighbor.kdtree KDTree {
     
    4950    }
    5051
    51 
    5252    public override IEnumerable<string> VariablesUsedForPrediction {
    5353      get { return allowedInputVariables; }
     
    6060    [Storable]
    6161    private int k;
     62    [Storable(DefaultValue = false)]
     63    private bool selfMatch;
    6264    [Storable(DefaultValue = null)]
    6365    private double[] weights; // not set for old versions loaded from disk
     
    9799      kdTree.x = (double[])original.kdTree.x.Clone();
    98100      kdTree.xy = (double[,])original.kdTree.xy.Clone();
    99 
     101      selfMatch = original.selfMatch;
    100102      k = original.k;
    101103      isCompatibilityLoaded = original.IsCompatibilityLoaded;
     
    110112        this.classValues = (double[])original.classValues.Clone();
    111113    }
    112     public NearestNeighbourModel(IDataset dataset, IEnumerable<int> rows, int k, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<double> weights = null, double[] classValues = null)
     114    public NearestNeighbourModel(IDataset dataset, IEnumerable<int> rows, int k, bool selfMatch, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<double> weights = null, double[] classValues = null)
    113115      : base(targetVariable) {
    114116      Name = ItemName;
    115117      Description = ItemDescription;
     118      this.selfMatch = selfMatch;
    116119      this.k = k;
    117120      this.allowedInputVariables = allowedInputVariables.ToArray();
     
    132135            .Select(name => {
    133136              var pop = dataset.GetDoubleValues(name, rows).StandardDeviationPop();
    134               return  pop.IsAlmost(0) ? 1.0 : 1.0/pop;
     137              return pop.IsAlmost(0) ? 1.0 : 1.0 / pop;
    135138            })
    136139            .Concat(new double[] { 1.0 }) // no scaling for target variable
     
    201204        int numNeighbours;
    202205        lock (kdTreeLockObject) { // gkronber: the following calls change the kdTree data structure
    203           numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, false);
     206          numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, selfMatch);
    204207          alglib.nearestneighbor.kdtreequeryresultsdistances(kdTree, ref dists);
    205208          alglib.nearestneighbor.kdtreequeryresultsxy(kdTree, ref neighbours);
    206209        }
    207 
     210        if (selfMatch) {
     211          // neighbours are weighted by 1/d, so exact matches (d = 0) need a non-zero substitute distance.
     212          // replace the zero distances of exact matches by 1% of the smallest non-zero neighbour distance (capped at 1% of dists[0] + 1) -> exact matches weigh at least 100x more than the closest non-matching neighbour.
     213          // if all k neighbours are exact matches then they all get distance 0.01 and therefore the same weight.
     214          double minDist = dists[0] + 1;
     215          for (int i = 0; i < numNeighbours; i++) {
     216            if ((minDist > dists[i]) && (dists[i] != 0)) {
     217              minDist = dists[i];
     218            }
     219          }
     220          minDist /= 100.0;
     221          for (int i = 0; i < numNeighbours; i++) {
     222            if (dists[i] == 0) {
     223              dists[i] = minDist;
     224            }
     225          }
     226        }
    208227        double distanceWeightedValue = 0.0;
    209228        double distsSum = 0.0;
     
    238257        lock (kdTreeLockObject) {
    239258          // gkronber: the following calls change the kdTree data structure
    240           numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, false);
     259          numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, selfMatch);
    241260          alglib.nearestneighbor.kdtreequeryresultsdistances(kdTree, ref dists);
    242261          alglib.nearestneighbor.kdtreequeryresultsxy(kdTree, ref neighbours);
  • trunk/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourRegression.cs

    r15583 r16491  
    4141    private const string NearestNeighbourRegressionModelResultName = "Nearest neighbour regression solution";
    4242    private const string WeightsParameterName = "Weights";
     43    private const string SelfMatchParameterName = "SelfMatch";
    4344
    4445    #region parameter properties
     
    4647      get { return (IFixedValueParameter<IntValue>)Parameters[KParameterName]; }
    4748    }
    48 
     49    public IFixedValueParameter<BoolValue> SelfMatchParameter {
     50      get { return (IFixedValueParameter<BoolValue>)Parameters[SelfMatchParameterName]; }
     51    }
    4952    public IValueParameter<DoubleArray> WeightsParameter {
    5053      get { return (IValueParameter<DoubleArray>)Parameters[WeightsParameterName]; }
     
    5962      }
    6063    }
    61 
     64    public bool SelfMatch {
     65      get { return SelfMatchParameter.Value.Value; }
     66      set { SelfMatchParameter.Value.Value = value; }
     67    }
    6268    public DoubleArray Weights {
    6369      get { return WeightsParameter.Value; }
     
    7581      Parameters.Add(new FixedValueParameter<IntValue>(KParameterName, "The number of nearest neighbours to consider for regression.", new IntValue(3)));
    7682      Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, "Optional: use weights to specify individual scaling values for all features. If not set the weights are calculated automatically (each feature is scaled to unit variance)"));
     83      Parameters.Add(new FixedValueParameter<BoolValue>(SelfMatchParameterName, "Should we use equal points for classification?", new BoolValue(false)));
    7784      Problem = new RegressionProblem();
    7885    }
     
    8491      if (!Parameters.ContainsKey(WeightsParameterName)) {
    8592        Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, "Optional: use weights to specify individual scaling values for all features. If not set the weights are calculated automatically (each feature is scaled to unit variance)"));
     93      }
     94      if (!Parameters.ContainsKey(SelfMatchParameterName)) {
     95        Parameters.Add(new FixedValueParameter<BoolValue>(SelfMatchParameterName, "Should we use equal points for classification?", new BoolValue(false)));
    8696      }
    8797      #endregion
     
    96106      double[] weights = null;
    97107      if (Weights != null) weights = Weights.CloneAsArray();
    98       var solution = CreateNearestNeighbourRegressionSolution(Problem.ProblemData, K, weights);
     108      var solution = CreateNearestNeighbourRegressionSolution(Problem.ProblemData, K, SelfMatch, weights);
    99109      Results.Add(new Result(NearestNeighbourRegressionModelResultName, "The nearest neighbour regression solution.", solution));
    100110    }
    101111
    102     public static IRegressionSolution CreateNearestNeighbourRegressionSolution(IRegressionProblemData problemData, int k, double[] weights = null) {
     112    public static IRegressionSolution CreateNearestNeighbourRegressionSolution(IRegressionProblemData problemData, int k, bool selfMatch = false, double[] weights = null) {
    103113      var clonedProblemData = (IRegressionProblemData)problemData.Clone();
    104       return new NearestNeighbourRegressionSolution(Train(problemData, k, weights), clonedProblemData);
     114      return new NearestNeighbourRegressionSolution(Train(problemData, k, selfMatch, weights), clonedProblemData);
    105115    }
    106116
    107     public static INearestNeighbourModel Train(IRegressionProblemData problemData, int k, double[] weights = null) {
     117    public static INearestNeighbourModel Train(IRegressionProblemData problemData, int k, bool selfMatch = false, double[] weights = null) {
    108118      return new NearestNeighbourModel(problemData.Dataset,
    109119        problemData.TrainingIndices,
    110120        k,
     121        selfMatch,
    111122        problemData.TargetVariable,
    112123        problemData.AllowedInputVariables,
Note: See TracChangeset for help on using the changeset viewer.