Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
07/23/19 20:56:15 (5 years ago)
Author:
gkronber
Message:

#2942: merged r16491 from trunk to stable

Location:
stable
Files:
6 edited

Legend:

Unmodified
Added
Removed
  • stable

  • stable/HeuristicLab.Algorithms.DataAnalysis

  • stable/HeuristicLab.Algorithms.DataAnalysis/3.4

  • stable/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourClassification.cs

    r17097 r17164  
    1 #region License Information
     1#region License Information
    22/* HeuristicLab
    33 * Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     
    4242    private const string NearestNeighbourClassificationModelResultName = "Nearest neighbour classification solution";
    4343    private const string WeightsParameterName = "Weights";
    44 
     44    private const string SelfMatchParameterName = "SelfMatch";
    4545
    4646    #region parameter properties
    4747    public IFixedValueParameter<IntValue> KParameter {
    4848      get { return (IFixedValueParameter<IntValue>)Parameters[KParameterName]; }
     49    }
     50    public IFixedValueParameter<BoolValue> SelfMatchParameter {
     51      get { return (IFixedValueParameter<BoolValue>)Parameters[SelfMatchParameterName]; }
    4952    }
    5053    public IValueParameter<DoubleArray> WeightsParameter {
     
    5356    #endregion
    5457    #region properties
     58    public bool SelfMatch {
     59      get { return SelfMatchParameter.Value.Value; }
     60      set { SelfMatchParameter.Value.Value = value; }
     61    }
    5562    public int K {
    5663      get { return KParameter.Value.Value; }
     
    7380    public NearestNeighbourClassification()
    7481      : base() {
     82      Parameters.Add(new FixedValueParameter<BoolValue>(SelfMatchParameterName, "Should we use equal points for classification?", new BoolValue(false)));
    7583      Parameters.Add(new FixedValueParameter<IntValue>(KParameterName, "The number of nearest neighbours to consider for regression.", new IntValue(3)));
    7684      Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, "Optional: use weights to specify individual scaling values for all features. If not set the weights are calculated automatically (each feature is scaled to unit variance)"));
     
    8391      if (!Parameters.ContainsKey(WeightsParameterName)) {
    8492        Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, "Optional: use weights to specify individual scaling values for all features. If not set the weights are calculated automatically (each feature is scaled to unit variance)"));
     93      }
     94      if (!Parameters.ContainsKey(SelfMatchParameterName)) {
     95        Parameters.Add(new FixedValueParameter<BoolValue>(SelfMatchParameterName, "Should we use equal points for classification?", new BoolValue(false)));
    8596      }
    8697      #endregion
     
    95106      double[] weights = null;
    96107      if (Weights != null) weights = Weights.CloneAsArray();
    97       var solution = CreateNearestNeighbourClassificationSolution(Problem.ProblemData, K, weights);
     108      var solution = CreateNearestNeighbourClassificationSolution(Problem.ProblemData, K, SelfMatch, weights);
    98109      Results.Add(new Result(NearestNeighbourClassificationModelResultName, "The nearest neighbour classification solution.", solution));
    99110    }
    100111
    101     public static IClassificationSolution CreateNearestNeighbourClassificationSolution(IClassificationProblemData problemData, int k, double[] weights = null) {
     112    public static IClassificationSolution CreateNearestNeighbourClassificationSolution(IClassificationProblemData problemData, int k, bool selfMatch = false, double[] weights = null) {
    102113      var problemDataClone = (IClassificationProblemData)problemData.Clone();
    103       return new NearestNeighbourClassificationSolution(Train(problemDataClone, k, weights), problemDataClone);
     114      return new NearestNeighbourClassificationSolution(Train(problemDataClone, k, selfMatch, weights), problemDataClone);
    104115    }
    105116
    106     public static INearestNeighbourModel Train(IClassificationProblemData problemData, int k, double[] weights = null) {
     117    public static INearestNeighbourModel Train(IClassificationProblemData problemData, int k, bool selfMatch = false, double[] weights = null) {
    107118      return new NearestNeighbourModel(problemData.Dataset,
    108119        problemData.TrainingIndices,
    109120        k,
     121        selfMatch,
    110122        problemData.TargetVariable,
    111123        problemData.AllowedInputVariables,
  • stable/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourModel.cs

    r17097 r17164  
    1 #region License Information
     1#region License Information
    22/* HeuristicLab
    33 * Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     
    3737
    3838    private readonly object kdTreeLockObject = new object();
     39
    3940    private alglib.nearestneighbor.kdtree kdTree;
    4041    public alglib.nearestneighbor.kdtree KDTree {
     
    4950    }
    5051
    51 
    5252    public override IEnumerable<string> VariablesUsedForPrediction {
    5353      get { return allowedInputVariables; }
     
    6060    [Storable]
    6161    private int k;
     62    [Storable(DefaultValue = false)]
     63    private bool selfMatch;
    6264    [Storable(DefaultValue = null)]
    6365    private double[] weights; // not set for old versions loaded from disk
     
    9597      kdTree.x = (double[])original.kdTree.x.Clone();
    9698      kdTree.xy = (double[,])original.kdTree.xy.Clone();
    97 
     99      selfMatch = original.selfMatch;
    98100      k = original.k;
    99101      isCompatibilityLoaded = original.IsCompatibilityLoaded;
     
    108110        this.classValues = (double[])original.classValues.Clone();
    109111    }
    110     public NearestNeighbourModel(IDataset dataset, IEnumerable<int> rows, int k, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<double> weights = null, double[] classValues = null)
     112    public NearestNeighbourModel(IDataset dataset, IEnumerable<int> rows, int k, bool selfMatch, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<double> weights = null, double[] classValues = null)
    111113      : base(targetVariable) {
    112114      Name = ItemName;
    113115      Description = ItemDescription;
     116      this.selfMatch = selfMatch;
    114117      this.k = k;
    115118      this.allowedInputVariables = allowedInputVariables.ToArray();
     
    130133            .Select(name => {
    131134              var pop = dataset.GetDoubleValues(name, rows).StandardDeviationPop();
    132               return  pop.IsAlmost(0) ? 1.0 : 1.0/pop;
     135              return pop.IsAlmost(0) ? 1.0 : 1.0 / pop;
    133136            })
    134137            .Concat(new double[] { 1.0 }) // no scaling for target variable
     
    199202        int numNeighbours;
    200203        lock (kdTreeLockObject) { // gkronber: the following calls change the kdTree data structure
    201           numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, false);
     204          numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, selfMatch);
    202205          alglib.nearestneighbor.kdtreequeryresultsdistances(kdTree, ref dists);
    203206          alglib.nearestneighbor.kdtreequeryresultsxy(kdTree, ref neighbours);
    204207        }
    205 
     208        if (selfMatch) {
     209          // weights for neighbours are 1/d.
     210          // override the distances (= 0) of exact matches with 1% of the smallest non-zero neighbour distance -> self-matches weigh 100x more than the closest non-self-match neighbour.
     211          // if all k neighbours are self-matches, minDist stays at dists[0] + 1 = 1, so they all get distance 0.01 and therefore equal weights.
     212          double minDist = dists[0] + 1;
     213          for (int i = 0; i < numNeighbours; i++) {
     214            if ((minDist > dists[i]) && (dists[i] != 0)) {
     215              minDist = dists[i];
     216            }
     217          }
     218          minDist /= 100.0;
     219          for (int i = 0; i < numNeighbours; i++) {
     220            if (dists[i] == 0) {
     221              dists[i] = minDist;
     222            }
     223          }
     224        }
    206225        double distanceWeightedValue = 0.0;
    207226        double distsSum = 0.0;
     
    236255        lock (kdTreeLockObject) {
    237256          // gkronber: the following calls change the kdTree data structure
    238           numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, false);
     257          numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, selfMatch);
    239258          alglib.nearestneighbor.kdtreequeryresultsdistances(kdTree, ref dists);
    240259          alglib.nearestneighbor.kdtreequeryresultsxy(kdTree, ref neighbours);
  • stable/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourRegression.cs

    r17097 r17164  
    4141    private const string NearestNeighbourRegressionModelResultName = "Nearest neighbour regression solution";
    4242    private const string WeightsParameterName = "Weights";
     43    private const string SelfMatchParameterName = "SelfMatch";
    4344
    4445    #region parameter properties
     
    4647      get { return (IFixedValueParameter<IntValue>)Parameters[KParameterName]; }
    4748    }
    48 
     49    public IFixedValueParameter<BoolValue> SelfMatchParameter {
     50      get { return (IFixedValueParameter<BoolValue>)Parameters[SelfMatchParameterName]; }
     51    }
    4952    public IValueParameter<DoubleArray> WeightsParameter {
    5053      get { return (IValueParameter<DoubleArray>)Parameters[WeightsParameterName]; }
     
    5962      }
    6063    }
    61 
     64    public bool SelfMatch {
     65      get { return SelfMatchParameter.Value.Value; }
     66      set { SelfMatchParameter.Value.Value = value; }
     67    }
    6268    public DoubleArray Weights {
    6369      get { return WeightsParameter.Value; }
     
    7581      Parameters.Add(new FixedValueParameter<IntValue>(KParameterName, "The number of nearest neighbours to consider for regression.", new IntValue(3)));
    7682      Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, "Optional: use weights to specify individual scaling values for all features. If not set the weights are calculated automatically (each feature is scaled to unit variance)"));
     83      Parameters.Add(new FixedValueParameter<BoolValue>(SelfMatchParameterName, "Should we use equal points for classification?", new BoolValue(false)));
    7784      Problem = new RegressionProblem();
    7885    }
     
    8491      if (!Parameters.ContainsKey(WeightsParameterName)) {
    8592        Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, "Optional: use weights to specify individual scaling values for all features. If not set the weights are calculated automatically (each feature is scaled to unit variance)"));
     93      }
     94      if (!Parameters.ContainsKey(SelfMatchParameterName)) {
     95        Parameters.Add(new FixedValueParameter<BoolValue>(SelfMatchParameterName, "Should we use equal points for classification?", new BoolValue(false)));
    8696      }
    8797      #endregion
     
    96106      double[] weights = null;
    97107      if (Weights != null) weights = Weights.CloneAsArray();
    98       var solution = CreateNearestNeighbourRegressionSolution(Problem.ProblemData, K, weights);
     108      var solution = CreateNearestNeighbourRegressionSolution(Problem.ProblemData, K, SelfMatch, weights);
    99109      Results.Add(new Result(NearestNeighbourRegressionModelResultName, "The nearest neighbour regression solution.", solution));
    100110    }
    101111
    102     public static IRegressionSolution CreateNearestNeighbourRegressionSolution(IRegressionProblemData problemData, int k, double[] weights = null) {
     112    public static IRegressionSolution CreateNearestNeighbourRegressionSolution(IRegressionProblemData problemData, int k, bool selfMatch = false, double[] weights = null) {
    103113      var clonedProblemData = (IRegressionProblemData)problemData.Clone();
    104       return new NearestNeighbourRegressionSolution(Train(problemData, k, weights), clonedProblemData);
     114      return new NearestNeighbourRegressionSolution(Train(problemData, k, selfMatch, weights), clonedProblemData);
    105115    }
    106116
    107     public static INearestNeighbourModel Train(IRegressionProblemData problemData, int k, double[] weights = null) {
     117    public static INearestNeighbourModel Train(IRegressionProblemData problemData, int k, bool selfMatch = false, double[] weights = null) {
    108118      return new NearestNeighbourModel(problemData.Dataset,
    109119        problemData.TrainingIndices,
    110120        k,
     121        selfMatch,
    111122        problemData.TargetVariable,
    112123        problemData.AllowedInputVariables,
Note: See TracChangeset for help on using the changeset viewer.