1  #region License Information


2  /* HeuristicLab


3  * Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL)


4  *


5  * This file is part of HeuristicLab.


6  *


7  * HeuristicLab is free software: you can redistribute it and/or modify


8  * it under the terms of the GNU General Public License as published by


9  * the Free Software Foundation, either version 3 of the License, or


10  * (at your option) any later version.


11  *


12  * HeuristicLab is distributed in the hope that it will be useful,


13  * but WITHOUT ANY WARRANTY; without even the implied warranty of


14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the


15  * GNU General Public License for more details.


16  *


17  * You should have received a copy of the GNU General Public License


18  * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.


19  */


20  #endregion


21 


22  using System;


23  using System.Linq;


24  using System.Threading;


25  using HeuristicLab.Common;


26  using HeuristicLab.Core;


27  using HeuristicLab.Data;


28  using HeuristicLab.Optimization;


29  using HeuristicLab.Parameters;


30  using HEAL.Attic;


31  using HeuristicLab.Problems.DataAnalysis;


32 


33  namespace HeuristicLab.Algorithms.DataAnalysis {


/// <summary>
/// Nearest neighbour classification data analysis algorithm.
/// </summary>
/// <remarks>
/// The algorithm exposes three parameters: the neighbour count (K), a self-match flag and an
/// optional array of per-feature weights. Running it trains a <see cref="NearestNeighbourModel"/>
/// on the training indices of the problem data and adds the resulting solution to the results.
/// </remarks>
[Item("Nearest Neighbour Classification (kNN)", "Nearest neighbour classification data analysis algorithm (wrapper for ALGLIB).")]
[Creatable(CreatableAttribute.Categories.DataAnalysisClassification, Priority = 150)]
[StorableType("98161D6FD97745EAB899E47EE017865E")]
public sealed class NearestNeighbourClassification : FixedDataAnalysisAlgorithm<IClassificationProblem> {
  private const string KParameterName = "K";
  private const string NearestNeighbourClassificationModelResultName = "Nearest neighbour classification solution";
  private const string WeightsParameterName = "Weights";
  private const string SelfMatchParameterName = "SelfMatch";

  // Parameter descriptions are shared between the default constructor and the
  // deserialization hook so that both always create identical parameters.
  private const string KParameterDescription = "The number of nearest neighbours to consider for classification.";
  private const string SelfMatchParameterDescription = "Should we use equal points for classification?";
  private const string WeightsParameterDescription = "Optional: use weights to specify individual scaling values for all features. If not set the weights are calculated automatically (each feature is scaled to unit variance)";

  #region parameter properties
  /// <summary>Parameter holding the number of neighbours (K).</summary>
  public IFixedValueParameter<IntValue> KParameter {
    get { return (IFixedValueParameter<IntValue>)Parameters[KParameterName]; }
  }
  /// <summary>Parameter holding the self-match flag.</summary>
  public IFixedValueParameter<BoolValue> SelfMatchParameter {
    get { return (IFixedValueParameter<BoolValue>)Parameters[SelfMatchParameterName]; }
  }
  /// <summary>Parameter holding the optional feature weights; its value may be null.</summary>
  public IValueParameter<DoubleArray> WeightsParameter {
    get { return (IValueParameter<DoubleArray>)Parameters[WeightsParameterName]; }
  }
  #endregion

  #region properties
  /// <summary>Gets or sets the value of the self-match parameter.</summary>
  public bool SelfMatch {
    get { return SelfMatchParameter.Value.Value; }
    set { SelfMatchParameter.Value.Value = value; }
  }
  /// <summary>Gets or sets the number of nearest neighbours.</summary>
  /// <exception cref="ArgumentException">Thrown by the setter when the value is not larger than zero.</exception>
  public int K {
    get { return KParameter.Value.Value; }
    set {
      if (value <= 0) throw new ArgumentException("K must be larger than zero.", "K");
      KParameter.Value.Value = value;
    }
  }
  /// <summary>Gets or sets the feature weights; null when no weights are configured.</summary>
  public DoubleArray Weights {
    get { return WeightsParameter.Value; }
    set { WeightsParameter.Value = value; }
  }
  #endregion

  [StorableConstructor]
  private NearestNeighbourClassification(StorableConstructorFlag _) : base(_) { }

  // Copy constructor used by Clone; all state is handled by the base class.
  private NearestNeighbourClassification(NearestNeighbourClassification original, Cloner cloner)
    : base(original, cloner) {
  }

  /// <summary>
  /// Creates the algorithm with SelfMatch = false, K = 3, no weights and a new
  /// <see cref="ClassificationProblem"/> as the problem.
  /// </summary>
  public NearestNeighbourClassification()
    : base() {
    Parameters.Add(new FixedValueParameter<BoolValue>(SelfMatchParameterName, SelfMatchParameterDescription, new BoolValue(false)));
    Parameters.Add(new FixedValueParameter<IntValue>(KParameterName, KParameterDescription, new IntValue(3)));
    Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, WeightsParameterDescription));
    Problem = new ClassificationProblem();
  }

  /// <summary>
  /// Adds the Weights and SelfMatch parameters when they are absent after deserialization.
  /// </summary>
  [StorableHook(HookType.AfterDeserialization)]
  private void AfterDeserialization() {
    // BackwardsCompatibility3.3
    #region Backwards compatible code, remove with 3.4
    if (!Parameters.ContainsKey(WeightsParameterName)) {
      Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, WeightsParameterDescription));
    }
    if (!Parameters.ContainsKey(SelfMatchParameterName)) {
      Parameters.Add(new FixedValueParameter<BoolValue>(SelfMatchParameterName, SelfMatchParameterDescription, new BoolValue(false)));
    }
    #endregion
  }

  /// <summary>Creates a clone of this algorithm using the given cloner.</summary>
  public override IDeepCloneable Clone(Cloner cloner) {
    return new NearestNeighbourClassification(this, cloner);
  }

  #region nearest neighbour
  /// <summary>
  /// Builds a nearest neighbour classification solution for the current problem data
  /// and adds it to the results.
  /// </summary>
  /// <param name="cancellationToken">Not used by this implementation.</param>
  protected override void Run(CancellationToken cancellationToken) {
    // Weights are optional; null is passed through when none are configured.
    double[] weights = Weights != null ? Weights.CloneAsArray() : null;
    var solution = CreateNearestNeighbourClassificationSolution(Problem.ProblemData, K, SelfMatch, weights);
    Results.Add(new Result(NearestNeighbourClassificationModelResultName, "The nearest neighbour classification solution.", solution));
  }

  /// <summary>
  /// Trains a nearest neighbour model on a clone of the given problem data and wraps it in a solution.
  /// </summary>
  /// <param name="problemData">The classification problem data; it is cloned and the clone is used for the model and the solution.</param>
  /// <param name="k">The number of nearest neighbours.</param>
  /// <param name="selfMatch">Self-match flag forwarded to the model.</param>
  /// <param name="weights">Optional feature weights forwarded to the model; may be null.</param>
  /// <returns>The created classification solution.</returns>
  /// <exception cref="ArgumentNullException">Thrown when <paramref name="problemData"/> is null.</exception>
  public static IClassificationSolution CreateNearestNeighbourClassificationSolution(IClassificationProblemData problemData, int k, bool selfMatch = false, double[] weights = null) {
    if (problemData == null) throw new ArgumentNullException("problemData");
    var problemDataClone = (IClassificationProblemData)problemData.Clone();
    var model = Train(problemDataClone, k, selfMatch, weights);
    return new NearestNeighbourClassificationSolution(model, problemDataClone);
  }

  /// <summary>
  /// Creates a <see cref="NearestNeighbourModel"/> from the dataset, training indices, target variable,
  /// allowed input variables and class values of the given problem data.
  /// </summary>
  /// <param name="problemData">The classification problem data to train on.</param>
  /// <param name="k">The number of nearest neighbours.</param>
  /// <param name="selfMatch">Self-match flag forwarded to the model.</param>
  /// <param name="weights">Optional feature weights forwarded to the model; may be null.</param>
  /// <returns>The created nearest neighbour model.</returns>
  /// <exception cref="ArgumentNullException">Thrown when <paramref name="problemData"/> is null.</exception>
  public static INearestNeighbourModel Train(IClassificationProblemData problemData, int k, bool selfMatch = false, double[] weights = null) {
    if (problemData == null) throw new ArgumentNullException("problemData");
    return new NearestNeighbourModel(problemData.Dataset,
      problemData.TrainingIndices,
      k,
      selfMatch,
      problemData.TargetVariable,
      problemData.AllowedInputVariables,
      weights,
      problemData.ClassValues.ToArray());
  }
  #endregion
}


129  }

