[9270]  1  #region License Information


 2  /* HeuristicLab


[14185]  3  * Copyright (C) 20022016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)


[9270]  4  *


 5  * This file is part of HeuristicLab.


 6  *


 7  * HeuristicLab is free software: you can redistribute it and/or modify


 8  * it under the terms of the GNU General Public License as published by


 9  * the Free Software Foundation, either version 3 of the License, or


 10  * (at your option) any later version.


 11  *


 12  * HeuristicLab is distributed in the hope that it will be useful,


 13  * but WITHOUT ANY WARRANTY; without even the implied warranty of


 14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the


 15  * GNU General Public License for more details.


 16  *


 17  * You should have received a copy of the GNU General Public License


 18  * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.


 19  */


 20  #endregion


 21 


 22  using System;


 23  using System.Collections.Generic;


 24  using System.Linq;


 25  using HeuristicLab.Common;


 26  using HeuristicLab.Core;


 27  using HeuristicLab.Data;


 28  using HeuristicLab.Encodings.RealVectorEncoding;


 29  using HeuristicLab.Operators;


[11970]  30  using HeuristicLab.Optimization;


[9270]  31  using HeuristicLab.Parameters;


 32  using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;


 33  using HeuristicLab.Problems.DataAnalysis;


 34 


 35  namespace HeuristicLab.Algorithms.DataAnalysis {


 36  [Item("NcaGradientCalculator", "Calculates the quality and gradient of a certain NCA matrix.")]


 37  [StorableClass]


[11970]  38  public class NcaGradientCalculator : SingleSuccessorOperator, ISingleObjectiveOperator {


[9270]  39 


 40  #region Parameter Properties


 41  public ILookupParameter<IntValue> DimensionsParameter {


 42  get { return (ILookupParameter<IntValue>)Parameters["Dimensions"]; }


 43  }


 44 


 45  public ILookupParameter<IntValue> NeighborSamplesParameter {


 46  get { return (ILookupParameter<IntValue>)Parameters["NeighborSamples"]; }


 47  }


 48 


 49  public ILookupParameter<DoubleValue> RegularizationParameter {


 50  get { return (ILookupParameter<DoubleValue>)Parameters["Regularization"]; }


 51  }


 52 


 53  public ILookupParameter<RealVector> NcaMatrixParameter {


 54  get { return (ILookupParameter<RealVector>)Parameters["NcaMatrix"]; }


 55  }


 56 


 57  public ILookupParameter<RealVector> NcaMatrixGradientsParameter {


 58  get { return (ILookupParameter<RealVector>)Parameters["NcaMatrixGradients"]; }


 59  }


 60 


 61  public ILookupParameter<DoubleValue> QualityParameter {


 62  get { return (ILookupParameter<DoubleValue>)Parameters["Quality"]; }


 63  }


 64 


 65  public ILookupParameter<IClassificationProblemData> ProblemDataParameter {


 66  get { return (ILookupParameter<IClassificationProblemData>)Parameters["ProblemData"]; }


 67  }


 68  #endregion


 69 


 70  [StorableConstructor]


 71  protected NcaGradientCalculator(bool deserializing) : base(deserializing) { }


 72  protected NcaGradientCalculator(NcaGradientCalculator original, Cloner cloner) : base(original, cloner) { }


 73  public NcaGradientCalculator()


 74  : base() {


 75  Parameters.Add(new LookupParameter<IntValue>("Dimensions", "The dimensions to which the feature space should be reduced to."));


 76  Parameters.Add(new LookupParameter<IntValue>("NeighborSamples", "The number of neighbors that should be taken into account at maximum."));


 77  Parameters.Add(new LookupParameter<DoubleValue>("Regularization", "The regularization term that constrains the expansion of the projected space."));


 78  Parameters.Add(new LookupParameter<RealVector>("NcaMatrix", "The optimized matrix."));


 79  Parameters.Add(new LookupParameter<RealVector>("NcaMatrixGradients", "The gradients from the matrix that is being optimized."));


 80  Parameters.Add(new LookupParameter<DoubleValue>("Quality", "The quality of the current matrix."));


 81  Parameters.Add(new LookupParameter<IClassificationProblemData>("ProblemData", "The classification problem data."));


 82  }


 83 


 84  public override IDeepCloneable Clone(Cloner cloner) {


 85  return new NcaGradientCalculator(this, cloner);


 86  }


 87 


 88  public override IOperation Apply() {


 89  var problemData = ProblemDataParameter.ActualValue;


 90  var dimensions = DimensionsParameter.ActualValue.Value;


 91  var neighborSamples = NeighborSamplesParameter.ActualValue.Value;


 92  var regularization = RegularizationParameter.ActualValue.Value;


 93 


 94  var vector = NcaMatrixParameter.ActualValue;


 95  var gradients = NcaMatrixGradientsParameter.ActualValue;


 96  if (gradients == null) {


 97  gradients = new RealVector(vector.Length);


 98  NcaMatrixGradientsParameter.ActualValue = gradients;


 99  }


 100 


[9272]  101  var data = AlglibUtil.PrepareInputMatrix(problemData.Dataset, problemData.AllowedInputVariables,


 102  problemData.TrainingIndices);


[9270]  103  var classes = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices).ToArray();


 104 


 105  var quality = Gradient(vector, gradients, data, classes, dimensions, neighborSamples, regularization);


 106  QualityParameter.ActualValue = new DoubleValue(quality);


 107 


 108  return base.Apply();


 109  }


 110 


 111  private static double Gradient(RealVector A, RealVector grad, double[,] data, double[] classes, int dimensions, int neighborSamples, double regularization) {


 112  var instances = data.GetLength(0);


 113  var attributes = data.GetLength(1);


 114 


 115  var AMatrix = new Matrix(A, A.Length / dimensions, dimensions);


 116 


 117  alglib.sparsematrix probabilities;


 118  alglib.sparsecreate(instances, instances, out probabilities);


 119  var transformedDistances = new Dictionary<int, double>(instances);


 120  for (int i = 0; i < instances; i++) {


 121  var iVector = new Matrix(GetRow(data, i), data.GetLength(1));


 122  for (int k = 0; k < instances; k++) {


 123  if (k == i) {


 124  transformedDistances.Remove(k);


 125  continue;


 126  }


 127  var kVector = new Matrix(GetRow(data, k));


 128  transformedDistances[k] = Math.Exp(iVector.Multiply(AMatrix).Subtract(kVector.Multiply(AMatrix)).SumOfSquares());


 129  }


 130  var normalization = transformedDistances.Sum(x => x.Value);


 131  if (normalization <= 0) continue;


 132  foreach (var s in transformedDistances.Where(x => x.Value > 0).OrderByDescending(x => x.Value).Take(neighborSamples)) {


 133  alglib.sparseset(probabilities, i, s.Key, s.Value / normalization);


 134  }


 135  }


 136  alglib.sparseconverttocrs(probabilities); // needed to enumerate in order (topdown and leftright)


 137 


 138  int t0 = 0, t1 = 0, r, c;


 139  double val;


 140  var pi = new double[instances];


 141  while (alglib.sparseenumerate(probabilities, ref t0, ref t1, out r, out c, out val)) {


 142  if (classes[r].IsAlmost(classes[c])) {


 143  pi[r] += val;


 144  }


 145  }


 146 


 147  var innerSum = new double[attributes, attributes];


 148  while (alglib.sparseenumerate(probabilities, ref t0, ref t1, out r, out c, out val)) {


 149  var vector = new Matrix(GetRow(data, r)).Subtract(new Matrix(GetRow(data, c)));


 150  vector.OuterProduct(vector).Multiply(val * pi[r]).AddTo(innerSum);


 151 


 152  if (classes[r].IsAlmost(classes[c])) {


 153  vector.OuterProduct(vector).Multiply(val).AddTo(innerSum);


 154  }


 155  }


 156 


 157  var func = pi.Sum() + regularization * AMatrix.SumOfSquares();


 158 


 159  r = 0;


 160  var newGrad = AMatrix.Multiply(2.0).Transpose().Multiply(new Matrix(innerSum)).Transpose();


 161  foreach (var g in newGrad) {


 162  grad[r] = g + regularization * 2 * A[r];


 163  r++;


 164  }


 165 


 166  return func;


 167  }


 168 


 169  private static IEnumerable<double> GetRow(double[,] data, int row) {


 170  for (int i = 0; i < data.GetLength(1); i++)


 171  yield return data[row, i];


 172  }


 173  }


 174  } 
