Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
08/07/12 00:42:33 (12 years ago)
Author:
abeham
Message:

#1913:

  • Worked on NCA
  • Added scatter plot view for the model to show training data when it is reduced to two dimensions

It runs, but I don't think it produces correct results yet. I have randomized the initial matrix because the starting point influences the achievable quality quite a bit.

Location:
branches/NCA
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • branches/NCA

    • Property svn:ignore set to
      *.suo
  • branches/NCA/HeuristicLab.Algorithms.NCA/3.3

    • Property svn:ignore set to
      bin
      obj
      *.user
  • branches/NCA/HeuristicLab.Algorithms.NCA/3.3/NeighborhoodComponentsAnalysis.cs

    r8412 r8420  
    3434      var matrix = new double[attributes * reduceDimensions];
    3535      // TODO: make some more clever initialization of matrix
     36      var rand = new Random();
     37      var counter = 0;
    3638      for (var i = 0; i < attributes; i++)
    3739        for (var j = 0; j < reduceDimensions; j++)
    38           matrix[j * attributes + i] = 1;
     40          matrix[counter++] = rand.NextDouble();
    3941
    4042      alglib.mincgstate state;
     
    4850
    4951      var transformationMatrix = new double[attributes, reduceDimensions];
     52      counter = 0;
    5053      for (var i = 0; i < attributes; i++)
    5154        for (var j = 0; j < reduceDimensions; j++)
    52           transformationMatrix[i, j] = matrix[j * attributes + i];
     55          transformationMatrix[i, j] = matrix[counter++];
    5356
    5457      var transformedTrainingset = new double[instances, reduceDimensions];
    5558      var rowCount = 0;
    56       for (var r = data.TrainingPartition.Start; r < data.TrainingPartition.End; r++) {
    57         if (r >= data.TestPartition.Start && r < data.TestPartition.End) continue;
     59      foreach (var r in data.TrainingIndices) {
    5860        var j = 0;
    5961        foreach (var v in data.AllowedInputVariables) {
     
    7779      var info = (OptimizationInfo)obj;
    7880      var instances = info.ProblemData.TrainingIndices.ToArray();
     81      var AMatrix = new Matrix(A, A.Length / info.ReduceDimensions, info.ReduceDimensions);
    7982      var distances = new double[instances.Length, instances.Length];
     83
    8084      for (int i = 0; i < instances.Length - 1; i++) {
     85        var iVector = new Matrix(GetRow(info.ProblemData, instances[i]));
    8186        for (int j = i + 1; j < instances.Length; j++) {
    82           distances[i, j] = VectorLength(VectorSubtract(
    83             Ax(A, GetRow(info.ProblemData, instances[i])),
    84             Ax(A, GetRow(info.ProblemData, instances[j]))));
     87          var jVector = new Matrix(GetRow(info.ProblemData, instances[j]));
     88          distances[i, j] = iVector.Multiply(AMatrix).Subtract(jVector.Multiply(AMatrix)).QuadraticSum();
    8589          distances[j, i] = distances[i, j];
    8690        }
    8791      }
    88       var probabilities = new double[instances.Length, instances.Length];
    89       for (int i = 0; i < instances.Length - 1; i++) {
     92      alglib.sparsematrix probabilities;
     93      alglib.sparsecreate(instances.Length, instances.Length, out probabilities);
     94      for (int i = 0; i < instances.Length; i++) {
    9095        var denom = 0.0;
    9196        for (int k = 0; k < instances.Length; k++) {
     
    9398          denom += Math.Exp(-(distances[i, k] * distances[i, k]));
    9499        }
    95         for (int j = i + 1; j < instances.Length; j++) {
    96           probabilities[i, j] = Math.Exp(-(distances[i, j] * distances[i, j])) / denom;
    97         }
    98       }
    99 
    100       var target = info.ProblemData.TargetVariable;
    101       var ds = info.ProblemData.Dataset;
    102       var pi = new double[instances.Length];
    103       func = 0.0;
    104       for (int i = 0; i < instances.Length; i++) {
    105         var classI = ds.GetDoubleValue(target, instances[i]);
    106         pi[i] = 0.0;
    107         for (int j = 0; j < instances.Length; j++) {
    108           if (i == j) continue;
    109           var classJ = ds.GetDoubleValue(target, instances[j]);
    110           if (classI.IsAlmost(classJ)) {
    111             func += probabilities[i, j];
    112             pi[i] += probabilities[i, j];
     100        if (denom > 0) {
     101          for (int j = 0; j < instances.Length; j++) {
     102            if (i == j) continue;
     103            var v = Math.Exp(-(distances[i, j] * distances[i, j])) / denom;
     104            alglib.sparseset(probabilities, i, j, v);
    113105          }
    114106        }
    115107      }
     108      alglib.sparseconverttocrs(probabilities); // needed to enumerate in order
    116109
    117       var sum = GetSum(probabilities, pi, info.ProblemData, instances);
    118       for (int i = 0; i < A.Length; i++) {
    119         grad[i] = 2.0 * A[i] * sum;
     110      int t0 = 0, t1 = 0, r, c;
     111      double val;
     112      var classes = info.ProblemData.Dataset.GetDoubleValues(info.ProblemData.TargetVariable, instances).ToArray();
     113      var pi = new double[instances.Length];
     114      while (alglib.sparseenumerate(probabilities, ref t0, ref t1, out r, out c, out val)) {
     115        if (classes[r].IsAlmost(classes[c]))
     116          pi[r] += val;
    120117      }
     118      func = pi.Sum();
     119
     120      grad = AMatrix.Multiply(2.0).Transpose().Multiply(CalculateInnerSum(probabilities, pi, info.ProblemData, instances, classes)).Transpose().ToArray();
    121121    }
    122122
    123     private static double[,] GetSum(double[,] p, double[] pi, IClassificationProblemData data, int[] instances) {
     123    private static Matrix CalculateInnerSum(alglib.sparsematrix p, double[] pi, IClassificationProblemData data, int[] instances, double[] classes) {
     124      var attributes = data.AllowedInputVariables.Count();
    124125      var target = data.TargetVariable;
    125       var sum = new double[pi.Length, pi.Length];
    126       for (int i = 0; i < pi.Length; i++) {
    127         var classI = data.Dataset.GetDoubleValue(target, instances[i]);
    128         var sumK = 0.0;
    129         for (int k = 0; k < pi.Length; k++) {
    130           if (i == k) continue;
    131           sumK += p[i, k] * MatrixMultiply(VectorSubtract(GetRow(data, i), GetRow(data, k)), VectorSubtract(GetRow(data, i), GetRow(data, k)));
    132         }
    133         var sumJ = 0.0;
    134         for (int j = 0; j < pi.Length; j++) {
    135           if (i == j) continue;
    136           var classJ = data.Dataset.GetDoubleValue(target, instances[j]);
    137           if (classI.IsAlmost(classJ)) sumJ += p[i, j] * ScalarProduct(VectorSubtract(GetRow(data, i), GetRow(data, j)), VectorSubtract(GetRow(data, i), GetRow(data, j)));
    138         }
    139         sum += pi[i] * sumK - sumJ;
    140       }
    141       return sum;
    142     }
     126      int t0 = 0, t1 = 0, r, c;
     127      double v;
     128      var result = new double[attributes, attributes];
     129      while (alglib.sparseenumerate(p, ref t0, ref t1, out r, out c, out v)) {
     130        var vector = new Matrix(GetRow(data, instances[r])).Subtract(new Matrix(GetRow(data, instances[c]))).Apply();
     131        vector.OuterProduct(vector).Multiply(v * pi[r]).AddTo(result);
    143132
    144     private static double[,] MatrixMultiply(IEnumerable<double> vec1, IEnumerable<double> vec2) {
    145       var enumVec1 = vec1.GetEnumerator();
    146       var enumVec2 = vec2.GetEnumerator();
    147       bool f1, f2;
    148       while (true) {
    149         f1 = enumVec1.MoveNext();
    150         f2 = enumVec2.MoveNext();
    151         if (!f1 && f2 || f1 && !f2) throw new ArgumentException("vectors are of unequal length.");
    152         if (!f1 && !f2) yield break;
    153         yield return enumVec1.Current - enumVec2.Current;
    154       }
    155     }
    156 
    157     private static IEnumerable<double> Ax(IEnumerable<double> A, IEnumerable<double> x) {
    158       var enumA = A.GetEnumerator();
    159       var enumX = x.GetEnumerator();
    160       var sum = 0.0;
    161       while (true) {
    162         if (!enumA.MoveNext()) {
    163           yield return sum;
    164           yield break;
    165         } else if (!enumX.MoveNext()) {
    166           yield return sum;
    167           enumX = x.GetEnumerator();
    168           sum = 0.0;
    169         } else {
    170           sum += enumA.Current * enumX.Current;
     133        if (classes[r].IsAlmost(classes[c])) {
     134          vector.OuterProduct(vector).Multiply(-v).AddTo(result);
    171135        }
    172136      }
     137      return new Matrix(result);
    173138    }
    174139
    175140    private static IEnumerable<double> GetRow(IClassificationProblemData data, int row) {
    176141      return data.AllowedInputVariables.Select(v => data.Dataset.GetDoubleValue(v, row));
    177     }
    178 
    179     private static IEnumerable<double> VectorSubtract(IEnumerable<double> vec1, IEnumerable<double> vec2) {
    180       var enumVec1 = vec1.GetEnumerator();
    181       var enumVec2 = vec2.GetEnumerator();
    182       bool f1, f2;
    183       while (true) {
    184         f1 = enumVec1.MoveNext();
    185         f2 = enumVec2.MoveNext();
    186         if (!f1 && f2 || f1 && !f2) throw new ArgumentException("vectors are of unequal length.");
    187         if (!f1 && !f2) yield break;
    188         yield return enumVec1.Current - enumVec2.Current;
    189       }
    190     }
    191 
    192     private static double ScalarProduct(IEnumerable<double> vec1, IEnumerable<double> vec2) {
    193       var enumVec1 = vec1.GetEnumerator();
    194       var enumVec2 = vec2.GetEnumerator();
    195       bool f1, f2;
    196       double product = 0.0;
    197       while (true) {
    198         f1 = enumVec1.MoveNext();
    199         f2 = enumVec2.MoveNext();
    200         if (!f1 && f2 || f1 && !f2) throw new ArgumentException("vectors are of unequal length.");
    201         if (!f1 && !f2) return product;
    202         product += enumVec1.Current * enumVec2.Current;
    203       }
    204     }
    205 
    206     private static double VectorLength(IEnumerable<double> vector) {
    207       return Math.Sqrt(vector.Sum(x => x * x));
    208142    }
    209143
Note: See TracChangeset for help on using the changeset viewer.