Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Analysis/3.3/MultidimensionalScaling/MultidimensionalScaling.cs @ 6219

Last change on this file since 6219 was 5933, checked in by abeham, 14 years ago

#1330

  • Merged QAP from branch into trunk
  • Merged MDS from branch into trunk
  • Merged Swap2 moves from branch into trunk
File size: 8.7 KB
RevLine 
[5723]1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2011 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using HeuristicLab.Data;
24
namespace HeuristicLab.Analysis {
  /// <summary>
  /// Two-dimensional multidimensional scaling: places N items in the plane such that the
  /// Euclidean distances between them approximate a given matrix of dissimilarities.
  /// </summary>
  public static class MultidimensionalScaling {
    // Radius of the circle on which the default initial configuration is placed.
    private const double InitialRadius = 10;
    // Stopping conditions handed to the conjugate gradient optimizer (mincgsetcond).
    private const double GradientTolerance = 1e-7;
    private const double FunctionTolerance = 0;
    private const double StepTolerance = 0;
    private const int MaximumOptimizerIterations = 100;

    /// <summary>
    /// Performs the Kruskal-Shepard algorithm and applies a gradient descent method
    /// to fit the coordinates such that the difference between the fit distances
    /// and the dissimilarities becomes minimal.
    /// </summary>
    /// <remarks>
    /// It will initialize the coordinates in a deterministic fashion such that all initial points are equally spaced on a circle.
    /// A single item is placed at the origin; two items are placed at (0, 0) and (0, dissimilarities[0, 1]) without running the optimizer.
    /// </remarks>
    /// <param name="dissimilarities">A symmetric NxN matrix that specifies the dissimilarities between each element i and j. Diagonal elements are ignored.</param>
    /// <returns>A Nx2 matrix where the first column represents the x- and the second column the y coordinates.</returns>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="dissimilarities"/> is null.</exception>
    /// <exception cref="ArgumentException">Thrown when <paramref name="dissimilarities"/> is not a square matrix.</exception>
    public static DoubleMatrix KruskalShepard(DoubleMatrix dissimilarities) {
      if (dissimilarities == null) throw new ArgumentNullException("dissimilarities");
      if (dissimilarities.Rows != dissimilarities.Columns) throw new ArgumentException("Dissimilarities must be a square matrix.", "dissimilarities");

      int dimension = dissimilarities.Rows;
      // Trivial configurations have an exact solution, no optimization required.
      if (dimension == 1) return new DoubleMatrix(new double[,] { { 0, 0 } });
      if (dimension == 2) return new DoubleMatrix(new double[,] { { 0, 0 }, { 0, dissimilarities[0, 1] } });

      DoubleMatrix coordinates = new DoubleMatrix(dimension, 2);
      double angle = (2 * Math.PI) / dimension;
      for (int i = 0; i < dimension; i++) {
        coordinates[i, 0] = InitialRadius * Math.Cos(angle * i);
        coordinates[i, 1] = InitialRadius * Math.Sin(angle * i);
      }

      return KruskalShepard(dissimilarities, coordinates);
    }

    /// <summary>
    /// Performs the Kruskal-Shepard algorithm and applies a gradient descent method
    /// to fit the coordinates such that the difference between the fit distances
    /// and the dissimilarities is minimal.
    /// </summary>
    /// <remarks>
    /// It will use a pre-initialized x,y-coordinates matrix as a starting point of the gradient descent.
    /// The <paramref name="coordinates"/> matrix is modified in place and the same instance is returned.
    /// Points for which the optimizer fails or yields a non-finite location keep their previous coordinates.
    /// </remarks>
    /// <param name="dissimilarities">A symmetric NxN matrix that specifies the dissimilarities between each element i and j. Diagonal elements are ignored.</param>
    /// <param name="coordinates">The Nx2 matrix of initial coordinates.</param>
    /// <param name="maximumIterations">The maximum number of sweeps over all items.
    /// In every sweep it tries to find the best location for every item; the algorithm stops early when a sweep moves no item.</param>
    /// <returns>A Nx2 matrix where the first column represents the x- and the second column the y coordinates.</returns>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="dissimilarities"/> or <paramref name="coordinates"/> is null.</exception>
    /// <exception cref="ArgumentException">Thrown when the matrix sizes do not match or <paramref name="coordinates"/> has fewer than two columns.</exception>
    public static DoubleMatrix KruskalShepard(DoubleMatrix dissimilarities, DoubleMatrix coordinates, int maximumIterations = 20) {
      if (dissimilarities == null) throw new ArgumentNullException("dissimilarities");
      if (coordinates == null) throw new ArgumentNullException("coordinates");
      int dimension = dissimilarities.Rows;
      if (dimension != dissimilarities.Columns || coordinates.Rows != dimension) throw new ArgumentException("The number of coordinates and the number of rows and columns in the dissimilarities matrix do not match.");
      if (coordinates.Columns < 2) throw new ArgumentException("Coordinates must have at least two columns.", "coordinates");

      alglib.mincgstate state = null;
      alglib.mincgreport rep;
      // Set only after the optimizer has been created AND configured successfully, so that a
      // failure on the very first point does not leave later points with an unusable state.
      bool optimizerReady = false;

      for (int iterations = 0; iterations < maximumIterations; iterations++) {
        bool changed = false;
        for (int i = 0; i < dimension; i++) {
          double[] start = new double[] { coordinates[i, 0], coordinates[i, 1] };
          // Kept separate from the start vector: only a completely returned result is accepted.
          double[] result = null;

          try {
            if (!optimizerReady) {
              alglib.mincgcreate(start, out state);
              alglib.mincgsetcond(state, GradientTolerance, FunctionTolerance, StepTolerance, MaximumOptimizerIterations);
              optimizerReady = true;
            } else {
              alglib.mincgrestartfrom(state, start);
            }
            alglib.mincgoptimize(state, StressGradient, null, new Info(coordinates, dissimilarities, i));
            alglib.mincgresults(state, out result, out rep);
          } catch (alglib.alglibexception) {
            // Deliberate best effort: a failed optimization leaves this point where it was.
            result = null;
          }

          if (result != null && result.Length >= 2 && IsFinite(result[0]) && IsFinite(result[1])) {
            changed = changed || (coordinates[i, 0] != result[0]) || (coordinates[i, 1] != result[1]);
            coordinates[i, 0] = result[0];
            coordinates[i, 1] = result[1];
          }
        }
        // A full sweep without any movement means a fixed point has been reached.
        if (!changed) break;
      }
      return coordinates;
    }

    // Computes the raw stress of the point x (the candidate location of row info.Row) against all
    // other points, together with its gradient with respect to x. Signature dictated by alglib.
    private static void StressGradient(double[] x, ref double func, double[] grad, object obj) {
      Info info = (Info)obj;
      double stress = 0, gradX = 0, gradY = 0;
      for (int i = 0; i < info.Coordinates.Rows; i++) {
        if (i == info.Row) continue;
        double dissimilarity = info.Dissimilarities[info.Row, i];
        double dx = x[0] - info.Coordinates[i, 0];
        double dy = x[1] - info.Coordinates[i, 1];
        // Sum of squares is never negative, unlike the expanded polynomial which can drop
        // below zero through rounding and turn the square root into NaN.
        double distance = Math.Sqrt(dx * dx + dy * dy);
        double residual = distance - dissimilarity;
        stress += residual * residual;
        // d/dx (distance - dissimilarity)^2 = 2 * (distance - dissimilarity) * dx / distance.
        // The gradient is undefined for coinciding points; contribute nothing instead of 0/0 = NaN.
        if (distance > 0) {
          double factor = 2 * residual / distance;
          gradX += factor * dx;
          gradY += factor * dy;
        }
      }
      func = stress;
      grad[0] = gradX;
      grad[1] = gradY;
    }

    // Squared difference between the Euclidean distance of (x, y) to (otherX, otherY) and the desired distance.
    private static double Stress(double x, double y, double distance, double otherX, double otherY) {
      double dx = x - otherX;
      double dy = y - otherY;
      double d = Math.Sqrt(dx * dx + dy * dy);
      return (d - distance) * (d - distance);
    }

    private static bool IsFinite(double value) {
      return !double.IsNaN(value) && !double.IsInfinity(value);
    }

    /// <summary>
    /// This method computes the normalized raw-stress value according to Groenen and van de Velden 2004. "Multidimensional Scaling". Technical report EI 2004-15.
    /// </summary>
    /// <remarks>
    /// Only the upper triangle is evaluated and pairs with a dissimilarity of exactly 0 are skipped.
    /// </remarks>
    /// <param name="dissimilarities">The matrix with the dissimilarities.</param>
    /// <param name="coordinates">The actual location of the points.</param>
    /// <returns>The normalized raw-stress value that describes the goodness-of-fit between the distances in the points and the size of the dissimilarities. If the value is &lt; 0.1 the fit is generally considered good. If between 0.1 and 0.2 it is considered acceptable, but the usefulness of the scaling with higher values is doubtful.
    /// The result is NaN when there is no pair with a non-zero dissimilarity, because the normalization term is then 0.</returns>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="dissimilarities"/> or <paramref name="coordinates"/> is null.</exception>
    /// <exception cref="ArgumentException">Thrown when the matrix sizes do not match, <paramref name="coordinates"/> has fewer than two columns, or the <paramref name="dissimilarities"/> matrix is not symmetric.</exception>
    public static double CalculateNormalizedStress(DoubleMatrix dissimilarities, DoubleMatrix coordinates) {
      if (dissimilarities == null) throw new ArgumentNullException("dissimilarities");
      if (coordinates == null) throw new ArgumentNullException("coordinates");
      int dimension = dissimilarities.Rows;
      if (dimension != dissimilarities.Columns || dimension != coordinates.Rows) throw new ArgumentException("The number of coordinates and the number of rows and columns in the dissimilarities matrix do not match.");
      if (coordinates.Columns < 2) throw new ArgumentException("Coordinates must have at least two columns.", "coordinates");

      double stress = 0, normalization = 0;
      for (int i = 0; i < dimension - 1; i++) {
        for (int j = i + 1; j < dimension; j++) {
          double dissimilarity = dissimilarities[i, j];
          if (dissimilarity != dissimilarities[j, i]) throw new ArgumentException("Dissimilarities is not a symmetric matrix.", "dissimilarities");
          if (dissimilarity != 0) {
            stress += Stress(coordinates[i, 0], coordinates[i, 1], dissimilarity, coordinates[j, 0], coordinates[j, 1]);
            normalization += dissimilarity * dissimilarity;
          }
        }
      }
      return stress / normalization;
    }

    // Context object passed through alglib to StressGradient: the current configuration,
    // the target dissimilarities and the index of the row (point) that is being relocated.
    private sealed class Info {
      public DoubleMatrix Coordinates { get; private set; }
      public DoubleMatrix Dissimilarities { get; private set; }
      public int Row { get; private set; }

      public Info(DoubleMatrix c, DoubleMatrix d, int r) {
        Coordinates = c;
        Dissimilarities = d;
        Row = r;
      }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.