[5723] | 1 | #region License Information
|
---|
| 2 | /* HeuristicLab
|
---|
[14185] | 3 | * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
|
---|
[5723] | 4 | *
|
---|
| 5 | * This file is part of HeuristicLab.
|
---|
| 6 | *
|
---|
| 7 | * HeuristicLab is free software: you can redistribute it and/or modify
|
---|
| 8 | * it under the terms of the GNU General Public License as published by
|
---|
| 9 | * the Free Software Foundation, either version 3 of the License, or
|
---|
| 10 | * (at your option) any later version.
|
---|
| 11 | *
|
---|
| 12 | * HeuristicLab is distributed in the hope that it will be useful,
|
---|
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 15 | * GNU General Public License for more details.
|
---|
| 16 | *
|
---|
| 17 | * You should have received a copy of the GNU General Public License
|
---|
| 18 | * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
|
---|
| 19 | */
|
---|
| 20 | #endregion
|
---|
| 21 |
|
---|
| 22 | using System;
|
---|
| 23 | using HeuristicLab.Data;
|
---|
| 24 |
|
---|
| 25 | namespace HeuristicLab.Analysis {
|
---|
/// <summary>
/// Two-dimensional multidimensional scaling: places N items in the plane such that
/// their Euclidean distances approximate a given matrix of dissimilarities.
/// </summary>
public static class MultidimensionalScaling {

  /// <summary>
  /// Performs the Kruskal-Shepard algorithm and applies a gradient descent method
  /// to fit the coordinates such that the difference between the fit distances
  /// and the dissimilarities becomes minimal.
  /// </summary>
  /// <remarks>
  /// The coordinates are initialized in a deterministic fashion such that all initial points
  /// are equally spaced on a circle of radius 10 around the origin.
  /// Matrices with one or two elements are solved directly without running the optimization.
  /// </remarks>
  /// <param name="dissimilarities">A symmetric NxN matrix that specifies the dissimilarities between each element i and j. Diagonal elements are ignored.</param>
  /// <returns>A Nx2 matrix where the first column represents the x- and the second column the y coordinates.</returns>
  /// <exception cref="ArgumentNullException">Thrown when <paramref name="dissimilarities"/> is null.</exception>
  /// <exception cref="ArgumentException">Thrown when <paramref name="dissimilarities"/> is not a square matrix.</exception>
  public static DoubleMatrix KruskalShepard(DoubleMatrix dissimilarities) {
    if (dissimilarities == null) throw new ArgumentNullException("dissimilarities");
    if (dissimilarities.Rows != dissimilarities.Columns) throw new ArgumentException("Dissimilarities must be a square matrix.", "dissimilarities");

    int dimension = dissimilarities.Rows;
    // Trivial cases: a single point sits at the origin, two points are placed
    // exactly dissimilarities[0, 1] apart on the y-axis.
    if (dimension == 1) return new DoubleMatrix(new double[,] { { 0, 0 } });
    if (dimension == 2) return new DoubleMatrix(new double[,] { { 0, 0 }, { 0, dissimilarities[0, 1] } });

    DoubleMatrix coordinates = new DoubleMatrix(dimension, 2);
    double rad = (2 * Math.PI) / coordinates.Rows; // angular step between neighbouring start points
    for (int i = 0; i < dimension; i++) {
      coordinates[i, 0] = 10 * Math.Cos(rad * i);
      coordinates[i, 1] = 10 * Math.Sin(rad * i);
    }

    return KruskalShepard(dissimilarities, coordinates);
  }

  /// <summary>
  /// Performs the Kruskal-Shepard algorithm and applies a gradient descent method
  /// to fit the coordinates such that the difference between the fit distances
  /// and the dissimilarities is minimal.
  /// </summary>
  /// <remarks>
  /// It will use a pre-initialized x,y-coordinates matrix as a starting point of the gradient descent.
  /// The <paramref name="coordinates"/> matrix is updated in place and the same instance is returned.
  /// Dissimilarities that are NaN do not contribute to the fit.
  /// </remarks>
  /// <param name="dissimilarities">A symmetric NxN matrix that specifies the dissimilarities between each element i and j. Diagonal elements are ignored.</param>
  /// <param name="coordinates">The Nx2 matrix of initial coordinates.</param>
  /// <param name="maximumIterations">The number of iterations for which the algorithm should run.
  /// In every iteration it tries to find the best location for every item.
  /// The algorithm stops earlier when an iteration does not move any item.</param>
  /// <returns>A Nx2 matrix where the first column represents the x- and the second column the y coordinates.</returns>
  /// <exception cref="ArgumentNullException">Thrown when <paramref name="dissimilarities"/> or <paramref name="coordinates"/> is null.</exception>
  /// <exception cref="ArgumentException">Thrown when the dissimilarities are not square or the number of coordinate rows does not match.</exception>
  public static DoubleMatrix KruskalShepard(DoubleMatrix dissimilarities, DoubleMatrix coordinates, int maximumIterations = 10) {
    if (dissimilarities == null) throw new ArgumentNullException("dissimilarities");
    if (coordinates == null) throw new ArgumentNullException("coordinates");

    int dimension = dissimilarities.Rows;
    if (dimension != dissimilarities.Columns || coordinates.Rows != dimension) throw new ArgumentException("The number of coordinates and the number of rows and columns in the dissimilarities matrix do not match.");

    // Stopping conditions handed to alglib.minlmsetcond: only the gradient
    // tolerance is set, the remaining criteria are passed as zero.
    const double epsg = 1e-7;
    const double epsf = 0;
    const double epsx = 0;
    const int maxits = 0;

    for (int iterations = 0; iterations < maximumIterations; iterations++) {
      bool changed = false;
      // Optimize one point at a time while all other points are kept fixed.
      for (int i = 0; i < dimension; i++) {
        double[] c = new double[] { coordinates[i, 0], coordinates[i, 1] };

        try {
          alglib.minlmstate state;
          alglib.minlmreport rep;
          // One residual per other point, hence dimension - 1 functions.
          alglib.minlmcreatevj(dimension - 1, c, out state);
          alglib.minlmsetcond(state, epsg, epsf, epsx, maxits);
          alglib.minlmoptimize(state, StressFitness, StressJacobian, null, new Info(coordinates, dissimilarities, i));
          alglib.minlmresults(state, out c, out rep);
        } catch (alglib.alglibexception) {
          // Deliberate best effort: if the optimizer fails for this point,
          // c still holds the previous location and the point is left where it was.
        }

        // Never write NaN back into the coordinates; keep the old location instead.
        if (!double.IsNaN(c[0]) && !double.IsNaN(c[1])) {
          changed = changed || (coordinates[i, 0] != c[0]) || (coordinates[i, 1] != c[1]);
          coordinates[i, 0] = c[0];
          coordinates[i, 1] = c[1];
        }
      }
      if (!changed) break; // converged: a full sweep did not move any point
    }
    return coordinates;
  }

  /// <summary>
  /// Function-vector callback for the optimizer: writes one stress value per other point into <paramref name="fi"/>.
  /// </summary>
  /// <param name="x">The candidate (x, y) location of the point that is being optimized.</param>
  /// <param name="fi">Receives the stress values; entries for NaN dissimilarities are set to 0.</param>
  /// <param name="obj">The <see cref="Info"/> instance describing the fixed points, the dissimilarities and the optimized row.</param>
  private static void StressFitness(double[] x, double[] fi, object obj) {
    Info info = (Info)obj;
    int idx = 0;
    for (int i = 0; i < info.Coordinates.Rows; i++) {
      if (i == info.Row) continue; // a point has no stress with respect to itself
      double dissimilarity = info.Dissimilarities[info.Row, i];
      if (!double.IsNaN(dissimilarity))
        fi[idx++] = Stress(x, dissimilarity, info.Coordinates[i, 0], info.Coordinates[i, 1]);
      else fi[idx++] = 0.0;
    }
  }

  /// <summary>
  /// Jacobian callback for the optimizer: writes the stress values into <paramref name="fi"/>
  /// and their partial derivatives with respect to x and y into <paramref name="jac"/>.
  /// </summary>
  /// <remarks>
  /// For f = (d - c)^2 with d the Euclidean distance to the other point (a, b), the derivatives are
  /// df/dx = 2 * (x - a) * (d - c) / d and df/dy = 2 * (y - b) * (d - c) / d.
  /// When both points coincide (d = 0) the direction is undefined and the expression would evaluate
  /// to 0/0 = NaN; a zero gradient is used for that term instead.
  /// </remarks>
  /// <param name="x">The candidate (x, y) location of the point that is being optimized.</param>
  /// <param name="fi">Receives the stress values; entries for NaN dissimilarities are set to 0.</param>
  /// <param name="jac">Receives the partial derivatives, column 0 for x and column 1 for y.</param>
  /// <param name="obj">The <see cref="Info"/> instance describing the fixed points, the dissimilarities and the optimized row.</param>
  private static void StressJacobian(double[] x, double[] fi, double[,] jac, object obj) {
    Info info = (Info)obj;
    int idx = 0;
    for (int i = 0; i < info.Coordinates.Rows; i++) {
      if (i == info.Row) continue;
      double c = info.Dissimilarities[info.Row, i];
      double a = info.Coordinates[i, 0];
      double b = info.Coordinates[i, 1];
      if (!double.IsNaN(c)) {
        double dx = x[0] - a;
        double dy = x[1] - b;
        double d = Math.Sqrt(dx * dx + dy * dy);

        fi[idx] = Stress(x, c, a, b);
        if (d == 0) {
          // Coinciding points: avoid 0/0 and contribute no gradient for this term.
          jac[idx, 0] = 0;
          jac[idx, 1] = 0;
        } else {
          jac[idx, 0] = 2 * dx * (d - c) / d;
          jac[idx, 1] = 2 * dy * (d - c) / d;
        }
      } else {
        fi[idx] = jac[idx, 0] = jac[idx, 1] = 0;
      }
      idx++;
    }
  }

  /// <summary>
  /// Computes the stress between the point stored in <paramref name="x"/> (x[0], x[1]) and another point.
  /// </summary>
  private static double Stress(double[] x, double distance, double xCoord, double yCoord) {
    return Stress(x[0], x[1], distance, xCoord, yCoord);
  }

  /// <summary>
  /// Computes the squared difference between the Euclidean distance of (x, y) to
  /// (otherX, otherY) and the desired <paramref name="distance"/>.
  /// </summary>
  private static double Stress(double x, double y, double distance, double otherX, double otherY) {
    double d = Math.Sqrt((x - otherX) * (x - otherX)
                       + (y - otherY) * (y - otherY));
    return (d - distance) * (d - distance);
  }

  /// <summary>
  /// This method computes the normalized raw-stress value according to Groenen and van de Velden 2004. "Multidimensional Scaling". Technical report EI 2004-15.
  /// </summary>
  /// <remarks>
  /// Only the pairs i &lt; j are evaluated and pairs whose dissimilarity is NaN are skipped.
  /// The result is the sum of the stress values divided by the sum of the squared dissimilarities;
  /// if that sum is zero the division yields NaN or infinity.
  /// </remarks>
  /// <param name="dissimilarities">The matrix with the dissimilarities.</param>
  /// <param name="coordinates">The actual location of the points.</param>
  /// <returns>The normalized raw-stress value that describes the goodness-of-fit between the distances in the points and the size of the dissimilarities. If the value is &lt; 0.1 the fit is generally considered good. If between 0.1 and 0.2 it is considered acceptable, but the usefulness of the scaling with higher values is doubtful.</returns>
  /// <exception cref="ArgumentNullException">Thrown when <paramref name="dissimilarities"/> or <paramref name="coordinates"/> is null.</exception>
  /// <exception cref="ArgumentException">Thrown when the matrix sizes do not match or the <paramref name="dissimilarities"/> matrix is not symmetric.</exception>
  public static double CalculateNormalizedStress(DoubleMatrix dissimilarities, DoubleMatrix coordinates) {
    if (dissimilarities == null) throw new ArgumentNullException("dissimilarities");
    if (coordinates == null) throw new ArgumentNullException("coordinates");

    int dimension = dissimilarities.Rows;
    if (dimension != dissimilarities.Columns || dimension != coordinates.Rows) throw new ArgumentException("The number of coordinates and the number of rows and columns in the dissimilarities matrix do not match.");
    double stress = 0, normalization = 0;
    for (int i = 0; i < dimension - 1; i++) {
      for (int j = i + 1; j < dimension; j++) {
        // NaN != NaN, so a pair that is NaN in both directions is explicitly accepted as symmetric.
        if (dissimilarities[i, j] != dissimilarities[j, i] && !(double.IsNaN(dissimilarities[i, j]) && double.IsNaN(dissimilarities[j, i])))
          throw new ArgumentException("Dissimilarities is not a symmetric matrix.", "dissimilarities");
        if (!double.IsNaN(dissimilarities[i, j])) {
          stress += Stress(coordinates[i, 0], coordinates[i, 1], dissimilarities[i, j], coordinates[j, 0], coordinates[j, 1]);
          normalization += (dissimilarities[i, j] * dissimilarities[i, j]);
        }
      }
    }
    return stress / normalization;
  }

  /// <summary>
  /// Bundles the data that the optimizer callbacks need: the current coordinates of all points,
  /// the dissimilarities and the index of the point (row) that is being optimized.
  /// </summary>
  private class Info {
    public DoubleMatrix Coordinates { get; private set; }
    public DoubleMatrix Dissimilarities { get; private set; }
    public int Row { get; private set; }

    public Info(DoubleMatrix c, DoubleMatrix d, int r) {
      Coordinates = c;
      Dissimilarities = d;
      Row = r;
    }
  }
}
|
---|
| 182 | } |
---|