Changeset 15207 for trunk/sources/HeuristicLab.Algorithms.DataAnalysis
- Timestamp:
- 07/12/17 15:17:41 (7 years ago)
- Location:
- trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE
- Files:
-
- 1 added
- 1 deleted
- 8 edited
- 1 moved
Legend:
- Unmodified
- Added
- Removed
-
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/CosineDistance.cs
r15206 r15207 34 34 /// </summary> 35 35 [StorableClass] 36 [Item(" InnerProductDistance", "The angular distance as defined as a normalized distance measure dependent on the angle between two vectors.\nIt is designed for vectors with all positive coordinates")]37 public class InnerProductDistance : DistanceBase<IEnumerable<double>> {36 [Item("CosineDistance", "The angular distance as defined as a normalized distance measure dependent on the angle between two vectors.\nIt is designed for vectors with all positive coordinates")] 37 public class CosineDistance : DistanceBase<IEnumerable<double>> { 38 38 39 #region HLConstructors 39 #region HLConstructors & Cloning 40 40 [StorableConstructor] 41 protected InnerProductDistance(bool deserializing) : base(deserializing) { }42 protected InnerProductDistance(InnerProductDistance original, Cloner cloner)41 protected CosineDistance(bool deserializing) : base(deserializing) { } 42 protected CosineDistance(CosineDistance original, Cloner cloner) 43 43 : base(original, cloner) { } 44 public InnerProductDistance() { }44 public CosineDistance() { } 45 45 public override IDeepCloneable Clone(Cloner cloner) { 46 return new InnerProductDistance(this, cloner);46 return new CosineDistance(this, cloner); 47 47 } 48 48 #endregion 49 49 50 50 #region statics 51 public static double GetDistance(IEnumerable<double> point1, IEnumerable<double> point2) { 52 var xs = point1.GetEnumerator(); 53 var ys = point2.GetEnumerator(); 54 var sum = 0.0; 55 while(xs.MoveNext() & ys.MoveNext()) { 56 if(xs.Current < 0 || ys.Current < 0) throw new ArgumentException("Inner product distance is only defined for vectors with non-negative elements"); 57 sum += xs.Current * ys.Current; 51 public static double GetDistance(IReadOnlyList<double> point1, IReadOnlyList<double> point2) { 52 if (point1.Count != point2.Count) throw new ArgumentException("Cosine distance not defined on vectors of different length"); 53 var innerprod = 0.0; 54 var length1 = 0.0; 55 var
length2 = 0.0; 56 57 for (var i = 0; i < point1.Count; i++) { 58 double d1 = point1[i], d2 = point2[i]; 59 innerprod += d1 * d2; 60 length1 += d1 * d1; 61 length2 += d2 * d2; 58 62 } 59 if(xs.MoveNext() || ys.MoveNext()) throw new ArgumentException("Enumerables contain a different number of elements"); 60 return sum; 63 var l = Math.Sqrt(length1 * length2); 64 if (l.IsAlmost(0)) throw new ArgumentException("Cosine distance is not defined on vectors of length 0"); 65 return 1 - innerprod / l; 61 66 } 62 67 #endregion 63 68 public override double Get(IEnumerable<double> a, IEnumerable<double> b) { 64 return GetDistance(a , b);69 return GetDistance(a.ToArray(), b.ToArray()); 65 70 } 66 71 } -
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/DistanceBase.cs
r14911 r15207 30 30 public abstract class DistanceBase<T> : Item, IDistance<T> { 31 31 32 #region HLConstructors & Boilerplate32 #region HLConstructors & Cloning 33 33 [StorableConstructor] 34 34 protected DistanceBase(bool deserializing) : base(deserializing) { } … … 42 42 return new DistanceComparer(item, this); 43 43 } 44 45 44 46 45 public double Get(object x, object y) { -
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/EuclideanDistance.cs
r14784 r15207 32 32 public class EuclideanDistance : DistanceBase<IEnumerable<double>> { 33 33 34 #region HLConstructors & Boilerplate34 #region HLConstructors & Cloning 35 35 [StorableConstructor] 36 36 protected EuclideanDistance(bool deserializing) : base(deserializing) { } … … 40 40 #endregion 41 41 42 public static double GetDistance(double[] point1, double[] point2) { 43 if (point1.Length != point2.Length) throw new ArgumentException("Euclidean distance not defined on vectors of different length"); 44 return Math.Sqrt(point1.Zip(point2, (a1, b1) => (a1 - b1) * (a1 - b1)).Sum()); 42 public static double GetDistance(IReadOnlyList<double> point1, IReadOnlyList<double> point2) { 43 if (point1.Count != point2.Count) throw new ArgumentException("Euclidean distance not defined on vectors of different length"); 44 var sum = 0.0; 45 for (var i = 0; i < point1.Count; i++) { 46 var d = point1[i] - point2[i]; 47 sum += d * d; 48 } 49 50 return Math.Sqrt(sum); 45 51 } 46 52 -
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/ManhattanDistance.cs
r14911 r15207 32 32 public class ManhattanDistance : DistanceBase<IEnumerable<double>> { 33 33 34 #region HLConstructors & Boilerplate34 #region HLConstructors & Cloning 35 35 [StorableConstructor] 36 36 protected ManhattanDistance(bool deserializing) : base(deserializing) { } … … 47 47 public static double GetDistance(double[] point1, double[] point2) { 48 48 if (point1.Length != point2.Length) throw new ArgumentException("Manhattan distance not defined on vectors of different length"); 49 return point1.Zip(point2, (a1, b1) => Math.Abs(a1 - b1)).Sum(); 49 var sum = 0.0; 50 for (var i = 0; i < point1.Length; i++) 51 sum += Math.Abs(point1[i] - point2[i]); 52 return sum; 50 53 } 51 54 -
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/PriorityQueue.cs
r14785 r15207 25 25 26 26 namespace HeuristicLab.Algorithms.DataAnalysis { 27 // TODO: move to HeuristicLab.Collections28 27 29 28 // Code Taken from branch: RoutePlanning, Heap4 … … 63 62 64 63 public KeyValuePair<TK, TV> PeekMin() { 65 if(size == 0) { 66 throw new InvalidOperationException("Heap is empty"); 67 } 64 if (size == 0) throw new InvalidOperationException("Heap is empty"); 68 65 return new KeyValuePair<TK, TV>(data[1].Key, data[1].Value); 69 66 } … … 86 83 size = sz - 1; 87 84 finalLayerDist++; 88 if (finalLayerDist == finalLayerSize) {85 if (finalLayerDist == finalLayerSize) { 89 86 finalLayerSize >>= 2; 90 87 finalLayerDist = 0; 91 88 } 92 89 93 while (succ < sz) {90 while (succ < sz) { 94 91 var minKey = data[succ].Key; 95 92 var delta = 0; 96 93 97 var otherKey = data[succ + 1].Key; 98 if(otherKey.CompareTo(minKey) < 0) { 94 for (var i = 1; i <= 3; i++) { 95 var otherKey = data[succ + i].Key; 96 if (otherKey.CompareTo(minKey) >= 0) continue; 99 97 minKey = otherKey; 100 delta = 1;98 delta = i; 101 99 } 102 otherKey = data[succ + 2].Key; 103 if(otherKey.CompareTo(minKey) < 0) { 104 minKey = otherKey; 105 delta = 2; 106 } 107 otherKey = data[succ + 3].Key; 108 if(otherKey.CompareTo(minKey) < 0) { 109 minKey = otherKey; 110 delta = 3; 111 } 100 112 101 succ += delta; 113 102 layerPos += delta; … … 136 125 pred = pred - layerDist; // finally preds index 137 126 138 while (data[pred].Key.CompareTo(bubble) > 0) { // must terminate since inf at root127 while (data[pred].Key.CompareTo(bubble) > 0) { // must terminate since inf at root 139 128 data[hole].Key = data[pred].Key; 140 129 data[hole].Value = data[pred].Value; … … 149 138 data[hole].Key = bubble; 150 139 data[hole].Value = data[sz].Value; 151 152 140 data[sz].Key = Supremum; // mark as deleted 153 141 } … … 158 146 finalLayerDist--; 159 147 160 if (finalLayerDist == -1) { // layer full161 // start next layer148 if (finalLayerDist == -1) { // layer full 149 // start next layer 162 150 finalLayerSize 
<<= 2; 163 151 finalLayerDist = finalLayerSize - 1; … … 172 160 pred = pred - layerDist; // finally preds index 173 161 var predKey = data[pred].Key; 174 while (predKey.CompareTo(key) > 0) {162 while (predKey.CompareTo(key) > 0) { 175 163 data[hole].Key = predKey; 176 164 data[hole].Value = data[pred].Value; … … 195 183 var sup = Supremum; 196 184 var cap = capacity; 197 for (var i = 1; i <= cap; i++) {185 for (var i = 1; i <= cap; i++) { 198 186 data[i].Key = sup; 199 187 } -
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/SpacePartitioningTree.cs
r14862 r15207 56 56 using System; 57 57 using System.Collections.Generic; 58 using System.Linq;59 using HeuristicLab.Common;60 58 61 59 namespace HeuristicLab.Algorithms.DataAnalysis { … … 63 61 /// Space partitioning tree (SPTree) 64 62 /// </summary> 65 public class SpacePartitioningTree : ISpacePartitioningTree { 66 private const uint QT_NODE_CAPACITY = 1; 67 68 private double[] buff; 69 private SpacePartitioningTree parent; 63 internal class SpacePartitioningTree { 64 private const uint QtNodeCapacity = 1; 65 66 #region Fields 70 67 private int dimension; 71 68 private bool isLeaf; … … 80 77 // Indices in this space-partitioning tree node, corresponding center-of-mass, and list of all children 81 78 private double[] centerOfMass; 82 private readonly int[] index = new int[Q T_NODE_CAPACITY];79 private readonly int[] index = new int[QtNodeCapacity]; 83 80 84 81 // Children 85 82 private SpacePartitioningTree[] children; 86 83 private uint noChildren; 84 #endregion 87 85 88 86 public SpacePartitioningTree(double[,] inpData) { … … 104 102 var width = new double[d]; 105 103 for (var i = 0; i < d; i++) width[i] = Math.Max(maxY[i] - meanY[i], meanY[i] - minY[i]) + 1e-5; 106 Init( null,inpData, meanY, width);104 Init(inpData, meanY, width); 107 105 Fill(n); 108 106 } 109 107 110 public SpacePartitioningTree(double[,] inpData, IEnumerable<double> impCorner, IEnumerable<double> impWith) { 111 Init(null, inpData, impCorner, impWith); 112 } 113 public SpacePartitioningTree(SpacePartitioningTree parent, double[,] inpData, IEnumerable<double> impCorner, IEnumerable<double> impWith) { 114 Init(parent, inpData, impCorner, impWith); 115 } 116 117 public ISpacePartitioningTree GetParent() { 118 return parent; 108 private SpacePartitioningTree(double[,] inpData, IEnumerable<double> impCorner, IEnumerable<double> impWith) { 109 Init(inpData, impCorner, impWith); 119 110 } 120 111 … … 132 123 133 124 // If there is space in this quad tree and it is a leaf, add the object here 134 
if (isLeaf && size < Q T_NODE_CAPACITY) {125 if (isLeaf && size < QtNodeCapacity) { 135 126 index[size] = newIndex; 136 127 size++; … … 138 129 } 139 130 140 // Don't add duplicates for now (this is not very nice)131 // Don't add duplicates 141 132 var anyDuplicate = false; 142 133 for (uint n = 0; n < size; n++) { … … 161 152 } 162 153 163 public void Subdivide() {164 // Create new children165 var newCorner = new double[dimension];166 var newWidth = new double[dimension];167 for (var i = 0; i < noChildren; i++) {168 var div = 1;169 for (var d = 0; d < dimension; d++) {170 newWidth[d] = .5 * boundary.GetWidth(d);171 if ((i / div) % 2 == 1) newCorner[d] = boundary.GetCorner(d) - .5 * boundary.GetWidth(d);172 else newCorner[d] = boundary.GetCorner(d) + .5 * boundary.GetWidth(d);173 div *= 2;174 }175 children[i] = new SpacePartitioningTree(this, data, newCorner, newWidth);176 }177 178 // Move existing points to correct children179 for (var i = 0; i < size; i++) {180 var success = false;181 for (var j = 0; j < noChildren; j++) {182 if (!success) success = children[j].Insert(index[i]);183 }184 index[i] = -1; // as in tSNE implementation by van der Maaten185 }186 187 // Empty parent node188 size = 0;189 isLeaf = false;190 }191 192 public bool IsCorrect() {193 var row = new double[dimension];194 for (var n = 0; n < size; n++) {195 Buffer.BlockCopy(data, sizeof(double) * dimension * index[n], row, 0, sizeof(double) * dimension);196 if (!boundary.ContainsPoint(row)) return false;197 }198 if (isLeaf) return true;199 var correct = true;200 for (var i = 0; i < noChildren; i++) correct = correct && children[i].IsCorrect();201 return correct;202 }203 204 public void GetAllIndices(int[] indices) {205 GetAllIndices(indices, 0);206 }207 208 public int GetAllIndices(int[] indices, int loc) {209 // Gather indices in current quadrant210 for (var i = 0; i < size; i++) indices[loc + i] = index[i];211 loc += (int)size;212 // Gather indices in children213 if (isLeaf) return loc;214 for 
(var i = 0; i < noChildren; i++) loc = children[i].GetAllIndices(indices, loc);215 return loc;216 }217 218 public int GetDepth() {219 return isLeaf ? 1 : 1 + children.Max(x => x.GetDepth());220 }221 222 154 public void ComputeNonEdgeForces(int pointIndex, double theta, double[] negF, ref double sumQ) { 223 155 // Make sure that we spend no time on empty nodes or self-interactions … … 226 158 // Compute distance between point and center-of-mass 227 159 var D = .0; 160 var buff = new double[dimension]; 228 161 for (var d = 0; d < dimension; d++) buff[d] = data[pointIndex, d] - centerOfMass[d]; 229 162 for (var d = 0; d < dimension; d++) D += buff[d] * buff[d]; … … 233 166 for (var d = 0; d < dimension; d++) { 234 167 var curWidth = boundary.GetWidth(d); 235 maxWidth = (maxWidth > curWidth)? maxWidth : curWidth;168 maxWidth = maxWidth > curWidth ? maxWidth : curWidth; 236 169 } 237 170 if (isLeaf || maxWidth / Math.Sqrt(D) < theta) { … … 250 183 } 251 184 252 // does not use the tree 253 public void ComputeEdgeForces(int[] rowP, int[] colP, double[] valP, int n, double[,] posF) { 185 public static void ComputeEdgeForces(int[] rowP, int[] colP, double[] valP, int n, double[,] posF, double[,] data, int dimension) { 254 186 // Loop over all edges in the graph 255 187 for (var k = 0; k < n; k++) { … … 259 191 // uses squared distance 260 192 var d = 1.0; 193 var buff = new double[dimension]; 261 194 for (var j = 0; j < dimension; j++) buff[j] = data[k, j] - data[colP[i], j]; 262 195 for (var j = 0; j < dimension; j++) d += buff[j] * buff[j]; … … 273 206 for (var i = 0; i < n; i++) Insert(i); 274 207 } 275 private void Init(SpacePartitioningTree p, double[,] inpData, IEnumerable<double> inpCorner, IEnumerable<double> inpWidth) { 276 parent = p;208 209 private void Init(double[,] inpData, IEnumerable<double> inpCorner, IEnumerable<double> inpWidth) { 277 210 dimension = inpData.GetLength(1); 278 211 noChildren = 2; … … 283 216 cumulativeSize = 0; 284 217 boundary = new 
Cell((uint)dimension); 218 285 219 inpCorner.ForEach((i, x) => boundary.SetCorner(i, x)); 286 220 inpWidth.ForEach((i, x) => boundary.SetWidth(i, x)); … … 288 222 children = new SpacePartitioningTree[noChildren]; 289 223 centerOfMass = new double[dimension]; 290 buff = new double[dimension]; 291 224 } 225 226 private void Subdivide() { 227 // Create new children 228 var newCorner = new double[dimension]; 229 var newWidth = new double[dimension]; 230 for (var i = 0; i < noChildren; i++) { 231 var div = 1; 232 for (var d = 0; d < dimension; d++) { 233 newWidth[d] = .5 * boundary.GetWidth(d); 234 if (i / div % 2 == 1) newCorner[d] = boundary.GetCorner(d) - .5 * boundary.GetWidth(d); 235 else newCorner[d] = boundary.GetCorner(d) + .5 * boundary.GetWidth(d); 236 div *= 2; 237 } 238 children[i] = new SpacePartitioningTree(data, newCorner, newWidth); 239 } 240 241 // Move existing points to correct children 242 for (var i = 0; i < size; i++) { 243 var success = false; 244 for (var j = 0; j < noChildren; j++) { 245 if (!success) success = children[j].Insert(index[i]); 246 } 247 index[i] = -1; // as in tSNE implementation by van der Maaten 248 } 249 // Empty parent node 250 size = 0; 251 isLeaf = false; 292 252 } 293 253 #endregion 294 295 254 296 255 private class Cell { -
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNEAlgorithm.cs
r14863 r15207 41 41 /// </summary> 42 42 [Item("tSNE", "t-distributed stochastic neighbourhood embedding projects the data in a low " + 43 "dimensional space to allow visual cluster identification. ")]43 "dimensional space to allow visual cluster identification. Implemented similar to: https://lvdmaaten.github.io/tsne/#implementations (Barnes-Hut t-SNE). Described in : https://lvdmaaten.github.io/publications/papers/JMLR_2014.pdf")] 44 44 [Creatable(CreatableAttribute.Categories.DataAnalysis, Priority = 100)] 45 45 [StorableClass] … … 198 198 199 199 private TSNEAlgorithm(TSNEAlgorithm original, Cloner cloner) : base(original, cloner) { 200 if (original.dataRowNames!=null)201 this.dataRowNames = new Dictionary<string, List<int>>(original.dataRowNames);200 if (original.dataRowNames != null) 201 this.dataRowNames = new Dictionary<string, List<int>>(original.dataRowNames); 202 202 if (original.dataRows != null) 203 203 this.dataRows = original.dataRows.ToDictionary(kvp => kvp.Key, kvp => cloner.Clone(kvp.Value)); … … 226 226 Parameters.Add(new FixedValueParameter<BoolValue>(SetSeedRandomlyParameterName, "If the seed should be random.", new BoolValue(true))); 227 227 Parameters.Add(new FixedValueParameter<IntValue>(SeedParameterName, "The seed used if it should not be random.", new IntValue(0))); 228 Parameters.Add(new FixedValueParameter<StringValue>(ClassesParameterName, " name of the column specifying the class lables of each data point. If the label column can not be found training/test is used as labels.", new StringValue("none")));228 Parameters.Add(new FixedValueParameter<StringValue>(ClassesParameterName, "Name of the column specifying the class lables of each data point. 
If the label column can not be found training/test is used as labels.", new StringValue("none"))); 229 229 Parameters.Add(new FixedValueParameter<BoolValue>(NormalizationParameterName, "Whether the data should be zero centered and have variance of 1 for each variable, so different scalings are ignored.", new BoolValue(true))); 230 230 Parameters.Add(new FixedValueParameter<IntValue>(UpdateIntervalParameterName, "", new IntValue(50))); -
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNEStatic.cs
r14862 r15207 56 56 using System; 57 57 using System.Collections.Generic; 58 using System.Linq;59 58 using HeuristicLab.Collections; 60 59 using HeuristicLab.Common; 61 60 using HeuristicLab.Core; 62 using HeuristicLab.Optimization;63 61 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; 64 62 using HeuristicLab.Random; … … 70 68 [StorableClass] 71 69 public sealed class TSNEState : DeepCloneable { 70 #region Storables 72 71 // initialized once 73 72 [Storable] … … 122 121 [Storable] 123 122 public double[,] dY; 124 123 #endregion 124 125 #region Constructors & Cloning 125 126 private TSNEState(TSNEState original, Cloner cloner) : base(original, cloner) { 126 this.distance = cloner.Clone(original.distance);127 this.random = cloner.Clone(original.random);128 this.perplexity = original.perplexity;129 this.exact = original.exact;130 this.noDatapoints = original.noDatapoints;131 this.finalMomentum = original.finalMomentum;132 this.momSwitchIter = original.momSwitchIter;133 this.stopLyingIter = original.stopLyingIter;134 th is.theta = original.theta;135 this.eta = original.eta;136 this.newDimensions = original.newDimensions;137 if (original.valP != null) {138 this.valP = new double[original.valP.Length];139 Array.Copy(original.valP, this.valP, this.valP.Length);140 } 141 if (original.rowP != null) {142 this.rowP = new int[original.rowP.Length];143 Array.Copy(original.rowP, this.rowP, this.rowP.Length);144 } 145 if (original.colP != null) {146 this.colP = new int[original.colP.Length];147 Array.Copy(original.colP, this.colP, this.colP.Length);148 } 149 if (original.p != null) {150 this.p = new double[original.p.GetLength(0), original.p.GetLength(1)];151 Array.Copy(original.p, this.p, this.p.Length);152 } 153 this.newData = new double[original.newData.GetLength(0), original.newData.GetLength(1)];154 Array.Copy(original.newData, this.newData, this.newData.Length);155 this.iter = original.iter;156 this.currentMomentum = original.currentMomentum;157 
this.gains = new double[original.gains.GetLength(0), original.gains.GetLength(1)];158 Array.Copy(original.gains, this.gains, this.gains.Length);159 this.uY = new double[original.uY.GetLength(0), original.uY.GetLength(1)];160 Array.Copy(original.uY, this.uY, this.uY.Length);161 this.dY = new double[original.dY.GetLength(0), original.dY.GetLength(1)];162 Array.Copy(original.dY, this.dY, this.dY.Length);127 distance = cloner.Clone(original.distance); 128 random = cloner.Clone(original.random); 129 perplexity = original.perplexity; 130 exact = original.exact; 131 noDatapoints = original.noDatapoints; 132 finalMomentum = original.finalMomentum; 133 momSwitchIter = original.momSwitchIter; 134 stopLyingIter = original.stopLyingIter; 135 theta = original.theta; 136 eta = original.eta; 137 newDimensions = original.newDimensions; 138 if (original.valP != null) { 139 valP = new double[original.valP.Length]; 140 Array.Copy(original.valP, valP, valP.Length); 141 } 142 if (original.rowP != null) { 143 rowP = new int[original.rowP.Length]; 144 Array.Copy(original.rowP, rowP, rowP.Length); 145 } 146 if (original.colP != null) { 147 colP = new int[original.colP.Length]; 148 Array.Copy(original.colP, colP, colP.Length); 149 } 150 if (original.p != null) { 151 p = new double[original.p.GetLength(0), original.p.GetLength(1)]; 152 Array.Copy(original.p, p, p.Length); 153 } 154 newData = new double[original.newData.GetLength(0), original.newData.GetLength(1)]; 155 Array.Copy(original.newData, newData, newData.Length); 156 iter = original.iter; 157 currentMomentum = original.currentMomentum; 158 gains = new double[original.gains.GetLength(0), original.gains.GetLength(1)]; 159 Array.Copy(original.gains, gains, gains.Length); 160 uY = new double[original.uY.GetLength(0), original.uY.GetLength(1)]; 161 Array.Copy(original.uY, uY, uY.Length); 162 dY = new double[original.dY.GetLength(0), original.dY.GetLength(1)]; 163 Array.Copy(original.dY, dY, dY.Length); 163 164 } 164 165 … … 177 178 
this.stopLyingIter = stopLyingIter; 178 179 this.momSwitchIter = momSwitchIter; 179 this.currentMomentum = momentum;180 currentMomentum = momentum; 180 181 this.finalMomentum = finalMomentum; 181 182 this.eta = eta; 182 183 183 184 184 // initialize 185 185 noDatapoints = data.Length; 186 if (noDatapoints - 1 < 3 * perplexity)186 if (noDatapoints - 1 < 3 * perplexity) 187 187 throw new ArgumentException("Perplexity too large for the number of data points!"); 188 188 … … 192 192 uY = new double[noDatapoints, newDimensions]; 193 193 gains = new double[noDatapoints, newDimensions]; 194 for (var i = 0; i < noDatapoints; i++)195 for (var j = 0; j < newDimensions; j++)194 for (var i = 0; i < noDatapoints; i++) 195 for (var j = 0; j < newDimensions; j++) 196 196 gains[i, j] = 1.0; 197 197 … … 206 206 207 207 // Lie about the P-values (factor is 4 in the MATLAB implementation) 208 if (exact) for(var i = 0; i < noDatapoints; i++) for(var j = 0; j < noDatapoints; j++) p[i, j] *= 12.0;209 else for (var i = 0; i < rowP[noDatapoints]; i++) valP[i] *= 12.0;208 if (exact) for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < noDatapoints; j++) p[i, j] *= 12.0; 209 else for (var i = 0; i < rowP[noDatapoints]; i++) valP[i] *= 12.0; 210 210 211 211 // Initialize solution (randomly) 212 212 var rand = new NormalDistributedRandom(random, 0, 1); 213 for (var i = 0; i < noDatapoints; i++)214 for (var j = 0; j < newDimensions; j++)213 for (var i = 0; i < noDatapoints; i++) 214 for (var j = 0; j < newDimensions; j++) 215 215 newData[i, j] = rand.NextDouble() * .0001; 216 216 } 217 #endregion 217 218 218 219 public double EvaluateError() { … … 222 223 } 223 224 225 #region Helpers 224 226 private static void CalculateApproximateSimilarities(T[] data, IDistance<T> distance, double perplexity, out int[] rowP, out int[] colP, out double[] valP) { 225 227 // Compute asymmetric pairwise input similarities … … 233 235 valP = sValP; 234 236 var sumP = .0; 235 for (var i = 0; i < 
rowP[data.Length]; i++) sumP += valP[i];236 for (var i = 0; i < rowP[data.Length]; i++) valP[i] /= sumP;237 for (var i = 0; i < rowP[data.Length]; i++) sumP += valP[i]; 238 for (var i = 0; i < rowP[data.Length]; i++) valP[i] /= sumP; 237 239 } 238 240 … … 242 244 ComputeGaussianPerplexity(data, distance, p, perplexity); 243 245 // Symmetrize input similarities 244 for (var n = 0; n < data.Length; n++) {245 for (var m = n + 1; m < data.Length; m++) {246 for (var n = 0; n < data.Length; n++) { 247 for (var m = n + 1; m < data.Length; m++) { 246 248 p[n, m] += p[m, n]; 247 249 p[m, n] = p[n, m]; … … 249 251 } 250 252 var sumP = .0; 251 for (var i = 0; i < data.Length; i++) for(var j = 0; j < data.Length; j++) sumP += p[i, j];252 for (var i = 0; i < data.Length; i++) for(var j = 0; j < data.Length; j++) p[i, j] /= sumP;253 for (var i = 0; i < data.Length; i++) for (var j = 0; j < data.Length; j++) sumP += p[i, j]; 254 for (var i = 0; i < data.Length; i++) for (var j = 0; j < data.Length; j++) p[i, j] /= sumP; 253 255 return p; 254 256 } 255 257 256 258 private static void ComputeGaussianPerplexity(IReadOnlyList<T> x, IDistance<T> distance, out int[] rowP, out int[] colP, out double[] valP, double perplexity, int k) { 257 if (perplexity > k) throw new ArgumentException("Perplexity should be lower than k!");258 259 intn = x.Count;259 if (perplexity > k) throw new ArgumentException("Perplexity should be lower than k!"); 260 261 var n = x.Count; 260 262 // Allocate the memory we need 261 263 rowP = new int[n + 1]; … … 264 266 var curP = new double[n - 1]; 265 267 rowP[0] = 0; 266 for (var i = 0; i < n; i++) rowP[i + 1] = rowP[i] + k;268 for (var i = 0; i < n; i++) rowP[i + 1] = rowP[i] + k; 267 269 268 270 var objX = new List<IndexedItem<T>>(); 269 for (var i = 0; i < n; i++) objX.Add(new IndexedItem<T>(i, x[i]));271 for (var i = 0; i < n; i++) objX.Add(new IndexedItem<T>(i, x[i])); 270 272 271 273 // Build ball tree on data set … … 273 275 274 276 // Loop over all points 
to find nearest neighbors 275 for (var i = 0; i < n; i++) {277 for (var i = 0; i < n; i++) { 276 278 IList<IndexedItem<T>> indices; 277 279 IList<double> distances; … … 285 287 var minBeta = double.MinValue; 286 288 var maxBeta = double.MaxValue; 287 const double tol = 1e-5; 289 const double tol = 1e-5; 288 290 289 291 // Iterate until we found a good perplexity 290 292 var iter = 0; double sumP = 0; 291 while (!found && iter < 200) {293 while (!found && iter < 200) { 292 294 293 295 // Compute Gaussian kernel row 294 for (var m = 0; m < k; m++) curP[m] = Math.Exp(-beta * distances[m + 1]);296 for (var m = 0; m < k; m++) curP[m] = Math.Exp(-beta * distances[m + 1]); 295 297 296 298 // Compute entropy of current row 297 299 sumP = double.Epsilon; 298 for (var m = 0; m < k; m++) sumP += curP[m];300 for (var m = 0; m < k; m++) sumP += curP[m]; 299 301 var h = .0; 300 for (var m = 0; m < k; m++) h += beta * (distances[m + 1] * curP[m]);302 for (var m = 0; m < k; m++) h += beta * (distances[m + 1] * curP[m]); 301 303 h = h / sumP + Math.Log(sumP); 302 304 303 305 // Evaluate whether the entropy is within the tolerance level 304 306 var hdiff = h - Math.Log(perplexity); 305 if (hdiff < tol && -hdiff < tol) {307 if (hdiff < tol && -hdiff < tol) { 306 308 found = true; 307 309 } else { 308 if (hdiff > 0) {310 if (hdiff > 0) { 309 311 minBeta = beta; 310 if (maxBeta.IsAlmost(double.MaxValue) || maxBeta.IsAlmost(double.MinValue))312 if (maxBeta.IsAlmost(double.MaxValue) || maxBeta.IsAlmost(double.MinValue)) 311 313 beta *= 2.0; 312 314 else … … 314 316 } else { 315 317 maxBeta = beta; 316 if (minBeta.IsAlmost(double.MinValue) || minBeta.IsAlmost(double.MaxValue))318 if (minBeta.IsAlmost(double.MinValue) || minBeta.IsAlmost(double.MaxValue)) 317 319 beta /= 2.0; 318 320 else … … 326 328 327 329 // Row-normalize current row of P and store in matrix 328 for (var m = 0; m < k; m++) curP[m] /= sumP;329 for (var m = 0; m < k; m++) {330 for (var m = 0; m < k; m++) curP[m] /= sumP; 
331 for (var m = 0; m < k; m++) { 330 332 colP[rowP[i] + m] = indices[m + 1].Index; 331 333 valP[rowP[i] + m] = curP[m]; … … 337 339 var dd = ComputeDistances(x, distance); 338 340 339 intn = x.Length;341 var n = x.Length; 340 342 // Compute the Gaussian kernel row by row 341 for (var i = 0; i < n; i++) {343 for (var i = 0; i < n; i++) { 342 344 // Initialize some variables 343 345 var found = false; … … 350 352 // Iterate until we found a good perplexity 351 353 var iter = 0; 352 while (!found && iter < 200) { // 200 iterations as in tSNE implementation by van der Maarten354 while (!found && iter < 200) { // 200 iterations as in tSNE implementation by van der Maarten 353 355 354 356 // Compute Gaussian kernel row 355 for (var m = 0; m < n; m++) p[i, m] = Math.Exp(-beta * dd[i][m]);357 for (var m = 0; m < n; m++) p[i, m] = Math.Exp(-beta * dd[i][m]); 356 358 p[i, i] = double.Epsilon; 357 359 358 360 // Compute entropy of current row 359 361 sumP = double.Epsilon; 360 for (var m = 0; m < n; m++) sumP += p[i, m];362 for (var m = 0; m < n; m++) sumP += p[i, m]; 361 363 var h = 0.0; 362 for (var m = 0; m < n; m++) h += beta * (dd[i][m] * p[i, m]);364 for (var m = 0; m < n; m++) h += beta * (dd[i][m] * p[i, m]); 363 365 h = h / sumP + Math.Log(sumP); 364 366 365 367 // Evaluate whether the entropy is within the tolerance level 366 368 var hdiff = h - Math.Log(perplexity); 367 if (hdiff < tol && -hdiff < tol) {369 if (hdiff < tol && -hdiff < tol) { 368 370 found = true; 369 371 } else { 370 if (hdiff > 0) {372 if (hdiff > 0) { 371 373 minBeta = beta; 372 if (maxBeta.IsAlmost(double.MaxValue) || maxBeta.IsAlmost(double.MinValue))374 if (maxBeta.IsAlmost(double.MaxValue) || maxBeta.IsAlmost(double.MinValue)) 373 375 beta *= 2.0; 374 376 else … … 376 378 } else { 377 379 maxBeta = beta; 378 if (minBeta.IsAlmost(double.MinValue) || minBeta.IsAlmost(double.MaxValue))380 if (minBeta.IsAlmost(double.MinValue) || minBeta.IsAlmost(double.MaxValue)) 379 381 beta /= 2.0; 380 382 
else … … 388 390 389 391 // Row normalize P 390 for (var m = 0; m < n; m++) p[i, m] /= sumP;392 for (var m = 0; m < n; m++) p[i, m] /= sumP; 391 393 } 392 394 } … … 394 396 private static double[][] ComputeDistances(T[] x, IDistance<T> distance) { 395 397 var res = new double[x.Length][]; 396 for (intr = 0; r < x.Length; r++) {398 for (var r = 0; r < x.Length; r++) { 397 399 var rowV = new double[x.Length]; 398 400 // all distances must be symmetric 399 for (intc = 0; c < r; c++) {401 for (var c = 0; c < r; c++) { 400 402 rowV[c] = res[c][r]; 401 403 } 402 404 rowV[r] = 0.0; // distance to self is zero for all distances 403 for (intc = r + 1; c < x.Length; c++) {405 for (var c = r + 1; c < x.Length; c++) { 404 406 rowV[c] = distance.Get(x[r], x[c]); 405 407 } … … 418 420 // Compute Q-matrix and normalization sum 419 421 var sumQ = double.Epsilon; 420 for (var n1 = 0; n1 < n; n1++) {421 for (var m = 0; m < n; m++) {422 if (n1 != m) {422 for (var n1 = 0; n1 < n; n1++) { 423 for (var m = 0; m < n; m++) { 424 if (n1 != m) { 423 425 q[n1, m] = 1 / (1 + dd[n1, m]); 424 426 sumQ += q[n1, m]; … … 426 428 } 427 429 } 428 for (var i = 0; i < n; i++) for(var j = 0; j < n; j++) q[i, j] /= sumQ;430 for (var i = 0; i < n; i++) for (var j = 0; j < n; j++) q[i, j] /= sumQ; 429 431 430 432 // Sum t-SNE error 431 433 var c = .0; 432 for (var i = 0; i < n; i++)433 for (var j = 0; j < n; j++) {434 for (var i = 0; i < n; i++) 435 for (var j = 0; j < n; j++) { 434 436 c += p[i, j] * Math.Log((p[i, j] + float.Epsilon) / (q[i, j] + float.Epsilon)); 435 437 } … … 437 439 } 438 440 439 // The mapping of the approximate tSNE looks good but the error curve never changes.440 441 private static double EvaluateErrorApproximate(IReadOnlyList<int> rowP, IReadOnlyList<int> colP, IReadOnlyList<double> valP, double[,] y, double theta) { 441 442 // Get estimate of normalization term … … 444 445 var tree = new SpacePartitioningTree(y); 445 446 var buff = new double[d]; 446 doublesumQ = 0.0;447 for (var 
i = 0; i < n; i++) tree.ComputeNonEdgeForces(i, theta, buff, ref sumQ);447 var sumQ = 0.0; 448 for (var i = 0; i < n; i++) tree.ComputeNonEdgeForces(i, theta, buff, ref sumQ); 448 449 449 450 // Loop over all edges to compute t-SNE error 450 451 var c = .0; 451 for (var k = 0; k < n; k++) {452 for (var i = rowP[k]; i < rowP[k + 1]; i++) {452 for (var k = 0; k < n; k++) { 453 for (var i = rowP[k]; i < rowP[k + 1]; i++) { 453 454 var q = .0; 454 for (var j = 0; j < d; j++) buff[j] = y[k, j];455 for (var j = 0; j < d; j++) buff[j] -= y[colP[i], j];456 for (var j = 0; j < d; j++) q += buff[j] * buff[j];455 for (var j = 0; j < d; j++) buff[j] = y[k, j]; 456 for (var j = 0; j < d; j++) buff[j] -= y[colP[i], j]; 457 for (var j = 0; j < d; j++) q += buff[j] * buff[j]; 457 458 q = (1.0 / (1.0 + q)) / sumQ; 458 459 c += valP[i] * Math.Log((valP[i] + float.Epsilon) / (q + float.Epsilon)); … … 466 467 var n = rowP.Count - 1; 467 468 var rowCounts = new int[n]; 468 for (var j = 0; j < n; j++) {469 for (var i = rowP[j]; i < rowP[j + 1]; i++) {469 for (var j = 0; j < n; j++) { 470 for (var i = rowP[j]; i < rowP[j + 1]; i++) { 470 471 471 472 // Check whether element (col_P[i], n) is present 472 473 var present = false; 473 for (var m = rowP[colP[i]]; m < rowP[colP[i] + 1]; m++) {474 if (colP[m] == j) present = true;474 for (var m = rowP[colP[i]]; m < rowP[colP[i] + 1]; m++) { 475 if (colP[m] == j) present = true; 475 476 } 476 if (present) rowCounts[j]++;477 if (present) rowCounts[j]++; 477 478 else { 478 479 rowCounts[j]++; … … 482 483 } 483 484 var noElem = 0; 484 for (var i = 0; i < n; i++) noElem += rowCounts[i];485 for (var i = 0; i < n; i++) noElem += rowCounts[i]; 485 486 486 487 // Allocate memory for symmetrized matrix … … 491 492 // Construct new row indices for symmetric matrix 492 493 symRowP[0] = 0; 493 for (var i = 0; i < n; i++) symRowP[i + 1] = symRowP[i] + rowCounts[i];494 for (var i = 0; i < n; i++) symRowP[i + 1] = symRowP[i] + rowCounts[i]; 494 495 495 496 // 
Fill the result matrix 496 497 var offset = new int[n]; 497 for (var j = 0; j < n; j++) {498 for (var i = rowP[j]; i < rowP[j + 1]; i++) { // considering element(n, colP[i])498 for (var j = 0; j < n; j++) { 499 for (var i = rowP[j]; i < rowP[j + 1]; i++) { // considering element(n, colP[i]) 499 500 500 501 // Check whether element (col_P[i], n) is present 501 502 var present = false; 502 for (var m = rowP[colP[i]]; m < rowP[colP[i] + 1]; m++) {503 if (colP[m] != j) continue;503 for (var m = rowP[colP[i]]; m < rowP[colP[i] + 1]; m++) { 504 if (colP[m] != j) continue; 504 505 present = true; 505 if (j > colP[i]) continue; // make sure we do not add elements twice506 if (j > colP[i]) continue; // make sure we do not add elements twice 506 507 symColP[symRowP[j] + offset[j]] = colP[i]; 507 508 symColP[symRowP[colP[i]] + offset[colP[i]]] = j; … … 511 512 512 513 // If (colP[i], n) is not present, there is no addition involved 513 if (!present) {514 if (!present) { 514 515 symColP[symRowP[j] + offset[j]] = colP[i]; 515 516 symColP[symRowP[colP[i]] + offset[colP[i]]] = j; … … 519 520 520 521 // Update offsets 521 if (present && (j > colP[i])) continue;522 if (present && (j > colP[i])) continue; 522 523 offset[j]++; 523 if(colP[i] != j) offset[colP[i]]++; 524 } 525 } 526 527 for(var i = 0; i < noElem; i++) symValP[i] /= 2.0; 528 } 524 if (colP[i] != j) offset[colP[i]]++; 525 } 526 } 527 528 for (var i = 0; i < noElem; i++) symValP[i] /= 2.0; 529 } 530 #endregion 529 531 } 530 532 531 533 /// <summary> 532 /// S impleinterface to tSNE534 /// Static interface to tSNE 533 535 /// </summary> 534 536 /// <param name="data"></param> … … 548 550 int newDimensions = 2, double perplexity = 25, int iterations = 1000, 549 551 double theta = 0, 550 int stopLyingIter = 250, int momSwitchIter = 250, double momentum = .5,551 double finalMomentum = .8, double eta = 200.0552 int stopLyingIter = 0, int momSwitchIter = 0, double momentum = .5, 553 double finalMomentum = .8, double eta = 10.0 
552 554 ) { 553 555 var state = CreateState(data, distance, random, newDimensions, perplexity, 554 556 theta, stopLyingIter, momSwitchIter, momentum, finalMomentum, eta); 555 557 556 for (inti = 0; i < iterations - 1; i++) {558 for (var i = 0; i < iterations - 1; i++) { 557 559 Iterate(state); 558 560 } … … 562 564 public static TSNEState CreateState(T[] data, IDistance<T> distance, IRandom random, 563 565 int newDimensions = 2, double perplexity = 25, double theta = 0, 564 int stopLyingIter = 250, int momSwitchIter = 250, double momentum = .5,565 double finalMomentum = .8, double eta = 200.0566 int stopLyingIter = 0, int momSwitchIter = 0, double momentum = .5, 567 double finalMomentum = .8, double eta = 10.0 566 568 ) { 567 569 return new TSNEState(data, distance, random, newDimensions, perplexity, theta, stopLyingIter, momSwitchIter, momentum, finalMomentum, eta); 568 570 } 569 571 570 571 572 public static double[,] Iterate(TSNEState state) { 572 if (state.exact)573 if (state.exact) 573 574 ComputeExactGradient(state.p, state.newData, state.noDatapoints, state.newDimensions, state.dY); 574 575 else … … 576 577 577 578 // Update gains 578 for (var i = 0; i < state.noDatapoints; i++) {579 for (var j = 0; j < state.newDimensions; j++) {579 for (var i = 0; i < state.noDatapoints; i++) { 580 for (var j = 0; j < state.newDimensions; j++) { 580 581 state.gains[i, j] = Math.Sign(state.dY[i, j]) != Math.Sign(state.uY[i, j]) 581 582 ? 
state.gains[i, j] + .2 // +0.2 nd *0.8 are used in two separate implementations of tSNE -> seems to be correct 582 583 : state.gains[i, j] * .8; 583 584 584 if (state.gains[i, j] < .01) state.gains[i, j] = .01;585 if (state.gains[i, j] < .01) state.gains[i, j] = .01; 585 586 } 586 587 } … … 588 589 589 590 // Perform gradient update (with momentum and gains) 590 for (var i = 0; i < state.noDatapoints; i++)591 for (var j = 0; j < state.newDimensions; j++)591 for (var i = 0; i < state.noDatapoints; i++) 592 for (var j = 0; j < state.newDimensions; j++) 592 593 state.uY[i, j] = state.currentMomentum * state.uY[i, j] - state.eta * state.gains[i, j] * state.dY[i, j]; 593 594 594 for (var i = 0; i < state.noDatapoints; i++)595 for (var j = 0; j < state.newDimensions; j++)595 for (var i = 0; i < state.noDatapoints; i++) 596 for (var j = 0; j < state.newDimensions; j++) 596 597 state.newData[i, j] = state.newData[i, j] + state.uY[i, j]; 597 598 … … 600 601 601 602 // Stop lying about the P-values after a while, and switch momentum 602 if (state.iter == state.stopLyingIter) {603 if (state.exact)604 for (var i = 0; i < state.noDatapoints; i++)605 for (var j = 0; j < state.noDatapoints; j++)603 if (state.iter == state.stopLyingIter) { 604 if (state.exact) 605 for (var i = 0; i < state.noDatapoints; i++) 606 for (var j = 0; j < state.noDatapoints; j++) 606 607 state.p[i, j] /= 12.0; 607 608 else 608 for (var i = 0; i < state.rowP[state.noDatapoints]; i++)609 for (var i = 0; i < state.rowP[state.noDatapoints]; i++) 609 610 state.valP[i] /= 12.0; 610 611 } 611 612 612 if (state.iter == state.momSwitchIter)613 if (state.iter == state.momSwitchIter) 613 614 state.currentMomentum = state.finalMomentum; 614 615 … … 617 618 } 618 619 619 620 620 #region Helpers 621 621 private static void ComputeApproximateGradient(int[] rowP, int[] colP, double[] valP, double[,] y, int n, int d, double[,] dC, double theta) { 622 622 var tree = new SpacePartitioningTree(y); 623 doublesumQ = 0.0;623 
var sumQ = 0.0; 624 624 var posF = new double[n, d]; 625 625 var negF = new double[n, d]; 626 tree.ComputeEdgeForces(rowP, colP, valP, n, posF);626 SpacePartitioningTree.ComputeEdgeForces(rowP, colP, valP, n, posF, y, d); 627 627 var row = new double[d]; 628 for (var n1 = 0; n1 < n; n1++) {629 Array.Clear(row, 0,row.Length);628 for (var n1 = 0; n1 < n; n1++) { 629 Array.Clear(row, 0, row.Length); 630 630 tree.ComputeNonEdgeForces(n1, theta, row, ref sumQ); 631 Buffer.BlockCopy(row, 0, negF, (sizeof(double) * n1 * d), d *sizeof(double));631 Buffer.BlockCopy(row, 0, negF, (sizeof(double) * n1 * d), d * sizeof(double)); 632 632 } 633 633 634 634 // Compute final t-SNE gradient 635 635 for (var i = 0; i < n; i++) 636 for (var j = 0; j < d; j++) {636 for (var j = 0; j < d; j++) { 637 637 dC[i, j] = posF[i, j] - negF[i, j] / sumQ; 638 638 } … … 640 640 641 641 private static void ComputeExactGradient(double[,] p, double[,] y, int n, int d, double[,] dC) { 642 643 642 // Make sure the current gradient contains zeros 644 for (var i = 0; i < n; i++) for(var j = 0; j < d; j++) dC[i, j] = 0.0;643 for (var i = 0; i < n; i++) for (var j = 0; j < d; j++) dC[i, j] = 0.0; 645 644 646 645 // Compute the squared Euclidean distance matrix … … 651 650 var q = new double[n, n]; 652 651 var sumQ = .0; 653 for (var n1 = 0; n1 < n; n1++) {654 for (var m = 0; m < n; m++) {655 if (n1 == m) continue;652 for (var n1 = 0; n1 < n; n1++) { 653 for (var m = 0; m < n; m++) { 654 if (n1 == m) continue; 656 655 q[n1, m] = 1 / (1 + dd[n1, m]); 657 656 sumQ += q[n1, m]; … … 660 659 661 660 // Perform the computation of the gradient 662 for (var n1 = 0; n1 < n; n1++) {663 for (var m = 0; m < n; m++) {664 if (n1 == m) continue;661 for (var n1 = 0; n1 < n; n1++) { 662 for (var m = 0; m < n; m++) { 663 if (n1 == m) continue; 665 664 var mult = (p[n1, m] - q[n1, m] / sumQ) * q[n1, m]; 666 for (var d1 = 0; d1 < d; d1++) {665 for (var d1 = 0; d1 < d; d1++) { 667 666 dC[n1, d1] += (y[n1, d1] - y[m, d1]) * 
mult; 668 667 } … … 673 672 private static void ComputeSquaredEuclideanDistance(double[,] x, int n, int d, double[,] dd) { 674 673 var dataSums = new double[n]; 675 for (var i = 0; i < n; i++) {676 for (var j = 0; j < d; j++) {674 for (var i = 0; i < n; i++) { 675 for (var j = 0; j < d; j++) { 677 676 dataSums[i] += x[i, j] * x[i, j]; 678 677 } 679 678 } 680 for (var i = 0; i < n; i++) {681 for (var m = 0; m < n; m++) {679 for (var i = 0; i < n; i++) { 680 for (var m = 0; m < n; m++) { 682 681 dd[i, m] = dataSums[i] + dataSums[m]; 683 682 } 684 683 } 685 for (var i = 0; i < n; i++) {684 for (var i = 0; i < n; i++) { 686 685 dd[i, i] = 0.0; 687 for (var m = i + 1; m < n; m++) {686 for (var m = i + 1; m < n; m++) { 688 687 dd[i, m] = 0.0; 689 for (var j = 0; j < d; j++) {688 for (var j = 0; j < d; j++) { 690 689 dd[i, m] += (x[i, j] - x[m, j]) * (x[i, j] - x[m, j]); 691 690 } … … 700 699 var d = x.GetLength(1); 701 700 var mean = new double[d]; 702 for (var i = 0; i < n; i++) {703 for (var j = 0; j < d; j++) {701 for (var i = 0; i < n; i++) { 702 for (var j = 0; j < d; j++) { 704 703 mean[j] += x[i, j]; 705 704 } 706 705 } 707 for (var i = 0; i < d; i++) {706 for (var i = 0; i < d; i++) { 708 707 mean[i] /= n; 709 708 } 710 709 // Subtract data mean 711 for (var i = 0; i < n; i++) {712 for (var j = 0; j < d; j++) {710 for (var i = 0; i < n; i++) { 711 for (var j = 0; j < d; j++) { 713 712 x[i, j] -= mean[j]; 714 713 } 715 714 } 716 715 } 716 #endregion 717 717 } 718 718 } -
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/VantagePointTree.cs
r14862 r15207 68 68 /// </summary> 69 69 /// <typeparam name="T"></typeparam> 70 public class VantagePointTree<T> : IVantagePointTree<T> { 71 #region properties 70 public class VantagePointTree<T> { 72 71 private readonly List<T> items; 73 72 private readonly Node root; 74 73 private readonly IDistance<T> distance; 75 #endregion76 74 77 75 public VantagePointTree(IDistance<T> distance, IEnumerable<T> items) : base() { … … 82 80 } 83 81 82 /// <summary> 83 /// provides the k-nearest neighbours to a certain target element 84 /// </summary> 85 /// <param name="target">The target element</param> 86 /// <param name="k">How many neighbours</param> 87 /// <param name="results">The nearest neighbouring elements</param> 88 /// <param name="distances">The distances form the target corresponding to the neighbouring elements</param> 84 89 public void Search(T target, int k, out IList<T> results, out IList<double> distances) { 85 90 var heap = new PriorityQueue<double, IndexedItem<double>>(double.MaxValue, double.MinValue, k); 86 doubletau = double.MaxValue;91 var tau = double.MaxValue; 87 92 Search(root, target, k, heap, ref tau); 88 93 var res = new List<T>(); 89 94 var dist = new List<double>(); 90 95 while (heap.Size > 0) { 91 res.Add(items[heap.PeekMinValue().Index]); 96 res.Add(items[heap.PeekMinValue().Index]);// actually max distance 92 97 dist.Add(heap.PeekMinValue().Value); 93 98 heap.RemoveMin(); 94 99 } 95 res.Reverse(); 100 res.Reverse(); 96 101 dist.Reverse(); 97 102 results = res; … … 104 109 if (dist < tau) { 105 110 if (heap.Size == k) heap.RemoveMin(); // remove furthest node from result list (if we already have k results) 106 heap.Insert(-dist, new IndexedItem<double>(node.index, dist)); 111 heap.Insert(-dist, new IndexedItem<double>(node.index, dist)); 107 112 if (heap.Size == k) tau = heap.PeekMinValue().Value; 108 113 }
Note: See TracChangeset
for help on using the changeset viewer.