Index: /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Distances/AngularDistance.cs
===================================================================
--- /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Distances/AngularDistance.cs (revision 14512)
+++ /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Distances/AngularDistance.cs (revision 14512)
@@ -0,0 +1,63 @@
+#region License Information
+/* HeuristicLab
+ * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
+ *
+ * This file is part of HeuristicLab.
+ *
+ * HeuristicLab is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * HeuristicLab is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with HeuristicLab. If not, see .
+ */
+#endregion
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using HeuristicLab.Common;
+using HeuristicLab.Core;
+using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
+
+namespace HeuristicLab.Problems.SurrogateProblem {
+ [StorableClass]
+ [Item("AngularDistance", "The Angluar Distance as defined as a normalized distance measure dependent on the angle between two vectors.\nIt is designed for vectors with all positive coordinates")]
+ public class AngularDistance : DistanceBase> {
+
+ #region HLConstructors
+ [StorableConstructor]
+ protected AngularDistance(bool deserializing) : base(deserializing) { }
+ protected AngularDistance(AngularDistance original, Cloner cloner)
+ : base(original, cloner) { }
+ public AngularDistance() { }
+ public override IDeepCloneable Clone(Cloner cloner) {
+ return new AngularDistance(this, cloner);
+ }
+ #endregion
+
+ #region statics
+ public static double GetDistance(IReadOnlyList point1, IReadOnlyList point2) {
+ if (point1.Count != point2.Count) throw new ArgumentException("Angluar distance not defined on vectors of different length");
+ var innerProduct = point1.Zip(point2, (x, y) => x * y).Sum();
+ var res = Math.Acos(innerProduct / (point1.Norm() * point2.Norm())) / Math.PI;
+ return double.IsNaN(res) ? 0 : res;
+ }
+ public static double GetDistance(IReadOnlyList a, IReadOnlyList b, double threshold) {
+ return GetDistance(a, b); //no shortcut evaluation for Angular Distance
+ }
+ #endregion
+ public override double Get(IReadOnlyList a, IReadOnlyList b) {
+ return GetDistance(a, b);
+ }
+ public override double Get(IReadOnlyList a, IReadOnlyList b, double threshold) {
+ return GetDistance(a, b, threshold);
+ }
+ }
+}
Index: /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Distances/DataPointDistance.cs
===================================================================
--- /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Distances/DataPointDistance.cs (revision 14512)
+++ /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Distances/DataPointDistance.cs (revision 14512)
@@ -0,0 +1,51 @@
+#region License Information
+/* HeuristicLab
+ * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
+ *
+ * This file is part of HeuristicLab.
+ *
+ * HeuristicLab is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * HeuristicLab is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with HeuristicLab. If not, see .
+ */
+#endregion
+
+using HeuristicLab.Common;
+using HeuristicLab.Core;
+using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
+
+namespace HeuristicLab.Problems.SurrogateProblem {
+ [StorableClass]
+ [Item("DataPointDistance", "A distance wrapper for DataPoints")]
+ internal class DataPointDistance : DistanceBase> where T : class {
+
+ #region properties
+ [Storable]
+ private IDistance dist;
+ #endregion
+
+ #region HLConstructors
+ [StorableConstructor]
+ protected DataPointDistance(bool deserializing) : base(deserializing) { }
+ public override IDeepCloneable Clone(Cloner cloner) { return new DataPointDistance(this, cloner); }
+ protected DataPointDistance(DataPointDistance original, Cloner cloner) : base(original, cloner) { dist = cloner.Clone(original.dist); }
+ public DataPointDistance(IDistance distance) {
+ dist = distance;
+ }
+ #endregion
+
+ public override double Get(IDataPoint a, IDataPoint b) { return dist.Get(a.X(), b.X()); }
+ public override double Get(IDataPoint a, IDataPoint b, double threshold) { return dist.Get(a.X(), b.X(), threshold); }
+
+ }
+}
+
Index: /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Distances/DistanceBase.cs
===================================================================
--- /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Distances/DistanceBase.cs (revision 14512)
+++ /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Distances/DistanceBase.cs (revision 14512)
@@ -0,0 +1,60 @@
+#region License Information
+/* HeuristicLab
+ * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
+ *
+ * This file is part of HeuristicLab.
+ *
+ * HeuristicLab is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * HeuristicLab is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with HeuristicLab. If not, see .
+ */
+#endregion
+
+using System.Collections.Generic;
+using HeuristicLab.Common;
+using HeuristicLab.Core;
+using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
+
+namespace HeuristicLab.Problems.SurrogateProblem {
+ [StorableClass]
+ public abstract class DistanceBase : Item, IDistance {
+
+ #region HLConstructors
+ [StorableConstructor]
+ protected DistanceBase(bool deserializing) : base(deserializing) { }
+ protected DistanceBase(DistanceBase original, Cloner cloner)
+ : base(original, cloner) { }
+ protected DistanceBase() { }
+ #endregion
+
+ public abstract double Get(T a, T b);
+ public abstract double Get(T a, T b, double threshold);
+
+ public IComparer GetDistanceComparer(T item) {
+ return new DistanceComparer(item, this);
+ }
+
+ private class DistanceComparer : IComparer {
+ private readonly T item;
+ private readonly IDistance dist;
+
+ public DistanceComparer(T item, IDistance dist) {
+ this.dist = dist;
+ this.item = item;
+ }
+
+ public int Compare(T x, T y) {
+ return dist.Get(x, item).CompareTo(dist.Get(y, item));
+ }
+ }
+ }
+}
Index: /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Distances/EuclidianDistance.cs
===================================================================
--- /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Distances/EuclidianDistance.cs (revision 14512)
+++ /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Distances/EuclidianDistance.cs (revision 14512)
@@ -0,0 +1,71 @@
+#region License Information
+/* HeuristicLab
+ * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
+ *
+ * This file is part of HeuristicLab.
+ *
+ * HeuristicLab is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * HeuristicLab is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with HeuristicLab. If not, see .
+ */
+#endregion
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using HeuristicLab.Common;
+using HeuristicLab.Core;
+using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
+
+namespace HeuristicLab.Problems.SurrogateProblem {
+ [StorableClass]
+ [Item("EuclidianNorm", "A norm function that uses Euclidean distance")]
+ public class EuclidianDistance : DistanceBase> {
+
+ #region HLConstructors
+ [StorableConstructor]
+ protected EuclidianDistance(bool deserializing) : base(deserializing) { }
+ protected EuclidianDistance(EuclidianDistance original, Cloner cloner)
+ : base(original, cloner) { }
+ public EuclidianDistance() { }
+ public override IDeepCloneable Clone(Cloner cloner) {
+ return new EuclidianDistance(this, cloner);
+ }
+ #endregion
+
+ #region statics
+ public static double GetDistance(IReadOnlyList point1, IReadOnlyList point2) {
+ if (point1.Count != point2.Count) throw new ArgumentException("Euclidean distance not defined on vectors of different length");
+ return Math.Sqrt(point1.Zip(point2, (a1, b1) => (a1 - b1) * (a1 - b1)).Sum());
+ }
+ public static double GetDistance(IEnumerable a, IEnumerable b, double threshold) {
+ double sum = 0;
+ var it1 = a.GetEnumerator();
+ var it2 = b.GetEnumerator();
+ while (sum > threshold * threshold && it1.MoveNext() && it2.MoveNext()) {
+ var d = it1.Current - it2.Current;
+ sum += d * d;
+ }
+ it1.Dispose();
+ it2.Dispose();
+ return sum;
+ }
+ #endregion
+
+ public override double Get(IReadOnlyList a, IReadOnlyList b) {
+ return GetDistance(a, b);
+ }
+ public override double Get(IReadOnlyList a, IReadOnlyList b, double threshold) {
+ return GetDistance(a, b, threshold);
+ }
+ }
+}
Index: /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Distances/InnerProductDistance.cs
===================================================================
--- /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Distances/InnerProductDistance.cs (revision 14512)
+++ /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Distances/InnerProductDistance.cs (revision 14512)
@@ -0,0 +1,65 @@
+#region License Information
+/* HeuristicLab
+ * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
+ *
+ * This file is part of HeuristicLab.
+ *
+ * HeuristicLab is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * HeuristicLab is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with HeuristicLab. If not, see .
+ */
+#endregion
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using HeuristicLab.Common;
+using HeuristicLab.Core;
+using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
+
+namespace HeuristicLab.Problems.SurrogateProblem {
+
+ ///
+ /// this is not a proper distance
+ ///
+ [StorableClass]
+ [Item("InnerProductDistance", "The Angluar Distance as defined as a normalized distance measure dependent on the angle between two vectors.\nIt is designed for vectors with all positive coordinates")]
+ public class InnerProductDistance : DistanceBase> {
+
+ #region HLConstructors
+ [StorableConstructor]
+ protected InnerProductDistance(bool deserializing) : base(deserializing) { }
+ protected InnerProductDistance(InnerProductDistance original, Cloner cloner)
+ : base(original, cloner) { }
+ public InnerProductDistance() { }
+ public override IDeepCloneable Clone(Cloner cloner) {
+ return new InnerProductDistance(this, cloner);
+ }
+ #endregion
+
+ #region statics
+ public static double GetDistance(IReadOnlyList point1, IReadOnlyList point2) {
+ if (point1.Count != point2.Count) throw new ArgumentException("Inner Product distance not defined on vectors of different length");
+ return point1.Zip(point2, (x, y) => x * y).Sum();
+ }
+ public static double GetDistance(IEnumerable a, IEnumerable b, double threshold) {
+ return GetDistance(a.ToArray(), b.ToArray()); //no shortcut evaluation for Inner Product (summands may be negative => no way of telling if threshold is reached or not)
+ }
+ #endregion
+ public override double Get(IReadOnlyList a, IReadOnlyList b) {
+ return GetDistance(a, b);
+ }
+ public override double Get(IReadOnlyList a, IReadOnlyList b, double threshold) {
+ return GetDistance(a, b, threshold);
+ }
+ }
+}
Index: /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Distances/ManhattenDistance.cs
===================================================================
--- /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Distances/ManhattenDistance.cs (revision 14512)
+++ /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Distances/ManhattenDistance.cs (revision 14512)
@@ -0,0 +1,72 @@
+#region License Information
+/* HeuristicLab
+ * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
+ *
+ * This file is part of HeuristicLab.
+ *
+ * HeuristicLab is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * HeuristicLab is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with HeuristicLab. If not, see .
+ */
+#endregion
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using HeuristicLab.Common;
+using HeuristicLab.Core;
+using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
+
+namespace HeuristicLab.Problems.SurrogateProblem {
+ [StorableClass]
+ [Item("ManhattenDistance", "A distance function that uses Block distance")]
+ public class ManhattenDistance : DistanceBase> {
+
+ #region HLConstructors
+ [StorableConstructor]
+ protected ManhattenDistance(bool deserializing) : base(deserializing) { }
+ protected ManhattenDistance(ManhattenDistance original, Cloner cloner)
+ : base(original, cloner) { }
+ public ManhattenDistance() { }
+ public override IDeepCloneable Clone(Cloner cloner) {
+ return new ManhattenDistance(this, cloner);
+ }
+ #endregion
+
+ public static double GetDistance(IReadOnlyList point1, IReadOnlyList point2) {
+ if (point1.Count != point2.Count) throw new ArgumentException("Manhatten distance not defined on vectors of different length");
+ return point1.Zip(point2, (a1, b1) => Math.Abs(a1 - b1)).Sum();
+ }
+
+ public static double GetDistance(IEnumerable a, IEnumerable b, double threshold) {
+ double sum = 0;
+ var it1 = a.GetEnumerator();
+ var it2 = b.GetEnumerator();
+ while (sum > threshold && it1.MoveNext() && it2.MoveNext()) {
+ sum += Math.Abs(it1.Current - it2.Current);
+ }
+ it1.Dispose();
+ it2.Dispose();
+ return sum;
+ }
+
+ public override double Get(IReadOnlyList a, IReadOnlyList b) {
+ return GetDistance(a, b);
+ }
+
+ public override double Get(IReadOnlyList a, IReadOnlyList b, double threshold) {
+ return GetDistance(a, b, threshold);
+ }
+
+
+ }
+}
Index: /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj
===================================================================
--- /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj (revision 14511)
+++ /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj (revision 14512)
@@ -331,4 +331,5 @@
+
@@ -337,4 +338,19 @@
Code
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -415,18 +431,20 @@
+
-
+
+
-
-
-
-
-
-
+
+
+
+
+
+
Index: /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Interfaces/TSNEInterfaces/ICell.cs
===================================================================
--- /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Interfaces/TSNEInterfaces/ICell.cs (revision 14512)
+++ /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Interfaces/TSNEInterfaces/ICell.cs (revision 14512)
@@ -0,0 +1,32 @@
+#region License Information
+/* HeuristicLab
+ * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
+ *
+ * This file is part of HeuristicLab.
+ *
+ * HeuristicLab is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * HeuristicLab is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with HeuristicLab. If not, see .
+ */
+#endregion
+
+using HeuristicLab.Common;
+
+namespace HeuristicLab.Algorithms.DataAnalysis {
+ public interface ICell : IDeepCloneable {
+ double GetCorner(int d);
+ double GetWidth(int d);
+ void SetCorner(int d, double val);
+ void SetWidth(int d, double val);
+ bool ContainsPoint(double[] point);
+ }
+}
Index: /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Interfaces/TSNEInterfaces/IDataPoint.cs
===================================================================
--- /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Interfaces/TSNEInterfaces/IDataPoint.cs (revision 14512)
+++ /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Interfaces/TSNEInterfaces/IDataPoint.cs (revision 14512)
@@ -0,0 +1,30 @@
+#region License Information
+/* HeuristicLab
+ * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
+ *
+ * This file is part of HeuristicLab.
+ *
+ * HeuristicLab is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * HeuristicLab is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with HeuristicLab. If not, see .
+ */
+#endregion
+
+using HeuristicLab.Common;
+
+namespace HeuristicLab.Algorithms.DataAnalysis {
+ public interface IDataPoint : IDeepCloneable {
+ int Index { get; }
+ T X { get; }
+
+ }
+}
Index: /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Interfaces/TSNEInterfaces/IDistance.cs
===================================================================
--- /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Interfaces/TSNEInterfaces/IDistance.cs (revision 14512)
+++ /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Interfaces/TSNEInterfaces/IDistance.cs (revision 14512)
@@ -0,0 +1,55 @@
+#region License Information
+/* HeuristicLab
+ * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
+ *
+ * This file is part of HeuristicLab.
+ *
+ * HeuristicLab is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * HeuristicLab is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with HeuristicLab. If not, see .
+ */
+#endregion
+
+using System.Collections.Generic;
+using HeuristicLab.Core;
+
+namespace HeuristicLab.Algorithms.DataAnalysis {
+ public interface IDistance : IItem {
+ ///
+ /// Calculates a distance measure between two arbitrary Vectors. The distance value d is
+ /// 1.) positive d(x,y)>=0
+ /// 2.) symmetric d(x,y) = d(y,x)
+ /// 3.) zero-reflexive d(x,x) =0;
+ ///
+ /// an array representing point x
+ /// >an array representing point y
+ /// d(x,y)
+ double Get(T a, T b);
+
+ ///
+ /// Calculates the correct kernel measure if it is smaller than threshold, but any value greater then threshold if the correct distance is greater.
+ /// This is for performace only
+ ///
+ ///
+ ///
+ ///
+ ///
+ double Get(T a, T b, double threshold);
+
+ ///
+ /// Returns a comparator wich compares the distances to item. (allows for sorting nearest/farthest neighbours)
+ ///
+ ///
+ ///
+ IComparer GetDistanceComparer(T item);
+ }
+}
Index: /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Interfaces/TSNEInterfaces/IHeap.cs
===================================================================
--- /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Interfaces/TSNEInterfaces/IHeap.cs (revision 14512)
+++ /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Interfaces/TSNEInterfaces/IHeap.cs (revision 14512)
@@ -0,0 +1,34 @@
+#region License Information
+/* HeuristicLab
+* Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
+*
+* This file is part of HeuristicLab.
+*
+* HeuristicLab is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* HeuristicLab is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with HeuristicLab. If not, see .
+*/
+
+#endregion
+
+using System.Collections.Generic;
+
+namespace HeuristicLab.Algorithms.DataAnalysis {
+ public interface IHeap {
+ int Size { get; }
+ KeyValuePair PeekMin();
+ TK PeekMinKey();
+ TV PeekMinValue();
+ void Insert(TK key, TV value);
+ void RemoveMin();
+ }
+}
Index: /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Interfaces/TSNEInterfaces/IKernelFunction.cs
===================================================================
--- /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Interfaces/TSNEInterfaces/IKernelFunction.cs (revision 14512)
+++ /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Interfaces/TSNEInterfaces/IKernelFunction.cs (revision 14512)
@@ -0,0 +1,36 @@
+#region License Information
+/* HeuristicLab
+ * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
+ *
+ * This file is part of HeuristicLab.
+ *
+ * HeuristicLab is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * HeuristicLab is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with HeuristicLab. If not, see .
+ */
+#endregion
+
+
+namespace HeuristicLab.Algorithms.DataAnalysis {
+ public interface IKernelFunction : ICovarianceFunction {
+ ///
+ /// Calculates a kernel measure between two aribitrary Vectors. The distance value d is
+ /// 1.) positive d(x,y)>0
+ /// 2.) symmetric d(x,y) = d(y,x)
+ /// 3.) only dependent on the norm ||x-y|| (Radial Basis criterium)
+ ///
+ /// an array representing point x
+ /// >an array representing point y
+ /// d(x,y)
+ double Get(T a, T b);
+ }
+}
Index: /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Interfaces/TSNEInterfaces/ISPTree.cs
===================================================================
--- /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Interfaces/TSNEInterfaces/ISPTree.cs (revision 14512)
+++ /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Interfaces/TSNEInterfaces/ISPTree.cs (revision 14512)
@@ -0,0 +1,39 @@
+#region License Information
+/* HeuristicLab
+ * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
+ *
+ * This file is part of HeuristicLab.
+ *
+ * HeuristicLab is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * HeuristicLab is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with HeuristicLab. If not, see .
+ */
+#endregion
+
+using HeuristicLab.Common;
+
+namespace HeuristicLab.Algorithms.DataAnalysis {
+ public interface ISPTree : IDeepCloneable {
+
+ double[,] Data { set; }
+ int GetDepth();
+ ISPTree GetParent();
+ bool Insert(int newIndex);
+ void Subdivide();
+ bool IsCorrect();
+ void GetAllIndices(int[] indices);
+ int GetAllIndices(int[] indices, int loc);
+ void ComputeNonEdgeForces(int pointIndex, double theta, double[] negF, double[] sumQ);
+ void ComputeEdgeForces(int[] rowP, int[] colP, double[] valP, int n, double[,] posF);
+
+ }
+}
Index: /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Interfaces/TSNEInterfaces/ITSNE.cs
===================================================================
--- /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Interfaces/TSNEInterfaces/ITSNE.cs (revision 14512)
+++ /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Interfaces/TSNEInterfaces/ITSNE.cs (revision 14512)
@@ -0,0 +1,28 @@
+#region License Information
+/* HeuristicLab
+ * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
+ *
+ * This file is part of HeuristicLab.
+ *
+ * HeuristicLab is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * HeuristicLab is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with HeuristicLab. If not, see .
+ */
+#endregion
+
+using HeuristicLab.Common;
+
+namespace HeuristicLab.Algorithms.DataAnalysis {
+ public interface ITSNE : IDeepCloneable {
+ double[,] Run(T[] data, int newDimensions, double perplexity, double theta);
+ }
+}
Index: /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Interfaces/TSNEInterfaces/IVPTree.cs
===================================================================
--- /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Interfaces/TSNEInterfaces/IVPTree.cs (revision 14512)
+++ /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Interfaces/TSNEInterfaces/IVPTree.cs (revision 14512)
@@ -0,0 +1,30 @@
+#region License Information
+/* HeuristicLab
+ * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
+ *
+ * This file is part of HeuristicLab.
+ *
+ * HeuristicLab is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * HeuristicLab is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with HeuristicLab. If not, see .
+ */
+#endregion
+
+using System.Collections.Generic;
+using HeuristicLab.Common;
+
+namespace HeuristicLab.Algorithms.DataAnalysis {
+ public interface IVPTree : IDeepCloneable {
+ void Create(IEnumerable items);
+ void Search(T target, int k, out List results, out List distances);
+ }
+}
Index: /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/DataPointDistance.cs
===================================================================
--- /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/DataPointDistance.cs (revision 14511)
+++ /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/DataPointDistance.cs (revision 14512)
@@ -20,5 +20,4 @@
#endregion
-using HeuristicLab.Algorithms.DataAnalysis.Distances;
using HeuristicLab.Common;
using HeuristicLab.Core;
Index: /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/DistanceBase.cs
===================================================================
--- /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/DistanceBase.cs (revision 14511)
+++ /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/DistanceBase.cs (revision 14512)
@@ -25,5 +25,5 @@
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
-namespace HeuristicLab.Algorithms.DataAnalysis.Distances {
+namespace HeuristicLab.Algorithms.DataAnalysis {
[StorableClass]
public abstract class DistanceBase : Item, IDistance {
Index: /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/EuclidianDistance.cs
===================================================================
--- /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/EuclidianDistance.cs (revision 14511)
+++ /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/EuclidianDistance.cs (revision 14512)
@@ -23,5 +23,4 @@
using System.Collections.Generic;
using System.Linq;
-using HeuristicLab.Algorithms.DataAnalysis.Distances;
using HeuristicLab.Common;
using HeuristicLab.Core;
Index: /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/FuctionalDistance.cs
===================================================================
--- /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/FuctionalDistance.cs (revision 14512)
+++ /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/FuctionalDistance.cs (revision 14512)
@@ -0,0 +1,58 @@
+#region License Information
+/* HeuristicLab
+ * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
+ *
+ * This file is part of HeuristicLab.
+ *
+ * HeuristicLab is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * HeuristicLab is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with HeuristicLab. If not, see .
+ */
+#endregion
+
+using System;
+using HeuristicLab.Common;
+using HeuristicLab.Core;
+using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
+
+namespace HeuristicLab.Algorithms.DataAnalysis {
+ [StorableClass]
+ [Item("FuctionalDistance", "A distance wrapper for DataPoints")]
+ internal class FuctionalDistance : DistanceBase where T : class {
+
+ #region properties
+ [Storable]
+ private Func dist;
+ #endregion
+
+ #region HLConstructors & Cloning
+ [StorableConstructor]
+ protected FuctionalDistance(bool deserializing) : base(deserializing) { }
+ protected FuctionalDistance(FuctionalDistance original, Cloner cloner) : base(original, cloner) {
+ dist = original.dist; //func needs to be stateless
+ }
+ public override IDeepCloneable Clone(Cloner cloner) { return new FuctionalDistance(this, cloner); }
+ public FuctionalDistance(Func distance) {
+ dist = distance;
+ }
+ #endregion
+
+ public override double Get(T a, T b) {
+ return dist.Invoke(a, b);
+ }
+
+ public override double Get(T a, T b, double threshold) {
+ return Get(a, b); //no threshold evaluation here
+ }
+ }
+}
+
Index: /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/InnerProductDistance.cs
===================================================================
--- /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/InnerProductDistance.cs (revision 14512)
+++ /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/InnerProductDistance.cs (revision 14512)
@@ -0,0 +1,65 @@
+#region License Information
+/* HeuristicLab
+ * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
+ *
+ * This file is part of HeuristicLab.
+ *
+ * HeuristicLab is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * HeuristicLab is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with HeuristicLab. If not, see .
+ */
+#endregion
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using HeuristicLab.Common;
+using HeuristicLab.Core;
+using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
+
+namespace HeuristicLab.Algorithms.DataAnalysis {
+
+ ///
+ /// this is not a proper distance
+ ///
+ [StorableClass]
+ [Item("InnerProductDistance", "The Angluar Distance as defined as a normalized distance measure dependent on the angle between two vectors.\nIt is designed for vectors with all positive coordinates")]
+ public class InnerProductDistance : DistanceBase> {
+
+ #region HLConstructors
+ [StorableConstructor]
+ protected InnerProductDistance(bool deserializing) : base(deserializing) { }
+ protected InnerProductDistance(InnerProductDistance original, Cloner cloner)
+ : base(original, cloner) { }
+ public InnerProductDistance() { }
+ public override IDeepCloneable Clone(Cloner cloner) {
+ return new InnerProductDistance(this, cloner);
+ }
+ #endregion
+
+ #region statics
+ public static double GetDistance(IReadOnlyList point1, IReadOnlyList point2) {
+ if (point1.Count != point2.Count) throw new ArgumentException("Inner Product distance not defined on vectors of different length");
+ return point1.Zip(point2, (x, y) => x * y).Sum();
+ }
+ public static double GetDistance(IEnumerable a, IEnumerable b, double threshold) {
+ return GetDistance(a.ToArray(), b.ToArray()); //no shortcut evaluation for Inner Product (summands may be negative => no way of telling if threshold is reached or not)
+ }
+ #endregion
+ public override double Get(IReadOnlyList a, IReadOnlyList b) {
+ return GetDistance(a, b);
+ }
+ public override double Get(IReadOnlyList a, IReadOnlyList b, double threshold) {
+ return GetDistance(a, b, threshold);
+ }
+ }
+}
Index: /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNE.cs
===================================================================
--- /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNE.cs (revision 14511)
+++ /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNE.cs (revision 14512)
@@ -57,4 +57,5 @@
using System.Collections.Generic;
using System.Linq;
+using HeuristicLab.Analysis;
using HeuristicLab.Common;
using HeuristicLab.Core;
@@ -70,4 +71,7 @@
private const string IterationResultName = "Iteration";
private const string ErrorResultName = "Error";
+ private const string ErrorPlotResultName = "ErrorPlot";
+ private const string ScatterPlotResultName = "Scatterplot";
+ private const string DataResultName = "Projected Data";
#region Properties
@@ -90,4 +94,12 @@
[Storable]
private ResultCollection results;
+ [Storable]
+ private Dictionary> dataRowLookup;
+ [Storable]
+ private Dictionary dataRows;
+ #endregion
+
+ #region Stopping
+ public volatile bool Running;
#endregion
@@ -105,7 +117,9 @@
random = cloner.Clone(random);
results = cloner.Clone(results);
+ dataRowLookup = original.dataRowLookup.ToDictionary(entry => entry.Key, entry => entry.Value.Select(x => x).ToList());
+ dataRows = original.dataRows.ToDictionary(entry => entry.Key, entry => cloner.Clone(entry.Value));
}
public override IDeepCloneable Clone(Cloner cloner) { return new TSNE(this, cloner); }
- public TSNE(IDistance distance, IRandom random, ResultCollection results = null, int maxIter = 1000, int stopLyingIter = 250, int momSwitchIter = 250, double momentum = .5, double finalMomentum = .8, double eta = 200.0) {
+ public TSNE(IDistance distance, IRandom random, ResultCollection results = null, int maxIter = 1000, int stopLyingIter = 250, int momSwitchIter = 250, double momentum = .5, double finalMomentum = .8, double eta = 200.0, Dictionary> dataRowLookup = null, Dictionary dataRows = null) {
this.distance = distance;
this.maxIter = maxIter;
@@ -117,4 +131,8 @@
this.random = random;
this.results = results;
+ this.dataRowLookup = dataRowLookup;
+ if (dataRows != null)
+ this.dataRows = dataRows;
+ else { this.dataRows = new Dictionary(); }
}
#endregion
@@ -124,15 +142,6 @@
var noDatapoints = data.Length;
if (noDatapoints - 1 < 3 * perplexity) throw new ArgumentException("Perplexity too large for the number of data points!");
-
- if (results != null) {
- if (!results.ContainsKey(IterationResultName)) {
- results.Add(new Result(IterationResultName, new IntValue(0)));
- } else ((IntValue)results[IterationResultName].Value).Value = 0;
- if (!results.ContainsKey(ErrorResultName)) {
- results.Add(new Result(ErrorResultName, new DoubleValue(0)));
- } else ((DoubleValue)results[ErrorResultName].Value).Value = 0;
- }
-
- // Determine whether we are using an exact algorithm
+ SetUpResults(data);
+ Running = true;
var exact = Math.Abs(theta) < double.Epsilon;
var newData = new double[noDatapoints, newDimensions];
@@ -141,91 +150,41 @@
var gains = new double[noDatapoints, newDimensions];
for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < newDimensions; j++) gains[i, j] = 1.0;
-
- // Compute input similarities for exact t-SNE
double[,] p = null;
int[] rowP = null;
int[] colP = null;
double[] valP = null;
- if (exact) {
- // Compute similarities
- p = new double[noDatapoints, noDatapoints];
- ComputeGaussianPerplexity(data, noDatapoints, p, perplexity);
- // Symmetrize input similarities
- for (var n = 0; n < noDatapoints; n++) {
- for (var m = n + 1; m < noDatapoints; m++) {
- p[n, m] += p[m, n];
- p[m, n] = p[n, m];
- }
- }
- var sumP = .0;
- for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < noDatapoints; j++) sumP += p[i, j];
- for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < noDatapoints; j++) p[i, j] /= sumP;
- } // Compute input similarities for approximate t-SNE
- else {
- // Compute asymmetric pairwise input similarities
- ComputeGaussianPerplexity(data, noDatapoints, out rowP, out colP, out valP, perplexity, (int)(3 * perplexity));
- // Symmetrize input similarities
- int[] sRowP, symColP;
- double[] sValP;
- SymmetrizeMatrix(rowP, colP, valP, out sRowP, out symColP, out sValP);
- rowP = sRowP;
- colP = symColP;
- valP = sValP;
- var sumP = .0;
- for (var i = 0; i < rowP[noDatapoints]; i++) sumP += valP[i];
- for (var i = 0; i < rowP[noDatapoints]; i++) valP[i] /= sumP;
- }
+ var rand = new NormalDistributedRandom(random, 0, 1);
+
+ //Calculate Similarities
+ if (exact) p = CalculateExactSimilarites(data, perplexity);
+ else CalculateApproximateSimilarities(data, perplexity, out rowP, out colP, out valP);
// Lie about the P-values
- if (exact) {
- for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < noDatapoints; j++) p[i, j] *= 12.0;
- } else {
- for (var i = 0; i < rowP[noDatapoints]; i++) valP[i] *= 12.0;
- }
-
- var rand = new NormalDistributedRandom(random, 0, 1);
+ if (exact) for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < noDatapoints; j++) p[i, j] *= 12.0;
+ else for (var i = 0; i < rowP[noDatapoints]; i++) valP[i] *= 12.0;
+
// Initialize solution (randomly)
- for (var i = 0; i < noDatapoints; i++)
- for (var j = 0; j < newDimensions; j++)
- newData[i, j] = rand.NextDouble() * .0001;
+ for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < newDimensions; j++) newData[i, j] = rand.NextDouble() * .0001;
// Perform main training loop
- for (var iter = 0; iter < maxIter; iter++) {
-
- // Compute (approximate) gradient
+ for (var iter = 0; iter < maxIter && Running; iter++) {
if (exact) ComputeExactGradient(p, newData, noDatapoints, newDimensions, dY);
else ComputeGradient(rowP, colP, valP, newData, noDatapoints, newDimensions, dY, theta);
-
// Update gains
- for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < newDimensions; j++)
- gains[i, j] = Math.Sign(dY[i, j]) != Math.Sign(uY[i, j]) ? gains[i, j] + .2 : gains[i, j] * .8;
+ for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < newDimensions; j++) gains[i, j] = Math.Sign(dY[i, j]) != Math.Sign(uY[i, j]) ? gains[i, j] + .2 : gains[i, j] * .8;
for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < newDimensions; j++) if (gains[i, j] < .01) gains[i, j] = .01;
-
// Perform gradient update (with momentum and gains)
for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < newDimensions; j++) uY[i, j] = currentMomentum * uY[i, j] - eta * gains[i, j] * dY[i, j];
for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < newDimensions; j++) newData[i, j] = newData[i, j] + uY[i, j];
-
// Make solution zero-mean
ZeroMean(newData);
-
// Stop lying about the P-values after a while, and switch momentum
if (iter == stopLyingIter) {
- if (exact) {
- for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < noDatapoints; j++) p[i, j] /= 12.0;
- } else {
- for (var i = 0; i < rowP[noDatapoints]; i++) valP[i] /= 12.0;
- }
+ if (exact) for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < noDatapoints; j++) p[i, j] /= 12.0;
+ else for (var i = 0; i < rowP[noDatapoints]; i++) valP[i] /= 12.0;
}
if (iter == momSwitchIter) currentMomentum = finalMomentum;
- if (results == null) continue;
- var errors = new List();
- // Print out progress
- var c = exact
- ? EvaluateError(p, newData, noDatapoints, newDimensions)
- : EvaluateError(rowP, colP, valP, newData, theta);
- errors.Add(c);
- ((IntValue)results[IterationResultName].Value).Value = iter + 1;
- ((DoubleValue)results[ErrorResultName].Value).Value = errors.Last();
+ Analyze(exact, iter, p, rowP, colP, valP, newData, noDatapoints, newDimensions, theta);
}
return newData;
@@ -234,6 +193,111 @@
return new TSNE(distance, random).Run(data, newDimensions, perplexity, theta);
}
+ public static double[,] Run
(TR[] data, int newDimensions, double perplexity, double theta, Func
distance, IRandom random) where TR : class, IDeepCloneable {
+ return new TSNE
(new FuctionalDistance
(distance), random).Run(data, newDimensions, perplexity, theta);
+ }
#region helpers
+
+ private void SetUpResults(IReadOnlyCollection data) {
+ if (dataRowLookup == null) {
+ dataRowLookup = new Dictionary>();
+ dataRowLookup.Add("Data", Enumerable.Range(0, data.Count).ToList());
+ }
+ if (results == null) return;
+ if (!results.ContainsKey(IterationResultName)) results.Add(new Result(IterationResultName, new IntValue(0)));
+ else ((IntValue)results[IterationResultName].Value).Value = 0;
+
+ if (!results.ContainsKey(ErrorResultName)) results.Add(new Result(ErrorResultName, new DoubleValue(0)));
+ else ((DoubleValue)results[ErrorResultName].Value).Value = 0;
+
+ if (!results.ContainsKey(ErrorPlotResultName)) results.Add(new Result(ErrorPlotResultName, new DataTable(ErrorPlotResultName, "Development of errors during Gradiant descent")));
+ else results[ErrorPlotResultName].Value = new DataTable(ErrorPlotResultName, "Development of errors during Gradiant descent");
+
+ var plot = results[ErrorPlotResultName].Value as DataTable;
+ if (plot == null) throw new ArgumentException("could not create/access Error-DataTable in Results-Collection");
+ if (!plot.Rows.ContainsKey("errors")) {
+ plot.Rows.Add(new DataRow("errors"));
+ }
+ plot.Rows["errors"].Values.Clear();
+ results.Add(new Result(ScatterPlotResultName, "Plot of the projected data", new ScatterPlot(DataResultName, "")));
+ results.Add(new Result(DataResultName, "Projected Data", new DoubleMatrix()));
+
+ }
+ private void Analyze(bool exact, int iter, double[,] p, int[] rowP, int[] colP, double[] valP, double[,] newData, int noDatapoints, int newDimensions, double theta) {
+ if (results == null) return;
+ var plot = results[ErrorPlotResultName].Value as DataTable;
+ if (plot == null) throw new ArgumentException("Could not create/access Error-DataTable in Results-Collection. Was it removed by some effect?");
+ var errors = plot.Rows["errors"].Values;
+ var c = exact
+ ? EvaluateError(p, newData, noDatapoints, newDimensions)
+ : EvaluateError(rowP, colP, valP, newData, theta);
+ errors.Add(c);
+ ((IntValue)results[IterationResultName].Value).Value = iter + 1;
+ ((DoubleValue)results[ErrorResultName].Value).Value = errors.Last();
+
+ var ndata = Normalize(newData);
+ results[DataResultName].Value = new DoubleMatrix(ndata);
+ var splot = results[ScatterPlotResultName].Value as ScatterPlot;
+ FillScatterPlot(ndata, splot);
+
+
+ }
+ private void FillScatterPlot(double[,] lowDimData, ScatterPlot plot) {
+ foreach (var rowName in dataRowLookup.Keys) {
+ if (!plot.Rows.ContainsKey(rowName)) {
+ plot.Rows.Add(dataRows.ContainsKey(rowName) ? dataRows[rowName] : new ScatterPlotDataRow(rowName, "", new List>()));
+ } else plot.Rows[rowName].Points.Clear();
+ plot.Rows[rowName].Points.AddRange(dataRowLookup[rowName].Select(i => new Point2D(lowDimData[i, 0], lowDimData[i, 1])));
+ }
+ }
+ private static double[,] Normalize(double[,] data) {
+ var max = new double[data.GetLength(1)];
+ var min = new double[data.GetLength(1)];
+ var res = new double[data.GetLength(0), data.GetLength(1)];
+ for (var i = 0; i < max.Length; i++) max[i] = min[i] = data[0, i];
+ for (var i = 0; i < data.GetLength(0); i++)
+ for (var j = 0; j < data.GetLength(1); j++) {
+ var v = data[i, j];
+ max[j] = Math.Max(max[j], v);
+ min[j] = Math.Min(min[j], v);
+ }
+ for (var i = 0; i < data.GetLength(0); i++) {
+ for (var j = 0; j < data.GetLength(1); j++) {
+ res[i, j] = (data[i, j] - (max[j] + min[j]) / 2) / (max[j] - min[j]);
+ }
+ }
+ return res;
+ }
+ private void CalculateApproximateSimilarities(T[] data, double perplexity, out int[] rowP, out int[] colP, out double[] valP) {
+ // Compute asymmetric pairwise input similarities
+ ComputeGaussianPerplexity(data, data.Length, out rowP, out colP, out valP, perplexity, (int)(3 * perplexity));
+ // Symmetrize input similarities
+ int[] sRowP, symColP;
+ double[] sValP;
+ SymmetrizeMatrix(rowP, colP, valP, out sRowP, out symColP, out sValP);
+ rowP = sRowP;
+ colP = symColP;
+ valP = sValP;
+ var sumP = .0;
+ for (var i = 0; i < rowP[data.Length]; i++) sumP += valP[i];
+ for (var i = 0; i < rowP[data.Length]; i++) valP[i] /= sumP;
+ }
+ private double[,] CalculateExactSimilarites(T[] data, double perplexity) {
+ // Compute similarities
+ var p = new double[data.Length, data.Length];
+ ComputeGaussianPerplexity(data, data.Length, p, perplexity);
+ // Symmetrize input similarities
+ for (var n = 0; n < data.Length; n++) {
+ for (var m = n + 1; m < data.Length; m++) {
+ p[n, m] += p[m, n];
+ p[m, n] = p[n, m];
+ }
+ }
+ var sumP = .0;
+ for (var i = 0; i < data.Length; i++) for (var j = 0; j < data.Length; j++) sumP += p[i, j];
+ for (var i = 0; i < data.Length; i++) for (var j = 0; j < data.Length; j++) p[i, j] /= sumP;
+ return p;
+ }
+
private void ComputeGaussianPerplexity(IReadOnlyList x, int n, out int[] rowP, out int[] colP, out double[] valP, double perplexity, int k) {
if (perplexity > k) throw new ArgumentException("Perplexity should be lower than K!");
Index: /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNEAnalysis.cs
===================================================================
--- /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNEAnalysis.cs (revision 14511)
+++ /branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNEAnalysis.cs (revision 14512)
@@ -20,5 +20,6 @@
#endregion
-using System;
+using System.Collections.Generic;
+using System.Drawing;
using System.Linq;
using HeuristicLab.Analysis;
@@ -27,5 +28,4 @@
using HeuristicLab.Data;
using HeuristicLab.Encodings.RealVectorEncoding;
-using HeuristicLab.Optimization;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
@@ -60,4 +60,5 @@
private const string SetSeedRandomlyParameterName = "SetSeedRandomly";
private const string SeedParameterName = "Seed";
+ private const string ClassesParameterName = "ClassNames";
#endregion
@@ -67,7 +68,7 @@
get { return Parameters[PerplexityParameterName] as IFixedValueParameter; }
}
- public IFixedValueParameter ThetaParameter
- {
- get { return Parameters[ThetaParameterName] as IFixedValueParameter; }
+ public OptionalValueParameter ThetaParameter
+ {
+ get { return Parameters[ThetaParameterName] as OptionalValueParameter; }
}
public IFixedValueParameter NewDimensionsParameter
@@ -110,4 +111,8 @@
{
get { return Parameters[SeedParameterName] as IFixedValueParameter; }
+ }
+ public IFixedValueParameter ClassesParameter
+ {
+ get { return Parameters[ClassesParameterName] as IFixedValueParameter; }
}
#endregion
@@ -124,5 +129,5 @@
public double Theta
{
- get { return ThetaParameter.Value.Value; }
+ get { return ThetaParameter.Value == null ? 0 : ThetaParameter.Value.Value; }
}
public int NewDimensions
@@ -152,5 +157,8 @@
public double Eta
{
- get { return EtaParameter.Value.Value; }
+ get
+ {
+ return EtaParameter.Value == null ? 0 : EtaParameter.Value.Value;
+ }
}
public bool SetSeedRandomly
@@ -162,4 +170,11 @@
get { return (uint)SeedParameter.Value.Value; }
}
+ public string Classes
+ {
+ get { return ClassesParameter.Value.Value; }
+ }
+
+ [Storable]
+ public TSNE tsne;
#endregion
@@ -172,6 +187,6 @@
Problem = new RegressionProblem();
Parameters.Add(new ValueParameter>(DistanceParameterName, "The distance function used to differentiate similar from non-similar points", new EuclidianDistance()));
- Parameters.Add(new FixedValueParameter(PerplexityParameterName, "Perplexity-Parameter of TSNE. Comparable to k in a k-nearest neighbour algorithm", new DoubleValue(25)));
- Parameters.Add(new FixedValueParameter(ThetaParameterName, "Value describing how much appoximated gradients my differ from exact gradients. Set to 0 for exact calculation", new DoubleValue(0.1)));
+ Parameters.Add(new FixedValueParameter(PerplexityParameterName, "Perplexity-Parameter of TSNE. Comparable to k in a k-nearest neighbour algorithm. Recommended Value is Floor(number of points /3) or lower", new DoubleValue(25)));
+ Parameters.Add(new OptionalValueParameter(ThetaParameterName, "Value describing how much appoximated gradients my differ from exact gradients. Set to 0 for exact calculation and in [0,1] otherwise \n CAUTION: exact calculation of forces requires building a non-sparse N*N matrix where N is the number of data points\n This may exceed memory limitations", new DoubleValue(0.1)));
Parameters.Add(new FixedValueParameter(NewDimensionsParameterName, "Dimensionality of projected space (usually 2 for easy visual analysis", new IntValue(2)));
Parameters.Add(new FixedValueParameter(MaxIterationsParameterName, "Maximum number of iterations for gradient descent", new IntValue(1000)));
@@ -183,24 +198,55 @@
Parameters.Add(new FixedValueParameter(SetSeedRandomlyParameterName, "If the seed should be random", new BoolValue(true)));
Parameters.Add(new FixedValueParameter(SeedParameterName, "The seed used if it should not be random", new IntValue(0)));
+ Parameters.Add(new FixedValueParameter(ClassesParameterName, "name of the column specifying the class lables of each data point. \n if the lable column can not be found Training/Test is used as labels", new StringValue("none")));
}
#endregion
protected override void Run() {
- var lowDimData = new DoubleMatrix(GetProjectedData(Problem.ProblemData));
- Results.Add(new Result(ScatterPlotResultName, "Plot of the projected data", CreateScatterPlot(lowDimData, Problem.ProblemData)));
- Results.Add(new Result(DataResultName, "Projected Data", lowDimData));
- }
-
- private ScatterPlot CreateScatterPlot(DoubleMatrix lowDimData, IDataAnalysisProblemData problemData) {
- var plot = new ScatterPlot(DataResultName, "");
- Normalize(lowDimData);
- plot.Rows.Add(new ScatterPlotDataRow("Training", "Points of the training set", problemData.TrainingIndices.Select(i => new Point2D(lowDimData[i, 0], lowDimData[i, 1]))));
- plot.Rows.Add(new ScatterPlotDataRow("Test", "Points of the test set", problemData.TestIndices.Select(i => new Point2D(lowDimData[i, 0], lowDimData[i, 1]))));
- return plot;
- }
-
- private double[,] GetProjectedData(IDataAnalysisProblemData problemData) {
+ var data = CalculateProjectedData(Problem.ProblemData);
+ var lowDimData = new DoubleMatrix(data);
+ }
+
+ public override void Stop() {
+ base.Stop();
+ if (tsne != null) tsne.Running = false;
+ }
+
+ private double[,] CalculateProjectedData(IDataAnalysisProblemData problemData) {
+ var DataRowNames = new Dictionary>();
+ var rows = new Dictionary();
+
+ if (problemData.Dataset.VariableNames.Contains(Classes)) {
+ if ((problemData.Dataset as Dataset).VariableHasType(Classes)) {
+ var classes = problemData.Dataset.GetStringValues(Classes).ToArray();
+ for (int i = 0; i < classes.Length; i++) {
+ if (!DataRowNames.ContainsKey(classes[i])) DataRowNames.Add(classes[i], new List());
+ DataRowNames[classes[i]].Add(i); //always succeeds
+ }
+ } else if ((problemData.Dataset as Dataset).VariableHasType(Classes)) {
+ var classValues = problemData.Dataset.GetDoubleValues(Classes).ToArray();
+ var max = classValues.Max() + 0.1;
+ var min = classValues.Min() - 0.1;
+ var contours = 8;
+ for (int i = 0; i < contours; i++) {
+ var name = GetContourName(i, min, max, contours);
+ DataRowNames.Add(name, new List());
+ rows.Add(name, new ScatterPlotDataRow(name, "", new List>()));
+ rows[name].VisualProperties.Color = GetHeatMapColor(i, contours);
+ rows[name].VisualProperties.PointSize = i+3;
+ }
+ for (int i = 0; i < classValues.Length; i++) {
+ DataRowNames[GetContourName(classValues[i], min, max, contours)].Add(i); //always succeeds
+ }
+
+ }
+
+
+ } else {
+ DataRowNames.Add("Training", problemData.TrainingIndices.ToList());
+ DataRowNames.Add("Test", problemData.TestIndices.ToList());
+ }
+
var random = SetSeedRandomly ? new MersenneTwister() : new MersenneTwister(Seed);
- var tsne = new TSNE(Distance, random, Results, MaxIterations, StopLyingIteration, MomentumSwitchIteration, InitialMomentum, FinalMomentum, Eta);
+ tsne = new TSNE(Distance, random, Results, MaxIterations, StopLyingIteration, MomentumSwitchIteration, InitialMomentum, FinalMomentum, Eta, DataRowNames, rows);
var dataset = problemData.Dataset;
var allowedInputVariables = problemData.AllowedInputVariables.ToArray();
@@ -210,21 +256,19 @@
}
- private static void Normalize(DoubleMatrix data) {
- var max = new double[data.Columns];
- var min = new double[data.Columns];
- for (var i = 0; i < max.Length; i++) max[i] = min[i] = data[0, i];
- for (var i = 0; i < data.Rows; i++)
- for (var j = 0; j < data.Columns; j++) {
- var v = data[i, j];
- max[j] = Math.Max(max[j], v);
- min[j] = Math.Min(min[j], v);
- }
- for (var i = 0; i < data.Rows; i++) {
- for (var j = 0; j < data.Columns; j++) {
- data[i, j] = (data[i, j] - (max[j] + min[j]) / 2) / (max[j] - min[j]);
- }
- }
-
- }
+ private static Color GetHeatMapColor(int contourNr, int noContours) {
+ var q = (double)contourNr / noContours; // q in [0,1]
+ var c = q < 0.5 ? Color.FromArgb((int)(q * 2 * 255), 255, 0) : Color.FromArgb(255, (int)((1 - q) * 2 * 255), 0);
+ return c;
+ }
+ private static string GetContourName(double value, double min, double max, int noContours) {
+ var size = (max - min) / noContours;
+ var contourNr = (int)((value - min) / size);
+ return GetContourName(contourNr, min, max, noContours);
+ }
+ private static string GetContourName(int i, double min, double max, int noContours) {
+ var size = (max - min) / noContours;
+ return "[" + (min + i * size) + ";" + (min + (i + 1) * size) + ")";
+ }
+
}
}