Changeset 15249 for stable/HeuristicLab.Algorithms.DataAnalysis
- Timestamp: 07/15/17 10:29:40 (7 years ago)
- Location: stable
- Files: 2 deleted, 20 edited, 5 copied
stable
- Property svn:mergeinfo changed
  /trunk/sources merged: 14862-14863,14911,14936,15156-15158,15164,15169,15207-15209,15225,15227,15234,15248
stable/HeuristicLab.Algorithms.DataAnalysis
- Property svn:mergeinfo changed
  /trunk/sources/HeuristicLab.Algorithms.DataAnalysis merged: 14862-14863,14911,14936,15156-15158,15164,15169,15207-15209,15225,15227,15234,15248
stable/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj
(diff r15142 → r15249)
- Added <Compile Include> entries for the new KernelRidgeRegression sources: KernelFunctions\CicularKernel.cs, GaussianKernel.cs, IKernel.cs, InverseMultiquadraticKernel.cs, KernelBase.cs, MultiquadraticKernel.cs, PolysplineKernel.cs and ThinPlatePolysplineKernel.cs, plus KernelRidgeRegression.cs and KernelRidgeRegressionModel.cs.
- Added <Compile Include> entries for the new TSNE sources: Distances\CosineDistance.cs, DistanceBase.cs, EuclideanDistance.cs, IndexedItemDistance.cs, ManhattanDistance.cs and IDistance.cs, plus PriorityQueue.cs, SpacePartitioningTree.cs, TSNEAlgorithm.cs, TSNEStatic.cs, TSNEUtils.cs and VantagePointTree.cs.
stable/HeuristicLab.Algorithms.DataAnalysis/3.4/KernelRidgeRegression/KernelFunctions/CicularKernel.cs
(diff r14892 → r15249)
- Moved from namespace HeuristicLab.Algorithms.DataAnalysis.KernelRidgeRegression to HeuristicLab.Algorithms.DataAnalysis.
- Removed the "HLConstructors & Boilerplate" region and the empty AfterDeserialization hook.
- Get and GetGradient now throw an InvalidOperationException when Beta is null instead of dereferencing it.
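The same two guards recur in every kernel file below. A minimal standalone sketch of the pattern (DemoKernel is a hypothetical stand-in, not the HeuristicLab KernelBase):

    using System;

    // Sketch of the guard pattern r15249 introduces in all kernels: an unset
    // Beta now fails fast with a descriptive exception instead of a
    // NullReferenceException further down.
    public abstract class DemoKernel {
      public double? Beta { get; set; }

      public double Get(double norm) {
        if (Beta == null) throw new InvalidOperationException("Can not calculate kernel distance while Beta is null");
        var beta = Beta.Value;
        if (Math.Abs(beta) < double.Epsilon) return double.NaN; // degenerate kernel width
        return Get(norm, beta);
      }

      protected abstract double Get(double norm, double beta);
    }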
stable/HeuristicLab.Algorithms.DataAnalysis/3.4/KernelRidgeRegression/KernelFunctions/GaussianKernel.cs
(diff r14891 → r15249)
- Moved to namespace HeuristicLab.Algorithms.DataAnalysis.
- Removed the "HLConstructors & Boilerplate" region and the empty AfterDeserialization hook.
- Get and GetGradient now throw an InvalidOperationException when Beta is null.
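For reference, the two formulas this file implements, k(n) = exp(-n²/β²) and its derivative with respect to β per the "2 * n²/b² * 1/b * exp(-n²/b²)" comment in the diff, as a hypothetical standalone sketch:

    using System;

    // Standalone versions of the Gaussian kernel value and its beta-gradient.
    static class GaussianKernelDemo {
      public static double Value(double norm, double beta) {
        var d = norm / beta;
        return Math.Exp(-d * d);
      }

      public static double BetaGradient(double norm, double beta) {
        var d = norm / beta;
        return 2.0 * d * d / beta * Math.Exp(-d * d);
      }
    }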
stable/HeuristicLab.Algorithms.DataAnalysis/3.4/KernelRidgeRegression/KernelFunctions/IKernel.cs
(diff r14887 → r15249)
- Moved to namespace HeuristicLab.Algorithms.DataAnalysis (with using System added).
- The IKernel interface now exposes BetaChanged and DistanceChanged events alongside the Beta and Distance members.
stable/HeuristicLab.Algorithms.DataAnalysis/3.4/KernelRidgeRegression/KernelFunctions/InverseMultiquadraticKernel.cs
(diff r14891 → r15249)
- Moved to namespace HeuristicLab.Algorithms.DataAnalysis; removed the boilerplate region and the empty AfterDeserialization hook.
- Get and GetGradient now throw an InvalidOperationException when Beta is null.
stable/HeuristicLab.Algorithms.DataAnalysis/3.4/KernelRidgeRegression/KernelFunctions/KernelBase.cs
(diff r14887 → r15249)
- Moved to namespace HeuristicLab.Algorithms.DataAnalysis.
- DistanceParameter is now typed as IValueParameter<IDistance> and retrieved with a cast instead of an `as` conversion.
- Beta changed from a [Storable] auto-property to a [Storable] backing field; its setter raises the new BetaChanged event only when the value actually changes.
- The Distance setter assigns DistanceParameter.Value only when the value differs.
- A new RegisterEvents method forwards DistanceParameter.ValueChanged to RaiseDistanceChanged; it is called from the default constructor, the cloning constructor, and the (now non-empty) AfterDeserialization hook.
- GetParameterizedCovarianceFunction clones via Clone() instead of Clone(new Cloner()).
- Added an events region with BetaChanged and DistanceChanged plus null-safe raise helpers.
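Calling RegisterEvents from three places is the point of this diff: delegate subscriptions survive neither cloning nor deserialization, so they must be re-established each time an instance comes into existence. An illustrative reduction (not the HeuristicLab base classes):

    using System;

    // KernelSettingsDemo is a hypothetical class showing the change-guarded
    // setter and the re-wiring pattern from the diff.
    public class KernelSettingsDemo {
      public event EventHandler BetaChanged;

      private double? beta;
      public double? Beta {
        get { return beta; }
        set {
          if (value == beta) return;   // raise only on an actual change
          beta = value;
          var handler = BetaChanged;   // copy to avoid a race with unsubscribe
          if (handler != null) handler(this, EventArgs.Empty);
        }
      }

      public KernelSettingsDemo() { RegisterEvents(); }

      protected KernelSettingsDemo(KernelSettingsDemo original) {
        beta = original.beta;
        RegisterEvents();              // handlers of the original are NOT copied
      }

      private void RegisterEvents() {
        // in KernelBase this forwards DistanceParameter.ValueChanged to
        // RaiseDistanceChanged; there is nothing external to wire up here
      }
    }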
stable/HeuristicLab.Algorithms.DataAnalysis/3.4/KernelRidgeRegression/KernelFunctions/MultiquadraticKernel.cs
(diff r14891 → r15249)
- Moved to namespace HeuristicLab.Algorithms.DataAnalysis; removed the boilerplate region and the empty AfterDeserialization hook.
- Get and GetGradient now throw an InvalidOperationException when Beta is null.
stable/HeuristicLab.Algorithms.DataAnalysis/3.4/KernelRidgeRegression/KernelFunctions/PolysplineKernel.cs
(diff r14892 → r15249)
- Moved to namespace HeuristicLab.Algorithms.DataAnalysis; removed the parameter-name/property regions and the empty AfterDeserialization hook.
- DegreeParameter is retrieved with a cast instead of an `as` conversion.
- Get and GetGradient now throw an InvalidOperationException when Beta is null.
stable/HeuristicLab.Algorithms.DataAnalysis/3.4/KernelRidgeRegression/KernelFunctions/ThinPlatePolysplineKernel.cs
(diff r14892 → r15249)
- Moved to namespace HeuristicLab.Algorithms.DataAnalysis; removed the parameter-name/property regions and the empty AfterDeserialization hook.
- DegreeParameter is retrieved with a cast instead of an `as` conversion.
- Get and GetGradient now throw an InvalidOperationException when Beta is null.
stable/HeuristicLab.Algorithms.DataAnalysis/3.4/KernelRidgeRegression/KernelRidgeRegression.cs
(diff r14888 → r15249)
- Moved to namespace HeuristicLab.Algorithms.DataAnalysis; added using HeuristicLab.PluginInfrastructure.
- KernelParameter changed from ValueParameter<IKernel> to IConstrainedValueParameter<IKernel>; the constructor now fills an ItemSet with all IKernel instances discovered via ApplicationManager.Manager.GetInstances<IKernel>() and selects the GaussianKernel instance as default.
- The Beta parameter description was rewritten: "The inverse width of the kernel ]0..+inf]. The distance between points is divided by this value before being plugged into the kernel."
- Removed the empty AfterDeserialization hook.
- CreateRadialBasisRegressionSolution now builds the model through the static KernelRidgeRegressionModel.Create factory instead of the model constructor.
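The new Beta description and the log10-transformed lambda parameter fit together as follows: distances are divided by beta inside the kernel, and 10^lambda is added to the diagonal of the Gram matrix before solving (K + λI)α = y. A hypothetical sketch of the regularized Gram-matrix assembly (not the alglib-backed HeuristicLab code):

    using System;

    // Builds a ridge-regularized kernel Gram matrix from raw inputs.
    static class GramDemo {
      public static double[,] Build(double[][] x, Func<double[], double[], double> kernel, double log10Lambda) {
        var n = x.Length;
        var lambda = Math.Pow(10, log10Lambda);   // parameter is log10-transformed
        var gram = new double[n, n];
        for (var i = 0; i < n; i++) {
          for (var j = i; j < n; j++) {
            var k = kernel(x[i], x[j]);
            gram[i, j] = k;
            gram[j, i] = k;                       // kernel matrices are symmetric
          }
          gram[i, i] += lambda;                   // ridge term shifts the diagonal
        }
        return gram;
      }
    }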
stable/HeuristicLab.Algorithms.DataAnalysis/3.4/KernelRidgeRegression/KernelRidgeRegressionModel.cs
(diff r14892 → r15249)
- Moved to namespace HeuristicLab.Algorithms.DataAnalysis.
- AllowedInputVariables now returns a defensive copy (allowedInputVariables.ToArray()).
- The cloning constructor shares the kernel reference instead of cloning it.
- The public constructor was split into a private constructor plus a static Create factory. The private constructor validates the kernel parametrization, clones the kernel, sets up optional input scaling, extracts the training matrix, and computes the target offset and scale. Create then builds the Gram matrix, Cholesky-decomposes it (falling back to an LU decomposition if Cholesky fails), solves for alpha, inverts the Gram matrix, and computes the leave-one-out CV RMSE; alglib exceptions are still wrapped in an ArgumentException so calling code need not know about the alglib implementation.
- The duplicated "Could not invert Gram matrix." checks were merged into a single check after both decomposition branches.
- BuildGramMatrix, CreateScaling and ExtractData were made static and now receive the kernel and the allowed input variables as explicit arguments.
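The leave-one-out computation visible in Create uses the standard shortcut: with α = G⁻¹y, the LOO prediction for row i is pred_i − α_i / (G⁻¹)ᵢᵢ, so no refitting is needed. A hypothetical sketch assuming gram, its inverse invG, alpha and the scaled targets y are already computed (names mirror the diff, the method itself is illustrative):

    using System;

    // Computes the leave-one-out cross-validation RMSE for kernel ridge
    // regression without retraining n times.
    static class LooCvDemo {
      public static double Rmse(double[,] gram, double[,] invG, double[] alpha, double[] y, double yScale) {
        var n = y.Length;
        var ssqLooError = 0.0;
        for (var i = 0; i < n; i++) {
          var pred = 0.0;
          for (var j = 0; j < n; j++) pred += gram[i, j] * alpha[j];
          var looPred = pred - alpha[i] / invG[i, i];  // LOO shortcut
          var error = (y[i] - looPred) / yScale;       // undo target scaling
          ssqLooError += error * error;
        }
        return Math.Sqrt(ssqLooError / n);
      }
    }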
stable/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/CosineDistance.cs
(diff r15209 → r15249)
- Fixed the "angluar" typo in the XML summary and the Item description, which now reads "The angular distance as defined as a normalized distance measure dependent on the angle between two vectors."
- Dropped the sentence claiming the distance is designed for vectors with all positive coordinates.
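One common definition of the angular distance the description refers to is arccos of the cosine similarity, normalized by π into [0, 1]. A hypothetical standalone sketch, not necessarily the exact HeuristicLab formula:

    using System;

    // Angular distance between two vectors via their cosine similarity.
    static class AngularDistanceDemo {
      public static double Get(double[] a, double[] b) {
        double dot = 0, na = 0, nb = 0;
        for (var i = 0; i < a.Length; i++) {
          dot += a[i] * b[i];
          na += a[i] * a[i];
          nb += b[i] * b[i];
        }
        var cos = dot / (Math.Sqrt(na) * Math.Sqrt(nb));
        cos = Math.Max(-1.0, Math.Min(1.0, cos));  // clamp numerical noise
        return Math.Acos(cos) / Math.PI;
      }
    }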
stable/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/DistanceBase.cs
(diff r14767 → r15249)
- Region renamed from "HLConstructors & Boilerplate" to "HLConstructors & Cloning"; added using System.Collections.
- Added non-generic overloads Get(object, object) and GetDistanceComparer(object) that cast to T and delegate to the generic versions.
- The private DistanceComparer now implements IComparer in addition to IComparer<T>, forwarding the object-based Compare to the typed one.
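A sketch of the comparer bridging this diff adds: one private comparer serving both the generic (IComparer<T>) and non-generic (IComparer) collection APIs, ordering items by their distance to a fixed reference item. Names here are illustrative:

    using System;
    using System.Collections;
    using System.Collections.Generic;

    // Compares two items by their distance to a fixed reference item.
    class ItemDistanceComparer<T> : IComparer<T>, IComparer {
      private readonly T item;
      private readonly Func<T, T, double> dist;

      public ItemDistanceComparer(T item, Func<T, T, double> dist) {
        this.item = item;
        this.dist = dist;
      }

      public int Compare(T x, T y) {
        return dist(x, item).CompareTo(dist(y, item));
      }

      int IComparer.Compare(object x, object y) {
        return Compare((T)x, (T)y);   // bridge for non-generic callers
      }
    }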
stable/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/EuclideanDistance.cs
(diff r14784 → r15249)
- Region renamed to "HLConstructors & Cloning".
- GetDistance now accepts IReadOnlyList<double> parameters and accumulates the squared differences in an explicit loop instead of the previous Zip/Sum pipeline, avoiding the intermediate enumerable.
stable/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/ManhattanDistance.cs
(diff r14911 → r15249)
- Region renamed to "HLConstructors & Cloning".
- GetDistance replaces the Zip/Sum pipeline with an explicit loop accumulating Math.Abs(point1[i] - point2[i]).
stable/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/PriorityQueue.cs
(diff r14785 → r15249)
- Removed the "TODO: move to HeuristicLab.Collections" comment.
- In RemoveMin, the three unrolled sibling-key comparisons were collapsed into a loop over the offsets 1..3.
- Minor whitespace and comment formatting cleanups throughout (PeekMin, the bubble-up/bubble-down loops, and the Clear loop).
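The sibling scan that replaces the three copy-pasted comparisons, as a hypothetical standalone helper: in this 4-ary heap (Heap4) a node's four children sit contiguously starting at index succ, and the loop returns the offset of the smallest key. Layout and names are illustrative, not the full implementation:

    using System;
    using System.Collections.Generic;

    // Finds which of the four contiguous children holds the minimum key.
    static class Heap4Demo {
      public static int MinChildOffset<TK>(KeyValuePair<TK, int>[] data, int succ) where TK : IComparable<TK> {
        var minKey = data[succ].Key;
        var delta = 0;
        for (var i = 1; i <= 3; i++) {
          var otherKey = data[succ + i].Key;
          if (otherKey.CompareTo(minKey) >= 0) continue;
          minKey = otherKey;
          delta = i;
        }
        return delta;   // caller advances succ by this offset
      }
    }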
stable/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/SpacePartitioningTree.cs
(diff r14862 → r15249)
- The class is now internal and no longer implements ISpacePartitioningTree; the parent pointer, the parent-taking constructor, and GetParent were removed, and the corner/width constructor was made private.
- QT_NODE_CAPACITY was renamed to QtNodeCapacity; the fields were grouped into a region; removed now-unused usings (System.Linq, HeuristicLab.Common).
- The shared buff member was replaced by local buffers allocated inside ComputeNonEdgeForces and ComputeEdgeForces; ComputeEdgeForces, which does not use the tree, was made static and now takes the data matrix and dimension as parameters.
- Subdivide was made private and moved into the helpers region; the unused IsCorrect, GetAllIndices and GetDepth methods were removed.
- The comment on duplicate handling was shortened to "Don't add duplicates".
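The Barnes-Hut acceptance test visible in ComputeNonEdgeForces summarizes a whole cell by its center of mass when the cell is far enough away relative to its widest side; with theta = 0 the condition never holds, forcing exact computation. A hypothetical helper capturing just that test:

    using System;

    // Barnes-Hut criterion: summarize a cell when width/distance < theta.
    static class BarnesHutDemo {
      public static bool CanSummarize(double maxCellWidth, double squaredDistance, double theta) {
        return maxCellWidth / Math.Sqrt(squaredDistance) < theta;
      }
    }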
stable/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNEAlgorithm.cs
(diff r14863 → r15249)
- The Item description now points to the reference implementation (https://lvdmaaten.github.io/tsne/#implementations, Barnes-Hut t-SNE) and the paper (https://lvdmaaten.github.io/publications/papers/JMLR_2014.pdf); added using HeuristicLab.PluginInfrastructure.
- The Distance parameter was renamed to DistanceFunction and changed from a ValueParameter to a ConstrainedValueParameter whose valid values are all IDistance<double[]> instances discovered via ApplicationManager, with EuclideanDistance as default.
- Theta changed from a DoubleValue to a PercentValue parameter (default 0, i.e. exact gradient calculation).
- The free-text "ClassNames" string parameter was replaced by an OptionalConstrainedValueParameter named "ClassesName"; new OnProblemChanged/RegisterProblemEvents/DeregisterProblemEvents overrides and an OnProblemDataChanged handler repopulate its valid values from the problem data's input variables.
- The UpdateInterval parameter received a description ("The interval after which the results will be updated."); Problem = new RegressionProblem() moved to the end of the constructor.
- SetUpResults and the normalization routines moved into a helpers region; both normalization paths now guard against degenerate columns (a column is left unscaled when its range, or its maximum deviation, is almost zero).
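A sketch of the zero-range guard added to the normalization: a column whose maximum equals its minimum cannot be scaled, so its values are passed through unchanged. This hypothetical per-cell helper mirrors the diff; Math.Abs(d) < 1e-12 stands in for HeuristicLab's IsAlmost extension:

    using System;

    // Centers a value in its column range and scales it to [-0.5, 0.5],
    // skipping the scaling when the range is degenerate.
    static class NormalizeDemo {
      public static double Scale(double value, double min, double max) {
        var d = max - min;                   // column range
        var s = value - (max + min) / 2.0;   // shift to center around 0
        return Math.Abs(d) < 1e-12 ? value : s / d;
      }
    }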
stable/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNEStatic.cs
r14862 r15249 56 56 using System; 57 57 using System.Collections.Generic; 58 using System.Linq;59 58 using HeuristicLab.Collections; 60 59 using HeuristicLab.Common; 61 60 using HeuristicLab.Core; 62 using HeuristicLab.Optimization;63 61 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; 64 62 using HeuristicLab.Random; … … 70 68 [StorableClass] 71 69 public sealed class TSNEState : DeepCloneable { 70 #region Storables 72 71 // initialized once 73 72 [Storable] … … 122 121 [Storable] 123 122 public double[,] dY; 124 123 #endregion 124 125 #region Constructors & Cloning 125 126 private TSNEState(TSNEState original, Cloner cloner) : base(original, cloner) { 126 this.distance = cloner.Clone(original.distance);127 this.random = cloner.Clone(original.random);128 this.perplexity = original.perplexity;129 this.exact = original.exact;130 this.noDatapoints = original.noDatapoints;131 this.finalMomentum = original.finalMomentum;132 this.momSwitchIter = original.momSwitchIter;133 this.stopLyingIter = original.stopLyingIter;134 th is.theta = original.theta;135 this.eta = original.eta;136 this.newDimensions = original.newDimensions;137 if (original.valP != null) {138 this.valP = new double[original.valP.Length];139 Array.Copy(original.valP, this.valP, this.valP.Length);140 } 141 if (original.rowP != null) {142 this.rowP = new int[original.rowP.Length];143 Array.Copy(original.rowP, this.rowP, this.rowP.Length);144 } 145 if (original.colP != null) {146 this.colP = new int[original.colP.Length];147 Array.Copy(original.colP, this.colP, this.colP.Length);148 } 149 if (original.p != null) {150 this.p = new double[original.p.GetLength(0), original.p.GetLength(1)];151 Array.Copy(original.p, this.p, this.p.Length);152 } 153 this.newData = new double[original.newData.GetLength(0), original.newData.GetLength(1)];154 Array.Copy(original.newData, this.newData, this.newData.Length);155 this.iter = original.iter;156 this.currentMomentum = original.currentMomentum;157 this.gains = new double[original.gains.GetLength(0), original.gains.GetLength(1)];158 Array.Copy(original.gains, this.gains, this.gains.Length);159 this.uY = new double[original.uY.GetLength(0), original.uY.GetLength(1)];160 Array.Copy(original.uY, this.uY, this.uY.Length);161 this.dY = new double[original.dY.GetLength(0), original.dY.GetLength(1)];162 Array.Copy(original.dY, this.dY, this.dY.Length);127 distance = cloner.Clone(original.distance); 128 random = cloner.Clone(original.random); 129 perplexity = original.perplexity; 130 exact = original.exact; 131 noDatapoints = original.noDatapoints; 132 finalMomentum = original.finalMomentum; 133 momSwitchIter = original.momSwitchIter; 134 stopLyingIter = original.stopLyingIter; 135 theta = original.theta; 136 eta = original.eta; 137 newDimensions = original.newDimensions; 138 if (original.valP != null) { 139 valP = new double[original.valP.Length]; 140 Array.Copy(original.valP, valP, valP.Length); 141 } 142 if (original.rowP != null) { 143 rowP = new int[original.rowP.Length]; 144 Array.Copy(original.rowP, rowP, rowP.Length); 145 } 146 if (original.colP != null) { 147 colP = new int[original.colP.Length]; 148 Array.Copy(original.colP, colP, colP.Length); 149 } 150 if (original.p != null) { 151 p = new double[original.p.GetLength(0), original.p.GetLength(1)]; 152 Array.Copy(original.p, p, p.Length); 153 } 154 newData = new double[original.newData.GetLength(0), original.newData.GetLength(1)]; 155 Array.Copy(original.newData, newData, newData.Length); 156 iter = original.iter; 157 
+    currentMomentum = original.currentMomentum;
+    gains = new double[original.gains.GetLength(0), original.gains.GetLength(1)];
+    Array.Copy(original.gains, gains, gains.Length);
+    uY = new double[original.uY.GetLength(0), original.uY.GetLength(1)];
+    Array.Copy(original.uY, uY, uY.Length);
+    dY = new double[original.dY.GetLength(0), original.dY.GetLength(1)];
+    Array.Copy(original.dY, dY, dY.Length);
   }
…
     this.stopLyingIter = stopLyingIter;
     this.momSwitchIter = momSwitchIter;
-    this.currentMomentum = momentum;
+    currentMomentum = momentum;
     this.finalMomentum = finalMomentum;
     this.eta = eta;
-

     // initialize
     noDatapoints = data.Length;
-    if(noDatapoints - 1 < 3 * perplexity)
+    if (noDatapoints - 1 < 3 * perplexity)
       throw new ArgumentException("Perplexity too large for the number of data points!");
…
     uY = new double[noDatapoints, newDimensions];
     gains = new double[noDatapoints, newDimensions];
-    for(var i = 0; i < noDatapoints; i++)
-      for(var j = 0; j < newDimensions; j++)
+    for (var i = 0; i < noDatapoints; i++)
+      for (var j = 0; j < newDimensions; j++)
         gains[i, j] = 1.0;
…
     // Lie about the P-values (factor is 4 in the MATLAB implementation)
-    if(exact) for(var i = 0; i < noDatapoints; i++) for(var j = 0; j < noDatapoints; j++) p[i, j] *= 12.0;
-    else for(var i = 0; i < rowP[noDatapoints]; i++) valP[i] *= 12.0;
+    if (exact) for (var i = 0; i < noDatapoints; i++) for (var j = 0; j < noDatapoints; j++) p[i, j] *= 12.0;
+    else for (var i = 0; i < rowP[noDatapoints]; i++) valP[i] *= 12.0;

     // Initialize solution (randomly)
     var rand = new NormalDistributedRandom(random, 0, 1);
-    for(var i = 0; i < noDatapoints; i++)
-      for(var j = 0; j < newDimensions; j++)
+    for (var i = 0; i < noDatapoints; i++)
+      for (var j = 0; j < newDimensions; j++)
         newData[i, j] = rand.NextDouble() * .0001;
   }
+  #endregion

   public double EvaluateError() {
…
   }

+  #region Helpers
   private static void CalculateApproximateSimilarities(T[] data, IDistance<T> distance, double perplexity, out int[] rowP, out int[] colP, out double[] valP) {
     // Compute asymmetric pairwise input similarities
…
     valP = sValP;
     var sumP = .0;
-    for(var i = 0; i < rowP[data.Length]; i++) sumP += valP[i];
-    for(var i = 0; i < rowP[data.Length]; i++) valP[i] /= sumP;
+    for (var i = 0; i < rowP[data.Length]; i++) sumP += valP[i];
+    for (var i = 0; i < rowP[data.Length]; i++) valP[i] /= sumP;
   }
…
     ComputeGaussianPerplexity(data, distance, p, perplexity);
     // Symmetrize input similarities
-    for(var n = 0; n < data.Length; n++) {
-      for(var m = n + 1; m < data.Length; m++) {
+    for (var n = 0; n < data.Length; n++) {
+      for (var m = n + 1; m < data.Length; m++) {
         p[n, m] += p[m, n];
         p[m, n] = p[n, m];
…
     }
     var sumP = .0;
-    for(var i = 0; i < data.Length; i++) for(var j = 0; j < data.Length; j++) sumP += p[i, j];
-    for(var i = 0; i < data.Length; i++) for(var j = 0; j < data.Length; j++) p[i, j] /= sumP;
+    for (var i = 0; i < data.Length; i++) for (var j = 0; j < data.Length; j++) sumP += p[i, j];
+    for (var i = 0; i < data.Length; i++) for (var j = 0; j < data.Length; j++) p[i, j] /= sumP;
     return p;
   }
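Review note: the symmetrization in CalculateExactSimilarities realizes the textbook joint input distribution of t-SNE. With each conditional row \(p_{j|i}\) summing to 1, adding the transpose and dividing by the grand total (which is \(2n\) after symmetrizing \(n\) rows) is the same computation as

  \( p_{ij} = \dfrac{p_{j|i} + p_{i|j}}{2n} \)

so the dense and the sparse code path target the same distribution, up to the k-nearest-neighbour truncation in the approximate variant.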
   private static void ComputeGaussianPerplexity(IReadOnlyList<T> x, IDistance<T> distance, out int[] rowP, out int[] colP, out double[] valP, double perplexity, int k) {
-    if(perplexity > k) throw new ArgumentException("Perplexity should be lower than k!");
+    if (perplexity > k) throw new ArgumentException("Perplexity should be lower than k!");

-    int n = x.Count;
+    var n = x.Count;
     // Allocate the memory we need
…
     var curP = new double[n - 1];
     rowP[0] = 0;
-    for(var i = 0; i < n; i++) rowP[i + 1] = rowP[i] + k;
+    for (var i = 0; i < n; i++) rowP[i + 1] = rowP[i] + k;

     var objX = new List<IndexedItem<T>>();
-    for(var i = 0; i < n; i++) objX.Add(new IndexedItem<T>(i, x[i]));
+    for (var i = 0; i < n; i++) objX.Add(new IndexedItem<T>(i, x[i]));

     // Build ball tree on data set
…
     // Loop over all points to find nearest neighbors
-    for(var i = 0; i < n; i++) {
+    for (var i = 0; i < n; i++) {
       IList<IndexedItem<T>> indices;
       IList<double> distances;
…
       var minBeta = double.MinValue;
       var maxBeta = double.MaxValue;
       const double tol = 1e-5;

       // Iterate until we found a good perplexity
       var iter = 0; double sumP = 0;
-      while(!found && iter < 200) {
+      while (!found && iter < 200) {

         // Compute Gaussian kernel row
-        for(var m = 0; m < k; m++) curP[m] = Math.Exp(-beta * distances[m + 1]);
+        for (var m = 0; m < k; m++) curP[m] = Math.Exp(-beta * distances[m + 1]);

         // Compute entropy of current row
         sumP = double.Epsilon;
-        for(var m = 0; m < k; m++) sumP += curP[m];
+        for (var m = 0; m < k; m++) sumP += curP[m];
         var h = .0;
-        for(var m = 0; m < k; m++) h += beta * (distances[m + 1] * curP[m]);
+        for (var m = 0; m < k; m++) h += beta * (distances[m + 1] * curP[m]);
         h = h / sumP + Math.Log(sumP);

         // Evaluate whether the entropy is within the tolerance level
         var hdiff = h - Math.Log(perplexity);
-        if(hdiff < tol && -hdiff < tol) {
+        if (hdiff < tol && -hdiff < tol) {
           found = true;
         } else {
-          if(hdiff > 0) {
+          if (hdiff > 0) {
             minBeta = beta;
-            if(maxBeta.IsAlmost(double.MaxValue) || maxBeta.IsAlmost(double.MinValue))
+            if (maxBeta.IsAlmost(double.MaxValue) || maxBeta.IsAlmost(double.MinValue))
               beta *= 2.0;
             else
…
           } else {
             maxBeta = beta;
-            if(minBeta.IsAlmost(double.MinValue) || minBeta.IsAlmost(double.MaxValue))
+            if (minBeta.IsAlmost(double.MinValue) || minBeta.IsAlmost(double.MaxValue))
               beta /= 2.0;
             else
…
       // Row-normalize current row of P and store in matrix
-      for(var m = 0; m < k; m++) curP[m] /= sumP;
-      for(var m = 0; m < k; m++) {
+      for (var m = 0; m < k; m++) curP[m] /= sumP;
+      for (var m = 0; m < k; m++) {
         colP[rowP[i] + m] = indices[m + 1].Index;
         valP[rowP[i] + m] = curP[m];
…
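Review note: both ComputeGaussianPerplexity overloads share the same bandwidth search; only the distance source differs (ball-tree neighbours vs. the full distance matrix). A minimal standalone sketch of that search, detached from the HeuristicLab types and using hypothetical names, for reference only:

    using System;

    public static class PerplexitySearch {
      // Sketch: find beta = 1/(2*sigma^2) such that the entropy of the Gaussian
      // kernel row over the given squared distances matches log(perplexity).
      public static double[] GaussianRow(double[] sqDistances, double perplexity) {
        var k = sqDistances.Length;
        var p = new double[k];
        double beta = 1.0, minBeta = double.MinValue, maxBeta = double.MaxValue;
        const double tol = 1e-5;
        for (var iter = 0; iter < 200; iter++) {          // iteration cap as above
          var sumP = double.Epsilon;
          for (var m = 0; m < k; m++) { p[m] = Math.Exp(-beta * sqDistances[m]); sumP += p[m]; }
          var h = 0.0;
          for (var m = 0; m < k; m++) h += beta * sqDistances[m] * p[m];
          h = h / sumP + Math.Log(sumP);                  // Shannon entropy of the row
          var hdiff = h - Math.Log(perplexity);
          if (Math.Abs(hdiff) < tol) break;               // entropy close enough
          if (hdiff > 0) {                                // entropy too high -> narrow the kernel
            minBeta = beta;
            beta = maxBeta == double.MaxValue ? beta * 2.0 : (beta + maxBeta) / 2.0;
          } else {                                        // entropy too low -> widen the kernel
            maxBeta = beta;
            beta = minBeta == double.MinValue ? beta / 2.0 : (beta + minBeta) / 2.0;
          }
        }
        var norm = 0.0;
        foreach (var v in p) norm += v;
        for (var m = 0; m < k; m++) p[m] /= norm;         // row-normalize, as in the code above
        return p;
      }
    }

Doubling (or halving) while one bound is still unset, and bisecting once both bounds are known, is exactly the bracketing scheme the hunks above preserve.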
     var dd = ComputeDistances(x, distance);

-    int n = x.Length;
+    var n = x.Length;
     // Compute the Gaussian kernel row by row
-    for(var i = 0; i < n; i++) {
+    for (var i = 0; i < n; i++) {
       // Initialize some variables
…
       // Iterate until we found a good perplexity
       var iter = 0;
-      while(!found && iter < 200) { // 200 iterations as in tSNE implementation by van der Maarten
+      while (!found && iter < 200) { // 200 iterations as in tSNE implementation by van der Maarten

         // Compute Gaussian kernel row
-        for(var m = 0; m < n; m++) p[i, m] = Math.Exp(-beta * dd[i][m]);
+        for (var m = 0; m < n; m++) p[i, m] = Math.Exp(-beta * dd[i][m]);
         p[i, i] = double.Epsilon;

         // Compute entropy of current row
         sumP = double.Epsilon;
-        for(var m = 0; m < n; m++) sumP += p[i, m];
+        for (var m = 0; m < n; m++) sumP += p[i, m];
         var h = 0.0;
-        for(var m = 0; m < n; m++) h += beta * (dd[i][m] * p[i, m]);
+        for (var m = 0; m < n; m++) h += beta * (dd[i][m] * p[i, m]);
         h = h / sumP + Math.Log(sumP);

         // Evaluate whether the entropy is within the tolerance level
         var hdiff = h - Math.Log(perplexity);
-        if(hdiff < tol && -hdiff < tol) {
+        if (hdiff < tol && -hdiff < tol) {
           found = true;
         } else {
-          if(hdiff > 0) {
+          if (hdiff > 0) {
             minBeta = beta;
-            if(maxBeta.IsAlmost(double.MaxValue) || maxBeta.IsAlmost(double.MinValue))
+            if (maxBeta.IsAlmost(double.MaxValue) || maxBeta.IsAlmost(double.MinValue))
               beta *= 2.0;
             else
…
           } else {
             maxBeta = beta;
-            if(minBeta.IsAlmost(double.MinValue) || minBeta.IsAlmost(double.MaxValue))
+            if (minBeta.IsAlmost(double.MinValue) || minBeta.IsAlmost(double.MaxValue))
               beta /= 2.0;
             else
…
       // Row normalize P
-      for(var m = 0; m < n; m++) p[i, m] /= sumP;
+      for (var m = 0; m < n; m++) p[i, m] /= sumP;
     }
   }
…
   private static double[][] ComputeDistances(T[] x, IDistance<T> distance) {
     var res = new double[x.Length][];
-    for(int r = 0; r < x.Length; r++) {
+    for (var r = 0; r < x.Length; r++) {
       var rowV = new double[x.Length];
       // all distances must be symmetric
-      for(int c = 0; c < r; c++) {
+      for (var c = 0; c < r; c++) {
         rowV[c] = res[c][r];
       }
       rowV[r] = 0.0; // distance to self is zero for all distances
-      for(int c = r + 1; c < x.Length; c++) {
+      for (var c = r + 1; c < x.Length; c++) {
         rowV[c] = distance.Get(x[r], x[c]);
       }
…
     // Compute Q-matrix and normalization sum
     var sumQ = double.Epsilon;
-    for(var n1 = 0; n1 < n; n1++) {
-      for(var m = 0; m < n; m++) {
-        if(n1 != m) {
+    for (var n1 = 0; n1 < n; n1++) {
+      for (var m = 0; m < n; m++) {
+        if (n1 != m) {
           q[n1, m] = 1 / (1 + dd[n1, m]);
           sumQ += q[n1, m];
…
     }
-    for(var i = 0; i < n; i++) for(var j = 0; j < n; j++) q[i, j] /= sumQ;
+    for (var i = 0; i < n; i++) for (var j = 0; j < n; j++) q[i, j] /= sumQ;

     // Sum t-SNE error
     var c = .0;
-    for(var i = 0; i < n; i++)
-      for(var j = 0; j < n; j++) {
+    for (var i = 0; i < n; i++)
+      for (var j = 0; j < n; j++) {
         c += p[i, j] * Math.Log((p[i, j] + float.Epsilon) / (q[i, j] + float.Epsilon));
       }
…
   }

-  // The mapping of the approximate tSNE looks good but the error curve never changes.
   private static double EvaluateErrorApproximate(IReadOnlyList<int> rowP, IReadOnlyList<int> colP, IReadOnlyList<double> valP, double[,] y, double theta) {
     // Get estimate of normalization term
…
     var tree = new SpacePartitioningTree(y);
     var buff = new double[d];
-    double sumQ = 0.0;
-    for(var i = 0; i < n; i++) tree.ComputeNonEdgeForces(i, theta, buff, ref sumQ);
+    var sumQ = 0.0;
+    for (var i = 0; i < n; i++) tree.ComputeNonEdgeForces(i, theta, buff, ref sumQ);
     // Loop over all edges to compute t-SNE error
     var c = .0;
-    for(var k = 0; k < n; k++) {
-      for(var i = rowP[k]; i < rowP[k + 1]; i++) {
+    for (var k = 0; k < n; k++) {
+      for (var i = rowP[k]; i < rowP[k + 1]; i++) {
         var q = .0;
-        for(var j = 0; j < d; j++) buff[j] = y[k, j];
-        for(var j = 0; j < d; j++) buff[j] -= y[colP[i], j];
-        for(var j = 0; j < d; j++) q += buff[j] * buff[j];
+        for (var j = 0; j < d; j++) buff[j] = y[k, j];
+        for (var j = 0; j < d; j++) buff[j] -= y[colP[i], j];
+        for (var j = 0; j < d; j++) q += buff[j] * buff[j];
         q = (1.0 / (1.0 + q)) / sumQ;
         c += valP[i] * Math.Log((valP[i] + float.Epsilon) / (q + float.Epsilon));
…
     var n = rowP.Count - 1;
     var rowCounts = new int[n];
-    for(var j = 0; j < n; j++) {
-      for(var i = rowP[j]; i < rowP[j + 1]; i++) {
+    for (var j = 0; j < n; j++) {
+      for (var i = rowP[j]; i < rowP[j + 1]; i++) {

         // Check whether element (col_P[i], n) is present
         var present = false;
-        for(var m = rowP[colP[i]]; m < rowP[colP[i] + 1]; m++) {
-          if(colP[m] == j) present = true;
+        for (var m = rowP[colP[i]]; m < rowP[colP[i] + 1]; m++) {
+          if (colP[m] == j) present = true;
         }
-        if(present) rowCounts[j]++;
+        if (present) rowCounts[j]++;
         else {
           rowCounts[j]++;
…
     }
     var noElem = 0;
-    for(var i = 0; i < n; i++) noElem += rowCounts[i];
+    for (var i = 0; i < n; i++) noElem += rowCounts[i];

     // Allocate memory for symmetrized matrix
…
     // Construct new row indices for symmetric matrix
     symRowP[0] = 0;
-    for(var i = 0; i < n; i++) symRowP[i + 1] = symRowP[i] + rowCounts[i];
+    for (var i = 0; i < n; i++) symRowP[i + 1] = symRowP[i] + rowCounts[i];

     // Fill the result matrix
     var offset = new int[n];
-    for(var j = 0; j < n; j++) {
-      for(var i = rowP[j]; i < rowP[j + 1]; i++) { // considering element(n, colP[i])
+    for (var j = 0; j < n; j++) {
+      for (var i = rowP[j]; i < rowP[j + 1]; i++) { // considering element(n, colP[i])

         // Check whether element (col_P[i], n) is present
         var present = false;
-        for(var m = rowP[colP[i]]; m < rowP[colP[i] + 1]; m++) {
-          if(colP[m] != j) continue;
+        for (var m = rowP[colP[i]]; m < rowP[colP[i] + 1]; m++) {
+          if (colP[m] != j) continue;
           present = true;
-          if(j > colP[i]) continue; // make sure we do not add elements twice
+          if (j > colP[i]) continue; // make sure we do not add elements twice
           symColP[symRowP[j] + offset[j]] = colP[i];
           symColP[symRowP[colP[i]] + offset[colP[i]]] = j;
…
         // If (colP[i], n) is not present, there is no addition involved
-        if(!present) {
+        if (!present) {
           symColP[symRowP[j] + offset[j]] = colP[i];
           symColP[symRowP[colP[i]] + offset[colP[i]]] = j;
…
         // Update offsets
-        if(present && (j > colP[i])) continue;
+        if (present && (j > colP[i])) continue;
         offset[j]++;
-        if(colP[i] != j) offset[colP[i]]++;
-      }
-    }
-
-    for(var i = 0; i < noElem; i++) symValP[i] /= 2.0;
-  }
+        if (colP[i] != j) offset[colP[i]]++;
+      }
+    }
+
+    for (var i = 0; i < noElem; i++) symValP[i] /= 2.0;
+  }
+  #endregion
 }
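Review note: SymmetrizeMatrix is the hardest hunk to verify by eye. A dense reference computation (a hypothetical test helper, not part of the changeset) that the sparse version should agree with, including the final halving:

    using System;

    public static class SymmetrizeReference {
      // Expand a CSR-like (rowP, colP, valP) matrix to dense form and symmetrize it.
      // (dense[i, j] + dense[j, i]) / 2 corresponds to the 'symValP[i] /= 2.0' step above.
      public static double[,] SymmetrizeDense(int[] rowP, int[] colP, double[] valP, int n) {
        var dense = new double[n, n];
        for (var i = 0; i < n; i++)
          for (var idx = rowP[i]; idx < rowP[i + 1]; idx++)
            dense[i, colP[idx]] = valP[idx];
        var sym = new double[n, n];
        for (var i = 0; i < n; i++)
          for (var j = 0; j < n; j++)
            sym[i, j] = (dense[i, j] + dense[j, i]) / 2.0;
        return sym;
      }
    }

The sparse code gets the same result without materializing the dense matrix, which is the whole point of the Barnes-Hut path.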
 /// <summary>
-/// Simple interface to tSNE
+/// Static interface to tSNE
 /// </summary>
 /// <param name="data"></param>
…
     int newDimensions = 2, double perplexity = 25, int iterations = 1000,
     double theta = 0,
-    int stopLyingIter = 250, int momSwitchIter = 250, double momentum = .5,
-    double finalMomentum = .8, double eta = 200.0
+    int stopLyingIter = 0, int momSwitchIter = 0, double momentum = .5,
+    double finalMomentum = .8, double eta = 10.0
     ) {
     var state = CreateState(data, distance, random, newDimensions, perplexity,
       theta, stopLyingIter, momSwitchIter, momentum, finalMomentum, eta);

-    for(int i = 0; i < iterations - 1; i++) {
+    for (var i = 0; i < iterations - 1; i++) {
       Iterate(state);
     }
…
   public static TSNEState CreateState(T[] data, IDistance<T> distance, IRandom random,
     int newDimensions = 2, double perplexity = 25, double theta = 0,
-    int stopLyingIter = 250, int momSwitchIter = 250, double momentum = .5,
-    double finalMomentum = .8, double eta = 200.0
+    int stopLyingIter = 0, int momSwitchIter = 0, double momentum = .5,
+    double finalMomentum = .8, double eta = 10.0
     ) {
     return new TSNEState(data, distance, random, newDimensions, perplexity, theta, stopLyingIter, momSwitchIter, momentum, finalMomentum, eta);
   }

-
   public static double[,] Iterate(TSNEState state) {
-    if(state.exact)
+    if (state.exact)
       ComputeExactGradient(state.p, state.newData, state.noDatapoints, state.newDimensions, state.dY);
     else
…
     // Update gains
-    for(var i = 0; i < state.noDatapoints; i++) {
-      for(var j = 0; j < state.newDimensions; j++) {
+    for (var i = 0; i < state.noDatapoints; i++) {
+      for (var j = 0; j < state.newDimensions; j++) {
         state.gains[i, j] = Math.Sign(state.dY[i, j]) != Math.Sign(state.uY[i, j])
           ? state.gains[i, j] + .2 // +0.2 nd *0.8 are used in two separate implementations of tSNE -> seems to be correct
           : state.gains[i, j] * .8;

-        if(state.gains[i, j] < .01) state.gains[i, j] = .01;
+        if (state.gains[i, j] < .01) state.gains[i, j] = .01;
       }
     }
…
     // Perform gradient update (with momentum and gains)
-    for(var i = 0; i < state.noDatapoints; i++)
-      for(var j = 0; j < state.newDimensions; j++)
+    for (var i = 0; i < state.noDatapoints; i++)
+      for (var j = 0; j < state.newDimensions; j++)
         state.uY[i, j] = state.currentMomentum * state.uY[i, j] - state.eta * state.gains[i, j] * state.dY[i, j];

-    for(var i = 0; i < state.noDatapoints; i++)
-      for(var j = 0; j < state.newDimensions; j++)
+    for (var i = 0; i < state.noDatapoints; i++)
+      for (var j = 0; j < state.newDimensions; j++)
         state.newData[i, j] = state.newData[i, j] + state.uY[i, j];
…
     // Stop lying about the P-values after a while, and switch momentum
-    if(state.iter == state.stopLyingIter) {
-      if(state.exact)
-        for(var i = 0; i < state.noDatapoints; i++)
-          for(var j = 0; j < state.noDatapoints; j++)
+    if (state.iter == state.stopLyingIter) {
+      if (state.exact)
+        for (var i = 0; i < state.noDatapoints; i++)
+          for (var j = 0; j < state.noDatapoints; j++)
             state.p[i, j] /= 12.0;
       else
-        for(var i = 0; i < state.rowP[state.noDatapoints]; i++)
+        for (var i = 0; i < state.rowP[state.noDatapoints]; i++)
           state.valP[i] /= 12.0;
     }

-    if(state.iter == state.momSwitchIter)
+    if (state.iter == state.momSwitchIter)
       state.currentMomentum = state.finalMomentum;
…
   }
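Review note: the changed defaults (stopLyingIter = 0, momSwitchIter = 0, eta = 10.0) affect every caller of Run or CreateState that does not pass explicit arguments. A usage sketch, assuming the enclosing generic class is named TSNEStatic<T> (as the file name suggests) and that EuclideanDistance implements IDistance<double[]>; the data loader is hypothetical:

    // Hypothetical call site; data loading is application-specific.
    var random = new MersenneTwister(42);
    var distance = new EuclideanDistance();
    double[][] data = LoadData(); // hypothetical helper

    // One-shot embedding with the new r15249 defaults (no lying phase, eta = 10.0):
    double[,] embedding = TSNEStatic<double[]>.Run(data, distance, random);

    // Or drive the iterations manually to monitor the KL error:
    var state = TSNEStatic<double[]>.CreateState(data, distance, random);
    for (var i = 0; i < 1000; i++) {
      TSNEStatic<double[]>.Iterate(state);
      if (i % 50 == 0) Console.WriteLine(state.EvaluateError());
    }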

-
+  #region Helpers
   private static void ComputeApproximateGradient(int[] rowP, int[] colP, double[] valP, double[,] y, int n, int d, double[,] dC, double theta) {
     var tree = new SpacePartitioningTree(y);
-    double sumQ = 0.0;
+    var sumQ = 0.0;
     var posF = new double[n, d];
     var negF = new double[n, d];
-    tree.ComputeEdgeForces(rowP, colP, valP, n, posF);
+    SpacePartitioningTree.ComputeEdgeForces(rowP, colP, valP, n, posF, y, d);
     var row = new double[d];
-    for(var n1 = 0; n1 < n; n1++) {
-      Array.Clear(row, 0,row.Length);
+    for (var n1 = 0; n1 < n; n1++) {
+      Array.Clear(row, 0, row.Length);
       tree.ComputeNonEdgeForces(n1, theta, row, ref sumQ);
-      Buffer.BlockCopy(row, 0, negF, (sizeof(double) * n1 * d), d *sizeof(double));
+      Buffer.BlockCopy(row, 0, negF, (sizeof(double) * n1 * d), d * sizeof(double));
     }

     // Compute final t-SNE gradient
     for (var i = 0; i < n; i++)
-      for(var j = 0; j < d; j++) {
+      for (var j = 0; j < d; j++) {
         dC[i, j] = posF[i, j] - negF[i, j] / sumQ;
       }
…
   private static void ComputeExactGradient(double[,] p, double[,] y, int n, int d, double[,] dC) {
-
     // Make sure the current gradient contains zeros
-    for(var i = 0; i < n; i++) for(var j = 0; j < d; j++) dC[i, j] = 0.0;
+    for (var i = 0; i < n; i++) for (var j = 0; j < d; j++) dC[i, j] = 0.0;

     // Compute the squared Euclidean distance matrix
…
     var q = new double[n, n];
     var sumQ = .0;
-    for(var n1 = 0; n1 < n; n1++) {
-      for(var m = 0; m < n; m++) {
-        if(n1 == m) continue;
+    for (var n1 = 0; n1 < n; n1++) {
+      for (var m = 0; m < n; m++) {
+        if (n1 == m) continue;
         q[n1, m] = 1 / (1 + dd[n1, m]);
         sumQ += q[n1, m];
…
     // Perform the computation of the gradient
-    for(var n1 = 0; n1 < n; n1++) {
-      for(var m = 0; m < n; m++) {
-        if(n1 == m) continue;
+    for (var n1 = 0; n1 < n; n1++) {
+      for (var m = 0; m < n; m++) {
+        if (n1 == m) continue;
         var mult = (p[n1, m] - q[n1, m] / sumQ) * q[n1, m];
-        for(var d1 = 0; d1 < d; d1++) {
+        for (var d1 = 0; d1 < d; d1++) {
           dC[n1, d1] += (y[n1, d1] - y[m, d1]) * mult;
         }
…
   private static void ComputeSquaredEuclideanDistance(double[,] x, int n, int d, double[,] dd) {
     var dataSums = new double[n];
-    for(var i = 0; i < n; i++) {
-      for(var j = 0; j < d; j++) {
+    for (var i = 0; i < n; i++) {
+      for (var j = 0; j < d; j++) {
         dataSums[i] += x[i, j] * x[i, j];
       }
     }
-    for(var i = 0; i < n; i++) {
-      for(var m = 0; m < n; m++) {
+    for (var i = 0; i < n; i++) {
+      for (var m = 0; m < n; m++) {
         dd[i, m] = dataSums[i] + dataSums[m];
       }
     }
-    for(var i = 0; i < n; i++) {
+    for (var i = 0; i < n; i++) {
       dd[i, i] = 0.0;
-      for(var m = i + 1; m < n; m++) {
+      for (var m = i + 1; m < n; m++) {
         dd[i, m] = 0.0;
-        for(var j = 0; j < d; j++) {
+        for (var j = 0; j < d; j++) {
           dd[i, m] += (x[i, j] - x[m, j]) * (x[i, j] - x[m, j]);
         }
…
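Review note: ComputeExactGradient above implements the standard t-SNE gradient (van der Maaten and Hinton, 2008),

  \( \dfrac{\partial C}{\partial y_i} = 4 \sum_{j \neq i} (p_{ij} - q_{ij}) \, (1 + \lVert y_i - y_j \rVert^2)^{-1} (y_i - y_j) \)

except that the constant factor 4 is dropped: `mult` is (p[n1, m] - q[n1, m] / sumQ) * q[n1, m], i.e. \((p_{ij} - q_{ij})\) times the unnormalized kernel \((1 + \lVert y_i - y_j \rVert^2)^{-1}\), and the missing constant is absorbed by the learning rate and the gains.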
     var d = x.GetLength(1);
     var mean = new double[d];
-    for(var i = 0; i < n; i++) {
-      for(var j = 0; j < d; j++) {
+    for (var i = 0; i < n; i++) {
+      for (var j = 0; j < d; j++) {
         mean[j] += x[i, j];
       }
     }
-    for(var i = 0; i < d; i++) {
+    for (var i = 0; i < d; i++) {
       mean[i] /= n;
     }
     // Subtract data mean
-    for(var i = 0; i < n; i++) {
-      for(var j = 0; j < d; j++) {
+    for (var i = 0; i < n; i++) {
+      for (var j = 0; j < d; j++) {
         x[i, j] -= mean[j];
       }
     }
   }
+  #endregion
 }
 }
stable/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/VantagePointTree.cs
r14862 r15249

 /// </summary>
 /// <typeparam name="T"></typeparam>
-public class VantagePointTree<T> : IVantagePointTree<T> {
-  #region properties
+public class VantagePointTree<T> {
   private readonly List<T> items;
   private readonly Node root;
   private readonly IDistance<T> distance;
-  #endregion

   public VantagePointTree(IDistance<T> distance, IEnumerable<T> items) : base() {
…
   }

+  /// <summary>
+  /// provides the k-nearest neighbours to a certain target element
+  /// </summary>
+  /// <param name="target">The target element</param>
+  /// <param name="k">How many neighbours</param>
+  /// <param name="results">The nearest neighbouring elements</param>
+  /// <param name="distances">The distances form the target corresponding to the neighbouring elements</param>
   public void Search(T target, int k, out IList<T> results, out IList<double> distances) {
     var heap = new PriorityQueue<double, IndexedItem<double>>(double.MaxValue, double.MinValue, k);
-    double tau = double.MaxValue;
+    var tau = double.MaxValue;
     Search(root, target, k, heap, ref tau);
     var res = new List<T>();
     var dist = new List<double>();
     while (heap.Size > 0) {
-      res.Add(items[heap.PeekMinValue().Index]);
+      res.Add(items[heap.PeekMinValue().Index]); // actually max distance
       dist.Add(heap.PeekMinValue().Value);
       heap.RemoveMin();
     }
     res.Reverse();
     dist.Reverse();
     results = res;
…
     if (dist < tau) {
       if (heap.Size == k) heap.RemoveMin(); // remove furthest node from result list (if we already have k results)
       heap.Insert(-dist, new IndexedItem<double>(node.index, dist));
       if (heap.Size == k) tau = heap.PeekMinValue().Value;
     }
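Review note: a usage sketch of the query above (hypothetical call site; EuclideanDistance implementing IDistance<double[]> is assumed):

    // Build the tree once over a point set, then query k nearest neighbours of a target.
    var tree = new VantagePointTree<double[]>(new EuclideanDistance(), points);
    IList<double[]> neighbours;
    IList<double> dists;
    tree.Search(points[0], 11, out neighbours, out dists);
    // Results come back sorted by increasing distance (the heap pops the largest
    // distance first; the Reverse() calls above restore ascending order). When the
    // query point is part of the data set it is returned at index 0 with distance 0,
    // which is why ComputeGaussianPerplexity requests one extra neighbour and skips
    // entry 0 via distances[m + 1].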