Free cookie consent management tool by TermsFeed Policy Generator

# Changeset 14767

Ignore:
Timestamp:
03/19/17 18:47:00 (7 years ago)
Message:

#2700: made some changes while reviewing the code

Location:
branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4
Files:
6 deleted
11 edited

Unmodified
Removed
## branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj

 r14682
## branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/Interfaces/TSNEInterfaces/IDistance.cs

 r14512 public interface IDistance : IItem { /// /// Calculates a distance measure between two arbitrary Vectors. The distance value d is /// 1.) positive  d(x,y)>=0 /// Calculates a distance measure between two objects. /// 1.) non-negative d(x,y) >= 0 /// 2.) symmetric d(x,y) = d(y,x) /// 3.) zero-reflexive d(x,x) =0; /// 3.) zero-reflexive d(x,x) = 0; /// /// an array representing point x /// >an array representing point y /// d(x,y) double Get(T a, T b); /// /// Calculates the correct kernel measure if it is smaller than threshold, but any value greater then threshold if the correct distance is greater. /// This is for performace only /// /// /// /// /// double Get(T a, T b, double threshold); double Get(T x, T y); ///
## branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/DataPoint.cs

 r14518 #endregion } }
## branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/DataPointDistance.cs

 r14512 return dist.Get(a.X, b.X); } public override double Get(IDataPoint a, IDataPoint b, double threshold) { return dist.Get(a.X, b.X, threshold); } } }
## branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/DistanceBase.cs

 r14512 public abstract double Get(T a, T b); public abstract double Get(T a, T b, double threshold); public IComparer GetDistanceComparer(T item) {
## branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/EuclidianDistance.cs

 r14512 return Math.Sqrt(point1.Zip(point2, (a1, b1) => (a1 - b1) * (a1 - b1)).Sum()); } public static double GetDistance(IEnumerable a, IEnumerable b, double threshold) { double sum = 0; var it1 = a.GetEnumerator(); var it2 = b.GetEnumerator(); while (sum > threshold * threshold && it1.MoveNext() && it2.MoveNext()) { var d = it1.Current - it2.Current; sum += d * d; } it1.Dispose(); it2.Dispose(); return sum; } #endregion return GetDistance(a.ToArray(), b.ToArray()); } public override double Get(IEnumerable a, IEnumerable b, double threshold) { return GetDistance(a, b, threshold); } } }
## branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/InnerProductDistance.cs

 r14512 /// /// this is not a proper distance /// The angluar distance as defined as a normalized distance measure dependent on the angle between two vectors. /// It is designed for vectors with all positive coordinates. /// [StorableClass] [Item("InnerProductDistance", "The Angluar Distance as defined as a normalized distance measure dependent on the angle between two vectors.\nIt is designed for vectors with all positive coordinates")] public class InnerProductDistance : DistanceBase> { [Item("InnerProductDistance", "The angluar distance as defined as a normalized distance measure dependent on the angle between two vectors.\nIt is designed for vectors with all positive coordinates")] public class InnerProductDistance : DistanceBase> { #region HLConstructors #region statics public static double GetDistance(IReadOnlyList point1, IReadOnlyList point2) { if (point1.Count != point2.Count) throw new ArgumentException("Inner Product distance not defined on vectors of different length"); return point1.Zip(point2, (x, y) => x * y).Sum(); } public static double GetDistance(IEnumerable a, IEnumerable b, double threshold) { return GetDistance(a.ToArray(), b.ToArray());     //no shortcut evaluation for Inner Product (summands may be negative => no way of telling if threshold is reached or not) public static double GetDistance(IEnumerable point1, IEnumerable point2) { var xs = point1.GetEnumerator(); var ys = point2.GetEnumerator(); var sum = 0.0; while(xs.MoveNext() & ys.MoveNext()) { if(xs.Current < 0 || ys.Current < 0) throw new ArgumentException("Inner product distance is only defined for vectors with non-negative elements"); sum += xs.Current * ys.Current; } if(xs.MoveNext() || ys.MoveNext()) throw new ArgumentException("Enumerables contain a different number of elements"); return sum; } #endregion public override double Get(IReadOnlyList a, IReadOnlyList b) { public override double Get(IEnumerable a, IEnumerable b) { return GetDistance(a, b); } public override double 
Get(IReadOnlyList a, IReadOnlyList b, double threshold) { return GetDistance(a, b, threshold); } }
## branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/PriorityQueue.cs

 r14414 // implementation based on C++ version from Peter Sanders // http://www.mpi-inf.mpg.de/~sanders/programs/spq/ public sealed class PriorityQueue : IHeap where TK : IComparable { public sealed class PriorityQueue where TK : IComparable { private class KNElement { public TK Key { get; set; } } private int capacity; private readonly int capacity; private int size; // index of last used element private int finalLayerSize; // size of first layer with free space private int finalLayerDist; // distance to end of layer //private KNElement[] rawData; private KNElement[] data;    // alligned version of rawData private readonly KNElement[] data;    // aligned version of rawData public PriorityQueue(TK supremum, TK infimum, int cap) { } public int Size { public int Size { get { return size; } } public TK Supremum { public TK Supremum { get { return data[capacity + 1].Key; } } public KeyValuePair PeekMin() { if (size == 0) { if(size == 0) { throw new InvalidOperationException("Heap is empty"); } size = sz - 1; finalLayerDist++; if (finalLayerDist == finalLayerSize) { if(finalLayerDist == finalLayerSize) { finalLayerSize >>= 2; finalLayerDist = 0; } while (succ < sz) { while(succ < sz) { var minKey = data[succ].Key; var delta = 0; var otherKey = data[succ + 1].Key; if (otherKey.CompareTo(minKey) < 0) { if(otherKey.CompareTo(minKey) < 0) { minKey = otherKey; delta = 1; } otherKey = data[succ + 2].Key; if (otherKey.CompareTo(minKey) < 0) { if(otherKey.CompareTo(minKey) < 0) { minKey = otherKey; delta = 2; } otherKey = data[succ + 3].Key; if (otherKey.CompareTo(minKey) < 0) { if(otherKey.CompareTo(minKey) < 0) { minKey = otherKey; delta = 3; pred = pred - layerDist; // finally preds index while (data[pred].Key.CompareTo(bubble) > 0) {  // must terminate since inf at root while(data[pred].Key.CompareTo(bubble) > 0) {  // must terminate since inf at root data[hole].Key = data[pred].Key; data[hole].Value = data[pred].Value; finalLayerDist--; if (finalLayerDist == -1) { // layer 
full // start next layer if(finalLayerDist == -1) { // layer full // start next layer finalLayerSize <<= 2; finalLayerDist = finalLayerSize - 1; pred = pred - layerDist; // finally preds index var predKey = data[pred].Key; while (predKey.CompareTo(key) > 0) { while(predKey.CompareTo(key) > 0) { data[hole].Key = predKey; data[hole].Value = data[pred].Value; var sup = Supremum; var cap = capacity; for (var i = 1; i <= cap; i++) { for(var i = 1; i <= cap; i++) { data[i].Key = sup; }
## branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNE.cs

 r14742 namespace HeuristicLab.Algorithms.DataAnalysis { [StorableClass] public class TSNE : DeepCloneable, ITSNE where T : class, IDeepCloneable { public class TSNE : DeepCloneable where T : class, IDeepCloneable { private const string IterationResultName = "Iteration"; return new TSNE(distance, random).Run(data, newDimensions, perplexity, theta); } public static double[,] Run(TR[] data, int newDimensions, double perplexity, double theta, Func distance, IRandom random) where TR : class, IDeepCloneable { return new TSNE(new FuctionalDistance(distance), random).Run(data, newDimensions, perplexity, theta); } #region helpers if (results == null) return; var plot = results[ErrorPlotResultName].Value as DataTable; if (plot == null) throw new ArgumentException("Could not create/access Error-DataTable in Results-Collection. Was it removed by some effect?"); if (plot == null) throw new ArgumentException("Could not create/access error data table in results collection. Was it removed by some effect?"); var errors = plot.Rows["errors"].Values; var c = exact
## branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNEAnalysis.cs

 r14742 namespace HeuristicLab.Algorithms.DataAnalysis { /// /// Linear regression data analysis algorithm. /// t-distributed stochastic neighbourhood embedding (TSNE) projects the data in a low dimensional /// space to allow visual cluster identification. /// [Item("TSNE", "t-distributed stochastic neighbourhood embedding projects the data in a low dimensional space to allow visual cluster identification")] [Item("TSNE", "t-distributed stochastic neighbourhood embedding projects the data in a low " + "dimensional space to allow visual cluster identification.")] [Creatable(CreatableAttribute.Categories.DataAnalysis, Priority = 100)] [StorableClass] public sealed class TSNEAnalysis : BasicAlgorithm { public override bool SupportsPause { public override bool SupportsPause { get { return false; } } public override Type ProblemType { public override Type ProblemType { get { return typeof(IDataAnalysisProblem); } } public new IDataAnalysisProblem Problem { public new IDataAnalysisProblem Problem { get { return (IDataAnalysisProblem)base.Problem; } set { base.Problem = value; } #region Parameterproperties public IFixedValueParameter PerplexityParameter { public IFixedValueParameter PerplexityParameter { get { return Parameters[PerplexityParameterName] as IFixedValueParameter; } } public OptionalValueParameter ThetaParameter { public OptionalValueParameter ThetaParameter { get { return Parameters[ThetaParameterName] as OptionalValueParameter; } } public IFixedValueParameter NewDimensionsParameter { public IFixedValueParameter NewDimensionsParameter { get { return Parameters[NewDimensionsParameterName] as IFixedValueParameter; } } public IValueParameter> DistanceParameter { public IValueParameter> DistanceParameter { get { return Parameters[DistanceParameterName] as IValueParameter>; } } public IFixedValueParameter MaxIterationsParameter { public IFixedValueParameter MaxIterationsParameter { get { return Parameters[MaxIterationsParameterName] as IFixedValueParameter; } } 
public IFixedValueParameter StopLyingIterationParameter { public IFixedValueParameter StopLyingIterationParameter { get { return Parameters[StopLyingIterationParameterName] as IFixedValueParameter; } } public IFixedValueParameter MomentumSwitchIterationParameter { public IFixedValueParameter MomentumSwitchIterationParameter { get { return Parameters[MomentumSwitchIterationParameterName] as IFixedValueParameter; } } public IFixedValueParameter InitialMomentumParameter { public IFixedValueParameter InitialMomentumParameter { get { return Parameters[InitialMomentumParameterName] as IFixedValueParameter; } } public IFixedValueParameter FinalMomentumParameter { public IFixedValueParameter FinalMomentumParameter { get { return Parameters[FinalMomentumParameterName] as IFixedValueParameter; } } public IFixedValueParameter EtaParameter { public IFixedValueParameter EtaParameter { get { return Parameters[EtaParameterName] as IFixedValueParameter; } } public IFixedValueParameter SetSeedRandomlyParameter { public IFixedValueParameter SetSeedRandomlyParameter { get { return Parameters[SetSeedRandomlyParameterName] as IFixedValueParameter; } } public IFixedValueParameter SeedParameter { public IFixedValueParameter SeedParameter { get { return Parameters[SeedParameterName] as IFixedValueParameter; } } public IFixedValueParameter ClassesParameter { public IFixedValueParameter ClassesParameter { get { return Parameters[ClassesParameterName] as IFixedValueParameter; } } public IFixedValueParameter NormalizationParameter { public IFixedValueParameter NormalizationParameter { get { return Parameters[NormalizationParameterName] as IFixedValueParameter; } } #region  Properties public IDistance Distance { public IDistance Distance { get { return DistanceParameter.Value; } } public double Perplexity { public double Perplexity { get { return PerplexityParameter.Value.Value; } } public double Theta { public double Theta { get { return ThetaParameter.Value == null ? 
0 : ThetaParameter.Value.Value; } } public int NewDimensions { public int NewDimensions { get { return NewDimensionsParameter.Value.Value; } } public int MaxIterations { public int MaxIterations { get { return MaxIterationsParameter.Value.Value; } } public int StopLyingIteration { public int StopLyingIteration { get { return StopLyingIterationParameter.Value.Value; } } public int MomentumSwitchIteration { public int MomentumSwitchIteration { get { return MomentumSwitchIterationParameter.Value.Value; } } public double InitialMomentum { public double InitialMomentum { get { return InitialMomentumParameter.Value.Value; } } public double FinalMomentum { public double FinalMomentum { get { return FinalMomentumParameter.Value.Value; } } public double Eta { get { public double Eta { get { return EtaParameter.Value == null ? 0 : EtaParameter.Value.Value; } } public bool SetSeedRandomly { public bool SetSeedRandomly { get { return SetSeedRandomlyParameter.Value.Value; } } public uint Seed { public uint Seed { get { return (uint)SeedParameter.Value.Value; } } public string Classes { public string Classes { get { return ClassesParameter.Value.Value; } } public bool Normalization { public bool Normalization { get { return NormalizationParameter.Value.Value; } } public override void Stop() { base.Stop(); if (tsne != null) tsne.Running = false; if(tsne != null) tsne.Running = false; } //color datapoints acording to Classes-Variable (be it double or string) if (problemData.Dataset.VariableNames.Contains(Classes)) { if ((problemData.Dataset as Dataset).VariableHasType(Classes)) { if(problemData.Dataset.VariableNames.Contains(Classes)) { if((problemData.Dataset as Dataset).VariableHasType(Classes)) { var classes = problemData.Dataset.GetStringValues(Classes).ToArray(); for (var i = 0; i < classes.Length; i++) { if (!dataRowNames.ContainsKey(classes[i])) dataRowNames.Add(classes[i], new List()); for(var i = 0; i < classes.Length; i++) { if(!dataRowNames.ContainsKey(classes[i])) 
dataRowNames.Add(classes[i], new List()); dataRowNames[classes[i]].Add(i); } } else if ((problemData.Dataset as Dataset).VariableHasType(Classes)) { } else if((problemData.Dataset as Dataset).VariableHasType(Classes)) { var classValues = problemData.Dataset.GetDoubleValues(Classes).ToArray(); var max = classValues.Max() + 0.1; var min = classValues.Min() - 0.1; const int contours = 8; for (var i = 0; i < contours; i++) { for(var i = 0; i < contours; i++) { var contourname = GetContourName(i, min, max, contours); dataRowNames.Add(contourname, new List()); rows[contourname].VisualProperties.PointSize = i + 3; } for (var i = 0; i < classValues.Length; i++) { for(var i = 0; i < classValues.Length; i++) { dataRowNames[GetContourName(classValues[i], min, max, contours)].Add(i); } //Set up and run TSNE if (SetSeedRandomly) SeedParameter.Value.Value = new System.Random().Next(); if(SetSeedRandomly) SeedParameter.Value.Value = new System.Random().Next(); var random = new MersenneTwister(Seed); tsne = new TSNE(Distance, random, Results, MaxIterations, StopLyingIteration, MomentumSwitchIteration, InitialMomentum, FinalMomentum, Eta, dataRowNames, rows); var allowedInputVariables = problemData.AllowedInputVariables.ToArray(); var data = new RealVector[dataset.Rows]; for (var row = 0; row < dataset.Rows; row++) data[row] = new RealVector(allowedInputVariables.Select(col => dataset.GetDoubleValue(col, row)).ToArray()); if (Normalization) data = NormalizeData(data); for(var row = 0; row < dataset.Rows; row++) data[row] = new RealVector(allowedInputVariables.Select(col => dataset.GetDoubleValue(col, row)).ToArray()); if(Normalization) data = NormalizeData(data); tsne.Run(data, NewDimensions, Perplexity, Theta); } var sd = new double[n]; var nData = new RealVector[data.Count]; for (var i = 0; i < n; i++) { for(var i = 0; i < n; i++) { var i1 = i; sd[i] = Enumerable.Range(0, data.Count).Select(x => data[x][i1]).StandardDeviation(); mean[i] = Enumerable.Range(0, data.Count).Select(x 
=> data[x][i1]).Average(); } for (var i = 0; i < data.Count; i++) { for(var i = 0; i < data.Count; i++) { nData[i] = new RealVector(n); for (var j = 0; j < n; j++) nData[i][j] = (data[i][j] - mean[j]) / sd[j]; for(var j = 0; j < n; j++) nData[i][j] = (data[i][j] - mean[j]) / sd[j]; } return nData; return "[" + (min + i * size) + ";" + (min + (i + 1) * size) + ")"; } } }
## branches/TSNE/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/VPTree.cs

 r14518 public void Search(T target, int k, out List results, out List distances) { IHeap heap = new PriorityQueue(double.MaxValue, double.MinValue, k); var heap = new PriorityQueue(double.MaxValue, double.MinValue, k); tau = double.MaxValue; Search(root, target, k, heap); } private void Search(Node node, T target, int k, IHeap heap) { private void Search(Node node, T target, int k, PriorityQueue heap) { if (node == null) return; var dist = distance.Get(items[node.index], target); if (dist < tau) { if (heap.Size == k) heap.RemoveMin(); heap.Insert(-dist, new HeapItem(node.index, dist));//TODO check if minheap or maxheap schould be used here heap.Insert(-dist, new HeapItem(node.index, dist));//TODO check if minheap or maxheap should be used here if (heap.Size == k) tau = heap.PeekMinValue().Dist; }
Note: See TracChangeset for help on using the changeset viewer.