Changeset 15479
- Timestamp:
- 11/20/17 15:29:53 (7 years ago)
- Location:
- branches/Weighted TSNE/3.4/TSNE
- Files:
-
- 9 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/Weighted TSNE/3.4/TSNE/Distances/CosineDistance.cs
r15234 r15479 22 22 using System; 23 23 using System.Collections.Generic; 24 using System.Linq;25 24 using HeuristicLab.Common; 26 25 using HeuristicLab.Core; … … 28 27 29 28 namespace HeuristicLab.Algorithms.DataAnalysis { 30 31 29 /// <summary> 32 30 /// The angular distance as defined as a normalized distance measure dependent on the angle between two vectors. … … 35 33 [Item("CosineDistance", "The angular distance as defined as a normalized distance measure dependent on the angle between two vectors.")] 36 34 public class CosineDistance : DistanceBase<IEnumerable<double>> { 37 38 35 #region HLConstructors & Cloning 39 36 [StorableConstructor] … … 48 45 49 46 #region statics 50 public static double GetDistance(IReadOnlyList<double> point1, IReadOnlyList<double> point2) { 51 if (point1.Count != point2.Count) throw new ArgumentException("Cosine distance not defined on vectors of different length"); 52 var innerprod = 0.0; 53 var length1 = 0.0; 54 var length2 = 0.0; 55 56 for (var i = 0; i < point1.Count; i++) { 57 double d1 = point1[i], d2 = point2[i]; 58 innerprod += d1 * d2; 59 length1 += d1 * d1; 60 length2 += d2 * d2; 47 public static double GetDistance(IEnumerable<double> point1, IEnumerable<double> point2) { 48 using (IEnumerator<double> p1Enum = point1.GetEnumerator(), p2Enum = point2.GetEnumerator()) { 49 var innerprod = 0.0; 50 var length1 = 0.0; 51 var length2 = 0.0; 52 var p1Next = p1Enum.MoveNext(); 53 var p2Next = p2Enum.MoveNext(); 54 while (p1Next && p2Next) { 55 double d1 = p1Enum.Current, d2 = p2Enum.Current; 56 innerprod += d1 * d2; 57 length1 += d1 * d1; 58 length2 += d2 * d2; 59 p1Next = p1Enum.MoveNext(); 60 p2Next = p1Enum.MoveNext(); 61 } 62 var divisor = Math.Sqrt(length1 * length2); 63 if (divisor.IsAlmost(0)) throw new ArgumentException("Cosine distance is not defined on vectors of length 0"); 64 if (p2Next || p1Next) throw new ArgumentException("Cosine distance not defined on vectors of different length"); 65 return 1 - innerprod / divisor; 61 66 } 62 var l = Math.Sqrt(length1 * length2);63 if (l.IsAlmost(0)) throw new ArgumentException("Cosine distance is not defined on vectors of length 0");64 return 1 - innerprod / l;65 67 } 66 68 #endregion 67 69 public override double Get(IEnumerable<double> a, IEnumerable<double> b) { 68 return GetDistance(a .ToArray(), b.ToArray());70 return GetDistance(a, b); 69 71 } 70 72 } -
branches/Weighted TSNE/3.4/TSNE/Distances/DistanceBase.cs
r15451 r15479 51 51 } 52 52 53 privateclass DistanceComparer : IComparer<T>, IComparer {53 internal class DistanceComparer : IComparer<T>, IComparer { 54 54 private readonly T item; 55 55 private readonly IDistance<T> dist; -
branches/Weighted TSNE/3.4/TSNE/Distances/EuclideanDistance.cs
r15207 r15479 31 31 [Item("EuclideanDistance", "A norm function that uses Euclidean distance")] 32 32 public class EuclideanDistance : DistanceBase<IEnumerable<double>> { 33 34 33 #region HLConstructors & Cloning 35 34 [StorableConstructor] 36 35 protected EuclideanDistance(bool deserializing) : base(deserializing) { } 37 36 protected EuclideanDistance(EuclideanDistance original, Cloner cloner) : base(original, cloner) { } 38 public override IDeepCloneable Clone(Cloner cloner) { return new EuclideanDistance(this, cloner); } 37 public override IDeepCloneable Clone(Cloner cloner) { 38 return new EuclideanDistance(this, cloner); 39 } 39 40 public EuclideanDistance() { } 40 41 #endregion 41 42 42 public static double GetDistance(IReadOnlyList<double> point1, IReadOnlyList<double> point2) { 43 if (point1.Count != point2.Count) throw new ArgumentException("Euclidean distance not defined on vectors of different length"); 44 var sum = 0.0; 45 for (var i = 0; i < point1.Count; i++) { 46 var d = point1[i] - point2[i]; 47 sum += d * d; 43 public static double GetDistance(IEnumerable<double> point1, IEnumerable<double> point2) { 44 using (IEnumerator<double> p1Enum = point1.GetEnumerator(), p2Enum = point2.GetEnumerator()) { 45 var sum = 0.0; 46 var p1Next = p1Enum.MoveNext(); 47 var p2Next = p2Enum.MoveNext(); 48 while (p1Next && p2Next) { 49 var d = p1Enum.Current - p2Enum.Current; 50 sum += d * d; 51 p1Next = p1Enum.MoveNext(); 52 p2Next = p1Enum.MoveNext(); 53 } 54 if (p2Next || p1Next) throw new ArgumentException("Euclidean distance not defined on vectors of different length"); 55 return Math.Sqrt(sum); 48 56 } 49 50 return Math.Sqrt(sum);51 57 } 52 58 53 59 public override double Get(IEnumerable<double> a, IEnumerable<double> b) { 54 return GetDistance(a .ToArray(), b.ToArray());60 return GetDistance(a, b); 55 61 } 56 62 } -
branches/Weighted TSNE/3.4/TSNE/Distances/ManhattanDistance.cs
r15207 r15479 31 31 [Item("ManhattanDistance", "A distance function that uses block distance")] 32 32 public class ManhattanDistance : DistanceBase<IEnumerable<double>> { 33 34 33 #region HLConstructors & Cloning 35 34 [StorableConstructor] … … 45 44 #endregion 46 45 47 public static double GetDistance(double[] point1, double[] point2) { 48 if (point1.Length != point2.Length) throw new ArgumentException("Manhattan distance not defined on vectors of different length"); 49 var sum = 0.0; 50 for (var i = 0; i < point1.Length; i++) 51 sum += Math.Abs(point1[i] + point2[i]); 52 return sum; 46 public static double GetDistance(IEnumerable<double> point1, IEnumerable<double> point2) { 47 using (IEnumerator<double> p1Enum = point1.GetEnumerator(), p2Enum = point2.GetEnumerator()) { 48 var sum = 0.0; 49 var p1Next = p1Enum.MoveNext(); 50 var p2Next = p2Enum.MoveNext(); 51 while (p1Next && p2Next) { 52 sum += Math.Abs(p1Enum.Current - p2Enum.Current); 53 p1Next = p1Enum.MoveNext(); 54 p2Next = p1Enum.MoveNext(); 55 } 56 if (p2Next || p1Next) throw new ArgumentException("Manhattan distance not defined on vectors of different length"); 57 return sum; 58 } 53 59 } 54 60 55 61 public override double Get(IEnumerable<double> a, IEnumerable<double> b) { 56 return GetDistance(a .ToArray(), b.ToArray());62 return GetDistance(a, b); 57 63 } 58 64 } -
branches/Weighted TSNE/3.4/TSNE/Distances/WeightedEuclideanDistance.cs
r15455 r15479 21 21 22 22 using System; 23 using System.Collections; 23 24 using System.Collections.Generic; 24 25 using System.Linq; … … 31 32 namespace HeuristicLab.Algorithms.DataAnalysis { 32 33 [StorableClass] 33 [Item("WeightedEuclideanDistance", "A weighted norm function that uses Euclidean distance √(Σ(w[i] *(p1[i]-p2[i])²)/Σw[i])")]34 public class WeightedEuclideanDistance : DistanceBase<IEnumerable<double>> {34 [Item("WeightedEuclideanDistance", "A weighted norm function that uses Euclidean distance √(Σ(w[i]²*(p1[i]-p2[i])²))")] 35 public class WeightedEuclideanDistance : ParameterizedNamedItem, IDistance<IEnumerable<double>> { 35 36 public const string WeightsParameterName = "Weights"; 36 37 public IValueParameter<DoubleArray> WeigthsParameter { 37 get { return Parameters[WeightsParameterName] as IValueParameter<DoubleArray>; }38 get { return (IValueParameter<DoubleArray>) Parameters[WeightsParameterName]; } 38 39 } 39 40 … … 46 47 [StorableConstructor] 47 48 protected WeightedEuclideanDistance(bool deserializing) : base(deserializing) { } 48 protected WeightedEuclideanDistance(WeightedEuclideanDistance original, Cloner cloner) : base(original, cloner) { } 49 private void AfterDeserialization() { 50 RegisterParameterEvents(); 51 } 52 protected WeightedEuclideanDistance(WeightedEuclideanDistance original, Cloner cloner) : base(original, cloner) { 53 RegisterParameterEvents(); 54 } 49 55 public override IDeepCloneable Clone(Cloner cloner) { 50 56 return new WeightedEuclideanDistance(this, cloner); 51 57 } 52 58 public WeightedEuclideanDistance() { 53 Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, "The weights used to modify the euclidean distance. If no weights are specified a Random Forrest Regression / Classification is used to automatically set the weigths. ")); 59 Parameters.Add(new ValueParameter<DoubleArray>(WeightsParameterName, "The weights used to modify the euclidean distance.")); 60 RegisterParameterEvents(); 54 61 } 55 62 #endregion 56 63 57 public static double GetDistance(IReadOnlyList<double> point1, IReadOnlyList<double> point2, DoubleArray impacts) { 58 if (point1.Count != point2.Count) throw new ArgumentException("Weighted Euclidean distance not defined on vectors of different length"); 59 if (impacts == null || impacts.Count() != point1.Count) throw new ArgumentException("Weighted Euclidean distance requires a non-null weight vector of length equal to the number of allowed input double variables the compared points"); 60 var sum = 0.0; 61 var sumW = 0.0; 62 for (var i = 0; i < point1.Count; i++) { 63 var d = point1[i] - point2[i]; 64 var w = impacts[i] * impacts[i]; 65 sum += d * d * w; 66 sumW += w; 64 public static double GetDistance(IEnumerable<double> point1, IEnumerable<double> point2, IEnumerable<double> weights) { 65 using (IEnumerator<double> p1Enum = point1.GetEnumerator(), p2Enum = point2.GetEnumerator(), weEnum = weights.GetEnumerator()) { 66 var sum = 0.0; 67 var p1Next = p1Enum.MoveNext(); 68 var p2Next = p2Enum.MoveNext(); 69 var weNext = weEnum.MoveNext(); 70 while (p1Next && p2Next && weNext) { 71 var d = p1Enum.Current - p2Enum.Current; 72 var w = weEnum.Current; 73 sum += d * d * w * w; 74 p1Next = p1Enum.MoveNext(); 75 p2Next = p2Enum.MoveNext(); 76 weNext = weEnum.MoveNext(); 77 } 78 if (weNext) throw new ArgumentException("Weighted Euclidean distance requires a non-null weight vector of length equal to the number of allowed input double variables the compared points"); 79 if (p1Next || p2Next) throw new ArgumentException("Weighted Euclidean distance not defined on vectors of different length"); 80 return Math.Sqrt(sum); 67 81 } 68 return Math.Sqrt(sum / sumW);69 82 } 70 83 71 public override double Get(IEnumerable<double> a, IEnumerable<double> b) { 72 return GetDistance(a.ToArray(), b.ToArray(), Weights); 84 public double Get(IEnumerable<double> a, IEnumerable<double> b) { 85 return GetDistance(a, b, Weights); 86 } 87 public IComparer<IEnumerable<double>> GetDistanceComparer(IEnumerable<double> item) { 88 return new DistanceBase<IEnumerable<double>>.DistanceComparer(item, this); 89 } 90 public double Get(object x, object y) { 91 return Get((IEnumerable<double>) x, (IEnumerable<double>) y); 92 } 93 public IComparer GetDistanceComparer(object item) { 94 return new DistanceBase<IEnumerable<double>>.DistanceComparer((IEnumerable<double>) item, this); 95 } 96 97 private void RegisterParameterEvents() { 98 WeigthsParameter.ValueChanged += OnWeightsArrayChanged; 99 WeigthsParameter.Value.ItemChanged += OnWeightChanged; 100 } 101 private void OnWeightChanged(object sender, EventArgs<int> e) { 102 WeigthsParameter.Value.ItemChanged -= OnWeightChanged; 103 Weights[e.Value] = Math.Max(0, Weights[e.Value]); 104 WeigthsParameter.Value.ItemChanged -= OnWeightChanged; 105 } 106 private void OnWeightsArrayChanged(object sender, EventArgs e) { 107 for (int i = 0; i < Weights.Length; i++) 108 Weights[i] = Math.Max(0, Weights[i]); 109 WeigthsParameter.Value.ItemChanged += OnWeightChanged; 73 110 } 74 111 } -
branches/Weighted TSNE/3.4/TSNE/TSNEAlgorithm.cs
r15455 r15479 29 29 using HeuristicLab.Core; 30 30 using HeuristicLab.Data; 31 using HeuristicLab.Encodings.RealVectorEncoding;32 31 using HeuristicLab.Optimization; 33 32 using HeuristicLab.Parameters; … … 87 86 #region Parameter properties 88 87 public IFixedValueParameter<DoubleValue> PerplexityParameter { 89 get { return Parameters[PerplexityParameterName] as IFixedValueParameter<DoubleValue>; }88 get { return (IFixedValueParameter<DoubleValue>) Parameters[PerplexityParameterName]; } 90 89 } 91 90 public IFixedValueParameter<PercentValue> ThetaParameter { 92 get { return Parameters[ThetaParameterName] as IFixedValueParameter<PercentValue>; }91 get { return (IFixedValueParameter<PercentValue>) Parameters[ThetaParameterName]; } 93 92 } 94 93 public IFixedValueParameter<IntValue> NewDimensionsParameter { 95 get { return Parameters[NewDimensionsParameterName] as IFixedValueParameter<IntValue>; }94 get { return (IFixedValueParameter<IntValue>) Parameters[NewDimensionsParameterName]; } 96 95 } 97 96 public IConstrainedValueParameter<IDistance<double[]>> DistanceFunctionParameter { 98 get { return Parameters[DistanceFunctionParameterName] as IConstrainedValueParameter<IDistance<double[]>>; }97 get { return (IConstrainedValueParameter<IDistance<double[]>>) Parameters[DistanceFunctionParameterName]; } 99 98 } 100 99 public IFixedValueParameter<IntValue> MaxIterationsParameter { 101 get { return Parameters[MaxIterationsParameterName] as IFixedValueParameter<IntValue>; }100 get { return (IFixedValueParameter<IntValue>) Parameters[MaxIterationsParameterName]; } 102 101 } 103 102 public IFixedValueParameter<IntValue> StopLyingIterationParameter { 104 get { return Parameters[StopLyingIterationParameterName] as IFixedValueParameter<IntValue>; }103 get { return (IFixedValueParameter<IntValue>) Parameters[StopLyingIterationParameterName]; } 105 104 } 106 105 public IFixedValueParameter<IntValue> MomentumSwitchIterationParameter { 107 get { return Parameters[MomentumSwitchIterationParameterName] as IFixedValueParameter<IntValue>; }106 get { return (IFixedValueParameter<IntValue>) Parameters[MomentumSwitchIterationParameterName]; } 108 107 } 109 108 public IFixedValueParameter<DoubleValue> InitialMomentumParameter { 110 get { return Parameters[InitialMomentumParameterName] as IFixedValueParameter<DoubleValue>; }109 get { return (IFixedValueParameter<DoubleValue>) Parameters[InitialMomentumParameterName]; } 111 110 } 112 111 public IFixedValueParameter<DoubleValue> FinalMomentumParameter { 113 get { return Parameters[FinalMomentumParameterName] as IFixedValueParameter<DoubleValue>; }112 get { return (IFixedValueParameter<DoubleValue>) Parameters[FinalMomentumParameterName]; } 114 113 } 115 114 public IFixedValueParameter<DoubleValue> EtaParameter { 116 get { return Parameters[EtaParameterName] as IFixedValueParameter<DoubleValue>; }115 get { return (IFixedValueParameter<DoubleValue>) Parameters[EtaParameterName]; } 117 116 } 118 117 public IFixedValueParameter<BoolValue> SetSeedRandomlyParameter { 119 get { return Parameters[SetSeedRandomlyParameterName] as IFixedValueParameter<BoolValue>; }118 get { return (IFixedValueParameter<BoolValue>) Parameters[SetSeedRandomlyParameterName]; } 120 119 } 121 120 public IFixedValueParameter<IntValue> SeedParameter { 122 get { return Parameters[SeedParameterName] as IFixedValueParameter<IntValue>; }121 get { return (IFixedValueParameter<IntValue>) Parameters[SeedParameterName]; } 123 122 } 124 123 public IConstrainedValueParameter<StringValue> ClassesNameParameter { 125 get { return Parameters[ClassesNameParameterName] as IConstrainedValueParameter<StringValue>; }124 get { return (IConstrainedValueParameter<StringValue>) Parameters[ClassesNameParameterName]; } 126 125 } 127 126 public IFixedValueParameter<BoolValue> NormalizationParameter { 128 get { return Parameters[NormalizationParameterName] as IFixedValueParameter<BoolValue>; }127 get { return (IFixedValueParameter<BoolValue>) Parameters[NormalizationParameterName]; } 129 128 } 130 129 public IFixedValueParameter<BoolValue> RandomInitializationParameter { 131 get { return Parameters[RandomInitializationParameterName] as IFixedValueParameter<BoolValue>; }130 get { return (IFixedValueParameter<BoolValue>) Parameters[RandomInitializationParameterName]; } 132 131 } 133 132 public IFixedValueParameter<IntValue> UpdateIntervalParameter { 134 get { return Parameters[UpdateIntervalParameterName] as IFixedValueParameter<IntValue>; }133 get { return (IFixedValueParameter<IntValue>) Parameters[UpdateIntervalParameterName]; } 135 134 } 136 135 #endregion … … 336 335 private void OnColumnsChanged(object sender, EventArgs e) { 337 336 if (Problem == null || Problem.ProblemData == null || Problem.ProblemData.Dataset == null || !Parameters.ContainsKey(DistanceFunctionParameterName)) return; 338 DistanceFunctionParameter.ValidValues.OfType<WeightedEuclideanDistance>().Single().Weights = new RealVector(Problem.ProblemData.AllowedInputVariables.Select(x => 1.0).ToArray());337 DistanceFunctionParameter.ValidValues.OfType<WeightedEuclideanDistance>().Single().Weights = new DoubleArray(Problem.ProblemData.AllowedInputVariables.Select(x => 1.0).ToArray()); 339 338 } 340 339 … … 531 530 532 531 private static Color ConvertTotalToRgb(double low, double high, double cell) { 532 var colorGradient = ColorGradient.Colors; 533 533 var range = high - low; 534 var h = cell / range; 535 return HsVtoRgb(h * 0.5, 1.0f, 1.0f); 536 } 537 538 //taken from https://stackoverflow.com/a/17099130 539 private static Color HsVtoRgb(double hue, double saturation, double value) { 540 while (hue > 1.0) { hue -= 1.0; } 541 while (hue < 0.0) { hue += 1.0; } 542 while (saturation > 1.0) { saturation -= 1.0; } 543 while (saturation < 0.0) { saturation += 1.0; } 544 while (value > 1.0) { value -= 1.0; } 545 while (value < 0.0) { value += 1.0; } 546 if (hue > 0.999) { hue = 0.999; } 547 if (hue < 0.001) { hue = 0.001; } 548 if (saturation > 0.999) { saturation = 0.999; } 549 if (saturation < 0.001) { return Color.FromArgb((int) (value * 255.0), (int) (value * 255.0), (int) (value * 255.0)); } 550 if (value > 0.999) { value = 0.999; } 551 if (value < 0.001) { value = 0.001; } 552 553 var h6 = hue * 6.0; 554 if (h6.IsAlmost(6.0)) { h6 = 0.0; } 555 var ihue = (int) h6; 556 var p = value * (1.0 - saturation); 557 var q = value * (1.0 - saturation * (h6 - ihue)); 558 var t = value * (1.0 - saturation * (1.0 - (h6 - ihue))); 559 switch (ihue) { 560 case 0: 561 return Color.FromArgb((int) (value * 255), (int) (t * 255), (int) (p * 255)); 562 case 1: 563 return Color.FromArgb((int) (q * 255), (int) (value * 255), (int) (p * 255)); 564 case 2: 565 return Color.FromArgb((int) (p * 255), (int) (value * 255), (int) (t * 255)); 566 case 3: 567 return Color.FromArgb((int) (p * 255), (int) (q * 255), (int) (value * 255)); 568 case 4: 569 return Color.FromArgb((int) (t * 255), (int) (p * 255), (int) (value * 255)); 570 default: 571 return Color.FromArgb((int) (value * 255), (int) (p * 255), (int) (q * 255)); 572 } 534 var h = cell / range * colorGradient.Count; 535 return colorGradient[(int) h]; 573 536 } 574 537 #endregion -
branches/Weighted TSNE/3.4/TSNE/TSNEStatic.cs
r15455 r15479 257 257 } 258 258 var sumP = .0; 259 for (var i = 0; i < data.Length; i++) for (var j = 0; j < data.Length; j++) sumP += p[i, j]; 260 for (var i = 0; i < data.Length; i++) for (var j = 0; j < data.Length; j++) p[i, j] /= sumP; 259 for (var i = 0; i < data.Length; i++) { 260 for (var j = 0; j < data.Length; j++) { 261 sumP += p[i, j]; 262 } 263 } 264 for (var i = 0; i < data.Length; i++) { 265 for (var j = 0; j < data.Length; j++) { 266 p[i, j] /= sumP; 267 } 268 } 261 269 return p; 262 270 } … … 555 563 public static double[,] Run(T[] data, IDistance<T> distance, IRandom random, 556 564 int newDimensions = 2, double perplexity = 25, int iterations = 1000, 557 double theta = 0, 558 int stopLyingIter = 0, int momSwitchIter = 0, double momentum = .5, 565 double theta = 0, int stopLyingIter = 0, int momSwitchIter = 0, double momentum = .5, 559 566 double finalMomentum = .8, double eta = 10.0 560 567 ) { … … 591 598 } 592 599 } 593 594 600 595 601 // Perform gradient update (with momentum and gains) -
branches/Weighted TSNE/3.4/TSNE/TSNEUtils.cs
r15455 r15479 66 66 /// <param name="comparer">comparer for list elemnts </param> 67 67 /// <returns></returns> 68 internal static void NthElement<T>(this IList<T> list, int left, int right, int n, IComparer<T> comparer) {68 internal static void PartialSort<T>(this IList<T> list, int left, int right, int n, IComparer<T> comparer) { 69 69 while (true) { 70 70 if (left == right) return; -
branches/Weighted TSNE/3.4/TSNE/VantagePointTree.cs
r15207 r15479 139 139 // Partition around the median distance 140 140 var median = (upper + lower) / 2; 141 items. NthElement(lower + 1, upper - 1, median, distance.GetDistanceComparer(items[lower]));141 items.PartialSort(lower + 1, upper - 1, median, distance.GetDistanceComparer(items[lower])); 142 142 143 143 // Threshold of the new node will be the distance to the median
Note: See TracChangeset
for help on using the changeset viewer.