# Changeset 15479

Ignore:
Timestamp:
11/20/17 15:29:53 (4 years ago)
Message:

#2850 worked on weighted tSNE

Location:
branches/Weighted TSNE/3.4/TSNE
Files:
9 edited

Unmodified
Added
Removed
• ## branches/Weighted TSNE/3.4/TSNE/Distances/CosineDistance.cs

 r15234 using System; using System.Collections.Generic; using System.Linq; using HeuristicLab.Common; using HeuristicLab.Core; namespace HeuristicLab.Algorithms.DataAnalysis { /// /// The angular distance as defined as a normalized distance measure dependent on the angle between two vectors. [Item("CosineDistance", "The angular distance as defined as a normalized distance measure dependent on the angle between two vectors.")] public class CosineDistance : DistanceBase> { #region HLConstructors & Cloning [StorableConstructor] #region statics public static double GetDistance(IReadOnlyList point1, IReadOnlyList point2) { if (point1.Count != point2.Count) throw new ArgumentException("Cosine distance not defined on vectors of different length"); var innerprod = 0.0; var length1 = 0.0; var length2 = 0.0; for (var i = 0; i < point1.Count; i++) { double d1 = point1[i], d2 = point2[i]; innerprod += d1 * d2; length1 += d1 * d1; length2 += d2 * d2; public static double GetDistance(IEnumerable point1, IEnumerable point2) { using (IEnumerator p1Enum = point1.GetEnumerator(), p2Enum = point2.GetEnumerator()) { var innerprod = 0.0; var length1 = 0.0; var length2 = 0.0; var p1Next = p1Enum.MoveNext(); var p2Next = p2Enum.MoveNext(); while (p1Next && p2Next) { double d1 = p1Enum.Current, d2 = p2Enum.Current; innerprod += d1 * d2; length1 += d1 * d1; length2 += d2 * d2; p1Next = p1Enum.MoveNext(); p2Next = p1Enum.MoveNext(); } var divisor = Math.Sqrt(length1 * length2); if (divisor.IsAlmost(0)) throw new ArgumentException("Cosine distance is not defined on vectors of length 0"); if (p2Next || p1Next) throw new ArgumentException("Cosine distance not defined on vectors of different length"); return 1 - innerprod / divisor; } var l = Math.Sqrt(length1 * length2); if (l.IsAlmost(0)) throw new ArgumentException("Cosine distance is not defined on vectors of length 0"); return 1 - innerprod / l; } #endregion public override double Get(IEnumerable a, IEnumerable b) { return 
GetDistance(a.ToArray(), b.ToArray()); return GetDistance(a, b); } }
• ## branches/Weighted TSNE/3.4/TSNE/Distances/DistanceBase.cs

 r15451 } private class DistanceComparer : IComparer, IComparer { internal class DistanceComparer : IComparer, IComparer { private readonly T item; private readonly IDistance dist;
• ## branches/Weighted TSNE/3.4/TSNE/Distances/EuclideanDistance.cs

 r15207 [Item("EuclideanDistance", "A norm function that uses Euclidean distance")] public class EuclideanDistance : DistanceBase> { #region HLConstructors & Cloning [StorableConstructor] protected EuclideanDistance(bool deserializing) : base(deserializing) { } protected EuclideanDistance(EuclideanDistance original, Cloner cloner) : base(original, cloner) { } public override IDeepCloneable Clone(Cloner cloner) { return new EuclideanDistance(this, cloner); } public override IDeepCloneable Clone(Cloner cloner) { return new EuclideanDistance(this, cloner); } public EuclideanDistance() { } #endregion public static double GetDistance(IReadOnlyList point1, IReadOnlyList point2) { if (point1.Count != point2.Count) throw new ArgumentException("Euclidean distance not defined on vectors of different length"); var sum = 0.0; for (var i = 0; i < point1.Count; i++) { var d = point1[i] - point2[i]; sum += d * d; public static double GetDistance(IEnumerable point1, IEnumerable point2) { using (IEnumerator p1Enum = point1.GetEnumerator(), p2Enum = point2.GetEnumerator()) { var sum = 0.0; var p1Next = p1Enum.MoveNext(); var p2Next = p2Enum.MoveNext(); while (p1Next && p2Next) { var d = p1Enum.Current - p2Enum.Current; sum += d * d; p1Next = p1Enum.MoveNext(); p2Next = p1Enum.MoveNext(); } if (p2Next || p1Next) throw new ArgumentException("Euclidean distance not defined on vectors of different length"); return Math.Sqrt(sum); } return Math.Sqrt(sum); } public override double Get(IEnumerable a, IEnumerable b) { return GetDistance(a.ToArray(), b.ToArray()); return GetDistance(a, b); } }
• ## branches/Weighted TSNE/3.4/TSNE/Distances/ManhattanDistance.cs

 r15207 [Item("ManhattanDistance", "A distance function that uses block distance")] public class ManhattanDistance : DistanceBase> { #region HLConstructors & Cloning [StorableConstructor] #endregion public static double GetDistance(double[] point1, double[] point2) { if (point1.Length != point2.Length) throw new ArgumentException("Manhattan distance not defined on vectors of different length"); var sum = 0.0; for (var i = 0; i < point1.Length; i++) sum += Math.Abs(point1[i] + point2[i]); return sum; public static double GetDistance(IEnumerable point1, IEnumerable point2) { using (IEnumerator p1Enum = point1.GetEnumerator(), p2Enum = point2.GetEnumerator()) { var sum = 0.0; var p1Next = p1Enum.MoveNext(); var p2Next = p2Enum.MoveNext(); while (p1Next && p2Next) { sum += Math.Abs(p1Enum.Current - p2Enum.Current); p1Next = p1Enum.MoveNext(); p2Next = p1Enum.MoveNext(); } if (p2Next || p1Next) throw new ArgumentException("Manhattan distance not defined on vectors of different length"); return sum; } } public override double Get(IEnumerable a, IEnumerable b) { return GetDistance(a.ToArray(), b.ToArray()); return GetDistance(a, b); } }
• ## branches/Weighted TSNE/3.4/TSNE/Distances/WeightedEuclideanDistance.cs

 r15455 using System; using System.Collections; using System.Collections.Generic; using System.Linq; namespace HeuristicLab.Algorithms.DataAnalysis { [StorableClass] [Item("WeightedEuclideanDistance", "A weighted norm function that uses Euclidean distance √(Σ(w[i]*(p1[i]-p2[i])²)/Σw[i])")] public class WeightedEuclideanDistance : DistanceBase> { [Item("WeightedEuclideanDistance", "A weighted norm function that uses Euclidean distance √(Σ(w[i]²*(p1[i]-p2[i])²))")] public class WeightedEuclideanDistance : ParameterizedNamedItem, IDistance> { public const string WeightsParameterName = "Weights"; public IValueParameter WeigthsParameter { get { return Parameters[WeightsParameterName] as IValueParameter; } get { return (IValueParameter) Parameters[WeightsParameterName]; } } [StorableConstructor] protected WeightedEuclideanDistance(bool deserializing) : base(deserializing) { } protected WeightedEuclideanDistance(WeightedEuclideanDistance original, Cloner cloner) : base(original, cloner) { } private void AfterDeserialization() { RegisterParameterEvents(); } protected WeightedEuclideanDistance(WeightedEuclideanDistance original, Cloner cloner) : base(original, cloner) { RegisterParameterEvents(); } public override IDeepCloneable Clone(Cloner cloner) { return new WeightedEuclideanDistance(this, cloner); } public WeightedEuclideanDistance() { Parameters.Add(new OptionalValueParameter(WeightsParameterName, "The weights used to modify the euclidean distance. If no weights are specified a Random Forrest Regression / Classification is used to automatically set the weigths. 
")); Parameters.Add(new ValueParameter(WeightsParameterName, "The weights used to modify the euclidean distance.")); RegisterParameterEvents(); } #endregion public static double GetDistance(IReadOnlyList point1, IReadOnlyList point2, DoubleArray impacts) { if (point1.Count != point2.Count) throw new ArgumentException("Weighted Euclidean distance not defined on vectors of different length"); if (impacts == null || impacts.Count() != point1.Count) throw new ArgumentException("Weighted Euclidean distance requires a non-null weight vector of length equal to the number of allowed input double variables the compared points"); var sum = 0.0; var sumW = 0.0; for (var i = 0; i < point1.Count; i++) { var d = point1[i] - point2[i]; var w = impacts[i] * impacts[i]; sum += d * d * w; sumW += w; public static double GetDistance(IEnumerable point1, IEnumerable point2, IEnumerable weights) { using (IEnumerator p1Enum = point1.GetEnumerator(), p2Enum = point2.GetEnumerator(), weEnum = weights.GetEnumerator()) { var sum = 0.0; var p1Next = p1Enum.MoveNext(); var p2Next = p2Enum.MoveNext(); var weNext = weEnum.MoveNext(); while (p1Next && p2Next && weNext) { var d = p1Enum.Current - p2Enum.Current; var w = weEnum.Current; sum += d * d * w * w; p1Next = p1Enum.MoveNext(); p2Next = p2Enum.MoveNext(); weNext = weEnum.MoveNext(); } if (weNext) throw new ArgumentException("Weighted Euclidean distance requires a non-null weight vector of length equal to the number of allowed input double variables the compared points"); if (p1Next || p2Next) throw new ArgumentException("Weighted Euclidean distance not defined on vectors of different length"); return Math.Sqrt(sum); } return Math.Sqrt(sum / sumW); } public override double Get(IEnumerable a, IEnumerable b) { return GetDistance(a.ToArray(), b.ToArray(), Weights); public double Get(IEnumerable a, IEnumerable b) { return GetDistance(a, b, Weights); } public IComparer> GetDistanceComparer(IEnumerable item) { return new 
DistanceBase>.DistanceComparer(item, this); } public double Get(object x, object y) { return Get((IEnumerable) x, (IEnumerable) y); } public IComparer GetDistanceComparer(object item) { return new DistanceBase>.DistanceComparer((IEnumerable) item, this); } private void RegisterParameterEvents() { WeigthsParameter.ValueChanged += OnWeightsArrayChanged; WeigthsParameter.Value.ItemChanged += OnWeightChanged; } private void OnWeightChanged(object sender, EventArgs e) { WeigthsParameter.Value.ItemChanged -= OnWeightChanged; Weights[e.Value] = Math.Max(0, Weights[e.Value]); WeigthsParameter.Value.ItemChanged -= OnWeightChanged; } private void OnWeightsArrayChanged(object sender, EventArgs e) { for (int i = 0; i < Weights.Length; i++) Weights[i] = Math.Max(0, Weights[i]); WeigthsParameter.Value.ItemChanged += OnWeightChanged; } }
• ## branches/Weighted TSNE/3.4/TSNE/TSNEAlgorithm.cs

 r15455 using HeuristicLab.Core; using HeuristicLab.Data; using HeuristicLab.Encodings.RealVectorEncoding; using HeuristicLab.Optimization; using HeuristicLab.Parameters; #region Parameter properties public IFixedValueParameter PerplexityParameter { get { return Parameters[PerplexityParameterName] as IFixedValueParameter; } get { return (IFixedValueParameter) Parameters[PerplexityParameterName]; } } public IFixedValueParameter ThetaParameter { get { return Parameters[ThetaParameterName] as IFixedValueParameter; } get { return (IFixedValueParameter) Parameters[ThetaParameterName]; } } public IFixedValueParameter NewDimensionsParameter { get { return Parameters[NewDimensionsParameterName] as IFixedValueParameter; } get { return (IFixedValueParameter) Parameters[NewDimensionsParameterName]; } } public IConstrainedValueParameter> DistanceFunctionParameter { get { return Parameters[DistanceFunctionParameterName] as IConstrainedValueParameter>; } get { return (IConstrainedValueParameter>) Parameters[DistanceFunctionParameterName]; } } public IFixedValueParameter MaxIterationsParameter { get { return Parameters[MaxIterationsParameterName] as IFixedValueParameter; } get { return (IFixedValueParameter) Parameters[MaxIterationsParameterName]; } } public IFixedValueParameter StopLyingIterationParameter { get { return Parameters[StopLyingIterationParameterName] as IFixedValueParameter; } get { return (IFixedValueParameter) Parameters[StopLyingIterationParameterName]; } } public IFixedValueParameter MomentumSwitchIterationParameter { get { return Parameters[MomentumSwitchIterationParameterName] as IFixedValueParameter; } get { return (IFixedValueParameter) Parameters[MomentumSwitchIterationParameterName]; } } public IFixedValueParameter InitialMomentumParameter { get { return Parameters[InitialMomentumParameterName] as IFixedValueParameter; } get { return (IFixedValueParameter) Parameters[InitialMomentumParameterName]; } } public IFixedValueParameter FinalMomentumParameter { 
get { return Parameters[FinalMomentumParameterName] as IFixedValueParameter; } get { return (IFixedValueParameter) Parameters[FinalMomentumParameterName]; } } public IFixedValueParameter EtaParameter { get { return Parameters[EtaParameterName] as IFixedValueParameter; } get { return (IFixedValueParameter) Parameters[EtaParameterName]; } } public IFixedValueParameter SetSeedRandomlyParameter { get { return Parameters[SetSeedRandomlyParameterName] as IFixedValueParameter; } get { return (IFixedValueParameter) Parameters[SetSeedRandomlyParameterName]; } } public IFixedValueParameter SeedParameter { get { return Parameters[SeedParameterName] as IFixedValueParameter; } get { return (IFixedValueParameter) Parameters[SeedParameterName]; } } public IConstrainedValueParameter ClassesNameParameter { get { return Parameters[ClassesNameParameterName] as IConstrainedValueParameter; } get { return (IConstrainedValueParameter) Parameters[ClassesNameParameterName]; } } public IFixedValueParameter NormalizationParameter { get { return Parameters[NormalizationParameterName] as IFixedValueParameter; } get { return (IFixedValueParameter) Parameters[NormalizationParameterName]; } } public IFixedValueParameter RandomInitializationParameter { get { return Parameters[RandomInitializationParameterName] as IFixedValueParameter; } get { return (IFixedValueParameter) Parameters[RandomInitializationParameterName]; } } public IFixedValueParameter UpdateIntervalParameter { get { return Parameters[UpdateIntervalParameterName] as IFixedValueParameter; } get { return (IFixedValueParameter) Parameters[UpdateIntervalParameterName]; } } #endregion private void OnColumnsChanged(object sender, EventArgs e) { if (Problem == null || Problem.ProblemData == null || Problem.ProblemData.Dataset == null || !Parameters.ContainsKey(DistanceFunctionParameterName)) return; DistanceFunctionParameter.ValidValues.OfType().Single().Weights = new RealVector(Problem.ProblemData.AllowedInputVariables.Select(x => 
1.0).ToArray()); DistanceFunctionParameter.ValidValues.OfType().Single().Weights = new DoubleArray(Problem.ProblemData.AllowedInputVariables.Select(x => 1.0).ToArray()); } private static Color ConvertTotalToRgb(double low, double high, double cell) { var colorGradient = ColorGradient.Colors; var range = high - low; var h = cell / range; return HsVtoRgb(h * 0.5, 1.0f, 1.0f); } //taken from https://stackoverflow.com/a/17099130 private static Color HsVtoRgb(double hue, double saturation, double value) { while (hue > 1.0) { hue -= 1.0; } while (hue < 0.0) { hue += 1.0; } while (saturation > 1.0) { saturation -= 1.0; } while (saturation < 0.0) { saturation += 1.0; } while (value > 1.0) { value -= 1.0; } while (value < 0.0) { value += 1.0; } if (hue > 0.999) { hue = 0.999; } if (hue < 0.001) { hue = 0.001; } if (saturation > 0.999) { saturation = 0.999; } if (saturation < 0.001) { return Color.FromArgb((int) (value * 255.0), (int) (value * 255.0), (int) (value * 255.0)); } if (value > 0.999) { value = 0.999; } if (value < 0.001) { value = 0.001; } var h6 = hue * 6.0; if (h6.IsAlmost(6.0)) { h6 = 0.0; } var ihue = (int) h6; var p = value * (1.0 - saturation); var q = value * (1.0 - saturation * (h6 - ihue)); var t = value * (1.0 - saturation * (1.0 - (h6 - ihue))); switch (ihue) { case 0: return Color.FromArgb((int) (value * 255), (int) (t * 255), (int) (p * 255)); case 1: return Color.FromArgb((int) (q * 255), (int) (value * 255), (int) (p * 255)); case 2: return Color.FromArgb((int) (p * 255), (int) (value * 255), (int) (t * 255)); case 3: return Color.FromArgb((int) (p * 255), (int) (q * 255), (int) (value * 255)); case 4: return Color.FromArgb((int) (t * 255), (int) (p * 255), (int) (value * 255)); default: return Color.FromArgb((int) (value * 255), (int) (p * 255), (int) (q * 255)); } var h = cell / range * colorGradient.Count; return colorGradient[(int) h]; } #endregion
• ## branches/Weighted TSNE/3.4/TSNE/TSNEStatic.cs

 r15455 } var sumP = .0; for (var i = 0; i < data.Length; i++) for (var j = 0; j < data.Length; j++) sumP += p[i, j]; for (var i = 0; i < data.Length; i++) for (var j = 0; j < data.Length; j++) p[i, j] /= sumP; for (var i = 0; i < data.Length; i++) { for (var j = 0; j < data.Length; j++) { sumP += p[i, j]; } } for (var i = 0; i < data.Length; i++) { for (var j = 0; j < data.Length; j++) { p[i, j] /= sumP; } } return p; } public static double[,] Run(T[] data, IDistance distance, IRandom random, int newDimensions = 2, double perplexity = 25, int iterations = 1000, double theta = 0, int stopLyingIter = 0, int momSwitchIter = 0, double momentum = .5, double theta = 0, int stopLyingIter = 0, int momSwitchIter = 0, double momentum = .5, double finalMomentum = .8, double eta = 10.0 ) { } } // Perform gradient update (with momentum and gains)
• ## branches/Weighted TSNE/3.4/TSNE/TSNEUtils.cs

 r15455 /// comparer for list elemnts /// internal static void NthElement(this IList list, int left, int right, int n, IComparer comparer) { internal static void PartialSort(this IList list, int left, int right, int n, IComparer comparer) { while (true) { if (left == right) return;
• ## branches/Weighted TSNE/3.4/TSNE/VantagePointTree.cs

 r15207 // Partition around the median distance var median = (upper + lower) / 2; items.NthElement(lower + 1, upper - 1, median, distance.GetDistanceComparer(items[lower])); items.PartialSort(lower + 1, upper - 1, median, distance.GetDistanceComparer(items[lower])); // Threshold of the new node will be the distance to the median
Note: See TracChangeset for help on using the changeset viewer.