Changeset 15479


Ignore:
Timestamp:
11/20/17 15:29:53 (3 years ago)
Author:
bwerth
Message:

#2850 worked on weighted tSNE

Location:
branches/Weighted TSNE/3.4/TSNE
Files:
9 edited

Legend:

Unmodified
Added
Removed
  • branches/Weighted TSNE/3.4/TSNE/Distances/CosineDistance.cs

    r15234 r15479  
    2222using System;
    2323using System.Collections.Generic;
    24 using System.Linq;
    2524using HeuristicLab.Common;
    2625using HeuristicLab.Core;
     
    2827
    2928namespace HeuristicLab.Algorithms.DataAnalysis {
    30 
    3129  /// <summary>
    3230  /// The angular distance as defined as a normalized distance measure dependent on the angle between two vectors.
     
    3533  [Item("CosineDistance", "The angular distance as defined as a normalized distance measure dependent on the angle between two vectors.")]
    3634  public class CosineDistance : DistanceBase<IEnumerable<double>> {
    37 
    3835    #region HLConstructors & Cloning
    3936    [StorableConstructor]
     
    4845
    4946    #region statics
    50     public static double GetDistance(IReadOnlyList<double> point1, IReadOnlyList<double> point2) {
    51       if (point1.Count != point2.Count) throw new ArgumentException("Cosine distance not defined on vectors of different length");
    52       var innerprod = 0.0;
    53       var length1 = 0.0;
    54       var length2 = 0.0;
    55 
    56       for (var i = 0; i < point1.Count; i++) {
    57         double d1 = point1[i], d2 = point2[i];
    58         innerprod += d1 * d2;
    59         length1 += d1 * d1;
    60         length2 += d2 * d2;
     47    public static double GetDistance(IEnumerable<double> point1, IEnumerable<double> point2) {
     48      using (IEnumerator<double> p1Enum = point1.GetEnumerator(), p2Enum = point2.GetEnumerator()) {
     49        var innerprod = 0.0;
     50        var length1 = 0.0;
     51        var length2 = 0.0;
     52        var p1Next = p1Enum.MoveNext();
     53        var p2Next = p2Enum.MoveNext();
     54        while (p1Next && p2Next) {
     55          double d1 = p1Enum.Current, d2 = p2Enum.Current;
     56          innerprod += d1 * d2;
     57          length1 += d1 * d1;
     58          length2 += d2 * d2;
     59          p1Next = p1Enum.MoveNext();
     60          p2Next = p1Enum.MoveNext();
     61        }
     62        var divisor = Math.Sqrt(length1 * length2);
     63        if (divisor.IsAlmost(0)) throw new ArgumentException("Cosine distance is not defined on vectors of length 0");
     64        if (p2Next || p1Next) throw new ArgumentException("Cosine distance not defined on vectors of different length");
     65        return 1 - innerprod / divisor;
    6166      }
    62       var l = Math.Sqrt(length1 * length2);
    63       if (l.IsAlmost(0)) throw new ArgumentException("Cosine distance is not defined on vectors of length 0");
    64       return 1 - innerprod / l;
    6567    }
    6668    #endregion
    6769    public override double Get(IEnumerable<double> a, IEnumerable<double> b) {
    68       return GetDistance(a.ToArray(), b.ToArray());
     70      return GetDistance(a, b);
    6971    }
    7072  }
  • branches/Weighted TSNE/3.4/TSNE/Distances/DistanceBase.cs

    r15451 r15479  
    5151    }
    5252
    53     private class DistanceComparer : IComparer<T>, IComparer {
     53    internal class DistanceComparer : IComparer<T>, IComparer {
    5454      private readonly T item;
    5555      private readonly IDistance<T> dist;
  • branches/Weighted TSNE/3.4/TSNE/Distances/EuclideanDistance.cs

    r15207 r15479  
    3131  [Item("EuclideanDistance", "A norm function that uses Euclidean distance")]
    3232  public class EuclideanDistance : DistanceBase<IEnumerable<double>> {
    33 
    3433    #region HLConstructors & Cloning
    3534    [StorableConstructor]
    3635    protected EuclideanDistance(bool deserializing) : base(deserializing) { }
    3736    protected EuclideanDistance(EuclideanDistance original, Cloner cloner) : base(original, cloner) { }
    38     public override IDeepCloneable Clone(Cloner cloner) { return new EuclideanDistance(this, cloner); }
     37    public override IDeepCloneable Clone(Cloner cloner) {
     38      return new EuclideanDistance(this, cloner);
     39    }
    3940    public EuclideanDistance() { }
    4041    #endregion
    4142
    42     public static double GetDistance(IReadOnlyList<double> point1, IReadOnlyList<double> point2) {
    43       if (point1.Count != point2.Count) throw new ArgumentException("Euclidean distance not defined on vectors of different length");
    44       var sum = 0.0;
    45       for (var i = 0; i < point1.Count; i++) {
    46         var d = point1[i] - point2[i];
    47         sum += d * d;
     43    public static double GetDistance(IEnumerable<double> point1, IEnumerable<double> point2) {
     44      using (IEnumerator<double> p1Enum = point1.GetEnumerator(), p2Enum = point2.GetEnumerator()) {
     45        var sum = 0.0;
     46        var p1Next = p1Enum.MoveNext();
     47        var p2Next = p2Enum.MoveNext();
     48        while (p1Next && p2Next) {
     49          var d = p1Enum.Current - p2Enum.Current;
     50          sum += d * d;
     51          p1Next = p1Enum.MoveNext();
     52          p2Next = p1Enum.MoveNext();
     53        }
     54        if (p2Next || p1Next) throw new ArgumentException("Euclidean distance not defined on vectors of different length");
     55        return Math.Sqrt(sum);
    4856      }
    49 
    50       return Math.Sqrt(sum);
    5157    }
    5258
    5359    public override double Get(IEnumerable<double> a, IEnumerable<double> b) {
    54       return GetDistance(a.ToArray(), b.ToArray());
     60      return GetDistance(a, b);
    5561    }
    5662  }
  • branches/Weighted TSNE/3.4/TSNE/Distances/ManhattanDistance.cs

    r15207 r15479  
    3131  [Item("ManhattanDistance", "A distance function that uses block distance")]
    3232  public class ManhattanDistance : DistanceBase<IEnumerable<double>> {
    33 
    3433    #region HLConstructors & Cloning
    3534    [StorableConstructor]
     
    4544    #endregion
    4645
    47     public static double GetDistance(double[] point1, double[] point2) {
    48       if (point1.Length != point2.Length) throw new ArgumentException("Manhattan distance not defined on vectors of different length");
    49       var sum = 0.0;
    50       for (var i = 0; i < point1.Length; i++)
    51         sum += Math.Abs(point1[i] + point2[i]);
    52       return sum;
     46    public static double GetDistance(IEnumerable<double> point1, IEnumerable<double> point2) {
     47      using (IEnumerator<double> p1Enum = point1.GetEnumerator(), p2Enum = point2.GetEnumerator()) {
     48        var sum = 0.0;
     49        var p1Next = p1Enum.MoveNext();
     50        var p2Next = p2Enum.MoveNext();
     51        while (p1Next && p2Next) {
     52          sum += Math.Abs(p1Enum.Current - p2Enum.Current);
     53          p1Next = p1Enum.MoveNext();
     54          p2Next = p1Enum.MoveNext();
     55        }
     56        if (p2Next || p1Next) throw new ArgumentException("Manhattan distance not defined on vectors of different length");
     57        return sum;
     58      }
    5359    }
    5460
    5561    public override double Get(IEnumerable<double> a, IEnumerable<double> b) {
    56       return GetDistance(a.ToArray(), b.ToArray());
     62      return GetDistance(a, b);
    5763    }
    5864  }
  • branches/Weighted TSNE/3.4/TSNE/Distances/WeightedEuclideanDistance.cs

    r15455 r15479  
    2121
    2222using System;
     23using System.Collections;
    2324using System.Collections.Generic;
    2425using System.Linq;
     
    3132namespace HeuristicLab.Algorithms.DataAnalysis {
    3233  [StorableClass]
    33   [Item("WeightedEuclideanDistance", "A weighted norm function that uses Euclidean distance √(Σ(w[i]*(p1[i]-p2[i])²)/Σw[i])")]
    34   public class WeightedEuclideanDistance : DistanceBase<IEnumerable<double>> {
     34  [Item("WeightedEuclideanDistance", "A weighted norm function that uses Euclidean distance √(Σ(w[i]²*(p1[i]-p2[i])²))")]
     35  public class WeightedEuclideanDistance : ParameterizedNamedItem, IDistance<IEnumerable<double>> {
    3536    public const string WeightsParameterName = "Weights";
    3637    public IValueParameter<DoubleArray> WeigthsParameter {
    37       get { return Parameters[WeightsParameterName] as IValueParameter<DoubleArray>; }
     38      get { return (IValueParameter<DoubleArray>) Parameters[WeightsParameterName]; }
    3839    }
    3940
     
    4647    [StorableConstructor]
    4748    protected WeightedEuclideanDistance(bool deserializing) : base(deserializing) { }
    48     protected WeightedEuclideanDistance(WeightedEuclideanDistance original, Cloner cloner) : base(original, cloner) { }
     49    private void AfterDeserialization() {
     50      RegisterParameterEvents();
     51    }
     52    protected WeightedEuclideanDistance(WeightedEuclideanDistance original, Cloner cloner) : base(original, cloner) {
     53      RegisterParameterEvents();
     54    }
    4955    public override IDeepCloneable Clone(Cloner cloner) {
    5056      return new WeightedEuclideanDistance(this, cloner);
    5157    }
    5258    public WeightedEuclideanDistance() {
    53       Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, "The weights used to modify the euclidean distance. If no weights are specified a Random Forrest Regression / Classification is used to automatically set the weigths. "));
     59      Parameters.Add(new ValueParameter<DoubleArray>(WeightsParameterName, "The weights used to modify the euclidean distance."));
     60      RegisterParameterEvents();
    5461    }
    5562    #endregion
    5663
    57     public static double GetDistance(IReadOnlyList<double> point1, IReadOnlyList<double> point2, DoubleArray impacts) {
    58       if (point1.Count != point2.Count) throw new ArgumentException("Weighted Euclidean distance not defined on vectors of different length");
    59       if (impacts == null || impacts.Count() != point1.Count) throw new ArgumentException("Weighted Euclidean distance requires a non-null weight vector of length equal to the number of allowed input double variables the compared points");
    60       var sum = 0.0;
    61       var sumW = 0.0;
    62       for (var i = 0; i < point1.Count; i++) {
    63         var d = point1[i] - point2[i];
    64         var w = impacts[i] * impacts[i];
    65         sum += d * d * w;
    66         sumW += w;
     64    public static double GetDistance(IEnumerable<double> point1, IEnumerable<double> point2, IEnumerable<double> weights) {
     65      using (IEnumerator<double> p1Enum = point1.GetEnumerator(), p2Enum = point2.GetEnumerator(), weEnum = weights.GetEnumerator()) {
     66        var sum = 0.0;
     67        var p1Next = p1Enum.MoveNext();
     68        var p2Next = p2Enum.MoveNext();
     69        var weNext = weEnum.MoveNext();
     70        while (p1Next && p2Next && weNext) {
     71          var d = p1Enum.Current - p2Enum.Current;
     72          var w = weEnum.Current;
     73          sum += d * d * w * w;
     74          p1Next = p1Enum.MoveNext();
     75          p2Next = p2Enum.MoveNext();
     76          weNext = weEnum.MoveNext();
     77        }
     78        if (weNext) throw new ArgumentException("Weighted Euclidean distance requires a non-null weight vector of length equal to the number of allowed input double variables the compared points");
     79        if (p1Next || p2Next) throw new ArgumentException("Weighted Euclidean distance not defined on vectors of different length");
     80        return Math.Sqrt(sum);
    6781      }
    68       return Math.Sqrt(sum / sumW);
    6982    }
    7083
    71     public override double Get(IEnumerable<double> a, IEnumerable<double> b) {
    72       return GetDistance(a.ToArray(), b.ToArray(), Weights);
     84    public double Get(IEnumerable<double> a, IEnumerable<double> b) {
     85      return GetDistance(a, b, Weights);
     86    }
     87    public IComparer<IEnumerable<double>> GetDistanceComparer(IEnumerable<double> item) {
     88      return new DistanceBase<IEnumerable<double>>.DistanceComparer(item, this);
     89    }
     90    public double Get(object x, object y) {
     91      return Get((IEnumerable<double>) x, (IEnumerable<double>) y);
     92    }
     93    public IComparer GetDistanceComparer(object item) {
     94      return new DistanceBase<IEnumerable<double>>.DistanceComparer((IEnumerable<double>) item, this);
     95    }
     96
     97    private void RegisterParameterEvents() {
     98      WeigthsParameter.ValueChanged += OnWeightsArrayChanged;
     99      WeigthsParameter.Value.ItemChanged += OnWeightChanged;
     100    }
     101    private void OnWeightChanged(object sender, EventArgs<int> e) {
     102      WeigthsParameter.Value.ItemChanged -= OnWeightChanged;
     103      Weights[e.Value] = Math.Max(0, Weights[e.Value]);
     104      WeigthsParameter.Value.ItemChanged -= OnWeightChanged;
     105    }
     106    private void OnWeightsArrayChanged(object sender, EventArgs e) {
     107      for (int i = 0; i < Weights.Length; i++)
     108        Weights[i] = Math.Max(0, Weights[i]);
     109      WeigthsParameter.Value.ItemChanged += OnWeightChanged;
    73110    }
    74111  }
  • branches/Weighted TSNE/3.4/TSNE/TSNEAlgorithm.cs

    r15455 r15479  
    2929using HeuristicLab.Core;
    3030using HeuristicLab.Data;
    31 using HeuristicLab.Encodings.RealVectorEncoding;
    3231using HeuristicLab.Optimization;
    3332using HeuristicLab.Parameters;
     
    8786    #region Parameter properties
    8887    public IFixedValueParameter<DoubleValue> PerplexityParameter {
    89       get { return Parameters[PerplexityParameterName] as IFixedValueParameter<DoubleValue>; }
     88      get { return (IFixedValueParameter<DoubleValue>) Parameters[PerplexityParameterName]; }
    9089    }
    9190    public IFixedValueParameter<PercentValue> ThetaParameter {
    92       get { return Parameters[ThetaParameterName] as IFixedValueParameter<PercentValue>; }
     91      get { return (IFixedValueParameter<PercentValue>) Parameters[ThetaParameterName]; }
    9392    }
    9493    public IFixedValueParameter<IntValue> NewDimensionsParameter {
    95       get { return Parameters[NewDimensionsParameterName] as IFixedValueParameter<IntValue>; }
     94      get { return (IFixedValueParameter<IntValue>) Parameters[NewDimensionsParameterName]; }
    9695    }
    9796    public IConstrainedValueParameter<IDistance<double[]>> DistanceFunctionParameter {
    98       get { return Parameters[DistanceFunctionParameterName] as IConstrainedValueParameter<IDistance<double[]>>; }
     97      get { return (IConstrainedValueParameter<IDistance<double[]>>) Parameters[DistanceFunctionParameterName]; }
    9998    }
    10099    public IFixedValueParameter<IntValue> MaxIterationsParameter {
    101       get { return Parameters[MaxIterationsParameterName] as IFixedValueParameter<IntValue>; }
     100      get { return (IFixedValueParameter<IntValue>) Parameters[MaxIterationsParameterName]; }
    102101    }
    103102    public IFixedValueParameter<IntValue> StopLyingIterationParameter {
    104       get { return Parameters[StopLyingIterationParameterName] as IFixedValueParameter<IntValue>; }
     103      get { return (IFixedValueParameter<IntValue>) Parameters[StopLyingIterationParameterName]; }
    105104    }
    106105    public IFixedValueParameter<IntValue> MomentumSwitchIterationParameter {
    107       get { return Parameters[MomentumSwitchIterationParameterName] as IFixedValueParameter<IntValue>; }
     106      get { return (IFixedValueParameter<IntValue>) Parameters[MomentumSwitchIterationParameterName]; }
    108107    }
    109108    public IFixedValueParameter<DoubleValue> InitialMomentumParameter {
    110       get { return Parameters[InitialMomentumParameterName] as IFixedValueParameter<DoubleValue>; }
     109      get { return (IFixedValueParameter<DoubleValue>) Parameters[InitialMomentumParameterName]; }
    111110    }
    112111    public IFixedValueParameter<DoubleValue> FinalMomentumParameter {
    113       get { return Parameters[FinalMomentumParameterName] as IFixedValueParameter<DoubleValue>; }
     112      get { return (IFixedValueParameter<DoubleValue>) Parameters[FinalMomentumParameterName]; }
    114113    }
    115114    public IFixedValueParameter<DoubleValue> EtaParameter {
    116       get { return Parameters[EtaParameterName] as IFixedValueParameter<DoubleValue>; }
     115      get { return (IFixedValueParameter<DoubleValue>) Parameters[EtaParameterName]; }
    117116    }
    118117    public IFixedValueParameter<BoolValue> SetSeedRandomlyParameter {
    119       get { return Parameters[SetSeedRandomlyParameterName] as IFixedValueParameter<BoolValue>; }
     118      get { return (IFixedValueParameter<BoolValue>) Parameters[SetSeedRandomlyParameterName]; }
    120119    }
    121120    public IFixedValueParameter<IntValue> SeedParameter {
    122       get { return Parameters[SeedParameterName] as IFixedValueParameter<IntValue>; }
     121      get { return (IFixedValueParameter<IntValue>) Parameters[SeedParameterName]; }
    123122    }
    124123    public IConstrainedValueParameter<StringValue> ClassesNameParameter {
    125       get { return Parameters[ClassesNameParameterName] as IConstrainedValueParameter<StringValue>; }
     124      get { return (IConstrainedValueParameter<StringValue>) Parameters[ClassesNameParameterName]; }
    126125    }
    127126    public IFixedValueParameter<BoolValue> NormalizationParameter {
    128       get { return Parameters[NormalizationParameterName] as IFixedValueParameter<BoolValue>; }
     127      get { return (IFixedValueParameter<BoolValue>) Parameters[NormalizationParameterName]; }
    129128    }
    130129    public IFixedValueParameter<BoolValue> RandomInitializationParameter {
    131       get { return Parameters[RandomInitializationParameterName] as IFixedValueParameter<BoolValue>; }
     130      get { return (IFixedValueParameter<BoolValue>) Parameters[RandomInitializationParameterName]; }
    132131    }
    133132    public IFixedValueParameter<IntValue> UpdateIntervalParameter {
    134       get { return Parameters[UpdateIntervalParameterName] as IFixedValueParameter<IntValue>; }
     133      get { return (IFixedValueParameter<IntValue>) Parameters[UpdateIntervalParameterName]; }
    135134    }
    136135    #endregion
     
    336335    private void OnColumnsChanged(object sender, EventArgs e) {
    337336      if (Problem == null || Problem.ProblemData == null || Problem.ProblemData.Dataset == null || !Parameters.ContainsKey(DistanceFunctionParameterName)) return;
    338       DistanceFunctionParameter.ValidValues.OfType<WeightedEuclideanDistance>().Single().Weights = new RealVector(Problem.ProblemData.AllowedInputVariables.Select(x => 1.0).ToArray());
     337      DistanceFunctionParameter.ValidValues.OfType<WeightedEuclideanDistance>().Single().Weights = new DoubleArray(Problem.ProblemData.AllowedInputVariables.Select(x => 1.0).ToArray());
    339338    }
    340339
     
    531530
    532531    private static Color ConvertTotalToRgb(double low, double high, double cell) {
     532      var colorGradient = ColorGradient.Colors;
    533533      var range = high - low;
    534       var h = cell / range;
    535       return HsVtoRgb(h * 0.5, 1.0f, 1.0f);
    536     }
    537 
    538     //taken from https://stackoverflow.com/a/17099130
    539     private static Color HsVtoRgb(double hue, double saturation, double value) {
    540       while (hue > 1.0) { hue -= 1.0; }
    541       while (hue < 0.0) { hue += 1.0; }
    542       while (saturation > 1.0) { saturation -= 1.0; }
    543       while (saturation < 0.0) { saturation += 1.0; }
    544       while (value > 1.0) { value -= 1.0; }
    545       while (value < 0.0) { value += 1.0; }
    546       if (hue > 0.999) { hue = 0.999; }
    547       if (hue < 0.001) { hue = 0.001; }
    548       if (saturation > 0.999) { saturation = 0.999; }
    549       if (saturation < 0.001) { return Color.FromArgb((int) (value * 255.0), (int) (value * 255.0), (int) (value * 255.0)); }
    550       if (value > 0.999) { value = 0.999; }
    551       if (value < 0.001) { value = 0.001; }
    552 
    553       var h6 = hue * 6.0;
    554       if (h6.IsAlmost(6.0)) { h6 = 0.0; }
    555       var ihue = (int) h6;
    556       var p = value * (1.0 - saturation);
    557       var q = value * (1.0 - saturation * (h6 - ihue));
    558       var t = value * (1.0 - saturation * (1.0 - (h6 - ihue)));
    559       switch (ihue) {
    560         case 0:
    561           return Color.FromArgb((int) (value * 255), (int) (t * 255), (int) (p * 255));
    562         case 1:
    563           return Color.FromArgb((int) (q * 255), (int) (value * 255), (int) (p * 255));
    564         case 2:
    565           return Color.FromArgb((int) (p * 255), (int) (value * 255), (int) (t * 255));
    566         case 3:
    567           return Color.FromArgb((int) (p * 255), (int) (q * 255), (int) (value * 255));
    568         case 4:
    569           return Color.FromArgb((int) (t * 255), (int) (p * 255), (int) (value * 255));
    570         default:
    571           return Color.FromArgb((int) (value * 255), (int) (p * 255), (int) (q * 255));
    572       }
     534      var h = cell / range * colorGradient.Count;
     535      return colorGradient[(int) h];
    573536    }
    574537    #endregion
  • branches/Weighted TSNE/3.4/TSNE/TSNEStatic.cs

    r15455 r15479  
    257257        }
    258258        var sumP = .0;
    259         for (var i = 0; i < data.Length; i++) for (var j = 0; j < data.Length; j++) sumP += p[i, j];
    260         for (var i = 0; i < data.Length; i++) for (var j = 0; j < data.Length; j++) p[i, j] /= sumP;
     259        for (var i = 0; i < data.Length; i++) {
     260          for (var j = 0; j < data.Length; j++) {
     261            sumP += p[i, j];
     262          }
     263        }
     264        for (var i = 0; i < data.Length; i++) {
     265          for (var j = 0; j < data.Length; j++) {
     266            p[i, j] /= sumP;
     267          }
     268        }
    261269        return p;
    262270      }
     
    555563    public static double[,] Run(T[] data, IDistance<T> distance, IRandom random,
    556564      int newDimensions = 2, double perplexity = 25, int iterations = 1000,
    557       double theta = 0,
    558       int stopLyingIter = 0, int momSwitchIter = 0, double momentum = .5,
     565      double theta = 0, int stopLyingIter = 0, int momSwitchIter = 0, double momentum = .5,
    559566      double finalMomentum = .8, double eta = 10.0
    560567    ) {
     
    591598        }
    592599      }
    593 
    594600
    595601      // Perform gradient update (with momentum and gains)
  • branches/Weighted TSNE/3.4/TSNE/TSNEUtils.cs

    r15455 r15479  
    6666    /// <param name="comparer">comparer for list elemnts </param>
    6767    /// <returns></returns>
    68     internal static void NthElement<T>(this IList<T> list, int left, int right, int n, IComparer<T> comparer) {
     68    internal static void PartialSort<T>(this IList<T> list, int left, int right, int n, IComparer<T> comparer) {
    6969      while (true) {
    7070        if (left == right) return;
  • branches/Weighted TSNE/3.4/TSNE/VantagePointTree.cs

    r15207 r15479  
    139139      // Partition around the median distance
    140140      var median = (upper + lower) / 2;
    141       items.NthElement(lower + 1, upper - 1, median, distance.GetDistanceComparer(items[lower]));
     141      items.PartialSort(lower + 1, upper - 1, median, distance.GetDistanceComparer(items[lower]));
    142142
    143143      // Threshold of the new node will be the distance to the median
Note: See TracChangeset for help on using the changeset viewer.