Changeset 13878


Ignore:
Timestamp:
06/07/16 16:29:59 (3 years ago)
Author:
abeham
Message:

#2457: added standardization of features for recommendation and using log10 of the expected runtime for clustering

Location:
branches/PerformanceComparison
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • branches/PerformanceComparison/HeuristicLab.OptimizationExpertSystem.Common/3.3/KnowledgeCenter.cs

    r13809 r13878  
    657657    }
    658658
     659    public static double[][] GetFeaturesStandardized(IRun[] problemInstances, string[] characteristics, out double[] means, out double[] sdevs, double[] medianValues = null) {
     660      var instances = new double[problemInstances.Length][];
     661      var columns = new List<double>[characteristics.Length];
     662      for (var p = 0; p < problemInstances.Length; p++) {
     663        instances[p] = new double[characteristics.Length];
     664        for (var f = 0; f < characteristics.Length; f++) {
     665          if (columns[f] == null) {
     666            columns[f] = new List<double>(problemInstances.Length);
     667          }
     668          IItem item;
     669          if (problemInstances[p].Results.TryGetValue(characteristics[f], out item)) {
     670            double val = 0;
     671            var dItem = item as DoubleValue;
     672            if (dItem != null) {
     673              val = dItem.Value;
     674            } else {
     675              var iItem = item as IntValue;
     676              if (iItem != null) val = iItem.Value;
     677              else val = double.NaN;
     678            }
     679            if (double.IsNaN(val) && medianValues != null)
     680              instances[p][f] = medianValues[f];
     681            else instances[p][f] = val;
     682            columns[f].Add(instances[p][f]);
     683          } else instances[p][f] = medianValues != null ? medianValues[f] : double.NaN;
     684        }
     685      }
     686
     687      means = new double[characteristics.Length];
     688      sdevs = new double[characteristics.Length];
     689      for (var f = 0; f < characteristics.Length; f++) {
     690        var mean = columns[f].Average();
     691        var dev = columns[f].StandardDeviation();
     692        means[f] = mean;
     693        sdevs[f] = dev;
     694        for (var p = 0; p < problemInstances.Length; p++) {
     695          if (dev.IsAlmost(0)) instances[p][f] = 0;
     696          else instances[p][f] = (instances[p][f] - mean) / dev;
     697        }
     698      }
     699
     700      return instances;
     701    }
     702
    659703    public static double[] GetMedianValues(IRun[] problemInstances, string[] characteristics) {
    660704      var values = new List<double>[characteristics.Length];
     
    704748                          .GroupBy(x => algorithmId2AlgorithmInstanceMapping.GetByFirst(algorithmId2RunMapping.GetBySecond(x).Single()))
    705749                          .ToDictionary(x => x.Key, x => ExpectedRuntimeHelper.CalculateErt(x.ToList(), "QualityPerEvaluations", target, Maximization).ExpectedRuntime);
     750    }
     751
     752    public Dictionary<IAlgorithm, double> GetAlgorithmPerformanceLog10(IRun problemInstance) {
     753      if (!problemInstance.Parameters.ContainsKey("BestKnownQuality")) return new Dictionary<IAlgorithm, double>();
     754      var target = GetTarget(((DoubleValue)problemInstance.Parameters["BestKnownQuality"]).Value, MinimumTarget.Value, Maximization);
     755      return knowledgeBase.Where(x => ((StringValue)x.Parameters["Problem Name"]).Value == ((StringValue)problemInstance.Parameters["Problem Name"]).Value)
     756                          .GroupBy(x => algorithmId2AlgorithmInstanceMapping.GetByFirst(algorithmId2RunMapping.GetBySecond(x).Single()))
     757                          .ToDictionary(x => x.Key, x => Math.Log10(ExpectedRuntimeHelper.CalculateErt(x.ToList(), "QualityPerEvaluations", target, Maximization).ExpectedRuntime));
    706758    }
    707759
     
    802854
    803855        var values = pr.GroupBy(x => algorithmId2RunMapping.GetBySecond(x).Single())
    804                        .ToDictionary(x => x.Key, x => ExpectedRuntimeHelper.CalculateErt(x.ToList(), "QualityPerEvaluations", GetTarget(bkq, target, max), max).ExpectedRuntime);
     856                       .ToDictionary(x => x.Key, x => Math.Log10(ExpectedRuntimeHelper.CalculateErt(x.ToList(), "QualityPerEvaluations", GetTarget(bkq, target, max), max).ExpectedRuntime));
    805857        var ranks = ClusteringHelper<long>.Cluster(nClasses, values, x => double.IsInfinity(x.Value))
    806858          .GetByCluster().ToList();
  • branches/PerformanceComparison/HeuristicLab.OptimizationExpertSystem.Common/3.3/Recommenders/KNearestNeighborModel.cs

    r13803 r13878  
    2121
    2222using HeuristicLab.Collections;
     23using HeuristicLab.Common;
    2324using HeuristicLab.Optimization;
     25using System;
    2426using System.Collections.Generic;
    2527using System.Linq;
     
    4648
    4749    public IEnumerable<KeyValuePair<IAlgorithm, double>> GetRanking(IRun problemInstance) {
    48       var features = KnowledgeCenter.GetFeatures(performance.Keys.OrderBy(problemInstanceMap.GetBySecond).ToArray(), characteristics, medianValues);
     50      double[] means, sdevs;
     51      var features = KnowledgeCenter.GetFeaturesStandardized(performance.Keys.OrderBy(problemInstanceMap.GetBySecond).ToArray(), characteristics, out means, out sdevs, medianValues);
    4952      var feature = KnowledgeCenter.GetFeatures(new [] { problemInstance }, characteristics, medianValues)[0];
     53      for (var f = 0; f < feature.Length; f++) {
     54        if (sdevs[f].IsAlmost(0)) feature[f] = 0;
     55        else feature[f] = (feature[f] - means[f]) / sdevs[f];
     56      }
    5057      var nearestK = features.Select((f, i) => new { ProblemInstanceIndex = i, Feature = f })
    5158                             .OrderBy(x => x.Feature.Select((f, i) => (f - feature[i]) * (f - feature[i])).Sum());
     
    6067       
    6168        foreach (var p in perfs) {
    62           var ert = p.Value;
     69          var ert = Math.Pow(10, p.Value);
    6370          Performance perf;
    6471          if (!performances.TryGetValue(p.Key, out perf)) {
  • branches/PerformanceComparison/HeuristicLab.OptimizationExpertSystem.Common/3.3/Recommenders/KNearestNeighborRecommender.cs

    r13791 r13878  
    4949
    5050    public IRecommendationModel TrainModel(IRun[] problemInstances, KnowledgeCenter kc, string[] characteristics) {
    51       var perfData = problemInstances.Select(pi => new { ProblemInstance = pi, Performance = kc.GetAlgorithmPerformance(pi) })
     51      var perfData = problemInstances.Select(pi => new { ProblemInstance = pi, Performance = kc.GetAlgorithmPerformanceLog10(pi) })
    5252                                     .ToDictionary(x => x.ProblemInstance, x => x.Performance);
    5353      return new KNearestNeighborModel(KParameter.Value.Value, perfData, characteristics);
  • branches/PerformanceComparison/HeuristicLab.OptimizationExpertSystem/3.3/Views/PerformanceModelingView.cs

    r13803 r13878  
    186186        var count = 0;
    187187        foreach (var pi in trainingSet) {
    188           var observed = Content.GetAlgorithmPerformance(pi);
     188          var observed = Content.GetAlgorithmPerformanceLog10(pi);
    189189          if (observed.Count == 0) continue;
    190190          progress.Status = pi.Name + "...";
    191191          var model = recommender.TrainModel(trainingSet.Where(x => x != pi).ToArray(), Content, features);
    192           var predictedTopN = model.GetRanking(pi).Take(topN).ToDictionary(x => x.Key, x => x.Value);
    193           var predicted = model.GetRanking(pi).ToDictionary(x => x.Key, x => x.Value);
     192          var predictedTopN = model.GetRanking(pi).Take(topN).ToDictionary(x => x.Key, x => Math.Log10(x.Value));
     193          var predicted = model.GetRanking(pi).ToDictionary(x => x.Key, x => Math.Log10(x.Value));
    194194          var ae = AbsoluteError(observed, predictedTopN);
    195195          if (!double.IsNaN(ae)) {
     
    240240        double actual;
    241241        if (!performance.TryGetValue(tuple.Key, out actual)) continue;
    242         if (double.IsInfinity(actual)) actual = int.MaxValue;
    243         error += Math.Abs(actual - tuple.Value);
     242        if (double.IsInfinity(actual)) actual = Math.Log10(int.MaxValue);
     243        error += Math.Abs(Math.Pow(10, actual) - Math.Pow(10, tuple.Value));
    244244        count++;
    245245      }
     
    253253        double actual;
    254254        if (!performance.TryGetValue(tuple.Key, out actual)) continue;
    255         if (double.IsInfinity(actual)) actual = int.MaxValue;
    256         error += Math.Abs(Math.Log10(actual) - Math.Log10(tuple.Value));
     255        if (double.IsInfinity(actual)) actual = Math.Log10(int.MaxValue);
     256        error += Math.Abs(actual - tuple.Value);
    257257        count++;
    258258      }
Note: See TracChangeset for help on using the changeset viewer.