Free cookie consent management tool by TermsFeed Policy Generator

source: branches/PerformanceComparison/HeuristicLab.OptimizationExpertSystem.Common/3.3/KnowledgeCenter.cs @ 13797

Last change on this file since 13797 was 13797, checked in by abeham, 8 years ago

#2457: worked on testing recommendation algorithms through x-validation

File size: 36.4 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using HeuristicLab.Analysis;
23using HeuristicLab.Analysis.SelfOrganizingMaps;
24using HeuristicLab.Collections;
25using HeuristicLab.Common;
26using HeuristicLab.Common.Resources;
27using HeuristicLab.Core;
28using HeuristicLab.Data;
29using HeuristicLab.MainForm;
30using HeuristicLab.Optimization;
31using HeuristicLab.Persistence.Default.Xml;
32using HeuristicLab.Problems.DataAnalysis;
33using HeuristicLab.Random;
34using System;
35using System.Collections.Generic;
36using System.Drawing;
37using System.IO;
38using System.Linq;
39using System.Threading;
40using System.Threading.Tasks;
41using RunCreationClient = HeuristicLab.Clients.OKB.RunCreation.RunCreationClient;
42using SingleObjectiveOKBProblem = HeuristicLab.Clients.OKB.RunCreation.SingleObjectiveOKBProblem;
43using SingleObjectiveOKBSolution = HeuristicLab.Clients.OKB.RunCreation.SingleObjectiveOKBSolution;
44
45namespace HeuristicLab.OptimizationExpertSystem.Common {
46  [Item("Knowledge Center", "Currently in experimental phase, an expert system that makes algorithm suggestions based on fitness landscape analysis features and an optimization knowledge base.")]
47  [Creatable(CreatableAttribute.Categories.TestingAndAnalysis, Priority = 119)]
48  public sealed class KnowledgeCenter : IContent {
49    private bool SuppressEvents { get; set; }
50
51    public string Filename { get; set; }
52
53    public static Image StaticItemImage {
54      get { return VSImageLibrary.Library; }
55    }
56
57    private readonly IntValue maximumEvaluations;
58    public IntValue MaximumEvaluations {
59      get { return maximumEvaluations; }
60    }
61
62    private readonly DoubleValue minimumTarget;
63    public DoubleValue MinimumTarget {
64      get { return minimumTarget; }
65    }
66   
67    private readonly RunCollection instanceRuns;
68    public RunCollection InstanceRuns {
69      get { return instanceRuns; }
70    }
71
72    private readonly RunCollection seededRuns;
73    public RunCollection SeededRuns {
74      get { return seededRuns; }
75    }
76
77    private readonly RunCollection knowledgeBase;
78    public RunCollection KnowledgeBase {
79      get { return knowledgeBase; }
80    }
81
82    private readonly SingleObjectiveOKBProblem problem;
83    public SingleObjectiveOKBProblem Problem {
84      get { return problem; }
85    }
86
87    private readonly ItemList<IAlgorithm> algorithmInstances;
88    private readonly ReadOnlyItemList<IAlgorithm> readonlyAlgorithmInstances;
89    public ReadOnlyItemList<IAlgorithm> AlgorithmInstances {
90      get { return readonlyAlgorithmInstances; }
91    }
92
93    private readonly RunCollection problemInstances;
94    public RunCollection ProblemInstances {
95      get { return problemInstances; }
96    }
97
98    private IRecommendationModel recommendationModel;
99    public IRecommendationModel RecommendationModel {
100      get { return recommendationModel; }
101      set {
102        if (recommendationModel == value) return;
103        recommendationModel = value;
104        OnRecommenderModelChanged();
105      }
106    }
107
108    private readonly CheckedItemList<IScope> solutionSeedingPool;
109    public CheckedItemList<IScope> SolutionSeedingPool {
110      get { return solutionSeedingPool; }
111    }
112
113    private readonly EnumValue<SeedingStrategyTypes> seedingStrategy;
114    public EnumValue<SeedingStrategyTypes> SeedingStrategy {
115      get { return seedingStrategy; }
116    }
117   
118    private BidirectionalLookup<long, IRun> algorithmId2RunMapping;
119    private BidirectionalDictionary<long, IAlgorithm> algorithmId2AlgorithmInstanceMapping;
120    private BidirectionalDictionary<long, IRun> problemId2ProblemInstanceMapping;
121   
122    public bool Maximization {
123      get { return Problem != null && Problem.ProblemId >= 0 && ((IValueParameter<BoolValue>)Problem.MaximizationParameter).Value.Value; }
124    }
125
126    public KnowledgeCenter() {
127      maximumEvaluations = new IntValue(10000);
128      minimumTarget = new DoubleValue(0.05);
129      instanceRuns = new RunCollection();
130      seededRuns = new RunCollection();
131      knowledgeBase = new RunCollection();
132      algorithmInstances = new ItemList<IAlgorithm>();
133      readonlyAlgorithmInstances = algorithmInstances.AsReadOnly();
134      problemInstances = new RunCollection();
135      recommendationModel = FixedRankModel.GetEmpty();
136      problem = new SingleObjectiveOKBProblem();
137      algorithmId2RunMapping = new BidirectionalLookup<long, IRun>();
138      algorithmId2AlgorithmInstanceMapping = new BidirectionalDictionary<long, IAlgorithm>();
139      problemId2ProblemInstanceMapping = new BidirectionalDictionary<long, IRun>();
140      solutionSeedingPool = new CheckedItemList<IScope>();
141      seedingStrategy = new EnumValue<SeedingStrategyTypes>(SeedingStrategyTypes.NoSeeding);
142      RegisterEventHandlers();
143    }
144
145    private void ProblemOnProblemChanged(object sender, EventArgs eventArgs) {
146      // TODO: Potentially, knowledge base has to be re-downloaded
147    }
148
149    private void RegisterEventHandlers() {
150      maximumEvaluations.ValueChanged += MaximumEvaluationsOnValueChanged;
151      minimumTarget.ValueChanged += MinimumTargetOnValueChanged;
152      problem.ProblemChanged += ProblemOnProblemChanged;
153      problem.Solutions.ItemsAdded += ProblemSolutionsChanged;
154      problem.Solutions.ItemsReplaced += ProblemSolutionsChanged;
155      problem.Solutions.ItemsRemoved += ProblemSolutionsChanged;
156      problem.Solutions.CollectionReset += ProblemSolutionsChanged;
157      instanceRuns.CollectionReset += InformationChanged;
158      instanceRuns.ItemsAdded += InformationChanged;
159      instanceRuns.ItemsRemoved += InformationChanged;
160      instanceRuns.Reset += InformationChanged;
161      instanceRuns.UpdateOfRunsInProgressChanged += InformationChanged;
162      knowledgeBase.CollectionReset += InformationChanged;
163      knowledgeBase.ItemsAdded += InformationChanged;
164      knowledgeBase.ItemsRemoved += InformationChanged;
165    }
166
167    private void MaximumEvaluationsOnValueChanged(object sender, EventArgs eventArgs) {
168
169    }
170
171    private void MinimumTargetOnValueChanged(object sender, EventArgs e) {
172
173    }
174
175    private void ProblemSolutionsChanged(object sender, EventArgs e) {
176      foreach (var sol in Problem.Solutions.Select(x => x.Solution).OfType<IScope>()) {
177        if (!SolutionSeedingPool.Contains(sol))
178          SolutionSeedingPool.Add(sol, false);
179      }
180    }
181
182    private void InformationChanged(object sender, EventArgs e) {
183      var runCollection = sender as RunCollection;
184      if (runCollection != null && runCollection.UpdateOfRunsInProgress) return;
185    }
186   
187    public bool IsCurrentInstance(IRun run) {
188      if (!problemId2ProblemInstanceMapping.ContainsSecond(run)) return false;
189      return problemId2ProblemInstanceMapping.GetBySecond(run) == Problem.ProblemId;
190    }
191
192    public void UpdateInstanceProjection(string[] characteristics) {
193      if (characteristics.Length == 0) return;
194
195      var instances = GetProblemCharacteristics(characteristics);
196
197      var key2Idx = new BidirectionalDictionary<IRun, int>();
198      foreach (var kvp in instances.Select((k, i) => new { Index = i, Key = k.Key }))
199        key2Idx.Add(kvp.Key, kvp.Index);
200
201      #region MDS
202      Func<double[], double[], double> euclid = (a, b) => Math.Sqrt(a.Zip(b, (x, y) => (x - y)).Sum(x => x * x));
203      var num = instances.Count;
204      var matrix = new DoubleMatrix(num, num);
205      for (var i = 0; i < num - 1; i++) {
206        for (var j = i + 1; j < num; j++) {
207          matrix[i, j] = matrix[j, i] = euclid(instances[key2Idx.GetBySecond(i)], instances[key2Idx.GetBySecond(j)]);
208        }
209      }
210
211      var coords = MultidimensionalScaling.KruskalShepard(matrix);
212      #endregion
213      #region PCA
214      double[,] v = null;
215      var ds = new double[instances.Count, characteristics.Length];
216      if (characteristics.Length > 1) {
217        foreach (var instance in instances) {
218          var arr = instance.Value;
219          for (var feature = 0; feature < arr.Length; feature++)
220            ds[key2Idx.GetByFirst(instance.Key), feature] = arr[feature];
221        }
222
223        int info;
224        double[] s2;
225        alglib.pcabuildbasis(ds, ds.GetLength(0), ds.GetLength(1), out info, out s2, out v);
226      }
227      #endregion
228      #region SOM
229      var features = new DoubleMatrix(characteristics.Length, instances.Count);
230      foreach (var instance in instances) {
231        var arr = instance.Value;
232        for (var feature = 0; feature < arr.Length; feature++)
233          features[feature, key2Idx.GetByFirst(instance.Key)] = arr[feature];
234      }
235      var somCoords = SOM.Map(features, new MersenneTwister(42), somSize: 10, learningRadius: 20, iterations: 200, jittering: true);
236      #endregion
237
238      ProblemInstances.UpdateOfRunsInProgress = true;
239      try {
240        foreach (var instance in ProblemInstances) {
241          IItem item;
242          if (v != null) {
243            double x = 0, y = 0;
244            for (var feature = 0; feature < ds.GetLength(1); feature++) {
245              x += ds[key2Idx.GetByFirst(instance), feature] * v[feature, 0];
246              y += ds[key2Idx.GetByFirst(instance), feature] * v[feature, 1];
247            }
248
249            if (instance.Results.TryGetValue("Projection.PCA.X", out item)) {
250              ((DoubleValue)item).Value = x;
251            } else instance.Results.Add("Projection.PCA.X", new DoubleValue(x));
252            if (instance.Results.TryGetValue("Projection.PCA.Y", out item)) {
253              ((DoubleValue)item).Value = y;
254            } else instance.Results.Add("Projection.PCA.Y", new DoubleValue(y));
255          } else {
256            instance.Results.Remove("Projection.PCA.X");
257            instance.Results.Remove("Projection.PCA.Y");
258          }
259
260          if (instance.Results.TryGetValue("Projection.MDS.X", out item)) {
261            ((DoubleValue)item).Value = coords[key2Idx.GetByFirst(instance), 0];
262          } else instance.Results.Add("Projection.MDS.X", new DoubleValue(coords[key2Idx.GetByFirst(instance), 0]));
263          if (instance.Results.TryGetValue("Projection.MDS.Y", out item)) {
264            ((DoubleValue)item).Value = coords[key2Idx.GetByFirst(instance), 1];
265          } else instance.Results.Add("Projection.MDS.Y", new DoubleValue(coords[key2Idx.GetByFirst(instance), 1]));
266
267          if (instance.Results.TryGetValue("Projection.SOM.X", out item)) {
268            ((DoubleValue)item).Value = somCoords[key2Idx.GetByFirst(instance), 0];
269          } else instance.Results.Add("Projection.SOM.X", new DoubleValue(somCoords[key2Idx.GetByFirst(instance), 0]));
270          if (instance.Results.TryGetValue("Projection.SOM.Y", out item)) {
271            ((DoubleValue)item).Value = somCoords[key2Idx.GetByFirst(instance), 1];
272          } else instance.Results.Add("Projection.SOM.Y", new DoubleValue(somCoords[key2Idx.GetByFirst(instance), 1]));
273        }
274      } finally { ProblemInstances.UpdateOfRunsInProgress = false; }
275    }
276
277    private static readonly HashSet<string> InterestingValueNames = new HashSet<string>() {
278      "QualityPerEvaluations", "Problem Name", "Problem Type", "Algorithm Name", "Algorithm Type", "Maximization", "BestKnownQuality"
279    };
280
281    public Task<ResultCollection> StartAlgorithmAsync(int index) {
282      return StartAlgorithmAsync(index, CancellationToken.None);
283    }
284
285    public Task<ResultCollection> StartAlgorithmAsync(int index, CancellationToken cancellation) {
286      var selectedInstance = algorithmInstances[index];
287      var algorithmClone = (IAlgorithm)selectedInstance.Clone();
288      var problemClone = Problem.CloneProblem() as ISingleObjectiveHeuristicOptimizationProblem;
289      if (problemClone == null) throw new InvalidOperationException("Problem is not of type " + typeof(ISingleObjectiveHeuristicOptimizationProblem).FullName);
290      // TODO: It is assumed the problem instance by default is configured using no preexisting solution creator
291      var seedingStrategyLocal = SeedingStrategy.Value;
292      if (seedingStrategyLocal != SeedingStrategyTypes.NoSeeding) {
293        if (!SolutionSeedingPool.CheckedItems.Any()) throw new InvalidOperationException("There are no solutions selected for seeding.");
294        // TODO: It would be necessary to specify the solution creator somewhere (property and GUI)
295        var seedingCreator = problemClone.Operators.OfType<IPreexistingSolutionCreator>().FirstOrDefault();
296        if (seedingCreator == null) throw new InvalidOperationException("The problem does not contain a solution creator that allows seeding.");
297        seedingCreator.PreexistingSolutionsParameter.Value.Replace(SolutionSeedingPool.CheckedItems.Select(x => x.Value));
298        seedingCreator.SampleFromPreexistingParameter.Value.Value = seedingStrategyLocal == SeedingStrategyTypes.SeedBySampling;
299        // TODO: WHY!? WHY??!?
300        ((dynamic)problemClone.SolutionCreatorParameter).Value = (dynamic)seedingCreator;
301      }
302      algorithmClone.Problem = problemClone;
303      algorithmClone.Prepare(true);
304      IParameter stopParam;
305      var monitorStop = true;
306      if (algorithmClone.Parameters.TryGetValue("MaximumEvaluations", out stopParam)) {
307        var maxEvalParam = stopParam as IValueParameter<Data.IntValue>;
308        if (maxEvalParam != null) {
309          maxEvalParam.Value.Value = MaximumEvaluations.Value;
310          monitorStop = false;
311        }
312      }
313
314      // TODO: The following can be simplified when we have async implementation patterns for our algorithms:
315      // TODO: The closures can be removed and replaced with private member methods
316      var waitHandle = new AutoResetEvent(false);
317
318      #region EventHandler closures
319      EventHandler exeStateChanged = (sender, e) => {
320        if (algorithmClone.ExecutionState == ExecutionState.Stopped) {
321          lock (Problem.Solutions) {
322            foreach (var solution in algorithmClone.Results.Where(x => x.Name.ToLower().Contains("solution")).Select(x => x.Value).OfType<IScope>()) {
323              Problem.Solutions.Add(new SingleObjectiveOKBSolution(Problem.ProblemId) {
324                Quality = solution.Variables.ContainsKey(Problem.Problem.Evaluator.QualityParameter.ActualName) ? ((DoubleValue)solution.Variables[Problem.Problem.Evaluator.QualityParameter.ActualName].Value).Value : double.NaN,
325                Solution = (IItem)solution.Clone()
326              });
327            }
328          }
329          if (seedingStrategyLocal == SeedingStrategyTypes.NoSeeding) {
330            lock (InstanceRuns) {
331              InstanceRuns.Add(algorithmClone.Runs.Last());
332            }
333          } else {
334            lock (SeededRuns) {
335              SeededRuns.Add(algorithmClone.Runs.Last());
336            }
337          }
338          waitHandle.Set();
339        }
340      };
341
342      EventHandler<EventArgs<Exception>> exceptionOccurred = (sender, e) => {
343        waitHandle.Set();
344      };
345
346      EventHandler timeChanged = (sender, e) => {
347        IResult evalSolResult;
348        if (!algorithmClone.Results.TryGetValue("EvaluatedSolutions", out evalSolResult) || !(evalSolResult.Value is Data.IntValue)) return;
349        var evalSols = ((Data.IntValue)evalSolResult.Value).Value;
350        if (evalSols >= MaximumEvaluations.Value && algorithmClone.ExecutionState == ExecutionState.Started)
351          algorithmClone.Stop();
352      };
353      #endregion
354
355      algorithmClone.ExecutionStateChanged += exeStateChanged;
356      algorithmClone.ExceptionOccurred += exceptionOccurred;
357      if (monitorStop) algorithmClone.ExecutionTimeChanged += timeChanged;
358
359      return Task.Factory.StartNew(() => {
360        algorithmClone.Start();
361        OnAlgorithmInstanceStarted(algorithmClone);
362        var cancelRequested = false;
363        while (!waitHandle.WaitOne(200)) {
364          if (cancellation.IsCancellationRequested) {
365            cancelRequested = true;
366            break;
367          }
368        }
369        if (cancelRequested) {
370          try { algorithmClone.Stop(); } catch { } // ignore race condition if it is stopped in the meantime
371          waitHandle.WaitOne();
372        }
373        waitHandle.Dispose();
374        return algorithmClone.Results;
375      }, TaskCreationOptions.LongRunning);
376    }
377
378    public ResultCollection StartAlgorithm(int index, CancellationToken cancellation) {
379      var task = StartAlgorithmAsync(index, cancellation);
380      task.Wait(cancellation);
381      return task.Result;
382    }
383
384    public Task UpdateKnowledgeBaseAsync(IProgress progress = null) {
385      if (progress == null) progress = new Progress(string.Empty);
386      progress.Start("Updating Knowledge Base from OKB");
387      OnDownloadStarted(progress);
388      return Task.Factory.StartNew(() => { DoUpdateKnowledgeBase(progress); }, TaskCreationOptions.LongRunning);
389    }
390
391    public void UpdateKnowledgeBase(IProgress progress = null) {
392      UpdateKnowledgeBaseAsync(progress).Wait();
393    }
394
395    private void DoUpdateKnowledgeBase(IProgress progress) {
396      var queryClient = Clients.OKB.Query.QueryClient.Instance;
397      var adminClient = Clients.OKB.Administration.AdministrationClient.Instance;
398      try {
399        progress.Status = "Connecting to OKB...";
400        progress.ProgressValue = 0;
401        // FIXME: How to tell if refresh is necessary?
402        var refreshTasks = new[] {
403          Task.Factory.StartNew(() => queryClient.Refresh()),
404          Task.Factory.StartNew(() => adminClient.Refresh())
405        };
406        Task.WaitAll(refreshTasks);
407
408        var probInstance = adminClient.Problems.SingleOrDefault(x => x.Id == Problem.ProblemId);
409        if (probInstance == null) throw new InvalidOperationException("The chosen problem instance cannot be found in the OKB.");
410        var probClassId = probInstance.ProblemClassId;
411
412        var problemClassFilter = (Clients.OKB.Query.StringComparisonAvailableValuesFilter)queryClient.Filters.Single(x => x.Label == "Problem Class Name");
413        problemClassFilter.Value = adminClient.ProblemClasses.Single(x => x.Id == probClassId).Name;
414
415        problemId2ProblemInstanceMapping.Clear();
416        progress.Status = "Downloading problem instances...";
417        progress.ProgressValue = 0;
418        int[] p = { 0 };
419        ProblemInstances.UpdateOfRunsInProgress = true;
420        ProblemInstances.Clear();
421        var characteristics = new HashSet<string>();
422        var totalProblems = adminClient.Problems.Count(x => x.ProblemClassId == probClassId);
423        Parallel.ForEach(adminClient.Problems.Where(x => x.ProblemClassId == probClassId), new ParallelOptions { MaxDegreeOfParallelism = 8 }, (pInst) => {
424          var charas = new List<string>();
425          IRun probRun = null;
426          var data = Clients.OKB.Administration.AdministrationClient.GetProblemData(pInst.Id);
427          if (data != null) {
428            using (var stream = new MemoryStream(data)) {
429              try {
430                var prob = (IProblem)XmlParser.Deserialize<IContent>(stream);
431                probRun = new Run() { Name = prob.Name };
432                prob.CollectParameterValues(probRun.Parameters);
433                probRun.Parameters["Problem Name"] = new StringValue(prob.Name);
434                probRun.Parameters["Problem Type"] = new StringValue(prob.GetType().Name);
435                foreach (var v in RunCreationClient.Instance.GetCharacteristicValues(pInst.Id)) {
436                  probRun.Results.Add("Characteristic." + v.Name, RunCreationClient.Instance.ConvertToItem(v));
437                  charas.Add("Characteristic." + v.Name);
438                }
439              } catch { }
440              stream.Close();
441            }
442            if (probRun != null) {
443              lock (characteristics) {
444                problemId2ProblemInstanceMapping.Add(pInst.Id, probRun);
445                ProblemInstances.Add(probRun);
446                progress.Status = string.Format("Downloaded problem {0} (okb-id: {1})....", pInst.Name, pInst.Id);
447                p[0]++;
448                progress.ProgressValue = p[0] / (double)totalProblems;
449                foreach (var c in charas) characteristics.Add(c);
450              }
451            }
452          }
453        });
454
455        algorithmId2AlgorithmInstanceMapping.Clear();
456        progress.Status = "Downloading algorithm instances...";
457        progress.ProgressValue = 0;
458        p[0] = 0;
459        Parallel.ForEach(adminClient.Algorithms, new ParallelOptions { MaxDegreeOfParallelism = 8 }, (algInst) => {
460          IAlgorithm alg = null;
461          var data = Clients.OKB.Administration.AdministrationClient.GetAlgorithmData(algInst.Id);
462          if (data != null) {
463            using (var stream = new MemoryStream(data)) {
464              try {
465                alg = (IAlgorithm)XmlParser.Deserialize<IContent>(stream);
466              } catch { }
467              stream.Close();
468            }
469            if (alg != null) {
470              lock (algorithmId2AlgorithmInstanceMapping) {
471                algorithmInstances.Add(alg);
472                algorithmId2AlgorithmInstanceMapping.Add(algInst.Id, alg);
473                progress.Status = string.Format("Downloaded algorithm {0} (okb-id: {1})...", algInst.Name, algInst.Id);
474                p[0]++;
475                progress.ProgressValue = p[0] / (double)adminClient.Algorithms.Count;
476              }
477            }
478          }
479        });
480
481        var interestingValues = queryClient.ValueNames.Where(x => InterestingValueNames.Contains(x.Name)).ToList();
482
483        progress.Status = "Downloading runs...";
484        progress.ProgressValue = 0;
485        p[0] = 0;
486        var count = queryClient.GetNumberOfRuns(problemClassFilter);
487        if (count == 0) return;
488       
489        var runList = new List<IRun>();
490        var runIds = queryClient.GetRunIds(problemClassFilter).ToList();
491        var batches = runIds.Select((v, i) => new { Idx = i, Val = v }).GroupBy(x => x.Idx / 500, x => x.Val);
492        Parallel.ForEach(batches.Select(x => x.ToList()), new ParallelOptions { MaxDegreeOfParallelism = 4 }, (batch) => {
493          var okbRuns = queryClient.GetRunsWithValues(batch, true, interestingValues);
494          var hlRuns = okbRuns.AsParallel().Select(x => new { AlgorithmId = x.Algorithm.Id, Run = queryClient.ConvertToOptimizationRun(x) }).ToList();
495          lock (runList) {
496            foreach (var r in hlRuns) {
497              algorithmId2RunMapping.Add(r.AlgorithmId, r.Run);
498              runList.Add(r.Run);
499            }
500            progress.Status = string.Format("Downloaded runs {0} to {1} of {2}...", p[0], p[0] + batch.Count, count);
501            p[0] += batch.Count;
502            progress.ProgressValue = p[0] / (double)count;
503          }
504        });
505
506        progress.Status = "Finishing...";
507       
508        try {
509          KnowledgeBase.UpdateOfRunsInProgress = true;
510          KnowledgeBase.Clear();
511          KnowledgeBase.AddRange(runList);
512        } finally { KnowledgeBase.UpdateOfRunsInProgress = false; }
513
514        var algInstRunDict = runList.Where(x => x.Parameters.ContainsKey("Problem Name") && x.Parameters["Problem Name"] is StringValue)
515                                          .GroupBy(x => ((StringValue)x.Parameters["Problem Name"]).Value)
516                                          .ToDictionary(x => x.Key, x => x.GroupBy(y => ((StringValue)y.Parameters["Algorithm Name"]).Value)
517                                                                                  .ToDictionary(y => y.Key, y => y.ToList()));
518
519        // set best-known quality to best-found in case it is not known
520        foreach (var kvp in algInstRunDict) {
521          var prob = ProblemInstances.SingleOrDefault(x => ((StringValue)x.Parameters["Problem Name"]).Value == kvp.Key);
522          if (prob == null) continue;
523          var maximization = ((BoolValue)prob.Parameters["Maximization"]).Value;
524
525          IItem bkParam;
526          if (!prob.Parameters.TryGetValue("BestKnownQuality", out bkParam) || !(bkParam is DoubleValue)) {
527            var list = kvp.Value.SelectMany(x => x.Value)
528              .Where(x => x.Results.ContainsKey("QualityPerEvaluations"))
529              .Select(x => ((IndexedDataTable<double>)x.Results["QualityPerEvaluations"]).Rows.First().Values.Last().Item2);
530            if (!list.Any()) continue;
531            bkParam = new DoubleValue(maximization ? list.Max() : list.Min());
532            prob.Parameters["BestKnownQuality"] = bkParam;
533          }
534        }
535
536        // add algorithm instance ranks as features to the problem instances for a range of targets
537        foreach (var target in new[] {0, 0.01, 0.05, 0.1, 0.2, 0.5}) {
538          var cls = GetPerformanceClasses(target, 5);
539          foreach (var kvp in cls) {
540            var prob = kvp.Key;
541            foreach (var kvp2 in kvp.Value) {
542              var resultName = "Rank." + algorithmId2AlgorithmInstanceMapping.GetByFirst(kvp2.Key) + "@" + (target * 100) + "%";
543              prob.Results[resultName] = new IntValue(kvp2.Value);
544            }
545          }
546        }
547      } finally { progress.Finish(); ProblemInstances.UpdateOfRunsInProgress = false; }
548      UpdateInstanceProjection(ProblemInstances.ResultNames.Where(x => x.StartsWith("Characteristic.")).ToArray());
549    }
550
551    public static double[][] GetFeatures(IRun[] problemInstances, string[] characteristics, double[] medianValues = null) {
552      var instances = new double[problemInstances.Length][];
553      for (var p = 0; p < problemInstances.Length; p++) {
554        instances[p] = new double[characteristics.Length];
555        for (var f = 0; f < characteristics.Length; f++) {
556          IItem item;
557          if (problemInstances[p].Results.TryGetValue(characteristics[f], out item)) {
558            double val = 0;
559            var dItem = item as DoubleValue;
560            if (dItem != null) {
561              val = dItem.Value;
562            } else {
563              var iItem = item as IntValue;
564              if (iItem != null) val = iItem.Value;
565              else val = double.NaN;
566            }
567            if (double.IsNaN(val) && medianValues != null)
568              instances[p][f] = medianValues[f];
569            else instances[p][f] = val;
570          } else instances[p][f] = medianValues != null ? medianValues[f] : double.NaN;
571        }
572      }
573      return instances;
574    }
575
576    public static double[] GetMedianValues(IRun[] problemInstances, string[] characteristics) {
577      var values = new List<double>[characteristics.Length];
578      foreach (var problemInstance in problemInstances) {
579        for (var f = 0; f < characteristics.Length; f++) {
580          if (values[f] == null) values[f] = new List<double>(problemInstances.Length);
581          IItem item;
582          if (problemInstance.Results.TryGetValue(characteristics[f], out item)) {
583            var dItem = item as DoubleValue;
584            if (dItem != null) values[f].Add(dItem.Value);
585            else {
586              var iItem = item as IntValue;
587              if (iItem != null) values[f].Add(iItem.Value);
588            }
589          }
590        }
591      }
592      return values.Select(x => x.Count == 0 ? 0.0 : x.Median()).ToArray();
593    }
594
595    public Dictionary<IRun, double[]> GetProblemCharacteristics(string[] characteristics) {
596      var map = ProblemInstances.Select((v, i) => new { Index = i, ProblemInstance = v }).ToDictionary(x => x.Index, x => x.ProblemInstance);
597      var instances = GetFeatures(ProblemInstances.ToArray(), characteristics);
598      var median = GetMedianValues(ProblemInstances.ToArray(), characteristics);
599
600      var allValues = instances.Select(x => x.Select((f, i) => new { Idx = i, Val = double.IsNaN(f) ? median[i] : f }).ToList())
601        .SelectMany(x => x)
602        .GroupBy(x => x.Idx, x => x.Val)
603        .OrderBy(x => x.Key).ToList();
604      var avg = allValues.Select(x => x.Average()).ToList();
605      var stdev = allValues.Select(x => x.StandardDeviation()).ToList();
606
607      // normalize characteristic values by transforming them to their z-score
608      foreach (var features in instances) {
609        for (var i = 0; i < features.Length; i++) {
610          if (double.IsNaN(features[i])) features[i] = median[i];
611          if (stdev[i] > 0) features[i] = (features[i] - avg[i]) / stdev[i];
612        }
613      }
614      return instances.Select((v, i) => new { ProblemInstance = map[i], Features = v }).ToDictionary(x => x.ProblemInstance, x => x.Features);
615    }
616
617    public Dictionary<IAlgorithm, double> GetAlgorithmPerformance(IRun problemInstance) {
618      if (!problemInstance.Parameters.ContainsKey("BestKnownQuality")) return new Dictionary<IAlgorithm, double>();
619      var target = GetTarget(((DoubleValue)problemInstance.Parameters["BestKnownQuality"]).Value, MinimumTarget.Value, Maximization);
620      return knowledgeBase.Where(x => ((StringValue)x.Parameters["Problem Name"]).Value == ((StringValue)problemInstance.Parameters["Problem Name"]).Value)
621                          .GroupBy(x => algorithmId2AlgorithmInstanceMapping.GetByFirst(algorithmId2RunMapping.GetBySecond(x).Single()))
622                          .ToDictionary(x => x.Key, x => ExpectedRuntimeHelper.CalculateErt(x.ToList(), "QualityPerEvaluations", target, Maximization).ExpectedRuntime);
623    }
624
625    public Dictionary<IAlgorithm, List<IRun>> GetAlgorithmRuns(IRun problemInstance) {
626      return knowledgeBase.Where(x => ((StringValue)x.Parameters["Problem Name"]).Value == ((StringValue)problemInstance.Parameters["Problem Name"]).Value)
627                          .GroupBy(x => algorithmId2AlgorithmInstanceMapping.GetByFirst(algorithmId2RunMapping.GetBySecond(x).Single()))
628                          .ToDictionary(x => x.Key, x => x.ToList());
629    }
630
631    public Dictionary<IAlgorithm, List<IRun>> GetKnowledgeBaseByAlgorithm() {
632      return KnowledgeBase.GroupBy(x => algorithmId2AlgorithmInstanceMapping.GetByFirst(algorithmId2RunMapping.GetBySecond(x).Single()))
633                          .ToDictionary(x => x.Key, x => x.ToList());
634    }
635
636    public IEnumerable<IRegressionProblem> GetRegressionProblemPerAlgorithmInstance(double target, string[] characteristics) {
637      if (Problem == null) yield break;
638      var features = GetProblemCharacteristics(characteristics);
639      // TODO: knowledgebase only stores problem name as a string
640      // this doesn't work if there are two equally named problem instances
641      var problemMap = ProblemInstances.Select(x => new { Key = ((StringValue)x.Parameters["Problem Name"]).Value, Value = x })
642                                       .ToDictionary(x => x.Key, x => x.Value);
643      foreach (var relevantRuns in knowledgeBase.GroupBy(x => algorithmId2RunMapping.GetBySecond(x).Single())) {
644        var problemRuns = relevantRuns.GroupBy(x => ((StringValue)x.Parameters["Problem Name"]).Value).ToList();
645        var ds = new ModifiableDataset();
646        ds.AddVariable("Problem Name", new List<string>());
647        foreach (var pc in characteristics)
648          ds.AddVariable(pc, new List<double>());
649        ds.AddVariable("ERT", new List<double>());
650        foreach (var pr in problemRuns) {
651          var prob = problemMap[pr.Key];
652          var f = features[prob];
653          var max = ((BoolValue)prob.Parameters["Maximization"]).Value;
654          var bkq = ((DoubleValue)prob.Parameters["BestKnownQuality"]).Value;
655          var ert = ExpectedRuntimeHelper.CalculateErt(pr.ToList(), "QualityPerEvaluations", GetTarget(bkq, target, max), max).ExpectedRuntime;
656          if (double.IsNaN(ert)) ert = int.MaxValue;
657          ds.AddRow(new object[] { pr.Key }.Concat(f.Cast<object>()).Concat(new object[] { ert }));
658        }
659        var datAnalysisData = new RegressionProblemData(ds, characteristics, "ERT");
660        var result = new RegressionProblem() {
661          Name = algorithmId2AlgorithmInstanceMapping.GetByFirst(relevantRuns.Key).Name
662        };
663        result.ProblemDataParameter.Value = datAnalysisData;
664        yield return result;
665      }
666    }
667
668    public IEnumerable<IClassificationProblem> GetClassificationProblemPerAlgorithmInstance(double target, string[] characteristics) {
669      if (Problem == null) yield break;
670
671      var classes = GetPerformanceClasses(target, 5);
672      var features = GetProblemCharacteristics(characteristics);
673
674      foreach (var alg in AlgorithmInstances) {
675        var ds = new ModifiableDataset();
676        ds.AddVariable("Problem Name", new List<string>());
677        foreach (var pc in characteristics)
678          ds.AddVariable(pc, new List<double>());
679        ds.AddVariable("Class", new List<double>());
680
681        foreach (var c in classes) {
682          int cls;
683          if (c.Value.TryGetValue(algorithmId2AlgorithmInstanceMapping.GetBySecond(alg), out cls)) {
684            ds.AddRow(new object[] { ((StringValue)c.Key.Parameters["Problem Name"]).Value }
685              .Concat(features[c.Key].Cast<object>()).Concat(new object[] { cls }));
686          }
687        }
688        var datAnalysisData = new ClassificationProblemData(ds, characteristics, "Class");
689        var result = new ClassificationProblem() {
690          Name = alg.Name
691        };
692        result.ProblemDataParameter.Value = datAnalysisData;
693        yield return result;
694      }
695    }
696
697    public Dictionary<IRun, double> GetProblemDistances(string[] characteristics) {
698      var result = new Dictionary<IRun, double>();
699      var currentInstance = problemId2ProblemInstanceMapping.GetByFirst(Problem.ProblemId);
700      var features = GetProblemCharacteristics(characteristics);
701      var cF = features[currentInstance];
702      foreach (var b in ProblemInstances) {
703        if (b == currentInstance) continue;
704        var sum = features[b].Select((t, f) => (cF[f] - t) * (cF[f] - t)).Sum();
705        result[b] = Math.Sqrt(sum);
706      }
707      return result;
708    }
709
710    public Dictionary<IRun, Dictionary<long, int>> GetPerformanceClasses(double target, int nClasses) {
711      var result = new Dictionary<IRun, Dictionary<long, int>>();
712      var problemMap = ProblemInstances.Select(x => new { Key = ((StringValue)x.Parameters["Problem Name"]).Value, Value = x })
713                                       .ToDictionary(x => x.Key, x => x.Value);
714      foreach (var pr in KnowledgeBase.GroupBy(x => ((StringValue)x.Parameters["Problem Name"]).Value).ToList()) {
715        var bkq = ((DoubleValue)problemMap[pr.Key].Parameters["BestKnownQuality"]).Value;
716        var max = ((BoolValue)problemMap[pr.Key].Parameters["Maximization"]).Value;
717
718        result[problemMap[pr.Key]] = new Dictionary<long, int>();
719
720        var values = pr.GroupBy(x => algorithmId2RunMapping.GetBySecond(x).Single())
721                       .ToDictionary(x => x.Key, x => ExpectedRuntimeHelper.CalculateErt(x.ToList(), "QualityPerEvaluations", GetTarget(bkq, target, max), max).ExpectedRuntime);
722        var ranks = ClusteringHelper<long>.Cluster(nClasses, values, x => double.IsNaN(x.Value))
723          .GetByCluster().ToList();
724        foreach (var c in ranks) {
725          foreach (var a in c.Value)
726            result[problemMap[pr.Key]][a.Key] = c.Key;
727        }
728      }
729      return result;
730    }
731
732    public double GetTarget(double bestKnownQuality, double target, bool maximization) {
733      return bestKnownQuality * (maximization ? (1 - target) : (1 + target));
734    }
735
736    public event EventHandler<EventArgs<IProgress>> DownloadStarted;
737    private void OnDownloadStarted(IProgress progress) {
738      var handler = DownloadStarted;
739      if (handler != null) handler(this, new EventArgs<IProgress>(progress));
740    }
741
742    public event EventHandler<EventArgs<IAlgorithm>> AlgorithmInstanceStarted;
743    private void OnAlgorithmInstanceStarted(IAlgorithm instance) {
744      var handler = AlgorithmInstanceStarted;
745      if (handler != null) handler(this, new EventArgs<IAlgorithm>(instance));
746    }
747
748    public event EventHandler RecommendationModelChanged;
749    private void OnRecommenderModelChanged() {
750      var handler = RecommendationModelChanged;
751      if (handler != null) handler(this, EventArgs.Empty);
752    }
753
754    public IEnumerable<KeyValuePair<IAlgorithm, double>> GetAlgorithmInstanceRanking() {
755      return RecommendationModel.GetRanking(ProblemInstances.Single(IsCurrentInstance));
756    }
757  }
758}
Note: See TracBrowser for help on using the repository browser.