Free cookie consent management tool by TermsFeed Policy Generator

source: branches/1614_GeneralizedQAP/HeuristicLab.OptimizationExpertSystem.Common/3.3/KnowledgeCenter.cs

Last change on this file was 16728, checked in by abeham, 6 years ago

#1614: updated to new persistence and .NET 4.6.1

File size: 44.9 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Drawing;
25using System.IO;
26using System.Linq;
27using System.Threading;
28using System.Threading.Tasks;
29using HeuristicLab.Algorithms.DataAnalysis;
30using HeuristicLab.Analysis;
31using HeuristicLab.Analysis.SelfOrganizingMaps;
32using HeuristicLab.Collections;
33using HeuristicLab.Common;
34using HeuristicLab.Common.Resources;
35using HeuristicLab.Core;
36using HeuristicLab.Data;
37using HeuristicLab.MainForm;
38using HeuristicLab.Optimization;
39using HeuristicLab.Persistence.Default.Xml;
40using HeuristicLab.Problems.DataAnalysis;
41using HeuristicLab.Random;
42using Algorithm = HeuristicLab.Clients.OKB.Administration.Algorithm;
43using Problem = HeuristicLab.Clients.OKB.Administration.Problem;
44using RunCreationClient = HeuristicLab.Clients.OKB.RunCreation.RunCreationClient;
45using SingleObjectiveOKBProblem = HeuristicLab.Clients.OKB.RunCreation.SingleObjectiveOKBProblem;
46using SingleObjectiveOKBSolution = HeuristicLab.Clients.OKB.RunCreation.SingleObjectiveOKBSolution;
47
48namespace HeuristicLab.OptimizationExpertSystem.Common {
49  [Item("Knowledge Center", "Currently in experimental phase, an expert system that makes algorithm suggestions based on fitness landscape analysis features and an optimization knowledge base.")]
50  [Creatable(CreatableAttribute.Categories.TestingAndAnalysis, Priority = 119)]
51  public sealed class KnowledgeCenter : IContent {
52    private bool SuppressEvents { get; set; }
53
54    public string Filename { get; set; }
55
56    public static Image StaticItemImage {
57      get { return VSImageLibrary.Library; }
58    }
59
60    private readonly IntValue maximumEvaluations;
61    public IntValue MaximumEvaluations {
62      get { return maximumEvaluations; }
63    }
64
65    private readonly DoubleValue minimumTarget;
66    public DoubleValue MinimumTarget {
67      get { return minimumTarget; }
68    }
69   
70    private readonly RunCollection instanceRuns;
71    public RunCollection InstanceRuns {
72      get { return instanceRuns; }
73    }
74
75    private readonly RunCollection seededRuns;
76    public RunCollection SeededRuns {
77      get { return seededRuns; }
78    }
79
80    private readonly RunCollection knowledgeBase;
81    public RunCollection KnowledgeBase {
82      get { return knowledgeBase; }
83    }
84
85    private readonly SingleObjectiveOKBProblem problem;
86    public SingleObjectiveOKBProblem Problem {
87      get { return problem; }
88    }
89
90    private readonly ItemList<IAlgorithm> algorithmInstances;
91    private readonly ReadOnlyItemList<IAlgorithm> readonlyAlgorithmInstances;
92    public ReadOnlyItemList<IAlgorithm> AlgorithmInstances {
93      get { return readonlyAlgorithmInstances; }
94    }
95
96    private readonly RunCollection problemInstances;
97    public RunCollection ProblemInstances {
98      get { return problemInstances; }
99    }
100
101    private IRecommendationModel recommendationModel;
102    public IRecommendationModel RecommendationModel {
103      get { return recommendationModel; }
104      set {
105        if (recommendationModel == value) return;
106        recommendationModel = value;
107        OnRecommenderModelChanged();
108      }
109    }
110
111    private readonly CheckedItemList<IScope> solutionSeedingPool;
112    public CheckedItemList<IScope> SolutionSeedingPool {
113      get { return solutionSeedingPool; }
114    }
115
116    private readonly EnumValue<SeedingStrategyTypes> seedingStrategy;
117    public EnumValue<SeedingStrategyTypes> SeedingStrategy {
118      get { return seedingStrategy; }
119    }
120   
121    private BidirectionalLookup<long, IRun> algorithmId2RunMapping;
122    private BidirectionalDictionary<long, IAlgorithm> algorithmId2AlgorithmInstanceMapping;
123    private BidirectionalDictionary<long, IRun> problemId2ProblemInstanceMapping;
124   
125    public bool Maximization {
126      get { return Problem != null && Problem.ProblemId >= 0 && ((IValueParameter<BoolValue>)Problem.MaximizationParameter).Value.Value; }
127    }
128
129    public KnowledgeCenter() {
130      maximumEvaluations = new IntValue(10000);
131      minimumTarget = new DoubleValue(0.05);
132      instanceRuns = new RunCollection();
133      seededRuns = new RunCollection();
134      knowledgeBase = new RunCollection();
135      algorithmInstances = new ItemList<IAlgorithm>();
136      readonlyAlgorithmInstances = algorithmInstances.AsReadOnly();
137      problemInstances = new RunCollection();
138      recommendationModel = FixedRankModel.GetEmpty();
139      problem = new SingleObjectiveOKBProblem();
140      algorithmId2RunMapping = new BidirectionalLookup<long, IRun>();
141      algorithmId2AlgorithmInstanceMapping = new BidirectionalDictionary<long, IAlgorithm>();
142      problemId2ProblemInstanceMapping = new BidirectionalDictionary<long, IRun>();
143      solutionSeedingPool = new CheckedItemList<IScope>();
144      seedingStrategy = new EnumValue<SeedingStrategyTypes>(SeedingStrategyTypes.NoSeeding);
145      RegisterEventHandlers();
146    }
147
148    private void ProblemOnProblemChanged(object sender, EventArgs eventArgs) {
149      // TODO: Potentially, knowledge base has to be re-downloaded
150    }
151
152    private void RegisterEventHandlers() {
153      maximumEvaluations.ValueChanged += MaximumEvaluationsOnValueChanged;
154      minimumTarget.ValueChanged += MinimumTargetOnValueChanged;
155      problem.ProblemChanged += ProblemOnProblemChanged;
156      problem.Solutions.ItemsAdded += ProblemSolutionsChanged;
157      problem.Solutions.ItemsReplaced += ProblemSolutionsChanged;
158      problem.Solutions.ItemsRemoved += ProblemSolutionsChanged;
159      problem.Solutions.CollectionReset += ProblemSolutionsChanged;
160      instanceRuns.CollectionReset += InformationChanged;
161      instanceRuns.ItemsAdded += InformationChanged;
162      instanceRuns.ItemsRemoved += InformationChanged;
163      instanceRuns.Reset += InformationChanged;
164      instanceRuns.UpdateOfRunsInProgressChanged += InformationChanged;
165      knowledgeBase.CollectionReset += InformationChanged;
166      knowledgeBase.ItemsAdded += InformationChanged;
167      knowledgeBase.ItemsRemoved += InformationChanged;
168    }
169
170    private void MaximumEvaluationsOnValueChanged(object sender, EventArgs eventArgs) {
171
172    }
173
174    private void MinimumTargetOnValueChanged(object sender, EventArgs e) {
175
176    }
177
178    private void ProblemSolutionsChanged(object sender, EventArgs e) {
179      foreach (var sol in Problem.Solutions.Select(x => x.Solution).OfType<IScope>()) {
180        if (!SolutionSeedingPool.Contains(sol))
181          SolutionSeedingPool.Add(sol, false);
182      }
183    }
184
185    private void InformationChanged(object sender, EventArgs e) {
186      var runCollection = sender as RunCollection;
187      if (runCollection != null && runCollection.UpdateOfRunsInProgress) return;
188    }
189   
190    public bool IsCurrentInstance(IRun run) {
191      if (!problemId2ProblemInstanceMapping.ContainsSecond(run)) return false;
192      return problemId2ProblemInstanceMapping.GetBySecond(run) == Problem.ProblemId;
193    }
194
195    public void UpdateInstanceProjection(string[] characteristics) {
196      if (characteristics.Length == 0) return;
197
198      var instances = GetProblemCharacteristics(characteristics);
199
200      var key2Idx = new BidirectionalDictionary<IRun, int>();
201      foreach (var kvp in instances.Select((k, i) => new { Index = i, Key = k.Key }))
202        key2Idx.Add(kvp.Key, kvp.Index);
203
204      Func<double[], double[], double> euclid = (a, b) => Math.Sqrt(a.Zip(b, (x, y) => (x - y)).Sum(x => x * x));
205      Func<DoubleArray, DoubleArray, double> euclidDArray = (a, b) => Math.Sqrt(a.Zip(b, (x, y) => (x - y)).Sum(x => x * x));
206      #region MDS
207      var num = instances.Count;
208      var matrix = new DoubleMatrix(num, num);
209      for (var i = 0; i < num - 1; i++) {
210        for (var j = i + 1; j < num; j++) {
211          matrix[i, j] = matrix[j, i] = euclid(instances[key2Idx.GetBySecond(i)], instances[key2Idx.GetBySecond(j)]);
212        }
213      }
214
215      var coords = MultidimensionalScaling.KruskalShepard(matrix);
216      #endregion
217      #region PCA
218      double[,] v = null;
219      var ds = new double[instances.Count, characteristics.Length];
220      if (characteristics.Length > 1) {
221        foreach (var instance in instances) {
222          var arr = instance.Value;
223          for (var feature = 0; feature < arr.Length; feature++)
224            ds[key2Idx.GetByFirst(instance.Key), feature] = arr[feature];
225        }
226
227        int info;
228        double[] s2;
229        alglib.pcabuildbasis(ds, ds.GetLength(0), ds.GetLength(1), out info, out s2, out v);
230      }
231      #endregion
232      #region SOM
233      var features = new DoubleMatrix(characteristics.Length, instances.Count);
234      foreach (var instance in instances) {
235        var arr = instance.Value;
236        for (var feature = 0; feature < arr.Length; feature++)
237          features[feature, key2Idx.GetByFirst(instance.Key)] = arr[feature];
238      }
239      var somCoords = SOM.Map(features, new MersenneTwister(42), somSize: 10, learningRadius: 20, iterations: 200, jittering: true);
240      #endregion
241      #region TSNE
242      var tsneFeatures = new DoubleArray[instances.Count];
243      foreach (var instance in instances) {
244        tsneFeatures[key2Idx.GetByFirst(instance.Key)] = new DoubleArray(instance.Value);
245      }
246      var tsneCoords = TSNEStatic<DoubleArray>.Run(tsneFeatures, new EuclideanDistance(), new FastRandom(42),
247        newDimensions: 2, perplexity: Math.Min((instances.Count - 1) / 4, 50), theta: 0,
248        stopLyingIter: 0, momSwitchIter: 0, momentum: 0, finalMomentum: 0, eta: 10);
249      #endregion
250
251      ProblemInstances.UpdateOfRunsInProgress = true;
252      try {
253        foreach (var instance in ProblemInstances) {
254          IItem item;
255          if (v != null) {
256            double x = 0, y = 0;
257            for (var feature = 0; feature < ds.GetLength(1); feature++) {
258              x += ds[key2Idx.GetByFirst(instance), feature] * v[feature, 0];
259              y += ds[key2Idx.GetByFirst(instance), feature] * v[feature, 1];
260            }
261
262            if (instance.Results.TryGetValue("Projection.PCA.X", out item)) {
263              ((DoubleValue)item).Value = x;
264            } else instance.Results.Add("Projection.PCA.X", new DoubleValue(x));
265            if (instance.Results.TryGetValue("Projection.PCA.Y", out item)) {
266              ((DoubleValue)item).Value = y;
267            } else instance.Results.Add("Projection.PCA.Y", new DoubleValue(y));
268          } else {
269            instance.Results.Remove("Projection.PCA.X");
270            instance.Results.Remove("Projection.PCA.Y");
271          }
272
273          if (instance.Results.TryGetValue("Projection.MDS.X", out item)) {
274            ((DoubleValue)item).Value = coords[key2Idx.GetByFirst(instance), 0];
275          } else instance.Results.Add("Projection.MDS.X", new DoubleValue(coords[key2Idx.GetByFirst(instance), 0]));
276          if (instance.Results.TryGetValue("Projection.MDS.Y", out item)) {
277            ((DoubleValue)item).Value = coords[key2Idx.GetByFirst(instance), 1];
278          } else instance.Results.Add("Projection.MDS.Y", new DoubleValue(coords[key2Idx.GetByFirst(instance), 1]));
279
280          if (instance.Results.TryGetValue("Projection.SOM.X", out item)) {
281            ((DoubleValue)item).Value = somCoords[key2Idx.GetByFirst(instance), 0];
282          } else instance.Results.Add("Projection.SOM.X", new DoubleValue(somCoords[key2Idx.GetByFirst(instance), 0]));
283          if (instance.Results.TryGetValue("Projection.SOM.Y", out item)) {
284            ((DoubleValue)item).Value = somCoords[key2Idx.GetByFirst(instance), 1];
285          } else instance.Results.Add("Projection.SOM.Y", new DoubleValue(somCoords[key2Idx.GetByFirst(instance), 1]));
286
287          if (instance.Results.TryGetValue("Projection.TSNE.X", out item)) {
288            ((DoubleValue)item).Value = tsneCoords[key2Idx.GetByFirst(instance), 0];
289          } else instance.Results.Add("Projection.TSNE.X", new DoubleValue(tsneCoords[key2Idx.GetByFirst(instance), 0]));
290          if (instance.Results.TryGetValue("Projection.TSNE.Y", out item)) {
291            ((DoubleValue)item).Value = tsneCoords[key2Idx.GetByFirst(instance), 1];
292          } else instance.Results.Add("Projection.TSNE.Y", new DoubleValue(tsneCoords[key2Idx.GetByFirst(instance), 1]));
293        }
294      } finally { ProblemInstances.UpdateOfRunsInProgress = false; }
295    }
296
297    private static readonly HashSet<string> InterestingValueNames = new HashSet<string>() {
298      "QualityPerEvaluations", "QualityPerClock", "Problem Name", "Problem Type", "Algorithm Name", "Algorithm Type", "Maximization", "BestKnownQuality"
299    };
300
301    public Task<ResultCollection> StartAlgorithmAsync(int index) {
302      return StartAlgorithmAsync(index, CancellationToken.None);
303    }
304
305    public Task<ResultCollection> StartAlgorithmAsync(int index, CancellationToken cancellation) {
306      var selectedInstance = algorithmInstances[index];
307      var algorithmClone = (IAlgorithm)selectedInstance.Clone();
308      var problemClone = Problem.CloneProblem() as ISingleObjectiveHeuristicOptimizationProblem;
309      if (problemClone == null) throw new InvalidOperationException("Problem is not of type " + typeof(ISingleObjectiveHeuristicOptimizationProblem).FullName);
310      // TODO: It is assumed the problem instance by default is configured using no preexisting solution creator
311      var seedingStrategyLocal = SeedingStrategy.Value;
312      if (seedingStrategyLocal != SeedingStrategyTypes.NoSeeding) {
313        if (!SolutionSeedingPool.CheckedItems.Any()) throw new InvalidOperationException("There are no solutions selected for seeding.");
314        // TODO: It would be necessary to specify the solution creator somewhere (property and GUI)
315        var seedingCreator = problemClone.Operators.OfType<IPreexistingSolutionCreator>().FirstOrDefault();
316        if (seedingCreator == null) throw new InvalidOperationException("The problem does not contain a solution creator that allows seeding.");
317        seedingCreator.PreexistingSolutionsParameter.Value.Replace(SolutionSeedingPool.CheckedItems.Select(x => x.Value));
318        seedingCreator.SampleFromPreexistingParameter.Value.Value = seedingStrategyLocal == SeedingStrategyTypes.SeedBySampling;
319        // TODO: WHY!? WHY??!?
320        ((dynamic)problemClone.SolutionCreatorParameter).Value = (dynamic)seedingCreator;
321      }
322      algorithmClone.Problem = problemClone;
323      algorithmClone.Prepare(true);
324      IParameter stopParam;
325      var monitorStop = true;
326      if (algorithmClone.Parameters.TryGetValue("MaximumEvaluations", out stopParam)) {
327        var maxEvalParam = stopParam as IValueParameter<Data.IntValue>;
328        if (maxEvalParam != null) {
329          maxEvalParam.Value.Value = MaximumEvaluations.Value;
330          monitorStop = false;
331        }
332      }
333
334      // TODO: The following can be simplified when we have async implementation patterns for our algorithms:
335      // TODO: The closures can be removed and replaced with private member methods
336      var waitHandle = new AutoResetEvent(false);
337
338      #region EventHandler closures
339      EventHandler exeStateChanged = (sender, e) => {
340        if (algorithmClone.ExecutionState == ExecutionState.Stopped) {
341          lock (Problem.Solutions) {
342            foreach (var solution in algorithmClone.Results.Where(x => x.Name.ToLower().Contains("solution")).Select(x => x.Value).OfType<IScope>()) {
343              Problem.Solutions.Add(new SingleObjectiveOKBSolution(Problem.ProblemId) {
344                Quality = solution.Variables.ContainsKey(Problem.Problem.Evaluator.QualityParameter.ActualName) ? ((DoubleValue)solution.Variables[Problem.Problem.Evaluator.QualityParameter.ActualName].Value).Value : double.NaN,
345                Solution = (IItem)solution.Clone()
346              });
347            }
348          }
349          if (seedingStrategyLocal == SeedingStrategyTypes.NoSeeding) {
350            lock (InstanceRuns) {
351              InstanceRuns.Add(algorithmClone.Runs.Last());
352            }
353          } else {
354            lock (SeededRuns) {
355              SeededRuns.Add(algorithmClone.Runs.Last());
356            }
357          }
358          waitHandle.Set();
359        }
360      };
361
362      EventHandler<EventArgs<Exception>> exceptionOccurred = (sender, e) => {
363        waitHandle.Set();
364      };
365
366      EventHandler timeChanged = (sender, e) => {
367        IResult evalSolResult;
368        if (!algorithmClone.Results.TryGetValue("EvaluatedSolutions", out evalSolResult) || !(evalSolResult.Value is Data.IntValue)) return;
369        var evalSols = ((Data.IntValue)evalSolResult.Value).Value;
370        if (evalSols >= MaximumEvaluations.Value && algorithmClone.ExecutionState == ExecutionState.Started)
371          algorithmClone.Stop();
372      };
373      #endregion
374
375      algorithmClone.ExecutionStateChanged += exeStateChanged;
376      algorithmClone.ExceptionOccurred += exceptionOccurred;
377      if (monitorStop) algorithmClone.ExecutionTimeChanged += timeChanged;
378
379      return Task.Factory.StartNew(() => {
380        algorithmClone.Start();
381        OnAlgorithmInstanceStarted(algorithmClone);
382        var cancelRequested = false;
383        while (!waitHandle.WaitOne(200)) {
384          if (cancellation.IsCancellationRequested) {
385            cancelRequested = true;
386            break;
387          }
388        }
389        if (cancelRequested) {
390          try { algorithmClone.Stop(); } catch { } // ignore race condition if it is stopped in the meantime
391          waitHandle.WaitOne();
392        }
393        waitHandle.Dispose();
394        return algorithmClone.Results;
395      }, TaskCreationOptions.LongRunning);
396    }
397
398    public ResultCollection StartAlgorithm(int index, CancellationToken cancellation) {
399      var task = StartAlgorithmAsync(index, cancellation);
400      task.Wait(cancellation);
401      return task.Result;
402    }
403
404    public Task UpdateKnowledgeBaseAsync(IProgress progress = null) {
405      progress?.Start("Updating Knowledge Base from OKB");
406      OnDownloadStarted(progress);
407      return Task.Factory.StartNew(() => { DoUpdateKnowledgeBase(progress); }, TaskCreationOptions.LongRunning);
408    }
409
410    public void UpdateKnowledgeBase(IProgress progress = null) {
411      UpdateKnowledgeBaseAsync(progress).Wait();
412    }
413
414    private void DoUpdateKnowledgeBase(IProgress progress) {
415      var queryClient = Clients.OKB.Query.QueryClient.Instance;
416      var adminClient = Clients.OKB.Administration.AdministrationClient.Instance;
417      try {
418        if (progress != null) {
419          progress.Message = "Connecting to OKB...";
420          progress.ProgressValue = 0;
421        }
422        // FIXME: How to tell if refresh is necessary?
423        var refreshTasks = new[] {
424          Task.Factory.StartNew(() => queryClient.Refresh()),
425          Task.Factory.StartNew(() => adminClient.Refresh())
426        };
427        Task.WaitAll(refreshTasks);
428
429        var probInstance = adminClient.Problems.SingleOrDefault(x => x.Id == Problem.ProblemId);
430        if (probInstance == null) throw new InvalidOperationException("The chosen problem instance cannot be found in the OKB.");
431        var probClassId = probInstance.ProblemClassId;
432
433        var problemClassFilter = (Clients.OKB.Query.StringComparisonAvailableValuesFilter)queryClient.Filters.Single(x => x.Label == "Problem Class Name");
434        problemClassFilter.Value = adminClient.ProblemClasses.Single(x => x.Id == probClassId).Name;
435
436        problemId2ProblemInstanceMapping.Clear();
437        if (progress != null) {
438          progress.Message = "Downloading algorithm and problem instances...";
439          progress.ProgressValue = 0;
440        }
441        int[] p = { 0 };
442        ProblemInstances.UpdateOfRunsInProgress = true;
443        ProblemInstances.Clear();
444        algorithmId2AlgorithmInstanceMapping.Clear();
445        algorithmId2RunMapping.Clear();
446        algorithmInstances.Clear();
447
448        var characteristics = new HashSet<string>();
449        var totalProblems = adminClient.Problems.Count(x => x.ProblemClassId == probClassId);
450        var totalAlgorithms = adminClient.Algorithms.Count;
451        var problems = adminClient.Problems.Where(x => x.ProblemClassId == probClassId);
452        var algorithms = adminClient.Algorithms;
453        var combined = problems.Cast<object>().Concat(algorithms.Cast<object>()).Shuffle(new MersenneTwister());
454        Parallel.ForEach(combined, new ParallelOptions { MaxDegreeOfParallelism = Environment.ProcessorCount }, (inst) => {
455          var pInst = inst as Clients.OKB.Administration.Problem;
456          if (pInst != null) DownloadProblemInstance(progress, pInst, p, totalProblems + totalAlgorithms, characteristics);
457          else {
458            var aInst = inst as Clients.OKB.Administration.Algorithm;
459            DownloadAlgorithmInstance(progress, aInst, p, totalProblems + totalAlgorithms);
460          }
461        });
462
463        var interestingValues = queryClient.ValueNames.Where(x => InterestingValueNames.Contains(x.Name)).ToList();
464
465        if (progress != null) {
466          progress.Message = "Downloading runs...";
467          progress.ProgressValue = 0;
468        }
469        p[0] = 0;
470        var count = queryClient.GetNumberOfRuns(problemClassFilter);
471        if (count == 0) return;
472       
473        var runList = new List<IRun>();
474        var runIds = LoadRunsFromCache(queryClient.GetRunIds(problemClassFilter), runList, progress);
475        var batches = runIds.Select((v, i) => new { Idx = i, Val = v }).GroupBy(x => x.Idx / 500, x => x.Val);
476        Parallel.ForEach(batches.Select(x => x.ToList()), new ParallelOptions { MaxDegreeOfParallelism = Math.Min(Environment.ProcessorCount, 4) },
477          (batch) => {
478          var okbRuns = queryClient.GetRunsWithValues(batch, true, interestingValues);
479          var hlRuns = okbRuns.AsParallel().Select(x => new { AlgorithmId = x.Algorithm.Id, RunId = x.Id, Run = queryClient.ConvertToOptimizationRun(x) }).ToList();
480          lock (runList) {
481            var toCache = new List<Tuple<long, long, IRun>>();
482            foreach (var r in hlRuns) {
483              algorithmId2RunMapping.Add(r.AlgorithmId, r.Run);
484              runList.Add(r.Run);
485              toCache.Add(Tuple.Create(r.AlgorithmId, r.RunId, r.Run));
486            }
487            SaveToCache(toCache);
488            p[0] += batch.Count;
489            if (progress != null) {
490              progress.Message = string.Format("Downloaded runs {0} to {1} of {2}...", p[0], p[0] + batch.Count, count);
491              progress.ProgressValue = p[0] / (double)count;
492            }
493          }
494        });
495        if (progress != null) {
496          progress.Message = "Finishing...";
497        }
498        // remove algorithm instances that do not appear in any downloaded run
499        for (var algIdx = 0; algIdx < algorithmInstances.Count; algIdx++) {
500          var id = algorithmId2AlgorithmInstanceMapping.GetBySecond(algorithmInstances[algIdx]);
501          if (!algorithmId2RunMapping.ContainsFirst(id)) {
502            algorithmId2AlgorithmInstanceMapping.RemoveByFirst(id);
503            algorithmInstances.RemoveAt(algIdx);
504            algIdx--;
505          }
506        }
507       
508        try {
509          KnowledgeBase.UpdateOfRunsInProgress = true;
510          KnowledgeBase.Clear();
511          KnowledgeBase.AddRange(runList);
512        } finally { KnowledgeBase.UpdateOfRunsInProgress = false; }
513
514        var algInstRunDict = runList.Where(x => x.Parameters.ContainsKey("Problem Name") && x.Parameters["Problem Name"] is StringValue)
515                                          .GroupBy(x => ((StringValue)x.Parameters["Problem Name"]).Value)
516                                          .ToDictionary(x => x.Key, x => x.GroupBy(y => ((StringValue)y.Parameters["Algorithm Name"]).Value)
517                                                                                  .ToDictionary(y => y.Key, y => y.ToList()));
518
519        // set best-known quality to best-found in case it is not known
520        foreach (var kvp in algInstRunDict) {
521          var prob = ProblemInstances.SingleOrDefault(x => ((StringValue)x.Parameters["Problem Name"]).Value == kvp.Key);
522          if (prob == null) continue;
523          var maximization = ((BoolValue)prob.Parameters["Maximization"]).Value;
524
525          IItem bkParam;
526          if (!prob.Parameters.TryGetValue("BestKnownQuality", out bkParam) || !(bkParam is DoubleValue) || double.IsNaN(((DoubleValue)bkParam).Value)) {
527            var list = kvp.Value.SelectMany(x => x.Value)
528              .Where(x => x.Results.ContainsKey("QualityPerClock"))
529              .Select(x => (IndexedDataTable<double>)x.Results["QualityPerClock"])
530              .Where(x => x.Rows.Count > 0 && x.Rows.First().Values.Count > 0)
531              .Select(x => x.Rows.First().Values.Last().Item2);
532            if (!list.Any()) continue;
533            bkParam = new DoubleValue(maximization ? list.Max() : list.Min());
534            prob.Parameters["BestKnownQuality"] = bkParam;
535          }
536        }
537
538        // add algorithm instance ranks as features to the problem instances for a range of targets
539        foreach (var target in new[] {0, 0.01, 0.05, 0.1, 0.2, 0.5}) {
540          var cls = GetPerformanceClasses(target, 5);
541          foreach (var kvp in cls) {
542            var prob = kvp.Key;
543            foreach (var kvp2 in kvp.Value) {
544              var resultName = "Rank." + algorithmId2AlgorithmInstanceMapping.GetByFirst(kvp2.Key) + "@" + (target * 100) + "%";
545              prob.Results[resultName] = new IntValue(kvp2.Value);
546            }
547          }
548        }
549      } finally { progress?.Finish(); ProblemInstances.UpdateOfRunsInProgress = false; }
550      UpdateInstanceProjection(ProblemInstances.ResultNames.Where(x => x.StartsWith("Characteristic.")).ToArray());
551    }
552
553    private void DownloadAlgorithmInstance(IProgress progress, Algorithm algInst, int[] p, int total) {
554      IAlgorithm alg = null;
555      var data = Clients.OKB.Administration.AdministrationClient.GetAlgorithmData(algInst.Id);
556      if (data != null) {
557        using (var stream = new MemoryStream(data)) {
558          try {
559            alg = (IAlgorithm)XmlParser.Deserialize<IContent>(stream);
560          } catch { }
561          stream.Close();
562        }
563        if (alg != null) {
564          lock (this) {
565            algorithmInstances.Add(alg);
566            algorithmId2AlgorithmInstanceMapping.Add(algInst.Id, alg);
567            p[0]++;
568            if (progress != null) {
569              progress.Message = string.Format("Downloaded algorithm {0} (okb-id: {1})...", algInst.Name, algInst.Id);
570              progress.ProgressValue = p[0] / (double)total;
571            }
572          }
573        }
574      }
575    }
576
577    private void DownloadProblemInstance(IProgress progress, Problem pInst, int[] p, int totalProblems, HashSet<string> characteristics) {
578      var charas = new List<string>();
579      IRun probRun = null;
580      var data = Clients.OKB.Administration.AdministrationClient.GetProblemData(pInst.Id);
581      if (data != null) {
582        using (var stream = new MemoryStream(data)) {
583          try {
584            var prob = (IProblem)XmlParser.Deserialize<IContent>(stream);
585            probRun = new Run() {Name = prob.Name};
586            prob.CollectParameterValues(probRun.Parameters);
587            probRun.Parameters["Problem Name"] = new StringValue(prob.Name);
588            probRun.Parameters["Problem Type"] = new StringValue(prob.GetType().Name);
589            foreach (var v in RunCreationClient.Instance.GetCharacteristicValues(pInst.Id)) {
590              probRun.Results.Add("Characteristic." + v.Name, RunCreationClient.Instance.ConvertToItem(v));
591              charas.Add("Characteristic." + v.Name);
592            }
593          } catch { }
594          stream.Close();
595        }
596        if (probRun != null) {
597          lock (this) {
598            problemId2ProblemInstanceMapping.Add(pInst.Id, probRun);
599            ProblemInstances.Add(probRun);
600            p[0]++;
601
602            if (progress != null) {
603              progress.Message = string.Format("Downloaded problem {0} (okb-id: {1})....", pInst.Name, pInst.Id);
604              progress.ProgressValue = p[0] / (double)totalProblems;
605            }
606            foreach (var c in charas) characteristics.Add(c);
607          }
608        }
609      }
610    }
611
612    private List<long> LoadRunsFromCache(IEnumerable<long> runIds, List<IRun> runList, IProgress progress) {
613      var hashSet = new HashSet<long>(runIds);
614      var total = hashSet.Count;
615      try {
616        var path = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData), "HeuristicLab.OKB", "cache", "runs");
617        Parallel.ForEach(Directory.EnumerateDirectories(path).Select((d, i) => new { Index = i, Directory = d }).GroupBy(x => x.Index / 100), new ParallelOptions() { MaxDegreeOfParallelism = Environment.ProcessorCount },
618          (folderGroup) => {
619          var localRunList = new List<Tuple<long, long, IRun>>();
620          foreach (var runPath in folderGroup.Select(x => x.Directory)) {
621            long runId;
622            var runFolder = new DirectoryInfo(runPath).Name;
623            if (!long.TryParse(runFolder, out runId) || !hashSet.Contains(runId)) continue;
624            var runFilePath = Directory.EnumerateFiles(runPath).Single();
625            var runFileName = Path.GetFileNameWithoutExtension(runFilePath);
626            long algId;
627            if (!long.TryParse(runFileName, out algId)) continue;
628            IRun run = null;
629            try {
630              using (var file = File.OpenRead(runFilePath))
631                run = XmlParser.Deserialize<IRun>(file);
632            } catch {
633              File.Delete(runFilePath);
634              Directory.Delete(runPath);
635            }
636            if (run != null) localRunList.Add(Tuple.Create(algId, runId, run));
637          }
638          lock (runList) {
639            foreach (var r in localRunList) {
640              hashSet.Remove(r.Item2);
641              algorithmId2RunMapping.Add(r.Item1, r.Item3);
642              runList.Add(r.Item3);
643            }
644            if (progress != null) {
645              progress.Message = string.Format("Retrieved {0} of {1} from cache", runList.Count, total);
646              progress.ProgressValue = (double)runList.Count / total;
647            }
648          }
649        });
650      } catch { }
651      return hashSet.ToList();
652    }
653
654    private void SaveToCache(IEnumerable<Tuple<long, long, IRun>> runs) {
655      try {
656        var path = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData), "HeuristicLab.OKB", "cache", "runs");
657        if (!Directory.Exists(path)) Directory.CreateDirectory(path);
658        foreach (var r in runs) {
659          var runPath = Path.Combine(path, r.Item2.ToString());
660          if (!Directory.Exists(runPath)) Directory.CreateDirectory(runPath);
661          using (var file = File.Open(Path.Combine(runPath, r.Item1.ToString()), FileMode.Create, FileAccess.ReadWrite)) {
662            XmlGenerator.Serialize(r.Item3, file);
663          }
664        }
665      } catch { }
666    }
667
668    public static double[][] GetFeatures(IRun[] problemInstances, string[] characteristics, double[] medianValues = null) {
669      var instances = new double[problemInstances.Length][];
670      for (var p = 0; p < problemInstances.Length; p++) {
671        instances[p] = new double[characteristics.Length];
672        for (var f = 0; f < characteristics.Length; f++) {
673          IItem item;
674          if (problemInstances[p].Results.TryGetValue(characteristics[f], out item)) {
675            double val = 0;
676            var dItem = item as DoubleValue;
677            if (dItem != null) {
678              val = dItem.Value;
679            } else {
680              var iItem = item as IntValue;
681              if (iItem != null) val = iItem.Value;
682              else val = double.NaN;
683            }
684            if (double.IsNaN(val) && medianValues != null)
685              instances[p][f] = medianValues[f];
686            else instances[p][f] = val;
687          } else instances[p][f] = medianValues != null ? medianValues[f] : double.NaN;
688        }
689      }
690      return instances;
691    }
692
693    public static double[][] GetFeaturesStandardized(IRun[] problemInstances, string[] characteristics, out double[] means, out double[] sdevs, double[] medianValues = null) {
694      var instances = new double[problemInstances.Length][];
695      var columns = new List<double>[characteristics.Length];
696      for (var p = 0; p < problemInstances.Length; p++) {
697        instances[p] = new double[characteristics.Length];
698        for (var f = 0; f < characteristics.Length; f++) {
699          if (columns[f] == null) {
700            columns[f] = new List<double>(problemInstances.Length);
701          }
702          IItem item;
703          if (problemInstances[p].Results.TryGetValue(characteristics[f], out item)) {
704            double val = 0;
705            var dItem = item as DoubleValue;
706            if (dItem != null) {
707              val = dItem.Value;
708            } else {
709              var iItem = item as IntValue;
710              if (iItem != null) val = iItem.Value;
711              else val = double.NaN;
712            }
713            if (double.IsNaN(val) && medianValues != null)
714              instances[p][f] = medianValues[f];
715            else instances[p][f] = val;
716            columns[f].Add(instances[p][f]);
717          } else instances[p][f] = medianValues != null ? medianValues[f] : double.NaN;
718        }
719      }
720
721      means = new double[characteristics.Length];
722      sdevs = new double[characteristics.Length];
723      for (var f = 0; f < characteristics.Length; f++) {
724        var mean = columns[f].Average();
725        var dev = columns[f].StandardDeviation();
726        means[f] = mean;
727        sdevs[f] = dev;
728        for (var p = 0; p < problemInstances.Length; p++) {
729          if (dev.IsAlmost(0)) instances[p][f] = 0;
730          else instances[p][f] = (instances[p][f] - mean) / dev;
731        }
732      }
733
734      return instances;
735    }
736
737    public static double[] GetMedianValues(IRun[] problemInstances, string[] characteristics) {
738      var values = new List<double>[characteristics.Length];
739      foreach (var problemInstance in problemInstances) {
740        for (var f = 0; f < characteristics.Length; f++) {
741          if (values[f] == null) values[f] = new List<double>(problemInstances.Length);
742          IItem item;
743          if (problemInstance.Results.TryGetValue(characteristics[f], out item)) {
744            var dItem = item as DoubleValue;
745            if (dItem != null) values[f].Add(dItem.Value);
746            else {
747              var iItem = item as IntValue;
748              if (iItem != null) values[f].Add(iItem.Value);
749            }
750          }
751        }
752      }
753      return values.Select(x => x.Count == 0 ? 0.0 : x.Median()).ToArray();
754    }
755
756    public Dictionary<IRun, double[]> GetProblemCharacteristics(string[] characteristics) {
757      var map = ProblemInstances.Select((v, i) => new { Index = i, ProblemInstance = v }).ToDictionary(x => x.Index, x => x.ProblemInstance);
758      var instances = GetFeatures(ProblemInstances.ToArray(), characteristics);
759      var median = GetMedianValues(ProblemInstances.ToArray(), characteristics);
760
761      var allValues = instances.Select(x => x.Select((f, i) => new { Idx = i, Val = double.IsNaN(f) ? median[i] : f }).ToList())
762        .SelectMany(x => x)
763        .GroupBy(x => x.Idx, x => x.Val)
764        .OrderBy(x => x.Key).ToList();
765      var avg = allValues.Select(x => x.Average()).ToList();
766      var stdev = allValues.Select(x => x.StandardDeviation()).ToList();
767
768      // normalize characteristic values by transforming them to their z-score
769      foreach (var features in instances) {
770        for (var i = 0; i < features.Length; i++) {
771          if (double.IsNaN(features[i])) features[i] = median[i];
772          if (stdev[i] > 0) features[i] = (features[i] - avg[i]) / stdev[i];
773        }
774      }
775      return instances.Select((v, i) => new { ProblemInstance = map[i], Features = v }).ToDictionary(x => x.ProblemInstance, x => x.Features);
776    }
777
778    public Dictionary<IAlgorithm, double> GetAlgorithmPerformance(IRun problemInstance) {
779      if (!problemInstance.Parameters.ContainsKey("BestKnownQuality")) return new Dictionary<IAlgorithm, double>();
780      var target = GetTarget(((DoubleValue)problemInstance.Parameters["BestKnownQuality"]).Value, MinimumTarget.Value, Maximization);
781      return knowledgeBase.Where(x => ((StringValue)x.Parameters["Problem Name"]).Value == ((StringValue)problemInstance.Parameters["Problem Name"]).Value)
782                          .GroupBy(x => algorithmId2AlgorithmInstanceMapping.GetByFirst(algorithmId2RunMapping.GetBySecond(x).Single()))
783                          .ToDictionary(x => x.Key, x => ExpectedRuntimeHelper.CalculateErt(x.ToList(), "QualityPerClock", target, Maximization).ExpectedRuntime);
784    }
785
786    public Dictionary<IAlgorithm, double> GetAlgorithmPerformanceLog10(IRun problemInstance) {
787      if (!problemInstance.Parameters.ContainsKey("BestKnownQuality")) return new Dictionary<IAlgorithm, double>();
788      var target = GetTarget(((DoubleValue)problemInstance.Parameters["BestKnownQuality"]).Value, MinimumTarget.Value, Maximization);
789      return knowledgeBase.Where(x => ((StringValue)x.Parameters["Problem Name"]).Value == ((StringValue)problemInstance.Parameters["Problem Name"]).Value)
790                          .GroupBy(x => algorithmId2AlgorithmInstanceMapping.GetByFirst(algorithmId2RunMapping.GetBySecond(x).Single()))
791                          .ToDictionary(x => x.Key, x => Math.Log10(ExpectedRuntimeHelper.CalculateErt(x.ToList(), "QualityPerClock", target, Maximization).ExpectedRuntime));
792    }
793
794    public Dictionary<IAlgorithm, List<IRun>> GetAlgorithmRuns(IRun problemInstance) {
795      return knowledgeBase.Where(x => ((StringValue)x.Parameters["Problem Name"]).Value == ((StringValue)problemInstance.Parameters["Problem Name"]).Value)
796                          .GroupBy(x => algorithmId2AlgorithmInstanceMapping.GetByFirst(algorithmId2RunMapping.GetBySecond(x).Single()))
797                          .ToDictionary(x => x.Key, x => x.ToList());
798    }
799
800    public Dictionary<IAlgorithm, List<IRun>> GetKnowledgeBaseByAlgorithm() {
801      return KnowledgeBase.GroupBy(x => algorithmId2AlgorithmInstanceMapping.GetByFirst(algorithmId2RunMapping.GetBySecond(x).Single()))
802                          .ToDictionary(x => x.Key, x => x.ToList());
803    }
804
805    public IEnumerable<IRegressionProblem> GetRegressionProblemPerAlgorithmInstance(double target, string[] characteristics) {
806      if (Problem == null) yield break;
807      var features = GetProblemCharacteristics(characteristics);
808      // TODO: knowledgebase only stores problem name as a string
809      // this doesn't work if there are two equally named problem instances
810      var problemMap = ProblemInstances.Select(x => new { Key = ((StringValue)x.Parameters["Problem Name"]).Value, Value = x })
811                                       .ToDictionary(x => x.Key, x => x.Value);
812      foreach (var relevantRuns in knowledgeBase.GroupBy(x => algorithmId2RunMapping.GetBySecond(x).Single())) {
813        var problemRuns = relevantRuns.GroupBy(x => ((StringValue)x.Parameters["Problem Name"]).Value).ToList();
814        var ds = new ModifiableDataset();
815        ds.AddVariable("Problem Name", new List<string>());
816        foreach (var pc in characteristics)
817          ds.AddVariable(pc, new List<double>());
818        ds.AddVariable("ERT", new List<double>());
819        foreach (var pr in problemRuns) {
820          var prob = problemMap[pr.Key];
821          var f = features[prob];
822          var max = ((BoolValue)prob.Parameters["Maximization"]).Value;
823          var bkq = ((DoubleValue)prob.Parameters["BestKnownQuality"]).Value;
824          var ert = ExpectedRuntimeHelper.CalculateErt(pr.ToList(), "QualityPerClock", GetTarget(bkq, target, max), max).ExpectedRuntime;
825          if (double.IsInfinity(ert)) ert = int.MaxValue;
826          ds.AddRow(new object[] { pr.Key }.Concat(f.Cast<object>()).Concat(new object[] { ert }));
827        }
828        var datAnalysisData = new RegressionProblemData(ds, characteristics, "ERT");
829        var result = new RegressionProblem() {
830          Name = algorithmId2AlgorithmInstanceMapping.GetByFirst(relevantRuns.Key).Name
831        };
832        result.ProblemDataParameter.Value = datAnalysisData;
833        yield return result;
834      }
835    }
836
837    public IEnumerable<IClassificationProblem> GetClassificationProblemPerAlgorithmInstance(double target, string[] characteristics) {
838      if (Problem == null) yield break;
839
840      var classes = GetPerformanceClasses(target, 5);
841      var features = GetProblemCharacteristics(characteristics);
842
843      foreach (var alg in AlgorithmInstances) {
844        var ds = new ModifiableDataset();
845        ds.AddVariable("Problem Name", new List<string>());
846        foreach (var pc in characteristics)
847          ds.AddVariable(pc, new List<double>());
848        ds.AddVariable("Class", new List<double>());
849
850        foreach (var c in classes) {
851          int cls;
852          if (c.Value.TryGetValue(algorithmId2AlgorithmInstanceMapping.GetBySecond(alg), out cls)) {
853            ds.AddRow(new object[] { ((StringValue)c.Key.Parameters["Problem Name"]).Value }
854              .Concat(features[c.Key].Cast<object>()).Concat(new object[] { cls }));
855          }
856        }
857        var datAnalysisData = new ClassificationProblemData(ds, characteristics, "Class");
858        var result = new ClassificationProblem() {
859          Name = alg.Name
860        };
861        result.ProblemDataParameter.Value = datAnalysisData;
862        yield return result;
863      }
864    }
865
866    public Dictionary<IRun, double> GetProblemDistances(string[] characteristics) {
867      var result = new Dictionary<IRun, double>();
868      var currentInstance = problemId2ProblemInstanceMapping.GetByFirst(Problem.ProblemId);
869      var features = GetProblemCharacteristics(characteristics);
870      var cF = features[currentInstance];
871      foreach (var b in ProblemInstances) {
872        if (b == currentInstance) continue;
873        var sum = features[b].Select((t, f) => (cF[f] - t) * (cF[f] - t)).Sum();
874        result[b] = Math.Sqrt(sum);
875      }
876      return result;
877    }
878
879    public Dictionary<IRun, Dictionary<long, int>> GetPerformanceClasses(double target, int nClasses) {
880      var result = new Dictionary<IRun, Dictionary<long, int>>();
881      var problemMap = ProblemInstances.Select(x => new { Key = ((StringValue)x.Parameters["Problem Name"]).Value, Value = x })
882                                       .ToDictionary(x => x.Key, x => x.Value);
883      foreach (var pr in KnowledgeBase.GroupBy(x => ((StringValue)x.Parameters["Problem Name"]).Value).ToList()) {
884        var bkq = ((DoubleValue)problemMap[pr.Key].Parameters["BestKnownQuality"]).Value;
885        var max = ((BoolValue)problemMap[pr.Key].Parameters["Maximization"]).Value;
886
887        result[problemMap[pr.Key]] = new Dictionary<long, int>();
888
889        var values = pr.GroupBy(x => algorithmId2RunMapping.GetBySecond(x).Single())
890                       .ToDictionary(x => x.Key, x => Math.Log10(ExpectedRuntimeHelper.CalculateErt(x.ToList(), "QualityPerClock", GetTarget(bkq, target, max), max).ExpectedRuntime));
891        var ranks = ClusteringHelper<long>.Cluster(nClasses, values, x => double.IsInfinity(x.Value))
892          .GetByCluster().ToList();
893        foreach (var c in ranks) {
894          foreach (var a in c.Value)
895            result[problemMap[pr.Key]][a.Key] = c.Key;
896        }
897      }
898      return result;
899    }
900
901    public double GetTarget(double bestKnownQuality, double target, bool maximization) {
902      return bestKnownQuality * (maximization ? (1 - target) : (1 + target));
903    }
904
905    public event EventHandler<EventArgs<IProgress>> DownloadStarted;
906    private void OnDownloadStarted(IProgress progress) {
907      var handler = DownloadStarted;
908      if (handler != null) handler(this, new EventArgs<IProgress>(progress));
909    }
910
911    public event EventHandler<EventArgs<IAlgorithm>> AlgorithmInstanceStarted;
912    private void OnAlgorithmInstanceStarted(IAlgorithm instance) {
913      var handler = AlgorithmInstanceStarted;
914      if (handler != null) handler(this, new EventArgs<IAlgorithm>(instance));
915    }
916
917    public event EventHandler RecommendationModelChanged;
918    private void OnRecommenderModelChanged() {
919      var handler = RecommendationModelChanged;
920      if (handler != null) handler(this, EventArgs.Empty);
921    }
922
923    public IEnumerable<KeyValuePair<IAlgorithm, double>> GetAlgorithmInstanceRanking() {
924      return RecommendationModel.GetRanking(ProblemInstances.Single(IsCurrentInstance));
925    }
926  }
927}
Note: See TracBrowser for help on using the repository browser.