Free cookie consent management tool by TermsFeed Policy Generator

source: branches/PerformanceComparison/HeuristicLab.OptimizationExpertSystem.Common/3.3/KnowledgeCenter.cs @ 13809

Last change on this file since 13809 was 13809, checked in by abeham, 8 years ago

#2457: added file system based cache of runs

File size: 40.5 KB
RevLine 
[12842]1#region License Information
2/* HeuristicLab
[13667]3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
[12842]4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
[12860]22using HeuristicLab.Analysis;
[13750]23using HeuristicLab.Analysis.SelfOrganizingMaps;
[13551]24using HeuristicLab.Collections;
[13485]25using HeuristicLab.Common;
26using HeuristicLab.Common.Resources;
[12842]27using HeuristicLab.Core;
[13485]28using HeuristicLab.Data;
29using HeuristicLab.MainForm;
[12842]30using HeuristicLab.Optimization;
[13485]31using HeuristicLab.Persistence.Default.Xml;
[13774]32using HeuristicLab.Problems.DataAnalysis;
[13750]33using HeuristicLab.Random;
[13649]34using System;
35using System.Collections.Generic;
36using System.Drawing;
37using System.IO;
38using System.Linq;
39using System.Threading;
40using System.Threading.Tasks;
[13809]41using Algorithm = HeuristicLab.Clients.OKB.Administration.Algorithm;
42using Problem = HeuristicLab.Clients.OKB.Administration.Problem;
[13551]43using RunCreationClient = HeuristicLab.Clients.OKB.RunCreation.RunCreationClient;
44using SingleObjectiveOKBProblem = HeuristicLab.Clients.OKB.RunCreation.SingleObjectiveOKBProblem;
[13713]45using SingleObjectiveOKBSolution = HeuristicLab.Clients.OKB.RunCreation.SingleObjectiveOKBSolution;
[12842]46
[13663]47namespace HeuristicLab.OptimizationExpertSystem.Common {
[13722]48  [Item("Knowledge Center", "Currently in experimental phase, an expert system that makes algorithm suggestions based on fitness landscape analysis features and an optimization knowledge base.")]
[12860]49  [Creatable(CreatableAttribute.Categories.TestingAndAnalysis, Priority = 119)]
[13722]50  public sealed class KnowledgeCenter : IContent {
/// <summary>
/// NOTE(review): neither read nor written in the visible part of this file; the name
/// suggests a switch for temporarily muting event notifications - confirm.
/// </summary>
private bool SuppressEvents { get; set; }

/// <summary>
/// NOTE(review): presumably the file this content is associated with - confirm against callers.
/// </summary>
public string Filename { get; set; }

/// <summary>
/// Image representing this item type; returns <c>VSImageLibrary.Library</c>.
/// </summary>
public static Image StaticItemImage {
  get {
    return VSImageLibrary.Library;
  }
}
58
private readonly IntValue maximumEvaluations;
/// <summary>
/// Evaluation budget. It is written into the "MaximumEvaluations" parameter of started
/// algorithms, or enforced by monitoring "EvaluatedSolutions" when no such parameter exists.
/// </summary>
public IntValue MaximumEvaluations {
  get {
    return maximumEvaluations;
  }
}

private readonly DoubleValue minimumTarget;
/// <summary>
/// Minimum target value; initialised to 0.05 by the constructor.
/// NOTE(review): its consumers are outside the visible part of the file.
/// </summary>
public DoubleValue MinimumTarget {
  get {
    return minimumTarget;
  }
}
68   
private readonly RunCollection instanceRuns;
/// <summary>
/// Runs produced on the current problem without seeding; a run is appended whenever an
/// algorithm started with <c>SeedingStrategyTypes.NoSeeding</c> reaches the stopped state.
/// </summary>
public RunCollection InstanceRuns {
  get {
    return instanceRuns;
  }
}

private readonly RunCollection seededRuns;
/// <summary>
/// Runs produced on the current problem with a seeding strategy other than
/// <c>SeedingStrategyTypes.NoSeeding</c>.
/// </summary>
public RunCollection SeededRuns {
  get {
    return seededRuns;
  }
}

private readonly RunCollection knowledgeBase;
/// <summary>
/// Runs obtained from the OKB (or the local run cache) by the knowledge base update.
/// </summary>
public RunCollection KnowledgeBase {
  get {
    return knowledgeBase;
  }
}
83
private readonly SingleObjectiveOKBProblem problem;
/// <summary>
/// The OKB problem the knowledge center currently works on.
/// </summary>
public SingleObjectiveOKBProblem Problem {
  get {
    return problem;
  }
}

private readonly ItemList<IAlgorithm> algorithmInstances;
private readonly ReadOnlyItemList<IAlgorithm> readonlyAlgorithmInstances;
/// <summary>
/// Read-only view of the algorithm instances downloaded from the OKB.
/// </summary>
public ReadOnlyItemList<IAlgorithm> AlgorithmInstances {
  get {
    return readonlyAlgorithmInstances;
  }
}

private readonly RunCollection problemInstances;
/// <summary>
/// One run per downloaded problem instance; its parameters and results describe the instance
/// (e.g. "Problem Name", "Characteristic.*", "Projection.*").
/// </summary>
public RunCollection ProblemInstances {
  get {
    return problemInstances;
  }
}
99
private IRecommendationModel recommendationModel;
/// <summary>
/// The recommendation model currently in use. Assigning a different instance raises the
/// recommender-model-changed notification; assigning the same instance does nothing.
/// </summary>
public IRecommendationModel RecommendationModel {
  get {
    return recommendationModel;
  }
  set {
    if (recommendationModel != value) {
      recommendationModel = value;
      OnRecommenderModelChanged();
    }
  }
}
[13751]109
private readonly CheckedItemList<IScope> solutionSeedingPool;
/// <summary>
/// Known solutions of the current problem; the checked entries are handed to the
/// seeding solution creator when an algorithm is started with seeding.
/// </summary>
public CheckedItemList<IScope> SolutionSeedingPool {
  get {
    return solutionSeedingPool;
  }
}

private readonly EnumValue<SeedingStrategyTypes> seedingStrategy;
/// <summary>
/// Strategy that decides whether and how started algorithms are seeded with solutions
/// from <see cref="SolutionSeedingPool"/>.
/// </summary>
public EnumValue<SeedingStrategyTypes> SeedingStrategy {
  get {
    return seedingStrategy;
  }
}

// OKB algorithm id <-> runs of that algorithm
private BidirectionalLookup<long, IRun> algorithmId2RunMapping;
// OKB algorithm id <-> deserialized algorithm instance
private BidirectionalDictionary<long, IAlgorithm> algorithmId2AlgorithmInstanceMapping;
// OKB problem id <-> run describing that problem instance
private BidirectionalDictionary<long, IRun> problemId2ProblemInstanceMapping;
123   
/// <summary>
/// True if a problem with a non-negative OKB id is set and its maximization parameter is true;
/// false otherwise.
/// </summary>
public bool Maximization {
  get {
    if (Problem == null || Problem.ProblemId < 0) return false;
    var maximizationParameter = (IValueParameter<BoolValue>)Problem.MaximizationParameter;
    return maximizationParameter.Value.Value;
  }
}
127
/// <summary>
/// Creates an empty knowledge center: 10000 maximum evaluations, a minimum target of 0.05,
/// no seeding, an empty fixed-rank recommendation model and empty run/instance collections.
/// </summary>
public KnowledgeCenter() {
  // settings
  maximumEvaluations = new IntValue(10000);
  minimumTarget = new DoubleValue(0.05);
  seedingStrategy = new EnumValue<SeedingStrategyTypes>(SeedingStrategyTypes.NoSeeding);

  // problem and solutions available for seeding
  problem = new SingleObjectiveOKBProblem();
  solutionSeedingPool = new CheckedItemList<IScope>();

  // run collections
  instanceRuns = new RunCollection();
  seededRuns = new RunCollection();
  knowledgeBase = new RunCollection();
  problemInstances = new RunCollection();

  // algorithm instances and their read-only view
  algorithmInstances = new ItemList<IAlgorithm>();
  readonlyAlgorithmInstances = algorithmInstances.AsReadOnly();

  // the field is assigned directly, so no change notification is raised here
  recommendationModel = FixedRankModel.GetEmpty();

  // OKB id lookups
  algorithmId2RunMapping = new BidirectionalLookup<long, IRun>();
  algorithmId2AlgorithmInstanceMapping = new BidirectionalDictionary<long, IAlgorithm>();
  problemId2ProblemInstanceMapping = new BidirectionalDictionary<long, IRun>();

  RegisterEventHandlers();
}
146
/// <summary>
/// Handler for the problem's ProblemChanged event; intentionally a no-op for now.
/// </summary>
private void ProblemOnProblemChanged(object sender, EventArgs eventArgs) {
  // TODO: Potentially, the knowledge base has to be re-downloaded
}
150
/// <summary>
/// Subscribes this instance to the change events of its settings, problem and run collections.
/// </summary>
private void RegisterEventHandlers() {
  // settings
  maximumEvaluations.ValueChanged += MaximumEvaluationsOnValueChanged;
  minimumTarget.ValueChanged += MinimumTargetOnValueChanged;

  // problem and its solutions
  problem.ProblemChanged += ProblemOnProblemChanged;
  problem.Solutions.ItemsAdded += ProblemSolutionsChanged;
  problem.Solutions.ItemsReplaced += ProblemSolutionsChanged;
  problem.Solutions.ItemsRemoved += ProblemSolutionsChanged;
  problem.Solutions.CollectionReset += ProblemSolutionsChanged;

  // runs on the current instance
  instanceRuns.CollectionReset += InformationChanged;
  instanceRuns.ItemsAdded += InformationChanged;
  instanceRuns.ItemsRemoved += InformationChanged;
  instanceRuns.Reset += InformationChanged;
  instanceRuns.UpdateOfRunsInProgressChanged += InformationChanged;

  // downloaded knowledge base
  knowledgeBase.CollectionReset += InformationChanged;
  knowledgeBase.ItemsAdded += InformationChanged;
  knowledgeBase.ItemsRemoved += InformationChanged;
}
168
/// <summary>
/// Handler for changes of <c>maximumEvaluations</c>; currently does nothing.
/// </summary>
private void MaximumEvaluationsOnValueChanged(object sender, EventArgs eventArgs) {
  // intentionally empty
}

/// <summary>
/// Handler for changes of <c>minimumTarget</c>; currently does nothing.
/// </summary>
private void MinimumTargetOnValueChanged(object sender, EventArgs e) {
  // intentionally empty
}
176
/// <summary>
/// Adds every scope-typed solution of the problem that is not yet part of the seeding pool
/// to the pool, unchecked. Entries are only ever added here, never removed.
/// </summary>
private void ProblemSolutionsChanged(object sender, EventArgs e) {
  var scopes = Problem.Solutions.Select(s => s.Solution).OfType<IScope>();
  foreach (var scope in scopes) {
    if (SolutionSeedingPool.Contains(scope)) continue;
    SolutionSeedingPool.Add(scope, false);
  }
}
183
/// <summary>
/// Shared handler for changes of the run collections. It returns early while the sending
/// collection is being bulk-updated; apart from that it currently has no effect.
/// </summary>
private void InformationChanged(object sender, EventArgs e) {
  var runs = sender as RunCollection;
  var bulkUpdateRunning = runs != null && runs.UpdateOfRunsInProgress;
  if (bulkUpdateRunning) return;
  // nothing to do (yet)
}
[13774]188   
/// <summary>
/// Tells whether the given problem-instance run represents the currently selected problem.
/// </summary>
/// <param name="run">A run taken from the problem instances.</param>
/// <returns>True if the run is mapped to an OKB problem id and that id equals the id of
/// <see cref="Problem"/>; false if it is unmapped or mapped to another id.</returns>
public bool IsCurrentInstance(IRun run) {
  return problemId2ProblemInstanceMapping.ContainsSecond(run)
      && problemId2ProblemInstanceMapping.GetBySecond(run) == Problem.ProblemId;
}
[13561]193
/// <summary>
/// Recomputes the two-dimensional projections of the problem instances from the given
/// characteristics and stores them as "Projection.MDS.*", "Projection.PCA.*" and
/// "Projection.SOM.*" results of each run in <see cref="ProblemInstances"/>.
/// </summary>
/// <remarks>
/// PCA is only calculated when more than one characteristic is given; otherwise existing
/// PCA results are removed from the instances. Nothing happens for an empty array.
/// </remarks>
/// <param name="characteristics">Names of the characteristics that form the feature vectors.</param>
public void UpdateInstanceProjection(string[] characteristics) {
  if (characteristics.Length == 0) return;

  var featureVectors = GetProblemCharacteristics(characteristics);

  // every instance gets a fixed position that is used as row/column index below
  var positions = new BidirectionalDictionary<IRun, int>();
  var nextPosition = 0;
  foreach (var entry in featureVectors) {
    positions.Add(entry.Key, nextPosition);
    nextPosition++;
  }

  #region MDS
  Func<double[], double[], double> distance = (left, right) => {
    var squaredDifferences = left.Zip(right, (l, r) => (l - r)).Select(d => d * d);
    return Math.Sqrt(squaredDifferences.Sum());
  };
  var instanceCount = featureVectors.Count;
  var dissimilarities = new DoubleMatrix(instanceCount, instanceCount);
  for (var first = 0; first < instanceCount - 1; first++) {
    for (var second = first + 1; second < instanceCount; second++) {
      var d = distance(featureVectors[positions.GetBySecond(first)], featureVectors[positions.GetBySecond(second)]);
      // symmetric matrix: mirror the value
      dissimilarities[second, first] = d;
      dissimilarities[first, second] = d;
    }
  }

  var mdsCoords = MultidimensionalScaling.KruskalShepard(dissimilarities);
  #endregion
  #region PCA
  double[,] basis = null; // stays null when PCA is skipped
  var dataset = new double[featureVectors.Count, characteristics.Length];
  if (characteristics.Length > 1) {
    foreach (var entry in featureVectors) {
      var values = entry.Value;
      var row = positions.GetByFirst(entry.Key);
      for (var f = 0; f < values.Length; f++)
        dataset[row, f] = values[f];
    }

    int info;
    double[] variances;
    alglib.pcabuildbasis(dataset, dataset.GetLength(0), dataset.GetLength(1), out info, out variances, out basis);
  }
  #endregion
  #region SOM
  // the SOM expects one column per instance and one row per characteristic
  var somInput = new DoubleMatrix(characteristics.Length, featureVectors.Count);
  foreach (var entry in featureVectors) {
    var values = entry.Value;
    var column = positions.GetByFirst(entry.Key);
    for (var f = 0; f < values.Length; f++)
      somInput[f, column] = values[f];
  }
  var somCoords = SOM.Map(somInput, new MersenneTwister(42), somSize: 10, learningRadius: 20, iterations: 200, jittering: true);
  #endregion

  // overwrites an existing DoubleValue result or adds a new one
  Action<IRun, string, double> setResult = (run, name, value) => {
    IItem existing;
    if (run.Results.TryGetValue(name, out existing)) {
      ((DoubleValue)existing).Value = value;
    } else {
      run.Results.Add(name, new DoubleValue(value));
    }
  };

  ProblemInstances.UpdateOfRunsInProgress = true;
  try {
    foreach (var instance in ProblemInstances) {
      if (basis != null) {
        // project the feature vector onto the first two principal components
        var pcaRow = positions.GetByFirst(instance);
        double pcaX = 0, pcaY = 0;
        for (var f = 0; f < dataset.GetLength(1); f++) {
          pcaX += dataset[pcaRow, f] * basis[f, 0];
          pcaY += dataset[pcaRow, f] * basis[f, 1];
        }
        setResult(instance, "Projection.PCA.X", pcaX);
        setResult(instance, "Projection.PCA.Y", pcaY);
      } else {
        instance.Results.Remove("Projection.PCA.X");
        instance.Results.Remove("Projection.PCA.Y");
      }

      var position = positions.GetByFirst(instance);
      setResult(instance, "Projection.MDS.X", mdsCoords[position, 0]);
      setResult(instance, "Projection.MDS.Y", mdsCoords[position, 1]);

      setResult(instance, "Projection.SOM.X", somCoords[position, 0]);
      setResult(instance, "Projection.SOM.Y", somCoords[position, 1]);
    }
  } finally {
    ProblemInstances.UpdateOfRunsInProgress = false;
  }
}
278
// Names of the run values that are requested from the OKB when runs are downloaded.
private static readonly HashSet<string> InterestingValueNames = new HashSet<string> {
  "QualityPerEvaluations",
  "Problem Name",
  "Problem Type",
  "Algorithm Name",
  "Algorithm Type",
  "Maximization",
  "BestKnownQuality"
};
282
/// <summary>
/// Starts the algorithm instance at the given index on the current problem without
/// the possibility to cancel it.
/// </summary>
/// <param name="index">Index into the algorithm instances.</param>
/// <returns>A task that yields the results of the executed algorithm clone.</returns>
public Task<ResultCollection> StartAlgorithmAsync(int index) {
  var neverCancelled = CancellationToken.None;
  return StartAlgorithmAsync(index, neverCancelled);
}
286
/// <summary>
/// Clones the algorithm instance at the given index, applies it to a clone of the current
/// problem (optionally seeded from the checked solutions of the seeding pool) and runs it on
/// a long-running task until it stops, an exception occurs or cancellation is requested.
/// </summary>
/// <remarks>
/// When the algorithm stops, every result whose name contains "solution" (ignoring case) and
/// whose value is a scope is added to the problem's solutions, and the last run is added to
/// <see cref="InstanceRuns"/> (no seeding) or <see cref="SeededRuns"/> (seeding).
/// The evaluation budget is written to the algorithm's "MaximumEvaluations" parameter if it
/// has one; otherwise "EvaluatedSolutions" is watched and the algorithm is stopped once the
/// budget is reached.
/// </remarks>
/// <param name="index">Index into the algorithm instances.</param>
/// <param name="cancellation">Token that requests the algorithm to be stopped.</param>
/// <returns>A task that yields the results of the algorithm clone.</returns>
/// <exception cref="InvalidOperationException">The problem is not a single-objective heuristic
/// optimization problem, or seeding is requested but no solution is checked or the problem
/// offers no seeding solution creator.</exception>
public Task<ResultCollection> StartAlgorithmAsync(int index, CancellationToken cancellation) {
  var selectedInstance = algorithmInstances[index];
  var algorithmClone = (IAlgorithm)selectedInstance.Clone();
  var problemClone = Problem.CloneProblem() as ISingleObjectiveHeuristicOptimizationProblem;
  if (problemClone == null) throw new InvalidOperationException("Problem is not of type " + typeof(ISingleObjectiveHeuristicOptimizationProblem).FullName);
  // TODO: It is assumed the problem instance by default is configured using no preexisting solution creator
  var seedingStrategyLocal = SeedingStrategy.Value;
  if (seedingStrategyLocal != SeedingStrategyTypes.NoSeeding) {
    if (!SolutionSeedingPool.CheckedItems.Any()) throw new InvalidOperationException("There are no solutions selected for seeding.");
    // TODO: It would be necessary to specify the solution creator somewhere (property and GUI)
    var seedingCreator = problemClone.Operators.OfType<IPreexistingSolutionCreator>().FirstOrDefault();
    if (seedingCreator == null) throw new InvalidOperationException("The problem does not contain a solution creator that allows seeding.");
    seedingCreator.PreexistingSolutionsParameter.Value.Replace(SolutionSeedingPool.CheckedItems.Select(x => x.Value));
    seedingCreator.SampleFromPreexistingParameter.Value.Value = seedingStrategyLocal == SeedingStrategyTypes.SeedBySampling;
    // TODO: the static type of the solution creator parameter is not known here, hence the dynamic assignment
    ((dynamic)problemClone.SolutionCreatorParameter).Value = (dynamic)seedingCreator;
  }
  algorithmClone.Problem = problemClone;
  algorithmClone.Prepare(true);

  // Prefer the algorithm's own stopping criterion; fall back to monitoring evaluated solutions.
  IParameter stopParam;
  var monitorStop = true;
  if (algorithmClone.Parameters.TryGetValue("MaximumEvaluations", out stopParam)) {
    var maxEvalParam = stopParam as IValueParameter<Data.IntValue>;
    if (maxEvalParam != null) {
      maxEvalParam.Value.Value = MaximumEvaluations.Value;
      monitorStop = false;
    }
  }

  // TODO: The following can be simplified when we have async implementation patterns for our algorithms:
  // TODO: The closures can be removed and replaced with private member methods
  var waitHandle = new AutoResetEvent(false);

  #region EventHandler closures
  EventHandler exeStateChanged = (sender, e) => {
    if (algorithmClone.ExecutionState == ExecutionState.Stopped) {
      lock (Problem.Solutions) {
        // ordinal comparison: the match must not depend on the current culture's casing rules
        var solutions = algorithmClone.Results
          .Where(x => x.Name.IndexOf("solution", StringComparison.OrdinalIgnoreCase) >= 0)
          .Select(x => x.Value)
          .OfType<IScope>();
        foreach (var solution in solutions) {
          var qualityName = Problem.Problem.Evaluator.QualityParameter.ActualName;
          Problem.Solutions.Add(new SingleObjectiveOKBSolution(Problem.ProblemId) {
            Quality = solution.Variables.ContainsKey(qualityName) ? ((DoubleValue)solution.Variables[qualityName].Value).Value : double.NaN,
            Solution = (IItem)solution.Clone()
          });
        }
      }
      if (seedingStrategyLocal == SeedingStrategyTypes.NoSeeding) {
        lock (InstanceRuns) {
          InstanceRuns.Add(algorithmClone.Runs.Last());
        }
      } else {
        lock (SeededRuns) {
          SeededRuns.Add(algorithmClone.Runs.Last());
        }
      }
      waitHandle.Set();
    }
  };

  EventHandler<EventArgs<Exception>> exceptionOccurred = (sender, e) => {
    waitHandle.Set();
  };

  EventHandler timeChanged = (sender, e) => {
    IResult evalSolResult;
    if (!algorithmClone.Results.TryGetValue("EvaluatedSolutions", out evalSolResult) || !(evalSolResult.Value is Data.IntValue)) return;
    var evalSols = ((Data.IntValue)evalSolResult.Value).Value;
    if (evalSols >= MaximumEvaluations.Value && algorithmClone.ExecutionState == ExecutionState.Started)
      algorithmClone.Stop();
  };
  #endregion

  algorithmClone.ExecutionStateChanged += exeStateChanged;
  algorithmClone.ExceptionOccurred += exceptionOccurred;
  if (monitorStop) algorithmClone.ExecutionTimeChanged += timeChanged;

  return Task.Factory.StartNew(() => {
    try {
      algorithmClone.Start();
      OnAlgorithmInstanceStarted(algorithmClone);
      var cancelRequested = false;
      // poll so that a cancellation request is noticed within 200ms
      while (!waitHandle.WaitOne(200)) {
        if (cancellation.IsCancellationRequested) {
          cancelRequested = true;
          break;
        }
      }
      if (cancelRequested) {
        try { algorithmClone.Stop(); } catch { } // ignore race condition if it is stopped in the meantime
        waitHandle.WaitOne();
      }
      return algorithmClone.Results;
    } finally {
      // Detach before disposing: a state change after this point must not call Set() on a
      // disposed handle. The finally also releases the handle if Start() throws.
      algorithmClone.ExecutionStateChanged -= exeStateChanged;
      algorithmClone.ExceptionOccurred -= exceptionOccurred;
      if (monitorStop) algorithmClone.ExecutionTimeChanged -= timeChanged;
      waitHandle.Dispose();
    }
  }, TaskCreationOptions.LongRunning);
}
379
/// <summary>
/// Blocking variant of <c>StartAlgorithmAsync(int, CancellationToken)</c>.
/// </summary>
/// <param name="index">Index into the algorithm instances.</param>
/// <param name="cancellation">Token that cancels both the algorithm and the wait.</param>
/// <returns>The results of the executed algorithm clone.</returns>
public ResultCollection StartAlgorithm(int index, CancellationToken cancellation) {
  var execution = StartAlgorithmAsync(index, cancellation);
  // the wait itself observes the token as well
  execution.Wait(cancellation);
  return execution.Result;
}
385
/// <summary>
/// Starts the knowledge base update on a long-running task.
/// </summary>
/// <param name="progress">Progress to report to; a new one is created when null.</param>
/// <returns>The task that performs the update.</returns>
public Task UpdateKnowledgeBaseAsync(IProgress progress = null) {
  if (progress == null) {
    progress = new Progress(string.Empty);
  }
  progress.Start("Updating Knowledge Base from OKB");
  OnDownloadStarted(progress);
  return Task.Factory.StartNew(() => DoUpdateKnowledgeBase(progress), TaskCreationOptions.LongRunning);
}
392
/// <summary>
/// Updates the knowledge base and blocks until the update has finished.
/// </summary>
/// <param name="progress">Progress to report to; may be null.</param>
public void UpdateKnowledgeBase(IProgress progress = null) {
  var update = UpdateKnowledgeBaseAsync(progress);
  update.Wait();
}
396
/// <summary>
/// Rebuilds the knowledge base from the OKB: refreshes the clients, downloads all problem
/// instances of the current problem's class and all algorithm instances, loads the runs of
/// that problem class (from the local cache where possible, otherwise from the OKB in batches
/// of 500), derives missing best-known qualities and rank features, and finally updates the
/// instance projections.
/// </summary>
/// <param name="progress">Receives status and progress updates; finished in any case.</param>
/// <exception cref="InvalidOperationException">The current problem is not found in the OKB.</exception>
private void DoUpdateKnowledgeBase(IProgress progress) {
  var queryClient = Clients.OKB.Query.QueryClient.Instance;
  var adminClient = Clients.OKB.Administration.AdministrationClient.Instance;
  try {
    progress.Status = "Connecting to OKB...";
    progress.ProgressValue = 0;
    // FIXME: How to tell if refresh is necessary?
    var refreshTasks = new[] {
      Task.Factory.StartNew(() => queryClient.Refresh()),
      Task.Factory.StartNew(() => adminClient.Refresh())
    };
    Task.WaitAll(refreshTasks);

    var probInstance = adminClient.Problems.SingleOrDefault(x => x.Id == Problem.ProblemId);
    if (probInstance == null) throw new InvalidOperationException("The chosen problem instance cannot be found in the OKB.");
    var probClassId = probInstance.ProblemClassId;

    var problemClassFilter = (Clients.OKB.Query.StringComparisonAvailableValuesFilter)queryClient.Filters.Single(x => x.Label == "Problem Class Name");
    problemClassFilter.Value = adminClient.ProblemClasses.Single(x => x.Id == probClassId).Name;

    problemId2ProblemInstanceMapping.Clear();
    progress.Status = "Downloading algorithm and problem instances...";
    progress.ProgressValue = 0;

    int[] p = { 0 }; // shared progress counter, boxed in an array so the workers can mutate it
    ProblemInstances.UpdateOfRunsInProgress = true;
    ProblemInstances.Clear();
    algorithmId2AlgorithmInstanceMapping.Clear();
    algorithmId2RunMapping.Clear();
    algorithmInstances.Clear();

    var characteristics = new HashSet<string>();
    // materialized once so that the count and the download work on the same set
    var problems = adminClient.Problems.Where(x => x.ProblemClassId == probClassId).ToList();
    var algorithms = adminClient.Algorithms;
    var totalInstances = problems.Count + algorithms.Count;
    var combined = problems.Cast<object>().Concat(algorithms.Cast<object>()).Shuffle(new MersenneTwister());
    Parallel.ForEach(combined, new ParallelOptions { MaxDegreeOfParallelism = Environment.ProcessorCount }, (inst) => {
      var pInst = inst as Clients.OKB.Administration.Problem;
      if (pInst != null) DownloadProblemInstance(progress, pInst, p, totalInstances, characteristics);
      else {
        var aInst = inst as Clients.OKB.Administration.Algorithm;
        DownloadAlgorithmInstance(progress, aInst, p, totalInstances);
      }
    });

    var interestingValues = queryClient.ValueNames.Where(x => InterestingValueNames.Contains(x.Name)).ToList();

    progress.Status = "Downloading runs...";
    progress.ProgressValue = 0;
    p[0] = 0;
    var count = queryClient.GetNumberOfRuns(problemClassFilter);
    // NOTE(review): this leaves without touching KnowledgeBase and without updating the projections
    if (count == 0) return;

    var runList = new List<IRun>();
    // runs found in the file system cache are added to runList; only the remaining ids are downloaded
    var runIds = LoadRunsFromCache(queryClient.GetRunIds(problemClassFilter), runList, progress);
    var batches = runIds.Select((v, i) => new { Idx = i, Val = v }).GroupBy(x => x.Idx / 500, x => x.Val);
    Parallel.ForEach(batches.Select(x => x.ToList()), new ParallelOptions { MaxDegreeOfParallelism = Math.Min(Environment.ProcessorCount, 4) },
      (batch) => {
        var okbRuns = queryClient.GetRunsWithValues(batch, true, interestingValues);
        var hlRuns = okbRuns.AsParallel().Select(x => new { AlgorithmId = x.Algorithm.Id, RunId = x.Id, Run = queryClient.ConvertToOptimizationRun(x) }).ToList();
        lock (runList) {
          var toCache = new List<Tuple<long, long, IRun>>();
          foreach (var r in hlRuns) {
            algorithmId2RunMapping.Add(r.AlgorithmId, r.Run);
            runList.Add(r.Run);
            toCache.Add(Tuple.Create(r.AlgorithmId, r.RunId, r.Run));
          }
          SaveToCache(toCache);
          progress.Status = string.Format("Downloaded runs {0} to {1} of {2}...", p[0], p[0] + batch.Count, count);
          p[0] += batch.Count;
          progress.ProgressValue = p[0] / (double)count;
        }
      });
    progress.Status = "Finishing...";

    // remove algorithm instances that do not appear in any downloaded run
    for (var algIdx = 0; algIdx < algorithmInstances.Count; algIdx++) {
      var id = algorithmId2AlgorithmInstanceMapping.GetBySecond(algorithmInstances[algIdx]);
      if (!algorithmId2RunMapping.ContainsFirst(id)) {
        algorithmId2AlgorithmInstanceMapping.RemoveByFirst(id);
        algorithmInstances.RemoveAt(algIdx);
        algIdx--;
      }
    }

    try {
      KnowledgeBase.UpdateOfRunsInProgress = true;
      KnowledgeBase.Clear();
      KnowledgeBase.AddRange(runList);
    } finally { KnowledgeBase.UpdateOfRunsInProgress = false; }

    // problem name -> algorithm name -> runs; runs lacking either name are left out
    var algInstRunDict = runList.Where(x => x.Parameters.ContainsKey("Problem Name") && x.Parameters["Problem Name"] is StringValue
                                         && x.Parameters.ContainsKey("Algorithm Name") && x.Parameters["Algorithm Name"] is StringValue)
                                .GroupBy(x => ((StringValue)x.Parameters["Problem Name"]).Value)
                                .ToDictionary(x => x.Key, x => x.GroupBy(y => ((StringValue)y.Parameters["Algorithm Name"]).Value)
                                                                .ToDictionary(y => y.Key, y => y.ToList()));

    // set best-known quality to best-found in case it is not known
    foreach (var kvp in algInstRunDict) {
      var prob = ProblemInstances.SingleOrDefault(x => ((StringValue)x.Parameters["Problem Name"]).Value == kvp.Key);
      if (prob == null) continue;
      var maximization = ((BoolValue)prob.Parameters["Maximization"]).Value;

      IItem bkParam;
      if (!prob.Parameters.TryGetValue("BestKnownQuality", out bkParam) || !(bkParam is DoubleValue)) {
        // final quality of the first row of each run's convergence graph; materialized because
        // it is inspected twice below
        var list = kvp.Value.SelectMany(x => x.Value)
          .Where(x => x.Results.ContainsKey("QualityPerEvaluations"))
          .Select(x => ((IndexedDataTable<double>)x.Results["QualityPerEvaluations"]).Rows.First().Values.Last().Item2)
          .ToList();
        if (list.Count == 0) continue;
        bkParam = new DoubleValue(maximization ? list.Max() : list.Min());
        prob.Parameters["BestKnownQuality"] = bkParam;
      }
    }

    // add algorithm instance ranks as features to the problem instances for a range of targets
    foreach (var target in new[] {0, 0.01, 0.05, 0.1, 0.2, 0.5}) {
      var cls = GetPerformanceClasses(target, 5);
      foreach (var kvp in cls) {
        var prob = kvp.Key;
        foreach (var kvp2 in kvp.Value) {
          var resultName = "Rank." + algorithmId2AlgorithmInstanceMapping.GetByFirst(kvp2.Key) + "@" + (target * 100) + "%";
          prob.Results[resultName] = new IntValue(kvp2.Value);
        }
      }
    }
  } finally {
    // the bulk-update flag must be reset even if finishing the progress fails
    try { progress.Finish(); } finally { ProblemInstances.UpdateOfRunsInProgress = false; }
  }
  UpdateInstanceProjection(ProblemInstances.ResultNames.Where(x => x.StartsWith("Characteristic.")).ToArray());
}
525
/// <summary>
/// Downloads and deserializes one algorithm from the OKB and, on success, registers it in the
/// algorithm instances and the id mapping and advances the shared progress.
/// Algorithms without data or with data that cannot be deserialized are skipped silently.
/// </summary>
/// <param name="progress">Progress to update; also used as the lock that guards the shared state.</param>
/// <param name="algInst">OKB description of the algorithm.</param>
/// <param name="p">Single-element array holding the shared counter of downloaded items.</param>
/// <param name="total">Number of items that make up 100% progress.</param>
private void DownloadAlgorithmInstance(IProgress progress, Algorithm algInst, int[] p, int total) {
  var data = Clients.OKB.Administration.AdministrationClient.GetAlgorithmData(algInst.Id);
  if (data == null) return;

  IAlgorithm alg = null;
  using (var stream = new MemoryStream(data)) {
    try {
      alg = (IAlgorithm)XmlParser.Deserialize<IContent>(stream);
    } catch {
      // best effort: an algorithm that cannot be deserialized is left out
    }
  }
  if (alg == null) return;

  lock (progress) {
    algorithmInstances.Add(alg);
    algorithmId2AlgorithmInstanceMapping.Add(algInst.Id, alg);
    progress.Status = string.Format("Downloaded algorithm {0} (okb-id: {1})...", algInst.Name, algInst.Id);
    p[0]++;
    progress.ProgressValue = p[0] / (double)total;
  }
}
547
/// <summary>
/// Downloads and deserializes one problem from the OKB, describes it as a run (parameter
/// values, "Problem Name", "Problem Type" and one "Characteristic.*" result per characteristic
/// value) and registers that run as a problem instance.
/// </summary>
/// <remarks>
/// The run is only registered once its name and type parameters are set, because later
/// processing looks up "Problem Name" on every problem instance. A failure while the
/// characteristics are retrieved keeps the instance with the characteristics collected so far.
/// Problems without data or with data that cannot be deserialized are skipped silently.
/// </remarks>
/// <param name="progress">Progress to update; also used as the lock that guards the shared state.</param>
/// <param name="pInst">OKB description of the problem.</param>
/// <param name="p">Single-element array holding the shared counter of downloaded items.</param>
/// <param name="totalProblems">Number of items that make up 100% progress.</param>
/// <param name="characteristics">Receives the names of the characteristic results that were added.</param>
private void DownloadProblemInstance(IProgress progress, Problem pInst, int[] p, int totalProblems, HashSet<string> characteristics) {
  var data = Clients.OKB.Administration.AdministrationClient.GetProblemData(pInst.Id);
  if (data == null) return;

  var charas = new List<string>();
  IRun probRun = null;
  using (var stream = new MemoryStream(data)) {
    try {
      var prob = (IProblem)XmlParser.Deserialize<IContent>(stream);
      var run = new Run() { Name = prob.Name };
      prob.CollectParameterValues(run.Parameters);
      run.Parameters["Problem Name"] = new StringValue(prob.Name);
      run.Parameters["Problem Type"] = new StringValue(prob.GetType().Name);
      // from here on the run carries everything that is required of a problem instance
      probRun = run;
      foreach (var v in RunCreationClient.Instance.GetCharacteristicValues(pInst.Id)) {
        probRun.Results.Add("Characteristic." + v.Name, RunCreationClient.Instance.ConvertToItem(v));
        charas.Add("Characteristic." + v.Name);
      }
    } catch {
      // best effort: see remarks
    }
  }
  if (probRun == null) return;

  lock (progress) {
    problemId2ProblemInstanceMapping.Add(pInst.Id, probRun);
    ProblemInstances.Add(probRun);
    progress.Status = string.Format("Downloaded problem {0} (okb-id: {1})....", pInst.Name, pInst.Id);
    p[0]++;
    progress.ProgressValue = p[0] / (double)totalProblems;
    foreach (var c in charas) characteristics.Add(c);
  }
}
579
/// <summary>
/// Loads those of the requested runs that are present in the file system cache
/// (%LocalAppData%/HeuristicLab.OKB/cache/runs/&lt;run id&gt;/&lt;algorithm id&gt;), adds them to
/// <paramref name="runList"/> and to the algorithm-id-to-run mapping, and returns the ids
/// that still have to be downloaded.
/// </summary>
/// <remarks>
/// Reading the cache is best effort: unreadable entries are deleted and folders that do not
/// hold exactly one file are ignored. Any other failure ends the cache lookup, and the runs
/// that were not loaded are reported as missing.
/// </remarks>
/// <param name="runIds">Ids of the runs that are needed.</param>
/// <param name="runList">Receives the cached runs; also used as the lock for shared state.</param>
/// <param name="progress">Receives status and progress updates.</param>
/// <returns>The requested ids that were not found in the cache.</returns>
private List<long> LoadRunsFromCache(IEnumerable<long> runIds, List<IRun> runList, IProgress progress) {
  // only read while the workers are running, so that concurrent lookups are safe
  var requested = new HashSet<long>(runIds);
  var total = requested.Count;
  if (total == 0) return new List<long>();

  var loadedIds = new List<long>(); // guarded by lock (runList)
  try {
    var path = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData), "HeuristicLab.OKB", "cache", "runs");
    if (Directory.Exists(path)) {
      // each worker processes chunks of 100 run folders
      Parallel.ForEach(Directory.EnumerateDirectories(path).Select((d, i) => new { Index = i, Directory = d }).GroupBy(x => x.Index / 100), new ParallelOptions() { MaxDegreeOfParallelism = Environment.ProcessorCount },
        (folderGroup) => {
          var localRunList = new List<Tuple<long, long, IRun>>();
          foreach (var runPath in folderGroup.Select(x => x.Directory)) {
            long runId;
            var runFolder = new DirectoryInfo(runPath).Name;
            if (!long.TryParse(runFolder, out runId) || !requested.Contains(runId)) continue;
            // a valid entry holds exactly one file which is named after the algorithm id
            var runFiles = Directory.EnumerateFiles(runPath).Take(2).ToList();
            if (runFiles.Count != 1) continue;
            var runFilePath = runFiles[0];
            var runFileName = Path.GetFileNameWithoutExtension(runFilePath);
            long algId;
            if (!long.TryParse(runFileName, out algId)) continue;
            IRun run = null;
            try {
              using (var file = File.OpenRead(runFilePath))
                run = XmlParser.Deserialize<IRun>(file);
            } catch {
              // corrupt entry: try to remove it so that the run is downloaded and cached again
              try {
                File.Delete(runFilePath);
                Directory.Delete(runPath);
              } catch { }
            }
            if (run != null) localRunList.Add(Tuple.Create(algId, runId, run));
          }
          lock (runList) {
            foreach (var r in localRunList) {
              loadedIds.Add(r.Item2);
              algorithmId2RunMapping.Add(r.Item1, r.Item3);
              runList.Add(r.Item3);
            }
            progress.Status = string.Format("Retrieved {0} of {1} from cache", runList.Count, total);
            progress.ProgressValue = (double)runList.Count / total;
          }
        });
    }
  } catch {
    // best effort: whatever could not be read from the cache is downloaded instead
  }
  // the workers have finished, so the set can be modified safely now
  requested.ExceptWith(loadedIds);
  return requested.ToList();
}
619
/// <summary>
/// Writes the given runs to the file system cache below
/// LocalApplicationData/HeuristicLab.OKB/cache/runs. Each run is serialized to the file
/// "&lt;Item2&gt;/&lt;Item1&gt;" of that folder. Saving is best effort: errors are not reported.
/// </summary>
/// <param name="runs">Tuples of (file name id, folder name id, run) that should be cached.</param>
private void SaveToCache(IEnumerable<Tuple<long, long, IRun>> runs) {
  try {
    var path = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData), "HeuristicLab.OKB", "cache", "runs");
    // CreateDirectory does nothing if the directory already exists
    Directory.CreateDirectory(path);
    foreach (var r in runs) {
      var runPath = Path.Combine(path, r.Item2.ToString());
      var runFilePath = Path.Combine(runPath, r.Item1.ToString());
      try {
        Directory.CreateDirectory(runPath);
        using (var file = File.Open(runFilePath, FileMode.Create, FileAccess.ReadWrite)) {
          XmlGenerator.Serialize(r.Item3, file);
        }
      } catch {
        // a single run that cannot be written must not prevent caching the remaining runs;
        // remove a possibly half-written file so that it is not mistaken for a valid entry
        try {
          if (File.Exists(runFilePath)) File.Delete(runFilePath);
        } catch (IOException) {
        } catch (UnauthorizedAccessException) {
        }
      }
    }
  } catch {
    // the cache is an optimization only, failing to write it is not an error
  }
}
633
/// <summary>
/// Builds the feature matrix of the given problem instances: one row per instance and one
/// column per characteristic, read from the instance's Results.
/// </summary>
/// <param name="problemInstances">The runs that describe the problem instances.</param>
/// <param name="characteristics">Names of the results that are used as features.</param>
/// <param name="medianValues">Optional replacement value per characteristic for missing features.</param>
/// <returns>
/// The value of a DoubleValue or IntValue result. A result that is absent, of another type,
/// or NaN yields medianValues[f] if medianValues is given and NaN otherwise.
/// </returns>
public static double[][] GetFeatures(IRun[] problemInstances, string[] characteristics, double[] medianValues = null) {
  var featureMatrix = new double[problemInstances.Length][];
  for (var row = 0; row < problemInstances.Length; row++) {
    var featureRow = new double[characteristics.Length];
    for (var col = 0; col < characteristics.Length; col++) {
      // NaN marks a feature that is not available as a number
      var value = double.NaN;
      IItem item;
      if (problemInstances[row].Results.TryGetValue(characteristics[col], out item)) {
        var asDouble = item as DoubleValue;
        var asInt = item as IntValue;
        if (asDouble != null) value = asDouble.Value;
        else if (asInt != null) value = asInt.Value;
      }
      var useMedian = medianValues != null && double.IsNaN(value);
      featureRow[col] = useMedian ? medianValues[col] : value;
    }
    featureMatrix[row] = featureRow;
  }
  return featureMatrix;
}
658
/// <summary>
/// Calculates the median of every characteristic over the given problem instances.
/// Only DoubleValue and IntValue results are considered; NaN values are skipped because
/// NaN denotes a missing feature (see GetFeatures) and must not influence the median
/// that is used to replace it.
/// </summary>
/// <param name="problemInstances">The runs that describe the problem instances.</param>
/// <param name="characteristics">Names of the results for which the median is calculated.</param>
/// <returns>One median per characteristic, 0.0 if no value is available for a characteristic.</returns>
public static double[] GetMedianValues(IRun[] problemInstances, string[] characteristics) {
  // create all lists up front, otherwise they remain null if there are no problem instances
  var values = new List<double>[characteristics.Length];
  for (var f = 0; f < values.Length; f++)
    values[f] = new List<double>(problemInstances.Length);

  foreach (var problemInstance in problemInstances) {
    for (var f = 0; f < characteristics.Length; f++) {
      IItem item;
      if (!problemInstance.Results.TryGetValue(characteristics[f], out item)) continue;
      double val;
      var dItem = item as DoubleValue;
      if (dItem != null) {
        val = dItem.Value;
      } else {
        var iItem = item as IntValue;
        if (iItem == null) continue;
        val = iItem.Value;
      }
      if (!double.IsNaN(val)) values[f].Add(val);
    }
  }
  return values.Select(x => x.Count == 0 ? 0.0 : x.Median()).ToArray();
}
677
/// <summary>
/// Calculates the normalized characteristics of all problem instances. Missing (NaN) values
/// are replaced by the median of the characteristic and every characteristic with a positive
/// standard deviation is transformed to its z-score.
/// </summary>
/// <param name="characteristics">Names of the results that are used as features.</param>
/// <returns>The feature vector of every problem instance.</returns>
public Dictionary<IRun, double[]> GetProblemCharacteristics(string[] characteristics) {
  var problems = ProblemInstances.ToArray();
  var rows = GetFeatures(problems, characteristics);
  var medians = GetMedianValues(problems, characteristics);

  // replace missing values before the statistics are calculated
  foreach (var row in rows) {
    for (var c = 0; c < row.Length; c++) {
      if (double.IsNaN(row[c])) row[c] = medians[c];
    }
  }

  // mean and standard deviation per characteristic; there is nothing to calculate without instances
  var columnCount = rows.Length > 0 ? characteristics.Length : 0;
  var mean = new double[columnCount];
  var sigma = new double[columnCount];
  for (var c = 0; c < columnCount; c++) {
    var column = rows.Select(row => row[c]).ToList();
    mean[c] = column.Average();
    sigma[c] = column.StandardDeviation();
  }

  // normalize characteristic values by transforming them to their z-score
  foreach (var row in rows) {
    for (var c = 0; c < row.Length; c++) {
      if (sigma[c] > 0) row[c] = (row[c] - mean[c]) / sigma[c];
    }
  }

  var result = new Dictionary<IRun, double[]>();
  for (var p = 0; p < problems.Length; p++)
    result.Add(problems[p], rows[p]);
  return result;
}
699
/// <summary>
/// Calculates the expected runtime of every algorithm instance on the given problem instance,
/// based on the knowledge base runs that have the same "Problem Name". The target is derived
/// from the instance's "BestKnownQuality" and MinimumTarget.
/// </summary>
/// <param name="problemInstance">The run that describes the problem instance.</param>
/// <returns>The expected runtime per algorithm instance; empty if "BestKnownQuality" is not available.</returns>
public Dictionary<IAlgorithm, double> GetAlgorithmPerformance(IRun problemInstance) {
  var performance = new Dictionary<IAlgorithm, double>();
  if (!problemInstance.Parameters.ContainsKey("BestKnownQuality")) return performance;

  var bestKnownQuality = ((DoubleValue)problemInstance.Parameters["BestKnownQuality"]).Value;
  var target = GetTarget(bestKnownQuality, MinimumTarget.Value, Maximization);
  // GetAlgorithmRuns performs the same filtering by problem name and grouping by algorithm instance
  foreach (var algorithmRuns in GetAlgorithmRuns(problemInstance)) {
    var ert = ExpectedRuntimeHelper.CalculateErt(algorithmRuns.Value, "QualityPerEvaluations", target, Maximization);
    performance.Add(algorithmRuns.Key, ert.ExpectedRuntime);
  }
  return performance;
}
707
/// <summary>
/// Collects the knowledge base runs whose "Problem Name" equals that of the given
/// problem instance and groups them by algorithm instance.
/// </summary>
/// <param name="problemInstance">The run that describes the problem instance.</param>
/// <returns>The matching runs per algorithm instance.</returns>
public Dictionary<IAlgorithm, List<IRun>> GetAlgorithmRuns(IRun problemInstance) {
  var runsByAlgorithm =
    from run in knowledgeBase
    where ((StringValue)run.Parameters["Problem Name"]).Value == ((StringValue)problemInstance.Parameters["Problem Name"]).Value
    group run by algorithmId2AlgorithmInstanceMapping.GetByFirst(algorithmId2RunMapping.GetBySecond(run).Single());

  var result = new Dictionary<IAlgorithm, List<IRun>>();
  foreach (var algorithmRuns in runsByAlgorithm)
    result.Add(algorithmRuns.Key, algorithmRuns.ToList());
  return result;
}
713
/// <summary>
/// Groups all runs of the knowledge base by the algorithm instance they are mapped to.
/// </summary>
/// <returns>The runs of the knowledge base per algorithm instance.</returns>
public Dictionary<IAlgorithm, List<IRun>> GetKnowledgeBaseByAlgorithm() {
  // run -> id (algorithmId2RunMapping) -> algorithm instance (algorithmId2AlgorithmInstanceMapping)
  Func<IRun, IAlgorithm> algorithmOf = run =>
    algorithmId2AlgorithmInstanceMapping.GetByFirst(algorithmId2RunMapping.GetBySecond(run).Single());

  var result = new Dictionary<IAlgorithm, List<IRun>>();
  foreach (var algorithmRuns in KnowledgeBase.GroupBy(algorithmOf))
    result.Add(algorithmRuns.Key, algorithmRuns.ToList());
  return result;
}
718
/// <summary>
/// Creates one regression problem per algorithm instance of the knowledge base. Each row of
/// its dataset describes one problem instance: the problem name, the normalized
/// characteristics and the expected runtime ("ERT") of the algorithm instance on it.
/// Yields nothing if Problem is null.
/// </summary>
/// <param name="target">Relative target that is passed to GetTarget together with the best-known quality.</param>
/// <param name="characteristics">Names of the characteristics that are used as input variables.</param>
/// <returns>A lazily evaluated sequence of regression problems, named after the algorithm instance.</returns>
public IEnumerable<IRegressionProblem> GetRegressionProblemPerAlgorithmInstance(double target, string[] characteristics) {
  if (Problem == null) yield break;
  var features = GetProblemCharacteristics(characteristics);
  // TODO: knowledgebase only stores problem name as a string
  // this doesn't work if there are two equally named problem instances
  var problemsByName = ProblemInstances.ToDictionary(x => ((StringValue)x.Parameters["Problem Name"]).Value);

  var runsByAlgorithm = knowledgeBase.GroupBy(x => algorithmId2RunMapping.GetBySecond(x).Single());
  foreach (var algorithmRuns in runsByAlgorithm) {
    var runsByProblem = algorithmRuns.GroupBy(x => ((StringValue)x.Parameters["Problem Name"]).Value).ToList();

    var dataset = new ModifiableDataset();
    dataset.AddVariable("Problem Name", new List<string>());
    foreach (var characteristic in characteristics)
      dataset.AddVariable(characteristic, new List<double>());
    dataset.AddVariable("ERT", new List<double>());

    foreach (var problemRuns in runsByProblem) {
      var problem = problemsByName[problemRuns.Key];
      var featureValues = features[problem];
      var maximization = ((BoolValue)problem.Parameters["Maximization"]).Value;
      var bestKnownQuality = ((DoubleValue)problem.Parameters["BestKnownQuality"]).Value;
      var targetQuality = GetTarget(bestKnownQuality, target, maximization);
      var ert = ExpectedRuntimeHelper.CalculateErt(problemRuns.ToList(), "QualityPerEvaluations", targetQuality, maximization).ExpectedRuntime;
      // an infinite runtime is replaced by a large finite value
      if (double.IsInfinity(ert)) ert = int.MaxValue;

      var row = new List<object>(featureValues.Length + 2);
      row.Add(problemRuns.Key);
      row.AddRange(featureValues.Cast<object>());
      row.Add(ert);
      dataset.AddRow(row);
    }

    var problemData = new RegressionProblemData(dataset, characteristics, "ERT");
    var regressionProblem = new RegressionProblem();
    regressionProblem.Name = algorithmId2AlgorithmInstanceMapping.GetByFirst(algorithmRuns.Key).Name;
    regressionProblem.ProblemDataParameter.Value = problemData;
    yield return regressionProblem;
  }
}
[12842]750
/// <summary>
/// Creates one classification problem per algorithm instance. Each row of its dataset
/// describes one problem instance for which a performance class of the algorithm instance is
/// known: the problem name, the normalized characteristics and the class (one of 5 performance
/// classes, see GetPerformanceClasses). Yields nothing if Problem is null.
/// </summary>
/// <param name="target">Relative target that is passed on to GetPerformanceClasses.</param>
/// <param name="characteristics">Names of the characteristics that are used as input variables.</param>
/// <returns>A lazily evaluated sequence of classification problems, named after the algorithm instance.</returns>
public IEnumerable<IClassificationProblem> GetClassificationProblemPerAlgorithmInstance(double target, string[] characteristics) {
  if (Problem == null) yield break;

  var classes = GetPerformanceClasses(target, 5);
  var features = GetProblemCharacteristics(characteristics);

  foreach (var alg in AlgorithmInstances) {
    var ds = new ModifiableDataset();
    ds.AddVariable("Problem Name", new List<string>());
    foreach (var pc in characteristics)
      ds.AddVariable(pc, new List<double>());
    ds.AddVariable("Class", new List<double>());

    foreach (var c in classes) {
      int cls;
      if (c.Value.TryGetValue(algorithmId2AlgorithmInstanceMapping.GetBySecond(alg), out cls)) {
        // "Class" is declared as a double variable, thus the label has to be added as a double;
        // a boxed int does not match the type of the column
        ds.AddRow(new object[] { ((StringValue)c.Key.Parameters["Problem Name"]).Value }
          .Concat(features[c.Key].Cast<object>()).Concat(new object[] { (double)cls }));
      }
    }
    var datAnalysisData = new ClassificationProblemData(ds, characteristics, "Class");
    var result = new ClassificationProblem() {
      Name = alg.Name
    };
    result.ProblemDataParameter.Value = datAnalysisData;
    yield return result;
  }
}
779
/// <summary>
/// Calculates the Euclidean distance between the normalized characteristics of the current
/// problem instance and those of every other problem instance.
/// </summary>
/// <param name="characteristics">Names of the characteristics that span the feature space.</param>
/// <returns>
/// The distance to the current instance for every other problem instance;
/// empty if no problem is set.
/// </returns>
public Dictionary<IRun, double> GetProblemDistances(string[] characteristics) {
  var result = new Dictionary<IRun, double>();
  // same guard as in the Get...ProblemPerAlgorithmInstance methods: without a problem there is no current instance
  if (Problem == null) return result;

  var currentInstance = problemId2ProblemInstanceMapping.GetByFirst(Problem.ProblemId);
  var features = GetProblemCharacteristics(characteristics);
  var cF = features[currentInstance];
  foreach (var b in ProblemInstances) {
    if (b == currentInstance) continue;
    var sum = features[b].Select((t, f) => (cF[f] - t) * (cF[f] - t)).Sum();
    result[b] = Math.Sqrt(sum);
  }
  return result;
}
792
/// <summary>
/// Assigns a performance class to every algorithm of the knowledge base for each problem
/// instance. The algorithms are clustered into nClasses classes by their expected runtime to
/// reach the target; an infinite expected runtime is flagged to the clustering by the
/// predicate passed to ClusteringHelper.
/// </summary>
/// <param name="target">Relative target that is passed to GetTarget together with the best-known quality.</param>
/// <param name="nClasses">The number of classes passed to the clustering.</param>
/// <returns>For every problem instance the class per algorithm id.</returns>
public Dictionary<IRun, Dictionary<long, int>> GetPerformanceClasses(double target, int nClasses) {
  var classesPerProblem = new Dictionary<IRun, Dictionary<long, int>>();
  var problemsByName = ProblemInstances.ToDictionary(x => ((StringValue)x.Parameters["Problem Name"]).Value);
  var runsByProblem = KnowledgeBase.GroupBy(x => ((StringValue)x.Parameters["Problem Name"]).Value).ToList();

  foreach (var problemRuns in runsByProblem) {
    var problem = problemsByName[problemRuns.Key];
    var bestKnownQuality = ((DoubleValue)problem.Parameters["BestKnownQuality"]).Value;
    var maximization = ((BoolValue)problem.Parameters["Maximization"]).Value;
    var targetQuality = GetTarget(bestKnownQuality, target, maximization);

    var classOfAlgorithm = new Dictionary<long, int>();
    classesPerProblem[problem] = classOfAlgorithm;

    // expected runtime per algorithm id on this problem instance
    var ertPerAlgorithm = problemRuns
      .GroupBy(x => algorithmId2RunMapping.GetBySecond(x).Single())
      .ToDictionary(x => x.Key, x => ExpectedRuntimeHelper.CalculateErt(x.ToList(), "QualityPerEvaluations", targetQuality, maximization).ExpectedRuntime);

    var clusters = ClusteringHelper<long>.Cluster(nClasses, ertPerAlgorithm, x => double.IsInfinity(x.Value))
      .GetByCluster().ToList();
    foreach (var cluster in clusters) {
      foreach (var member in cluster.Value)
        classOfAlgorithm[member.Key] = cluster.Key;
    }
  }
  return classesPerProblem;
}
814
/// <summary>
/// Calculates the quality that has to be reached in order to be within the relative
/// <paramref name="target"/> of the best-known quality.
/// </summary>
/// <param name="bestKnownQuality">The best-known quality of the problem instance.</param>
/// <param name="target">The allowed relative deviation from the best-known quality, e.g. 0.05 for 5%.</param>
/// <param name="maximization">Whether a higher quality is better.</param>
/// <returns>
/// A quality that is worse than the best-known quality by the fraction <paramref name="target"/>:
/// lower in case of maximization, higher in case of minimization.
/// </returns>
public double GetTarget(double bestKnownQuality, double target, bool maximization) {
  // The factor (1 - target) moves a non-negative quality towards lower values, but a negative
  // quality towards higher values. The factor thus has to be swapped for negative qualities,
  // otherwise the target would be better than the best-known quality and could never be reached.
  var shrink = maximization == (bestKnownQuality >= 0);
  return bestKnownQuality * (shrink ? (1 - target) : (1 + target));
}
818
/// <summary>
/// Event that carries an IProgress instance; it is raised by OnDownloadStarted.
/// </summary>
public event EventHandler<EventArgs<IProgress>> DownloadStarted;
// Raises DownloadStarted with the given progress if there are subscribers.
private void OnDownloadStarted(IProgress progress) {
  // copy the delegate so that the subscriber list cannot change between check and invocation
  var subscribers = DownloadStarted;
  if (subscribers == null) return;
  subscribers(this, new EventArgs<IProgress>(progress));
}
[13722]824
/// <summary>
/// Event that carries an algorithm instance; it is raised by OnAlgorithmInstanceStarted.
/// </summary>
public event EventHandler<EventArgs<IAlgorithm>> AlgorithmInstanceStarted;
// Raises AlgorithmInstanceStarted with the given instance if there are subscribers.
private void OnAlgorithmInstanceStarted(IAlgorithm instance) {
  // copy the delegate so that the subscriber list cannot change between check and invocation
  var subscribers = AlgorithmInstanceStarted;
  if (subscribers == null) return;
  subscribers(this, new EventArgs<IAlgorithm>(instance));
}
[13757]830
/// <summary>
/// Event without payload; it is raised by OnRecommenderModelChanged.
/// </summary>
public event EventHandler RecommendationModelChanged;
// Raises RecommendationModelChanged if there are subscribers.
private void OnRecommenderModelChanged() {
  // copy the delegate so that the subscriber list cannot change between check and invocation
  var subscribers = RecommendationModelChanged;
  if (subscribers == null) return;
  subscribers(this, EventArgs.Empty);
}
836
/// <summary>
/// Asks the recommendation model to rank the algorithm instances for the current problem
/// instance, i.e. the single problem instance for which IsCurrentInstance holds.
/// </summary>
/// <returns>The ranking returned by the recommendation model: algorithm instances with a value each.</returns>
public IEnumerable<KeyValuePair<IAlgorithm, double>> GetAlgorithmInstanceRanking() {
  var model = RecommendationModel;
  var currentInstance = ProblemInstances.Single(IsCurrentInstance);
  return model.GetRanking(currentInstance);
}
[12842]840  }
841}
Note: See TracBrowser for help on using the repository browser.