Free cookie consent management tool by TermsFeed Policy Generator

source: branches/PerformanceComparison/HeuristicLab.OptimizationExpertSystem.Common/3.3/KnowledgeCenter.cs @ 13757

Last change on this file since 13757 was 13757, checked in by abeham, 8 years ago

#2547:

  • worked on problem instance mapping
  • started working on improved suggestion algorithm
File size: 37.1 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using HeuristicLab.Analysis;
23using HeuristicLab.Analysis.SelfOrganizingMaps;
24using HeuristicLab.Collections;
25using HeuristicLab.Common;
26using HeuristicLab.Common.Resources;
27using HeuristicLab.Core;
28using HeuristicLab.Data;
29using HeuristicLab.MainForm;
30using HeuristicLab.Optimization;
31using HeuristicLab.Persistence.Default.Xml;
32using HeuristicLab.Random;
33using System;
34using System.Collections.Generic;
35using System.Drawing;
36using System.IO;
37using System.Linq;
38using System.Threading;
39using System.Threading.Tasks;
40using RunCreationClient = HeuristicLab.Clients.OKB.RunCreation.RunCreationClient;
41using SingleObjectiveOKBProblem = HeuristicLab.Clients.OKB.RunCreation.SingleObjectiveOKBProblem;
42using SingleObjectiveOKBSolution = HeuristicLab.Clients.OKB.RunCreation.SingleObjectiveOKBSolution;
43
44namespace HeuristicLab.OptimizationExpertSystem.Common {
45  [Item("Knowledge Center", "Currently in experimental phase, an expert system that makes algorithm suggestions based on fitness landscape analysis features and an optimization knowledge base.")]
46  [Creatable(CreatableAttribute.Categories.TestingAndAnalysis, Priority = 119)]
47  public sealed class KnowledgeCenter : IContent {
48    private bool SuppressEvents { get; set; }
49
50    public string Filename { get; set; }
51
52    public static new Image StaticItemImage {
53      get { return VSImageLibrary.Library; }
54    }
55
56    private readonly IntValue maximumEvaluations;
57    public IntValue MaximumEvaluations {
58      get { return maximumEvaluations; }
59    }
60
61    private readonly DoubleValue minimumTarget;
62    public DoubleValue MinimumTarget {
63      get { return minimumTarget; }
64    }
65   
66    private readonly RunCollection instanceRuns;
67    public RunCollection InstanceRuns {
68      get { return instanceRuns; }
69    }
70
71    private readonly RunCollection seededRuns;
72    public RunCollection SeededRuns {
73      get { return seededRuns; }
74    }
75
76    private readonly RunCollection knowledgeBase;
77    public RunCollection KnowledgeBase {
78      get { return knowledgeBase; }
79    }
80
81    private readonly SingleObjectiveOKBProblem problem;
82    public SingleObjectiveOKBProblem Problem {
83      get { return problem; }
84    }
85
86    private readonly ItemList<IAlgorithm> suggestedInstances;
87    private readonly ReadOnlyItemList<IAlgorithm> readOnlySuggestedInstances;
88    public ReadOnlyItemList<IAlgorithm> SuggestedInstances {
89      get { return readOnlySuggestedInstances; }
90    }
91
92    private readonly RunCollection problemInstances;
93    public RunCollection ProblemInstances {
94      get { return problemInstances; }
95    }
96
97    private readonly CheckedItemList<StringValue> problemCharacteristics;
98    private readonly ReadOnlyCheckedItemList<StringValue> readonlyProblemCharacteristics;
99    public ReadOnlyCheckedItemList<StringValue> ProblemCharacteristics {
100      get { return readonlyProblemCharacteristics; }
101    }
102
103    private readonly EnumValue<ProblemInstanceProximityType> problemInstanceProximity;
104    public EnumValue<ProblemInstanceProximityType> ProblemInstanceProximity {
105      get { return problemInstanceProximity; }
106    }
107
108    private readonly DoubleValue problemInstanceNeighborhoodFactor;
109    public DoubleValue ProblemInstanceNeighborhoodFactor {
110      get { return problemInstanceNeighborhoodFactor; }
111    }
112
113    private readonly CheckedItemList<IScope> solutionSeedingPool;
114    public CheckedItemList<IScope> SolutionSeedingPool {
115      get { return solutionSeedingPool; }
116    }
117
118    private readonly EnumValue<SeedingStrategyTypes> seedingStrategy;
119    public EnumValue<SeedingStrategyTypes> SeedingStrategy {
120      get { return seedingStrategy; }
121    }
122   
123    private BidirectionalLookup<long, IRun> algorithmId2RunMapping;
124    private BidirectionalDictionary<long, IAlgorithm> algorithmId2AlgorithmInstanceMapping;
125    private BidirectionalDictionary<long, IRun> problemId2ProblemInstanceMapping;
126   
127    private bool Maximization {
128      get { return Problem != null && Problem.ProblemId >= 0 && ((IValueParameter<BoolValue>)Problem.MaximizationParameter).Value.Value; }
129    }
130
131    public KnowledgeCenter() {
132      maximumEvaluations = new IntValue(10000);
133      minimumTarget = new DoubleValue(1.05);
134      instanceRuns = new RunCollection();
135      seededRuns = new RunCollection();
136      knowledgeBase = new RunCollection();
137      suggestedInstances = new ItemList<IAlgorithm>();
138      readOnlySuggestedInstances = suggestedInstances.AsReadOnly();
139      problemInstances = new RunCollection();
140      problemCharacteristics = new CheckedItemList<StringValue>();
141      problemInstanceProximity = new EnumValue<ProblemInstanceProximityType>(ProblemInstanceProximityType.FeatureSpace);
142      problemInstanceNeighborhoodFactor = new DoubleValue(1);
143      readonlyProblemCharacteristics = problemCharacteristics.AsReadOnly();
144      problem = new SingleObjectiveOKBProblem();
145      algorithmId2RunMapping = new BidirectionalLookup<long, IRun>();
146      algorithmId2AlgorithmInstanceMapping = new BidirectionalDictionary<long, IAlgorithm>();
147      problemId2ProblemInstanceMapping = new BidirectionalDictionary<long, IRun>();
148      solutionSeedingPool = new CheckedItemList<IScope>();
149      seedingStrategy = new EnumValue<SeedingStrategyTypes>(SeedingStrategyTypes.NoSeeding);
150      RegisterEventHandlers();
151    }
152
153    private void ProblemOnProblemChanged(object sender, EventArgs eventArgs) {
154      // TODO: Potentially, knowledge base has to be re-downloaded
155    }
156
157    private void RegisterEventHandlers() {
158      maximumEvaluations.ValueChanged += MaximumEvaluationsOnValueChanged;
159      minimumTarget.ValueChanged += MinimumTargetOnValueChanged;
160      problem.ProblemChanged += ProblemOnProblemChanged;
161      problem.Solutions.ItemsAdded += ProblemSolutionsChanged;
162      problem.Solutions.ItemsReplaced += ProblemSolutionsChanged;
163      problem.Solutions.ItemsRemoved += ProblemSolutionsChanged;
164      problem.Solutions.CollectionReset += ProblemSolutionsChanged;
165      instanceRuns.CollectionReset += InformationChanged;
166      instanceRuns.ItemsAdded += InformationChanged;
167      instanceRuns.ItemsRemoved += InformationChanged;
168      instanceRuns.Reset += InformationChanged;
169      instanceRuns.UpdateOfRunsInProgressChanged += InformationChanged;
170      knowledgeBase.CollectionReset += InformationChanged;
171      knowledgeBase.ItemsAdded += InformationChanged;
172      knowledgeBase.ItemsRemoved += InformationChanged;
173      problemCharacteristics.ItemsAdded += CharacteristicChanged;
174      problemCharacteristics.ItemsReplaced += CharacteristicChanged;
175      problemCharacteristics.ItemsRemoved += CharacteristicChanged;
176      problemCharacteristics.CollectionReset += CharacteristicChanged;
177      problemCharacteristics.CheckedItemsChanged += CharacteristicChanged;
178      problemInstanceProximity.ValueChanged += ProblemInstanceProximityChanged;
179      problemInstanceNeighborhoodFactor.ValueChanged += ProblemInstanceProximityChanged;
180    }
181
182    private void MaximumEvaluationsOnValueChanged(object sender, EventArgs eventArgs) {
183      UpdateSuggestions();
184    }
185
186    private void MinimumTargetOnValueChanged(object sender, EventArgs e) {
187      UpdateSuggestions();
188    }
189
190    private void ProblemSolutionsChanged(object sender, EventArgs e) {
191      foreach (var sol in Problem.Solutions.Select(x => x.Solution).OfType<IScope>()) {
192        if (!SolutionSeedingPool.Contains(sol))
193          SolutionSeedingPool.Add(sol, false);
194      }
195    }
196
197    private void InformationChanged(object sender, EventArgs e) {
198      var runCollection = sender as RunCollection;
199      if (runCollection != null && runCollection.UpdateOfRunsInProgress) return;
200      UpdateSuggestions();
201    }
202
203    private void CharacteristicChanged(object sender, EventArgs e) {
204      if (SuppressEvents) return;
205      UpdateInstanceProjection();
206    }
207
208    private void ProblemInstanceProximityChanged(object sender, EventArgs e) {
209      UpdateSuggestions();
210    }
211
212    public bool IsCurrentInstance(IRun run) {
213      if (!problemId2ProblemInstanceMapping.ContainsSecond(run)) return false;
214      return problemId2ProblemInstanceMapping.GetBySecond(run) == Problem.ProblemId;
215    }
216
217    public void UpdateInstanceProjection() {
218      if (ProblemCharacteristics.Count == 0) return;
219
220      var instances = GetProblemCharacteristics();
221
222      var key2Idx = new BidirectionalDictionary<IRun, int>();
223      foreach (var kvp in instances.Select((k, i) => new { Index = i, Key = k.Key }))
224        key2Idx.Add(kvp.Key, kvp.Index);
225
226      #region MDS
227      Func<double[], double[], double> euclid = (a, b) => Math.Sqrt(a.Zip(b, (x, y) => (x - y)).Sum(x => x * x));
228      var num = instances.Count;
229      var matrix = new DoubleMatrix(num, num);
230      for (var i = 0; i < num - 1; i++) {
231        for (var j = i + 1; j < num; j++) {
232          matrix[i, j] = matrix[j, i] = euclid(instances[key2Idx.GetBySecond(i)], instances[key2Idx.GetBySecond(j)]);
233        }
234      }
235
236      var coords = MultidimensionalScaling.KruskalShepard(matrix);
237      #endregion
238      #region PCA
239      var ds = new double[instances.Count, ProblemCharacteristics.CheckedItems.Count()];
240      foreach (var instance in instances) {
241        var arr = instance.Value;
242        for (var feature = 0; feature < arr.Length; feature++)
243          ds[key2Idx.GetByFirst(instance.Key), feature] = arr[feature];
244      }
245
246      int info;
247      double[] s2;
248      double[,] v;
249      alglib.pcabuildbasis(ds, ds.GetLength(0), ds.GetLength(1), out info, out s2, out v);
250      #endregion
251      #region SOM
252      var features = new DoubleMatrix(ProblemCharacteristics.CheckedItems.Count(), instances.Count);
253      foreach (var instance in instances) {
254        var arr = instance.Value;
255        for (var feature = 0; feature < arr.Length; feature++)
256          features[feature, key2Idx.GetByFirst(instance.Key)] = arr[feature];
257      }
258      var somCoords = SOM.Map(features, new MersenneTwister(42), somSize: 20, learningRadius: 20, iterations: 200, jittering: true);
259      #endregion
260
261      ProblemInstances.UpdateOfRunsInProgress = true;
262      try {
263        foreach (var instance in ProblemInstances) {
264          double x = 0, y = 0;
265          for (var feature = 0; feature < ds.GetLength(1); feature++) {
266            x += ds[key2Idx.GetByFirst(instance), feature] * v[feature, 0];
267            y += ds[key2Idx.GetByFirst(instance), feature] * v[feature, 1];
268          }
269
270          IItem item;
271          if (instance.Results.TryGetValue("Projection.PCA.X", out item)) {
272            ((DoubleValue)item).Value = x;
273          } else instance.Results.Add("Projection.PCA.X", new DoubleValue(x));
274          if (instance.Results.TryGetValue("Projection.PCA.Y", out item)) {
275            ((DoubleValue)item).Value = y;
276          } else instance.Results.Add("Projection.PCA.Y", new DoubleValue(y));
277
278          if (instance.Results.TryGetValue("Projection.MDS.X", out item)) {
279            ((DoubleValue)item).Value = coords[key2Idx.GetByFirst(instance), 0];
280          } else instance.Results.Add("Projection.MDS.X", new DoubleValue(coords[key2Idx.GetByFirst(instance), 0]));
281          if (instance.Results.TryGetValue("Projection.MDS.Y", out item)) {
282            ((DoubleValue)item).Value = coords[key2Idx.GetByFirst(instance), 1];
283          } else instance.Results.Add("Projection.MDS.Y", new DoubleValue(coords[key2Idx.GetByFirst(instance), 1]));
284
285          if (instance.Results.TryGetValue("Projection.SOM.X", out item)) {
286            ((DoubleValue)item).Value = somCoords[key2Idx.GetByFirst(instance), 0];
287          } else instance.Results.Add("Projection.SOM.X", new DoubleValue(somCoords[key2Idx.GetByFirst(instance), 0]));
288          if (instance.Results.TryGetValue("Projection.SOM.Y", out item)) {
289            ((DoubleValue)item).Value = somCoords[key2Idx.GetByFirst(instance), 1];
290          } else instance.Results.Add("Projection.SOM.Y", new DoubleValue(somCoords[key2Idx.GetByFirst(instance), 1]));
291        }
292      } finally { ProblemInstances.UpdateOfRunsInProgress = false; }
293    }
294
295    private Dictionary<IRun, double[]> GetProblemCharacteristics() {
296      var instances = new Dictionary<IRun, double[]>();
297      var values = new List<double>[ProblemCharacteristics.CheckedItems.Count()];
298      foreach (var run in ProblemInstances) {
299        var f = 0;
300        instances[run] = new double[ProblemCharacteristics.CheckedItems.Count()];
301        foreach (var c in ProblemCharacteristics.CheckedItems.Select(x => x.Value.Value)) {
302          if (values[f] == null) values[f] = new List<double>(ProblemInstances.Count);
303          IItem item;
304          if (run.Results.TryGetValue(c, out item)) {
305            var val = (double)((dynamic)item).Value;
306            if (!double.IsNaN(val)) values[f].Add(val);
307            instances[run][f] = val;
308          } else instances[run][f] = double.NaN;
309          f++;
310        }
311      }
312      var median = values.Select(x => x.Count == 0 ? 0.0 : x.Median()).ToList();
313
314      var allValues = instances.Values.Select(x => x.Select((f, i) => new {Idx = i, Val = double.IsNaN(f) ? median[i] : f}).ToList())
315        .SelectMany(x => x)
316        .GroupBy(x => x.Idx, x => x.Val)
317        .OrderBy(x => x.Key).ToList();
318      var avg = allValues.Select(x => x.Average()).ToList();
319      var stdev = allValues.Select(x => x.StandardDeviation()).ToList();
320
321      // normalize characteristic values by transforming them to their z-score
322      foreach (var key in instances.Keys.ToList()) {
323        var arr = instances[key];
324        for (var i = 0; i < arr.Length; i++) {
325          if (double.IsNaN(arr[i])) arr[i] = median[i];
326          if (stdev[i] > 0) arr[i] = (arr[i] - avg[i]) / stdev[i];
327        }
328      }
329      return instances;
330    }
331
332    private static readonly HashSet<string> InterestingValueNames = new HashSet<string>() {
333      "QualityPerEvaluations", "Problem Name", "Problem Type", "Algorithm Name", "Algorithm Type", "Maximization", "BestKnownQuality"
334    };
335
336    public Task<ResultCollection> StartAlgorithmAsync(int index) {
337      return StartAlgorithmAsync(index, CancellationToken.None);
338    }
339
340    public Task<ResultCollection> StartAlgorithmAsync(int index, CancellationToken cancellation) {
341      var selectedInstance = suggestedInstances[index];
342      var algorithmClone = (IAlgorithm)selectedInstance.Clone();
343      var problemClone = Problem.CloneProblem() as ISingleObjectiveHeuristicOptimizationProblem;
344      if (problemClone == null) throw new InvalidOperationException("Problem is not of type " + typeof(ISingleObjectiveHeuristicOptimizationProblem).FullName);
345      // TODO: It is assumed the problem instance by default is configured using no preexisting solution creator
346      var seedingStrategyLocal = SeedingStrategy.Value;
347      if (seedingStrategyLocal != SeedingStrategyTypes.NoSeeding) {
348        if (!SolutionSeedingPool.CheckedItems.Any()) throw new InvalidOperationException("There are no solutions selected for seeding.");
349        // TODO: It would be necessary to specify the solution creator somewhere (property and GUI)
350        var seedingCreator = problemClone.Operators.OfType<IPreexistingSolutionCreator>().FirstOrDefault();
351        if (seedingCreator == null) throw new InvalidOperationException("The problem does not contain a solution creator that allows seeding.");
352        seedingCreator.PreexistingSolutionsParameter.Value.Replace(SolutionSeedingPool.CheckedItems.Select(x => x.Value));
353        seedingCreator.SampleFromPreexistingParameter.Value.Value = seedingStrategyLocal == SeedingStrategyTypes.SeedBySampling;
354        // TODO: WHY!? WHY??!?
355        ((dynamic)problemClone.SolutionCreatorParameter).Value = (dynamic)seedingCreator;
356      }
357      algorithmClone.Problem = problemClone;
358      algorithmClone.Prepare(true);
359      IParameter stopParam;
360      var monitorStop = true;
361      if (algorithmClone.Parameters.TryGetValue("MaximumEvaluations", out stopParam)) {
362        var maxEvalParam = stopParam as IValueParameter<Data.IntValue>;
363        if (maxEvalParam != null) {
364          maxEvalParam.Value.Value = MaximumEvaluations.Value;
365          monitorStop = false;
366        }
367      }
368
369      // TODO: The following can be simplified when we have async implementation patterns for our algorithms:
370      // TODO: The closures can be removed and replaced with private member methods
371      var waitHandle = new AutoResetEvent(false);
372
373      #region EventHandler closures
374      EventHandler exeStateChanged = (sender, e) => {
375        if (algorithmClone.ExecutionState == ExecutionState.Stopped) {
376          lock (Problem.Solutions) {
377            foreach (var solution in algorithmClone.Results.Where(x => x.Name.ToLower().Contains("solution")).Select(x => x.Value).OfType<IScope>()) {
378              Problem.Solutions.Add(new SingleObjectiveOKBSolution(Problem.ProblemId) {
379                Quality = solution.Variables.ContainsKey(Problem.Problem.Evaluator.QualityParameter.ActualName) ? ((DoubleValue)solution.Variables[Problem.Problem.Evaluator.QualityParameter.ActualName].Value).Value : double.NaN,
380                Solution = (IItem)solution.Clone()
381              });
382            }
383          }
384          if (seedingStrategyLocal == SeedingStrategyTypes.NoSeeding) {
385            lock (InstanceRuns) {
386              InstanceRuns.Add(algorithmClone.Runs.Last());
387            }
388          } else {
389            lock (SeededRuns) {
390              SeededRuns.Add(algorithmClone.Runs.Last());
391            }
392          }
393          waitHandle.Set();
394        }
395      };
396
397      EventHandler<EventArgs<Exception>> exceptionOccurred = (sender, e) => {
398        waitHandle.Set();
399      };
400
401      EventHandler timeChanged = (sender, e) => {
402        IResult evalSolResult;
403        if (!algorithmClone.Results.TryGetValue("EvaluatedSolutions", out evalSolResult) || !(evalSolResult.Value is Data.IntValue)) return;
404        var evalSols = ((Data.IntValue)evalSolResult.Value).Value;
405        if (evalSols >= MaximumEvaluations.Value && algorithmClone.ExecutionState == ExecutionState.Started)
406          algorithmClone.Stop();
407      };
408      #endregion
409
410      algorithmClone.ExecutionStateChanged += exeStateChanged;
411      algorithmClone.ExceptionOccurred += exceptionOccurred;
412      if (monitorStop) algorithmClone.ExecutionTimeChanged += timeChanged;
413
414      return Task.Factory.StartNew(() => {
415        algorithmClone.Start();
416        OnAlgorithmInstanceStarted(algorithmClone);
417        var cancelRequested = false;
418        while (!waitHandle.WaitOne(200)) {
419          if (cancellation.IsCancellationRequested) {
420            cancelRequested = true;
421            break;
422          }
423        }
424        if (cancelRequested) {
425          try { algorithmClone.Stop(); } catch { } // ignore race condition if it is stopped in the meantime
426          waitHandle.WaitOne();
427        }
428        waitHandle.Dispose();
429        return algorithmClone.Results;
430      }, TaskCreationOptions.LongRunning);
431    }
432
433    public ResultCollection StartAlgorithm(int index, CancellationToken cancellation) {
434      var task = StartAlgorithmAsync(index, cancellation);
435      task.Wait(cancellation);
436      return task.Result;
437    }
438
439    public Task UpdateKnowledgeBaseAsync(IProgress progress = null) {
440      if (progress == null) progress = new Progress(string.Empty);
441      progress.Start("Updating Knowledge Base from OKB");
442      OnDownloadStarted(progress);
443      return Task.Factory.StartNew(() => { DoUpdateKnowledgeBase(progress); }, TaskCreationOptions.LongRunning);
444    }
445
446    public void UpdateKnowledgeBase(IProgress progress = null) {
447      UpdateKnowledgeBaseAsync(progress).Wait();
448    }
449
450    private void DoUpdateKnowledgeBase(IProgress progress) {
451      var queryClient = Clients.OKB.Query.QueryClient.Instance;
452      var adminClient = Clients.OKB.Administration.AdministrationClient.Instance;
453      try {
454        progress.Status = "Connecting to OKB...";
455        progress.ProgressValue = 0;
456        // FIXME: How to tell if refresh is necessary?
457        queryClient.Refresh();
458        progress.ProgressValue = 0.5;
459        progress.Status = "Downloading algorithm and problem instance information...";
460        // FIXME: How to tell if refresh is necessary?
461        adminClient.Refresh();
462
463        var probInstance = adminClient.Problems.SingleOrDefault(x => x.Id == Problem.ProblemId);
464        if (probInstance == null) throw new InvalidOperationException("The chosen problem instance cannot be found in the OKB.");
465        var probClassId = probInstance.ProblemClassId;
466
467        var problemClassFilter = (Clients.OKB.Query.StringComparisonAvailableValuesFilter)queryClient.Filters.Single(x => x.Label == "Problem Class Name");
468        problemClassFilter.Value = adminClient.ProblemClasses.Single(x => x.Id == probClassId).Name;
469
470        problemId2ProblemInstanceMapping.Clear();
471        progress.Status = "Downloading problem instances...";
472        progress.ProgressValue = 0;
473        int[] p = { 0 };
474        ProblemInstances.UpdateOfRunsInProgress = true;
475        ProblemInstances.Clear();
476        var characteristics = new HashSet<string>();
477        var totalProblems = adminClient.Problems.Count(x => x.ProblemClassId == probClassId);
478        Parallel.ForEach(adminClient.Problems.Where(x => x.ProblemClassId == probClassId), (pInst) => {
479          var charas = new List<string>();
480          IRun probRun = null;
481          var data = Clients.OKB.Administration.AdministrationClient.GetProblemData(pInst.Id);
482          if (data != null) {
483            using (var stream = new MemoryStream(data)) {
484              try {
485                var prob = (IProblem)XmlParser.Deserialize<IContent>(stream);
486                probRun = new Run() { Name = prob.Name };
487                prob.CollectParameterValues(probRun.Parameters);
488                probRun.Parameters["Problem Name"] = new StringValue(prob.Name);
489                probRun.Parameters["Problem Type"] = new StringValue(prob.GetType().Name);
490                foreach (var v in RunCreationClient.Instance.GetCharacteristicValues(pInst.Id)) {
491                  probRun.Results.Add("Characteristic." + v.Name, RunCreationClient.Instance.ConvertToItem(v));
492                  charas.Add("Characteristic." + v.Name);
493                }
494              } catch { }
495              stream.Close();
496            }
497            if (probRun != null) {
498              lock (characteristics) {
499                problemId2ProblemInstanceMapping.Add(pInst.Id, probRun);
500                ProblemInstances.Add(probRun);
501                progress.Status = string.Format("Downloaded problem {0} (okb-id: {1})....", pInst.Name, pInst.Id);
502                p[0]++;
503                progress.ProgressValue = p[0] / (double)totalProblems;
504                foreach (var c in charas) characteristics.Add(c);
505              }
506            }
507          }
508        });
509
510        algorithmId2AlgorithmInstanceMapping.Clear();
511        progress.Status = "Downloading algorithm instances...";
512        progress.ProgressValue = 0;
513        p[0] = 0;
514        Parallel.ForEach(adminClient.Algorithms, (algInst) => {
515          IAlgorithm alg = null;
516          var data = Clients.OKB.Administration.AdministrationClient.GetAlgorithmData(algInst.Id);
517          if (data != null) {
518            using (var stream = new MemoryStream(data)) {
519              try {
520                alg = (IAlgorithm)XmlParser.Deserialize<IContent>(stream);
521              } catch { }
522              stream.Close();
523            }
524            if (alg != null) {
525              lock (algorithmId2AlgorithmInstanceMapping) {
526                algorithmId2AlgorithmInstanceMapping.Add(algInst.Id, alg);
527                progress.Status = string.Format("Downloaded algorithm {0} (okb-id: {1})...", algInst.Name, algInst.Id);
528                p[0]++;
529                progress.ProgressValue = p[0] / (double)adminClient.Algorithms.Count;
530              }
531            }
532          }
533        });
534
535        var interestingValues = queryClient.ValueNames.Where(x => InterestingValueNames.Contains(x.Name)).ToList();
536
537        progress.Status = "Downloading runs...";
538        progress.ProgressValue = 0;
539        p[0] = 0;
540        var count = queryClient.GetNumberOfRuns(problemClassFilter);
541        if (count == 0) return;
542       
543        var runList = new List<IRun>();
544        var runIds = queryClient.GetRunIds(problemClassFilter).ToList();
545        var batches = runIds.Select((v, i) => new { Idx = i, Val = v }).GroupBy(x => x.Idx / 500, x => x.Val);
546        Parallel.ForEach(batches.Select(x => x.ToList()), (batch) => {
547          var okbRuns = queryClient.GetRunsWithValues(batch, true, interestingValues);
548          var hlRuns = okbRuns.AsParallel().Select(x => new { AlgorithmId = x.Algorithm.Id, Run = queryClient.ConvertToOptimizationRun(x) }).ToList();
549          lock (runList) {
550            foreach (var r in hlRuns) {
551              algorithmId2RunMapping.Add(r.AlgorithmId, r.Run);
552              runList.Add(r.Run);
553            }
554            progress.Status = string.Format("Downloaded runs {0} to {1} of {2}...", p[0], p[0] + batch.Count, count);
555            p[0] += batch.Count;
556            progress.ProgressValue = p[0] / (double)count;
557          }
558        });
559
560        progress.Status = "Finishing...";
561        var algInstRunDict = runList.Where(x => x.Parameters.ContainsKey("Problem Name") && x.Parameters["Problem Name"] is StringValue)
562                                          .GroupBy(x => ((StringValue)x.Parameters["Problem Name"]).Value)
563                                          .ToDictionary(x => x.Key, x => x.GroupBy(y => ((StringValue)y.Parameters["Algorithm Name"]).Value)
564                                                                                  .ToDictionary(y => y.Key, y => y.ToList()));
565
566        foreach (var instance in ProblemInstances) {
567          IItem probNameParam;
568          if (!instance.Parameters.TryGetValue("Problem Name", out probNameParam)) continue;
569
570          var probInstanceName = ((StringValue)probNameParam).Value;
571          var maximization = ((BoolValue)instance.Parameters["Maximization"]).Value;
572         
573          IItem bkParam;
574          if (!instance.Parameters.TryGetValue("BestKnownQuality", out bkParam) || !(bkParam is DoubleValue)) {
575            Dictionary<string, List<IRun>> algRuns;
576            if (!algInstRunDict.TryGetValue(probInstanceName, out algRuns)) continue;
577            var list = algRuns.SelectMany(x => x.Value)
578                              .Where(x => x.Results.ContainsKey("QualityPerEvaluations"))
579                              .Select(x => ((IndexedDataTable<double>)x.Results["QualityPerEvaluations"]).Rows.First().Values.Last().Item2);
580            bkParam = new DoubleValue(maximization ? list.Max() : list.Min());
581            instance.Parameters["BestKnownQuality"] = bkParam;
582          } else bkParam = instance.Parameters["BestKnownQuality"];
583
584          var bkQuality = ((DoubleValue)bkParam).Value;
585
586          if (!algInstRunDict.ContainsKey(probInstanceName)) continue;
587          // TODO: Things needs to be configurable here (table name, targets)
588          foreach (var target in new[] { 1, 1.01, 1.05, 1.1, 1.2, 1.5 }) {
589            var indexMap = new BidirectionalDictionary<string, int>();
590            var dict = new Dictionary<string, double>();
591            var idx = 0;
592            foreach (var kvp in algInstRunDict[probInstanceName]) {
593              var result = ExpectedRuntimeHelper.CalculateErt(kvp.Value, "QualityPerEvaluations", bkQuality * target, maximization);
594              indexMap.Add(kvp.Key, idx);
595              dict[kvp.Key] = !double.IsNaN(result.ExpectedRuntime) ? result.ExpectedRuntime : int.MaxValue;
596              idx++;
597            }
598            var points = dict.OrderBy(x => indexMap.GetByFirst(x.Key)).Select(x => x.Value > 0 ? Math.Log10(x.Value) : 0).ToArray();
599            int[] clusters;
600            Ckmeans1dClusters(points, 5, out clusters);
601            var ranks = clusters.Select((c, i) => new { Cluster = c, Perf = dict[indexMap.GetBySecond(i)] })
602                                .GroupBy(x => x.Cluster, x => x.Perf)
603                                .Select(x => new { Cluster = x.Key, AvgPerf = x.Average() })
604                                .OrderBy(x => x.AvgPerf)
605                                .Select((c, i) => new { Cluster = c.Cluster, Rank = i })
606                                .ToDictionary(x => x.Cluster, x => x.Rank);
607            for (var i = 0; i < clusters.Length; i++) {
608              var resultName = "Rank." + indexMap.GetBySecond(i) + "@" + ((target - 1) * 100) + "%";
609              instance.Results[resultName] = new IntValue(dict[indexMap.GetBySecond(i)] < int.MaxValue ? ranks[clusters[i]] : 6);
610            }
611          }
612        }
613        try {
614          SuppressEvents = true;
615          problemCharacteristics.Replace(characteristics.Select(x => new StringValue(x)));
616          foreach (var pc in problemCharacteristics.Where(x => !x.Value.StartsWith("Characteristic.")))
617            problemCharacteristics.SetItemCheckedState(pc, false);
618        } finally { SuppressEvents = false; }
619        try {
620          KnowledgeBase.UpdateOfRunsInProgress = true;
621          KnowledgeBase.Clear();
622          KnowledgeBase.AddRange(runList);
623        } finally { KnowledgeBase.UpdateOfRunsInProgress = false; }
624      } finally { progress.Finish(); ProblemInstances.UpdateOfRunsInProgress = false; }
625      UpdateInstanceProjection();
626      UpdateSuggestions();
627    }
628
629    private void UpdateSuggestions() {
630      if (Problem == null) return;
631      var piDistances = GetProblemDistances();
632      var maxDistance = piDistances.Max();
633      // Weighted combination of algorithm performances using distance-based exponentially falling weights
634      // Scale distances by maxDistance
635      // Parameter to influence dampening factor
636      // Algorithm performances are given in expected run time
637      // Using convergence graphs up to maximum evaluations effort
638      // Care has to be taken if not every algorithm has been executed on every problem instance
639      var instances = new SortedList<double, IAlgorithm>();
640      foreach (var relevantRuns in knowledgeBase.GroupBy(x => algorithmId2RunMapping.GetBySecond(x).Single())) {
641        var algorithm = algorithmId2AlgorithmInstanceMapping.GetByFirst(relevantRuns.Key);
642        var avgQuality = 0.0;
643        var counter = 0;
644        foreach (var problemRuns in relevantRuns.GroupBy(x => ((StringValue)x.Parameters["Problem Name"]).Value)) {
645          var probInstance = ProblemInstances.SingleOrDefault(x => x.Name == problemRuns.Key);
646          if (probInstance == null) continue;
647          var bkQuality = ((DoubleValue)probInstance.Parameters["BestKnownQuality"]).Value;
648          foreach (var run in problemRuns) {
649            var performanceGraph = ((IndexedDataTable<double>)run.Results["QualityPerEvaluations"]);
650            try {
651              avgQuality += performanceGraph.Rows.First().Values.TakeWhile(x => x.Item1 < MaximumEvaluations.Value).Last().Item2 / bkQuality;
652              counter++;
653            } catch {
654              continue;
655            }
656          }
657        }
658        avgQuality /= counter;
659        instances.Add(avgQuality, algorithm);
660      }
661
662      var instanceLadder = instances.Select(x => (IAlgorithm)x.Value.Clone()).ToList();
663      if (Maximization) instanceLadder.Reverse();
664      suggestedInstances.Replace(instanceLadder);
665    }
666
667    private DoubleMatrix GetProblemDistances() {
668      var matrix = new DoubleMatrix(ProblemInstances.Count, ProblemInstances.Count);
669      switch (ProblemInstanceProximity.Value) {
670        case ProblemInstanceProximityType.MDS:
671        case ProblemInstanceProximityType.PCA:
672        case ProblemInstanceProximityType.SOM:
673          int i = 0, j = 0;
674          foreach (var a in ProblemInstances) {
675            double xa, ya;
676            GetProjectionCoordinates(a, out xa, out ya);
677            j = 0;
678            foreach (var b in ProblemInstances) {
679              double xb, yb;
680              GetProjectionCoordinates(b, out xb, out yb);
681              matrix[i, j] = Math.Sqrt((xa - xb) * (xa - xb) + (ya - yb) * (ya - yb));
682              j++;
683            }
684            i++;
685          }
686          break;
687        case ProblemInstanceProximityType.FeatureSpace:
688          int k = 0, l = 0;
689          var features = GetProblemCharacteristics();
690          foreach (var a in ProblemInstances) {
691            l = 0;
692            var fa = features[a];
693            foreach (var b in ProblemInstances) {
694              var sum = features[b].Select((t, f) => (fa[f] - t) * (fa[f] - t)).Sum();
695              matrix[k, l] = Math.Sqrt(sum);
696              l++;
697            }
698            k++;
699          }
700          break;
701        default: throw new InvalidOperationException(string.Format("Unkonwn proximity type {0}", ProblemInstanceProximity.Value));
702      }
703      return matrix;
704    }
705
706    private void GetProjectionCoordinates(IRun problemInstance, out double x, out double y) {
707      x = ((DoubleValue)problemInstance.Results["Projection." + ProblemInstanceProximity.Value + ".X"]).Value;
708      y = ((DoubleValue)problemInstance.Results["Projection." + ProblemInstanceProximity.Value + ".Y"]).Value;
709    }
710
711    public event EventHandler<EventArgs<IProgress>> DownloadStarted;
712    private void OnDownloadStarted(IProgress progress) {
713      var handler = DownloadStarted;
714      if (handler != null) handler(this, new EventArgs<IProgress>(progress));
715    }
716
717    public event EventHandler<EventArgs<IAlgorithm>> AlgorithmInstanceStarted;
718    private void OnAlgorithmInstanceStarted(IAlgorithm instance) {
719      var handler = AlgorithmInstanceStarted;
720      if (handler != null) handler(this, new EventArgs<IAlgorithm>(instance));
721    }
722
723    // implement further classes and methods
724    private static SortedList<double, int> Ckmeans1dClusters(double[] estimations, int k, out int[] clusterValues) {
725      int nPoints = estimations.Length;
726      var distinct = estimations.Distinct().OrderBy(x => x).ToArray();
727      var max = distinct.Max();
728      if (distinct.Length <= k) {
729        var dict = distinct.Select((v, i) => new { Index = i, Value = v }).ToDictionary(x => x.Value, y => y.Index);
730        for (int i = distinct.Length; i < k; i++)
731          dict.Add(max + i - distinct.Length + 1, i);
732
733        clusterValues = new int[nPoints];
734        for (int i = 0; i < nPoints; i++)
735          if (!dict.ContainsKey(estimations[i])) clusterValues[i] = 0;
736          else clusterValues[i] = dict[estimations[i]];
737
738        return new SortedList<double, int>(dict);
739      }
740
741      var n = distinct.Length;
742      var D = new double[n, k];
743      var B = new int[n, k];
744
745      for (int m = 0; m < k; m++) {
746        for (int j = m; j <= n - k + m; j++) {
747          if (m == 0)
748            D[j, m] = SumOfSquaredDistances(distinct, 0, j + 1);
749          else {
750            var minD = double.MaxValue;
751            var minI = 0;
752            for (int i = 1; i <= j; i++) {
753              var d = D[i - 1, m - 1] + SumOfSquaredDistances(distinct, i, j + 1);
754              if (d < minD) {
755                minD = d;
756                minI = i;
757              }
758            }
759            D[j, m] = minD;
760            B[j, m] = minI;
761          }
762        }
763      }
764
765      var centers = new SortedList<double, int>();
766      var upper = B[n - 1, k - 1];
767      var c = Mean(distinct, upper, n);
768      centers.Add(c, k - 1);
769      for (int i = k - 2; i >= 0; i--) {
770        var lower = B[upper - 1, i];
771        var c2 = Mean(distinct, lower, upper);
772        centers.Add(c2, i);
773        upper = lower;
774      }
775
776      clusterValues = new int[nPoints];
777      for (int i = 0; i < estimations.Length; i++) {
778        clusterValues[i] = centers.MinItems(x => Math.Abs(estimations[i] - x.Key)).First().Value;
779      }
780
781      return centers;
782    }
783
784    private static double SumOfSquaredDistances(double[] x, int start, int end) {
785      if (start == end) throw new InvalidOperationException();
786      if (start + 1 == end) return 0.0;
787      double mean = 0.0;
788      for (int i = start; i < end; i++) {
789        mean += x[i];
790      }
791      mean /= (end - start);
792      var sum = 0.0;
793      for (int i = start; i < end; i++) {
794        sum += (x[i] - mean) * (x[i] - mean);
795      }
796      return sum;
797    }
798
799    private static double Mean(double[] x, int start, int end) {
800      if (start == end) throw new InvalidOperationException();
801      double mean = 0.0;
802      for (int i = start; i < end; i++) {
803        mean += x[i];
804      }
805      mean /= (end - start);
806      return mean;
807    }
808  }
809}
Note: See TracBrowser for help on using the repository browser.