source: branches/PerformanceComparison/HeuristicLab.OptimizationExpertSystem/3.3/ExpertSystem.cs @ 13561

Last change on this file since 13561 was 13561, checked in by abeham, 6 years ago

#2457:

  • added two types of problem instance mappings: PCA and MDS
File size: 22.4 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.ComponentModel;
25using System.Drawing;
26using System.IO;
27using System.Linq;
28using System.Threading.Tasks;
29using HeuristicLab.Analysis;
30using HeuristicLab.Collections;
31using HeuristicLab.Common;
32using HeuristicLab.Common.Resources;
33using HeuristicLab.Core;
34using HeuristicLab.Data;
35using HeuristicLab.MainForm;
36using HeuristicLab.Optimization;
37using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
38using HeuristicLab.Persistence.Default.Xml;
39using RunCreationClient = HeuristicLab.Clients.OKB.RunCreation.RunCreationClient;
40using SingleObjectiveOKBProblem = HeuristicLab.Clients.OKB.RunCreation.SingleObjectiveOKBProblem;
41
42namespace HeuristicLab.OptimizationExpertSystem {
43  [Item("Expert-System", "Currently in experimental phase, an expert system that makes algorithm suggestions based on fitness landscape analysis features and an optimization knowledge base.")]
44  [StorableClass]
45  [Creatable(CreatableAttribute.Categories.TestingAndAnalysis, Priority = 119)]
46  public sealed class ExpertSystem : NamedItem, IStorableContent, INotifyPropertyChanged {
47
48    public string Filename { get; set; }
49
50    public static new Image StaticItemImage {
51      get { return VSImageLibrary.Library; }
52    }
53
54    [Storable]
55    private int maximumEvaluations;
56    public int MaximumEvaluations {
57      get { return maximumEvaluations; }
58      set {
59        if (maximumEvaluations == value) return;
60        maximumEvaluations = value;
61        OnPropertyChanged("MaximumEvaluations");
62        UpdateSuggestions();
63      }
64    }
65
66    [Storable]
67    private RunCollection runs;
68    public RunCollection Runs {
69      get { return runs; }
70    }
71
72    [Storable]
73    private RunCollection knowledgeBase;
74    public RunCollection KnowledgeBase {
75      get { return knowledgeBase; }
76      set {
77        if (knowledgeBase == value) return;
78        knowledgeBase = value;
79        OnPropertyChanged("KnowledgeBase");
80      }
81    }
82
83    [Storable]
84    private SingleObjectiveOKBProblem problem;
85    public SingleObjectiveOKBProblem Problem {
86      get { return problem; }
87      set {
88        if (problem == value) return;
89        problem = value;
90        OnPropertyChanged("Problem");
91        UpdateSuggestions();
92      }
93    }
94
95    [Storable]
96    private ItemList<IAlgorithm> suggestedInstances;
97    private ReadOnlyItemList<IAlgorithm> readOnlySuggestedInstances;
98    public ReadOnlyItemList<IAlgorithm> SuggestedInstances {
99      get { return readOnlySuggestedInstances; }
100    }
101
102    [Storable]
103    private RunCollection problemInstances;
104    public RunCollection ProblemInstances {
105      get { return problemInstances; }
106      set {
107        if (problemInstances == value) return;
108        problemInstances = value;
109        OnPropertyChanged("ProblemInstances");
110      }
111    }
112
113    [Storable]
114    private BidirectionalLookup<long, IRun> algorithmId2RunMapping;
115    [Storable]
116    private BidirectionalDictionary<long, IAlgorithm> algorithmId2AlgorithmInstanceMapping;
117
118    [Storable]
119    private Run currentInstance;
120
121    private bool Maximization {
122      get {
123        return Problem != null && Problem.ProblemId >= 0 && ((IValueParameter<BoolValue>)Problem.MaximizationParameter).Value.Value;
124      }
125    }
126
127    [StorableConstructor]
128    private ExpertSystem(bool deserializing) : base(deserializing) { }
129    private ExpertSystem(ExpertSystem original, Cloner cloner)
130      : base(original, cloner) {
131      runs = cloner.Clone(original.runs);
132      knowledgeBase = cloner.Clone(original.knowledgeBase);
133      suggestedInstances = cloner.Clone(original.suggestedInstances);
134      readOnlySuggestedInstances = suggestedInstances.AsReadOnly();
135      problemInstances = cloner.Clone(original.problemInstances);
136      problem = cloner.Clone(original.problem);
137      algorithmId2RunMapping = new BidirectionalLookup<long, IRun>();
138      foreach (var kvp in original.algorithmId2RunMapping.FirstEnumerable) {
139        algorithmId2RunMapping.AddRangeFirst(kvp.Key, kvp.Select(cloner.Clone));
140      }
141      algorithmId2AlgorithmInstanceMapping = new BidirectionalDictionary<long, IAlgorithm>();
142      foreach (var kvp in original.algorithmId2AlgorithmInstanceMapping) {
143        algorithmId2AlgorithmInstanceMapping.Add(kvp.Key, cloner.Clone(kvp.Value));
144      }
145      currentInstance = cloner.Clone(original.currentInstance);
146      RegisterEventHandlers();
147    }
148    public ExpertSystem() {
149      Name = ItemName;
150      Description = ItemDescription;
151      runs = new RunCollection();
152      knowledgeBase = new RunCollection();
153      suggestedInstances = new ItemList<IAlgorithm>();
154      readOnlySuggestedInstances = suggestedInstances.AsReadOnly();
155      problemInstances = new RunCollection();
156      problem = new SingleObjectiveOKBProblem();
157      algorithmId2RunMapping = new BidirectionalLookup<long, IRun>();
158      algorithmId2AlgorithmInstanceMapping = new BidirectionalDictionary<long, IAlgorithm>();
159      RegisterEventHandlers();
160    }
161
162    private void ProblemOnProblemChanged(object sender, EventArgs eventArgs) {
163      if (Problem == null) return;
164    }
165
166    public override IDeepCloneable Clone(Cloner cloner) {
167      return new ExpertSystem(this, cloner);
168    }
169
170    [StorableHook(HookType.AfterDeserialization)]
171    private void AfterDeserialization() {
172      readOnlySuggestedInstances = suggestedInstances.AsReadOnly();
173      RegisterEventHandlers();
174    }
175
176    private void RegisterEventHandlers() {
177      problem.ProblemChanged += ProblemOnProblemChanged;
178      runs.CollectionReset += InformationChanged;
179      runs.ItemsAdded += InformationChanged;
180      runs.ItemsRemoved += InformationChanged;
181      runs.Reset += InformationChanged;
182      runs.UpdateOfRunsInProgressChanged += InformationChanged;
183      knowledgeBase.CollectionReset += InformationChanged;
184      knowledgeBase.ItemsAdded += InformationChanged;
185      knowledgeBase.ItemsRemoved += InformationChanged;
186    }
187
188    private void InformationChanged(object sender, EventArgs e) {
189      var runCollection = sender as RunCollection;
190      if (runCollection != null && runCollection.UpdateOfRunsInProgress) return;
191      UpdateSuggestions();
192    }
193
194    public void UpdateInstanceProjection() {
195      currentInstance = null;
196      var filter = new HashSet<string>();
197
198      if (Problem.ProblemId != -1) {
199        currentInstance = new Run { Name = Problem.Name };
200        Problem.Problem.CollectParameterValues(currentInstance.Parameters);
201        foreach (var c in RunCreationClient.GetCharacteristicValues(Problem.ProblemId)) {
202          var key = "Characteristic." + c.Name;
203          currentInstance.Results.Add(key, RunCreationClient.Instance.ConvertToItem(c));
204          filter.Add(key);
205        }
206      }
207      // TODO: There is a problem with missing values that has to be solved
208      // The common set of characteristics among all problems may be too small to be useful
209      // It has to be decided somehow which problem instances to include in order to obtain the set of features that is expressive and available
210      var allCharacteristics = ProblemInstances.Select(x => new HashSet<string>(x.Results.Where(y => y.Key.StartsWith("Characteristic.", StringComparison.Ordinal)).Select(y => y.Key))).ToList();
211      var commonCharacteristics = filter.Count > 0 ? filter : allCharacteristics[0];
212      for (var i = 0; i < allCharacteristics.Count; i++)
213        commonCharacteristics.IntersectWith(allCharacteristics[i]);
214
215      if (commonCharacteristics.Count == 0) return;
216
217      var instances = new Dictionary<string, double[]>();
218      foreach (var i in ProblemInstances)
219        instances[i.Name] = i.Results.Where(x => commonCharacteristics.Contains(x.Key)).OrderBy(x => x.Key).Select(x => (double)((dynamic)x.Value).Value).ToArray();
220      if (currentInstance != null)
221        instances[currentInstance.Name] = currentInstance.Results.Where(x => commonCharacteristics.Contains(x.Key)).OrderBy(x => x.Key).Select(x => (double)((dynamic)x.Value).Value).ToArray();
222
223      var allValues = instances.SelectMany(x => x.Value).ToList();
224      var avg = allValues.Average();
225      var stdev = allValues.StandardDeviation();
226
227      // normalize characteristic values by transforming them to their z-score
228      foreach (var key in instances.Keys.ToList()) {
229        var arr = instances[key];
230        for (var i = 0; i < arr.Length; i++) {
231          arr[i] = (arr[i] - avg) / stdev;
232        }
233      }
234
235      var key2Idx = new BidirectionalDictionary<string, int>();
236      foreach (var kvp in instances.Select((k, i) => new { Index = i, Key = k.Key }))
237        key2Idx.Add(kvp.Key, kvp.Index);
238
239      #region MDS
240      Func<double[], double[], double> euclid = (a, b) => Math.Sqrt(a.Zip(b, (x, y) => (x - y)).Sum(x => x * x));
241      var num = instances.Count;
242      var matrix = new DoubleMatrix(num, num);
243      for (var i = 0; i < num - 1; i++) {
244        for (var j = i + 1; j < num; j++) {
245          matrix[i, j] = matrix[j, i] = euclid(instances[key2Idx.GetBySecond(i)], instances[key2Idx.GetBySecond(j)]);
246        }
247      }
248
249      var coords = MultidimensionalScaling.KruskalShepard(matrix);
250      #endregion
251      #region PCA
252      var ds = new double[instances.Count, commonCharacteristics.Count];
253      foreach (var instance in instances) {
254        var arr = instance.Value;
255        for (var feature = 0; feature < arr.Length; feature++)
256          ds[key2Idx.GetByFirst(instance.Key), feature] = arr[feature];
257      }
258
259      int info;
260      double[] s2;
261      double[,] v;
262      alglib.pcabuildbasis(ds, ds.GetLength(0), ds.GetLength(1), out info, out s2, out v);
263      #endregion
264
265      ProblemInstances.UpdateOfRunsInProgress = true;
266      try {
267        foreach (var instance in ProblemInstances) {
268          double x = 0, y = 0;
269          for (var feature = 0; feature < ds.GetLength(1); feature++) {
270            x += ds[key2Idx.GetByFirst(instance.Name), feature] * v[feature, 0];
271            y += ds[key2Idx.GetByFirst(instance.Name), feature] * v[feature, 1];
272          }
273
274          IItem item;
275          if (instance.Results.TryGetValue("Projection.PCA.X", out item)) {
276            ((DoubleValue)item).Value = x;
277          } else instance.Results.Add("Projection.PCA.X", new DoubleValue(x));
278          if (instance.Results.TryGetValue("Projection.PCA.Y", out item)) {
279            ((DoubleValue)item).Value = y;
280          } else instance.Results.Add("Projection.PCA.Y", new DoubleValue(y));
281
282          if (instance.Results.TryGetValue("Projection.MDS.X", out item)) {
283            ((DoubleValue)item).Value = coords[key2Idx.GetByFirst(instance.Name), 0];
284          } else instance.Results.Add("Projection.MDS.X", new DoubleValue(coords[key2Idx.GetByFirst(instance.Name), 0]));
285          if (instance.Results.TryGetValue("Projection.MDS.Y", out item)) {
286            ((DoubleValue)item).Value = coords[key2Idx.GetByFirst(instance.Name), 1];
287          } else instance.Results.Add("Projection.MDS.Y", new DoubleValue(coords[key2Idx.GetByFirst(instance.Name), 1]));
288        }
289      } finally { ProblemInstances.UpdateOfRunsInProgress = false; }
290
291      if (currentInstance != null) {
292        double x = 0, y = 0;
293        for (var feature = 0; feature < ds.GetLength(1); feature++) {
294          x += ds[key2Idx.GetByFirst(currentInstance.Name), feature] * v[feature, 0];
295          y += ds[key2Idx.GetByFirst(currentInstance.Name), feature] * v[feature, 1];
296        }
297
298        IItem item;
299        if (currentInstance.Results.TryGetValue("Projection.PCA.X", out item)) {
300          ((DoubleValue)item).Value = x;
301        } else currentInstance.Results.Add("Projection.PCA.X", new DoubleValue(x));
302        if (currentInstance.Results.TryGetValue("Projection.PCA.Y", out item)) {
303          ((DoubleValue)item).Value = y;
304        } else currentInstance.Results.Add("Projection.PCA.Y", new DoubleValue(y));
305
306        if (currentInstance.Results.TryGetValue("Projection.MDS.X", out item)) {
307          ((DoubleValue)item).Value = coords[key2Idx.GetByFirst(currentInstance.Name), 0];
308        } else currentInstance.Results.Add("Projection.MDS.X", new DoubleValue(coords[key2Idx.GetByFirst(currentInstance.Name), 0]));
309        if (currentInstance.Results.TryGetValue("Projection.MDS.Y", out item)) {
310          ((DoubleValue)item).Value = coords[key2Idx.GetByFirst(currentInstance.Name), 1];
311        } else currentInstance.Results.Add("Projection.MDS.Y", new DoubleValue(coords[key2Idx.GetByFirst(currentInstance.Name), 1]));
312      }
313    }
314
315    public Tuple<double, double> ProjectCurrentInstance(string projection) {
316      if (currentInstance == null) return null;
317      var xKey = "Projection." + projection + ".X";
318      var yKey = "Projection." + projection + ".Y";
319      if (!currentInstance.Results.ContainsKey(xKey) || !currentInstance.Results.ContainsKey(yKey))
320        return null;
321      var x = ((DoubleValue)currentInstance.Results[xKey]).Value;
322      var y = ((DoubleValue)currentInstance.Results[yKey]).Value;
323      return Tuple.Create(x, y);
324    }
325
326    private static readonly HashSet<string> InterestingValueNames = new HashSet<string>() {
327      "QualityPerEvaluations", "Problem Name", "Problem Type", "Algorithm Name", "Algorithm Type", "Maximization", "BestKnownQuality"
328    };
329
330    public async void UpdateKnowledgeBaseAsync(IProgress progress) {
331      progress.Start("Updating Knowledge Base from OKB");
332      await Task.Factory.StartNew(() => { DoUpdateKnowledgeBase(progress); }, TaskCreationOptions.LongRunning);
333    }
334
335    public void UpdateKnowledgeBase() {
336      DoUpdateKnowledgeBase(new Progress(string.Empty, ProgressState.Started));
337    }
338
339    private void DoUpdateKnowledgeBase(IProgress progress) {
340      var queryClient = Clients.OKB.Query.QueryClient.Instance;
341      var adminClient = Clients.OKB.Administration.AdministrationClient.Instance;
342      try {
343        progress.Status = "Downloading run information...";
344        progress.ProgressValue = 0;
345        // FIXME: How to tell if refresh is necessary?
346        queryClient.Refresh();
347        progress.ProgressValue = 0.5;
348        progress.Status = "Downloading algorithm and problem instance information...";
349        // FIXME: How to tell if refresh is necessary?
350        adminClient.Refresh();
351
352        var probInstance = adminClient.Problems.SingleOrDefault(x => x.Id == Problem.ProblemId);
353        if (probInstance == null) throw new InvalidOperationException("The chosen problem instance cannot be found in the OKB.");
354        var probClassId = probInstance.ProblemClassId;
355
356        var problemClassFilter = (Clients.OKB.Query.StringComparisonAvailableValuesFilter)queryClient.Filters.Single(x => x.Label == "Problem Class Name");
357        problemClassFilter.Value = adminClient.ProblemClasses.Single(x => x.Id == probClassId).Name;
358
359        progress.Status = "Downloading problem instances...";
360        progress.ProgressValue = 0;
361        var p = 0;
362        ProblemInstances.UpdateOfRunsInProgress = true;
363        ProblemInstances.Clear();
364        var totalProblems = adminClient.Problems.Count(x => x.ProblemClassId == probClassId);
365        foreach (var problInst in adminClient.Problems.Where(x => x.ProblemClassId == probClassId)) {
366          progress.Status = string.Format("Downloading problem {0} (okb-id: {1})....", problInst.Name, problInst.Id);
367          var data = Clients.OKB.Administration.AdministrationClient.GetProblemData(problInst.Id);
368          if (data != null) {
369            using (var stream = new MemoryStream(data)) {
370              try {
371                var prob = (IProblem)XmlParser.Deserialize<IContent>(stream);
372                var probRun = new Run() { Name = prob.Name };
373                prob.CollectParameterValues(probRun.Parameters);
374                progress.Status += Environment.NewLine + "Downloading characteristics...";
375                foreach (var v in RunCreationClient.GetCharacteristicValues(problInst.Id)) {
376                  probRun.Results.Add("Characteristic." + v.Name, RunCreationClient.Instance.ConvertToItem(v));
377                }
378                ProblemInstances.Add(probRun);
379              } catch { }
380              stream.Close();
381            }
382          }
383          p++;
384          progress.ProgressValue = p / (double)totalProblems;
385        }
386
387        algorithmId2AlgorithmInstanceMapping.Clear();
388        progress.Status = "Downloading algorithm instances...";
389        progress.ProgressValue = 0;
390        p = 0;
391        foreach (var algInst in adminClient.Algorithms) {
392          progress.Status = string.Format("Downloading algorithm {0} (okb-id: {1})...", algInst.Name, algInst.Id);
393          var data = Clients.OKB.Administration.AdministrationClient.GetAlgorithmData(algInst.Id);
394          if (data != null) {
395            using (var stream = new MemoryStream(data)) {
396              try {
397                var alg = (IAlgorithm)XmlParser.Deserialize<IContent>(stream);
398                algorithmId2AlgorithmInstanceMapping.Add(algInst.Id, alg);
399              } catch { }
400              stream.Close();
401            }
402          }
403          p++;
404          progress.ProgressValue = p / (double)adminClient.Algorithms.Count;
405        }
406
407        var interestingValues = queryClient.ValueNames.Where(x => InterestingValueNames.Contains(x.Name)).ToList();
408
409        progress.Status = "Obtaining number of runs...";
410        progress.ProgressValue = 0;
411        p = 0;
412        var count = queryClient.GetNumberOfRuns(problemClassFilter);
413        if (count == 0) return;
414
415        var runIds = queryClient.GetRunIds(problemClassFilter).ToList();
416        var conversions = new List<Task>();
417        var runList = new List<IRun>();
418        while (p < count) {
419          var nextIds = runIds.Skip(p).Take(500).ToList();
420          progress.Status = string.Format("Downloading runs {0} to {1} of {2}...", p, p + nextIds.Count, count);
421          var okbRuns = queryClient.GetRunsWithValues(nextIds, true, interestingValues);
422          conversions.Add(Task.Factory.StartNew(() => {
423            var hlRuns = okbRuns.AsParallel().Select(x => new { AlgorithmId = x.Algorithm.Id, Run = queryClient.ConvertToOptimizationRun(x) }).ToList();
424            lock (runList) {
425              foreach (var r in hlRuns) {
426                algorithmId2RunMapping.Add(r.AlgorithmId, r.Run);
427                runList.Add(r.Run);
428              }
429            }
430          }));
431          p += nextIds.Count;
432          progress.ProgressValue = p / (double)count;
433        }
434        Task.WaitAll(conversions.ToArray());
435
436        progress.Status = "Finishing...";
437        var algInstRunDict = runList.Where(x => x.Parameters.ContainsKey("Problem Name") && x.Parameters["Problem Name"] is StringValue)
438                                          .GroupBy(x => ((StringValue)x.Parameters["Problem Name"]).Value)
439                                          .ToDictionary(x => x.Key, x => x.GroupBy(y => ((StringValue)y.Parameters["Algorithm Name"]).Value)
440                                                                                  .ToDictionary(y => y.Key, y => y.ToList()));
441
442        foreach (var instance in ProblemInstances) {
443          IItem probNameParam;
444          if (!instance.Parameters.TryGetValue("Problem Name", out probNameParam)) continue;
445
446          var probInstanceName = ((StringValue)probNameParam).Value;
447          var bkQuality = ((DoubleValue)instance.Parameters["BestKnownQuality"]).Value;
448          var maximization = ((BoolValue)instance.Parameters["Maximization"]).Value;
449
450          if (!algInstRunDict.ContainsKey(probInstanceName)) continue;
451          foreach (var kvp in algInstRunDict[probInstanceName]) {
452            var algInstanceName = kvp.Key;
453            // TODO: Things needs to be configurable here (table name, targets)
454            foreach (var target in new[] { 1, 1.01, 1.05, 1.1, 1.2, 1.5 }) {
455              var result = ExpectedRuntimeHelper.CalculateErt(kvp.Value, "QualityPerEvaluations", bkQuality * target, maximization);
456              var resultName = algInstanceName + "@" + ((target - 1) * 100) + "%";
457              IItem item;
458              if (instance.Results.TryGetValue(resultName, out item)) {
459                ((DoubleValue)item).Value = Math.Log10(result.ExpectedRuntime);
460              } else instance.Results.Add(resultName, new DoubleValue(Math.Log10(result.ExpectedRuntime)));
461            }
462          }
463        }
464        KnowledgeBase = new RunCollection(runList);
465      } finally { progress.Finish(); ProblemInstances.UpdateOfRunsInProgress = false; }
466    }
467
468    private void UpdateSuggestions() {
469      if (Problem == null) return;
470      var instances = new SortedList<double, IAlgorithm>();
471      foreach (var relevantRuns in knowledgeBase.GroupBy(x => x.Algorithm)) {
472        var avgQuality = 0.0;
473        var counter = 0;
474        foreach (var run in relevantRuns) {
475          var performanceGraph = ((IndexedDataTable<double>)run.Results["QualityPerEvaluations"]);
476          try {
477            avgQuality += performanceGraph.Rows.First().Values.TakeWhile(x => x.Item1 < MaximumEvaluations).Last().Item2;
478            counter++;
479          } catch { continue; }
480        }
481        avgQuality /= counter;
482        instances.Add(avgQuality, relevantRuns.Key);
483      }
484
485      suggestedInstances.Clear();
486      var instanceLadder = instances.Select(x => (IAlgorithm)x.Value.Clone());
487      suggestedInstances.AddRange(Maximization ? instanceLadder.Reverse() : instanceLadder);
488    }
489
490    public event PropertyChangedEventHandler PropertyChanged;
491    private void OnPropertyChanged(string propertyName) {
492      var handler = PropertyChanged;
493      if (handler != null) handler(this, new PropertyChangedEventArgs(propertyName));
494    }
495  }
496}
Note: See TracBrowser for help on using the repository browser.