Free cookie consent management tool by TermsFeed Policy Generator

source: branches/PerformanceComparison/HeuristicLab.OptimizationExpertSystem.Common/3.3/KnowledgeCenter.cs @ 13774

Last change on this file since 13774 was 13774, checked in by abeham, 8 years ago

#2457: worked on recommendation algorithms

File size: 38.3 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using HeuristicLab.Analysis;
23using HeuristicLab.Analysis.SelfOrganizingMaps;
24using HeuristicLab.Collections;
25using HeuristicLab.Common;
26using HeuristicLab.Common.Resources;
27using HeuristicLab.Core;
28using HeuristicLab.Data;
29using HeuristicLab.MainForm;
30using HeuristicLab.Optimization;
31using HeuristicLab.Persistence.Default.Xml;
32using HeuristicLab.Problems.DataAnalysis;
33using HeuristicLab.Random;
34using System;
35using System.Collections.Generic;
36using System.Drawing;
37using System.IO;
38using System.Linq;
39using System.Threading;
40using System.Threading.Tasks;
41using RunCreationClient = HeuristicLab.Clients.OKB.RunCreation.RunCreationClient;
42using SingleObjectiveOKBProblem = HeuristicLab.Clients.OKB.RunCreation.SingleObjectiveOKBProblem;
43using SingleObjectiveOKBSolution = HeuristicLab.Clients.OKB.RunCreation.SingleObjectiveOKBSolution;
44
45namespace HeuristicLab.OptimizationExpertSystem.Common {
46  [Item("Knowledge Center", "Currently in experimental phase, an expert system that makes algorithm suggestions based on fitness landscape analysis features and an optimization knowledge base.")]
47  [Creatable(CreatableAttribute.Categories.TestingAndAnalysis, Priority = 119)]
48  public sealed class KnowledgeCenter : IContent {
49    private bool SuppressEvents { get; set; }
50
51    public string Filename { get; set; }
52
53    public static Image StaticItemImage {
54      get { return VSImageLibrary.Library; }
55    }
56
57    private readonly IntValue maximumEvaluations;
58    public IntValue MaximumEvaluations {
59      get { return maximumEvaluations; }
60    }
61
62    private readonly DoubleValue minimumTarget;
63    public DoubleValue MinimumTarget {
64      get { return minimumTarget; }
65    }
66   
67    private readonly RunCollection instanceRuns;
68    public RunCollection InstanceRuns {
69      get { return instanceRuns; }
70    }
71
72    private readonly RunCollection seededRuns;
73    public RunCollection SeededRuns {
74      get { return seededRuns; }
75    }
76
77    private readonly RunCollection knowledgeBase;
78    public RunCollection KnowledgeBase {
79      get { return knowledgeBase; }
80    }
81
82    private readonly SingleObjectiveOKBProblem problem;
83    public SingleObjectiveOKBProblem Problem {
84      get { return problem; }
85    }
86
87    private readonly ItemList<IAlgorithm> algorithmInstances;
88    private readonly ReadOnlyItemList<IAlgorithm> readonlyAlgorithmInstances;
89    public ReadOnlyItemList<IAlgorithm> AlgorithmInstances {
90      get { return readonlyAlgorithmInstances; }
91    }
92
93    private readonly RunCollection problemInstances;
94    public RunCollection ProblemInstances {
95      get { return problemInstances; }
96    }
97
98    private readonly CheckedItemList<StringValue> problemCharacteristics;
99    private readonly ReadOnlyCheckedItemList<StringValue> readonlyProblemCharacteristics;
100    public ReadOnlyCheckedItemList<StringValue> ProblemCharacteristics {
101      get { return readonlyProblemCharacteristics; }
102    }
103
104    private IAlgorithmInstanceRecommender algorithmInstanceRecommender;
105    public IAlgorithmInstanceRecommender AlgorithmInstanceRecommender {
106      get { return algorithmInstanceRecommender; }
107      set { algorithmInstanceRecommender = value; }
108    }
109
110    private readonly CheckedItemList<IScope> solutionSeedingPool;
111    public CheckedItemList<IScope> SolutionSeedingPool {
112      get { return solutionSeedingPool; }
113    }
114
115    private readonly EnumValue<SeedingStrategyTypes> seedingStrategy;
116    public EnumValue<SeedingStrategyTypes> SeedingStrategy {
117      get { return seedingStrategy; }
118    }
119   
120    private BidirectionalLookup<long, IRun> algorithmId2RunMapping;
121    private BidirectionalDictionary<long, IAlgorithm> algorithmId2AlgorithmInstanceMapping;
122    private BidirectionalDictionary<long, IRun> problemId2ProblemInstanceMapping;
123   
124    public bool Maximization {
125      get { return Problem != null && Problem.ProblemId >= 0 && ((IValueParameter<BoolValue>)Problem.MaximizationParameter).Value.Value; }
126    }
127
128    public KnowledgeCenter() {
129      maximumEvaluations = new IntValue(10000);
130      minimumTarget = new DoubleValue(0.05);
131      instanceRuns = new RunCollection();
132      seededRuns = new RunCollection();
133      knowledgeBase = new RunCollection();
134      algorithmInstances = new ItemList<IAlgorithm>();
135      readonlyAlgorithmInstances = algorithmInstances.AsReadOnly();
136      problemInstances = new RunCollection();
137      problemCharacteristics = new CheckedItemList<StringValue>();
138      readonlyProblemCharacteristics = problemCharacteristics.AsReadOnly();
139      algorithmInstanceRecommender = new OverallBestRecommender(this);
140      problem = new SingleObjectiveOKBProblem();
141      algorithmId2RunMapping = new BidirectionalLookup<long, IRun>();
142      algorithmId2AlgorithmInstanceMapping = new BidirectionalDictionary<long, IAlgorithm>();
143      problemId2ProblemInstanceMapping = new BidirectionalDictionary<long, IRun>();
144      solutionSeedingPool = new CheckedItemList<IScope>();
145      seedingStrategy = new EnumValue<SeedingStrategyTypes>(SeedingStrategyTypes.NoSeeding);
146      RegisterEventHandlers();
147    }
148
149    private void ProblemOnProblemChanged(object sender, EventArgs eventArgs) {
150      // TODO: Potentially, knowledge base has to be re-downloaded
151    }
152
153    private void RegisterEventHandlers() {
154      maximumEvaluations.ValueChanged += MaximumEvaluationsOnValueChanged;
155      minimumTarget.ValueChanged += MinimumTargetOnValueChanged;
156      problem.ProblemChanged += ProblemOnProblemChanged;
157      problem.Solutions.ItemsAdded += ProblemSolutionsChanged;
158      problem.Solutions.ItemsReplaced += ProblemSolutionsChanged;
159      problem.Solutions.ItemsRemoved += ProblemSolutionsChanged;
160      problem.Solutions.CollectionReset += ProblemSolutionsChanged;
161      instanceRuns.CollectionReset += InformationChanged;
162      instanceRuns.ItemsAdded += InformationChanged;
163      instanceRuns.ItemsRemoved += InformationChanged;
164      instanceRuns.Reset += InformationChanged;
165      instanceRuns.UpdateOfRunsInProgressChanged += InformationChanged;
166      knowledgeBase.CollectionReset += InformationChanged;
167      knowledgeBase.ItemsAdded += InformationChanged;
168      knowledgeBase.ItemsRemoved += InformationChanged;
169      problemCharacteristics.ItemsAdded += CharacteristicChanged;
170      problemCharacteristics.ItemsReplaced += CharacteristicChanged;
171      problemCharacteristics.ItemsRemoved += CharacteristicChanged;
172      problemCharacteristics.CollectionReset += CharacteristicChanged;
173      problemCharacteristics.CheckedItemsChanged += CharacteristicChanged;
174    }
175
176    private void MaximumEvaluationsOnValueChanged(object sender, EventArgs eventArgs) {
177      UpdateSuggestions();
178    }
179
180    private void MinimumTargetOnValueChanged(object sender, EventArgs e) {
181      UpdateSuggestions();
182    }
183
184    private void ProblemSolutionsChanged(object sender, EventArgs e) {
185      foreach (var sol in Problem.Solutions.Select(x => x.Solution).OfType<IScope>()) {
186        if (!SolutionSeedingPool.Contains(sol))
187          SolutionSeedingPool.Add(sol, false);
188      }
189    }
190
191    private void InformationChanged(object sender, EventArgs e) {
192      var runCollection = sender as RunCollection;
193      if (runCollection != null && runCollection.UpdateOfRunsInProgress) return;
194      UpdateSuggestions();
195    }
196
197    private void CharacteristicChanged(object sender, EventArgs e) {
198      if (SuppressEvents) return;
199      UpdateInstanceProjection();
200    }
201   
202    public bool IsCurrentInstance(IRun run) {
203      if (!problemId2ProblemInstanceMapping.ContainsSecond(run)) return false;
204      return problemId2ProblemInstanceMapping.GetBySecond(run) == Problem.ProblemId;
205    }
206
207    public void UpdateInstanceProjection() {
208      if (ProblemCharacteristics.Count == 0) return;
209
210      var instances = GetProblemCharacteristics();
211
212      var key2Idx = new BidirectionalDictionary<IRun, int>();
213      foreach (var kvp in instances.Select((k, i) => new { Index = i, Key = k.Key }))
214        key2Idx.Add(kvp.Key, kvp.Index);
215
216      #region MDS
217      Func<double[], double[], double> euclid = (a, b) => Math.Sqrt(a.Zip(b, (x, y) => (x - y)).Sum(x => x * x));
218      var num = instances.Count;
219      var matrix = new DoubleMatrix(num, num);
220      for (var i = 0; i < num - 1; i++) {
221        for (var j = i + 1; j < num; j++) {
222          matrix[i, j] = matrix[j, i] = euclid(instances[key2Idx.GetBySecond(i)], instances[key2Idx.GetBySecond(j)]);
223        }
224      }
225
226      var coords = MultidimensionalScaling.KruskalShepard(matrix);
227      #endregion
228      #region PCA
229      var ds = new double[instances.Count, ProblemCharacteristics.CheckedItems.Count()];
230      foreach (var instance in instances) {
231        var arr = instance.Value;
232        for (var feature = 0; feature < arr.Length; feature++)
233          ds[key2Idx.GetByFirst(instance.Key), feature] = arr[feature];
234      }
235
236      int info;
237      double[] s2;
238      double[,] v;
239      alglib.pcabuildbasis(ds, ds.GetLength(0), ds.GetLength(1), out info, out s2, out v);
240      #endregion
241      #region SOM
242      var features = new DoubleMatrix(ProblemCharacteristics.CheckedItems.Count(), instances.Count);
243      foreach (var instance in instances) {
244        var arr = instance.Value;
245        for (var feature = 0; feature < arr.Length; feature++)
246          features[feature, key2Idx.GetByFirst(instance.Key)] = arr[feature];
247      }
248      var somCoords = SOM.Map(features, new MersenneTwister(42), somSize: 20, learningRadius: 20, iterations: 200, jittering: true);
249      #endregion
250
251      ProblemInstances.UpdateOfRunsInProgress = true;
252      try {
253        foreach (var instance in ProblemInstances) {
254          double x = 0, y = 0;
255          for (var feature = 0; feature < ds.GetLength(1); feature++) {
256            x += ds[key2Idx.GetByFirst(instance), feature] * v[feature, 0];
257            y += ds[key2Idx.GetByFirst(instance), feature] * v[feature, 1];
258          }
259
260          IItem item;
261          if (instance.Results.TryGetValue("Projection.PCA.X", out item)) {
262            ((DoubleValue)item).Value = x;
263          } else instance.Results.Add("Projection.PCA.X", new DoubleValue(x));
264          if (instance.Results.TryGetValue("Projection.PCA.Y", out item)) {
265            ((DoubleValue)item).Value = y;
266          } else instance.Results.Add("Projection.PCA.Y", new DoubleValue(y));
267
268          if (instance.Results.TryGetValue("Projection.MDS.X", out item)) {
269            ((DoubleValue)item).Value = coords[key2Idx.GetByFirst(instance), 0];
270          } else instance.Results.Add("Projection.MDS.X", new DoubleValue(coords[key2Idx.GetByFirst(instance), 0]));
271          if (instance.Results.TryGetValue("Projection.MDS.Y", out item)) {
272            ((DoubleValue)item).Value = coords[key2Idx.GetByFirst(instance), 1];
273          } else instance.Results.Add("Projection.MDS.Y", new DoubleValue(coords[key2Idx.GetByFirst(instance), 1]));
274
275          if (instance.Results.TryGetValue("Projection.SOM.X", out item)) {
276            ((DoubleValue)item).Value = somCoords[key2Idx.GetByFirst(instance), 0];
277          } else instance.Results.Add("Projection.SOM.X", new DoubleValue(somCoords[key2Idx.GetByFirst(instance), 0]));
278          if (instance.Results.TryGetValue("Projection.SOM.Y", out item)) {
279            ((DoubleValue)item).Value = somCoords[key2Idx.GetByFirst(instance), 1];
280          } else instance.Results.Add("Projection.SOM.Y", new DoubleValue(somCoords[key2Idx.GetByFirst(instance), 1]));
281        }
282      } finally { ProblemInstances.UpdateOfRunsInProgress = false; }
283    }
284
285    private Dictionary<IRun, double[]> GetProblemCharacteristics() {
286      var instances = new Dictionary<IRun, double[]>();
287      var values = new List<double>[ProblemCharacteristics.CheckedItems.Count()];
288      foreach (var run in ProblemInstances) {
289        var f = 0;
290        instances[run] = new double[ProblemCharacteristics.CheckedItems.Count()];
291        foreach (var c in ProblemCharacteristics.CheckedItems.Select(x => x.Value.Value)) {
292          if (values[f] == null) values[f] = new List<double>(ProblemInstances.Count);
293          IItem item;
294          if (run.Results.TryGetValue(c, out item)) {
295            var val = (double)((dynamic)item).Value;
296            if (!double.IsNaN(val)) values[f].Add(val);
297            instances[run][f] = val;
298          } else instances[run][f] = double.NaN;
299          f++;
300        }
301      }
302      var median = values.Select(x => x.Count == 0 ? 0.0 : x.Median()).ToList();
303
304      var allValues = instances.Values.Select(x => x.Select((f, i) => new {Idx = i, Val = double.IsNaN(f) ? median[i] : f}).ToList())
305        .SelectMany(x => x)
306        .GroupBy(x => x.Idx, x => x.Val)
307        .OrderBy(x => x.Key).ToList();
308      var avg = allValues.Select(x => x.Average()).ToList();
309      var stdev = allValues.Select(x => x.StandardDeviation()).ToList();
310
311      // normalize characteristic values by transforming them to their z-score
312      foreach (var key in instances.Keys.ToList()) {
313        var arr = instances[key];
314        for (var i = 0; i < arr.Length; i++) {
315          if (double.IsNaN(arr[i])) arr[i] = median[i];
316          if (stdev[i] > 0) arr[i] = (arr[i] - avg[i]) / stdev[i];
317        }
318      }
319      return instances;
320    }
321
322    private static readonly HashSet<string> InterestingValueNames = new HashSet<string>() {
323      "QualityPerEvaluations", "Problem Name", "Problem Type", "Algorithm Name", "Algorithm Type", "Maximization", "BestKnownQuality"
324    };
325
326    public Task<ResultCollection> StartAlgorithmAsync(int index) {
327      return StartAlgorithmAsync(index, CancellationToken.None);
328    }
329
330    public Task<ResultCollection> StartAlgorithmAsync(int index, CancellationToken cancellation) {
331      var selectedInstance = algorithmInstances[index];
332      var algorithmClone = (IAlgorithm)selectedInstance.Clone();
333      var problemClone = Problem.CloneProblem() as ISingleObjectiveHeuristicOptimizationProblem;
334      if (problemClone == null) throw new InvalidOperationException("Problem is not of type " + typeof(ISingleObjectiveHeuristicOptimizationProblem).FullName);
335      // TODO: It is assumed the problem instance by default is configured using no preexisting solution creator
336      var seedingStrategyLocal = SeedingStrategy.Value;
337      if (seedingStrategyLocal != SeedingStrategyTypes.NoSeeding) {
338        if (!SolutionSeedingPool.CheckedItems.Any()) throw new InvalidOperationException("There are no solutions selected for seeding.");
339        // TODO: It would be necessary to specify the solution creator somewhere (property and GUI)
340        var seedingCreator = problemClone.Operators.OfType<IPreexistingSolutionCreator>().FirstOrDefault();
341        if (seedingCreator == null) throw new InvalidOperationException("The problem does not contain a solution creator that allows seeding.");
342        seedingCreator.PreexistingSolutionsParameter.Value.Replace(SolutionSeedingPool.CheckedItems.Select(x => x.Value));
343        seedingCreator.SampleFromPreexistingParameter.Value.Value = seedingStrategyLocal == SeedingStrategyTypes.SeedBySampling;
344        // TODO: WHY!? WHY??!?
345        ((dynamic)problemClone.SolutionCreatorParameter).Value = (dynamic)seedingCreator;
346      }
347      algorithmClone.Problem = problemClone;
348      algorithmClone.Prepare(true);
349      IParameter stopParam;
350      var monitorStop = true;
351      if (algorithmClone.Parameters.TryGetValue("MaximumEvaluations", out stopParam)) {
352        var maxEvalParam = stopParam as IValueParameter<Data.IntValue>;
353        if (maxEvalParam != null) {
354          maxEvalParam.Value.Value = MaximumEvaluations.Value;
355          monitorStop = false;
356        }
357      }
358
359      // TODO: The following can be simplified when we have async implementation patterns for our algorithms:
360      // TODO: The closures can be removed and replaced with private member methods
361      var waitHandle = new AutoResetEvent(false);
362
363      #region EventHandler closures
364      EventHandler exeStateChanged = (sender, e) => {
365        if (algorithmClone.ExecutionState == ExecutionState.Stopped) {
366          lock (Problem.Solutions) {
367            foreach (var solution in algorithmClone.Results.Where(x => x.Name.ToLower().Contains("solution")).Select(x => x.Value).OfType<IScope>()) {
368              Problem.Solutions.Add(new SingleObjectiveOKBSolution(Problem.ProblemId) {
369                Quality = solution.Variables.ContainsKey(Problem.Problem.Evaluator.QualityParameter.ActualName) ? ((DoubleValue)solution.Variables[Problem.Problem.Evaluator.QualityParameter.ActualName].Value).Value : double.NaN,
370                Solution = (IItem)solution.Clone()
371              });
372            }
373          }
374          if (seedingStrategyLocal == SeedingStrategyTypes.NoSeeding) {
375            lock (InstanceRuns) {
376              InstanceRuns.Add(algorithmClone.Runs.Last());
377            }
378          } else {
379            lock (SeededRuns) {
380              SeededRuns.Add(algorithmClone.Runs.Last());
381            }
382          }
383          waitHandle.Set();
384        }
385      };
386
387      EventHandler<EventArgs<Exception>> exceptionOccurred = (sender, e) => {
388        waitHandle.Set();
389      };
390
391      EventHandler timeChanged = (sender, e) => {
392        IResult evalSolResult;
393        if (!algorithmClone.Results.TryGetValue("EvaluatedSolutions", out evalSolResult) || !(evalSolResult.Value is Data.IntValue)) return;
394        var evalSols = ((Data.IntValue)evalSolResult.Value).Value;
395        if (evalSols >= MaximumEvaluations.Value && algorithmClone.ExecutionState == ExecutionState.Started)
396          algorithmClone.Stop();
397      };
398      #endregion
399
400      algorithmClone.ExecutionStateChanged += exeStateChanged;
401      algorithmClone.ExceptionOccurred += exceptionOccurred;
402      if (monitorStop) algorithmClone.ExecutionTimeChanged += timeChanged;
403
404      return Task.Factory.StartNew(() => {
405        algorithmClone.Start();
406        OnAlgorithmInstanceStarted(algorithmClone);
407        var cancelRequested = false;
408        while (!waitHandle.WaitOne(200)) {
409          if (cancellation.IsCancellationRequested) {
410            cancelRequested = true;
411            break;
412          }
413        }
414        if (cancelRequested) {
415          try { algorithmClone.Stop(); } catch { } // ignore race condition if it is stopped in the meantime
416          waitHandle.WaitOne();
417        }
418        waitHandle.Dispose();
419        return algorithmClone.Results;
420      }, TaskCreationOptions.LongRunning);
421    }
422
423    public ResultCollection StartAlgorithm(int index, CancellationToken cancellation) {
424      var task = StartAlgorithmAsync(index, cancellation);
425      task.Wait(cancellation);
426      return task.Result;
427    }
428
429    public Task UpdateKnowledgeBaseAsync(IProgress progress = null) {
430      if (progress == null) progress = new Progress(string.Empty);
431      progress.Start("Updating Knowledge Base from OKB");
432      OnDownloadStarted(progress);
433      return Task.Factory.StartNew(() => { DoUpdateKnowledgeBase(progress); }, TaskCreationOptions.LongRunning);
434    }
435
436    public void UpdateKnowledgeBase(IProgress progress = null) {
437      UpdateKnowledgeBaseAsync(progress).Wait();
438    }
439
440    private void DoUpdateKnowledgeBase(IProgress progress) {
441      var queryClient = Clients.OKB.Query.QueryClient.Instance;
442      var adminClient = Clients.OKB.Administration.AdministrationClient.Instance;
443      try {
444        progress.Status = "Connecting to OKB...";
445        progress.ProgressValue = 0;
446        // FIXME: How to tell if refresh is necessary?
447        var refreshTasks = new[] {
448          Task.Factory.StartNew(() => queryClient.Refresh()),
449          Task.Factory.StartNew(() => adminClient.Refresh())
450        };
451        Task.WaitAll(refreshTasks);
452
453        var probInstance = adminClient.Problems.SingleOrDefault(x => x.Id == Problem.ProblemId);
454        if (probInstance == null) throw new InvalidOperationException("The chosen problem instance cannot be found in the OKB.");
455        var probClassId = probInstance.ProblemClassId;
456
457        var problemClassFilter = (Clients.OKB.Query.StringComparisonAvailableValuesFilter)queryClient.Filters.Single(x => x.Label == "Problem Class Name");
458        problemClassFilter.Value = adminClient.ProblemClasses.Single(x => x.Id == probClassId).Name;
459
460        problemId2ProblemInstanceMapping.Clear();
461        progress.Status = "Downloading problem instances...";
462        progress.ProgressValue = 0;
463        int[] p = { 0 };
464        ProblemInstances.UpdateOfRunsInProgress = true;
465        ProblemInstances.Clear();
466        var characteristics = new HashSet<string>();
467        var totalProblems = adminClient.Problems.Count(x => x.ProblemClassId == probClassId);
468        Parallel.ForEach(adminClient.Problems.Where(x => x.ProblemClassId == probClassId), new ParallelOptions { MaxDegreeOfParallelism = 3 }, (pInst) => {
469          var charas = new List<string>();
470          IRun probRun = null;
471          var data = Clients.OKB.Administration.AdministrationClient.GetProblemData(pInst.Id);
472          if (data != null) {
473            using (var stream = new MemoryStream(data)) {
474              try {
475                var prob = (IProblem)XmlParser.Deserialize<IContent>(stream);
476                probRun = new Run() { Name = prob.Name };
477                prob.CollectParameterValues(probRun.Parameters);
478                probRun.Parameters["Problem Name"] = new StringValue(prob.Name);
479                probRun.Parameters["Problem Type"] = new StringValue(prob.GetType().Name);
480                foreach (var v in RunCreationClient.Instance.GetCharacteristicValues(pInst.Id)) {
481                  probRun.Results.Add("Characteristic." + v.Name, RunCreationClient.Instance.ConvertToItem(v));
482                  charas.Add("Characteristic." + v.Name);
483                }
484              } catch { }
485              stream.Close();
486            }
487            if (probRun != null) {
488              lock (characteristics) {
489                problemId2ProblemInstanceMapping.Add(pInst.Id, probRun);
490                ProblemInstances.Add(probRun);
491                progress.Status = string.Format("Downloaded problem {0} (okb-id: {1})....", pInst.Name, pInst.Id);
492                p[0]++;
493                progress.ProgressValue = p[0] / (double)totalProblems;
494                foreach (var c in charas) characteristics.Add(c);
495              }
496            }
497          }
498        });
499
500        algorithmId2AlgorithmInstanceMapping.Clear();
501        progress.Status = "Downloading algorithm instances...";
502        progress.ProgressValue = 0;
503        p[0] = 0;
504        Parallel.ForEach(adminClient.Algorithms, new ParallelOptions { MaxDegreeOfParallelism = 3 }, (algInst) => {
505          IAlgorithm alg = null;
506          var data = Clients.OKB.Administration.AdministrationClient.GetAlgorithmData(algInst.Id);
507          if (data != null) {
508            using (var stream = new MemoryStream(data)) {
509              try {
510                alg = (IAlgorithm)XmlParser.Deserialize<IContent>(stream);
511              } catch { }
512              stream.Close();
513            }
514            if (alg != null) {
515              lock (algorithmId2AlgorithmInstanceMapping) {
516                algorithmId2AlgorithmInstanceMapping.Add(algInst.Id, alg);
517                progress.Status = string.Format("Downloaded algorithm {0} (okb-id: {1})...", algInst.Name, algInst.Id);
518                p[0]++;
519                progress.ProgressValue = p[0] / (double)adminClient.Algorithms.Count;
520              }
521            }
522          }
523        });
524
525        var interestingValues = queryClient.ValueNames.Where(x => InterestingValueNames.Contains(x.Name)).ToList();
526
527        progress.Status = "Downloading runs...";
528        progress.ProgressValue = 0;
529        p[0] = 0;
530        var count = queryClient.GetNumberOfRuns(problemClassFilter);
531        if (count == 0) return;
532       
533        var runList = new List<IRun>();
534        var runIds = queryClient.GetRunIds(problemClassFilter).ToList();
535        var batches = runIds.Select((v, i) => new { Idx = i, Val = v }).GroupBy(x => x.Idx / 500, x => x.Val);
536        Parallel.ForEach(batches.Select(x => x.ToList()), new ParallelOptions { MaxDegreeOfParallelism = 3 }, (batch) => {
537          var okbRuns = queryClient.GetRunsWithValues(batch, true, interestingValues);
538          var hlRuns = okbRuns.AsParallel().Select(x => new { AlgorithmId = x.Algorithm.Id, Run = queryClient.ConvertToOptimizationRun(x) }).ToList();
539          lock (runList) {
540            foreach (var r in hlRuns) {
541              algorithmId2RunMapping.Add(r.AlgorithmId, r.Run);
542              runList.Add(r.Run);
543            }
544            progress.Status = string.Format("Downloaded runs {0} to {1} of {2}...", p[0], p[0] + batch.Count, count);
545            p[0] += batch.Count;
546            progress.ProgressValue = p[0] / (double)count;
547          }
548        });
549
550        progress.Status = "Finishing...";
551        var algInstRunDict = runList.Where(x => x.Parameters.ContainsKey("Problem Name") && x.Parameters["Problem Name"] is StringValue)
552                                          .GroupBy(x => ((StringValue)x.Parameters["Problem Name"]).Value)
553                                          .ToDictionary(x => x.Key, x => x.GroupBy(y => ((StringValue)y.Parameters["Algorithm Name"]).Value)
554                                                                                  .ToDictionary(y => y.Key, y => y.ToList()));
555
556        foreach (var instance in ProblemInstances) {
557          IItem probNameParam;
558          if (!instance.Parameters.TryGetValue("Problem Name", out probNameParam)) continue;
559
560          var probInstanceName = ((StringValue)probNameParam).Value;
561          var maximization = ((BoolValue)instance.Parameters["Maximization"]).Value;
562         
563          IItem bkParam;
564          if (!instance.Parameters.TryGetValue("BestKnownQuality", out bkParam) || !(bkParam is DoubleValue)) {
565            Dictionary<string, List<IRun>> algRuns;
566            if (!algInstRunDict.TryGetValue(probInstanceName, out algRuns)) continue;
567            var list = algRuns.SelectMany(x => x.Value)
568                              .Where(x => x.Results.ContainsKey("QualityPerEvaluations"))
569                              .Select(x => ((IndexedDataTable<double>)x.Results["QualityPerEvaluations"]).Rows.First().Values.Last().Item2);
570            bkParam = new DoubleValue(maximization ? list.Max() : list.Min());
571            instance.Parameters["BestKnownQuality"] = bkParam;
572          } else bkParam = instance.Parameters["BestKnownQuality"];
573
574          var bkQuality = ((DoubleValue)bkParam).Value;
575
576          if (!algInstRunDict.ContainsKey(probInstanceName)) continue;
577          // TODO: Things needs to be configurable here (table name, targets)
578          foreach (var target in new[] { 1, 1.01, 1.05, 1.1, 1.2, 1.5 }) {
579            var indexMap = new BidirectionalDictionary<string, int>();
580            var dict = new Dictionary<string, double>();
581            var idx = 0;
582            foreach (var kvp in algInstRunDict[probInstanceName]) {
583              var result = ExpectedRuntimeHelper.CalculateErt(kvp.Value, "QualityPerEvaluations", bkQuality * target, maximization);
584              indexMap.Add(kvp.Key, idx);
585              dict[kvp.Key] = !double.IsNaN(result.ExpectedRuntime) ? result.ExpectedRuntime : int.MaxValue;
586              idx++;
587            }
588            var points = dict.OrderBy(x => indexMap.GetByFirst(x.Key)).Select(x => x.Value > 0 ? Math.Log10(x.Value) : 0).ToArray();
589            int[] clusters;
590            Ckmeans1dClusters(points, 5, out clusters);
591            var ranks = clusters.Select((c, i) => new { Cluster = c, Perf = dict[indexMap.GetBySecond(i)] })
592                                .GroupBy(x => x.Cluster, x => x.Perf)
593                                .Select(x => new { Cluster = x.Key, AvgPerf = x.Average() })
594                                .OrderBy(x => x.AvgPerf)
595                                .Select((c, i) => new { Cluster = c.Cluster, Rank = i })
596                                .ToDictionary(x => x.Cluster, x => x.Rank);
597            for (var i = 0; i < clusters.Length; i++) {
598              var resultName = "Rank." + indexMap.GetBySecond(i) + "@" + ((target - 1) * 100) + "%";
599              instance.Results[resultName] = new IntValue(dict[indexMap.GetBySecond(i)] < int.MaxValue ? ranks[clusters[i]] : 6);
600            }
601          }
602        }
603        try {
604          SuppressEvents = true;
605          problemCharacteristics.Replace(characteristics.Select(x => new StringValue(x)));
606          foreach (var pc in problemCharacteristics.Where(x => !x.Value.StartsWith("Characteristic.")))
607            problemCharacteristics.SetItemCheckedState(pc, false);
608        } finally { SuppressEvents = false; }
609        try {
610          KnowledgeBase.UpdateOfRunsInProgress = true;
611          KnowledgeBase.Clear();
612          KnowledgeBase.AddRange(runList);
613        } finally { KnowledgeBase.UpdateOfRunsInProgress = false; }
614      } finally { progress.Finish(); ProblemInstances.UpdateOfRunsInProgress = false; }
615      UpdateInstanceProjection();
616      UpdateSuggestions();
617    }
618
619    public void UpdateSuggestions() {
620      // TODO: Maintain a separate list of suggested instances
621      // TODO: expose expected expected run time
622      algorithmInstances.Replace(AlgorithmInstanceRecommender.GetRanking());
623    }
624
625    public Dictionary<IAlgorithm, double> GetAlgorithmPerformance(IRun problemInstance) {
626      if (!problemInstance.Parameters.ContainsKey("BestKnownQuality")) return new Dictionary<IAlgorithm, double>();
627      var target = GetTarget(((DoubleValue)problemInstance.Parameters["BestKnownQuality"]).Value);
628      return knowledgeBase.Where(x => ((StringValue)x.Parameters["Problem Name"]).Value == ((StringValue)problemInstance.Parameters["Problem Name"]).Value)
629                          .GroupBy(x => algorithmId2AlgorithmInstanceMapping.GetByFirst(algorithmId2RunMapping.GetBySecond(x).Single()))
630                          .ToDictionary(x => x.Key, x => ExpectedRuntimeHelper.CalculateErt(x.ToList(), "QualityPerEvaluations", target, Maximization).ExpectedRuntime);
631    }
632
633    public Dictionary<IAlgorithm, List<IRun>> GetKnowledgeBaseByAlgorithm() {
634      return KnowledgeBase.GroupBy(x => algorithmId2AlgorithmInstanceMapping.GetByFirst(algorithmId2RunMapping.GetBySecond(x).Single()))
635                          .ToDictionary(x => x.Key, x => x.ToList());
636    }
637
638    public IEnumerable<IRegressionProblem> GetDataAnalysisProblem(double target) {
639      if (Problem == null) yield break;
640      var characteristics = GetProblemCharacteristics();
641      // TODO: knowledgebase only stores problem name as a string
642      // this doesn't work if there are two equally named problem instances
643      var problemMap = ProblemInstances.Select(x => new { Key = ((StringValue)x.Parameters["Problem Name"]).Value, Value = x })
644                                       .ToDictionary(x => x.Key, x => x.Value);
645      foreach (var relevantRuns in knowledgeBase.GroupBy(x => algorithmId2RunMapping.GetBySecond(x).Single())) {
646        var problemRuns = relevantRuns.GroupBy(x => ((StringValue)x.Parameters["Problem Name"]).Value).ToList();
647        var ds = new ModifiableDataset();
648        ds.AddVariable("Problem Name", new List<string>());
649        foreach (var pc in ProblemCharacteristics.CheckedItems)
650          ds.AddVariable(pc.Value.Value, new List<double>());
651        ds.AddVariable("ERT", new List<double>());
652        var max = Maximization;
653        foreach (var pr in problemRuns) {
654          var prob = problemMap[pr.Key];
655          var features = characteristics[prob];
656          var bkq = ((DoubleValue)prob.Parameters["BestKnownQuality"]).Value;
657          var ert = ExpectedRuntimeHelper.CalculateErt(pr.ToList(), "QualityPerEvaluations", GetTarget(bkq), max).ExpectedRuntime;
658          if (double.IsNaN(ert)) ert = int.MaxValue;
659          ds.AddRow(new object[] { pr.Key }.Concat(features.Cast<object>()).Concat(new object[] { ert }));
660        }
661        var datAnalysisData = new RegressionProblemData(ds, ProblemCharacteristics.CheckedItems.Select(x => x.Value.Value), "ERT");
662        var result = new RegressionProblem() {
663          Name = algorithmId2AlgorithmInstanceMapping.GetByFirst(relevantRuns.Key).Name
664        };
665        result.ProblemDataParameter.Value = datAnalysisData;
666        yield return result;
667      }
668    }
669
670    private double GetTarget(double bestKnownQuality) {
671      return bestKnownQuality * (Maximization ? (1 - MinimumTarget.Value) : (1 + MinimumTarget.Value));
672    }
673
674    public Dictionary<IRun, double> GetProblemDistances(ProblemInstanceProximityType proximityType) {
675      var result = new Dictionary<IRun, double>();
676      var currentInstance = problemId2ProblemInstanceMapping.GetByFirst(Problem.ProblemId);
677      switch (proximityType) {
678        case ProblemInstanceProximityType.MDS:
679        case ProblemInstanceProximityType.PCA:
680        case ProblemInstanceProximityType.SOM:
681          double xa, ya;
682          GetProjectionCoordinates(currentInstance, proximityType, out xa, out ya);
683          foreach (var b in ProblemInstances) {
684            if (b == currentInstance) continue;
685            double xb, yb;
686            GetProjectionCoordinates(b, proximityType, out xb, out yb);
687            var d = Math.Sqrt((xa - xb) * (xa - xb) + (ya - yb) * (ya - yb));
688            result[b] = d;
689          }
690          break;
691        case ProblemInstanceProximityType.FeatureSpace:
692          var features = GetProblemCharacteristics();
693          var cF = features[currentInstance];
694          foreach (var b in ProblemInstances) {
695            if (b == currentInstance) continue;
696            var sum = features[b].Select((t, f) => (cF[f] - t) * (cF[f] - t)).Sum();
697            result[b] = Math.Sqrt(sum);
698          }
699          break;
700        default: throw new InvalidOperationException(string.Format("Unkonwn proximity type {0}", proximityType));
701      }
702      return result;
703    }
704
705    private void GetProjectionCoordinates(IRun problemInstance, ProblemInstanceProximityType proximityType, out double x, out double y) {
706      x = ((DoubleValue)problemInstance.Results["Projection." + proximityType + ".X"]).Value;
707      y = ((DoubleValue)problemInstance.Results["Projection." + proximityType + ".Y"]).Value;
708      if (proximityType == ProblemInstanceProximityType.SOM) {
709        x = Math.Floor(x);
710        y = Math.Floor(y);
711      }
712    }
713
714    public event EventHandler<EventArgs<IProgress>> DownloadStarted;
715    private void OnDownloadStarted(IProgress progress) {
716      var handler = DownloadStarted;
717      if (handler != null) handler(this, new EventArgs<IProgress>(progress));
718    }
719
720    public event EventHandler<EventArgs<IAlgorithm>> AlgorithmInstanceStarted;
721    private void OnAlgorithmInstanceStarted(IAlgorithm instance) {
722      var handler = AlgorithmInstanceStarted;
723      if (handler != null) handler(this, new EventArgs<IAlgorithm>(instance));
724    }
725
726    // implement further classes and methods
727    private static SortedList<double, int> Ckmeans1dClusters(double[] estimations, int k, out int[] clusterValues) {
728      int nPoints = estimations.Length;
729      var distinct = estimations.Distinct().OrderBy(x => x).ToArray();
730      var max = distinct.Max();
731      if (distinct.Length <= k) {
732        var dict = distinct.Select((v, i) => new { Index = i, Value = v }).ToDictionary(x => x.Value, y => y.Index);
733        for (int i = distinct.Length; i < k; i++)
734          dict.Add(max + i - distinct.Length + 1, i);
735
736        clusterValues = new int[nPoints];
737        for (int i = 0; i < nPoints; i++)
738          if (!dict.ContainsKey(estimations[i])) clusterValues[i] = 0;
739          else clusterValues[i] = dict[estimations[i]];
740
741        return new SortedList<double, int>(dict);
742      }
743
744      var n = distinct.Length;
745      var D = new double[n, k];
746      var B = new int[n, k];
747
748      for (int m = 0; m < k; m++) {
749        for (int j = m; j <= n - k + m; j++) {
750          if (m == 0)
751            D[j, m] = SumOfSquaredDistances(distinct, 0, j + 1);
752          else {
753            var minD = double.MaxValue;
754            var minI = 0;
755            for (int i = 1; i <= j; i++) {
756              var d = D[i - 1, m - 1] + SumOfSquaredDistances(distinct, i, j + 1);
757              if (d < minD) {
758                minD = d;
759                minI = i;
760              }
761            }
762            D[j, m] = minD;
763            B[j, m] = minI;
764          }
765        }
766      }
767
768      var centers = new SortedList<double, int>();
769      var upper = B[n - 1, k - 1];
770      var c = Mean(distinct, upper, n);
771      centers.Add(c, k - 1);
772      for (int i = k - 2; i >= 0; i--) {
773        var lower = B[upper - 1, i];
774        var c2 = Mean(distinct, lower, upper);
775        centers.Add(c2, i);
776        upper = lower;
777      }
778
779      clusterValues = new int[nPoints];
780      for (int i = 0; i < estimations.Length; i++) {
781        clusterValues[i] = centers.MinItems(x => Math.Abs(estimations[i] - x.Key)).First().Value;
782      }
783
784      return centers;
785    }
786
787    private static double SumOfSquaredDistances(double[] x, int start, int end) {
788      if (start == end) throw new InvalidOperationException();
789      if (start + 1 == end) return 0.0;
790      double mean = 0.0;
791      for (int i = start; i < end; i++) {
792        mean += x[i];
793      }
794      mean /= (end - start);
795      var sum = 0.0;
796      for (int i = start; i < end; i++) {
797        sum += (x[i] - mean) * (x[i] - mean);
798      }
799      return sum;
800    }
801
802    private static double Mean(double[] x, int start, int end) {
803      if (start == end) throw new InvalidOperationException();
804      double mean = 0.0;
805      for (int i = start; i < end; i++) {
806        mean += x[i];
807      }
808      mean /= (end - start);
809      return mean;
810    }
811  }
812}
Note: See TracBrowser for help on using the repository browser.