#region License Information /* HeuristicLab * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL) * * This file is part of HeuristicLab. * * HeuristicLab is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * HeuristicLab is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with HeuristicLab. If not, see . */ #endregion using HeuristicLab.Analysis; using HeuristicLab.Analysis.SelfOrganizingMaps; using HeuristicLab.Collections; using HeuristicLab.Common; using HeuristicLab.Common.Resources; using HeuristicLab.Core; using HeuristicLab.Data; using HeuristicLab.MainForm; using HeuristicLab.Optimization; using HeuristicLab.Persistence.Default.Xml; using HeuristicLab.Problems.DataAnalysis; using HeuristicLab.Random; using System; using System.Collections.Generic; using System.Drawing; using System.IO; using System.Linq; using System.Threading; using System.Threading.Tasks; using HeuristicLab.Algorithms.DataAnalysis; using Algorithm = HeuristicLab.Clients.OKB.Administration.Algorithm; using Problem = HeuristicLab.Clients.OKB.Administration.Problem; using RunCreationClient = HeuristicLab.Clients.OKB.RunCreation.RunCreationClient; using SingleObjectiveOKBProblem = HeuristicLab.Clients.OKB.RunCreation.SingleObjectiveOKBProblem; using SingleObjectiveOKBSolution = HeuristicLab.Clients.OKB.RunCreation.SingleObjectiveOKBSolution; namespace HeuristicLab.OptimizationExpertSystem.Common { [Item("Knowledge Center", "Currently in experimental phase, an expert system that makes algorithm suggestions based on fitness landscape analysis features and an optimization knowledge base.")] [Creatable(CreatableAttribute.Categories.TestingAndAnalysis, Priority = 119)] public sealed class KnowledgeCenter : IContent { private bool SuppressEvents { get; set; } public string Filename { get; set; } public static Image StaticItemImage { get { return VSImageLibrary.Library; } } private readonly IntValue maximumEvaluations; public IntValue MaximumEvaluations { get { return maximumEvaluations; } } private readonly DoubleValue minimumTarget; public DoubleValue MinimumTarget { get { return minimumTarget; } } private readonly RunCollection instanceRuns; public RunCollection InstanceRuns { get { return instanceRuns; } } private readonly RunCollection seededRuns; public RunCollection SeededRuns { get { return seededRuns; } } private readonly RunCollection knowledgeBase; public RunCollection KnowledgeBase { get { return knowledgeBase; } } private readonly SingleObjectiveOKBProblem problem; public SingleObjectiveOKBProblem Problem { get { return problem; } } private readonly ItemList algorithmInstances; private readonly ReadOnlyItemList readonlyAlgorithmInstances; public ReadOnlyItemList AlgorithmInstances { get { return readonlyAlgorithmInstances; } } private readonly RunCollection problemInstances; public RunCollection ProblemInstances { get { return problemInstances; } } private IRecommendationModel recommendationModel; public IRecommendationModel RecommendationModel { get { return recommendationModel; } set { if (recommendationModel == value) return; recommendationModel = value; OnRecommenderModelChanged(); } } private readonly CheckedItemList solutionSeedingPool; public CheckedItemList SolutionSeedingPool { get { return solutionSeedingPool; } } private readonly EnumValue seedingStrategy; public EnumValue SeedingStrategy { get { return seedingStrategy; } } private BidirectionalLookup algorithmId2RunMapping; private BidirectionalDictionary algorithmId2AlgorithmInstanceMapping; private BidirectionalDictionary problemId2ProblemInstanceMapping; public bool Maximization { get { return Problem != null && Problem.ProblemId >= 0 && ((IValueParameter)Problem.MaximizationParameter).Value.Value; } } public KnowledgeCenter() { maximumEvaluations = new IntValue(10000); minimumTarget = new DoubleValue(0.05); instanceRuns = new RunCollection(); seededRuns = new RunCollection(); knowledgeBase = new RunCollection(); algorithmInstances = new ItemList(); readonlyAlgorithmInstances = algorithmInstances.AsReadOnly(); problemInstances = new RunCollection(); recommendationModel = FixedRankModel.GetEmpty(); problem = new SingleObjectiveOKBProblem(); algorithmId2RunMapping = new BidirectionalLookup(); algorithmId2AlgorithmInstanceMapping = new BidirectionalDictionary(); problemId2ProblemInstanceMapping = new BidirectionalDictionary(); solutionSeedingPool = new CheckedItemList(); seedingStrategy = new EnumValue(SeedingStrategyTypes.NoSeeding); RegisterEventHandlers(); } private void ProblemOnProblemChanged(object sender, EventArgs eventArgs) { // TODO: Potentially, knowledge base has to be re-downloaded } private void RegisterEventHandlers() { maximumEvaluations.ValueChanged += MaximumEvaluationsOnValueChanged; minimumTarget.ValueChanged += MinimumTargetOnValueChanged; problem.ProblemChanged += ProblemOnProblemChanged; problem.Solutions.ItemsAdded += ProblemSolutionsChanged; problem.Solutions.ItemsReplaced += ProblemSolutionsChanged; problem.Solutions.ItemsRemoved += ProblemSolutionsChanged; problem.Solutions.CollectionReset += ProblemSolutionsChanged; instanceRuns.CollectionReset += InformationChanged; instanceRuns.ItemsAdded += InformationChanged; instanceRuns.ItemsRemoved += InformationChanged; instanceRuns.Reset += InformationChanged; instanceRuns.UpdateOfRunsInProgressChanged += InformationChanged; knowledgeBase.CollectionReset += InformationChanged; knowledgeBase.ItemsAdded += InformationChanged; knowledgeBase.ItemsRemoved += InformationChanged; } private void MaximumEvaluationsOnValueChanged(object sender, EventArgs eventArgs) { } private void MinimumTargetOnValueChanged(object sender, EventArgs e) { } private void ProblemSolutionsChanged(object sender, EventArgs e) { foreach (var sol in Problem.Solutions.Select(x => x.Solution).OfType()) { if (!SolutionSeedingPool.Contains(sol)) SolutionSeedingPool.Add(sol, false); } } private void InformationChanged(object sender, EventArgs e) { var runCollection = sender as RunCollection; if (runCollection != null && runCollection.UpdateOfRunsInProgress) return; } public bool IsCurrentInstance(IRun run) { if (!problemId2ProblemInstanceMapping.ContainsSecond(run)) return false; return problemId2ProblemInstanceMapping.GetBySecond(run) == Problem.ProblemId; } public void UpdateInstanceProjection(string[] characteristics) { if (characteristics.Length == 0) return; var instances = GetProblemCharacteristics(characteristics); var key2Idx = new BidirectionalDictionary(); foreach (var kvp in instances.Select((k, i) => new { Index = i, Key = k.Key })) key2Idx.Add(kvp.Key, kvp.Index); Func euclid = (a, b) => Math.Sqrt(a.Zip(b, (x, y) => (x - y)).Sum(x => x * x)); Func euclidDArray = (a, b) => Math.Sqrt(a.Zip(b, (x, y) => (x - y)).Sum(x => x * x)); #region MDS var num = instances.Count; var matrix = new DoubleMatrix(num, num); for (var i = 0; i < num - 1; i++) { for (var j = i + 1; j < num; j++) { matrix[i, j] = matrix[j, i] = euclid(instances[key2Idx.GetBySecond(i)], instances[key2Idx.GetBySecond(j)]); } } var coords = MultidimensionalScaling.KruskalShepard(matrix); #endregion #region PCA double[,] v = null; var ds = new double[instances.Count, characteristics.Length]; if (characteristics.Length > 1) { foreach (var instance in instances) { var arr = instance.Value; for (var feature = 0; feature < arr.Length; feature++) ds[key2Idx.GetByFirst(instance.Key), feature] = arr[feature]; } int info; double[] s2; alglib.pcabuildbasis(ds, ds.GetLength(0), ds.GetLength(1), out info, out s2, out v); } #endregion #region SOM var features = new DoubleMatrix(characteristics.Length, instances.Count); foreach (var instance in instances) { var arr = instance.Value; for (var feature = 0; feature < arr.Length; feature++) features[feature, key2Idx.GetByFirst(instance.Key)] = arr[feature]; } var somCoords = SOM.Map(features, new MersenneTwister(42), somSize: 10, learningRadius: 20, iterations: 200, jittering: true); #endregion #region TSNE var tsneFeatures = new DoubleArray[instances.Count]; foreach (var instance in instances) { tsneFeatures[key2Idx.GetByFirst(instance.Key)] = new DoubleArray(instance.Value); } var tsneCoords = TSNEStatic.Run(tsneFeatures, new EuclideanDistance(), new FastRandom(42), newDimensions: 2, perplexity: Math.Min((instances.Count - 1) / 4, 50), theta: 0, stopLyingIter: 0, momSwitchIter: 0, momentum: 0, finalMomentum: 0, eta: 10); #endregion ProblemInstances.UpdateOfRunsInProgress = true; try { foreach (var instance in ProblemInstances) { IItem item; if (v != null) { double x = 0, y = 0; for (var feature = 0; feature < ds.GetLength(1); feature++) { x += ds[key2Idx.GetByFirst(instance), feature] * v[feature, 0]; y += ds[key2Idx.GetByFirst(instance), feature] * v[feature, 1]; } if (instance.Results.TryGetValue("Projection.PCA.X", out item)) { ((DoubleValue)item).Value = x; } else instance.Results.Add("Projection.PCA.X", new DoubleValue(x)); if (instance.Results.TryGetValue("Projection.PCA.Y", out item)) { ((DoubleValue)item).Value = y; } else instance.Results.Add("Projection.PCA.Y", new DoubleValue(y)); } else { instance.Results.Remove("Projection.PCA.X"); instance.Results.Remove("Projection.PCA.Y"); } if (instance.Results.TryGetValue("Projection.MDS.X", out item)) { ((DoubleValue)item).Value = coords[key2Idx.GetByFirst(instance), 0]; } else instance.Results.Add("Projection.MDS.X", new DoubleValue(coords[key2Idx.GetByFirst(instance), 0])); if (instance.Results.TryGetValue("Projection.MDS.Y", out item)) { ((DoubleValue)item).Value = coords[key2Idx.GetByFirst(instance), 1]; } else instance.Results.Add("Projection.MDS.Y", new DoubleValue(coords[key2Idx.GetByFirst(instance), 1])); if (instance.Results.TryGetValue("Projection.SOM.X", out item)) { ((DoubleValue)item).Value = somCoords[key2Idx.GetByFirst(instance), 0]; } else instance.Results.Add("Projection.SOM.X", new DoubleValue(somCoords[key2Idx.GetByFirst(instance), 0])); if (instance.Results.TryGetValue("Projection.SOM.Y", out item)) { ((DoubleValue)item).Value = somCoords[key2Idx.GetByFirst(instance), 1]; } else instance.Results.Add("Projection.SOM.Y", new DoubleValue(somCoords[key2Idx.GetByFirst(instance), 1])); if (instance.Results.TryGetValue("Projection.TSNE.X", out item)) { ((DoubleValue)item).Value = tsneCoords[key2Idx.GetByFirst(instance), 0]; } else instance.Results.Add("Projection.TSNE.X", new DoubleValue(tsneCoords[key2Idx.GetByFirst(instance), 0])); if (instance.Results.TryGetValue("Projection.TSNE.Y", out item)) { ((DoubleValue)item).Value = tsneCoords[key2Idx.GetByFirst(instance), 1]; } else instance.Results.Add("Projection.TSNE.Y", new DoubleValue(tsneCoords[key2Idx.GetByFirst(instance), 1])); } } finally { ProblemInstances.UpdateOfRunsInProgress = false; } } private static readonly HashSet InterestingValueNames = new HashSet() { "QualityPerEvaluations", "Problem Name", "Problem Type", "Algorithm Name", "Algorithm Type", "Maximization", "BestKnownQuality" }; public Task StartAlgorithmAsync(int index) { return StartAlgorithmAsync(index, CancellationToken.None); } public Task StartAlgorithmAsync(int index, CancellationToken cancellation) { var selectedInstance = algorithmInstances[index]; var algorithmClone = (IAlgorithm)selectedInstance.Clone(); var problemClone = Problem.CloneProblem() as ISingleObjectiveHeuristicOptimizationProblem; if (problemClone == null) throw new InvalidOperationException("Problem is not of type " + typeof(ISingleObjectiveHeuristicOptimizationProblem).FullName); // TODO: It is assumed the problem instance by default is configured using no preexisting solution creator var seedingStrategyLocal = SeedingStrategy.Value; if (seedingStrategyLocal != SeedingStrategyTypes.NoSeeding) { if (!SolutionSeedingPool.CheckedItems.Any()) throw new InvalidOperationException("There are no solutions selected for seeding."); // TODO: It would be necessary to specify the solution creator somewhere (property and GUI) var seedingCreator = problemClone.Operators.OfType().FirstOrDefault(); if (seedingCreator == null) throw new InvalidOperationException("The problem does not contain a solution creator that allows seeding."); seedingCreator.PreexistingSolutionsParameter.Value.Replace(SolutionSeedingPool.CheckedItems.Select(x => x.Value)); seedingCreator.SampleFromPreexistingParameter.Value.Value = seedingStrategyLocal == SeedingStrategyTypes.SeedBySampling; // TODO: WHY!? WHY??!? ((dynamic)problemClone.SolutionCreatorParameter).Value = (dynamic)seedingCreator; } algorithmClone.Problem = problemClone; algorithmClone.Prepare(true); IParameter stopParam; var monitorStop = true; if (algorithmClone.Parameters.TryGetValue("MaximumEvaluations", out stopParam)) { var maxEvalParam = stopParam as IValueParameter; if (maxEvalParam != null) { maxEvalParam.Value.Value = MaximumEvaluations.Value; monitorStop = false; } } // TODO: The following can be simplified when we have async implementation patterns for our algorithms: // TODO: The closures can be removed and replaced with private member methods var waitHandle = new AutoResetEvent(false); #region EventHandler closures EventHandler exeStateChanged = (sender, e) => { if (algorithmClone.ExecutionState == ExecutionState.Stopped) { lock (Problem.Solutions) { foreach (var solution in algorithmClone.Results.Where(x => x.Name.ToLower().Contains("solution")).Select(x => x.Value).OfType()) { Problem.Solutions.Add(new SingleObjectiveOKBSolution(Problem.ProblemId) { Quality = solution.Variables.ContainsKey(Problem.Problem.Evaluator.QualityParameter.ActualName) ? ((DoubleValue)solution.Variables[Problem.Problem.Evaluator.QualityParameter.ActualName].Value).Value : double.NaN, Solution = (IItem)solution.Clone() }); } } if (seedingStrategyLocal == SeedingStrategyTypes.NoSeeding) { lock (InstanceRuns) { InstanceRuns.Add(algorithmClone.Runs.Last()); } } else { lock (SeededRuns) { SeededRuns.Add(algorithmClone.Runs.Last()); } } waitHandle.Set(); } }; EventHandler> exceptionOccurred = (sender, e) => { waitHandle.Set(); }; EventHandler timeChanged = (sender, e) => { IResult evalSolResult; if (!algorithmClone.Results.TryGetValue("EvaluatedSolutions", out evalSolResult) || !(evalSolResult.Value is Data.IntValue)) return; var evalSols = ((Data.IntValue)evalSolResult.Value).Value; if (evalSols >= MaximumEvaluations.Value && algorithmClone.ExecutionState == ExecutionState.Started) algorithmClone.Stop(); }; #endregion algorithmClone.ExecutionStateChanged += exeStateChanged; algorithmClone.ExceptionOccurred += exceptionOccurred; if (monitorStop) algorithmClone.ExecutionTimeChanged += timeChanged; return Task.Factory.StartNew(() => { algorithmClone.Start(); OnAlgorithmInstanceStarted(algorithmClone); var cancelRequested = false; while (!waitHandle.WaitOne(200)) { if (cancellation.IsCancellationRequested) { cancelRequested = true; break; } } if (cancelRequested) { try { algorithmClone.Stop(); } catch { } // ignore race condition if it is stopped in the meantime waitHandle.WaitOne(); } waitHandle.Dispose(); return algorithmClone.Results; }, TaskCreationOptions.LongRunning); } public ResultCollection StartAlgorithm(int index, CancellationToken cancellation) { var task = StartAlgorithmAsync(index, cancellation); task.Wait(cancellation); return task.Result; } public Task UpdateKnowledgeBaseAsync(IProgress progress = null) { if (progress == null) progress = new Progress(string.Empty); progress.Start("Updating Knowledge Base from OKB"); OnDownloadStarted(progress); return Task.Factory.StartNew(() => { DoUpdateKnowledgeBase(progress); }, TaskCreationOptions.LongRunning); } public void UpdateKnowledgeBase(IProgress progress = null) { UpdateKnowledgeBaseAsync(progress).Wait(); } private void DoUpdateKnowledgeBase(IProgress progress) { var queryClient = Clients.OKB.Query.QueryClient.Instance; var adminClient = Clients.OKB.Administration.AdministrationClient.Instance; try { progress.Status = "Connecting to OKB..."; progress.ProgressValue = 0; // FIXME: How to tell if refresh is necessary? var refreshTasks = new[] { Task.Factory.StartNew(() => queryClient.Refresh()), Task.Factory.StartNew(() => adminClient.Refresh()) }; Task.WaitAll(refreshTasks); var probInstance = adminClient.Problems.SingleOrDefault(x => x.Id == Problem.ProblemId); if (probInstance == null) throw new InvalidOperationException("The chosen problem instance cannot be found in the OKB."); var probClassId = probInstance.ProblemClassId; var problemClassFilter = (Clients.OKB.Query.StringComparisonAvailableValuesFilter)queryClient.Filters.Single(x => x.Label == "Problem Class Name"); problemClassFilter.Value = adminClient.ProblemClasses.Single(x => x.Id == probClassId).Name; problemId2ProblemInstanceMapping.Clear(); progress.Status = "Downloading algorithm and problem instances..."; progress.ProgressValue = 0; int[] p = { 0 }; ProblemInstances.UpdateOfRunsInProgress = true; ProblemInstances.Clear(); algorithmId2AlgorithmInstanceMapping.Clear(); algorithmId2RunMapping.Clear(); algorithmInstances.Clear(); var characteristics = new HashSet(); var totalProblems = adminClient.Problems.Count(x => x.ProblemClassId == probClassId); var totalAlgorithms = adminClient.Algorithms.Count; var problems = adminClient.Problems.Where(x => x.ProblemClassId == probClassId); var algorithms = adminClient.Algorithms; var combined = problems.Cast().Concat(algorithms.Cast()).Shuffle(new MersenneTwister()); Parallel.ForEach(combined, new ParallelOptions { MaxDegreeOfParallelism = Environment.ProcessorCount }, (inst) => { var pInst = inst as Clients.OKB.Administration.Problem; if (pInst != null) DownloadProblemInstance(progress, pInst, p, totalProblems + totalAlgorithms, characteristics); else { var aInst = inst as Clients.OKB.Administration.Algorithm; DownloadAlgorithmInstance(progress, aInst, p, totalProblems + totalAlgorithms); } }); var interestingValues = queryClient.ValueNames.Where(x => InterestingValueNames.Contains(x.Name)).ToList(); progress.Status = "Downloading runs..."; progress.ProgressValue = 0; p[0] = 0; var count = queryClient.GetNumberOfRuns(problemClassFilter); if (count == 0) return; var runList = new List(); var runIds = LoadRunsFromCache(queryClient.GetRunIds(problemClassFilter), runList, progress); var batches = runIds.Select((v, i) => new { Idx = i, Val = v }).GroupBy(x => x.Idx / 500, x => x.Val); Parallel.ForEach(batches.Select(x => x.ToList()), new ParallelOptions { MaxDegreeOfParallelism = Math.Min(Environment.ProcessorCount, 4) }, (batch) => { var okbRuns = queryClient.GetRunsWithValues(batch, true, interestingValues); var hlRuns = okbRuns.AsParallel().Select(x => new { AlgorithmId = x.Algorithm.Id, RunId = x.Id, Run = queryClient.ConvertToOptimizationRun(x) }).ToList(); lock (runList) { var toCache = new List>(); foreach (var r in hlRuns) { algorithmId2RunMapping.Add(r.AlgorithmId, r.Run); runList.Add(r.Run); toCache.Add(Tuple.Create(r.AlgorithmId, r.RunId, r.Run)); } SaveToCache(toCache); progress.Status = string.Format("Downloaded runs {0} to {1} of {2}...", p[0], p[0] + batch.Count, count); p[0] += batch.Count; progress.ProgressValue = p[0] / (double)count; } }); progress.Status = "Finishing..."; // remove algorithm instances that do not appear in any downloaded run for (var algIdx = 0; algIdx < algorithmInstances.Count; algIdx++) { var id = algorithmId2AlgorithmInstanceMapping.GetBySecond(algorithmInstances[algIdx]); if (!algorithmId2RunMapping.ContainsFirst(id)) { algorithmId2AlgorithmInstanceMapping.RemoveByFirst(id); algorithmInstances.RemoveAt(algIdx); algIdx--; } } try { KnowledgeBase.UpdateOfRunsInProgress = true; KnowledgeBase.Clear(); KnowledgeBase.AddRange(runList); } finally { KnowledgeBase.UpdateOfRunsInProgress = false; } var algInstRunDict = runList.Where(x => x.Parameters.ContainsKey("Problem Name") && x.Parameters["Problem Name"] is StringValue) .GroupBy(x => ((StringValue)x.Parameters["Problem Name"]).Value) .ToDictionary(x => x.Key, x => x.GroupBy(y => ((StringValue)y.Parameters["Algorithm Name"]).Value) .ToDictionary(y => y.Key, y => y.ToList())); // set best-known quality to best-found in case it is not known foreach (var kvp in algInstRunDict) { var prob = ProblemInstances.SingleOrDefault(x => ((StringValue)x.Parameters["Problem Name"]).Value == kvp.Key); if (prob == null) continue; var maximization = ((BoolValue)prob.Parameters["Maximization"]).Value; IItem bkParam; if (!prob.Parameters.TryGetValue("BestKnownQuality", out bkParam) || !(bkParam is DoubleValue)) { var list = kvp.Value.SelectMany(x => x.Value) .Where(x => x.Results.ContainsKey("QualityPerEvaluations")) .Select(x => ((IndexedDataTable)x.Results["QualityPerEvaluations"]).Rows.First().Values.Last().Item2); if (!list.Any()) continue; bkParam = new DoubleValue(maximization ? list.Max() : list.Min()); prob.Parameters["BestKnownQuality"] = bkParam; } } // add algorithm instance ranks as features to the problem instances for a range of targets foreach (var target in new[] {0, 0.01, 0.05, 0.1, 0.2, 0.5}) { var cls = GetPerformanceClasses(target, 5); foreach (var kvp in cls) { var prob = kvp.Key; foreach (var kvp2 in kvp.Value) { var resultName = "Rank." + algorithmId2AlgorithmInstanceMapping.GetByFirst(kvp2.Key) + "@" + (target * 100) + "%"; prob.Results[resultName] = new IntValue(kvp2.Value); } } } } finally { progress.Finish(); ProblemInstances.UpdateOfRunsInProgress = false; } UpdateInstanceProjection(ProblemInstances.ResultNames.Where(x => x.StartsWith("Characteristic.")).ToArray()); } private void DownloadAlgorithmInstance(IProgress progress, Algorithm algInst, int[] p, int total) { IAlgorithm alg = null; var data = Clients.OKB.Administration.AdministrationClient.GetAlgorithmData(algInst.Id); if (data != null) { using (var stream = new MemoryStream(data)) { try { alg = (IAlgorithm)XmlParser.Deserialize(stream); } catch { } stream.Close(); } if (alg != null) { lock (progress) { algorithmInstances.Add(alg); algorithmId2AlgorithmInstanceMapping.Add(algInst.Id, alg); progress.Status = string.Format("Downloaded algorithm {0} (okb-id: {1})...", algInst.Name, algInst.Id); p[0]++; progress.ProgressValue = p[0] / (double)total; } } } } private void DownloadProblemInstance(IProgress progress, Problem pInst, int[] p, int totalProblems, HashSet characteristics) { var charas = new List(); IRun probRun = null; var data = Clients.OKB.Administration.AdministrationClient.GetProblemData(pInst.Id); if (data != null) { using (var stream = new MemoryStream(data)) { try { var prob = (IProblem)XmlParser.Deserialize(stream); probRun = new Run() {Name = prob.Name}; prob.CollectParameterValues(probRun.Parameters); probRun.Parameters["Problem Name"] = new StringValue(prob.Name); probRun.Parameters["Problem Type"] = new StringValue(prob.GetType().Name); foreach (var v in RunCreationClient.Instance.GetCharacteristicValues(pInst.Id)) { probRun.Results.Add("Characteristic." + v.Name, RunCreationClient.Instance.ConvertToItem(v)); charas.Add("Characteristic." + v.Name); } } catch { } stream.Close(); } if (probRun != null) { lock (progress) { problemId2ProblemInstanceMapping.Add(pInst.Id, probRun); ProblemInstances.Add(probRun); progress.Status = string.Format("Downloaded problem {0} (okb-id: {1})....", pInst.Name, pInst.Id); p[0]++; progress.ProgressValue = p[0] / (double)totalProblems; foreach (var c in charas) characteristics.Add(c); } } } } private List LoadRunsFromCache(IEnumerable runIds, List runList, IProgress progress) { var hashSet = new HashSet(runIds); var total = hashSet.Count; try { var path = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData), "HeuristicLab.OKB", "cache", "runs"); Parallel.ForEach(Directory.EnumerateDirectories(path).Select((d, i) => new { Index = i, Directory = d }).GroupBy(x => x.Index / 100), new ParallelOptions() { MaxDegreeOfParallelism = Environment.ProcessorCount }, (folderGroup) => { var localRunList = new List>(); foreach (var runPath in folderGroup.Select(x => x.Directory)) { long runId; var runFolder = new DirectoryInfo(runPath).Name; if (!long.TryParse(runFolder, out runId) || !hashSet.Contains(runId)) continue; var runFilePath = Directory.EnumerateFiles(runPath).Single(); var runFileName = Path.GetFileNameWithoutExtension(runFilePath); long algId; if (!long.TryParse(runFileName, out algId)) continue; IRun run = null; try { using (var file = File.OpenRead(runFilePath)) run = XmlParser.Deserialize(file); } catch { File.Delete(runFilePath); Directory.Delete(runPath); } if (run != null) localRunList.Add(Tuple.Create(algId, runId, run)); } lock (runList) { foreach (var r in localRunList) { hashSet.Remove(r.Item2); algorithmId2RunMapping.Add(r.Item1, r.Item3); runList.Add(r.Item3); } progress.Status = string.Format("Retrieved {0} of {1} from cache", runList.Count, total); progress.ProgressValue = (double)runList.Count / total; } }); } catch { } return hashSet.ToList(); } private void SaveToCache(IEnumerable> runs) { try { var path = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData), "HeuristicLab.OKB", "cache", "runs"); if (!Directory.Exists(path)) Directory.CreateDirectory(path); foreach (var r in runs) { var runPath = Path.Combine(path, r.Item2.ToString()); if (!Directory.Exists(runPath)) Directory.CreateDirectory(runPath); using (var file = File.Open(Path.Combine(runPath, r.Item1.ToString()), FileMode.Create, FileAccess.ReadWrite)) { XmlGenerator.Serialize(r.Item3, file); } } } catch { } } public static double[][] GetFeatures(IRun[] problemInstances, string[] characteristics, double[] medianValues = null) { var instances = new double[problemInstances.Length][]; for (var p = 0; p < problemInstances.Length; p++) { instances[p] = new double[characteristics.Length]; for (var f = 0; f < characteristics.Length; f++) { IItem item; if (problemInstances[p].Results.TryGetValue(characteristics[f], out item)) { double val = 0; var dItem = item as DoubleValue; if (dItem != null) { val = dItem.Value; } else { var iItem = item as IntValue; if (iItem != null) val = iItem.Value; else val = double.NaN; } if (double.IsNaN(val) && medianValues != null) instances[p][f] = medianValues[f]; else instances[p][f] = val; } else instances[p][f] = medianValues != null ? medianValues[f] : double.NaN; } } return instances; } public static double[][] GetFeaturesStandardized(IRun[] problemInstances, string[] characteristics, out double[] means, out double[] sdevs, double[] medianValues = null) { var instances = new double[problemInstances.Length][]; var columns = new List[characteristics.Length]; for (var p = 0; p < problemInstances.Length; p++) { instances[p] = new double[characteristics.Length]; for (var f = 0; f < characteristics.Length; f++) { if (columns[f] == null) { columns[f] = new List(problemInstances.Length); } IItem item; if (problemInstances[p].Results.TryGetValue(characteristics[f], out item)) { double val = 0; var dItem = item as DoubleValue; if (dItem != null) { val = dItem.Value; } else { var iItem = item as IntValue; if (iItem != null) val = iItem.Value; else val = double.NaN; } if (double.IsNaN(val) && medianValues != null) instances[p][f] = medianValues[f]; else instances[p][f] = val; columns[f].Add(instances[p][f]); } else instances[p][f] = medianValues != null ? medianValues[f] : double.NaN; } } means = new double[characteristics.Length]; sdevs = new double[characteristics.Length]; for (var f = 0; f < characteristics.Length; f++) { var mean = columns[f].Average(); var dev = columns[f].StandardDeviation(); means[f] = mean; sdevs[f] = dev; for (var p = 0; p < problemInstances.Length; p++) { if (dev.IsAlmost(0)) instances[p][f] = 0; else instances[p][f] = (instances[p][f] - mean) / dev; } } return instances; } public static double[] GetMedianValues(IRun[] problemInstances, string[] characteristics) { var values = new List[characteristics.Length]; foreach (var problemInstance in problemInstances) { for (var f = 0; f < characteristics.Length; f++) { if (values[f] == null) values[f] = new List(problemInstances.Length); IItem item; if (problemInstance.Results.TryGetValue(characteristics[f], out item)) { var dItem = item as DoubleValue; if (dItem != null) values[f].Add(dItem.Value); else { var iItem = item as IntValue; if (iItem != null) values[f].Add(iItem.Value); } } } } return values.Select(x => x.Count == 0 ? 0.0 : x.Median()).ToArray(); } public Dictionary GetProblemCharacteristics(string[] characteristics) { var map = ProblemInstances.Select((v, i) => new { Index = i, ProblemInstance = v }).ToDictionary(x => x.Index, x => x.ProblemInstance); var instances = GetFeatures(ProblemInstances.ToArray(), characteristics); var median = GetMedianValues(ProblemInstances.ToArray(), characteristics); var allValues = instances.Select(x => x.Select((f, i) => new { Idx = i, Val = double.IsNaN(f) ? median[i] : f }).ToList()) .SelectMany(x => x) .GroupBy(x => x.Idx, x => x.Val) .OrderBy(x => x.Key).ToList(); var avg = allValues.Select(x => x.Average()).ToList(); var stdev = allValues.Select(x => x.StandardDeviation()).ToList(); // normalize characteristic values by transforming them to their z-score foreach (var features in instances) { for (var i = 0; i < features.Length; i++) { if (double.IsNaN(features[i])) features[i] = median[i]; if (stdev[i] > 0) features[i] = (features[i] - avg[i]) / stdev[i]; } } return instances.Select((v, i) => new { ProblemInstance = map[i], Features = v }).ToDictionary(x => x.ProblemInstance, x => x.Features); } public Dictionary GetAlgorithmPerformance(IRun problemInstance) { if (!problemInstance.Parameters.ContainsKey("BestKnownQuality")) return new Dictionary(); var target = GetTarget(((DoubleValue)problemInstance.Parameters["BestKnownQuality"]).Value, MinimumTarget.Value, Maximization); return knowledgeBase.Where(x => ((StringValue)x.Parameters["Problem Name"]).Value == ((StringValue)problemInstance.Parameters["Problem Name"]).Value) .GroupBy(x => algorithmId2AlgorithmInstanceMapping.GetByFirst(algorithmId2RunMapping.GetBySecond(x).Single())) .ToDictionary(x => x.Key, x => ExpectedRuntimeHelper.CalculateErt(x.ToList(), "QualityPerEvaluations", target, Maximization).ExpectedRuntime); } public Dictionary GetAlgorithmPerformanceLog10(IRun problemInstance) { if (!problemInstance.Parameters.ContainsKey("BestKnownQuality")) return new Dictionary(); var target = GetTarget(((DoubleValue)problemInstance.Parameters["BestKnownQuality"]).Value, MinimumTarget.Value, Maximization); return knowledgeBase.Where(x => ((StringValue)x.Parameters["Problem Name"]).Value == ((StringValue)problemInstance.Parameters["Problem Name"]).Value) .GroupBy(x => algorithmId2AlgorithmInstanceMapping.GetByFirst(algorithmId2RunMapping.GetBySecond(x).Single())) .ToDictionary(x => x.Key, x => Math.Log10(ExpectedRuntimeHelper.CalculateErt(x.ToList(), "QualityPerEvaluations", target, Maximization).ExpectedRuntime)); } public Dictionary> GetAlgorithmRuns(IRun problemInstance) { return knowledgeBase.Where(x => ((StringValue)x.Parameters["Problem Name"]).Value == ((StringValue)problemInstance.Parameters["Problem Name"]).Value) .GroupBy(x => algorithmId2AlgorithmInstanceMapping.GetByFirst(algorithmId2RunMapping.GetBySecond(x).Single())) .ToDictionary(x => x.Key, x => x.ToList()); } public Dictionary> GetKnowledgeBaseByAlgorithm() { return KnowledgeBase.GroupBy(x => algorithmId2AlgorithmInstanceMapping.GetByFirst(algorithmId2RunMapping.GetBySecond(x).Single())) .ToDictionary(x => x.Key, x => x.ToList()); } public IEnumerable GetRegressionProblemPerAlgorithmInstance(double target, string[] characteristics) { if (Problem == null) yield break; var features = GetProblemCharacteristics(characteristics); // TODO: knowledgebase only stores problem name as a string // this doesn't work if there are two equally named problem instances var problemMap = ProblemInstances.Select(x => new { Key = ((StringValue)x.Parameters["Problem Name"]).Value, Value = x }) .ToDictionary(x => x.Key, x => x.Value); foreach (var relevantRuns in knowledgeBase.GroupBy(x => algorithmId2RunMapping.GetBySecond(x).Single())) { var problemRuns = relevantRuns.GroupBy(x => ((StringValue)x.Parameters["Problem Name"]).Value).ToList(); var ds = new ModifiableDataset(); ds.AddVariable("Problem Name", new List()); foreach (var pc in characteristics) ds.AddVariable(pc, new List()); ds.AddVariable("ERT", new List()); foreach (var pr in problemRuns) { var prob = problemMap[pr.Key]; var f = features[prob]; var max = ((BoolValue)prob.Parameters["Maximization"]).Value; var bkq = ((DoubleValue)prob.Parameters["BestKnownQuality"]).Value; var ert = ExpectedRuntimeHelper.CalculateErt(pr.ToList(), "QualityPerEvaluations", GetTarget(bkq, target, max), max).ExpectedRuntime; if (double.IsInfinity(ert)) ert = int.MaxValue; ds.AddRow(new object[] { pr.Key }.Concat(f.Cast()).Concat(new object[] { ert })); } var datAnalysisData = new RegressionProblemData(ds, characteristics, "ERT"); var result = new RegressionProblem() { Name = algorithmId2AlgorithmInstanceMapping.GetByFirst(relevantRuns.Key).Name }; result.ProblemDataParameter.Value = datAnalysisData; yield return result; } } public IEnumerable GetClassificationProblemPerAlgorithmInstance(double target, string[] characteristics) { if (Problem == null) yield break; var classes = GetPerformanceClasses(target, 5); var features = GetProblemCharacteristics(characteristics); foreach (var alg in AlgorithmInstances) { var ds = new ModifiableDataset(); ds.AddVariable("Problem Name", new List()); foreach (var pc in characteristics) ds.AddVariable(pc, new List()); ds.AddVariable("Class", new List()); foreach (var c in classes) { int cls; if (c.Value.TryGetValue(algorithmId2AlgorithmInstanceMapping.GetBySecond(alg), out cls)) { ds.AddRow(new object[] { ((StringValue)c.Key.Parameters["Problem Name"]).Value } .Concat(features[c.Key].Cast()).Concat(new object[] { cls })); } } var datAnalysisData = new ClassificationProblemData(ds, characteristics, "Class"); var result = new ClassificationProblem() { Name = alg.Name }; result.ProblemDataParameter.Value = datAnalysisData; yield return result; } } public Dictionary GetProblemDistances(string[] characteristics) { var result = new Dictionary(); var currentInstance = problemId2ProblemInstanceMapping.GetByFirst(Problem.ProblemId); var features = GetProblemCharacteristics(characteristics); var cF = features[currentInstance]; foreach (var b in ProblemInstances) { if (b == currentInstance) continue; var sum = features[b].Select((t, f) => (cF[f] - t) * (cF[f] - t)).Sum(); result[b] = Math.Sqrt(sum); } return result; } public Dictionary> GetPerformanceClasses(double target, int nClasses) { var result = new Dictionary>(); var problemMap = ProblemInstances.Select(x => new { Key = ((StringValue)x.Parameters["Problem Name"]).Value, Value = x }) .ToDictionary(x => x.Key, x => x.Value); foreach (var pr in KnowledgeBase.GroupBy(x => ((StringValue)x.Parameters["Problem Name"]).Value).ToList()) { var bkq = ((DoubleValue)problemMap[pr.Key].Parameters["BestKnownQuality"]).Value; var max = ((BoolValue)problemMap[pr.Key].Parameters["Maximization"]).Value; result[problemMap[pr.Key]] = new Dictionary(); var values = pr.GroupBy(x => algorithmId2RunMapping.GetBySecond(x).Single()) .ToDictionary(x => x.Key, x => Math.Log10(ExpectedRuntimeHelper.CalculateErt(x.ToList(), "QualityPerEvaluations", GetTarget(bkq, target, max), max).ExpectedRuntime)); var ranks = ClusteringHelper.Cluster(nClasses, values, x => double.IsInfinity(x.Value)) .GetByCluster().ToList(); foreach (var c in ranks) { foreach (var a in c.Value) result[problemMap[pr.Key]][a.Key] = c.Key; } } return result; } public double GetTarget(double bestKnownQuality, double target, bool maximization) { return bestKnownQuality * (maximization ? (1 - target) : (1 + target)); } public event EventHandler> DownloadStarted; private void OnDownloadStarted(IProgress progress) { var handler = DownloadStarted; if (handler != null) handler(this, new EventArgs(progress)); } public event EventHandler> AlgorithmInstanceStarted; private void OnAlgorithmInstanceStarted(IAlgorithm instance) { var handler = AlgorithmInstanceStarted; if (handler != null) handler(this, new EventArgs(instance)); } public event EventHandler RecommendationModelChanged; private void OnRecommenderModelChanged() { var handler = RecommendationModelChanged; if (handler != null) handler(this, EventArgs.Empty); } public IEnumerable> GetAlgorithmInstanceRanking() { return RecommendationModel.GetRanking(ProblemInstances.Single(IsCurrentInstance)); } } }