#region License Information
/* HeuristicLab
* Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
*/
#endregion
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using HeuristicLab.Algorithms.DataAnalysis;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.RealVectorEncoding;
using HeuristicLab.Optimization;
using HeuristicLab.Problems.DataAnalysis;
namespace HeuristicLab.Algorithms.SAPBA {
  /// <summary>
  /// Helper routines used by the SAPBA algorithms: arg-max/arg-min extensions, single-point
  /// model queries, synchronous execution of sub-algorithms and conversion of evaluated
  /// sample points into regression datasets.
  /// </summary>
  internal static class SapbaUtilities {
    // Column naming scheme shared by GetDataSet, GetDoubleValue and BuildModel.
    private const string InputVariablePrefix = "input";
    private const string TargetVariableName = "output";
    // Upper bound on how often BuildModel re-runs the regression algorithm while it yields no solution.
    private const int MaxModelBuildAttempts = 100;

    //Extension methods for convenience

    /// <summary>
    /// Returns the zero-based position of the element for which <paramref name="func"/> yields the largest value.
    /// </summary>
    /// <remarks>
    /// Ties are resolved in favour of the earliest element. Elements whose score is NaN are never
    /// selected because the comparison with NaN is always false. For an empty sequence (or when no
    /// element scores above negative infinity) the result is 0.
    /// </remarks>
    /// <param name="values">The elements to scan; enumerated exactly once.</param>
    /// <param name="func">Maps an element to the score that is maximized.</param>
    public static int ArgMax<T>(this IEnumerable<T> values, Func<T, double> func) {
      // Start at negative infinity (not double.MinValue) so that an element scoring exactly
      // double.MinValue can still beat an earlier element scoring negative infinity.
      var max = double.NegativeInfinity;
      var maxIdx = 0;
      var idx = 0;
      foreach (var v in values) {
        var d = func(v);
        if (d > max) {
          max = d;
          maxIdx = idx;
        }
        idx++;
      }
      return maxIdx;
    }

    /// <summary>
    /// Returns the zero-based position of the element for which <paramref name="func"/> yields the smallest value.
    /// Implemented as <see cref="ArgMax{T}"/> over the negated score, so the same tie, NaN and
    /// empty-sequence rules apply.
    /// </summary>
    public static int ArgMin<T>(this IEnumerable<T> values, Func<T, double> func) {
      return values.ArgMax(x => -func(x));
    }

    /// <summary>
    /// Evaluates <paramref name="model"/> at the single point <paramref name="r"/> and returns the
    /// first estimated value.
    /// </summary>
    public static double GetEstimation(this IRegressionModel model, RealVector r) {
      var dataset = CreateSinglePointDataSet(r);
      return model.GetEstimatedValues(dataset, new[] { 0 }).First();
    }

    /// <summary>
    /// Evaluates <paramref name="model"/> at the single point <paramref name="r"/> and returns the
    /// first estimated variance.
    /// </summary>
    public static double GetVariance(this IConfidenceRegressionModel model, RealVector r) {
      var dataset = CreateSinglePointDataSet(r);
      return model.GetEstimatedVariances(dataset, new[] { 0 }).First();
    }

    /// <summary>
    /// Reads a value from the column named "input" followed by <paramref name="j"/>; <paramref name="i"/>
    /// is forwarded as the second argument of the dataset's name-based accessor.
    /// </summary>
    public static double GetDoubleValue(this IDataset dataset, int i, int j) {
      return dataset.GetDoubleValue(InputVariablePrefix + j, i);
    }

    // Wraps one point into a one-row dataset; the target column is filled with the placeholder 0.0.
    private static Dataset CreateSinglePointDataSet(RealVector r) {
      return GetDataSet(new[] { Tuple.Create(r, 0.0) }, false);
    }

    //Sub-Algorithms

    /// <summary>
    /// Prepares and starts <paramref name="alg"/>, blocks the calling thread until the algorithm
    /// raises its Stopped event and returns the algorithm's results.
    /// </summary>
    /// <remarks>
    /// If the algorithm has both a "SetSeedRandomly" and a "Seed" parameter, random seeding is
    /// switched off and the seed is set to <paramref name="random"/> before the run.
    /// </remarks>
    /// <param name="alg">The algorithm to execute.</param>
    /// <param name="random">The seed value written to the algorithm's "Seed" parameter.</param>
    /// <exception cref="ArgumentException">The seed parameters exist but do not hold a BoolValue / IntValue.</exception>
    /// <exception cref="Exception">The last exception reported through ExceptionOccurred is rethrown after the run stopped.</exception>
    public static ResultCollection SyncRunSubAlgorithm(IAlgorithm alg, int random) {
      if (alg.Parameters.ContainsKey("SetSeedRandomly") && alg.Parameters.ContainsKey("Seed")) {
        var setSeed = alg.Parameters["SetSeedRandomly"].ActualValue as BoolValue;
        var seed = alg.Parameters["Seed"].ActualValue as IntValue;
        if (seed == null || setSeed == null) throw new ArgumentException("wrong SeedParametertypes");
        setSeed.Value = false;
        seed.Value = random;
      }

      using (var trigger = new AutoResetEvent(false)) {
        Exception ex = null;
        EventHandler<EventArgs<Exception>> exhandler = (sender, e) => ex = e.Value;
        EventHandler stoppedHandler = (sender, e) => trigger.Set();
        alg.ExceptionOccurred += exhandler;
        alg.Stopped += stoppedHandler;
        try {
          alg.Prepare();
          alg.Start();
          trigger.WaitOne();
        }
        finally {
          // Detach even when Prepare/Start throw, so the handlers (and the wait handle they
          // capture) do not stay attached to the algorithm.
          alg.ExceptionOccurred -= exhandler;
          alg.Stopped -= stoppedHandler;
        }
        // Rethrow the original exception object without resetting its stack trace ("throw ex;" would).
        if (ex != null) System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(ex).Throw();
        return alg.Results;
      }
    }

    /// <summary>
    /// Trains a regression model on <paramref name="samples"/> with <paramref name="regressionAlgorithm"/>.
    /// </summary>
    /// <remarks>
    /// All rows are used for training; the test partition is empty. The algorithm is re-run with a
    /// fresh seed (at most 100 times) until its results contain a regression solution. If the
    /// algorithm is a GaussianProcessRegression and <paramref name="oldSolution"/> is a
    /// GaussianProcessRegressionSolution, an additional model is built from clones of the old
    /// mean and covariance functions and replaces the trained solution when its training mean
    /// squared error is lower (or when training produced no solution). The algorithm's runs are
    /// cleared before returning.
    /// </remarks>
    /// <param name="samples">Evaluated points (input vector, output value); must not be empty.</param>
    /// <param name="regressionAlgorithm">The algorithm to train; a RegressionProblem is created if it has no problem yet.</param>
    /// <param name="random">Source of the seeds handed to the individual runs.</param>
    /// <param name="removeDuplicates">Whether near-identical points are merged before training.</param>
    /// <param name="oldSolution">A previous solution whose mean/covariance functions may be reused (Gaussian processes only).</param>
    /// <exception cref="ArgumentException">
    /// No solution could be obtained, or the cloned mean/covariance functions of
    /// <paramref name="oldSolution"/> still have free parameters.
    /// </exception>
    public static IRegressionSolution BuildModel(IReadOnlyList<Tuple<RealVector, double>> samples, IDataAnalysisAlgorithm<IRegressionProblem> regressionAlgorithm, IRandom random, bool removeDuplicates = true, IRegressionSolution oldSolution = null) {
      var dataset = GetDataSet(samples, removeDuplicates);
      var problemdata = new RegressionProblemData(dataset, dataset.VariableNames.Where(x => !x.Equals(TargetVariableName)), TargetVariableName);
      // train on every row, leave the test partition empty
      problemdata.TrainingPartition.Start = 0;
      problemdata.TrainingPartition.End = dataset.Rows;
      problemdata.TestPartition.Start = dataset.Rows;
      problemdata.TestPartition.End = dataset.Rows;

      if (regressionAlgorithm.Problem == null) regressionAlgorithm.Problem = new RegressionProblem();
      var problem = regressionAlgorithm.Problem;
      problem.ProblemDataParameter.Value = problemdata;

      IRegressionSolution solution = null;
      for (var attempt = 0; solution == null && attempt < MaxModelBuildAttempts; attempt++) {
        var results = SyncRunSubAlgorithm(regressionAlgorithm, random.Next(int.MaxValue));
        solution = results.Select(x => x.Value).OfType<IRegressionSolution>().SingleOrDefault();
      }

      //special treatment for GaussianProcessRegression
      var gp = regressionAlgorithm as GaussianProcessRegression;
      var oldGaussian = oldSolution as GaussianProcessRegressionSolution;
      if (gp != null && oldGaussian != null) {
        const double noise = 0.0;
        var n = samples.First().Item1.Length;
        var mean = (IMeanFunction)oldGaussian.Model.MeanFunction.Clone();
        var cov = (ICovarianceFunction)oldGaussian.Model.CovarianceFunction.Clone();
        if (mean.GetNumberOfParameters(n) != 0 || cov.GetNumberOfParameters(n) != 0) throw new ArgumentException("DEBUG: assumption about fixed paramters wrong");
        double[] hyp = { noise };
        try {
          var model = new GaussianProcessModel(problemdata.Dataset, problemdata.TargetVariable, problemdata.AllowedInputVariables, problemdata.TrainingIndices, hyp, mean, cov);
          model.FixParameters();
          var sol = new GaussianProcessRegressionSolution(model, problemdata);
          if (solution == null || solution.TrainingMeanSquaredError > sol.TrainingMeanSquaredError) solution = sol;
        }
        catch (ArgumentException) {
          // Deliberately ignored: the model built from the old functions is only an optional
          // alternative; on failure the solution produced by the algorithm (if any) is kept.
        }
      }

      if (solution == null) throw new ArgumentException("The algorithm didn't return a model");
      regressionAlgorithm.Runs.Clear();
      return solution;
    }

    //RegressionModel extensions

    /// <summary>
    /// Two points whose Euclidean distance does not exceed this value are merged by the duplicate removal.
    /// </summary>
    public const double DuplicateResolution = 0.000001;

    /// <summary>
    /// Converts <paramref name="samples"/> into a dataset with one row per sample, the columns
    /// "input0", "input1", ... for the vector components and a last column "output".
    /// </summary>
    /// <param name="samples">
    /// Points (input vector, output value). Must contain at least one element; the number of
    /// input columns is taken from the first sample.
    /// </param>
    /// <param name="removeDuplicates">Whether near-identical points are merged (outputs averaged) first.</param>
    public static Dataset GetDataSet(IReadOnlyList<Tuple<RealVector, double>> samples, bool removeDuplicates) {
      if (removeDuplicates) samples = RemoveDuplicates(samples); //TODO duplicate removal leads to incorrect uncertainty values in models
      var inputCount = samples[0].Item1.Length;
      var data = new double[samples.Count, inputCount + 1];
      var names = new string[inputCount + 1];
      for (var i = 0; i < inputCount; i++) names[i] = InputVariablePrefix + i;
      names[inputCount] = TargetVariableName;
      for (var row = 0; row < samples.Count; row++) {
        var sample = samples[row];
        for (var i = 0; i < inputCount; i++) data[row, i] = sample.Item1[i];
        data[row, inputCount] = sample.Item2;
      }
      return new Dataset(names, data);
    }

    /// <summary>
    /// Merges every sample into its nearest already-kept point when their distance does not exceed
    /// <see cref="DuplicateResolution"/>; a merged point keeps the first vector of its group and
    /// receives the average of the group's outputs.
    /// </summary>
    private static IReadOnlyList<Tuple<RealVector, double>> RemoveDuplicates(IReadOnlyList<Tuple<RealVector, double>> samples) {
      // Item1: representative point, Item2: sum of the merged outputs, Item3: number of merged samples
      var res = new List<Tuple<RealVector, double, int>>();
      foreach (var sample in samples) {
        if (res.Count == 0) {
          res.Add(Tuple.Create(sample.Item1, sample.Item2, 1));
          continue;
        }
        var index = res.ArgMin(x => Euclidian(sample.Item1, x.Item1));
        var d = Euclidian(res[index].Item1, sample.Item1);
        if (d > DuplicateResolution) res.Add(Tuple.Create(sample.Item1, sample.Item2, 1));
        else {
          // the updated group is re-appended at the end of the list, as before
          var t = res[index];
          res.RemoveAt(index);
          res.Add(Tuple.Create(t.Item1, t.Item2 + sample.Item2, t.Item3 + 1));
        }
      }
      return res.Select(x => Tuple.Create(x.Item1, x.Item2 / x.Item3)).ToArray();
    }

    /// <summary>
    /// Euclidean distance between <paramref name="a"/> and <paramref name="b"/>. The sequences are
    /// paired element-wise with Zip, so surplus elements of the longer sequence are ignored.
    /// </summary>
    private static double Euclidian(IEnumerable<double> a, IEnumerable<double> b) {
      return Math.Sqrt(a.Zip(b, (d, d1) => d - d1).Sum(d => d * d));
    }
  }
}