Free cookie consent management tool by TermsFeed Policy Generator

source: branches/EfficientGlobalOptimization/HeuristicLab.Algorithms.EGO/EfficientGlobalOptimizationAlgorithm.cs @ 15012

Last change on this file since 15012 was 14833, checked in by bwerth, 7 years ago

#2745 added LatinHyperCubeDesign as possible InitialSamplingPlan

File size: 30.0 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using System.Threading;
26using System.Windows.Forms;
27using HeuristicLab.Algorithms.DataAnalysis;
28using HeuristicLab.Analysis;
29using HeuristicLab.Common;
30using HeuristicLab.Core;
31using HeuristicLab.Data;
32using HeuristicLab.Encodings.RealVectorEncoding;
33using HeuristicLab.Optimization;
34using HeuristicLab.Parameters;
35using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
36using HeuristicLab.Problems.DataAnalysis;
37using HeuristicLab.Problems.Instances.DataAnalysis;
38using HeuristicLab.Problems.Instances.DataAnalysis.Views;
39using HeuristicLab.Random;
40
41namespace HeuristicLab.Algorithms.EGO {
42  [StorableClass]
43  [Creatable(CreatableAttribute.Categories.Algorithms, Priority = 95)]
44  [Item("EfficientGlobalOptimizationAlgortihm", "Solves a problem by sequentially learning a model, solving a subproblem on the model and evaluating the best found solution for this subproblem.")]
45  public class EfficientGlobalOptimizationAlgorithm : BasicAlgorithm, ISurrogateAlgorithm<RealVector> {
46    #region Basic-Alg-Essentials
/// <summary>Signals that this algorithm can be paused.</summary>
public override bool SupportsPause {
  get { return true; }
}

/// <summary>The kind of problem this algorithm accepts.</summary>
public override Type ProblemType {
  get { return typeof(SingleObjectiveBasicProblem<IEncoding>); }
}

/// <summary>Strongly typed access to the problem stored in the base class.</summary>
public new SingleObjectiveBasicProblem<IEncoding> Problem {
  get { return (SingleObjectiveBasicProblem<IEncoding>)base.Problem; }
  set { base.Problem = value; }
}
54    #endregion
55
56    #region ParameterNames
// Keys under which the parameters are registered in (and looked up from) the Parameters collection.
// The literals are used verbatim as lookup keys, so their exact spelling (including spaces) matters.

// budget and termination
private const string InitialEvaluationsParameterName = "Initial Evaluations";
private const string MaximumEvaluationsParameterName = "Maximum Evaluations";
private const string MaximumRuntimeParameterName = "Maximum Runtime";

// random number generation
private const string SeedParameterName = "Seed";
private const string SetSeedRandomlyParameterName = "SetSeedRandomly";

// surrogate model
private const string RegressionAlgorithmParameterName = "RegressionAlgorithm";
private const string MaximalDataSetSizeParameterName = "MaximalDataSetSize";
private const string RemoveDuplicatesParamterName = "RemoveDuplicates";

// infill (sub-problem) optimization
private const string GenerationSizeParameterName = "GenerationSize";
private const string InfillCriterionParameterName = "InfillCriterion";
private const string InfillOptimizationAlgorithmParameterName = "InfillOptimizationAlgorithm";
private const string InfillOptimizationRestartsParameterName = "InfillOptimizationRestarts";

// initial samples and baseline
private const string InitialSamplesParameterName = "InitialSamplesFile";
private const string InitialSamplingPlanParamterName = "InitialSamplingPlan";
private const string BaselineVectorParameterName = "BaselineVector";
72    #endregion
73
74    #region ResultNames
// Keys of the entries this algorithm writes into the Results collection.
private const string IterationsResultName = "Iterations";
private const string EvaluatedSoultionsResultName = "EvaluatedSolutions";
private const string BestSolutionResultName = "Best Solution";
private const string BestQualityResultName = "Best Quality";
private const string RegressionSolutionResultName = "Model";

// Name of the quality chart and of the data rows it contains.
private const string QualitiesChartResultName = "Qualities";
private const string BestQualitiesRowResultName = "Best Quality";
private const string CurrentQualitiesRowResultName = "Current Quality";
private const string WorstQualitiesRowResultName = "Worst Quality";
84    #endregion
85
86    #region TransmissionResultNames
// Result keys that OptimizeInfillProblem reads from the results of the infill optimization algorithm.
public const string BestInfillQualityResultName = "BestInfillQuality";
public const string BestInfillSolutionResultName = "BestInfillSolution";
89    #endregion
90
91    #region ParameterProperties
// Typed accessors for the registered parameters. Each getter yields null if the stored
// parameter is not of the expected type (soft cast via "as").
public IFixedValueParameter<IntValue> GenerationSizeParemeter {
  get { return Parameters[GenerationSizeParameterName] as IFixedValueParameter<IntValue>; }
}
public IConstrainedValueParameter<IInfillCriterion> InfillCriterionParameter {
  get { return Parameters[InfillCriterionParameterName] as IConstrainedValueParameter<IInfillCriterion>; }
}
public IValueParameter<Algorithm> InfillOptimizationAlgorithmParameter {
  get { return Parameters[InfillOptimizationAlgorithmParameterName] as IValueParameter<Algorithm>; }
}
public IFixedValueParameter<IntValue> InfillOptimizationRestartsParemeter {
  get { return Parameters[InfillOptimizationRestartsParameterName] as IFixedValueParameter<IntValue>; }
}
public IFixedValueParameter<IntValue> InitialEvaluationsParameter {
  get { return Parameters[InitialEvaluationsParameterName] as IFixedValueParameter<IntValue>; }
}
public IFixedValueParameter<IntValue> MaximumEvaluationsParameter {
  get { return Parameters[MaximumEvaluationsParameterName] as IFixedValueParameter<IntValue>; }
}
public IFixedValueParameter<IntValue> MaximumRuntimeParameter {
  get { return Parameters[MaximumRuntimeParameterName] as IFixedValueParameter<IntValue>; }
}
public IValueParameter<IDataAnalysisAlgorithm<IRegressionProblem>> RegressionAlgorithmParameter {
  get { return Parameters[RegressionAlgorithmParameterName] as IValueParameter<IDataAnalysisAlgorithm<IRegressionProblem>>; }
}
public IFixedValueParameter<IntValue> SeedParameter {
  get { return Parameters[SeedParameterName] as IFixedValueParameter<IntValue>; }
}
public IFixedValueParameter<BoolValue> SetSeedRandomlyParameter {
  get { return Parameters[SetSeedRandomlyParameterName] as IFixedValueParameter<BoolValue>; }
}
public IFixedValueParameter<IntValue> MaximalDataSetSizeParameter {
  get { return Parameters[MaximalDataSetSizeParameterName] as IFixedValueParameter<IntValue>; }
}
public IFixedValueParameter<BoolValue> RemoveDuplicatesParameter {
  get { return Parameters[RemoveDuplicatesParamterName] as IFixedValueParameter<BoolValue>; }
}
public IFixedValueParameter<FileValue> InitialSamplesParameter {
  get { return Parameters[InitialSamplesParameterName] as IFixedValueParameter<FileValue>; }
}
public IValueParameter<RealVector> BaselineVectorParameter {
  get { return Parameters[BaselineVectorParameterName] as IValueParameter<RealVector>; }
}
public IConstrainedValueParameter<IInitialSampling> InitialSamplingPlanParameter {
  get { return Parameters[InitialSamplingPlanParamterName] as IConstrainedValueParameter<IInitialSampling>; }
}
107    #endregion
108
109    #region Properties
110
// Convenience accessors that unwrap the current parameter values.
public int GenerationSize {
  get { return GenerationSizeParemeter.Value.Value; }
}
public IInfillCriterion InfillCriterion {
  get { return InfillCriterionParameter.Value; }
}
public Algorithm InfillOptimizationAlgorithm {
  get { return InfillOptimizationAlgorithmParameter.Value; }
}
public int InfillOptimizationRestarts {
  get { return InfillOptimizationRestartsParemeter.Value.Value; }
}
public int InitialEvaluations {
  get { return InitialEvaluationsParameter.Value.Value; }
}
public int MaximumEvaluations {
  get { return MaximumEvaluationsParameter.Value.Value; }
}
public int MaximumRuntime {
  get { return MaximumRuntimeParameter.Value.Value; }
}
public IDataAnalysisAlgorithm<IRegressionProblem> RegressionAlgorithm {
  get { return RegressionAlgorithmParameter.Value; }
}
public int Seed {
  get { return SeedParameter.Value.Value; }
}
public bool SetSeedRandomly {
  get { return SetSeedRandomlyParameter.Value.Value; }
}
public int MaximalDatasetSize {
  get { return MaximalDataSetSizeParameter.Value.Value; }
}

/// <summary>
/// The samples handed to the model builder: only the most recently added
/// MaximalDatasetSize samples when that limit is positive and exceeded, otherwise all samples.
/// </summary>
private IEnumerable<Tuple<RealVector, double>> DataSamples {
  get {
    if (Samples.Count > MaximalDatasetSize && MaximalDatasetSize > 0)
      return Samples.Skip(Samples.Count - MaximalDatasetSize);
    return Samples;
  }
}

private bool RemoveDuplicates {
  get { return RemoveDuplicatesParameter.Value.Value; }
}
private RealVector BaselineVector {
  get { return BaselineVectorParameter.Value; }
}
private IInitialSampling InitialSamplingPlan {
  get { return InitialSamplingPlanParameter.Value; }
}
129    #endregion
130
131    #region StorableProperties
// Random number generator of the algorithm; it is reset with the seed in Initialize.
[Storable] private IRandom Random = new MersenneTwister();

// All (point, quality) pairs known so far; rebuilt in Initialize and extended in Run.
[Storable] private List<Tuple<RealVector, double>> Samples;

// Pre-evaluated (point, quality) pairs supplied through SetInitialSamples; may be null.
[Storable] private List<Tuple<RealVector, double>> InitialSamples;
138    #endregion
139
140    #region ResultsProperties
// Typed views on the entries of the Results collection (the entries are created in Initialize).
private double ResultsBestQuality {
  get {
    var stored = (DoubleValue)Results[BestQualityResultName].Value;
    return stored.Value;
  }
  set {
    var stored = (DoubleValue)Results[BestQualityResultName].Value;
    stored.Value = value;
  }
}

private RealVector ResultsBestSolution {
  get { return (RealVector)Results[BestSolutionResultName].Value; }
  set { Results[BestSolutionResultName].Value = value; }
}

private int ResultsEvaluations {
  get {
    var stored = (IntValue)Results[EvaluatedSoultionsResultName].Value;
    return stored.Value;
  }
  set {
    var stored = (IntValue)Results[EvaluatedSoultionsResultName].Value;
    stored.Value = value;
  }
}

private int ResultsIterations {
  get {
    var stored = (IntValue)Results[IterationsResultName].Value;
    return stored.Value;
  }
  set {
    var stored = (IntValue)Results[IterationsResultName].Value;
    stored.Value = value;
  }
}

// The quality chart and its three data rows.
private DataTable ResultsQualities {
  get { return (DataTable)Results[QualitiesChartResultName].Value; }
}
private DataRow ResultsQualitiesBest {
  get { return ResultsQualities.Rows[BestQualitiesRowResultName]; }
}
private DataRow ResultsQualitiesWorst {
  get { return ResultsQualities.Rows[WorstQualitiesRowResultName]; }
}
private DataRow ResultsQualitiesIteration {
  get { return ResultsQualities.Rows[CurrentQualitiesRowResultName]; }
}

// The most recently built surrogate model (null until the first model has been stored).
private IRegressionSolution ResultsModel {
  get { return (IRegressionSolution)Results[RegressionSolutionResultName].Value; }
  set { Results[RegressionSolutionResultName].Value = value; }
}
173    #endregion
174
175    #region HLConstructors
/// <summary>Constructor marked for the persistence framework; only forwards to the base class.</summary>
[StorableConstructor]
protected EfficientGlobalOptimizationAlgorithm(bool deserializing)
  : base(deserializing) {
}
/// <summary>Persistence hook: re-attaches the event handlers after deserialization.</summary>
[StorableHook(HookType.AfterDeserialization)]
private void AfterDeseialization() {
  // event subscriptions are wired up again on the restored instance
  RegisterEventhandlers();
}
/// <summary>
/// Cloning constructor: copies the random number generator and both sample lists of
/// <paramref name="original"/> and wires up the event handlers of the clone.
/// </summary>
/// <param name="original">The instance to copy.</param>
/// <param name="cloner">The cloner that tracks already cloned objects.</param>
protected EfficientGlobalOptimizationAlgorithm(EfficientGlobalOptimizationAlgorithm original, Cloner cloner)
  : base(original, cloner) {
  // Fix: clone the ORIGINAL's generator (previously the clone's own freshly created field was cloned,
  // so the random state of the original was lost).
  Random = cloner.Clone(original.Random);
  if (original.Samples != null) Samples = CloneSamples(original.Samples, cloner);
  // Fix: the cloned initial samples belong into InitialSamples (previously they overwrote Samples
  // and InitialSamples stayed null).
  if (original.InitialSamples != null) InitialSamples = CloneSamples(original.InitialSamples, cloner);
  RegisterEventhandlers();
}

// Creates a new list whose tuples hold cloned points and the unchanged quality values.
private static List<Tuple<RealVector, double>> CloneSamples(IEnumerable<Tuple<RealVector, double>> source, Cloner cloner) {
  return source.Select(x => new Tuple<RealVector, double>(cloner.Clone(x.Item1), x.Item2)).ToList();
}
/// <summary>Creates a deep copy of this algorithm via the cloning constructor.</summary>
public override IDeepCloneable Clone(Cloner cloner) {
  var copy = new EfficientGlobalOptimizationAlgorithm(this, cloner);
  return copy;
}
/// <summary>
/// Creates the algorithm with its default configuration: a CMA evolution strategy as infill
/// optimizer, a Gaussian process regression as surrogate model, and all parameters registered.
/// </summary>
public EfficientGlobalOptimizationAlgorithm() {
  // default infill optimizer
  var infillOptimizer = new CMAEvolutionStrategy.CMAEvolutionStrategy();
  infillOptimizer.MaximumGenerations = 300;
  infillOptimizer.PopulationSize = 50;

  // default surrogate model
  var regression = new GaussianProcessRegression();
  regression.Problem = new RegressionProblem();
  regression.CovarianceFunctionParameter.Value = new CovarianceRationalQuadraticIso();

  // the registration order below is kept exactly as before
  Parameters.Add(new FixedValueParameter<IntValue>(MaximumEvaluationsParameterName, "", new IntValue(int.MaxValue)));
  Parameters.Add(new FixedValueParameter<IntValue>(InitialEvaluationsParameterName, "", new IntValue(10)));
  Parameters.Add(new FixedValueParameter<IntValue>(MaximumRuntimeParameterName, "The maximum runtime in seconds after which the algorithm stops. Use -1 to specify no limit for the runtime", new IntValue(-1)));
  Parameters.Add(new FixedValueParameter<IntValue>(SeedParameterName, "The random seed used to initialize the new pseudo random number generator.", new IntValue(0)));
  Parameters.Add(new FixedValueParameter<BoolValue>(SetSeedRandomlyParameterName, "True if the random seed should be set to a random value, otherwise false.", new BoolValue(true)));
  Parameters.Add(new ValueParameter<IDataAnalysisAlgorithm<IRegressionProblem>>(RegressionAlgorithmParameterName, "The model used to approximate the problem", regression));
  Parameters.Add(new ValueParameter<Algorithm>(InfillOptimizationAlgorithmParameterName, "The algorithm used to solve the expected improvement subproblem", infillOptimizer));
  Parameters.Add(new FixedValueParameter<IntValue>(InfillOptimizationRestartsParameterName, "Number of restarts of the SubAlgortihm to avoid local optima", new IntValue(1)));
  Parameters.Add(new FixedValueParameter<IntValue>(GenerationSizeParameterName, "Number points that are sampled every iteration (stadard EGO: 1)", new IntValue(1)));
  Parameters.Add(new ConstrainedValueParameter<IInfillCriterion>(InfillCriterionParameterName, "Decision what value should decide the next sample"));

  // selectable infill criteria
  var criteria = InfillCriterionParameter.ValidValues;
  criteria.Add(new ExpectedImprovement());
  criteria.Add(new AugmentedExpectedImprovement());
  criteria.Add(new ExpectedQuality());
  var quantileImprovement = new ExpectedQuantileImprovement();
  criteria.Add(quantileImprovement);
  // the criterion receives the very same value object that backs the MaximumEvaluations parameter
  quantileImprovement.MaxEvaluationsParameter.Value = MaximumEvaluationsParameter.Value;
  criteria.Add(new MinimalQuantileCriterium());
  criteria.Add(new RobustImprovement());
  criteria.Add(new PluginExpectedImprovement());

  Parameters.Add(new FixedValueParameter<IntValue>(MaximalDataSetSizeParameterName, "The maximum number of sample points used to generate the model. Set 0 or less to use always all samples ", new IntValue(-1)));
  Parameters.Add(new FixedValueParameter<BoolValue>(RemoveDuplicatesParamterName, "Wether duplicate samples should be replaced by a single sample with an averaged quality. This GREATLY decreases the chance of ill conditioned models (unbuildable models) but is not theoretically sound as the model ignores the increasing certainty in this region"));
  Parameters.Add(new FixedValueParameter<FileValue>(InitialSamplesParameterName, "The file specifying some initial samples used to jump start the algorithm. These samples are not counted as evaluations. If InitialEvaluations is more than the samples specified in the file, the rest is uniformly random generated and evaluated.", new FileValue()));
  Parameters.Add(new ValueParameter<RealVector>(BaselineVectorParameterName, "A vector used to create a baseline, this vector is evaluated once and is not part of the modeling process (has no influence on algorithm performance)"));

  // selectable initial sampling plans; the first one added is passed as the initial value
  var samplingPlans = new ItemSet<IInitialSampling>();
  samplingPlans.Add(new UniformRandomSampling());
  samplingPlans.Add(new LatinHyperCubeDesign());
  Parameters.Add(new ConstrainedValueParameter<IInitialSampling>(InitialSamplingPlanParamterName, samplingPlans, samplingPlans.First()));

  SetInfillProblem();
  RegisterEventhandlers();
}
228    #endregion
229
/// <summary>
/// Prepares a run: validates encoding and infill problem, seeds the random number generator,
/// takes over the initial samples and creates all result entries.
/// </summary>
/// <exception cref="ArgumentException">
/// The problem does not use a RealVectorEncoding or the infill optimizer has no InfillProblem.
/// </exception>
protected override void Initialize(CancellationToken cancellationToken) {
  base.Initialize(cancellationToken);

  // only real-valued encodings are supported
  var encoding = Problem.Encoding as RealVectorEncoding;
  if (encoding == null) throw new ArgumentException("The EGO algorithm can only be applied to RealVectorEncodings");
  var subProblem = InfillOptimizationAlgorithm.Problem as InfillProblem;
  if (subProblem == null) throw new ArgumentException("InfillOptimizationAlgorithm has no InfillProblem. Troubles with Eventhandling?");
  subProblem.Problem = Problem;

  // seeding
  if (SetSeedRandomly) {
    var freshSeed = new System.Random().Next();
    SeedParameter.Value.Value = freshSeed;
  }
  Random.Reset(Seed);

  // start from a copy of the externally supplied samples, if there are any
  if (InitialSamples != null) Samples = InitialSamples.ToList();
  else Samples = new List<Tuple<RealVector, double>>();

  // result entries
  Results.Add(new Result(IterationsResultName, new IntValue(0)));
  Results.Add(new Result(EvaluatedSoultionsResultName, new IntValue(Samples.Count)));
  Results.Add(new Result(BestSolutionResultName, new RealVector(1)));
  var worstPossibleQuality = Problem.Maximization ? double.MinValue : double.MaxValue;
  Results.Add(new Result(BestQualityResultName, new DoubleValue(worstPossibleQuality)));
  Results.Add(new Result(RegressionSolutionResultName, typeof(IRegressionSolution)));

  var qualityChart = new DataTable(QualitiesChartResultName);
  foreach (var rowName in new[] { BestQualitiesRowResultName, WorstQualitiesRowResultName, CurrentQualitiesRowResultName })
    qualityChart.Rows.Add(new DataRow(rowName));
  Results.Add(new Result(QualitiesChartResultName, qualityChart));

  // the baseline is evaluated once and only reported; it is not added to the samples
  var baseline = BaselineVector;
  if (baseline != null && baseline.Length == encoding.Length) {
    var baselineQuality = Evaluate(baseline).Item2;
    Results.Add(new Result("BaselineValue", new DoubleValue(baselineQuality)));
  }
}
261
/// <summary>
/// Main loop: first fills up the sample list to InitialEvaluations using the initial sampling
/// plan, then repeatedly builds a model, optimizes the infill criterion on it and evaluates
/// the proposed points until MaximumEvaluations is reached or no model could be built.
/// </summary>
/// <param name="cancellationToken">Checked after every evaluation and model build.</param>
protected override void Run(CancellationToken cancellationToken) {
  //initial samples
  if (Samples.Count < InitialEvaluations) {
    var points = InitialSamplingPlan.GetSamples(InitialEvaluations - Samples.Count, Samples.Select(x => x.Item1).ToArray(), (RealVectorEncoding)Problem.Encoding, Random);
    foreach (var t in points) {
      try {
        Samples.Add(Evaluate(t));
        cancellationToken.ThrowIfCancellationRequested();
      }
      finally {
        // results are refreshed even if the evaluation was cancelled or failed
        Analyze();
      }
    }
  }
  //adaptive samples
  // Fix: the iteration counter is no longer reset here. Initialize already creates it with 0,
  // and resetting it on every entry would discard the count when Run is entered again for a
  // paused run (NOTE(review): re-entry on resume is framework behavior - confirm against BasicAlgorithm).
  for (; ResultsEvaluations < MaximumEvaluations; ResultsIterations++) {
    try {
      ResultsModel = BuildModel(cancellationToken);
      if (ResultsModel == null) break; // no model could be built, the algorithm cannot continue
      cancellationToken.ThrowIfCancellationRequested();
      for (var i = 0; i < GenerationSize; i++) {
        var samplepoint = OptimizeInfillProblem(cancellationToken);
        var sample = Evaluate(samplepoint);
        Samples.Add(sample);
        cancellationToken.ThrowIfCancellationRequested();
      }
    }
    finally {
      // also updates ResultsEvaluations, which drives the loop condition
      Analyze();
    }
  }
}
295
/// <summary>
/// Stores already evaluated points as initial samples. Points and qualities are paired by
/// position; surplus entries of the longer array are ignored.
/// </summary>
/// <param name="individuals">The sample points.</param>
/// <param name="qualities">The quality of each sample point.</param>
public void SetInitialSamples(RealVector[] individuals, double[] qualities) {
  var pairs = individuals.Zip(qualities, (point, quality) => Tuple.Create(point, quality));
  InitialSamples = pairs.ToList();
}
299
300    #region Eventhandling
/// <summary>
/// Subscribes to all parameter and sub-algorithm events. Existing subscriptions are removed
/// first so that calling this repeatedly does not attach a handler twice.
/// </summary>
private void RegisterEventhandlers() {
  DeregisterEventhandlers();

  // parameter changes
  RegressionAlgorithmParameter.ValueChanged += OnModelAlgorithmChanged;
  InfillOptimizationAlgorithmParameter.ValueChanged += OnInfillOptimizationAlgorithmChanged;

  // problem change of the current infill optimizer
  var infillOptimizer = InfillOptimizationAlgorithm;
  infillOptimizer.ProblemChanged += InfillOptimizationProblemChanged;

  InfillCriterionParameter.ValueChanged += InfillCriterionChanged;
  InitialSamplesParameter.ToStringChanged += OnInitialSamplesChanged;
}
/// <summary>Removes every subscription that RegisterEventhandlers adds.</summary>
private void DeregisterEventhandlers() {
  // parameter changes
  RegressionAlgorithmParameter.ValueChanged -= OnModelAlgorithmChanged;
  InfillOptimizationAlgorithmParameter.ValueChanged -= OnInfillOptimizationAlgorithmChanged;

  // problem change of the current infill optimizer
  var infillOptimizer = InfillOptimizationAlgorithm;
  infillOptimizer.ProblemChanged -= InfillOptimizationProblemChanged;

  InfillCriterionParameter.ValueChanged -= InfillCriterionChanged;
  InitialSamplesParameter.ToStringChanged -= OnInitialSamplesChanged;
}
/// <summary>
/// Reacts to a new infill optimization algorithm: gives it a fresh InfillProblem and
/// subscribes (exactly once) to its ProblemChanged event.
/// </summary>
private void OnInfillOptimizationAlgorithmChanged(object sender, EventArgs args) {
  SetInfillProblem();
  var infillOptimizer = InfillOptimizationAlgorithm;
  // remove-then-add keeps the subscription unique
  infillOptimizer.ProblemChanged -= InfillOptimizationProblemChanged;
  infillOptimizer.ProblemChanged += InfillOptimizationProblemChanged;
}
/// <summary>
/// Replaces whatever problem was assigned to the infill optimizer by a fresh InfillProblem.
/// </summary>
private void InfillOptimizationProblemChanged(object sender, EventArgs e) {
  // Detach while the problem is replaced: SetInfillProblem assigns the optimizer's Problem itself
  // and this handler must not react to that assignment.
  InfillOptimizationAlgorithm.ProblemChanged -= InfillOptimizationProblemChanged;
  try {
    SetInfillProblem();
  }
  finally {
    // Fix: re-attach even if SetInfillProblem throws, otherwise later problem changes go unnoticed.
    InfillOptimizationAlgorithm.ProblemChanged += InfillOptimizationProblemChanged;
  }
}
/// <summary>Forwards a newly selected infill criterion to the InfillProblem of the infill optimizer.</summary>
/// <exception cref="ArgumentException">The infill optimizer's problem is not an InfillProblem.</exception>
private void InfillCriterionChanged(object sender, EventArgs e) {
  var subProblem = InfillOptimizationAlgorithm.Problem as InfillProblem;
  if (subProblem != null) {
    subProblem.InfillCriterion = InfillCriterion;
    return;
  }
  throw new ArgumentException("InfillOptimizationAlgorithm has no InfillProblem. Troubles with Eventhandling?");
}
/// <summary>Gives a newly selected regression algorithm an empty RegressionProblem to work on.</summary>
private void OnModelAlgorithmChanged(object sender, EventArgs args) {
  var emptyProblem = new RegressionProblem();
  RegressionAlgorithm.Problem = emptyProblem;
}
/// <summary>
/// Lets the user pick a CSV file, imports it as regression data and stores its rows as initial
/// samples (input variables form the point, the target variable is the quality).
/// Does nothing if the import dialog is not confirmed.
/// </summary>
private void OnInitialSamplesChanged(object sender, EventArgs args) {
  IRegressionProblemData samplesData;
  using (var importTypeDialog = new RegressionImportTypeDialog()) {
    if (importTypeDialog.ShowDialog() != DialogResult.OK) return;
    samplesData = new RegressionCSVInstanceProvider().ImportData(importTypeDialog.Path, importTypeDialog.ImportType, importTypeDialog.CSVFormat);

    // The handler is detached while the chosen path is written back to the parameter,
    // presumably so that this write does not trigger the handler again.
    InitialSamplesParameter.ToStringChanged -= OnInitialSamplesChanged;
    try {
      InitialSamplesParameter.Value.Value = importTypeDialog.Path;
    }
    finally {
      // Fix: re-attach the handler. The original removed it a second time here, so the
      // handler was gone for good after the first import.
      InitialSamplesParameter.ToStringChanged += OnInitialSamplesChanged;
    }
  }

  // convert every dataset row into a (point, quality) pair
  var solutions = new RealVector[samplesData.Dataset.Rows];
  var qualities = new double[samplesData.Dataset.Rows];
  var inputVariables = samplesData.InputVariables.CheckedItems.ToArray();
  for (var i = 0; i < solutions.Length; i++) {
    qualities[i] = samplesData.Dataset.GetDoubleValue(samplesData.TargetVariable, i);
    solutions[i] = new RealVector(inputVariables.Length);
    for (var j = 0; j < inputVariables.Length; j++) solutions[i][j] = samplesData.Dataset.GetDoubleValue(inputVariables[j].Value.Value, i);
  }

  SetInitialSamples(solutions, qualities);
}
361
/// <summary>
/// Requests cancellation once the execution time exceeds MaximumRuntime (in seconds).
/// A MaximumRuntime of -1 disables the limit.
/// </summary>
protected override void OnExecutionTimeChanged() {
  base.OnExecutionTimeChanged();
  var limitReached = CancellationTokenSource != null
                     && MaximumRuntime != -1
                     && ExecutionTime.TotalSeconds > MaximumRuntime;
  if (limitReached) CancellationTokenSource.Cancel();
}
/// <summary>
/// Pauses the algorithm. Sub-algorithms that are started or paused are stopped (not paused) first.
/// </summary>
public override void Pause() {
  var infillOptimizer = InfillOptimizationAlgorithm;
  var infillActive = infillOptimizer.ExecutionState == ExecutionState.Started
                     || infillOptimizer.ExecutionState == ExecutionState.Paused;
  if (infillActive) infillOptimizer.Stop();

  var modelBuilder = RegressionAlgorithm;
  var modelBuilderActive = modelBuilder.ExecutionState == ExecutionState.Started
                           || modelBuilder.ExecutionState == ExecutionState.Paused;
  if (modelBuilderActive) modelBuilder.Stop();

  base.Pause();
}
/// <summary>Stops the algorithm after stopping every sub-algorithm that is started or paused.</summary>
public override void Stop() {
  var infillOptimizer = InfillOptimizationAlgorithm;
  var infillActive = infillOptimizer.ExecutionState == ExecutionState.Started
                     || infillOptimizer.ExecutionState == ExecutionState.Paused;
  if (infillActive) infillOptimizer.Stop();

  var modelBuilder = RegressionAlgorithm;
  var modelBuilderActive = modelBuilder.ExecutionState == ExecutionState.Started
                           || modelBuilder.ExecutionState == ExecutionState.Paused;
  if (modelBuilderActive) modelBuilder.Stop();

  base.Stop();
}
/// <summary>Passes a newly assigned problem on to the InfillProblem of the infill optimizer.</summary>
/// <exception cref="ArgumentException">The infill optimizer's problem is not an InfillProblem.</exception>
protected override void OnProblemChanged() {
  base.OnProblemChanged();
  var subProblem = InfillOptimizationAlgorithm.Problem as InfillProblem;
  if (subProblem != null) {
    subProblem.Problem = Problem;
    return;
  }
  throw new ArgumentException("InfillOptimizationAlgorithm has no InfillProblem. Troubles with Eventhandling?");
}
384    #endregion
385
386    #region helpers
/// <summary>
/// Creates a new InfillProblem from the current infill criterion and problem and assigns it
/// to the infill optimization algorithm.
/// </summary>
private void SetInfillProblem() {
  var subProblem = new InfillProblem();
  subProblem.InfillCriterion = InfillCriterion;
  subProblem.Problem = Problem;
  InfillOptimizationAlgorithm.Problem = subProblem;
}
/// <summary>
/// Trains the regression algorithm on the current data samples and returns the resulting
/// solution, or null if no solution could be obtained. Training is attempted up to 100 times.
/// For Gaussian processes a second candidate is built from the previous model's mean and
/// covariance functions on the new data; the candidate with the lower training error wins.
/// </summary>
/// <param name="cancellationToken">Checked after every training attempt.</param>
private IRegressionSolution BuildModel(CancellationToken cancellationToken) {
  // assemble the training data; all rows are used for training, the test partition is empty
  var data = EgoUtilities.GetDataSet(DataSamples.ToList(), RemoveDuplicates);
  var inputNames = data.VariableNames.Where(x => !x.Equals("output"));
  var problemData = new RegressionProblemData(data, inputNames, "output");
  problemData.TrainingPartition.Start = 0;
  problemData.TrainingPartition.End = data.Rows;
  problemData.TestPartition.Start = data.Rows;
  problemData.TestPartition.End = data.Rows;

  var regressionProblem = (RegressionProblem)RegressionAlgorithm.Problem;
  regressionProblem.ProblemDataParameter.Value = problemData;

  // Train until a solution is returned, at most 100 times.
  // TODO (kept from the original): GP models degenerate to NaN often; there is no mitigation
  // for "almost duplicates" that ill-condition the covariance matrix.
  var attempts = 0;
  IRegressionSolution best = null;
  while (best == null) {
    if (attempts++ >= 100) break;
    var runResults = EgoUtilities.SyncRunSubAlgorithm(RegressionAlgorithm, Random.Next(int.MaxValue));
    best = runResults.Select(x => x.Value).OfType<IRegressionSolution>().SingleOrDefault();
    cancellationToken.ThrowIfCancellationRequested();
  }

  // second candidate: previous mean/covariance functions applied to the new dataset
  var gpAlgorithm = RegressionAlgorithm as GaussianProcessRegression;
  var previousSolution = ResultsModel as GaussianProcessRegressionSolution;
  if (gpAlgorithm != null && previousSolution != null) {
    var dimensions = Samples.First().Item1.Length;
    var meanFunction = (IMeanFunction)previousSolution.Model.MeanFunction.Clone();
    var covarianceFunction = (ICovarianceFunction)previousSolution.Model.CovarianceFunction.Clone();
    if (meanFunction.GetNumberOfParameters(dimensions) != 0 || covarianceFunction.GetNumberOfParameters(dimensions) != 0)
      throw new ArgumentException("DEBUG: assumption about fixed paramters wrong");
    // the only hyperparameter passed on is a noise value of 0.0
    double[] hyperparameters = { 0.0 };
    try {
      var refitted = new GaussianProcessModel(problemData.Dataset, problemData.TargetVariable,
        problemData.AllowedInputVariables, problemData.TrainingIndices, hyperparameters, meanFunction, covarianceFunction);
      refitted.FixParameters();
      var candidate = new GaussianProcessRegressionSolution(refitted, problemData);
      if (best == null || best.TrainingMeanSquaredError > candidate.TrainingMeanSquaredError) {
        best = candidate;
      }
    }
    catch (ArgumentException) {
      // the candidate is optional; a failure to build it is ignored
    }
  }

  // debug statistics: number of failed training attempts and, if available, the training RMSE
  const string degeneratesRowName = "DEBUG: Degenerates";
  var chartRows = ResultsQualities.Rows;
  if (!chartRows.ContainsKey(degeneratesRowName)) chartRows.Add(new DataRow(degeneratesRowName));
  chartRows[degeneratesRowName].Values.Add(attempts - 1);

  if (best == null) {
    Results.Add(new Result("Status", new StringValue("The Algorithm did not return a Model")));
  }
  else {
    const string rmseRowName = "DEBUG: RMSE";
    if (!chartRows.ContainsKey(rmseRowName)) chartRows.Add(new DataRow(rmseRowName));
    chartRows[rmseRowName].Values.Add(Math.Sqrt(best.TrainingMeanSquaredError));
  }

  RegressionAlgorithm.Runs.Clear();
  return best;
}
/// <summary>
/// Runs the infill optimization algorithm InfillOptimizationRestarts times on the current model
/// and returns the best point found over all restarts (null if no restart produced one).
/// </summary>
/// <param name="cancellationToken">Checked after every restart.</param>
/// <exception cref="ArgumentException">
/// The infill problem is not set up consistently, or the infill optimizer did not report the
/// expected results.
/// </exception>
private RealVector OptimizeInfillProblem(CancellationToken cancellationToken) {
  // sanity checks on the sub-problem, then bind the criterion to the current model
  var subProblem = InfillOptimizationAlgorithm.Problem as InfillProblem;
  if (subProblem == null) throw new ArgumentException("InfillOptimizationAlgorithm does not have InfillProblem. Problem with Eventhandling?");
  if (subProblem.InfillCriterion != InfillCriterion) throw new ArgumentException("InfillCiriterion for Problem is not correct. Problem with Eventhandling?");
  if (subProblem.Problem != Problem) throw new ArgumentException("Expensive real problem is not correctly set in InfillProblem. Problem with Eventhandling?");
  InfillCriterion.Initialize(ResultsModel, Problem.Maximization, subProblem.Encoding);

  RealVector incumbent = null;
  var incumbentQuality = subProblem.Maximization ? double.NegativeInfinity : double.PositiveInfinity;

  var restart = 0;
  while (restart < InfillOptimizationRestarts) {
    restart++;

    var runResults = EgoUtilities.SyncRunSubAlgorithm(InfillOptimizationAlgorithm, Random.Next(int.MaxValue));
    cancellationToken.ThrowIfCancellationRequested();

    // read the reported solution and quality
    if (!runResults.ContainsKey(BestInfillSolutionResultName)) throw new ArgumentException("The InfillOptimizationAlgorithm did not return a best solution");
    var candidate = runResults[BestInfillSolutionResultName].Value as RealVector;
    if (!runResults.ContainsKey(BestInfillQualityResultName)) throw new ArgumentException("The InfillOptimizationAlgorithm did not return a best quality");
    var candidateQuality = runResults[BestInfillQualityResultName].Value as DoubleValue;
    if (candidateQuality == null || candidate == null) throw new ArgumentException("The InfillOptimizationAlgorithm did not return the expected result types");

    // accept when maximizing and strictly larger, or when minimizing and not larger
    var isLarger = candidateQuality.Value > incumbentQuality;
    if (subProblem.Maximization == isLarger) {
      incumbentQuality = candidateQuality.Value;
      incumbent = candidate;
    }
  }

  InfillOptimizationAlgorithm.Runs.Clear();
  return incumbent;
}
/// <summary>Evaluates <paramref name="point"/> on the problem and returns it together with its quality.</summary>
private Tuple<RealVector, double> Evaluate(RealVector point) {
  var individual = GetIndividual(point);
  double quality = Problem.Evaluate(individual, Random);
  return new Tuple<RealVector, double>(point, quality);
}
/// <summary>
/// Updates the result entries (evaluation count, best solution and quality, quality chart),
/// calls the problem's own analysis and, for two-dimensional problems, maintains a scatter plot
/// of the sampled points.
/// </summary>
private void Analyze() {
  ResultsEvaluations = Samples.Count;
  // Fix: nothing else can be reported without samples. This method is called from finally
  // blocks; indexing into the empty list here would replace the exception that is actually
  // propagating (e.g. from a failed first evaluation).
  if (Samples.Count == 0) return;

  var max = Samples.ArgMax(x => x.Item2);
  var min = Samples.ArgMin(x => x.Item2);
  var best = Samples[Problem.Maximization ? max : min];
  ResultsBestQuality = best.Item2;
  ResultsBestSolution = best.Item1;
  ResultsQualitiesBest.Values.Add(ResultsBestQuality);
  ResultsQualitiesIteration.Values.Add(Samples[Samples.Count - 1].Item2);
  ResultsQualitiesWorst.Values.Add(Samples[Problem.Maximization ? min : max].Item2);
  Problem.Analyze(Samples.Select(x => GetIndividual(x.Item1)).ToArray(), Samples.Select(x => x.Item2).ToArray(), Results, Random);

  // debug plot, only for two-dimensional points
  if (Samples[0].Item1.Length == 2) {
    var plotname = "DEBUG:Sample Distribution";
    var rowInit = "Initial Samples";
    var rowAll = "All Samples";
    if (!Results.ContainsKey(plotname)) Results.Add(new Result(plotname, new ScatterPlot()));
    // direct cast: a foreign value under this key fails here with a clear cast error
    var plot = (ScatterPlot)Results[plotname].Value;
    if (!plot.Rows.ContainsKey(rowInit) && InitialSamples != null && InitialSamples.Count > 0)
      plot.Rows.Add(new ScatterPlotDataRow(rowInit, "samples from inital file (already evaulated)", InitialSamples.Select(x => new Point2D<double>(x.Item1[0], x.Item1[1]))));
    // the "all samples" row is rebuilt from scratch on every call
    if (!plot.Rows.ContainsKey(rowAll)) plot.Rows.Add(new ScatterPlotDataRow(rowAll, "All samples", new Point2D<double>[0]));
    else { plot.Rows[rowAll].Points.Clear(); }
    plot.Rows[rowAll].Points.AddRange(Samples.Select(x => new Point2D<double>(x.Item1[0], x.Item1[1])));
  }
}
/// <summary>
/// Wraps a real vector into an Individual: the vector is stored in a new scope under the name
/// of the problem's encoding.
/// </summary>
private Individual GetIndividual(RealVector r) {
  var container = new Scope();
  var encoding = Problem.Encoding;
  container.Variables.Add(new Variable(encoding.Name, r));
  return new SingleEncodingIndividual(encoding, container);
}
516    #endregion
517  }
518}
Note: See TracBrowser for help on using the repository browser.