Free cookie consent management tool by TermsFeed Policy Generator

source: branches/EfficientGlobalOptimization/HeuristicLab.Algorithms.EGO/EfficientGlobalOptimizationAlgorithm.cs @ 14818

Last change on this file since 14818 was 14818, checked in by bwerth, 7 years ago

#2745 added several new InfillCriteria and moved Parameters from the InfillProblem to the Criteria themselves; added sanity checks for GaussianProcessRegression

File size: 28.4 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using System.Threading;
26using System.Windows.Forms;
27using HeuristicLab.Algorithms.DataAnalysis;
28using HeuristicLab.Analysis;
29using HeuristicLab.Common;
30using HeuristicLab.Core;
31using HeuristicLab.Data;
32using HeuristicLab.Encodings.RealVectorEncoding;
33using HeuristicLab.Optimization;
34using HeuristicLab.Parameters;
35using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
36using HeuristicLab.Problems.DataAnalysis;
37using HeuristicLab.Problems.Instances.DataAnalysis;
38using HeuristicLab.Problems.Instances.DataAnalysis.Views;
39using HeuristicLab.Random;
40
41namespace HeuristicLab.Algorithms.EGO {
42  [StorableClass]
43  [Creatable(CreatableAttribute.Categories.Algorithms, Priority = 95)]
44  [Item("EfficientGlobalOptimizationAlgortihm", "Solves a problem by sequentially learning a model, solving a subproblem on the model and evaluating the best found solution for this subproblem.")]
45  public class EfficientGlobalOptimizationAlgorithm : BasicAlgorithm, ISurrogateAlgorithm<RealVector> {
46    #region Basic-Alg-Essentials
47    public override bool SupportsPause => true;
48    public override Type ProblemType => typeof(SingleObjectiveBasicProblem<IEncoding>);
49    public new SingleObjectiveBasicProblem<IEncoding> Problem
50    {
51      get { return (SingleObjectiveBasicProblem<IEncoding>)base.Problem; }
52      set { base.Problem = value; }
53    }
54    #endregion
55
56    #region ParameterNames
57    private const string GenerationSizeParameterName = "GenerationSize";
58    private const string InfillCriterionParameterName = "InfillCriterion";
59    private const string InfillOptimizationAlgorithmParameterName = "InfillOptimizationAlgorithm";
60    private const string InfillOptimizationRestartsParameterName = "InfillOptimizationRestarts";
61    private const string InitialEvaluationsParameterName = "Initial Evaluations";
62    private const string MaximumEvaluationsParameterName = "Maximum Evaluations";
63    private const string MaximumRuntimeParameterName = "Maximum Runtime";
64    private const string RegressionAlgorithmParameterName = "RegressionAlgorithm";
65    private const string SeedParameterName = "Seed";
66    private const string SetSeedRandomlyParameterName = "SetSeedRandomly";
67    private const string MaximalDataSetSizeParameterName = "MaximalDataSetSize";
68    private const string RemoveDuplicatesParamterName = "RemoveDuplicates";
69    private const string InitialSamplesParameterName = "InitialSamplesFile";
70    private const string BaselineVectorParameterName = "BaselineVector";
71    #endregion
72
73    #region ResultNames
74    private const string BestQualityResultName = "Best Quality";
75    private const string BestSolutionResultName = "Best Solution";
76    private const string EvaluatedSoultionsResultName = "EvaluatedSolutions";
77    private const string IterationsResultName = "Iterations";
78    private const string RegressionSolutionResultName = "Model";
79    private const string QualitiesChartResultName = "Qualities";
80    private const string BestQualitiesRowResultName = "Best Quality";
81    private const string CurrentQualitiesRowResultName = "Current Quality";
82    private const string WorstQualitiesRowResultName = "Worst Quality";
83    #endregion
84
85    #region TransmissionResultNames
86    public const string BestInfillSolutionResultName = "BestInfillSolution";
87    public const string BestInfillQualityResultName = "BestInfillQuality";
88    #endregion
89
90    #region ParameterProperties
91    public IFixedValueParameter<IntValue> GenerationSizeParemeter => Parameters[GenerationSizeParameterName] as IFixedValueParameter<IntValue>;
92    public IConstrainedValueParameter<IInfillCriterion> InfillCriterionParameter => Parameters[InfillCriterionParameterName] as IConstrainedValueParameter<IInfillCriterion>;
93    public IValueParameter<Algorithm> InfillOptimizationAlgorithmParameter => Parameters[InfillOptimizationAlgorithmParameterName] as IValueParameter<Algorithm>;
94    public IFixedValueParameter<IntValue> InfillOptimizationRestartsParemeter => Parameters[InfillOptimizationRestartsParameterName] as IFixedValueParameter<IntValue>;
95    public IFixedValueParameter<IntValue> InitialEvaluationsParameter => Parameters[InitialEvaluationsParameterName] as IFixedValueParameter<IntValue>;
96    public IFixedValueParameter<IntValue> MaximumEvaluationsParameter => Parameters[MaximumEvaluationsParameterName] as IFixedValueParameter<IntValue>;
97    public IFixedValueParameter<IntValue> MaximumRuntimeParameter => Parameters[MaximumRuntimeParameterName] as IFixedValueParameter<IntValue>;
98    public IValueParameter<IDataAnalysisAlgorithm<IRegressionProblem>> RegressionAlgorithmParameter => Parameters[RegressionAlgorithmParameterName] as IValueParameter<IDataAnalysisAlgorithm<IRegressionProblem>>;
99    public IFixedValueParameter<IntValue> SeedParameter => Parameters[SeedParameterName] as IFixedValueParameter<IntValue>;
100    public IFixedValueParameter<BoolValue> SetSeedRandomlyParameter => Parameters[SetSeedRandomlyParameterName] as IFixedValueParameter<BoolValue>;
101    public IFixedValueParameter<IntValue> MaximalDataSetSizeParameter => Parameters[MaximalDataSetSizeParameterName] as IFixedValueParameter<IntValue>;
102    public IFixedValueParameter<BoolValue> RemoveDuplicatesParameter => Parameters[RemoveDuplicatesParamterName] as IFixedValueParameter<BoolValue>;
103
104    public IFixedValueParameter<FileValue> InitialSamplesParameter => Parameters[InitialSamplesParameterName] as IFixedValueParameter<FileValue>;
105
106    public IValueParameter<RealVector> BaselineVectorParameter => Parameters[BaselineVectorParameterName] as IValueParameter<RealVector>;
107    #endregion
108
109    #region Properties
110
111    public int GenerationSize => GenerationSizeParemeter.Value.Value;
112    public IInfillCriterion InfillCriterion => InfillCriterionParameter.Value;
113    public Algorithm InfillOptimizationAlgorithm => InfillOptimizationAlgorithmParameter.Value;
114    public int InfillOptimizationRestarts => InfillOptimizationRestartsParemeter.Value.Value;
115    public int InitialEvaluations => InitialEvaluationsParameter.Value.Value;
116    public int MaximumEvaluations => MaximumEvaluationsParameter.Value.Value;
117    public int MaximumRuntime => MaximumRuntimeParameter.Value.Value;
118    public IDataAnalysisAlgorithm<IRegressionProblem> RegressionAlgorithm => RegressionAlgorithmParameter.Value;
119    public int Seed => SeedParameter.Value.Value;
120    public bool SetSeedRandomly => SetSeedRandomlyParameter.Value.Value;
121    public int MaximalDatasetSize => MaximalDataSetSizeParameter.Value.Value;
122    private IEnumerable<Tuple<RealVector, double>> DataSamples => Samples.Count > MaximalDatasetSize && MaximalDatasetSize > 0
123      ? Samples.Skip(Samples.Count - MaximalDatasetSize)
124      : Samples;
125
126    private bool RemoveDuplicates => RemoveDuplicatesParameter.Value.Value;
127    private RealVector BaselineVector => BaselineVectorParameter.Value;
128    #endregion
129
130    #region StorableProperties
131    [Storable]
132    private IRandom Random = new MersenneTwister();
133    [Storable]
134    private List<Tuple<RealVector, double>> Samples;
135    [Storable]
136    private List<Tuple<RealVector, double>> InitialSamples;
137    #endregion
138
139    #region ResultsProperties
140    private double ResultsBestQuality
141    {
142      get { return ((DoubleValue)Results[BestQualityResultName].Value).Value; }
143      set { ((DoubleValue)Results[BestQualityResultName].Value).Value = value; }
144    }
145    private RealVector ResultsBestSolution
146    {
147      get { return (RealVector)Results[BestSolutionResultName].Value; }
148      set { Results[BestSolutionResultName].Value = value; }
149    }
150    private int ResultsEvaluations
151    {
152      get { return ((IntValue)Results[EvaluatedSoultionsResultName].Value).Value; }
153      set { ((IntValue)Results[EvaluatedSoultionsResultName].Value).Value = value; }
154    }
155    private int ResultsIterations
156    {
157      get { return ((IntValue)Results[IterationsResultName].Value).Value; }
158      set { ((IntValue)Results[IterationsResultName].Value).Value = value; }
159    }
160    private DataTable ResultsQualities => (DataTable)Results[QualitiesChartResultName].Value;
161    private DataRow ResultsQualitiesBest => ResultsQualities.Rows[BestQualitiesRowResultName];
162
163    private DataRow ResultsQualitiesWorst => ResultsQualities.Rows[WorstQualitiesRowResultName];
164
165    private DataRow ResultsQualitiesIteration => ResultsQualities.Rows[CurrentQualitiesRowResultName];
166
167    private IRegressionSolution ResultsModel
168    {
169      get { return (IRegressionSolution)Results[RegressionSolutionResultName].Value; }
170      set { Results[RegressionSolutionResultName].Value = value; }
171    }
172    #endregion
173
174    #region HLConstructors
175    [StorableConstructor]
176    protected EfficientGlobalOptimizationAlgorithm(bool deserializing) : base(deserializing) { }
177    [StorableHook(HookType.AfterDeserialization)]
178    private void AfterDeseialization() {
179      RegisterEventhandlers();
180    }
181    protected EfficientGlobalOptimizationAlgorithm(EfficientGlobalOptimizationAlgorithm original, Cloner cloner)
182      : base(original, cloner) {
183      Random = cloner.Clone(Random);
184      if (original.Samples != null) Samples = original.Samples.Select(x => new Tuple<RealVector, double>(cloner.Clone(x.Item1), x.Item2)).ToList();
185      if (original.InitialSamples != null) Samples = original.InitialSamples.Select(x => new Tuple<RealVector, double>(cloner.Clone(x.Item1), x.Item2)).ToList();
186      RegisterEventhandlers();
187    }
188    public override IDeepCloneable Clone(Cloner cloner) { return new EfficientGlobalOptimizationAlgorithm(this, cloner); }
189    public EfficientGlobalOptimizationAlgorithm() {
190      var cmaes = new CMAEvolutionStrategy.CMAEvolutionStrategy {
191        MaximumGenerations = 300,
192        PopulationSize = 50
193      };
194      var model = new GaussianProcessRegression {
195        Problem = new RegressionProblem()
196      };
197      model.CovarianceFunctionParameter.Value = new CovarianceRationalQuadraticIso();
198      Parameters.Add(new FixedValueParameter<IntValue>(MaximumEvaluationsParameterName, "", new IntValue(int.MaxValue)));
199      Parameters.Add(new FixedValueParameter<IntValue>(InitialEvaluationsParameterName, "", new IntValue(10)));
200      Parameters.Add(new FixedValueParameter<IntValue>(MaximumRuntimeParameterName, "The maximum runtime in seconds after which the algorithm stops. Use -1 to specify no limit for the runtime", new IntValue(3600)));
201      Parameters.Add(new FixedValueParameter<IntValue>(SeedParameterName, "The random seed used to initialize the new pseudo random number generator.", new IntValue(0)));
202      Parameters.Add(new FixedValueParameter<BoolValue>(SetSeedRandomlyParameterName, "True if the random seed should be set to a random value, otherwise false.", new BoolValue(true)));
203      Parameters.Add(new ValueParameter<IDataAnalysisAlgorithm<IRegressionProblem>>(RegressionAlgorithmParameterName, "The model used to approximate the problem", model));
204      Parameters.Add(new ValueParameter<Algorithm>(InfillOptimizationAlgorithmParameterName, "The algorithm used to solve the expected improvement subproblem", cmaes));
205      Parameters.Add(new FixedValueParameter<IntValue>(InfillOptimizationRestartsParameterName, "Number of restarts of the SubAlgortihm to avoid local optima", new IntValue(1)));
206      Parameters.Add(new FixedValueParameter<IntValue>(GenerationSizeParameterName, "Number points that are sampled every iteration (stadard EGO: 1)", new IntValue(1)));
207      Parameters.Add(new ConstrainedValueParameter<IInfillCriterion>(InfillCriterionParameterName, "Decision what value should decide the next sample"));
208      InfillCriterionParameter.ValidValues.Add(new AugmentedExpectedImprovement());
209      InfillCriterionParameter.ValidValues.Add(new ExpectedImprovement());
210      InfillCriterionParameter.ValidValues.Add(new ExpectedQuality());
211      var eqi = new ExpectedQuantileImprovement();
212      InfillCriterionParameter.ValidValues.Add(eqi);
213      eqi.MaxEvaluationsParameter.Value = MaximumEvaluationsParameter.Value;
214      InfillCriterionParameter.ValidValues.Add(new MinimalQuantileCriterium());
215      InfillCriterionParameter.ValidValues.Add(new RobustImprovement());
216      InfillCriterionParameter.ValidValues.Add(new PluginExpectedImprovement());
217      Parameters.Add(new FixedValueParameter<IntValue>(MaximalDataSetSizeParameterName, "The maximum number of sample points used to generate the model. Set 0 or less to use always all samples ", new IntValue(-1)));
218      Parameters.Add(new FixedValueParameter<BoolValue>(RemoveDuplicatesParamterName, "Wether duplicate samples should be replaced by a single sample with an averaged quality. This GREATLY decreases the chance of ill conditioned models (unbuildable models) but is not theoretically sound as the model ignores the increasing certainty in this region"));
219      Parameters.Add(new FixedValueParameter<FileValue>(InitialSamplesParameterName, "The file specifying some initial samples used to jump start the algorithm. These samples are not counted as evaluations. If InitialEvaluations is more than the samples specified in the file, the rest is uniformly random generated and evaluated.", new FileValue()));
220      Parameters.Add(new ValueParameter<RealVector>(BaselineVectorParameterName, "A vector used to create a baseline, this vector is evaluated once and is not part of the modeling process (has no influence on algorithm performance)"));
221      SetInfillProblem();
222      RegisterEventhandlers();
223    }
224    #endregion
225
226    protected override void Initialize(CancellationToken cancellationToken) {
227      base.Initialize(cancellationToken);
228      //encoding
229      var enc = Problem.Encoding as RealVectorEncoding;
230      if (enc == null) throw new ArgumentException("The EGO algorithm can only be applied to RealVectorEncodings");
231      var infillProblem = InfillOptimizationAlgorithm.Problem as InfillProblem;
232      if (infillProblem == null) throw new ArgumentException("InfillOptimizationAlgorithm has no InfillProblem. Troubles with Eventhandling?");
233      infillProblem.Problem = Problem;
234
235
236      //random
237      if (SetSeedRandomly) SeedParameter.Value.Value = new System.Random().Next();
238      Random.Reset(Seed);
239      Samples = InitialSamples == null ? new List<Tuple<RealVector, double>>() : InitialSamples.ToList();
240
241      //results
242      Results.Add(new Result(IterationsResultName, new IntValue(0)));
243      Results.Add(new Result(EvaluatedSoultionsResultName, new IntValue(Samples.Count)));
244      Results.Add(new Result(BestSolutionResultName, new RealVector(1)));
245      Results.Add(new Result(BestQualityResultName, new DoubleValue(Problem.Maximization ? double.MinValue : double.MaxValue)));
246      Results.Add(new Result(RegressionSolutionResultName, typeof(IRegressionSolution)));
247      var table = new DataTable(QualitiesChartResultName);
248      table.Rows.Add(new DataRow(BestQualitiesRowResultName));
249      table.Rows.Add(new DataRow(WorstQualitiesRowResultName));
250      table.Rows.Add(new DataRow(CurrentQualitiesRowResultName));
251      Results.Add(new Result(QualitiesChartResultName, table));
252      if (BaselineVector != null && BaselineVector.Length == enc.Length)
253        Results.Add(new Result("BaselineValue", new DoubleValue(Evaluate(BaselineVector).Item2)));
254
255      //initial samples
256      if (Samples.Count < InitialEvaluations) {
257        var points = EgoUtilities.GetUniformRandomDesign(InitialEvaluations - Samples.Count, enc.Length, enc.Bounds, Random);
258        foreach (var t in points) {
259          Samples.Add(Evaluate(t));
260          cancellationToken.ThrowIfCancellationRequested();
261        }
262      }
263
264      Analyze();
265    }
266
267    protected override void Run(CancellationToken cancellationToken) {
268      for (ResultsIterations = 0; ResultsEvaluations < MaximumEvaluations; ResultsIterations++) {
269        try {
270          ResultsModel = BuildModel(cancellationToken);
271          if (ResultsModel == null) break;
272          cancellationToken.ThrowIfCancellationRequested();
273          for (var i = 0; i < GenerationSize; i++) {
274            var samplepoint = OptimizeInfillProblem(cancellationToken);
275            var sample = Evaluate(samplepoint);
276            Samples.Add(sample);
277            cancellationToken.ThrowIfCancellationRequested();
278          }
279
280        }
281        finally {
282          Analyze();
283        }
284      }
285    }
286
287    public void SetInitialSamples(RealVector[] individuals, double[] qualities) {
288      InitialSamples = individuals.Zip(qualities, (individual, d) => new Tuple<RealVector, double>(individual, d)).ToList();
289    }
290
291    #region Eventhandling
292    private void RegisterEventhandlers() {
293      DeregisterEventhandlers();
294      RegressionAlgorithmParameter.ValueChanged += OnModelAlgorithmChanged;
295      InfillOptimizationAlgorithmParameter.ValueChanged += OnInfillOptimizationAlgorithmChanged;
296      InfillOptimizationAlgorithm.ProblemChanged += InfillOptimizationProblemChanged;
297      InfillCriterionParameter.ValueChanged += InfillCriterionChanged;
298      InitialSamplesParameter.ToStringChanged += OnInitialSamplesChanged;
299
300
301    }
302    private void DeregisterEventhandlers() {
303      RegressionAlgorithmParameter.ValueChanged -= OnModelAlgorithmChanged;
304      InfillOptimizationAlgorithmParameter.ValueChanged -= OnInfillOptimizationAlgorithmChanged;
305      InfillOptimizationAlgorithm.ProblemChanged -= InfillOptimizationProblemChanged;
306      InfillCriterionParameter.ValueChanged -= InfillCriterionChanged;
307      InitialSamplesParameter.ToStringChanged -= OnInitialSamplesChanged;
308    }
309    private void OnInfillOptimizationAlgorithmChanged(object sender, EventArgs args) {
310      SetInfillProblem();
311      InfillOptimizationAlgorithm.ProblemChanged -= InfillOptimizationProblemChanged;
312      InfillOptimizationAlgorithm.ProblemChanged += InfillOptimizationProblemChanged;
313    }
314    private void InfillOptimizationProblemChanged(object sender, EventArgs e) {
315      InfillOptimizationAlgorithm.ProblemChanged -= InfillOptimizationProblemChanged;
316      SetInfillProblem();
317      InfillOptimizationAlgorithm.ProblemChanged += InfillOptimizationProblemChanged;
318    }
319    private void InfillCriterionChanged(object sender, EventArgs e) {
320      var infillProblem = InfillOptimizationAlgorithm.Problem as InfillProblem;
321      if (infillProblem == null) throw new ArgumentException("InfillOptimizationAlgorithm has no InfillProblem. Troubles with Eventhandling?");
322      infillProblem.InfillCriterion = InfillCriterion;
323    }
324    private void OnModelAlgorithmChanged(object sender, EventArgs args) {
325      RegressionAlgorithm.Problem = new RegressionProblem();
326    }
327    private void OnInitialSamplesChanged(object sender, EventArgs args) {
328      IRegressionProblemData samplesData = null;
329      using (var importTypeDialog = new RegressionImportTypeDialog()) {
330        if (importTypeDialog.ShowDialog() != DialogResult.OK) return;
331        samplesData = new RegressionCSVInstanceProvider().ImportData(importTypeDialog.Path, importTypeDialog.ImportType, importTypeDialog.CSVFormat);
332        InitialSamplesParameter.ToStringChanged -= OnInitialSamplesChanged;
333        InitialSamplesParameter.Value.Value = importTypeDialog.Path;
334        InitialSamplesParameter.ToStringChanged -= OnInitialSamplesChanged;
335
336      }
337
338
339
340      var solutions = new RealVector[samplesData.Dataset.Rows];
341      var qualities = new double[samplesData.Dataset.Rows];
342      var inputVariables = samplesData.InputVariables.CheckedItems.ToArray();
343      for (var i = 0; i < solutions.Length; i++) {
344        qualities[i] = samplesData.Dataset.GetDoubleValue(samplesData.TargetVariable, i);
345        solutions[i] = new RealVector(inputVariables.Length);
346        for (var j = 0; j < inputVariables.Length; j++) solutions[i][j] = samplesData.Dataset.GetDoubleValue(inputVariables[j].Value.Value, i);
347      }
348
349      SetInitialSamples(solutions, qualities);
350
351    }
352
353    protected override void OnExecutionTimeChanged() {
354      base.OnExecutionTimeChanged();
355      if (CancellationTokenSource == null) return;
356      if (MaximumRuntime == -1) return;
357      if (ExecutionTime.TotalSeconds > MaximumRuntime) CancellationTokenSource.Cancel();
358    }
359    public override void Pause() {
360      if (InfillOptimizationAlgorithm.ExecutionState == ExecutionState.Started || InfillOptimizationAlgorithm.ExecutionState == ExecutionState.Paused) InfillOptimizationAlgorithm.Stop();
361      if (RegressionAlgorithm.ExecutionState == ExecutionState.Started || RegressionAlgorithm.ExecutionState == ExecutionState.Paused) RegressionAlgorithm.Stop();
362      base.Pause();
363    }
364    public override void Stop() {
365      if (InfillOptimizationAlgorithm.ExecutionState == ExecutionState.Started || InfillOptimizationAlgorithm.ExecutionState == ExecutionState.Paused) InfillOptimizationAlgorithm.Stop();
366      if (RegressionAlgorithm.ExecutionState == ExecutionState.Started || RegressionAlgorithm.ExecutionState == ExecutionState.Paused) RegressionAlgorithm.Stop();
367      base.Stop();
368    }
369    protected override void OnProblemChanged() {
370      base.OnProblemChanged();
371      var infillProblem = InfillOptimizationAlgorithm.Problem as InfillProblem;
372      if (infillProblem == null) throw new ArgumentException("InfillOptimizationAlgorithm has no InfillProblem. Troubles with Eventhandling?");
373      infillProblem.Problem = Problem;
374    }
375    #endregion
376
377    #region helpers
378    private void SetInfillProblem() {
379      var infillProblem = new InfillProblem {
380        InfillCriterion = InfillCriterion,
381        Problem = Problem
382      };
383      InfillOptimizationAlgorithm.Problem = infillProblem;
384    }
385    private IRegressionSolution BuildModel(CancellationToken cancellationToken) {
386      var dataset = EgoUtilities.GetDataSet(DataSamples.ToList(), RemoveDuplicates);
387      var problemdata = new RegressionProblemData(dataset, dataset.VariableNames.Where(x => !x.Equals("output")), "output");
388      problemdata.TrainingPartition.Start = 0;
389      problemdata.TrainingPartition.End = dataset.Rows;
390      problemdata.TestPartition.Start = dataset.Rows;
391      problemdata.TestPartition.End = dataset.Rows;
392
393      //train
394      var problem = (RegressionProblem)RegressionAlgorithm.Problem;
395      problem.ProblemDataParameter.Value = problemdata;
396      var i = 0;
397      IRegressionSolution solution = null;
398
399      while (solution == null && i++ < 100) {  //TODO: Question: Why does GP degenerate to NaN so often? Answer: There is not even the slightest mitigation strategy for "almost duplicates" that ill-condition the covariance matrix.
400        var results = EgoUtilities.SyncRunSubAlgorithm(RegressionAlgorithm, Random.Next(int.MaxValue));
401        solution = results.Select(x => x.Value).OfType<IRegressionSolution>().SingleOrDefault();
402        cancellationToken.ThrowIfCancellationRequested();
403      }
404
405      //try creating a model with old hyperparameters and new dataset;
406      var gp = RegressionAlgorithm as GaussianProcessRegression;
407      var oldmodel = ResultsModel as GaussianProcessRegressionSolution;
408      if (gp != null && oldmodel != null) {
409        var n = Samples.First().Item1.Length;
410        var mean = (IMeanFunction)oldmodel.Model.MeanFunction.Clone();
411        var cov = (ICovarianceFunction)oldmodel.Model.CovarianceFunction.Clone();
412        if (mean.GetNumberOfParameters(n) != 0 || cov.GetNumberOfParameters(n) != 0) throw new ArgumentException("DEBUG: assumption about fixed paramters wrong");
413        var noise = 0.0;
414        double[] hyp = { noise };
415        try {
416          var model = new GaussianProcessModel(problemdata.Dataset, problemdata.TargetVariable,
417            problemdata.AllowedInputVariables, problemdata.TrainingIndices, hyp, mean, cov);
418          model.FixParameters();
419          var sol = new GaussianProcessRegressionSolution(model, problemdata);
420          if (solution == null || solution.TrainingMeanSquaredError > sol.TrainingMeanSquaredError) {
421            solution = sol;
422          }
423        }
424        catch (ArgumentException) { }
425      }
426
427
428      if (!ResultsQualities.Rows.ContainsKey("DEBUG: Degenerates")) ResultsQualities.Rows.Add(new DataRow("DEBUG: Degenerates"));
429      var row = ResultsQualities.Rows["DEBUG: Degenerates"];
430      row.Values.Add(i - 1);
431      if (solution == null) Results.Add(new Result("Status", new StringValue("The Algorithm did not return a Model")));
432      else {
433        if (!ResultsQualities.Rows.ContainsKey("DEBUG: RMSE")) ResultsQualities.Rows.Add(new DataRow("DEBUG: RMSE"));
434        row = ResultsQualities.Rows["DEBUG: RMSE"];
435        row.Values.Add(Math.Sqrt(solution.TrainingMeanSquaredError));
436      }
437
438      RegressionAlgorithm.Runs.Clear();
439      return solution;
440    }
441    private RealVector OptimizeInfillProblem(CancellationToken cancellationToken) {
442      //parameterize and check InfillProblem
443      var infillProblem = InfillOptimizationAlgorithm.Problem as InfillProblem;
444      if (infillProblem == null) throw new ArgumentException("InfillOptimizationAlgorithm does not have InfillProblem. Problem with Eventhandling?");
445      if (infillProblem.InfillCriterion != InfillCriterion) throw new ArgumentException("InfillCiriterion for Problem is not correct. Problem with Eventhandling?");
446      if (infillProblem.Problem != Problem) throw new ArgumentException("Expensive real problem is not correctly set in InfillProblem. Problem with Eventhandling?");
447      InfillCriterion.Initialize(ResultsModel, Problem.Maximization, infillProblem.Encoding);
448
449      RealVector bestVector = null;
450      var bestValue = infillProblem.Maximization ? double.NegativeInfinity : double.PositiveInfinity;
451
452      for (var i = 0; i < InfillOptimizationRestarts; i++) {
453        //optimize
454        var res = EgoUtilities.SyncRunSubAlgorithm(InfillOptimizationAlgorithm, Random.Next(int.MaxValue));
455        cancellationToken.ThrowIfCancellationRequested();
456        //extract results
457        if (!res.ContainsKey(BestInfillSolutionResultName)) throw new ArgumentException("The InfillOptimizationAlgorithm did not return a best solution");
458        var v = res[BestInfillSolutionResultName].Value as RealVector;
459        if (!res.ContainsKey(BestInfillQualityResultName)) throw new ArgumentException("The InfillOptimizationAlgorithm did not return a best quality");
460        var d = res[BestInfillQualityResultName].Value as DoubleValue;
461        if (d == null || v == null) throw new ArgumentException("The InfillOptimizationAlgorithm did not return the expected result types");
462
463        //check for improvement
464        if (infillProblem.Maximization != d.Value > bestValue) continue;
465        bestValue = d.Value;
466        bestVector = v;
467      }
468
469      InfillOptimizationAlgorithm.Runs.Clear();
470      return bestVector;
471    }
472    private Tuple<RealVector, double> Evaluate(RealVector point) {
473      return new Tuple<RealVector, double>(point, Problem.Evaluate(GetIndividual(point), Random));
474    }
475    private void Analyze() {
476      ResultsEvaluations = Samples.Count;
477      var max = Samples.ArgMax(x => x.Item2);
478      var min = Samples.ArgMin(x => x.Item2);
479      var best = Samples[Problem.Maximization ? max : min];
480      ResultsBestQuality = best.Item2;
481      ResultsBestSolution = best.Item1;
482      ResultsQualitiesBest.Values.Add(ResultsBestQuality);
483      ResultsQualitiesIteration.Values.Add(Samples[Samples.Count - 1].Item2);
484      ResultsQualitiesWorst.Values.Add(Samples[Problem.Maximization ? min : max].Item2);
485      Problem.Analyze(Samples.Select(x => GetIndividual(x.Item1)).ToArray(), Samples.Select(x => x.Item2).ToArray(), Results, Random);
486    }
487    private Individual GetIndividual(RealVector r) {
488      var scope = new Scope();
489      scope.Variables.Add(new Variable(Problem.Encoding.Name, r));
490      return new SingleEncodingIndividual(Problem.Encoding, scope);
491    }
492    #endregion
493  }
494}
Note: See TracBrowser for help on using the repository browser.