Changeset 14818 for branches/EfficientGlobalOptimization/HeuristicLab.Algorithms.EGO/EfficientGlobalOptimizationAlgorithm.cs
- Timestamp: 04/04/17 12:37:52 (7 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
branches/EfficientGlobalOptimization/HeuristicLab.Algorithms.EGO/EfficientGlobalOptimizationAlgorithm.cs
r14768 r14818 24 24 using System.Linq; 25 25 using System.Threading; 26 using System.Windows.Forms; 26 27 using HeuristicLab.Algorithms.DataAnalysis; 27 28 using HeuristicLab.Analysis; … … 34 35 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; 35 36 using HeuristicLab.Problems.DataAnalysis; 37 using HeuristicLab.Problems.Instances.DataAnalysis; 38 using HeuristicLab.Problems.Instances.DataAnalysis.Views; 36 39 using HeuristicLab.Random; 37 40 … … 57 60 private const string InfillOptimizationRestartsParameterName = "InfillOptimizationRestarts"; 58 61 private const string InitialEvaluationsParameterName = "Initial Evaluations"; 59 private const string Maximum IterationsParameterName = "Maximum Iterations";62 private const string MaximumEvaluationsParameterName = "Maximum Evaluations"; 60 63 private const string MaximumRuntimeParameterName = "Maximum Runtime"; 61 64 private const string RegressionAlgorithmParameterName = "RegressionAlgorithm"; … … 63 66 private const string SetSeedRandomlyParameterName = "SetSeedRandomly"; 64 67 private const string MaximalDataSetSizeParameterName = "MaximalDataSetSize"; 68 private const string RemoveDuplicatesParamterName = "RemoveDuplicates"; 69 private const string InitialSamplesParameterName = "InitialSamplesFile"; 70 private const string BaselineVectorParameterName = "BaselineVector"; 65 71 #endregion 66 72 … … 88 94 public IFixedValueParameter<IntValue> InfillOptimizationRestartsParemeter => Parameters[InfillOptimizationRestartsParameterName] as IFixedValueParameter<IntValue>; 89 95 public IFixedValueParameter<IntValue> InitialEvaluationsParameter => Parameters[InitialEvaluationsParameterName] as IFixedValueParameter<IntValue>; 90 public IFixedValueParameter<IntValue> Maximum IterationsParameter => Parameters[MaximumIterationsParameterName] as IFixedValueParameter<IntValue>;96 public IFixedValueParameter<IntValue> MaximumEvaluationsParameter => Parameters[MaximumEvaluationsParameterName] as 
IFixedValueParameter<IntValue>; 91 97 public IFixedValueParameter<IntValue> MaximumRuntimeParameter => Parameters[MaximumRuntimeParameterName] as IFixedValueParameter<IntValue>; 92 98 public IValueParameter<IDataAnalysisAlgorithm<IRegressionProblem>> RegressionAlgorithmParameter => Parameters[RegressionAlgorithmParameterName] as IValueParameter<IDataAnalysisAlgorithm<IRegressionProblem>>; … … 94 100 public IFixedValueParameter<BoolValue> SetSeedRandomlyParameter => Parameters[SetSeedRandomlyParameterName] as IFixedValueParameter<BoolValue>; 95 101 public IFixedValueParameter<IntValue> MaximalDataSetSizeParameter => Parameters[MaximalDataSetSizeParameterName] as IFixedValueParameter<IntValue>; 102 public IFixedValueParameter<BoolValue> RemoveDuplicatesParameter => Parameters[RemoveDuplicatesParamterName] as IFixedValueParameter<BoolValue>; 103 104 public IFixedValueParameter<FileValue> InitialSamplesParameter => Parameters[InitialSamplesParameterName] as IFixedValueParameter<FileValue>; 105 106 public IValueParameter<RealVector> BaselineVectorParameter => Parameters[BaselineVectorParameterName] as IValueParameter<RealVector>; 96 107 #endregion 97 108 98 109 #region Properties 99 110 100 public int GenerationSize 101 { 102 get { return GenerationSizeParemeter.Value.Value; } 103 } 104 public IInfillCriterion InfillCriterion 105 { 106 get { return InfillCriterionParameter.Value; } 107 } 108 public Algorithm InfillOptimizationAlgorithm 109 { 110 get { return InfillOptimizationAlgorithmParameter.Value; } 111 } 112 public int InfillOptimizationRestarts 113 { 114 get { return InfillOptimizationRestartsParemeter.Value.Value; } 115 } 116 public int InitialEvaluations 117 { 118 get { return InitialEvaluationsParameter.Value.Value; } 119 } 120 public int MaximumIterations 121 { 122 get { return MaximumIterationsParameter.Value.Value; } 123 } 124 public int MaximumRuntime 125 { 126 get { return MaximumRuntimeParameter.Value.Value; } 127 } 128 public 
IDataAnalysisAlgorithm<IRegressionProblem> RegressionAlgorithm 129 { 130 get { return RegressionAlgorithmParameter.Value; } 131 } 132 public int Seed 133 { 134 get { return SeedParameter.Value.Value; } 135 } 136 public bool SetSeedRandomly 137 { 138 get { return SetSeedRandomlyParameter.Value.Value; } 139 } 140 public int MaximalDatasetSize 141 { 142 get { return MaximalDataSetSizeParameter.Value.Value; } 143 } 144 145 private IEnumerable<Tuple<RealVector, double>> DataSamples 146 { 147 get 148 { 149 return Samples.Count > MaximalDatasetSize && MaximalDatasetSize > 0 150 ? Samples.Skip(Samples.Count - MaximalDatasetSize) 151 : Samples; 152 } 153 } 154 111 public int GenerationSize => GenerationSizeParemeter.Value.Value; 112 public IInfillCriterion InfillCriterion => InfillCriterionParameter.Value; 113 public Algorithm InfillOptimizationAlgorithm => InfillOptimizationAlgorithmParameter.Value; 114 public int InfillOptimizationRestarts => InfillOptimizationRestartsParemeter.Value.Value; 115 public int InitialEvaluations => InitialEvaluationsParameter.Value.Value; 116 public int MaximumEvaluations => MaximumEvaluationsParameter.Value.Value; 117 public int MaximumRuntime => MaximumRuntimeParameter.Value.Value; 118 public IDataAnalysisAlgorithm<IRegressionProblem> RegressionAlgorithm => RegressionAlgorithmParameter.Value; 119 public int Seed => SeedParameter.Value.Value; 120 public bool SetSeedRandomly => SetSeedRandomlyParameter.Value.Value; 121 public int MaximalDatasetSize => MaximalDataSetSizeParameter.Value.Value; 122 private IEnumerable<Tuple<RealVector, double>> DataSamples => Samples.Count > MaximalDatasetSize && MaximalDatasetSize > 0 123 ? 
Samples.Skip(Samples.Count - MaximalDatasetSize) 124 : Samples; 125 126 private bool RemoveDuplicates => RemoveDuplicatesParameter.Value.Value; 127 private RealVector BaselineVector => BaselineVectorParameter.Value; 155 128 #endregion 156 129 … … 185 158 set { ((IntValue)Results[IterationsResultName].Value).Value = value; } 186 159 } 187 private DataTable ResultsQualities 188 { 189 get { return (DataTable)Results[QualitiesChartResultName].Value; } 190 } 191 private DataRow ResultsQualitiesBest 192 { 193 get { return ResultsQualities.Rows[BestQualitiesRowResultName]; } 194 } 195 private DataRow ResultsQualitiesWorst 196 { 197 get { return ResultsQualities.Rows[WorstQualitiesRowResultName]; } 198 } 199 private DataRow ResultsQualitiesIteration 200 { 201 get { return ResultsQualities.Rows[CurrentQualitiesRowResultName]; } 202 } 160 private DataTable ResultsQualities => (DataTable)Results[QualitiesChartResultName].Value; 161 private DataRow ResultsQualitiesBest => ResultsQualities.Rows[BestQualitiesRowResultName]; 162 163 private DataRow ResultsQualitiesWorst => ResultsQualities.Rows[WorstQualitiesRowResultName]; 164 165 private DataRow ResultsQualitiesIteration => ResultsQualities.Rows[CurrentQualitiesRowResultName]; 166 203 167 private IRegressionSolution ResultsModel 204 168 { … … 232 196 }; 233 197 model.CovarianceFunctionParameter.Value = new CovarianceRationalQuadraticIso(); 234 Parameters.Add(new FixedValueParameter<IntValue>(Maximum IterationsParameterName, "", new IntValue(int.MaxValue)));198 Parameters.Add(new FixedValueParameter<IntValue>(MaximumEvaluationsParameterName, "", new IntValue(int.MaxValue))); 235 199 Parameters.Add(new FixedValueParameter<IntValue>(InitialEvaluationsParameterName, "", new IntValue(10))); 236 200 Parameters.Add(new FixedValueParameter<IntValue>(MaximumRuntimeParameterName, "The maximum runtime in seconds after which the algorithm stops. 
Use -1 to specify no limit for the runtime", new IntValue(3600))); … … 242 206 Parameters.Add(new FixedValueParameter<IntValue>(GenerationSizeParameterName, "Number points that are sampled every iteration (stadard EGO: 1)", new IntValue(1))); 243 207 Parameters.Add(new ConstrainedValueParameter<IInfillCriterion>(InfillCriterionParameterName, "Decision what value should decide the next sample")); 208 InfillCriterionParameter.ValidValues.Add(new AugmentedExpectedImprovement()); 244 209 InfillCriterionParameter.ValidValues.Add(new ExpectedImprovement()); 245 210 InfillCriterionParameter.ValidValues.Add(new ExpectedQuality()); 246 InfillCriterionParameter.ValidValues.Add(new ConfidenceBound()); 211 var eqi = new ExpectedQuantileImprovement(); 212 InfillCriterionParameter.ValidValues.Add(eqi); 213 eqi.MaxEvaluationsParameter.Value = MaximumEvaluationsParameter.Value; 214 InfillCriterionParameter.ValidValues.Add(new MinimalQuantileCriterium()); 215 InfillCriterionParameter.ValidValues.Add(new RobustImprovement()); 216 InfillCriterionParameter.ValidValues.Add(new PluginExpectedImprovement()); 247 217 Parameters.Add(new FixedValueParameter<IntValue>(MaximalDataSetSizeParameterName, "The maximum number of sample points used to generate the model. Set 0 or less to use always all samples ", new IntValue(-1))); 248 218 Parameters.Add(new FixedValueParameter<BoolValue>(RemoveDuplicatesParamterName, "Wether duplicate samples should be replaced by a single sample with an averaged quality. This GREATLY decreases the chance of ill conditioned models (unbuildable models) but is not theoretically sound as the model ignores the increasing certainty in this region")); 219 Parameters.Add(new FixedValueParameter<FileValue>(InitialSamplesParameterName, "The file specifying some initial samples used to jump start the algorithm. These samples are not counted as evaluations. 
If InitialEvaluations is more than the samples specified in the file, the rest is uniformly random generated and evaluated.", new FileValue())); 220 Parameters.Add(new ValueParameter<RealVector>(BaselineVectorParameterName, "A vector used to create a baseline, this vector is evaluated once and is not part of the modeling process (has no influence on algorithm performance)")); 249 221 SetInfillProblem(); 250 222 RegisterEventhandlers(); … … 257 229 var enc = Problem.Encoding as RealVectorEncoding; 258 230 if (enc == null) throw new ArgumentException("The EGO algorithm can only be applied to RealVectorEncodings"); 231 var infillProblem = InfillOptimizationAlgorithm.Problem as InfillProblem; 232 if (infillProblem == null) throw new ArgumentException("InfillOptimizationAlgorithm has no InfillProblem. Troubles with Eventhandling?"); 233 infillProblem.Problem = Problem; 234 259 235 260 236 //random … … 274 250 table.Rows.Add(new DataRow(CurrentQualitiesRowResultName)); 275 251 Results.Add(new Result(QualitiesChartResultName, table)); 252 if (BaselineVector != null && BaselineVector.Length == enc.Length) 253 Results.Add(new Result("BaselineValue", new DoubleValue(Evaluate(BaselineVector).Item2))); 276 254 277 255 //initial samples … … 288 266 289 267 protected override void Run(CancellationToken cancellationToken) { 290 for (ResultsIterations = 0; Results Iterations < MaximumIterations; ResultsIterations++) {268 for (ResultsIterations = 0; ResultsEvaluations < MaximumEvaluations; ResultsIterations++) { 291 269 try { 292 270 ResultsModel = BuildModel(cancellationToken); 271 if (ResultsModel == null) break; 293 272 cancellationToken.ThrowIfCancellationRequested(); 294 273 for (var i = 0; i < GenerationSize; i++) { 295 var samplepoint = OptimizeInfillProblem( );274 var samplepoint = OptimizeInfillProblem(cancellationToken); 296 275 var sample = Evaluate(samplepoint); 297 276 Samples.Add(sample); … … 317 296 InfillOptimizationAlgorithm.ProblemChanged += 
InfillOptimizationProblemChanged; 318 297 InfillCriterionParameter.ValueChanged += InfillCriterionChanged; 298 InitialSamplesParameter.ToStringChanged += OnInitialSamplesChanged; 299 319 300 320 301 } … … 324 305 InfillOptimizationAlgorithm.ProblemChanged -= InfillOptimizationProblemChanged; 325 306 InfillCriterionParameter.ValueChanged -= InfillCriterionChanged; 307 InitialSamplesParameter.ToStringChanged -= OnInitialSamplesChanged; 326 308 } 327 309 private void OnInfillOptimizationAlgorithmChanged(object sender, EventArgs args) { … … 343 325 RegressionAlgorithm.Problem = new RegressionProblem(); 344 326 }
// Reacts to a change of the initial-samples file parameter: shows the regression import dialog,
// imports the chosen CSV file as regression problem data, stores the chosen path in the parameter
// and passes the imported points and their target values to SetInitialSamples.
// Returns without any change when the dialog is not confirmed with OK.
private void OnInitialSamplesChanged(object sender, EventArgs args) {
  IRegressionProblemData samplesData = null;
  using (var importTypeDialog = new RegressionImportTypeDialog()) {
    if (importTypeDialog.ShowDialog() != DialogResult.OK) return;
    samplesData = new RegressionCSVInstanceProvider().ImportData(importTypeDialog.Path, importTypeDialog.ImportType, importTypeDialog.CSVFormat);

    // Detach this handler while the path is written back.
    // NOTE(review): presumably the assignment raises ToStringChanged and would re-enter this method - confirm.
    InitialSamplesParameter.ToStringChanged -= OnInitialSamplesChanged;
    try {
      InitialSamplesParameter.Value.Value = importTypeDialog.Path;
    }
    finally {
      // Re-attach the handler. Previously "-=" was executed a second time here, which left the
      // handler unsubscribed after the first import, so later file changes were never noticed.
      InitialSamplesParameter.ToStringChanged += OnInitialSamplesChanged;
    }
  }

  // One sample per dataset row: the checked input variables form the point,
  // the target variable supplies its quality.
  var solutions = new RealVector[samplesData.Dataset.Rows];
  var qualities = new double[samplesData.Dataset.Rows];
  var inputVariables = samplesData.InputVariables.CheckedItems.ToArray();
  for (var i = 0; i < solutions.Length; i++) {
    qualities[i] = samplesData.Dataset.GetDoubleValue(samplesData.TargetVariable, i);
    solutions[i] = new RealVector(inputVariables.Length);
    for (var j = 0; j < inputVariables.Length; j++) {
      solutions[i][j] = samplesData.Dataset.GetDoubleValue(inputVariables[j].Value.Value, i);
    }
  }

  SetInitialSamples(solutions, qualities);
}
345 353 protected override void OnExecutionTimeChanged() { 346 354 base.OnExecutionTimeChanged(); … … 350 358 } 351 359 public override void Pause() {
352 if (InfillOptimizationAlgorithm.ExecutionState == ExecutionState.Started ) InfillOptimizationAlgorithm.Pause();353 if (RegressionAlgorithm.ExecutionState == ExecutionState.Started ) RegressionAlgorithm.Pause();360 if (InfillOptimizationAlgorithm.ExecutionState == ExecutionState.Started || InfillOptimizationAlgorithm.ExecutionState == ExecutionState.Paused) InfillOptimizationAlgorithm.Stop(); 361 if (RegressionAlgorithm.ExecutionState == ExecutionState.Started || RegressionAlgorithm.ExecutionState == ExecutionState.Paused) RegressionAlgorithm.Stop(); 354 362 base.Pause(); 355 363 } … … 376 384 } 377 385 private IRegressionSolution BuildModel(CancellationToken cancellationToken) { 378 var dataset = EgoUtilities.GetDataSet(DataSamples.ToList() );386 var dataset = EgoUtilities.GetDataSet(DataSamples.ToList(), RemoveDuplicates); 379 387 var problemdata = new RegressionProblemData(dataset, dataset.VariableNames.Where(x => !x.Equals("output")), "output"); 380 388 problemdata.TrainingPartition.Start = 0; … … 388 396 var i = 0; 389 397 IRegressionSolution solution = null; 390 double r2 = 0; 391 while ( (solution == null || RegressionAlgorithm is GaussianProcessRegression && r2 < 0.95) && i++ < 100) { //TODO: ask why GP degenerates to NaN so often398 399 while (solution == null && i++ < 100) { //TODO: Question: Why does GP degenerate to NaN so often? Answer: There is not even the slightest mitigation strategy for "almost duplicates" that ill-condition the covariance matrix. 392 400 var results = EgoUtilities.SyncRunSubAlgorithm(RegressionAlgorithm, Random.Next(int.MaxValue)); 393 401 solution = results.Select(x => x.Value).OfType<IRegressionSolution>().SingleOrDefault(); 394 r2 = solution?.TrainingRSquared ?? 
0;395 402 cancellationToken.ThrowIfCancellationRequested(); 396 403 } 397 404 398 if (solution == null) throw new ArgumentException("The Algorithm did not return a Model"); 405 //try creating a model with old hyperparameters and new dataset; 406 var gp = RegressionAlgorithm as GaussianProcessRegression; 407 var oldmodel = ResultsModel as GaussianProcessRegressionSolution; 408 if (gp != null && oldmodel != null) { 409 var n = Samples.First().Item1.Length; 410 var mean = (IMeanFunction)oldmodel.Model.MeanFunction.Clone(); 411 var cov = (ICovarianceFunction)oldmodel.Model.CovarianceFunction.Clone(); 412 if (mean.GetNumberOfParameters(n) != 0 || cov.GetNumberOfParameters(n) != 0) throw new ArgumentException("DEBUG: assumption about fixed paramters wrong"); 413 var noise = 0.0; 414 double[] hyp = { noise }; 415 try { 416 var model = new GaussianProcessModel(problemdata.Dataset, problemdata.TargetVariable, 417 problemdata.AllowedInputVariables, problemdata.TrainingIndices, hyp, mean, cov); 418 model.FixParameters(); 419 var sol = new GaussianProcessRegressionSolution(model, problemdata); 420 if (solution == null || solution.TrainingMeanSquaredError > sol.TrainingMeanSquaredError) { 421 solution = sol; 422 } 423 } 424 catch (ArgumentException) { } 425 } 426 427 428 if (!ResultsQualities.Rows.ContainsKey("DEBUG: Degenerates")) ResultsQualities.Rows.Add(new DataRow("DEBUG: Degenerates")); 429 var row = ResultsQualities.Rows["DEBUG: Degenerates"]; 430 row.Values.Add(i - 1); 431 if (solution == null) Results.Add(new Result("Status", new StringValue("The Algorithm did not return a Model"))); 432 else { 433 if (!ResultsQualities.Rows.ContainsKey("DEBUG: RMSE")) ResultsQualities.Rows.Add(new DataRow("DEBUG: RMSE")); 434 row = ResultsQualities.Rows["DEBUG: RMSE"]; 435 row.Values.Add(Math.Sqrt(solution.TrainingMeanSquaredError)); 436 } 437 399 438 RegressionAlgorithm.Runs.Clear(); 400 439 return solution; 401 440 } 402 private RealVector OptimizeInfillProblem( ) {441 private 
RealVector OptimizeInfillProblem(CancellationToken cancellationToken) { 403 442 //parameterize and check InfillProblem 404 443 var infillProblem = InfillOptimizationAlgorithm.Problem as InfillProblem; … … 406 445 if (infillProblem.InfillCriterion != InfillCriterion) throw new ArgumentException("InfillCiriterion for Problem is not correct. Problem with Eventhandling?"); 407 446 if (infillProblem.Problem != Problem) throw new ArgumentException("Expensive real problem is not correctly set in InfillProblem. Problem with Eventhandling?"); 408 infillProblem.RegressionSolution = ResultsModel; 409 if (MaximalDatasetSize > 0 && MaximalDatasetSize < Samples.Count) { infillProblem.Encoding.Bounds = EgoUtilities.GetBoundingBox(DataSamples.Select(x => x.Item1)); } 447 InfillCriterion.Initialize(ResultsModel, Problem.Maximization, infillProblem.Encoding); 410 448 411 449 RealVector bestVector = null; … … 415 453 //optimize 416 454 var res = EgoUtilities.SyncRunSubAlgorithm(InfillOptimizationAlgorithm, Random.Next(int.MaxValue)); 417 455 cancellationToken.ThrowIfCancellationRequested(); 418 456 //extract results 419 457 if (!res.ContainsKey(BestInfillSolutionResultName)) throw new ArgumentException("The InfillOptimizationAlgorithm did not return a best solution");
Note: See TracChangeset for help on using the changeset viewer.