- Timestamp:
- 04/04/17 12:37:52 (8 years ago)
- Location:
- branches/EfficientGlobalOptimization/HeuristicLab.Algorithms.EGO
- Files:
-
- 4 added
- 10 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/EfficientGlobalOptimization/HeuristicLab.Algorithms.EGO/EfficientGlobalOptimizationAlgorithm.cs
r14768 r14818 24 24 using System.Linq; 25 25 using System.Threading; 26 using System.Windows.Forms; 26 27 using HeuristicLab.Algorithms.DataAnalysis; 27 28 using HeuristicLab.Analysis; … … 34 35 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; 35 36 using HeuristicLab.Problems.DataAnalysis; 37 using HeuristicLab.Problems.Instances.DataAnalysis; 38 using HeuristicLab.Problems.Instances.DataAnalysis.Views; 36 39 using HeuristicLab.Random; 37 40 … … 57 60 private const string InfillOptimizationRestartsParameterName = "InfillOptimizationRestarts"; 58 61 private const string InitialEvaluationsParameterName = "Initial Evaluations"; 59 private const string Maximum IterationsParameterName = "Maximum Iterations";62 private const string MaximumEvaluationsParameterName = "Maximum Evaluations"; 60 63 private const string MaximumRuntimeParameterName = "Maximum Runtime"; 61 64 private const string RegressionAlgorithmParameterName = "RegressionAlgorithm"; … … 63 66 private const string SetSeedRandomlyParameterName = "SetSeedRandomly"; 64 67 private const string MaximalDataSetSizeParameterName = "MaximalDataSetSize"; 68 private const string RemoveDuplicatesParamterName = "RemoveDuplicates"; 69 private const string InitialSamplesParameterName = "InitialSamplesFile"; 70 private const string BaselineVectorParameterName = "BaselineVector"; 65 71 #endregion 66 72 … … 88 94 public IFixedValueParameter<IntValue> InfillOptimizationRestartsParemeter => Parameters[InfillOptimizationRestartsParameterName] as IFixedValueParameter<IntValue>; 89 95 public IFixedValueParameter<IntValue> InitialEvaluationsParameter => Parameters[InitialEvaluationsParameterName] as IFixedValueParameter<IntValue>; 90 public IFixedValueParameter<IntValue> Maximum IterationsParameter => Parameters[MaximumIterationsParameterName] as IFixedValueParameter<IntValue>;96 public IFixedValueParameter<IntValue> MaximumEvaluationsParameter => Parameters[MaximumEvaluationsParameterName] as IFixedValueParameter<IntValue>; 91 97 public IFixedValueParameter<IntValue> MaximumRuntimeParameter => Parameters[MaximumRuntimeParameterName] as IFixedValueParameter<IntValue>; 92 98 public IValueParameter<IDataAnalysisAlgorithm<IRegressionProblem>> RegressionAlgorithmParameter => Parameters[RegressionAlgorithmParameterName] as IValueParameter<IDataAnalysisAlgorithm<IRegressionProblem>>; … … 94 100 public IFixedValueParameter<BoolValue> SetSeedRandomlyParameter => Parameters[SetSeedRandomlyParameterName] as IFixedValueParameter<BoolValue>; 95 101 public IFixedValueParameter<IntValue> MaximalDataSetSizeParameter => Parameters[MaximalDataSetSizeParameterName] as IFixedValueParameter<IntValue>; 102 public IFixedValueParameter<BoolValue> RemoveDuplicatesParameter => Parameters[RemoveDuplicatesParamterName] as IFixedValueParameter<BoolValue>; 103 104 public IFixedValueParameter<FileValue> InitialSamplesParameter => Parameters[InitialSamplesParameterName] as IFixedValueParameter<FileValue>; 105 106 public IValueParameter<RealVector> BaselineVectorParameter => Parameters[BaselineVectorParameterName] as IValueParameter<RealVector>; 96 107 #endregion 97 108 98 109 #region Properties 99 110 100 public int GenerationSize 101 { 102 get { return GenerationSizeParemeter.Value.Value; } 103 } 104 public IInfillCriterion InfillCriterion 105 { 106 get { return InfillCriterionParameter.Value; } 107 } 108 public Algorithm InfillOptimizationAlgorithm 109 { 110 get { return InfillOptimizationAlgorithmParameter.Value; } 111 } 112 public int InfillOptimizationRestarts 113 { 114 get { return InfillOptimizationRestartsParemeter.Value.Value; } 115 } 116 public int InitialEvaluations 117 { 118 get { return InitialEvaluationsParameter.Value.Value; } 119 } 120 public int MaximumIterations 121 { 122 get { return MaximumIterationsParameter.Value.Value; } 123 } 124 public int MaximumRuntime 125 { 126 get { return MaximumRuntimeParameter.Value.Value; } 127 } 128 public IDataAnalysisAlgorithm<IRegressionProblem> RegressionAlgorithm 129 { 130 get { return RegressionAlgorithmParameter.Value; } 131 } 132 public int Seed 133 { 134 get { return SeedParameter.Value.Value; } 135 } 136 public bool SetSeedRandomly 137 { 138 get { return SetSeedRandomlyParameter.Value.Value; } 139 } 140 public int MaximalDatasetSize 141 { 142 get { return MaximalDataSetSizeParameter.Value.Value; } 143 } 144 145 private IEnumerable<Tuple<RealVector, double>> DataSamples 146 { 147 get 148 { 149 return Samples.Count > MaximalDatasetSize && MaximalDatasetSize > 0 150 ? Samples.Skip(Samples.Count - MaximalDatasetSize) 151 : Samples; 152 } 153 } 154 111 public int GenerationSize => GenerationSizeParemeter.Value.Value; 112 public IInfillCriterion InfillCriterion => InfillCriterionParameter.Value; 113 public Algorithm InfillOptimizationAlgorithm => InfillOptimizationAlgorithmParameter.Value; 114 public int InfillOptimizationRestarts => InfillOptimizationRestartsParemeter.Value.Value; 115 public int InitialEvaluations => InitialEvaluationsParameter.Value.Value; 116 public int MaximumEvaluations => MaximumEvaluationsParameter.Value.Value; 117 public int MaximumRuntime => MaximumRuntimeParameter.Value.Value; 118 public IDataAnalysisAlgorithm<IRegressionProblem> RegressionAlgorithm => RegressionAlgorithmParameter.Value; 119 public int Seed => SeedParameter.Value.Value; 120 public bool SetSeedRandomly => SetSeedRandomlyParameter.Value.Value; 121 public int MaximalDatasetSize => MaximalDataSetSizeParameter.Value.Value; 122 private IEnumerable<Tuple<RealVector, double>> DataSamples => Samples.Count > MaximalDatasetSize && MaximalDatasetSize > 0 123 ? Samples.Skip(Samples.Count - MaximalDatasetSize) 124 : Samples; 125 126 private bool RemoveDuplicates => RemoveDuplicatesParameter.Value.Value; 127 private RealVector BaselineVector => BaselineVectorParameter.Value; 155 128 #endregion 156 129 … … 185 158 set { ((IntValue)Results[IterationsResultName].Value).Value = value; } 186 159 } 187 private DataTable ResultsQualities 188 { 189 get { return (DataTable)Results[QualitiesChartResultName].Value; } 190 } 191 private DataRow ResultsQualitiesBest 192 { 193 get { return ResultsQualities.Rows[BestQualitiesRowResultName]; } 194 } 195 private DataRow ResultsQualitiesWorst 196 { 197 get { return ResultsQualities.Rows[WorstQualitiesRowResultName]; } 198 } 199 private DataRow ResultsQualitiesIteration 200 { 201 get { return ResultsQualities.Rows[CurrentQualitiesRowResultName]; } 202 } 160 private DataTable ResultsQualities => (DataTable)Results[QualitiesChartResultName].Value; 161 private DataRow ResultsQualitiesBest => ResultsQualities.Rows[BestQualitiesRowResultName]; 162 163 private DataRow ResultsQualitiesWorst => ResultsQualities.Rows[WorstQualitiesRowResultName]; 164 165 private DataRow ResultsQualitiesIteration => ResultsQualities.Rows[CurrentQualitiesRowResultName]; 166 203 167 private IRegressionSolution ResultsModel 204 168 { … … 232 196 }; 233 197 model.CovarianceFunctionParameter.Value = new CovarianceRationalQuadraticIso(); 234 Parameters.Add(new FixedValueParameter<IntValue>(Maximum IterationsParameterName, "", new IntValue(int.MaxValue)));198 Parameters.Add(new FixedValueParameter<IntValue>(MaximumEvaluationsParameterName, "", new IntValue(int.MaxValue))); 235 199 Parameters.Add(new FixedValueParameter<IntValue>(InitialEvaluationsParameterName, "", new IntValue(10))); 236 200 Parameters.Add(new FixedValueParameter<IntValue>(MaximumRuntimeParameterName, "The maximum runtime in seconds after which the algorithm stops. Use -1 to specify no limit for the runtime", new IntValue(3600))); … … 242 206 Parameters.Add(new FixedValueParameter<IntValue>(GenerationSizeParameterName, "Number points that are sampled every iteration (stadard EGO: 1)", new IntValue(1))); 243 207 Parameters.Add(new ConstrainedValueParameter<IInfillCriterion>(InfillCriterionParameterName, "Decision what value should decide the next sample")); 208 InfillCriterionParameter.ValidValues.Add(new AugmentedExpectedImprovement()); 244 209 InfillCriterionParameter.ValidValues.Add(new ExpectedImprovement()); 245 210 InfillCriterionParameter.ValidValues.Add(new ExpectedQuality()); 246 InfillCriterionParameter.ValidValues.Add(new ConfidenceBound()); 211 var eqi = new ExpectedQuantileImprovement(); 212 InfillCriterionParameter.ValidValues.Add(eqi); 213 eqi.MaxEvaluationsParameter.Value = MaximumEvaluationsParameter.Value; 214 InfillCriterionParameter.ValidValues.Add(new MinimalQuantileCriterium()); 215 InfillCriterionParameter.ValidValues.Add(new RobustImprovement()); 216 InfillCriterionParameter.ValidValues.Add(new PluginExpectedImprovement()); 247 217 Parameters.Add(new FixedValueParameter<IntValue>(MaximalDataSetSizeParameterName, "The maximum number of sample points used to generate the model. Set 0 or less to use always all samples ", new IntValue(-1))); 248 218 Parameters.Add(new FixedValueParameter<BoolValue>(RemoveDuplicatesParamterName, "Wether duplicate samples should be replaced by a single sample with an averaged quality. This GREATLY decreases the chance of ill conditioned models (unbuildable models) but is not theoretically sound as the model ignores the increasing certainty in this region")); 219 Parameters.Add(new FixedValueParameter<FileValue>(InitialSamplesParameterName, "The file specifying some initial samples used to jump start the algorithm. These samples are not counted as evaluations. If InitialEvaluations is more than the samples specified in the file, the rest is uniformly random generated and evaluated.", new FileValue())); 220 Parameters.Add(new ValueParameter<RealVector>(BaselineVectorParameterName, "A vector used to create a baseline, this vector is evaluated once and is not part of the modeling process (has no influence on algorithm performance)")); 249 221 SetInfillProblem(); 250 222 RegisterEventhandlers(); … … 257 229 var enc = Problem.Encoding as RealVectorEncoding; 258 230 if (enc == null) throw new ArgumentException("The EGO algorithm can only be applied to RealVectorEncodings"); 231 var infillProblem = InfillOptimizationAlgorithm.Problem as InfillProblem; 232 if (infillProblem == null) throw new ArgumentException("InfillOptimizationAlgorithm has no InfillProblem. Troubles with Eventhandling?"); 233 infillProblem.Problem = Problem; 234 259 235 260 236 //random … … 274 250 table.Rows.Add(new DataRow(CurrentQualitiesRowResultName)); 275 251 Results.Add(new Result(QualitiesChartResultName, table)); 252 if (BaselineVector != null && BaselineVector.Length == enc.Length) 253 Results.Add(new Result("BaselineValue", new DoubleValue(Evaluate(BaselineVector).Item2))); 276 254 277 255 //initial samples … … 288 266 289 267 protected override void Run(CancellationToken cancellationToken) { 290 for (ResultsIterations = 0; Results Iterations < MaximumIterations; ResultsIterations++) {268 for (ResultsIterations = 0; ResultsEvaluations < MaximumEvaluations; ResultsIterations++) { 291 269 try { 292 270 ResultsModel = BuildModel(cancellationToken); 271 if (ResultsModel == null) break; 293 272 cancellationToken.ThrowIfCancellationRequested(); 294 273 for (var i = 0; i < GenerationSize; i++) { 295 var samplepoint = OptimizeInfillProblem( );274 var samplepoint = OptimizeInfillProblem(cancellationToken); 296 275 var sample = Evaluate(samplepoint); 297 276 Samples.Add(sample); … … 317 296 InfillOptimizationAlgorithm.ProblemChanged += InfillOptimizationProblemChanged; 318 297 InfillCriterionParameter.ValueChanged += InfillCriterionChanged; 298 InitialSamplesParameter.ToStringChanged += OnInitialSamplesChanged; 299 319 300 320 301 } … … 324 305 InfillOptimizationAlgorithm.ProblemChanged -= InfillOptimizationProblemChanged; 325 306 InfillCriterionParameter.ValueChanged -= InfillCriterionChanged; 307 InitialSamplesParameter.ToStringChanged -= OnInitialSamplesChanged; 326 308 } 327 309 private void OnInfillOptimizationAlgorithmChanged(object sender, EventArgs args) { … … 343 325 RegressionAlgorithm.Problem = new RegressionProblem(); 344 326 } 327 private void OnInitialSamplesChanged(object sender, EventArgs args) { 328 IRegressionProblemData samplesData = null; 329 using (var importTypeDialog = new RegressionImportTypeDialog()) { 330 if (importTypeDialog.ShowDialog() != DialogResult.OK) return; 331 samplesData = new RegressionCSVInstanceProvider().ImportData(importTypeDialog.Path, importTypeDialog.ImportType, importTypeDialog.CSVFormat); 332 InitialSamplesParameter.ToStringChanged -= OnInitialSamplesChanged; 333 InitialSamplesParameter.Value.Value = importTypeDialog.Path; 334 InitialSamplesParameter.ToStringChanged -= OnInitialSamplesChanged; 335 336 } 337 338 339 340 var solutions = new RealVector[samplesData.Dataset.Rows]; 341 var qualities = new double[samplesData.Dataset.Rows]; 342 var inputVariables = samplesData.InputVariables.CheckedItems.ToArray(); 343 for (var i = 0; i < solutions.Length; i++) { 344 qualities[i] = samplesData.Dataset.GetDoubleValue(samplesData.TargetVariable, i); 345 solutions[i] = new RealVector(inputVariables.Length); 346 for (var j = 0; j < inputVariables.Length; j++) solutions[i][j] = samplesData.Dataset.GetDoubleValue(inputVariables[j].Value.Value, i); 347 } 348 349 SetInitialSamples(solutions, qualities); 350 351 } 352 345 353 protected override void OnExecutionTimeChanged() { 346 354 base.OnExecutionTimeChanged(); … … 350 358 } 351 359 public override void Pause() { 352 if (InfillOptimizationAlgorithm.ExecutionState == ExecutionState.Started ) InfillOptimizationAlgorithm.Pause();353 if (RegressionAlgorithm.ExecutionState == ExecutionState.Started ) RegressionAlgorithm.Pause();360 if (InfillOptimizationAlgorithm.ExecutionState == ExecutionState.Started || InfillOptimizationAlgorithm.ExecutionState == ExecutionState.Paused) InfillOptimizationAlgorithm.Stop(); 361 if (RegressionAlgorithm.ExecutionState == ExecutionState.Started || RegressionAlgorithm.ExecutionState == ExecutionState.Paused) RegressionAlgorithm.Stop(); 354 362 base.Pause(); 355 363 } … … 376 384 } 377 385 private IRegressionSolution BuildModel(CancellationToken cancellationToken) { 378 var dataset = EgoUtilities.GetDataSet(DataSamples.ToList() );386 var dataset = EgoUtilities.GetDataSet(DataSamples.ToList(), RemoveDuplicates); 379 387 var problemdata = new RegressionProblemData(dataset, dataset.VariableNames.Where(x => !x.Equals("output")), "output"); 380 388 problemdata.TrainingPartition.Start = 0; … … 388 396 var i = 0; 389 397 IRegressionSolution solution = null; 390 double r2 = 0; 391 while ( (solution == null || RegressionAlgorithm is GaussianProcessRegression && r2 < 0.95) && i++ < 100) { //TODO: ask why GP degenerates to NaN so often398 399 while (solution == null && i++ < 100) { //TODO: Question: Why does GP degenerate to NaN so often? Answer: There is not even the slightest mitigation strategy for "almost duplicates" that ill-condition the covariance matrix. 392 400 var results = EgoUtilities.SyncRunSubAlgorithm(RegressionAlgorithm, Random.Next(int.MaxValue)); 393 401 solution = results.Select(x => x.Value).OfType<IRegressionSolution>().SingleOrDefault(); 394 r2 = solution?.TrainingRSquared ?? 0;395 402 cancellationToken.ThrowIfCancellationRequested(); 396 403 } 397 404 398 if (solution == null) throw new ArgumentException("The Algorithm did not return a Model"); 405 //try creating a model with old hyperparameters and new dataset; 406 var gp = RegressionAlgorithm as GaussianProcessRegression; 407 var oldmodel = ResultsModel as GaussianProcessRegressionSolution; 408 if (gp != null && oldmodel != null) { 409 var n = Samples.First().Item1.Length; 410 var mean = (IMeanFunction)oldmodel.Model.MeanFunction.Clone(); 411 var cov = (ICovarianceFunction)oldmodel.Model.CovarianceFunction.Clone(); 412 if (mean.GetNumberOfParameters(n) != 0 || cov.GetNumberOfParameters(n) != 0) throw new ArgumentException("DEBUG: assumption about fixed paramters wrong"); 413 var noise = 0.0; 414 double[] hyp = { noise }; 415 try { 416 var model = new GaussianProcessModel(problemdata.Dataset, problemdata.TargetVariable, 417 problemdata.AllowedInputVariables, problemdata.TrainingIndices, hyp, mean, cov); 418 model.FixParameters(); 419 var sol = new GaussianProcessRegressionSolution(model, problemdata); 420 if (solution == null || solution.TrainingMeanSquaredError > sol.TrainingMeanSquaredError) { 421 solution = sol; 422 } 423 } 424 catch (ArgumentException) { } 425 } 426 427 428 if (!ResultsQualities.Rows.ContainsKey("DEBUG: Degenerates")) ResultsQualities.Rows.Add(new DataRow("DEBUG: Degenerates")); 429 var row = ResultsQualities.Rows["DEBUG: Degenerates"]; 430 row.Values.Add(i - 1); 431 if (solution == null) Results.Add(new Result("Status", new StringValue("The Algorithm did not return a Model"))); 432 else { 433 if (!ResultsQualities.Rows.ContainsKey("DEBUG: RMSE")) ResultsQualities.Rows.Add(new DataRow("DEBUG: RMSE")); 434 row = ResultsQualities.Rows["DEBUG: RMSE"]; 435 row.Values.Add(Math.Sqrt(solution.TrainingMeanSquaredError)); 436 } 437 399 438 RegressionAlgorithm.Runs.Clear(); 400 439 return solution; 401 440 } 402 private RealVector OptimizeInfillProblem( ) {441 private RealVector OptimizeInfillProblem(CancellationToken cancellationToken) { 403 442 //parameterize and check InfillProblem 404 443 var infillProblem = InfillOptimizationAlgorithm.Problem as InfillProblem; … … 406 445 if (infillProblem.InfillCriterion != InfillCriterion) throw new ArgumentException("InfillCiriterion for Problem is not correct. Problem with Eventhandling?"); 407 446 if (infillProblem.Problem != Problem) throw new ArgumentException("Expensive real problem is not correctly set in InfillProblem. Problem with Eventhandling?"); 408 infillProblem.RegressionSolution = ResultsModel; 409 if (MaximalDatasetSize > 0 && MaximalDatasetSize < Samples.Count) { infillProblem.Encoding.Bounds = EgoUtilities.GetBoundingBox(DataSamples.Select(x => x.Item1)); } 447 InfillCriterion.Initialize(ResultsModel, Problem.Maximization, infillProblem.Encoding); 410 448 411 449 RealVector bestVector = null; … … 415 453 //optimize 416 454 var res = EgoUtilities.SyncRunSubAlgorithm(InfillOptimizationAlgorithm, Random.Next(int.MaxValue)); 417 455 cancellationToken.ThrowIfCancellationRequested(); 418 456 //extract results 419 457 if (!res.ContainsKey(BestInfillSolutionResultName)) throw new ArgumentException("The InfillOptimizationAlgorithm did not return a best solution"); -
branches/EfficientGlobalOptimization/HeuristicLab.Algorithms.EGO/EgoUtilities.cs
r14768 r14818 94 94 95 95 public static double GetEstimation(this IRegressionModel model, RealVector r) { 96 var dataset = GetDataSet(new[] { new Tuple<RealVector, double>(r, 0.0) } );96 var dataset = GetDataSet(new[] { new Tuple<RealVector, double>(r, 0.0) }, false); 97 97 return model.GetEstimatedValues(dataset, new[] { 0 }).First(); 98 98 } 99 99 public static double GetVariance(this IConfidenceRegressionModel model, RealVector r) { 100 var dataset = GetDataSet(new[] { new Tuple<RealVector, double>(r, 0.0) } );100 var dataset = GetDataSet(new[] { new Tuple<RealVector, double>(r, 0.0) }, false); 101 101 return model.GetEstimatedVariances(dataset, new[] { 0 }).First(); 102 102 } 103 103 104 public static Dataset GetDataSet(IReadOnlyList<Tuple<RealVector, double>> samples) { 105 var n = samples[0].Item1.Length + 1; 106 var data = new double[samples.Count, n]; 107 var names = new string[n - 1]; 108 for (var i = 0; i < n; i++) 109 if (i < names.Length) { 110 names[i] = "input" + i; 111 for (var j = 0; j < samples.Count; j++) data[j, i] = samples[j].Item1[i]; 112 } else 113 for (var j = 0; j < samples.Count; j++) data[j, n - 1] = samples[j].Item2; 104 105 public static double GetDoubleValue(this IDataset dataset, int i, int j) { 106 return dataset.GetDoubleValue("input" + j, i); 107 } 108 public static Dataset GetDataSet(IReadOnlyList<Tuple<RealVector, double>> samples, bool removeDuplicates) { 109 if (removeDuplicates) 110 samples = RemoveDuplicates(samples); //TODO duplicates require heteroskedasticity in Models 111 112 113 var dimensions = samples[0].Item1.Length + 1; 114 var data = new double[samples.Count, dimensions]; 115 var names = new string[dimensions - 1]; 116 for (var i = 0; i < names.Length; i++) names[i] = "input" + i; 117 118 for (var j = 0; j < samples.Count; j++) { 119 for (var i = 0; i < names.Length; i++) data[j, i] = samples[j].Item1[i]; 120 data[j, dimensions - 1] = samples[j].Item2; 121 122 } 123 124 114 125 return new Dataset(names.Concat(new[] { "output" }).ToArray(), data); 115 126 } 127 128 private static IReadOnlyList<Tuple<RealVector, double>> RemoveDuplicates(IReadOnlyList<Tuple<RealVector, double>> samples) { 129 var res = new List<Tuple<RealVector, double, int>>(); 130 131 foreach (var sample in samples) { 132 if (res.Count == 0) { 133 res.Add(new Tuple<RealVector, double, int>(sample.Item1, sample.Item2, 1)); 134 continue; 135 } 136 137 var index = res.ArgMin(x => Euclidian(sample.Item1, x.Item1)); 138 var d = Euclidian(res[index].Item1, sample.Item1); 139 if (d > 0.0001) 140 res.Add(new Tuple<RealVector, double, int>(sample.Item1, sample.Item2, 1)); 141 else { 142 var t = res[index]; 143 res.RemoveAt(index); 144 res.Add(new Tuple<RealVector, double, int>(t.Item1, t.Item2 + sample.Item2, t.Item3 + 1)); 145 } 146 } 147 return res.Select(x => new Tuple<RealVector, double>(x.Item1, x.Item2 / x.Item3)).ToArray(); 148 } 149 150 private static double Euclidian(IEnumerable<double> a, IEnumerable<double> b) { 151 return Math.Sqrt(a.Zip(b, (d, d1) => d - d1).Sum(d => d * d)); 152 } 153 116 154 public static DoubleMatrix GetBoundingBox(IEnumerable<RealVector> vectors) { 117 155 DoubleMatrix res = null; … … 128 166 return res; 129 167 } 168 169 130 170 } 131 171 } -
branches/EfficientGlobalOptimization/HeuristicLab.Algorithms.EGO/HeuristicLab.Algorithms.EGO-3.4.csproj
r14768 r14818 92 92 <HintPath>..\..\..\trunk\sources\bin\HeuristicLab.Problems.Instances-3.3.dll</HintPath> 93 93 </Reference> 94 <Reference Include="HeuristicLab.Problems.Instances.DataAnalysis-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> 95 <SpecificVersion>False</SpecificVersion> 96 <HintPath>..\..\..\trunk\sources\bin\HeuristicLab.Problems.Instances.DataAnalysis-3.3.dll</HintPath> 97 </Reference> 98 <Reference Include="HeuristicLab.Problems.Instances.DataAnalysis.Views-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> 99 <SpecificVersion>False</SpecificVersion> 100 <HintPath>..\..\..\trunk\sources\bin\HeuristicLab.Problems.Instances.DataAnalysis.Views-3.3.dll</HintPath> 101 </Reference> 94 102 <Reference Include="HeuristicLab.Random-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> 95 103 <SpecificVersion>False</SpecificVersion> … … 98 106 <Reference Include="System" /> 99 107 <Reference Include="System.Core" /> 108 <Reference Include="System.Windows.Forms" /> 100 109 <Reference Include="System.Xml.Linq" /> 101 110 <Reference Include="System.Data.DataSetExtensions" /> … … 108 117 <Compile Include="EgoUtilities.cs" /> 109 118 <Compile Include="EfficientGlobalOptimizationAlgorithm.cs" /> 119 <Compile Include="InfillCriteria\AugmentedExpectedImprovement.cs" /> 120 <Compile Include="InfillCriteria\ExpectedQuantileImprovement.cs" /> 121 <Compile Include="InfillCriteria\PluginExpectedImprovement.cs" /> 110 122 <Compile Include="InfillCriteria\RobustImprovement.cs" /> 111 <Compile Include="InfillCriteria\ ConfidenceBound.cs" />123 <Compile Include="InfillCriteria\MinimalQuantileCriterium.cs" /> 112 124 <Compile Include="InfillCriteria\ExpectedQuality.cs" /> 113 125 <Compile Include="InfillCriteria\InfillCriterionBase.cs" /> -
branches/EfficientGlobalOptimization/HeuristicLab.Algorithms.EGO/InfillCriteria/ExpectedImprovement.cs
r14768 r14818 42 42 43 43 #region ParameterProperties 44 public IFixedValueParameter<DoubleValue> ExploitationWeightParameter 45 { 46 get { return Parameters[ExploitationWeightParameterName] as IFixedValueParameter<DoubleValue>; } 47 } 44 public IFixedValueParameter<DoubleValue> ExploitationWeightParameter => Parameters[ExploitationWeightParameterName] as IFixedValueParameter<DoubleValue>; 45 48 46 #endregion 49 47 50 48 #region Properties 51 pr ivate double ExploitationWeight52 { 53 get { return ExploitationWeightParameter.Value.Value; }54 }49 protected double ExploitationWeight => ExploitationWeightParameter.Value.Value; 50 51 [Storable] 52 protected double YMin; 55 53 #endregion 56 54 57 55 #region HL-Constructors, Serialization and Cloning 58 56 [StorableConstructor] 59 pr ivateExpectedImprovement(bool deserializing) : base(deserializing) { }57 protected ExpectedImprovement(bool deserializing) : base(deserializing) { } 60 58 [StorableHook(HookType.AfterDeserialization)] 61 59 private void AfterDeserialization() { 62 60 RegisterEventhandlers(); 63 61 } 64 pr ivateExpectedImprovement(ExpectedImprovement original, Cloner cloner) : base(original, cloner) {62 protected ExpectedImprovement(ExpectedImprovement original, Cloner cloner) : base(original, cloner) { 65 63 RegisterEventhandlers(); 66 64 } … … 74 72 #endregion 75 73 76 public override double Evaluate(IRegressionSolution solution, RealVector vector, bool maximization) { 77 if (maximization) throw new NotImplementedException("Expected Improvement for maximization not yet implemented"); 78 var model = solution.Model as IConfidenceRegressionModel; 79 if (model == null) throw new ArgumentException("can not calculate EI without confidence measure"); 74 public override double Evaluate(RealVector vector) { 75 var model = RegressionSolution.Model as IConfidenceRegressionModel; 80 76 var yhat = model.GetEstimation(vector); 81 var min = solution.ProblemData.TargetVariableTrainingValues.Min();82 77 var s = Math.Sqrt(model.GetVariance(vector)); 83 return GetEstimatedImprovement( min, yhat, s, ExploitationWeight);78 return GetEstimatedImprovement(YMin, yhat, s, ExploitationWeight); 84 79 } 85 80 86 public override bool Maximization( bool expensiveProblemMaximization) {81 public override bool Maximization() { 87 82 return true; 83 } 84 85 protected override void Initialize() { 86 if (ExpensiveMaximization) throw new NotImplementedException("Expected Improvement for maximization not yet implemented"); 87 var model = RegressionSolution.Model as IConfidenceRegressionModel; 88 if (model == null) throw new ArgumentException("can not calculate EI without confidence measure"); 89 YMin = RegressionSolution.ProblemData.TargetVariableTrainingValues.Min(); 88 90 } 89 91 … … 104 106 105 107 #region Helpers 106 pr ivatestatic double GetEstimatedImprovement(double ymin, double yhat, double s, double w) {108 protected static double GetEstimatedImprovement(double ymin, double yhat, double s, double w) { 107 109 if (Math.Abs(s) < double.Epsilon) return 0; 108 110 var val = (ymin - yhat) / s; -
branches/EfficientGlobalOptimization/HeuristicLab.Algorithms.EGO/InfillCriteria/ExpectedQuality.cs
r14741 r14818 24 24 using HeuristicLab.Encodings.RealVectorEncoding; 25 25 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; 26 using HeuristicLab.Problems.DataAnalysis;27 26 28 27 // ReSharper disable once CheckNamespace … … 42 41 #endregion 43 42 44 public override double Evaluate(IRegressionSolution solution, RealVector vector, bool maximization) { 45 return solution.Model.GetEstimation(vector); 43 public override double Evaluate(RealVector vector) { 44 return RegressionSolution.Model.GetEstimation(vector); 45 } 46 47 public override bool Maximization() { 48 return ExpensiveMaximization; 49 } 50 51 protected override void Initialize() { 46 52 } 47 53 } -
branches/EfficientGlobalOptimization/HeuristicLab.Algorithms.EGO/InfillCriteria/InfillCriterionBase.cs
r14741 r14818 23 23 using HeuristicLab.Core; 24 24 using HeuristicLab.Encodings.RealVectorEncoding; 25 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; 25 26 using HeuristicLab.Problems.DataAnalysis; 26 27 27 28 // ReSharper disable once CheckNamespace 28 29 namespace HeuristicLab.Algorithms.EGO { 29 30 [StorableClass] 30 31 public abstract class InfillCriterionBase : ParameterizedNamedItem, IInfillCriterion { 31 32 33 [Storable] 34 protected IRegressionSolution RegressionSolution; 35 [Storable] 36 protected bool ExpensiveMaximization; 37 [Storable] 38 protected RealVectorEncoding Encoding; 39 32 40 protected InfillCriterionBase(bool deserializing) : base(deserializing) { } 33 protected InfillCriterionBase(InfillCriterionBase original, Cloner cloner) : base(original, cloner) { } 41 42 protected InfillCriterionBase(InfillCriterionBase original, Cloner cloner) : base(original, cloner) { 43 RegressionSolution = cloner.Clone(original.RegressionSolution); 44 ExpensiveMaximization = original.ExpensiveMaximization; 45 Encoding = cloner.Clone(original.Encoding); 46 } 34 47 protected InfillCriterionBase() { } 35 48 36 public abstract double Evaluate(IRegressionSolution model, RealVector vector, bool maximization); 37 public virtual bool Maximization(bool expensiveProblemMaximization) { 38 return expensiveProblemMaximization; 49 public abstract double Evaluate(RealVector vector); 50 public abstract bool Maximization(); 51 52 public void Initialize(IRegressionSolution solution, bool expensiveMaximization, RealVectorEncoding encoding) { 53 RegressionSolution = solution; 54 ExpensiveMaximization = expensiveMaximization; 55 Encoding = encoding; 56 Initialize(); 39 57 } 58 59 protected abstract void Initialize(); 60 40 61 } 41 62 } -
branches/EfficientGlobalOptimization/HeuristicLab.Algorithms.EGO/InfillCriteria/RobustImprovement.cs
r14768 r14818 21 21 22 22 using System; 23 using System.Collections.Generic; 24 using System.Linq; 23 25 using HeuristicLab.Common; 24 26 using HeuristicLab.Core; … … 33 35 34 36 [StorableClass] 35 [Item(" ConfidenceBoundMeassure", "Adding or Subtracting the variance * factor to the model estimation")]36 public class ConfidenceBound: InfillCriterionBase {37 [Item("RobustImprovementMeassure", "Adding or Subtracting the variance * factor to the model estimation")] 38 public class RobustImprovement : InfillCriterionBase { 37 39 38 40 #region ParameterNames 39 private const string ConfidenceWeightParameterName = "ConfidenceWeight";41 private const string KParameterName = "NearestNeighbours"; 40 42 #endregion 41 43 42 44 #region ParameterProperties 43 public IFixedValueParameter<DoubleValue> ConfidenceWeightParameter 44 { 45 get { return Parameters[ConfidenceWeightParameterName] as IFixedValueParameter<DoubleValue>; } 46 } 45 public IFixedValueParameter<IntValue> KParameter => Parameters[KParameterName] as IFixedValueParameter<IntValue>; 46 47 47 #endregion 48 48 49 49 #region Properties 50 private double ConfidenceWeight 51 { 52 get { return ConfidenceWeightParameter.Value.Value; } 53 } 50 private int K => KParameter.Value.Value; 51 52 [Storable] 53 private double MaxSolutionDist; 54 55 [Storable] 56 //TODO use VP-Tree instead of array 57 private RealVector[] Data; 54 58 #endregion 55 59 56 60 #region HL-Constructors, Serialization and Cloning 57 61 [StorableConstructor] 58 private ConfidenceBound(bool deserializing) : base(deserializing) { } 59 private ConfidenceBound(ConfidenceBound original, Cloner cloner) : base(original, cloner) { } 60 public ConfidenceBound() { 61 Parameters.Add(new FixedValueParameter<DoubleValue>(ConfidenceWeightParameterName, "A value between 0 and 1 indicating the focus on exploration (0) or exploitation (1)", new DoubleValue(0.5))); 62 private RobustImprovement(bool deserializing) : base(deserializing) { } 63 64 private RobustImprovement(RobustImprovement original, Cloner cloner) : base(original, cloner) { 65 MaxSolutionDist = original.MaxSolutionDist; 66 Data = original.Data != null ? original.Data.Select(cloner.Clone).ToArray() : null; 67 } 68 public RobustImprovement() { 69 Parameters.Add(new FixedValueParameter<IntValue>(KParameterName, "A value larger than 0 indicating how many nearestNeighbours shall be used to determine the RI meassure", new IntValue(3))); 62 70 } 63 71 public override IDeepCloneable Clone(Cloner cloner) { 64 return new ConfidenceBound(this, cloner);72 return new RobustImprovement(this, cloner); 65 73 } 66 74 #endregion 67 75 68 public override double Evaluate(IRegressionSolution solution, RealVector vector, bool maximization) { 69 var model = solution.Model as IConfidenceRegressionModel; 76 77 public override double Evaluate(RealVector vector) { 78 List<RealVector> nearestNeighbours; 79 List<double> distances; 80 Search(vector, K, out nearestNeighbours, out distances); 81 var distVectors = nearestNeighbours.Select(x => Minus(x, vector)).ToList(); 82 var sum = 0.0; 83 var wsum = 1.0; //weights for angular distance 84 var used = new HashSet<RealVector>(); 85 foreach (var distVector in distVectors) { 86 var d = Math.Pow(distances[used.Count], 0.5); 87 if (used.Count == 0) { 88 sum += d; 89 } else { 90 var w = used.Select(x => Angular(distVector, x)).Min(); 91 sum += w * d; 92 wsum += w; 93 } 94 used.Add(distVector); 95 } 96 sum /= wsum * MaxSolutionDist; //normalize 97 return sum; 98 } 99 public override bool Maximization() { 100 return ExpensiveMaximization; 101 } 102 protected override void Initialize() { 103 var model = RegressionSolution.Model as IConfidenceRegressionModel; 70 104 if (model == null) throw new ArgumentException("can not calculate EI without confidence measure"); 71 var yhat = model.GetEstimation(vector); 72 var s = Math.Sqrt(model.GetVariance(vector)) * ConfidenceWeight; 73 return maximization ? yhat + s : yhat - s; 105 Data = new RealVector[RegressionSolution.ProblemData.Dataset.Rows]; 106 for (var i = 0; i < Data.Length; i++) { 107 Data[i] = new RealVector(Encoding.Length); 108 for (var j = 0; j < Encoding.Length; j++) 109 Data[i][j] = RegressionSolution.ProblemData.Dataset.GetDoubleValue(i, j); 110 } 111 112 var maxSolution = new double[Encoding.Length]; 113 var minSolution = new double[Encoding.Length]; 114 for (var i = 0; i < Encoding.Length; i++) { 115 var j = i % Encoding.Bounds.Rows; 116 maxSolution[i] = Encoding.Bounds[j, 1]; 117 minSolution[i] = Encoding.Bounds[j, 0]; 118 } 119 MaxSolutionDist = Euclidian(maxSolution, minSolution) / Data.Length; 74 120 } 75 121 122 #region Helpers 123 private static double Euclidian(IEnumerable<double> a, IEnumerable<double> b) { 124 return Math.Sqrt(a.Zip(b, (d, d1) => d - d1).Sum(d => d * d)); 125 } 126 private static double Angular(RealVector a, RealVector b) { 127 var innerProduct = a.Zip(b, (x, y) => x * y).Sum(); 128 var res = Math.Acos(innerProduct / (Norm(a) * Norm(b))) / Math.PI; 129 return double.IsNaN(res) ? 0 : res; 130 } 131 private static double Norm(IEnumerable<double> a) { 132 return Math.Sqrt(a.Sum(d => d * d)); 133 } 134 private static RealVector Minus(RealVector a, RealVector b) { 135 return new RealVector(a.Zip(b, (d, d1) => d - d1).ToArray()); 136 } 137 138 private void Search(RealVector vector, int k, out List<RealVector> nearestNeighbours, out List<double> distances) { 139 var neighbours = new SortedList<double, RealVector>(new DuplicateKeyComparer<double>()); 140 foreach (var n in Data) neighbours.Add(Euclidian(n, vector), n); 141 nearestNeighbours = new List<RealVector>(); 142 143 distances = new List<double>(); 144 foreach (var entry in neighbours) { 145 nearestNeighbours.Add(entry.Value); 146 distances.Add(entry.Key); 147 if (distances.Count == k) break; 148 } 149 } 150 #endregion 151 152 public class DuplicateKeyComparer<TKey> : IComparer<TKey> where TKey : IComparable { 153 public int Compare(TKey x, TKey y) { 154 var result = x.CompareTo(y); 155 return result == 0 ? 1 : result; 156 } 157 } 76 158 } 77 159 } -
branches/EfficientGlobalOptimization/HeuristicLab.Algorithms.EGO/Interfaces/IInfillCriterion.cs
r14741 r14818 27 27 namespace HeuristicLab.Algorithms.EGO { 28 28 public interface IInfillCriterion : INamedItem { 29 double Evaluate(IRegressionSolution model, RealVector vector, bool maximization); 30 bool Maximization(bool expensiveProblemMaximization); 29 double Evaluate(RealVector vector); 30 bool Maximization(); 31 void Initialize(IRegressionSolution solution, bool expensiveMaximization, RealVectorEncoding encoding); 31 32 } 32 33 } -
branches/EfficientGlobalOptimization/HeuristicLab.Algorithms.EGO/Interfaces/ISurrogateAlgorithm.cs
r14768 r14818 26 26 namespace HeuristicLab.Algorithms.EGO { 27 27 public interface ISurrogateAlgorithm<T> : IAlgorithm where T : IDeepCloneable { 28 void SetInitialSamples(T[] i, double[] qualities);28 void SetInitialSamples(T[] solutions, double[] qualities); 29 29 } 30 30 } -
branches/EfficientGlobalOptimization/HeuristicLab.Algorithms.EGO/Problems/InfillProblem.cs
r14768 r14818 27 27 using HeuristicLab.Optimization; 28 28 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; 29 using HeuristicLab.Problems.DataAnalysis;30 29 31 30 namespace HeuristicLab.Algorithms.EGO { … … 34 33 public sealed class InfillProblem : SingleObjectiveBasicProblem<RealVectorEncoding> { 35 34 36 public override bool Maximization => true; 35 public override bool Maximization => true; //This is necessary because algorithms do not expect the maximization to change 37 36 38 37 #region Properties; … … 41 40 [Storable] 42 41 private SingleObjectiveBasicProblem<IEncoding> problem; 43 [Storable]44 private IRegressionSolution regressionSolution;45 46 42 47 43 public IInfillCriterion InfillCriterion … … 57 53 problem = value; 58 54 if (problem == null) return; 59 Encoding = problem.Encoding as RealVectorEncoding; 55 var enc = problem.Encoding as RealVectorEncoding; 56 if (enc == null) throw new ArgumentException("EGO can not be performed on non-RealVectorEncodings"); 57 Encoding = enc; 60 58 SolutionCreator = new UniformRandomRealVectorCreator();//ignore Problem specific Solution Creation 61 if (Encoding == null) throw new ArgumentException("EGO can not be performed on non-RealVectorEncodings"); 59 62 60 } 63 }64 public IRegressionSolution RegressionSolution65 {66 get { return regressionSolution; }67 set { regressionSolution = value; }68 61 } 69 62 #endregion … … 75 68 infillCriterion = cloner.Clone(original.InfillCriterion); 76 69 problem = cloner.Clone(original.Problem); 77 regressionSolution = cloner.Clone(original.regressionSolution);78 70 } 79 71 public InfillProblem() { } … … 82 74 83 75 public override double Evaluate(Individual individual, IRandom r) { 84 var q = InfillCriterion.Evaluate( RegressionSolution, individual.RealVector(), Problem.Maximization);85 return InfillCriterion.Maximization( Problem.Maximization) ? q : -q; //This is necessary because Maximization is not supposed to change on a normal problem76 var q = InfillCriterion.Evaluate(individual.RealVector()); 77 return InfillCriterion.Maximization() ? q : -q; 86 78 } 87 79 public override void Analyze(Individual[] individuals, double[] qualities, ResultCollection results, IRandom random) { 88 80 base.Analyze(individuals, qualities, results, random); 89 81 var best = qualities.ArgMax(x => x); 90 var qnew = InfillCriterion.Maximization(Problem.Maximization) ? qualities[best] : -qualities[best]; 91 //var best = Maximization ? qualities.ArgMax(x => x) : qualities.ArgMin(x => x); 82 var qnew = InfillCriterion.Maximization() ? qualities[best] : -qualities[best]; 92 83 const string qname = EfficientGlobalOptimizationAlgorithm.BestInfillQualityResultName; 93 84 const string sname = EfficientGlobalOptimizationAlgorithm.BestInfillSolutionResultName; … … 99 90 var qold = results[qname].Value as DoubleValue; 100 91 if (qold == null) throw new ArgumentException("Old best quality is not a double value. Conflicting Analyzers?"); 101 if (qold.Value >= qnew == InfillCriterion.Maximization( Problem.Maximization)) return;92 if (qold.Value >= qnew == InfillCriterion.Maximization()) return; 102 93 results[sname].Value = (RealVector)individuals[best].RealVector().Clone(); 103 94 qold.Value = qnew; 104 105 95 } 106 96 }
Note: See TracChangeset
for help on using the changeset viewer.