Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
04/04/17 12:37:52 (7 years ago)
Author:
bwerth
Message:

#2745 added several new InfillCriteria and moved Parameters from the InfillProblem to the Criteria themselves; added Sanity checks for GaussianProcessRegression

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/EfficientGlobalOptimization/HeuristicLab.Algorithms.EGO/EfficientGlobalOptimizationAlgorithm.cs

    r14768 r14818  
    2424using System.Linq;
    2525using System.Threading;
     26using System.Windows.Forms;
    2627using HeuristicLab.Algorithms.DataAnalysis;
    2728using HeuristicLab.Analysis;
     
    3435using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    3536using HeuristicLab.Problems.DataAnalysis;
     37using HeuristicLab.Problems.Instances.DataAnalysis;
     38using HeuristicLab.Problems.Instances.DataAnalysis.Views;
    3639using HeuristicLab.Random;
    3740
     
    5760    private const string InfillOptimizationRestartsParameterName = "InfillOptimizationRestarts";
    5861    private const string InitialEvaluationsParameterName = "Initial Evaluations";
    59     private const string MaximumIterationsParameterName = "Maximum Iterations";
     62    private const string MaximumEvaluationsParameterName = "Maximum Evaluations";
    6063    private const string MaximumRuntimeParameterName = "Maximum Runtime";
    6164    private const string RegressionAlgorithmParameterName = "RegressionAlgorithm";
     
    6366    private const string SetSeedRandomlyParameterName = "SetSeedRandomly";
    6467    private const string MaximalDataSetSizeParameterName = "MaximalDataSetSize";
     68    private const string RemoveDuplicatesParamterName = "RemoveDuplicates";
     69    private const string InitialSamplesParameterName = "InitialSamplesFile";
     70    private const string BaselineVectorParameterName = "BaselineVector";
    6571    #endregion
    6672
     
    8894    public IFixedValueParameter<IntValue> InfillOptimizationRestartsParemeter => Parameters[InfillOptimizationRestartsParameterName] as IFixedValueParameter<IntValue>;
    8995    public IFixedValueParameter<IntValue> InitialEvaluationsParameter => Parameters[InitialEvaluationsParameterName] as IFixedValueParameter<IntValue>;
    90     public IFixedValueParameter<IntValue> MaximumIterationsParameter => Parameters[MaximumIterationsParameterName] as IFixedValueParameter<IntValue>;
     96    public IFixedValueParameter<IntValue> MaximumEvaluationsParameter => Parameters[MaximumEvaluationsParameterName] as IFixedValueParameter<IntValue>;
    9197    public IFixedValueParameter<IntValue> MaximumRuntimeParameter => Parameters[MaximumRuntimeParameterName] as IFixedValueParameter<IntValue>;
    9298    public IValueParameter<IDataAnalysisAlgorithm<IRegressionProblem>> RegressionAlgorithmParameter => Parameters[RegressionAlgorithmParameterName] as IValueParameter<IDataAnalysisAlgorithm<IRegressionProblem>>;
     
    94100    public IFixedValueParameter<BoolValue> SetSeedRandomlyParameter => Parameters[SetSeedRandomlyParameterName] as IFixedValueParameter<BoolValue>;
    95101    public IFixedValueParameter<IntValue> MaximalDataSetSizeParameter => Parameters[MaximalDataSetSizeParameterName] as IFixedValueParameter<IntValue>;
     102    public IFixedValueParameter<BoolValue> RemoveDuplicatesParameter => Parameters[RemoveDuplicatesParamterName] as IFixedValueParameter<BoolValue>;
     103
     104    public IFixedValueParameter<FileValue> InitialSamplesParameter => Parameters[InitialSamplesParameterName] as IFixedValueParameter<FileValue>;
     105
     106    public IValueParameter<RealVector> BaselineVectorParameter => Parameters[BaselineVectorParameterName] as IValueParameter<RealVector>;
    96107    #endregion
    97108
    98109    #region Properties
    99110
    100     public int GenerationSize
    101     {
    102       get { return GenerationSizeParemeter.Value.Value; }
    103     }
    104     public IInfillCriterion InfillCriterion
    105     {
    106       get { return InfillCriterionParameter.Value; }
    107     }
    108     public Algorithm InfillOptimizationAlgorithm
    109     {
    110       get { return InfillOptimizationAlgorithmParameter.Value; }
    111     }
    112     public int InfillOptimizationRestarts
    113     {
    114       get { return InfillOptimizationRestartsParemeter.Value.Value; }
    115     }
    116     public int InitialEvaluations
    117     {
    118       get { return InitialEvaluationsParameter.Value.Value; }
    119     }
    120     public int MaximumIterations
    121     {
    122       get { return MaximumIterationsParameter.Value.Value; }
    123     }
    124     public int MaximumRuntime
    125     {
    126       get { return MaximumRuntimeParameter.Value.Value; }
    127     }
    128     public IDataAnalysisAlgorithm<IRegressionProblem> RegressionAlgorithm
    129     {
    130       get { return RegressionAlgorithmParameter.Value; }
    131     }
    132     public int Seed
    133     {
    134       get { return SeedParameter.Value.Value; }
    135     }
    136     public bool SetSeedRandomly
    137     {
    138       get { return SetSeedRandomlyParameter.Value.Value; }
    139     }
    140     public int MaximalDatasetSize
    141     {
    142       get { return MaximalDataSetSizeParameter.Value.Value; }
    143     }
    144 
    145     private IEnumerable<Tuple<RealVector, double>> DataSamples
    146     {
    147       get
    148       {
    149         return Samples.Count > MaximalDatasetSize && MaximalDatasetSize > 0
    150           ? Samples.Skip(Samples.Count - MaximalDatasetSize)
    151           : Samples;
    152       }
    153     }
    154 
     111    public int GenerationSize => GenerationSizeParemeter.Value.Value;
     112    public IInfillCriterion InfillCriterion => InfillCriterionParameter.Value;
     113    public Algorithm InfillOptimizationAlgorithm => InfillOptimizationAlgorithmParameter.Value;
     114    public int InfillOptimizationRestarts => InfillOptimizationRestartsParemeter.Value.Value;
     115    public int InitialEvaluations => InitialEvaluationsParameter.Value.Value;
     116    public int MaximumEvaluations => MaximumEvaluationsParameter.Value.Value;
     117    public int MaximumRuntime => MaximumRuntimeParameter.Value.Value;
     118    public IDataAnalysisAlgorithm<IRegressionProblem> RegressionAlgorithm => RegressionAlgorithmParameter.Value;
     119    public int Seed => SeedParameter.Value.Value;
     120    public bool SetSeedRandomly => SetSeedRandomlyParameter.Value.Value;
     121    public int MaximalDatasetSize => MaximalDataSetSizeParameter.Value.Value;
     122    private IEnumerable<Tuple<RealVector, double>> DataSamples => Samples.Count > MaximalDatasetSize && MaximalDatasetSize > 0
     123      ? Samples.Skip(Samples.Count - MaximalDatasetSize)
     124      : Samples;
     125
     126    private bool RemoveDuplicates => RemoveDuplicatesParameter.Value.Value;
     127    private RealVector BaselineVector => BaselineVectorParameter.Value;
    155128    #endregion
    156129
     
    185158      set { ((IntValue)Results[IterationsResultName].Value).Value = value; }
    186159    }
    187     private DataTable ResultsQualities
    188     {
    189       get { return (DataTable)Results[QualitiesChartResultName].Value; }
    190     }
    191     private DataRow ResultsQualitiesBest
    192     {
    193       get { return ResultsQualities.Rows[BestQualitiesRowResultName]; }
    194     }
    195     private DataRow ResultsQualitiesWorst
    196     {
    197       get { return ResultsQualities.Rows[WorstQualitiesRowResultName]; }
    198     }
    199     private DataRow ResultsQualitiesIteration
    200     {
    201       get { return ResultsQualities.Rows[CurrentQualitiesRowResultName]; }
    202     }
     160    private DataTable ResultsQualities => (DataTable)Results[QualitiesChartResultName].Value;
     161    private DataRow ResultsQualitiesBest => ResultsQualities.Rows[BestQualitiesRowResultName];
     162
     163    private DataRow ResultsQualitiesWorst => ResultsQualities.Rows[WorstQualitiesRowResultName];
     164
     165    private DataRow ResultsQualitiesIteration => ResultsQualities.Rows[CurrentQualitiesRowResultName];
     166
    203167    private IRegressionSolution ResultsModel
    204168    {
     
    232196      };
    233197      model.CovarianceFunctionParameter.Value = new CovarianceRationalQuadraticIso();
    234       Parameters.Add(new FixedValueParameter<IntValue>(MaximumIterationsParameterName, "", new IntValue(int.MaxValue)));
     198      Parameters.Add(new FixedValueParameter<IntValue>(MaximumEvaluationsParameterName, "", new IntValue(int.MaxValue)));
    235199      Parameters.Add(new FixedValueParameter<IntValue>(InitialEvaluationsParameterName, "", new IntValue(10)));
    236200      Parameters.Add(new FixedValueParameter<IntValue>(MaximumRuntimeParameterName, "The maximum runtime in seconds after which the algorithm stops. Use -1 to specify no limit for the runtime", new IntValue(3600)));
     
    242206      Parameters.Add(new FixedValueParameter<IntValue>(GenerationSizeParameterName, "Number points that are sampled every iteration (stadard EGO: 1)", new IntValue(1)));
    243207      Parameters.Add(new ConstrainedValueParameter<IInfillCriterion>(InfillCriterionParameterName, "Decision what value should decide the next sample"));
     208      InfillCriterionParameter.ValidValues.Add(new AugmentedExpectedImprovement());
    244209      InfillCriterionParameter.ValidValues.Add(new ExpectedImprovement());
    245210      InfillCriterionParameter.ValidValues.Add(new ExpectedQuality());
    246       InfillCriterionParameter.ValidValues.Add(new ConfidenceBound());
     211      var eqi = new ExpectedQuantileImprovement();
     212      InfillCriterionParameter.ValidValues.Add(eqi);
     213      eqi.MaxEvaluationsParameter.Value = MaximumEvaluationsParameter.Value;
     214      InfillCriterionParameter.ValidValues.Add(new MinimalQuantileCriterium());
     215      InfillCriterionParameter.ValidValues.Add(new RobustImprovement());
     216      InfillCriterionParameter.ValidValues.Add(new PluginExpectedImprovement());
    247217      Parameters.Add(new FixedValueParameter<IntValue>(MaximalDataSetSizeParameterName, "The maximum number of sample points used to generate the model. Set 0 or less to use always all samples ", new IntValue(-1)));
    248 
     218      Parameters.Add(new FixedValueParameter<BoolValue>(RemoveDuplicatesParamterName, "Wether duplicate samples should be replaced by a single sample with an averaged quality. This GREATLY decreases the chance of ill conditioned models (unbuildable models) but is not theoretically sound as the model ignores the increasing certainty in this region"));
     219      Parameters.Add(new FixedValueParameter<FileValue>(InitialSamplesParameterName, "The file specifying some initial samples used to jump start the algorithm. These samples are not counted as evaluations. If InitialEvaluations is more than the samples specified in the file, the rest is uniformly random generated and evaluated.", new FileValue()));
     220      Parameters.Add(new ValueParameter<RealVector>(BaselineVectorParameterName, "A vector used to create a baseline, this vector is evaluated once and is not part of the modeling process (has no influence on algorithm performance)"));
    249221      SetInfillProblem();
    250222      RegisterEventhandlers();
     
    257229      var enc = Problem.Encoding as RealVectorEncoding;
    258230      if (enc == null) throw new ArgumentException("The EGO algorithm can only be applied to RealVectorEncodings");
     231      var infillProblem = InfillOptimizationAlgorithm.Problem as InfillProblem;
     232      if (infillProblem == null) throw new ArgumentException("InfillOptimizationAlgorithm has no InfillProblem. Troubles with Eventhandling?");
     233      infillProblem.Problem = Problem;
     234
    259235
    260236      //random
     
    274250      table.Rows.Add(new DataRow(CurrentQualitiesRowResultName));
    275251      Results.Add(new Result(QualitiesChartResultName, table));
     252      if (BaselineVector != null && BaselineVector.Length == enc.Length)
     253        Results.Add(new Result("BaselineValue", new DoubleValue(Evaluate(BaselineVector).Item2)));
    276254
    277255      //initial samples
     
    288266
    289267    protected override void Run(CancellationToken cancellationToken) {
    290       for (ResultsIterations = 0; ResultsIterations < MaximumIterations; ResultsIterations++) {
     268      for (ResultsIterations = 0; ResultsEvaluations < MaximumEvaluations; ResultsIterations++) {
    291269        try {
    292270          ResultsModel = BuildModel(cancellationToken);
     271          if (ResultsModel == null) break;
    293272          cancellationToken.ThrowIfCancellationRequested();
    294273          for (var i = 0; i < GenerationSize; i++) {
    295             var samplepoint = OptimizeInfillProblem();
     274            var samplepoint = OptimizeInfillProblem(cancellationToken);
    296275            var sample = Evaluate(samplepoint);
    297276            Samples.Add(sample);
     
    317296      InfillOptimizationAlgorithm.ProblemChanged += InfillOptimizationProblemChanged;
    318297      InfillCriterionParameter.ValueChanged += InfillCriterionChanged;
     298      InitialSamplesParameter.ToStringChanged += OnInitialSamplesChanged;
     299
    319300
    320301    }
     
    324305      InfillOptimizationAlgorithm.ProblemChanged -= InfillOptimizationProblemChanged;
    325306      InfillCriterionParameter.ValueChanged -= InfillCriterionChanged;
     307      InitialSamplesParameter.ToStringChanged -= OnInitialSamplesChanged;
    326308    }
    327309    private void OnInfillOptimizationAlgorithmChanged(object sender, EventArgs args) {
     
    343325      RegressionAlgorithm.Problem = new RegressionProblem();
    344326    }
     327    private void OnInitialSamplesChanged(object sender, EventArgs args) {
     328      IRegressionProblemData samplesData = null;
     329      using (var importTypeDialog = new RegressionImportTypeDialog()) {
     330        if (importTypeDialog.ShowDialog() != DialogResult.OK) return;
     331        samplesData = new RegressionCSVInstanceProvider().ImportData(importTypeDialog.Path, importTypeDialog.ImportType, importTypeDialog.CSVFormat);
     332        InitialSamplesParameter.ToStringChanged -= OnInitialSamplesChanged;
     333        InitialSamplesParameter.Value.Value = importTypeDialog.Path;
     334        InitialSamplesParameter.ToStringChanged -= OnInitialSamplesChanged;
     335
     336      }
     337
     338
     339
     340      var solutions = new RealVector[samplesData.Dataset.Rows];
     341      var qualities = new double[samplesData.Dataset.Rows];
     342      var inputVariables = samplesData.InputVariables.CheckedItems.ToArray();
     343      for (var i = 0; i < solutions.Length; i++) {
     344        qualities[i] = samplesData.Dataset.GetDoubleValue(samplesData.TargetVariable, i);
     345        solutions[i] = new RealVector(inputVariables.Length);
     346        for (var j = 0; j < inputVariables.Length; j++) solutions[i][j] = samplesData.Dataset.GetDoubleValue(inputVariables[j].Value.Value, i);
     347      }
     348
     349      SetInitialSamples(solutions, qualities);
     350
     351    }
     352
    345353    protected override void OnExecutionTimeChanged() {
    346354      base.OnExecutionTimeChanged();
     
    350358    }
    351359    public override void Pause() {
    352       if (InfillOptimizationAlgorithm.ExecutionState == ExecutionState.Started) InfillOptimizationAlgorithm.Pause();
    353       if (RegressionAlgorithm.ExecutionState == ExecutionState.Started) RegressionAlgorithm.Pause();
     360      if (InfillOptimizationAlgorithm.ExecutionState == ExecutionState.Started || InfillOptimizationAlgorithm.ExecutionState == ExecutionState.Paused) InfillOptimizationAlgorithm.Stop();
     361      if (RegressionAlgorithm.ExecutionState == ExecutionState.Started || RegressionAlgorithm.ExecutionState == ExecutionState.Paused) RegressionAlgorithm.Stop();
    354362      base.Pause();
    355363    }
     
    376384    }
    377385    private IRegressionSolution BuildModel(CancellationToken cancellationToken) {
    378       var dataset = EgoUtilities.GetDataSet(DataSamples.ToList());
     386      var dataset = EgoUtilities.GetDataSet(DataSamples.ToList(), RemoveDuplicates);
    379387      var problemdata = new RegressionProblemData(dataset, dataset.VariableNames.Where(x => !x.Equals("output")), "output");
    380388      problemdata.TrainingPartition.Start = 0;
     
    388396      var i = 0;
    389397      IRegressionSolution solution = null;
    390       double r2 = 0;
    391       while ((solution == null || RegressionAlgorithm is GaussianProcessRegression && r2 < 0.95) && i++ < 100) {  //TODO: ask why GP degenerates to NaN so often
     398
     399      while (solution == null && i++ < 100) {  //TODO: Question: Why does GP degenerate to NaN so often? Answer: There is not even the slightest mitigation strategy for "almost duplicates" that ill-condition the covariance matrix.
    392400        var results = EgoUtilities.SyncRunSubAlgorithm(RegressionAlgorithm, Random.Next(int.MaxValue));
    393401        solution = results.Select(x => x.Value).OfType<IRegressionSolution>().SingleOrDefault();
    394         r2 = solution?.TrainingRSquared ?? 0;
    395402        cancellationToken.ThrowIfCancellationRequested();
    396403      }
    397404
    398       if (solution == null) throw new ArgumentException("The Algorithm did not return a Model");
     405      //try creating a model with old hyperparameters and new dataset;
     406      var gp = RegressionAlgorithm as GaussianProcessRegression;
     407      var oldmodel = ResultsModel as GaussianProcessRegressionSolution;
     408      if (gp != null && oldmodel != null) {
     409        var n = Samples.First().Item1.Length;
     410        var mean = (IMeanFunction)oldmodel.Model.MeanFunction.Clone();
     411        var cov = (ICovarianceFunction)oldmodel.Model.CovarianceFunction.Clone();
     412        if (mean.GetNumberOfParameters(n) != 0 || cov.GetNumberOfParameters(n) != 0) throw new ArgumentException("DEBUG: assumption about fixed paramters wrong");
     413        var noise = 0.0;
     414        double[] hyp = { noise };
     415        try {
     416          var model = new GaussianProcessModel(problemdata.Dataset, problemdata.TargetVariable,
     417            problemdata.AllowedInputVariables, problemdata.TrainingIndices, hyp, mean, cov);
     418          model.FixParameters();
     419          var sol = new GaussianProcessRegressionSolution(model, problemdata);
     420          if (solution == null || solution.TrainingMeanSquaredError > sol.TrainingMeanSquaredError) {
     421            solution = sol;
     422          }
     423        }
     424        catch (ArgumentException) { }
     425      }
     426
     427
     428      if (!ResultsQualities.Rows.ContainsKey("DEBUG: Degenerates")) ResultsQualities.Rows.Add(new DataRow("DEBUG: Degenerates"));
     429      var row = ResultsQualities.Rows["DEBUG: Degenerates"];
     430      row.Values.Add(i - 1);
     431      if (solution == null) Results.Add(new Result("Status", new StringValue("The Algorithm did not return a Model")));
     432      else {
     433        if (!ResultsQualities.Rows.ContainsKey("DEBUG: RMSE")) ResultsQualities.Rows.Add(new DataRow("DEBUG: RMSE"));
     434        row = ResultsQualities.Rows["DEBUG: RMSE"];
     435        row.Values.Add(Math.Sqrt(solution.TrainingMeanSquaredError));
     436      }
     437
    399438      RegressionAlgorithm.Runs.Clear();
    400439      return solution;
    401440    }
    402     private RealVector OptimizeInfillProblem() {
     441    private RealVector OptimizeInfillProblem(CancellationToken cancellationToken) {
    403442      //parameterize and check InfillProblem
    404443      var infillProblem = InfillOptimizationAlgorithm.Problem as InfillProblem;
     
    406445      if (infillProblem.InfillCriterion != InfillCriterion) throw new ArgumentException("InfillCiriterion for Problem is not correct. Problem with Eventhandling?");
    407446      if (infillProblem.Problem != Problem) throw new ArgumentException("Expensive real problem is not correctly set in InfillProblem. Problem with Eventhandling?");
    408       infillProblem.RegressionSolution = ResultsModel;
    409       if (MaximalDatasetSize > 0 && MaximalDatasetSize < Samples.Count) { infillProblem.Encoding.Bounds = EgoUtilities.GetBoundingBox(DataSamples.Select(x => x.Item1)); }
     447      InfillCriterion.Initialize(ResultsModel, Problem.Maximization, infillProblem.Encoding);
    410448
    411449      RealVector bestVector = null;
     
    415453        //optimize
    416454        var res = EgoUtilities.SyncRunSubAlgorithm(InfillOptimizationAlgorithm, Random.Next(int.MaxValue));
    417 
     455        cancellationToken.ThrowIfCancellationRequested();
    418456        //extract results
    419457        if (!res.ContainsKey(BestInfillSolutionResultName)) throw new ArgumentException("The InfillOptimizationAlgorithm did not return a best solution");
Note: See TracChangeset for help on using the changeset viewer.