Changeset 14818


Ignore:
Timestamp:
04/04/17 12:37:52 (2 years ago)
Author:
bwerth
Message:

#2745 added several new InfillCriteria and moved Parameters from the InfillProblem to the Criteria themselves; added sanity checks for GaussianProcessRegression

Location:
branches/EfficientGlobalOptimization/HeuristicLab.Algorithms.EGO
Files:
4 added
10 edited

Legend:

Unmodified
Added
Removed
  • branches/EfficientGlobalOptimization/HeuristicLab.Algorithms.EGO/EfficientGlobalOptimizationAlgorithm.cs

    r14768 r14818  
    2424using System.Linq;
    2525using System.Threading;
     26using System.Windows.Forms;
    2627using HeuristicLab.Algorithms.DataAnalysis;
    2728using HeuristicLab.Analysis;
     
    3435using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    3536using HeuristicLab.Problems.DataAnalysis;
     37using HeuristicLab.Problems.Instances.DataAnalysis;
     38using HeuristicLab.Problems.Instances.DataAnalysis.Views;
    3639using HeuristicLab.Random;
    3740
     
    5760    private const string InfillOptimizationRestartsParameterName = "InfillOptimizationRestarts";
    5861    private const string InitialEvaluationsParameterName = "Initial Evaluations";
    59     private const string MaximumIterationsParameterName = "Maximum Iterations";
     62    private const string MaximumEvaluationsParameterName = "Maximum Evaluations";
    6063    private const string MaximumRuntimeParameterName = "Maximum Runtime";
    6164    private const string RegressionAlgorithmParameterName = "RegressionAlgorithm";
     
    6366    private const string SetSeedRandomlyParameterName = "SetSeedRandomly";
    6467    private const string MaximalDataSetSizeParameterName = "MaximalDataSetSize";
     68    private const string RemoveDuplicatesParamterName = "RemoveDuplicates";
     69    private const string InitialSamplesParameterName = "InitialSamplesFile";
     70    private const string BaselineVectorParameterName = "BaselineVector";
    6571    #endregion
    6672
     
    8894    public IFixedValueParameter<IntValue> InfillOptimizationRestartsParemeter => Parameters[InfillOptimizationRestartsParameterName] as IFixedValueParameter<IntValue>;
    8995    public IFixedValueParameter<IntValue> InitialEvaluationsParameter => Parameters[InitialEvaluationsParameterName] as IFixedValueParameter<IntValue>;
    90     public IFixedValueParameter<IntValue> MaximumIterationsParameter => Parameters[MaximumIterationsParameterName] as IFixedValueParameter<IntValue>;
     96    public IFixedValueParameter<IntValue> MaximumEvaluationsParameter => Parameters[MaximumEvaluationsParameterName] as IFixedValueParameter<IntValue>;
    9197    public IFixedValueParameter<IntValue> MaximumRuntimeParameter => Parameters[MaximumRuntimeParameterName] as IFixedValueParameter<IntValue>;
    9298    public IValueParameter<IDataAnalysisAlgorithm<IRegressionProblem>> RegressionAlgorithmParameter => Parameters[RegressionAlgorithmParameterName] as IValueParameter<IDataAnalysisAlgorithm<IRegressionProblem>>;
     
    94100    public IFixedValueParameter<BoolValue> SetSeedRandomlyParameter => Parameters[SetSeedRandomlyParameterName] as IFixedValueParameter<BoolValue>;
    95101    public IFixedValueParameter<IntValue> MaximalDataSetSizeParameter => Parameters[MaximalDataSetSizeParameterName] as IFixedValueParameter<IntValue>;
     102    public IFixedValueParameter<BoolValue> RemoveDuplicatesParameter => Parameters[RemoveDuplicatesParamterName] as IFixedValueParameter<BoolValue>;
     103
     104    public IFixedValueParameter<FileValue> InitialSamplesParameter => Parameters[InitialSamplesParameterName] as IFixedValueParameter<FileValue>;
     105
     106    public IValueParameter<RealVector> BaselineVectorParameter => Parameters[BaselineVectorParameterName] as IValueParameter<RealVector>;
    96107    #endregion
    97108
    98109    #region Properties
    99110
    100     public int GenerationSize
    101     {
    102       get { return GenerationSizeParemeter.Value.Value; }
    103     }
    104     public IInfillCriterion InfillCriterion
    105     {
    106       get { return InfillCriterionParameter.Value; }
    107     }
    108     public Algorithm InfillOptimizationAlgorithm
    109     {
    110       get { return InfillOptimizationAlgorithmParameter.Value; }
    111     }
    112     public int InfillOptimizationRestarts
    113     {
    114       get { return InfillOptimizationRestartsParemeter.Value.Value; }
    115     }
    116     public int InitialEvaluations
    117     {
    118       get { return InitialEvaluationsParameter.Value.Value; }
    119     }
    120     public int MaximumIterations
    121     {
    122       get { return MaximumIterationsParameter.Value.Value; }
    123     }
    124     public int MaximumRuntime
    125     {
    126       get { return MaximumRuntimeParameter.Value.Value; }
    127     }
    128     public IDataAnalysisAlgorithm<IRegressionProblem> RegressionAlgorithm
    129     {
    130       get { return RegressionAlgorithmParameter.Value; }
    131     }
    132     public int Seed
    133     {
    134       get { return SeedParameter.Value.Value; }
    135     }
    136     public bool SetSeedRandomly
    137     {
    138       get { return SetSeedRandomlyParameter.Value.Value; }
    139     }
    140     public int MaximalDatasetSize
    141     {
    142       get { return MaximalDataSetSizeParameter.Value.Value; }
    143     }
    144 
    145     private IEnumerable<Tuple<RealVector, double>> DataSamples
    146     {
    147       get
    148       {
    149         return Samples.Count > MaximalDatasetSize && MaximalDatasetSize > 0
    150           ? Samples.Skip(Samples.Count - MaximalDatasetSize)
    151           : Samples;
    152       }
    153     }
    154 
     111    public int GenerationSize => GenerationSizeParemeter.Value.Value;
     112    public IInfillCriterion InfillCriterion => InfillCriterionParameter.Value;
     113    public Algorithm InfillOptimizationAlgorithm => InfillOptimizationAlgorithmParameter.Value;
     114    public int InfillOptimizationRestarts => InfillOptimizationRestartsParemeter.Value.Value;
     115    public int InitialEvaluations => InitialEvaluationsParameter.Value.Value;
     116    public int MaximumEvaluations => MaximumEvaluationsParameter.Value.Value;
     117    public int MaximumRuntime => MaximumRuntimeParameter.Value.Value;
     118    public IDataAnalysisAlgorithm<IRegressionProblem> RegressionAlgorithm => RegressionAlgorithmParameter.Value;
     119    public int Seed => SeedParameter.Value.Value;
     120    public bool SetSeedRandomly => SetSeedRandomlyParameter.Value.Value;
     121    public int MaximalDatasetSize => MaximalDataSetSizeParameter.Value.Value;
     122    private IEnumerable<Tuple<RealVector, double>> DataSamples => Samples.Count > MaximalDatasetSize && MaximalDatasetSize > 0
     123      ? Samples.Skip(Samples.Count - MaximalDatasetSize)
     124      : Samples;
     125
     126    private bool RemoveDuplicates => RemoveDuplicatesParameter.Value.Value;
     127    private RealVector BaselineVector => BaselineVectorParameter.Value;
    155128    #endregion
    156129
     
    185158      set { ((IntValue)Results[IterationsResultName].Value).Value = value; }
    186159    }
    187     private DataTable ResultsQualities
    188     {
    189       get { return (DataTable)Results[QualitiesChartResultName].Value; }
    190     }
    191     private DataRow ResultsQualitiesBest
    192     {
    193       get { return ResultsQualities.Rows[BestQualitiesRowResultName]; }
    194     }
    195     private DataRow ResultsQualitiesWorst
    196     {
    197       get { return ResultsQualities.Rows[WorstQualitiesRowResultName]; }
    198     }
    199     private DataRow ResultsQualitiesIteration
    200     {
    201       get { return ResultsQualities.Rows[CurrentQualitiesRowResultName]; }
    202     }
     160    private DataTable ResultsQualities => (DataTable)Results[QualitiesChartResultName].Value;
     161    private DataRow ResultsQualitiesBest => ResultsQualities.Rows[BestQualitiesRowResultName];
     162
     163    private DataRow ResultsQualitiesWorst => ResultsQualities.Rows[WorstQualitiesRowResultName];
     164
     165    private DataRow ResultsQualitiesIteration => ResultsQualities.Rows[CurrentQualitiesRowResultName];
     166
    203167    private IRegressionSolution ResultsModel
    204168    {
     
    232196      };
    233197      model.CovarianceFunctionParameter.Value = new CovarianceRationalQuadraticIso();
    234       Parameters.Add(new FixedValueParameter<IntValue>(MaximumIterationsParameterName, "", new IntValue(int.MaxValue)));
     198      Parameters.Add(new FixedValueParameter<IntValue>(MaximumEvaluationsParameterName, "", new IntValue(int.MaxValue)));
    235199      Parameters.Add(new FixedValueParameter<IntValue>(InitialEvaluationsParameterName, "", new IntValue(10)));
    236200      Parameters.Add(new FixedValueParameter<IntValue>(MaximumRuntimeParameterName, "The maximum runtime in seconds after which the algorithm stops. Use -1 to specify no limit for the runtime", new IntValue(3600)));
     
    242206      Parameters.Add(new FixedValueParameter<IntValue>(GenerationSizeParameterName, "Number points that are sampled every iteration (stadard EGO: 1)", new IntValue(1)));
    243207      Parameters.Add(new ConstrainedValueParameter<IInfillCriterion>(InfillCriterionParameterName, "Decision what value should decide the next sample"));
     208      InfillCriterionParameter.ValidValues.Add(new AugmentedExpectedImprovement());
    244209      InfillCriterionParameter.ValidValues.Add(new ExpectedImprovement());
    245210      InfillCriterionParameter.ValidValues.Add(new ExpectedQuality());
    246       InfillCriterionParameter.ValidValues.Add(new ConfidenceBound());
     211      var eqi = new ExpectedQuantileImprovement();
     212      InfillCriterionParameter.ValidValues.Add(eqi);
     213      eqi.MaxEvaluationsParameter.Value = MaximumEvaluationsParameter.Value;
     214      InfillCriterionParameter.ValidValues.Add(new MinimalQuantileCriterium());
     215      InfillCriterionParameter.ValidValues.Add(new RobustImprovement());
     216      InfillCriterionParameter.ValidValues.Add(new PluginExpectedImprovement());
    247217      Parameters.Add(new FixedValueParameter<IntValue>(MaximalDataSetSizeParameterName, "The maximum number of sample points used to generate the model. Set 0 or less to use always all samples ", new IntValue(-1)));
    248 
     218      Parameters.Add(new FixedValueParameter<BoolValue>(RemoveDuplicatesParamterName, "Wether duplicate samples should be replaced by a single sample with an averaged quality. This GREATLY decreases the chance of ill conditioned models (unbuildable models) but is not theoretically sound as the model ignores the increasing certainty in this region"));
     219      Parameters.Add(new FixedValueParameter<FileValue>(InitialSamplesParameterName, "The file specifying some initial samples used to jump start the algorithm. These samples are not counted as evaluations. If InitialEvaluations is more than the samples specified in the file, the rest is uniformly random generated and evaluated.", new FileValue()));
     220      Parameters.Add(new ValueParameter<RealVector>(BaselineVectorParameterName, "A vector used to create a baseline, this vector is evaluated once and is not part of the modeling process (has no influence on algorithm performance)"));
    249221      SetInfillProblem();
    250222      RegisterEventhandlers();
     
    257229      var enc = Problem.Encoding as RealVectorEncoding;
    258230      if (enc == null) throw new ArgumentException("The EGO algorithm can only be applied to RealVectorEncodings");
     231      var infillProblem = InfillOptimizationAlgorithm.Problem as InfillProblem;
     232      if (infillProblem == null) throw new ArgumentException("InfillOptimizationAlgorithm has no InfillProblem. Troubles with Eventhandling?");
     233      infillProblem.Problem = Problem;
     234
    259235
    260236      //random
     
    274250      table.Rows.Add(new DataRow(CurrentQualitiesRowResultName));
    275251      Results.Add(new Result(QualitiesChartResultName, table));
     252      if (BaselineVector != null && BaselineVector.Length == enc.Length)
     253        Results.Add(new Result("BaselineValue", new DoubleValue(Evaluate(BaselineVector).Item2)));
    276254
    277255      //initial samples
     
    288266
    289267    protected override void Run(CancellationToken cancellationToken) {
    290       for (ResultsIterations = 0; ResultsIterations < MaximumIterations; ResultsIterations++) {
     268      for (ResultsIterations = 0; ResultsEvaluations < MaximumEvaluations; ResultsIterations++) {
    291269        try {
    292270          ResultsModel = BuildModel(cancellationToken);
     271          if (ResultsModel == null) break;
    293272          cancellationToken.ThrowIfCancellationRequested();
    294273          for (var i = 0; i < GenerationSize; i++) {
    295             var samplepoint = OptimizeInfillProblem();
     274            var samplepoint = OptimizeInfillProblem(cancellationToken);
    296275            var sample = Evaluate(samplepoint);
    297276            Samples.Add(sample);
     
    317296      InfillOptimizationAlgorithm.ProblemChanged += InfillOptimizationProblemChanged;
    318297      InfillCriterionParameter.ValueChanged += InfillCriterionChanged;
     298      InitialSamplesParameter.ToStringChanged += OnInitialSamplesChanged;
     299
    319300
    320301    }
     
    324305      InfillOptimizationAlgorithm.ProblemChanged -= InfillOptimizationProblemChanged;
    325306      InfillCriterionParameter.ValueChanged -= InfillCriterionChanged;
     307      InitialSamplesParameter.ToStringChanged -= OnInitialSamplesChanged;
    326308    }
    327309    private void OnInfillOptimizationAlgorithmChanged(object sender, EventArgs args) {
     
    343325      RegressionAlgorithm.Problem = new RegressionProblem();
    344326    }
     327    private void OnInitialSamplesChanged(object sender, EventArgs args) {
     328      IRegressionProblemData samplesData = null;
     329      using (var importTypeDialog = new RegressionImportTypeDialog()) {
     330        if (importTypeDialog.ShowDialog() != DialogResult.OK) return;
     331        samplesData = new RegressionCSVInstanceProvider().ImportData(importTypeDialog.Path, importTypeDialog.ImportType, importTypeDialog.CSVFormat);
     332        InitialSamplesParameter.ToStringChanged -= OnInitialSamplesChanged;
     333        InitialSamplesParameter.Value.Value = importTypeDialog.Path;
     334        InitialSamplesParameter.ToStringChanged -= OnInitialSamplesChanged;
     335
     336      }
     337
     338
     339
     340      var solutions = new RealVector[samplesData.Dataset.Rows];
     341      var qualities = new double[samplesData.Dataset.Rows];
     342      var inputVariables = samplesData.InputVariables.CheckedItems.ToArray();
     343      for (var i = 0; i < solutions.Length; i++) {
     344        qualities[i] = samplesData.Dataset.GetDoubleValue(samplesData.TargetVariable, i);
     345        solutions[i] = new RealVector(inputVariables.Length);
     346        for (var j = 0; j < inputVariables.Length; j++) solutions[i][j] = samplesData.Dataset.GetDoubleValue(inputVariables[j].Value.Value, i);
     347      }
     348
     349      SetInitialSamples(solutions, qualities);
     350
     351    }
     352
    345353    protected override void OnExecutionTimeChanged() {
    346354      base.OnExecutionTimeChanged();
     
    350358    }
    351359    public override void Pause() {
    352       if (InfillOptimizationAlgorithm.ExecutionState == ExecutionState.Started) InfillOptimizationAlgorithm.Pause();
    353       if (RegressionAlgorithm.ExecutionState == ExecutionState.Started) RegressionAlgorithm.Pause();
     360      if (InfillOptimizationAlgorithm.ExecutionState == ExecutionState.Started || InfillOptimizationAlgorithm.ExecutionState == ExecutionState.Paused) InfillOptimizationAlgorithm.Stop();
     361      if (RegressionAlgorithm.ExecutionState == ExecutionState.Started || RegressionAlgorithm.ExecutionState == ExecutionState.Paused) RegressionAlgorithm.Stop();
    354362      base.Pause();
    355363    }
     
    376384    }
    377385    private IRegressionSolution BuildModel(CancellationToken cancellationToken) {
    378       var dataset = EgoUtilities.GetDataSet(DataSamples.ToList());
     386      var dataset = EgoUtilities.GetDataSet(DataSamples.ToList(), RemoveDuplicates);
    379387      var problemdata = new RegressionProblemData(dataset, dataset.VariableNames.Where(x => !x.Equals("output")), "output");
    380388      problemdata.TrainingPartition.Start = 0;
     
    388396      var i = 0;
    389397      IRegressionSolution solution = null;
    390       double r2 = 0;
    391       while ((solution == null || RegressionAlgorithm is GaussianProcessRegression && r2 < 0.95) && i++ < 100) {  //TODO: ask why GP degenerates to NaN so often
     398
     399      while (solution == null && i++ < 100) {  //TODO: Question: Why does GP degenerate to NaN so often? Answer: There is not even the slightest mitigation strategy for "almost duplicates" that ill-condition the covariance matrix.
    392400        var results = EgoUtilities.SyncRunSubAlgorithm(RegressionAlgorithm, Random.Next(int.MaxValue));
    393401        solution = results.Select(x => x.Value).OfType<IRegressionSolution>().SingleOrDefault();
    394         r2 = solution?.TrainingRSquared ?? 0;
    395402        cancellationToken.ThrowIfCancellationRequested();
    396403      }
    397404
    398       if (solution == null) throw new ArgumentException("The Algorithm did not return a Model");
     405      //try creating a model with old hyperparameters and new dataset;
     406      var gp = RegressionAlgorithm as GaussianProcessRegression;
     407      var oldmodel = ResultsModel as GaussianProcessRegressionSolution;
     408      if (gp != null && oldmodel != null) {
     409        var n = Samples.First().Item1.Length;
     410        var mean = (IMeanFunction)oldmodel.Model.MeanFunction.Clone();
     411        var cov = (ICovarianceFunction)oldmodel.Model.CovarianceFunction.Clone();
     412        if (mean.GetNumberOfParameters(n) != 0 || cov.GetNumberOfParameters(n) != 0) throw new ArgumentException("DEBUG: assumption about fixed paramters wrong");
     413        var noise = 0.0;
     414        double[] hyp = { noise };
     415        try {
     416          var model = new GaussianProcessModel(problemdata.Dataset, problemdata.TargetVariable,
     417            problemdata.AllowedInputVariables, problemdata.TrainingIndices, hyp, mean, cov);
     418          model.FixParameters();
     419          var sol = new GaussianProcessRegressionSolution(model, problemdata);
     420          if (solution == null || solution.TrainingMeanSquaredError > sol.TrainingMeanSquaredError) {
     421            solution = sol;
     422          }
     423        }
     424        catch (ArgumentException) { }
     425      }
     426
     427
     428      if (!ResultsQualities.Rows.ContainsKey("DEBUG: Degenerates")) ResultsQualities.Rows.Add(new DataRow("DEBUG: Degenerates"));
     429      var row = ResultsQualities.Rows["DEBUG: Degenerates"];
     430      row.Values.Add(i - 1);
     431      if (solution == null) Results.Add(new Result("Status", new StringValue("The Algorithm did not return a Model")));
     432      else {
     433        if (!ResultsQualities.Rows.ContainsKey("DEBUG: RMSE")) ResultsQualities.Rows.Add(new DataRow("DEBUG: RMSE"));
     434        row = ResultsQualities.Rows["DEBUG: RMSE"];
     435        row.Values.Add(Math.Sqrt(solution.TrainingMeanSquaredError));
     436      }
     437
    399438      RegressionAlgorithm.Runs.Clear();
    400439      return solution;
    401440    }
    402     private RealVector OptimizeInfillProblem() {
     441    private RealVector OptimizeInfillProblem(CancellationToken cancellationToken) {
    403442      //parameterize and check InfillProblem
    404443      var infillProblem = InfillOptimizationAlgorithm.Problem as InfillProblem;
     
    406445      if (infillProblem.InfillCriterion != InfillCriterion) throw new ArgumentException("InfillCiriterion for Problem is not correct. Problem with Eventhandling?");
    407446      if (infillProblem.Problem != Problem) throw new ArgumentException("Expensive real problem is not correctly set in InfillProblem. Problem with Eventhandling?");
    408       infillProblem.RegressionSolution = ResultsModel;
    409       if (MaximalDatasetSize > 0 && MaximalDatasetSize < Samples.Count) { infillProblem.Encoding.Bounds = EgoUtilities.GetBoundingBox(DataSamples.Select(x => x.Item1)); }
     447      InfillCriterion.Initialize(ResultsModel, Problem.Maximization, infillProblem.Encoding);
    410448
    411449      RealVector bestVector = null;
     
    415453        //optimize
    416454        var res = EgoUtilities.SyncRunSubAlgorithm(InfillOptimizationAlgorithm, Random.Next(int.MaxValue));
    417 
     455        cancellationToken.ThrowIfCancellationRequested();
    418456        //extract results
    419457        if (!res.ContainsKey(BestInfillSolutionResultName)) throw new ArgumentException("The InfillOptimizationAlgorithm did not return a best solution");
  • branches/EfficientGlobalOptimization/HeuristicLab.Algorithms.EGO/EgoUtilities.cs

    r14768 r14818  
    9494
    9595    public static double GetEstimation(this IRegressionModel model, RealVector r) {
    96       var dataset = GetDataSet(new[] { new Tuple<RealVector, double>(r, 0.0) });
     96      var dataset = GetDataSet(new[] { new Tuple<RealVector, double>(r, 0.0) }, false);
    9797      return model.GetEstimatedValues(dataset, new[] { 0 }).First();
    9898    }
    9999    public static double GetVariance(this IConfidenceRegressionModel model, RealVector r) {
    100       var dataset = GetDataSet(new[] { new Tuple<RealVector, double>(r, 0.0) });
     100      var dataset = GetDataSet(new[] { new Tuple<RealVector, double>(r, 0.0) }, false);
    101101      return model.GetEstimatedVariances(dataset, new[] { 0 }).First();
    102102    }
    103103
    104     public static Dataset GetDataSet(IReadOnlyList<Tuple<RealVector, double>> samples) {
    105       var n = samples[0].Item1.Length + 1;
    106       var data = new double[samples.Count, n];
    107       var names = new string[n - 1];
    108       for (var i = 0; i < n; i++)
    109         if (i < names.Length) {
    110           names[i] = "input" + i;
    111           for (var j = 0; j < samples.Count; j++) data[j, i] = samples[j].Item1[i];
    112         } else
    113           for (var j = 0; j < samples.Count; j++) data[j, n - 1] = samples[j].Item2;
     104
     105    public static double GetDoubleValue(this IDataset dataset, int i, int j) {
              // Convenience accessor: i is the row index, j selects the column named "input" + j
              // (the same naming that GetDataSet in this class gives to its input columns).
     106      return dataset.GetDoubleValue("input" + j, i);
     107    }
     108    public static Dataset GetDataSet(IReadOnlyList<Tuple<RealVector, double>> samples, bool removeDuplicates) {
              // Builds a Dataset with one row per sample: columns "input0", "input1", ... hold the components of
              // Item1 and a final column "output" holds Item2.
              // When removeDuplicates is true the samples are first passed through RemoveDuplicates.
              // NOTE(review): samples[0] is read unconditionally, so an empty list will throw - confirm callers never pass one.
     109      if (removeDuplicates)
     110        samples = RemoveDuplicates(samples); //TODO duplicates require heteroskedasticity in Models
     111
     112
              // dimensions = number of input components + 1 for the output column
     113      var dimensions = samples[0].Item1.Length + 1;
     114      var data = new double[samples.Count, dimensions];
     115      var names = new string[dimensions - 1];
     116      for (var i = 0; i < names.Length; i++) names[i] = "input" + i;
     117
     118      for (var j = 0; j < samples.Count; j++) {
     119        for (var i = 0; i < names.Length; i++) data[j, i] = samples[j].Item1[i];
     120        data[j, dimensions - 1] = samples[j].Item2;
     121
     122      }
     123
     124
    114125      return new Dataset(names.Concat(new[] { "output" }).ToArray(), data);
    115126    }
     127
     128    private static IReadOnlyList<Tuple<RealVector, double>> RemoveDuplicates(IReadOnlyList<Tuple<RealVector, double>> samples) {
              // Collapses samples that lie within a Euclidean distance of 0.0001 of an already kept sample.
              // Each kept entry is (vector, sum of values, count); the returned value per entry is sum / count.
     129      var res = new List<Tuple<RealVector, double, int>>();
     130
     131      foreach (var sample in samples) {
     132        if (res.Count == 0) {
     133          res.Add(new Tuple<RealVector, double, int>(sample.Item1, sample.Item2, 1));
     134          continue;
     135        }
     136
                // presumably ArgMin returns the index of the closest kept entry - helper is not defined in this view
     137        var index = res.ArgMin(x => Euclidian(sample.Item1, x.Item1));
     138        var d = Euclidian(res[index].Item1, sample.Item1);
     139        if (d > 0.0001)
     140          res.Add(new Tuple<RealVector, double, int>(sample.Item1, sample.Item2, 1));
     141        else {
                  // Merge into the closest entry: keep its vector, accumulate value and count.
                  // RemoveAt followed by Add moves the merged entry to the end, so output order can differ from input order.
     142          var t = res[index];
     143          res.RemoveAt(index);
     144          res.Add(new Tuple<RealVector, double, int>(t.Item1, t.Item2 + sample.Item2, t.Item3 + 1));
     145        }
     146      }
     147      return res.Select(x => new Tuple<RealVector, double>(x.Item1, x.Item2 / x.Item3)).ToArray();
     148    }
     149
     150    private static double Euclidian(IEnumerable<double> a, IEnumerable<double> b) {
              // Euclidean distance: square root of the summed squared component differences.
              // Zip pairs elements only up to the shorter sequence, so surplus components of the longer one are ignored.
     151      return Math.Sqrt(a.Zip(b, (d, d1) => d - d1).Sum(d => d * d));
     152    }
     153
    116154    public static DoubleMatrix GetBoundingBox(IEnumerable<RealVector> vectors) {
    117155      DoubleMatrix res = null;
     
    128166      return res;
    129167    }
     168
     169
    130170  }
    131171}
  • branches/EfficientGlobalOptimization/HeuristicLab.Algorithms.EGO/HeuristicLab.Algorithms.EGO-3.4.csproj

    r14768 r14818  
    9292      <HintPath>..\..\..\trunk\sources\bin\HeuristicLab.Problems.Instances-3.3.dll</HintPath>
    9393    </Reference>
     94    <Reference Include="HeuristicLab.Problems.Instances.DataAnalysis-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
     95      <SpecificVersion>False</SpecificVersion>
     96      <HintPath>..\..\..\trunk\sources\bin\HeuristicLab.Problems.Instances.DataAnalysis-3.3.dll</HintPath>
     97    </Reference>
     98    <Reference Include="HeuristicLab.Problems.Instances.DataAnalysis.Views-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
     99      <SpecificVersion>False</SpecificVersion>
     100      <HintPath>..\..\..\trunk\sources\bin\HeuristicLab.Problems.Instances.DataAnalysis.Views-3.3.dll</HintPath>
     101    </Reference>
    94102    <Reference Include="HeuristicLab.Random-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
    95103      <SpecificVersion>False</SpecificVersion>
     
    98106    <Reference Include="System" />
    99107    <Reference Include="System.Core" />
     108    <Reference Include="System.Windows.Forms" />
    100109    <Reference Include="System.Xml.Linq" />
    101110    <Reference Include="System.Data.DataSetExtensions" />
     
    108117    <Compile Include="EgoUtilities.cs" />
    109118    <Compile Include="EfficientGlobalOptimizationAlgorithm.cs" />
     119    <Compile Include="InfillCriteria\AugmentedExpectedImprovement.cs" />
     120    <Compile Include="InfillCriteria\ExpectedQuantileImprovement.cs" />
     121    <Compile Include="InfillCriteria\PluginExpectedImprovement.cs" />
    110122    <Compile Include="InfillCriteria\RobustImprovement.cs" />
    111     <Compile Include="InfillCriteria\ConfidenceBound.cs" />
     123    <Compile Include="InfillCriteria\MinimalQuantileCriterium.cs" />
    112124    <Compile Include="InfillCriteria\ExpectedQuality.cs" />
    113125    <Compile Include="InfillCriteria\InfillCriterionBase.cs" />
  • branches/EfficientGlobalOptimization/HeuristicLab.Algorithms.EGO/InfillCriteria/ExpectedImprovement.cs

    r14768 r14818  
    4242
    4343    #region ParameterProperties
    44     public IFixedValueParameter<DoubleValue> ExploitationWeightParameter
    45     {
    46       get { return Parameters[ExploitationWeightParameterName] as IFixedValueParameter<DoubleValue>; }
    47     }
     44    public IFixedValueParameter<DoubleValue> ExploitationWeightParameter => Parameters[ExploitationWeightParameterName] as IFixedValueParameter<DoubleValue>;
     45
    4846    #endregion
    4947
    5048    #region Properties
    51     private double ExploitationWeight
    52     {
    53       get { return ExploitationWeightParameter.Value.Value; }
    54     }
     49    protected double ExploitationWeight => ExploitationWeightParameter.Value.Value;
     50
     51    [Storable]
     52    protected double YMin;
    5553    #endregion
    5654
    5755    #region HL-Constructors, Serialization and Cloning
    5856    [StorableConstructor]
    59     private ExpectedImprovement(bool deserializing) : base(deserializing) { }
     57    protected ExpectedImprovement(bool deserializing) : base(deserializing) { }
    6058    [StorableHook(HookType.AfterDeserialization)]
    6159    private void AfterDeserialization() {
    6260      RegisterEventhandlers();
    6361    }
    64     private ExpectedImprovement(ExpectedImprovement original, Cloner cloner) : base(original, cloner) {
     62    protected ExpectedImprovement(ExpectedImprovement original, Cloner cloner) : base(original, cloner) {
    6563      RegisterEventhandlers();
    6664    }
     
    7472    #endregion
    7573
    76     public override double Evaluate(IRegressionSolution solution, RealVector vector, bool maximization) {
    77       if (maximization) throw new NotImplementedException("Expected Improvement for maximization not yet implemented");
    78       var model = solution.Model as IConfidenceRegressionModel;
    79       if (model == null) throw new ArgumentException("can not calculate EI without confidence measure");
     74    public override double Evaluate(RealVector vector) {
     75      var model = RegressionSolution.Model as IConfidenceRegressionModel;
    8076      var yhat = model.GetEstimation(vector);
    81       var min = solution.ProblemData.TargetVariableTrainingValues.Min();
    8277      var s = Math.Sqrt(model.GetVariance(vector));
    83       return GetEstimatedImprovement(min, yhat, s, ExploitationWeight);
     78      return GetEstimatedImprovement(YMin, yhat, s, ExploitationWeight);
    8479    }
    8580
    86     public override bool Maximization(bool expensiveProblemMaximization) {
     81    public override bool Maximization() {
    8782      return true;
     83    }
     84
     85    protected override void Initialize() {
     86      if (ExpensiveMaximization) throw new NotImplementedException("Expected Improvement for maximization not yet implemented");
     87      var model = RegressionSolution.Model as IConfidenceRegressionModel;
     88      if (model == null) throw new ArgumentException("can not calculate EI without confidence measure");
     89      YMin = RegressionSolution.ProblemData.TargetVariableTrainingValues.Min();
    8890    }
    8991
     
    104106
    105107    #region Helpers
    106     private static double GetEstimatedImprovement(double ymin, double yhat, double s, double w) {
     108    protected static double GetEstimatedImprovement(double ymin, double yhat, double s, double w) {
    107109      if (Math.Abs(s) < double.Epsilon) return 0;
    108110      var val = (ymin - yhat) / s;
  • branches/EfficientGlobalOptimization/HeuristicLab.Algorithms.EGO/InfillCriteria/ExpectedQuality.cs

    r14741 r14818  
    2424using HeuristicLab.Encodings.RealVectorEncoding;
    2525using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    26 using HeuristicLab.Problems.DataAnalysis;
    2726
    2827// ReSharper disable once CheckNamespace
     
    4241    #endregion
    4342
    44     public override double Evaluate(IRegressionSolution solution, RealVector vector, bool maximization) {
    45       return solution.Model.GetEstimation(vector);
     43    public override double Evaluate(RealVector vector) {
     44      return RegressionSolution.Model.GetEstimation(vector);
     45    }
     46
     47    public override bool Maximization() {
     48      return ExpensiveMaximization;
     49    }
     50
     51    protected override void Initialize() {
    4652    }
    4753  }
  • branches/EfficientGlobalOptimization/HeuristicLab.Algorithms.EGO/InfillCriteria/InfillCriterionBase.cs

    r14741 r14818  
    2323using HeuristicLab.Core;
    2424using HeuristicLab.Encodings.RealVectorEncoding;
     25using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    2526using HeuristicLab.Problems.DataAnalysis;
    2627
    2728// ReSharper disable once CheckNamespace
    2829namespace HeuristicLab.Algorithms.EGO {
    29 
     30  [StorableClass]
    3031  public abstract class InfillCriterionBase : ParameterizedNamedItem, IInfillCriterion {
    3132
     33    [Storable]
     34    protected IRegressionSolution RegressionSolution;
     35    [Storable]
     36    protected bool ExpensiveMaximization;
     37    [Storable]
     38    protected RealVectorEncoding Encoding;
     39
    3240    protected InfillCriterionBase(bool deserializing) : base(deserializing) { }
    33     protected InfillCriterionBase(InfillCriterionBase original, Cloner cloner) : base(original, cloner) { }
     41
     42    protected InfillCriterionBase(InfillCriterionBase original, Cloner cloner) : base(original, cloner) {
     43      RegressionSolution = cloner.Clone(original.RegressionSolution);
     44      ExpensiveMaximization = original.ExpensiveMaximization;
     45      Encoding = cloner.Clone(original.Encoding);
     46    }
    3447    protected InfillCriterionBase() { }
    3548
    36     public abstract double Evaluate(IRegressionSolution model, RealVector vector, bool maximization);
    37     public virtual bool Maximization(bool expensiveProblemMaximization) {
    38       return expensiveProblemMaximization;
     49    public abstract double Evaluate(RealVector vector);
     50    public abstract bool Maximization();
     51
     52    public void Initialize(IRegressionSolution solution, bool expensiveMaximization, RealVectorEncoding encoding) {
     53      RegressionSolution = solution;
     54      ExpensiveMaximization = expensiveMaximization;
     55      Encoding = encoding;
     56      Initialize();
    3957    }
     58
     59    protected abstract void Initialize();
     60
    4061  }
    4162}
  • branches/EfficientGlobalOptimization/HeuristicLab.Algorithms.EGO/InfillCriteria/RobustImprovement.cs

    r14768 r14818  
    2121
    2222using System;
     23using System.Collections.Generic;
     24using System.Linq;
    2325using HeuristicLab.Common;
    2426using HeuristicLab.Core;
     
    3335
    3436  [StorableClass]
    35   [Item("ConfidenceBoundMeassure", "Adding or Subtracting the variance * factor to the model estimation")]
    36   public class ConfidenceBound : InfillCriterionBase {
     37  [Item("RobustImprovementMeassure", "Adding or Subtracting the variance * factor to the model estimation")]
     38  public class RobustImprovement : InfillCriterionBase {
    3739
    3840    #region ParameterNames
    39     private const string ConfidenceWeightParameterName = "ConfidenceWeight";
     41    private const string KParameterName = "NearestNeighbours";
    4042    #endregion
    4143
    4244    #region ParameterProperties
    43     public IFixedValueParameter<DoubleValue> ConfidenceWeightParameter
    44     {
    45       get { return Parameters[ConfidenceWeightParameterName] as IFixedValueParameter<DoubleValue>; }
    46     }
     45    public IFixedValueParameter<IntValue> KParameter => Parameters[KParameterName] as IFixedValueParameter<IntValue>;
     46
    4747    #endregion
    4848
    4949    #region Properties
    50     private double ConfidenceWeight
    51     {
    52       get { return ConfidenceWeightParameter.Value.Value; }
    53     }
     50    private int K => KParameter.Value.Value;
     51
     52    [Storable]
     53    private double MaxSolutionDist;
     54
     55    [Storable]
     56    //TODO use VP-Tree instead of array
     57    private RealVector[] Data;
    5458    #endregion
    5559
    5660    #region HL-Constructors, Serialization and Cloning
    5761    [StorableConstructor]
    58     private ConfidenceBound(bool deserializing) : base(deserializing) { }
    59     private ConfidenceBound(ConfidenceBound original, Cloner cloner) : base(original, cloner) { }
    60     public ConfidenceBound() {
    61       Parameters.Add(new FixedValueParameter<DoubleValue>(ConfidenceWeightParameterName, "A value between 0 and 1 indicating the focus on exploration (0) or exploitation (1)", new DoubleValue(0.5)));
     62    private RobustImprovement(bool deserializing) : base(deserializing) { }
     63
     64    private RobustImprovement(RobustImprovement original, Cloner cloner) : base(original, cloner) {
     65      MaxSolutionDist = original.MaxSolutionDist;
     66      Data = original.Data != null ? original.Data.Select(cloner.Clone).ToArray() : null;
     67    }
     68    public RobustImprovement() {
     69      Parameters.Add(new FixedValueParameter<IntValue>(KParameterName, "A value larger than 0 indicating how many nearestNeighbours shall be used to determine the RI meassure", new IntValue(3)));
    6270    }
    6371    public override IDeepCloneable Clone(Cloner cloner) {
    64       return new ConfidenceBound(this, cloner);
     72      return new RobustImprovement(this, cloner);
    6573    }
    6674    #endregion
    6775
    68     public override double Evaluate(IRegressionSolution solution, RealVector vector, bool maximization) {
    69       var model = solution.Model as IConfidenceRegressionModel;
     76
     77    public override double Evaluate(RealVector vector) {
     78      List<RealVector> nearestNeighbours;
     79      List<double> distances;
     80      Search(vector, K, out nearestNeighbours, out distances);
     81      var distVectors = nearestNeighbours.Select(x => Minus(x, vector)).ToList();
     82      var sum = 0.0;
     83      var wsum = 1.0; //weights for angular distance
     84      var used = new HashSet<RealVector>();
     85      foreach (var distVector in distVectors) {
     86        var d = Math.Pow(distances[used.Count], 0.5);
     87        if (used.Count == 0) {
     88          sum += d;
     89        } else {
     90          var w = used.Select(x => Angular(distVector, x)).Min();
     91          sum += w * d;
     92          wsum += w;
     93        }
     94        used.Add(distVector);
     95      }
     96      sum /= wsum * MaxSolutionDist; //normalize
     97      return sum;
     98    }
     99    public override bool Maximization() {
     100      return ExpensiveMaximization;
     101    }
     102    protected override void Initialize() {
     103      var model = RegressionSolution.Model as IConfidenceRegressionModel;
    70104      if (model == null) throw new ArgumentException("can not calculate EI without confidence measure");
    71       var yhat = model.GetEstimation(vector);
    72       var s = Math.Sqrt(model.GetVariance(vector)) * ConfidenceWeight;
    73       return maximization ? yhat + s : yhat - s;
     105      Data = new RealVector[RegressionSolution.ProblemData.Dataset.Rows];
     106      for (var i = 0; i < Data.Length; i++) {
     107        Data[i] = new RealVector(Encoding.Length);
     108        for (var j = 0; j < Encoding.Length; j++)
     109          Data[i][j] = RegressionSolution.ProblemData.Dataset.GetDoubleValue(i, j);
     110      }
     111
     112      var maxSolution = new double[Encoding.Length];
     113      var minSolution = new double[Encoding.Length];
     114      for (var i = 0; i < Encoding.Length; i++) {
     115        var j = i % Encoding.Bounds.Rows;
     116        maxSolution[i] = Encoding.Bounds[j, 1];
     117        minSolution[i] = Encoding.Bounds[j, 0];
     118      }
     119      MaxSolutionDist = Euclidian(maxSolution, minSolution) / Data.Length;
    74120    }
    75121
     122    #region Helpers
     123    private static double Euclidian(IEnumerable<double> a, IEnumerable<double> b) {
     124      return Math.Sqrt(a.Zip(b, (d, d1) => d - d1).Sum(d => d * d));
     125    }
     126    private static double Angular(RealVector a, RealVector b) {
     127      var innerProduct = a.Zip(b, (x, y) => x * y).Sum();
     128      var res = Math.Acos(innerProduct / (Norm(a) * Norm(b))) / Math.PI;
     129      return double.IsNaN(res) ? 0 : res;
     130    }
     131    private static double Norm(IEnumerable<double> a) {
     132      return Math.Sqrt(a.Sum(d => d * d));
     133    }
     134    private static RealVector Minus(RealVector a, RealVector b) {
     135      return new RealVector(a.Zip(b, (d, d1) => d - d1).ToArray());
     136    }
     137
     138    private void Search(RealVector vector, int k, out List<RealVector> nearestNeighbours, out List<double> distances) {
     139      var neighbours = new SortedList<double, RealVector>(new DuplicateKeyComparer<double>());
     140      foreach (var n in Data) neighbours.Add(Euclidian(n, vector), n);
     141      nearestNeighbours = new List<RealVector>();
     142
     143      distances = new List<double>();
     144      foreach (var entry in neighbours) {
     145        nearestNeighbours.Add(entry.Value);
     146        distances.Add(entry.Key);
     147        if (distances.Count == k) break;
     148      }
     149    }
     150    #endregion
     151
     152    public class DuplicateKeyComparer<TKey> : IComparer<TKey> where TKey : IComparable {
     153      public int Compare(TKey x, TKey y) {
     154        var result = x.CompareTo(y);
     155        return result == 0 ? 1 : result;
     156      }
     157    }
    76158  }
    77159}
  • branches/EfficientGlobalOptimization/HeuristicLab.Algorithms.EGO/Interfaces/IInfillCriterion.cs

    r14741 r14818  
    2727namespace HeuristicLab.Algorithms.EGO {
    2828  public interface IInfillCriterion : INamedItem {
    29     double Evaluate(IRegressionSolution model, RealVector vector, bool maximization);
    30     bool Maximization(bool expensiveProblemMaximization);
     29    double Evaluate(RealVector vector);
     30    bool Maximization();
     31    void Initialize(IRegressionSolution solution, bool expensiveMaximization, RealVectorEncoding encoding);
    3132  }
    3233}
  • branches/EfficientGlobalOptimization/HeuristicLab.Algorithms.EGO/Interfaces/ISurrogateAlgorithm.cs

    r14768 r14818  
    2626namespace HeuristicLab.Algorithms.EGO {
    2727  public interface ISurrogateAlgorithm<T> : IAlgorithm where T : IDeepCloneable {
    28     void SetInitialSamples(T[] i, double[] qualities);
     28    void SetInitialSamples(T[] solutions, double[] qualities);
    2929  }
    3030}
  • branches/EfficientGlobalOptimization/HeuristicLab.Algorithms.EGO/Problems/InfillProblem.cs

    r14768 r14818  
    2727using HeuristicLab.Optimization;
    2828using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    29 using HeuristicLab.Problems.DataAnalysis;
    3029
    3130namespace HeuristicLab.Algorithms.EGO {
     
    3433  public sealed class InfillProblem : SingleObjectiveBasicProblem<RealVectorEncoding> {
    3534
    36     public override bool Maximization => true;
     35    public override bool Maximization => true;  //This is necessary because algorithms do not expect the maximization to change
    3736
    3837    #region Properties;
     
    4140    [Storable]
    4241    private SingleObjectiveBasicProblem<IEncoding> problem;
    43     [Storable]
    44     private IRegressionSolution regressionSolution;
    45 
    4642
    4743    public IInfillCriterion InfillCriterion
     
    5753        problem = value;
    5854        if (problem == null) return;
    59         Encoding = problem.Encoding as RealVectorEncoding;
     55        var enc = problem.Encoding as RealVectorEncoding;
     56        if (enc == null) throw new ArgumentException("EGO can not be performed on non-RealVectorEncodings");
     57        Encoding = enc;
    6058        SolutionCreator = new UniformRandomRealVectorCreator();//ignore Problem specific Solution Creation
    61         if (Encoding == null) throw new ArgumentException("EGO can not be performed on non-RealVectorEncodings");
     59
    6260      }
    63     }
    64     public IRegressionSolution RegressionSolution
    65     {
    66       get { return regressionSolution; }
    67       set { regressionSolution = value; }
    6861    }
    6962    #endregion
     
    7568      infillCriterion = cloner.Clone(original.InfillCriterion);
    7669      problem = cloner.Clone(original.Problem);
    77       regressionSolution = cloner.Clone(original.regressionSolution);
    7870    }
    7971    public InfillProblem() { }
     
    8274
    8375    public override double Evaluate(Individual individual, IRandom r) {
    84       var q = InfillCriterion.Evaluate(RegressionSolution, individual.RealVector(), Problem.Maximization);
    85       return InfillCriterion.Maximization(Problem.Maximization) ? q : -q; //This is necessary because Maximization is not supposed to change on a normal problem
     76      var q = InfillCriterion.Evaluate(individual.RealVector());
     77      return InfillCriterion.Maximization() ? q : -q;
    8678    }
    8779    public override void Analyze(Individual[] individuals, double[] qualities, ResultCollection results, IRandom random) {
    8880      base.Analyze(individuals, qualities, results, random);
    8981      var best = qualities.ArgMax(x => x);
    90       var qnew = InfillCriterion.Maximization(Problem.Maximization) ? qualities[best] : -qualities[best];
    91       //var best = Maximization ? qualities.ArgMax(x => x) : qualities.ArgMin(x => x);
     82      var qnew = InfillCriterion.Maximization() ? qualities[best] : -qualities[best];
    9283      const string qname = EfficientGlobalOptimizationAlgorithm.BestInfillQualityResultName;
    9384      const string sname = EfficientGlobalOptimizationAlgorithm.BestInfillSolutionResultName;
     
    9990      var qold = results[qname].Value as DoubleValue;
    10091      if (qold == null) throw new ArgumentException("Old best quality is not a double value. Conflicting Analyzers?");
    101       if (qold.Value >= qnew == InfillCriterion.Maximization(Problem.Maximization)) return;
     92      if (qold.Value >= qnew == InfillCriterion.Maximization()) return;
    10293      results[sname].Value = (RealVector)individuals[best].RealVector().Clone();
    10394      qold.Value = qnew;
    104 
    10595    }
    10696  }
Note: See TracChangeset for help on using the changeset viewer.