#region License Information /* HeuristicLab * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL) * * This file is part of HeuristicLab. * * HeuristicLab is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * HeuristicLab is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with HeuristicLab. If not, see . */ #endregion using System; using System.Collections.Generic; using System.Linq; using HeuristicLab.Common; using HeuristicLab.Core; using HeuristicLab.Data; using HeuristicLab.Optimization; using HeuristicLab.Parameters; using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; using HeuristicLab.Problems.DataAnalysis; using HeuristicLab.Random; using HeuristicLab.Selection; namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression { /// /// eps-Lexicase Selection /// [Item("EpsLexicaseSelection", "")] [StorableClass] public sealed class EpsLexicaseSelection : StochasticSingleObjectiveSelector, ISingleObjectiveSelector { [StorableConstructor] private EpsLexicaseSelection(bool deserializing) : base(deserializing) { } private EpsLexicaseSelection(EpsLexicaseSelection original, Cloner cloner) : base(original, cloner) { } public override IDeepCloneable Clone(Cloner cloner) { return new EpsLexicaseSelection(this, cloner); } public EpsLexicaseSelection() : base() { Parameters.Add(new ScopeTreeLookupParameter("Errors", 1)); var validPolicies = new ItemSet(new string[] { "ϵ_e", "ϵ_y", "ϵ_e,λ", "ϵ_y,λ" }.Select(s => new StringValue(s).AsReadOnly())); Parameters.Add(new ConstrainedValueParameter("Policy", "The selection policy (see La Cava, Spector, Danai: eps-Lexicase Selection for Regression, GECCO 2016)", validPolicies)); Parameters.Add(new ValueParameter("ϵ", "The ϵ value for ϵ_e and ϵ_y policies", new DoubleValue(1.0))); Parameters.Add(new LookupParameter("AvgConsideredTestCases", "The average number of considered test cases for selection.")); } [StorableHook(HookType.AfterDeserialization)] private void AfterDeserialization() { // remove if (!Parameters.ContainsKey("AvgConsideredTestCases")) { Parameters.Add(new LookupParameter("AvgConsideredTestCases", "The average number of considered test cases for selection.")); } } protected override IScope[] Select(List scopes) { // NOT efficiently implemented, used only for exploration of diversity for a paper int parentCount = NumberOfSelectedSubScopesParameter.ActualValue.Value; bool copy = CopySelectedParameter.Value.Value; if (!copy) throw new ArgumentException("copy is false in eps-lexicase selection."); IRandom random = RandomParameter.ActualValue; bool maximization = MaximizationParameter.ActualValue.Value; IScope[] selected = new IScope[parentCount]; int nScopes = scopes.Count(); var errors = (ItemArray)((IScopeTreeLookupParameter)Parameters["Errors"]).ActualValue; if (errors == null || !errors.Any()) throw new ArgumentException("Have not found errors of models. Have you used an analyzer that calculates the errors and stores them in the scope?"); var e = errors.Select(e_m => e_m.ToArray()).ToArray(); errors = null; // don't use errors // see La Cava, Spector, Danai: eps-Lexicase Selection for Regression, GECCO 2016 var ts = Enumerable.Range(0, e.First().Length).ToArray(); double eps = ((DoubleValue)Parameters["ϵ"].ActualValue).Value; var selectionPolicy = ((StringValue)Parameters["Policy"].ActualValue).Value; var selectedScopes = new IScope[parentCount]; var nCasesList = new List(parentCount); var lambda_e = ts.Select(t=> MAD(e.Select(e_m => e_m[t]).ToArray())).ToArray(); for (int i = 0; i < parentCount; i++) { int nCases; var selectedIdx = SelectIdx(random, e, selectionPolicy, ts, eps, lambda_e, out nCases); nCasesList.Add(nCases); selectedScopes[i] = (IScope)(scopes[selectedIdx]).Clone(); } Parameters["AvgConsideredTestCases"].ActualValue = new DoubleValue(nCasesList.Median()); return selectedScopes; } public static int SelectIdx(IRandom random, double[][] errors, string selectionPolicy, int[] ts, double eps, double[] lambda_e, out int nCases) { ts.ShuffleInPlace(random); var activeModelIdxs = new SortedSet(Enumerable.Range(0, errors.Length)); nCases = 0; foreach (var t in ts) { if (activeModelIdxs.Count <= 1) break; nCases++; switch (selectionPolicy) { case "ϵ_e": { var bestError = errors.Select(err_m => err_m[t]).Min(); // as noted in corrected version of GECCO 2016 paper activeModelIdxs.RemoveWhere(modelIdx => errors[modelIdx][t] > bestError * (1 + eps)); break; } case "ϵ_y": { activeModelIdxs.RemoveWhere(modelIdx => errors[modelIdx][t] > eps); break; } // Note in a corrected version of the GECCO Paper La Cava changed equations (2) and (5) // Equations 2 and 5 have been corrected to // indicate that the pass conditions for individuals in -lexicase // selection are defined relative to the best error in the population // on that training case, not in the selection pool. // a more recent and detailed description of the algorithm is given in https://arxiv.org/pdf/1709.05394.pdf // which indicates that semi-dynamic eps-lexicase performs best (Algorithm 4.2) // -> we also implement semi-dynamic eps-lexicase which calculates lambda over the whole population // I have not found a way to get reasonable convergence using MAD for lambda. // If lambda_e[t] is zero this means that all models are effectively the same => select randomly. // It seems that linear scaling (or the replacement of NaN outputs with the average of y) // has the effect that MAD is zero (especially in the beginning), which means there is not selection pressure at the beginning. // Semi-dynamic -Lexicase Selection case "ϵ_e,λ": { var bestError = activeModelIdxs.Select(modelIdx => errors[modelIdx][t]).Min(); // See https://arxiv.org/pdf/1709.05394.pdf Alg 4.2 activeModelIdxs.RemoveWhere(modelIdx => errors[modelIdx][t] > bestError + lambda_e[t]); // in the gecco paper the acceptance criterion is err < lambda_et this is later correct to err <= lambda_et break; } case "ϵ_y,λ": { activeModelIdxs.RemoveWhere(modelIdx => errors[modelIdx][t] > lambda_e[t]); // in the gecco paper the acceptance criterion is err < lambda_et this is later correct to err <= lambda_et break; } default: throw new ArgumentException("unknown policy " + selectionPolicy); } } if (!activeModelIdxs.Any()) throw new ArgumentException("nothing left in the pool"); else return activeModelIdxs.SampleRandom(random, 1).First(); } private static double MAD(double[] x) { var median_x = x.Median(); return x.Select(xi => Math.Abs(xi - median_x)).Median(); } private static double StdDev(double[] x) { return x.StandardDeviation(); } } }