source: branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/SchemaDiversification/SchemaEvaluator.cs @ 13480

Last change on this file since 13480 was 13480, checked in by bburlacu, 5 years ago

#1772: Schema diversification strategy: implement adaptive replacement ratio and added number of evaluated solutions per generation to the diversification statistics operator

File size: 16.5 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using System.Threading.Tasks;
26using HeuristicLab.Common;
27using HeuristicLab.Core;
28using HeuristicLab.Data;
29using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
30using HeuristicLab.EvolutionTracking;
31using HeuristicLab.Optimization;
32using HeuristicLab.Parameters;
33using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
34using HeuristicLab.Random;
35
36namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
37  [Item("SchemaEvaluator", "An operator that builds schemas based on the heredity relationship in the genealogy graph.")]
38  [StorableClass]
39  public class SchemaEvaluator : EvolutionTrackingOperator<ISymbolicExpressionTree> {
40    #region parameter names
41    private const string MinimumSchemaFrequencyParameterName = "MinimumSchemaFrequency";
42    private const string MinimumPhenotypicSimilarityParameterName = "MinimumPhenotypicSimilarity";
43    private const string ReplacementRatioParameterName = "ReplacementRatio";
44    private const string SchemaParameterName = "Schema";
45    private const string PopulationSizeParameterName = "PopulationSize";
46    private const string RandomParameterName = "Random";
47    private const string EvaluatorParameterName = "Evaluator";
48    private const string ProblemDataParameterName = "ProblemData";
49    private const string InterpreterParameterName = "SymbolicExpressionTreeInterpreter";
50    private const string EstimationLimitsParameterName = "EstimationLimits";
51    private const string ApplyLinearScalingParameterName = "ApplyLinearScaling";
52    private const string MutatorParameterName = "Mutator";
53    private const string CrossoverParameterName = "Crossover";
54    private const string RandomReplacementParameterName = "RandomReplacement";
55    private const string NumberOfChangedTreesParameterName = "NumberOfChangedTrees";
56    private const string ExecuteInParallelParameterName = "ExecuteInParallel";
57    private const string MaxDegreeOfParalellismParameterName = "MaxDegreeOfParallelism";
58    private const string ExclusiveMatchingParameterName = "ExclusiveMatching";
59    private const string UseAdaptiveReplacementRatioParameterName = "UseAdaptiveReplacementRatio";
60    #endregion
61
62    #region parameters
63    public ILookupParameter<BoolValue> UseAdaptiveReplacementRatioParameter {
64      get { return (ILookupParameter<BoolValue>)Parameters[UseAdaptiveReplacementRatioParameterName]; }
65    }
66    public ILookupParameter<BoolValue> ExclusiveMatchingParameter {
67      get { return (ILookupParameter<BoolValue>)Parameters[ExclusiveMatchingParameterName]; }
68    }
69    public ILookupParameter<BoolValue> ExecuteInParallelParameter {
70      get { return (ILookupParameter<BoolValue>)Parameters[ExecuteInParallelParameterName]; }
71    }
72    public ILookupParameter<IntValue> MaxDegreeOfParallelismParameter {
73      get { return (ILookupParameter<IntValue>)Parameters[MaxDegreeOfParalellismParameterName]; }
74    }
75    public ILookupParameter<ISymbolicDataAnalysisSingleObjectiveEvaluator<IRegressionProblemData>> EvaluatorParameter {
76      get { return (ILookupParameter<ISymbolicDataAnalysisSingleObjectiveEvaluator<IRegressionProblemData>>)Parameters[EvaluatorParameterName]; }
77    }
78    public ILookupParameter<IRegressionProblemData> ProblemDataParameter {
79      get { return (ILookupParameter<IRegressionProblemData>)Parameters[ProblemDataParameterName]; }
80    }
81    public ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter> InterpreterParameter {
82      get { return (ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>)Parameters[InterpreterParameterName]; }
83    }
84    public ILookupParameter<DoubleLimit> EstimationLimitsParameter {
85      get { return (ILookupParameter<DoubleLimit>)Parameters[EstimationLimitsParameterName]; }
86    }
87    public ILookupParameter<BoolValue> ApplyLinearScalingParameter {
88      get { return (ILookupParameter<BoolValue>)Parameters[ApplyLinearScalingParameterName]; }
89    }
90    public ILookupParameter<BoolValue> RandomReplacementParameter {
91      get { return (ILookupParameter<BoolValue>)Parameters[RandomReplacementParameterName]; }
92    }
93    public ILookupParameter<ISymbolicExpressionTreeManipulator> MutatorParameter {
94      get { return (ILookupParameter<ISymbolicExpressionTreeManipulator>)Parameters[MutatorParameterName]; }
95    }
96    public ILookupParameter<ISymbolicExpressionTreeCrossover> CrossoverParameter {
97      get { return (ILookupParameter<ISymbolicExpressionTreeCrossover>)Parameters[CrossoverParameterName]; }
98    }
99    public ILookupParameter<IRandom> RandomParameter {
100      get { return (ILookupParameter<IRandom>)Parameters[RandomParameterName]; }
101    }
102    public ILookupParameter<IntValue> PopulationSizeParameter {
103      get { return (ILookupParameter<IntValue>)Parameters[PopulationSizeParameterName]; }
104    }
105    public ILookupParameter<ISymbolicExpressionTree> SchemaParameter {
106      get { return (ILookupParameter<ISymbolicExpressionTree>)Parameters[SchemaParameterName]; }
107    }
108    public ILookupParameter<PercentValue> MinimumSchemaFrequencyParameter {
109      get { return (ILookupParameter<PercentValue>)Parameters[MinimumSchemaFrequencyParameterName]; }
110    }
111    public ILookupParameter<PercentValue> ReplacementRatioParameter {
112      get { return (ILookupParameter<PercentValue>)Parameters[ReplacementRatioParameterName]; }
113    }
114    public ILookupParameter<PercentValue> MinimumPhenotypicSimilarityParameter {
115      get { return (ILookupParameter<PercentValue>)Parameters[MinimumPhenotypicSimilarityParameterName]; }
116    }
117    public LookupParameter<IntValue> NumberOfChangedTreesParameter {
118      get { return (LookupParameter<IntValue>)Parameters[NumberOfChangedTreesParameterName]; }
119    }
120    #endregion
121
122    #region parameter properties
123    public PercentValue MinimumSchemaFrequency { get { return MinimumSchemaFrequencyParameter.ActualValue; } }
124    public PercentValue ReplacementRatio { get { return ReplacementRatioParameter.ActualValue; } }
125    public PercentValue MinimumPhenotypicSimilarity { get { return MinimumPhenotypicSimilarityParameter.ActualValue; } }
126    public BoolValue RandomReplacement { get { return RandomReplacementParameter.ActualValue; } }
127    public IntValue NumberOfChangedTrees { get { return NumberOfChangedTreesParameter.ActualValue; } }
128    #endregion
129
130    private readonly SymbolicExpressionTreePhenotypicSimilarityCalculator calculator = new SymbolicExpressionTreePhenotypicSimilarityCalculator();
131    private readonly QueryMatch qm;
132
133    public Func<double, double> ReplacementRule { get; set; }
134
135    private readonly ISymbolicExpressionTreeNodeEqualityComparer comp = new SymbolicExpressionTreeNodeEqualityComparer {
136      MatchConstantValues = false,
137      MatchVariableWeights = false,
138      MatchVariableNames = true
139    };
140
141    public ISymbolicExpressionTree Schema { get; set; }
142
143    [Storable]
144    private readonly UpdateEstimatedValuesOperator updateEstimatedValuesOperator;
145
146    public SchemaEvaluator() {
147      qm = new QueryMatch(comp) { MatchParents = true };
148      this.updateEstimatedValuesOperator = new UpdateEstimatedValuesOperator();
149      #region add parameters
150      Parameters.Add(new LookupParameter<ISymbolicExpressionTree>(SchemaParameterName, "The current schema to be evaluated"));
151      Parameters.Add(new LookupParameter<PercentValue>(MinimumSchemaFrequencyParameterName));
152      Parameters.Add(new LookupParameter<PercentValue>(ReplacementRatioParameterName));
153      Parameters.Add(new LookupParameter<PercentValue>(MinimumPhenotypicSimilarityParameterName));
154      Parameters.Add(new LookupParameter<ISymbolicExpressionTree>(PopulationSizeParameterName));
155      Parameters.Add(new LookupParameter<IRandom>(RandomParameterName));
156      Parameters.Add(new LookupParameter<ISymbolicDataAnalysisSingleObjectiveEvaluator<IRegressionProblemData>>(EvaluatorParameterName));
157      Parameters.Add(new LookupParameter<IRegressionProblemData>(ProblemDataParameterName));
158      Parameters.Add(new LookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>(InterpreterParameterName));
159      Parameters.Add(new LookupParameter<DoubleLimit>(EstimationLimitsParameterName));
160      Parameters.Add(new LookupParameter<BoolValue>(ApplyLinearScalingParameterName));
161      Parameters.Add(new LookupParameter<ISymbolicExpressionTreeManipulator>(MutatorParameterName));
162      Parameters.Add(new LookupParameter<ISymbolicExpressionTreeCrossover>(CrossoverParameterName));
163      Parameters.Add(new LookupParameter<BoolValue>(RandomReplacementParameterName));
164      Parameters.Add(new LookupParameter<IntValue>(NumberOfChangedTreesParameterName));
165      Parameters.Add(new LookupParameter<BoolValue>(ExecuteInParallelParameterName));
166      Parameters.Add(new LookupParameter<IntValue>(MaxDegreeOfParalellismParameterName));
167      Parameters.Add(new LookupParameter<BoolValue>(ExclusiveMatchingParameterName));
168      Parameters.Add(new LookupParameter<BoolValue>(UseAdaptiveReplacementRatioParameterName));
169      #endregion
170    }
171
172    [StorableConstructor]
173    protected SchemaEvaluator(bool deserializing) : base(deserializing) { }
174
175    protected SchemaEvaluator(SchemaEvaluator original, Cloner cloner) : base(original, cloner) {
176      this.comp = original.comp == null ? new SymbolicExpressionTreeNodeEqualityComparer {
177        MatchConstantValues = false,
178        MatchVariableWeights = false,
179        MatchVariableNames = true
180      } : (ISymbolicExpressionTreeNodeEqualityComparer)original.comp.Clone();
181      this.qm = new QueryMatch(comp) { MatchParents = original.qm?.MatchParents ?? true };
182      this.updateEstimatedValuesOperator = new UpdateEstimatedValuesOperator();
183    }
184
185    public override IDeepCloneable Clone(Cloner cloner) {
186      return new SchemaEvaluator(this, cloner);
187    }
188
189    public override IOperation Apply() {
190      var individuals = ExecutionContext.Scope.SubScopes; // the scopes represent the individuals
191      var trees = individuals.Select(x => (ISymbolicExpressionTree)x.Variables["SymbolicExpressionTree"].Value).ToList();
192      var qualities = individuals.Select(x => (DoubleValue)x.Variables["Quality"].Value).ToList();
193
194      var random = RandomParameter.ActualValue;
195      var mutator = MutatorParameter.ActualValue;
196
197      var s = Schema;
198      var sRoot = s.Root.GetSubtree(0).GetSubtree(0);
199      int countThreshold = (int)Math.Max(2, Math.Round(MinimumSchemaFrequency.Value * individuals.Count));
200
201      // first apply the length and root equality checks in order to filter the individuals
202      var exclusiveMatching = ExclusiveMatchingParameter.ActualValue.Value;
203      var filtered = new List<int>();
204      for (int i = 0; i < individuals.Count; ++i) {
205        if (exclusiveMatching && individuals[i].Variables.ContainsKey("AlreadyMatched")) continue;
206        var t = trees[i];
207        var tRoot = t.Root.GetSubtree(0).GetSubtree(0);
208        if (t.Length < s.Length || !qm.Comparer.Equals(tRoot, sRoot)) continue;
209        filtered.Add(i);
210      }
211
212      // if we don't have enough filtered individuals, then we are done
213      // if the schema exceeds the minimum frequency, it gets processed further
214      if (filtered.Count < countThreshold) {
215        return base.Apply();
216      }
217
218      // check if the filtered individuals match the schema
219      var sNodes = QueryMatch.InitializePostOrder(sRoot);
220      var matchingIndividuals = new ScopeList();
221      bool executeInParallel = ExecuteInParallelParameter.ActualValue.Value;
222      int maxDegreeOfParallelism = MaxDegreeOfParallelismParameter.ActualValue.Value;
223
224      if (executeInParallel) {
225        var matching = new bool[filtered.Count];
226
227        Parallel.For(0, filtered.Count, new ParallelOptions { MaxDegreeOfParallelism = maxDegreeOfParallelism },
228          i => {
229            var index = filtered[i];
230            var t = trees[index];
231            var tNodes = QueryMatch.InitializePostOrder(t.Root.GetSubtree(0).GetSubtree(0));
232            if (qm.Match(tNodes, sNodes)) {
233              matching[i] = true;
234            }
235          });
236
237        matchingIndividuals.AddRange(filtered.Where((x, i) => matching[i]).Select(x => individuals[x]));
238      } else {
239        for (int i = 0; i < filtered.Count; ++i) {
240          // break early if it becomes impossible to reach the minimum threshold
241          if (matchingIndividuals.Count + filtered.Count - i < countThreshold)
242            break;
243
244          var index = filtered[i];
245          var tRoot = trees[index].Root.GetSubtree(0).GetSubtree(0);
246          var tNodes = QueryMatch.InitializePostOrder(tRoot);
247          if (qm.Match(tNodes, sNodes))
248            matchingIndividuals.Add(individuals[index]);
249        }
250      }
251
252      if (matchingIndividuals.Count < countThreshold) {
253        return base.Apply();
254      }
255
256      var similarity = CalculateSimilarity(matchingIndividuals, calculator, executeInParallel, maxDegreeOfParallelism);
257      if (similarity < MinimumPhenotypicSimilarity.Value) {
258        return base.Apply();
259      }
260
261      double replacementRatio;
262      var adaptiveReplacementRatio = UseAdaptiveReplacementRatioParameter.ActualValue.Value;
263
264      if (adaptiveReplacementRatio) {
265        var r = (double)matchingIndividuals.Count / individuals.Count;
266        replacementRatio = ReplacementRule(r);
267      } else {
268        replacementRatio = ReplacementRatio.Value;
269      }
270
271      int n = (int)Math.Floor(matchingIndividuals.Count * replacementRatio);
272      var individualsToReplace = RandomReplacement.Value ? matchingIndividuals.SampleRandomWithoutRepetition(random, n).ToList()
273                                                         : matchingIndividuals.OrderBy(x => (DoubleValue)x.Variables["Quality"].Value).Take(n).ToList();
274      var mutationOc = new OperationCollection { Parallel = false }; // cannot be parallel due to the before/after operators which insert vertices in the genealogy graph
275      var updateEstimatedValues = new OperationCollection { Parallel = true }; // evaluation should be done in parallel when possible
276      foreach (var ind in individualsToReplace) {
277        var mutatorOp = ExecutionContext.CreateChildOperation(mutator, ind);
278        var updateOp = ExecutionContext.CreateChildOperation(updateEstimatedValuesOperator, ind);
279        mutationOc.Add(mutatorOp);
280        updateEstimatedValues.Add(updateOp);
281        if (exclusiveMatching)
282          ind.Variables.Add(new Core.Variable("AlreadyMatched"));
283      }
284
285      NumberOfChangedTrees.Value += individualsToReplace.Count; // a lock is not necessary here because the SchemaEvaluators cannot be executed in parallel
286
287      return new OperationCollection(mutationOc, updateEstimatedValues, base.Apply());
288    }
289
290    public static double CalculateSimilarity(ScopeList individuals, ISolutionSimilarityCalculator calculator, bool parallel = false, int nThreads = -1) {
291      double similarity = 0;
292      int count = individuals.Count;
293      if (count < 2) return double.NaN;
294      if (parallel) {
295        var parallelOptions = new ParallelOptions { MaxDegreeOfParallelism = nThreads };
296        var simArray = new double[count - 1];
297        Parallel.For(0, count - 1, parallelOptions, i => {
298          double innerSim = 0;
299          for (int j = i + 1; j < count; ++j) {
300            innerSim += calculator.CalculateSolutionSimilarity(individuals[i], individuals[j]);
301          }
302          simArray[i] = innerSim;
303        });
304        similarity = simArray.Sum();
305      } else {
306        for (int i = 0; i < count - 1; ++i) {
307          for (int j = i + 1; j < count; ++j) {
308            similarity += calculator.CalculateSolutionSimilarity(individuals[i], individuals[j]);
309          }
310        }
311      }
312      return similarity / (count * (count - 1) / 2.0);
313    }
314  }
315}
Note: See TracBrowser for help on using the repository browser.