Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/SchemaDiversification/SchemaEvaluator.cs @ 16130

Last change on this file since 16130 was 15906, checked in by bburlacu, 7 years ago

#1772: Refactoring and speed optimization of diversification operators

File size: 18.0 KB
RevLine 
[12951]1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
[15906]23using System.Collections.Concurrent;
[13480]24using System.Collections.Generic;
[12951]25using System.Linq;
[12979]26using System.Threading.Tasks;
[12951]27using HeuristicLab.Common;
28using HeuristicLab.Core;
29using HeuristicLab.Data;
30using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
31using HeuristicLab.EvolutionTracking;
[13480]32using HeuristicLab.Optimization;
[12951]33using HeuristicLab.Parameters;
34using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
35using HeuristicLab.Random;
36
[12958]37namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
[12951]38  [Item("SchemaEvaluator", "An operator that builds schemas based on the heredity relationship in the genealogy graph.")]
39  [StorableClass]
40  public class SchemaEvaluator : EvolutionTrackingOperator<ISymbolicExpressionTree> {
[12952]41    #region parameter names
[12951]42    private const string MinimumSchemaFrequencyParameterName = "MinimumSchemaFrequency";
43    private const string MinimumPhenotypicSimilarityParameterName = "MinimumPhenotypicSimilarity";
[15906]44    private const string MutationRateParameterName = "MutationRate";
[12951]45    private const string SchemaParameterName = "Schema";
46    private const string PopulationSizeParameterName = "PopulationSize";
47    private const string RandomParameterName = "Random";
48    private const string EvaluatorParameterName = "Evaluator";
49    private const string ProblemDataParameterName = "ProblemData";
50    private const string InterpreterParameterName = "SymbolicExpressionTreeInterpreter";
51    private const string EstimationLimitsParameterName = "EstimationLimits";
52    private const string ApplyLinearScalingParameterName = "ApplyLinearScaling";
53    private const string MutatorParameterName = "Mutator";
[13480]54    private const string CrossoverParameterName = "Crossover";
[12988]55    private const string NumberOfChangedTreesParameterName = "NumberOfChangedTrees";
[12979]56    private const string ExecuteInParallelParameterName = "ExecuteInParallel";
57    private const string MaxDegreeOfParalellismParameterName = "MaxDegreeOfParallelism";
[12988]58    private const string ExclusiveMatchingParameterName = "ExclusiveMatching";
[15906]59    private const string UseAdaptiveMutationRateParameterName = "UseAdaptiveMutationRate";
[13496]60    private const string StrictSchemaMatchingParameterName = "StrictSchemaMatching";
[12952]61    #endregion
[12951]62
63    #region parameters
[15906]64    public ILookupParameter<BoolValue> UseAdaptiveMutationRateParameter {
65      get { return (ILookupParameter<BoolValue>)Parameters[UseAdaptiveMutationRateParameterName]; }
[13480]66    }
[12988]67    public ILookupParameter<BoolValue> ExclusiveMatchingParameter {
68      get { return (ILookupParameter<BoolValue>)Parameters[ExclusiveMatchingParameterName]; }
69    }
[12979]70    public ILookupParameter<BoolValue> ExecuteInParallelParameter {
71      get { return (ILookupParameter<BoolValue>)Parameters[ExecuteInParallelParameterName]; }
72    }
73    public ILookupParameter<IntValue> MaxDegreeOfParallelismParameter {
74      get { return (ILookupParameter<IntValue>)Parameters[MaxDegreeOfParalellismParameterName]; }
75    }
[12951]76    public ILookupParameter<ISymbolicDataAnalysisSingleObjectiveEvaluator<IRegressionProblemData>> EvaluatorParameter {
77      get { return (ILookupParameter<ISymbolicDataAnalysisSingleObjectiveEvaluator<IRegressionProblemData>>)Parameters[EvaluatorParameterName]; }
78    }
79    public ILookupParameter<IRegressionProblemData> ProblemDataParameter {
80      get { return (ILookupParameter<IRegressionProblemData>)Parameters[ProblemDataParameterName]; }
81    }
82    public ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter> InterpreterParameter {
83      get { return (ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>)Parameters[InterpreterParameterName]; }
84    }
85    public ILookupParameter<DoubleLimit> EstimationLimitsParameter {
86      get { return (ILookupParameter<DoubleLimit>)Parameters[EstimationLimitsParameterName]; }
87    }
88    public ILookupParameter<BoolValue> ApplyLinearScalingParameter {
89      get { return (ILookupParameter<BoolValue>)Parameters[ApplyLinearScalingParameterName]; }
90    }
[13480]91    public ILookupParameter<ISymbolicExpressionTreeCrossover> CrossoverParameter {
92      get { return (ILookupParameter<ISymbolicExpressionTreeCrossover>)Parameters[CrossoverParameterName]; }
93    }
[12951]94    public ILookupParameter<IRandom> RandomParameter {
95      get { return (ILookupParameter<IRandom>)Parameters[RandomParameterName]; }
96    }
97    public ILookupParameter<IntValue> PopulationSizeParameter {
98      get { return (ILookupParameter<IntValue>)Parameters[PopulationSizeParameterName]; }
99    }
100    public ILookupParameter<ISymbolicExpressionTree> SchemaParameter {
101      get { return (ILookupParameter<ISymbolicExpressionTree>)Parameters[SchemaParameterName]; }
102    }
103    public ILookupParameter<PercentValue> MinimumSchemaFrequencyParameter {
104      get { return (ILookupParameter<PercentValue>)Parameters[MinimumSchemaFrequencyParameterName]; }
105    }
[15906]106    public ILookupParameter<PercentValue> MutationRateParameter {
107      get { return (ILookupParameter<PercentValue>)Parameters[MutationRateParameterName]; }
[12951]108    }
109    public ILookupParameter<PercentValue> MinimumPhenotypicSimilarityParameter {
110      get { return (ILookupParameter<PercentValue>)Parameters[MinimumPhenotypicSimilarityParameterName]; }
111    }
[12988]112    public LookupParameter<IntValue> NumberOfChangedTreesParameter {
113      get { return (LookupParameter<IntValue>)Parameters[NumberOfChangedTreesParameterName]; }
[12952]114    }
[13496]115    public LookupParameter<BoolValue> StrictSchemaMatchingParameter {
116      get { return (LookupParameter<BoolValue>)Parameters[StrictSchemaMatchingParameterName]; }
117    }
[12951]118    #endregion
119
120    #region parameter properties
[12979]121    public PercentValue MinimumSchemaFrequency { get { return MinimumSchemaFrequencyParameter.ActualValue; } }
[15906]122    public PercentValue MutationRate { get { return MutationRateParameter.ActualValue; } }
[12979]123    public PercentValue MinimumPhenotypicSimilarity { get { return MinimumPhenotypicSimilarityParameter.ActualValue; } }
[12988]124    public IntValue NumberOfChangedTrees { get { return NumberOfChangedTreesParameter.ActualValue; } }
[12951]125    #endregion
126
[13565]127    private QueryMatch qm;
[12952]128
[13565]129    [Storable]
130    private SymbolicExpressionTreePhenotypicSimilarityCalculator calculator;
[13480]131
[13565]132    [Storable]
[15906]133    public string MutationRateUpdateRule { get; set; }
[12952]134
[13527]135    [Storable]
[13565]136    private ISymbolicExpressionTreeNodeEqualityComparer comparer;
137
138    [Storable]
[12988]139    public ISymbolicExpressionTree Schema { get; set; }
140
[12979]141    [Storable]
[13527]142    private readonly UpdateQualityOperator updateQualityOperator;
[12952]143
[15482]144    [Storable]
145    public ISymbolicExpressionTreeManipulator SchemaManipulator { get; set; }
146
[13496]147    [StorableHook(HookType.AfterDeserialization)]
148    private void AfterDeserialization() {
149      if (!Parameters.ContainsKey(StrictSchemaMatchingParameterName))
150        Parameters.Add(new LookupParameter<BoolValue>(StrictSchemaMatchingParameterName));
[13565]151
152      if (calculator == null)
153        calculator = new SymbolicExpressionTreePhenotypicSimilarityCalculator();
154
155      if (comparer == null)
156        comparer = new SymbolicExpressionTreeNodeEqualityComparer { MatchVariableNames = true, MatchVariableWeights = true, MatchConstantValues = false };
157
158      qm = new QueryMatch(comparer) { MatchParents = true };
[13496]159    }
160
[12951]161    public SchemaEvaluator() {
[13565]162      calculator = new SymbolicExpressionTreePhenotypicSimilarityCalculator();
163      comparer = new SymbolicExpressionTreeNodeEqualityComparer { MatchVariableNames = true, MatchVariableWeights = true, MatchConstantValues = false };
164      qm = new QueryMatch(comparer) { MatchParents = true };
165      updateQualityOperator = new UpdateQualityOperator();
[12988]166      #region add parameters
[12951]167      Parameters.Add(new LookupParameter<ISymbolicExpressionTree>(SchemaParameterName, "The current schema to be evaluated"));
168      Parameters.Add(new LookupParameter<PercentValue>(MinimumSchemaFrequencyParameterName));
[15906]169      Parameters.Add(new LookupParameter<PercentValue>(MutationRateParameterName));
[12951]170      Parameters.Add(new LookupParameter<PercentValue>(MinimumPhenotypicSimilarityParameterName));
171      Parameters.Add(new LookupParameter<ISymbolicExpressionTree>(PopulationSizeParameterName));
172      Parameters.Add(new LookupParameter<IRandom>(RandomParameterName));
173      Parameters.Add(new LookupParameter<ISymbolicDataAnalysisSingleObjectiveEvaluator<IRegressionProblemData>>(EvaluatorParameterName));
174      Parameters.Add(new LookupParameter<IRegressionProblemData>(ProblemDataParameterName));
175      Parameters.Add(new LookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>(InterpreterParameterName));
176      Parameters.Add(new LookupParameter<DoubleLimit>(EstimationLimitsParameterName));
177      Parameters.Add(new LookupParameter<BoolValue>(ApplyLinearScalingParameterName));
[13496]178      Parameters.Add(new LookupParameter<BoolValue>(StrictSchemaMatchingParameterName));
[12951]179      Parameters.Add(new LookupParameter<ISymbolicExpressionTreeManipulator>(MutatorParameterName));
[13480]180      Parameters.Add(new LookupParameter<ISymbolicExpressionTreeCrossover>(CrossoverParameterName));
[12988]181      Parameters.Add(new LookupParameter<IntValue>(NumberOfChangedTreesParameterName));
[12979]182      Parameters.Add(new LookupParameter<BoolValue>(ExecuteInParallelParameterName));
183      Parameters.Add(new LookupParameter<IntValue>(MaxDegreeOfParalellismParameterName));
[12988]184      Parameters.Add(new LookupParameter<BoolValue>(ExclusiveMatchingParameterName));
[15906]185      Parameters.Add(new LookupParameter<BoolValue>(UseAdaptiveMutationRateParameterName));
[12988]186      #endregion
[12951]187    }
188
[12966]189    [StorableConstructor]
190    protected SchemaEvaluator(bool deserializing) : base(deserializing) { }
191
[12951]192    protected SchemaEvaluator(SchemaEvaluator original, Cloner cloner) : base(original, cloner) {
[13565]193      calculator = cloner.Clone(original.calculator);
194      comparer = cloner.Clone(original.comparer);
195      qm = new QueryMatch(comparer) { MatchParents = true };
196      updateQualityOperator = new UpdateQualityOperator();
[13527]197      Schema = original.Schema;
[12951]198    }
199
200    public override IDeepCloneable Clone(Cloner cloner) {
201      return new SchemaEvaluator(this, cloner);
202    }
203
204    public override IOperation Apply() {
[13496]205      var strictSchemaMatching = StrictSchemaMatchingParameter.ActualValue.Value;
206      if (strictSchemaMatching) {
[13565]207        comparer.MatchVariableWeights = true;
208        comparer.MatchConstantValues = true;
[13496]209      } else {
[13565]210        comparer.MatchVariableWeights = false;
211        comparer.MatchConstantValues = false;
[13496]212      }
213
[12951]214      var individuals = ExecutionContext.Scope.SubScopes; // the scopes represent the individuals
[15906]215      var trees = new ISymbolicExpressionTree[individuals.Count];
216      var qualities = new double[individuals.Count];
[12951]217
[15906]218      for (int i = 0; i < individuals.Count; ++i) {
219        trees[i] = (ISymbolicExpressionTree)individuals[i].Variables["SymbolicExpressionTree"].Value;
220        qualities[i] = ((DoubleValue)individuals[i].Variables["Quality"].Value).Value;
221      }
222
[12951]223      var random = RandomParameter.ActualValue;
[15906]224      var sRoot = Schema.Root.GetSubtree(0).GetSubtree(0);
[12951]225
[12979]226      // first apply the length and root equality checks in order to filter the individuals
[12988]227      var exclusiveMatching = ExclusiveMatchingParameter.ActualValue.Value;
[13480]228      var filtered = new List<int>();
[15906]229
230      for (int i = 0; i < trees.Length; ++i) {
[13480]231        if (exclusiveMatching && individuals[i].Variables.ContainsKey("AlreadyMatched")) continue;
232        var t = trees[i];
233        var tRoot = t.Root.GetSubtree(0).GetSubtree(0);
[15906]234        if (t.Length < Schema.Length || !qm.EqualityComparer.Equals(tRoot, sRoot)) continue;
[13480]235        filtered.Add(i);
236      }
[12979]237
238      // if we don't have enough filtered individuals, then we are done
[13480]239      // if the schema exceeds the minimum frequency, it gets processed further
[15906]240      int countThreshold = (int)Math.Max(2, Math.Round(MinimumSchemaFrequency.Value * individuals.Count));
[12979]241      if (filtered.Count < countThreshold) {
242        return base.Apply();
243      }
244
245      // check if the filtered individuals match the schema
[15906]246      var matching = new List<int>();
[12979]247      var sNodes = QueryMatch.InitializePostOrder(sRoot);
[15906]248
[12979]249      bool executeInParallel = ExecuteInParallelParameter.ActualValue.Value;
250      int maxDegreeOfParallelism = MaxDegreeOfParallelismParameter.ActualValue.Value;
251
252      if (executeInParallel) {
[15906]253        var partitioner = Partitioner.Create(0, filtered.Count);
254        var po = new ParallelOptions { MaxDegreeOfParallelism = maxDegreeOfParallelism };
[12979]255
[15906]256        Parallel.ForEach(partitioner, po, (range, loop) => {
257          var partial = new List<int>();
258          for (int i = range.Item1; i < range.Item2; ++i) {
259            var idx = filtered[i];
260            var tRoot = trees[idx].Root.GetSubtree(0).GetSubtree(0);
261            var tNodes = QueryMatch.InitializePostOrder(tRoot);
[12979]262
[15906]263            if (qm.Match(tNodes, sNodes)) { partial.Add(idx); }
264          }
265          lock (matching) { matching.AddRange(partial); }
266        });
[12979]267      } else {
268        for (int i = 0; i < filtered.Count; ++i) {
[13480]269          var index = filtered[i];
270          var tRoot = trees[index].Root.GetSubtree(0).GetSubtree(0);
271          var tNodes = QueryMatch.InitializePostOrder(tRoot);
[15906]272          if (qm.Match(tNodes, sNodes)) {
273            matching.Add(index);
274          }
[12979]275        }
[12958]276      }
[15906]277      if (matching.Count < countThreshold) {
[12951]278        return base.Apply();
[12952]279      }
[12951]280
[15906]281      matching.Sort((a, b) => qualities[a].CompareTo(qualities[b])); // sort by ascending quality
282      var matchingIndividuals = matching.Select(x => individuals[x]).ToArray(); // fittest individual will be last in the array
[13480]283      var similarity = CalculateSimilarity(matchingIndividuals, calculator, executeInParallel, maxDegreeOfParallelism);
[12952]284      if (similarity < MinimumPhenotypicSimilarity.Value) {
[12951]285        return base.Apply();
[12952]286      }
[12951]287
[15906]288      double mutationRate;
289      var useAdaptiveMutationRate = UseAdaptiveMutationRateParameter.ActualValue.Value;
[13480]290
[15906]291      if (useAdaptiveMutationRate) {
292        var r = (double)matchingIndividuals.Length / individuals.Count;
293        mutationRate = CalculateMutationRate(r);
[13480]294      } else {
[15906]295        mutationRate = MutationRate.Value;
[13480]296      }
297
[15906]298      var mutations = new OperationCollection { Parallel = false }; // cannot be parallel due to the before/after operators which insert vertices in the genealogy graph
299      var updates = new OperationCollection { Parallel = true }; // evaluation should be done in parallel when possible
300
301      // use length - 1 because we don't want to mutate the best individual in each schema group (which could also be the overall elite)
302      for (int i = 0; i < matchingIndividuals.Length - 1; ++i) {
303        if (random.NextDouble() > mutationRate) continue;
304
305        var ind = matchingIndividuals[i];
306
307        var mutate = ExecutionContext.CreateChildOperation(SchemaManipulator, ind);
308        var update = ExecutionContext.CreateChildOperation(updateQualityOperator, ind);
309
310        mutations.Add(mutate);
311        updates.Add(update);
312
[12988]313        if (exclusiveMatching)
314          ind.Variables.Add(new Core.Variable("AlreadyMatched"));
[12951]315      }
[15906]316      NumberOfChangedTrees.Value += mutations.Count;
[12988]317
[15906]318      return new OperationCollection(mutations, updates, base.Apply());
[12951]319    }
[12979]320
[15906]321    private double CalculateMutationRate(double r) {
322      switch (MutationRateUpdateRule) {
[13565]323        case "f(x) = x": {
324            return r;
325          }
326        case "f(x) = tanh(x)": {
327            return Math.Tanh(r);
328          }
329        case "f(x) = tanh(2x)": {
330            return Math.Tanh(2 * r);
331          }
332        case "f(x) = tanh(3x)": {
333            return Math.Tanh(3 * r);
334          }
335        case "f(x) = tanh(4x)": {
336            return Math.Tanh(4 * r);
337          }
338        case "f(x) = 1-sqrt(1-x)": {
339            return 1 - Math.Sqrt(1 - r);
340          }
341        default:
342          throw new ArgumentException("Unknown replacement rule");
343      }
344    }
345
[15906]346    public static double CalculateSimilarity(IScope[] individuals, ISolutionSimilarityCalculator calculator, bool parallel = false, int nThreads = -1) {
347      if (individuals.Length < 2)
348        return double.NaN;
349
[12979]350      double similarity = 0;
[15906]351      int count = individuals.Length;
352      int n = count * (count - 1) / 2;
353
[12979]354      if (parallel) {
[15906]355        var ii = new int[n];
356        var jj = new int[n];
357        int k = 0;
358        for (int i = 0; i < count - 1; ++i)
[12979]359          for (int j = i + 1; j < count; ++j) {
[15906]360            ii[k] = i;
361            jj[k] = j;
362            ++k;
[12979]363          }
[15906]364        var po = new ParallelOptions { MaxDegreeOfParallelism = nThreads };
365        var partitioner = Partitioner.Create(0, n);
366        var locker = new object();
367        Parallel.ForEach(partitioner, new ParallelOptions { MaxDegreeOfParallelism = 4 }, (range, loop) => {
368          var partial = 0d;
369          for (int idx = range.Item1; idx < range.Item2; ++idx) {
370            int i = ii[idx];
371            int j = jj[idx];
372            partial += calculator.CalculateSolutionSimilarity(individuals[i], individuals[j]);
373          }
374          lock (locker) { similarity += partial; }
[12979]375        });
376      } else {
377        for (int i = 0; i < count - 1; ++i) {
378          for (int j = i + 1; j < count; ++j) {
379            similarity += calculator.CalculateSolutionSimilarity(individuals[i], individuals[j]);
380          }
381        }
382      }
[15906]383      return similarity / n;
[12979]384    }
[12951]385  }
386}
Note: See TracBrowser for help on using the repository browser.