Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/SchemaDiversification/SchemaEvaluator.cs @ 15529

Last change on this file since 15529 was 15482, checked in by bburlacu, 7 years ago

#1772: Add the option to specify a separate mutation operation for schema diversification (independent of the one used in the algorithm main loop). Refactor duplicated code for schema generation in SchemaCreator.cs.

File size: 18.0 KB
RevLine 
[12951]1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
[13480]23using System.Collections.Generic;
[12951]24using System.Linq;
[12979]25using System.Threading.Tasks;
[12951]26using HeuristicLab.Common;
27using HeuristicLab.Core;
28using HeuristicLab.Data;
29using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
30using HeuristicLab.EvolutionTracking;
[13480]31using HeuristicLab.Optimization;
[12951]32using HeuristicLab.Parameters;
33using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
34using HeuristicLab.Random;
35
[12958]36namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
[12951]37  [Item("SchemaEvaluator", "An operator that builds schemas based on the heredity relationship in the genealogy graph.")]
38  [StorableClass]
39  public class SchemaEvaluator : EvolutionTrackingOperator<ISymbolicExpressionTree> {
[12952]40    #region parameter names
[12951]41    private const string MinimumSchemaFrequencyParameterName = "MinimumSchemaFrequency";
42    private const string MinimumPhenotypicSimilarityParameterName = "MinimumPhenotypicSimilarity";
43    private const string ReplacementRatioParameterName = "ReplacementRatio";
44    private const string SchemaParameterName = "Schema";
45    private const string PopulationSizeParameterName = "PopulationSize";
46    private const string RandomParameterName = "Random";
47    private const string EvaluatorParameterName = "Evaluator";
48    private const string ProblemDataParameterName = "ProblemData";
49    private const string InterpreterParameterName = "SymbolicExpressionTreeInterpreter";
50    private const string EstimationLimitsParameterName = "EstimationLimits";
51    private const string ApplyLinearScalingParameterName = "ApplyLinearScaling";
52    private const string MutatorParameterName = "Mutator";
[13480]53    private const string CrossoverParameterName = "Crossover";
[12951]54    private const string RandomReplacementParameterName = "RandomReplacement";
[12988]55    private const string NumberOfChangedTreesParameterName = "NumberOfChangedTrees";
[12979]56    private const string ExecuteInParallelParameterName = "ExecuteInParallel";
57    private const string MaxDegreeOfParalellismParameterName = "MaxDegreeOfParallelism";
[12988]58    private const string ExclusiveMatchingParameterName = "ExclusiveMatching";
[13480]59    private const string UseAdaptiveReplacementRatioParameterName = "UseAdaptiveReplacementRatio";
[13496]60    private const string StrictSchemaMatchingParameterName = "StrictSchemaMatching";
[12952]61    #endregion
[12951]62
63    #region parameters
[13480]64    public ILookupParameter<BoolValue> UseAdaptiveReplacementRatioParameter {
65      get { return (ILookupParameter<BoolValue>)Parameters[UseAdaptiveReplacementRatioParameterName]; }
66    }
[12988]67    public ILookupParameter<BoolValue> ExclusiveMatchingParameter {
68      get { return (ILookupParameter<BoolValue>)Parameters[ExclusiveMatchingParameterName]; }
69    }
[12979]70    public ILookupParameter<BoolValue> ExecuteInParallelParameter {
71      get { return (ILookupParameter<BoolValue>)Parameters[ExecuteInParallelParameterName]; }
72    }
73    public ILookupParameter<IntValue> MaxDegreeOfParallelismParameter {
74      get { return (ILookupParameter<IntValue>)Parameters[MaxDegreeOfParalellismParameterName]; }
75    }
[12951]76    public ILookupParameter<ISymbolicDataAnalysisSingleObjectiveEvaluator<IRegressionProblemData>> EvaluatorParameter {
77      get { return (ILookupParameter<ISymbolicDataAnalysisSingleObjectiveEvaluator<IRegressionProblemData>>)Parameters[EvaluatorParameterName]; }
78    }
79    public ILookupParameter<IRegressionProblemData> ProblemDataParameter {
80      get { return (ILookupParameter<IRegressionProblemData>)Parameters[ProblemDataParameterName]; }
81    }
82    public ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter> InterpreterParameter {
83      get { return (ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>)Parameters[InterpreterParameterName]; }
84    }
85    public ILookupParameter<DoubleLimit> EstimationLimitsParameter {
86      get { return (ILookupParameter<DoubleLimit>)Parameters[EstimationLimitsParameterName]; }
87    }
88    public ILookupParameter<BoolValue> ApplyLinearScalingParameter {
89      get { return (ILookupParameter<BoolValue>)Parameters[ApplyLinearScalingParameterName]; }
90    }
91    public ILookupParameter<BoolValue> RandomReplacementParameter {
92      get { return (ILookupParameter<BoolValue>)Parameters[RandomReplacementParameterName]; }
93    }
[13480]94    public ILookupParameter<ISymbolicExpressionTreeCrossover> CrossoverParameter {
95      get { return (ILookupParameter<ISymbolicExpressionTreeCrossover>)Parameters[CrossoverParameterName]; }
96    }
[12951]97    public ILookupParameter<IRandom> RandomParameter {
98      get { return (ILookupParameter<IRandom>)Parameters[RandomParameterName]; }
99    }
100    public ILookupParameter<IntValue> PopulationSizeParameter {
101      get { return (ILookupParameter<IntValue>)Parameters[PopulationSizeParameterName]; }
102    }
103    public ILookupParameter<ISymbolicExpressionTree> SchemaParameter {
104      get { return (ILookupParameter<ISymbolicExpressionTree>)Parameters[SchemaParameterName]; }
105    }
106    public ILookupParameter<PercentValue> MinimumSchemaFrequencyParameter {
107      get { return (ILookupParameter<PercentValue>)Parameters[MinimumSchemaFrequencyParameterName]; }
108    }
109    public ILookupParameter<PercentValue> ReplacementRatioParameter {
110      get { return (ILookupParameter<PercentValue>)Parameters[ReplacementRatioParameterName]; }
111    }
112    public ILookupParameter<PercentValue> MinimumPhenotypicSimilarityParameter {
113      get { return (ILookupParameter<PercentValue>)Parameters[MinimumPhenotypicSimilarityParameterName]; }
114    }
[12988]115    public LookupParameter<IntValue> NumberOfChangedTreesParameter {
116      get { return (LookupParameter<IntValue>)Parameters[NumberOfChangedTreesParameterName]; }
[12952]117    }
[13496]118    public LookupParameter<BoolValue> StrictSchemaMatchingParameter {
119      get { return (LookupParameter<BoolValue>)Parameters[StrictSchemaMatchingParameterName]; }
120    }
[12951]121    #endregion
122
123    #region parameter properties
[12979]124    public PercentValue MinimumSchemaFrequency { get { return MinimumSchemaFrequencyParameter.ActualValue; } }
125    public PercentValue ReplacementRatio { get { return ReplacementRatioParameter.ActualValue; } }
126    public PercentValue MinimumPhenotypicSimilarity { get { return MinimumPhenotypicSimilarityParameter.ActualValue; } }
127    public BoolValue RandomReplacement { get { return RandomReplacementParameter.ActualValue; } }
[12988]128    public IntValue NumberOfChangedTrees { get { return NumberOfChangedTreesParameter.ActualValue; } }
[12951]129    #endregion
130
[13565]131    private QueryMatch qm;
[12952]132
[13565]133    [Storable]
134    private SymbolicExpressionTreePhenotypicSimilarityCalculator calculator;
[13480]135
[13565]136    [Storable]
137    public string ReplacementRule { get; set; }
[12952]138
[13527]139    [Storable]
[13565]140    private ISymbolicExpressionTreeNodeEqualityComparer comparer;
141
142    [Storable]
[12988]143    public ISymbolicExpressionTree Schema { get; set; }
144
[12979]145    [Storable]
[13527]146    private readonly UpdateQualityOperator updateQualityOperator;
[12952]147
[15482]148    [Storable]
149    public ISymbolicExpressionTreeManipulator SchemaManipulator { get; set; }
150
[13496]151    [StorableHook(HookType.AfterDeserialization)]
152    private void AfterDeserialization() {
153      if (!Parameters.ContainsKey(StrictSchemaMatchingParameterName))
154        Parameters.Add(new LookupParameter<BoolValue>(StrictSchemaMatchingParameterName));
[13565]155
156      if (calculator == null)
157        calculator = new SymbolicExpressionTreePhenotypicSimilarityCalculator();
158
159      if (comparer == null)
160        comparer = new SymbolicExpressionTreeNodeEqualityComparer { MatchVariableNames = true, MatchVariableWeights = true, MatchConstantValues = false };
161
162      qm = new QueryMatch(comparer) { MatchParents = true };
[13496]163    }
164
[12951]165    public SchemaEvaluator() {
[13565]166      calculator = new SymbolicExpressionTreePhenotypicSimilarityCalculator();
167      comparer = new SymbolicExpressionTreeNodeEqualityComparer { MatchVariableNames = true, MatchVariableWeights = true, MatchConstantValues = false };
168      qm = new QueryMatch(comparer) { MatchParents = true };
169      updateQualityOperator = new UpdateQualityOperator();
[12988]170      #region add parameters
[12951]171      Parameters.Add(new LookupParameter<ISymbolicExpressionTree>(SchemaParameterName, "The current schema to be evaluated"));
172      Parameters.Add(new LookupParameter<PercentValue>(MinimumSchemaFrequencyParameterName));
173      Parameters.Add(new LookupParameter<PercentValue>(ReplacementRatioParameterName));
174      Parameters.Add(new LookupParameter<PercentValue>(MinimumPhenotypicSimilarityParameterName));
175      Parameters.Add(new LookupParameter<ISymbolicExpressionTree>(PopulationSizeParameterName));
176      Parameters.Add(new LookupParameter<IRandom>(RandomParameterName));
177      Parameters.Add(new LookupParameter<ISymbolicDataAnalysisSingleObjectiveEvaluator<IRegressionProblemData>>(EvaluatorParameterName));
178      Parameters.Add(new LookupParameter<IRegressionProblemData>(ProblemDataParameterName));
179      Parameters.Add(new LookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>(InterpreterParameterName));
180      Parameters.Add(new LookupParameter<DoubleLimit>(EstimationLimitsParameterName));
181      Parameters.Add(new LookupParameter<BoolValue>(ApplyLinearScalingParameterName));
[13496]182      Parameters.Add(new LookupParameter<BoolValue>(StrictSchemaMatchingParameterName));
[12951]183      Parameters.Add(new LookupParameter<ISymbolicExpressionTreeManipulator>(MutatorParameterName));
[13480]184      Parameters.Add(new LookupParameter<ISymbolicExpressionTreeCrossover>(CrossoverParameterName));
[12951]185      Parameters.Add(new LookupParameter<BoolValue>(RandomReplacementParameterName));
[12988]186      Parameters.Add(new LookupParameter<IntValue>(NumberOfChangedTreesParameterName));
[12979]187      Parameters.Add(new LookupParameter<BoolValue>(ExecuteInParallelParameterName));
188      Parameters.Add(new LookupParameter<IntValue>(MaxDegreeOfParalellismParameterName));
[12988]189      Parameters.Add(new LookupParameter<BoolValue>(ExclusiveMatchingParameterName));
[13480]190      Parameters.Add(new LookupParameter<BoolValue>(UseAdaptiveReplacementRatioParameterName));
[12988]191      #endregion
[12951]192    }
193
[12966]194    [StorableConstructor]
195    protected SchemaEvaluator(bool deserializing) : base(deserializing) { }
196
[12951]197    protected SchemaEvaluator(SchemaEvaluator original, Cloner cloner) : base(original, cloner) {
[13565]198      calculator = cloner.Clone(original.calculator);
199      comparer = cloner.Clone(original.comparer);
200      qm = new QueryMatch(comparer) { MatchParents = true };
201      updateQualityOperator = new UpdateQualityOperator();
[13527]202      Schema = original.Schema;
[12951]203    }
204
205    public override IDeepCloneable Clone(Cloner cloner) {
206      return new SchemaEvaluator(this, cloner);
207    }
208
209    public override IOperation Apply() {
[13496]210      var strictSchemaMatching = StrictSchemaMatchingParameter.ActualValue.Value;
211      if (strictSchemaMatching) {
[13565]212        comparer.MatchVariableWeights = true;
213        comparer.MatchConstantValues = true;
[13496]214      } else {
[13565]215        comparer.MatchVariableWeights = false;
216        comparer.MatchConstantValues = false;
[13496]217      }
218
[12951]219      var individuals = ExecutionContext.Scope.SubScopes; // the scopes represent the individuals
[13480]220      var trees = individuals.Select(x => (ISymbolicExpressionTree)x.Variables["SymbolicExpressionTree"].Value).ToList();
[12951]221
222      var random = RandomParameter.ActualValue;
223
[12988]224      var s = Schema;
[12979]225      var sRoot = s.Root.GetSubtree(0).GetSubtree(0);
226      int countThreshold = (int)Math.Max(2, Math.Round(MinimumSchemaFrequency.Value * individuals.Count));
227
228      // first apply the length and root equality checks in order to filter the individuals
[12988]229      var exclusiveMatching = ExclusiveMatchingParameter.ActualValue.Value;
[13480]230      var filtered = new List<int>();
231      for (int i = 0; i < individuals.Count; ++i) {
232        if (exclusiveMatching && individuals[i].Variables.ContainsKey("AlreadyMatched")) continue;
233        var t = trees[i];
234        var tRoot = t.Root.GetSubtree(0).GetSubtree(0);
[14427]235        if (t.Length < s.Length || !qm.EqualityComparer.Equals(tRoot, sRoot)) continue;
[13480]236        filtered.Add(i);
237      }
[12979]238
239      // if we don't have enough filtered individuals, then we are done
[13480]240      // if the schema exceeds the minimum frequency, it gets processed further
[12979]241      if (filtered.Count < countThreshold) {
242        return base.Apply();
243      }
244
245      // check if the filtered individuals match the schema
246      var sNodes = QueryMatch.InitializePostOrder(sRoot);
[12958]247      var matchingIndividuals = new ScopeList();
[12979]248      bool executeInParallel = ExecuteInParallelParameter.ActualValue.Value;
249      int maxDegreeOfParallelism = MaxDegreeOfParallelismParameter.ActualValue.Value;
250
251      if (executeInParallel) {
252        var matching = new bool[filtered.Count];
253
254        Parallel.For(0, filtered.Count, new ParallelOptions { MaxDegreeOfParallelism = maxDegreeOfParallelism },
255          i => {
[13480]256            var index = filtered[i];
257            var t = trees[index];
[12979]258            var tNodes = QueryMatch.InitializePostOrder(t.Root.GetSubtree(0).GetSubtree(0));
259            if (qm.Match(tNodes, sNodes)) {
260              matching[i] = true;
261            }
262          });
263
[13480]264        matchingIndividuals.AddRange(filtered.Where((x, i) => matching[i]).Select(x => individuals[x]));
[12979]265      } else {
266        for (int i = 0; i < filtered.Count; ++i) {
267          // break early if it becomes impossible to reach the minimum threshold
268          if (matchingIndividuals.Count + filtered.Count - i < countThreshold)
269            break;
270
[13480]271          var index = filtered[i];
272          var tRoot = trees[index].Root.GetSubtree(0).GetSubtree(0);
273          var tNodes = QueryMatch.InitializePostOrder(tRoot);
[12979]274          if (qm.Match(tNodes, sNodes))
[13480]275            matchingIndividuals.Add(individuals[index]);
[12979]276        }
[12958]277      }
[12951]278
[12979]279      if (matchingIndividuals.Count < countThreshold) {
[12951]280        return base.Apply();
[12952]281      }
[12951]282
[13480]283      var similarity = CalculateSimilarity(matchingIndividuals, calculator, executeInParallel, maxDegreeOfParallelism);
[12952]284      if (similarity < MinimumPhenotypicSimilarity.Value) {
[12951]285        return base.Apply();
[12952]286      }
[12951]287
[13480]288      double replacementRatio;
289      var adaptiveReplacementRatio = UseAdaptiveReplacementRatioParameter.ActualValue.Value;
290
291      if (adaptiveReplacementRatio) {
292        var r = (double)matchingIndividuals.Count / individuals.Count;
[13565]293        replacementRatio = CalculateReplacementRatio(r);
[13480]294      } else {
295        replacementRatio = ReplacementRatio.Value;
296      }
297
298      int n = (int)Math.Floor(matchingIndividuals.Count * replacementRatio);
[12952]299      var individualsToReplace = RandomReplacement.Value ? matchingIndividuals.SampleRandomWithoutRepetition(random, n).ToList()
300                                                         : matchingIndividuals.OrderBy(x => (DoubleValue)x.Variables["Quality"].Value).Take(n).ToList();
[12979]301      var mutationOc = new OperationCollection { Parallel = false }; // cannot be parallel due to the before/after operators which insert vertices in the genealogy graph
302      var updateEstimatedValues = new OperationCollection { Parallel = true }; // evaluation should be done in parallel when possible
[12951]303      foreach (var ind in individualsToReplace) {
[15482]304        var mutatorOp = ExecutionContext.CreateChildOperation(SchemaManipulator, ind);
[13527]305        var updateOp = ExecutionContext.CreateChildOperation(updateQualityOperator, ind);
[12979]306        mutationOc.Add(mutatorOp);
307        updateEstimatedValues.Add(updateOp);
[12988]308        if (exclusiveMatching)
309          ind.Variables.Add(new Core.Variable("AlreadyMatched"));
[12951]310      }
[12988]311
312      NumberOfChangedTrees.Value += individualsToReplace.Count; // a lock is not necessary here because the SchemaEvaluators cannot be executed in parallel
313
[12979]314      return new OperationCollection(mutationOc, updateEstimatedValues, base.Apply());
[12951]315    }
[12979]316
[13565]317    private double CalculateReplacementRatio(double r) {
318      switch (ReplacementRule) {
319        case "f(x) = x": {
320            return r;
321          }
322        case "f(x) = tanh(x)": {
323            return Math.Tanh(r);
324          }
325        case "f(x) = tanh(2x)": {
326            return Math.Tanh(2 * r);
327          }
328        case "f(x) = tanh(3x)": {
329            return Math.Tanh(3 * r);
330          }
331        case "f(x) = tanh(4x)": {
332            return Math.Tanh(4 * r);
333          }
334        case "f(x) = 1-sqrt(1-x)": {
335            return 1 - Math.Sqrt(1 - r);
336          }
337        default:
338          throw new ArgumentException("Unknown replacement rule");
339      }
340    }
341
[13480]342    public static double CalculateSimilarity(ScopeList individuals, ISolutionSimilarityCalculator calculator, bool parallel = false, int nThreads = -1) {
[12979]343      double similarity = 0;
344      int count = individuals.Count;
[13480]345      if (count < 2) return double.NaN;
[12979]346      if (parallel) {
347        var parallelOptions = new ParallelOptions { MaxDegreeOfParallelism = nThreads };
348        var simArray = new double[count - 1];
349        Parallel.For(0, count - 1, parallelOptions, i => {
350          double innerSim = 0;
351          for (int j = i + 1; j < count; ++j) {
352            innerSim += calculator.CalculateSolutionSimilarity(individuals[i], individuals[j]);
353          }
354          simArray[i] = innerSim;
355        });
356        similarity = simArray.Sum();
357      } else {
358        for (int i = 0; i < count - 1; ++i) {
359          for (int j = i + 1; j < count; ++j) {
360            similarity += calculator.CalculateSolutionSimilarity(individuals[i], individuals[j]);
361          }
362        }
363      }
364      return similarity / (count * (count - 1) / 2.0);
365    }
[12951]366  }
367}
Note: See TracBrowser for help on using the repository browser.