Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/SchemaDiversification/SchemaEvaluator.cs @ 12988

Last change on this file since 12988 was 12988, checked in by bburlacu, 9 years ago

#1772: Performance improvement changes

  • QueryMatch.cs: eliminate unnecessary ToList() call and expensive GetBranchLevel calls
  • Diversification: eliminated creation of shallow copies of the individual subscopes as it was either too slow (due to events being registered/deregistered when variables are added to the scope) or too leaky (if attempting to clear the scopes without clearing the variables then the code is leaking EventHandlers)
  • Aggregated diversification statistics separately with the help of some parameters set up in the SchemaCreator and SchemaEvaluator
  • Made code in the UpdateEstimatedValuesOperator perform exactly as in the evaluator (updating quality and estimated values)
  • Removed no longer needed SchemaCleanupOperator
  • Do not evaluate intermediate vertices in the genealogy analyzer if the TrimOlderGenerations flag is activated

New functionality:

  • parameter to control the fraction of the population to be considered by the diversification strategy
  • parameter to control whether individuals may be matched by any schema and mutated only once (exclusive matching)
  • parameter to control whether linear scaling should be applied to the estimated values used for the calculation of phenotypic similarity (default: yes)
File size: 15.6 KB
RevLine 
[12951]1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Linq;
[12979]24using System.Threading.Tasks;
[12951]25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29using HeuristicLab.EvolutionTracking;
30using HeuristicLab.Parameters;
31using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
32using HeuristicLab.Random;
33
[12958]34namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
[12951]35  [Item("SchemaEvaluator", "An operator that builds schemas based on the heredity relationship in the genealogy graph.")]
36  [StorableClass]
37  public class SchemaEvaluator : EvolutionTrackingOperator<ISymbolicExpressionTree> {
[12952]38    #region parameter names
[12951]39    private const string MinimumSchemaFrequencyParameterName = "MinimumSchemaFrequency";
40    private const string MinimumPhenotypicSimilarityParameterName = "MinimumPhenotypicSimilarity";
41    private const string ReplacementRatioParameterName = "ReplacementRatio";
42    private const string SchemaParameterName = "Schema";
43    private const string PopulationSizeParameterName = "PopulationSize";
44    private const string RandomParameterName = "Random";
45    private const string EvaluatorParameterName = "Evaluator";
46    private const string ProblemDataParameterName = "ProblemData";
47    private const string InterpreterParameterName = "SymbolicExpressionTreeInterpreter";
48    private const string EstimationLimitsParameterName = "EstimationLimits";
49    private const string ApplyLinearScalingParameterName = "ApplyLinearScaling";
50    private const string MutatorParameterName = "Mutator";
51    private const string RandomReplacementParameterName = "RandomReplacement";
[12988]52    private const string NumberOfChangedTreesParameterName = "NumberOfChangedTrees";
[12979]53    private const string ExecuteInParallelParameterName = "ExecuteInParallel";
54    private const string MaxDegreeOfParalellismParameterName = "MaxDegreeOfParallelism";
[12988]55    private const string ExclusiveMatchingParameterName = "ExclusiveMatching";
[12952]56    #endregion
[12951]57
58    #region parameters
[12988]59    public ILookupParameter<BoolValue> ExclusiveMatchingParameter {
60      get { return (ILookupParameter<BoolValue>)Parameters[ExclusiveMatchingParameterName]; }
61    }
[12979]62    public ILookupParameter<BoolValue> ExecuteInParallelParameter {
63      get { return (ILookupParameter<BoolValue>)Parameters[ExecuteInParallelParameterName]; }
64    }
65    public ILookupParameter<IntValue> MaxDegreeOfParallelismParameter {
66      get { return (ILookupParameter<IntValue>)Parameters[MaxDegreeOfParalellismParameterName]; }
67    }
[12951]68    public ILookupParameter<ISymbolicDataAnalysisSingleObjectiveEvaluator<IRegressionProblemData>> EvaluatorParameter {
69      get { return (ILookupParameter<ISymbolicDataAnalysisSingleObjectiveEvaluator<IRegressionProblemData>>)Parameters[EvaluatorParameterName]; }
70    }
71    public ILookupParameter<IRegressionProblemData> ProblemDataParameter {
72      get { return (ILookupParameter<IRegressionProblemData>)Parameters[ProblemDataParameterName]; }
73    }
74    public ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter> InterpreterParameter {
75      get { return (ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>)Parameters[InterpreterParameterName]; }
76    }
77    public ILookupParameter<DoubleLimit> EstimationLimitsParameter {
78      get { return (ILookupParameter<DoubleLimit>)Parameters[EstimationLimitsParameterName]; }
79    }
80    public ILookupParameter<BoolValue> ApplyLinearScalingParameter {
81      get { return (ILookupParameter<BoolValue>)Parameters[ApplyLinearScalingParameterName]; }
82    }
83    public ILookupParameter<BoolValue> RandomReplacementParameter {
84      get { return (ILookupParameter<BoolValue>)Parameters[RandomReplacementParameterName]; }
85    }
86    public ILookupParameter<ISymbolicExpressionTreeManipulator> MutatorParameter {
87      get { return (ILookupParameter<ISymbolicExpressionTreeManipulator>)Parameters[MutatorParameterName]; }
88    }
89    public ILookupParameter<IRandom> RandomParameter {
90      get { return (ILookupParameter<IRandom>)Parameters[RandomParameterName]; }
91    }
92    public ILookupParameter<IntValue> PopulationSizeParameter {
93      get { return (ILookupParameter<IntValue>)Parameters[PopulationSizeParameterName]; }
94    }
95    public ILookupParameter<ISymbolicExpressionTree> SchemaParameter {
96      get { return (ILookupParameter<ISymbolicExpressionTree>)Parameters[SchemaParameterName]; }
97    }
98    public ILookupParameter<PercentValue> MinimumSchemaFrequencyParameter {
99      get { return (ILookupParameter<PercentValue>)Parameters[MinimumSchemaFrequencyParameterName]; }
100    }
101    public ILookupParameter<PercentValue> ReplacementRatioParameter {
102      get { return (ILookupParameter<PercentValue>)Parameters[ReplacementRatioParameterName]; }
103    }
104    public ILookupParameter<PercentValue> MinimumPhenotypicSimilarityParameter {
105      get { return (ILookupParameter<PercentValue>)Parameters[MinimumPhenotypicSimilarityParameterName]; }
106    }
[12988]107    public LookupParameter<IntValue> NumberOfChangedTreesParameter {
108      get { return (LookupParameter<IntValue>)Parameters[NumberOfChangedTreesParameterName]; }
[12952]109    }
[12951]110    #endregion
111
112    #region parameter properties
[12979]113    public PercentValue MinimumSchemaFrequency { get { return MinimumSchemaFrequencyParameter.ActualValue; } }
114    public PercentValue ReplacementRatio { get { return ReplacementRatioParameter.ActualValue; } }
115    public PercentValue MinimumPhenotypicSimilarity { get { return MinimumPhenotypicSimilarityParameter.ActualValue; } }
116    public BoolValue RandomReplacement { get { return RandomReplacementParameter.ActualValue; } }
[12988]117    public IntValue NumberOfChangedTrees { get { return NumberOfChangedTreesParameter.ActualValue; } }
[12951]118    #endregion
119
[12952]120    private readonly SymbolicExpressionTreePhenotypicSimilarityCalculator calculator = new SymbolicExpressionTreePhenotypicSimilarityCalculator();
121    private readonly QueryMatch qm;
122
123    private readonly ISymbolicExpressionTreeNodeEqualityComparer comp = new SymbolicExpressionTreeNodeEqualityComparer {
124      MatchConstantValues = false,
125      MatchVariableWeights = false,
126      MatchVariableNames = true
127    };
128
[12988]129    public ISymbolicExpressionTree Schema { get; set; }
130
[12979]131    [Storable]
132    private readonly UpdateEstimatedValuesOperator updateEstimatedValuesOperator;
[12952]133
[12951]134    public SchemaEvaluator() {
135      qm = new QueryMatch(comp) { MatchParents = true };
[12979]136      this.updateEstimatedValuesOperator = new UpdateEstimatedValuesOperator();
[12988]137      #region add parameters
[12951]138      Parameters.Add(new LookupParameter<ISymbolicExpressionTree>(SchemaParameterName, "The current schema to be evaluated"));
139      Parameters.Add(new LookupParameter<PercentValue>(MinimumSchemaFrequencyParameterName));
140      Parameters.Add(new LookupParameter<PercentValue>(ReplacementRatioParameterName));
141      Parameters.Add(new LookupParameter<PercentValue>(MinimumPhenotypicSimilarityParameterName));
142      Parameters.Add(new LookupParameter<ISymbolicExpressionTree>(PopulationSizeParameterName));
143      Parameters.Add(new LookupParameter<IRandom>(RandomParameterName));
144      Parameters.Add(new LookupParameter<ISymbolicDataAnalysisSingleObjectiveEvaluator<IRegressionProblemData>>(EvaluatorParameterName));
145      Parameters.Add(new LookupParameter<IRegressionProblemData>(ProblemDataParameterName));
146      Parameters.Add(new LookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>(InterpreterParameterName));
147      Parameters.Add(new LookupParameter<DoubleLimit>(EstimationLimitsParameterName));
148      Parameters.Add(new LookupParameter<BoolValue>(ApplyLinearScalingParameterName));
149      Parameters.Add(new LookupParameter<ISymbolicExpressionTreeManipulator>(MutatorParameterName));
150      Parameters.Add(new LookupParameter<BoolValue>(RandomReplacementParameterName));
[12988]151      Parameters.Add(new LookupParameter<IntValue>(NumberOfChangedTreesParameterName));
[12979]152      Parameters.Add(new LookupParameter<BoolValue>(ExecuteInParallelParameterName));
153      Parameters.Add(new LookupParameter<IntValue>(MaxDegreeOfParalellismParameterName));
[12988]154      Parameters.Add(new LookupParameter<BoolValue>(ExclusiveMatchingParameterName));
155      #endregion
[12951]156    }
157
[12966]158    [StorableConstructor]
159    protected SchemaEvaluator(bool deserializing) : base(deserializing) { }
160
[12951]161    protected SchemaEvaluator(SchemaEvaluator original, Cloner cloner) : base(original, cloner) {
[12979]162      this.comp = original.comp == null ? new SymbolicExpressionTreeNodeEqualityComparer {
163        MatchConstantValues = false,
164        MatchVariableWeights = false,
165        MatchVariableNames = true
166      } : (ISymbolicExpressionTreeNodeEqualityComparer)original.comp.Clone();
167      this.qm = new QueryMatch(comp) { MatchParents = original.qm?.MatchParents ?? true };
168      this.updateEstimatedValuesOperator = new UpdateEstimatedValuesOperator();
[12951]169    }
170
171    public override IDeepCloneable Clone(Cloner cloner) {
172      return new SchemaEvaluator(this, cloner);
173    }
174
175    public override IOperation Apply() {
176      var individuals = ExecutionContext.Scope.SubScopes; // the scopes represent the individuals
177
178      var random = RandomParameter.ActualValue;
179      var mutator = MutatorParameter.ActualValue;
180
[12988]181      var s = Schema;
[12979]182      var sRoot = s.Root.GetSubtree(0).GetSubtree(0);
183      int countThreshold = (int)Math.Max(2, Math.Round(MinimumSchemaFrequency.Value * individuals.Count));
184
185      // first apply the length and root equality checks in order to filter the individuals
[12988]186      var exclusiveMatching = ExclusiveMatchingParameter.ActualValue.Value;
187      var filtered = exclusiveMatching ? (from ind in individuals
188                                          where !ind.Variables.ContainsKey("AlreadyMatched")
189                                          let t = (ISymbolicExpressionTree)ind.Variables["SymbolicExpressionTree"].Value
190                                          where t.Length >= s.Length && qm.Comparer.Equals(t.Root.GetSubtree(0).GetSubtree(0), sRoot)
191                                          select ind).ToList()
192                                       : (from ind in individuals
193                                          let t = (ISymbolicExpressionTree)ind.Variables["SymbolicExpressionTree"].Value
194                                          where t.Length >= s.Length && qm.Comparer.Equals(t.Root.GetSubtree(0).GetSubtree(0), sRoot)
195                                          select ind).ToList();
[12979]196
197      // if we don't have enough filtered individuals, then we are done
198      if (filtered.Count < countThreshold) {
199        return base.Apply();
200      }
201
202      // check if the filtered individuals match the schema
203      var sNodes = QueryMatch.InitializePostOrder(sRoot);
[12958]204      var matchingIndividuals = new ScopeList();
[12979]205      bool executeInParallel = ExecuteInParallelParameter.ActualValue.Value;
206      int maxDegreeOfParallelism = MaxDegreeOfParallelismParameter.ActualValue.Value;
207
208      if (executeInParallel) {
209        var matching = new bool[filtered.Count];
210
211        Parallel.For(0, filtered.Count, new ParallelOptions { MaxDegreeOfParallelism = maxDegreeOfParallelism },
212          i => {
213            var t = (ISymbolicExpressionTree)filtered[i].Variables["SymbolicExpressionTree"].Value;
214            var tNodes = QueryMatch.InitializePostOrder(t.Root.GetSubtree(0).GetSubtree(0));
215            if (qm.Match(tNodes, sNodes)) {
216              matching[i] = true;
217            }
218          });
219
220        for (int i = 0; i < matching.Length; ++i) {
221          if (matching[i])
222            matchingIndividuals.Add(filtered[i]);
223        }
224      } else {
225        for (int i = 0; i < filtered.Count; ++i) {
226          // break early if it becomes impossible to reach the minimum threshold
227          if (matchingIndividuals.Count + filtered.Count - i < countThreshold)
228            break;
229
230          var ind = filtered[i];
231          var t = (ISymbolicExpressionTree)ind.Variables["SymbolicExpressionTree"].Value;
232          var tNodes = QueryMatch.InitializePostOrder(t.Root.GetSubtree(0).GetSubtree(0));
233          if (qm.Match(tNodes, sNodes))
234            matchingIndividuals.Add(ind);
235        }
[12958]236      }
[12951]237
[12988]238      // additional condition: the average schema quality should be equal or greater than the population average quality
[12979]239      if (matchingIndividuals.Count < countThreshold) {
[12951]240        return base.Apply();
[12952]241      }
[12951]242
[12979]243      var similarity = CalculatePhenotypicSimilarity(matchingIndividuals, calculator, executeInParallel, maxDegreeOfParallelism);
[12952]244      if (similarity < MinimumPhenotypicSimilarity.Value) {
[12951]245        return base.Apply();
[12952]246      }
[12951]247
[12970]248      int n = (int)Math.Floor(matchingIndividuals.Count * ReplacementRatio.Value);
[12952]249      var individualsToReplace = RandomReplacement.Value ? matchingIndividuals.SampleRandomWithoutRepetition(random, n).ToList()
250                                                         : matchingIndividuals.OrderBy(x => (DoubleValue)x.Variables["Quality"].Value).Take(n).ToList();
[12979]251      var mutationOc = new OperationCollection { Parallel = false }; // cannot be parallel due to the before/after operators which insert vertices in the genealogy graph
252      var updateEstimatedValues = new OperationCollection { Parallel = true }; // evaluation should be done in parallel when possible
[12951]253      foreach (var ind in individualsToReplace) {
254        var mutatorOp = ExecutionContext.CreateChildOperation(mutator, ind);
[12979]255        var updateOp = ExecutionContext.CreateChildOperation(updateEstimatedValuesOperator, ind);
256        mutationOc.Add(mutatorOp);
257        updateEstimatedValues.Add(updateOp);
[12988]258        if (exclusiveMatching)
259          ind.Variables.Add(new Core.Variable("AlreadyMatched"));
[12951]260      }
[12988]261
262      NumberOfChangedTrees.Value += individualsToReplace.Count; // a lock is not necessary here because the SchemaEvaluators cannot be executed in parallel
263
[12979]264      return new OperationCollection(mutationOc, updateEstimatedValues, base.Apply());
[12951]265    }
[12979]266
267    private static double CalculatePhenotypicSimilarity(ScopeList individuals, SymbolicExpressionTreePhenotypicSimilarityCalculator calculator, bool parallel = false, int nThreads = -1) {
268      double similarity = 0;
269      int count = individuals.Count;
270      if (parallel) {
271        var parallelOptions = new ParallelOptions { MaxDegreeOfParallelism = nThreads };
272        var simArray = new double[count - 1];
273        Parallel.For(0, count - 1, parallelOptions, i => {
274          double innerSim = 0;
275          for (int j = i + 1; j < count; ++j) {
276            innerSim += calculator.CalculateSolutionSimilarity(individuals[i], individuals[j]);
277          }
278          simArray[i] = innerSim;
279        });
280        similarity = simArray.Sum();
281      } else {
282        for (int i = 0; i < count - 1; ++i) {
283          for (int j = i + 1; j < count; ++j) {
284            similarity += calculator.CalculateSolutionSimilarity(individuals[i], individuals[j]);
285          }
286        }
287      }
288      return similarity / (count * (count - 1) / 2.0);
289    }
[12951]290  }
291}
Note: See TracBrowser for help on using the repository browser.