Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/SchemaDiversification/SchemaEvaluator.cs @ 12979

Last change on this file since 12979 was 12979, checked in by bburlacu, 9 years ago

#1772:

  • added parameters to the SchemaCreator for calculating things in parallel
  • added parallel code to the SchemaEvaluator
  • implemented quality calculation and saving of estimated values in the scope in the UpdateEstimatedValuesOperator
  • small refactoring of QueryMatch (added some useful methods and made NodeInfo internal)
  • updated query match unit test
File size: 14.3 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Linq;
24using System.Threading.Tasks;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29using HeuristicLab.EvolutionTracking;
30using HeuristicLab.Parameters;
31using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
32using HeuristicLab.Random;
33
34namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
35  [Item("SchemaEvaluator", "An operator that builds schemas based on the heredity relationship in the genealogy graph.")]
36  [StorableClass]
37  public class SchemaEvaluator : EvolutionTrackingOperator<ISymbolicExpressionTree> {
38    #region parameter names
39    private const string MinimumSchemaFrequencyParameterName = "MinimumSchemaFrequency";
40    private const string MinimumPhenotypicSimilarityParameterName = "MinimumPhenotypicSimilarity";
41    private const string ReplacementRatioParameterName = "ReplacementRatio";
42    private const string SchemaParameterName = "Schema";
43    private const string PopulationSizeParameterName = "PopulationSize";
44    private const string RandomParameterName = "Random";
45    private const string EvaluatorParameterName = "Evaluator";
46    private const string ProblemDataParameterName = "ProblemData";
47    private const string InterpreterParameterName = "SymbolicExpressionTreeInterpreter";
48    private const string EstimationLimitsParameterName = "EstimationLimits";
49    private const string ApplyLinearScalingParameterName = "ApplyLinearScaling";
50    private const string MutatorParameterName = "Mutator";
51    private const string RandomReplacementParameterName = "RandomReplacement";
52    private const string ChangedTreesParameterName = "ChangedTrees";
53    private const string ExecuteInParallelParameterName = "ExecuteInParallel";
54    private const string MaxDegreeOfParalellismParameterName = "MaxDegreeOfParallelism";
55    #endregion
56
57    #region parameters
58    public ILookupParameter<BoolValue> ExecuteInParallelParameter {
59      get { return (ILookupParameter<BoolValue>)Parameters[ExecuteInParallelParameterName]; }
60    }
61    public ILookupParameter<IntValue> MaxDegreeOfParallelismParameter {
62      get { return (ILookupParameter<IntValue>)Parameters[MaxDegreeOfParalellismParameterName]; }
63    }
64    public ILookupParameter<ISymbolicDataAnalysisSingleObjectiveEvaluator<IRegressionProblemData>> EvaluatorParameter {
65      get { return (ILookupParameter<ISymbolicDataAnalysisSingleObjectiveEvaluator<IRegressionProblemData>>)Parameters[EvaluatorParameterName]; }
66    }
67    public ILookupParameter<IRegressionProblemData> ProblemDataParameter {
68      get { return (ILookupParameter<IRegressionProblemData>)Parameters[ProblemDataParameterName]; }
69    }
70    public ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter> InterpreterParameter {
71      get { return (ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>)Parameters[InterpreterParameterName]; }
72    }
73    public ILookupParameter<DoubleLimit> EstimationLimitsParameter {
74      get { return (ILookupParameter<DoubleLimit>)Parameters[EstimationLimitsParameterName]; }
75    }
76    public ILookupParameter<BoolValue> ApplyLinearScalingParameter {
77      get { return (ILookupParameter<BoolValue>)Parameters[ApplyLinearScalingParameterName]; }
78    }
79    public ILookupParameter<BoolValue> RandomReplacementParameter {
80      get { return (ILookupParameter<BoolValue>)Parameters[RandomReplacementParameterName]; }
81    }
82    public ILookupParameter<ISymbolicExpressionTreeManipulator> MutatorParameter {
83      get { return (ILookupParameter<ISymbolicExpressionTreeManipulator>)Parameters[MutatorParameterName]; }
84    }
85    public ILookupParameter<IRandom> RandomParameter {
86      get { return (ILookupParameter<IRandom>)Parameters[RandomParameterName]; }
87    }
88    public ILookupParameter<IntValue> PopulationSizeParameter {
89      get { return (ILookupParameter<IntValue>)Parameters[PopulationSizeParameterName]; }
90    }
91    public ILookupParameter<ISymbolicExpressionTree> SchemaParameter {
92      get { return (ILookupParameter<ISymbolicExpressionTree>)Parameters[SchemaParameterName]; }
93    }
94    public ILookupParameter<PercentValue> MinimumSchemaFrequencyParameter {
95      get { return (ILookupParameter<PercentValue>)Parameters[MinimumSchemaFrequencyParameterName]; }
96    }
97    public ILookupParameter<PercentValue> ReplacementRatioParameter {
98      get { return (ILookupParameter<PercentValue>)Parameters[ReplacementRatioParameterName]; }
99    }
100    public ILookupParameter<PercentValue> MinimumPhenotypicSimilarityParameter {
101      get { return (ILookupParameter<PercentValue>)Parameters[MinimumPhenotypicSimilarityParameterName]; }
102    }
103    public LookupParameter<IntValue> ChangedTreesParameter {
104      get { return (LookupParameter<IntValue>)Parameters[ChangedTreesParameterName]; }
105    }
106    #endregion
107
108    #region parameter properties
109    public PercentValue MinimumSchemaFrequency { get { return MinimumSchemaFrequencyParameter.ActualValue; } }
110    public PercentValue ReplacementRatio { get { return ReplacementRatioParameter.ActualValue; } }
111    public PercentValue MinimumPhenotypicSimilarity { get { return MinimumPhenotypicSimilarityParameter.ActualValue; } }
112    public BoolValue RandomReplacement { get { return RandomReplacementParameter.ActualValue; } }
113    #endregion
114
115    private readonly SymbolicExpressionTreePhenotypicSimilarityCalculator calculator = new SymbolicExpressionTreePhenotypicSimilarityCalculator();
116    private readonly QueryMatch qm;
117
118    private readonly ISymbolicExpressionTreeNodeEqualityComparer comp = new SymbolicExpressionTreeNodeEqualityComparer {
119      MatchConstantValues = false,
120      MatchVariableWeights = false,
121      MatchVariableNames = true
122    };
123
124    [Storable]
125    private readonly UpdateEstimatedValuesOperator updateEstimatedValuesOperator;
126
127    public SchemaEvaluator() {
128      qm = new QueryMatch(comp) { MatchParents = true };
129      this.updateEstimatedValuesOperator = new UpdateEstimatedValuesOperator();
130
131      Parameters.Add(new LookupParameter<ISymbolicExpressionTree>(SchemaParameterName, "The current schema to be evaluated"));
132      Parameters.Add(new LookupParameter<PercentValue>(MinimumSchemaFrequencyParameterName));
133      Parameters.Add(new LookupParameter<PercentValue>(ReplacementRatioParameterName));
134      Parameters.Add(new LookupParameter<PercentValue>(MinimumPhenotypicSimilarityParameterName));
135      Parameters.Add(new LookupParameter<ISymbolicExpressionTree>(PopulationSizeParameterName));
136      Parameters.Add(new LookupParameter<IRandom>(RandomParameterName));
137      Parameters.Add(new LookupParameter<ISymbolicDataAnalysisSingleObjectiveEvaluator<IRegressionProblemData>>(EvaluatorParameterName));
138      Parameters.Add(new LookupParameter<IRegressionProblemData>(ProblemDataParameterName));
139      Parameters.Add(new LookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>(InterpreterParameterName));
140      Parameters.Add(new LookupParameter<DoubleLimit>(EstimationLimitsParameterName));
141      Parameters.Add(new LookupParameter<BoolValue>(ApplyLinearScalingParameterName));
142      Parameters.Add(new LookupParameter<ISymbolicExpressionTreeManipulator>(MutatorParameterName));
143      Parameters.Add(new LookupParameter<BoolValue>(RandomReplacementParameterName));
144      Parameters.Add(new LookupParameter<IntValue>(ChangedTreesParameterName));
145      Parameters.Add(new LookupParameter<BoolValue>(ExecuteInParallelParameterName));
146      Parameters.Add(new LookupParameter<IntValue>(MaxDegreeOfParalellismParameterName));
147    }
148
149    [StorableConstructor]
150    protected SchemaEvaluator(bool deserializing) : base(deserializing) { }
151
152    protected SchemaEvaluator(SchemaEvaluator original, Cloner cloner) : base(original, cloner) {
153      this.comp = original.comp == null ? new SymbolicExpressionTreeNodeEqualityComparer {
154        MatchConstantValues = false,
155        MatchVariableWeights = false,
156        MatchVariableNames = true
157      } : (ISymbolicExpressionTreeNodeEqualityComparer)original.comp.Clone();
158      this.qm = new QueryMatch(comp) { MatchParents = original.qm?.MatchParents ?? true };
159      this.updateEstimatedValuesOperator = new UpdateEstimatedValuesOperator();
160    }
161
162    public override IDeepCloneable Clone(Cloner cloner) {
163      return new SchemaEvaluator(this, cloner);
164    }
165
166    public override IOperation Apply() {
167      var individuals = ExecutionContext.Scope.SubScopes; // the scopes represent the individuals
168
169      var random = RandomParameter.ActualValue;
170      var mutator = MutatorParameter.ActualValue;
171
172      var s = SchemaParameter.ActualValue;
173      var sRoot = s.Root.GetSubtree(0).GetSubtree(0);
174      int countThreshold = (int)Math.Max(2, Math.Round(MinimumSchemaFrequency.Value * individuals.Count));
175
176      // first apply the length and root equality checks in order to filter the individuals
177      var filtered = (from ind in individuals
178                      let t = (ISymbolicExpressionTree)ind.Variables["SymbolicExpressionTree"].Value
179                      where t.Length >= s.Length && qm.Comparer.Equals(t.Root.GetSubtree(0).GetSubtree(0), sRoot)
180                      select ind).ToList();
181
182      // if we don't have enough filtered individuals, then we are done
183      if (filtered.Count < countThreshold) {
184        ChangedTreesParameter.ActualValue = new IntValue(0);
185        return base.Apply();
186      }
187
188      // check if the filtered individuals match the schema
189      var sNodes = QueryMatch.InitializePostOrder(sRoot);
190      var matchingIndividuals = new ScopeList();
191      bool executeInParallel = ExecuteInParallelParameter.ActualValue.Value;
192      int maxDegreeOfParallelism = MaxDegreeOfParallelismParameter.ActualValue.Value;
193
194      if (executeInParallel) {
195        var matching = new bool[filtered.Count];
196
197        Parallel.For(0, filtered.Count, new ParallelOptions { MaxDegreeOfParallelism = maxDegreeOfParallelism },
198          i => {
199            var t = (ISymbolicExpressionTree)filtered[i].Variables["SymbolicExpressionTree"].Value;
200            var tNodes = QueryMatch.InitializePostOrder(t.Root.GetSubtree(0).GetSubtree(0));
201            if (qm.Match(tNodes, sNodes)) {
202              matching[i] = true;
203            }
204          });
205
206        for (int i = 0; i < matching.Length; ++i) {
207          if (matching[i])
208            matchingIndividuals.Add(filtered[i]);
209        }
210      } else {
211        for (int i = 0; i < filtered.Count; ++i) {
212
213          // break early if it becomes impossible to reach the minimum threshold
214          if (matchingIndividuals.Count + filtered.Count - i < countThreshold)
215            break;
216
217          var ind = filtered[i];
218          var t = (ISymbolicExpressionTree)ind.Variables["SymbolicExpressionTree"].Value;
219          var tNodes = QueryMatch.InitializePostOrder(t.Root.GetSubtree(0).GetSubtree(0));
220          if (qm.Match(tNodes, sNodes))
221            matchingIndividuals.Add(ind);
222        }
223      }
224
225      if (matchingIndividuals.Count < countThreshold) {
226        ChangedTreesParameter.ActualValue = new IntValue(0);
227        return base.Apply();
228      }
229
230      var similarity = CalculatePhenotypicSimilarity(matchingIndividuals, calculator, executeInParallel, maxDegreeOfParallelism);
231      if (similarity < MinimumPhenotypicSimilarity.Value) {
232        ChangedTreesParameter.ActualValue = new IntValue(0);
233        return base.Apply();
234      }
235
236      int n = (int)Math.Floor(matchingIndividuals.Count * ReplacementRatio.Value);
237      var individualsToReplace = RandomReplacement.Value ? matchingIndividuals.SampleRandomWithoutRepetition(random, n).ToList()
238                                                         : matchingIndividuals.OrderBy(x => (DoubleValue)x.Variables["Quality"].Value).Take(n).ToList();
239      var mutationOc = new OperationCollection { Parallel = false }; // cannot be parallel due to the before/after operators which insert vertices in the genealogy graph
240      var updateEstimatedValues = new OperationCollection { Parallel = true }; // evaluation should be done in parallel when possible
241      foreach (var ind in individualsToReplace) {
242        var mutatorOp = ExecutionContext.CreateChildOperation(mutator, ind);
243        var updateOp = ExecutionContext.CreateChildOperation(updateEstimatedValuesOperator, ind);
244        mutationOc.Add(mutatorOp);
245        updateEstimatedValues.Add(updateOp);
246      }
247      ChangedTreesParameter.ActualValue = new IntValue(individualsToReplace.Count);
248      return new OperationCollection(mutationOc, updateEstimatedValues, base.Apply());
249    }
250
251    private static double CalculatePhenotypicSimilarity(ScopeList individuals, SymbolicExpressionTreePhenotypicSimilarityCalculator calculator, bool parallel = false, int nThreads = -1) {
252      double similarity = 0;
253      int count = individuals.Count;
254      if (parallel) {
255        var parallelOptions = new ParallelOptions { MaxDegreeOfParallelism = nThreads };
256        var simArray = new double[count - 1];
257        Parallel.For(0, count - 1, parallelOptions, i => {
258          double innerSim = 0;
259          for (int j = i + 1; j < count; ++j) {
260            innerSim += calculator.CalculateSolutionSimilarity(individuals[i], individuals[j]);
261          }
262          simArray[i] = innerSim;
263        });
264        similarity = simArray.Sum();
265      } else {
266        for (int i = 0; i < count - 1; ++i) {
267          for (int j = i + 1; j < count; ++j) {
268            similarity += calculator.CalculateSolutionSimilarity(individuals[i], individuals[j]);
269          }
270        }
271      }
272      return similarity / (count * (count - 1) / 2.0);
273    }
274  }
275}
Note: See TracBrowser for help on using the repository browser.