Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/SchemaDiversification/SchemaCreator.cs @ 12988

Last change on this file since 12988 was 12988, checked in by bburlacu, 9 years ago

#1772: Performance improvement changes

  • QueryMatch.cs: eliminate unnecessary ToList() call and expensive GetBranchLevel calls
  • Diversification: eliminated creation of shallow copies of the individual subscopes as it was either too slow (due to events being registered/deregistered when variables are added to the scope) or too leaky (if attempting to clear the scopes without clearing the variables then the code is leaking EventHandlers)
  • Aggregated diversification statistics separately with the help of some parameters set up in the SchemaCreator and SchemaEvaluator
  • Made code in the UpdateEstimatedValuesOperator perform exactly as in the evaluator (updating quality and estimated values)
  • Removed no longer needed SchemaCleanupOperator
  • Do not evaluate intermediate vertices in the genealogy analyzer if the TrimOlderGenerations flag is activated

New functionality:

  • parameter to control the fraction of the population to be considered by the diversification strategy
  • parameter to control whether individuals may be matched by any schema and mutated only once (exclusive matching)
  • parameter to control whether linear scaling should be applied to the estimated values used for the calculation of phenotypic similarity (default: yes)
File size: 11.9 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29using HeuristicLab.EvolutionTracking;
30using HeuristicLab.Parameters;
31using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
32
33namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
34  [Item("SchemaCreator", "An operator that builds schemas based on the heredity relationship in the genealogy graph.")]
35  [StorableClass]
36  public class SchemaCreator : EvolutionTrackingOperator<ISymbolicExpressionTree> {
37    private const string MinimumSchemaLengthParameterName = "MinimumSchemaLength";
38    private const string MinimumSchemaFrequencyParameterName = "MinimumSchemaFrequency";
39    private const string MinimumPhenotypicSimilarityParameterName = "MinimumPhenotypicSimilarity";
40    private const string ReplacementRatioParameterName = "ReplacementRatio";
41    private const string RandomReplacementParameterName = "RandomReplacement";
42    private const string ExecuteInParallelParameterName = "ExecuteInParallel";
43    private const string MaxDegreeOfParalellismParameterName = "MaxDegreeOfParallelism";
44    private const string PercentageOfPopulationParameterName = "PercentageOfPopulationToDiversify";
45    private const string ScaleEstimatedValuesParameterName = "ScaleEstimatedValues";
46    private const string ExclusiveMatchingParameterName = "ExclusiveMatching";
47    private const string NumberOfChangedTreesParameterName = "NumberOfChangedTrees";
48    private const string NumberOfSchemasParameterName = "NumberOfSchemas";
49    private const string AverageSchemaLengthParameterName = "AverageSchemaLength";
50
51    #region parameters
52    public IFixedValueParameter<BoolValue> ExclusiveMatchingParameter {
53      get { return (IFixedValueParameter<BoolValue>)Parameters[ExclusiveMatchingParameterName]; }
54    }
55    public IFixedValueParameter<BoolValue> ScaleEstimatedValuesParameter {
56      get { return (IFixedValueParameter<BoolValue>)Parameters[ScaleEstimatedValuesParameterName]; }
57    }
58    public IFixedValueParameter<PercentValue> PercentageOfPopulationParameter {
59      get { return (IFixedValueParameter<PercentValue>)Parameters[PercentageOfPopulationParameterName]; }
60    }
61    public IFixedValueParameter<IntValue> MinimumSchemaLengthParameter {
62      get { return (IFixedValueParameter<IntValue>)Parameters[MinimumSchemaLengthParameterName]; }
63    }
64    public IFixedValueParameter<BoolValue> ExecuteInParallelParameter {
65      get { return (IFixedValueParameter<BoolValue>)Parameters[ExecuteInParallelParameterName]; }
66    }
67    public IFixedValueParameter<IntValue> MaxDegreeOfParallelismParameter {
68      get { return (IFixedValueParameter<IntValue>)Parameters[MaxDegreeOfParalellismParameterName]; }
69    }
70    public IFixedValueParameter<PercentValue> MinimumSchemaFrequencyParameter {
71      get { return (IFixedValueParameter<PercentValue>)Parameters[MinimumSchemaFrequencyParameterName]; }
72    }
73    public IFixedValueParameter<PercentValue> MinimumPhenotypicSimilarityParameter {
74      get { return (IFixedValueParameter<PercentValue>)Parameters[MinimumPhenotypicSimilarityParameterName]; }
75    }
76    public IFixedValueParameter<PercentValue> ReplacementRatioParameter {
77      get { return (IFixedValueParameter<PercentValue>)Parameters[ReplacementRatioParameterName]; }
78    }
79    public IValueParameter<IntValue> NumberOfSchemasParameter {
80      get { return (IValueParameter<IntValue>)Parameters[NumberOfSchemasParameterName]; }
81    }
82    public IValueParameter<DoubleValue> AverageSchemaLengthParameter {
83      get { return (IValueParameter<DoubleValue>)Parameters[AverageSchemaLengthParameterName]; }
84    }
85    public IValueParameter<IntValue> NumberOfChangedTreesParameter {
86      get { return (IValueParameter<IntValue>)Parameters[NumberOfChangedTreesParameterName]; }
87    }
88    #endregion
89
90    #region parameter properties
91    public int MinimumSchemaLength { get { return MinimumSchemaLengthParameter.Value.Value; } }
92    public int MaxDegreeOfParallelism { get { return MaxDegreeOfParallelismParameter.Value.Value; } }
93    public bool ExecuteInParallel { get { return ExecuteInParallelParameter.Value.Value; } }
94    public double PercentageOfPopulation { get { return PercentageOfPopulationParameter.Value.Value; } }
95    #endregion
96
97    private UpdateEstimatedValuesOperator updateEstimatedValuesOperator;
98    private DiversificationStatisticsOperator diversificationStatisticsOperator;
99
100    public SchemaCreator() {
101      Parameters.Add(new FixedValueParameter<IntValue>(MinimumSchemaLengthParameterName, new IntValue(10)));
102      Parameters.Add(new FixedValueParameter<PercentValue>(MinimumSchemaFrequencyParameterName, new PercentValue(0.01)));
103      Parameters.Add(new FixedValueParameter<PercentValue>(MinimumPhenotypicSimilarityParameterName, new PercentValue(0.9)));
104      Parameters.Add(new FixedValueParameter<PercentValue>(ReplacementRatioParameterName, new PercentValue(0.9)));
105      Parameters.Add(new FixedValueParameter<PercentValue>(PercentageOfPopulationParameterName, new PercentValue(1)));
106      Parameters.Add(new FixedValueParameter<BoolValue>(RandomReplacementParameterName, new BoolValue(false)));
107      Parameters.Add(new FixedValueParameter<BoolValue>(ExecuteInParallelParameterName, new BoolValue(false)));
108      Parameters.Add(new FixedValueParameter<IntValue>(MaxDegreeOfParalellismParameterName, new IntValue(-1)));
109      Parameters.Add(new FixedValueParameter<BoolValue>(ScaleEstimatedValuesParameterName, new BoolValue(true)));
110      Parameters.Add(new FixedValueParameter<BoolValue>(ExclusiveMatchingParameterName, new BoolValue(false)));
111      Parameters.Add(new ValueParameter<IntValue>(NumberOfChangedTreesParameterName, new IntValue(0)));
112      Parameters.Add(new ValueParameter<IntValue>(NumberOfSchemasParameterName, new IntValue(0)));
113      Parameters.Add(new ValueParameter<DoubleValue>(AverageSchemaLengthParameterName, new DoubleValue(0)));
114
115      NumberOfChangedTreesParameter.Hidden = true;
116      NumberOfSchemasParameter.Hidden = true;
117      AverageSchemaLengthParameter.Hidden = true;
118
119      ExecuteInParallelParameter.Hidden = true;
120      MaxDegreeOfParallelismParameter.Hidden = true;
121    }
122
123    protected SchemaCreator(SchemaCreator original, Cloner cloner) : base(original, cloner) { }
124
125    public override IDeepCloneable Clone(Cloner cloner) {
126      return new SchemaCreator(this, cloner);
127    }
128
129    [StorableConstructor]
130    protected SchemaCreator(bool deserializing) : base(deserializing) { }
131
132    public override IOperation Apply() {
133      // apply only when at least one generation has passed
134      var gen = Generations.Value;
135      if (gen < 1 || GenealogyGraph == null)
136        return base.Apply();
137
138      var n = (int)Math.Round(ExecutionContext.Scope.SubScopes.Count * PercentageOfPopulation);
139      var scopes = new ScopeList(ExecutionContext.Scope.SubScopes.Take(n));
140
141      // for now, only consider crossover offspring
142      var vertices = from s in scopes
143                     let t = (ISymbolicExpressionTree)s.Variables["SymbolicExpressionTree"].Value
144                     let v = GenealogyGraph.GetByContent(t)
145                     where v.InDegree == 2
146                     select v;
147
148      var groups = vertices.GroupBy(x => x.Parents.First()).OrderByDescending(g => g.Count()).ToList();
149      var anySubtreeSymbol = new AnySubtreeSymbol();
150
151      var updateEstimatedValues = new OperationCollection { Parallel = true };
152      if (updateEstimatedValuesOperator == null)
153        updateEstimatedValuesOperator = new UpdateEstimatedValuesOperator();
154
155      foreach (var s in scopes) {
156        if (!s.Variables.ContainsKey("EstimatedValues"))
157          s.Variables.Add(new Core.Variable("EstimatedValues"));
158        updateEstimatedValues.Add(ExecutionContext.CreateChildOperation(updateEstimatedValuesOperator, s));
159      }
160
161      var evaluateSchemas = new OperationCollection();
162      var schemas = new List<ISymbolicExpressionTree>();
163      var wildCardCounts = new List<double>();
164      #region create schemas and add subscopes representing the individuals
165      foreach (var g in groups) {
166        var parent = g.Key;
167        if (parent.Data.Length < MinimumSchemaLength)
168          continue;
169        bool replaced = false;
170        var schema = (ISymbolicExpressionTree)parent.Data.Clone();
171        var nodes = schema.IterateNodesPrefix().ToList();
172        var arcs = g.Select(x => x.InArcs.Last()).Where(x => x.Data != null);
173        var indices = arcs.Select(x => ((IFragment<ISymbolicExpressionTreeNode>)x.Data).Index1).Distinct().ToArray();
174        Array.Sort(indices);
175        var nodesToReplace = indices.Select(x => nodes[x]).ToList();
176        int w = 0;
177        for (int i = nodesToReplace.Count - 1; i >= 0; --i) {
178          var node = nodesToReplace[i];
179
180          // do not replace the node with a wildcard if it would result in a length < MinimumSchemaLength
181          if ((schema.Length - node.GetLength() + 1) < MinimumSchemaLength)
182            continue;
183
184          var replacement = anySubtreeSymbol.CreateTreeNode();
185          ReplaceSubtree(node, replacement, false);
186          replaced = true;
187          ++w;
188        }
189        if (replaced) {
190          // store the schemas and the number of wildcards they contain in two lists
191          schemas.Add(schema);
192          wildCardCounts.Add(w);
193        }
194      }
195
196      for (int i = 0; i < schemas.Count; ++i) {
197        var schema = schemas[i];
198        evaluateSchemas.Add(ExecutionContext.CreateChildOperation(new SchemaEvaluator { Schema = schema }, ExecutionContext.Scope));
199      }
200      #endregion
201
202      if (diversificationStatisticsOperator == null)
203        diversificationStatisticsOperator = new DiversificationStatisticsOperator();
204
205      var calculateStatistics = ExecutionContext.CreateChildOperation(diversificationStatisticsOperator);
206
207      // set parameters for statistics
208      AverageSchemaLengthParameter.Value = new DoubleValue(schemas.Average(x => x.Length));
209      NumberOfSchemasParameter.Value = new IntValue(schemas.Count);
210      NumberOfChangedTreesParameter.Value = new IntValue(0);
211
212      // return an operation collection containing all the scope operations + base.Apply()
213      return new OperationCollection { updateEstimatedValues, evaluateSchemas, calculateStatistics, base.Apply() };
214    }
215
216    private static void ReplaceSubtree(ISymbolicExpressionTreeNode original, ISymbolicExpressionTreeNode replacement,
217      bool preserveChildren = true) {
218      var parent = original.Parent;
219      if (parent == null)
220        throw new ArgumentException("Parent cannot be null for node " + original);
221      var index = parent.IndexOfSubtree(original);
222      parent.RemoveSubtree(index);
223      parent.InsertSubtree(index, replacement);
224
225      if (preserveChildren) {
226        var subtrees = original.Subtrees.ToList();
227
228        for (int i = subtrees.Count - 1; i >= 0; --i)
229          original.RemoveSubtree(i);
230
231        for (int i = 0; i < subtrees.Count; ++i) {
232          replacement.AddSubtree(subtrees[i]);
233        }
234      }
235    }
236  }
237}
Note: See TracBrowser for help on using the repository browser.