source: branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/SchemaDiversification/SchemaCreator.cs @ 15482

Last change on this file since 15482 was 15482, checked in by bburlacu, 5 years ago

#1772: Add the option to specify a separate mutation operation for schema diversification (independent of the one used in the algorithm main loop). Refactor duplicated code for schema generation in SchemaCreator.cs.

File size: 18.7 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29using HeuristicLab.EvolutionTracking;
30using HeuristicLab.Parameters;
31using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
32using HeuristicLab.PluginInfrastructure;
33
34namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
35  using Vertices = IEnumerable<IGenealogyGraphNode<ISymbolicExpressionTree>>;
36
37  [Item("SchemaCreator", "An operator that builds schemas based on the heredity relationship in the genealogy graph.")]
38  [StorableClass]
39  public class SchemaCreator : EvolutionTrackingOperator<ISymbolicExpressionTree> {
40    #region parameter names
41    private const string MinimumSchemaLengthParameterName = "MinimumSchemaLength";
42    private const string MinimumSchemaFrequencyParameterName = "MinimumSchemaFrequency";
43    private const string MinimumPhenotypicSimilarityParameterName = "MinimumPhenotypicSimilarity";
44    private const string ReplacementRatioParameterName = "ReplacementRatio";
45    private const string RandomReplacementParameterName = "RandomReplacement";
46    private const string ExecuteInParallelParameterName = "ExecuteInParallel";
47    private const string MaxDegreeOfParalellismParameterName = "MaxDegreeOfParallelism";
48    private const string PercentageOfPopulationParameterName = "PercentageOfPopulationToDiversify";
49    private const string ScaleEstimatedValuesParameterName = "ScaleEstimatedValues";
50    private const string ExclusiveMatchingParameterName = "ExclusiveMatching";
51    private const string NumberOfChangedTreesParameterName = "NumberOfChangedTrees";
52    private const string NumberOfSchemasParameterName = "NumberOfSchemas";
53    private const string AverageSchemaLengthParameterName = "AverageSchemaLength";
54    private const string UseAdaptiveReplacementRatioParameterName = "UseAdaptiveReplacementRatio";
55    private const string ReplacementRatioUpdateRuleParameterName = "ReplacementRatioUpdateRule";
56    private const string StrictSchemaMatchingParameterName = "StrictSchemaMatching";
57    private const string SchemaDefinitionParameterName = "SchemaDefinition";
58    private const string SchemaManipulatorParameterName = "SchemaManipulator";
59    #endregion
60
61    #region parameters
62    public IConstrainedValueParameter<ISymbolicExpressionTreeManipulator> SchemaManipulatorParameter {
63      get { return (IConstrainedValueParameter<ISymbolicExpressionTreeManipulator>)Parameters[SchemaManipulatorParameterName]; }
64    }
65    public IConstrainedValueParameter<StringValue> SchemaDefinitionParameter {
66      get { return (IConstrainedValueParameter<StringValue>)Parameters[SchemaDefinitionParameterName]; }
67    }
68    public IConstrainedValueParameter<StringValue> ReplacementRatioUpdateRuleParameter {
69      get { return (IConstrainedValueParameter<StringValue>)Parameters[ReplacementRatioUpdateRuleParameterName]; }
70    }
71    public IFixedValueParameter<BoolValue> UseAdaptiveReplacementRatioParameter {
72      get { return (IFixedValueParameter<BoolValue>)Parameters[UseAdaptiveReplacementRatioParameterName]; }
73    }
74    public IFixedValueParameter<BoolValue> StrictSchemaMatchingParameter {
75      get { return (IFixedValueParameter<BoolValue>)Parameters[StrictSchemaMatchingParameterName]; }
76    }
77    public IFixedValueParameter<BoolValue> ExclusiveMatchingParameter {
78      get { return (IFixedValueParameter<BoolValue>)Parameters[ExclusiveMatchingParameterName]; }
79    }
80    public IFixedValueParameter<BoolValue> ScaleEstimatedValuesParameter {
81      get { return (IFixedValueParameter<BoolValue>)Parameters[ScaleEstimatedValuesParameterName]; }
82    }
83    public IFixedValueParameter<PercentValue> PercentageOfPopulationParameter {
84      get { return (IFixedValueParameter<PercentValue>)Parameters[PercentageOfPopulationParameterName]; }
85    }
86    public IFixedValueParameter<IntValue> MinimumSchemaLengthParameter {
87      get { return (IFixedValueParameter<IntValue>)Parameters[MinimumSchemaLengthParameterName]; }
88    }
89    public IFixedValueParameter<BoolValue> ExecuteInParallelParameter {
90      get { return (IFixedValueParameter<BoolValue>)Parameters[ExecuteInParallelParameterName]; }
91    }
92    public IFixedValueParameter<IntValue> MaxDegreeOfParallelismParameter {
93      get { return (IFixedValueParameter<IntValue>)Parameters[MaxDegreeOfParalellismParameterName]; }
94    }
95    public IFixedValueParameter<PercentValue> MinimumSchemaFrequencyParameter {
96      get { return (IFixedValueParameter<PercentValue>)Parameters[MinimumSchemaFrequencyParameterName]; }
97    }
98    public IFixedValueParameter<PercentValue> MinimumPhenotypicSimilarityParameter {
99      get { return (IFixedValueParameter<PercentValue>)Parameters[MinimumPhenotypicSimilarityParameterName]; }
100    }
101    public IFixedValueParameter<PercentValue> ReplacementRatioParameter {
102      get { return (IFixedValueParameter<PercentValue>)Parameters[ReplacementRatioParameterName]; }
103    }
104    public IValueParameter<IntValue> NumberOfSchemasParameter {
105      get { return (IValueParameter<IntValue>)Parameters[NumberOfSchemasParameterName]; }
106    }
107    public IValueParameter<DoubleValue> AverageSchemaLengthParameter {
108      get { return (IValueParameter<DoubleValue>)Parameters[AverageSchemaLengthParameterName]; }
109    }
110    public IValueParameter<IntValue> NumberOfChangedTreesParameter {
111      get { return (IValueParameter<IntValue>)Parameters[NumberOfChangedTreesParameterName]; }
112    }
113    #endregion
114
115    #region parameter properties
116    public int MinimumSchemaLength { get { return MinimumSchemaLengthParameter.Value.Value; } }
117    public int MaxDegreeOfParallelism { get { return MaxDegreeOfParallelismParameter.Value.Value; } }
118    public bool ExecuteInParallel { get { return ExecuteInParallelParameter.Value.Value; } }
119    public double PercentageOfPopulation { get { return PercentageOfPopulationParameter.Value.Value; } }
120    public bool StrictSchemaMatching { get { return StrictSchemaMatchingParameter.Value.Value; } }
121    #endregion
122
123    private UpdateQualityOperator updateQualityOperator;
124    private DiversificationStatisticsOperator diversificationStatisticsOperator;
125
126    public override void ClearState() {
127      NumberOfChangedTreesParameter.Value.Value = 0;
128      NumberOfChangedTreesParameter.Value.Value = 0;
129      AverageSchemaLengthParameter.Value.Value = 0;
130      base.ClearState();
131    }
132
133    public SchemaCreator() {
134      #region add parameters
135      Parameters.Add(new FixedValueParameter<IntValue>(MinimumSchemaLengthParameterName, new IntValue(10)));
136      Parameters.Add(new FixedValueParameter<PercentValue>(MinimumSchemaFrequencyParameterName, new PercentValue(0.01)));
137      Parameters.Add(new FixedValueParameter<PercentValue>(MinimumPhenotypicSimilarityParameterName, new PercentValue(0.9)));
138      Parameters.Add(new FixedValueParameter<PercentValue>(ReplacementRatioParameterName, new PercentValue(0.9)));
139      Parameters.Add(new FixedValueParameter<PercentValue>(PercentageOfPopulationParameterName, new PercentValue(1)));
140      Parameters.Add(new FixedValueParameter<BoolValue>(RandomReplacementParameterName, new BoolValue(false)));
141      Parameters.Add(new FixedValueParameter<BoolValue>(ExecuteInParallelParameterName, new BoolValue(false)));
142      Parameters.Add(new FixedValueParameter<IntValue>(MaxDegreeOfParalellismParameterName, new IntValue(-1)));
143      Parameters.Add(new FixedValueParameter<BoolValue>(ScaleEstimatedValuesParameterName, new BoolValue(true)));
144      Parameters.Add(new FixedValueParameter<BoolValue>(ExclusiveMatchingParameterName, new BoolValue(false)));
145      Parameters.Add(new FixedValueParameter<BoolValue>(StrictSchemaMatchingParameterName, new BoolValue(false)));
146      Parameters.Add(new ValueParameter<IntValue>(NumberOfChangedTreesParameterName, new IntValue(0)));
147      Parameters.Add(new ValueParameter<IntValue>(NumberOfSchemasParameterName, new IntValue(0)));
148      Parameters.Add(new ValueParameter<DoubleValue>(AverageSchemaLengthParameterName, new DoubleValue(0)));
149      Parameters.Add(new FixedValueParameter<BoolValue>(UseAdaptiveReplacementRatioParameterName, new BoolValue(true)));
150
151      // add update rules
152      var replacementRatioUpdateRules = new ItemSet<StringValue>(new[] {
153        new StringValue("f(x) = x"),
154        new StringValue("f(x) = tanh(x)"),
155        new StringValue("f(x) = tanh(2x)"),
156        new StringValue("f(x) = tanh(3x)"),
157        new StringValue("f(x) = tanh(4x)"),
158        new StringValue("f(x) = 1-sqrt(1-x)")
159      });
160      var replacementRatioUpdateRuleParameter = new ConstrainedValueParameter<StringValue>(ReplacementRatioUpdateRuleParameterName, replacementRatioUpdateRules);
161      replacementRatioUpdateRuleParameter.Value = replacementRatioUpdateRules.First();
162      Parameters.Add(replacementRatioUpdateRuleParameter);
163
164      // add schema definitions
165      var schemaDefinitions = new ItemSet<StringValue>(new[] { "=", "#", "=,#" }.Select(x => new StringValue(x)));
166      var schemaDefinitionParameter = new ConstrainedValueParameter<StringValue>(SchemaDefinitionParameterName, schemaDefinitions);
167      schemaDefinitionParameter.Value = schemaDefinitions.First();
168      Parameters.Add(schemaDefinitionParameter);
169
170      // use a separate set of manipulators in order to allow the user to specify different mutations for schemas
171      // and not be limited to the manipulator parameter for the whole algorithm
172      var manipulators = ApplicationManager.Manager.GetTypes(typeof(ISymbolicExpressionTreeManipulator))
173                                           .Where(x => !typeof(IMultiOperator<ISymbolicExpressionTreeManipulator>).IsAssignableFrom(x)
174                                                       && !typeof(ISymbolicExpressionTreeArchitectureAlteringOperator).IsAssignableFrom(x))
175                                           .Select(x => (ISymbolicExpressionTreeManipulator)Activator.CreateInstance(x)).ToList();
176      manipulators.Add(new MultiSymbolicExpressionTreeManipulator());
177
178      // add individual manipulators
179      var manipulatorParameter = new ConstrainedValueParameter<ISymbolicExpressionTreeManipulator>(SchemaManipulatorParameterName, new ItemSet<ISymbolicExpressionTreeManipulator>(manipulators));
180      // add a multi manipulator as well
181      manipulatorParameter.Value = manipulators.First();
182      Parameters.Add(manipulatorParameter);
183      #endregion
184
185      NumberOfChangedTreesParameter.Hidden = true;
186      NumberOfSchemasParameter.Hidden = true;
187      AverageSchemaLengthParameter.Hidden = true;
188
189      ExecuteInParallelParameter.Hidden = true;
190      MaxDegreeOfParallelismParameter.Hidden = true;
191    }
192
193    protected SchemaCreator(SchemaCreator original, Cloner cloner) : base(original, cloner) { }
194
195    public override IDeepCloneable Clone(Cloner cloner) {
196      return new SchemaCreator(this, cloner);
197    }
198
199    [StorableConstructor]
200    protected SchemaCreator(bool deserializing) : base(deserializing) { }
201
202
203    [StorableHook(HookType.AfterDeserialization)]
204    private void AfterDeserialization() {
205      if (!Parameters.ContainsKey(StrictSchemaMatchingParameterName))
206        Parameters.Add(new FixedValueParameter<BoolValue>(StrictSchemaMatchingParameterName, new BoolValue(false)));
207    }
208
209    public override IOperation Apply() {
210      // apply only when at least one generation has passed
211      var gen = Generations.Value;
212      if (gen < 1 || GenealogyGraph == null)
213        return base.Apply();
214
215      var n = (int)Math.Round(ExecutionContext.Scope.SubScopes.Count * PercentageOfPopulation);
216
217      var updateEstimatedValues = new OperationCollection { Parallel = true };
218      if (updateQualityOperator == null)
219        updateQualityOperator = new UpdateQualityOperator();
220
221      foreach (var s in ExecutionContext.Scope.SubScopes.Where(s => !s.Variables.ContainsKey("EstimatedValues"))) {
222        updateEstimatedValues.Add(ExecutionContext.CreateChildOperation(updateQualityOperator, s));
223      }
224
225      var replacementRule = ReplacementRatioUpdateRuleParameter.Value.Value;
226      var schemaManipulator = SchemaManipulatorParameter.Value;
227
228      var evaluateSchemas = new OperationCollection();
229
230      // for now, only consider crossover offspring
231      var scopes = new ScopeList(ExecutionContext.Scope.SubScopes.OrderByDescending(x => ((DoubleValue)x.Variables["Quality"].Value).Value).Take(n));
232      var vertices = from s in scopes
233                     let t = (ISymbolicExpressionTree)s.Variables["SymbolicExpressionTree"].Value
234                     let v = GenealogyGraph.GetByContent(t)
235                     where v.InDegree == 2
236                     select v;
237
238      List<ISymbolicExpressionTree> schemas;
239      switch (SchemaDefinitionParameter.Value.Value) {
240        case "=":
241          schemas = GenerateAnyNodeSchemas(vertices, MinimumSchemaLength, 0, StrictSchemaMatching).ToList();
242          break;
243        case "#":
244          schemas = GenerateAnySubtreeSchemas(vertices, MinimumSchemaLength, 0, StrictSchemaMatching).ToList();
245          break;
246        case "=,#":
247          schemas = GenerateCombinedSchemas(vertices, MinimumSchemaLength, 0, StrictSchemaMatching).ToList();
248          break;
249        default:
250          return base.Apply();
251      }
252
253      if (!schemas.Any())
254        return base.Apply();
255
256      #region create schemas and add subscopes representing the individuals
257      foreach (var schema in schemas) {
258        evaluateSchemas.Add(ExecutionContext.CreateChildOperation(new SchemaEvaluator { Schema = schema, ReplacementRule = replacementRule, SchemaManipulator = schemaManipulator }, ExecutionContext.Scope));
259      }
260      #endregion
261
262      if (diversificationStatisticsOperator == null)
263        diversificationStatisticsOperator = new DiversificationStatisticsOperator();
264
265      var calculateStatistics = ExecutionContext.CreateChildOperation(diversificationStatisticsOperator);
266
267      // set parameters for statistics
268      AverageSchemaLengthParameter.Value = new DoubleValue(schemas.Average(x => x.Length));
269      NumberOfSchemasParameter.Value = new IntValue(schemas.Count);
270      NumberOfChangedTreesParameter.Value = new IntValue(0);
271
272      // return an operation collection containing all the scope operations + base.Apply()
273      return new OperationCollection { updateEstimatedValues, evaluateSchemas, calculateStatistics, base.Apply() };
274    }
275
276    #region schema generation
277    public static IEnumerable<ISymbolicExpressionTree> GenerateAnyNodeSchemas(Vertices vertices, int minimumSchemaLength, int minOffspringCount = 1, bool strict = true) {
278      return GenerateSchemas(vertices, ReplaceAnyNode, minimumSchemaLength, minOffspringCount, strict);
279    }
280
281    public static IEnumerable<ISymbolicExpressionTree> GenerateAnySubtreeSchemas(Vertices vertices, int minimumSchemaLength, int minOffspringCount = 1, bool strict = true) {
282      return GenerateSchemas(vertices, ReplaceAnySubtree, minimumSchemaLength, minOffspringCount, strict);
283    }
284
285    public static IEnumerable<ISymbolicExpressionTree> GenerateCombinedSchemas(Vertices vertices, int minimumSchemaLength, int minOffspringCount = 1, bool strict = true) {
286      return GenerateSchemas(vertices, ReplaceCombined, minimumSchemaLength, minOffspringCount, strict);
287    }
288
289    public static IEnumerable<ISymbolicExpressionTree> GenerateSchemas(Vertices vertices, Func<ISymbolicExpressionTree, ISymbolicExpressionTreeNode, int, bool> replaceFunc, int minimumSchemaLength, int minOffspringCount, bool strict = true) {
290      var anySubtreeSymbol = new AnySubtreeSymbol();
291      var groups = vertices.GroupBy(x => x.Parents.First()).Where(g => g.Skip(minOffspringCount - 1).Any()).OrderByDescending(g => g.Count());
292      var hash = new HashSet<string>();
293      foreach (var g in groups) {
294        var parent = g.Key;
295        if (parent.Data.Length < minimumSchemaLength)
296          continue;
297        bool replaced = false;
298        var schema = (ISymbolicExpressionTree)parent.Data.Clone();
299        var nodes = schema.IterateNodesPrefix().ToList();
300        var fragments = g.Select(x => x.InArcs.Last().Data).Where(x => x != null).Cast<IFragment<ISymbolicExpressionTreeNode>>();
301        var indices = fragments.Select(x => x.Index1).Distinct().OrderByDescending(x => schema.Root.GetBranchLevel(nodes[x]));
302        foreach (var i in indices) {
303          replaced |= replaceFunc(schema, nodes[i], minimumSchemaLength);
304        }
305        if (replaced) {
306          var str = schema.Root.GetSubtree(0).GetSubtree(0).FormatToString(strict);
307          if (hash.Add(str))
308            yield return schema;
309        }
310      }
311    }
312
313    // node replacement rules
314    private static bool ReplaceAnyNode(ISymbolicExpressionTree schema, ISymbolicExpressionTreeNode node, int minSchemaLength) {
315      var anyNodeSymbol = new AnyNodeSymbol(node.Symbol.MinimumArity, node.Symbol.MaximumArity);
316      var replacement = anyNodeSymbol.CreateTreeNode();
317      SchemaUtil.ReplaceSubtree(node, replacement, true);
318
319      return true;
320    }
321
322    private static bool ReplaceAnySubtree(ISymbolicExpressionTree schema, ISymbolicExpressionTreeNode node, int minSchemaLength) {
323      if (schema.Length - node.GetLength() + 1 < minSchemaLength)
324        return false;
325
326      var anySubtreeSymbol = new AnySubtreeSymbol();
327      var replacement = anySubtreeSymbol.CreateTreeNode();
328
329      SchemaUtil.ReplaceSubtree(node, replacement, false);
330      return true;
331    }
332
333    private static bool ReplaceCombined(ISymbolicExpressionTree schema, ISymbolicExpressionTreeNode node, int minSchemaLength) {
334      ISymbol wildcard;
335      if (node.SubtreeCount > 0)
336        wildcard = new AnyNodeSymbol(node.Symbol.MinimumArity, node.Symbol.MaximumArity);
337      else
338        wildcard = new AnySubtreeSymbol();
339
340      SchemaUtil.ReplaceSubtree(node, wildcard.CreateTreeNode(), node.SubtreeCount > 0);
341      return true;
342    }
343    #endregion
344  }
345}
Note: See TracBrowser for help on using the repository browser.