source: branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/SchemaDiversification/SchemaCreator.cs @ 13496

Last change on this file since 13496 was 13496, checked in by bburlacu, 5 years ago

#1772: Update diversification operators (add extra evaluations to the evaluation counter, add the option for strict schema matching, add additional replacement ratio update rules.

File size: 15.7 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29using HeuristicLab.EvolutionTracking;
30using HeuristicLab.Parameters;
31using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
32
33namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
34  [Item("SchemaCreator", "An operator that builds schemas based on the heredity relationship in the genealogy graph.")]
35  [StorableClass]
36  public class SchemaCreator : EvolutionTrackingOperator<ISymbolicExpressionTree> {
37    #region parameter names
38    private const string MinimumSchemaLengthParameterName = "MinimumSchemaLength";
39    private const string MinimumSchemaFrequencyParameterName = "MinimumSchemaFrequency";
40    private const string MinimumPhenotypicSimilarityParameterName = "MinimumPhenotypicSimilarity";
41    private const string ReplacementRatioParameterName = "ReplacementRatio";
42    private const string RandomReplacementParameterName = "RandomReplacement";
43    private const string ExecuteInParallelParameterName = "ExecuteInParallel";
44    private const string MaxDegreeOfParalellismParameterName = "MaxDegreeOfParallelism";
45    private const string PercentageOfPopulationParameterName = "PercentageOfPopulationToDiversify";
46    private const string ScaleEstimatedValuesParameterName = "ScaleEstimatedValues";
47    private const string ExclusiveMatchingParameterName = "ExclusiveMatching";
48    private const string NumberOfChangedTreesParameterName = "NumberOfChangedTrees";
49    private const string NumberOfSchemasParameterName = "NumberOfSchemas";
50    private const string AverageSchemaLengthParameterName = "AverageSchemaLength";
51    private const string UseAdaptiveReplacementRatioParameterName = "UseAdaptiveReplacementRatio";
52    private const string ReplacementRatioUpdateRuleParameterName = "ReplacementRatioUpdateRule";
53    private const string StrictSchemaMatchingParameterName = "StrictSchemaMatching";
54    #endregion
55
56    #region parameters
57
58    public IConstrainedValueParameter<StringValue> ReplacementRatioUpdateRuleParameter {
59      get { return (IConstrainedValueParameter<StringValue>)Parameters[ReplacementRatioUpdateRuleParameterName]; }
60    }
61    public IFixedValueParameter<BoolValue> UseAdaptiveReplacementRatioParameter {
62      get { return (IFixedValueParameter<BoolValue>)Parameters[UseAdaptiveReplacementRatioParameterName]; }
63    }
64    public IFixedValueParameter<BoolValue> StrictSchemaMatchingParameter {
65      get { return (IFixedValueParameter<BoolValue>)Parameters[StrictSchemaMatchingParameterName]; }
66    }
67    public IFixedValueParameter<BoolValue> ExclusiveMatchingParameter {
68      get { return (IFixedValueParameter<BoolValue>)Parameters[ExclusiveMatchingParameterName]; }
69    }
70    public IFixedValueParameter<BoolValue> ScaleEstimatedValuesParameter {
71      get { return (IFixedValueParameter<BoolValue>)Parameters[ScaleEstimatedValuesParameterName]; }
72    }
73    public IFixedValueParameter<PercentValue> PercentageOfPopulationParameter {
74      get { return (IFixedValueParameter<PercentValue>)Parameters[PercentageOfPopulationParameterName]; }
75    }
76    public IFixedValueParameter<IntValue> MinimumSchemaLengthParameter {
77      get { return (IFixedValueParameter<IntValue>)Parameters[MinimumSchemaLengthParameterName]; }
78    }
79    public IFixedValueParameter<BoolValue> ExecuteInParallelParameter {
80      get { return (IFixedValueParameter<BoolValue>)Parameters[ExecuteInParallelParameterName]; }
81    }
82    public IFixedValueParameter<IntValue> MaxDegreeOfParallelismParameter {
83      get { return (IFixedValueParameter<IntValue>)Parameters[MaxDegreeOfParalellismParameterName]; }
84    }
85    public IFixedValueParameter<PercentValue> MinimumSchemaFrequencyParameter {
86      get { return (IFixedValueParameter<PercentValue>)Parameters[MinimumSchemaFrequencyParameterName]; }
87    }
88    public IFixedValueParameter<PercentValue> MinimumPhenotypicSimilarityParameter {
89      get { return (IFixedValueParameter<PercentValue>)Parameters[MinimumPhenotypicSimilarityParameterName]; }
90    }
91    public IFixedValueParameter<PercentValue> ReplacementRatioParameter {
92      get { return (IFixedValueParameter<PercentValue>)Parameters[ReplacementRatioParameterName]; }
93    }
94    public IValueParameter<IntValue> NumberOfSchemasParameter {
95      get { return (IValueParameter<IntValue>)Parameters[NumberOfSchemasParameterName]; }
96    }
97    public IValueParameter<DoubleValue> AverageSchemaLengthParameter {
98      get { return (IValueParameter<DoubleValue>)Parameters[AverageSchemaLengthParameterName]; }
99    }
100    public IValueParameter<IntValue> NumberOfChangedTreesParameter {
101      get { return (IValueParameter<IntValue>)Parameters[NumberOfChangedTreesParameterName]; }
102    }
103    #endregion
104
105    #region parameter properties
106    public int MinimumSchemaLength { get { return MinimumSchemaLengthParameter.Value.Value; } }
107    public int MaxDegreeOfParallelism { get { return MaxDegreeOfParallelismParameter.Value.Value; } }
108    public bool ExecuteInParallel { get { return ExecuteInParallelParameter.Value.Value; } }
109    public double PercentageOfPopulation { get { return PercentageOfPopulationParameter.Value.Value; } }
110    public bool StrictSchemaMatching { get { return StrictSchemaMatchingParameter.Value.Value; } }
111    #endregion
112
113    private UpdateEstimatedValuesOperator updateEstimatedValuesOperator;
114    private DiversificationStatisticsOperator diversificationStatisticsOperator;
115
116    public override void ClearState() {
117      NumberOfChangedTreesParameter.Value.Value = 0;
118      NumberOfChangedTreesParameter.Value.Value = 0;
119      AverageSchemaLengthParameter.Value.Value = 0;
120      base.ClearState();
121    }
122
123    public SchemaCreator() {
124      #region add parameters
125      Parameters.Add(new FixedValueParameter<IntValue>(MinimumSchemaLengthParameterName, new IntValue(10)));
126      Parameters.Add(new FixedValueParameter<PercentValue>(MinimumSchemaFrequencyParameterName, new PercentValue(0.01)));
127      Parameters.Add(new FixedValueParameter<PercentValue>(MinimumPhenotypicSimilarityParameterName, new PercentValue(0.9)));
128      Parameters.Add(new FixedValueParameter<PercentValue>(ReplacementRatioParameterName, new PercentValue(0.9)));
129      Parameters.Add(new FixedValueParameter<PercentValue>(PercentageOfPopulationParameterName, new PercentValue(1)));
130      Parameters.Add(new FixedValueParameter<BoolValue>(RandomReplacementParameterName, new BoolValue(false)));
131      Parameters.Add(new FixedValueParameter<BoolValue>(ExecuteInParallelParameterName, new BoolValue(false)));
132      Parameters.Add(new FixedValueParameter<IntValue>(MaxDegreeOfParalellismParameterName, new IntValue(-1)));
133      Parameters.Add(new FixedValueParameter<BoolValue>(ScaleEstimatedValuesParameterName, new BoolValue(true)));
134      Parameters.Add(new FixedValueParameter<BoolValue>(ExclusiveMatchingParameterName, new BoolValue(false)));
135      Parameters.Add(new FixedValueParameter<BoolValue>(StrictSchemaMatchingParameterName, new BoolValue(false)));
136      Parameters.Add(new ValueParameter<IntValue>(NumberOfChangedTreesParameterName, new IntValue(0)));
137      Parameters.Add(new ValueParameter<IntValue>(NumberOfSchemasParameterName, new IntValue(0)));
138      Parameters.Add(new ValueParameter<DoubleValue>(AverageSchemaLengthParameterName, new DoubleValue(0)));
139      Parameters.Add(new FixedValueParameter<BoolValue>(UseAdaptiveReplacementRatioParameterName, new BoolValue(true)));
140
141      var replacementRatioUpdateRules = new ItemSet<StringValue>(new[] {
142        new StringValue("f(x) = x"),
143        new StringValue("f(x) = tanh(x)"),
144        new StringValue("f(x) = tanh(2x)"),
145        new StringValue("f(x) = tanh(3x)"),
146        new StringValue("f(x) = tanh(4x)"),
147        new StringValue("f(x) = 1-sqrt(1-x)")
148      });
149      Parameters.Add(new ConstrainedValueParameter<StringValue>(ReplacementRatioUpdateRuleParameterName, replacementRatioUpdateRules));
150      #endregion
151      NumberOfChangedTreesParameter.Hidden = true;
152      NumberOfSchemasParameter.Hidden = true;
153      AverageSchemaLengthParameter.Hidden = true;
154
155      ExecuteInParallelParameter.Hidden = true;
156      MaxDegreeOfParallelismParameter.Hidden = true;
157    }
158
159    protected SchemaCreator(SchemaCreator original, Cloner cloner) : base(original, cloner) { }
160
161    public override IDeepCloneable Clone(Cloner cloner) {
162      return new SchemaCreator(this, cloner);
163    }
164
165    [StorableConstructor]
166    protected SchemaCreator(bool deserializing) : base(deserializing) { }
167
168
169    [StorableHook(HookType.AfterDeserialization)]
170    private void AfterDeserialization() {
171      if (!Parameters.ContainsKey(StrictSchemaMatchingParameterName))
172        Parameters.Add(new FixedValueParameter<BoolValue>(StrictSchemaMatchingParameterName, new BoolValue(false)));
173    }
174
175    public override IOperation Apply() {
176      // apply only when at least one generation has passed
177      var gen = Generations.Value;
178      if (gen < 1 || GenealogyGraph == null)
179        return base.Apply();
180
181      var n = (int)Math.Round(ExecutionContext.Scope.SubScopes.Count * PercentageOfPopulation);
182
183      var updateEstimatedValues = new OperationCollection { Parallel = true };
184      if (updateEstimatedValuesOperator == null)
185        updateEstimatedValuesOperator = new UpdateEstimatedValuesOperator();
186
187      foreach (var s in ExecutionContext.Scope.SubScopes.Where(s => !s.Variables.ContainsKey("EstimatedValues"))) {
188        updateEstimatedValues.Add(ExecutionContext.CreateChildOperation(updateEstimatedValuesOperator, s));
189      }
190
191      Func<double, double> rule;
192      var replacementRule = ReplacementRatioUpdateRuleParameter.Value.Value;
193
194      switch (replacementRule) {
195        case "f(x) = x": {
196            rule = x => x;
197            break;
198          }
199        case "f(x) = tanh(x)": {
200            rule = x => Math.Tanh(x);
201            break;
202          }
203        case "f(x) = tanh(2x)": {
204            rule = x => Math.Tanh(2 * x);
205            break;
206          }
207        case "f(x) = tanh(3x)": {
208            rule = x => Math.Tanh(3 * x);
209            break;
210          }
211        case "f(x) = tanh(4x)": {
212            rule = x => Math.Tanh(4 * x);
213            break;
214          }
215        case "f(x) = 1-sqrt(1-x)": {
216            rule = x => 1 - Math.Sqrt(1 - x);
217            break;
218          }
219        default:
220          throw new ArgumentException("Unknown replacement rule");
221      }
222
223      var evaluateSchemas = new OperationCollection();
224
225      // for now, only consider crossover offspring
226      var scopes = new ScopeList(ExecutionContext.Scope.SubScopes.OrderByDescending(x => ((DoubleValue)x.Variables["Quality"].Value).Value).Take(n));
227      var vertices = from s in scopes
228                     let t = (ISymbolicExpressionTree)s.Variables["SymbolicExpressionTree"].Value
229                     let v = GenealogyGraph.GetByContent(t)
230                     where v.InDegree == 2
231                     select v;
232
233      var schemas = new List<ISymbolicExpressionTree>(GenerateSchemas(vertices, MinimumSchemaLength));
234
235      #region create schemas and add subscopes representing the individuals
236      foreach (var schema in schemas) {
237        evaluateSchemas.Add(ExecutionContext.CreateChildOperation(new SchemaEvaluator { Schema = schema, ReplacementRule = rule }, ExecutionContext.Scope));
238      }
239      #endregion
240
241      if (diversificationStatisticsOperator == null)
242        diversificationStatisticsOperator = new DiversificationStatisticsOperator();
243
244      var calculateStatistics = ExecutionContext.CreateChildOperation(diversificationStatisticsOperator);
245
246      // set parameters for statistics
247      AverageSchemaLengthParameter.Value = new DoubleValue(schemas.Average(x => x.Length));
248      NumberOfSchemasParameter.Value = new IntValue(schemas.Count);
249      NumberOfChangedTreesParameter.Value = new IntValue(0);
250
251      // return an operation collection containing all the scope operations + base.Apply()
252      return new OperationCollection { updateEstimatedValues, evaluateSchemas, calculateStatistics, base.Apply() };
253    }
254
255    public static IEnumerable<ISymbolicExpressionTree> GenerateSchemas(IEnumerable<IGenealogyGraphNode<ISymbolicExpressionTree>> vertices, int minimumSchemaLength) {
256      var anySubtreeSymbol = new AnySubtreeSymbol();
257      //            var anyNodeSymbol = new AnyNodeSymbol();
258      var groups = vertices.GroupBy(x => x.Parents.First()).OrderByDescending(g => g.Count()).ToList();
259      var hash = new HashSet<string>();
260      var formatter = new SymbolicExpressionTreeStringFormatter { Indent = false, AppendNewLines = false };
261      foreach (var g in groups) {
262        var parent = g.Key;
263        if (parent.Data.Length < minimumSchemaLength)
264          continue;
265        bool replaced = false;
266        var schema = (ISymbolicExpressionTree)parent.Data.Clone();
267        var nodes = schema.IterateNodesPrefix().ToList();
268        var arcs = g.Select(x => x.InArcs.Last()).Where(x => x.Data != null);
269        var indices = (from arc in arcs
270                       let fragment = (IFragment<ISymbolicExpressionTreeNode>)arc.Data
271                       select fragment.Index1).Distinct().ToArray();
272        Array.Sort(indices);
273        var nodesToReplace = indices.Select(x => nodes[x]).ToList();
274        for (int i = nodesToReplace.Count - 1; i >= 0; --i) {
275          var node = nodesToReplace[i];
276
277          // do not replace the node with a wildcard if it would result in a length < MinimumSchemaLength
278          if (schema.Length - node.GetLength() + 1 < minimumSchemaLength)
279            continue;
280
281          var replacement = anySubtreeSymbol.CreateTreeNode();
282          ReplaceSubtree(node, replacement, false);
283          //          var replacement = new AnyNodeSymbol(node.Symbol.MinimumArity, node.Symbol.MinimumArity).CreateTreeNode();
284          //          ReplaceSubtree(node, replacement, true);
285          replaced = true;
286        }
287        if (replaced) {
288          var str = formatter.Format(schema.Root.GetSubtree(0).GetSubtree(0));
289          if (hash.Contains(str)) continue;
290          yield return schema;
291          hash.Add(str);
292        }
293      }
294    }
295
296    private static void ReplaceSubtree(ISymbolicExpressionTreeNode original, ISymbolicExpressionTreeNode replacement, bool preserveChildren = true) {
297      var parent = original.Parent;
298      if (parent == null)
299        throw new ArgumentException("Parent cannot be null for node " + original);
300      var index = parent.IndexOfSubtree(original);
301      parent.RemoveSubtree(index);
302      parent.InsertSubtree(index, replacement);
303
304      if (preserveChildren) {
305        var subtrees = original.Subtrees.ToList();
306
307        for (int i = subtrees.Count - 1; i >= 0; --i)
308          original.RemoveSubtree(i);
309
310        for (int i = 0; i < subtrees.Count; ++i) {
311          replacement.AddSubtree(subtrees[i]);
312        }
313      }
314    }
315  }
316}
Note: See TracBrowser for help on using the repository browser.