Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/SchemaDiversification/SchemaCreator.cs @ 13693

Last change on this file since 13693 was 13625, checked in by bburlacu, 9 years ago

#1772: Small refactor of the SchemaCreator to avoid generating duplicate schemas when non-strict matching is used.

File size: 16.6 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using System.Text;
26using HeuristicLab.Common;
27using HeuristicLab.Core;
28using HeuristicLab.Data;
29using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
30using HeuristicLab.EvolutionTracking;
31using HeuristicLab.Parameters;
32using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
33
34namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
35  [Item("SchemaCreator", "An operator that builds schemas based on the heredity relationship in the genealogy graph.")]
36  [StorableClass]
37  public class SchemaCreator : EvolutionTrackingOperator<ISymbolicExpressionTree> {
38    #region parameter names
39    private const string MinimumSchemaLengthParameterName = "MinimumSchemaLength";
40    private const string MinimumSchemaFrequencyParameterName = "MinimumSchemaFrequency";
41    private const string MinimumPhenotypicSimilarityParameterName = "MinimumPhenotypicSimilarity";
42    private const string ReplacementRatioParameterName = "ReplacementRatio";
43    private const string RandomReplacementParameterName = "RandomReplacement";
44    private const string ExecuteInParallelParameterName = "ExecuteInParallel";
45    private const string MaxDegreeOfParalellismParameterName = "MaxDegreeOfParallelism";
46    private const string PercentageOfPopulationParameterName = "PercentageOfPopulationToDiversify";
47    private const string ScaleEstimatedValuesParameterName = "ScaleEstimatedValues";
48    private const string ExclusiveMatchingParameterName = "ExclusiveMatching";
49    private const string NumberOfChangedTreesParameterName = "NumberOfChangedTrees";
50    private const string NumberOfSchemasParameterName = "NumberOfSchemas";
51    private const string AverageSchemaLengthParameterName = "AverageSchemaLength";
52    private const string UseAdaptiveReplacementRatioParameterName = "UseAdaptiveReplacementRatio";
53    private const string ReplacementRatioUpdateRuleParameterName = "ReplacementRatioUpdateRule";
54    private const string StrictSchemaMatchingParameterName = "StrictSchemaMatching";
55    #endregion
56
57    #region parameters
58
59    public IConstrainedValueParameter<StringValue> ReplacementRatioUpdateRuleParameter {
60      get { return (IConstrainedValueParameter<StringValue>)Parameters[ReplacementRatioUpdateRuleParameterName]; }
61    }
62    public IFixedValueParameter<BoolValue> UseAdaptiveReplacementRatioParameter {
63      get { return (IFixedValueParameter<BoolValue>)Parameters[UseAdaptiveReplacementRatioParameterName]; }
64    }
65    public IFixedValueParameter<BoolValue> StrictSchemaMatchingParameter {
66      get { return (IFixedValueParameter<BoolValue>)Parameters[StrictSchemaMatchingParameterName]; }
67    }
68    public IFixedValueParameter<BoolValue> ExclusiveMatchingParameter {
69      get { return (IFixedValueParameter<BoolValue>)Parameters[ExclusiveMatchingParameterName]; }
70    }
71    public IFixedValueParameter<BoolValue> ScaleEstimatedValuesParameter {
72      get { return (IFixedValueParameter<BoolValue>)Parameters[ScaleEstimatedValuesParameterName]; }
73    }
74    public IFixedValueParameter<PercentValue> PercentageOfPopulationParameter {
75      get { return (IFixedValueParameter<PercentValue>)Parameters[PercentageOfPopulationParameterName]; }
76    }
77    public IFixedValueParameter<IntValue> MinimumSchemaLengthParameter {
78      get { return (IFixedValueParameter<IntValue>)Parameters[MinimumSchemaLengthParameterName]; }
79    }
80    public IFixedValueParameter<BoolValue> ExecuteInParallelParameter {
81      get { return (IFixedValueParameter<BoolValue>)Parameters[ExecuteInParallelParameterName]; }
82    }
83    public IFixedValueParameter<IntValue> MaxDegreeOfParallelismParameter {
84      get { return (IFixedValueParameter<IntValue>)Parameters[MaxDegreeOfParalellismParameterName]; }
85    }
86    public IFixedValueParameter<PercentValue> MinimumSchemaFrequencyParameter {
87      get { return (IFixedValueParameter<PercentValue>)Parameters[MinimumSchemaFrequencyParameterName]; }
88    }
89    public IFixedValueParameter<PercentValue> MinimumPhenotypicSimilarityParameter {
90      get { return (IFixedValueParameter<PercentValue>)Parameters[MinimumPhenotypicSimilarityParameterName]; }
91    }
92    public IFixedValueParameter<PercentValue> ReplacementRatioParameter {
93      get { return (IFixedValueParameter<PercentValue>)Parameters[ReplacementRatioParameterName]; }
94    }
95    public IValueParameter<IntValue> NumberOfSchemasParameter {
96      get { return (IValueParameter<IntValue>)Parameters[NumberOfSchemasParameterName]; }
97    }
98    public IValueParameter<DoubleValue> AverageSchemaLengthParameter {
99      get { return (IValueParameter<DoubleValue>)Parameters[AverageSchemaLengthParameterName]; }
100    }
101    public IValueParameter<IntValue> NumberOfChangedTreesParameter {
102      get { return (IValueParameter<IntValue>)Parameters[NumberOfChangedTreesParameterName]; }
103    }
104    #endregion
105
106    #region parameter properties
107    public int MinimumSchemaLength { get { return MinimumSchemaLengthParameter.Value.Value; } }
108    public int MaxDegreeOfParallelism { get { return MaxDegreeOfParallelismParameter.Value.Value; } }
109    public bool ExecuteInParallel { get { return ExecuteInParallelParameter.Value.Value; } }
110    public double PercentageOfPopulation { get { return PercentageOfPopulationParameter.Value.Value; } }
111    public bool StrictSchemaMatching { get { return StrictSchemaMatchingParameter.Value.Value; } }
112    #endregion
113
114    private UpdateQualityOperator updateQualityOperator;
115    private DiversificationStatisticsOperator diversificationStatisticsOperator;
116
117    public override void ClearState() {
118      NumberOfChangedTreesParameter.Value.Value = 0;
119      NumberOfChangedTreesParameter.Value.Value = 0;
120      AverageSchemaLengthParameter.Value.Value = 0;
121      base.ClearState();
122    }
123
124    public SchemaCreator() {
125      #region add parameters
126      Parameters.Add(new FixedValueParameter<IntValue>(MinimumSchemaLengthParameterName, new IntValue(10)));
127      Parameters.Add(new FixedValueParameter<PercentValue>(MinimumSchemaFrequencyParameterName, new PercentValue(0.01)));
128      Parameters.Add(new FixedValueParameter<PercentValue>(MinimumPhenotypicSimilarityParameterName, new PercentValue(0.9)));
129      Parameters.Add(new FixedValueParameter<PercentValue>(ReplacementRatioParameterName, new PercentValue(0.9)));
130      Parameters.Add(new FixedValueParameter<PercentValue>(PercentageOfPopulationParameterName, new PercentValue(1)));
131      Parameters.Add(new FixedValueParameter<BoolValue>(RandomReplacementParameterName, new BoolValue(false)));
132      Parameters.Add(new FixedValueParameter<BoolValue>(ExecuteInParallelParameterName, new BoolValue(false)));
133      Parameters.Add(new FixedValueParameter<IntValue>(MaxDegreeOfParalellismParameterName, new IntValue(-1)));
134      Parameters.Add(new FixedValueParameter<BoolValue>(ScaleEstimatedValuesParameterName, new BoolValue(true)));
135      Parameters.Add(new FixedValueParameter<BoolValue>(ExclusiveMatchingParameterName, new BoolValue(false)));
136      Parameters.Add(new FixedValueParameter<BoolValue>(StrictSchemaMatchingParameterName, new BoolValue(false)));
137      Parameters.Add(new ValueParameter<IntValue>(NumberOfChangedTreesParameterName, new IntValue(0)));
138      Parameters.Add(new ValueParameter<IntValue>(NumberOfSchemasParameterName, new IntValue(0)));
139      Parameters.Add(new ValueParameter<DoubleValue>(AverageSchemaLengthParameterName, new DoubleValue(0)));
140      Parameters.Add(new FixedValueParameter<BoolValue>(UseAdaptiveReplacementRatioParameterName, new BoolValue(true)));
141
142      var replacementRatioUpdateRules = new ItemSet<StringValue>(new[] {
143        new StringValue("f(x) = x"),
144        new StringValue("f(x) = tanh(x)"),
145        new StringValue("f(x) = tanh(2x)"),
146        new StringValue("f(x) = tanh(3x)"),
147        new StringValue("f(x) = tanh(4x)"),
148        new StringValue("f(x) = 1-sqrt(1-x)")
149      });
150      Parameters.Add(new ConstrainedValueParameter<StringValue>(ReplacementRatioUpdateRuleParameterName, replacementRatioUpdateRules));
151      #endregion
152      NumberOfChangedTreesParameter.Hidden = true;
153      NumberOfSchemasParameter.Hidden = true;
154      AverageSchemaLengthParameter.Hidden = true;
155
156      ExecuteInParallelParameter.Hidden = true;
157      MaxDegreeOfParallelismParameter.Hidden = true;
158    }
159
160    protected SchemaCreator(SchemaCreator original, Cloner cloner) : base(original, cloner) { }
161
162    public override IDeepCloneable Clone(Cloner cloner) {
163      return new SchemaCreator(this, cloner);
164    }
165
166    [StorableConstructor]
167    protected SchemaCreator(bool deserializing) : base(deserializing) { }
168
169
170    [StorableHook(HookType.AfterDeserialization)]
171    private void AfterDeserialization() {
172      if (!Parameters.ContainsKey(StrictSchemaMatchingParameterName))
173        Parameters.Add(new FixedValueParameter<BoolValue>(StrictSchemaMatchingParameterName, new BoolValue(false)));
174    }
175
176    public override IOperation Apply() {
177      // apply only when at least one generation has passed
178      var gen = Generations.Value;
179      if (gen < 1 || GenealogyGraph == null)
180        return base.Apply();
181
182      var n = (int)Math.Round(ExecutionContext.Scope.SubScopes.Count * PercentageOfPopulation);
183
184      var updateEstimatedValues = new OperationCollection { Parallel = true };
185      if (updateQualityOperator == null)
186        updateQualityOperator = new UpdateQualityOperator();
187
188      foreach (var s in ExecutionContext.Scope.SubScopes.Where(s => !s.Variables.ContainsKey("EstimatedValues"))) {
189        updateEstimatedValues.Add(ExecutionContext.CreateChildOperation(updateQualityOperator, s));
190      }
191
192      var replacementRule = ReplacementRatioUpdateRuleParameter.Value.Value;
193
194      var evaluateSchemas = new OperationCollection();
195
196      // for now, only consider crossover offspring
197      var scopes = new ScopeList(ExecutionContext.Scope.SubScopes.OrderByDescending(x => ((DoubleValue)x.Variables["Quality"].Value).Value).Take(n));
198      var vertices = from s in scopes
199                     let t = (ISymbolicExpressionTree)s.Variables["SymbolicExpressionTree"].Value
200                     let v = GenealogyGraph.GetByContent(t)
201                     where v.InDegree == 2
202                     select v;
203
204      var schemas = new List<ISymbolicExpressionTree>(GenerateSchemas(vertices, MinimumSchemaLength, StrictSchemaMatching));
205
206      #region create schemas and add subscopes representing the individuals
207      foreach (var schema in schemas) {
208        evaluateSchemas.Add(ExecutionContext.CreateChildOperation(new SchemaEvaluator { Schema = schema, ReplacementRule = replacementRule }, ExecutionContext.Scope));
209      }
210      #endregion
211
212      if (diversificationStatisticsOperator == null)
213        diversificationStatisticsOperator = new DiversificationStatisticsOperator();
214
215      var calculateStatistics = ExecutionContext.CreateChildOperation(diversificationStatisticsOperator);
216
217      // set parameters for statistics
218      AverageSchemaLengthParameter.Value = new DoubleValue(schemas.Average(x => x.Length));
219      NumberOfSchemasParameter.Value = new IntValue(schemas.Count);
220      NumberOfChangedTreesParameter.Value = new IntValue(0);
221
222      // return an operation collection containing all the scope operations + base.Apply()
223      return new OperationCollection { updateEstimatedValues, evaluateSchemas, calculateStatistics, base.Apply() };
224    }
225
226    public static IEnumerable<ISymbolicExpressionTree> GenerateSchemas(IEnumerable<IGenealogyGraphNode<ISymbolicExpressionTree>> vertices, int minimumSchemaLength, bool strict = true) {
227      var anySubtreeSymbol = new AnySubtreeSymbol();
228      //            var anyNodeSymbol = new AnyNodeSymbol();
229      var groups = vertices.GroupBy(x => x.Parents.First()).OrderByDescending(g => g.Count()).ToList();
230      var hash = new HashSet<string>();
231      //      var formatter = new SymbolicExpressionTreeStringFormatter { Indent = false, AppendNewLines = false };
232      foreach (var g in groups) {
233        var parent = g.Key;
234        if (parent.Data.Length < minimumSchemaLength)
235          continue;
236        bool replaced = false;
237        var schema = (ISymbolicExpressionTree)parent.Data.Clone();
238        var nodes = schema.IterateNodesPrefix().ToList();
239        var arcs = g.Select(x => x.InArcs.Last()).Where(x => x.Data != null);
240        var indices = (from arc in arcs
241                       let fragment = (IFragment<ISymbolicExpressionTreeNode>)arc.Data
242                       select fragment.Index1).Distinct().ToArray();
243        Array.Sort(indices);
244        var nodesToReplace = indices.Select(x => nodes[x]).ToList();
245        for (int i = nodesToReplace.Count - 1; i >= 0; --i) {
246          var node = nodesToReplace[i];
247
248          // do not replace the node with a wildcard if it would result in a length < MinimumSchemaLength
249          if (schema.Length - node.GetLength() + 1 < minimumSchemaLength)
250            continue;
251
252          var replacement = anySubtreeSymbol.CreateTreeNode();
253          ReplaceSubtree(node, replacement, false);
254          //          var replacement = new AnyNodeSymbol(node.Symbol.MinimumArity, node.Symbol.MinimumArity).CreateTreeNode();
255          //          ReplaceSubtree(node, replacement, true);
256          replaced = true;
257        }
258        if (replaced) {
259          //          var str = formatter.Format(schema.Root.GetSubtree(0).GetSubtree(0));
260          var str = SubtreeToString(schema.Root.GetSubtree(0).GetSubtree(0), strict);
261          if (hash.Contains(str)) continue;
262          yield return schema;
263          hash.Add(str);
264        }
265      }
266    }
267
268    private static void ReplaceSubtree(ISymbolicExpressionTreeNode original, ISymbolicExpressionTreeNode replacement, bool preserveChildren = true) {
269      var parent = original.Parent;
270      if (parent == null)
271        throw new ArgumentException("Parent cannot be null for node " + original);
272      var index = parent.IndexOfSubtree(original);
273      parent.RemoveSubtree(index);
274      parent.InsertSubtree(index, replacement);
275
276      if (preserveChildren) {
277        var subtrees = original.Subtrees.ToList();
278
279        for (int i = subtrees.Count - 1; i >= 0; --i)
280          original.RemoveSubtree(i);
281
282        for (int i = 0; i < subtrees.Count; ++i) {
283          replacement.AddSubtree(subtrees[i]);
284        }
285      }
286    }
287
288    private static string SubtreeToString(ISymbolicExpressionTreeNode node, bool strict = false) {
289      StringBuilder strBuilder = new StringBuilder();
290      // internal nodes or leaf nodes?
291      if (node is AnySubtree)
292        return "# ";
293
294      if (node.SubtreeCount > 0) {
295        strBuilder.Append("(");
296        // symbol on same line as '('
297        string label = string.Empty;
298        if (node is AnyNode)
299          label = "=";
300        else {
301          label = node.Symbol.Name;
302        }
303        strBuilder.Append(label + " ");
304        // each subtree expression on a new line
305        // and closing ')' also on new line
306        foreach (var subtree in node.Subtrees) {
307          strBuilder.Append(SubtreeToString(subtree, strict));
308        }
309        strBuilder.Append(") ");
310      } else {
311        // symbol in the same line with as '(' and ')'
312        var v = node as VariableTreeNode;
313        var c = node as ConstantTreeNode;
314        var w = node as AnyNode; // wildcard
315        string label = string.Empty;
316        if (w != null)
317          label = "=";
318        else if (v != null)
319          label = strict ? string.Format("{0:0.00}_{1}", v.Weight, v.VariableName) : string.Format("{0}", v.VariableName);
320        else if (c != null)
321          label = strict ? string.Format("{0:0.00}", c.Value) : "C";
322        strBuilder.Append(label);
323        if (node.Parent != null && node != node.Parent.Subtrees.Last())
324          strBuilder.Append(" ");
325        //strBuilder.Append(")");
326      }
327      return strBuilder.ToString();
328    }
329  }
330}
Note: See TracBrowser for help on using the repository browser.