source: branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/SchemaDiversification/SchemaCreator.cs @ 13480

Last change on this file since 13480 was 13480, checked in by bburlacu, 5 years ago

#1772: Schema diversification strategy: implement adaptive replacement ratio and added number of evaluated solutions per generation to the diversification statistics operator

File size: 14.3 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29using HeuristicLab.EvolutionTracking;
30using HeuristicLab.Parameters;
31using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
32
33namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
34  [Item("SchemaCreator", "An operator that builds schemas based on the heredity relationship in the genealogy graph.")]
35  [StorableClass]
36  public class SchemaCreator : EvolutionTrackingOperator<ISymbolicExpressionTree> {
37    #region parameter names
38    private const string MinimumSchemaLengthParameterName = "MinimumSchemaLength";
39    private const string MinimumSchemaFrequencyParameterName = "MinimumSchemaFrequency";
40    private const string MinimumPhenotypicSimilarityParameterName = "MinimumPhenotypicSimilarity";
41    private const string ReplacementRatioParameterName = "ReplacementRatio";
42    private const string RandomReplacementParameterName = "RandomReplacement";
43    private const string ExecuteInParallelParameterName = "ExecuteInParallel";
44    private const string MaxDegreeOfParalellismParameterName = "MaxDegreeOfParallelism";
45    private const string PercentageOfPopulationParameterName = "PercentageOfPopulationToDiversify";
46    private const string ScaleEstimatedValuesParameterName = "ScaleEstimatedValues";
47    private const string ExclusiveMatchingParameterName = "ExclusiveMatching";
48    private const string NumberOfChangedTreesParameterName = "NumberOfChangedTrees";
49    private const string NumberOfSchemasParameterName = "NumberOfSchemas";
50    private const string AverageSchemaLengthParameterName = "AverageSchemaLength";
51    private const string UseAdaptiveReplacementRatioParameterName = "UseAdaptiveReplacementRatio";
52    private const string ReplacementRatioUpdateRuleParameterName = "ReplacementRatioUpdateRule";
53    #endregion
54
55    #region parameters
56
57    public IConstrainedValueParameter<StringValue> ReplacementRatioUpdateRuleParameter {
58      get { return (IConstrainedValueParameter<StringValue>)Parameters[ReplacementRatioUpdateRuleParameterName]; }
59    }
60    public IFixedValueParameter<BoolValue> UseAdaptiveReplacementRatioParameter {
61      get { return (IFixedValueParameter<BoolValue>)Parameters[UseAdaptiveReplacementRatioParameterName]; }
62    }
63    public IFixedValueParameter<BoolValue> ExclusiveMatchingParameter {
64      get { return (IFixedValueParameter<BoolValue>)Parameters[ExclusiveMatchingParameterName]; }
65    }
66    public IFixedValueParameter<BoolValue> ScaleEstimatedValuesParameter {
67      get { return (IFixedValueParameter<BoolValue>)Parameters[ScaleEstimatedValuesParameterName]; }
68    }
69    public IFixedValueParameter<PercentValue> PercentageOfPopulationParameter {
70      get { return (IFixedValueParameter<PercentValue>)Parameters[PercentageOfPopulationParameterName]; }
71    }
72    public IFixedValueParameter<IntValue> MinimumSchemaLengthParameter {
73      get { return (IFixedValueParameter<IntValue>)Parameters[MinimumSchemaLengthParameterName]; }
74    }
75    public IFixedValueParameter<BoolValue> ExecuteInParallelParameter {
76      get { return (IFixedValueParameter<BoolValue>)Parameters[ExecuteInParallelParameterName]; }
77    }
78    public IFixedValueParameter<IntValue> MaxDegreeOfParallelismParameter {
79      get { return (IFixedValueParameter<IntValue>)Parameters[MaxDegreeOfParalellismParameterName]; }
80    }
81    public IFixedValueParameter<PercentValue> MinimumSchemaFrequencyParameter {
82      get { return (IFixedValueParameter<PercentValue>)Parameters[MinimumSchemaFrequencyParameterName]; }
83    }
84    public IFixedValueParameter<PercentValue> MinimumPhenotypicSimilarityParameter {
85      get { return (IFixedValueParameter<PercentValue>)Parameters[MinimumPhenotypicSimilarityParameterName]; }
86    }
87    public IFixedValueParameter<PercentValue> ReplacementRatioParameter {
88      get { return (IFixedValueParameter<PercentValue>)Parameters[ReplacementRatioParameterName]; }
89    }
90    public IValueParameter<IntValue> NumberOfSchemasParameter {
91      get { return (IValueParameter<IntValue>)Parameters[NumberOfSchemasParameterName]; }
92    }
93    public IValueParameter<DoubleValue> AverageSchemaLengthParameter {
94      get { return (IValueParameter<DoubleValue>)Parameters[AverageSchemaLengthParameterName]; }
95    }
96    public IValueParameter<IntValue> NumberOfChangedTreesParameter {
97      get { return (IValueParameter<IntValue>)Parameters[NumberOfChangedTreesParameterName]; }
98    }
99    #endregion
100
101    #region parameter properties
102    public int MinimumSchemaLength { get { return MinimumSchemaLengthParameter.Value.Value; } }
103    public int MaxDegreeOfParallelism { get { return MaxDegreeOfParallelismParameter.Value.Value; } }
104    public bool ExecuteInParallel { get { return ExecuteInParallelParameter.Value.Value; } }
105    public double PercentageOfPopulation { get { return PercentageOfPopulationParameter.Value.Value; } }
106    #endregion
107
108    private UpdateEstimatedValuesOperator updateEstimatedValuesOperator;
109    private DiversificationStatisticsOperator diversificationStatisticsOperator;
110
111    public SchemaCreator() {
112      #region add parameters
113      Parameters.Add(new FixedValueParameter<IntValue>(MinimumSchemaLengthParameterName, new IntValue(10)));
114      Parameters.Add(new FixedValueParameter<PercentValue>(MinimumSchemaFrequencyParameterName, new PercentValue(0.01)));
115      Parameters.Add(new FixedValueParameter<PercentValue>(MinimumPhenotypicSimilarityParameterName, new PercentValue(0.9)));
116      Parameters.Add(new FixedValueParameter<PercentValue>(ReplacementRatioParameterName, new PercentValue(0.9)));
117      Parameters.Add(new FixedValueParameter<PercentValue>(PercentageOfPopulationParameterName, new PercentValue(1)));
118      Parameters.Add(new FixedValueParameter<BoolValue>(RandomReplacementParameterName, new BoolValue(false)));
119      Parameters.Add(new FixedValueParameter<BoolValue>(ExecuteInParallelParameterName, new BoolValue(false)));
120      Parameters.Add(new FixedValueParameter<IntValue>(MaxDegreeOfParalellismParameterName, new IntValue(-1)));
121      Parameters.Add(new FixedValueParameter<BoolValue>(ScaleEstimatedValuesParameterName, new BoolValue(true)));
122      Parameters.Add(new FixedValueParameter<BoolValue>(ExclusiveMatchingParameterName, new BoolValue(false)));
123      Parameters.Add(new ValueParameter<IntValue>(NumberOfChangedTreesParameterName, new IntValue(0)));
124      Parameters.Add(new ValueParameter<IntValue>(NumberOfSchemasParameterName, new IntValue(0)));
125      Parameters.Add(new ValueParameter<DoubleValue>(AverageSchemaLengthParameterName, new DoubleValue(0)));
126      Parameters.Add(new FixedValueParameter<BoolValue>(UseAdaptiveReplacementRatioParameterName, new BoolValue(true)));
127
128      var replacementRatioUpdateRules = new ItemSet<StringValue>(new[] { new StringValue("f(x) = x"), new StringValue("f(x) = tanh(x)"), new StringValue("f(x) = tanh(2x)"), new StringValue("f(x) = tanh(3x)") });
129      Parameters.Add(new ConstrainedValueParameter<StringValue>(ReplacementRatioUpdateRuleParameterName, replacementRatioUpdateRules));
130      #endregion
131      NumberOfChangedTreesParameter.Hidden = true;
132      NumberOfSchemasParameter.Hidden = true;
133      AverageSchemaLengthParameter.Hidden = true;
134
135      ExecuteInParallelParameter.Hidden = true;
136      MaxDegreeOfParallelismParameter.Hidden = true;
137    }
138
139    protected SchemaCreator(SchemaCreator original, Cloner cloner) : base(original, cloner) { }
140
141    public override IDeepCloneable Clone(Cloner cloner) {
142      return new SchemaCreator(this, cloner);
143    }
144
145    [StorableConstructor]
146    protected SchemaCreator(bool deserializing) : base(deserializing) { }
147
148    public override IOperation Apply() {
149      // apply only when at least one generation has passed
150      var gen = Generations.Value;
151      if (gen < 1 || GenealogyGraph == null)
152        return base.Apply();
153
154      var n = (int)Math.Round(ExecutionContext.Scope.SubScopes.Count * PercentageOfPopulation);
155      var scopes = new ScopeList(ExecutionContext.Scope.SubScopes.Take(n));
156
157      var updateEstimatedValues = new OperationCollection { Parallel = true };
158      if (updateEstimatedValuesOperator == null)
159        updateEstimatedValuesOperator = new UpdateEstimatedValuesOperator();
160
161      foreach (var s in scopes) {
162        if (!s.Variables.ContainsKey("EstimatedValues"))
163          s.Variables.Add(new Core.Variable("EstimatedValues"));
164        updateEstimatedValues.Add(ExecutionContext.CreateChildOperation(updateEstimatedValuesOperator, s));
165      }
166
167      Func<double, double> rule;
168      var replacementRule = ReplacementRatioUpdateRuleParameter.Value.Value;
169
170      switch (replacementRule) {
171        case "f(x) = x": {
172            rule = x => x;
173            break;
174          }
175        case "f(x) = tanh(x)": {
176            rule = x => Math.Tanh(x);
177            break;
178          }
179        case "f(x) = tanh(2x)": {
180            rule = x => Math.Tanh(2 * x);
181            break;
182          }
183        case "f(x) = tanh(3x)": {
184            rule = x => Math.Tanh(3 * x);
185            break;
186          }
187        default:
188          throw new ArgumentException("Unknown replacement rule");
189      }
190
191      var evaluateSchemas = new OperationCollection();
192
193      // for now, only consider crossover offspring
194      var vertices = from s in scopes
195                     let t = (ISymbolicExpressionTree)s.Variables["SymbolicExpressionTree"].Value
196                     let v = GenealogyGraph.GetByContent(t)
197                     where v.InDegree == 2
198                     select v;
199
200      var schemas = new List<ISymbolicExpressionTree>(GenerateSchemas(vertices, MinimumSchemaLength));
201
202      #region create schemas and add subscopes representing the individuals
203      foreach (var schema in schemas) {
204        evaluateSchemas.Add(ExecutionContext.CreateChildOperation(new SchemaEvaluator { Schema = schema, ReplacementRule = rule }, ExecutionContext.Scope));
205      }
206      #endregion
207
208      if (diversificationStatisticsOperator == null)
209        diversificationStatisticsOperator = new DiversificationStatisticsOperator();
210
211      var calculateStatistics = ExecutionContext.CreateChildOperation(diversificationStatisticsOperator);
212
213      // set parameters for statistics
214      AverageSchemaLengthParameter.Value = new DoubleValue(schemas.Average(x => x.Length));
215      NumberOfSchemasParameter.Value = new IntValue(schemas.Count);
216      NumberOfChangedTreesParameter.Value = new IntValue(0);
217
218      // return an operation collection containing all the scope operations + base.Apply()
219      return new OperationCollection { updateEstimatedValues, evaluateSchemas, calculateStatistics, base.Apply() };
220    }
221
222    public static IEnumerable<ISymbolicExpressionTree> GenerateSchemas(IEnumerable<IGenealogyGraphNode<ISymbolicExpressionTree>> vertices, int minimumSchemaLength) {
223      var anySubtreeSymbol = new AnySubtreeSymbol();
224      //      var anyNodeSymbol = new AnyNodeSymbol();
225      var groups = vertices.GroupBy(x => x.Parents.First()).OrderByDescending(g => g.Count()).ToList();
226      var hash = new HashSet<string>();
227      var formatter = new SymbolicExpressionTreeStringFormatter { Indent = false, AppendNewLines = false };
228      foreach (var g in groups) {
229        var parent = g.Key;
230        if (parent.Data.Length < minimumSchemaLength)
231          continue;
232        bool replaced = false;
233        var schema = (ISymbolicExpressionTree)parent.Data.Clone();
234        var nodes = schema.IterateNodesPrefix().ToList();
235        var arcs = g.Select(x => x.InArcs.Last()).Where(x => x.Data != null);
236        var indices = (from arc in arcs
237                       let fragment = (IFragment<ISymbolicExpressionTreeNode>)arc.Data
238                       select fragment.Index1).Distinct().ToArray();
239        Array.Sort(indices);
240        var nodesToReplace = indices.Select(x => nodes[x]).ToList();
241        for (int i = nodesToReplace.Count - 1; i >= 0; --i) {
242          var node = nodesToReplace[i];
243
244          // do not replace the node with a wildcard if it would result in a length < MinimumSchemaLength
245          if (schema.Length - node.GetLength() + 1 < minimumSchemaLength)
246            continue;
247
248          //          var replacement = anySubtreeSymbol.CreateTreeNode();
249          //          ReplaceSubtree(node, replacement, false);
250          var replacement = new AnyNodeSymbol(node.Symbol.MinimumArity, node.Symbol.MinimumArity).CreateTreeNode();
251          ReplaceSubtree(node, replacement, true);
252          replaced = true;
253        }
254        if (replaced) {
255          var str = formatter.Format(schema.Root.GetSubtree(0).GetSubtree(0));
256          if (hash.Contains(str)) continue;
257          yield return schema;
258          hash.Add(str);
259        }
260      }
261    }
262
263    private static void ReplaceSubtree(ISymbolicExpressionTreeNode original, ISymbolicExpressionTreeNode replacement, bool preserveChildren = true) {
264      var parent = original.Parent;
265      if (parent == null)
266        throw new ArgumentException("Parent cannot be null for node " + original);
267      var index = parent.IndexOfSubtree(original);
268      parent.RemoveSubtree(index);
269      parent.InsertSubtree(index, replacement);
270
271      if (preserveChildren) {
272        var subtrees = original.Subtrees.ToList();
273
274        for (int i = subtrees.Count - 1; i >= 0; --i)
275          original.RemoveSubtree(i);
276
277        for (int i = 0; i < subtrees.Count; ++i) {
278          replacement.AddSubtree(subtrees[i]);
279        }
280      }
281    }
282  }
283}
Note: See TracBrowser for help on using the repository browser.