Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/SchemaDiversification/SchemaCreator.cs @ 14627

Last change on this file since 14627 was 14627, checked in by bburlacu, 7 years ago

#1772: Add option when generating schemas to only consider parents that have produced a minimum number of offspring.

File size: 14.3 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29using HeuristicLab.EvolutionTracking;
30using HeuristicLab.Parameters;
31using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
32
33namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
34  [Item("SchemaCreator", "An operator that builds schemas based on the heredity relationship in the genealogy graph.")]
35  [StorableClass]
36  public class SchemaCreator : EvolutionTrackingOperator<ISymbolicExpressionTree> {
37    #region parameter names
38    private const string MinimumSchemaLengthParameterName = "MinimumSchemaLength";
39    private const string MinimumSchemaFrequencyParameterName = "MinimumSchemaFrequency";
40    private const string MinimumPhenotypicSimilarityParameterName = "MinimumPhenotypicSimilarity";
41    private const string ReplacementRatioParameterName = "ReplacementRatio";
42    private const string RandomReplacementParameterName = "RandomReplacement";
43    private const string ExecuteInParallelParameterName = "ExecuteInParallel";
44    private const string MaxDegreeOfParalellismParameterName = "MaxDegreeOfParallelism";
45    private const string PercentageOfPopulationParameterName = "PercentageOfPopulationToDiversify";
46    private const string ScaleEstimatedValuesParameterName = "ScaleEstimatedValues";
47    private const string ExclusiveMatchingParameterName = "ExclusiveMatching";
48    private const string NumberOfChangedTreesParameterName = "NumberOfChangedTrees";
49    private const string NumberOfSchemasParameterName = "NumberOfSchemas";
50    private const string AverageSchemaLengthParameterName = "AverageSchemaLength";
51    private const string UseAdaptiveReplacementRatioParameterName = "UseAdaptiveReplacementRatio";
52    private const string ReplacementRatioUpdateRuleParameterName = "ReplacementRatioUpdateRule";
53    private const string StrictSchemaMatchingParameterName = "StrictSchemaMatching";
54    #endregion
55
56    #region parameters
57
58    public IConstrainedValueParameter<StringValue> ReplacementRatioUpdateRuleParameter {
59      get { return (IConstrainedValueParameter<StringValue>)Parameters[ReplacementRatioUpdateRuleParameterName]; }
60    }
61    public IFixedValueParameter<BoolValue> UseAdaptiveReplacementRatioParameter {
62      get { return (IFixedValueParameter<BoolValue>)Parameters[UseAdaptiveReplacementRatioParameterName]; }
63    }
64    public IFixedValueParameter<BoolValue> StrictSchemaMatchingParameter {
65      get { return (IFixedValueParameter<BoolValue>)Parameters[StrictSchemaMatchingParameterName]; }
66    }
67    public IFixedValueParameter<BoolValue> ExclusiveMatchingParameter {
68      get { return (IFixedValueParameter<BoolValue>)Parameters[ExclusiveMatchingParameterName]; }
69    }
70    public IFixedValueParameter<BoolValue> ScaleEstimatedValuesParameter {
71      get { return (IFixedValueParameter<BoolValue>)Parameters[ScaleEstimatedValuesParameterName]; }
72    }
73    public IFixedValueParameter<PercentValue> PercentageOfPopulationParameter {
74      get { return (IFixedValueParameter<PercentValue>)Parameters[PercentageOfPopulationParameterName]; }
75    }
76    public IFixedValueParameter<IntValue> MinimumSchemaLengthParameter {
77      get { return (IFixedValueParameter<IntValue>)Parameters[MinimumSchemaLengthParameterName]; }
78    }
79    public IFixedValueParameter<BoolValue> ExecuteInParallelParameter {
80      get { return (IFixedValueParameter<BoolValue>)Parameters[ExecuteInParallelParameterName]; }
81    }
82    public IFixedValueParameter<IntValue> MaxDegreeOfParallelismParameter {
83      get { return (IFixedValueParameter<IntValue>)Parameters[MaxDegreeOfParalellismParameterName]; }
84    }
85    public IFixedValueParameter<PercentValue> MinimumSchemaFrequencyParameter {
86      get { return (IFixedValueParameter<PercentValue>)Parameters[MinimumSchemaFrequencyParameterName]; }
87    }
88    public IFixedValueParameter<PercentValue> MinimumPhenotypicSimilarityParameter {
89      get { return (IFixedValueParameter<PercentValue>)Parameters[MinimumPhenotypicSimilarityParameterName]; }
90    }
91    public IFixedValueParameter<PercentValue> ReplacementRatioParameter {
92      get { return (IFixedValueParameter<PercentValue>)Parameters[ReplacementRatioParameterName]; }
93    }
94    public IValueParameter<IntValue> NumberOfSchemasParameter {
95      get { return (IValueParameter<IntValue>)Parameters[NumberOfSchemasParameterName]; }
96    }
97    public IValueParameter<DoubleValue> AverageSchemaLengthParameter {
98      get { return (IValueParameter<DoubleValue>)Parameters[AverageSchemaLengthParameterName]; }
99    }
100    public IValueParameter<IntValue> NumberOfChangedTreesParameter {
101      get { return (IValueParameter<IntValue>)Parameters[NumberOfChangedTreesParameterName]; }
102    }
103    #endregion
104
105    #region parameter properties
106    public int MinimumSchemaLength { get { return MinimumSchemaLengthParameter.Value.Value; } }
107    public int MaxDegreeOfParallelism { get { return MaxDegreeOfParallelismParameter.Value.Value; } }
108    public bool ExecuteInParallel { get { return ExecuteInParallelParameter.Value.Value; } }
109    public double PercentageOfPopulation { get { return PercentageOfPopulationParameter.Value.Value; } }
110    public bool StrictSchemaMatching { get { return StrictSchemaMatchingParameter.Value.Value; } }
111    #endregion
112
113    private UpdateQualityOperator updateQualityOperator;
114    private DiversificationStatisticsOperator diversificationStatisticsOperator;
115
116    public override void ClearState() {
117      NumberOfChangedTreesParameter.Value.Value = 0;
118      NumberOfChangedTreesParameter.Value.Value = 0;
119      AverageSchemaLengthParameter.Value.Value = 0;
120      base.ClearState();
121    }
122
123    public SchemaCreator() {
124      #region add parameters
125      Parameters.Add(new FixedValueParameter<IntValue>(MinimumSchemaLengthParameterName, new IntValue(10)));
126      Parameters.Add(new FixedValueParameter<PercentValue>(MinimumSchemaFrequencyParameterName, new PercentValue(0.01)));
127      Parameters.Add(new FixedValueParameter<PercentValue>(MinimumPhenotypicSimilarityParameterName, new PercentValue(0.9)));
128      Parameters.Add(new FixedValueParameter<PercentValue>(ReplacementRatioParameterName, new PercentValue(0.9)));
129      Parameters.Add(new FixedValueParameter<PercentValue>(PercentageOfPopulationParameterName, new PercentValue(1)));
130      Parameters.Add(new FixedValueParameter<BoolValue>(RandomReplacementParameterName, new BoolValue(false)));
131      Parameters.Add(new FixedValueParameter<BoolValue>(ExecuteInParallelParameterName, new BoolValue(false)));
132      Parameters.Add(new FixedValueParameter<IntValue>(MaxDegreeOfParalellismParameterName, new IntValue(-1)));
133      Parameters.Add(new FixedValueParameter<BoolValue>(ScaleEstimatedValuesParameterName, new BoolValue(true)));
134      Parameters.Add(new FixedValueParameter<BoolValue>(ExclusiveMatchingParameterName, new BoolValue(false)));
135      Parameters.Add(new FixedValueParameter<BoolValue>(StrictSchemaMatchingParameterName, new BoolValue(false)));
136      Parameters.Add(new ValueParameter<IntValue>(NumberOfChangedTreesParameterName, new IntValue(0)));
137      Parameters.Add(new ValueParameter<IntValue>(NumberOfSchemasParameterName, new IntValue(0)));
138      Parameters.Add(new ValueParameter<DoubleValue>(AverageSchemaLengthParameterName, new DoubleValue(0)));
139      Parameters.Add(new FixedValueParameter<BoolValue>(UseAdaptiveReplacementRatioParameterName, new BoolValue(true)));
140
141      var replacementRatioUpdateRules = new ItemSet<StringValue>(new[] {
142        new StringValue("f(x) = x"),
143        new StringValue("f(x) = tanh(x)"),
144        new StringValue("f(x) = tanh(2x)"),
145        new StringValue("f(x) = tanh(3x)"),
146        new StringValue("f(x) = tanh(4x)"),
147        new StringValue("f(x) = 1-sqrt(1-x)")
148      });
149      Parameters.Add(new ConstrainedValueParameter<StringValue>(ReplacementRatioUpdateRuleParameterName, replacementRatioUpdateRules));
150      #endregion
151      NumberOfChangedTreesParameter.Hidden = true;
152      NumberOfSchemasParameter.Hidden = true;
153      AverageSchemaLengthParameter.Hidden = true;
154
155      ExecuteInParallelParameter.Hidden = true;
156      MaxDegreeOfParallelismParameter.Hidden = true;
157    }
158
159    protected SchemaCreator(SchemaCreator original, Cloner cloner) : base(original, cloner) { }
160
161    public override IDeepCloneable Clone(Cloner cloner) {
162      return new SchemaCreator(this, cloner);
163    }
164
165    [StorableConstructor]
166    protected SchemaCreator(bool deserializing) : base(deserializing) { }
167
168
169    [StorableHook(HookType.AfterDeserialization)]
170    private void AfterDeserialization() {
171      if (!Parameters.ContainsKey(StrictSchemaMatchingParameterName))
172        Parameters.Add(new FixedValueParameter<BoolValue>(StrictSchemaMatchingParameterName, new BoolValue(false)));
173    }
174
175    public override IOperation Apply() {
176      // apply only when at least one generation has passed
177      var gen = Generations.Value;
178      if (gen < 1 || GenealogyGraph == null)
179        return base.Apply();
180
181      var n = (int)Math.Round(ExecutionContext.Scope.SubScopes.Count * PercentageOfPopulation);
182
183      var updateEstimatedValues = new OperationCollection { Parallel = true };
184      if (updateQualityOperator == null)
185        updateQualityOperator = new UpdateQualityOperator();
186
187      foreach (var s in ExecutionContext.Scope.SubScopes.Where(s => !s.Variables.ContainsKey("EstimatedValues"))) {
188        updateEstimatedValues.Add(ExecutionContext.CreateChildOperation(updateQualityOperator, s));
189      }
190
191      var replacementRule = ReplacementRatioUpdateRuleParameter.Value.Value;
192
193      var evaluateSchemas = new OperationCollection();
194
195      // for now, only consider crossover offspring
196      var scopes = new ScopeList(ExecutionContext.Scope.SubScopes.OrderByDescending(x => ((DoubleValue)x.Variables["Quality"].Value).Value).Take(n));
197      var vertices = from s in scopes
198                     let t = (ISymbolicExpressionTree)s.Variables["SymbolicExpressionTree"].Value
199                     let v = GenealogyGraph.GetByContent(t)
200                     where v.InDegree == 2
201                     select v;
202
203      var schemas = GenerateSchemas(vertices, MinimumSchemaLength, 0, StrictSchemaMatching).ToList();
204
205      #region create schemas and add subscopes representing the individuals
206      foreach (var schema in schemas) {
207        evaluateSchemas.Add(ExecutionContext.CreateChildOperation(new SchemaEvaluator { Schema = schema, ReplacementRule = replacementRule }, ExecutionContext.Scope));
208      }
209      #endregion
210
211      if (diversificationStatisticsOperator == null)
212        diversificationStatisticsOperator = new DiversificationStatisticsOperator();
213
214      var calculateStatistics = ExecutionContext.CreateChildOperation(diversificationStatisticsOperator);
215
216      // set parameters for statistics
217      AverageSchemaLengthParameter.Value = new DoubleValue(schemas.Average(x => x.Length));
218      NumberOfSchemasParameter.Value = new IntValue(schemas.Count);
219      NumberOfChangedTreesParameter.Value = new IntValue(0);
220
221      // return an operation collection containing all the scope operations + base.Apply()
222      return new OperationCollection { updateEstimatedValues, evaluateSchemas, calculateStatistics, base.Apply() };
223    }
224
225    public static IEnumerable<ISymbolicExpressionTree> GenerateSchemas(IEnumerable<IGenealogyGraphNode<ISymbolicExpressionTree>> vertices, int minimumSchemaLength, int minOffspringCount = 1, bool strict = true) {
226      var anySubtreeSymbol = new AnySubtreeSymbol();
227      var groups = vertices.GroupBy(x => x.Parents.First()).Where(g => g.Count() >= minOffspringCount).OrderByDescending(g => g.Count());
228      var hash = new HashSet<string>();
229      foreach (var g in groups) {
230        var parent = g.Key;
231        if (parent.Data.Length < minimumSchemaLength)
232          continue;
233        bool replaced = false;
234        var schema = (ISymbolicExpressionTree)parent.Data.Clone();
235        var nodes = schema.IterateNodesPrefix().ToList();
236        var fragments = g.Select(x => x.InArcs.Last().Data).Where(x => x != null).Cast<IFragment<ISymbolicExpressionTreeNode>>();
237        var indices = fragments.Select(x => x.Index1).Distinct().OrderByDescending(x => schema.Root.GetBranchLevel(nodes[x]));
238        foreach (var i in indices) {
239          var node = nodes[i];
240          // do not replace the node with a wildcard if it would result in a length < MinimumSchemaLength
241          //          if (schema.Length - node.GetLength() + 1 < minimumSchemaLength)
242          //            continue;
243          ISymbolicExpressionTreeNode replacement;
244          if (node.SubtreeCount > 0) {
245            var anyNodeSymbol = new AnyNodeSymbol(node.Symbol.MinimumArity, node.Symbol.MaximumArity);
246            replacement = anyNodeSymbol.CreateTreeNode();
247          } else {
248            replacement = anySubtreeSymbol.CreateTreeNode();
249          }
250          SchemaUtil.ReplaceSubtree(node, replacement, true);
251          //          var replacement = anySubtreeSymbol.CreateTreeNode();
252          //          var replacement = new AnyNodeSymbol(node.Symbol.MinimumArity, node.Symbol.MinimumArity).CreateTreeNode();
253          //          ReplaceSubtree(node, replacement, true);
254          //          SchemaUtil.ReplaceSubtree(node, replacement, false);
255          replaced = true;
256        }
257        if (replaced) {
258          var str = schema.Root.GetSubtree(0).GetSubtree(0).FormatToString(strict);
259          if (hash.Add(str))
260            yield return schema;
261        }
262      }
263    }
264  }
265}
Note: See TracBrowser for help on using the repository browser.