Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.EvolutionTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Tracking/Analyzers/SymbolicDataAnalysisSchemaFrequencyAnalyzer.cs @ 12952

Last change on this file since 12952 was 12951, checked in by bburlacu, 9 years ago

#1772:

  • Slight refactor in QueryMatch.cs
  • Added a parameter to the genealogy analyzer for removing older generations from the graph (useful to conserve memory in experiments)
  • Updated wildcard nodes (added persistence & cloning)
  • Implemented diversification strategy based on schema frequencies & phenotypic similarity as a separate operator (for now keep also the analyzer)
  • Updated license headers
  • Added QueryMatch performance test (to be expanded)
File size: 19.7 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using System.Text;
26using HeuristicLab.Analysis;
27using HeuristicLab.Common;
28using HeuristicLab.Core;
29using HeuristicLab.Data;
30using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
31using HeuristicLab.EvolutionTracking;
32using HeuristicLab.Optimization;
33using HeuristicLab.Parameters;
34using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
35using HeuristicLab.Random;
36
37namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Tracking.Analyzers {
38  [Item("SymbolicDataAnalysisSchemaFrequencyAnalyzer", "Analyze common schemas occuring in the population")]
39  [StorableClass]
40  public class SymbolicDataAnalysisSchemaFrequencyAnalyzer : EvolutionTrackingAnalyzer<ISymbolicExpressionTree> {
41    private const string GenotypeSimilarityThresholdParameterName = "GenotypeSimilarityThreshold";
42    private const string PhenotypeSimilarityThresholdParameterName = "PhenotypeSimilarityThreshold";
43    private const string SchemaLengthThresholdParameterName = "SchemaLengthThreshold";
44    private const string SchemaFrequencyThresholdParameterName = "SchemaFrequencyThreshold";
45    private const string ReplacementRatioParameterName = "ReplacementRatio";
46    private const string EvaluatorParameterName = "Evaluator";
47    private const string ProblemDataParameterName = "ProblemData";
48    private const string InterpreterParameterName = "SymbolicExpressionTreeInterpreter";
49    private const string EstimationLimitsParameterName = "EstimationLimits";
50    private const string ApplyLinearScalingParameterName = "ApplyLinearScaling";
51    private const string RandomParameterName = "Random";
52    private const string SolutionCreatorParameterName = "SolutionCreator";
53    private const string MaximumSymbolicExpressionTreeDepthParameterName = "MaximumSymbolicExpressionTreeDepth";
54    private const string MaximumSymbolicExpressionTreeLengthParameterName = "MaximumSymbolicExpressionTreeLength";
55    private const string GrammarParameterName = "SymbolicExpressionTreeGrammar";
56    private const string MutatorParameterName = "Mutator";
57    private const string RandomReplacementParameterName = "RandomReplacement";
58
59    private ISymbolicExpressionTreeNodeEqualityComparer comparer;
60    private QueryMatch qm;
61    private SymbolicExpressionTreePhenotypicSimilarityCalculator phenotypicSimilarityCalculator;
62    private SymbolicExpressionTreeBottomUpSimilarityCalculator bottomUpSimilarityCalculator;
63
64    private static readonly Dictionary<string, string> ShortNames = new Dictionary<string, string> {
65      { "Addition", "+" },
66      { "Subtraction", "-" },
67      { "Multiplication", "*" },
68      { "Division", "/" },
69      { "Exponential", "exp" },
70      { "Logarithm", "log" }
71    };
72
73    #region parameters
74    public IFixedValueParameter<BoolValue> RandomReplacementParameter {
75      get { return (IFixedValueParameter<BoolValue>)Parameters[RandomReplacementParameterName]; }
76    }
77    public ILookupParameter<ISymbolicExpressionTreeManipulator> MutatorParameter {
78      get { return (ILookupParameter<ISymbolicExpressionTreeManipulator>)Parameters[MutatorParameterName]; }
79    }
80    public IFixedValueParameter<PercentValue> GenotypeSimilarityThresholdParameter {
81      get { return (IFixedValueParameter<PercentValue>)Parameters[GenotypeSimilarityThresholdParameterName]; }
82    }
83    public IFixedValueParameter<PercentValue> PhenotypeSimilarityThresholdParameter {
84      get { return (IFixedValueParameter<PercentValue>)Parameters[PhenotypeSimilarityThresholdParameterName]; }
85    }
86    public IFixedValueParameter<IntValue> SchemaLengthThresholdParameter {
87      get { return (IFixedValueParameter<IntValue>)Parameters[SchemaLengthThresholdParameterName]; }
88    }
89    public IFixedValueParameter<PercentValue> SchemaFrequencyThresholdParameter {
90      get { return (IFixedValueParameter<PercentValue>)Parameters[SchemaFrequencyThresholdParameterName]; }
91    }
92    public IFixedValueParameter<PercentValue> ReplacementRatioParameter {
93      get { return (IFixedValueParameter<PercentValue>)Parameters[ReplacementRatioParameterName]; }
94    }
95    public ILookupParameter<ISymbolicDataAnalysisSingleObjectiveEvaluator<IRegressionProblemData>> EvaluatorParameter {
96      get { return (ILookupParameter<ISymbolicDataAnalysisSingleObjectiveEvaluator<IRegressionProblemData>>)Parameters[EvaluatorParameterName]; }
97    }
98    public ILookupParameter<IRegressionProblemData> ProblemDataParameter {
99      get { return (ILookupParameter<IRegressionProblemData>)Parameters[ProblemDataParameterName]; }
100    }
101    public ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter> InterpreterParameter {
102      get { return (ILookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>)Parameters[InterpreterParameterName]; }
103    }
104    public ILookupParameter<DoubleLimit> EstimationLimitsParameter {
105      get { return (ILookupParameter<DoubleLimit>)Parameters[EstimationLimitsParameterName]; }
106    }
107    public ILookupParameter<BoolValue> ApplyLinearScalingParameter {
108      get { return (ILookupParameter<BoolValue>)Parameters[ApplyLinearScalingParameterName]; }
109    }
110    public ILookupParameter<IRandom> RandomParameter {
111      get { return (ILookupParameter<IRandom>)Parameters[RandomParameterName]; }
112    }
113    public ILookupParameter<SymbolicExpressionTreeCreator> SolutionCreatorParameter {
114      get { return (ILookupParameter<SymbolicExpressionTreeCreator>)Parameters[SolutionCreatorParameterName]; }
115    }
116
117    public ILookupParameter<IntValue> MaximumSymbolicExpressionTreeLengthParameter {
118      get { return (ILookupParameter<IntValue>)Parameters[MaximumSymbolicExpressionTreeLengthParameterName]; }
119    }
120
121    public ILookupParameter<IntValue> MaximumSymbolicExpressionTreeDepthParameter {
122      get { return (ILookupParameter<IntValue>)Parameters[MaximumSymbolicExpressionTreeDepthParameterName]; }
123    }
124
125    public ILookupParameter<ISymbolicDataAnalysisGrammar> GrammarParameter {
126      get { return (ILookupParameter<ISymbolicDataAnalysisGrammar>)Parameters[GrammarParameterName]; }
127    }
128
129    public double GenotypeSimilarityThreshold {
130      get { return GenotypeSimilarityThresholdParameter.Value.Value; }
131    }
132    public double PhenotypeSimilarityThreshold {
133      get { return PhenotypeSimilarityThresholdParameter.Value.Value; }
134    }
135    public double SchemaLengthThreshold {
136      get { return SchemaLengthThresholdParameter.Value.Value; }
137    }
138    public double SchemaFrequencyThreshold {
139      get { return SchemaFrequencyThresholdParameter.Value.Value; }
140    }
141    public double ReplacementRatio {
142      get { return ReplacementRatioParameter.Value.Value; }
143    }
144
145    public bool RandomReplacement {
146      get { return RandomReplacementParameter.Value.Value; }
147    }
148    #endregion
149
150    public SymbolicDataAnalysisSchemaFrequencyAnalyzer() {
151      Parameters.Add(new FixedValueParameter<PercentValue>(GenotypeSimilarityThresholdParameterName));
152      Parameters.Add(new FixedValueParameter<PercentValue>(PhenotypeSimilarityThresholdParameterName));
153      Parameters.Add(new FixedValueParameter<IntValue>(SchemaLengthThresholdParameterName));
154      Parameters.Add(new FixedValueParameter<PercentValue>(SchemaFrequencyThresholdParameterName));
155      Parameters.Add(new FixedValueParameter<PercentValue>(ReplacementRatioParameterName));
156      Parameters.Add(new LookupParameter<ISymbolicDataAnalysisSingleObjectiveEvaluator<IRegressionProblemData>>(EvaluatorParameterName));
157      Parameters.Add(new LookupParameter<IRegressionProblemData>(ProblemDataParameterName));
158      Parameters.Add(new LookupParameter<ISymbolicDataAnalysisExpressionTreeInterpreter>(InterpreterParameterName));
159      Parameters.Add(new LookupParameter<DoubleLimit>(EstimationLimitsParameterName));
160      Parameters.Add(new LookupParameter<BoolValue>(ApplyLinearScalingParameterName));
161      Parameters.Add(new LookupParameter<IRandom>(RandomParameterName));
162      Parameters.Add(new LookupParameter<SymbolicExpressionTreeCreator>(SolutionCreatorParameterName));
163      Parameters.Add(new LookupParameter<IntValue>(MaximumSymbolicExpressionTreeLengthParameterName));
164      Parameters.Add(new LookupParameter<IntValue>(MaximumSymbolicExpressionTreeDepthParameterName));
165      Parameters.Add(new LookupParameter<ISymbolicDataAnalysisGrammar>(GrammarParameterName));
166      Parameters.Add(new LookupParameter<ISymbolicExpressionTreeManipulator>(MutatorParameterName));
167      Parameters.Add(new FixedValueParameter<BoolValue>(RandomReplacementParameterName, new BoolValue(true)));
168
169      comparer = new SymbolicExpressionTreeNodeEqualityComparer {
170        MatchConstantValues = false,
171        MatchVariableWeights = false,
172        MatchVariableNames = true
173      };
174      qm = new QueryMatch(comparer) { MatchParents = true };
175      phenotypicSimilarityCalculator = new SymbolicExpressionTreePhenotypicSimilarityCalculator();
176      bottomUpSimilarityCalculator = new SymbolicExpressionTreeBottomUpSimilarityCalculator();
177    }
178
179
180    [StorableHook(HookType.AfterDeserialization)]
181    private void AfterDeserialization() {
182      comparer = new SymbolicExpressionTreeNodeEqualityComparer {
183        MatchConstantValues = false,
184        MatchVariableWeights = false,
185        MatchVariableNames = true
186      };
187      qm = new QueryMatch(comparer) { MatchParents = true };
188      phenotypicSimilarityCalculator = new SymbolicExpressionTreePhenotypicSimilarityCalculator();
189      bottomUpSimilarityCalculator = new SymbolicExpressionTreeBottomUpSimilarityCalculator();
190    }
191
192    [StorableConstructor]
193    protected SymbolicDataAnalysisSchemaFrequencyAnalyzer(bool deserializing) : base(deserializing) { }
194
195    protected SymbolicDataAnalysisSchemaFrequencyAnalyzer(SymbolicDataAnalysisSchemaFrequencyAnalyzer original, Cloner cloner) : base(original, cloner) {
196      this.comparer = original.comparer;
197      this.qm = original.qm;
198      this.phenotypicSimilarityCalculator = original.phenotypicSimilarityCalculator;
199      this.bottomUpSimilarityCalculator = original.bottomUpSimilarityCalculator;
200    }
201
202    public override IDeepCloneable Clone(Cloner cloner) {
203      return new SymbolicDataAnalysisSchemaFrequencyAnalyzer(this, cloner);
204    }
205
206    public override IOperation Apply() {
207      IntValue updateCounter = UpdateCounterParameter.ActualValue;
208      if (updateCounter == null) {
209        updateCounter = new IntValue(0);
210        UpdateCounterParameter.ActualValue = updateCounter;
211      }
212      updateCounter.Value++;
213      if (updateCounter.Value != UpdateInterval.Value) return base.Apply();
214      updateCounter.Value = 0;
215
216      var graph = PopulationGraph;
217      if (graph == null || Generation.Value == 0)
218        return base.Apply();
219
220      var vertices = PopulationGraph.GetByRank(Generation.Value).Where(x => x.InDegree == 2).Select(x => (IGenealogyGraphNode<ISymbolicExpressionTree>)x).ToList();
221      var groups = vertices.GroupBy(x => x.Parents.First()).ToList();
222
223      var schemas = new List<ISymbolicExpressionTree>();
224      var anySubtreeSymbol = new AnySubtreeSymbol();
225      //var map = new Dictionary<ISymbolicExpressionTree, IGenealogyGraphNode<ISymbolicExpressionTree>>();
226      int mutatedTrees = 0;
227      var scopes = this.ExecutionContext.Scope.SubScopes;
228
229      var problemData = ProblemDataParameter.ActualValue;
230      var interpreter = InterpreterParameter.ActualValue;
231      var evaluator = EvaluatorParameter.ActualValue;
232      var random = RandomParameter.ActualValue;
233      var maxTreeLength = MaximumSymbolicExpressionTreeLengthParameter.ActualValue.Value;
234      var maxTreeDepth = MaximumSymbolicExpressionTreeDepthParameter.ActualValue.Value;
235      var estimationLimits = EstimationLimitsParameter.ActualValue;
236
237
238
239      foreach (var g in groups) {
240        bool replaced = false;
241        var parentVertex = g.Key;
242        var schema = (ISymbolicExpressionTree)parentVertex.Data.Clone();
243        var arcs = g.Select(x => x.InArcs.Last()).Where(x => x.Data != null);
244        var fragments = arcs.Select(x => (IFragment<ISymbolicExpressionTreeNode>)x.Data).ToArray();
245        var indices = fragments.Select(x => x.Index1).Distinct().ToArray();
246        // sort cutpoint indices and fragments
247        Array.Sort(indices); // indices ordered in increasing preorder index
248        var nodes = schema.IterateNodesPrefix().ToList();
249        var nodesToReplace = indices.Where(x => x > 2).Select(x => nodes[x]).ToList();
250        // walking in postorder so that schemas are more granular
251        for (int i = nodesToReplace.Count - 1; i >= 0; --i) {
252          //if (schemas.Any(x => qm.Match(nodesToReplace[i], x.Root))) continue;
253          // replace node with wildcard (#)
254          var replacement = anySubtreeSymbol.CreateTreeNode();
255          ReplaceSubtree(nodesToReplace[i], replacement, false);
256          replaced = true;
257        }
258        if (replaced && schemas.Any(x => qm.Match(schema.Root, x.Root)))
259          continue;
260        // if conditions are satisfied, attempt to diversify the individuals matching the schema
261        if (replaced && schema.Length >= SchemaLengthThreshold) {
262          //          var matchingIndividuals = individuals.Where(x => x.Changed.Value == false && qm.Match(x.Tree, schema)).ToList();
263          var matchingScopes = (from s in scopes
264                                let t = (ISymbolicExpressionTree)s.Variables["SymbolicExpressionTree"].Value
265                                where qm.Match(t, schema)
266                                select s).ToList();
267
268          if (matchingScopes.Count >= SchemaFrequencyThreshold * scopes.Count) {
269            phenotypicSimilarityCalculator.Interpreter = interpreter;
270            phenotypicSimilarityCalculator.ProblemData = problemData;
271            var phenotypicSimilarity = CalculatePhenotypicSimilarity(matchingScopes, phenotypicSimilarityCalculator);
272            if (phenotypicSimilarity > PhenotypeSimilarityThreshold) {
273              var n = (int)Math.Round(ReplacementRatio * matchingScopes.Count);
274              var individualsToReplace = RandomReplacement
275                ? matchingScopes.SampleRandomWithoutRepetition(random, n)
276                : matchingScopes.OrderBy(x => (DoubleValue)x.Variables["Quality"].Value).Take(n);
277              foreach (var ind in individualsToReplace) {
278                var tree = (ISymbolicExpressionTree)ind.Variables["SymbolicExpressionTree"].Value;
279                ReplaceBranchManipulation.ReplaceRandomBranch(random, tree, maxTreeLength, maxTreeDepth);
280                var quality = evaluator.Evaluate(this.ExecutionContext, tree, problemData, problemData.TrainingIndices);
281                var v = PopulationGraph.GetByContent(tree);
282                v.Quality = quality;
283                ((DoubleValue)ind.Variables["Quality"].Value).Value = quality;
284                ind.Variables["EstimatedValues"].Value = new DoubleArray(interpreter.GetSymbolicExpressionTreeValues(tree, problemData.Dataset, problemData.TrainingIndices)
285                  .LimitToRange(estimationLimits.Lower, estimationLimits.Upper).ToArray());
286                mutatedTrees++;
287              }
288            }
289          }
290        }
291      }
292
293      DataTable table;
294      if (!Results.ContainsKey("MutatedTrees")) {
295        table = new DataTable("MutatedTrees");
296        table.Rows.Add(new DataRow("MutatedTrees") { VisualProperties = { StartIndexZero = true } });
297        Results.Add(new Result("MutatedTrees", table));
298      } else {
299        table = (DataTable)Results["MutatedTrees"].Value;
300      }
301      table.Rows["MutatedTrees"].Values.Add(mutatedTrees);
302
303      return base.Apply();
304    }
305
306    private static double CalculatePhenotypicSimilarity(List<IScope> individuals, SymbolicExpressionTreePhenotypicSimilarityCalculator calculator) {
307      double similarity = 0;
308      int count = individuals.Count;
309      for (int i = 0; i < count - 1; ++i) {
310        for (int j = i + 1; j < count; ++j) {
311          similarity += calculator.CalculateSolutionSimilarity(individuals[i], individuals[j]);
312        }
313      }
314      return similarity / (count * (count - 1) / 2.0);
315    }
316
317    private void ReplaceSubtree(ISymbolicExpressionTreeNode original, ISymbolicExpressionTreeNode replacement, bool preserveChildren = true) {
318      var parent = original.Parent;
319      if (parent == null)
320        throw new ArgumentException("Parent cannot be null for node " + original.ToString());
321      var index = parent.IndexOfSubtree(original);
322      parent.RemoveSubtree(index);
323      parent.InsertSubtree(index, replacement);
324
325      if (preserveChildren) {
326        var subtrees = original.Subtrees.ToList();
327
328        for (int i = subtrees.Count - 1; i >= 0; --i)
329          original.RemoveSubtree(i);
330
331        for (int i = 0; i < subtrees.Count; ++i) {
332          replacement.AddSubtree(subtrees[i]);
333        }
334      }
335      //CheckNodeIntegrity(parent);
336    }
337
338    private bool CheckNodeIntegrity(ISymbolicExpressionTreeNode node) {
339      var nodes = node.IterateNodesPrefix().ToList();
340      for (int i = nodes.Count - 1; i >= 0; --i) {
341        var n = nodes[i];
342
343        if (n.GetLength() != n.IterateNodesPrefix().Count())
344          throw new Exception("Node length cache is compromised for node " + n + "(" + i + ")");
345
346        if (n.Symbol is ProgramRootSymbol)
347          continue;
348        if (n.Parent == null) {
349          throw new Exception("Parent cannot be null for node " + n + "(" + i + ")");
350        }
351      }
352      return true;
353    }
354
355    private string SubtreeToString(ISymbolicExpressionTreeNode node) {
356      StringBuilder strBuilder = new StringBuilder();
357      // internal nodes or leaf nodes?
358      if (node is AnySubtree)
359        return "# ";
360
361      if (node.SubtreeCount > 0) {
362        strBuilder.Append("(");
363        // symbol on same line as '('
364        string label = string.Empty;
365        if (node is AnyNode)
366          label = "=";
367        else {
368          var name = node.Symbol.Name;
369          label = ShortNames.ContainsKey(name) ? ShortNames[name] : name;
370        }
371        strBuilder.Append(label + " ");
372        // each subtree expression on a new line
373        // and closing ')' also on new line
374        foreach (var subtree in node.Subtrees) {
375          strBuilder.Append(SubtreeToString(subtree));
376        }
377        strBuilder.Append(") ");
378      } else {
379        // symbol in the same line with as '(' and ')'
380        var v = node as VariableTreeNode;
381        var c = node as ConstantTreeNode;
382        var w = node as AnyNode; // wildcard
383        string label = string.Empty;
384        if (w != null)
385          label = "=";
386        else if (v != null)
387          label = string.Format("{0:0.00}_{1}", v.Weight, v.VariableName);
388        else if (c != null)
389          label = string.Format("{0:0.00}", c.Value);
390        strBuilder.Append(label);
391        if (node.Parent != null && node != node.Parent.Subtrees.Last())
392          strBuilder.Append(" ");
393        //strBuilder.Append(")");
394      }
395      return strBuilder.ToString();
396    }
397  }
398}
Note: See TracBrowser for help on using the repository browser.