Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.EvolutionaryTracking/HeuristicLab.EvolutionaryTracking/3.4/Analyzers/SymbolicExpressionTreeFrequentPatternsAnalyzer.cs @ 9082

Last change on this file since 9082 was 9082, checked in by bburlacu, 11 years ago

#1772: Renamed and refactored genealogy graph components. Added SymbolGraph and FPGraph (frequent pattern graph). Added evolvability and genetic operator average improvement analyzer.

File size: 21.6 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2012 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Analysis;
26using HeuristicLab.Common;
27using HeuristicLab.Core;
28using HeuristicLab.Data;
29using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
30using HeuristicLab.Operators;
31using HeuristicLab.Optimization;
32using HeuristicLab.Parameters;
33using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
34using HeuristicLab.Problems.DataAnalysis.Symbolic;
35
36namespace HeuristicLab.EvolutionaryTracking {
37  /// <summary>
38  /// An analyzer that looks at individual lineages in the genealogy graph
39  /// </summary>
40  [Item("SymbolicExpressionTreeFrequentPatternsAnalyzer", "An operator that tries to find frequent tree fragments in the population")]
41  [StorableClass]
42  public sealed class SymbolicExpressionTreeFrequentPatternsAnalyzer : SingleSuccessorOperator, IAnalyzer {
43    #region Parameter names
44    private const string UpdateIntervalParameterName = "UpdateInterval";
45    private const string UpdateCounterParameterName = "UpdateCounter";
46    private const string ResultsParameterName = "Results";
47    private const string GenerationsParameterName = "Generations";
48    private const string MaximumGenerationsParameterName = "MaximumGenerations";
49    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
50    private const string RandomParameterName = "Random";
51    private const string MaximumFragmentDepthParameterName = "MaximumFragmentDepth";
52    private const string MaximumFragmentLengthParameterName = "MaximumFragmentLength";
53    private const string NumberOfFragmentsToGenerateParameterName = "NumberOfFragmentsToGenerate";
54    private const string FrequentFragmentOccurrenceThresholdParameterName = "FrequentFragmentOccurrenceThreshold";
55    private const string PopulationGraphParameterName = "PopulationGraph";
56    private const string CommonFragmentsParameterName = "CommonFragments";
57    private const string EnforceGrammarArityParameterName = "EnforceGrammarArity";
58    #endregion
59
60    #region Parameter properties
61    public ILookupParameter<IRandom> RandomParameter {
62      get { return (ILookupParameter<IRandom>)Parameters[RandomParameterName]; }
63    }
64    public ValueParameter<IntValue> UpdateIntervalParameter {
65      get { return (ValueParameter<IntValue>)Parameters[UpdateIntervalParameterName]; }
66    }
67    public ValueParameter<IntValue> UpdateCounterParameter {
68      get { return (ValueParameter<IntValue>)Parameters[UpdateCounterParameterName]; }
69    }
70    public LookupParameter<ResultCollection> ResultsParameter {
71      get { return (LookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
72    }
73    public LookupParameter<IntValue> GenerationsParameter {
74      get { return (LookupParameter<IntValue>)Parameters[GenerationsParameterName]; }
75    }
76    public LookupParameter<IntValue> MaximumGenerationsParameter {
77      get { return (LookupParameter<IntValue>)Parameters[MaximumGenerationsParameterName]; }
78    }
79    public IScopeTreeLookupParameter<ISymbolicExpressionTree> SymbolicExpressionTreeParameter {
80      get { return (IScopeTreeLookupParameter<ISymbolicExpressionTree>)Parameters[SymbolicExpressionTreeParameterName]; }
81    }
82    public ValueParameter<IntValue> MaximumFragmentDepthParameter {
83      get { return (ValueParameter<IntValue>)Parameters[MaximumFragmentDepthParameterName]; }
84    }
85    public ValueParameter<IntValue> MaximumFragmentLengthParameter {
86      get { return (ValueParameter<IntValue>)Parameters[MaximumFragmentLengthParameterName]; }
87    }
88    public ValueParameter<IntValue> NumberOfFragmentsToGenerateParameter {
89      get { return (ValueParameter<IntValue>)Parameters[NumberOfFragmentsToGenerateParameterName]; }
90    }
91    public ValueParameter<DoubleValue> FrequentFragmentOccurrenceThresholdParameter {
92      get { return (ValueParameter<DoubleValue>)Parameters[FrequentFragmentOccurrenceThresholdParameterName]; }
93    }
94    public ValueParameter<BoolValue> EnforceGrammarArityParameter {
95      get { return (ValueParameter<BoolValue>)Parameters[EnforceGrammarArityParameterName]; }
96    }
97    #endregion
98
99    #region Properties
100    public bool EnabledByDefault {
101      get { return true; }
102    }
103    public IntValue UpdateInterval {
104      get { return UpdateIntervalParameter.Value; }
105    }
106    public IntValue UpdateCounter {
107      get { return UpdateCounterParameter.Value; }
108    }
109    public IntValue Generations {
110      get { return GenerationsParameter.ActualValue; }
111    }
112    public IntValue MaximumGenerations {
113      get { return MaximumGenerationsParameter.ActualValue; }
114    }
115    public ResultCollection Results {
116      get { return ResultsParameter.ActualValue; }
117    }
118    public IntValue MaximumFragmentDepth {
119      get { return MaximumFragmentDepthParameter.Value; }
120    }
121    public IntValue MaximumFragmentLength {
122      get { return MaximumFragmentLengthParameter.Value; }
123    }
124    public IntValue NumberOfFragmentsToGenerate {
125      get { return NumberOfFragmentsToGenerateParameter.Value; }
126    }
127    public DoubleValue FrequentFragmentOccurrenceThreshold {
128      get { return FrequentFragmentOccurrenceThresholdParameter.Value; }
129    }
130    public BoolValue EnforceGrammarArity {
131      get { return EnforceGrammarArityParameter.Value; }
132    }
133    #endregion
134
135    [StorableConstructor]
136    private SymbolicExpressionTreeFrequentPatternsAnalyzer(bool deserializing) : base(deserializing) { }
137    private SymbolicExpressionTreeFrequentPatternsAnalyzer(SymbolicExpressionTreeFrequentPatternsAnalyzer original, Cloner cloner) : base(original, cloner) { }
138    public override IDeepCloneable Clone(Cloner cloner) {
139      return new SymbolicExpressionTreeFrequentPatternsAnalyzer(this, cloner);
140    }
141    public SymbolicExpressionTreeFrequentPatternsAnalyzer() {
142      #region Add parameters
143      Parameters.Add(new LookupParameter<IRandom>(RandomParameterName, "The random number generator to use."));
144      Parameters.Add(new ValueParameter<IntValue>(UpdateIntervalParameterName, "The interval in which the tree length analysis should be applied.", new IntValue(1)));
145      Parameters.Add(new ValueParameter<IntValue>(UpdateCounterParameterName, "The value which counts how many times the operator was called since the last update", new IntValue(0)));
146      Parameters.Add(new ValueLookupParameter<ResultCollection>(ResultsParameterName, "The results collection where the analysis values should be stored."));
147      Parameters.Add(new LookupParameter<IntValue>(GenerationsParameterName, "The number of generations so far"));
148      Parameters.Add(new ValueLookupParameter<IntValue>("MaximumGenerations", "The maximum number of generations which should be processed."));
149      Parameters.Add(new ScopeTreeLookupParameter<ISymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees to analyze."));
150      Parameters.Add(new ValueParameter<IntValue>(MaximumFragmentDepthParameterName, "The maximum allowed depth for generated fragments.", new IntValue(4)));
151      Parameters.Add(new ValueParameter<IntValue>(MaximumFragmentLengthParameterName, "The maximum allowed length for generated fragments.", new IntValue(15)));
152      Parameters.Add(new ValueParameter<IntValue>(NumberOfFragmentsToGenerateParameterName, "The number of fragments to generate.", new IntValue(1000)));
153      Parameters.Add(new ValueParameter<DoubleValue>(FrequentFragmentOccurrenceThresholdParameterName, "Occurrence used to discriminate frequent fragments.", new DoubleValue(0.3)));
154      Parameters.Add(new ValueParameter<BoolValue>(EnforceGrammarArityParameterName, "Whether to use minimum arity as defined by the grammar", new BoolValue(true)));
155      #endregion
156      UpdateCounterParameter.Hidden = true;
157      UpdateIntervalParameter.Hidden = true;
158      MaximumFragmentDepthParameter.Hidden = false;
159      MaximumFragmentLengthParameter.Hidden = false;
160
161      //this.Successor = new SymbolicExpressionTreeGenealogyGraphBuilder();
162    }
163
164    #region AfterDeserialization
165    [StorableHook(HookType.AfterDeserialization)]
166    private void AfterDeserialization() {
167      if (!Parameters.ContainsKey(MaximumFragmentDepthParameterName)) {
168        Parameters.Add(new ValueParameter<IntValue>(MaximumFragmentDepthParameterName, "The maximum allowed depth for generated fragments.", new IntValue(4)));
169      }
170      if (!Parameters.ContainsKey(MaximumFragmentLengthParameterName)) {
171        Parameters.Add(new ValueParameter<IntValue>(MaximumFragmentLengthParameterName, "The maximum allowed length for generated fragments.", new IntValue(15)));
172      }
173      if (!Parameters.ContainsKey(NumberOfFragmentsToGenerateParameterName)) {
174        Parameters.Add(new ValueParameter<IntValue>(NumberOfFragmentsToGenerateParameterName, "The number of fragments to generate.", new IntValue(1000)));
175      }
176      if (!Parameters.ContainsKey(FrequentFragmentOccurrenceThresholdParameterName)) {
177        Parameters.Add(new ValueParameter<DoubleValue>(FrequentFragmentOccurrenceThresholdParameterName, "Occurrence used to discriminate frequent fragments.", new DoubleValue(0.3)));
178      }
179      if (!Parameters.ContainsKey(EnforceGrammarArityParameterName)) {
180        Parameters.Add(new ValueParameter<BoolValue>(EnforceGrammarArityParameterName, "Whether to use minimum arity as defined by the grammar", new BoolValue(true)));
181      }
182    }
183    #endregion
184
185    #region IStatefulItem members
186    public override void InitializeState() {
187      base.InitializeState();
188      UpdateCounter.Value = 0;
189    }
190
191    public override void ClearState() {
192      base.ClearState();
193      UpdateCounter.Value = 0;
194    }
195    #endregion
196
197    public override IOperation Apply() {
198      UpdateCounter.Value++;
199      if (UpdateCounter.Value == UpdateInterval.Value) {
200        if (!Results.ContainsKey("SymbolGraph")) return base.Apply();
201
202        UpdateCounter.Value = 0;
203
204        var trees = SymbolicExpressionTreeParameter.ActualValue.ToList();
205        var grammar = trees[0].Root.Grammar;
206
207        // bring trees to a canonical form to eliminate permuted fragments
208        var canonicalSorter = new SymbolicExpressionTreeCanonicalSorter();
209        foreach (var t in trees)
210          canonicalSorter.SortSubtrees(t);
211
212        var graph = Results["SymbolGraph"].Value as SymbolGraph;
213
214        var similarityComparer = new SymbolicExpressionTreeNodeSimilarityComparer { MatchVariableNames = true };
215        var fragmentSimilarityComparer = new SymbolicExpressionTreeFragmentSimilarityComparer { SimilarityComparer = similarityComparer };
216        // generate fragments out of the probability graph and match them against the population
217        var fragments = GenerateFragments(graph, grammar, NumberOfFragmentsToGenerate.Value, MaximumFragmentDepth.Value, MaximumFragmentLength.Value).ToList();
218        // subtree-sort fragments to bring them to a canonical form
219        // var canonicalSorter = new SymbolicExpressionTreeCanonicalSorter();
220        foreach (var fragment in fragments)
221          canonicalSorter.SortSubtrees(fragment.Root);
222
223        if (!Results.ContainsKey(CommonFragmentsParameterName)) {
224          Results.Add(new Result(CommonFragmentsParameterName, new ItemDictionary<IFragment, DoubleValue>()));
225        }
226
227        var mostCommonFragmentsResult = Results[CommonFragmentsParameterName].Value as ItemDictionary<IFragment, DoubleValue>; // should never be null
228        var mostCommonFragments = mostCommonFragmentsResult.Select(x => x.Key).ToList();
229        mostCommonFragments.AddRange(fragments);
230        mostCommonFragmentsResult.Clear();
231
232        mostCommonFragments = mostCommonFragments.Distinct(fragmentSimilarityComparer).ToList();
233
234        foreach (var fragment in mostCommonFragments) {
235          var frequency = trees.Count(x => x.Root.ContainsFragment(fragment, similarityComparer)) / (double)trees.Count;
236          if (frequency > 0) mostCommonFragmentsResult.Add(fragment, new DoubleValue(frequency));
237        }
238
239        // update fragment frequencies DataTable
240        if (!Results.ContainsKey("FrequentFragments")) {
241          var dt = new DataTable("Frequent Fragments");
242          Results.Add(new Result("FrequentFragments", dt));
243        }
244
245        var fragmentFrequenciesTable = Results["FrequentFragments"].Value as DataTable; // should never be null
246        foreach (var fragment in mostCommonFragmentsResult) {
247          var prefixString = fragment.Key.Root.ToPrefixString();
248          if (!fragmentFrequenciesTable.Rows.ContainsKey(prefixString)) {
249            if (fragment.Value.Value > FrequentFragmentOccurrenceThreshold.Value) {
250              fragmentFrequenciesTable.Rows.Add(new DataRow(prefixString) { VisualProperties = { StartIndexZero = true } });
251              var row = fragmentFrequenciesTable.Rows[prefixString];
252              if (!Results.ContainsKey(PopulationGraphParameterName)) {
253                row.Values.AddRange(Enumerable.Repeat(0.0, Generations.Value));
254              } else {
255                var populationGraph = Results[PopulationGraphParameterName].Value as SymbolicExpressionTreeGenealogyGraph;
256                var layers = populationGraph.Nodes.Where(n => n.Rank % 1 == 0).GroupBy(n => n.Rank).OrderBy(g => g.Key);
257                var f = fragment;
258                row.Values.AddRange(from g in layers
259                                    select g.Count(n => n.SymbolicExpressionTree.Root.ContainsFragment(f.Key, similarityComparer)) / (double)g.Count());
260              }
261              fragmentFrequenciesTable.Rows[prefixString].Values.Add(fragment.Value.Value);
262            }
263          } else {
264            fragmentFrequenciesTable.Rows[prefixString].Values.Add(fragment.Value.Value);
265          }
266        }
267        foreach (var row in fragmentFrequenciesTable.Rows.Where(row => row.Values.Count == Generations.Value))
268          row.Values.Add(0.0);
269
270        #region Write fragment information to file
271
272        //        using (var streamWriter = new StreamWriter(Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.UserProfile), "CommonFragments.txt"))) {
273        //          streamWriter.WriteLine("Median edge weight: " + medianWeight);
274        //          streamWriter.WriteLine("Average edge weight: " + averageWeight);
275        //          streamWriter.WriteLine("Symbol frequencies: ");
276        //          double sum = graph.Nodes.Sum(node => node.Weight);
277        //          foreach (var symbolNode in graph.Nodes.OrderBy(node => node.Weight)) {
278        //            streamWriter.WriteLine("{0}\t{1}", symbolNode.Label, String.Format("{0:0.00}", symbolNode.Weight / sum));
279        //          }
280        //          streamWriter.WriteLine();
281        //          streamWriter.WriteLine("Fragment root symbol frequencies:");
282        //          foreach (var group in fragments.GroupBy(x => x.Root.Symbol.Name)) {
283        //            streamWriter.WriteLine("{0}\t{1}", group.Key, String.Format("{0:0.00}", (double)group.Count() / NumberOfFragmentsToGenerate.Value));
284        //          }
285        //          streamWriter.WriteLine();
286        //          foreach (var fragment in mostCommonFragments) {
287        //            streamWriter.WriteLine(fragment.Key.ToPrefixString());
288        //            streamWriter.WriteLine("Frequency in last generation: {0}, Contained in best individual: {1}",
289        //              fragment.Value, trees[0].ContainsFragment(fragment.Key, SimilarityLevel.High) ? "YES" : "NO");
290        //            SymbolicExpressionTreeMatching.RenderNode(streamWriter, fragment.Key, string.Empty);
291        //            if (Results.ContainsKey(PopulationGraphParameterName)) {
292        //              var populationGraph = (SymbolicExpressionTreeGenealogyGraph)Results[PopulationGraphParameterName].Value;
293        //              streamWriter.Write("Frequencies per generation: ");
294        //              var groups = populationGraph.Nodes.GroupBy(n => n.Rank).OrderBy(g => g.Key);
295        //              var frequencies = (from g in groups.Where(g => g.Key == (int)g.Key)
296        //                                 let freq = g.Count(n => n.SymbolicExpressionTree.ContainsFragment(fragment.Key, SimilarityLevel.High))
297        //                                 select freq);
298        //              foreach (var freq in frequencies)
299        //                streamWriter.Write(freq / count + " ");
300        //              streamWriter.WriteLine();
301        //            } else {
302        //              streamWriter.WriteLine("Frequecy in current population: {0}", fragment.Value);
303        //            }
304        //            streamWriter.WriteLine("\n");
305        //          }
306        //          streamWriter.WriteLine("Average fragment length: {0}", avgFragmentLength);
307        //          streamWriter.WriteLine("Average fragment depth: {0}", avgFragmentDepth);
308        //          streamWriter.WriteLine("Percentage of unique generated fragments: {0}", distinctFragments.Count() / (double)fragments.Count);
309        //        }
310
311        #endregion
312      }
313      return base.Apply();
314    }
315
316    private IEnumerable<IFragment> GenerateFragments(SymbolGraph graph, ISymbolicExpressionTreeGrammar grammar, int numFragments, int maxDepth, int maxLength) {
317      var fragments = new List<IFragment>();
318      var random = RandomParameter.ActualValue;
319      var count = SymbolicExpressionTreeParameter.ActualValue.Count();
320      var threshold = count * FrequentFragmentOccurrenceThreshold.Value;
321      var availableSymbols = graph.Nodes.Where(x => x.OutEdges != null && x.OutEdges.Count > 0 && x.Weight >= threshold).ToList();
322      var weights = availableSymbols.Select(x => x.Weight).ToList();
323
324      if (!(availableSymbols.Count == 0 || weights.Count == 0))
325        for (int i = 0; i != numFragments; ++i) {
326          var symbol = availableSymbols.SelectRandom(weights, random);
327          var fragment = CreateFragment(symbol, random, grammar, threshold, maxDepth, maxLength, EnforceGrammarArity.Value);
328          fragments.Add(new Fragment(fragment));
329        }
330      return fragments;
331    }
332
333    private static ISymbolicExpressionTreeNode CreateFragment(SymbolNode symbolNode, IRandom random, ISymbolicExpressionTreeGrammar grammar, double threshold, int depthLimit, int lengthLimit, bool enforceGrammarArity = true) {
334      var node = symbolNode.Symbol.CreateTreeNode();
335      if (node.HasLocalParameters) node.ResetLocalParameters(random);
336      if (node is VariableTreeNode) (node as VariableTreeNode).VariableName = symbolNode.Label;
337
338      if (symbolNode.OutEdges == null || symbolNode.OutEdges.Count == 0 || depthLimit < 0)
339        return node; // node is a leaf so we just return it
340
341      int maxCount = grammar.GetMaximumSubtreeCount(node.Symbol);
342      if (maxCount > 0) {
343        int minCount = grammar.GetMinimumSubtreeCount(node.Symbol);
344        var arity = random.Next(enforceGrammarArity ? minCount : 1, maxCount + 1);
345        var possibleChildConnections = new List<Arc>();
346
347        if (depthLimit <= 2 || lengthLimit <= maxCount) { // if we are near the depth limit then we want to add leafs on the next level
348          possibleChildConnections.AddRange(from e in symbolNode.OutEdges
349                                            let n = (SymbolNode)e.Target
350                                            where grammar.GetMaximumSubtreeCount(n.Symbol) == 0
351                                            where n.Weight >= threshold
352                                            select e);
353        }
354        // if there are no available connections towards leaf nodes (in case depthLimit <= 2)
355        // or in case the depthLimit has not been reached yet
356        if (possibleChildConnections.Count == 0)
357          possibleChildConnections.AddRange(symbolNode.OutEdges.Where(a => ((SymbolNode)a.Target).Weight >= threshold)); // this will not be empty since we check above if the OutEdges are null
358        var weights = new double[possibleChildConnections.Count];
359
360        for (int i = 0; i != arity; ++i) {
361          // adjust weights according to the probability a child will find itself on a certain argument position
362          for (int j = 0; j != possibleChildConnections.Count; ++j) {
363            var childSymbolNode = possibleChildConnections[j].Target as SymbolNode;
364            double sumPos = childSymbolNode.Positions.Sum(x => x.Value);
365            int v; childSymbolNode.Positions.TryGetValue(i, out v);
366            weights[j] = possibleChildConnections[j].Weight * v / sumPos;
367          }
368
369          if (weights.Sum().IsAlmost(0.0)) continue;
370
371          var selectedConnection = possibleChildConnections.SelectRandom(weights, random);
372          var selectedChildSymbolNode = selectedConnection.Target as SymbolNode;
373          var child = CreateFragment(selectedChildSymbolNode, random, grammar, threshold, depthLimit - 1, --lengthLimit);
374          if (child == null) throw new Exception("Child cannot be null.");
375          node.AddSubtree(child);
376        }
377      }
378      return node;
379    }
380  }
381}
Note: See TracBrowser for help on using the repository browser.