Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.EvolutionaryTracking/HeuristicLab.EvolutionaryTracking/3.4/Analyzers/SymbolicExpressionTreeFrequentPatternsAnalyzer.cs @ 9238

Last change on this file since 9238 was 9238, checked in by bburlacu, 11 years ago

#1772: Added base class for the fragment analyzers. Improved analyzers, added SymbolicExpressionTreeRelativeFragmentDepthAnalyzer. Added LineageExplorer.

File size: 21.6 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2012 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Analysis;
26using HeuristicLab.Common;
27using HeuristicLab.Core;
28using HeuristicLab.Data;
29using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
30using HeuristicLab.Operators;
31using HeuristicLab.Optimization;
32using HeuristicLab.Parameters;
33using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
34using HeuristicLab.Problems.DataAnalysis.Symbolic;
35
36namespace HeuristicLab.EvolutionaryTracking {
37  /// <summary>
38  /// An analyzer that looks at individual lineages in the genealogy graph
39  /// </summary>
40  [Item("SymbolicExpressionTreeFrequentPatternsAnalyzer", "An operator that tries to find frequent tree fragments in the population")]
41  [StorableClass]
42  public sealed class SymbolicExpressionTreeFrequentPatternsAnalyzer : SingleSuccessorOperator, IAnalyzer {
43    #region Parameter names
44    private const string UpdateIntervalParameterName = "UpdateInterval";
45    private const string UpdateCounterParameterName = "UpdateCounter";
46    private const string ResultsParameterName = "Results";
47    private const string GenerationsParameterName = "Generations";
48    private const string MaximumGenerationsParameterName = "MaximumGenerations";
49    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
50    private const string RandomParameterName = "Random";
51    private const string MaximumFragmentDepthParameterName = "MaximumFragmentDepth";
52    private const string MaximumFragmentLengthParameterName = "MaximumFragmentLength";
53    private const string NumberOfFragmentsToGenerateParameterName = "NumberOfFragmentsToGenerate";
54    private const string FrequentFragmentOccurrenceThresholdParameterName = "FrequentFragmentOccurrenceThreshold";
55    private const string PopulationGraphParameterName = "PopulationGraph";
56    private const string CommonFragmentsParameterName = "CommonFragments";
57    private const string EnforceGrammarArityParameterName = "EnforceGrammarArity";
58    #endregion
59
60    #region Parameter properties
61    public ILookupParameter<IRandom> RandomParameter {
62      get { return (ILookupParameter<IRandom>)Parameters[RandomParameterName]; }
63    }
64    public ValueParameter<IntValue> UpdateIntervalParameter {
65      get { return (ValueParameter<IntValue>)Parameters[UpdateIntervalParameterName]; }
66    }
67    public ValueParameter<IntValue> UpdateCounterParameter {
68      get { return (ValueParameter<IntValue>)Parameters[UpdateCounterParameterName]; }
69    }
70    public LookupParameter<ResultCollection> ResultsParameter {
71      get { return (LookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
72    }
73    public LookupParameter<IntValue> GenerationsParameter {
74      get { return (LookupParameter<IntValue>)Parameters[GenerationsParameterName]; }
75    }
76    public LookupParameter<IntValue> MaximumGenerationsParameter {
77      get { return (LookupParameter<IntValue>)Parameters[MaximumGenerationsParameterName]; }
78    }
79    public IScopeTreeLookupParameter<ISymbolicExpressionTree> SymbolicExpressionTreeParameter {
80      get { return (IScopeTreeLookupParameter<ISymbolicExpressionTree>)Parameters[SymbolicExpressionTreeParameterName]; }
81    }
82    public ValueParameter<IntValue> MaximumFragmentDepthParameter {
83      get { return (ValueParameter<IntValue>)Parameters[MaximumFragmentDepthParameterName]; }
84    }
85    public ValueParameter<IntValue> MaximumFragmentLengthParameter {
86      get { return (ValueParameter<IntValue>)Parameters[MaximumFragmentLengthParameterName]; }
87    }
88    public ValueParameter<IntValue> NumberOfFragmentsToGenerateParameter {
89      get { return (ValueParameter<IntValue>)Parameters[NumberOfFragmentsToGenerateParameterName]; }
90    }
91    public ValueParameter<DoubleValue> FrequentFragmentOccurrenceThresholdParameter {
92      get { return (ValueParameter<DoubleValue>)Parameters[FrequentFragmentOccurrenceThresholdParameterName]; }
93    }
94    public ValueParameter<BoolValue> EnforceGrammarArityParameter {
95      get { return (ValueParameter<BoolValue>)Parameters[EnforceGrammarArityParameterName]; }
96    }
97    #endregion
98    #region Properties
99    public bool EnabledByDefault {
100      get { return true; }
101    }
102    public IntValue UpdateInterval {
103      get { return UpdateIntervalParameter.Value; }
104    }
105    public IntValue UpdateCounter {
106      get { return UpdateCounterParameter.Value; }
107    }
108    public IntValue Generations {
109      get { return GenerationsParameter.ActualValue; }
110    }
111    public IntValue MaximumGenerations {
112      get { return MaximumGenerationsParameter.ActualValue; }
113    }
114    public ResultCollection Results {
115      get { return ResultsParameter.ActualValue; }
116    }
117    public IntValue MaximumFragmentDepth {
118      get { return MaximumFragmentDepthParameter.Value; }
119    }
120    public IntValue MaximumFragmentLength {
121      get { return MaximumFragmentLengthParameter.Value; }
122    }
123    public IntValue NumberOfFragmentsToGenerate {
124      get { return NumberOfFragmentsToGenerateParameter.Value; }
125    }
126    public DoubleValue FrequentFragmentOccurrenceThreshold {
127      get { return FrequentFragmentOccurrenceThresholdParameter.Value; }
128    }
129    public BoolValue EnforceGrammarArity {
130      get { return EnforceGrammarArityParameter.Value; }
131    }
132    #endregion
133
134    [StorableConstructor]
135    private SymbolicExpressionTreeFrequentPatternsAnalyzer(bool deserializing) : base(deserializing) { }
136    private SymbolicExpressionTreeFrequentPatternsAnalyzer(SymbolicExpressionTreeFrequentPatternsAnalyzer original, Cloner cloner) : base(original, cloner) { }
137    public override IDeepCloneable Clone(Cloner cloner) {
138      return new SymbolicExpressionTreeFrequentPatternsAnalyzer(this, cloner);
139    }
140    public SymbolicExpressionTreeFrequentPatternsAnalyzer() {
141      #region Add parameters
142      Parameters.Add(new LookupParameter<IRandom>(RandomParameterName, "The random number generator to use."));
143      Parameters.Add(new ValueParameter<IntValue>(UpdateIntervalParameterName, "The interval in which the tree length analysis should be applied.", new IntValue(1)));
144      Parameters.Add(new ValueParameter<IntValue>(UpdateCounterParameterName, "The value which counts how many times the operator was called since the last update", new IntValue(0)));
145      Parameters.Add(new ValueLookupParameter<ResultCollection>(ResultsParameterName, "The results collection where the analysis values should be stored."));
146      Parameters.Add(new LookupParameter<IntValue>(GenerationsParameterName, "The number of generations so far"));
147      Parameters.Add(new ValueLookupParameter<IntValue>("MaximumGenerations", "The maximum number of generations which should be processed."));
148      Parameters.Add(new ScopeTreeLookupParameter<ISymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees to analyze."));
149      Parameters.Add(new ValueParameter<IntValue>(MaximumFragmentDepthParameterName, "The maximum allowed depth for generated fragments.", new IntValue(4)));
150      Parameters.Add(new ValueParameter<IntValue>(MaximumFragmentLengthParameterName, "The maximum allowed length for generated fragments.", new IntValue(15)));
151      Parameters.Add(new ValueParameter<IntValue>(NumberOfFragmentsToGenerateParameterName, "The number of fragments to generate.", new IntValue(1000)));
152      Parameters.Add(new ValueParameter<DoubleValue>(FrequentFragmentOccurrenceThresholdParameterName, "Occurrence used to discriminate frequent fragments.", new DoubleValue(0.3)));
153      Parameters.Add(new ValueParameter<BoolValue>(EnforceGrammarArityParameterName, "Whether to use minimum arity as defined by the grammar", new BoolValue(true)));
154      #endregion
155      UpdateCounterParameter.Hidden = true;
156      UpdateIntervalParameter.Hidden = true;
157      MaximumFragmentDepthParameter.Hidden = false;
158      MaximumFragmentLengthParameter.Hidden = false;
159
160      //this.Successor = new SymbolicExpressionTreeGenealogyGraphBuilder();
161    }
162
163    #region AfterDeserialization
164    [StorableHook(HookType.AfterDeserialization)]
165    private void AfterDeserialization() {
166      if (!Parameters.ContainsKey(MaximumFragmentDepthParameterName)) {
167        Parameters.Add(new ValueParameter<IntValue>(MaximumFragmentDepthParameterName, "The maximum allowed depth for generated fragments.", new IntValue(4)));
168      }
169      if (!Parameters.ContainsKey(MaximumFragmentLengthParameterName)) {
170        Parameters.Add(new ValueParameter<IntValue>(MaximumFragmentLengthParameterName, "The maximum allowed length for generated fragments.", new IntValue(15)));
171      }
172      if (!Parameters.ContainsKey(NumberOfFragmentsToGenerateParameterName)) {
173        Parameters.Add(new ValueParameter<IntValue>(NumberOfFragmentsToGenerateParameterName, "The number of fragments to generate.", new IntValue(1000)));
174      }
175      if (!Parameters.ContainsKey(FrequentFragmentOccurrenceThresholdParameterName)) {
176        Parameters.Add(new ValueParameter<DoubleValue>(FrequentFragmentOccurrenceThresholdParameterName, "Occurrence used to discriminate frequent fragments.", new DoubleValue(0.3)));
177      }
178      if (!Parameters.ContainsKey(EnforceGrammarArityParameterName)) {
179        Parameters.Add(new ValueParameter<BoolValue>(EnforceGrammarArityParameterName, "Whether to use minimum arity as defined by the grammar", new BoolValue(true)));
180      }
181    }
182    #endregion
183
184    #region IStatefulItem members
185    public override void InitializeState() {
186      base.InitializeState();
187      UpdateCounter.Value = 0;
188    }
189
190    public override void ClearState() {
191      base.ClearState();
192      UpdateCounter.Value = 0;
193    }
194    #endregion
195
196    public override IOperation Apply() {
197      UpdateCounter.Value++;
198      if (UpdateCounter.Value == UpdateInterval.Value) {
199        if (!Results.ContainsKey("SymbolGraph")) return base.Apply();
200
201        UpdateCounter.Value = 0;
202
203        var trees = SymbolicExpressionTreeParameter.ActualValue.ToList();
204        var grammar = trees[0].Root.Grammar;
205
206        // bring trees to a canonical form to eliminate permuted fragments
207        var canonicalSorter = new SymbolicExpressionTreeCanonicalSorter();
208        foreach (var t in trees)
209          canonicalSorter.SortSubtrees(t);
210
211        var graph = Results["SymbolGraph"].Value as SymbolGraph;
212
213        var similarityComparer = new SymbolicExpressionTreeNodeSimilarityComparer { MatchVariableNames = true };
214        var fragmentSimilarityComparer = new SymbolicExpressionTreeFragmentSimilarityComparer { SimilarityComparer = similarityComparer };
215        // generate fragments out of the probability graph and match them against the population
216        var fragments = GenerateFragments(graph, grammar, NumberOfFragmentsToGenerate.Value, MaximumFragmentDepth.Value, MaximumFragmentLength.Value).ToList();
217        // subtree-sort fragments to bring them to a canonical form
218        // var canonicalSorter = new SymbolicExpressionTreeCanonicalSorter();
219        foreach (var fragment in fragments)
220          canonicalSorter.SortSubtrees(fragment.Root);
221
222        if (!Results.ContainsKey(CommonFragmentsParameterName)) {
223          Results.Add(new Result(CommonFragmentsParameterName, new ItemDictionary<IFragment, DoubleValue>()));
224        }
225
226        var mostCommonFragmentsResult = Results[CommonFragmentsParameterName].Value as ItemDictionary<IFragment, DoubleValue>; // should never be null
227        var mostCommonFragments = mostCommonFragmentsResult.Select(x => x.Key).ToList();
228        mostCommonFragments.AddRange(fragments);
229        mostCommonFragmentsResult.Clear();
230
231        mostCommonFragments = mostCommonFragments.Distinct(fragmentSimilarityComparer).ToList();
232
233        foreach (var fragment in mostCommonFragments) {
234          var frequency = trees.Count(x => x.Root.ContainsFragment(fragment, similarityComparer)) / (double)trees.Count;
235          if (frequency > 0) mostCommonFragmentsResult.Add(fragment, new DoubleValue(frequency));
236        }
237
238        // update fragment frequencies DataTable
239        if (!Results.ContainsKey("FrequentFragments")) {
240          var dt = new DataTable("Frequent Fragments");
241          Results.Add(new Result("FrequentFragments", dt));
242        }
243
244        var fragmentFrequenciesTable = Results["FrequentFragments"].Value as DataTable; // should never be null
245        foreach (var fragment in mostCommonFragmentsResult) {
246          var prefixString = fragment.Key.Root.ToPrefixString();
247          if (!fragmentFrequenciesTable.Rows.ContainsKey(prefixString)) {
248            if (fragment.Value.Value > FrequentFragmentOccurrenceThreshold.Value) {
249              fragmentFrequenciesTable.Rows.Add(new DataRow(prefixString) { VisualProperties = { StartIndexZero = true } });
250              var row = fragmentFrequenciesTable.Rows[prefixString];
251              if (!Results.ContainsKey(PopulationGraphParameterName)) {
252                row.Values.AddRange(Enumerable.Repeat(0.0, Generations.Value));
253              } else {
254                var populationGraph = Results[PopulationGraphParameterName].Value as SymbolicExpressionTreeGenealogyGraph;
255                var layers = populationGraph.Nodes.Where(n => n.Rank % 1 == 0).GroupBy(n => n.Rank).OrderBy(g => g.Key);
256                var f = fragment;
257                row.Values.AddRange(from g in layers
258                                    select g.Count(n => n.SymbolicExpressionTree.Root.ContainsFragment(f.Key, similarityComparer)) / (double)g.Count());
259              }
260              fragmentFrequenciesTable.Rows[prefixString].Values.Add(fragment.Value.Value);
261            }
262          } else {
263            fragmentFrequenciesTable.Rows[prefixString].Values.Add(fragment.Value.Value);
264          }
265        }
266        foreach (var row in fragmentFrequenciesTable.Rows.Where(row => row.Values.Count == Generations.Value))
267          row.Values.Add(0.0);
268
269        #region Write fragment information to file
270
271        //        using (var streamWriter = new StreamWriter(Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.UserProfile), "CommonFragments.txt"))) {
272        //          streamWriter.WriteLine("Median edge weight: " + medianWeight);
273        //          streamWriter.WriteLine("Average edge weight: " + averageWeight);
274        //          streamWriter.WriteLine("Symbol frequencies: ");
275        //          double sum = graph.Nodes.Sum(node => node.Weight);
276        //          foreach (var symbolNode in graph.Nodes.OrderBy(node => node.Weight)) {
277        //            streamWriter.WriteLine("{0}\t{1}", symbolNode.Label, String.Format("{0:0.00}", symbolNode.Weight / sum));
278        //          }
279        //          streamWriter.WriteLine();
280        //          streamWriter.WriteLine("Fragment root symbol frequencies:");
281        //          foreach (var group in fragments.GroupBy(x => x.Root.Symbol.Name)) {
282        //            streamWriter.WriteLine("{0}\t{1}", group.Key, String.Format("{0:0.00}", (double)group.Count() / NumberOfFragmentsToGenerate.Value));
283        //          }
284        //          streamWriter.WriteLine();
285        //          foreach (var fragment in mostCommonFragments) {
286        //            streamWriter.WriteLine(fragment.Key.ToPrefixString());
287        //            streamWriter.WriteLine("Frequency in last generation: {0}, Contained in best individual: {1}",
288        //              fragment.Value, trees[0].ContainsFragment(fragment.Key, SimilarityLevel.High) ? "YES" : "NO");
289        //            SymbolicExpressionTreeMatching.RenderNode(streamWriter, fragment.Key, string.Empty);
290        //            if (Results.ContainsKey(PopulationGraphParameterName)) {
291        //              var populationGraph = (SymbolicExpressionTreeGenealogyGraph)Results[PopulationGraphParameterName].Value;
292        //              streamWriter.Write("Frequencies per generation: ");
293        //              var groups = populationGraph.Nodes.GroupBy(n => n.Rank).OrderBy(g => g.Key);
294        //              var frequencies = (from g in groups.Where(g => g.Key == (int)g.Key)
295        //                                 let freq = g.Count(n => n.SymbolicExpressionTree.ContainsFragment(fragment.Key, SimilarityLevel.High))
296        //                                 select freq);
297        //              foreach (var freq in frequencies)
298        //                streamWriter.Write(freq / count + " ");
299        //              streamWriter.WriteLine();
300        //            } else {
301        //              streamWriter.WriteLine("Frequecy in current population: {0}", fragment.Value);
302        //            }
303        //            streamWriter.WriteLine("\n");
304        //          }
305        //          streamWriter.WriteLine("Average fragment length: {0}", avgFragmentLength);
306        //          streamWriter.WriteLine("Average fragment depth: {0}", avgFragmentDepth);
307        //          streamWriter.WriteLine("Percentage of unique generated fragments: {0}", distinctFragments.Count() / (double)fragments.Count);
308        //        }
309
310        #endregion
311      }
312      return base.Apply();
313    }
314
315    private IEnumerable<IFragment> GenerateFragments(SymbolGraph graph, ISymbolicExpressionTreeGrammar grammar, int numFragments, int maxDepth, int maxLength) {
316      var fragments = new List<IFragment>();
317      var random = RandomParameter.ActualValue;
318      var count = SymbolicExpressionTreeParameter.ActualValue.Count();
319      var threshold = count * FrequentFragmentOccurrenceThreshold.Value;
320      var availableSymbols = graph.Nodes.Where(x => x.OutEdges != null && x.OutEdges.Count > 0 && x.Weight >= threshold).ToList();
321      var weights = availableSymbols.Select(x => x.Weight).ToList();
322
323      if (!(availableSymbols.Count == 0 || weights.Count == 0))
324        for (int i = 0; i != numFragments; ++i) {
325          var symbol = availableSymbols.SelectRandom(weights, random);
326          var fragment = CreateFragment(symbol, random, grammar, threshold, maxDepth, maxLength, EnforceGrammarArity.Value);
327          fragments.Add(new Fragment(fragment));
328        }
329      return fragments;
330    }
331
332    private static ISymbolicExpressionTreeNode CreateFragment(SymbolNode symbolNode, IRandom random, ISymbolicExpressionTreeGrammar grammar, double threshold, int depthLimit, int lengthLimit, bool enforceGrammarArity = true) {
333      var node = symbolNode.Symbol.CreateTreeNode();
334      if (node.HasLocalParameters) node.ResetLocalParameters(random);
335      if (node is VariableTreeNode) (node as VariableTreeNode).VariableName = symbolNode.Label;
336
337      if (symbolNode.OutEdges == null || symbolNode.OutEdges.Count == 0 || depthLimit < 0)
338        return node; // node is a leaf so we just return it
339
340      int maxCount = grammar.GetMaximumSubtreeCount(node.Symbol);
341      if (maxCount > 0) {
342        int minCount = grammar.GetMinimumSubtreeCount(node.Symbol);
343        var arity = random.Next(enforceGrammarArity ? minCount : 1, maxCount + 1);
344        var possibleChildConnections = new List<Arc>();
345
346        if (depthLimit <= 2 || lengthLimit <= maxCount) { // if we are near the depth limit then we want to add leafs on the next level
347          possibleChildConnections.AddRange(from e in symbolNode.OutEdges
348                                            let n = (SymbolNode)e.Target
349                                            where grammar.GetMaximumSubtreeCount(n.Symbol) == 0
350                                            where n.Weight >= threshold
351                                            select e);
352        }
353        // if there are no available connections towards leaf nodes (in case depthLimit <= 2)
354        // or in case the depthLimit has not been reached yet
355        if (possibleChildConnections.Count == 0)
356          possibleChildConnections.AddRange(symbolNode.OutEdges.Where(a => ((SymbolNode)a.Target).Weight >= threshold)); // this will not be empty since we check above if the OutEdges are null
357        var weights = new double[possibleChildConnections.Count];
358
359        for (int i = 0; i != arity; ++i) {
360          // adjust weights according to the probability a child will find itself on a certain argument position
361          for (int j = 0; j != possibleChildConnections.Count; ++j) {
362            var childSymbolNode = (SymbolNode)possibleChildConnections[j].Target;
363            double sumPos = childSymbolNode.Positions.Sum(x => x.Value);
364            int v; childSymbolNode.Positions.TryGetValue(i, out v);
365            weights[j] = possibleChildConnections[j].Weight * v / sumPos;
366          }
367
368          if (weights.Sum().IsAlmost(0.0)) continue;
369
370          var selectedConnection = possibleChildConnections.SelectRandom(weights, random);
371          var selectedChildSymbolNode = selectedConnection.Target as SymbolNode;
372          var child = CreateFragment(selectedChildSymbolNode, random, grammar, threshold, depthLimit - 1, --lengthLimit);
373          if (child == null) throw new Exception("Child cannot be null.");
374          node.AddSubtree(child);
375        }
376      }
377      return node;
378    }
379  }
380}
Note: See TracBrowser for help on using the repository browser.