Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.EvolutionaryTracking/HeuristicLab.EvolutionaryTracking/3.4/Analyzers/SymbolicExpressionTreeFrequentPatternsAnalyzer.cs @ 9963

Last change on this file since 9963 was 9835, checked in by bburlacu, 11 years ago

#1772: Merged remaining trunk changes into the EvolutionaryTracking branch.

File size: 21.3 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2012 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Analysis;
26using HeuristicLab.Common;
27using HeuristicLab.Core;
28using HeuristicLab.Data;
29using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
30using HeuristicLab.Operators;
31using HeuristicLab.Optimization;
32using HeuristicLab.Parameters;
33using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
34using HeuristicLab.Problems.DataAnalysis.Symbolic;
35
36namespace HeuristicLab.EvolutionaryTracking {
37  /// <summary>
38  /// An analyzer that looks at individual lineages in the genealogy graph
39  /// </summary>
40  [Item("SymbolicExpressionTreeFrequentPatternsAnalyzer", "An operator that tries to find frequent tree fragments in the population")]
41  [StorableClass]
42  public sealed class SymbolicExpressionTreeFrequentPatternsAnalyzer : SingleSuccessorOperator, IAnalyzer {
43    #region Parameter names
44    private const string UpdateIntervalParameterName = "UpdateInterval";
45    private const string UpdateCounterParameterName = "UpdateCounter";
46    private const string ResultsParameterName = "Results";
47    private const string GenerationsParameterName = "Generations";
48    private const string MaximumGenerationsParameterName = "MaximumGenerations";
49    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
50    private const string RandomParameterName = "Random";
51    private const string MaximumFragmentDepthParameterName = "MaximumFragmentDepth";
52    private const string MaximumFragmentLengthParameterName = "MaximumFragmentLength";
53    private const string NumberOfFragmentsToGenerateParameterName = "NumberOfFragmentsToGenerate";
54    private const string FrequentFragmentOccurrenceThresholdParameterName = "FrequentFragmentOccurrenceThreshold";
55    private const string PopulationGraphParameterName = "PopulationGraph";
56    private const string CommonFragmentsParameterName = "CommonFragments";
57    private const string EnforceGrammarArityParameterName = "EnforceGrammarArity";
58    #endregion
59
60    #region Parameter properties
61    public ILookupParameter<IRandom> RandomParameter {
62      get { return (ILookupParameter<IRandom>)Parameters[RandomParameterName]; }
63    }
64    public ValueParameter<IntValue> UpdateIntervalParameter {
65      get { return (ValueParameter<IntValue>)Parameters[UpdateIntervalParameterName]; }
66    }
67    public ValueParameter<IntValue> UpdateCounterParameter {
68      get { return (ValueParameter<IntValue>)Parameters[UpdateCounterParameterName]; }
69    }
70    public LookupParameter<ResultCollection> ResultsParameter {
71      get { return (LookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
72    }
73    public LookupParameter<IntValue> GenerationsParameter {
74      get { return (LookupParameter<IntValue>)Parameters[GenerationsParameterName]; }
75    }
76    public LookupParameter<IntValue> MaximumGenerationsParameter {
77      get { return (LookupParameter<IntValue>)Parameters[MaximumGenerationsParameterName]; }
78    }
79    public IScopeTreeLookupParameter<ISymbolicExpressionTree> SymbolicExpressionTreeParameter {
80      get { return (IScopeTreeLookupParameter<ISymbolicExpressionTree>)Parameters[SymbolicExpressionTreeParameterName]; }
81    }
82    public ValueParameter<IntValue> MaximumFragmentDepthParameter {
83      get { return (ValueParameter<IntValue>)Parameters[MaximumFragmentDepthParameterName]; }
84    }
85    public ValueParameter<IntValue> MaximumFragmentLengthParameter {
86      get { return (ValueParameter<IntValue>)Parameters[MaximumFragmentLengthParameterName]; }
87    }
88    public ValueParameter<IntValue> NumberOfFragmentsToGenerateParameter {
89      get { return (ValueParameter<IntValue>)Parameters[NumberOfFragmentsToGenerateParameterName]; }
90    }
91    public ValueParameter<DoubleValue> FrequentFragmentOccurrenceThresholdParameter {
92      get { return (ValueParameter<DoubleValue>)Parameters[FrequentFragmentOccurrenceThresholdParameterName]; }
93    }
94    public ValueParameter<BoolValue> EnforceGrammarArityParameter {
95      get { return (ValueParameter<BoolValue>)Parameters[EnforceGrammarArityParameterName]; }
96    }
97    #endregion
98    #region Properties
99    public bool EnabledByDefault {
100      get { return true; }
101    }
102    public IntValue UpdateInterval {
103      get { return UpdateIntervalParameter.Value; }
104    }
105    public IntValue UpdateCounter {
106      get { return UpdateCounterParameter.Value; }
107    }
108    public IntValue Generations {
109      get { return GenerationsParameter.ActualValue; }
110    }
111    public IntValue MaximumGenerations {
112      get { return MaximumGenerationsParameter.ActualValue; }
113    }
114    public ResultCollection Results {
115      get { return ResultsParameter.ActualValue; }
116    }
117    public IntValue MaximumFragmentDepth {
118      get { return MaximumFragmentDepthParameter.Value; }
119    }
120    public IntValue MaximumFragmentLength {
121      get { return MaximumFragmentLengthParameter.Value; }
122    }
123    public IntValue NumberOfFragmentsToGenerate {
124      get { return NumberOfFragmentsToGenerateParameter.Value; }
125    }
126    public DoubleValue FrequentFragmentOccurrenceThreshold {
127      get { return FrequentFragmentOccurrenceThresholdParameter.Value; }
128    }
129    public BoolValue EnforceGrammarArity {
130      get { return EnforceGrammarArityParameter.Value; }
131    }
132    #endregion
133
134    [StorableConstructor]
135    private SymbolicExpressionTreeFrequentPatternsAnalyzer(bool deserializing) : base(deserializing) { }
136    private SymbolicExpressionTreeFrequentPatternsAnalyzer(SymbolicExpressionTreeFrequentPatternsAnalyzer original, Cloner cloner) : base(original, cloner) { }
137    public override IDeepCloneable Clone(Cloner cloner) {
138      return new SymbolicExpressionTreeFrequentPatternsAnalyzer(this, cloner);
139    }
140    public SymbolicExpressionTreeFrequentPatternsAnalyzer() {
141      #region Add parameters
142      Parameters.Add(new LookupParameter<IRandom>(RandomParameterName, "The random number generator to use."));
143      Parameters.Add(new ValueParameter<IntValue>(UpdateIntervalParameterName, "The interval in which the tree length analysis should be applied.", new IntValue(1)));
144      Parameters.Add(new ValueParameter<IntValue>(UpdateCounterParameterName, "The value which counts how many times the operator was called since the last update", new IntValue(0)));
145      Parameters.Add(new ValueLookupParameter<ResultCollection>(ResultsParameterName, "The results collection where the analysis values should be stored."));
146      Parameters.Add(new LookupParameter<IntValue>(GenerationsParameterName, "The number of generations so far"));
147      Parameters.Add(new ValueLookupParameter<IntValue>("MaximumGenerations", "The maximum number of generations which should be processed."));
148      Parameters.Add(new ScopeTreeLookupParameter<ISymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees to analyze."));
149      Parameters.Add(new ValueParameter<IntValue>(MaximumFragmentDepthParameterName, "The maximum allowed depth for generated fragments.", new IntValue(4)));
150      Parameters.Add(new ValueParameter<IntValue>(MaximumFragmentLengthParameterName, "The maximum allowed length for generated fragments.", new IntValue(15)));
151      Parameters.Add(new ValueParameter<IntValue>(NumberOfFragmentsToGenerateParameterName, "The number of fragments to generate.", new IntValue(1000)));
152      Parameters.Add(new ValueParameter<DoubleValue>(FrequentFragmentOccurrenceThresholdParameterName, "Occurrence used to discriminate frequent fragments.", new DoubleValue(0.3)));
153      Parameters.Add(new ValueParameter<BoolValue>(EnforceGrammarArityParameterName, "Whether to use minimum arity as defined by the grammar", new BoolValue(true)));
154      #endregion
155      UpdateCounterParameter.Hidden = true;
156      UpdateIntervalParameter.Hidden = true;
157      MaximumFragmentDepthParameter.Hidden = false;
158      MaximumFragmentLengthParameter.Hidden = false;
159
160      //this.Successor = new SymbolicExpressionTreeGenealogyGraphBuilder();
161    }
162
163    #region AfterDeserialization
164    [StorableHook(HookType.AfterDeserialization)]
165    private void AfterDeserialization() {
166      if (!Parameters.ContainsKey(MaximumFragmentDepthParameterName)) {
167        Parameters.Add(new ValueParameter<IntValue>(MaximumFragmentDepthParameterName, "The maximum allowed depth for generated fragments.", new IntValue(4)));
168      }
169      if (!Parameters.ContainsKey(MaximumFragmentLengthParameterName)) {
170        Parameters.Add(new ValueParameter<IntValue>(MaximumFragmentLengthParameterName, "The maximum allowed length for generated fragments.", new IntValue(15)));
171      }
172      if (!Parameters.ContainsKey(NumberOfFragmentsToGenerateParameterName)) {
173        Parameters.Add(new ValueParameter<IntValue>(NumberOfFragmentsToGenerateParameterName, "The number of fragments to generate.", new IntValue(1000)));
174      }
175      if (!Parameters.ContainsKey(FrequentFragmentOccurrenceThresholdParameterName)) {
176        Parameters.Add(new ValueParameter<DoubleValue>(FrequentFragmentOccurrenceThresholdParameterName, "Occurrence used to discriminate frequent fragments.", new DoubleValue(0.3)));
177      }
178      if (!Parameters.ContainsKey(EnforceGrammarArityParameterName)) {
179        Parameters.Add(new ValueParameter<BoolValue>(EnforceGrammarArityParameterName, "Whether to use minimum arity as defined by the grammar", new BoolValue(true)));
180      }
181    }
182    #endregion
183
184    #region IStatefulItem members
185    public override void InitializeState() {
186      base.InitializeState();
187      UpdateCounter.Value = 0;
188    }
189
190    public override void ClearState() {
191      base.ClearState();
192      UpdateCounter.Value = 0;
193    }
194    #endregion
195
196    public override IOperation Apply() {
197      UpdateCounter.Value++;
198      if (UpdateCounter.Value == UpdateInterval.Value) {
199        if (!Results.ContainsKey("SymbolGraph")) return base.Apply();
200
201        UpdateCounter.Value = 0;
202
203        var trees = SymbolicExpressionTreeParameter.ActualValue.ToList();
204        var grammar = trees[0].Root.Grammar;
205
206        var graph = Results["SymbolGraph"].Value as SymbolGraph;
207
208        var similarityComparer = new SymbolicExpressionTreeNodeSimilarityComparer { MatchVariableNames = true };
209        var fragmentSimilarityComparer = new SymbolicExpressionTreeFragmentSimilarityComparer { SimilarityComparer = similarityComparer };
210        // generate fragments out of the probability graph and match them against the population
211        var fragments = GenerateFragments(graph, grammar, NumberOfFragmentsToGenerate.Value, MaximumFragmentDepth.Value, MaximumFragmentLength.Value).ToList();
212        // subtree-sort fragments to bring them to a canonical form
213        // var canonicalSorter = new SymbolicExpressionTreeCanonicalSorter();
214
215        if (!Results.ContainsKey(CommonFragmentsParameterName)) {
216          Results.Add(new Result(CommonFragmentsParameterName, new ItemDictionary<IFragment, DoubleValue>()));
217        }
218
219        var mostCommonFragmentsResult = Results[CommonFragmentsParameterName].Value as ItemDictionary<IFragment, DoubleValue>; // should never be null
220        var mostCommonFragments = mostCommonFragmentsResult.Select(x => x.Key).ToList();
221        mostCommonFragments.AddRange(fragments);
222        mostCommonFragmentsResult.Clear();
223
224        mostCommonFragments = mostCommonFragments.Distinct(fragmentSimilarityComparer).ToList();
225
226        foreach (var fragment in mostCommonFragments) {
227          var frequency = trees.Count(x => x.Root.ContainsFragment(fragment, similarityComparer)) / (double)trees.Count;
228          if (frequency > 0) mostCommonFragmentsResult.Add(fragment, new DoubleValue(frequency));
229        }
230
231        // update fragment frequencies DataTable
232        if (!Results.ContainsKey("FrequentFragments")) {
233          var dt = new DataTable("Frequent Fragments");
234          Results.Add(new Result("FrequentFragments", dt));
235        }
236
237        var fragmentFrequenciesTable = Results["FrequentFragments"].Value as DataTable; // should never be null
238        foreach (var fragment in mostCommonFragmentsResult) {
239          var prefixString = fragment.Key.Root.ToPrefixString();
240          if (!fragmentFrequenciesTable.Rows.ContainsKey(prefixString)) {
241            if (fragment.Value.Value > FrequentFragmentOccurrenceThreshold.Value) {
242              fragmentFrequenciesTable.Rows.Add(new DataRow(prefixString) { VisualProperties = { StartIndexZero = true } });
243              var row = fragmentFrequenciesTable.Rows[prefixString];
244              if (!Results.ContainsKey(PopulationGraphParameterName)) {
245                row.Values.AddRange(Enumerable.Repeat(0.0, Generations.Value));
246              } else {
247                var populationGraph = Results[PopulationGraphParameterName].Value as SymbolicExpressionTreeGenealogyGraph;
248                var layers = populationGraph.Nodes.Where(n => n.Rank % 1 == 0).GroupBy(n => n.Rank).OrderBy(g => g.Key);
249                var f = fragment;
250                row.Values.AddRange(from g in layers
251                                    select g.Count(n => n.SymbolicExpressionTree.Root.ContainsFragment(f.Key, similarityComparer)) / (double)g.Count());
252              }
253              fragmentFrequenciesTable.Rows[prefixString].Values.Add(fragment.Value.Value);
254            }
255          } else {
256            fragmentFrequenciesTable.Rows[prefixString].Values.Add(fragment.Value.Value);
257          }
258        }
259        foreach (var row in fragmentFrequenciesTable.Rows.Where(row => row.Values.Count == Generations.Value))
260          row.Values.Add(0.0);
261
262        #region Write fragment information to file
263
264        //        using (var streamWriter = new StreamWriter(Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.UserProfile), "CommonFragments.txt"))) {
265        //          streamWriter.WriteLine("Median edge weight: " + medianWeight);
266        //          streamWriter.WriteLine("Average edge weight: " + averageWeight);
267        //          streamWriter.WriteLine("Symbol frequencies: ");
268        //          double sum = graph.Nodes.Sum(node => node.Weight);
269        //          foreach (var symbolNode in graph.Nodes.OrderBy(node => node.Weight)) {
270        //            streamWriter.WriteLine("{0}\t{1}", symbolNode.Label, String.Format("{0:0.00}", symbolNode.Weight / sum));
271        //          }
272        //          streamWriter.WriteLine();
273        //          streamWriter.WriteLine("Fragment root symbol frequencies:");
274        //          foreach (var group in fragments.GroupBy(x => x.Root.Symbol.Name)) {
275        //            streamWriter.WriteLine("{0}\t{1}", group.Key, String.Format("{0:0.00}", (double)group.Count() / NumberOfFragmentsToGenerate.Value));
276        //          }
277        //          streamWriter.WriteLine();
278        //          foreach (var fragment in mostCommonFragments) {
279        //            streamWriter.WriteLine(fragment.Key.ToPrefixString());
280        //            streamWriter.WriteLine("Frequency in last generation: {0}, Contained in best individual: {1}",
281        //              fragment.Value, trees[0].ContainsFragment(fragment.Key, SimilarityLevel.High) ? "YES" : "NO");
282        //            SymbolicExpressionTreeMatching.RenderNode(streamWriter, fragment.Key, string.Empty);
283        //            if (Results.ContainsKey(PopulationGraphParameterName)) {
284        //              var populationGraph = (SymbolicExpressionTreeGenealogyGraph)Results[PopulationGraphParameterName].Value;
285        //              streamWriter.Write("Frequencies per generation: ");
286        //              var groups = populationGraph.Nodes.GroupBy(n => n.Rank).OrderBy(g => g.Key);
287        //              var frequencies = (from g in groups.Where(g => g.Key == (int)g.Key)
288        //                                 let freq = g.Count(n => n.SymbolicExpressionTree.ContainsFragment(fragment.Key, SimilarityLevel.High))
289        //                                 select freq);
290        //              foreach (var freq in frequencies)
291        //                streamWriter.Write(freq / count + " ");
292        //              streamWriter.WriteLine();
293        //            } else {
294        //              streamWriter.WriteLine("Frequecy in current population: {0}", fragment.Value);
295        //            }
296        //            streamWriter.WriteLine("\n");
297        //          }
298        //          streamWriter.WriteLine("Average fragment length: {0}", avgFragmentLength);
299        //          streamWriter.WriteLine("Average fragment depth: {0}", avgFragmentDepth);
300        //          streamWriter.WriteLine("Percentage of unique generated fragments: {0}", distinctFragments.Count() / (double)fragments.Count);
301        //        }
302
303        #endregion
304      }
305      return base.Apply();
306    }
307
308    private IEnumerable<IFragment> GenerateFragments(SymbolGraph graph, ISymbolicExpressionTreeGrammar grammar, int numFragments, int maxDepth, int maxLength) {
309      var fragments = new List<IFragment>();
310      var random = RandomParameter.ActualValue;
311      var count = SymbolicExpressionTreeParameter.ActualValue.Count();
312      var threshold = count * FrequentFragmentOccurrenceThreshold.Value;
313      var availableSymbols = graph.Nodes.Where(x => x.OutEdges != null && x.OutEdges.Count > 0 && x.Weight >= threshold).ToList();
314      var weights = availableSymbols.Select(x => x.Weight).ToList();
315
316      if (!(availableSymbols.Count == 0 || weights.Count == 0))
317        for (int i = 0; i != numFragments; ++i) {
318          var symbol = availableSymbols.SelectRandom(weights, random);
319          var fragment = CreateFragment(symbol, random, grammar, threshold, maxDepth, maxLength, EnforceGrammarArity.Value);
320          fragments.Add(new Fragment { Root = fragment });
321        }
322      return fragments;
323    }
324
325    private static ISymbolicExpressionTreeNode CreateFragment(SymbolNode symbolNode, IRandom random, ISymbolicExpressionTreeGrammar grammar, double threshold, int depthLimit, int lengthLimit, bool enforceGrammarArity = true) {
326      var node = symbolNode.Symbol.CreateTreeNode();
327      if (node.HasLocalParameters) node.ResetLocalParameters(random);
328      if (node is VariableTreeNode) (node as VariableTreeNode).VariableName = symbolNode.Label;
329
330      if (symbolNode.OutEdges == null || symbolNode.OutEdges.Count == 0 || depthLimit < 0)
331        return node; // node is a leaf so we just return it
332
333      int maxCount = grammar.GetMaximumSubtreeCount(node.Symbol);
334      if (maxCount > 0) {
335        int minCount = grammar.GetMinimumSubtreeCount(node.Symbol);
336        var arity = random.Next(enforceGrammarArity ? minCount : 1, maxCount + 1);
337        var possibleChildConnections = new List<SymbolArc>();
338
339        if (depthLimit <= 2 || lengthLimit <= maxCount) { // if we are near the depth limit then we want to add leafs on the next level
340          possibleChildConnections.AddRange(from e in symbolNode.OutEdges
341                                            let n = (SymbolNode)e.Target
342                                            where grammar.GetMaximumSubtreeCount(n.Symbol) == 0
343                                            where n.Weight >= threshold
344                                            select e);
345        }
346        // if there are no available connections towards leaf nodes (in case depthLimit <= 2)
347        // or in case the depthLimit has not been reached yet
348        if (possibleChildConnections.Count == 0)
349          possibleChildConnections.AddRange(symbolNode.OutEdges.Where(a => ((SymbolNode)a.Target).Weight >= threshold)); // this will not be empty since we check above if the OutEdges are null
350        var weights = new double[possibleChildConnections.Count];
351
352        for (int i = 0; i != arity; ++i) {
353          // adjust weights according to the probability a child will find itself on a certain argument position
354          for (int j = 0; j != possibleChildConnections.Count; ++j) {
355            var childSymbolNode = (SymbolNode)possibleChildConnections[j].Target;
356            double sumPos = childSymbolNode.Positions.Sum(x => x.Value);
357            int v; childSymbolNode.Positions.TryGetValue(i, out v);
358            weights[j] = possibleChildConnections[j].Weight * v / sumPos;
359          }
360
361          if (weights.Sum().IsAlmost(0.0)) continue;
362
363          var selectedConnection = possibleChildConnections.SelectRandom(weights, random);
364          var selectedChildSymbolNode = selectedConnection.Target as SymbolNode;
365          var child = CreateFragment(selectedChildSymbolNode, random, grammar, threshold, depthLimit - 1, --lengthLimit);
366          if (child == null) throw new Exception("Child cannot be null.");
367          node.AddSubtree(child);
368        }
369      }
370      return node;
371    }
372  }
373}
Note: See TracBrowser for help on using the repository browser.