Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.EvolutionaryTracking/HeuristicLab.EvolutionaryTracking/3.4/Analyzers/SymbolicExpressionTreeFragmentsAnalyzer.cs @ 7997

Last change on this file since 7997 was 7997, checked in by bburlacu, 12 years ago

#1772: New analyzer: SymbolicExpressionTreeRelativeLengthAnalyzer. Rewrote the SymbolicExpressionTreeFragmentsAnalyzer, added generic wrapper to wrap HL objects as items.

File size: 20.3 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2012 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System.Collections.Generic;
23using System.Linq;
24using HeuristicLab.Analysis;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29using HeuristicLab.Operators;
30using HeuristicLab.Optimization;
31using HeuristicLab.Parameters;
32using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
33// type definitions for convenience
34using CloneMapType = HeuristicLab.Core.ItemDictionary<HeuristicLab.Core.IItem, HeuristicLab.Core.IItem>;
35using SymbolicExpressionTreeNodeItem = HeuristicLab.EvolutionaryTracking.GenericWrapper<HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.SymbolicExpressionTreeNode>;
36using TraceMapType = HeuristicLab.Core.ItemDictionary<HeuristicLab.Core.IItem, HeuristicLab.Core.IItemList<HeuristicLab.Core.IItem>>;
37
38namespace HeuristicLab.EvolutionaryTracking {
39  /// <summary>
40  /// An operator that gathers information on tree fragments that get passed from one generation to the next via genetic operators
41  /// (Tracking of genetic variance and heredity)
42  /// </summary>
43  [Item("SymbolicExpressionTreeFragmentsAnalyzer", "An operator that provides statistics about crossover fragments")]
44  [StorableClass]
45  public sealed class SymbolicExpressionTreeFragmentsAnalyzer : SingleSuccessorOperator, IAnalyzer {
46    private const string UpdateIntervalParameterName = "UpdateInterval";
47    private const string UpdateCounterParameterName = "UpdateCounter";
48    private const string ResultsParameterName = "Results";
49    private const string SecondaryTraceMapParameterName = "SecondaryTraceMap";
50    private const string SecondaryFragmentMapParameterName = "SecondaryFragmentMap";
51    private const string SecondaryCloneMapParameterName = "SecondaryCloneMap";
52    private const string GlobalTraceMapParameterName = "GlobalTraceMap";
53    private const string GlobalCloneMapParameterName = "GlobalCloneMap";
54    private const string GlobalFragmentMapParameterName = "GlobalFragmentMap";
55    private const string GenerationsParameterName = "Generations";
56    private const string FragmentStatisticsParameterName = "FragmentStatistics";
57    private const string FragmentLengthsDistributionParametersName = "FragmentLengths";
58    private const string FragmentFrequenciesParameterName = "FragmentFrequencies";
59
60    #region Parameter properties
61    public ValueParameter<IntValue> UpdateIntervalParameter {
62      get { return (ValueParameter<IntValue>)Parameters[UpdateIntervalParameterName]; }
63    }
64    public ValueParameter<IntValue> UpdateCounterParameter {
65      get { return (ValueParameter<IntValue>)Parameters[UpdateCounterParameterName]; }
66    }
67    public LookupParameter<ResultCollection> ResultsParameter {
68      get { return (LookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
69    }
70    public LookupParameter<IntValue> GenerationsParameter {
71      get { return (LookupParameter<IntValue>)Parameters[GenerationsParameterName]; }
72    }
73    // tracking structures
74    public LookupParameter<TraceMapType> GlobalTraceMapParameter {
75      get { return (LookupParameter<TraceMapType>)Parameters[GlobalTraceMapParameterName]; }
76    }
77    public LookupParameter<CloneMapType> GlobalCloneMapParameter {
78      get { return (LookupParameter<CloneMapType>)Parameters[GlobalCloneMapParameterName]; }
79    }
80    public LookupParameter<CloneMapType> GlobalFragmentMapParameter {
81      get { return (LookupParameter<CloneMapType>)Parameters[GlobalFragmentMapParameterName]; }
82    }
83    public LookupParameter<ItemList<ItemList<IItem>>> FragmentFrequenciesParameter {
84      get { return (LookupParameter<ItemList<ItemList<IItem>>>)Parameters[FragmentFrequenciesParameterName]; }
85    }
86    public LookupParameter<DataTable> FragmentStatisticsParameter {
87      get { return (LookupParameter<DataTable>)Parameters[FragmentStatisticsParameterName]; }
88    }
89    public LookupParameter<DataTable> FragmentLengthsParameter {
90      get { return (LookupParameter<DataTable>)Parameters[FragmentLengthsDistributionParametersName]; }
91    }
92    // auxiliary structures for tracking fragments and individuals from the previous generation
93    // (this is needed because tracking is done in connection to the current generation, i.e., for
94    // counting "successful" offspring and fragments
95    public LookupParameter<TraceMapType> SecondaryTraceMapParameter {
96      get { return (LookupParameter<TraceMapType>)Parameters[SecondaryTraceMapParameterName]; }
97    }
98    public LookupParameter<CloneMapType> SecondaryFragmentMapParameter {
99      get { return (LookupParameter<CloneMapType>)Parameters[SecondaryFragmentMapParameterName]; }
100    }
101    public LookupParameter<CloneMapType> SecondaryCloneMapParameter {
102      get { return (LookupParameter<CloneMapType>)Parameters[SecondaryCloneMapParameterName]; }
103    }
104    #endregion
105
106    #region Parameters
107    public bool EnabledByDefault {
108      get { return true; }
109    }
110    public IntValue UpdateInterval {
111      get { return UpdateIntervalParameter.Value; }
112    }
113    public IntValue UpdateCounter {
114      get { return UpdateCounterParameter.Value; }
115    }
116    public ResultCollection Results {
117      get { return ResultsParameter.ActualValue; }
118    }
119    public CloneMapType GlobalCloneMap {
120      get { return GlobalCloneMapParameter.ActualValue; }
121    }
122    public TraceMapType GlobalTraceMap {
123      get { return GlobalTraceMapParameter.ActualValue; }
124    }
125    public TraceMapType SecondaryTraceMap {
126      get { return SecondaryTraceMapParameter.ActualValue; }
127      set { SecondaryTraceMapParameter.ActualValue = value; }
128    }
129    public CloneMapType SecondaryFragmentMap {
130      get { return SecondaryFragmentMapParameter.ActualValue; }
131      set { SecondaryFragmentMapParameter.ActualValue = value; }
132    }
133    public CloneMapType SecondaryCloneMap {
134      get { return SecondaryCloneMapParameter.ActualValue; }
135      set { SecondaryCloneMapParameter.ActualValue = value; }
136    }
137    public CloneMapType GlobalFragmentMap {
138      get { return GlobalFragmentMapParameter.ActualValue; }
139    }
140    public IntValue Generations {
141      get { return GenerationsParameter.ActualValue; }
142    }
143    public DataTable FragmentStatistics {
144      get { return FragmentStatisticsParameter.ActualValue; }
145    }
146    public DataTable FragmentLengths {
147      get { return FragmentLengthsParameter.ActualValue; }
148    }
149    public ItemList<ItemList<IItem>> FragmentFrequencies {
150      get { return FragmentFrequenciesParameter.ActualValue; }
151    }
152    #endregion
153
154    [StorableConstructor]
155    private SymbolicExpressionTreeFragmentsAnalyzer(bool deserializing)
156      : base() {
157    }
158    private SymbolicExpressionTreeFragmentsAnalyzer(SymbolicExpressionTreeFragmentsAnalyzer original, Cloner cloner)
159      : base(original, cloner) {
160    }
161    public override IDeepCloneable Clone(Cloner cloner) {
162      return new SymbolicExpressionTreeFragmentsAnalyzer(this, cloner);
163    }
164    public SymbolicExpressionTreeFragmentsAnalyzer()
165      : base() {
166      // analyzer update counter and update interval
167      Parameters.Add(new ValueParameter<IntValue>(UpdateIntervalParameterName, "The interval in which the tree length analysis should be applied.", new IntValue(1)));
168      Parameters.Add(new ValueParameter<IntValue>(UpdateCounterParameterName, "The value which counts how many times the operator was called since the last update", new IntValue(0)));
169      // tracking
170      Parameters.Add(new LookupParameter<TraceMapType>(GlobalTraceMapParameterName, "A global cache containing the whole genealogy."));
171      Parameters.Add(new LookupParameter<CloneMapType>(GlobalCloneMapParameterName, "A global map keeping track of trees and their clones (made during selection)."));
172      Parameters.Add(new LookupParameter<CloneMapType>(GlobalFragmentMapParameterName, "A global map keeping track of tree fragments received via crossover."));
173      // secondary tracking
174      Parameters.Add(new LookupParameter<TraceMapType>(SecondaryTraceMapParameterName, "Parameter to store the trace map from the previous generation"));
175      Parameters.Add(new LookupParameter<CloneMapType>(SecondaryCloneMapParameterName, "Parameter to store the clone map from the previous generation"));
176      Parameters.Add(new LookupParameter<CloneMapType>(SecondaryFragmentMapParameterName, "Parameter to store the fragment map from the previous generation"));
177      // other params
178      Parameters.Add(new ValueLookupParameter<ResultCollection>(ResultsParameterName, "The results collection where the analysis values should be stored."));
179      Parameters.Add(new LookupParameter<IntValue>(GenerationsParameterName, "The number of generations so far."));
180      Parameters.Add(new LookupParameter<DataTable>(FragmentStatisticsParameterName, "The data table to store the fragment statistics."));
181      Parameters.Add(new LookupParameter<DataTable>(FragmentLengthsDistributionParametersName, "The data table to store the distribution of fragment lengths."));
182      Parameters.Add(new LookupParameter<ItemList<ItemList<IItem>>>(FragmentFrequenciesParameterName, "A data structure for fragment frequencies"));
183      UpdateCounterParameter.Hidden = true;
184      UpdateIntervalParameter.Hidden = true;
185    }
186    #region After deserialization code
187    [StorableHook(HookType.AfterDeserialization)]
188    private void AfterDeserialization() {
189      // check if all the parameters are present and accounted for
190      if (!Parameters.ContainsKey(UpdateIntervalParameterName)) {
191        Parameters.Add(new ValueParameter<IntValue>(UpdateIntervalParameterName, "The interval in which the tree length analysis should be applied.", new IntValue(1)));
192      }
193      if (!Parameters.ContainsKey(UpdateCounterParameterName)) {
194        Parameters.Add(new ValueParameter<IntValue>(UpdateCounterParameterName, "The value which counts how many times the operator was called since the last update", new IntValue(0)));
195        UpdateCounterParameter.Hidden = true;
196      }
197      if (!Parameters.ContainsKey(FragmentLengthsDistributionParametersName)) {
198        Parameters.Add(new LookupParameter<DataTable>(FragmentLengthsDistributionParametersName, "The data table to store the distribution of fragment lengths."));
199      }
200      if (!Parameters.ContainsKey(FragmentFrequenciesParameterName)) {
201        Parameters.Add(new LookupParameter<ItemList<ItemList<IItem>>>(FragmentFrequenciesParameterName, "A data structure for fragment frequencies"));
202      }
203    }
204    #endregion
205    #region IStatefulItem members
206    public override void InitializeState() {
207      base.InitializeState();
208      UpdateCounter.Value = 0;
209    }
210
211    public override void ClearState() {
212      base.ClearState();
213      UpdateCounter.Value = 0;
214    }
215    #endregion
216
217    public override IOperation Apply() {
218      if (Generations.Value == 0) return base.Apply();
219      UpdateCounter.Value++;
220      if (UpdateCounter.Value == UpdateInterval.Value) {
221        ResultCollection results = ResultsParameter.ActualValue;
222        if (FragmentStatistics == null) {
223          FragmentStatisticsParameter.ActualValue = new DataTable("Fragment Statistics", "Statistical measurements of fragments aggregated over the whole population");
224          FragmentStatistics.VisualProperties.YAxisTitle = "Fragment length/Similarities";
225          results.Add(new Result("Fragment Statistics", FragmentStatistics));
226        }
227        if (FragmentFrequencies == null) {
228          FragmentFrequenciesParameter.ActualValue = new ItemList<ItemList<IItem>>();
229        }
230        if (SecondaryTraceMap == null) {
231          SecondaryTraceMap = new TraceMapType();
232        }
233        if (SecondaryCloneMap == null) {
234          SecondaryCloneMap = new CloneMapType();
235        }
236        if (SecondaryFragmentMap == null) {
237          SecondaryFragmentMap = new CloneMapType();
238        }
239
240        UpdateCounter.Value = 0; // reset counter
241        var gScope = ExecutionContext.Scope;
242        while (gScope.Parent != null) gScope = gScope.Parent;
243
244        // for this kind of tracking/analysis, we need at least 2 successive generations of crossover
245        if (Generations.Value > 1) {
246          #region Fragment Statistics
247          InitializeRows();
248          // create a dictionary for fast lookup of parents (to see which children from the previous generation become parents, and count their fragments)
249          var parentsLookup = new Dictionary<ISymbolicExpressionTree, bool>();
250          foreach (var parent in GlobalTraceMap.Values.SelectMany(x => x.Cast<ISymbolicExpressionTree>())) {
251            if (!parentsLookup.ContainsKey(parent)) {
252              parentsLookup.Add(parent, true);
253            }
254          }
255          // the same, for fast lookup of clones (might not be needed after all)
256          var clonesLookup = new Dictionary<ISymbolicExpressionTree, bool>();
257          foreach (var original in GlobalCloneMap.Values.Cast<ISymbolicExpressionTree>()) {
258            if (!clonesLookup.ContainsKey(original)) {
259              clonesLookup.Add(original, true);
260            }
261          }
262          var goodFragments = new List<ISymbolicExpressionTreeNode>();
263          var goodParents = new List<ISymbolicExpressionTree>();
264          var goodChildren = new List<ISymbolicExpressionTree>();
265          // take all individuals that have received a fragment in the previous generation
266          foreach (var m in SecondaryFragmentMap) {
267            var individual = m.Key as ISymbolicExpressionTree;
268            // check if the individual became a parent in the current generation,
269            // by checking if it appears among the parents from the current generation trace map
270            if (parentsLookup.ContainsKey(individual)) {
271              var fragmentItem = m.Value as SymbolicExpressionTreeNodeItem;
272              goodFragments.Add(fragmentItem.Content);
273              goodParents.AddRange(SecondaryTraceMap[individual].Cast<ISymbolicExpressionTree>());
274              goodChildren.Add(individual);
275            }
276          }
277          var allFragments = SecondaryFragmentMap.Values.Select(x => ((SymbolicExpressionTreeNodeItem)x).Content as ISymbolicExpressionTreeNode).ToList();
278          var allParents = SecondaryTraceMap.Values.SelectMany(x => x.Cast<ISymbolicExpressionTree>()).ToList();
279          var allChildren = SecondaryFragmentMap.Keys.Select(x => x as ISymbolicExpressionTree).ToList();
280
281          FragmentStatistics.Rows["Fragment lengths (good)"].Values.Add(goodFragments.Average(x => x.GetLength()));
282          FragmentStatistics.Rows["Fragment lengths (all)"].Values.Add(allFragments.Average(x => x.GetLength()));
283          FragmentStatistics.Rows["Parent lengths (good)"].Values.Add(goodParents.Average(x => x.Length));
284          FragmentStatistics.Rows["Parent lengths (all)"].Values.Add(allParents.Average(x => x.Length));
285          FragmentStatistics.Rows["Child lengths (good)"].Values.Add(goodChildren.Average(x => x.Length));
286          FragmentStatistics.Rows["Child lengths (all)"].Values.Add(allChildren.Average(x => x.Length));
287          //FragmentStatistics.Rows["Exact similarity (good fragments)"].Values.Add(CalculateSimilarity(goodFragments, (int)SymbolicExpressionTreeMatching.SimilarityLevel.Exact));
288          //FragmentStatistics.Rows["Exact similarity (all fragments)"].Values.Add(CalculateSimilarity(allFragments, (int)SymbolicExpressionTreeMatching.SimilarityLevel.Exact));
289
290          //WriteFragmentFrequencies(Environment.GetFolderPath(Environment.SpecialFolder.UserProfile) + "\\FragmentFrequencies.dat");
291          #endregion
292        }
293        // save the current generation so it can be compared to the following one, next time the analyzer is applied
294        SecondaryTraceMap.Clear();
295        foreach (var m in GlobalTraceMap)
296          SecondaryTraceMap.Add(m.Key, m.Value);
297        SecondaryCloneMap.Clear();
298        foreach (var m in GlobalCloneMap)
299          SecondaryCloneMap.Add(m.Key, m.Value);
300        SecondaryFragmentMap.Clear();
301        foreach (var m in GlobalFragmentMap)
302          SecondaryFragmentMap.Add(m.Key, m.Value);
303
304        // clear the global maps to save memory
305        if (this.Successor == null || !(this.Successor is SymbolicExpressionTreeGenealogyAnalyzer)) {
306          GlobalCloneMap.Clear();
307          GlobalTraceMap.Clear();
308          GlobalFragmentMap.Clear();
309        }
310      }
311      return base.Apply();
312    }
313
314    private void InitializeRows() {
315      if (!FragmentStatistics.Rows.ContainsKey("Parent lengths (good)")) {
316        FragmentStatistics.Rows.Add(new DataRow("Parent lengths (good)") {
317          VisualProperties = { StartIndexZero = true }
318        });
319      }
320      if (!FragmentStatistics.Rows.ContainsKey("Parent lengths (all)")) {
321        FragmentStatistics.Rows.Add(new DataRow("Parent lengths (all)") {
322          VisualProperties = { StartIndexZero = true }
323        });
324      }
325      if (!FragmentStatistics.Rows.ContainsKey("Child lengths (good)")) {
326        FragmentStatistics.Rows.Add(new DataRow("Child lengths (good)") {
327          VisualProperties = { StartIndexZero = true }
328        });
329      }
330      if (!FragmentStatistics.Rows.ContainsKey("Child lengths (all)")) {
331        FragmentStatistics.Rows.Add(new DataRow("Child lengths (all)") {
332          VisualProperties = { StartIndexZero = true }
333        });
334      }
335      if (!FragmentStatistics.Rows.ContainsKey("Fragment lengths (good)")) {
336        FragmentStatistics.Rows.Add(new DataRow("Fragment lengths (good)") {
337          VisualProperties = { StartIndexZero = true }
338        });
339      }
340      if (!FragmentStatistics.Rows.ContainsKey("Fragment lengths (all)")) {
341        FragmentStatistics.Rows.Add(new DataRow("Fragment lengths (all)") {
342          VisualProperties = { StartIndexZero = true }
343        });
344      }
345      // exact similarity
346      if (!FragmentStatistics.Rows.ContainsKey("Exact similarity (good fragments)")) {
347        FragmentStatistics.Rows.Add(new DataRow("Exact similarity (good fragments)") {
348          VisualProperties = { StartIndexZero = true }
349        });
350      }
351      if (!FragmentStatistics.Rows.ContainsKey("Exact similarity (all fragments)")) {
352        FragmentStatistics.Rows.Add(new DataRow("Exact similarity (all fragments)") {
353          VisualProperties = { StartIndexZero = true }
354        });
355      }
356    }
357
358    private void WriteFragmentFrequencies(string path) {
359      using (var file = new System.IO.StreamWriter(path)) {
360        foreach (var f in FragmentFrequencies) {
361          file.WriteLine(((SymbolicExpressionTreeNodeItem)f[0]).Content.GetLength() + " " + ((IntValue)f[1]).Value);
362        }
363      }
364    }
365
366    #region Similarity computations
367    /// <summary>
368    /// Provide a measure of how similar the fragments that get passed by crossover from one generation to the next are.
369    /// </summary>
370    /// <param name="fragments">The symbolic expression tree fragments</param>
371    /// <param name="mode">The similarity mode (0 - exact, 1 - high, 2 - relaxed)</param>
372    /// <returns>The average number of similar fragments</returns>
373    private static double CalculateSimilarity(IList<ISymbolicExpressionTreeNode> fragments, int mode) {
374      var visited = new bool[fragments.Count];
375      int groups = 0;
376      for (int i = 0; i != fragments.Count - 1; ++i) {
377        if (visited[i]) continue;
378        for (int j = i + 1; j != fragments.Count; ++j) {
379          if (visited[j]) continue;
380          if (fragments[i].IsSimilarTo(fragments[j], mode)) visited[j] = true;
381        }
382        ++groups;
383      }
384      return (double)fragments.Count / groups;
385    }
386    #endregion
387  } //SymbolicExpressionTreeFragmentsAnalyzer
388}
Note: See TracBrowser for help on using the repository browser.