Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.EvolutionaryTracking/HeuristicLab.EvolutionaryTracking/3.4/Analyzers/SymbolicExpressionTreeFragmentsAnalyzer.cs @ 8557

Last change on this file since 8557 was 8557, checked in by bburlacu, 12 years ago

#1772: Introduced base class and interface for tracing operators. Introduced SymbolicExpressionTreeNodeComparer interface to be implemented by node matching operators. Fixed bug in the TracingSymbolicExpressionTreeManipulator where nodes modified by the Full- and OnePoint tree shakers were not correctly detected. The SymbolicExpressionTreeNodeSimilarityComparer is now injected in the tracing genetic operators as a parameter.

File size: 31.8 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2012 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System.Collections.Generic;
23using System.Linq;
24using HeuristicLab.Analysis;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29using HeuristicLab.Operators;
30using HeuristicLab.Optimization;
31using HeuristicLab.Parameters;
32using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
33using HeuristicLab.Problems.DataAnalysis;
34using HeuristicLab.Problems.DataAnalysis.Symbolic;
35// type definitions for convenience
36using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;
37using CloneMapType = HeuristicLab.Core.ItemDictionary<HeuristicLab.Core.IItem, HeuristicLab.Core.IItem>;
38using SymbolicExpressionTreeNodeItem = HeuristicLab.EvolutionaryTracking.GenericWrapper<HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.ISymbolicExpressionTreeNode>;
39using TraceMapType = HeuristicLab.Core.ItemDictionary<HeuristicLab.Core.IItem, HeuristicLab.Core.IItemList<HeuristicLab.Core.IItem>>;
40
41namespace HeuristicLab.EvolutionaryTracking {
42  /// <summary>
43  /// An operator that gathers information on tree fragments that get passed from one generation to the next via genetic operators
44  /// (Tracking of genetic variance and heredity)
45  /// </summary>
46  [Item("SymbolicExpressionTreeFragmentsAnalyzer", "An operator that provides statistics about crossover fragments")]
47  [StorableClass]
48  public sealed class SymbolicExpressionTreeFragmentsAnalyzer : SingleSuccessorOperator, IAnalyzer {
49    #region Parameter names
50    private const string UpdateIntervalParameterName = "UpdateInterval";
51    private const string UpdateCounterParameterName = "UpdateCounter";
52    private const string ResultsParameterName = "Results";
53    private const string SecondaryTraceMapParameterName = "SecondaryTraceMap";
54    private const string SecondaryFragmentMapParameterName = "SecondaryFragmentMap";
55    private const string SecondaryCloneMapParameterName = "SecondaryCloneMap";
56    private const string GlobalTraceMapParameterName = "GlobalTraceMap";
57    private const string GlobalCloneMapParameterName = "GlobalCloneMap";
58    private const string GlobalFragmentMapParameterName = "GlobalFragmentMap";
59    private const string GenerationsParameterName = "Generations";
60    private const string SymbolicExpressionInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
61    private const string SymbolicRegressionProblemDataParameterName = "ProblemData";
62    private const string FragmentStatisticsParameterName = "FragmentStatistics";
63    private const string FragmentLengthsDistributionParametersName = "FragmentLengths";
64    private const string FragmentFrequenciesParameterName = "FragmentFrequencies";
65    private const string ParentsDiversityParameterName = "ParentsDiversity";
66    #endregion
67
68    // impact values calculator
69    private readonly SymbolicRegressionSolutionValuesCalculator calculator = new SymbolicRegressionSolutionValuesCalculator();
70
71    #region Parameter properties
72    public ValueParameter<IntValue> UpdateIntervalParameter {
73      get { return (ValueParameter<IntValue>)Parameters[UpdateIntervalParameterName]; }
74    }
75    public ValueParameter<IntValue> UpdateCounterParameter {
76      get { return (ValueParameter<IntValue>)Parameters[UpdateCounterParameterName]; }
77    }
78    public LookupParameter<ResultCollection> ResultsParameter {
79      get { return (LookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
80    }
81    public LookupParameter<IntValue> GenerationsParameter {
82      get { return (LookupParameter<IntValue>)Parameters[GenerationsParameterName]; }
83    }
84    // tracking structures
85    public LookupParameter<TraceMapType> GlobalTraceMapParameter {
86      get { return (LookupParameter<TraceMapType>)Parameters[GlobalTraceMapParameterName]; }
87    }
88    public LookupParameter<CloneMapType> GlobalCloneMapParameter {
89      get { return (LookupParameter<CloneMapType>)Parameters[GlobalCloneMapParameterName]; }
90    }
91    public LookupParameter<CloneMapType> GlobalFragmentMapParameter {
92      get { return (LookupParameter<CloneMapType>)Parameters[GlobalFragmentMapParameterName]; }
93    }
94    public LookupParameter<DataTable> FragmentFrequenciesParameter {
95      get { return (LookupParameter<DataTable>)Parameters[FragmentFrequenciesParameterName]; }
96    }
97    public LookupParameter<DataTable> FragmentStatisticsParameter {
98      get { return (LookupParameter<DataTable>)Parameters[FragmentStatisticsParameterName]; }
99    }
100    public LookupParameter<DataTable> FragmentLengthsParameter {
101      get { return (LookupParameter<DataTable>)Parameters[FragmentLengthsDistributionParametersName]; }
102    }
103    public LookupParameter<DataTable> ParentsDiversityParameter {
104      get { return (LookupParameter<DataTable>)Parameters[ParentsDiversityParameterName]; }
105    }
106    // auxiliary structures for tracking fragments and individuals from the previous generation
107    // (this is needed because tracking is done in connection to the current generation, i.e., for
108    // counting "successful" offspring and fragments
109    public LookupParameter<TraceMapType> SecondaryTraceMapParameter {
110      get { return (LookupParameter<TraceMapType>)Parameters[SecondaryTraceMapParameterName]; }
111    }
112    public LookupParameter<CloneMapType> SecondaryFragmentMapParameter {
113      get { return (LookupParameter<CloneMapType>)Parameters[SecondaryFragmentMapParameterName]; }
114    }
115    public LookupParameter<CloneMapType> SecondaryCloneMapParameter {
116      get { return (LookupParameter<CloneMapType>)Parameters[SecondaryCloneMapParameterName]; }
117    }
118    // problem data, interpreter and evaluator
119    public LookupParameter<SymbolicDataAnalysisExpressionTreeInterpreter> SymbolicExpressionInterpreterParameter {
120      get { return (LookupParameter<SymbolicDataAnalysisExpressionTreeInterpreter>)Parameters[SymbolicExpressionInterpreterParameterName]; }
121    }
122    public LookupParameter<RegressionProblemData> SymbolicRegressionProblemDataParameter {
123      get { return (LookupParameter<RegressionProblemData>)Parameters[SymbolicRegressionProblemDataParameterName]; }
124    }
125    #endregion
126
127    #region Parameters
128    public bool EnabledByDefault {
129      get { return true; }
130    }
131    public IntValue UpdateInterval {
132      get { return UpdateIntervalParameter.Value; }
133    }
134    public IntValue UpdateCounter {
135      get { return UpdateCounterParameter.Value; }
136    }
137    public ResultCollection Results {
138      get { return ResultsParameter.ActualValue; }
139    }
140    public CloneMapType GlobalCloneMap {
141      get { return GlobalCloneMapParameter.ActualValue; }
142    }
143    public TraceMapType GlobalTraceMap {
144      get { return GlobalTraceMapParameter.ActualValue; }
145    }
146    public TraceMapType SecondaryTraceMap {
147      get { return SecondaryTraceMapParameter.ActualValue; }
148      set { SecondaryTraceMapParameter.ActualValue = value; }
149    }
150    public CloneMapType SecondaryFragmentMap {
151      get { return SecondaryFragmentMapParameter.ActualValue; }
152      set { SecondaryFragmentMapParameter.ActualValue = value; }
153    }
154    public CloneMapType SecondaryCloneMap {
155      get { return SecondaryCloneMapParameter.ActualValue; }
156      set { SecondaryCloneMapParameter.ActualValue = value; }
157    }
158    public CloneMapType GlobalFragmentMap {
159      get { return GlobalFragmentMapParameter.ActualValue; }
160    }
161    public IntValue Generations {
162      get { return GenerationsParameter.ActualValue; }
163    }
164    public DataTable FragmentStatistics {
165      get { return FragmentStatisticsParameter.ActualValue; }
166    }
167    public DataTable FragmentLengths {
168      get { return FragmentLengthsParameter.ActualValue; }
169    }
170    public DataTable FragmentFrequencies {
171      get { return FragmentFrequenciesParameter.ActualValue; }
172    }
173    public DataTable ParentsDiversity {
174      get { return ParentsDiversityParameter.ActualValue; }
175    }
176    public SymbolicDataAnalysisExpressionTreeInterpreter SymbolicExpressionInterpreter {
177      get { return SymbolicExpressionInterpreterParameter.ActualValue; }
178    }
179    public RegressionProblemData SymbolicRegressionProblemData {
180      get { return SymbolicRegressionProblemDataParameter.ActualValue; }
181    }
182    #endregion
183
184    [StorableConstructor]
185    private SymbolicExpressionTreeFragmentsAnalyzer(bool deserializing) : base(deserializing) { }
186    private SymbolicExpressionTreeFragmentsAnalyzer(SymbolicExpressionTreeFragmentsAnalyzer original, Cloner cloner) : base(original, cloner) { }
187    public override IDeepCloneable Clone(Cloner cloner) {
188      return new SymbolicExpressionTreeFragmentsAnalyzer(this, cloner);
189    }
190    public SymbolicExpressionTreeFragmentsAnalyzer()
191      : base() {
192      #region Add parameters
193      // analyzer update counter and update interval
194      Parameters.Add(new ValueParameter<IntValue>(UpdateIntervalParameterName, "The interval in which the tree length analysis should be applied.", new IntValue(1)));
195      Parameters.Add(new ValueParameter<IntValue>(UpdateCounterParameterName, "The value which counts how many times the operator was called since the last update", new IntValue(0)));
196      // tracking
197      Parameters.Add(new LookupParameter<TraceMapType>(GlobalTraceMapParameterName, "A global cache containing the whole genealogy."));
198      Parameters.Add(new LookupParameter<CloneMapType>(GlobalCloneMapParameterName, "A global map keeping track of trees and their clones (made during selection)."));
199      Parameters.Add(new LookupParameter<CloneMapType>(GlobalFragmentMapParameterName, "A global map keeping track of tree fragments received via crossover."));
200      // secondary tracking
201      Parameters.Add(new LookupParameter<TraceMapType>(SecondaryTraceMapParameterName, "Parameter to store the trace map from the previous generation"));
202      Parameters.Add(new LookupParameter<CloneMapType>(SecondaryCloneMapParameterName, "Parameter to store the clone map from the previous generation"));
203      Parameters.Add(new LookupParameter<CloneMapType>(SecondaryFragmentMapParameterName, "Parameter to store the fragment map from the previous generation"));
204      // fragment statistics parameters
205      Parameters.Add(new ValueLookupParameter<ResultCollection>(ResultsParameterName, "The results collection where the analysis values should be stored."));
206      Parameters.Add(new LookupParameter<IntValue>(GenerationsParameterName, "The number of generations so far."));
207      Parameters.Add(new LookupParameter<DataTable>(FragmentStatisticsParameterName, "The data table to store the fragment statistics."));
208      Parameters.Add(new LookupParameter<DataTable>(FragmentLengthsDistributionParametersName, "The data table to store the distribution of fragment lengths."));
209      Parameters.Add(new LookupParameter<DataTable>(FragmentFrequenciesParameterName, "A data structure for fragment frequencies"));
210      Parameters.Add(new LookupParameter<DataTable>(ParentsDiversityParameterName, "A data structure to store the diversity of the parents per generation."));
211      // impact calculation
212      Parameters.Add(new LookupParameter<SymbolicDataAnalysisExpressionTreeInterpreter>(SymbolicExpressionInterpreterParameterName, "Interpreter for symbolic expression trees"));
213      Parameters.Add(new LookupParameter<RegressionProblemData>(SymbolicRegressionProblemDataParameterName, "The symbolic data analysis problem."));
214      #endregion
215
216      UpdateCounterParameter.Hidden = true;
217      UpdateIntervalParameter.Hidden = true;
218    }
219    #region After deserialization code
220    [StorableHook(HookType.AfterDeserialization)]
221    private void AfterDeserialization() {
222      // check if all the parameters are present and accounted for
223      if (!Parameters.ContainsKey(UpdateIntervalParameterName)) {
224        Parameters.Add(new ValueParameter<IntValue>(UpdateIntervalParameterName, "The interval in which the tree length analysis should be applied.", new IntValue(1)));
225      }
226      if (!Parameters.ContainsKey(UpdateCounterParameterName)) {
227        Parameters.Add(new ValueParameter<IntValue>(UpdateCounterParameterName, "The value which counts how many times the operator was called since the last update", new IntValue(0)));
228        UpdateCounterParameter.Hidden = true;
229      }
230      if (!Parameters.ContainsKey(FragmentStatisticsParameterName)) {
231        Parameters.Add(new LookupParameter<DataTable>(FragmentStatisticsParameterName, "The data table to store the fragment statistics."));
232      }
233      if (!Parameters.ContainsKey(FragmentLengthsDistributionParametersName)) {
234        Parameters.Add(new LookupParameter<DataTable>(FragmentLengthsDistributionParametersName, "The data table to store the distribution of fragment lengths."));
235      }
236      if (!Parameters.ContainsKey(FragmentFrequenciesParameterName)) {
237        Parameters.Add(new LookupParameter<ItemList<ItemList<IItem>>>(FragmentFrequenciesParameterName, "A data structure for fragment frequencies"));
238      }
239      if (!Parameters.ContainsKey(ParentsDiversityParameterName)) {
240        Parameters.Add(new LookupParameter<DataTable>(ParentsDiversityParameterName, "A data table to store the diversity of the parents per generation."));
241      }
242    }
243    #endregion
244    #region IStatefulItem members
245    public override void InitializeState() {
246      base.InitializeState();
247      UpdateCounter.Value = 0;
248    }
249
250    public override void ClearState() {
251      base.ClearState();
252      UpdateCounter.Value = 0;
253    }
254    #endregion
255
256    /* A small description of the way this analyzer works
257     *
258     * We keep 3 maps in the global scope: the GlobalTraceMap, the GlobalFragmentMap and the GlobalCloneMap that are updated on each step: selection, crossover, mutation
259     * These maps provide information about the children, parents and transferred fragments as follows:
260     *
261     * 1) GlobalCloneMap
262     *    This data structure provides the basis for tracking. In the selection step, every selected individual gets cloned and transferred into the recombination pool.
263     *    The GlobalCloneMap keeps track of clones by mapping them to the original individuals. This is an essential step for building the genealogy graph. Using this
264     *    structure we can find out how many times each individual was selected.
265     *    The GlobalCloneMap consists of Key-Value pairs Clone->Original
266     *
267     * 2) GlobalTraceMap
268     *    Keeps information in the form of key-value pairs Child->{Parents}, where {Parents} is a set consisting of
269     *      - one parent individual (in the case of mutation), by language abuse we say that the individual that mutation acted on is the parent,
270     *        and the resulting individual is the child
271     *      - two parent individuals (in the case of crossover), named Parent0 and Parent1. Parent0 is the individual that crossover acts on by replacing
272     *        one of its subtrees with a compatible subtree, randomly selected from Parent1
273     *
274     *    CROSSOVER
275     *        Parent0     Parent1                                                                    {Parents}
276     *           \           /                                                                           ^
277     *            \       fragment (inserted from Parent1 into Parent0 to create the Child)              ^    [Mapping provided by the GlobalTraceMap]
278     *             \       /                                                                             ^
279     *              \     /                                                                              ^
280     *               Child                                                                             Child
281     *               
282     *    MUTATION
283     *        Parent0
284     *           |
285     *           |    [mutation acts on a random node/subtree in the 'parent' individual
286     *           |
287     *         Child
288     *         
289     *    Since crossover and mutation act on individuals in the recombination pool (which in fact are clones of individuals from the previous generation), the GlobalTraceMap
290     *    is populated with values given by the mappings in the GlobalCloneMap, so that the original parent for each child is known.
291     *   
292     *  3) GlobalFragmentMap
293     *     Keeps information in the form of Key-Value pairs about an individual and its respective fragment
294     *     (ie., the subtree received via crossover or created/altered by mutation)
295     * */
296    public override IOperation Apply() {
297      UpdateCounter.Value++;
298      if (UpdateCounter.Value == UpdateInterval.Value) {
299        UpdateCounter.Value = 0; // reset counter
300        if (Generations.Value > 0) {
301          InitializeParameters();
302
303          CalculateFragmentStatistics();
304
305          // save the current generation so it can be compared to the following one, next time the analyzer is applied
306          SecondaryTraceMap = (TraceMapType)GlobalTraceMap.Clone();
307          SecondaryTraceMap.Clear();
308          foreach (var m in GlobalTraceMap)
309            SecondaryTraceMap.Add(m.Key, m.Value);
310
311          SecondaryCloneMap.Clear();
312          foreach (var m in GlobalCloneMap)
313            SecondaryCloneMap.Add(m.Key, m.Value);
314
315          SecondaryFragmentMap.Clear();
316          foreach (var m in GlobalFragmentMap)
317            SecondaryFragmentMap.Add(m.Key, m.Value);
318        }
319      }
320      if (Generations.Value > 0)
321        if (this.Successor == null || !(this.Successor is SymbolicExpressionTreeGenealogyAnalyzer)) {
322          // clear the global maps to save memory
323          GlobalCloneMap.Clear();
324          GlobalTraceMap.Clear();
325          GlobalFragmentMap.Clear();
326        }
327      return base.Apply();
328    }
329
330    private void InitializeParameters() {
331      #region Init parameters
332      var results = ResultsParameter.ActualValue;
333
334      if (FragmentStatistics == null) {
335        FragmentStatisticsParameter.ActualValue = new DataTable("Fragment Statistics", "Statistical measurements of fragments aggregated over the whole population");
336        FragmentStatistics.VisualProperties.YAxisTitle = "Fragment length/Similarities";
337        results.Add(new Result("Fragment Statistics", FragmentStatistics));
338      }
339      if (FragmentLengths == null) {
340        FragmentLengthsParameter.ActualValue = new DataTable("Fragment Lengths", "Histograms for the distribution of fragment and individual lengths");
341        FragmentLengths.VisualProperties.YAxisTitle = "Frequency";
342        results.Add(new Result("Fragment Lengths", FragmentLengths));
343      }
344      if (FragmentFrequencies == null) {
345        FragmentFrequenciesParameter.ActualValue = new DataTable("Fragment Frequencies", "Frequencies of good and high impact fragments");
346        FragmentFrequencies.VisualProperties.YAxisTitle = "Frequency";
347        results.Add(new Result("Fragment Frequencies", FragmentFrequencies));
348      }
349      if (ParentsDiversity == null) {
350        ParentsDiversityParameter.ActualValue = new DataTable("Parents Diversity", "Number of unique parents per generation");
351        ParentsDiversity.VisualProperties.YAxisTitle = "Unique parents";
352        results.Add(new Result("Parents Diversity", ParentsDiversity));
353      }
354      if (SecondaryTraceMap == null) {
355        SecondaryTraceMap = new TraceMapType();
356      }
357      if (SecondaryCloneMap == null) {
358        SecondaryCloneMap = new CloneMapType();
359      }
360      if (SecondaryFragmentMap == null) {
361        SecondaryFragmentMap = new CloneMapType();
362      }
363      #endregion
364    }
365
366    private void InitializeRows() {
367      if (!FragmentStatistics.Rows.ContainsKey("Parent lengths (good)")) {
368        FragmentStatistics.Rows.Add(new DataRow("Parent lengths (good)") {
369          VisualProperties = { StartIndexZero = true }
370        });
371      }
372      if (!FragmentStatistics.Rows.ContainsKey("Parent lengths (all)")) {
373        FragmentStatistics.Rows.Add(new DataRow("Parent lengths (all)") {
374          VisualProperties = { StartIndexZero = true }
375        });
376      }
377      if (!FragmentStatistics.Rows.ContainsKey("Child lengths (good)")) {
378        FragmentStatistics.Rows.Add(new DataRow("Child lengths (good)") {
379          VisualProperties = { StartIndexZero = true }
380        });
381      }
382      if (!FragmentStatistics.Rows.ContainsKey("Child lengths (all)")) {
383        FragmentStatistics.Rows.Add(new DataRow("Child lengths (all)") {
384          VisualProperties = { StartIndexZero = true }
385        });
386      }
387      if (!FragmentStatistics.Rows.ContainsKey("Fragment lengths (good)")) {
388        FragmentStatistics.Rows.Add(new DataRow("Fragment lengths (good)") {
389          VisualProperties = { StartIndexZero = true }
390        });
391      }
392      if (!FragmentStatistics.Rows.ContainsKey("Fragment lengths (all)")) {
393        FragmentStatistics.Rows.Add(new DataRow("Fragment lengths (all)") {
394          VisualProperties = { StartIndexZero = true }
395        });
396      }
397      if (!FragmentStatistics.Rows.ContainsKey("Fragment lengths (high impact)")) {
398        FragmentStatistics.Rows.Add(new DataRow("Fragment lengths (high impact)") {
399          VisualProperties = { StartIndexZero = true }
400        });
401      }
402      if (!FragmentStatistics.Rows.ContainsKey("Avg visitation length (all children)")) {
403        FragmentStatistics.Rows.Add(new DataRow("Avg visitation length (all children)") {
404          VisualProperties = { StartIndexZero = true }
405        });
406      }
407      if (!FragmentStatistics.Rows.ContainsKey("Avg visitation length (good children)")) {
408        FragmentStatistics.Rows.Add(new DataRow("Avg visitation length (good children)") {
409          VisualProperties = { StartIndexZero = true }
410        });
411      }
412      // exact similarity
413      if (!FragmentStatistics.Rows.ContainsKey("Exact similarity (good fragments)")) {
414        FragmentStatistics.Rows.Add(new DataRow("Exact similarity (good fragments)") {
415          VisualProperties = { StartIndexZero = true }
416        });
417      }
418      if (!FragmentStatistics.Rows.ContainsKey("Exact similarity (all fragments)")) {
419        FragmentStatistics.Rows.Add(new DataRow("Exact similarity (all fragments)") {
420          VisualProperties = { StartIndexZero = true }
421        });
422      }
423      if (!FragmentFrequencies.Rows.ContainsKey("Frequency of useful fragments")) {
424        FragmentFrequencies.Rows.Add(new DataRow("Frequency of useful fragments") {
425          VisualProperties = { StartIndexZero = true }
426        });
427      }
428      if (!FragmentFrequencies.Rows.ContainsKey("Frequency of high impact fragments")) {
429        FragmentFrequencies.Rows.Add(new DataRow("Frequency of high impact fragments") {
430          VisualProperties = { StartIndexZero = true }
431        });
432      }
433      if (!ParentsDiversity.Rows.ContainsKey("Unique parents")) {
434        ParentsDiversity.Rows.Add(new DataRow("Unique parents") {
435          VisualProperties = { StartIndexZero = true }
436        });
437      }
438      double scaleFactor = 1.0;
439      if (!FragmentLengths.Rows.ContainsKey("All fragments length distribution"))
440        FragmentLengths.Rows.Add(new DataRow("All fragments length distribution") {
441          VisualProperties = {
442            StartIndexZero = true,
443            ChartType = DataRowVisualProperties.DataRowChartType.Histogram,
444            ScaleFactor = scaleFactor
445          }
446        });
447      if (!FragmentLengths.Rows.ContainsKey("Useful fragments length distribution"))
448        FragmentLengths.Rows.Add(new DataRow("Useful fragments length distribution") {
449          VisualProperties = {
450            StartIndexZero = true,
451            ChartType = DataRowVisualProperties.DataRowChartType.Histogram,
452            ScaleFactor = scaleFactor
453          }
454        });
455      if (!FragmentLengths.Rows.ContainsKey("All children length distribution"))
456        FragmentLengths.Rows.Add(new DataRow("All children length distribution") {
457          VisualProperties = {
458            StartIndexZero = true,
459            ChartType = DataRowVisualProperties.DataRowChartType.Histogram,
460            ScaleFactor = scaleFactor
461          }
462        });
463      if (!FragmentLengths.Rows.ContainsKey("Useful children length distribution"))
464        FragmentLengths.Rows.Add(new DataRow("Useful children length distribution") {
465          VisualProperties = {
466            StartIndexZero = true,
467            ChartType = DataRowVisualProperties.DataRowChartType.Histogram,
468            ScaleFactor = scaleFactor
469          }
470        });
471    }
472
473    private void CalculateFragmentStatistics() {
474      // for this kind of tracking/analysis, we need at least 2 successive generations of crossover
475      if (Generations.Value > 1) {
476        InitializeRows();
477
478        #region Fragment Statistics
479        // create a hashset for fast lookup of parents (to see which children from the previous generation become parents, and count their fragments)
480        var parentsLookup = new HashSet<ISymbolicExpressionTree>(GlobalTraceMap.Values.SelectMany(x => x.Cast<ISymbolicExpressionTree>()));
481        var allParents = SecondaryTraceMap.Values.SelectMany(x => x.Cast<ISymbolicExpressionTree>()).ToList();
482        var allChildren = SecondaryFragmentMap.Keys.Cast<ISymbolicExpressionTree>().ToList(); /* SecondaryTraceMap.Keys should be the same with SecondaryFragmentMap.Keys */
483        var allFragments = SecondaryFragmentMap.Values.Cast<SymbolicExpressionTreeNodeItem>().Select(x => x.Content).Where(x => x != null).ToList();
484        // "good" fragments are fragments contained in a surviving offspring (that became a parent)
485        // "good" parents are those that produced a surviving offspring
486        // "good" children are the surviving offspring
487        var goodFragments = new List<ISymbolicExpressionTreeNode>();
488        var goodParents = new List<ISymbolicExpressionTree>();
489        var goodChildren = new List<ISymbolicExpressionTree>();
490
491        // take all individuals that have received a fragment in the previous generation
492        foreach (var m in SecondaryFragmentMap) {
493          var individual = m.Key as ISymbolicExpressionTree;
494          // check if the individual became a parent in the current generation,
495          // by checking if it appears among the parents from the current generation trace map
496          if (parentsLookup.Contains(individual)) {
497            var symbExprTreeNodeItem = m.Value as SymbolicExpressionTreeNodeItem;
498            var fragment = symbExprTreeNodeItem.Content;
499            if (fragment != null) {
500              goodFragments.Add(fragment);
501              goodParents.AddRange(SecondaryTraceMap[individual].Cast<ISymbolicExpressionTree>());
502              goodChildren.Add(individual);
503            }
504          }
505        }
506
507        if (false) {
508          var highImpactFragments = new List<ISymbolicExpressionTreeNode>();
509          foreach (var child in goodChildren) {
510            var impactValues = calculator.CalculateImpactValues(child, SymbolicExpressionInterpreter, SymbolicRegressionProblemData, 0, 0);
511            var fragmentItem = SecondaryFragmentMap[child] as SymbolicExpressionTreeNodeItem;
512            var fragment = fragmentItem.Content;
513            if (fragment != null && impactValues[fragment] > 0.1) {
514              if (highImpactFragments.Contains(fragment, new SymbolicExpressionTreeNodeSimilarityComparer(SimilarityLevel.Exact)))
515                continue;
516              // prune fragment (remove irrelevant/low impact nodes)
517              // this works because a child will never have an impact value greater than its parent
518              foreach (var fnode in fragment.IterateNodesBreadth().Where(x => x.SubtreeCount > 0)) {
519                for (int i = 0; i < fnode.SubtreeCount; ++i) {
520                  var subtree = fnode.GetSubtree(i);
521                  if (impactValues[subtree] < 0.1)
522                    fnode.RemoveSubtree(i);
523                }
524              }
525              highImpactFragments.Add(fragment);
526            }
527          }
528        }
529
530        FragmentFrequencies.Rows["Frequency of useful fragments"].Values.Add(goodFragments.Count);
531        //        FragmentFrequencies.Rows["Frequency of high impact fragments"].Values.Add(highImpactFragments.Count);
532        FragmentStatistics.Rows["Fragment lengths (good)"].Values.Add(goodFragments.Average(x => x.GetLength()));
533        FragmentStatistics.Rows["Fragment lengths (all)"].Values.Add(allFragments.Average(x => x.GetLength()));
534        //        double avg = highImpactFragments.Count > 0 ? highImpactFragments.Average(x => x.GetLength()) : 0;
535        //        FragmentStatistics.Rows["Fragment lengths (high impact)"].Values.Add(avg);
536        FragmentStatistics.Rows["Parent lengths (good)"].Values.Add(goodParents.Average(x => x.Length));
537        FragmentStatistics.Rows["Parent lengths (all)"].Values.Add(allParents.Average(x => x.Length));
538        FragmentStatistics.Rows["Child lengths (good)"].Values.Add(goodChildren.Average(x => x.Length));
539        FragmentStatistics.Rows["Child lengths (all)"].Values.Add(allChildren.Average(x => x.Length));
540        //        FragmentStatistics.Rows["Avg visitation length (all children)"].Values.Add(allChildren.Average(x => VisitationLength(x)));
541        //        FragmentStatistics.Rows["Avg visitation length (good children)"].Values.Add(goodChildren.Average(x => VisitationLength(x)));
542        FragmentLengths.Rows["All fragments length distribution"].Values.Replace(allFragments.Select(x => (double)x.GetLength()));
543        FragmentLengths.Rows["Useful fragments length distribution"].Values.Replace(goodFragments.Select(x => (double)x.GetLength()));
544        FragmentLengths.Rows["All children length distribution"].Values.Replace(allChildren.Select(x => (double)x.Length));
545        FragmentLengths.Rows["Useful children length distribution"].Values.Replace(goodChildren.Select(x => (double)x.Length));
546        ParentsDiversity.Rows["Unique parents"].Values.Add(SecondaryCloneMap.Values.GroupBy(x => x).Count());
547        #endregion
548      }
549    }
550
551    private static int VisitationLength(ISymbolicExpressionTree tree) {
552      return VisitationLength(tree.Root);
553    }
554
555    private static int VisitationLength(ISymbolicExpressionTreeNode node) {
556      return node.IterateNodesBreadth().Sum(n => n.GetLength());
557    }
558
559    #region Similarity computations
560    /// <summary>
561    /// Provide a measure of how similar the fragments that get passed by crossover from one generation to the next are.
562    /// </summary>
563    /// <param name="fragments">The symbolic expression tree fragments</param>
564    /// <param name="mode">The similarity mode (0 - exact, 1 - high, 2 - relaxed)</param>
565    /// <returns>The average number of similar fragments</returns>
566    private static double CalculateSimilarity(IList<ISymbolicExpressionTreeNode> fragments, SimilarityLevel similarity) {
567      var visited = new bool[fragments.Count];
568      int groups = 0;
569      for (int i = 0; i != fragments.Count - 1; ++i) {
570        if (visited[i]) continue;
571        for (int j = i + 1; j != fragments.Count; ++j) {
572          if (visited[j]) continue;
573          if (fragments[i].IsSimilarTo(fragments[j], similarity))
574            visited[j] = true;
575        }
576        ++groups;
577      }
578      return (double)fragments.Count / groups;
579    }
580    #endregion
581  } //SymbolicExpressionTreeFragmentsAnalyzer
582}
Note: See TracBrowser for help on using the repository browser.