Free cookie consent management tool by TermsFeed Policy Generator

Changeset 9082 for branches


Ignore:
Timestamp:
12/20/12 00:43:35 (12 years ago)
Author:
bburlacu
Message:

#1772: Renamed and refactored genealogy graph components. Added SymbolGraph and FPGraph (frequent pattern graph). Added evolvability and genetic operator average improvement analyzer.

Location:
branches/HeuristicLab.EvolutionaryTracking/HeuristicLab.EvolutionaryTracking/3.4
Files:
18 added
6 deleted
5 edited
2 copied

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.EvolutionaryTracking/HeuristicLab.EvolutionaryTracking/3.4/Analyzers/SymbolicExpressionTreeFragmentLengthsAnalyzer.cs

    r8557 r9082  
    2525using HeuristicLab.Common;
    2626using HeuristicLab.Core;
    27 using HeuristicLab.Data;
    2827using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
    29 using HeuristicLab.Operators;
    3028using HeuristicLab.Optimization;
    3129using HeuristicLab.Parameters;
    3230using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    33 using HeuristicLab.Problems.DataAnalysis;
    34 using HeuristicLab.Problems.DataAnalysis.Symbolic;
    3531// type definitions for convenience
    36 using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;
    3732using CloneMapType = HeuristicLab.Core.ItemDictionary<HeuristicLab.Core.IItem, HeuristicLab.Core.IItem>;
    38 using SymbolicExpressionTreeNodeItem = HeuristicLab.EvolutionaryTracking.GenericWrapper<HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.ISymbolicExpressionTreeNode>;
    3933using TraceMapType = HeuristicLab.Core.ItemDictionary<HeuristicLab.Core.IItem, HeuristicLab.Core.IItemList<HeuristicLab.Core.IItem>>;
    4034
     
    4438  /// (Tracking of genetic variance and heredity)
    4539  /// </summary>
    46   [Item("SymbolicExpressionTreeFragmentsAnalyzer", "An operator that provides statistics about crossover fragments")]
     40  [Item("SymbolicExpressionTreeFragmentLengthAnalyzer", "An operator that provides statistics about fragment lengths")]
    4741  [StorableClass]
    48   public sealed class SymbolicExpressionTreeFragmentsAnalyzer : SingleSuccessorOperator, IAnalyzer {
    49     #region Parameter names
    50     private const string UpdateIntervalParameterName = "UpdateInterval";
    51     private const string UpdateCounterParameterName = "UpdateCounter";
    52     private const string ResultsParameterName = "Results";
    53     private const string SecondaryTraceMapParameterName = "SecondaryTraceMap";
    54     private const string SecondaryFragmentMapParameterName = "SecondaryFragmentMap";
    55     private const string SecondaryCloneMapParameterName = "SecondaryCloneMap";
    56     private const string GlobalTraceMapParameterName = "GlobalTraceMap";
    57     private const string GlobalCloneMapParameterName = "GlobalCloneMap";
    58     private const string GlobalFragmentMapParameterName = "GlobalFragmentMap";
    59     private const string GenerationsParameterName = "Generations";
    60     private const string SymbolicExpressionInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
    61     private const string SymbolicRegressionProblemDataParameterName = "ProblemData";
    62     private const string FragmentStatisticsParameterName = "FragmentStatistics";
    63     private const string FragmentLengthsDistributionParametersName = "FragmentLengths";
    64     private const string FragmentFrequenciesParameterName = "FragmentFrequencies";
    65     private const string ParentsDiversityParameterName = "ParentsDiversity";
    66     #endregion
    67 
     42  public sealed class SymbolicExpressionTreeFragmentLengthAnalyzer : SymbolicExpressionTreeFragmentsAnalyzer {
     43    private const string FragmentLengthsParameterName = "Fragment Lengths";
    6844    // impact values calculator
    69     private readonly SymbolicRegressionSolutionValuesCalculator calculator = new SymbolicRegressionSolutionValuesCalculator();
    70 
    71     #region Parameter properties
    72     public ValueParameter<IntValue> UpdateIntervalParameter {
    73       get { return (ValueParameter<IntValue>)Parameters[UpdateIntervalParameterName]; }
    74     }
    75     public ValueParameter<IntValue> UpdateCounterParameter {
    76       get { return (ValueParameter<IntValue>)Parameters[UpdateCounterParameterName]; }
    77     }
    78     public LookupParameter<ResultCollection> ResultsParameter {
    79       get { return (LookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
    80     }
    81     public LookupParameter<IntValue> GenerationsParameter {
    82       get { return (LookupParameter<IntValue>)Parameters[GenerationsParameterName]; }
    83     }
    84     // tracking structures
    85     public LookupParameter<TraceMapType> GlobalTraceMapParameter {
    86       get { return (LookupParameter<TraceMapType>)Parameters[GlobalTraceMapParameterName]; }
    87     }
    88     public LookupParameter<CloneMapType> GlobalCloneMapParameter {
    89       get { return (LookupParameter<CloneMapType>)Parameters[GlobalCloneMapParameterName]; }
    90     }
    91     public LookupParameter<CloneMapType> GlobalFragmentMapParameter {
    92       get { return (LookupParameter<CloneMapType>)Parameters[GlobalFragmentMapParameterName]; }
    93     }
    94     public LookupParameter<DataTable> FragmentFrequenciesParameter {
    95       get { return (LookupParameter<DataTable>)Parameters[FragmentFrequenciesParameterName]; }
    96     }
    97     public LookupParameter<DataTable> FragmentStatisticsParameter {
    98       get { return (LookupParameter<DataTable>)Parameters[FragmentStatisticsParameterName]; }
    99     }
    100     public LookupParameter<DataTable> FragmentLengthsParameter {
    101       get { return (LookupParameter<DataTable>)Parameters[FragmentLengthsDistributionParametersName]; }
    102     }
    103     public LookupParameter<DataTable> ParentsDiversityParameter {
    104       get { return (LookupParameter<DataTable>)Parameters[ParentsDiversityParameterName]; }
    105     }
    106     // auxiliary structures for tracking fragments and individuals from the previous generation
    107     // (this is needed because tracking is done in connection to the current generation, i.e., for
    108     // counting "successful" offspring and fragments
    109     public LookupParameter<TraceMapType> SecondaryTraceMapParameter {
    110       get { return (LookupParameter<TraceMapType>)Parameters[SecondaryTraceMapParameterName]; }
    111     }
    112     public LookupParameter<CloneMapType> SecondaryFragmentMapParameter {
    113       get { return (LookupParameter<CloneMapType>)Parameters[SecondaryFragmentMapParameterName]; }
    114     }
    115     public LookupParameter<CloneMapType> SecondaryCloneMapParameter {
    116       get { return (LookupParameter<CloneMapType>)Parameters[SecondaryCloneMapParameterName]; }
    117     }
    118     // problem data, interpreter and evaluator
    119     public LookupParameter<SymbolicDataAnalysisExpressionTreeInterpreter> SymbolicExpressionInterpreterParameter {
    120       get { return (LookupParameter<SymbolicDataAnalysisExpressionTreeInterpreter>)Parameters[SymbolicExpressionInterpreterParameterName]; }
    121     }
    122     public LookupParameter<RegressionProblemData> SymbolicRegressionProblemDataParameter {
    123       get { return (LookupParameter<RegressionProblemData>)Parameters[SymbolicRegressionProblemDataParameterName]; }
    124     }
    125     #endregion
    126 
    127     #region Parameters
    128     public bool EnabledByDefault {
    129       get { return true; }
    130     }
    131     public IntValue UpdateInterval {
    132       get { return UpdateIntervalParameter.Value; }
    133     }
    134     public IntValue UpdateCounter {
    135       get { return UpdateCounterParameter.Value; }
    136     }
    137     public ResultCollection Results {
    138       get { return ResultsParameter.ActualValue; }
    139     }
    140     public CloneMapType GlobalCloneMap {
    141       get { return GlobalCloneMapParameter.ActualValue; }
    142     }
    143     public TraceMapType GlobalTraceMap {
    144       get { return GlobalTraceMapParameter.ActualValue; }
    145     }
    146     public TraceMapType SecondaryTraceMap {
    147       get { return SecondaryTraceMapParameter.ActualValue; }
    148       set { SecondaryTraceMapParameter.ActualValue = value; }
    149     }
    150     public CloneMapType SecondaryFragmentMap {
    151       get { return SecondaryFragmentMapParameter.ActualValue; }
    152       set { SecondaryFragmentMapParameter.ActualValue = value; }
    153     }
    154     public CloneMapType SecondaryCloneMap {
    155       get { return SecondaryCloneMapParameter.ActualValue; }
    156       set { SecondaryCloneMapParameter.ActualValue = value; }
    157     }
    158     public CloneMapType GlobalFragmentMap {
    159       get { return GlobalFragmentMapParameter.ActualValue; }
    160     }
    161     public IntValue Generations {
    162       get { return GenerationsParameter.ActualValue; }
    163     }
    164     public DataTable FragmentStatistics {
    165       get { return FragmentStatisticsParameter.ActualValue; }
    166     }
     45    public ILookupParameter<DataTable> FragmentLengthsParameter { get { return (ILookupParameter<DataTable>)Parameters[FragmentLengthsParameterName]; } }
    16746    public DataTable FragmentLengths {
    16847      get { return FragmentLengthsParameter.ActualValue; }
     48      set { FragmentLengthsParameter.ActualValue = value; }
    16949    }
    170     public DataTable FragmentFrequencies {
    171       get { return FragmentFrequenciesParameter.ActualValue; }
    172     }
    173     public DataTable ParentsDiversity {
    174       get { return ParentsDiversityParameter.ActualValue; }
    175     }
    176     public SymbolicDataAnalysisExpressionTreeInterpreter SymbolicExpressionInterpreter {
    177       get { return SymbolicExpressionInterpreterParameter.ActualValue; }
    178     }
    179     public RegressionProblemData SymbolicRegressionProblemData {
    180       get { return SymbolicRegressionProblemDataParameter.ActualValue; }
    181     }
    182     #endregion
    18350
    18451    [StorableConstructor]
    185     private SymbolicExpressionTreeFragmentsAnalyzer(bool deserializing) : base(deserializing) { }
    186     private SymbolicExpressionTreeFragmentsAnalyzer(SymbolicExpressionTreeFragmentsAnalyzer original, Cloner cloner) : base(original, cloner) { }
     52    private SymbolicExpressionTreeFragmentLengthAnalyzer(bool deserializing) : base(deserializing) { }
     53    private SymbolicExpressionTreeFragmentLengthAnalyzer(SymbolicExpressionTreeFragmentLengthAnalyzer original, Cloner cloner) : base(original, cloner) { }
    18754    public override IDeepCloneable Clone(Cloner cloner) {
    188       return new SymbolicExpressionTreeFragmentsAnalyzer(this, cloner);
     55      return new SymbolicExpressionTreeFragmentLengthAnalyzer(this, cloner);
    18956    }
    190     public SymbolicExpressionTreeFragmentsAnalyzer()
     57    public SymbolicExpressionTreeFragmentLengthAnalyzer()
    19158      : base() {
    19259      #region Add parameters
    193       // analyzer update counter and update interval
    194       Parameters.Add(new ValueParameter<IntValue>(UpdateIntervalParameterName, "The interval in which the tree length analysis should be applied.", new IntValue(1)));
    195       Parameters.Add(new ValueParameter<IntValue>(UpdateCounterParameterName, "The value which counts how many times the operator was called since the last update", new IntValue(0)));
    196       // tracking
    197       Parameters.Add(new LookupParameter<TraceMapType>(GlobalTraceMapParameterName, "A global cache containing the whole genealogy."));
    198       Parameters.Add(new LookupParameter<CloneMapType>(GlobalCloneMapParameterName, "A global map keeping track of trees and their clones (made during selection)."));
    199       Parameters.Add(new LookupParameter<CloneMapType>(GlobalFragmentMapParameterName, "A global map keeping track of tree fragments received via crossover."));
    200       // secondary tracking
    201       Parameters.Add(new LookupParameter<TraceMapType>(SecondaryTraceMapParameterName, "Parameter to store the trace map from the previous generation"));
    202       Parameters.Add(new LookupParameter<CloneMapType>(SecondaryCloneMapParameterName, "Parameter to store the clone map from the previous generation"));
    203       Parameters.Add(new LookupParameter<CloneMapType>(SecondaryFragmentMapParameterName, "Parameter to store the fragment map from the previous generation"));
    204       // fragment statistics parameters
    205       Parameters.Add(new ValueLookupParameter<ResultCollection>(ResultsParameterName, "The results collection where the analysis values should be stored."));
    206       Parameters.Add(new LookupParameter<IntValue>(GenerationsParameterName, "The number of generations so far."));
    207       Parameters.Add(new LookupParameter<DataTable>(FragmentStatisticsParameterName, "The data table to store the fragment statistics."));
    208       Parameters.Add(new LookupParameter<DataTable>(FragmentLengthsDistributionParametersName, "The data table to store the distribution of fragment lengths."));
    209       Parameters.Add(new LookupParameter<DataTable>(FragmentFrequenciesParameterName, "A data structure for fragment frequencies"));
    210       Parameters.Add(new LookupParameter<DataTable>(ParentsDiversityParameterName, "A data structure to store the diversity of the parents per generation."));
    211       // impact calculation
    212       Parameters.Add(new LookupParameter<SymbolicDataAnalysisExpressionTreeInterpreter>(SymbolicExpressionInterpreterParameterName, "Interpreter for symbolic expression trees"));
    213       Parameters.Add(new LookupParameter<RegressionProblemData>(SymbolicRegressionProblemDataParameterName, "The symbolic data analysis problem."));
     60      Parameters.Add(new LookupParameter<DataTable>(FragmentLengthsParameterName));
    21461      #endregion
    21562
    21663      UpdateCounterParameter.Hidden = true;
    21764      UpdateIntervalParameter.Hidden = true;
     65
     66
    21867    }
    21968    #region After deserialization code
     69
    22070    [StorableHook(HookType.AfterDeserialization)]
    22171    private void AfterDeserialization() {
    222       // check if all the parameters are present and accounted for
    223       if (!Parameters.ContainsKey(UpdateIntervalParameterName)) {
    224         Parameters.Add(new ValueParameter<IntValue>(UpdateIntervalParameterName, "The interval in which the tree length analysis should be applied.", new IntValue(1)));
    225       }
    226       if (!Parameters.ContainsKey(UpdateCounterParameterName)) {
    227         Parameters.Add(new ValueParameter<IntValue>(UpdateCounterParameterName, "The value which counts how many times the operator was called since the last update", new IntValue(0)));
    228         UpdateCounterParameter.Hidden = true;
    229       }
    230       if (!Parameters.ContainsKey(FragmentStatisticsParameterName)) {
    231         Parameters.Add(new LookupParameter<DataTable>(FragmentStatisticsParameterName, "The data table to store the fragment statistics."));
    232       }
    233       if (!Parameters.ContainsKey(FragmentLengthsDistributionParametersName)) {
    234         Parameters.Add(new LookupParameter<DataTable>(FragmentLengthsDistributionParametersName, "The data table to store the distribution of fragment lengths."));
    235       }
    236       if (!Parameters.ContainsKey(FragmentFrequenciesParameterName)) {
    237         Parameters.Add(new LookupParameter<ItemList<ItemList<IItem>>>(FragmentFrequenciesParameterName, "A data structure for fragment frequencies"));
    238       }
    239       if (!Parameters.ContainsKey(ParentsDiversityParameterName)) {
    240         Parameters.Add(new LookupParameter<DataTable>(ParentsDiversityParameterName, "A data table to store the diversity of the parents per generation."));
    241       }
    24272    }
     73
    24374    #endregion
    24475    #region IStatefulItem members
     
    25485    #endregion
    25586
    256     /* A small description of the way this analyzer works
    257      *
    258      * We keep 3 maps in the global scope: the GlobalTraceMap, the GlobalFragmentMap and the GlobalCloneMap that are updated on each step: selection, crossover, mutation
    259      * These maps provide information about the children, parents and transferred fragments as follows:
    260      *
    261      * 1) GlobalCloneMap
    262      *    This data structure provides the basis for tracking. In the selection step, every selected individual gets cloned and transferred into the recombination pool.
    263      *    The GlobalCloneMap keeps track of clones by mapping them to the original individuals. This is an essential step for building the genealogy graph. Using this
    264      *    structure we can find out how many times each individual was selected.
    265      *    The GlobalCloneMap consists of Key-Value pairs Clone->Original
    266      *
    267      * 2) GlobalTraceMap
    268      *    Keeps information in the form of key-value pairs Child->{Parents}, where {Parents} is a set consisting of
    269      *      - one parent individual (in the case of mutation), by language abuse we say that the individual that mutation acted on is the parent,
    270      *        and the resulting individual is the child
    271      *      - two parent individuals (in the case of crossover), named Parent0 and Parent1. Parent0 is the individual that crossover acts on by replacing
    272      *        one of its subtrees with a compatible subtree, randomly selected from Parent1
    273      *
    274      *    CROSSOVER
    275      *        Parent0     Parent1                                                                    {Parents}
    276      *           \           /                                                                           ^
    277      *            \       fragment (inserted from Parent1 into Parent0 to create the Child)              ^    [Mapping provided by the GlobalTraceMap]
    278      *             \       /                                                                             ^
    279      *              \     /                                                                              ^
    280      *               Child                                                                             Child
    281      *               
    282      *    MUTATION
    283      *        Parent0
    284      *           |
    285      *           |    [mutation acts on a random node/subtree in the 'parent' individual
    286      *           |
    287      *         Child
    288      *         
    289      *    Since crossover and mutation act on individuals in the recombination pool (which in fact are clones of individuals from the previous generation), the GlobalTraceMap
    290      *    is populated with values given by the mappings in the GlobalCloneMap, so that the original parent for each child is known.
    291      *   
    292      *  3) GlobalFragmentMap
    293      *     Keeps information in the form of Key-Value pairs about an individual and its respective fragment
    294      *     (ie., the subtree received via crossover or created/altered by mutation)
    295      * */
    29687    public override IOperation Apply() {
    29788      UpdateCounter.Value++;
    29889      if (UpdateCounter.Value == UpdateInterval.Value) {
    29990        UpdateCounter.Value = 0; // reset counter
    300         if (Generations.Value > 0) {
     91        if (Generations.Value > 1) {
     92          SecondaryTraceMap = SecondaryTraceMap ?? new TraceMapType();
     93          SecondaryFragmentMap = SecondaryFragmentMap ?? new CloneMapType();
     94          SecondaryCloneMap = SecondaryCloneMap ?? new CloneMapType();
     95
    30196          InitializeParameters();
     97          InitializeRows();
    30298
    303           CalculateFragmentStatistics();
     99          var parents = SecondaryTraceMap.Values.SelectMany(list => list.Cast<ISymbolicExpressionTree>()).ToList(); // parents of individuals at generation N-1
     100          var offspring = SecondaryTraceMap.Keys.Cast<ISymbolicExpressionTree>().ToList(); // individuals at generation N-1
     101          var fragments = SecondaryFragmentMap.Values.Cast<IFragment>().ToList();
    304102
    305           // save the current generation so it can be compared to the following one, next time the analyzer is applied
    306           SecondaryTraceMap = (TraceMapType)GlobalTraceMap.Clone();
    307           SecondaryTraceMap.Clear();
    308           foreach (var m in GlobalTraceMap)
    309             SecondaryTraceMap.Add(m.Key, m.Value);
    310103
    311           SecondaryCloneMap.Clear();
    312           foreach (var m in GlobalCloneMap)
    313             SecondaryCloneMap.Add(m.Key, m.Value);
     104          // a hash of the offspring trees that got selected for reproduction at generation N
     105          var selected = new HashSet<ISymbolicExpressionTree>(GlobalTraceMap.Values.SelectMany(list => list.Cast<ISymbolicExpressionTree>()));
    314106
    315           SecondaryFragmentMap.Clear();
    316           foreach (var m in GlobalFragmentMap)
    317             SecondaryFragmentMap.Add(m.Key, m.Value);
     107          var selectedOffspringFragments = SecondaryFragmentMap.Where(m => selected.Contains((ISymbolicExpressionTree)m.Key)).Select(m => m.Key).ToList();
     108
     109          var selectedOffspring = offspring.Where(selected.Contains).ToList();
     110          var selectedOffspringParents = selectedOffspring.SelectMany(x => SecondaryTraceMap[x].Cast<ISymbolicExpressionTree>()).ToList();
     111
     112          var rows = FragmentLengths.Rows;
     113
     114          double avgParentsLength = parents.Count > 0 ? parents.Average(p => p.Length) : 0;
     115          double avgGoodParentsLength = selectedOffspringParents.Count > 0 ? selectedOffspringParents.Average(p => p.Length) : 0;
     116          rows["Parent lengths (all)"].Values.Add(avgParentsLength);
     117          rows["Parent lengths (good)"].Values.Add(avgGoodParentsLength);
     118          double avgOffspringLength = offspring.Count > 0 ? offspring.Average(o => o.Length) : 0;
     119          double avgGoodOffspringLength = selectedOffspring.Count > 0 ? selectedOffspring.Average(o => o.Length) : 0;
     120          rows["Offspring lengths (good)"].Values.Add(avgOffspringLength);
     121          rows["Offspring lengths (all)"].Values.Add(avgGoodOffspringLength);
     122          double avgFragmentLength = fragments.Count > 0 ? fragments.Average(f => f.Length) : 0;
     123          double avgGoodFragmentLength = selectedOffspringFragments.Count > 0 ? fragments.Average(f => f.Length) : 0;
     124          rows["Fragment lengths (good)"].Values.Add(avgGoodFragmentLength);
     125          rows["Fragment lengths (all)"].Values.Add(avgFragmentLength);
    318126        }
    319127      }
    320       if (Generations.Value > 0)
    321         if (this.Successor == null || !(this.Successor is SymbolicExpressionTreeGenealogyAnalyzer)) {
    322           // clear the global maps to save memory
    323           GlobalCloneMap.Clear();
    324           GlobalTraceMap.Clear();
    325           GlobalFragmentMap.Clear();
    326         }
     128
    327129      return base.Apply();
    328130    }
    329131
    330     private void InitializeParameters() {
    331       #region Init parameters
     132    protected override void InitializeParameters() {
    332133      var results = ResultsParameter.ActualValue;
    333 
    334       if (FragmentStatistics == null) {
    335         FragmentStatisticsParameter.ActualValue = new DataTable("Fragment Statistics", "Statistical measurements of fragments aggregated over the whole population");
    336         FragmentStatistics.VisualProperties.YAxisTitle = "Fragment length/Similarities";
    337         results.Add(new Result("Fragment Statistics", FragmentStatistics));
    338       }
    339       if (FragmentLengths == null) {
    340         FragmentLengthsParameter.ActualValue = new DataTable("Fragment Lengths", "Histograms for the distribution of fragment and individual lengths");
    341         FragmentLengths.VisualProperties.YAxisTitle = "Frequency";
     134      if (!results.ContainsKey("Fragment Lengths")) {
     135        FragmentLengths = FragmentLengths ?? new DataTable("Fragment Lengths") { VisualProperties = { YAxisTitle = "Fragment length" } };
    342136        results.Add(new Result("Fragment Lengths", FragmentLengths));
    343137      }
    344       if (FragmentFrequencies == null) {
    345         FragmentFrequenciesParameter.ActualValue = new DataTable("Fragment Frequencies", "Frequencies of good and high impact fragments");
    346         FragmentFrequencies.VisualProperties.YAxisTitle = "Frequency";
    347         results.Add(new Result("Fragment Frequencies", FragmentFrequencies));
    348       }
    349       if (ParentsDiversity == null) {
    350         ParentsDiversityParameter.ActualValue = new DataTable("Parents Diversity", "Number of unique parents per generation");
    351         ParentsDiversity.VisualProperties.YAxisTitle = "Unique parents";
    352         results.Add(new Result("Parents Diversity", ParentsDiversity));
    353       }
    354       if (SecondaryTraceMap == null) {
    355         SecondaryTraceMap = new TraceMapType();
    356       }
    357       if (SecondaryCloneMap == null) {
    358         SecondaryCloneMap = new CloneMapType();
    359       }
    360       if (SecondaryFragmentMap == null) {
    361         SecondaryFragmentMap = new CloneMapType();
    362       }
    363       #endregion
     138      base.InitializeParameters();
    364139    }
    365140
    366141    private void InitializeRows() {
    367       if (!FragmentStatistics.Rows.ContainsKey("Parent lengths (good)")) {
    368         FragmentStatistics.Rows.Add(new DataRow("Parent lengths (good)") {
    369           VisualProperties = { StartIndexZero = true }
    370         });
    371       }
    372       if (!FragmentStatistics.Rows.ContainsKey("Parent lengths (all)")) {
    373         FragmentStatistics.Rows.Add(new DataRow("Parent lengths (all)") {
    374           VisualProperties = { StartIndexZero = true }
    375         });
    376       }
    377       if (!FragmentStatistics.Rows.ContainsKey("Child lengths (good)")) {
    378         FragmentStatistics.Rows.Add(new DataRow("Child lengths (good)") {
    379           VisualProperties = { StartIndexZero = true }
    380         });
    381       }
    382       if (!FragmentStatistics.Rows.ContainsKey("Child lengths (all)")) {
    383         FragmentStatistics.Rows.Add(new DataRow("Child lengths (all)") {
    384           VisualProperties = { StartIndexZero = true }
    385         });
    386       }
    387       if (!FragmentStatistics.Rows.ContainsKey("Fragment lengths (good)")) {
    388         FragmentStatistics.Rows.Add(new DataRow("Fragment lengths (good)") {
    389           VisualProperties = { StartIndexZero = true }
    390         });
    391       }
    392       if (!FragmentStatistics.Rows.ContainsKey("Fragment lengths (all)")) {
    393         FragmentStatistics.Rows.Add(new DataRow("Fragment lengths (all)") {
    394           VisualProperties = { StartIndexZero = true }
    395         });
    396       }
    397       if (!FragmentStatistics.Rows.ContainsKey("Fragment lengths (high impact)")) {
    398         FragmentStatistics.Rows.Add(new DataRow("Fragment lengths (high impact)") {
    399           VisualProperties = { StartIndexZero = true }
    400         });
    401       }
    402       if (!FragmentStatistics.Rows.ContainsKey("Avg visitation length (all children)")) {
    403         FragmentStatistics.Rows.Add(new DataRow("Avg visitation length (all children)") {
    404           VisualProperties = { StartIndexZero = true }
    405         });
    406       }
    407       if (!FragmentStatistics.Rows.ContainsKey("Avg visitation length (good children)")) {
    408         FragmentStatistics.Rows.Add(new DataRow("Avg visitation length (good children)") {
    409           VisualProperties = { StartIndexZero = true }
    410         });
    411       }
    412       // exact similarity
    413       if (!FragmentStatistics.Rows.ContainsKey("Exact similarity (good fragments)")) {
    414         FragmentStatistics.Rows.Add(new DataRow("Exact similarity (good fragments)") {
    415           VisualProperties = { StartIndexZero = true }
    416         });
    417       }
    418       if (!FragmentStatistics.Rows.ContainsKey("Exact similarity (all fragments)")) {
    419         FragmentStatistics.Rows.Add(new DataRow("Exact similarity (all fragments)") {
    420           VisualProperties = { StartIndexZero = true }
    421         });
    422       }
    423       if (!FragmentFrequencies.Rows.ContainsKey("Frequency of useful fragments")) {
    424         FragmentFrequencies.Rows.Add(new DataRow("Frequency of useful fragments") {
    425           VisualProperties = { StartIndexZero = true }
    426         });
    427       }
    428       if (!FragmentFrequencies.Rows.ContainsKey("Frequency of high impact fragments")) {
    429         FragmentFrequencies.Rows.Add(new DataRow("Frequency of high impact fragments") {
    430           VisualProperties = { StartIndexZero = true }
    431         });
    432       }
    433       if (!ParentsDiversity.Rows.ContainsKey("Unique parents")) {
    434         ParentsDiversity.Rows.Add(new DataRow("Unique parents") {
    435           VisualProperties = { StartIndexZero = true }
    436         });
    437       }
    438       double scaleFactor = 1.0;
    439       if (!FragmentLengths.Rows.ContainsKey("All fragments length distribution"))
    440         FragmentLengths.Rows.Add(new DataRow("All fragments length distribution") {
    441           VisualProperties = {
    442             StartIndexZero = true,
    443             ChartType = DataRowVisualProperties.DataRowChartType.Histogram,
    444             ScaleFactor = scaleFactor
    445           }
    446         });
    447       if (!FragmentLengths.Rows.ContainsKey("Useful fragments length distribution"))
    448         FragmentLengths.Rows.Add(new DataRow("Useful fragments length distribution") {
    449           VisualProperties = {
    450             StartIndexZero = true,
    451             ChartType = DataRowVisualProperties.DataRowChartType.Histogram,
    452             ScaleFactor = scaleFactor
    453           }
    454         });
    455       if (!FragmentLengths.Rows.ContainsKey("All children length distribution"))
    456         FragmentLengths.Rows.Add(new DataRow("All children length distribution") {
    457           VisualProperties = {
    458             StartIndexZero = true,
    459             ChartType = DataRowVisualProperties.DataRowChartType.Histogram,
    460             ScaleFactor = scaleFactor
    461           }
    462         });
    463       if (!FragmentLengths.Rows.ContainsKey("Useful children length distribution"))
    464         FragmentLengths.Rows.Add(new DataRow("Useful children length distribution") {
    465           VisualProperties = {
    466             StartIndexZero = true,
    467             ChartType = DataRowVisualProperties.DataRowChartType.Histogram,
    468             ScaleFactor = scaleFactor
    469           }
    470         });
    471     }
    472 
    473     private void CalculateFragmentStatistics() {
    474       // for this kind of tracking/analysis, we need at least 2 successive generations of crossover
    475       if (Generations.Value > 1) {
    476         InitializeRows();
    477 
    478         #region Fragment Statistics
    479         // create a hashset for fast lookup of parents (to see which children from the previous generation become parents, and count their fragments)
    480         var parentsLookup = new HashSet<ISymbolicExpressionTree>(GlobalTraceMap.Values.SelectMany(x => x.Cast<ISymbolicExpressionTree>()));
    481         var allParents = SecondaryTraceMap.Values.SelectMany(x => x.Cast<ISymbolicExpressionTree>()).ToList();
    482         var allChildren = SecondaryFragmentMap.Keys.Cast<ISymbolicExpressionTree>().ToList(); /* SecondaryTraceMap.Keys should be the same with SecondaryFragmentMap.Keys */
    483         var allFragments = SecondaryFragmentMap.Values.Cast<SymbolicExpressionTreeNodeItem>().Select(x => x.Content).Where(x => x != null).ToList();
    484         // "good" fragments are fragments contained in a surviving offspring (that became a parent)
    485         // "good" parents are those that produced a surviving offspring
    486         // "good" children are the surviving offspring
    487         var goodFragments = new List<ISymbolicExpressionTreeNode>();
    488         var goodParents = new List<ISymbolicExpressionTree>();
    489         var goodChildren = new List<ISymbolicExpressionTree>();
    490 
    491         // take all individuals that have received a fragment in the previous generation
    492         foreach (var m in SecondaryFragmentMap) {
    493           var individual = m.Key as ISymbolicExpressionTree;
    494           // check if the individual became a parent in the current generation,
    495           // by checking if it appears among the parents from the current generation trace map
    496           if (parentsLookup.Contains(individual)) {
    497             var symbExprTreeNodeItem = m.Value as SymbolicExpressionTreeNodeItem;
    498             var fragment = symbExprTreeNodeItem.Content;
    499             if (fragment != null) {
    500               goodFragments.Add(fragment);
    501               goodParents.AddRange(SecondaryTraceMap[individual].Cast<ISymbolicExpressionTree>());
    502               goodChildren.Add(individual);
    503             }
    504           }
    505         }
    506 
    507         if (false) {
    508           var highImpactFragments = new List<ISymbolicExpressionTreeNode>();
    509           foreach (var child in goodChildren) {
    510             var impactValues = calculator.CalculateImpactValues(child, SymbolicExpressionInterpreter, SymbolicRegressionProblemData, 0, 0);
    511             var fragmentItem = SecondaryFragmentMap[child] as SymbolicExpressionTreeNodeItem;
    512             var fragment = fragmentItem.Content;
    513             if (fragment != null && impactValues[fragment] > 0.1) {
    514               if (highImpactFragments.Contains(fragment, new SymbolicExpressionTreeNodeSimilarityComparer(SimilarityLevel.Exact)))
    515                 continue;
    516               // prune fragment (remove irrelevant/low impact nodes)
    517               // this works because a child will never have an impact value greater than its parent
    518               foreach (var fnode in fragment.IterateNodesBreadth().Where(x => x.SubtreeCount > 0)) {
    519                 for (int i = 0; i < fnode.SubtreeCount; ++i) {
    520                   var subtree = fnode.GetSubtree(i);
    521                   if (impactValues[subtree] < 0.1)
    522                     fnode.RemoveSubtree(i);
    523                 }
    524               }
    525               highImpactFragments.Add(fragment);
    526             }
    527           }
    528         }
    529 
    530         FragmentFrequencies.Rows["Frequency of useful fragments"].Values.Add(goodFragments.Count);
    531         //        FragmentFrequencies.Rows["Frequency of high impact fragments"].Values.Add(highImpactFragments.Count);
    532         FragmentStatistics.Rows["Fragment lengths (good)"].Values.Add(goodFragments.Average(x => x.GetLength()));
    533         FragmentStatistics.Rows["Fragment lengths (all)"].Values.Add(allFragments.Average(x => x.GetLength()));
    534         //        double avg = highImpactFragments.Count > 0 ? highImpactFragments.Average(x => x.GetLength()) : 0;
    535         //        FragmentStatistics.Rows["Fragment lengths (high impact)"].Values.Add(avg);
    536         FragmentStatistics.Rows["Parent lengths (good)"].Values.Add(goodParents.Average(x => x.Length));
    537         FragmentStatistics.Rows["Parent lengths (all)"].Values.Add(allParents.Average(x => x.Length));
    538         FragmentStatistics.Rows["Child lengths (good)"].Values.Add(goodChildren.Average(x => x.Length));
    539         FragmentStatistics.Rows["Child lengths (all)"].Values.Add(allChildren.Average(x => x.Length));
    540         //        FragmentStatistics.Rows["Avg visitation length (all children)"].Values.Add(allChildren.Average(x => VisitationLength(x)));
    541         //        FragmentStatistics.Rows["Avg visitation length (good children)"].Values.Add(goodChildren.Average(x => VisitationLength(x)));
    542         FragmentLengths.Rows["All fragments length distribution"].Values.Replace(allFragments.Select(x => (double)x.GetLength()));
    543         FragmentLengths.Rows["Useful fragments length distribution"].Values.Replace(goodFragments.Select(x => (double)x.GetLength()));
    544         FragmentLengths.Rows["All children length distribution"].Values.Replace(allChildren.Select(x => (double)x.Length));
    545         FragmentLengths.Rows["Useful children length distribution"].Values.Replace(goodChildren.Select(x => (double)x.Length));
    546         ParentsDiversity.Rows["Unique parents"].Values.Add(SecondaryCloneMap.Values.GroupBy(x => x).Count());
    547         #endregion
     142      string[] rowNames =
     143        {
     144          "Parent lengths (good)", "Parent lengths (all)",
     145          "Offspring lengths (good)", "Offspring lengths (all)",
     146          "Fragment lengths (good)", "Fragment lengths (all)"
     147        };
     148      foreach (var rowName in rowNames.Where(rowName => !FragmentLengths.Rows.ContainsKey(rowName))) {
     149        FragmentLengths.Rows.Add(new DataRow(rowName) { VisualProperties = { StartIndexZero = true } });
    548150      }
    549151    }
    550 
    551     private static int VisitationLength(ISymbolicExpressionTree tree) {
    552       return VisitationLength(tree.Root);
    553     }
    554 
    555     private static int VisitationLength(ISymbolicExpressionTreeNode node) {
    556       return node.IterateNodesBreadth().Sum(n => n.GetLength());
    557     }
    558 
    559     #region Similarity computations
    560     /// <summary>
    561     /// Provide a measure of how similar the fragments that get passed by crossover from one generation to the next are.
    562     /// </summary>
    563     /// <param name="fragments">The symbolic expression tree fragments</param>
    564     /// <param name="mode">The similarity mode (0 - exact, 1 - high, 2 - relaxed)</param>
    565     /// <returns>The average number of similar fragments</returns>
    566     private static double CalculateSimilarity(IList<ISymbolicExpressionTreeNode> fragments, SimilarityLevel similarity) {
    567       var visited = new bool[fragments.Count];
    568       int groups = 0;
    569       for (int i = 0; i != fragments.Count - 1; ++i) {
    570         if (visited[i]) continue;
    571         for (int j = i + 1; j != fragments.Count; ++j) {
    572           if (visited[j]) continue;
    573           if (fragments[i].IsSimilarTo(fragments[j], similarity))
    574             visited[j] = true;
    575         }
    576         ++groups;
    577       }
    578       return (double)fragments.Count / groups;
    579     }
    580     #endregion
    581   } //SymbolicExpressionTreeFragmentsAnalyzer
     152  } //SymbolicExpressionTreeFragmentLengthAnalyzer
    582153}
  • branches/HeuristicLab.EvolutionaryTracking/HeuristicLab.EvolutionaryTracking/3.4/Analyzers/SymbolicExpressionTreeFragmentsAnalyzer.cs

    r8557 r9082  
    2222using System.Collections.Generic;
    2323using System.Linq;
    24 using HeuristicLab.Analysis;
    2524using HeuristicLab.Common;
    2625using HeuristicLab.Core;
     
    3635using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;
    3736using CloneMapType = HeuristicLab.Core.ItemDictionary<HeuristicLab.Core.IItem, HeuristicLab.Core.IItem>;
    38 using SymbolicExpressionTreeNodeItem = HeuristicLab.EvolutionaryTracking.GenericWrapper<HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.ISymbolicExpressionTreeNode>;
    3937using TraceMapType = HeuristicLab.Core.ItemDictionary<HeuristicLab.Core.IItem, HeuristicLab.Core.IItemList<HeuristicLab.Core.IItem>>;
    4038
     
    4644  [Item("SymbolicExpressionTreeFragmentsAnalyzer", "An operator that provides statistics about crossover fragments")]
    4745  [StorableClass]
    48   public sealed class SymbolicExpressionTreeFragmentsAnalyzer : SingleSuccessorOperator, IAnalyzer {
     46  public abstract class SymbolicExpressionTreeFragmentsAnalyzer : SingleSuccessorOperator, IAnalyzer {
    4947    #region Parameter names
    50     private const string UpdateIntervalParameterName = "UpdateInterval";
    51     private const string UpdateCounterParameterName = "UpdateCounter";
    52     private const string ResultsParameterName = "Results";
    53     private const string SecondaryTraceMapParameterName = "SecondaryTraceMap";
    54     private const string SecondaryFragmentMapParameterName = "SecondaryFragmentMap";
    55     private const string SecondaryCloneMapParameterName = "SecondaryCloneMap";
    56     private const string GlobalTraceMapParameterName = "GlobalTraceMap";
    57     private const string GlobalCloneMapParameterName = "GlobalCloneMap";
    58     private const string GlobalFragmentMapParameterName = "GlobalFragmentMap";
    59     private const string GenerationsParameterName = "Generations";
    60     private const string SymbolicExpressionInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
    61     private const string SymbolicRegressionProblemDataParameterName = "ProblemData";
    62     private const string FragmentStatisticsParameterName = "FragmentStatistics";
    63     private const string FragmentLengthsDistributionParametersName = "FragmentLengths";
    64     private const string FragmentFrequenciesParameterName = "FragmentFrequencies";
    65     private const string ParentsDiversityParameterName = "ParentsDiversity";
     48    protected const string UpdateIntervalParameterName = "UpdateInterval";
     49    protected const string UpdateCounterParameterName = "UpdateCounter";
     50    protected const string ResultsParameterName = "Results";
     51    protected const string SecondaryTraceMapParameterName = "SecondaryTraceMap";
     52    protected const string SecondaryFragmentMapParameterName = "SecondaryFragmentMap";
     53    protected const string SecondaryCloneMapParameterName = "SecondaryCloneMap";
     54    protected const string GlobalTraceMapParameterName = "GlobalTraceMap";
     55    protected const string GlobalTraceMapParameterDescription = "Global map of trees and their parents.";
     56    protected const string GlobalCloneMapParameterName = "GlobalCloneMap";
     57    protected const string GlobalCloneMapParameterDescription = "Global map of trees and their clones";
     58    protected const string GlobalFragmentMapParameterName = "GlobalFragmentMap";
     59    protected const string GlobalFragmentMapParameterDescription = "Global map of trees and their respective fragments.";
     60    protected const string GenerationsParameterName = "Generations";
     61    protected const string SymbolicExpressionInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
     62    protected const string SymbolicRegressionProblemDataParameterName = "ProblemData";
    6663    #endregion
    6764
    6865    // impact values calculator
    69     private readonly SymbolicRegressionSolutionValuesCalculator calculator = new SymbolicRegressionSolutionValuesCalculator();
     66    private readonly SymbolicRegressionSolutionImpactValuesCalculator calculator = new SymbolicRegressionSolutionImpactValuesCalculator();
    7067
    7168    #region Parameter properties
     
    9188    public LookupParameter<CloneMapType> GlobalFragmentMapParameter {
    9289      get { return (LookupParameter<CloneMapType>)Parameters[GlobalFragmentMapParameterName]; }
    93     }
    94     public LookupParameter<DataTable> FragmentFrequenciesParameter {
    95       get { return (LookupParameter<DataTable>)Parameters[FragmentFrequenciesParameterName]; }
    96     }
    97     public LookupParameter<DataTable> FragmentStatisticsParameter {
    98       get { return (LookupParameter<DataTable>)Parameters[FragmentStatisticsParameterName]; }
    99     }
    100     public LookupParameter<DataTable> FragmentLengthsParameter {
    101       get { return (LookupParameter<DataTable>)Parameters[FragmentLengthsDistributionParametersName]; }
    102     }
    103     public LookupParameter<DataTable> ParentsDiversityParameter {
    104       get { return (LookupParameter<DataTable>)Parameters[ParentsDiversityParameterName]; }
    10590    }
    10691    // auxiliary structures for tracking fragments and individuals from the previous generation
     
    162147      get { return GenerationsParameter.ActualValue; }
    163148    }
    164     public DataTable FragmentStatistics {
    165       get { return FragmentStatisticsParameter.ActualValue; }
    166     }
    167     public DataTable FragmentLengths {
    168       get { return FragmentLengthsParameter.ActualValue; }
    169     }
    170     public DataTable FragmentFrequencies {
    171       get { return FragmentFrequenciesParameter.ActualValue; }
    172     }
    173     public DataTable ParentsDiversity {
    174       get { return ParentsDiversityParameter.ActualValue; }
    175     }
    176149    public SymbolicDataAnalysisExpressionTreeInterpreter SymbolicExpressionInterpreter {
    177150      get { return SymbolicExpressionInterpreterParameter.ActualValue; }
     
    183156
    184157    [StorableConstructor]
    185     private SymbolicExpressionTreeFragmentsAnalyzer(bool deserializing) : base(deserializing) { }
    186     private SymbolicExpressionTreeFragmentsAnalyzer(SymbolicExpressionTreeFragmentsAnalyzer original, Cloner cloner) : base(original, cloner) { }
    187     public override IDeepCloneable Clone(Cloner cloner) {
    188       return new SymbolicExpressionTreeFragmentsAnalyzer(this, cloner);
    189     }
    190     public SymbolicExpressionTreeFragmentsAnalyzer()
     158    protected SymbolicExpressionTreeFragmentsAnalyzer(bool deserializing) : base(deserializing) { }
     159
     160    protected SymbolicExpressionTreeFragmentsAnalyzer(SymbolicExpressionTreeFragmentsAnalyzer original, Cloner cloner) : base(original, cloner) { }
     161
     162    protected SymbolicExpressionTreeFragmentsAnalyzer()
    191163      : base() {
    192164      #region Add parameters
     
    195167      Parameters.Add(new ValueParameter<IntValue>(UpdateCounterParameterName, "The value which counts how many times the operator was called since the last update", new IntValue(0)));
    196168      // tracking
    197       Parameters.Add(new LookupParameter<TraceMapType>(GlobalTraceMapParameterName, "A global cache containing the whole genealogy."));
    198       Parameters.Add(new LookupParameter<CloneMapType>(GlobalCloneMapParameterName, "A global map keeping track of trees and their clones (made during selection)."));
    199       Parameters.Add(new LookupParameter<CloneMapType>(GlobalFragmentMapParameterName, "A global map keeping track of tree fragments received via crossover."));
     169      Parameters.Add(new LookupParameter<TraceMapType>(GlobalTraceMapParameterName, GlobalTraceMapParameterDescription));
     170      Parameters.Add(new LookupParameter<CloneMapType>(GlobalCloneMapParameterName, GlobalCloneMapParameterDescription));
     171      Parameters.Add(new LookupParameter<CloneMapType>(GlobalFragmentMapParameterName, GlobalFragmentMapParameterDescription));
    200172      // secondary tracking
    201       Parameters.Add(new LookupParameter<TraceMapType>(SecondaryTraceMapParameterName, "Parameter to store the trace map from the previous generation"));
    202       Parameters.Add(new LookupParameter<CloneMapType>(SecondaryCloneMapParameterName, "Parameter to store the clone map from the previous generation"));
    203       Parameters.Add(new LookupParameter<CloneMapType>(SecondaryFragmentMapParameterName, "Parameter to store the fragment map from the previous generation"));
     173      Parameters.Add(new LookupParameter<TraceMapType>(SecondaryTraceMapParameterName, GlobalTraceMapParameterDescription));
     174      Parameters.Add(new LookupParameter<CloneMapType>(SecondaryCloneMapParameterName, GlobalCloneMapParameterDescription));
     175      Parameters.Add(new LookupParameter<CloneMapType>(SecondaryFragmentMapParameterName, GlobalFragmentMapParameterDescription));
     176      // impact calculation
     177      Parameters.Add(new LookupParameter<SymbolicDataAnalysisExpressionTreeInterpreter>(SymbolicExpressionInterpreterParameterName, "Interpreter for symbolic expression trees"));
     178      Parameters.Add(new LookupParameter<RegressionProblemData>(SymbolicRegressionProblemDataParameterName, "The symbolic data analysis problem."));
    204179      // fragment statistics parameters
    205180      Parameters.Add(new ValueLookupParameter<ResultCollection>(ResultsParameterName, "The results collection where the analysis values should be stored."));
    206181      Parameters.Add(new LookupParameter<IntValue>(GenerationsParameterName, "The number of generations so far."));
    207       Parameters.Add(new LookupParameter<DataTable>(FragmentStatisticsParameterName, "The data table to store the fragment statistics."));
    208       Parameters.Add(new LookupParameter<DataTable>(FragmentLengthsDistributionParametersName, "The data table to store the distribution of fragment lengths."));
    209       Parameters.Add(new LookupParameter<DataTable>(FragmentFrequenciesParameterName, "A data structure for fragment frequencies"));
    210       Parameters.Add(new LookupParameter<DataTable>(ParentsDiversityParameterName, "A data structure to store the diversity of the parents per generation."));
    211       // impact calculation
    212       Parameters.Add(new LookupParameter<SymbolicDataAnalysisExpressionTreeInterpreter>(SymbolicExpressionInterpreterParameterName, "Interpreter for symbolic expression trees"));
    213       Parameters.Add(new LookupParameter<RegressionProblemData>(SymbolicRegressionProblemDataParameterName, "The symbolic data analysis problem."));
    214182      #endregion
    215183
    216184      UpdateCounterParameter.Hidden = true;
    217185      UpdateIntervalParameter.Hidden = true;
     186
    218187    }
    219188    #region After deserialization code
     
    227196        Parameters.Add(new ValueParameter<IntValue>(UpdateCounterParameterName, "The value which counts how many times the operator was called since the last update", new IntValue(0)));
    228197        UpdateCounterParameter.Hidden = true;
    229       }
    230       if (!Parameters.ContainsKey(FragmentStatisticsParameterName)) {
    231         Parameters.Add(new LookupParameter<DataTable>(FragmentStatisticsParameterName, "The data table to store the fragment statistics."));
    232       }
    233       if (!Parameters.ContainsKey(FragmentLengthsDistributionParametersName)) {
    234         Parameters.Add(new LookupParameter<DataTable>(FragmentLengthsDistributionParametersName, "The data table to store the distribution of fragment lengths."));
    235       }
    236       if (!Parameters.ContainsKey(FragmentFrequenciesParameterName)) {
    237         Parameters.Add(new LookupParameter<ItemList<ItemList<IItem>>>(FragmentFrequenciesParameterName, "A data structure for fragment frequencies"));
    238       }
    239       if (!Parameters.ContainsKey(ParentsDiversityParameterName)) {
    240         Parameters.Add(new LookupParameter<DataTable>(ParentsDiversityParameterName, "A data table to store the diversity of the parents per generation."));
    241198      }
    242199    }
     
    294251     *     (ie., the subtree received via crossover or created/altered by mutation)
    295252     * */
     253
    296254    public override IOperation Apply() {
    297       UpdateCounter.Value++;
    298       if (UpdateCounter.Value == UpdateInterval.Value) {
    299         UpdateCounter.Value = 0; // reset counter
    300         if (Generations.Value > 0) {
    301           InitializeParameters();
    302 
    303           CalculateFragmentStatistics();
    304 
    305           // save the current generation so it can be compared to the following one, next time the analyzer is applied
    306           SecondaryTraceMap = (TraceMapType)GlobalTraceMap.Clone();
    307           SecondaryTraceMap.Clear();
    308           foreach (var m in GlobalTraceMap)
    309             SecondaryTraceMap.Add(m.Key, m.Value);
    310 
    311           SecondaryCloneMap.Clear();
    312           foreach (var m in GlobalCloneMap)
    313             SecondaryCloneMap.Add(m.Key, m.Value);
    314 
    315           SecondaryFragmentMap.Clear();
    316           foreach (var m in GlobalFragmentMap)
    317             SecondaryFragmentMap.Add(m.Key, m.Value);
    318         }
    319       }
    320       if (Generations.Value > 0)
    321         if (this.Successor == null || !(this.Successor is SymbolicExpressionTreeGenealogyAnalyzer)) {
    322           // clear the global maps to save memory
    323           GlobalCloneMap.Clear();
    324           GlobalTraceMap.Clear();
    325           GlobalFragmentMap.Clear();
    326         }
     255      // save the current generation so it can be compared to the following one, next time the analyzer is applied
     256      SecondaryTraceMap = SecondaryTraceMap ?? new TraceMapType();
     257      SecondaryCloneMap = SecondaryCloneMap ?? new CloneMapType();
     258      SecondaryFragmentMap = SecondaryFragmentMap ?? new CloneMapType();
     259
     260      SecondaryTraceMap.Clear();
     261      if (GlobalTraceMap != null)
     262        foreach (var m in GlobalTraceMap)
     263          SecondaryTraceMap.Add(m.Key, m.Value);
     264
     265      SecondaryCloneMap.Clear();
     266      if (GlobalCloneMap != null)
     267        foreach (var m in GlobalCloneMap)
     268          SecondaryCloneMap.Add(m.Key, m.Value);
     269
     270      SecondaryFragmentMap.Clear();
     271      if (GlobalFragmentMap != null)
     272        foreach (var m in GlobalFragmentMap)
     273          SecondaryFragmentMap.Add(m.Key, m.Value);
    327274      return base.Apply();
    328275    }
    329276
    330     private void InitializeParameters() {
     277    protected virtual void InitializeParameters() {
    331278      #region Init parameters
    332       var results = ResultsParameter.ActualValue;
    333 
    334       if (FragmentStatistics == null) {
    335         FragmentStatisticsParameter.ActualValue = new DataTable("Fragment Statistics", "Statistical measurements of fragments aggregated over the whole population");
    336         FragmentStatistics.VisualProperties.YAxisTitle = "Fragment length/Similarities";
    337         results.Add(new Result("Fragment Statistics", FragmentStatistics));
    338       }
    339       if (FragmentLengths == null) {
    340         FragmentLengthsParameter.ActualValue = new DataTable("Fragment Lengths", "Histograms for the distribution of fragment and individual lengths");
    341         FragmentLengths.VisualProperties.YAxisTitle = "Frequency";
    342         results.Add(new Result("Fragment Lengths", FragmentLengths));
    343       }
    344       if (FragmentFrequencies == null) {
    345         FragmentFrequenciesParameter.ActualValue = new DataTable("Fragment Frequencies", "Frequencies of good and high impact fragments");
    346         FragmentFrequencies.VisualProperties.YAxisTitle = "Frequency";
    347         results.Add(new Result("Fragment Frequencies", FragmentFrequencies));
    348       }
    349       if (ParentsDiversity == null) {
    350         ParentsDiversityParameter.ActualValue = new DataTable("Parents Diversity", "Number of unique parents per generation");
    351         ParentsDiversity.VisualProperties.YAxisTitle = "Unique parents";
    352         results.Add(new Result("Parents Diversity", ParentsDiversity));
    353       }
    354       if (SecondaryTraceMap == null) {
    355         SecondaryTraceMap = new TraceMapType();
    356       }
    357       if (SecondaryCloneMap == null) {
    358         SecondaryCloneMap = new CloneMapType();
    359       }
    360       if (SecondaryFragmentMap == null) {
    361         SecondaryFragmentMap = new CloneMapType();
    362       }
     279
    363280      #endregion
    364     }
    365 
    366     private void InitializeRows() {
    367       if (!FragmentStatistics.Rows.ContainsKey("Parent lengths (good)")) {
    368         FragmentStatistics.Rows.Add(new DataRow("Parent lengths (good)") {
    369           VisualProperties = { StartIndexZero = true }
    370         });
    371       }
    372       if (!FragmentStatistics.Rows.ContainsKey("Parent lengths (all)")) {
    373         FragmentStatistics.Rows.Add(new DataRow("Parent lengths (all)") {
    374           VisualProperties = { StartIndexZero = true }
    375         });
    376       }
    377       if (!FragmentStatistics.Rows.ContainsKey("Child lengths (good)")) {
    378         FragmentStatistics.Rows.Add(new DataRow("Child lengths (good)") {
    379           VisualProperties = { StartIndexZero = true }
    380         });
    381       }
    382       if (!FragmentStatistics.Rows.ContainsKey("Child lengths (all)")) {
    383         FragmentStatistics.Rows.Add(new DataRow("Child lengths (all)") {
    384           VisualProperties = { StartIndexZero = true }
    385         });
    386       }
    387       if (!FragmentStatistics.Rows.ContainsKey("Fragment lengths (good)")) {
    388         FragmentStatistics.Rows.Add(new DataRow("Fragment lengths (good)") {
    389           VisualProperties = { StartIndexZero = true }
    390         });
    391       }
    392       if (!FragmentStatistics.Rows.ContainsKey("Fragment lengths (all)")) {
    393         FragmentStatistics.Rows.Add(new DataRow("Fragment lengths (all)") {
    394           VisualProperties = { StartIndexZero = true }
    395         });
    396       }
    397       if (!FragmentStatistics.Rows.ContainsKey("Fragment lengths (high impact)")) {
    398         FragmentStatistics.Rows.Add(new DataRow("Fragment lengths (high impact)") {
    399           VisualProperties = { StartIndexZero = true }
    400         });
    401       }
    402       if (!FragmentStatistics.Rows.ContainsKey("Avg visitation length (all children)")) {
    403         FragmentStatistics.Rows.Add(new DataRow("Avg visitation length (all children)") {
    404           VisualProperties = { StartIndexZero = true }
    405         });
    406       }
    407       if (!FragmentStatistics.Rows.ContainsKey("Avg visitation length (good children)")) {
    408         FragmentStatistics.Rows.Add(new DataRow("Avg visitation length (good children)") {
    409           VisualProperties = { StartIndexZero = true }
    410         });
    411       }
    412       // exact similarity
    413       if (!FragmentStatistics.Rows.ContainsKey("Exact similarity (good fragments)")) {
    414         FragmentStatistics.Rows.Add(new DataRow("Exact similarity (good fragments)") {
    415           VisualProperties = { StartIndexZero = true }
    416         });
    417       }
    418       if (!FragmentStatistics.Rows.ContainsKey("Exact similarity (all fragments)")) {
    419         FragmentStatistics.Rows.Add(new DataRow("Exact similarity (all fragments)") {
    420           VisualProperties = { StartIndexZero = true }
    421         });
    422       }
    423       if (!FragmentFrequencies.Rows.ContainsKey("Frequency of useful fragments")) {
    424         FragmentFrequencies.Rows.Add(new DataRow("Frequency of useful fragments") {
    425           VisualProperties = { StartIndexZero = true }
    426         });
    427       }
    428       if (!FragmentFrequencies.Rows.ContainsKey("Frequency of high impact fragments")) {
    429         FragmentFrequencies.Rows.Add(new DataRow("Frequency of high impact fragments") {
    430           VisualProperties = { StartIndexZero = true }
    431         });
    432       }
    433       if (!ParentsDiversity.Rows.ContainsKey("Unique parents")) {
    434         ParentsDiversity.Rows.Add(new DataRow("Unique parents") {
    435           VisualProperties = { StartIndexZero = true }
    436         });
    437       }
    438       double scaleFactor = 1.0;
    439       if (!FragmentLengths.Rows.ContainsKey("All fragments length distribution"))
    440         FragmentLengths.Rows.Add(new DataRow("All fragments length distribution") {
    441           VisualProperties = {
    442             StartIndexZero = true,
    443             ChartType = DataRowVisualProperties.DataRowChartType.Histogram,
    444             ScaleFactor = scaleFactor
    445           }
    446         });
    447       if (!FragmentLengths.Rows.ContainsKey("Useful fragments length distribution"))
    448         FragmentLengths.Rows.Add(new DataRow("Useful fragments length distribution") {
    449           VisualProperties = {
    450             StartIndexZero = true,
    451             ChartType = DataRowVisualProperties.DataRowChartType.Histogram,
    452             ScaleFactor = scaleFactor
    453           }
    454         });
    455       if (!FragmentLengths.Rows.ContainsKey("All children length distribution"))
    456         FragmentLengths.Rows.Add(new DataRow("All children length distribution") {
    457           VisualProperties = {
    458             StartIndexZero = true,
    459             ChartType = DataRowVisualProperties.DataRowChartType.Histogram,
    460             ScaleFactor = scaleFactor
    461           }
    462         });
    463       if (!FragmentLengths.Rows.ContainsKey("Useful children length distribution"))
    464         FragmentLengths.Rows.Add(new DataRow("Useful children length distribution") {
    465           VisualProperties = {
    466             StartIndexZero = true,
    467             ChartType = DataRowVisualProperties.DataRowChartType.Histogram,
    468             ScaleFactor = scaleFactor
    469           }
    470         });
    471     }
    472 
    473     private void CalculateFragmentStatistics() {
    474       // for this kind of tracking/analysis, we need at least 2 successive generations of crossover
    475       if (Generations.Value > 1) {
    476         InitializeRows();
    477 
    478         #region Fragment Statistics
    479         // create a hashset for fast lookup of parents (to see which children from the previous generation become parents, and count their fragments)
    480         var parentsLookup = new HashSet<ISymbolicExpressionTree>(GlobalTraceMap.Values.SelectMany(x => x.Cast<ISymbolicExpressionTree>()));
    481         var allParents = SecondaryTraceMap.Values.SelectMany(x => x.Cast<ISymbolicExpressionTree>()).ToList();
    482         var allChildren = SecondaryFragmentMap.Keys.Cast<ISymbolicExpressionTree>().ToList(); /* SecondaryTraceMap.Keys should be the same with SecondaryFragmentMap.Keys */
    483         var allFragments = SecondaryFragmentMap.Values.Cast<SymbolicExpressionTreeNodeItem>().Select(x => x.Content).Where(x => x != null).ToList();
    484         // "good" fragments are fragments contained in a surviving offspring (that became a parent)
    485         // "good" parents are those that produced a surviving offspring
    486         // "good" children are the surviving offspring
    487         var goodFragments = new List<ISymbolicExpressionTreeNode>();
    488         var goodParents = new List<ISymbolicExpressionTree>();
    489         var goodChildren = new List<ISymbolicExpressionTree>();
    490 
    491         // take all individuals that have received a fragment in the previous generation
    492         foreach (var m in SecondaryFragmentMap) {
    493           var individual = m.Key as ISymbolicExpressionTree;
    494           // check if the individual became a parent in the current generation,
    495           // by checking if it appears among the parents from the current generation trace map
    496           if (parentsLookup.Contains(individual)) {
    497             var symbExprTreeNodeItem = m.Value as SymbolicExpressionTreeNodeItem;
    498             var fragment = symbExprTreeNodeItem.Content;
    499             if (fragment != null) {
    500               goodFragments.Add(fragment);
    501               goodParents.AddRange(SecondaryTraceMap[individual].Cast<ISymbolicExpressionTree>());
    502               goodChildren.Add(individual);
    503             }
    504           }
    505         }
    506 
    507         if (false) {
    508           var highImpactFragments = new List<ISymbolicExpressionTreeNode>();
    509           foreach (var child in goodChildren) {
    510             var impactValues = calculator.CalculateImpactValues(child, SymbolicExpressionInterpreter, SymbolicRegressionProblemData, 0, 0);
    511             var fragmentItem = SecondaryFragmentMap[child] as SymbolicExpressionTreeNodeItem;
    512             var fragment = fragmentItem.Content;
    513             if (fragment != null && impactValues[fragment] > 0.1) {
    514               if (highImpactFragments.Contains(fragment, new SymbolicExpressionTreeNodeSimilarityComparer(SimilarityLevel.Exact)))
    515                 continue;
    516               // prune fragment (remove irrelevant/low impact nodes)
    517               // this works because a child will never have an impact value greater than its parent
    518               foreach (var fnode in fragment.IterateNodesBreadth().Where(x => x.SubtreeCount > 0)) {
    519                 for (int i = 0; i < fnode.SubtreeCount; ++i) {
    520                   var subtree = fnode.GetSubtree(i);
    521                   if (impactValues[subtree] < 0.1)
    522                     fnode.RemoveSubtree(i);
    523                 }
    524               }
    525               highImpactFragments.Add(fragment);
    526             }
    527           }
    528         }
    529 
    530         FragmentFrequencies.Rows["Frequency of useful fragments"].Values.Add(goodFragments.Count);
    531         //        FragmentFrequencies.Rows["Frequency of high impact fragments"].Values.Add(highImpactFragments.Count);
    532         FragmentStatistics.Rows["Fragment lengths (good)"].Values.Add(goodFragments.Average(x => x.GetLength()));
    533         FragmentStatistics.Rows["Fragment lengths (all)"].Values.Add(allFragments.Average(x => x.GetLength()));
    534         //        double avg = highImpactFragments.Count > 0 ? highImpactFragments.Average(x => x.GetLength()) : 0;
    535         //        FragmentStatistics.Rows["Fragment lengths (high impact)"].Values.Add(avg);
    536         FragmentStatistics.Rows["Parent lengths (good)"].Values.Add(goodParents.Average(x => x.Length));
    537         FragmentStatistics.Rows["Parent lengths (all)"].Values.Add(allParents.Average(x => x.Length));
    538         FragmentStatistics.Rows["Child lengths (good)"].Values.Add(goodChildren.Average(x => x.Length));
    539         FragmentStatistics.Rows["Child lengths (all)"].Values.Add(allChildren.Average(x => x.Length));
    540         //        FragmentStatistics.Rows["Avg visitation length (all children)"].Values.Add(allChildren.Average(x => VisitationLength(x)));
    541         //        FragmentStatistics.Rows["Avg visitation length (good children)"].Values.Add(goodChildren.Average(x => VisitationLength(x)));
    542         FragmentLengths.Rows["All fragments length distribution"].Values.Replace(allFragments.Select(x => (double)x.GetLength()));
    543         FragmentLengths.Rows["Useful fragments length distribution"].Values.Replace(goodFragments.Select(x => (double)x.GetLength()));
    544         FragmentLengths.Rows["All children length distribution"].Values.Replace(allChildren.Select(x => (double)x.Length));
    545         FragmentLengths.Rows["Useful children length distribution"].Values.Replace(goodChildren.Select(x => (double)x.Length));
    546         ParentsDiversity.Rows["Unique parents"].Values.Add(SecondaryCloneMap.Values.GroupBy(x => x).Count());
    547         #endregion
    548       }
    549281    }
    550282
     
    564296    /// <param name="mode">The similarity mode (0 - exact, 1 - high, 2 - relaxed)</param>
    565297    /// <returns>The average number of similar fragments</returns>
    566     private static double CalculateSimilarity(IList<ISymbolicExpressionTreeNode> fragments, SimilarityLevel similarity) {
     298    private static double CalculateSimilarity(IList<IFragment> fragments, SymbolicExpressionTreeNodeSimilarityComparer comparer) {
    567299      var visited = new bool[fragments.Count];
    568300      int groups = 0;
     
    571303        for (int j = i + 1; j != fragments.Count; ++j) {
    572304          if (visited[j]) continue;
    573           if (fragments[i].IsSimilarTo(fragments[j], similarity))
     305          if (fragments[i].Root.IsSimilarTo(fragments[j].Root, comparer))
    574306            visited[j] = true;
    575307        }
  • branches/HeuristicLab.EvolutionaryTracking/HeuristicLab.EvolutionaryTracking/3.4/Analyzers/SymbolicExpressionTreeRelativeFragmentDepthAnalyzer.cs

    r8557 r9082  
    2525using HeuristicLab.Common;
    2626using HeuristicLab.Core;
    27 using HeuristicLab.Data;
    2827using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
    29 using HeuristicLab.Operators;
    3028using HeuristicLab.Optimization;
    3129using HeuristicLab.Parameters;
    3230using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    33 using HeuristicLab.Problems.DataAnalysis;
    34 using HeuristicLab.Problems.DataAnalysis.Symbolic;
    3531// type definitions for convenience
    36 using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;
    3732using CloneMapType = HeuristicLab.Core.ItemDictionary<HeuristicLab.Core.IItem, HeuristicLab.Core.IItem>;
    38 using SymbolicExpressionTreeNodeItem = HeuristicLab.EvolutionaryTracking.GenericWrapper<HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.ISymbolicExpressionTreeNode>;
    3933using TraceMapType = HeuristicLab.Core.ItemDictionary<HeuristicLab.Core.IItem, HeuristicLab.Core.IItemList<HeuristicLab.Core.IItem>>;
    4034
     
    4438  /// (Tracking of genetic variance and heredity)
    4539  /// </summary>
    46   [Item("SymbolicExpressionTreeFragmentsAnalyzer", "An operator that provides statistics about crossover fragments")]
     40  [Item("SymbolicExpressionTreeRelativeFragmentDepthAnalyzer", "An operator that provides statistics about crossover fragments")]
    4741  [StorableClass]
    48   public sealed class SymbolicExpressionTreeFragmentsAnalyzer : SingleSuccessorOperator, IAnalyzer {
    49     #region Parameter names
    50     private const string UpdateIntervalParameterName = "UpdateInterval";
    51     private const string UpdateCounterParameterName = "UpdateCounter";
    52     private const string ResultsParameterName = "Results";
    53     private const string SecondaryTraceMapParameterName = "SecondaryTraceMap";
    54     private const string SecondaryFragmentMapParameterName = "SecondaryFragmentMap";
    55     private const string SecondaryCloneMapParameterName = "SecondaryCloneMap";
    56     private const string GlobalTraceMapParameterName = "GlobalTraceMap";
    57     private const string GlobalCloneMapParameterName = "GlobalCloneMap";
    58     private const string GlobalFragmentMapParameterName = "GlobalFragmentMap";
    59     private const string GenerationsParameterName = "Generations";
    60     private const string SymbolicExpressionInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
    61     private const string SymbolicRegressionProblemDataParameterName = "ProblemData";
    62     private const string FragmentStatisticsParameterName = "FragmentStatistics";
    63     private const string FragmentLengthsDistributionParametersName = "FragmentLengths";
    64     private const string FragmentFrequenciesParameterName = "FragmentFrequencies";
    65     private const string ParentsDiversityParameterName = "ParentsDiversity";
    66     #endregion
     42  public sealed class SymbolicExpressionTreeRelativeFragmentDepthAnalyzer : SymbolicExpressionTreeFragmentsAnalyzer {
     43    private const string RelativeFragmentDepthParameterName = "Relative fragment depth";
     44    // impact values calculator
     45    public ILookupParameter<DataTable> RelativeFragmentDepthParameter { get { return (ILookupParameter<DataTable>)Parameters[RelativeFragmentDepthParameterName]; } }
    6746
    68     // impact values calculator
    69     private readonly SymbolicRegressionSolutionValuesCalculator calculator = new SymbolicRegressionSolutionValuesCalculator();
    70 
    71     #region Parameter properties
    72     public ValueParameter<IntValue> UpdateIntervalParameter {
    73       get { return (ValueParameter<IntValue>)Parameters[UpdateIntervalParameterName]; }
     47    public DataTable RelativeFragmentDepths {
     48      get { return RelativeFragmentDepthParameter.ActualValue; }
     49      set { RelativeFragmentDepthParameter.ActualValue = value; }
    7450    }
    75     public ValueParameter<IntValue> UpdateCounterParameter {
    76       get { return (ValueParameter<IntValue>)Parameters[UpdateCounterParameterName]; }
    77     }
    78     public LookupParameter<ResultCollection> ResultsParameter {
    79       get { return (LookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
    80     }
    81     public LookupParameter<IntValue> GenerationsParameter {
    82       get { return (LookupParameter<IntValue>)Parameters[GenerationsParameterName]; }
    83     }
    84     // tracking structures
    85     public LookupParameter<TraceMapType> GlobalTraceMapParameter {
    86       get { return (LookupParameter<TraceMapType>)Parameters[GlobalTraceMapParameterName]; }
    87     }
    88     public LookupParameter<CloneMapType> GlobalCloneMapParameter {
    89       get { return (LookupParameter<CloneMapType>)Parameters[GlobalCloneMapParameterName]; }
    90     }
    91     public LookupParameter<CloneMapType> GlobalFragmentMapParameter {
    92       get { return (LookupParameter<CloneMapType>)Parameters[GlobalFragmentMapParameterName]; }
    93     }
    94     public LookupParameter<DataTable> FragmentFrequenciesParameter {
    95       get { return (LookupParameter<DataTable>)Parameters[FragmentFrequenciesParameterName]; }
    96     }
    97     public LookupParameter<DataTable> FragmentStatisticsParameter {
    98       get { return (LookupParameter<DataTable>)Parameters[FragmentStatisticsParameterName]; }
    99     }
    100     public LookupParameter<DataTable> FragmentLengthsParameter {
    101       get { return (LookupParameter<DataTable>)Parameters[FragmentLengthsDistributionParametersName]; }
    102     }
    103     public LookupParameter<DataTable> ParentsDiversityParameter {
    104       get { return (LookupParameter<DataTable>)Parameters[ParentsDiversityParameterName]; }
    105     }
    106     // auxiliary structures for tracking fragments and individuals from the previous generation
    107     // (this is needed because tracking is done in connection to the current generation, i.e., for
    108     // counting "successful" offspring and fragments
    109     public LookupParameter<TraceMapType> SecondaryTraceMapParameter {
    110       get { return (LookupParameter<TraceMapType>)Parameters[SecondaryTraceMapParameterName]; }
    111     }
    112     public LookupParameter<CloneMapType> SecondaryFragmentMapParameter {
    113       get { return (LookupParameter<CloneMapType>)Parameters[SecondaryFragmentMapParameterName]; }
    114     }
    115     public LookupParameter<CloneMapType> SecondaryCloneMapParameter {
    116       get { return (LookupParameter<CloneMapType>)Parameters[SecondaryCloneMapParameterName]; }
    117     }
    118     // problem data, interpreter and evaluator
    119     public LookupParameter<SymbolicDataAnalysisExpressionTreeInterpreter> SymbolicExpressionInterpreterParameter {
    120       get { return (LookupParameter<SymbolicDataAnalysisExpressionTreeInterpreter>)Parameters[SymbolicExpressionInterpreterParameterName]; }
    121     }
    122     public LookupParameter<RegressionProblemData> SymbolicRegressionProblemDataParameter {
    123       get { return (LookupParameter<RegressionProblemData>)Parameters[SymbolicRegressionProblemDataParameterName]; }
    124     }
    125     #endregion
    126 
    127     #region Parameters
    128     public bool EnabledByDefault {
    129       get { return true; }
    130     }
    131     public IntValue UpdateInterval {
    132       get { return UpdateIntervalParameter.Value; }
    133     }
    134     public IntValue UpdateCounter {
    135       get { return UpdateCounterParameter.Value; }
    136     }
    137     public ResultCollection Results {
    138       get { return ResultsParameter.ActualValue; }
    139     }
    140     public CloneMapType GlobalCloneMap {
    141       get { return GlobalCloneMapParameter.ActualValue; }
    142     }
    143     public TraceMapType GlobalTraceMap {
    144       get { return GlobalTraceMapParameter.ActualValue; }
    145     }
    146     public TraceMapType SecondaryTraceMap {
    147       get { return SecondaryTraceMapParameter.ActualValue; }
    148       set { SecondaryTraceMapParameter.ActualValue = value; }
    149     }
    150     public CloneMapType SecondaryFragmentMap {
    151       get { return SecondaryFragmentMapParameter.ActualValue; }
    152       set { SecondaryFragmentMapParameter.ActualValue = value; }
    153     }
    154     public CloneMapType SecondaryCloneMap {
    155       get { return SecondaryCloneMapParameter.ActualValue; }
    156       set { SecondaryCloneMapParameter.ActualValue = value; }
    157     }
    158     public CloneMapType GlobalFragmentMap {
    159       get { return GlobalFragmentMapParameter.ActualValue; }
    160     }
    161     public IntValue Generations {
    162       get { return GenerationsParameter.ActualValue; }
    163     }
    164     public DataTable FragmentStatistics {
    165       get { return FragmentStatisticsParameter.ActualValue; }
    166     }
    167     public DataTable FragmentLengths {
    168       get { return FragmentLengthsParameter.ActualValue; }
    169     }
    170     public DataTable FragmentFrequencies {
    171       get { return FragmentFrequenciesParameter.ActualValue; }
    172     }
    173     public DataTable ParentsDiversity {
    174       get { return ParentsDiversityParameter.ActualValue; }
    175     }
    176     public SymbolicDataAnalysisExpressionTreeInterpreter SymbolicExpressionInterpreter {
    177       get { return SymbolicExpressionInterpreterParameter.ActualValue; }
    178     }
    179     public RegressionProblemData SymbolicRegressionProblemData {
    180       get { return SymbolicRegressionProblemDataParameter.ActualValue; }
    181     }
    182     #endregion
    18351
    18452    [StorableConstructor]
    185     private SymbolicExpressionTreeFragmentsAnalyzer(bool deserializing) : base(deserializing) { }
    186     private SymbolicExpressionTreeFragmentsAnalyzer(SymbolicExpressionTreeFragmentsAnalyzer original, Cloner cloner) : base(original, cloner) { }
     53    private SymbolicExpressionTreeRelativeFragmentDepthAnalyzer(bool deserializing) : base(deserializing) { }
     54    private SymbolicExpressionTreeRelativeFragmentDepthAnalyzer(SymbolicExpressionTreeRelativeFragmentDepthAnalyzer original, Cloner cloner) : base(original, cloner) { }
    18755    public override IDeepCloneable Clone(Cloner cloner) {
    188       return new SymbolicExpressionTreeFragmentsAnalyzer(this, cloner);
     56      return new SymbolicExpressionTreeRelativeFragmentDepthAnalyzer(this, cloner);
    18957    }
    190     public SymbolicExpressionTreeFragmentsAnalyzer()
     58    public SymbolicExpressionTreeRelativeFragmentDepthAnalyzer()
    19159      : base() {
    19260      #region Add parameters
    193       // analyzer update counter and update interval
    194       Parameters.Add(new ValueParameter<IntValue>(UpdateIntervalParameterName, "The interval in which the tree length analysis should be applied.", new IntValue(1)));
    195       Parameters.Add(new ValueParameter<IntValue>(UpdateCounterParameterName, "The value which counts how many times the operator was called since the last update", new IntValue(0)));
    196       // tracking
    197       Parameters.Add(new LookupParameter<TraceMapType>(GlobalTraceMapParameterName, "A global cache containing the whole genealogy."));
    198       Parameters.Add(new LookupParameter<CloneMapType>(GlobalCloneMapParameterName, "A global map keeping track of trees and their clones (made during selection)."));
    199       Parameters.Add(new LookupParameter<CloneMapType>(GlobalFragmentMapParameterName, "A global map keeping track of tree fragments received via crossover."));
    200       // secondary tracking
    201       Parameters.Add(new LookupParameter<TraceMapType>(SecondaryTraceMapParameterName, "Parameter to store the trace map from the previous generation"));
    202       Parameters.Add(new LookupParameter<CloneMapType>(SecondaryCloneMapParameterName, "Parameter to store the clone map from the previous generation"));
    203       Parameters.Add(new LookupParameter<CloneMapType>(SecondaryFragmentMapParameterName, "Parameter to store the fragment map from the previous generation"));
    204       // fragment statistics parameters
    205       Parameters.Add(new ValueLookupParameter<ResultCollection>(ResultsParameterName, "The results collection where the analysis values should be stored."));
    206       Parameters.Add(new LookupParameter<IntValue>(GenerationsParameterName, "The number of generations so far."));
    207       Parameters.Add(new LookupParameter<DataTable>(FragmentStatisticsParameterName, "The data table to store the fragment statistics."));
    208       Parameters.Add(new LookupParameter<DataTable>(FragmentLengthsDistributionParametersName, "The data table to store the distribution of fragment lengths."));
    209       Parameters.Add(new LookupParameter<DataTable>(FragmentFrequenciesParameterName, "A data structure for fragment frequencies"));
    210       Parameters.Add(new LookupParameter<DataTable>(ParentsDiversityParameterName, "A data structure to store the diversity of the parents per generation."));
    211       // impact calculation
    212       Parameters.Add(new LookupParameter<SymbolicDataAnalysisExpressionTreeInterpreter>(SymbolicExpressionInterpreterParameterName, "Interpreter for symbolic expression trees"));
    213       Parameters.Add(new LookupParameter<RegressionProblemData>(SymbolicRegressionProblemDataParameterName, "The symbolic data analysis problem."));
     61      Parameters.Add(new LookupParameter<DataTable>(RelativeFragmentDepthParameterName));
    21462      #endregion
    21563
    21664      UpdateCounterParameter.Hidden = true;
    21765      UpdateIntervalParameter.Hidden = true;
     66
    21867    }
    21968    #region After deserialization code
     69
    22070    [StorableHook(HookType.AfterDeserialization)]
    22171    private void AfterDeserialization() {
    222       // check if all the parameters are present and accounted for
    223       if (!Parameters.ContainsKey(UpdateIntervalParameterName)) {
    224         Parameters.Add(new ValueParameter<IntValue>(UpdateIntervalParameterName, "The interval in which the tree length analysis should be applied.", new IntValue(1)));
    225       }
    226       if (!Parameters.ContainsKey(UpdateCounterParameterName)) {
    227         Parameters.Add(new ValueParameter<IntValue>(UpdateCounterParameterName, "The value which counts how many times the operator was called since the last update", new IntValue(0)));
    228         UpdateCounterParameter.Hidden = true;
    229       }
    230       if (!Parameters.ContainsKey(FragmentStatisticsParameterName)) {
    231         Parameters.Add(new LookupParameter<DataTable>(FragmentStatisticsParameterName, "The data table to store the fragment statistics."));
    232       }
    233       if (!Parameters.ContainsKey(FragmentLengthsDistributionParametersName)) {
    234         Parameters.Add(new LookupParameter<DataTable>(FragmentLengthsDistributionParametersName, "The data table to store the distribution of fragment lengths."));
    235       }
    236       if (!Parameters.ContainsKey(FragmentFrequenciesParameterName)) {
    237         Parameters.Add(new LookupParameter<ItemList<ItemList<IItem>>>(FragmentFrequenciesParameterName, "A data structure for fragment frequencies"));
    238       }
    239       if (!Parameters.ContainsKey(ParentsDiversityParameterName)) {
    240         Parameters.Add(new LookupParameter<DataTable>(ParentsDiversityParameterName, "A data table to store the diversity of the parents per generation."));
    241       }
    24272    }
     73
    24374    #endregion
    24475    #region IStatefulItem members
     
    25485    #endregion
    25586
    256     /* A small description of the way this analyzer works
    257      *
    258      * We keep 3 maps in the global scope: the GlobalTraceMap, the GlobalFragmentMap and the GlobalCloneMap that are updated on each step: selection, crossover, mutation
    259      * These maps provide information about the children, parents and transferred fragments as follows:
    260      *
    261      * 1) GlobalCloneMap
    262      *    This data structure provides the basis for tracking. In the selection step, every selected individual gets cloned and transferred into the recombination pool.
    263      *    The GlobalCloneMap keeps track of clones by mapping them to the original individuals. This is an essential step for building the genealogy graph. Using this
    264      *    structure we can find out how many times each individual was selected.
    265      *    The GlobalCloneMap consists of Key-Value pairs Clone->Original
    266      *
    267      * 2) GlobalTraceMap
    268      *    Keeps information in the form of key-value pairs Child->{Parents}, where {Parents} is a set consisting of
    269      *      - one parent individual (in the case of mutation), by language abuse we say that the individual that mutation acted on is the parent,
    270      *        and the resulting individual is the child
    271      *      - two parent individuals (in the case of crossover), named Parent0 and Parent1. Parent0 is the individual that crossover acts on by replacing
    272      *        one of its subtrees with a compatible subtree, randomly selected from Parent1
    273      *
    274      *    CROSSOVER
    275      *        Parent0     Parent1                                                                    {Parents}
    276      *           \           /                                                                           ^
    277      *            \       fragment (inserted from Parent1 into Parent0 to create the Child)              ^    [Mapping provided by the GlobalTraceMap]
    278      *             \       /                                                                             ^
    279      *              \     /                                                                              ^
    280      *               Child                                                                             Child
    281      *               
    282      *    MUTATION
    283      *        Parent0
    284      *           |
    285      *           |    [mutation acts on a random node/subtree in the 'parent' individual
    286      *           |
    287      *         Child
    288      *         
    289      *    Since crossover and mutation act on individuals in the recombination pool (which in fact are clones of individuals from the previous generation), the GlobalTraceMap
    290      *    is populated with values given by the mappings in the GlobalCloneMap, so that the original parent for each child is known.
    291      *   
    292      *  3) GlobalFragmentMap
    293      *     Keeps information in the form of Key-Value pairs about an individual and its respective fragment
    294      *     (ie., the subtree received via crossover or created/altered by mutation)
    295      * */
    29687    public override IOperation Apply() {
    29788      UpdateCounter.Value++;
    29889      if (UpdateCounter.Value == UpdateInterval.Value) {
    29990        UpdateCounter.Value = 0; // reset counter
    300         if (Generations.Value > 0) {
     91        if (Generations.Value > 1) {
     92          SecondaryTraceMap = SecondaryTraceMap ?? new TraceMapType();
     93          SecondaryFragmentMap = SecondaryFragmentMap ?? new CloneMapType();
     94          SecondaryCloneMap = SecondaryCloneMap ?? new CloneMapType();
     95
    30196          InitializeParameters();
     97          InitializeRows();
    30298
    303           CalculateFragmentStatistics();
     99          var fragmentDepths = (from m in SecondaryFragmentMap
     100                                let tree = (ISymbolicExpressionTree)m.Key
     101                                let fragment = (IFragment)m.Value
     102                                where fragment.Root != null
     103                                select tree.Root.GetBranchLevel(fragment.Root)
     104                               ).ToList();
    304105
    305           // save the current generation so it can be compared to the following one, next time the analyzer is applied
    306           SecondaryTraceMap = (TraceMapType)GlobalTraceMap.Clone();
    307           SecondaryTraceMap.Clear();
    308           foreach (var m in GlobalTraceMap)
    309             SecondaryTraceMap.Add(m.Key, m.Value);
     106          // fragments of selected offspring
     107          var selected = new HashSet<ISymbolicExpressionTree>(GlobalTraceMap.Values.SelectMany(list => list.Cast<ISymbolicExpressionTree>()));
     108          var goodFragmentDepths = (from m in SecondaryFragmentMap
     109                                    let tree = (ISymbolicExpressionTree)m.Key
     110                                    let fragment = (IFragment)m.Value
     111                                    where fragment.Root != null
     112                                    where selected.Contains(tree)
     113                                    select tree.Root.GetBranchLevel(fragment.Root)
     114                                   ).ToList();
    310115
    311           SecondaryCloneMap.Clear();
    312           foreach (var m in GlobalCloneMap)
    313             SecondaryCloneMap.Add(m.Key, m.Value);
    314 
    315           SecondaryFragmentMap.Clear();
    316           foreach (var m in GlobalFragmentMap)
    317             SecondaryFragmentMap.Add(m.Key, m.Value);
     116          RelativeFragmentDepths.Rows["Average relative fragment depth (all)"].Values.Add(fragmentDepths.Count > 0 ? fragmentDepths.Average() : 0.0);
     117          RelativeFragmentDepths.Rows["Average relative fragment depth (good)"].Values.Add(goodFragmentDepths.Count > 0 ? goodFragmentDepths.Average() : 0.0);
    318118        }
    319119      }
    320       if (Generations.Value > 0)
    321         if (this.Successor == null || !(this.Successor is SymbolicExpressionTreeGenealogyAnalyzer)) {
    322           // clear the global maps to save memory
    323           GlobalCloneMap.Clear();
    324           GlobalTraceMap.Clear();
    325           GlobalFragmentMap.Clear();
    326         }
    327120      return base.Apply();
    328121    }
    329122
    330     private void InitializeParameters() {
    331       #region Init parameters
     123    protected override void InitializeParameters() {
    332124      var results = ResultsParameter.ActualValue;
    333 
    334       if (FragmentStatistics == null) {
    335         FragmentStatisticsParameter.ActualValue = new DataTable("Fragment Statistics", "Statistical measurements of fragments aggregated over the whole population");
    336         FragmentStatistics.VisualProperties.YAxisTitle = "Fragment length/Similarities";
    337         results.Add(new Result("Fragment Statistics", FragmentStatistics));
     125      if (!results.ContainsKey("Relative fragment depths")) {
     126        RelativeFragmentDepths = RelativeFragmentDepths ?? new DataTable("Relative fragment depths") { VisualProperties = { YAxisTitle = "Relative depth" } };
     127        results.Add(new Result("Relative fragment depths", RelativeFragmentDepths));
    338128      }
    339       if (FragmentLengths == null) {
    340         FragmentLengthsParameter.ActualValue = new DataTable("Fragment Lengths", "Histograms for the distribution of fragment and individual lengths");
    341         FragmentLengths.VisualProperties.YAxisTitle = "Frequency";
    342         results.Add(new Result("Fragment Lengths", FragmentLengths));
    343       }
    344       if (FragmentFrequencies == null) {
    345         FragmentFrequenciesParameter.ActualValue = new DataTable("Fragment Frequencies", "Frequencies of good and high impact fragments");
    346         FragmentFrequencies.VisualProperties.YAxisTitle = "Frequency";
    347         results.Add(new Result("Fragment Frequencies", FragmentFrequencies));
    348       }
    349       if (ParentsDiversity == null) {
    350         ParentsDiversityParameter.ActualValue = new DataTable("Parents Diversity", "Number of unique parents per generation");
    351         ParentsDiversity.VisualProperties.YAxisTitle = "Unique parents";
    352         results.Add(new Result("Parents Diversity", ParentsDiversity));
    353       }
    354       if (SecondaryTraceMap == null) {
    355         SecondaryTraceMap = new TraceMapType();
    356       }
    357       if (SecondaryCloneMap == null) {
    358         SecondaryCloneMap = new CloneMapType();
    359       }
    360       if (SecondaryFragmentMap == null) {
    361         SecondaryFragmentMap = new CloneMapType();
    362       }
    363       #endregion
     129      base.InitializeParameters();
    364130    }
    365131
    366132    private void InitializeRows() {
    367       if (!FragmentStatistics.Rows.ContainsKey("Parent lengths (good)")) {
    368         FragmentStatistics.Rows.Add(new DataRow("Parent lengths (good)") {
    369           VisualProperties = { StartIndexZero = true }
    370         });
    371       }
    372       if (!FragmentStatistics.Rows.ContainsKey("Parent lengths (all)")) {
    373         FragmentStatistics.Rows.Add(new DataRow("Parent lengths (all)") {
    374           VisualProperties = { StartIndexZero = true }
    375         });
    376       }
    377       if (!FragmentStatistics.Rows.ContainsKey("Child lengths (good)")) {
    378         FragmentStatistics.Rows.Add(new DataRow("Child lengths (good)") {
    379           VisualProperties = { StartIndexZero = true }
    380         });
    381       }
    382       if (!FragmentStatistics.Rows.ContainsKey("Child lengths (all)")) {
    383         FragmentStatistics.Rows.Add(new DataRow("Child lengths (all)") {
    384           VisualProperties = { StartIndexZero = true }
    385         });
    386       }
    387       if (!FragmentStatistics.Rows.ContainsKey("Fragment lengths (good)")) {
    388         FragmentStatistics.Rows.Add(new DataRow("Fragment lengths (good)") {
    389           VisualProperties = { StartIndexZero = true }
    390         });
    391       }
    392       if (!FragmentStatistics.Rows.ContainsKey("Fragment lengths (all)")) {
    393         FragmentStatistics.Rows.Add(new DataRow("Fragment lengths (all)") {
    394           VisualProperties = { StartIndexZero = true }
    395         });
    396       }
    397       if (!FragmentStatistics.Rows.ContainsKey("Fragment lengths (high impact)")) {
    398         FragmentStatistics.Rows.Add(new DataRow("Fragment lengths (high impact)") {
    399           VisualProperties = { StartIndexZero = true }
    400         });
    401       }
    402       if (!FragmentStatistics.Rows.ContainsKey("Avg visitation length (all children)")) {
    403         FragmentStatistics.Rows.Add(new DataRow("Avg visitation length (all children)") {
    404           VisualProperties = { StartIndexZero = true }
    405         });
    406       }
    407       if (!FragmentStatistics.Rows.ContainsKey("Avg visitation length (good children)")) {
    408         FragmentStatistics.Rows.Add(new DataRow("Avg visitation length (good children)") {
    409           VisualProperties = { StartIndexZero = true }
    410         });
    411       }
    412       // exact similarity
    413       if (!FragmentStatistics.Rows.ContainsKey("Exact similarity (good fragments)")) {
    414         FragmentStatistics.Rows.Add(new DataRow("Exact similarity (good fragments)") {
    415           VisualProperties = { StartIndexZero = true }
    416         });
    417       }
    418       if (!FragmentStatistics.Rows.ContainsKey("Exact similarity (all fragments)")) {
    419         FragmentStatistics.Rows.Add(new DataRow("Exact similarity (all fragments)") {
    420           VisualProperties = { StartIndexZero = true }
    421         });
    422       }
    423       if (!FragmentFrequencies.Rows.ContainsKey("Frequency of useful fragments")) {
    424         FragmentFrequencies.Rows.Add(new DataRow("Frequency of useful fragments") {
    425           VisualProperties = { StartIndexZero = true }
    426         });
    427       }
    428       if (!FragmentFrequencies.Rows.ContainsKey("Frequency of high impact fragments")) {
    429         FragmentFrequencies.Rows.Add(new DataRow("Frequency of high impact fragments") {
    430           VisualProperties = { StartIndexZero = true }
    431         });
    432       }
    433       if (!ParentsDiversity.Rows.ContainsKey("Unique parents")) {
    434         ParentsDiversity.Rows.Add(new DataRow("Unique parents") {
    435           VisualProperties = { StartIndexZero = true }
    436         });
    437       }
    438       double scaleFactor = 1.0;
    439       if (!FragmentLengths.Rows.ContainsKey("All fragments length distribution"))
    440         FragmentLengths.Rows.Add(new DataRow("All fragments length distribution") {
    441           VisualProperties = {
    442             StartIndexZero = true,
    443             ChartType = DataRowVisualProperties.DataRowChartType.Histogram,
    444             ScaleFactor = scaleFactor
    445           }
    446         });
    447       if (!FragmentLengths.Rows.ContainsKey("Useful fragments length distribution"))
    448         FragmentLengths.Rows.Add(new DataRow("Useful fragments length distribution") {
    449           VisualProperties = {
    450             StartIndexZero = true,
    451             ChartType = DataRowVisualProperties.DataRowChartType.Histogram,
    452             ScaleFactor = scaleFactor
    453           }
    454         });
    455       if (!FragmentLengths.Rows.ContainsKey("All children length distribution"))
    456         FragmentLengths.Rows.Add(new DataRow("All children length distribution") {
    457           VisualProperties = {
    458             StartIndexZero = true,
    459             ChartType = DataRowVisualProperties.DataRowChartType.Histogram,
    460             ScaleFactor = scaleFactor
    461           }
    462         });
    463       if (!FragmentLengths.Rows.ContainsKey("Useful children length distribution"))
    464         FragmentLengths.Rows.Add(new DataRow("Useful children length distribution") {
    465           VisualProperties = {
    466             StartIndexZero = true,
    467             ChartType = DataRowVisualProperties.DataRowChartType.Histogram,
    468             ScaleFactor = scaleFactor
    469           }
    470         });
    471     }
    472 
    473     private void CalculateFragmentStatistics() {
    474       // for this kind of tracking/analysis, we need at least 2 successive generations of crossover
    475       if (Generations.Value > 1) {
    476         InitializeRows();
    477 
    478         #region Fragment Statistics
    479         // create a hashset for fast lookup of parents (to see which children from the previous generation become parents, and count their fragments)
    480         var parentsLookup = new HashSet<ISymbolicExpressionTree>(GlobalTraceMap.Values.SelectMany(x => x.Cast<ISymbolicExpressionTree>()));
    481         var allParents = SecondaryTraceMap.Values.SelectMany(x => x.Cast<ISymbolicExpressionTree>()).ToList();
    482         var allChildren = SecondaryFragmentMap.Keys.Cast<ISymbolicExpressionTree>().ToList(); /* SecondaryTraceMap.Keys should be the same with SecondaryFragmentMap.Keys */
    483         var allFragments = SecondaryFragmentMap.Values.Cast<SymbolicExpressionTreeNodeItem>().Select(x => x.Content).Where(x => x != null).ToList();
    484         // "good" fragments are fragments contained in a surviving offspring (that became a parent)
    485         // "good" parents are those that produced a surviving offspring
    486         // "good" children are the surviving offspring
    487         var goodFragments = new List<ISymbolicExpressionTreeNode>();
    488         var goodParents = new List<ISymbolicExpressionTree>();
    489         var goodChildren = new List<ISymbolicExpressionTree>();
    490 
    491         // take all individuals that have received a fragment in the previous generation
    492         foreach (var m in SecondaryFragmentMap) {
    493           var individual = m.Key as ISymbolicExpressionTree;
    494           // check if the individual became a parent in the current generation,
    495           // by checking if it appears among the parents from the current generation trace map
    496           if (parentsLookup.Contains(individual)) {
    497             var symbExprTreeNodeItem = m.Value as SymbolicExpressionTreeNodeItem;
    498             var fragment = symbExprTreeNodeItem.Content;
    499             if (fragment != null) {
    500               goodFragments.Add(fragment);
    501               goodParents.AddRange(SecondaryTraceMap[individual].Cast<ISymbolicExpressionTree>());
    502               goodChildren.Add(individual);
    503             }
    504           }
    505         }
    506 
    507         if (false) {
    508           var highImpactFragments = new List<ISymbolicExpressionTreeNode>();
    509           foreach (var child in goodChildren) {
    510             var impactValues = calculator.CalculateImpactValues(child, SymbolicExpressionInterpreter, SymbolicRegressionProblemData, 0, 0);
    511             var fragmentItem = SecondaryFragmentMap[child] as SymbolicExpressionTreeNodeItem;
    512             var fragment = fragmentItem.Content;
    513             if (fragment != null && impactValues[fragment] > 0.1) {
    514               if (highImpactFragments.Contains(fragment, new SymbolicExpressionTreeNodeSimilarityComparer(SimilarityLevel.Exact)))
    515                 continue;
    516               // prune fragment (remove irrelevant/low impact nodes)
    517               // this works because a child will never have an impact value greater than its parent
    518               foreach (var fnode in fragment.IterateNodesBreadth().Where(x => x.SubtreeCount > 0)) {
    519                 for (int i = 0; i < fnode.SubtreeCount; ++i) {
    520                   var subtree = fnode.GetSubtree(i);
    521                   if (impactValues[subtree] < 0.1)
    522                     fnode.RemoveSubtree(i);
    523                 }
    524               }
    525               highImpactFragments.Add(fragment);
    526             }
    527           }
    528         }
    529 
    530         FragmentFrequencies.Rows["Frequency of useful fragments"].Values.Add(goodFragments.Count);
    531         //        FragmentFrequencies.Rows["Frequency of high impact fragments"].Values.Add(highImpactFragments.Count);
    532         FragmentStatistics.Rows["Fragment lengths (good)"].Values.Add(goodFragments.Average(x => x.GetLength()));
    533         FragmentStatistics.Rows["Fragment lengths (all)"].Values.Add(allFragments.Average(x => x.GetLength()));
    534         //        double avg = highImpactFragments.Count > 0 ? highImpactFragments.Average(x => x.GetLength()) : 0;
    535         //        FragmentStatistics.Rows["Fragment lengths (high impact)"].Values.Add(avg);
    536         FragmentStatistics.Rows["Parent lengths (good)"].Values.Add(goodParents.Average(x => x.Length));
    537         FragmentStatistics.Rows["Parent lengths (all)"].Values.Add(allParents.Average(x => x.Length));
    538         FragmentStatistics.Rows["Child lengths (good)"].Values.Add(goodChildren.Average(x => x.Length));
    539         FragmentStatistics.Rows["Child lengths (all)"].Values.Add(allChildren.Average(x => x.Length));
    540         //        FragmentStatistics.Rows["Avg visitation length (all children)"].Values.Add(allChildren.Average(x => VisitationLength(x)));
    541         //        FragmentStatistics.Rows["Avg visitation length (good children)"].Values.Add(goodChildren.Average(x => VisitationLength(x)));
    542         FragmentLengths.Rows["All fragments length distribution"].Values.Replace(allFragments.Select(x => (double)x.GetLength()));
    543         FragmentLengths.Rows["Useful fragments length distribution"].Values.Replace(goodFragments.Select(x => (double)x.GetLength()));
    544         FragmentLengths.Rows["All children length distribution"].Values.Replace(allChildren.Select(x => (double)x.Length));
    545         FragmentLengths.Rows["Useful children length distribution"].Values.Replace(goodChildren.Select(x => (double)x.Length));
    546         ParentsDiversity.Rows["Unique parents"].Values.Add(SecondaryCloneMap.Values.GroupBy(x => x).Count());
    547         #endregion
     133      string[] rowNames =
     134        {
     135          "Average relative fragment depth (good)", "Average relative fragment depth (all)"
     136        };
     137      foreach (var rowName in rowNames.Where(rowName => !RelativeFragmentDepths.Rows.ContainsKey(rowName))) {
     138        RelativeFragmentDepths.Rows.Add(new DataRow(rowName) { VisualProperties = { StartIndexZero = true } });
    548139      }
    549140    }
    550 
    551     private static int VisitationLength(ISymbolicExpressionTree tree) {
    552       return VisitationLength(tree.Root);
    553     }
    554 
    555     private static int VisitationLength(ISymbolicExpressionTreeNode node) {
    556       return node.IterateNodesBreadth().Sum(n => n.GetLength());
    557     }
    558 
    559     #region Similarity computations
    560     /// <summary>
    561     /// Provide a measure of how similar the fragments that get passed by crossover from one generation to the next are.
    562     /// </summary>
    563     /// <param name="fragments">The symbolic expression tree fragments</param>
    564     /// <param name="mode">The similarity mode (0 - exact, 1 - high, 2 - relaxed)</param>
    565     /// <returns>The average number of similar fragments</returns>
    566     private static double CalculateSimilarity(IList<ISymbolicExpressionTreeNode> fragments, SimilarityLevel similarity) {
    567       var visited = new bool[fragments.Count];
    568       int groups = 0;
    569       for (int i = 0; i != fragments.Count - 1; ++i) {
    570         if (visited[i]) continue;
    571         for (int j = i + 1; j != fragments.Count; ++j) {
    572           if (visited[j]) continue;
    573           if (fragments[i].IsSimilarTo(fragments[j], similarity))
    574             visited[j] = true;
    575         }
    576         ++groups;
    577       }
    578       return (double)fragments.Count / groups;
    579     }
    580     #endregion
    581   } //SymbolicExpressionTreeFragmentsAnalyzer
     141  } //SymbolicExpressionTreeFragmentLengthAnalyzer
    582142}
  • branches/HeuristicLab.EvolutionaryTracking/HeuristicLab.EvolutionaryTracking/3.4/Analyzers/SymbolicExpressionTreeRelativeLengthAnalyzer.cs

    r8213 r9082  
    5252    private const string ActualTournamentGroupSizeParameterName = "TournamentGroupSize";
    5353
    54     readonly MersenneTwister _random = new MersenneTwister(12345);
     54    readonly MersenneTwister random = new MersenneTwister(12345);
    5555
    5656    #region Parameter properties
     
    204204          const uint size = 1000;
    205205          for (int i = 0; i != size; ++i) {
    206             randomTrees.Add(GrowTreeCreator.Create(_random, grammar, 0, maxDepth.Value));
     206            randomTrees.Add(GrowTreeCreator.Create(random, grammar, 0, maxDepth.Value));
    207207            length += randomTrees.Last().Length;
    208208          }
  • branches/HeuristicLab.EvolutionaryTracking/HeuristicLab.EvolutionaryTracking/3.4/HeuristicLab.EvolutionaryTracking-3.4.csproj

    r8557 r9082  
    8787  </ItemGroup>
    8888  <ItemGroup>
     89    <Compile Include="Analyzers\SymbolicExpressionTreeRelativeFragmentDepthAnalyzer.cs" />
     90    <Compile Include="Analyzers\SymbolicExpressionTreeFragmentLengthsAnalyzer.cs" />
     91    <Compile Include="Analyzers\GeneticOperatorsAverageImprovementAnalyzer.cs" />
     92    <Compile Include="Analyzers\SymbolicExpressionTreeEvolvabilityAnalyzer.cs" />
     93    <Compile Include="Analyzers\SymbolicExpressionTreeFrequentPatternsAnalyzer.cs" />
    8994    <Compile Include="Analyzers\SymbolicExpressionTreeFragmentsAnalyzer.cs" />
    90     <Compile Include="Analyzers\SymbolicExpressionTreeGenealogyAnalyzer.cs" />
    91     <Compile Include="Analyzers\SymbolicExpressionTreeLineagesAnalyzer.cs" />
     95    <Compile Include="FPGraph.cs" />
     96    <Compile Include="Operators\SymbolicExpressionTreeFPBuilder.cs" />
     97    <Compile Include="Operators\SymbolicExpressionTreeGenealogyGraphBuilder.cs" />
     98    <Compile Include="Analyzers\SymbolicExpressionTreePopulationDiversityAnalyzer.cs" />
    9299    <Compile Include="Analyzers\SymbolicExpressionTreeRelativeLengthAnalyzer.cs" />
    93     <Compile Include="GenealogyGraph.cs" />
    94     <Compile Include="GenealogyGraphArc.cs" />
    95     <Compile Include="GenealogyGraphNode.cs" />
    96     <Compile Include="Interfaces\IGenealogyGraph.cs" />
    97     <Compile Include="Interfaces\IGenealogyGraphNode.cs" />
     100    <Compile Include="DirectedGraph\DirectedGraph.cs" />
     101    <Compile Include="DirectedGraph\Arc.cs" />
     102    <Compile Include="DirectedGraph\Vertex.cs" />
     103    <Compile Include="Interfaces\IDirectedGraph.cs" />
     104    <Compile Include="Interfaces\IVertex.cs" />
    98105    <Compile Include="Interfaces\ISymbolicExpressionTreeGenealogyGraph.cs" />
     106    <Compile Include="Operators\CleanupOperator.cs" />
     107    <Compile Include="Operators\SymbolicExpressionTreeSymbolGraphBuilder.cs" />
     108    <Compile Include="Operators\Util.cs" />
    99109    <Compile Include="Plugin.cs" />
    100110    <Compile Include="Properties\AssemblyInfo.cs" />
     111    <Compile Include="SymbolGraph.cs" />
    101112    <Compile Include="SymbolicExpressionTreeGenealogyGraph.cs" />
    102113  </ItemGroup>
     
    108119    </ProjectReference>
    109120    <ProjectReference Include="..\..\HeuristicLab.Problems.DataAnalysis.Symbolic\3.4\HeuristicLab.Problems.DataAnalysis.Symbolic-3.4.csproj">
    110       <Project>{3D28463F-EC96-4D82-AFEE-38BE91A0CA00}</Project>
     121      <Project>{3d28463f-ec96-4d82-afee-38be91a0ca00}</Project>
    111122      <Name>HeuristicLab.Problems.DataAnalysis.Symbolic-3.4</Name>
    112123    </ProjectReference>
  • branches/HeuristicLab.EvolutionaryTracking/HeuristicLab.EvolutionaryTracking/3.4/Interfaces/ISymbolicExpressionTreeGenealogyGraph.cs

    r8555 r9082  
    2020#endregion
    2121
     22using System.Collections.Generic;
     23using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
     24
    2225namespace HeuristicLab.EvolutionaryTracking {
    23   public interface ISymbolicExpressionTreeGenealogyGraph {
     26  public interface ISymbolicExpressionTreeGenealogyGraph : IDirectedGraph<SymbolicExpressionGenealogyGraphNode> {
     27    List<SymbolicExpressionGenealogyGraphNode> GetGraphNodes(ISymbolicExpressionTree tree);
    2428  }
    2529}
  • branches/HeuristicLab.EvolutionaryTracking/HeuristicLab.EvolutionaryTracking/3.4/SymbolicExpressionTreeGenealogyGraph.cs

    r8555 r9082  
    2020#endregion
    2121
     22using System;
    2223using System.Collections.Generic;
    2324using System.Linq;
     
    2627using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
    2728using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    28 using HeuristicLab.Problems.DataAnalysis.Symbolic;
    2929
    3030namespace HeuristicLab.EvolutionaryTracking {
    31   [Item("SymbolicExpressionTreeGenealogyGraph", "A genealogy graph for symbolic expression trees")]
     31  [Item("SymbolicExpressionTreeGenealogyGraph", "A genealogy graph for symbolic expression tree individuals")]
    3232  [StorableClass]
    33   public class SymbolicExpressionTreeGenealogyGraph : GenealogyGraph<SymbolicExpressionGenealogyGraphNode>, ISymbolicExpressionTreeGenealogyGraph {
     33  public sealed class SymbolicExpressionTreeGenealogyGraph : DirectedGraph<SymbolicExpressionGenealogyGraphNode>, ISymbolicExpressionTreeGenealogyGraph, IDisposable {
    3434    [Storable]
    35     private readonly Dictionary<ISymbolicExpressionTree, List<SymbolicExpressionGenealogyGraphNode>> nodeMap =
    36       new Dictionary<ISymbolicExpressionTree, List<SymbolicExpressionGenealogyGraphNode>>();
     35    private readonly Dictionary<ISymbolicExpressionTree, List<SymbolicExpressionGenealogyGraphNode>> nodeMap = new Dictionary<ISymbolicExpressionTree, List<SymbolicExpressionGenealogyGraphNode>>();
    3736
    3837    public SymbolicExpressionTreeGenealogyGraph() { }
     
    4342    public override IDeepCloneable Clone(Cloner cloner) { return new SymbolicExpressionTreeGenealogyGraph(this, cloner); }
    4443
    45     protected SymbolicExpressionTreeGenealogyGraph(SymbolicExpressionTreeGenealogyGraph original, Cloner cloner) : base(original, cloner) { }
     44    private SymbolicExpressionTreeGenealogyGraph(SymbolicExpressionTreeGenealogyGraph original, Cloner cloner) : base(original, cloner) { }
    4645
    4746    public override void AddNode(SymbolicExpressionGenealogyGraphNode node) {
    48       if (!nodeMap.ContainsKey(node.SymbolicExpressionTree))
    49         nodeMap[node.SymbolicExpressionTree] = new List<SymbolicExpressionGenealogyGraphNode> { node };
     47      var tree = node.SymbolicExpressionTree;
     48      if (nodeMap.ContainsKey(tree))
     49        nodeMap[tree].Add(node);
    5050      else
    51         nodeMap[node.SymbolicExpressionTree].Add(node);
    52       base.AddNode(node);
     51        nodeMap[tree] = new List<SymbolicExpressionGenealogyGraphNode> { node };
     52      base.AddNode(node); // the genealogy graph should have as many nodes as individuals in the population multiplied by the number of generations
     53    }
     54
     55    public bool ContainsIndividual(ISymbolicExpressionTree tree) {
     56      return nodeMap.ContainsKey(tree);
    5357    }
    5458
    5559    public List<SymbolicExpressionGenealogyGraphNode> GetGraphNodes(ISymbolicExpressionTree tree) {
    5660      return nodeMap.ContainsKey(tree) ? nodeMap[tree] : null;
    57     }
    58 
    59     public bool HasNode(SymbolicExpressionTree tree) {
    60       return Nodes.Any(x => x.SymbolicExpressionTree == tree);
    61     }
    62 
    63     public int Compare(GenealogyGraphNode a, GenealogyGraphNode b) {
    64       return Compare(a as SymbolicExpressionGenealogyGraphNode, b as SymbolicExpressionGenealogyGraphNode);
    6561    }
    6662
     
    7470    }
    7571
    76     #region Fragment tracing
    77     public IEnumerable<ISymbolicExpressionTree> TraceFragment(ISymbolicExpressionTreeNode fragment, SimilarityLevel similarity = SimilarityLevel.Exact) {
    78       return Nodes.Where(x => x.SymbolicExpressionTree.ContainsFragment(fragment, similarity)).Select(x => x.SymbolicExpressionTree).ToList();
     72    //    #region Fragment tracing
     73    //    public IEnumerable<ISymbolicExpressionTree> TraceFragment(ISymbolicExpressionTreeNode fragment, SymbolicExpressionTreeNodeSimilarityComparer comparer) {
     74    //      return Nodes.Select(x => x.SymbolicExpressionTree).Where(tree => tree.ContainsFragment(fragment, comparer));
     75    //    }
     76    //    #endregion
     77
     78    public void Dispose() {
     79      Nodes.Clear();
     80      nodeMap.Clear();
     81      // call GC.SupressFinalize?
    7982    }
    80     #endregion
    8183  }
    8284
    83   public class SymbolicExpressionGenealogyGraphNode : GenealogyGraphNode {
    84     public ISymbolicExpressionTree SymbolicExpressionTree { get; set; }
     85  public class SymbolicExpressionGenealogyGraphNode : Vertex {
     86    public ISymbolicExpressionTree SymbolicExpressionTree {
     87      get { return (ISymbolicExpressionTree)Content; }
     88      set { Content = value; }
     89    }
    8590    public double Quality { get; set; }
    8691    public bool IsElite { get; set; }
    87     public List<double> Ranks { get; set; }
     92    public double Rank { get; set; }
    8893
    89     public SymbolicExpressionGenealogyGraphNode() {
    90       Ranks = new List<double>();
    91     }
     94    public SymbolicExpressionGenealogyGraphNode() { }
    9295
    9396    public SymbolicExpressionGenealogyGraphNode(object content) {
    9497      Content = content;
    95       Ranks = new List<double>();
     98    }
     99
     100    public new IEnumerable<SymbolicExpressionGenealogyGraphNode> Ancestors() {
     101      return base.Ancestors().Cast<SymbolicExpressionGenealogyGraphNode>();
     102    }
     103
     104    public new IEnumerable<SymbolicExpressionGenealogyGraphNode> Descendants() {
     105      return base.Descendants().Cast<SymbolicExpressionGenealogyGraphNode>();
    96106    }
    97107  }
Note: See TracChangeset for help on using the changeset viewer.