source: branches/DataAnalysis.PopulationDiversityAnalysis/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/FineGrainedStructuralPopulationDiversityAnalyzer.cs @ 4949

Last change on this file since 4949 was 4949, checked in by swinkler, 12 years ago

Worked on structural population diversity analysis (#1278): Implemented management of genetic information items in dictionaries.

File size: 29.3 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using HeuristicLab.Analysis;
24using HeuristicLab.Common;
25using HeuristicLab.Core;
26using HeuristicLab.Data;
27using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
28using HeuristicLab.Operators;
29using HeuristicLab.Optimization;
30using HeuristicLab.Optimization.Operators;
31using HeuristicLab.Parameters;
32using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
33using HeuristicLab.Problems.DataAnalysis.Symbolic;
34using System.Collections.Generic;
35using HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols;
36using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Symbols;
37
38namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers {
39  /// <summary>
40  /// An operator that analyzes the population diversity using fine grained structural tree similarity estimation.
41  /// </summary>
42  [Item("FineGrainedStructuralPopulationDiversityAnalyzer", "An operator that analyzes the population diversity using fine grained structural tree similarity estimation.")]
43  [StorableClass]
44  public sealed class FineGrainedStructuralPopulationDiversityAnalyzer : SymbolicRegressionPopulationDiversityAnalyzer {
45
46    // properties: min level delts, max level delta, etc.
47
48    #region Properties and Parameters
49
50    private const string FunctionTreeGrammarParameterName = "FunctionTreeGrammar";
51    private const string MinimumLevelDeltaParameterName = "MinimumLevelDelta";
52    private const string MaximumLevelDeltaParameterName = "MaximumLevelDelta";
53    private const string PreventMultipleComparisonContributionParameterName = "PreventMultipleComparisonContribution";
54    private const string MaximumExpressionDepthParameterName = "MaxExpressionDepth";
55    private const string LevelDifferenceCoefficientParameterName = "LevelDifferenceCoefficient";
56    private const string AncestorIndexCoefficientParameterName = "AncestorIndexCoefficient";
57    private const string ConstantValueCoefficientParameterName = "ConstantValueCoefficient";
58    private const string VariableWeightCoefficientParameterName = "VariableWeightCoefficient";
59    private const string TimeOffsetCoefficientParameterName = "TimeOffsetCoefficient";
60    private const string VariableIndexCoefficientParameterName = "VariableIndexCoefficient";
61    private const string AdditiveSimilarityCalculationParameterName = "AdditiveSimilarityCalculation";
62
63    public IValueLookupParameter<GlobalSymbolicExpressionGrammar> FunctionTreeGrammarParameter {
64      get { return (IValueLookupParameter<GlobalSymbolicExpressionGrammar>)Parameters[FunctionTreeGrammarParameterName]; }
65    }
66    public GlobalSymbolicExpressionGrammar FunctionTreeGrammar {
67      get { return FunctionTreeGrammarParameter.ActualValue; }
68    }
69    public IValueLookupParameter<IntValue> MaximumExpressionDepthParameter {
70      get { return (IValueLookupParameter<IntValue>)Parameters[MaximumExpressionDepthParameterName]; }
71    }
72    public int MaximumExpressionDepth {
73      get { return MaximumExpressionDepthParameter.ActualValue.Value; }
74    }
75
76    public IValueParameter<IntValue> MinimumLevelDeltaParameter {
77      get { return (IValueParameter<IntValue>)Parameters[MinimumLevelDeltaParameterName]; }
78    }
79    public int MinimumLevelDelta {
80      get { return MinimumLevelDeltaParameter.Value.Value; }
81    }
82    public IValueParameter<IntValue> MaximumLevelDeltaParameter {
83      get { return (IValueParameter<IntValue>)Parameters[MaximumLevelDeltaParameterName]; }
84    }
85    public int MaximumLevelDelta {
86      get { return MaximumLevelDeltaParameter.Value.Value; }
87    }
88    public IValueParameter<BoolValue> PreventMultipleComparisonContributionParameter {
89      get { return (IValueParameter<BoolValue>)Parameters[PreventMultipleComparisonContributionParameterName]; }
90    }
91    public bool PreventMultipleComparisonContribution {
92      get { return PreventMultipleComparisonContributionParameter.Value.Value; }
93    }
94
95    public IValueParameter<DoubleValue> LevelDifferenceCoefficientParameter {
96      get { return (IValueParameter<DoubleValue>)Parameters[LevelDifferenceCoefficientParameterName]; }
97    }
98    public double LevelDifferenceCoefficient {
99      get { return LevelDifferenceCoefficientParameter.Value.Value; }
100    }
101    public IValueParameter<DoubleValue> AncestorIndexCoefficientParameter {
102      get { return (IValueParameter<DoubleValue>)Parameters[AncestorIndexCoefficientParameterName]; }
103    }
104    public double AncestorIndexCoefficient {
105      get { return AncestorIndexCoefficientParameter.Value.Value; }
106    }
107    public IValueParameter<DoubleValue> ConstantValueCoefficientParameter {
108      get { return (IValueParameter<DoubleValue>)Parameters[ConstantValueCoefficientParameterName]; }
109    }
110    public double ConstantValueCoefficient {
111      get { return ConstantValueCoefficientParameter.Value.Value; }
112    }
113    public IValueParameter<DoubleValue> VariableWeightCoefficientParameter {
114      get { return (IValueParameter<DoubleValue>)Parameters[VariableWeightCoefficientParameterName]; }
115    }
116    public double VariableWeightCoefficient {
117      get { return VariableWeightCoefficientParameter.Value.Value; }
118    }
119    public IValueParameter<DoubleValue> TimeOffsetCoefficientParameter {
120      get { return (IValueParameter<DoubleValue>)Parameters[TimeOffsetCoefficientParameterName]; }
121    }
122    public double TimeOffsetCoefficientCoefficient {
123      get { return TimeOffsetCoefficientParameter.Value.Value; }
124    }
125    public IValueParameter<DoubleValue> VariableIndexCoefficientParameter {
126      get { return (IValueParameter<DoubleValue>)Parameters[VariableIndexCoefficientParameterName]; }
127    }
128    public double VariableIndexCoefficient {
129      get { return VariableIndexCoefficientParameter.Value.Value; }
130    }
131    public IValueParameter<BoolValue> AdditiveSimilarityCalculationParameter {
132      get { return (IValueParameter<BoolValue>)Parameters[AdditiveSimilarityCalculationParameterName]; }
133    }
134    public bool AdditiveSimilarityCalculation {
135      get { return AdditiveSimilarityCalculationParameter.Value.Value; }
136    }
137
138    #endregion
139
140    [StorableConstructor]
141    private FineGrainedStructuralPopulationDiversityAnalyzer(bool deserializing) : base(deserializing) { }
142    private FineGrainedStructuralPopulationDiversityAnalyzer(FineGrainedStructuralPopulationDiversityAnalyzer original, Cloner cloner) : base(original, cloner) { }
143    public FineGrainedStructuralPopulationDiversityAnalyzer() : base() {
144      Parameters.Add(new ValueLookupParameter<GlobalSymbolicExpressionGrammar>(FunctionTreeGrammarParameterName, "The grammar that is used for symbolic regression models."));
145      Parameters.Add(new ValueLookupParameter<IntValue>(MaximumExpressionDepthParameterName, "Maximal depth of the analyzed symbolic expressions."));
146      Parameters.Add(new ValueParameter<IntValue>(MinimumLevelDeltaParameterName, "Minimum value for the level delta of the analyzed genetic information items.", new IntValue(0)));
147      Parameters.Add(new ValueParameter<IntValue>(MaximumLevelDeltaParameterName, "Maximum value for the level delta of the analyzed genetic information items.", new IntValue(int.MaxValue)));
148      Parameters.Add(new ValueParameter<BoolValue>(PreventMultipleComparisonContributionParameterName, "Flag that denotes whether genetic information items are hindered from contributing to the similarity function multiple times.", new BoolValue(false)));
149      Parameters.Add(new ValueParameter<DoubleValue>(LevelDifferenceCoefficientParameterName, "Weighting coefficient for level differences.", new DoubleValue(0.2)));
150      Parameters.Add(new ValueParameter<DoubleValue>(AncestorIndexCoefficientParameterName, "Weighting coefficient for ancestor index differences.", new DoubleValue(0.2)));
151      Parameters.Add(new ValueParameter<DoubleValue>(ConstantValueCoefficientParameterName, "Weighting coefficient for constant value differences.", new DoubleValue(0.2)));
152      Parameters.Add(new ValueParameter<DoubleValue>(VariableWeightCoefficientParameterName, "Weighting coefficient for variable weight differences.", new DoubleValue(0.2)));
153      Parameters.Add(new ValueParameter<DoubleValue>(TimeOffsetCoefficientParameterName, "Weighting coefficient for time lag differences.", new DoubleValue(0.2)));
154      Parameters.Add(new ValueParameter<DoubleValue>(VariableIndexCoefficientParameterName, "Weighting coefficient for variable index differences.", new DoubleValue(0.2)));
155      Parameters.Add(new ValueParameter<BoolValue>(AdditiveSimilarityCalculationParameterName, "Flag that denotes whether the similarity of genetic information items shall be calculated using additive calculation.", new BoolValue(true)));
156    }
157
158    public override IDeepCloneable Clone(Cloner cloner) {
159      return new FineGrainedStructuralPopulationDiversityAnalyzer(this, cloner);
160    }
161
162    protected override double[,] CalculateSimilarities(SymbolicExpressionTree[] solutions) {
163      // collect information stored int the problem's parameters
164      double variableWeightSigma = 0;
165      double constantMinimumValue = 0;
166      double constantMaximumValue = 0;
167      int minimumTimeOffset = 0;
168      int maximumTimeOffset = 0;
169      foreach (Symbol symbol in FunctionTreeGrammar.Symbols) {
170        Constant constant = symbol as Constant;
171        if (constant !=null) {
172          constantMinimumValue = constant.MinValue;
173          constantMaximumValue = constant.MaxValue;
174        }
175        DataAnalysis.Symbolic.Symbols.Variable variable = symbol as DataAnalysis.Symbolic.Symbols.Variable;
176        if (variable != null)
177          variableWeightSigma = variable.WeightSigma;
178        LaggedVariable laggedVariable = symbol as LaggedVariable;
179        if (laggedVariable !=null) {
180          minimumTimeOffset = laggedVariable.MinLag;
181          maximumTimeOffset = laggedVariable.MaxLag;
182        }
183      }
184      int n = solutions.Length;
185      List<string> variableNames = new List<string>();
186      foreach (StringValue variableName in ProblemData.InputVariables) {
187        variableNames.Add(variableName.Value);
188      }
189      variableNames.Add(ProblemData.TargetVariable.Value);
190      // collect genetic information item lists and store them also in dictionaries
191      IList<GeneticInformationItem>[] geneticInformationItemsLists = new List<GeneticInformationItem>[n];
192      IDictionary<string, IList<GeneticInformationItem>>[] geneticInformationItemsListsDictionaries = new IDictionary<string, IList<GeneticInformationItem>>[n];
193      for (int i = 0; i < n; i++) {
194        geneticInformationItemsLists[i] = GeneticInformationItem.getGeneticInformationItems(solutions[i].Root, variableNames, MinimumLevelDelta, MaximumLevelDelta);
195        geneticInformationItemsListsDictionaries[i] = GeneticInformationItem.GetDictionary(geneticInformationItemsLists[i]);
196      }
197      // calculate solution similarities
198      double[,] similarities = new double[n, n];
199      for (int i = 0; i < n; i++) {
200        for (int j = 0; j < n; j++) {
201          if (i == j)
202            similarities[i, j] = 1;
203          else {
204            IList<GeneticInformationItem> solution1GeneticItems = geneticInformationItemsLists[i];
205            IDictionary<string, IList<GeneticInformationItem>> solution2GeneticItemsDictionary = GeneticInformationItem.CopyDictionary(geneticInformationItemsListsDictionaries[j]);
206            double similarity = 0;
207            for (int k = 0; k < solution1GeneticItems.Count; k++) {
208              double bestPendantSimilarity = 0;
209              GeneticInformationItem item = solution1GeneticItems[k];
210              GeneticInformationItem bestPendant = null;
211              IList<GeneticInformationItem> geneticInformationItemsList = null;
212              string key = GeneticInformationItem.GetKey(item);
213              if (solution2GeneticItemsDictionary.ContainsKey(key)) {
214                geneticInformationItemsList = solution2GeneticItemsDictionary[GeneticInformationItem.GetKey(item)];
215                bestPendant = GeneticInformationItem.FindBestPendant(item, geneticInformationItemsList,
216                  constantMinimumValue, constantMaximumValue, variableWeightSigma,
217                  MaximumExpressionDepth, minimumTimeOffset, maximumTimeOffset,
218                  LevelDifferenceCoefficient, AncestorIndexCoefficient, ConstantValueCoefficient, VariableWeightCoefficient,
219                  TimeOffsetCoefficientCoefficient, VariableIndexCoefficient, AdditiveSimilarityCalculation,
220                  out bestPendantSimilarity);
221              }
222              if (bestPendant != null) {
223                similarity += bestPendantSimilarity;
224                if (PreventMultipleComparisonContribution)
225                  geneticInformationItemsList.Remove(bestPendant);
226              }
227            }
228            similarities[i, j] = similarity / solution1GeneticItems.Count;
229          }
230        }
231      }
232      return similarities;
233    }
234
235    #region private class GeneticInformationItem
236
237    private class GeneticInformationItem {
238
239      private Type myAncestorDefinition;
240      public Type AncestorDefinition {
241        get { return myAncestorDefinition; }
242      }
243
244      private int myAncestorIndex;
245      public int AncestorIndex {
246        get { return myAncestorIndex; }
247      }
248
249      private Type myDescendantDefinition;
250      public Type DescendantDefinition {
251        get { return myDescendantDefinition; }
252      }
253
254      private int myAncestorLevel;
255      public int AncestorLevel {
256        get { return myAncestorLevel; }
257      }
258
259      private double myDescendantCoefficient;
260      public double DescendantCoefficient {
261        get { return myDescendantCoefficient; }
262      }
263
264      private double myDescendantVariableIndex;
265      public double DescendantVariableIndex {
266        get { return myDescendantVariableIndex; }
267      }
268
269      private int myDescendantTimeOffset;
270      public int DescendantTimeOffset {
271        get { return myDescendantTimeOffset; }
272      }
273
274      private SymbolicExpressionTreeNode myDescendantTreeNode;
275      public SymbolicExpressionTreeNode DescendantTreeNode {
276        get { return myDescendantTreeNode; }
277      }
278
279      private int myDescendantLevel;
280      public int DescendantLevel {
281        get { return myDescendantLevel; }
282      }
283
284      public int LevelDelta {
285        get { return (myDescendantLevel - myAncestorLevel); }
286      }
287
288      public static IList<GeneticInformationItem> CopyList(IList<GeneticInformationItem> GeneticInformationItemsList) {
289        List<GeneticInformationItem> list = new List<GeneticInformationItem>(GeneticInformationItemsList.Count);
290        list.AddRange(GeneticInformationItemsList);
291        return list;
292      }
293
294      public static string GetKey(GeneticInformationItem item) {
295        return item.AncestorDefinition.Name.ToString() + "," + item.DescendantDefinition.Name.ToString();
296      }
297
298      public static IDictionary<string, IList<GeneticInformationItem>> GetDictionary(IList<GeneticInformationItem> GeneticInformationItemsList) {
299        IDictionary<string, IList<GeneticInformationItem>> dictionary = new Dictionary<string, IList<GeneticInformationItem>>();
300        foreach (GeneticInformationItem item in GeneticInformationItemsList) {
301          string key = GetKey(item);
302          if (!dictionary.ContainsKey(key))
303            dictionary.Add(key, new List<GeneticInformationItem>());
304          dictionary[key].Add(item);
305        }
306        return dictionary;
307      }
308
309      public static IDictionary<string, IList<GeneticInformationItem>> CopyDictionary(IDictionary<string, IList<GeneticInformationItem>> Dictionary) {
310        IDictionary<string, IList<GeneticInformationItem>> copy = new Dictionary<string, IList<GeneticInformationItem>>();
311        foreach (KeyValuePair<string, IList<GeneticInformationItem>> pair in Dictionary) {
312          copy.Add(pair.Key, CopyList(pair.Value));
313        }
314        return copy;
315      }
316
317      public static GeneticInformationItem FindBestPendant(GeneticInformationItem Item, IList<GeneticInformationItem> ComparisonItems,
318          double ConstantMinimum, double ConstantMaximum, double VariableWeightSigma,
319          int MaximumTreeHeight, int MinimumTimeOffset, int MaximumTimeOffset,
320          double LevelDifferenceCoefficient, double AncestorIndexCoefficient,
321          double ConstantValueCoefficient, double VariableWeightCoefficient, double TimeOffsetCoefficient, double VariableIndexCoefficient,
322          bool AdditiveSimilarityCalculation,
323          out double BestPendantSimilarity) {
324        int maxSimilarityIndex = -1;
325        double similarity, maxSimilarity = -double.MaxValue;
326        for (int i = 0; i < ComparisonItems.Count; i++) {
327          similarity = Similarity(Item, ComparisonItems[i], ConstantMinimum, ConstantMaximum, VariableWeightSigma, MaximumTreeHeight, MinimumTimeOffset, MaximumTimeOffset,
328            LevelDifferenceCoefficient, AncestorIndexCoefficient, ConstantValueCoefficient, VariableWeightSigma, TimeOffsetCoefficient,
329            VariableWeightCoefficient, AdditiveSimilarityCalculation);
330          if (!double.IsNaN(similarity) && similarity > maxSimilarity) {
331            maxSimilarity = similarity;
332            maxSimilarityIndex = i;
333          }
334        }
335        BestPendantSimilarity = maxSimilarity;
336        if (maxSimilarityIndex >= 0)
337          return ComparisonItems[maxSimilarityIndex];
338        else
339          return null;
340      }
341
342      public static double Similarity(GeneticInformationItem Item1, GeneticInformationItem Item2,
343          double ConstantMinimum, double ConstantMaximum, double VariableWeightSigma,
344          int MaximumTreeHeight, int MinimumTimeOffset, int MaximumTimeOffset,
345          double LevelDifferenceCoefficient, double AncestorIndexCoefficient,
346          double ConstantValueCoefficient, double VariableWeightCoefficient, double TimeOffsetCoefficient, double VariableIndexCoefficient,
347          bool AdditiveSimilarityCalculation) {
348
349        if (Item1.AncestorDefinition != Item2.AncestorDefinition || Item1.DescendantDefinition != Item2.DescendantDefinition)
350          return double.NaN;
351
352        // the two items for sure have the same behavior definitions
353
354        #region initialize punishments
355
356        double punishmentContributionSum = 0;
357        double punishmentCoefficientsProduct = 1;
358
359        double ancestorIndexDifferencePunishment = 0;
360        double levelDifferencePunishment = 0;
361
362        double descendantConstantValueDifferencePunishment = 0;
363        double descendantVariableWeightDifferencePunishment = 0;
364        double descendantTimeOffsetDifferencePunishment = 0;
365        double descendantVariableIndexDifferencePunishment = 0;
366
367        #endregion
368
369        if (LevelDifferenceCoefficient > 0) {
370          levelDifferencePunishment = Item1.LevelDelta - Item2.LevelDelta;
371          if (levelDifferencePunishment < 0)
372            levelDifferencePunishment = -levelDifferencePunishment;
373          levelDifferencePunishment /= MaximumTreeHeight;
374          if (levelDifferencePunishment > 1)
375            levelDifferencePunishment = 1;
376          levelDifferencePunishment *= LevelDifferenceCoefficient;
377          punishmentContributionSum += LevelDifferenceCoefficient;
378          punishmentCoefficientsProduct *= (1 - LevelDifferenceCoefficient);
379        }
380        if (AncestorIndexCoefficient > 0) {
381          if (Item1.AncestorIndex != Item2.AncestorIndex)
382            ancestorIndexDifferencePunishment = 1;
383          else
384            ancestorIndexDifferencePunishment = 0;
385          ancestorIndexDifferencePunishment *= AncestorIndexCoefficient;
386          punishmentContributionSum += AncestorIndexCoefficient;
387          punishmentCoefficientsProduct *= (1 - AncestorIndexCoefficient);
388        }
389
390        if (Item1.DescendantTreeNode is ConstantTreeNode) {
391          if (ConstantValueCoefficient > 0) {
392            double constantValueCoefficientDifference = Math.Abs(Item1.DescendantCoefficient - Item2.DescendantCoefficient);
393            // assume uniform distribution within given minimum and maximum
394            descendantConstantValueDifferencePunishment = (constantValueCoefficientDifference / (ConstantMaximum - ConstantMinimum));
395            if (descendantConstantValueDifferencePunishment > 1)
396              descendantConstantValueDifferencePunishment = 1;
397            descendantConstantValueDifferencePunishment *= ConstantValueCoefficient;
398            punishmentContributionSum += ConstantValueCoefficient;
399            punishmentCoefficientsProduct *= (1 - ConstantValueCoefficient);
400          }
401        }
402        if(Item1.DescendantTreeNode is VariableTreeNode) {
403          if (VariableWeightCoefficient > 0) {
404            double variableWeightDifference = Math.Abs(Item1.DescendantCoefficient - Item2.DescendantCoefficient);
405            // assume normal distribution within given sigma
406            descendantVariableWeightDifferencePunishment = variableWeightDifference / (VariableWeightSigma * 4);
407            if (descendantVariableWeightDifferencePunishment > 1)
408              descendantVariableWeightDifferencePunishment = 1;
409            descendantVariableWeightDifferencePunishment *= VariableWeightCoefficient;
410            punishmentContributionSum += VariableWeightCoefficient;
411            punishmentCoefficientsProduct *= (1 - VariableWeightCoefficient);
412          }
413          if (TimeOffsetCoefficient > 0) {
414            double timeOffsetDifference = Math.Abs(Item1.DescendantTimeOffset - Item2.DescendantTimeOffset);
415            if (MaximumTimeOffset > 0)
416              descendantTimeOffsetDifferencePunishment = timeOffsetDifference / (MaximumTimeOffset - MinimumTimeOffset);
417            descendantTimeOffsetDifferencePunishment *= TimeOffsetCoefficient;
418            punishmentContributionSum += TimeOffsetCoefficient;
419            punishmentCoefficientsProduct *= (1 - TimeOffsetCoefficient);
420          }
421          if (VariableIndexCoefficient > 0) {
422            if (Item1.DescendantVariableIndex != Item2.DescendantVariableIndex)
423              descendantVariableIndexDifferencePunishment = 1;
424            else
425              descendantVariableIndexDifferencePunishment = 0;
426            descendantVariableIndexDifferencePunishment *= VariableIndexCoefficient;
427            punishmentContributionSum += VariableIndexCoefficient;
428            punishmentCoefficientsProduct *= (1 - VariableIndexCoefficient);
429          }
430        }
431
432        double result;
433
434        if (AdditiveSimilarityCalculation) {
435          double punishmentsSum = ancestorIndexDifferencePunishment + levelDifferencePunishment +
436            descendantConstantValueDifferencePunishment + descendantVariableWeightDifferencePunishment +
437            descendantTimeOffsetDifferencePunishment + descendantVariableIndexDifferencePunishment;
438          result = (1 - punishmentsSum / punishmentContributionSum);
439        } else {
440          result =
441            (1 - ancestorIndexDifferencePunishment) *
442            (1 - levelDifferencePunishment) *
443            (1 - descendantConstantValueDifferencePunishment) *
444            (1 - descendantVariableWeightDifferencePunishment) *
445            (1 - descendantTimeOffsetDifferencePunishment) *
446            (1 - descendantVariableIndexDifferencePunishment);
447          // worst possible result is (1-punishmentCoefficientsProduct), so scale linearly to [0;1]:
448          result = (result - punishmentCoefficientsProduct) / (1 - punishmentCoefficientsProduct);
449        }
450
451        if (result < 0 || result > 1)
452          throw new InvalidOperationException("ERROR in GeneticInformationItem.Similarity: An invalid similarity value (" + result.ToString() + ") has been calculated.");
453
454        return result;
455
456      }
457
458      public static IList<GeneticInformationItem> getGeneticInformationItems(SymbolicExpressionTreeNode node, List<string> variableNames,
459          int MinimumLevelDelta, int MaximumLevelDelta) {
460        // first we have to collect all items, then we filter; it is not possible to filter while collecting because the items are
461        // collected recursively and used for collecting the parents' items.
462        if (MinimumLevelDelta > MaximumLevelDelta)
463          return new List<GeneticInformationItem>();
464        IList<GeneticInformationItem> list = getGeneticInformationItems(node, variableNames, 0);
465        List<GeneticInformationItem> resultList = new List<GeneticInformationItem>();
466        foreach (GeneticInformationItem item in list)
467          if (item.LevelDelta >= MinimumLevelDelta && item.LevelDelta <= MaximumLevelDelta)
468            resultList.Add(item);
469        return resultList;
470      }
471
472      private static IList<GeneticInformationItem> getGeneticInformationItems(SymbolicExpressionTreeNode node, List<string> variableNames, int level) {
473        // Idea: collect all descendants' lists and then add new items using the retrieved ones.
474        // This should save lots of time and reduce complexity of the items retrieval process.
475        // Program roots are not considered, neither are start symbol nodes
476        if (node.Symbol is ProgramRootSymbol)
477          return getGeneticInformationItems(node.SubTrees[0], variableNames, level + 1);
478        List<GeneticInformationItem> list = new List<GeneticInformationItem>();
479        // add item for this node:
480        if (!(node.Symbol is StartSymbol)) {
481          list.Add(new GeneticInformationItem(node, variableNames, level));
482        }
483        // add items for the descendants, but prevent multiple references to descendant nodes:
484        List<SymbolicExpressionTreeNode> descendantNodes = new List<SymbolicExpressionTreeNode>();
485        for (int i = 0; i < node.SubTrees.Count; i++) {
486          IList<GeneticInformationItem> descendantItems = getGeneticInformationItems(node.SubTrees[i], variableNames, level + 1);
487          list.AddRange(descendantItems);
488          if (!(node.Symbol is StartSymbol))
489            foreach (GeneticInformationItem item in descendantItems) {
490              if (!descendantNodes.Contains(item.DescendantTreeNode)) {
491                list.Add(new GeneticInformationItem(node, item, i, level));
492                descendantNodes.Add(item.DescendantTreeNode);
493              }
494            }
495        }
496        return list;
497      }
498
499      private GeneticInformationItem (SymbolicExpressionTreeNode node, List<string> variableNames, int level) {
500        myAncestorIndex = -1;
501        VariableTreeNode variableTreeNode = node as VariableTreeNode;
502        LaggedVariableTreeNode laggedVariableTreeNode = node as LaggedVariableTreeNode;
503        ConstantTreeNode constantTreeNode = node as ConstantTreeNode;
504        myAncestorDefinition = node.Symbol.GetType();
505        myDescendantDefinition = myAncestorDefinition;
506        if (variableTreeNode != null)
507          myDescendantCoefficient = variableTreeNode.Weight;
508        else if (constantTreeNode != null)
509          myDescendantCoefficient = constantTreeNode.Value;
510        else
511          myDescendantCoefficient = double.NaN;
512        if (laggedVariableTreeNode != null)
513          myDescendantTimeOffset = laggedVariableTreeNode.Lag;
514        else
515          myDescendantTimeOffset = 0;
516        if (variableTreeNode != null)
517          myDescendantVariableIndex = variableNames.IndexOf(variableTreeNode.VariableName);
518        else
519          myDescendantVariableIndex = -1;
520        myAncestorLevel = level;
521        myDescendantLevel = level;
522        myDescendantTreeNode = node;
523      }
524
525      private GeneticInformationItem(SymbolicExpressionTreeNode parentNode, GeneticInformationItem descendantGeneticInformationItem,
526          int ancestorIndex, int parentNodeLevel) {
527        myAncestorIndex = ancestorIndex;
528        myAncestorLevel = parentNodeLevel;
529        myAncestorDefinition = parentNode.Symbol.GetType();
530        myDescendantCoefficient = descendantGeneticInformationItem.DescendantCoefficient;
531        myDescendantDefinition = descendantGeneticInformationItem.DescendantDefinition;
532        myDescendantTimeOffset = descendantGeneticInformationItem.DescendantTimeOffset;
533        myDescendantVariableIndex = descendantGeneticInformationItem.DescendantVariableIndex;
534        myDescendantLevel = descendantGeneticInformationItem.DescendantLevel;
535        myDescendantTreeNode = descendantGeneticInformationItem.DescendantTreeNode;
536      }
537
538      public override string ToString() {
539        return "ancestor: " + AncestorDefinition.Name.ToString() + ", [" + AncestorIndex + "]; "
540          + "descendant: " + DescendantDefinition.Name.ToString() + " (varIndex " + DescendantVariableIndex + ", "
541          + DescendantCoefficient + ", t-" + DescendantTimeOffset + ");"
542          + " level delta = " + DescendantLevel + "-" + AncestorLevel + " = " + LevelDelta;
543      }
544
545    }
546
547    #endregion
548
549  }
550}
Note: See TracBrowser for help on using the repository browser.