Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataAnalysis.PopulationDiversityAnalysis/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/FineGrainedStructuralPopulationDiversityAnalyzer.cs @ 4928

Last change on this file since 4928 was 4928, checked in by swinkler, 13 years ago

Worked on structural population diversity analysis (#1278):

  • Improved definition of genetic items.
  • Implemented recursive collection of all genetic items of symbolic expression trees.
  • Added FineGrainedStructuralPopulationDiversityAnalyzer to SymbolicRegressionProblemBase.
File size: 20.8 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using HeuristicLab.Analysis;
24using HeuristicLab.Common;
25using HeuristicLab.Core;
26using HeuristicLab.Data;
27using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
28using HeuristicLab.Operators;
29using HeuristicLab.Optimization;
30using HeuristicLab.Optimization.Operators;
31using HeuristicLab.Parameters;
32using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
33using HeuristicLab.Problems.DataAnalysis.Symbolic;
34using System.Collections.Generic;
35using HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols;
36using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Symbols;
37
38namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers {
39  /// <summary>
40  /// An operator that analyzes the population diversity using fine grained structural tree similarity estimation.
41  /// </summary>
42  [Item("FineGrainedStructuralPopulationDiversityAnalyzer", "An operator that analyzes the population diversity using fine grained structural tree similarity estimation.")]
43  [StorableClass]
44  public sealed class FineGrainedStructuralPopulationDiversityAnalyzer : SymbolicRegressionPopulationDiversityAnalyzer {
45
46    [StorableConstructor]
47    private FineGrainedStructuralPopulationDiversityAnalyzer(bool deserializing) : base(deserializing) { }
48    private FineGrainedStructuralPopulationDiversityAnalyzer(FineGrainedStructuralPopulationDiversityAnalyzer original, Cloner cloner) : base(original, cloner) { }
49    public FineGrainedStructuralPopulationDiversityAnalyzer() : base() { }
50
51    public override IDeepCloneable Clone(Cloner cloner) {
52      return new FineGrainedStructuralPopulationDiversityAnalyzer(this, cloner);
53    }
54
55    protected override double[,] CalculateSimilarities(SymbolicExpressionTree[] solutions) {
56      int n = solutions.Length;
57      List<string> variableNames = new List<string>();
58      foreach (StringValue variableName in ProblemData.InputVariables) {
59        variableNames.Add(variableName.Value);
60      }
61      variableNames.Add(ProblemData.TargetVariable.Value);
62      IList<GeneticInformationItem>[] geneticInformationItemsList = new List<GeneticInformationItem>[n];
63      for (int i = 0; i < n; i++) {
64        geneticInformationItemsList[i] = GeneticInformationItem.getGeneticInformationItems(solutions[i].Root, variableNames);
65      }
66      double[,] result = new double[n, n];
67      for (int i = 0; i < n; i++) {
68        for (int j = 0; j < n; j++) {
69          if (i == j)
70            result[i, j] = 1;
71          else
72            result[i, j] = 0;
73        }
74      }
75      return result;
76    }
77
78
79    #region private class GeneticInformationItem
80
81    private class GeneticInformationItem {
82
83      private Type myAncestorDefinition;
84      public Type AncestorDefinition {
85        get { return myAncestorDefinition; }
86      }
87
88      private int myAncestorIndex;
89      public int AncestorIndex {
90        get { return myAncestorIndex; }
91      }
92
93      private Type myDescendantDefinition;
94      public Type DescendantDefinition {
95        get { return myDescendantDefinition; }
96      }
97
98      private int myLevelDelta;
99      public int LevelDelta {
100        get { return myLevelDelta; }
101      }
102
103      // not used in HL 3.3
104      // private double myAncestorCoefficient;
105      // public double AncestorCoefficient {
106      //   get { return myAncestorCoefficient; }
107      // }
108
109      // not used in HL 3.3
110      // private double myAncestorVariableIndex;
111      // public double AncestorVariableIndex {
112      //   get { return myAncestorVariableIndex; }
113      // }
114
115      // not used in HL 3.3
116      // private int myAncestorTimeOffset;
117      // public int AncestorTimeOffset {
118      //   get { return myAncestorTimeOffset; }
119      // }
120
121      // not used in HL 3.3
122      // private int myAncestorVariant;
123      // public int AncestorVariant {
124      //   get { return myAncestorVariant; }
125      // }
126
127      private double myDescendantCoefficient;
128      public double DescendantCoefficient {
129        get { return myDescendantCoefficient; }
130      }
131
132      private double myDescendantVariableIndex;
133      public double DescendantVariableIndex {
134        get { return myDescendantVariableIndex; }
135      }
136
137      private int myDescendantTimeOffset;
138      public int DescendantTimeOffset {
139        get { return myDescendantTimeOffset; }
140      }
141
142      // not used in HL 3.3
143      //private int myDescendantVariant;
144      //public int DescendantVariant {
145      //  get { return myDescendantVariant; }
146      //}
147
148      /*
149      public static GeneticInformationItem FindBestPendant(GeneticInformationItem Item, List<GeneticInformationItem> ComparisonItems,
150          StructuralSimilarityAnalysisParameters Parameters,
151          int MaxTreeHeight, int MaxTimeOffset,
152          out double BestPendantSimilarity) {
153        int maxSimilarityIndex = -1;
154        double similarity, maxSimilarity = -double.MaxValue;
155        for (int i = 0; i < ComparisonItems.Count; i++) {
156          similarity = Similarity(Item, ComparisonItems[i], Parameters, MaxTreeHeight, MaxTimeOffset);
157          if (!double.IsNaN(similarity) && similarity > maxSimilarity) {
158            maxSimilarity = similarity;
159            maxSimilarityIndex = i;
160          }
161        }
162        BestPendantSimilarity = maxSimilarity;
163        if (maxSimilarityIndex >= 0)
164          return ComparisonItems[maxSimilarityIndex];
165        else
166          return null;
167      }*/
168
169      /*
170      public static double Similarity(GeneticInformationItem Item1, GeneticInformationItem Item2,
171          StructuralSimilarityAnalysisParameters Parameters,
172          int MaxTreeHeight, int MaxTimeOffset) {
173
174        if (Item1.AncestorDefinition != Item2.AncestorDefinition ||
175          Item1.DescendantDefinition != Item2.DescendantDefinition)
176          return double.NaN;
177
178        // the two items for sure have the same behavior definitions
179        #region init
180        double punishmentContributionSum = 0;
181        double punishmentCoefficientsProduct = 1;
182        double ancestorCoefficientDifferencePunishment = 0;
183        double ancestorTimeOffsetDifferencePunishment = 0;
184        double ancestorVariantDifferencePunishment = 0;
185        double ancestorVariableIndexDifferencePunishment = 0;
186        double descendantCoefficientDifferencePunishment = 0;
187        double descendantTimeOffsetDifferencePunishment = 0;
188        double descendantVariantDifferencePunishment = 0;
189        double descendantVariableIndexDifferencePunishment = 0;
190        double ancestorIndexDifferencePunishment = 0;
191        double levelDifferencePunishment = 0;
192        #endregion
193
194        ITerminalDefinition ancestorTerminal = Item1.AncestorDefinition as ITerminalDefinition;
195        ITerminalDefinition descendantTerminal = Item1.DescendantDefinition as ITerminalDefinition;
196        IFunctionDefinition ancestorFunction = Item1.AncestorDefinition as IFunctionDefinition;
197        IFunctionDefinition descendantFunction = Item1.DescendantDefinition as IFunctionDefinition;
198
199        if (Parameters.ConsiderLevelDifference) {
200          levelDifferencePunishment = Item1.LevelDelta - Item2.LevelDelta;
201          if (levelDifferencePunishment < 0)
202            levelDifferencePunishment = -levelDifferencePunishment;
203          levelDifferencePunishment /= MaxTreeHeight;
204          if (levelDifferencePunishment > 1)
205            levelDifferencePunishment = 1;
206          levelDifferencePunishment *= Parameters.LevelDifferenceCoefficient;
207          punishmentContributionSum += Parameters.LevelDifferenceCoefficient;
208          punishmentCoefficientsProduct *= (1 - Parameters.LevelDifferenceCoefficient);
209        }
210        if (Parameters.ConsiderAncestorIndex) {
211          if (Item1.AncestorIndex != Item2.AncestorIndex)
212            ancestorIndexDifferencePunishment = 1;
213          else
214            ancestorIndexDifferencePunishment = 0;
215          ancestorIndexDifferencePunishment *= Parameters.AncestorIndexCoefficient;
216          punishmentContributionSum += Parameters.AncestorIndexCoefficient;
217          punishmentCoefficientsProduct *= (1 - Parameters.AncestorIndexCoefficient);
218        }
219
220        if (Item1.AncestorDefinition is ITerminalDefinition) {
221          if (Parameters.ConsiderCoefficient) {
222            double coefficientDifference = Math.Abs(Item1.myAncestorCoefficient - Item2.myAncestorCoefficient);
223            if (ancestorTerminal.Parametrization.CoefficientIsGaussian)
224              ancestorCoefficientDifferencePunishment = (coefficientDifference / (ancestorTerminal.Parametrization.CoefficientSigma * 4));
225            else
226              ancestorCoefficientDifferencePunishment = (coefficientDifference / (ancestorTerminal.Parametrization.CoefficientMaxValue - ancestorTerminal.Parametrization.CoefficientMinValue));
227            if (ancestorCoefficientDifferencePunishment > 1)
228              ancestorCoefficientDifferencePunishment = 1;
229            ancestorCoefficientDifferencePunishment *= Parameters.CoefficientCoefficient;
230            punishmentContributionSum += Parameters.CoefficientCoefficient;
231            punishmentCoefficientsProduct *= (1 - Parameters.CoefficientCoefficient);
232          }
233          if (Parameters.ConsiderTimeOffset) {
234            double timeOffsetDifference = Math.Abs(Item1.AncestorTimeOffset - Item2.AncestorTimeOffset);
235            if (MaxTimeOffset > 0)
236              ancestorTimeOffsetDifferencePunishment = timeOffsetDifference / MaxTimeOffset;
237            ancestorTimeOffsetDifferencePunishment *= Parameters.TimeOffsetCoefficient;
238            punishmentContributionSum += Parameters.TimeOffsetCoefficient;
239            punishmentCoefficientsProduct *= (1 - Parameters.TimeOffsetCoefficient);
240          }
241          if (Parameters.ConsiderVariableIndex) {
242            if (Item1.AncestorVariableIndex != Item2.AncestorVariableIndex)
243              ancestorVariableIndexDifferencePunishment = 1;
244            else
245              ancestorVariableIndexDifferencePunishment = 0;
246            ancestorVariableIndexDifferencePunishment *= Parameters.VariableIndexCoefficient;
247            punishmentContributionSum += Parameters.VariableIndexCoefficient;
248            punishmentCoefficientsProduct *= (1 - Parameters.VariableIndexCoefficient);
249          }
250        } else {
251          if (Parameters.ConsiderVariant) {
252            if (Item1.AncestorVariant != Item2.AncestorVariant)
253              ancestorVariantDifferencePunishment = 1;
254            else
255              ancestorVariantDifferencePunishment = 0;
256            ancestorVariantDifferencePunishment *= Parameters.VariantCoefficient;
257            punishmentContributionSum += Parameters.VariantCoefficient;
258            punishmentCoefficientsProduct *= (1 - Parameters.VariantCoefficient);
259          }
260        }
261
262        if (Item1.DescendantDefinition is ITerminalDefinition) {
263          if (Parameters.ConsiderCoefficient) {
264            double coefficientDifference = Math.Abs(Item1.myDescendantCoefficient - Item2.myDescendantCoefficient);
265            if (descendantTerminal.Parametrization.CoefficientIsGaussian)
266              descendantCoefficientDifferencePunishment = (coefficientDifference / (descendantTerminal.Parametrization.CoefficientSigma * 4));
267            else
268              descendantCoefficientDifferencePunishment = (coefficientDifference / (descendantTerminal.Parametrization.CoefficientMaxValue - descendantTerminal.Parametrization.CoefficientMinValue));
269            if (descendantCoefficientDifferencePunishment > 1)
270              descendantCoefficientDifferencePunishment = 1;
271            descendantCoefficientDifferencePunishment *= Parameters.CoefficientCoefficient;
272            punishmentContributionSum += Parameters.CoefficientCoefficient;
273            punishmentCoefficientsProduct *= (1 - Parameters.CoefficientCoefficient);
274          }
275          if (Parameters.ConsiderTimeOffset) {
276            double timeOffsetDifference = Math.Abs(Item1.DescendantTimeOffset - Item2.DescendantTimeOffset);
277            if (MaxTimeOffset > 0)
278              descendantTimeOffsetDifferencePunishment = timeOffsetDifference / MaxTimeOffset;
279            descendantTimeOffsetDifferencePunishment *= Parameters.TimeOffsetCoefficient;
280            punishmentContributionSum += Parameters.TimeOffsetCoefficient;
281            punishmentCoefficientsProduct *= (1 - Parameters.TimeOffsetCoefficient);
282          }
283          if (Parameters.ConsiderVariableIndex) {
284            if (Item1.DescendantVariableIndex != Item2.DescendantVariableIndex)
285              descendantVariableIndexDifferencePunishment = 1;
286            else
287              descendantVariableIndexDifferencePunishment = 0;
288            descendantVariableIndexDifferencePunishment *= Parameters.VariableIndexCoefficient;
289            punishmentContributionSum += Parameters.VariableIndexCoefficient;
290            punishmentCoefficientsProduct *= (1 - Parameters.VariableIndexCoefficient);
291          }
292        } else {
293          if (Parameters.ConsiderVariant) {
294            if (Item1.DescendantVariant != Item2.DescendantVariant)
295              descendantVariantDifferencePunishment = 1;
296            else
297              descendantVariantDifferencePunishment = 0;
298            descendantVariantDifferencePunishment *= Parameters.VariantCoefficient;
299            punishmentContributionSum += Parameters.VariantCoefficient;
300            punishmentCoefficientsProduct *= (1 - Parameters.VariantCoefficient);
301          }
302        }
303
304        double result;
305
306        if (Parameters.AdditiveSimilarityCalculation) {
307          double punishmentsSum =
308            ancestorCoefficientDifferencePunishment + ancestorTimeOffsetDifferencePunishment +
309            ancestorVariantDifferencePunishment + ancestorVariableIndexDifferencePunishment +
310            descendantCoefficientDifferencePunishment + descendantTimeOffsetDifferencePunishment +
311            descendantVariantDifferencePunishment + descendantVariableIndexDifferencePunishment +
312            ancestorIndexDifferencePunishment + levelDifferencePunishment;
313          result = (1 - punishmentsSum / punishmentContributionSum);
314        } else {
315          result =
316            (1 - ancestorCoefficientDifferencePunishment) *
317            (1 - ancestorTimeOffsetDifferencePunishment) *
318            (1 - ancestorVariantDifferencePunishment) *
319            (1 - ancestorVariableIndexDifferencePunishment) *
320            (1 - descendantCoefficientDifferencePunishment) *
321            (1 - descendantTimeOffsetDifferencePunishment) *
322            (1 - descendantVariantDifferencePunishment) *
323            (1 - descendantVariableIndexDifferencePunishment) *
324            (1 - ancestorIndexDifferencePunishment) *
325            (1 - levelDifferencePunishment);
326          // worst possible result is (1-punishmentCoefficientsProduct), so scale linearly to [0;1]:
327          result = (result - punishmentCoefficientsProduct) / (1 - punishmentCoefficientsProduct);
328        }
329
330        if (result < 0 || result > 1)
331          throw new InvalidOperationException("ERROR in GeneticInformationItem.Similarity: An invalid similarity value (" + result.ToString() + ") has been calculated.");
332
333        return result;
334
335      }*/
336
337      /*
338      public static List<GeneticInformationItem> GetGeneticInformationItems(SymbolicExpressionTreeNode Formula, int MinLevelDifference, int MaxLevelDifference) {
339        List<GeneticInformationItem> result = new List<GeneticInformationItem>();
340        List<SymbolicExpressionTreeNode> allNodes = new List<SymbolicExpressionTreeNode>();
341        allNodes.Add(Formula);
342        allNodes.AddRange(getDescendants(Formula));
343        foreach (SymbolicExpressionTreeNode node in allNodes) {
344          List<SymbolicExpressionTreeNode> allDescendants = new List<SymbolicExpressionTreeNode>();
345          allDescendants.Add(node);
346          allDescendants.AddRange(getDescendants(node));
347          foreach (SymbolicExpressionTreeNode descendant in allDescendants) {
348            GeneticInformationItem item = new GeneticInformationItem(node, descendant);
349            if (item.LevelDelta >= MinLevelDifference && item.LevelDelta <= MaxLevelDifference)
350              result.Add(item);
351          }
352        }
353        return result;
354      }*/
355
356      /*
357      public static List<SymbolicExpressionTreeNode> GetDescendants(SymbolicExpressionTreeNode node) {
358        List<SymbolicExpressionTreeNode> descendants = new List<SymbolicExpressionTreeNode>();
359        foreach (SymbolicExpressionTreeNode subTreeNode in node.SubTrees) {
360          AddDescendants(descendants, subTreeNode);
361        }
362        return descendants;
363      }
364      private static void AddDescendants(List<SymbolicExpressionTreeNode> list, SymbolicExpressionTreeNode node) {
365        list.Add(node);
366        foreach (SymbolicExpressionTreeNode subTreeNode in node.SubTrees) {
367          AddDescendants(list, subTreeNode);
368        }
369      }*/
370
371      public static IList<GeneticInformationItem> getGeneticInformationItems(SymbolicExpressionTreeNode node, List<string> variableNames) {
372        // Idea: collect all descendants' lists and then add new items using the retrieved ones.
373        // This should save lots of time and reduce complexity of the items retrieval process.
374        if (node.Symbol is ProgramRootSymbol)
375          return getGeneticInformationItems(node.SubTrees[0], variableNames);
376        List<GeneticInformationItem> list = new List<GeneticInformationItem>();
377        // add item for this node:
378        list.Add(new GeneticInformationItem(node, variableNames));
379        // add items for the descendants:
380        for (int i = 0; i < node.SubTrees.Count; i++) {
381          IList<GeneticInformationItem> descendantItems = getGeneticInformationItems(node.SubTrees[i], variableNames);
382          list.AddRange(descendantItems);
383          foreach (GeneticInformationItem item in descendantItems) {
384            list.Add(new GeneticInformationItem(node, item, i));
385          }
386        }
387        return list;
388      }
389
390      private GeneticInformationItem (SymbolicExpressionTreeNode node, List<string> variableNames) {
391        myAncestorIndex = -1;
392        VariableTreeNode variableTreeNode = node as VariableTreeNode;
393        LaggedVariableTreeNode laggedVariableTreeNode = node as LaggedVariableTreeNode;
394        ConstantTreeNode constantTreeNode = node as ConstantTreeNode;
395        myAncestorDefinition = node.Symbol.GetType();
396        myDescendantDefinition = myAncestorDefinition;
397        if (variableTreeNode != null)
398          myDescendantCoefficient = variableTreeNode.Weight;
399        else if (constantTreeNode != null)
400          myDescendantCoefficient = constantTreeNode.Value;
401        else
402          myDescendantCoefficient = double.NaN;
403        if (laggedVariableTreeNode != null)
404          myDescendantTimeOffset = laggedVariableTreeNode.Lag;
405        else
406          myDescendantTimeOffset = 0;
407        if (variableTreeNode != null)
408          myDescendantVariableIndex = variableNames.IndexOf(variableTreeNode.VariableName);
409        else
410          myDescendantVariableIndex = -1;
411        myLevelDelta = 0;
412      }
413
414      private GeneticInformationItem(SymbolicExpressionTreeNode parentNode, GeneticInformationItem descendantGeneticInformationItem, int ancestorIndex) {
415        myAncestorIndex = ancestorIndex;
416        myLevelDelta = descendantGeneticInformationItem.LevelDelta + 1;
417        myAncestorDefinition = parentNode.Symbol.GetType();
418        myDescendantCoefficient = descendantGeneticInformationItem.DescendantCoefficient;
419        myDescendantDefinition = descendantGeneticInformationItem.DescendantDefinition;
420        myDescendantTimeOffset = descendantGeneticInformationItem.DescendantTimeOffset;
421        myDescendantVariableIndex = descendantGeneticInformationItem.DescendantVariableIndex;
422      }
423
424      public override string ToString() {
425        return "ancestor: " + AncestorDefinition.Name.ToString() + ", [" + AncestorIndex + "], delta " + LevelDelta
426          + "; descendant: " + DescendantDefinition.Name.ToString() + " (varIndex " + DescendantVariableIndex + ", "
427          + DescendantCoefficient + ", t-" + DescendantTimeOffset + ")";
428      }
429
430    }
431
432    #endregion
433
434  }
435}
Note: See TracBrowser for help on using the repository browser.