Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataAnalysis.PopulationDiversityAnalysis/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/FineGrainedStructuralPopulationDiversityAnalyzer.cs @ 4941

Last change on this file since 4941 was 4941, checked in by swinkler, 13 years ago

Worked on structural population diversity analysis (#1278): Implemented calculation of structural similarity of symbolic expression trees.

File size: 20.4 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using HeuristicLab.Analysis;
24using HeuristicLab.Common;
25using HeuristicLab.Core;
26using HeuristicLab.Data;
27using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
28using HeuristicLab.Operators;
29using HeuristicLab.Optimization;
30using HeuristicLab.Optimization.Operators;
31using HeuristicLab.Parameters;
32using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
33using HeuristicLab.Problems.DataAnalysis.Symbolic;
34using System.Collections.Generic;
35using HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols;
36using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Symbols;
37
38namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers {
39  /// <summary>
40  /// An operator that analyzes the population diversity using fine grained structural tree similarity estimation.
41  /// </summary>
42  [Item("FineGrainedStructuralPopulationDiversityAnalyzer", "An operator that analyzes the population diversity using fine grained structural tree similarity estimation.")]
43  [StorableClass]
44  public sealed class FineGrainedStructuralPopulationDiversityAnalyzer : SymbolicRegressionPopulationDiversityAnalyzer {
45
46    // properties: min level delts, max level delta, etc.
47
48    private const string FunctionTreeGrammarParameterName = "FunctionTreeGrammar";
49
50    public IValueLookupParameter<GlobalSymbolicExpressionGrammar> FunctionTreeGrammarParameter {
51      get { return (IValueLookupParameter<GlobalSymbolicExpressionGrammar>)Parameters[FunctionTreeGrammarParameterName]; }
52    }
53    public GlobalSymbolicExpressionGrammar FunctionTreeGrammar {
54      get { return FunctionTreeGrammarParameter.ActualValue; }
55    }
56
57
58    [StorableConstructor]
59    private FineGrainedStructuralPopulationDiversityAnalyzer(bool deserializing) : base(deserializing) { }
60    private FineGrainedStructuralPopulationDiversityAnalyzer(FineGrainedStructuralPopulationDiversityAnalyzer original, Cloner cloner) : base(original, cloner) { }
61    public FineGrainedStructuralPopulationDiversityAnalyzer() : base() {
62      Parameters.Add(new ValueLookupParameter<GlobalSymbolicExpressionGrammar>(FunctionTreeGrammarParameterName, "The grammar that is used for symbolic regression models."));
63    }
64
65    public override IDeepCloneable Clone(Cloner cloner) {
66      return new FineGrainedStructuralPopulationDiversityAnalyzer(this, cloner);
67    }
68
69    protected override double[,] CalculateSimilarities(SymbolicExpressionTree[] solutions) {
70      Constant constant = null;
71      foreach (Symbol symbol in FunctionTreeGrammar.Symbols) {
72        if (symbol is Constant) {
73          if (constant != null) throw new InvalidProgramException("There are more and one constnat definitions in the given function tree grammar.");
74          constant = (Constant)symbol;
75        }
76      }
77      int n = solutions.Length;
78      List<string> variableNames = new List<string>();
79      foreach (StringValue variableName in ProblemData.InputVariables) {
80        variableNames.Add(variableName.Value);
81      }
82      variableNames.Add(ProblemData.TargetVariable.Value);
83      IList<GeneticInformationItem>[] geneticInformationItemsLists = new List<GeneticInformationItem>[n];
84      for (int i = 0; i < n; i++) {
85        // geneticInformationItemsList[i] = GeneticInformationItem.getGeneticInformationItems(solutions[i].Root, variableNames, MinimumLEvelDelta, MaximumLevelDelta);
86        geneticInformationItemsLists[i] = GeneticInformationItem.getGeneticInformationItems(solutions[i].Root, variableNames, 0, int.MaxValue);
87      }
88      double[,] result = new double[n, n];
89      for (int i = 0; i < n; i++) {
90        for (int j = 0; j < n; j++) {
91          if (i == j)
92            result[i, j] = 1;
93          else {
94            IList<GeneticInformationItem> solution1GeneticItems = GeneticInformationItem.CopyList(geneticInformationItemsLists[i]);
95            IList<GeneticInformationItem> solution2GeneticItems = GeneticInformationItem.CopyList(geneticInformationItemsLists[j]);
96            double similarity = 0;
97            for (int k = 0; k < solution1GeneticItems.Count; k++) {
98              double bestPendantSimilarity;
99              // GeneticInformationItem bestPendant = GeneticInformationItem.FindBestPendant(solution1GeneticItems[k],
100              //   ConstantMinimum, ConstantMaximum, VariableWeightSigma, MaximumTreeHeight, MaximumTimeOffset,
101              //   LevelDifferenceCoefficient, AncestorIndexCoefficient, ConstantValueCoefficient, VariableWeightSigma, TimeOffsetCoefficient,
102              //   VariableWeightCoefficient, AdditiveSimilarityCalculation, out bestPendantSimilarity);
103              GeneticInformationItem bestPendant = GeneticInformationItem.FindBestPendant(solution1GeneticItems[k], solution2GeneticItems,
104                -10, 10, 1, 100, 10,
105                1, 1, 1, 1, 1, 1, true, out bestPendantSimilarity);
106              if (bestPendant != null) {
107                similarity += bestPendantSimilarity;
108                // if (PreventMultipleComparisonContribution)
109                if (true)
110                  solution2GeneticItems.Remove(bestPendant);
111              }
112            }
113            result[i, j] = similarity / solution1GeneticItems.Count;
114          }
115        }
116      }
117      return result;
118    }
119
120
121    #region private class GeneticInformationItem
122
123    private class GeneticInformationItem {
124
125      private Type myAncestorDefinition;
126      public Type AncestorDefinition {
127        get { return myAncestorDefinition; }
128      }
129
130      private int myAncestorIndex;
131      public int AncestorIndex {
132        get { return myAncestorIndex; }
133      }
134
135      private Type myDescendantDefinition;
136      public Type DescendantDefinition {
137        get { return myDescendantDefinition; }
138      }
139
140      private int myAncestorLevel;
141      public int AncestorLevel {
142        get { return myAncestorLevel; }
143      }
144
145      private double myDescendantCoefficient;
146      public double DescendantCoefficient {
147        get { return myDescendantCoefficient; }
148      }
149
150      private double myDescendantVariableIndex;
151      public double DescendantVariableIndex {
152        get { return myDescendantVariableIndex; }
153      }
154
155      private int myDescendantTimeOffset;
156      public int DescendantTimeOffset {
157        get { return myDescendantTimeOffset; }
158      }
159
160      private SymbolicExpressionTreeNode myDescendantTreeNode;
161      public SymbolicExpressionTreeNode DescendantTreeNode {
162        get { return myDescendantTreeNode; }
163      }
164
165      private int myDescendantLevel;
166      public int DescendantLevel {
167        get { return myDescendantLevel; }
168      }
169
170      public int LevelDelta {
171        get { return (myDescendantLevel - myAncestorLevel); }
172      }
173
174      public static IList<GeneticInformationItem> CopyList(IList<GeneticInformationItem> GeneticInformationItemsList) {
175        List<GeneticInformationItem> list = new List<GeneticInformationItem>(GeneticInformationItemsList.Count);
176        list.AddRange(GeneticInformationItemsList);
177        return list;
178      }
179
180      public static GeneticInformationItem FindBestPendant(GeneticInformationItem Item, IList<GeneticInformationItem> ComparisonItems,
181          double ConstantMinimum, double ConstantMaximum, double VariableWeightSigma,
182          int MaximumTreeHeight, int MaximumTimeOffset,
183          double LevelDifferenceCoefficient, double AncestorIndexCoefficient,
184          double ConstantValueCoefficient, double VariableWeightCoefficient, double TimeOffsetCoefficient, double VariableIndexCoefficient,
185          bool AdditiveSimilarityCalculation,
186          out double BestPendantSimilarity) {
187        int maxSimilarityIndex = -1;
188        double similarity, maxSimilarity = -double.MaxValue;
189        for (int i = 0; i < ComparisonItems.Count; i++) {
190          similarity = Similarity(Item, ComparisonItems[i], ConstantMinimum, ConstantMaximum, VariableWeightSigma, MaximumTreeHeight, MaximumTimeOffset,
191            LevelDifferenceCoefficient, AncestorIndexCoefficient, ConstantValueCoefficient, VariableWeightSigma, TimeOffsetCoefficient,
192            VariableWeightCoefficient, AdditiveSimilarityCalculation);
193          if (!double.IsNaN(similarity) && similarity > maxSimilarity) {
194            maxSimilarity = similarity;
195            maxSimilarityIndex = i;
196          }
197        }
198        BestPendantSimilarity = maxSimilarity;
199        if (maxSimilarityIndex >= 0)
200          return ComparisonItems[maxSimilarityIndex];
201        else
202          return null;
203      }
204
205      public static double Similarity(GeneticInformationItem Item1, GeneticInformationItem Item2,
206          double ConstantMinimum, double ConstantMaximum, double VariableWeightSigma,
207          int MaximumTreeHeight, int MaximumTimeOffset,
208          double LevelDifferenceCoefficient, double AncestorIndexCoefficient,
209          double ConstantValueCoefficient, double VariableWeightCoefficient, double TimeOffsetCoefficient, double VariableIndexCoefficient,
210          bool AdditiveSimilarityCalculation) {
211
212        if (Item1.AncestorDefinition != Item2.AncestorDefinition || Item1.DescendantDefinition != Item2.DescendantDefinition)
213          return double.NaN;
214
215        // the two items for sure have the same behavior definitions
216
217        #region initialize punishments
218
219        double punishmentContributionSum = 0;
220        double punishmentCoefficientsProduct = 1;
221
222        double ancestorIndexDifferencePunishment = 0;
223        double levelDifferencePunishment = 0;
224
225        double descendantConstantValueDifferencePunishment = 0;
226        double descendantVariableWeightDifferencePunishment = 0;
227        double descendantTimeOffsetDifferencePunishment = 0;
228        double descendantVariableIndexDifferencePunishment = 0;
229
230        #endregion
231
232        if (LevelDifferenceCoefficient > 0) {
233          levelDifferencePunishment = Item1.LevelDelta - Item2.LevelDelta;
234          if (levelDifferencePunishment < 0)
235            levelDifferencePunishment = -levelDifferencePunishment;
236          levelDifferencePunishment /= MaximumTreeHeight;
237          if (levelDifferencePunishment > 1)
238            levelDifferencePunishment = 1;
239          levelDifferencePunishment *= LevelDifferenceCoefficient;
240          punishmentContributionSum += LevelDifferenceCoefficient;
241          punishmentCoefficientsProduct *= (1 - LevelDifferenceCoefficient);
242        }
243        if (AncestorIndexCoefficient > 0) {
244          if (Item1.AncestorIndex != Item2.AncestorIndex)
245            ancestorIndexDifferencePunishment = 1;
246          else
247            ancestorIndexDifferencePunishment = 0;
248          ancestorIndexDifferencePunishment *= AncestorIndexCoefficient;
249          punishmentContributionSum += AncestorIndexCoefficient;
250          punishmentCoefficientsProduct *= (1 - AncestorIndexCoefficient);
251        }
252
253        if (Item1.DescendantTreeNode is ConstantTreeNode) {
254          if (ConstantValueCoefficient > 0) {
255            double constantValueCoefficientDifference = Math.Abs(Item1.DescendantCoefficient - Item2.DescendantCoefficient);
256            // assume uniform distribution within given minimum and maximum
257            descendantConstantValueDifferencePunishment = (constantValueCoefficientDifference / (ConstantMaximum - ConstantMinimum));
258            if (descendantConstantValueDifferencePunishment > 1)
259              descendantConstantValueDifferencePunishment = 1;
260            descendantConstantValueDifferencePunishment *= ConstantValueCoefficient;
261            punishmentContributionSum += ConstantValueCoefficient;
262            punishmentCoefficientsProduct *= (1 - ConstantValueCoefficient);
263          }
264        }
265        if(Item1.DescendantTreeNode is VariableTreeNode) {
266          if (VariableWeightCoefficient > 0) {
267            double variableWeightDifference = Math.Abs(Item1.DescendantCoefficient - Item2.DescendantCoefficient);
268            // assume normal distribution within given sigma
269            descendantVariableWeightDifferencePunishment = variableWeightDifference / (VariableWeightSigma * 4);
270            if (descendantVariableWeightDifferencePunishment > 1)
271              descendantVariableWeightDifferencePunishment = 1;
272            descendantVariableWeightDifferencePunishment *= VariableWeightCoefficient;
273            punishmentContributionSum += VariableWeightCoefficient;
274            punishmentCoefficientsProduct *= (1 - VariableWeightCoefficient);
275          }
276          if (TimeOffsetCoefficient > 0) {
277            double timeOffsetDifference = Math.Abs(Item1.DescendantTimeOffset - Item2.DescendantTimeOffset);
278            if (MaximumTimeOffset > 0)
279              descendantTimeOffsetDifferencePunishment = timeOffsetDifference / MaximumTimeOffset;
280            descendantTimeOffsetDifferencePunishment *= TimeOffsetCoefficient;
281            punishmentContributionSum += TimeOffsetCoefficient;
282            punishmentCoefficientsProduct *= (1 - TimeOffsetCoefficient);
283          }
284          if (VariableIndexCoefficient > 0) {
285            if (Item1.DescendantVariableIndex != Item2.DescendantVariableIndex)
286              descendantVariableIndexDifferencePunishment = 1;
287            else
288              descendantVariableIndexDifferencePunishment = 0;
289            descendantVariableIndexDifferencePunishment *= VariableIndexCoefficient;
290            punishmentContributionSum += VariableIndexCoefficient;
291            punishmentCoefficientsProduct *= (1 - VariableIndexCoefficient);
292          }
293        }
294
295        double result;
296
297        if (AdditiveSimilarityCalculation) {
298          double punishmentsSum = ancestorIndexDifferencePunishment + levelDifferencePunishment +
299            descendantConstantValueDifferencePunishment + descendantVariableWeightDifferencePunishment +
300            descendantTimeOffsetDifferencePunishment + descendantVariableIndexDifferencePunishment;
301          result = (1 - punishmentsSum / punishmentContributionSum);
302        } else {
303          result =
304            (1 - ancestorIndexDifferencePunishment) *
305            (1 - levelDifferencePunishment) *
306            (1 - descendantConstantValueDifferencePunishment) *
307            (1 - descendantVariableWeightDifferencePunishment) *
308            (1 - descendantTimeOffsetDifferencePunishment) *
309            (1 - descendantVariableIndexDifferencePunishment);
310          // worst possible result is (1-punishmentCoefficientsProduct), so scale linearly to [0;1]:
311          result = (result - punishmentCoefficientsProduct) / (1 - punishmentCoefficientsProduct);
312        }
313
314        if (result < 0 || result > 1)
315          throw new InvalidOperationException("ERROR in GeneticInformationItem.Similarity: An invalid similarity value (" + result.ToString() + ") has been calculated.");
316
317        return result;
318
319      }
320
321      public static IList<GeneticInformationItem> getGeneticInformationItems(SymbolicExpressionTreeNode node, List<string> variableNames,
322          int MinimumLevelDelta, int MaximumLevelDelta) {
323        // first we have to collect all items, then we filter; it is not possible to filter while collecting because the items are
324        // collected recursively and used for collecting the parents' items.
325        if (MinimumLevelDelta > MaximumLevelDelta)
326          return new List<GeneticInformationItem>();
327        IList<GeneticInformationItem> list = getGeneticInformationItems(node, variableNames, 0);
328        List<GeneticInformationItem> resultList = new List<GeneticInformationItem>();
329        foreach (GeneticInformationItem item in list)
330          if (item.LevelDelta >= MinimumLevelDelta && item.LevelDelta <= MaximumLevelDelta)
331            resultList.Add(item);
332        return resultList;
333      }
334
335      private static IList<GeneticInformationItem> getGeneticInformationItems(SymbolicExpressionTreeNode node, List<string> variableNames, int level) {
336        // Idea: collect all descendants' lists and then add new items using the retrieved ones.
337        // This should save lots of time and reduce complexity of the items retrieval process.
338        // Program roots are not considered, neither are start symbol nodes
339        if (node.Symbol is ProgramRootSymbol)
340          return getGeneticInformationItems(node.SubTrees[0], variableNames, level + 1);
341        List<GeneticInformationItem> list = new List<GeneticInformationItem>();
342        // add item for this node:
343        if (!(node.Symbol is StartSymbol)) {
344          list.Add(new GeneticInformationItem(node, variableNames, level));
345        }
346        // add items for the descendants, but prevent multiple references to descendant nodes:
347        List<SymbolicExpressionTreeNode> descendantNodes = new List<SymbolicExpressionTreeNode>();
348        for (int i = 0; i < node.SubTrees.Count; i++) {
349          IList<GeneticInformationItem> descendantItems = getGeneticInformationItems(node.SubTrees[i], variableNames, level + 1);
350          list.AddRange(descendantItems);
351          if (!(node.Symbol is StartSymbol))
352            foreach (GeneticInformationItem item in descendantItems) {
353              if (!descendantNodes.Contains(item.DescendantTreeNode)) {
354                list.Add(new GeneticInformationItem(node, item, i, level));
355                descendantNodes.Add(item.DescendantTreeNode);
356              }
357            }
358        }
359        return list;
360      }
361
362      private GeneticInformationItem (SymbolicExpressionTreeNode node, List<string> variableNames, int level) {
363        myAncestorIndex = -1;
364        VariableTreeNode variableTreeNode = node as VariableTreeNode;
365        LaggedVariableTreeNode laggedVariableTreeNode = node as LaggedVariableTreeNode;
366        ConstantTreeNode constantTreeNode = node as ConstantTreeNode;
367        myAncestorDefinition = node.Symbol.GetType();
368        myDescendantDefinition = myAncestorDefinition;
369        if (variableTreeNode != null)
370          myDescendantCoefficient = variableTreeNode.Weight;
371        else if (constantTreeNode != null)
372          myDescendantCoefficient = constantTreeNode.Value;
373        else
374          myDescendantCoefficient = double.NaN;
375        if (laggedVariableTreeNode != null)
376          myDescendantTimeOffset = laggedVariableTreeNode.Lag;
377        else
378          myDescendantTimeOffset = 0;
379        if (variableTreeNode != null)
380          myDescendantVariableIndex = variableNames.IndexOf(variableTreeNode.VariableName);
381        else
382          myDescendantVariableIndex = -1;
383        myAncestorLevel = level;
384        myDescendantLevel = level;
385        myDescendantTreeNode = node;
386      }
387
388      private GeneticInformationItem(SymbolicExpressionTreeNode parentNode, GeneticInformationItem descendantGeneticInformationItem,
389          int ancestorIndex, int parentNodeLevel) {
390        myAncestorIndex = ancestorIndex;
391        myAncestorLevel = parentNodeLevel;
392        myAncestorDefinition = parentNode.Symbol.GetType();
393        myDescendantCoefficient = descendantGeneticInformationItem.DescendantCoefficient;
394        myDescendantDefinition = descendantGeneticInformationItem.DescendantDefinition;
395        myDescendantTimeOffset = descendantGeneticInformationItem.DescendantTimeOffset;
396        myDescendantVariableIndex = descendantGeneticInformationItem.DescendantVariableIndex;
397        myDescendantLevel = descendantGeneticInformationItem.DescendantLevel;
398        myDescendantTreeNode = descendantGeneticInformationItem.DescendantTreeNode;
399      }
400
401      public override string ToString() {
402        return "ancestor: " + AncestorDefinition.Name.ToString() + ", [" + AncestorIndex + "]; "
403          + "descendant: " + DescendantDefinition.Name.ToString() + " (varIndex " + DescendantVariableIndex + ", "
404          + DescendantCoefficient + ", t-" + DescendantTimeOffset + ");"
405          + " level delta = " + DescendantLevel + "-" + AncestorLevel + " = " + LevelDelta;
406      }
407
408    }
409
410    #endregion
411
412  }
413}
Note: See TracBrowser for help on using the repository browser.