Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataAnalysis.PopulationDiversityAnalysis/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/FineGrainedStructuralPopulationDiversityAnalyzer.cs @ 4942

Last change on this file since 4942 was 4942, checked in by swinkler, 14 years ago

Worked on structural population diversity analysis (#1278): Added parameters for similarity calculations.

File size: 22.1 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using HeuristicLab.Analysis;
24using HeuristicLab.Common;
25using HeuristicLab.Core;
26using HeuristicLab.Data;
27using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
28using HeuristicLab.Operators;
29using HeuristicLab.Optimization;
30using HeuristicLab.Optimization.Operators;
31using HeuristicLab.Parameters;
32using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
33using HeuristicLab.Problems.DataAnalysis.Symbolic;
34using System.Collections.Generic;
35using HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols;
36using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Symbols;
37
38namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers {
39  /// <summary>
40  /// An operator that analyzes the population diversity using fine grained structural tree similarity estimation.
41  /// </summary>
42  [Item("FineGrainedStructuralPopulationDiversityAnalyzer", "An operator that analyzes the population diversity using fine grained structural tree similarity estimation.")]
43  [StorableClass]
44  public sealed class FineGrainedStructuralPopulationDiversityAnalyzer : SymbolicRegressionPopulationDiversityAnalyzer {
45
46    // properties: min level delts, max level delta, etc.
47
48    private const string FunctionTreeGrammarParameterName = "FunctionTreeGrammar";
49    private const string MinimumLevelDeltaParameterName = "MinimumLevelDelta";
50    private const string MaximumLevelDeltaParameterName = "MaximumLevelDelta";
51    private const string PreventMultipleComparisonContributionParameterName = "PreventMultipleComparisonContribution";
52
53    public IValueLookupParameter<GlobalSymbolicExpressionGrammar> FunctionTreeGrammarParameter {
54      get { return (IValueLookupParameter<GlobalSymbolicExpressionGrammar>)Parameters[FunctionTreeGrammarParameterName]; }
55    }
56    public GlobalSymbolicExpressionGrammar FunctionTreeGrammar {
57      get { return FunctionTreeGrammarParameter.ActualValue; }
58    }
59
60    public IValueParameter<IntValue> MinimumLevelDeltaParameter {
61      get { return (IValueParameter<IntValue>)Parameters[MinimumLevelDeltaParameterName]; }
62    }
63    public int MinimumLevelDelta {
64      get { return MinimumLevelDeltaParameter.Value.Value; }
65    }
66    public IValueParameter<IntValue> MaximumLevelDeltaParameter {
67      get { return (IValueParameter<IntValue>)Parameters[MaximumLevelDeltaParameterName]; }
68    }
69    public int MaximumLevelDelta {
70      get { return MaximumLevelDeltaParameter.Value.Value; }
71    }
72    public IValueParameter<BoolValue> PreventMultipleComparisonContributionParameter {
73      get { return (IValueParameter<BoolValue>)Parameters[PreventMultipleComparisonContributionParameterName]; }
74    }
75    public bool PreventMultipleComparisonContribution {
76      get { return PreventMultipleComparisonContributionParameter.Value.Value; }
77    }
78
79    [StorableConstructor]
80    private FineGrainedStructuralPopulationDiversityAnalyzer(bool deserializing) : base(deserializing) { }
81    private FineGrainedStructuralPopulationDiversityAnalyzer(FineGrainedStructuralPopulationDiversityAnalyzer original, Cloner cloner) : base(original, cloner) { }
82    public FineGrainedStructuralPopulationDiversityAnalyzer() : base() {
83      Parameters.Add(new ValueLookupParameter<GlobalSymbolicExpressionGrammar>(FunctionTreeGrammarParameterName, "The grammar that is used for symbolic regression models."));
84      Parameters.Add(new ValueParameter<IntValue>(MinimumLevelDeltaParameterName, "Minimum value for the level delta of the analyzed genetic information items.", new IntValue(0)));
85      Parameters.Add(new ValueParameter<IntValue>(MaximumLevelDeltaParameterName, "Maximum value for the level delta of the analyzed genetic information items.", new IntValue(int.MaxValue)));
86      Parameters.Add(new ValueParameter<BoolValue>(PreventMultipleComparisonContributionParameterName, "Flag that denotes whether genetic information items are hindered from contributing to the similarity function multiple times.", new BoolValue(false)));
87    }
88
89    public override IDeepCloneable Clone(Cloner cloner) {
90      return new FineGrainedStructuralPopulationDiversityAnalyzer(this, cloner);
91    }
92
93    protected override double[,] CalculateSimilarities(SymbolicExpressionTree[] solutions) {
94      double variableWeightSigma = 0;
95      double constantMinimumValue = 0;
96      double constantMaximumValue = 0;
97      int minimumTimeOffset = 0;
98      int maximumTimeOffset = 0;
99      foreach (Symbol symbol in FunctionTreeGrammar.Symbols) {
100        Constant constant = symbol as Constant;
101        if (constant !=null) {
102          constantMinimumValue = constant.MinValue;
103          constantMaximumValue = constant.MaxValue;
104        }
105        DataAnalysis.Symbolic.Symbols.Variable variable = symbol as DataAnalysis.Symbolic.Symbols.Variable;
106        if (variable != null)
107          variableWeightSigma = variable.WeightSigma;
108        LaggedVariable laggedVariable = symbol as LaggedVariable;
109        if (laggedVariable !=null) {
110          minimumTimeOffset = laggedVariable.MinLag;
111          maximumTimeOffset = laggedVariable.MaxLag;
112        }
113      }
114      int n = solutions.Length;
115      List<string> variableNames = new List<string>();
116      foreach (StringValue variableName in ProblemData.InputVariables) {
117        variableNames.Add(variableName.Value);
118      }
119      variableNames.Add(ProblemData.TargetVariable.Value);
120      IList<GeneticInformationItem>[] geneticInformationItemsLists = new List<GeneticInformationItem>[n];
121      for (int i = 0; i < n; i++) {
122        geneticInformationItemsLists[i] = GeneticInformationItem.getGeneticInformationItems(solutions[i].Root, variableNames, MinimumLevelDelta, MaximumLevelDelta);
123      }
124      double[,] result = new double[n, n];
125      for (int i = 0; i < n; i++) {
126        for (int j = 0; j < n; j++) {
127          if (i == j)
128            result[i, j] = 1;
129          else {
130            IList<GeneticInformationItem> solution1GeneticItems = GeneticInformationItem.CopyList(geneticInformationItemsLists[i]);
131            IList<GeneticInformationItem> solution2GeneticItems = GeneticInformationItem.CopyList(geneticInformationItemsLists[j]);
132            double similarity = 0;
133            for (int k = 0; k < solution1GeneticItems.Count; k++) {
134              double bestPendantSimilarity;
135              GeneticInformationItem bestPendant = GeneticInformationItem.FindBestPendant(solution1GeneticItems[k], solution2GeneticItems,
136                constantMinimumValue, constantMaximumValue, variableWeightSigma,
137                /* TODO: */ 100, 10, 1, 1, 1, 1, 1, 1, true, out bestPendantSimilarity);
138              if (bestPendant != null) {
139                similarity += bestPendantSimilarity;
140                if (PreventMultipleComparisonContribution)
141                  solution2GeneticItems.Remove(bestPendant);
142              }
143            }
144            result[i, j] = similarity / solution1GeneticItems.Count;
145          }
146        }
147      }
148      return result;
149    }
150
151    #region private class GeneticInformationItem
152
153    private class GeneticInformationItem {
154
155      private Type myAncestorDefinition;
156      public Type AncestorDefinition {
157        get { return myAncestorDefinition; }
158      }
159
160      private int myAncestorIndex;
161      public int AncestorIndex {
162        get { return myAncestorIndex; }
163      }
164
165      private Type myDescendantDefinition;
166      public Type DescendantDefinition {
167        get { return myDescendantDefinition; }
168      }
169
170      private int myAncestorLevel;
171      public int AncestorLevel {
172        get { return myAncestorLevel; }
173      }
174
175      private double myDescendantCoefficient;
176      public double DescendantCoefficient {
177        get { return myDescendantCoefficient; }
178      }
179
180      private double myDescendantVariableIndex;
181      public double DescendantVariableIndex {
182        get { return myDescendantVariableIndex; }
183      }
184
185      private int myDescendantTimeOffset;
186      public int DescendantTimeOffset {
187        get { return myDescendantTimeOffset; }
188      }
189
190      private SymbolicExpressionTreeNode myDescendantTreeNode;
191      public SymbolicExpressionTreeNode DescendantTreeNode {
192        get { return myDescendantTreeNode; }
193      }
194
195      private int myDescendantLevel;
196      public int DescendantLevel {
197        get { return myDescendantLevel; }
198      }
199
200      public int LevelDelta {
201        get { return (myDescendantLevel - myAncestorLevel); }
202      }
203
204      public static IList<GeneticInformationItem> CopyList(IList<GeneticInformationItem> GeneticInformationItemsList) {
205        List<GeneticInformationItem> list = new List<GeneticInformationItem>(GeneticInformationItemsList.Count);
206        list.AddRange(GeneticInformationItemsList);
207        return list;
208      }
209
210      public static GeneticInformationItem FindBestPendant(GeneticInformationItem Item, IList<GeneticInformationItem> ComparisonItems,
211          double ConstantMinimum, double ConstantMaximum, double VariableWeightSigma,
212          int MaximumTreeHeight, int MaximumTimeOffset,
213          double LevelDifferenceCoefficient, double AncestorIndexCoefficient,
214          double ConstantValueCoefficient, double VariableWeightCoefficient, double TimeOffsetCoefficient, double VariableIndexCoefficient,
215          bool AdditiveSimilarityCalculation,
216          out double BestPendantSimilarity) {
217        int maxSimilarityIndex = -1;
218        double similarity, maxSimilarity = -double.MaxValue;
219        for (int i = 0; i < ComparisonItems.Count; i++) {
220          similarity = Similarity(Item, ComparisonItems[i], ConstantMinimum, ConstantMaximum, VariableWeightSigma, MaximumTreeHeight, MaximumTimeOffset,
221            LevelDifferenceCoefficient, AncestorIndexCoefficient, ConstantValueCoefficient, VariableWeightSigma, TimeOffsetCoefficient,
222            VariableWeightCoefficient, AdditiveSimilarityCalculation);
223          if (!double.IsNaN(similarity) && similarity > maxSimilarity) {
224            maxSimilarity = similarity;
225            maxSimilarityIndex = i;
226          }
227        }
228        BestPendantSimilarity = maxSimilarity;
229        if (maxSimilarityIndex >= 0)
230          return ComparisonItems[maxSimilarityIndex];
231        else
232          return null;
233      }
234
235      public static double Similarity(GeneticInformationItem Item1, GeneticInformationItem Item2,
236          double ConstantMinimum, double ConstantMaximum, double VariableWeightSigma,
237          int MaximumTreeHeight, int MaximumTimeOffset,
238          double LevelDifferenceCoefficient, double AncestorIndexCoefficient,
239          double ConstantValueCoefficient, double VariableWeightCoefficient, double TimeOffsetCoefficient, double VariableIndexCoefficient,
240          bool AdditiveSimilarityCalculation) {
241
242        if (Item1.AncestorDefinition != Item2.AncestorDefinition || Item1.DescendantDefinition != Item2.DescendantDefinition)
243          return double.NaN;
244
245        // the two items for sure have the same behavior definitions
246
247        #region initialize punishments
248
249        double punishmentContributionSum = 0;
250        double punishmentCoefficientsProduct = 1;
251
252        double ancestorIndexDifferencePunishment = 0;
253        double levelDifferencePunishment = 0;
254
255        double descendantConstantValueDifferencePunishment = 0;
256        double descendantVariableWeightDifferencePunishment = 0;
257        double descendantTimeOffsetDifferencePunishment = 0;
258        double descendantVariableIndexDifferencePunishment = 0;
259
260        #endregion
261
262        if (LevelDifferenceCoefficient > 0) {
263          levelDifferencePunishment = Item1.LevelDelta - Item2.LevelDelta;
264          if (levelDifferencePunishment < 0)
265            levelDifferencePunishment = -levelDifferencePunishment;
266          levelDifferencePunishment /= MaximumTreeHeight;
267          if (levelDifferencePunishment > 1)
268            levelDifferencePunishment = 1;
269          levelDifferencePunishment *= LevelDifferenceCoefficient;
270          punishmentContributionSum += LevelDifferenceCoefficient;
271          punishmentCoefficientsProduct *= (1 - LevelDifferenceCoefficient);
272        }
273        if (AncestorIndexCoefficient > 0) {
274          if (Item1.AncestorIndex != Item2.AncestorIndex)
275            ancestorIndexDifferencePunishment = 1;
276          else
277            ancestorIndexDifferencePunishment = 0;
278          ancestorIndexDifferencePunishment *= AncestorIndexCoefficient;
279          punishmentContributionSum += AncestorIndexCoefficient;
280          punishmentCoefficientsProduct *= (1 - AncestorIndexCoefficient);
281        }
282
283        if (Item1.DescendantTreeNode is ConstantTreeNode) {
284          if (ConstantValueCoefficient > 0) {
285            double constantValueCoefficientDifference = Math.Abs(Item1.DescendantCoefficient - Item2.DescendantCoefficient);
286            // assume uniform distribution within given minimum and maximum
287            descendantConstantValueDifferencePunishment = (constantValueCoefficientDifference / (ConstantMaximum - ConstantMinimum));
288            if (descendantConstantValueDifferencePunishment > 1)
289              descendantConstantValueDifferencePunishment = 1;
290            descendantConstantValueDifferencePunishment *= ConstantValueCoefficient;
291            punishmentContributionSum += ConstantValueCoefficient;
292            punishmentCoefficientsProduct *= (1 - ConstantValueCoefficient);
293          }
294        }
295        if(Item1.DescendantTreeNode is VariableTreeNode) {
296          if (VariableWeightCoefficient > 0) {
297            double variableWeightDifference = Math.Abs(Item1.DescendantCoefficient - Item2.DescendantCoefficient);
298            // assume normal distribution within given sigma
299            descendantVariableWeightDifferencePunishment = variableWeightDifference / (VariableWeightSigma * 4);
300            if (descendantVariableWeightDifferencePunishment > 1)
301              descendantVariableWeightDifferencePunishment = 1;
302            descendantVariableWeightDifferencePunishment *= VariableWeightCoefficient;
303            punishmentContributionSum += VariableWeightCoefficient;
304            punishmentCoefficientsProduct *= (1 - VariableWeightCoefficient);
305          }
306          if (TimeOffsetCoefficient > 0) {
307            double timeOffsetDifference = Math.Abs(Item1.DescendantTimeOffset - Item2.DescendantTimeOffset);
308            if (MaximumTimeOffset > 0)
309              descendantTimeOffsetDifferencePunishment = timeOffsetDifference / MaximumTimeOffset;
310            descendantTimeOffsetDifferencePunishment *= TimeOffsetCoefficient;
311            punishmentContributionSum += TimeOffsetCoefficient;
312            punishmentCoefficientsProduct *= (1 - TimeOffsetCoefficient);
313          }
314          if (VariableIndexCoefficient > 0) {
315            if (Item1.DescendantVariableIndex != Item2.DescendantVariableIndex)
316              descendantVariableIndexDifferencePunishment = 1;
317            else
318              descendantVariableIndexDifferencePunishment = 0;
319            descendantVariableIndexDifferencePunishment *= VariableIndexCoefficient;
320            punishmentContributionSum += VariableIndexCoefficient;
321            punishmentCoefficientsProduct *= (1 - VariableIndexCoefficient);
322          }
323        }
324
325        double result;
326
327        if (AdditiveSimilarityCalculation) {
328          double punishmentsSum = ancestorIndexDifferencePunishment + levelDifferencePunishment +
329            descendantConstantValueDifferencePunishment + descendantVariableWeightDifferencePunishment +
330            descendantTimeOffsetDifferencePunishment + descendantVariableIndexDifferencePunishment;
331          result = (1 - punishmentsSum / punishmentContributionSum);
332        } else {
333          result =
334            (1 - ancestorIndexDifferencePunishment) *
335            (1 - levelDifferencePunishment) *
336            (1 - descendantConstantValueDifferencePunishment) *
337            (1 - descendantVariableWeightDifferencePunishment) *
338            (1 - descendantTimeOffsetDifferencePunishment) *
339            (1 - descendantVariableIndexDifferencePunishment);
340          // worst possible result is (1-punishmentCoefficientsProduct), so scale linearly to [0;1]:
341          result = (result - punishmentCoefficientsProduct) / (1 - punishmentCoefficientsProduct);
342        }
343
344        if (result < 0 || result > 1)
345          throw new InvalidOperationException("ERROR in GeneticInformationItem.Similarity: An invalid similarity value (" + result.ToString() + ") has been calculated.");
346
347        return result;
348
349      }
350
351      public static IList<GeneticInformationItem> getGeneticInformationItems(SymbolicExpressionTreeNode node, List<string> variableNames,
352          int MinimumLevelDelta, int MaximumLevelDelta) {
353        // first we have to collect all items, then we filter; it is not possible to filter while collecting because the items are
354        // collected recursively and used for collecting the parents' items.
355        if (MinimumLevelDelta > MaximumLevelDelta)
356          return new List<GeneticInformationItem>();
357        IList<GeneticInformationItem> list = getGeneticInformationItems(node, variableNames, 0);
358        List<GeneticInformationItem> resultList = new List<GeneticInformationItem>();
359        foreach (GeneticInformationItem item in list)
360          if (item.LevelDelta >= MinimumLevelDelta && item.LevelDelta <= MaximumLevelDelta)
361            resultList.Add(item);
362        return resultList;
363      }
364
365      private static IList<GeneticInformationItem> getGeneticInformationItems(SymbolicExpressionTreeNode node, List<string> variableNames, int level) {
366        // Idea: collect all descendants' lists and then add new items using the retrieved ones.
367        // This should save lots of time and reduce complexity of the items retrieval process.
368        // Program roots are not considered, neither are start symbol nodes
369        if (node.Symbol is ProgramRootSymbol)
370          return getGeneticInformationItems(node.SubTrees[0], variableNames, level + 1);
371        List<GeneticInformationItem> list = new List<GeneticInformationItem>();
372        // add item for this node:
373        if (!(node.Symbol is StartSymbol)) {
374          list.Add(new GeneticInformationItem(node, variableNames, level));
375        }
376        // add items for the descendants, but prevent multiple references to descendant nodes:
377        List<SymbolicExpressionTreeNode> descendantNodes = new List<SymbolicExpressionTreeNode>();
378        for (int i = 0; i < node.SubTrees.Count; i++) {
379          IList<GeneticInformationItem> descendantItems = getGeneticInformationItems(node.SubTrees[i], variableNames, level + 1);
380          list.AddRange(descendantItems);
381          if (!(node.Symbol is StartSymbol))
382            foreach (GeneticInformationItem item in descendantItems) {
383              if (!descendantNodes.Contains(item.DescendantTreeNode)) {
384                list.Add(new GeneticInformationItem(node, item, i, level));
385                descendantNodes.Add(item.DescendantTreeNode);
386              }
387            }
388        }
389        return list;
390      }
391
392      private GeneticInformationItem (SymbolicExpressionTreeNode node, List<string> variableNames, int level) {
393        myAncestorIndex = -1;
394        VariableTreeNode variableTreeNode = node as VariableTreeNode;
395        LaggedVariableTreeNode laggedVariableTreeNode = node as LaggedVariableTreeNode;
396        ConstantTreeNode constantTreeNode = node as ConstantTreeNode;
397        myAncestorDefinition = node.Symbol.GetType();
398        myDescendantDefinition = myAncestorDefinition;
399        if (variableTreeNode != null)
400          myDescendantCoefficient = variableTreeNode.Weight;
401        else if (constantTreeNode != null)
402          myDescendantCoefficient = constantTreeNode.Value;
403        else
404          myDescendantCoefficient = double.NaN;
405        if (laggedVariableTreeNode != null)
406          myDescendantTimeOffset = laggedVariableTreeNode.Lag;
407        else
408          myDescendantTimeOffset = 0;
409        if (variableTreeNode != null)
410          myDescendantVariableIndex = variableNames.IndexOf(variableTreeNode.VariableName);
411        else
412          myDescendantVariableIndex = -1;
413        myAncestorLevel = level;
414        myDescendantLevel = level;
415        myDescendantTreeNode = node;
416      }
417
418      private GeneticInformationItem(SymbolicExpressionTreeNode parentNode, GeneticInformationItem descendantGeneticInformationItem,
419          int ancestorIndex, int parentNodeLevel) {
420        myAncestorIndex = ancestorIndex;
421        myAncestorLevel = parentNodeLevel;
422        myAncestorDefinition = parentNode.Symbol.GetType();
423        myDescendantCoefficient = descendantGeneticInformationItem.DescendantCoefficient;
424        myDescendantDefinition = descendantGeneticInformationItem.DescendantDefinition;
425        myDescendantTimeOffset = descendantGeneticInformationItem.DescendantTimeOffset;
426        myDescendantVariableIndex = descendantGeneticInformationItem.DescendantVariableIndex;
427        myDescendantLevel = descendantGeneticInformationItem.DescendantLevel;
428        myDescendantTreeNode = descendantGeneticInformationItem.DescendantTreeNode;
429      }
430
431      public override string ToString() {
432        return "ancestor: " + AncestorDefinition.Name.ToString() + ", [" + AncestorIndex + "]; "
433          + "descendant: " + DescendantDefinition.Name.ToString() + " (varIndex " + DescendantVariableIndex + ", "
434          + DescendantCoefficient + ", t-" + DescendantTimeOffset + ");"
435          + " level delta = " + DescendantLevel + "-" + AncestorLevel + " = " + LevelDelta;
436      }
437
438    }
439
440    #endregion
441
442  }
443}
Note: See TracBrowser for help on using the repository browser.