Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.EvolutionaryTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/SymbolicDataAnalysisExpressionTreeSimilarity.cs @ 9423

Last change on this file since 9423 was 9423, checked in by bburlacu, 12 years ago

#1772: Implemented GeneticItem-based similarity measure. Renamed ISymbolicExpressionTreeNodeComparer to ISymbolicExpressionTreeNodeSimilarityComparer.

File size: 14.0 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2012 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29using HeuristicLab.Operators;
30using HeuristicLab.Parameters;
31using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
32
33namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
34  [StorableClass]
35  public class SymbolicDataAnalysisExpressionTreeSimilarityCalculator : SingleSuccessorOperator {
36    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
37    private const string CurrentSymbolicExpressionTreeParameterName = "CurrentSymbolicExpressionTree";
38    private const string SimilarityValuesParmeterName = "Similarity";
39    // comparer parameters
40    private const string MatchVariablesParameterName = "MatchVariableNames";
41    private const string MatchVariableWeightsParameterName = "MatchVariableWeights";
42    private const string MatchConstantValuesParameterName = "MatchConstantValues";
43
44
45    public IScopeTreeLookupParameter<ISymbolicExpressionTree> SymbolicExpressionTreeParameter {
46      get { return (IScopeTreeLookupParameter<ISymbolicExpressionTree>)Parameters[SymbolicExpressionTreeParameterName]; }
47    }
48    public IValueParameter<ISymbolicExpressionTree> CurrentSymbolicExpressionTreeParameter {
49      get { return (IValueParameter<ISymbolicExpressionTree>)Parameters[CurrentSymbolicExpressionTreeParameterName]; }
50    }
51    public ILookupParameter<BoolValue> MatchVariableNamesParameter {
52      get { return (ILookupParameter<BoolValue>)Parameters[MatchVariablesParameterName]; }
53    }
54    public ILookupParameter<BoolValue> MatchVariableWeightsParameter {
55      get { return (ILookupParameter<BoolValue>)Parameters[MatchVariableWeightsParameterName]; }
56    }
57    public ILookupParameter<BoolValue> MatchConstantValuesParameter {
58      get { return (ILookupParameter<BoolValue>)Parameters[MatchConstantValuesParameterName]; }
59    }
60    public ILookupParameter<DoubleValue> SimilarityParameter {
61      get { return (ILookupParameter<DoubleValue>)Parameters[SimilarityValuesParmeterName]; }
62    }
63
64    public ISymbolicExpressionTree CurrentSymbolicExpressionTree {
65      get { return CurrentSymbolicExpressionTreeParameter.Value; }
66      set { CurrentSymbolicExpressionTreeParameter.Value = value; }
67    }
68
69    public SymbolicExpressionTreeNodeSimilarityComparer SimilarityComparer { get; set; }
70
71    public Dictionary<ISymbolicExpressionTree, SymbolicDataAnalysisExpressionTreeSimilarity.GeneticItem[]> GeneticItems;
72
73    public int MaximumTreeDepth { get; set; }
74
75    protected SymbolicDataAnalysisExpressionTreeSimilarityCalculator(SymbolicDataAnalysisExpressionTreeSimilarityCalculator original, Cloner cloner) : base(original, cloner) { }
76    public override IDeepCloneable Clone(Cloner cloner) { return new SymbolicDataAnalysisExpressionTreeSimilarityCalculator(this, cloner); }
77    [StorableConstructor]
78    protected SymbolicDataAnalysisExpressionTreeSimilarityCalculator(bool deserializing) : base(deserializing) { }
79
80    public SymbolicDataAnalysisExpressionTreeSimilarityCalculator()
81      : base() {
82      Parameters.Add(new ScopeTreeLookupParameter<ISymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees to analyze."));
83      Parameters.Add(new ValueParameter<ISymbolicExpressionTree>(CurrentSymbolicExpressionTreeParameterName, ""));
84      Parameters.Add(new LookupParameter<BoolValue>(MatchVariablesParameterName, "Specify if the symbolic expression tree comparer should match variable names."));
85      Parameters.Add(new LookupParameter<BoolValue>(MatchVariableWeightsParameterName, "Specify if the symbolic expression tree comparer should match variable weights."));
86      Parameters.Add(new LookupParameter<BoolValue>(MatchConstantValuesParameterName, "Specify if the symbolic expression tree comparer should match constant values."));
87      Parameters.Add(new LookupParameter<DoubleValue>(SimilarityValuesParmeterName, ""));
88    }
89
90    public override IOperation Apply() {
91      var trees = SymbolicExpressionTreeParameter.ActualValue;
92
93      double similarity = 0.0;
94      var current = CurrentSymbolicExpressionTree;
95
96      bool found = false;
97      foreach (var tree in trees) {
98        if (tree == current) {
99          found = true;
100          continue;
101        }
102
103        if (found) {
104          similarity += SymbolicDataAnalysisExpressionTreeSimilarity.MaxCommonSubtreeSimilarity(current, tree, SimilarityComparer);
105          //          similarity += SymbolicDataAnalysisExpressionTreeSimilarity.GeneticItemSimilarity(GeneticItems[current], GeneticItems[tree], MaximumTreeDepth);
106        }
107      }
108
109      lock (SimilarityParameter.ActualValue) {
110        SimilarityParameter.ActualValue.Value += similarity;
111      }
112      return base.Apply();
113    }
114  }
115
116  public static class SymbolicDataAnalysisExpressionTreeSimilarity {
117    private static double CalculateSimilarity(ISymbolicExpressionTreeNode a, ISymbolicExpressionTreeNode b, SymbolicExpressionTreeNodeSimilarityComparer comp) {
118      return 2.0 * SymbolicExpressionTreeMatching.Match(a, b, comp) / (a.GetLength() + b.GetLength());
119    }
120
121    public static double MaxCommonSubtreeSimilarity(ISymbolicExpressionTree a, ISymbolicExpressionTree b, SymbolicExpressionTreeNodeSimilarityComparer comparer) {
122      double max = 0;
123      var rootA = a.Root.GetSubtree(0).GetSubtree(0);
124      var rootB = b.Root.GetSubtree(0).GetSubtree(0);
125      foreach (var aa in rootA.IterateNodesBreadth()) {
126        int lenA = aa.GetLength();
127        if (lenA <= max) continue;
128        foreach (var bb in rootB.IterateNodesBreadth()) {
129          int lenB = bb.GetLength();
130          if (lenB <= max) continue;
131          int matches = SymbolicExpressionTreeMatching.Match(aa, bb, comparer);
132          if (max < matches) max = matches;
133        }
134      }
135      return 2.0 * max / (rootA.GetLength() + rootB.GetLength());
136    }
137
138    public static double GeneticItemSimilarity(ISymbolicExpressionTree a, ISymbolicExpressionTree b, int maximumTreeHeight, bool preventMultipleContribution = true) {
139      const int minLevelDelta = 1;
140      const int maxLevelDelta = 4;
141
142      var itemsA = a.GetGeneticItems(minLevelDelta, maxLevelDelta).ToArray();
143      var itemsB = b.GetGeneticItems(minLevelDelta, maxLevelDelta).ToArray();
144
145      return GeneticItemSimilarity(itemsA, itemsB, maximumTreeHeight);
146    }
147
148    public static double GeneticItemSimilarity(GeneticItem[] itemsA, GeneticItem[] itemsB, int maximumTreeHeight, bool preventMultipleContribution = true) {
149      double similarity = 0.0;
150      if (itemsA.Length == 0 || itemsB.Length == 0) return similarity;
151
152      var flagsB = new bool[itemsB.Length];
153
154      for (int i = 0; i != itemsA.Length; ++i) {
155        double simMax = 0.0;
156        int index = -1;
157        for (int j = 0; j != itemsB.Length; ++j) {
158          if (flagsB[j]) continue;
159          double sim = StructuralSimilarity(itemsA[i], itemsB[j], maximumTreeHeight);
160          if (sim > simMax) {
161            simMax = sim;
162            index = j;
163          }
164          if (preventMultipleContribution && index > -1) {
165            flagsB[index] = true;
166          }
167        }
168        similarity += simMax;
169      }
170      return similarity / itemsA.Length;
171    }
172
173    public static double AdditiveSimilarity(ISymbolicExpressionTree a, ISymbolicExpressionTree b, SymbolicExpressionTreeNodeSimilarityComparer comparer) {
174      var nA = a.Root.GetSubtree(0).GetSubtree(0);
175      var nB = b.Root.GetSubtree(0).GetSubtree(0);
176
177      var nodesA = nA.IterateNodesBreadth().ToArray();
178      var nodesB = nB.IterateNodesBreadth().ToArray();
179
180      var similarities = nodesA.SelectMany(ia => nodesB, (ia, ib) => CalculateSimilarity(ia, ib, comparer)).Where(s => !s.IsAlmost(0.0)).ToList();
181
182      double average = similarities.Count > 0 ? similarities.Average() : 0;
183      if (average > 1.0) throw new Exception("Similarity average should be less than 1.0");
184      if (average < 0.0) throw new Exception("Similarity average should be greater than 0.0");
185      return average;
186    }
187
188    private static double StructuralSimilarity(GeneticItem g1, GeneticItem g2, int heightMax) {
189      if (!(SameType(g1.Ascendant, g2.Ascendant) && SameType(g1.Descendant, g2.Descendant))) return 0.0;
190
191      double s1 = 1.0 - Math.Abs(g1.LevelDelta - g2.LevelDelta) / heightMax;
192      double s2 = g1.Index == g2.Index ? 1.0 : 0.0;
193      double s3 = g1.ParamA.Variant.Name.Equals(g2.ParamA.Variant.Name) ? 1.0 : 0.0;
194      double s4 = g1.ParamB.Variant.Name.Equals(g2.ParamB.Variant.Name) ? 1.0 : 0.0;
195
196      double deltaCa = Math.Abs(g1.ParamA.Coeff - g2.ParamA.Coeff);
197      double deltaCb = Math.Abs(g1.ParamB.Coeff - g2.ParamB.Coeff);
198      double s5 = 0.0;
199      double s6 = 0.0;
200      // no time offsets so we hardcode s7 = s8 = 0.0
201      double s7 = 0.0;
202      double s8 = 0.0;
203      // variable indexes
204      double s9 = 0.0;
205      double s10 = 0.0;
206
207      // same type with g2.Ascendant so we only do one check
208      if (g1.Ascendant is VariableTreeNode) {
209        s5 = deltaCa / (((Variable)g1.Ascendant.Symbol).WeightManipulatorSigma * 4);
210        s9 = g1.ParamA.VariableIndex.Equals(g2.ParamA.VariableIndex) ? 1.0 : 0.0;
211      }
212      if (g1.Descendant is VariableTreeNode) {
213        s6 = deltaCb / (((Variable)g1.Descendant.Symbol).WeightManipulatorSigma * 4);
214        s10 = g1.ParamB.VariableIndex.Equals(g2.ParamB.VariableIndex) ? 1.0 : 0.0;
215      }
216
217      double similarity = 1.0;
218
219      double[] constributors = new double[10] { s1, s2, s3, s4, s5, s6, s7, s8, s9, s10 }; // s1...s10
220      double[] coefficients = new double[10] { 0.8, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2 }; // c1...c10
221
222      for (int i = 0; i != 10; ++i) {
223        similarity *= (1 - (1 - constributors[i]) * coefficients[i]);
224      }
225      return double.IsNaN(similarity) ? 0 : similarity;
226    }
227
228    // genetic items for computing tree similarity (S. Winkler)
229    public class GeneticItem {
230      public ISymbolicExpressionTreeNode Ascendant;
231      public ISymbolicExpressionTreeNode Descendant;
232      public int LevelDelta;
233      public int Index;
234      public double[] Coefficients; // c_i = 0.2, i=1,...,10, d_1 = 0.8
235      // parameters for the Ascendant and Descendant
236      public GeneticItemParameters ParamA;
237      public GeneticItemParameters ParamB;
238    }
239
240    public class GeneticItemParameters {
241      public Symbol Variant; // the variant of functions
242      public double Coeff; // the coefficient of terminals
243      public int TimeOffset; // the time offset of terminals
244      public int VariableIndex; // the variable index (of terminals)
245    }
246    // get genetic items
247    public static List<GeneticItem> GetGeneticItems(this ISymbolicExpressionTree tree, int minLevelDelta, int maxLevelDelta) {
248      return GetGeneticItems(tree.Root.GetSubtree(0).GetSubtree(0), minLevelDelta, maxLevelDelta).ToList();
249    }
250
251    private static double Coefficient(this ISymbolicExpressionTreeNode node) {
252      var variable = node as VariableTreeNode;
253      if (variable != null)
254        return variable.Weight;
255      var constant = node as ConstantTreeNode;
256      if (constant != null)
257        return constant.Value;
258      //      return double.NaN;
259      return 0.0;
260    }
261
262    private static int VariableIndex(this ISymbolicExpressionTreeNode node) {
263      var variable = node as VariableTreeNode;
264      if (variable != null)
265        return variable.Symbol.AllVariableNames.ToList().IndexOf(variable.VariableName);
266      return -1;
267    }
268
269    private static IEnumerable<GeneticItem> GetGeneticItems(ISymbolicExpressionTreeNode node, int minimumLevelDelta, int maximumLevelDelta) {
270      var descendants = node.IterateNodesBreadth().Skip(1).ToArray();
271      for (int i = 0; i != descendants.Length; ++i) {
272        var descendant = descendants[i];
273        var levelDelta = node.GetBranchLevel(descendant);
274        if (!(minimumLevelDelta <= levelDelta && levelDelta <= maximumLevelDelta)) continue;
275        var p = descendant;
276        while (p.Parent != node && p.Parent != null)
277          p = p.Parent;
278        if (p.Parent == null) throw new Exception("The child is not a descendant of node");
279        var geneticItem = new GeneticItem {
280          Ascendant = node, Descendant = descendant, LevelDelta = levelDelta, Index = node.IndexOfSubtree(p),
281          ParamA = new GeneticItemParameters {
282            Coeff = node.Coefficient(), TimeOffset = 0, VariableIndex = node.VariableIndex(), Variant = (Symbol)node.Symbol
283          },
284          ParamB = new GeneticItemParameters {
285            Coeff = descendant.Coefficient(), TimeOffset = 0, VariableIndex = descendant.VariableIndex(), Variant = (Symbol)descendant.Symbol
286          }
287        };
288        yield return geneticItem;
289      }
290    }
291
292    // returns true if both nodes are variables, or both are constants, or both are functions
293    private static bool SameType(ISymbolicExpressionTreeNode a, ISymbolicExpressionTreeNode b) {
294      if (a is VariableTreeNode) {
295        return b is VariableTreeNode;
296      }
297      if (a is ConstantTreeNode) {
298        return b is ConstantTreeNode;
299      }
300      return true;
301    }
302  }
303}
Note: See TracBrowser for help on using the repository browser.