Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.EvolutionaryTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/SymbolicDataAnalysisExpressionTreeSimilarity.cs @ 12869

Last change on this file since 12869 was 9835, checked in by bburlacu, 11 years ago

#1772: Merged remaining trunk changes into the EvolutionaryTracking branch.

File size: 14.0 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2012 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29using HeuristicLab.Operators;
30using HeuristicLab.Parameters;
31using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
32
33namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
34  [StorableClass]
35  public class SymbolicDataAnalysisExpressionTreeSimilarityCalculator : SingleSuccessorOperator {
36    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
37    private const string CurrentSymbolicExpressionTreeParameterName = "CurrentSymbolicExpressionTree";
38    private const string SimilarityValuesParmeterName = "Similarity";
39    // comparer parameters
40    private const string MatchVariablesParameterName = "MatchVariableNames";
41    private const string MatchVariableWeightsParameterName = "MatchVariableWeights";
42    private const string MatchConstantValuesParameterName = "MatchConstantValues";
43
44    public IScopeTreeLookupParameter<ISymbolicExpressionTree> SymbolicExpressionTreeParameter {
45      get { return (IScopeTreeLookupParameter<ISymbolicExpressionTree>)Parameters[SymbolicExpressionTreeParameterName]; }
46    }
47    public IValueParameter<ISymbolicExpressionTree> CurrentSymbolicExpressionTreeParameter {
48      get { return (IValueParameter<ISymbolicExpressionTree>)Parameters[CurrentSymbolicExpressionTreeParameterName]; }
49    }
50    public ILookupParameter<BoolValue> MatchVariableNamesParameter {
51      get { return (ILookupParameter<BoolValue>)Parameters[MatchVariablesParameterName]; }
52    }
53    public ILookupParameter<BoolValue> MatchVariableWeightsParameter {
54      get { return (ILookupParameter<BoolValue>)Parameters[MatchVariableWeightsParameterName]; }
55    }
56    public ILookupParameter<BoolValue> MatchConstantValuesParameter {
57      get { return (ILookupParameter<BoolValue>)Parameters[MatchConstantValuesParameterName]; }
58    }
59    public ILookupParameter<DoubleValue> SimilarityParameter {
60      get { return (ILookupParameter<DoubleValue>)Parameters[SimilarityValuesParmeterName]; }
61    }
62
63    public ISymbolicExpressionTree CurrentSymbolicExpressionTree {
64      get { return CurrentSymbolicExpressionTreeParameter.Value; }
65      set { CurrentSymbolicExpressionTreeParameter.Value = value; }
66    }
67
68    public SymbolicExpressionTreeNodeSimilarityComparer SimilarityComparer { get; set; }
69
70    public Dictionary<ISymbolicExpressionTree, SymbolicDataAnalysisExpressionTreeSimilarity.GeneticItem[]> GeneticItems;
71
72    public int MaximumTreeDepth { get; set; }
73
74    protected SymbolicDataAnalysisExpressionTreeSimilarityCalculator(SymbolicDataAnalysisExpressionTreeSimilarityCalculator original, Cloner cloner) : base(original, cloner) { }
75    public override IDeepCloneable Clone(Cloner cloner) { return new SymbolicDataAnalysisExpressionTreeSimilarityCalculator(this, cloner); }
76    [StorableConstructor]
77    protected SymbolicDataAnalysisExpressionTreeSimilarityCalculator(bool deserializing) : base(deserializing) { }
78
79    public SymbolicDataAnalysisExpressionTreeSimilarityCalculator()
80      : base() {
81      Parameters.Add(new ScopeTreeLookupParameter<ISymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees to analyze."));
82      Parameters.Add(new ValueParameter<ISymbolicExpressionTree>(CurrentSymbolicExpressionTreeParameterName, ""));
83      Parameters.Add(new LookupParameter<BoolValue>(MatchVariablesParameterName, "Specify if the symbolic expression tree comparer should match variable names."));
84      Parameters.Add(new LookupParameter<BoolValue>(MatchVariableWeightsParameterName, "Specify if the symbolic expression tree comparer should match variable weights."));
85      Parameters.Add(new LookupParameter<BoolValue>(MatchConstantValuesParameterName, "Specify if the symbolic expression tree comparer should match constant values."));
86      Parameters.Add(new LookupParameter<DoubleValue>(SimilarityValuesParmeterName, ""));
87    }
88
89    public override IOperation Apply() {
90      var trees = SymbolicExpressionTreeParameter.ActualValue;
91
92      double similarity = 0.0;
93      var current = CurrentSymbolicExpressionTree;
94
95      bool found = false;
96      foreach (var tree in trees) {
97        if (tree == current) {
98          found = true;
99          continue;
100        }
101
102        if (found) {
103          similarity += SymbolicDataAnalysisExpressionTreeSimilarity.MaxCommonSubtreeSimilarity(current, tree, SimilarityComparer);
104          //          similarity += SymbolicDataAnalysisExpressionTreeSimilarity.GeneticItemSimilarity(GeneticItems[current], GeneticItems[tree], MaximumTreeDepth);
105        }
106      }
107
108      lock (SimilarityParameter.ActualValue) {
109        SimilarityParameter.ActualValue.Value += similarity;
110      }
111      return base.Apply();
112    }
113  }
114
115  public static class SymbolicDataAnalysisExpressionTreeSimilarity {
116    public static double CalculateSimilarity(ISymbolicExpressionTreeNode a, ISymbolicExpressionTreeNode b, SymbolicExpressionTreeNodeSimilarityComparer comp) {
117      return 2.0 * SymbolicExpressionTreeMatching.Match(a, b, comp) / (a.GetLength() + b.GetLength());
118    }
119
120    public static double MaxCommonSubtreeSimilarity(ISymbolicExpressionTree a, ISymbolicExpressionTree b, SymbolicExpressionTreeNodeSimilarityComparer comparer) {
121      double max = 0;
122      var rootA = a.Root.GetSubtree(0).GetSubtree(0);
123      var rootB = b.Root.GetSubtree(0).GetSubtree(0);
124      foreach (var aa in rootA.IterateNodesBreadth()) {
125        int lenA = aa.GetLength();
126        if (lenA <= max) continue;
127        foreach (var bb in rootB.IterateNodesBreadth()) {
128          int lenB = bb.GetLength();
129          if (lenB <= max) continue;
130          int matches = SymbolicExpressionTreeMatching.Match(aa, bb, comparer);
131          if (max < matches) max = matches;
132        }
133      }
134      return 2.0 * max / (rootA.GetLength() + rootB.GetLength());
135    }
136
137    public static double GeneticItemSimilarity(ISymbolicExpressionTree a, ISymbolicExpressionTree b, int maximumTreeHeight, bool preventMultipleContribution = true) {
138      const int minLevelDelta = 1;
139      const int maxLevelDelta = 4;
140
141      var itemsA = a.GetGeneticItems(minLevelDelta, maxLevelDelta).ToArray();
142      var itemsB = b.GetGeneticItems(minLevelDelta, maxLevelDelta).ToArray();
143
144      return GeneticItemSimilarity(itemsA, itemsB, maximumTreeHeight);
145    }
146
147    public static double GeneticItemSimilarity(GeneticItem[] itemsA, GeneticItem[] itemsB, int maximumTreeHeight, bool preventMultipleContribution = true) {
148      double similarity = 0.0;
149      if (itemsA.Length == 0 || itemsB.Length == 0) return similarity;
150
151      var flagsB = new bool[itemsB.Length];
152
153      for (int i = 0; i != itemsA.Length; ++i) {
154        double simMax = 0.0;
155        int index = -1;
156        for (int j = 0; j != itemsB.Length; ++j) {
157          if (flagsB[j]) continue;
158          double sim = StructuralSimilarity(itemsA[i], itemsB[j], maximumTreeHeight);
159          if (sim > simMax) {
160            simMax = sim;
161            index = j;
162          }
163          if (preventMultipleContribution && index > -1) {
164            flagsB[index] = true;
165          }
166        }
167        similarity += simMax;
168      }
169      return similarity / itemsA.Length;
170    }
171
172    public static double AdditiveSimilarity(ISymbolicExpressionTree a, ISymbolicExpressionTree b, SymbolicExpressionTreeNodeSimilarityComparer comparer) {
173      var nA = a.Root.GetSubtree(0).GetSubtree(0);
174      var nB = b.Root.GetSubtree(0).GetSubtree(0);
175
176      var nodesA = nA.IterateNodesBreadth().ToArray();
177      var nodesB = nB.IterateNodesBreadth().ToArray();
178
179      var similarities = nodesA.SelectMany(ia => nodesB, (ia, ib) => CalculateSimilarity(ia, ib, comparer)).Where(s => !s.IsAlmost(0.0)).ToList();
180
181      double average = similarities.Count > 0 ? similarities.Average() : 0;
182      if (average > 1.0) throw new Exception("Similarity average should be less than 1.0");
183      if (average < 0.0) throw new Exception("Similarity average should be greater than 0.0");
184      return average;
185    }
186
187    private static double StructuralSimilarity(GeneticItem g1, GeneticItem g2, int heightMax) {
188      if (!(SameType(g1.Ascendant, g2.Ascendant) && SameType(g1.Descendant, g2.Descendant))) return 0.0;
189
190      double s1 = 1.0 - Math.Abs(g1.LevelDelta - g2.LevelDelta) / heightMax;
191      double s2 = g1.Index == g2.Index ? 1.0 : 0.0;
192      double s3 = g1.ParamA.Variant.Name.Equals(g2.ParamA.Variant.Name) ? 1.0 : 0.0;
193      double s4 = g1.ParamB.Variant.Name.Equals(g2.ParamB.Variant.Name) ? 1.0 : 0.0;
194
195      double deltaCa = Math.Abs(g1.ParamA.Coeff - g2.ParamA.Coeff);
196      double deltaCb = Math.Abs(g1.ParamB.Coeff - g2.ParamB.Coeff);
197      double s5 = 0.0;
198      double s6 = 0.0;
199      // no time offsets so we hardcode s7 = s8 = 0.0
200      double s7 = 0.0;
201      double s8 = 0.0;
202      // variable indexes
203      double s9 = 0.0;
204      double s10 = 0.0;
205
206      // same type with g2.Ascendant so we only do one check
207      if (g1.Ascendant is VariableTreeNode) {
208        s5 = deltaCa / (((Variable)g1.Ascendant.Symbol).WeightManipulatorSigma * 4);
209        s9 = g1.ParamA.VariableIndex.Equals(g2.ParamA.VariableIndex) ? 1.0 : 0.0;
210      }
211      if (g1.Descendant is VariableTreeNode) {
212        s6 = deltaCb / (((Variable)g1.Descendant.Symbol).WeightManipulatorSigma * 4);
213        s10 = g1.ParamB.VariableIndex.Equals(g2.ParamB.VariableIndex) ? 1.0 : 0.0;
214      }
215
216      double similarity = 1.0;
217
218      double[] constributors = new double[10] { s1, s2, s3, s4, s5, s6, s7, s8, s9, s10 }; // s1...s10
219      double[] coefficients = new double[10] { 0.8, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2 }; // c1...c10
220
221      for (int i = 0; i != 10; ++i) {
222        similarity *= (1 - (1 - constributors[i]) * coefficients[i]);
223      }
224      return double.IsNaN(similarity) ? 0 : similarity;
225    }
226
227    // genetic items for computing tree similarity (S. Winkler)
228    public class GeneticItem {
229      public ISymbolicExpressionTreeNode Ascendant;
230      public ISymbolicExpressionTreeNode Descendant;
231      public int LevelDelta;
232      public int Index;
233      public double[] Coefficients; // c_i = 0.2, i=1,...,10, d_1 = 0.8
234      // parameters for the Ascendant and Descendant
235      public GeneticItemParameters ParamA;
236      public GeneticItemParameters ParamB;
237    }
238
239    public class GeneticItemParameters {
240      public Symbol Variant; // the variant of functions
241      public double Coeff; // the coefficient of terminals
242      public int TimeOffset; // the time offset of terminals
243      public int VariableIndex; // the variable index (of terminals)
244    }
245    // get genetic items
246    public static List<GeneticItem> GetGeneticItems(this ISymbolicExpressionTree tree, int minLevelDelta, int maxLevelDelta) {
247      return GetGeneticItems(tree.Root.GetSubtree(0).GetSubtree(0), minLevelDelta, maxLevelDelta).ToList();
248    }
249
250    private static double Coefficient(this ISymbolicExpressionTreeNode node) {
251      var variable = node as VariableTreeNode;
252      if (variable != null)
253        return variable.Weight;
254      var constant = node as ConstantTreeNode;
255      if (constant != null)
256        return constant.Value;
257      return 0.0;
258    }
259
260    private static int VariableIndex(this ISymbolicExpressionTreeNode node) {
261      var variable = node as VariableTreeNode;
262      if (variable != null)
263        return variable.Symbol.AllVariableNames.ToList().IndexOf(variable.VariableName);
264      return -1;
265    }
266
267    private static IEnumerable<GeneticItem> GetGeneticItems(ISymbolicExpressionTreeNode node, int minimumLevelDelta, int maximumLevelDelta) {
268      var descendants = node.IterateNodesBreadth().Skip(1).ToArray();
269      for (int i = 0; i != descendants.Length; ++i) {
270        var descendant = descendants[i];
271        var levelDelta = node.GetBranchLevel(descendant);
272        if (!(minimumLevelDelta <= levelDelta && levelDelta <= maximumLevelDelta)) continue;
273        var p = descendant;
274        while (p.Parent != node && p.Parent != null)
275          p = p.Parent;
276        if (p.Parent == null) throw new Exception("The child is not a descendant of node");
277        var geneticItem = new GeneticItem {
278          Ascendant = node, Descendant = descendant, LevelDelta = levelDelta, Index = node.IndexOfSubtree(p),
279          ParamA = new GeneticItemParameters {
280            Coeff = node.Coefficient(), TimeOffset = 0, VariableIndex = node.VariableIndex(), Variant = (Symbol)node.Symbol
281          },
282          ParamB = new GeneticItemParameters {
283            Coeff = descendant.Coefficient(), TimeOffset = 0, VariableIndex = descendant.VariableIndex(), Variant = (Symbol)descendant.Symbol
284          }
285        };
286        yield return geneticItem;
287      }
288    }
289
290    // returns true if both nodes are variables, or both are constants, or both are functions
291    private static bool SameType(ISymbolicExpressionTreeNode a, ISymbolicExpressionTreeNode b) {
292      if (a is VariableTreeNode) {
293        return b is VariableTreeNode;
294      }
295      if (a is ConstantTreeNode) {
296        return b is ConstantTreeNode;
297      }
298      return true;
299    }
300  }
301}
Note: See TracBrowser for help on using the repository browser.