Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
05/02/13 13:48:38 (12 years ago)
Author:
bburlacu
Message:

#1772: Implemented GeneticItem-based similarity measure. Renamed ISymbolicExpressionTreeNodeComparer to ISymbolicExpressionTreeNodeSimilarityComparer.

Location:
branches/HeuristicLab.EvolutionaryTracking/HeuristicLab.Problems.DataAnalysis.Symbolic
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.EvolutionaryTracking/HeuristicLab.Problems.DataAnalysis.Symbolic

  • branches/HeuristicLab.EvolutionaryTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/SymbolicDataAnalysisExpressionTreeSimilarity.cs

    r9293 r9423  
    2020#endregion
    2121
    22 
    2322using System;
     23using System.Collections.Generic;
    2424using System.Linq;
    2525using HeuristicLab.Common;
     
    6969    public SymbolicExpressionTreeNodeSimilarityComparer SimilarityComparer { get; set; }
    7070
     71    public Dictionary<ISymbolicExpressionTree, SymbolicDataAnalysisExpressionTreeSimilarity.GeneticItem[]> GeneticItems;
     72
     73    public int MaximumTreeDepth { get; set; }
     74
    7175    protected SymbolicDataAnalysisExpressionTreeSimilarityCalculator(SymbolicDataAnalysisExpressionTreeSimilarityCalculator original, Cloner cloner) : base(original, cloner) { }
    7276    public override IDeepCloneable Clone(Cloner cloner) { return new SymbolicDataAnalysisExpressionTreeSimilarityCalculator(this, cloner); }
     
    8791      var trees = SymbolicExpressionTreeParameter.ActualValue;
    8892
    89       bool found = false;
    9093      double similarity = 0.0;
    9194      var current = CurrentSymbolicExpressionTree;
    9295
     96      bool found = false;
    9397      foreach (var tree in trees) {
    94         if (tree == current) { found = true; }
    95         if (!found) continue;
    96 
    97         similarity += SymbolicDataAnalysisExpressionTreeSimilarity.MaxCommonSubtreeSimilarity(current, tree, SimilarityComparer);
    98       }
     98        if (tree == current) {
     99          found = true;
     100          continue;
     101        }
     102
     103        if (found) {
     104          similarity += SymbolicDataAnalysisExpressionTreeSimilarity.MaxCommonSubtreeSimilarity(current, tree, SimilarityComparer);
     105          //          similarity += SymbolicDataAnalysisExpressionTreeSimilarity.GeneticItemSimilarity(GeneticItems[current], GeneticItems[tree], MaximumTreeDepth);
     106        }
     107      }
     108
    99109      lock (SimilarityParameter.ActualValue) {
    100110        SimilarityParameter.ActualValue.Value += similarity;
     
    104114  }
    105115
    106 
    107 
    108116  public static class SymbolicDataAnalysisExpressionTreeSimilarity {
    109     /// <summary>
    110     /// Returns a similarity value based on the size of the maximum common subtree according to the given equality comparison.
    111     /// </summary>
    112     /// <param name="a"></param>
    113     /// <param name="b"></param>
    114     /// <param name="comparer"></param>
    115     /// <returns>Similarity degree between the two trees, scaled between [0,1], where 1 = similar, 0 = non-similar</returns>
     117    private static double CalculateSimilarity(ISymbolicExpressionTreeNode a, ISymbolicExpressionTreeNode b, SymbolicExpressionTreeNodeSimilarityComparer comp) {
     118      return 2.0 * SymbolicExpressionTreeMatching.Match(a, b, comp) / (a.GetLength() + b.GetLength());
     119    }
     120
    116121    public static double MaxCommonSubtreeSimilarity(ISymbolicExpressionTree a, ISymbolicExpressionTree b, SymbolicExpressionTreeNodeSimilarityComparer comparer) {
    117122      double max = 0;
    118       foreach (var aa in a.Root.GetSubtree(0).GetSubtree(0).IterateNodesBreadth()) {
     123      var rootA = a.Root.GetSubtree(0).GetSubtree(0);
     124      var rootB = b.Root.GetSubtree(0).GetSubtree(0);
     125      foreach (var aa in rootA.IterateNodesBreadth()) {
    119126        int lenA = aa.GetLength();
    120127        if (lenA <= max) continue;
    121         foreach (var bb in b.Root.GetSubtree(0).GetSubtree(0).IterateNodesBreadth()) {
     128        foreach (var bb in rootB.IterateNodesBreadth()) {
    122129          int lenB = bb.GetLength();
    123130          if (lenB <= max) continue;
     
    126133        }
    127134      }
    128       return max / Math.Max(a.Length, b.Length);
    129     }
    130 
    131     private static double CalculateSimilarity(ISymbolicExpressionTreeNode a, ISymbolicExpressionTreeNode b, SymbolicExpressionTreeNodeSimilarityComparer comp) {
    132       return (double)SymbolicExpressionTreeMatching.Match(a, b, comp) / Math.Max(a.GetLength(), b.GetLength());
    133     }
    134 
    135     public static double CalculateCompoundSimilarity(ISymbolicExpressionTree a, ISymbolicExpressionTree b, SymbolicExpressionTreeNodeSimilarityComparer comparer) {
     135      return 2.0 * max / (rootA.GetLength() + rootB.GetLength());
     136    }
     137
     138    public static double GeneticItemSimilarity(ISymbolicExpressionTree a, ISymbolicExpressionTree b, int maximumTreeHeight, bool preventMultipleContribution = true) {
     139      const int minLevelDelta = 1;
     140      const int maxLevelDelta = 4;
     141
     142      var itemsA = a.GetGeneticItems(minLevelDelta, maxLevelDelta).ToArray();
     143      var itemsB = b.GetGeneticItems(minLevelDelta, maxLevelDelta).ToArray();
     144
     145      return GeneticItemSimilarity(itemsA, itemsB, maximumTreeHeight);
     146    }
     147
     148    public static double GeneticItemSimilarity(GeneticItem[] itemsA, GeneticItem[] itemsB, int maximumTreeHeight, bool preventMultipleContribution = true) {
     149      double similarity = 0.0;
     150      if (itemsA.Length == 0 || itemsB.Length == 0) return similarity;
     151
     152      var flagsB = new bool[itemsB.Length];
     153
     154      for (int i = 0; i != itemsA.Length; ++i) {
     155        double simMax = 0.0;
     156        int index = -1;
     157        for (int j = 0; j != itemsB.Length; ++j) {
     158          if (flagsB[j]) continue;
     159          double sim = StructuralSimilarity(itemsA[i], itemsB[j], maximumTreeHeight);
     160          if (sim > simMax) {
     161            simMax = sim;
     162            index = j;
     163          }
     164          if (preventMultipleContribution && index > -1) {
     165            flagsB[index] = true;
     166          }
     167        }
     168        similarity += simMax;
     169      }
     170      return similarity / itemsA.Length;
     171    }
     172
     173    public static double AdditiveSimilarity(ISymbolicExpressionTree a, ISymbolicExpressionTree b, SymbolicExpressionTreeNodeSimilarityComparer comparer) {
    136174      var nA = a.Root.GetSubtree(0).GetSubtree(0);
    137175      var nB = b.Root.GetSubtree(0).GetSubtree(0);
    138176
    139       var itemsA = nA.IterateNodesBreadth().Where(n => n.SubtreeCount > 0).Select(n => new MatchItem { Node = n, Matched = false }).ToArray();
    140       var itemsB = nB.IterateNodesBreadth().Where(n => n.SubtreeCount > 0).Select(n => new MatchItem { Node = n, Matched = false }).ToArray();
    141 
    142 
    143       double similaritySum = 0;
    144 
    145       foreach (var ia in itemsA) {
    146         foreach (var ib in itemsB) {
    147           similaritySum += CalculateSimilarity(ia.Node, ib.Node, comparer);
    148         }
    149       }
    150       return similaritySum / (itemsA.Length * itemsB.Length);
    151     }
    152   }
    153 
    154   class MatchItem {
    155     public ISymbolicExpressionTreeNode Node;
    156     public bool Matched;
     177      var nodesA = nA.IterateNodesBreadth().ToArray();
     178      var nodesB = nB.IterateNodesBreadth().ToArray();
     179
     180      var similarities = nodesA.SelectMany(ia => nodesB, (ia, ib) => CalculateSimilarity(ia, ib, comparer)).Where(s => !s.IsAlmost(0.0)).ToList();
     181
     182      double average = similarities.Count > 0 ? similarities.Average() : 0;
     183      if (average > 1.0) throw new Exception("Similarity average should be less than 1.0");
     184      if (average < 0.0) throw new Exception("Similarity average should be greater than 0.0");
     185      return average;
     186    }
     187
     188    private static double StructuralSimilarity(GeneticItem g1, GeneticItem g2, int heightMax) {
     189      if (!(SameType(g1.Ascendant, g2.Ascendant) && SameType(g1.Descendant, g2.Descendant))) return 0.0;
     190
     191      double s1 = 1.0 - Math.Abs(g1.LevelDelta - g2.LevelDelta) / heightMax;
     192      double s2 = g1.Index == g2.Index ? 1.0 : 0.0;
     193      double s3 = g1.ParamA.Variant.Name.Equals(g2.ParamA.Variant.Name) ? 1.0 : 0.0;
     194      double s4 = g1.ParamB.Variant.Name.Equals(g2.ParamB.Variant.Name) ? 1.0 : 0.0;
     195
     196      double deltaCa = Math.Abs(g1.ParamA.Coeff - g2.ParamA.Coeff);
     197      double deltaCb = Math.Abs(g1.ParamB.Coeff - g2.ParamB.Coeff);
     198      double s5 = 0.0;
     199      double s6 = 0.0;
     200      // no time offsets so we hardcode s7 = s8 = 0.0
     201      double s7 = 0.0;
     202      double s8 = 0.0;
     203      // variable indexes
     204      double s9 = 0.0;
     205      double s10 = 0.0;
     206
     207      // same type with g2.Ascendant so we only do one check
     208      if (g1.Ascendant is VariableTreeNode) {
     209        s5 = deltaCa / (((Variable)g1.Ascendant.Symbol).WeightManipulatorSigma * 4);
     210        s9 = g1.ParamA.VariableIndex.Equals(g2.ParamA.VariableIndex) ? 1.0 : 0.0;
     211      }
     212      if (g1.Descendant is VariableTreeNode) {
     213        s6 = deltaCb / (((Variable)g1.Descendant.Symbol).WeightManipulatorSigma * 4);
     214        s10 = g1.ParamB.VariableIndex.Equals(g2.ParamB.VariableIndex) ? 1.0 : 0.0;
     215      }
     216
     217      double similarity = 1.0;
     218
     219      double[] constributors = new double[10] { s1, s2, s3, s4, s5, s6, s7, s8, s9, s10 }; // s1...s10
     220      double[] coefficients = new double[10] { 0.8, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2 }; // c1...c10
     221
     222      for (int i = 0; i != 10; ++i) {
     223        similarity *= (1 - (1 - constributors[i]) * coefficients[i]);
     224      }
     225      return double.IsNaN(similarity) ? 0 : similarity;
     226    }
     227
     228    // genetic items for computing tree similarity (S. Winkler)
     229    public class GeneticItem {
     230      public ISymbolicExpressionTreeNode Ascendant;
     231      public ISymbolicExpressionTreeNode Descendant;
     232      public int LevelDelta;
     233      public int Index;
     234      public double[] Coefficients; // c_i = 0.2, i=1,...,10, d_1 = 0.8
     235      // parameters for the Ascendant and Descendant
     236      public GeneticItemParameters ParamA;
     237      public GeneticItemParameters ParamB;
     238    }
     239
     240    public class GeneticItemParameters {
     241      public Symbol Variant; // the variant of functions
     242      public double Coeff; // the coefficient of terminals
     243      public int TimeOffset; // the time offset of terminals
     244      public int VariableIndex; // the variable index (of terminals)
     245    }
     246    // get genetic items
     247    public static List<GeneticItem> GetGeneticItems(this ISymbolicExpressionTree tree, int minLevelDelta, int maxLevelDelta) {
     248      return GetGeneticItems(tree.Root.GetSubtree(0).GetSubtree(0), minLevelDelta, maxLevelDelta).ToList();
     249    }
     250
     251    private static double Coefficient(this ISymbolicExpressionTreeNode node) {
     252      var variable = node as VariableTreeNode;
     253      if (variable != null)
     254        return variable.Weight;
     255      var constant = node as ConstantTreeNode;
     256      if (constant != null)
     257        return constant.Value;
     258      //      return double.NaN;
     259      return 0.0;
     260    }
     261
     262    private static int VariableIndex(this ISymbolicExpressionTreeNode node) {
     263      var variable = node as VariableTreeNode;
     264      if (variable != null)
     265        return variable.Symbol.AllVariableNames.ToList().IndexOf(variable.VariableName);
     266      return -1;
     267    }
     268
     269    private static IEnumerable<GeneticItem> GetGeneticItems(ISymbolicExpressionTreeNode node, int minimumLevelDelta, int maximumLevelDelta) {
     270      var descendants = node.IterateNodesBreadth().Skip(1).ToArray();
     271      for (int i = 0; i != descendants.Length; ++i) {
     272        var descendant = descendants[i];
     273        var levelDelta = node.GetBranchLevel(descendant);
     274        if (!(minimumLevelDelta <= levelDelta && levelDelta <= maximumLevelDelta)) continue;
     275        var p = descendant;
     276        while (p.Parent != node && p.Parent != null)
     277          p = p.Parent;
     278        if (p.Parent == null) throw new Exception("The child is not a descendant of node");
     279        var geneticItem = new GeneticItem {
     280          Ascendant = node, Descendant = descendant, LevelDelta = levelDelta, Index = node.IndexOfSubtree(p),
     281          ParamA = new GeneticItemParameters {
     282            Coeff = node.Coefficient(), TimeOffset = 0, VariableIndex = node.VariableIndex(), Variant = (Symbol)node.Symbol
     283          },
     284          ParamB = new GeneticItemParameters {
     285            Coeff = descendant.Coefficient(), TimeOffset = 0, VariableIndex = descendant.VariableIndex(), Variant = (Symbol)descendant.Symbol
     286          }
     287        };
     288        yield return geneticItem;
     289      }
     290    }
     291
     292    // returns true if both nodes are variables, or both are constants, or both are functions
     293    private static bool SameType(ISymbolicExpressionTreeNode a, ISymbolicExpressionTreeNode b) {
     294      if (a is VariableTreeNode) {
     295        return b is VariableTreeNode;
     296      }
     297      if (a is ConstantTreeNode) {
     298        return b is ConstantTreeNode;
     299      }
     300      return true;
     301    }
    157302  }
    158303}
Note: See TracChangeset for help on using the changeset viewer.