Changeset 9423 for branches/HeuristicLab.EvolutionaryTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/SymbolicDataAnalysisExpressionTreeSimilarity.cs
- Timestamp:
- 05/02/13 13:48:38 (12 years ago)
- Location:
- branches/HeuristicLab.EvolutionaryTracking/HeuristicLab.Problems.DataAnalysis.Symbolic
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/HeuristicLab.EvolutionaryTracking/HeuristicLab.Problems.DataAnalysis.Symbolic
- Property svn:mergeinfo changed
/trunk/sources/HeuristicLab.Problems.DataAnalysis.Symbolic merged: 9288
- Property svn:mergeinfo changed
-
branches/HeuristicLab.EvolutionaryTracking/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/SymbolicDataAnalysisExpressionTreeSimilarity.cs
r9293 r9423 20 20 #endregion 21 21 22 23 22 using System; 23 using System.Collections.Generic; 24 24 using System.Linq; 25 25 using HeuristicLab.Common; … … 69 69 public SymbolicExpressionTreeNodeSimilarityComparer SimilarityComparer { get; set; } 70 70 71 public Dictionary<ISymbolicExpressionTree, SymbolicDataAnalysisExpressionTreeSimilarity.GeneticItem[]> GeneticItems; 72 73 public int MaximumTreeDepth { get; set; } 74 71 75 protected SymbolicDataAnalysisExpressionTreeSimilarityCalculator(SymbolicDataAnalysisExpressionTreeSimilarityCalculator original, Cloner cloner) : base(original, cloner) { } 72 76 public override IDeepCloneable Clone(Cloner cloner) { return new SymbolicDataAnalysisExpressionTreeSimilarityCalculator(this, cloner); } … … 87 91 var trees = SymbolicExpressionTreeParameter.ActualValue; 88 92 89 bool found = false;90 93 double similarity = 0.0; 91 94 var current = CurrentSymbolicExpressionTree; 92 95 96 bool found = false; 93 97 foreach (var tree in trees) { 94 if (tree == current) { found = true; } 95 if (!found) continue; 96 97 similarity += SymbolicDataAnalysisExpressionTreeSimilarity.MaxCommonSubtreeSimilarity(current, tree, SimilarityComparer); 98 } 98 if (tree == current) { 99 found = true; 100 continue; 101 } 102 103 if (found) { 104 similarity += SymbolicDataAnalysisExpressionTreeSimilarity.MaxCommonSubtreeSimilarity(current, tree, SimilarityComparer); 105 // similarity += SymbolicDataAnalysisExpressionTreeSimilarity.GeneticItemSimilarity(GeneticItems[current], GeneticItems[tree], MaximumTreeDepth); 106 } 107 } 108 99 109 lock (SimilarityParameter.ActualValue) { 100 110 SimilarityParameter.ActualValue.Value += similarity; … … 104 114 } 105 115 106 107 108 116 public static class SymbolicDataAnalysisExpressionTreeSimilarity { 109 /// <summary> 110 /// Returns a similarity value based on the size of the maximum common subtree according to the given equality comparison. 111 /// </summary> 112 /// <param name="a"></param> 113 /// <param name="b"></param> 114 /// <param name="comparer"></param> 115 /// <returns>Similarity degree between the two trees, scaled between [0,1], where 1 = similar, 0 = non-similar</returns> 117 private static double CalculateSimilarity(ISymbolicExpressionTreeNode a, ISymbolicExpressionTreeNode b, SymbolicExpressionTreeNodeSimilarityComparer comp) { 118 return 2.0 * SymbolicExpressionTreeMatching.Match(a, b, comp) / (a.GetLength() + b.GetLength()); 119 } 120 116 121 public static double MaxCommonSubtreeSimilarity(ISymbolicExpressionTree a, ISymbolicExpressionTree b, SymbolicExpressionTreeNodeSimilarityComparer comparer) { 117 122 double max = 0; 118 foreach (var aa in a.Root.GetSubtree(0).GetSubtree(0).IterateNodesBreadth()) { 123 var rootA = a.Root.GetSubtree(0).GetSubtree(0); 124 var rootB = b.Root.GetSubtree(0).GetSubtree(0); 125 foreach (var aa in rootA.IterateNodesBreadth()) { 119 126 int lenA = aa.GetLength(); 120 127 if (lenA <= max) continue; 121 foreach (var bb in b.Root.GetSubtree(0).GetSubtree(0).IterateNodesBreadth()) {128 foreach (var bb in rootB.IterateNodesBreadth()) { 122 129 int lenB = bb.GetLength(); 123 130 if (lenB <= max) continue; … … 126 133 } 127 134 } 128 return max / Math.Max(a.Length, b.Length); 129 } 130 131 private static double CalculateSimilarity(ISymbolicExpressionTreeNode a, ISymbolicExpressionTreeNode b, SymbolicExpressionTreeNodeSimilarityComparer comp) { 132 return (double)SymbolicExpressionTreeMatching.Match(a, b, comp) / Math.Max(a.GetLength(), b.GetLength()); 133 } 134 135 public static double CalculateCompoundSimilarity(ISymbolicExpressionTree a, ISymbolicExpressionTree b, SymbolicExpressionTreeNodeSimilarityComparer comparer) { 135 return 2.0 * max / (rootA.GetLength() + rootB.GetLength()); 136 } 137 138 public static double GeneticItemSimilarity(ISymbolicExpressionTree a, ISymbolicExpressionTree b, int maximumTreeHeight, bool preventMultipleContribution = true) { 139 const int minLevelDelta = 1; 140 const int maxLevelDelta = 4; 141 142 var itemsA = a.GetGeneticItems(minLevelDelta, maxLevelDelta).ToArray(); 143 var itemsB = b.GetGeneticItems(minLevelDelta, maxLevelDelta).ToArray(); 144 145 return GeneticItemSimilarity(itemsA, itemsB, maximumTreeHeight); 146 } 147 148 public static double GeneticItemSimilarity(GeneticItem[] itemsA, GeneticItem[] itemsB, int maximumTreeHeight, bool preventMultipleContribution = true) { 149 double similarity = 0.0; 150 if (itemsA.Length == 0 || itemsB.Length == 0) return similarity; 151 152 var flagsB = new bool[itemsB.Length]; 153 154 for (int i = 0; i != itemsA.Length; ++i) { 155 double simMax = 0.0; 156 int index = -1; 157 for (int j = 0; j != itemsB.Length; ++j) { 158 if (flagsB[j]) continue; 159 double sim = StructuralSimilarity(itemsA[i], itemsB[j], maximumTreeHeight); 160 if (sim > simMax) { 161 simMax = sim; 162 index = j; 163 } 164 if (preventMultipleContribution && index > -1) { 165 flagsB[index] = true; 166 } 167 } 168 similarity += simMax; 169 } 170 return similarity / itemsA.Length; 171 } 172 173 public static double AdditiveSimilarity(ISymbolicExpressionTree a, ISymbolicExpressionTree b, SymbolicExpressionTreeNodeSimilarityComparer comparer) { 136 174 var nA = a.Root.GetSubtree(0).GetSubtree(0); 137 175 var nB = b.Root.GetSubtree(0).GetSubtree(0); 138 176 139 var itemsA = nA.IterateNodesBreadth().Where(n => n.SubtreeCount > 0).Select(n => new MatchItem { Node = n, Matched = false }).ToArray(); 140 var itemsB = nB.IterateNodesBreadth().Where(n => n.SubtreeCount > 0).Select(n => new MatchItem { Node = n, Matched = false }).ToArray(); 141 142 143 double similaritySum = 0; 144 145 foreach (var ia in itemsA) { 146 foreach (var ib in itemsB) { 147 similaritySum += CalculateSimilarity(ia.Node, ib.Node, comparer); 148 } 149 } 150 return similaritySum / (itemsA.Length * itemsB.Length); 151 } 152 } 153 154 class MatchItem { 155 public ISymbolicExpressionTreeNode Node; 156 public bool Matched; 177 var nodesA = nA.IterateNodesBreadth().ToArray(); 178 var nodesB = nB.IterateNodesBreadth().ToArray(); 179 180 var similarities = nodesA.SelectMany(ia => nodesB, (ia, ib) => CalculateSimilarity(ia, ib, comparer)).Where(s => !s.IsAlmost(0.0)).ToList(); 181 182 double average = similarities.Count > 0 ? similarities.Average() : 0; 183 if (average > 1.0) throw new Exception("Similarity average should be less than 1.0"); 184 if (average < 0.0) throw new Exception("Similarity average should be greater than 0.0"); 185 return average; 186 } 187 188 private static double StructuralSimilarity(GeneticItem g1, GeneticItem g2, int heightMax) { 189 if (!(SameType(g1.Ascendant, g2.Ascendant) && SameType(g1.Descendant, g2.Descendant))) return 0.0; 190 191 double s1 = 1.0 - Math.Abs(g1.LevelDelta - g2.LevelDelta) / heightMax; 192 double s2 = g1.Index == g2.Index ? 1.0 : 0.0; 193 double s3 = g1.ParamA.Variant.Name.Equals(g2.ParamA.Variant.Name) ? 1.0 : 0.0; 194 double s4 = g1.ParamB.Variant.Name.Equals(g2.ParamB.Variant.Name) ? 1.0 : 0.0; 195 196 double deltaCa = Math.Abs(g1.ParamA.Coeff - g2.ParamA.Coeff); 197 double deltaCb = Math.Abs(g1.ParamB.Coeff - g2.ParamB.Coeff); 198 double s5 = 0.0; 199 double s6 = 0.0; 200 // no time offsets so we hardcode s7 = s8 = 0.0 201 double s7 = 0.0; 202 double s8 = 0.0; 203 // variable indexes 204 double s9 = 0.0; 205 double s10 = 0.0; 206 207 // same type with g2.Ascendant so we only do one check 208 if (g1.Ascendant is VariableTreeNode) { 209 s5 = deltaCa / (((Variable)g1.Ascendant.Symbol).WeightManipulatorSigma * 4); 210 s9 = g1.ParamA.VariableIndex.Equals(g2.ParamA.VariableIndex) ? 1.0 : 0.0; 211 } 212 if (g1.Descendant is VariableTreeNode) { 213 s6 = deltaCb / (((Variable)g1.Descendant.Symbol).WeightManipulatorSigma * 4); 214 s10 = g1.ParamB.VariableIndex.Equals(g2.ParamB.VariableIndex) ? 1.0 : 0.0; 215 } 216 217 double similarity = 1.0; 218 219 double[] constributors = new double[10] { s1, s2, s3, s4, s5, s6, s7, s8, s9, s10 }; // s1...s10 220 double[] coefficients = new double[10] { 0.8, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2 }; // c1...c10 221 222 for (int i = 0; i != 10; ++i) { 223 similarity *= (1 - (1 - constributors[i]) * coefficients[i]); 224 } 225 return double.IsNaN(similarity) ? 0 : similarity; 226 } 227 228 // genetic items for computing tree similarity (S. Winkler) 229 public class GeneticItem { 230 public ISymbolicExpressionTreeNode Ascendant; 231 public ISymbolicExpressionTreeNode Descendant; 232 public int LevelDelta; 233 public int Index; 234 public double[] Coefficients; // c_i = 0.2, i=1,...,10, d_1 = 0.8 235 // parameters for the Ascendant and Descendant 236 public GeneticItemParameters ParamA; 237 public GeneticItemParameters ParamB; 238 } 239 240 public class GeneticItemParameters { 241 public Symbol Variant; // the variant of functions 242 public double Coeff; // the coefficient of terminals 243 public int TimeOffset; // the time offset of terminals 244 public int VariableIndex; // the variable index (of terminals) 245 } 246 // get genetic items 247 public static List<GeneticItem> GetGeneticItems(this ISymbolicExpressionTree tree, int minLevelDelta, int maxLevelDelta) { 248 return GetGeneticItems(tree.Root.GetSubtree(0).GetSubtree(0), minLevelDelta, maxLevelDelta).ToList(); 249 } 250 251 private static double Coefficient(this ISymbolicExpressionTreeNode node) { 252 var variable = node as VariableTreeNode; 253 if (variable != null) 254 return variable.Weight; 255 var constant = node as ConstantTreeNode; 256 if (constant != null) 257 return constant.Value; 258 // return double.NaN; 259 return 0.0; 260 } 261 262 private static int VariableIndex(this ISymbolicExpressionTreeNode node) { 263 var variable = node as VariableTreeNode; 264 if (variable != null) 265 return variable.Symbol.AllVariableNames.ToList().IndexOf(variable.VariableName); 266 return -1; 267 } 268 269 private static IEnumerable<GeneticItem> GetGeneticItems(ISymbolicExpressionTreeNode node, int minimumLevelDelta, int maximumLevelDelta) { 270 var descendants = node.IterateNodesBreadth().Skip(1).ToArray(); 271 for (int i = 0; i != descendants.Length; ++i) { 272 var descendant = descendants[i]; 273 var levelDelta = node.GetBranchLevel(descendant); 274 if (!(minimumLevelDelta <= levelDelta && levelDelta <= maximumLevelDelta)) continue; 275 var p = descendant; 276 while (p.Parent != node && p.Parent != null) 277 p = p.Parent; 278 if (p.Parent == null) throw new Exception("The child is not a descendant of node"); 279 var geneticItem = new GeneticItem { 280 Ascendant = node, Descendant = descendant, LevelDelta = levelDelta, Index = node.IndexOfSubtree(p), 281 ParamA = new GeneticItemParameters { 282 Coeff = node.Coefficient(), TimeOffset = 0, VariableIndex = node.VariableIndex(), Variant = (Symbol)node.Symbol 283 }, 284 ParamB = new GeneticItemParameters { 285 Coeff = descendant.Coefficient(), TimeOffset = 0, VariableIndex = descendant.VariableIndex(), Variant = (Symbol)descendant.Symbol 286 } 287 }; 288 yield return geneticItem; 289 } 290 } 291 292 // returns true if both nodes are variables, or both are constants, or both are functions 293 private static bool SameType(ISymbolicExpressionTreeNode a, ISymbolicExpressionTreeNode b) { 294 if (a is VariableTreeNode) { 295 return b is VariableTreeNode; 296 } 297 if (a is ConstantTreeNode) { 298 return b is ConstantTreeNode; 299 } 300 return true; 301 } 157 302 } 158 303 }
Note: See TracChangeset
for help on using the changeset viewer.