#region License Information /* HeuristicLab * Copyright (C) 2002-2012 Heuristic and Evolutionary Algorithms Laboratory (HEAL) * * This file is part of HeuristicLab. * * HeuristicLab is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * HeuristicLab is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with HeuristicLab. If not, see . */ #endregion using System; using System.Collections.Generic; using System.Linq; using HeuristicLab.Common; using HeuristicLab.Core; using HeuristicLab.Data; using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding; using HeuristicLab.Operators; using HeuristicLab.Parameters; using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; namespace HeuristicLab.Problems.DataAnalysis.Symbolic { [StorableClass] public class SymbolicDataAnalysisExpressionTreeSimilarityCalculator : SingleSuccessorOperator { private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree"; private const string CurrentSymbolicExpressionTreeParameterName = "CurrentSymbolicExpressionTree"; private const string SimilarityValuesParmeterName = "Similarity"; // comparer parameters private const string MatchVariablesParameterName = "MatchVariableNames"; private const string MatchVariableWeightsParameterName = "MatchVariableWeights"; private const string MatchConstantValuesParameterName = "MatchConstantValues"; public IScopeTreeLookupParameter SymbolicExpressionTreeParameter { get { return (IScopeTreeLookupParameter)Parameters[SymbolicExpressionTreeParameterName]; } } public IValueParameter CurrentSymbolicExpressionTreeParameter { get { return (IValueParameter)Parameters[CurrentSymbolicExpressionTreeParameterName]; } } public ILookupParameter MatchVariableNamesParameter { get { return (ILookupParameter)Parameters[MatchVariablesParameterName]; } } public ILookupParameter MatchVariableWeightsParameter { get { return (ILookupParameter)Parameters[MatchVariableWeightsParameterName]; } } public ILookupParameter MatchConstantValuesParameter { get { return (ILookupParameter)Parameters[MatchConstantValuesParameterName]; } } public ILookupParameter SimilarityParameter { get { return (ILookupParameter)Parameters[SimilarityValuesParmeterName]; } } public ISymbolicExpressionTree CurrentSymbolicExpressionTree { get { return CurrentSymbolicExpressionTreeParameter.Value; } set { CurrentSymbolicExpressionTreeParameter.Value = value; } } public SymbolicExpressionTreeNodeSimilarityComparer SimilarityComparer { get; set; } public Dictionary GeneticItems; public int MaximumTreeDepth { get; set; } protected SymbolicDataAnalysisExpressionTreeSimilarityCalculator(SymbolicDataAnalysisExpressionTreeSimilarityCalculator original, Cloner cloner) : base(original, cloner) { } public override IDeepCloneable Clone(Cloner cloner) { return new SymbolicDataAnalysisExpressionTreeSimilarityCalculator(this, cloner); } [StorableConstructor] protected SymbolicDataAnalysisExpressionTreeSimilarityCalculator(bool deserializing) : base(deserializing) { } public SymbolicDataAnalysisExpressionTreeSimilarityCalculator() : base() { Parameters.Add(new ScopeTreeLookupParameter(SymbolicExpressionTreeParameterName, "The symbolic expression trees to analyze.")); Parameters.Add(new ValueParameter(CurrentSymbolicExpressionTreeParameterName, "")); Parameters.Add(new LookupParameter(MatchVariablesParameterName, "Specify if the symbolic expression tree comparer should match variable names.")); Parameters.Add(new LookupParameter(MatchVariableWeightsParameterName, "Specify if the symbolic expression tree comparer should match variable weights.")); Parameters.Add(new LookupParameter(MatchConstantValuesParameterName, "Specify if the symbolic expression tree comparer should match constant values.")); Parameters.Add(new LookupParameter(SimilarityValuesParmeterName, "")); } public override IOperation Apply() { var trees = SymbolicExpressionTreeParameter.ActualValue; double similarity = 0.0; var current = CurrentSymbolicExpressionTree; bool found = false; foreach (var tree in trees) { if (tree == current) { found = true; continue; } if (found) { similarity += SymbolicDataAnalysisExpressionTreeSimilarity.MaxCommonSubtreeSimilarity(current, tree, SimilarityComparer); // similarity += SymbolicDataAnalysisExpressionTreeSimilarity.GeneticItemSimilarity(GeneticItems[current], GeneticItems[tree], MaximumTreeDepth); } } lock (SimilarityParameter.ActualValue) { SimilarityParameter.ActualValue.Value += similarity; } return base.Apply(); } } public static class SymbolicDataAnalysisExpressionTreeSimilarity { public static double CalculateSimilarity(ISymbolicExpressionTreeNode a, ISymbolicExpressionTreeNode b, SymbolicExpressionTreeNodeSimilarityComparer comp) { return 2.0 * SymbolicExpressionTreeMatching.Match(a, b, comp) / (a.GetLength() + b.GetLength()); } public static double MaxCommonSubtreeSimilarity(ISymbolicExpressionTree a, ISymbolicExpressionTree b, SymbolicExpressionTreeNodeSimilarityComparer comparer) { double max = 0; var rootA = a.Root.GetSubtree(0).GetSubtree(0); var rootB = b.Root.GetSubtree(0).GetSubtree(0); foreach (var aa in rootA.IterateNodesBreadth()) { int lenA = aa.GetLength(); if (lenA <= max) continue; foreach (var bb in rootB.IterateNodesBreadth()) { int lenB = bb.GetLength(); if (lenB <= max) continue; int matches = SymbolicExpressionTreeMatching.Match(aa, bb, comparer); if (max < matches) max = matches; } } return 2.0 * max / (rootA.GetLength() + rootB.GetLength()); } public static double GeneticItemSimilarity(ISymbolicExpressionTree a, ISymbolicExpressionTree b, int maximumTreeHeight, bool preventMultipleContribution = true) { const int minLevelDelta = 1; const int maxLevelDelta = 4; var itemsA = a.GetGeneticItems(minLevelDelta, maxLevelDelta).ToArray(); var itemsB = b.GetGeneticItems(minLevelDelta, maxLevelDelta).ToArray(); return GeneticItemSimilarity(itemsA, itemsB, maximumTreeHeight); } public static double GeneticItemSimilarity(GeneticItem[] itemsA, GeneticItem[] itemsB, int maximumTreeHeight, bool preventMultipleContribution = true) { double similarity = 0.0; if (itemsA.Length == 0 || itemsB.Length == 0) return similarity; var flagsB = new bool[itemsB.Length]; for (int i = 0; i != itemsA.Length; ++i) { double simMax = 0.0; int index = -1; for (int j = 0; j != itemsB.Length; ++j) { if (flagsB[j]) continue; double sim = StructuralSimilarity(itemsA[i], itemsB[j], maximumTreeHeight); if (sim > simMax) { simMax = sim; index = j; } if (preventMultipleContribution && index > -1) { flagsB[index] = true; } } similarity += simMax; } return similarity / itemsA.Length; } public static double AdditiveSimilarity(ISymbolicExpressionTree a, ISymbolicExpressionTree b, SymbolicExpressionTreeNodeSimilarityComparer comparer) { var nA = a.Root.GetSubtree(0).GetSubtree(0); var nB = b.Root.GetSubtree(0).GetSubtree(0); var nodesA = nA.IterateNodesBreadth().ToArray(); var nodesB = nB.IterateNodesBreadth().ToArray(); var similarities = nodesA.SelectMany(ia => nodesB, (ia, ib) => CalculateSimilarity(ia, ib, comparer)).Where(s => !s.IsAlmost(0.0)).ToList(); double average = similarities.Count > 0 ? similarities.Average() : 0; if (average > 1.0) throw new Exception("Similarity average should be less than 1.0"); if (average < 0.0) throw new Exception("Similarity average should be greater than 0.0"); return average; } private static double StructuralSimilarity(GeneticItem g1, GeneticItem g2, int heightMax) { if (!(SameType(g1.Ascendant, g2.Ascendant) && SameType(g1.Descendant, g2.Descendant))) return 0.0; double s1 = 1.0 - Math.Abs(g1.LevelDelta - g2.LevelDelta) / heightMax; double s2 = g1.Index == g2.Index ? 1.0 : 0.0; double s3 = g1.ParamA.Variant.Name.Equals(g2.ParamA.Variant.Name) ? 1.0 : 0.0; double s4 = g1.ParamB.Variant.Name.Equals(g2.ParamB.Variant.Name) ? 1.0 : 0.0; double deltaCa = Math.Abs(g1.ParamA.Coeff - g2.ParamA.Coeff); double deltaCb = Math.Abs(g1.ParamB.Coeff - g2.ParamB.Coeff); double s5 = 0.0; double s6 = 0.0; // no time offsets so we hardcode s7 = s8 = 0.0 double s7 = 0.0; double s8 = 0.0; // variable indexes double s9 = 0.0; double s10 = 0.0; // same type with g2.Ascendant so we only do one check if (g1.Ascendant is VariableTreeNode) { s5 = deltaCa / (((Variable)g1.Ascendant.Symbol).WeightManipulatorSigma * 4); s9 = g1.ParamA.VariableIndex.Equals(g2.ParamA.VariableIndex) ? 1.0 : 0.0; } if (g1.Descendant is VariableTreeNode) { s6 = deltaCb / (((Variable)g1.Descendant.Symbol).WeightManipulatorSigma * 4); s10 = g1.ParamB.VariableIndex.Equals(g2.ParamB.VariableIndex) ? 1.0 : 0.0; } double similarity = 1.0; double[] constributors = new double[10] { s1, s2, s3, s4, s5, s6, s7, s8, s9, s10 }; // s1...s10 double[] coefficients = new double[10] { 0.8, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2 }; // c1...c10 for (int i = 0; i != 10; ++i) { similarity *= (1 - (1 - constributors[i]) * coefficients[i]); } return double.IsNaN(similarity) ? 0 : similarity; } // genetic items for computing tree similarity (S. Winkler) public class GeneticItem { public ISymbolicExpressionTreeNode Ascendant; public ISymbolicExpressionTreeNode Descendant; public int LevelDelta; public int Index; public double[] Coefficients; // c_i = 0.2, i=1,...,10, d_1 = 0.8 // parameters for the Ascendant and Descendant public GeneticItemParameters ParamA; public GeneticItemParameters ParamB; } public class GeneticItemParameters { public Symbol Variant; // the variant of functions public double Coeff; // the coefficient of terminals public int TimeOffset; // the time offset of terminals public int VariableIndex; // the variable index (of terminals) } // get genetic items public static List GetGeneticItems(this ISymbolicExpressionTree tree, int minLevelDelta, int maxLevelDelta) { return GetGeneticItems(tree.Root.GetSubtree(0).GetSubtree(0), minLevelDelta, maxLevelDelta).ToList(); } private static double Coefficient(this ISymbolicExpressionTreeNode node) { var variable = node as VariableTreeNode; if (variable != null) return variable.Weight; var constant = node as ConstantTreeNode; if (constant != null) return constant.Value; return 0.0; } private static int VariableIndex(this ISymbolicExpressionTreeNode node) { var variable = node as VariableTreeNode; if (variable != null) return variable.Symbol.AllVariableNames.ToList().IndexOf(variable.VariableName); return -1; } private static IEnumerable GetGeneticItems(ISymbolicExpressionTreeNode node, int minimumLevelDelta, int maximumLevelDelta) { var descendants = node.IterateNodesBreadth().Skip(1).ToArray(); for (int i = 0; i != descendants.Length; ++i) { var descendant = descendants[i]; var levelDelta = node.GetBranchLevel(descendant); if (!(minimumLevelDelta <= levelDelta && levelDelta <= maximumLevelDelta)) continue; var p = descendant; while (p.Parent != node && p.Parent != null) p = p.Parent; if (p.Parent == null) throw new Exception("The child is not a descendant of node"); var geneticItem = new GeneticItem { Ascendant = node, Descendant = descendant, LevelDelta = levelDelta, Index = node.IndexOfSubtree(p), ParamA = new GeneticItemParameters { Coeff = node.Coefficient(), TimeOffset = 0, VariableIndex = node.VariableIndex(), Variant = (Symbol)node.Symbol }, ParamB = new GeneticItemParameters { Coeff = descendant.Coefficient(), TimeOffset = 0, VariableIndex = descendant.VariableIndex(), Variant = (Symbol)descendant.Symbol } }; yield return geneticItem; } } // returns true if both nodes are variables, or both are constants, or both are functions private static bool SameType(ISymbolicExpressionTreeNode a, ISymbolicExpressionTreeNode b) { if (a is VariableTreeNode) { return b is VariableTreeNode; } if (a is ConstantTreeNode) { return b is ConstantTreeNode; } return true; } } }