#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2012 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Linq;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Operators;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;

namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
  /// <summary>
  /// Operator that sums the maximum-common-subtree similarity between
  /// <see cref="CurrentSymbolicExpressionTree"/> and the trees obtained from
  /// <see cref="SymbolicExpressionTreeParameter"/>, and adds that sum to the value
  /// behind <see cref="SimilarityParameter"/>.
  /// </summary>
  /// <remarks>
  /// NOTE(review): the parameter types below carry no generic type arguments, yet members such as
  /// <c>CurrentSymbolicExpressionTreeParameter.Value</c> and <c>SimilarityParameter.ActualValue.Value</c>
  /// are used as strongly typed values. The type arguments were presumably lost somewhere; the
  /// declarations are kept exactly as found - confirm against the project before building.
  /// </remarks>
  [StorableClass]
  public class SymbolicDataAnalysisExpressionTreeSimilarityCalculator : SingleSuccessorOperator {
    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
    private const string CurrentSymbolicExpressionTreeParameterName = "CurrentSymbolicExpressionTree";
    private const string SimilarityValuesParameterName = "Similarity";
    // comparer parameters
    private const string MatchVariablesParameterName = "MatchVariableNames";
    private const string MatchVariableWeightsParameterName = "MatchVariableWeights";
    private const string MatchConstantValuesParameterName = "MatchConstantValues";

    #region parameter properties
    public IScopeTreeLookupParameter SymbolicExpressionTreeParameter {
      get { return (IScopeTreeLookupParameter)Parameters[SymbolicExpressionTreeParameterName]; }
    }
    public IValueParameter CurrentSymbolicExpressionTreeParameter {
      get { return (IValueParameter)Parameters[CurrentSymbolicExpressionTreeParameterName]; }
    }
    public ILookupParameter MatchVariableNamesParameter {
      get { return (ILookupParameter)Parameters[MatchVariablesParameterName]; }
    }
    public ILookupParameter MatchVariableWeightsParameter {
      get { return (ILookupParameter)Parameters[MatchVariableWeightsParameterName]; }
    }
    public ILookupParameter MatchConstantValuesParameter {
      get { return (ILookupParameter)Parameters[MatchConstantValuesParameterName]; }
    }
    public ILookupParameter SimilarityParameter {
      get { return (ILookupParameter)Parameters[SimilarityValuesParameterName]; }
    }
    #endregion

    /// <summary>
    /// The tree every other tree is compared against; stored in
    /// <see cref="CurrentSymbolicExpressionTreeParameter"/>.
    /// </summary>
    public ISymbolicExpressionTree CurrentSymbolicExpressionTree {
      get { return CurrentSymbolicExpressionTreeParameter.Value; }
      set { CurrentSymbolicExpressionTreeParameter.Value = value; }
    }

    /// <summary>
    /// Node comparer handed to the similarity calculation in <see cref="Apply"/>.
    /// It is a plain property (not a parameter) and has to be assigned by whoever configures the operator.
    /// </summary>
    public SymbolicExpressionTreeNodeSimilarityComparer SimilarityComparer { get; set; }

    /// <summary>
    /// Cloning constructor.
    /// </summary>
    /// <param name="original">The operator being cloned.</param>
    /// <param name="cloner">The cloner forwarded to the base class.</param>
    protected SymbolicDataAnalysisExpressionTreeSimilarityCalculator(SymbolicDataAnalysisExpressionTreeSimilarityCalculator original, Cloner cloner)
      : base(original, cloner) {
      // The comparer is not part of the parameter collection, so the base class does not copy it.
      // Without this assignment a clone would pass a null comparer to the matcher.
      // The reference is shared with the original rather than deep-cloned.
      SimilarityComparer = original.SimilarityComparer;
    }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new SymbolicDataAnalysisExpressionTreeSimilarityCalculator(this, cloner);
    }

    [StorableConstructor]
    protected SymbolicDataAnalysisExpressionTreeSimilarityCalculator(bool deserializing) : base(deserializing) { }

    /// <summary>
    /// Creates the operator and registers its six parameters.
    /// </summary>
    public SymbolicDataAnalysisExpressionTreeSimilarityCalculator()
      : base() {
      Parameters.Add(new ScopeTreeLookupParameter(SymbolicExpressionTreeParameterName, "The symbolic expression trees to analyze."));
      Parameters.Add(new ValueParameter(CurrentSymbolicExpressionTreeParameterName, ""));
      Parameters.Add(new LookupParameter(MatchVariablesParameterName, "Specify if the symbolic expression tree comparer should match variable names."));
      Parameters.Add(new LookupParameter(MatchVariableWeightsParameterName, "Specify if the symbolic expression tree comparer should match variable weights."));
      Parameters.Add(new LookupParameter(MatchConstantValuesParameterName, "Specify if the symbolic expression tree comparer should match constant values."));
      Parameters.Add(new LookupParameter(SimilarityValuesParameterName, ""));
    }

    /// <summary>
    /// Accumulates the similarity between the current tree and every tree from the first
    /// reference-equal occurrence of the current tree onwards (that occurrence included), then
    /// adds the total to the similarity value under a lock.
    /// </summary>
    /// <returns>The operation returned by the base implementation.</returns>
    public override IOperation Apply() {
      var trees = SymbolicExpressionTreeParameter.ActualValue;
      var current = CurrentSymbolicExpressionTree;

      bool found = false;
      double similarity = 0.0;
      foreach (var tree in trees) {
        // Trees that precede the current tree are skipped; if the current tree never
        // shows up, nothing is accumulated at all.
        if (!found && tree != current) continue;
        found = true;
        similarity += SymbolicDataAnalysisExpressionTreeSimilarity.MaxCommonSubtreeSimilarity(current, tree, SimilarityComparer);
      }

      // Resolve the accumulator once so that the object being locked is guaranteed to be
      // the object being updated.
      var accumulator = SimilarityParameter.ActualValue;
      lock (accumulator) {
        accumulator.Value += similarity;
      }
      return base.Apply();
    }
  }

  /// <summary>
  /// Similarity measures between symbolic expression trees, built on
  /// <c>SymbolicExpressionTreeMatching.Match</c>.
  /// </summary>
  public static class SymbolicDataAnalysisExpressionTreeSimilarity {
    /// <summary>
    /// Returns a similarity value based on the size of the maximum common subtree according to the given equality comparison.
    /// </summary>
    /// <param name="a">The first tree.</param>
    /// <param name="b">The second tree.</param>
    /// <param name="comparer">The node comparer passed through to the matcher.</param>
    /// <returns>
    /// The largest match count found over all pairs of subtrees, divided by the larger of
    /// <c>a.Length</c> and <c>b.Length</c>.
    /// </returns>
    /// <remarks>
    /// NOTE(review): subtrees are enumerated below <c>Root.GetSubtree(0).GetSubtree(0)</c>, while the
    /// divisor uses the <c>Length</c> of the whole tree. If <c>Length</c> also counts the two enclosing
    /// nodes, identical trees score slightly below 1 - confirm that this is intended.
    /// </remarks>
    /// <exception cref="ArgumentNullException"><paramref name="a"/> or <paramref name="b"/> is null.</exception>
    public static double MaxCommonSubtreeSimilarity(ISymbolicExpressionTree a, ISymbolicExpressionTree b, SymbolicExpressionTreeNodeSimilarityComparer comparer) {
      if (a == null) throw new ArgumentNullException("a");
      if (b == null) throw new ArgumentNullException("b");

      // Both start nodes are loop invariant; resolve them once.
      var rootA = a.Root.GetSubtree(0).GetSubtree(0);
      var rootB = b.Root.GetSubtree(0).GetSubtree(0);

      int max = 0;
      foreach (var aa in rootA.IterateNodesBreadth()) {
        // A subtree that is not larger than the best match so far is not examined.
        if (aa.GetLength() <= max) continue;
        foreach (var bb in rootB.IterateNodesBreadth()) {
          if (bb.GetLength() <= max) continue;
          int matches = SymbolicExpressionTreeMatching.Match(aa, bb, comparer);
          if (max < matches) max = matches;
        }
      }
      return (double)max / Math.Max(a.Length, b.Length);
    }

    /// <summary>
    /// Match count of two subtrees divided by the length of the larger subtree.
    /// </summary>
    private static double CalculateSimilarity(ISymbolicExpressionTreeNode a, ISymbolicExpressionTreeNode b, SymbolicExpressionTreeNodeSimilarityComparer comp) {
      return (double)SymbolicExpressionTreeMatching.Match(a, b, comp) / Math.Max(a.GetLength(), b.GetLength());
    }

    /// <summary>
    /// Averages the pairwise subtree similarity over all pairs of nodes that have at least one
    /// subtree, one node taken from each tree.
    /// </summary>
    /// <param name="a">The first tree.</param>
    /// <param name="b">The second tree.</param>
    /// <param name="comparer">The node comparer passed through to the matcher.</param>
    /// <returns>
    /// The mean pairwise similarity, or 0 when either tree contains no node with subtrees
    /// (there is no pair to average, and the plain quotient would be NaN).
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="a"/> or <paramref name="b"/> is null.</exception>
    public static double CalculateCompoundSimilarity(ISymbolicExpressionTree a, ISymbolicExpressionTree b, SymbolicExpressionTreeNodeSimilarityComparer comparer) {
      if (a == null) throw new ArgumentNullException("a");
      if (b == null) throw new ArgumentNullException("b");

      var nodesA = a.Root.GetSubtree(0).GetSubtree(0).IterateNodesBreadth().Where(n => n.SubtreeCount > 0).ToArray();
      var nodesB = b.Root.GetSubtree(0).GetSubtree(0).IterateNodesBreadth().Where(n => n.SubtreeCount > 0).ToArray();

      // No pairs to compare: return 0 instead of letting 0.0 / 0 produce NaN.
      if (nodesA.Length == 0 || nodesB.Length == 0) return 0.0;

      double similaritySum = 0;
      foreach (var nodeA in nodesA) {
        foreach (var nodeB in nodesB) {
          similaritySum += CalculateSimilarity(nodeA, nodeB, comparer);
        }
      }
      // Form the pair count in double precision so that it cannot overflow Int32.
      return similaritySum / ((double)nodesA.Length * nodesB.Length);
    }
  }

  /// <summary>
  /// Pairs a tree node with a flag. Nothing in this file uses the type; it is kept because it is
  /// visible to the rest of the assembly.
  /// </summary>
  class MatchItem {
    public ISymbolicExpressionTreeNode Node;
    public bool Matched;
  }
}