#region License Information /* HeuristicLab * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL) * * This file is part of HeuristicLab. * * HeuristicLab is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * HeuristicLab is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with HeuristicLab. If not, see . */ #endregion using System; using HeuristicLab.Analysis; using HeuristicLab.Common; using HeuristicLab.Core; using HeuristicLab.Data; using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding; using HeuristicLab.Operators; using HeuristicLab.Optimization; using HeuristicLab.Optimization.Operators; using HeuristicLab.Parameters; using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; using HeuristicLab.Problems.DataAnalysis.Symbolic; using System.Collections.Generic; using HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols; using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Symbols; namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers { /// /// An operator that analyzes the population diversity using fine grained structural tree similarity estimation. /// [Item("FineGrainedStructuralPopulationDiversityAnalyzer", "An operator that analyzes the population diversity using fine grained structural tree similarity estimation.")] [StorableClass] public sealed class FineGrainedStructuralPopulationDiversityAnalyzer : SymbolicRegressionPopulationDiversityAnalyzer { // properties: min level delts, max level delta, etc. #region Properties and Parameters private const string FunctionTreeGrammarParameterName = "FunctionTreeGrammar"; private const string MinimumLevelDeltaParameterName = "MinimumLevelDelta"; private const string MaximumLevelDeltaParameterName = "MaximumLevelDelta"; private const string PreventMultipleComparisonContributionParameterName = "PreventMultipleComparisonContribution"; private const string MaximumExpressionDepthParameterName = "MaxExpressionDepth"; private const string LevelDifferenceCoefficientParameterName = "LevelDifferenceCoefficient"; private const string AncestorIndexCoefficientParameterName = "AncestorIndexCoefficient"; private const string ConstantValueCoefficientParameterName = "ConstantValueCoefficient"; private const string VariableWeightCoefficientParameterName = "VariableWeightCoefficient"; private const string TimeOffsetCoefficientParameterName = "TimeOffsetCoefficient"; private const string VariableIndexCoefficientParameterName = "VariableIndexCoefficient"; private const string AdditiveSimilarityCalculationParameterName = "AdditiveSimilarityCalculation"; public IValueLookupParameter FunctionTreeGrammarParameter { get { return (IValueLookupParameter)Parameters[FunctionTreeGrammarParameterName]; } } public GlobalSymbolicExpressionGrammar FunctionTreeGrammar { get { return FunctionTreeGrammarParameter.ActualValue; } } public IValueLookupParameter MaximumExpressionDepthParameter { get { return (IValueLookupParameter)Parameters[MaximumExpressionDepthParameterName]; } } public int MaximumExpressionDepth { get { return MaximumExpressionDepthParameter.ActualValue.Value; } } public IValueParameter MinimumLevelDeltaParameter { get { return (IValueParameter)Parameters[MinimumLevelDeltaParameterName]; } } public int MinimumLevelDelta { get { return MinimumLevelDeltaParameter.Value.Value; } } public IValueParameter MaximumLevelDeltaParameter { get { return (IValueParameter)Parameters[MaximumLevelDeltaParameterName]; } } public int MaximumLevelDelta { get { return MaximumLevelDeltaParameter.Value.Value; } } public IValueParameter PreventMultipleComparisonContributionParameter { get { return (IValueParameter)Parameters[PreventMultipleComparisonContributionParameterName]; } } public bool PreventMultipleComparisonContribution { get { return PreventMultipleComparisonContributionParameter.Value.Value; } } public IValueParameter LevelDifferenceCoefficientParameter { get { return (IValueParameter)Parameters[LevelDifferenceCoefficientParameterName]; } } public double LevelDifferenceCoefficient { get { return LevelDifferenceCoefficientParameter.Value.Value; } } public IValueParameter AncestorIndexCoefficientParameter { get { return (IValueParameter)Parameters[AncestorIndexCoefficientParameterName]; } } public double AncestorIndexCoefficient { get { return AncestorIndexCoefficientParameter.Value.Value; } } public IValueParameter ConstantValueCoefficientParameter { get { return (IValueParameter)Parameters[ConstantValueCoefficientParameterName]; } } public double ConstantValueCoefficient { get { return ConstantValueCoefficientParameter.Value.Value; } } public IValueParameter VariableWeightCoefficientParameter { get { return (IValueParameter)Parameters[VariableWeightCoefficientParameterName]; } } public double VariableWeightCoefficient { get { return VariableWeightCoefficientParameter.Value.Value; } } public IValueParameter TimeOffsetCoefficientParameter { get { return (IValueParameter)Parameters[TimeOffsetCoefficientParameterName]; } } public double TimeOffsetCoefficientCoefficient { get { return TimeOffsetCoefficientParameter.Value.Value; } } public IValueParameter VariableIndexCoefficientParameter { get { return (IValueParameter)Parameters[VariableIndexCoefficientParameterName]; } } public double VariableIndexCoefficient { get { return VariableIndexCoefficientParameter.Value.Value; } } public IValueParameter AdditiveSimilarityCalculationParameter { get { return (IValueParameter)Parameters[AdditiveSimilarityCalculationParameterName]; } } public bool AdditiveSimilarityCalculation { get { return AdditiveSimilarityCalculationParameter.Value.Value; } } #endregion [StorableConstructor] private FineGrainedStructuralPopulationDiversityAnalyzer(bool deserializing) : base(deserializing) { } private FineGrainedStructuralPopulationDiversityAnalyzer(FineGrainedStructuralPopulationDiversityAnalyzer original, Cloner cloner) : base(original, cloner) { } public FineGrainedStructuralPopulationDiversityAnalyzer() : base() { Parameters.Add(new ValueLookupParameter(FunctionTreeGrammarParameterName, "The grammar that is used for symbolic regression models.")); Parameters.Add(new ValueLookupParameter(MaximumExpressionDepthParameterName, "Maximal depth of the analyzed symbolic expressions.")); Parameters.Add(new ValueParameter(MinimumLevelDeltaParameterName, "Minimum value for the level delta of the analyzed genetic information items.", new IntValue(0))); Parameters.Add(new ValueParameter(MaximumLevelDeltaParameterName, "Maximum value for the level delta of the analyzed genetic information items.", new IntValue(int.MaxValue))); Parameters.Add(new ValueParameter(PreventMultipleComparisonContributionParameterName, "Flag that denotes whether genetic information items are hindered from contributing to the similarity function multiple times.", new BoolValue(false))); Parameters.Add(new ValueParameter(LevelDifferenceCoefficientParameterName, "Weighting coefficient for level differences.", new DoubleValue(0.2))); Parameters.Add(new ValueParameter(AncestorIndexCoefficientParameterName, "Weighting coefficient for ancestor index differences.", new DoubleValue(0.2))); Parameters.Add(new ValueParameter(ConstantValueCoefficientParameterName, "Weighting coefficient for constant value differences.", new DoubleValue(0.2))); Parameters.Add(new ValueParameter(VariableWeightCoefficientParameterName, "Weighting coefficient for variable weight differences.", new DoubleValue(0.2))); Parameters.Add(new ValueParameter(TimeOffsetCoefficientParameterName, "Weighting coefficient for time lag differences.", new DoubleValue(0.2))); Parameters.Add(new ValueParameter(VariableIndexCoefficientParameterName, "Weighting coefficient for variable index differences.", new DoubleValue(0.2))); Parameters.Add(new ValueParameter(AdditiveSimilarityCalculationParameterName, "Flag that denotes whether the similarity of genetic information items shall be calculated using additive calculation.", new BoolValue(true))); } public override IDeepCloneable Clone(Cloner cloner) { return new FineGrainedStructuralPopulationDiversityAnalyzer(this, cloner); } protected override double[,] CalculateSimilarities(SymbolicExpressionTree[] solutions) { // collect information stored int the problem's parameters double variableWeightSigma = 0; double constantMinimumValue = 0; double constantMaximumValue = 0; int minimumTimeOffset = 0; int maximumTimeOffset = 0; foreach (Symbol symbol in FunctionTreeGrammar.Symbols) { Constant constant = symbol as Constant; if (constant !=null) { constantMinimumValue = constant.MinValue; constantMaximumValue = constant.MaxValue; } DataAnalysis.Symbolic.Symbols.Variable variable = symbol as DataAnalysis.Symbolic.Symbols.Variable; if (variable != null) variableWeightSigma = variable.WeightSigma; LaggedVariable laggedVariable = symbol as LaggedVariable; if (laggedVariable !=null) { minimumTimeOffset = laggedVariable.MinLag; maximumTimeOffset = laggedVariable.MaxLag; } } int n = solutions.Length; List variableNames = new List(); foreach (StringValue variableName in ProblemData.InputVariables) { variableNames.Add(variableName.Value); } variableNames.Add(ProblemData.TargetVariable.Value); // collect genetic information item lists and store them also in dictionaries IList[] geneticInformationItemsLists = new List[n]; IDictionary>[] geneticInformationItemsListsDictionaries = new IDictionary>[n]; for (int i = 0; i < n; i++) { geneticInformationItemsLists[i] = GeneticInformationItem.getGeneticInformationItems(solutions[i].Root, variableNames, MinimumLevelDelta, MaximumLevelDelta); geneticInformationItemsListsDictionaries[i] = GeneticInformationItem.GetDictionary(geneticInformationItemsLists[i]); } // calculate solution similarities double[,] similarities = new double[n, n]; for (int i = 0; i < n; i++) { for (int j = 0; j < n; j++) { if (i == j) similarities[i, j] = 1; else { IList solution1GeneticItems = geneticInformationItemsLists[i]; IDictionary> solution2GeneticItemsDictionary = GeneticInformationItem.CopyDictionary(geneticInformationItemsListsDictionaries[j]); double similarity = 0; for (int k = 0; k < solution1GeneticItems.Count; k++) { double bestPendantSimilarity = 0; GeneticInformationItem item = solution1GeneticItems[k]; GeneticInformationItem bestPendant = null; IList geneticInformationItemsList = null; string key = GeneticInformationItem.GetKey(item); if (solution2GeneticItemsDictionary.ContainsKey(key)) { geneticInformationItemsList = solution2GeneticItemsDictionary[GeneticInformationItem.GetKey(item)]; bestPendant = GeneticInformationItem.FindBestPendant(item, geneticInformationItemsList, constantMinimumValue, constantMaximumValue, variableWeightSigma, MaximumExpressionDepth, minimumTimeOffset, maximumTimeOffset, LevelDifferenceCoefficient, AncestorIndexCoefficient, ConstantValueCoefficient, VariableWeightCoefficient, TimeOffsetCoefficientCoefficient, VariableIndexCoefficient, AdditiveSimilarityCalculation, out bestPendantSimilarity); } if (bestPendant != null) { similarity += bestPendantSimilarity; if (PreventMultipleComparisonContribution) geneticInformationItemsList.Remove(bestPendant); } } similarities[i, j] = similarity / solution1GeneticItems.Count; } } } return similarities; } #region private class GeneticInformationItem private class GeneticInformationItem { private Type myAncestorDefinition; public Type AncestorDefinition { get { return myAncestorDefinition; } } private int myAncestorIndex; public int AncestorIndex { get { return myAncestorIndex; } } private Type myDescendantDefinition; public Type DescendantDefinition { get { return myDescendantDefinition; } } private int myAncestorLevel; public int AncestorLevel { get { return myAncestorLevel; } } private double myDescendantCoefficient; public double DescendantCoefficient { get { return myDescendantCoefficient; } } private double myDescendantVariableIndex; public double DescendantVariableIndex { get { return myDescendantVariableIndex; } } private int myDescendantTimeOffset; public int DescendantTimeOffset { get { return myDescendantTimeOffset; } } private SymbolicExpressionTreeNode myDescendantTreeNode; public SymbolicExpressionTreeNode DescendantTreeNode { get { return myDescendantTreeNode; } } private int myDescendantLevel; public int DescendantLevel { get { return myDescendantLevel; } } public int LevelDelta { get { return (myDescendantLevel - myAncestorLevel); } } public static IList CopyList(IList GeneticInformationItemsList) { List list = new List(GeneticInformationItemsList.Count); list.AddRange(GeneticInformationItemsList); return list; } public static string GetKey(GeneticInformationItem item) { return item.AncestorDefinition.Name.ToString() + "," + item.DescendantDefinition.Name.ToString(); } public static IDictionary> GetDictionary(IList GeneticInformationItemsList) { IDictionary> dictionary = new Dictionary>(); foreach (GeneticInformationItem item in GeneticInformationItemsList) { string key = GetKey(item); if (!dictionary.ContainsKey(key)) dictionary.Add(key, new List()); dictionary[key].Add(item); } return dictionary; } public static IDictionary> CopyDictionary(IDictionary> Dictionary) { IDictionary> copy = new Dictionary>(); foreach (KeyValuePair> pair in Dictionary) { copy.Add(pair.Key, CopyList(pair.Value)); } return copy; } public static GeneticInformationItem FindBestPendant(GeneticInformationItem Item, IList ComparisonItems, double ConstantMinimum, double ConstantMaximum, double VariableWeightSigma, int MaximumTreeHeight, int MinimumTimeOffset, int MaximumTimeOffset, double LevelDifferenceCoefficient, double AncestorIndexCoefficient, double ConstantValueCoefficient, double VariableWeightCoefficient, double TimeOffsetCoefficient, double VariableIndexCoefficient, bool AdditiveSimilarityCalculation, out double BestPendantSimilarity) { int maxSimilarityIndex = -1; double similarity, maxSimilarity = -double.MaxValue; for (int i = 0; i < ComparisonItems.Count; i++) { similarity = Similarity(Item, ComparisonItems[i], ConstantMinimum, ConstantMaximum, VariableWeightSigma, MaximumTreeHeight, MinimumTimeOffset, MaximumTimeOffset, LevelDifferenceCoefficient, AncestorIndexCoefficient, ConstantValueCoefficient, VariableWeightSigma, TimeOffsetCoefficient, VariableWeightCoefficient, AdditiveSimilarityCalculation); if (!double.IsNaN(similarity) && similarity > maxSimilarity) { maxSimilarity = similarity; maxSimilarityIndex = i; } } BestPendantSimilarity = maxSimilarity; if (maxSimilarityIndex >= 0) return ComparisonItems[maxSimilarityIndex]; else return null; } public static double Similarity(GeneticInformationItem Item1, GeneticInformationItem Item2, double ConstantMinimum, double ConstantMaximum, double VariableWeightSigma, int MaximumTreeHeight, int MinimumTimeOffset, int MaximumTimeOffset, double LevelDifferenceCoefficient, double AncestorIndexCoefficient, double ConstantValueCoefficient, double VariableWeightCoefficient, double TimeOffsetCoefficient, double VariableIndexCoefficient, bool AdditiveSimilarityCalculation) { if (Item1.AncestorDefinition != Item2.AncestorDefinition || Item1.DescendantDefinition != Item2.DescendantDefinition) return double.NaN; // the two items for sure have the same behavior definitions #region initialize punishments double punishmentContributionSum = 0; double punishmentCoefficientsProduct = 1; double ancestorIndexDifferencePunishment = 0; double levelDifferencePunishment = 0; double descendantConstantValueDifferencePunishment = 0; double descendantVariableWeightDifferencePunishment = 0; double descendantTimeOffsetDifferencePunishment = 0; double descendantVariableIndexDifferencePunishment = 0; #endregion if (LevelDifferenceCoefficient > 0) { levelDifferencePunishment = Item1.LevelDelta - Item2.LevelDelta; if (levelDifferencePunishment < 0) levelDifferencePunishment = -levelDifferencePunishment; levelDifferencePunishment /= MaximumTreeHeight; if (levelDifferencePunishment > 1) levelDifferencePunishment = 1; levelDifferencePunishment *= LevelDifferenceCoefficient; punishmentContributionSum += LevelDifferenceCoefficient; punishmentCoefficientsProduct *= (1 - LevelDifferenceCoefficient); } if (AncestorIndexCoefficient > 0) { if (Item1.AncestorIndex != Item2.AncestorIndex) ancestorIndexDifferencePunishment = 1; else ancestorIndexDifferencePunishment = 0; ancestorIndexDifferencePunishment *= AncestorIndexCoefficient; punishmentContributionSum += AncestorIndexCoefficient; punishmentCoefficientsProduct *= (1 - AncestorIndexCoefficient); } if (Item1.DescendantTreeNode is ConstantTreeNode) { if (ConstantValueCoefficient > 0) { double constantValueCoefficientDifference = Math.Abs(Item1.DescendantCoefficient - Item2.DescendantCoefficient); // assume uniform distribution within given minimum and maximum descendantConstantValueDifferencePunishment = (constantValueCoefficientDifference / (ConstantMaximum - ConstantMinimum)); if (descendantConstantValueDifferencePunishment > 1) descendantConstantValueDifferencePunishment = 1; descendantConstantValueDifferencePunishment *= ConstantValueCoefficient; punishmentContributionSum += ConstantValueCoefficient; punishmentCoefficientsProduct *= (1 - ConstantValueCoefficient); } } if(Item1.DescendantTreeNode is VariableTreeNode) { if (VariableWeightCoefficient > 0) { double variableWeightDifference = Math.Abs(Item1.DescendantCoefficient - Item2.DescendantCoefficient); // assume normal distribution within given sigma descendantVariableWeightDifferencePunishment = variableWeightDifference / (VariableWeightSigma * 4); if (descendantVariableWeightDifferencePunishment > 1) descendantVariableWeightDifferencePunishment = 1; descendantVariableWeightDifferencePunishment *= VariableWeightCoefficient; punishmentContributionSum += VariableWeightCoefficient; punishmentCoefficientsProduct *= (1 - VariableWeightCoefficient); } if (TimeOffsetCoefficient > 0) { double timeOffsetDifference = Math.Abs(Item1.DescendantTimeOffset - Item2.DescendantTimeOffset); if (MaximumTimeOffset > 0) descendantTimeOffsetDifferencePunishment = timeOffsetDifference / (MaximumTimeOffset - MinimumTimeOffset); descendantTimeOffsetDifferencePunishment *= TimeOffsetCoefficient; punishmentContributionSum += TimeOffsetCoefficient; punishmentCoefficientsProduct *= (1 - TimeOffsetCoefficient); } if (VariableIndexCoefficient > 0) { if (Item1.DescendantVariableIndex != Item2.DescendantVariableIndex) descendantVariableIndexDifferencePunishment = 1; else descendantVariableIndexDifferencePunishment = 0; descendantVariableIndexDifferencePunishment *= VariableIndexCoefficient; punishmentContributionSum += VariableIndexCoefficient; punishmentCoefficientsProduct *= (1 - VariableIndexCoefficient); } } double result; if (AdditiveSimilarityCalculation) { double punishmentsSum = ancestorIndexDifferencePunishment + levelDifferencePunishment + descendantConstantValueDifferencePunishment + descendantVariableWeightDifferencePunishment + descendantTimeOffsetDifferencePunishment + descendantVariableIndexDifferencePunishment; result = (1 - punishmentsSum / punishmentContributionSum); } else { result = (1 - ancestorIndexDifferencePunishment) * (1 - levelDifferencePunishment) * (1 - descendantConstantValueDifferencePunishment) * (1 - descendantVariableWeightDifferencePunishment) * (1 - descendantTimeOffsetDifferencePunishment) * (1 - descendantVariableIndexDifferencePunishment); // worst possible result is (1-punishmentCoefficientsProduct), so scale linearly to [0;1]: result = (result - punishmentCoefficientsProduct) / (1 - punishmentCoefficientsProduct); } if (result < 0 || result > 1) throw new InvalidOperationException("ERROR in GeneticInformationItem.Similarity: An invalid similarity value (" + result.ToString() + ") has been calculated."); return result; } public static IList getGeneticInformationItems(SymbolicExpressionTreeNode node, List variableNames, int MinimumLevelDelta, int MaximumLevelDelta) { // first we have to collect all items, then we filter; it is not possible to filter while collecting because the items are // collected recursively and used for collecting the parents' items. if (MinimumLevelDelta > MaximumLevelDelta) return new List(); IList list = getGeneticInformationItems(node, variableNames, 0); List resultList = new List(); foreach (GeneticInformationItem item in list) if (item.LevelDelta >= MinimumLevelDelta && item.LevelDelta <= MaximumLevelDelta) resultList.Add(item); return resultList; } private static IList getGeneticInformationItems(SymbolicExpressionTreeNode node, List variableNames, int level) { // Idea: collect all descendants' lists and then add new items using the retrieved ones. // This should save lots of time and reduce complexity of the items retrieval process. // Program roots are not considered, neither are start symbol nodes if (node.Symbol is ProgramRootSymbol) return getGeneticInformationItems(node.SubTrees[0], variableNames, level + 1); List list = new List(); // add item for this node: if (!(node.Symbol is StartSymbol)) { list.Add(new GeneticInformationItem(node, variableNames, level)); } // add items for the descendants, but prevent multiple references to descendant nodes: List descendantNodes = new List(); for (int i = 0; i < node.SubTrees.Count; i++) { IList descendantItems = getGeneticInformationItems(node.SubTrees[i], variableNames, level + 1); list.AddRange(descendantItems); if (!(node.Symbol is StartSymbol)) foreach (GeneticInformationItem item in descendantItems) { if (!descendantNodes.Contains(item.DescendantTreeNode)) { list.Add(new GeneticInformationItem(node, item, i, level)); descendantNodes.Add(item.DescendantTreeNode); } } } return list; } private GeneticInformationItem (SymbolicExpressionTreeNode node, List variableNames, int level) { myAncestorIndex = -1; VariableTreeNode variableTreeNode = node as VariableTreeNode; LaggedVariableTreeNode laggedVariableTreeNode = node as LaggedVariableTreeNode; ConstantTreeNode constantTreeNode = node as ConstantTreeNode; myAncestorDefinition = node.Symbol.GetType(); myDescendantDefinition = myAncestorDefinition; if (variableTreeNode != null) myDescendantCoefficient = variableTreeNode.Weight; else if (constantTreeNode != null) myDescendantCoefficient = constantTreeNode.Value; else myDescendantCoefficient = double.NaN; if (laggedVariableTreeNode != null) myDescendantTimeOffset = laggedVariableTreeNode.Lag; else myDescendantTimeOffset = 0; if (variableTreeNode != null) myDescendantVariableIndex = variableNames.IndexOf(variableTreeNode.VariableName); else myDescendantVariableIndex = -1; myAncestorLevel = level; myDescendantLevel = level; myDescendantTreeNode = node; } private GeneticInformationItem(SymbolicExpressionTreeNode parentNode, GeneticInformationItem descendantGeneticInformationItem, int ancestorIndex, int parentNodeLevel) { myAncestorIndex = ancestorIndex; myAncestorLevel = parentNodeLevel; myAncestorDefinition = parentNode.Symbol.GetType(); myDescendantCoefficient = descendantGeneticInformationItem.DescendantCoefficient; myDescendantDefinition = descendantGeneticInformationItem.DescendantDefinition; myDescendantTimeOffset = descendantGeneticInformationItem.DescendantTimeOffset; myDescendantVariableIndex = descendantGeneticInformationItem.DescendantVariableIndex; myDescendantLevel = descendantGeneticInformationItem.DescendantLevel; myDescendantTreeNode = descendantGeneticInformationItem.DescendantTreeNode; } public override string ToString() { return "ancestor: " + AncestorDefinition.Name.ToString() + ", [" + AncestorIndex + "]; " + "descendant: " + DescendantDefinition.Name.ToString() + " (varIndex " + DescendantVariableIndex + ", " + DescendantCoefficient + ", t-" + DescendantTimeOffset + ");" + " level delta = " + DescendantLevel + "-" + AncestorLevel + " = " + LevelDelta; } } #endregion } }