#region License Information
/* HeuristicLab
* Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see .
*/
#endregion
using System;
using HeuristicLab.Analysis;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Operators;
using HeuristicLab.Optimization;
using HeuristicLab.Optimization.Operators;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.Problems.DataAnalysis.Symbolic;
using System.Collections.Generic;
using HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Symbols;
namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers {
///
/// An operator that analyzes the population diversity using fine grained structural tree similarity estimation.
///
[Item("FineGrainedStructuralPopulationDiversityAnalyzer", "An operator that analyzes the population diversity using fine grained structural tree similarity estimation.")]
[StorableClass]
public sealed class FineGrainedStructuralPopulationDiversityAnalyzer : SymbolicRegressionPopulationDiversityAnalyzer {
// properties: min level delts, max level delta, etc.
private const string FunctionTreeGrammarParameterName = "FunctionTreeGrammar";
private const string MinimumLevelDeltaParameterName = "MinimumLevelDelta";
private const string MaximumLevelDeltaParameterName = "MaximumLevelDelta";
private const string PreventMultipleComparisonContributionParameterName = "PreventMultipleComparisonContribution";
public IValueLookupParameter FunctionTreeGrammarParameter {
get { return (IValueLookupParameter)Parameters[FunctionTreeGrammarParameterName]; }
}
public GlobalSymbolicExpressionGrammar FunctionTreeGrammar {
get { return FunctionTreeGrammarParameter.ActualValue; }
}
public IValueParameter MinimumLevelDeltaParameter {
get { return (IValueParameter)Parameters[MinimumLevelDeltaParameterName]; }
}
public int MinimumLevelDelta {
get { return MinimumLevelDeltaParameter.Value.Value; }
}
public IValueParameter MaximumLevelDeltaParameter {
get { return (IValueParameter)Parameters[MaximumLevelDeltaParameterName]; }
}
public int MaximumLevelDelta {
get { return MaximumLevelDeltaParameter.Value.Value; }
}
public IValueParameter PreventMultipleComparisonContributionParameter {
get { return (IValueParameter)Parameters[PreventMultipleComparisonContributionParameterName]; }
}
public bool PreventMultipleComparisonContribution {
get { return PreventMultipleComparisonContributionParameter.Value.Value; }
}
[StorableConstructor]
private FineGrainedStructuralPopulationDiversityAnalyzer(bool deserializing) : base(deserializing) { }
private FineGrainedStructuralPopulationDiversityAnalyzer(FineGrainedStructuralPopulationDiversityAnalyzer original, Cloner cloner) : base(original, cloner) { }
public FineGrainedStructuralPopulationDiversityAnalyzer() : base() {
Parameters.Add(new ValueLookupParameter(FunctionTreeGrammarParameterName, "The grammar that is used for symbolic regression models."));
Parameters.Add(new ValueParameter(MinimumLevelDeltaParameterName, "Minimum value for the level delta of the analyzed genetic information items.", new IntValue(0)));
Parameters.Add(new ValueParameter(MaximumLevelDeltaParameterName, "Maximum value for the level delta of the analyzed genetic information items.", new IntValue(int.MaxValue)));
Parameters.Add(new ValueParameter(PreventMultipleComparisonContributionParameterName, "Flag that denotes whether genetic information items are hindered from contributing to the similarity function multiple times.", new BoolValue(false)));
}
public override IDeepCloneable Clone(Cloner cloner) {
return new FineGrainedStructuralPopulationDiversityAnalyzer(this, cloner);
}
protected override double[,] CalculateSimilarities(SymbolicExpressionTree[] solutions) {
double variableWeightSigma = 0;
double constantMinimumValue = 0;
double constantMaximumValue = 0;
int minimumTimeOffset = 0;
int maximumTimeOffset = 0;
foreach (Symbol symbol in FunctionTreeGrammar.Symbols) {
Constant constant = symbol as Constant;
if (constant !=null) {
constantMinimumValue = constant.MinValue;
constantMaximumValue = constant.MaxValue;
}
DataAnalysis.Symbolic.Symbols.Variable variable = symbol as DataAnalysis.Symbolic.Symbols.Variable;
if (variable != null)
variableWeightSigma = variable.WeightSigma;
LaggedVariable laggedVariable = symbol as LaggedVariable;
if (laggedVariable !=null) {
minimumTimeOffset = laggedVariable.MinLag;
maximumTimeOffset = laggedVariable.MaxLag;
}
}
int n = solutions.Length;
List variableNames = new List();
foreach (StringValue variableName in ProblemData.InputVariables) {
variableNames.Add(variableName.Value);
}
variableNames.Add(ProblemData.TargetVariable.Value);
IList[] geneticInformationItemsLists = new List[n];
for (int i = 0; i < n; i++) {
geneticInformationItemsLists[i] = GeneticInformationItem.getGeneticInformationItems(solutions[i].Root, variableNames, MinimumLevelDelta, MaximumLevelDelta);
}
double[,] result = new double[n, n];
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
if (i == j)
result[i, j] = 1;
else {
IList solution1GeneticItems = GeneticInformationItem.CopyList(geneticInformationItemsLists[i]);
IList solution2GeneticItems = GeneticInformationItem.CopyList(geneticInformationItemsLists[j]);
double similarity = 0;
for (int k = 0; k < solution1GeneticItems.Count; k++) {
double bestPendantSimilarity;
GeneticInformationItem bestPendant = GeneticInformationItem.FindBestPendant(solution1GeneticItems[k], solution2GeneticItems,
constantMinimumValue, constantMaximumValue, variableWeightSigma,
/* TODO: */ 100, 10, 1, 1, 1, 1, 1, 1, true, out bestPendantSimilarity);
if (bestPendant != null) {
similarity += bestPendantSimilarity;
if (PreventMultipleComparisonContribution)
solution2GeneticItems.Remove(bestPendant);
}
}
result[i, j] = similarity / solution1GeneticItems.Count;
}
}
}
return result;
}
#region private class GeneticInformationItem
private class GeneticInformationItem {
private Type myAncestorDefinition;
public Type AncestorDefinition {
get { return myAncestorDefinition; }
}
private int myAncestorIndex;
public int AncestorIndex {
get { return myAncestorIndex; }
}
private Type myDescendantDefinition;
public Type DescendantDefinition {
get { return myDescendantDefinition; }
}
private int myAncestorLevel;
public int AncestorLevel {
get { return myAncestorLevel; }
}
private double myDescendantCoefficient;
public double DescendantCoefficient {
get { return myDescendantCoefficient; }
}
private double myDescendantVariableIndex;
public double DescendantVariableIndex {
get { return myDescendantVariableIndex; }
}
private int myDescendantTimeOffset;
public int DescendantTimeOffset {
get { return myDescendantTimeOffset; }
}
private SymbolicExpressionTreeNode myDescendantTreeNode;
public SymbolicExpressionTreeNode DescendantTreeNode {
get { return myDescendantTreeNode; }
}
private int myDescendantLevel;
public int DescendantLevel {
get { return myDescendantLevel; }
}
public int LevelDelta {
get { return (myDescendantLevel - myAncestorLevel); }
}
public static IList CopyList(IList GeneticInformationItemsList) {
List list = new List(GeneticInformationItemsList.Count);
list.AddRange(GeneticInformationItemsList);
return list;
}
public static GeneticInformationItem FindBestPendant(GeneticInformationItem Item, IList ComparisonItems,
double ConstantMinimum, double ConstantMaximum, double VariableWeightSigma,
int MaximumTreeHeight, int MaximumTimeOffset,
double LevelDifferenceCoefficient, double AncestorIndexCoefficient,
double ConstantValueCoefficient, double VariableWeightCoefficient, double TimeOffsetCoefficient, double VariableIndexCoefficient,
bool AdditiveSimilarityCalculation,
out double BestPendantSimilarity) {
int maxSimilarityIndex = -1;
double similarity, maxSimilarity = -double.MaxValue;
for (int i = 0; i < ComparisonItems.Count; i++) {
similarity = Similarity(Item, ComparisonItems[i], ConstantMinimum, ConstantMaximum, VariableWeightSigma, MaximumTreeHeight, MaximumTimeOffset,
LevelDifferenceCoefficient, AncestorIndexCoefficient, ConstantValueCoefficient, VariableWeightSigma, TimeOffsetCoefficient,
VariableWeightCoefficient, AdditiveSimilarityCalculation);
if (!double.IsNaN(similarity) && similarity > maxSimilarity) {
maxSimilarity = similarity;
maxSimilarityIndex = i;
}
}
BestPendantSimilarity = maxSimilarity;
if (maxSimilarityIndex >= 0)
return ComparisonItems[maxSimilarityIndex];
else
return null;
}
public static double Similarity(GeneticInformationItem Item1, GeneticInformationItem Item2,
double ConstantMinimum, double ConstantMaximum, double VariableWeightSigma,
int MaximumTreeHeight, int MaximumTimeOffset,
double LevelDifferenceCoefficient, double AncestorIndexCoefficient,
double ConstantValueCoefficient, double VariableWeightCoefficient, double TimeOffsetCoefficient, double VariableIndexCoefficient,
bool AdditiveSimilarityCalculation) {
if (Item1.AncestorDefinition != Item2.AncestorDefinition || Item1.DescendantDefinition != Item2.DescendantDefinition)
return double.NaN;
// the two items for sure have the same behavior definitions
#region initialize punishments
double punishmentContributionSum = 0;
double punishmentCoefficientsProduct = 1;
double ancestorIndexDifferencePunishment = 0;
double levelDifferencePunishment = 0;
double descendantConstantValueDifferencePunishment = 0;
double descendantVariableWeightDifferencePunishment = 0;
double descendantTimeOffsetDifferencePunishment = 0;
double descendantVariableIndexDifferencePunishment = 0;
#endregion
if (LevelDifferenceCoefficient > 0) {
levelDifferencePunishment = Item1.LevelDelta - Item2.LevelDelta;
if (levelDifferencePunishment < 0)
levelDifferencePunishment = -levelDifferencePunishment;
levelDifferencePunishment /= MaximumTreeHeight;
if (levelDifferencePunishment > 1)
levelDifferencePunishment = 1;
levelDifferencePunishment *= LevelDifferenceCoefficient;
punishmentContributionSum += LevelDifferenceCoefficient;
punishmentCoefficientsProduct *= (1 - LevelDifferenceCoefficient);
}
if (AncestorIndexCoefficient > 0) {
if (Item1.AncestorIndex != Item2.AncestorIndex)
ancestorIndexDifferencePunishment = 1;
else
ancestorIndexDifferencePunishment = 0;
ancestorIndexDifferencePunishment *= AncestorIndexCoefficient;
punishmentContributionSum += AncestorIndexCoefficient;
punishmentCoefficientsProduct *= (1 - AncestorIndexCoefficient);
}
if (Item1.DescendantTreeNode is ConstantTreeNode) {
if (ConstantValueCoefficient > 0) {
double constantValueCoefficientDifference = Math.Abs(Item1.DescendantCoefficient - Item2.DescendantCoefficient);
// assume uniform distribution within given minimum and maximum
descendantConstantValueDifferencePunishment = (constantValueCoefficientDifference / (ConstantMaximum - ConstantMinimum));
if (descendantConstantValueDifferencePunishment > 1)
descendantConstantValueDifferencePunishment = 1;
descendantConstantValueDifferencePunishment *= ConstantValueCoefficient;
punishmentContributionSum += ConstantValueCoefficient;
punishmentCoefficientsProduct *= (1 - ConstantValueCoefficient);
}
}
if(Item1.DescendantTreeNode is VariableTreeNode) {
if (VariableWeightCoefficient > 0) {
double variableWeightDifference = Math.Abs(Item1.DescendantCoefficient - Item2.DescendantCoefficient);
// assume normal distribution within given sigma
descendantVariableWeightDifferencePunishment = variableWeightDifference / (VariableWeightSigma * 4);
if (descendantVariableWeightDifferencePunishment > 1)
descendantVariableWeightDifferencePunishment = 1;
descendantVariableWeightDifferencePunishment *= VariableWeightCoefficient;
punishmentContributionSum += VariableWeightCoefficient;
punishmentCoefficientsProduct *= (1 - VariableWeightCoefficient);
}
if (TimeOffsetCoefficient > 0) {
double timeOffsetDifference = Math.Abs(Item1.DescendantTimeOffset - Item2.DescendantTimeOffset);
if (MaximumTimeOffset > 0)
descendantTimeOffsetDifferencePunishment = timeOffsetDifference / MaximumTimeOffset;
descendantTimeOffsetDifferencePunishment *= TimeOffsetCoefficient;
punishmentContributionSum += TimeOffsetCoefficient;
punishmentCoefficientsProduct *= (1 - TimeOffsetCoefficient);
}
if (VariableIndexCoefficient > 0) {
if (Item1.DescendantVariableIndex != Item2.DescendantVariableIndex)
descendantVariableIndexDifferencePunishment = 1;
else
descendantVariableIndexDifferencePunishment = 0;
descendantVariableIndexDifferencePunishment *= VariableIndexCoefficient;
punishmentContributionSum += VariableIndexCoefficient;
punishmentCoefficientsProduct *= (1 - VariableIndexCoefficient);
}
}
double result;
if (AdditiveSimilarityCalculation) {
double punishmentsSum = ancestorIndexDifferencePunishment + levelDifferencePunishment +
descendantConstantValueDifferencePunishment + descendantVariableWeightDifferencePunishment +
descendantTimeOffsetDifferencePunishment + descendantVariableIndexDifferencePunishment;
result = (1 - punishmentsSum / punishmentContributionSum);
} else {
result =
(1 - ancestorIndexDifferencePunishment) *
(1 - levelDifferencePunishment) *
(1 - descendantConstantValueDifferencePunishment) *
(1 - descendantVariableWeightDifferencePunishment) *
(1 - descendantTimeOffsetDifferencePunishment) *
(1 - descendantVariableIndexDifferencePunishment);
// worst possible result is (1-punishmentCoefficientsProduct), so scale linearly to [0;1]:
result = (result - punishmentCoefficientsProduct) / (1 - punishmentCoefficientsProduct);
}
if (result < 0 || result > 1)
throw new InvalidOperationException("ERROR in GeneticInformationItem.Similarity: An invalid similarity value (" + result.ToString() + ") has been calculated.");
return result;
}
public static IList getGeneticInformationItems(SymbolicExpressionTreeNode node, List variableNames,
int MinimumLevelDelta, int MaximumLevelDelta) {
// first we have to collect all items, then we filter; it is not possible to filter while collecting because the items are
// collected recursively and used for collecting the parents' items.
if (MinimumLevelDelta > MaximumLevelDelta)
return new List();
IList list = getGeneticInformationItems(node, variableNames, 0);
List resultList = new List();
foreach (GeneticInformationItem item in list)
if (item.LevelDelta >= MinimumLevelDelta && item.LevelDelta <= MaximumLevelDelta)
resultList.Add(item);
return resultList;
}
private static IList getGeneticInformationItems(SymbolicExpressionTreeNode node, List variableNames, int level) {
// Idea: collect all descendants' lists and then add new items using the retrieved ones.
// This should save lots of time and reduce complexity of the items retrieval process.
// Program roots are not considered, neither are start symbol nodes
if (node.Symbol is ProgramRootSymbol)
return getGeneticInformationItems(node.SubTrees[0], variableNames, level + 1);
List list = new List();
// add item for this node:
if (!(node.Symbol is StartSymbol)) {
list.Add(new GeneticInformationItem(node, variableNames, level));
}
// add items for the descendants, but prevent multiple references to descendant nodes:
List descendantNodes = new List();
for (int i = 0; i < node.SubTrees.Count; i++) {
IList descendantItems = getGeneticInformationItems(node.SubTrees[i], variableNames, level + 1);
list.AddRange(descendantItems);
if (!(node.Symbol is StartSymbol))
foreach (GeneticInformationItem item in descendantItems) {
if (!descendantNodes.Contains(item.DescendantTreeNode)) {
list.Add(new GeneticInformationItem(node, item, i, level));
descendantNodes.Add(item.DescendantTreeNode);
}
}
}
return list;
}
private GeneticInformationItem (SymbolicExpressionTreeNode node, List variableNames, int level) {
myAncestorIndex = -1;
VariableTreeNode variableTreeNode = node as VariableTreeNode;
LaggedVariableTreeNode laggedVariableTreeNode = node as LaggedVariableTreeNode;
ConstantTreeNode constantTreeNode = node as ConstantTreeNode;
myAncestorDefinition = node.Symbol.GetType();
myDescendantDefinition = myAncestorDefinition;
if (variableTreeNode != null)
myDescendantCoefficient = variableTreeNode.Weight;
else if (constantTreeNode != null)
myDescendantCoefficient = constantTreeNode.Value;
else
myDescendantCoefficient = double.NaN;
if (laggedVariableTreeNode != null)
myDescendantTimeOffset = laggedVariableTreeNode.Lag;
else
myDescendantTimeOffset = 0;
if (variableTreeNode != null)
myDescendantVariableIndex = variableNames.IndexOf(variableTreeNode.VariableName);
else
myDescendantVariableIndex = -1;
myAncestorLevel = level;
myDescendantLevel = level;
myDescendantTreeNode = node;
}
private GeneticInformationItem(SymbolicExpressionTreeNode parentNode, GeneticInformationItem descendantGeneticInformationItem,
int ancestorIndex, int parentNodeLevel) {
myAncestorIndex = ancestorIndex;
myAncestorLevel = parentNodeLevel;
myAncestorDefinition = parentNode.Symbol.GetType();
myDescendantCoefficient = descendantGeneticInformationItem.DescendantCoefficient;
myDescendantDefinition = descendantGeneticInformationItem.DescendantDefinition;
myDescendantTimeOffset = descendantGeneticInformationItem.DescendantTimeOffset;
myDescendantVariableIndex = descendantGeneticInformationItem.DescendantVariableIndex;
myDescendantLevel = descendantGeneticInformationItem.DescendantLevel;
myDescendantTreeNode = descendantGeneticInformationItem.DescendantTreeNode;
}
public override string ToString() {
return "ancestor: " + AncestorDefinition.Name.ToString() + ", [" + AncestorIndex + "]; "
+ "descendant: " + DescendantDefinition.Name.ToString() + " (varIndex " + DescendantVariableIndex + ", "
+ DescendantCoefficient + ", t-" + DescendantTimeOffset + ");"
+ " level delta = " + DescendantLevel + "-" + AncestorLevel + " = " + LevelDelta;
}
}
#endregion
}
}