#region License Information
/* HeuristicLab
* Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see .
*/
#endregion
using System;
using System.Collections.Generic;
using HeuristicLab.Core;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Optimization;
using HeuristicLab.Parameters;
using HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Symbols;
namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers {
public class GeneticInformationItem {
private Type myAncestorDefinition;
public Type AncestorDefinition {
get { return myAncestorDefinition; }
}
private int myAncestorIndex;
public int AncestorIndex {
get { return myAncestorIndex; }
}
private Type myDescendantDefinition;
public Type DescendantDefinition {
get { return myDescendantDefinition; }
}
private int myAncestorLevel;
public int AncestorLevel {
get { return myAncestorLevel; }
}
private double myDescendantCoefficient;
public double DescendantCoefficient {
get { return myDescendantCoefficient; }
}
private double myDescendantVariableIndex;
public double DescendantVariableIndex {
get { return myDescendantVariableIndex; }
}
private int myDescendantTimeOffset;
public int DescendantTimeOffset {
get { return myDescendantTimeOffset; }
}
private SymbolicExpressionTreeNode myDescendantTreeNode;
public SymbolicExpressionTreeNode DescendantTreeNode {
get { return myDescendantTreeNode; }
}
private int myDescendantLevel;
public int DescendantLevel {
get { return myDescendantLevel; }
}
public int LevelDelta {
get { return (myDescendantLevel - myAncestorLevel); }
}
public GeneticInformationItem(SymbolicExpressionTreeNode parentNode, GeneticInformationItem descendantGeneticInformationItem,
int ancestorIndex, int parentNodeLevel) {
myAncestorIndex = ancestorIndex;
myAncestorLevel = parentNodeLevel;
myAncestorDefinition = parentNode.Symbol.GetType();
myDescendantCoefficient = descendantGeneticInformationItem.DescendantCoefficient;
myDescendantDefinition = descendantGeneticInformationItem.DescendantDefinition;
myDescendantTimeOffset = descendantGeneticInformationItem.DescendantTimeOffset;
myDescendantVariableIndex = descendantGeneticInformationItem.DescendantVariableIndex;
myDescendantLevel = descendantGeneticInformationItem.DescendantLevel;
myDescendantTreeNode = descendantGeneticInformationItem.DescendantTreeNode;
}
public static IList CopyList(IList geneticInformationItemsList) {
List list = new List(geneticInformationItemsList.Count);
list.AddRange(geneticInformationItemsList);
return list;
}
public static string GetKey(GeneticInformationItem item) {
return item.AncestorDefinition.Name.ToString() + "," + item.DescendantDefinition.Name.ToString();
}
public static IDictionary> GetDictionary(IList geneticInformationItemsList) {
IDictionary> dictionary = new Dictionary>();
foreach (GeneticInformationItem item in geneticInformationItemsList) {
string key = GetKey(item);
if (!dictionary.ContainsKey(key))
dictionary.Add(key, new List());
dictionary[key].Add(item);
}
return dictionary;
}
public static IDictionary> CopyDictionary(IDictionary> dictionary) {
IDictionary> copy = new Dictionary>();
foreach (KeyValuePair> pair in dictionary) {
copy.Add(pair.Key, CopyList(pair.Value));
}
return copy;
}
public static GeneticInformationItem FindBestPendant(GeneticInformationItem item, IList comparisonItems,
double constantMinimum, double constantMaximum, double variableWeightSigma,
int maximumTreeHeight, int minimumTimeOffset, int maximumTimeOffset,
double levelDifferenceCoefficient, double ancestorIndexCoefficient,
double constantValueCoefficient, double variableWeightCoefficient, double timeOffsetCoefficient, double variableIndexCoefficient,
bool additiveSimilarityCalculation,
out double bestPendantSimilarity) {
int maxSimilarityIndex = -1;
double similarity, maxSimilarity = -double.MaxValue;
for (int i = 0; i < comparisonItems.Count; i++) {
similarity = Similarity(item, comparisonItems[i], constantMinimum, constantMaximum, variableWeightSigma, maximumTreeHeight, minimumTimeOffset, maximumTimeOffset,
levelDifferenceCoefficient, ancestorIndexCoefficient, constantValueCoefficient, variableWeightSigma, timeOffsetCoefficient,
variableWeightCoefficient, additiveSimilarityCalculation);
if (!double.IsNaN(similarity) && similarity > maxSimilarity) {
maxSimilarity = similarity;
maxSimilarityIndex = i;
}
}
bestPendantSimilarity = maxSimilarity;
if (maxSimilarityIndex >= 0)
return comparisonItems[maxSimilarityIndex];
else
return null;
}
public static double Similarity(GeneticInformationItem item1, GeneticInformationItem item2,
double constantMinimum, double constantMaximum, double variableWeightSigma,
int maximumTreeHeight, int minimumTimeOffset, int maximumTimeOffset,
double levelDifferenceCoefficient, double ancestorIndexCoefficient,
double constantValueCoefficient, double variableWeightCoefficient, double timeOffsetCoefficient, double variableIndexCoefficient,
bool additiveSimilarityCalculation) {
if (item1.AncestorDefinition != item2.AncestorDefinition || item1.DescendantDefinition != item2.DescendantDefinition)
return double.NaN;
// the two items for sure have the same behavior definitions
#region initialize punishments
double punishmentContributionSum = 0;
double punishmentCoefficientsProduct = 1;
double ancestorIndexDifferencePunishment = 0;
double levelDifferencePunishment = 0;
double descendantConstantValueDifferencePunishment = 0;
double descendantVariableWeightDifferencePunishment = 0;
double descendantTimeOffsetDifferencePunishment = 0;
double descendantVariableIndexDifferencePunishment = 0;
#endregion
if (levelDifferenceCoefficient > 0) {
levelDifferencePunishment = item1.LevelDelta - item2.LevelDelta;
if (levelDifferencePunishment < 0)
levelDifferencePunishment = -levelDifferencePunishment;
levelDifferencePunishment /= maximumTreeHeight;
if (levelDifferencePunishment > 1)
levelDifferencePunishment = 1;
levelDifferencePunishment *= levelDifferenceCoefficient;
punishmentContributionSum += levelDifferenceCoefficient;
punishmentCoefficientsProduct *= (1 - levelDifferenceCoefficient);
}
if (ancestorIndexCoefficient > 0) {
if (item1.AncestorIndex != item2.AncestorIndex)
ancestorIndexDifferencePunishment = 1;
else
ancestorIndexDifferencePunishment = 0;
ancestorIndexDifferencePunishment *= ancestorIndexCoefficient;
punishmentContributionSum += ancestorIndexCoefficient;
punishmentCoefficientsProduct *= (1 - ancestorIndexCoefficient);
}
if (item1.DescendantTreeNode is ConstantTreeNode) {
if (constantValueCoefficient > 0) {
double constantValueCoefficientDifference = Math.Abs(item1.DescendantCoefficient - item2.DescendantCoefficient);
// assume uniform distribution within given minimum and maximum
descendantConstantValueDifferencePunishment = (constantValueCoefficientDifference / (constantMaximum - constantMinimum));
if (descendantConstantValueDifferencePunishment > 1)
descendantConstantValueDifferencePunishment = 1;
descendantConstantValueDifferencePunishment *= constantValueCoefficient;
punishmentContributionSum += constantValueCoefficient;
punishmentCoefficientsProduct *= (1 - constantValueCoefficient);
}
}
if (item1.DescendantTreeNode is VariableTreeNode) {
if (variableWeightCoefficient > 0) {
double variableWeightDifference = Math.Abs(item1.DescendantCoefficient - item2.DescendantCoefficient);
// assume normal distribution within given sigma
descendantVariableWeightDifferencePunishment = variableWeightDifference / (variableWeightSigma * 4);
if (descendantVariableWeightDifferencePunishment > 1)
descendantVariableWeightDifferencePunishment = 1;
descendantVariableWeightDifferencePunishment *= variableWeightCoefficient;
punishmentContributionSum += variableWeightCoefficient;
punishmentCoefficientsProduct *= (1 - variableWeightCoefficient);
}
if (timeOffsetCoefficient > 0) {
double timeOffsetDifference = Math.Abs(item1.DescendantTimeOffset - item2.DescendantTimeOffset);
if (maximumTimeOffset > 0)
descendantTimeOffsetDifferencePunishment = timeOffsetDifference / (maximumTimeOffset - minimumTimeOffset);
descendantTimeOffsetDifferencePunishment *= timeOffsetCoefficient;
punishmentContributionSum += timeOffsetCoefficient;
punishmentCoefficientsProduct *= (1 - timeOffsetCoefficient);
}
if (variableIndexCoefficient > 0) {
if (item1.DescendantVariableIndex != item2.DescendantVariableIndex)
descendantVariableIndexDifferencePunishment = 1;
else
descendantVariableIndexDifferencePunishment = 0;
descendantVariableIndexDifferencePunishment *= variableIndexCoefficient;
punishmentContributionSum += variableIndexCoefficient;
punishmentCoefficientsProduct *= (1 - variableIndexCoefficient);
}
}
double result;
if (additiveSimilarityCalculation) {
double punishmentsSum = ancestorIndexDifferencePunishment + levelDifferencePunishment +
descendantConstantValueDifferencePunishment + descendantVariableWeightDifferencePunishment +
descendantTimeOffsetDifferencePunishment + descendantVariableIndexDifferencePunishment;
result = (1 - punishmentsSum / punishmentContributionSum);
} else {
result =
(1 - ancestorIndexDifferencePunishment) *
(1 - levelDifferencePunishment) *
(1 - descendantConstantValueDifferencePunishment) *
(1 - descendantVariableWeightDifferencePunishment) *
(1 - descendantTimeOffsetDifferencePunishment) *
(1 - descendantVariableIndexDifferencePunishment);
// worst possible result is (1-punishmentCoefficientsProduct), so scale linearly to [0;1]:
result = (result - punishmentCoefficientsProduct) / (1 - punishmentCoefficientsProduct);
}
if (result < 0 || result > 1)
throw new InvalidOperationException("ERROR in GeneticInformationItem.Similarity: An invalid similarity value (" + result.ToString() + ") has been calculated.");
return result;
}
public static IList GetGeneticInformationItems(SymbolicExpressionTreeNode node, List variableNames,
int MinimumLevelDelta, int MaximumLevelDelta) {
// first we have to collect all items, then we filter; it is not possible to filter while collecting because the items are
// collected recursively and used for collecting the parents' items.
if (MinimumLevelDelta > MaximumLevelDelta)
return new List();
IList list = GetGeneticInformationItems(node, variableNames, 0);
List resultList = new List();
foreach (GeneticInformationItem item in list)
if (item.LevelDelta >= MinimumLevelDelta && item.LevelDelta <= MaximumLevelDelta)
resultList.Add(item);
return resultList;
}
private static IList GetGeneticInformationItems(SymbolicExpressionTreeNode node, List variableNames, int level) {
// Idea: collect all descendants' lists and then add new items using the retrieved ones.
// This should save lots of time and reduce complexity of the items retrieval process.
// Program roots are not considered, neither are start symbol nodes
if (node.Symbol is ProgramRootSymbol)
return GetGeneticInformationItems(node.SubTrees[0], variableNames, level + 1);
List list = new List();
// add item for this node:
if (!(node.Symbol is StartSymbol)) {
list.Add(new GeneticInformationItem(node, variableNames, level));
}
// add items for the descendants, but prevent multiple references to descendant nodes:
List descendantNodes = new List();
for (int i = 0; i < node.SubTrees.Count; i++) {
IList descendantItems = GetGeneticInformationItems(node.SubTrees[i], variableNames, level + 1);
list.AddRange(descendantItems);
if (!(node.Symbol is StartSymbol))
foreach (GeneticInformationItem item in descendantItems) {
if (!descendantNodes.Contains(item.DescendantTreeNode)) {
list.Add(new GeneticInformationItem(node, item, i, level));
descendantNodes.Add(item.DescendantTreeNode);
}
}
}
return list;
}
private GeneticInformationItem(SymbolicExpressionTreeNode node, List variableNames, int level) {
myAncestorIndex = -1;
VariableTreeNode variableTreeNode = node as VariableTreeNode;
LaggedVariableTreeNode laggedVariableTreeNode = node as LaggedVariableTreeNode;
ConstantTreeNode constantTreeNode = node as ConstantTreeNode;
myAncestorDefinition = node.Symbol.GetType();
myDescendantDefinition = myAncestorDefinition;
if (variableTreeNode != null)
myDescendantCoefficient = variableTreeNode.Weight;
else if (constantTreeNode != null)
myDescendantCoefficient = constantTreeNode.Value;
else
myDescendantCoefficient = double.NaN;
if (laggedVariableTreeNode != null)
myDescendantTimeOffset = laggedVariableTreeNode.Lag;
else
myDescendantTimeOffset = 0;
if (variableTreeNode != null)
myDescendantVariableIndex = variableNames.IndexOf(variableTreeNode.VariableName);
else
myDescendantVariableIndex = -1;
myAncestorLevel = level;
myDescendantLevel = level;
myDescendantTreeNode = node;
}
public override string ToString() {
return "ancestor: " + AncestorDefinition.Name.ToString() + ", [" + AncestorIndex + "]; "
+ "descendant: " + DescendantDefinition.Name.ToString() + " (varIndex " + DescendantVariableIndex + ", "
+ DescendantCoefficient + ", t-" + DescendantTimeOffset + ");"
+ " level delta = " + DescendantLevel + "-" + AncestorLevel + " = " + LevelDelta;
}
}
}