#region License Information
/* HeuristicLab
* Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
*/
#endregion
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using HeuristicLab.Common;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Symbols;
using HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols;
namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
/// <summary>
/// Simplistic simplifier for arithmetic expressions
/// </summary>
public class SymbolicSimplifier {
// Symbol instances used as factories: new tree nodes are created via CreateTreeNode().
// They are never reassigned, hence readonly.
private readonly Addition addSymbol = new Addition();
private readonly Multiplication mulSymbol = new Multiplication();
private readonly Division divSymbol = new Division();
private readonly Constant constSymbol = new Constant();
private readonly Variable varSymbol = new Variable();
/// <summary>
/// Creates a simplified version of the given tree.
/// All work is done on a clone of the tree's root node. Function invocations are
/// macro-expanded first (starting at the first sub-tree of the root), then the
/// expanded expression is simplified.
/// </summary>
/// <param name="originalTree">The tree to simplify; must not be null.</param>
/// <returns>A new tree whose root is the simplified expression.</returns>
/// <exception cref="ArgumentNullException">Thrown if originalTree is null.</exception>
public SymbolicExpressionTree Simplify(SymbolicExpressionTree originalTree) {
  if (originalTree == null) throw new ArgumentNullException("originalTree");
  var clone = (SymbolicExpressionTreeNode)originalTree.Root.Clone();
  // macro expand (initially no argument trees)
  var macroExpandedTree = MacroExpand(clone, clone.SubTrees[0], new List<SymbolicExpressionTreeNode>());
  return new SymbolicExpressionTree(GetSimplifiedTree(macroExpandedTree));
}
/// <summary>
/// Recursively replaces function invocations by the body of the invoked function and
/// argument nodes by the matching (already expanded) argument tree.
/// The sub-trees of <paramref name="node"/> are detached and, for ordinary nodes,
/// re-attached in expanded form, i.e. the node is modified in place.
/// </summary>
/// <param name="root">Root node whose sub-trees are searched for function definitions.</param>
/// <param name="node">The node to expand.</param>
/// <param name="argumentTrees">Already expanded trees used as arguments for invocations.</param>
/// <returns>The root node of the expanded tree.</returns>
private SymbolicExpressionTreeNode MacroExpand(SymbolicExpressionTreeNode root, SymbolicExpressionTreeNode node, IList<SymbolicExpressionTreeNode> argumentTrees) {
  // detach all sub-trees; they are processed individually below
  List<SymbolicExpressionTreeNode> subtrees = new List<SymbolicExpressionTreeNode>(node.SubTrees);
  while (node.SubTrees.Count > 0) node.RemoveSubTree(0);
  if (node.Symbol is InvokeFunction) {
    var invokeSym = (InvokeFunction)node.Symbol;
    var defunNode = FindFunctionDefinition(root, invokeSym.FunctionName);
    var macroExpandedArguments = new List<SymbolicExpressionTreeNode>();
    foreach (var subtree in subtrees) {
      macroExpandedArguments.Add(MacroExpand(root, subtree, argumentTrees));
    }
    // Expand a copy of the function body: the expansion detaches and replaces sub-trees
    // of the node it works on, so expanding the definition itself would leave it modified
    // for every later invocation of the same function.
    var functionBody = (SymbolicExpressionTreeNode)defunNode.Clone();
    return MacroExpand(root, functionBody, macroExpandedArguments);
  } else if (node.Symbol is Argument) {
    var argSym = (Argument)node.Symbol;
    // return the correct argument sub-tree (already macro-expanded)
    return (SymbolicExpressionTreeNode)argumentTrees[argSym.ArgumentIndex].Clone();
  } else if (node.Symbol is StartSymbol) {
    // the start node itself is dropped; only its first sub-tree is expanded and returned
    return MacroExpand(root, subtrees[0], argumentTrees);
  } else {
    // recursive application
    foreach (var subtree in subtrees) {
      node.AddSubTree(MacroExpand(root, subtree, argumentTrees));
    }
    return node;
  }
}
/// <summary>
/// Searches the direct sub-trees of <paramref name="root"/> for the definition of the
/// function with the given name.
/// </summary>
/// <param name="root">Node whose direct sub-trees contain the function definitions.</param>
/// <param name="functionName">Name of the function to look up.</param>
/// <returns>The first sub-tree of the matching definition node.</returns>
/// <exception cref="ArgumentException">Thrown if no definition with that name exists.</exception>
private SymbolicExpressionTreeNode FindFunctionDefinition(SymbolicExpressionTreeNode root, string functionName) {
  foreach (var subtree in root.SubTrees.OfType<DefunTreeNode>()) {
    if (subtree.FunctionName == functionName) return subtree.SubTrees[0];
  }
  throw new ArgumentException("Definition of function " + functionName + " not found.");
}
#region symbol predicates
// Shared implementation of the predicates below:
// true if the symbol of the given node is of type TSymbol.
private static bool HasSymbol<TSymbol>(SymbolicExpressionTreeNode node) {
  return node.Symbol is TSymbol;
}
private bool IsDivision(SymbolicExpressionTreeNode original) {
  return HasSymbol<Division>(original);
}
private bool IsMultiplication(SymbolicExpressionTreeNode original) {
  return HasSymbol<Multiplication>(original);
}
private bool IsSubtraction(SymbolicExpressionTreeNode original) {
  return HasSymbol<Subtraction>(original);
}
private bool IsAddition(SymbolicExpressionTreeNode original) {
  return HasSymbol<Addition>(original);
}
private bool IsVariable(SymbolicExpressionTreeNode original) {
  return HasSymbol<Variable>(original);
}
private bool IsConstant(SymbolicExpressionTreeNode original) {
  return HasSymbol<Constant>(original);
}
private bool IsAverage(SymbolicExpressionTreeNode original) {
  return HasSymbol<Average>(original);
}
private bool IsLog(SymbolicExpressionTreeNode original) {
  return HasSymbol<Logarithm>(original);
}
private bool IsIfThenElse(SymbolicExpressionTreeNode original) {
  return HasSymbol<IfThenElse>(original);
}
#endregion
/// <summary>
/// Creates a new simplified tree.
/// Constants and variables are cloned; additions, subtractions, multiplications,
/// divisions and averages are handled by dedicated routines; for every other symbol
/// only the sub-trees are simplified.
/// </summary>
/// <param name="original">Root node of the (sub-)tree to simplify.</param>
/// <returns>Root node of the simplified tree.</returns>
public SymbolicExpressionTreeNode GetSimplifiedTree(SymbolicExpressionTreeNode original) {
  if (IsConstant(original) || IsVariable(original)) {
    return (SymbolicExpressionTreeNode)original.Clone();
  } else if (IsAddition(original)) {
    return SimplifyAddition(original);
  } else if (IsSubtraction(original)) {
    return SimplifySubtraction(original);
  } else if (IsMultiplication(original)) {
    return SimplifyMultiplication(original);
  } else if (IsDivision(original)) {
    return SimplifyDivision(original);
  } else if (IsAverage(original)) {
    return SimplifyAverage(original);
  } else if (IsLog(original)) {
    // TODO simplify logarithm
    return SimplifyAny(original);
  } else if (IsIfThenElse(original)) {
    // TODO simplify conditionals
    return SimplifyAny(original);
  } else {
    return SimplifyAny(original);
  }
}
#region specific simplification routines
/// <summary>
/// Fallback for symbols without a dedicated simplification routine: the node itself
/// is copied and all of its sub-trees are simplified.
/// The sub-trees of <paramref name="original"/> are detached temporarily and
/// re-attached before the method returns.
/// </summary>
/// <param name="original">The node to process.</param>
/// <returns>A copy of the node with simplified sub-trees.</returns>
private SymbolicExpressionTreeNode SimplifyAny(SymbolicExpressionTreeNode original) {
  // can't simplify this function but simplify all subtrees
  List<SymbolicExpressionTreeNode> subTrees = new List<SymbolicExpressionTreeNode>(original.SubTrees);
  // detach the sub-trees so that the clone below has none of them attached
  while (original.SubTrees.Count > 0) original.RemoveSubTree(0);
  var clone = (SymbolicExpressionTreeNode)original.Clone();
  List<SymbolicExpressionTreeNode> simplifiedSubTrees = new List<SymbolicExpressionTreeNode>();
  foreach (var subTree in subTrees) {
    simplifiedSubTrees.Add(GetSimplifiedTree(subTree));
    // restore the original node
    original.AddSubTree(subTree);
  }
  foreach (var simplifiedSubtree in simplifiedSubTrees) {
    clone.AddSubTree(simplifiedSubtree);
  }
  if (simplifiedSubTrees.TrueForAll(t => IsConstant(t))) {
    // use the returned node; the result must not be discarded once constant folding is implemented
    return SimplifyConstantExpression(clone);
  }
  return clone;
}
/// <summary>
/// Placeholder for the evaluation of expressions that have only constant arguments.
/// </summary>
/// <param name="original">Node whose sub-trees are all constants.</param>
/// <returns>Currently the unchanged node.</returns>
private SymbolicExpressionTreeNode SimplifyConstantExpression(SymbolicExpressionTreeNode original) {
  // not yet implemented
  return original;
}
// Simplifies an average node: with a single operand the simplified operand is returned,
// otherwise the average of x0..xn is rewritten as sum(x0..xn) / n.
private SymbolicExpressionTreeNode SimplifyAverage(SymbolicExpressionTreeNode original) {
  if (original.SubTrees.Count == 1) {
    return GetSimplifiedTree(original.SubTrees[0]);
  }
  Trace.Assert(original.SubTrees.Count > 1);
  var simplifiedOperands = original.SubTrees.Select(operand => GetSimplifiedTree(operand));
  var total = simplifiedOperands.Aggregate((left, right) => MakeSum(left, right));
  var operandCount = MakeConstant(original.SubTrees.Count);
  return MakeFraction(total, operandCount);
}
/// <summary>
/// Simplifies a division node. A single operand x is turned into 1/x; several operands
/// x0..xn are rewritten as the product x0 * 1/(x1 * x2 * .. * xn).
/// </summary>
/// <param name="original">The division node to simplify.</param>
/// <returns>Root node of the simplified expression.</returns>
private SymbolicExpressionTreeNode SimplifyDivision(SymbolicExpressionTreeNode original) {
  if (original.SubTrees.Count == 1) {
    return Invert(GetSimplifiedTree(original.SubTrees[0]));
  } else {
    // simplify expressions x0..xn
    // make multiplication (x0 * 1/(x1 * x2 * .. * xn))
    Trace.Assert(original.SubTrees.Count > 1);
    // Materialize the simplified operands once. The query is consumed twice below and a
    // deferred Select would run GetSimplifiedTree on the first operand a second time.
    var simplifiedTrees = original.SubTrees.Select(x => GetSimplifiedTree(x)).ToList();
    var numerator = simplifiedTrees.First();
    var denominator = simplifiedTrees.Skip(1).Aggregate((a, b) => MakeProduct(a, b));
    return MakeProduct(numerator, Invert(denominator));
  }
}
// Simplifies a multiplication node: with a single factor the simplified factor is returned,
// otherwise all simplified factors are combined pairwise from left to right with MakeProduct.
private SymbolicExpressionTreeNode SimplifyMultiplication(SymbolicExpressionTreeNode original) {
  if (original.SubTrees.Count == 1) {
    return GetSimplifiedTree(original.SubTrees[0]);
  }
  Trace.Assert(original.SubTrees.Count > 1);
  var simplifiedFactors = original.SubTrees.Select(factor => GetSimplifiedTree(factor));
  return simplifiedFactors.Aggregate((left, right) => MakeProduct(left, right));
}
/// <summary>
/// Simplifies a subtraction node. A single operand x is negated; several operands
/// x0..xn are rewritten as the sum x0 + (-x1) + .. + (-xn).
/// </summary>
/// <param name="original">The subtraction node to simplify.</param>
/// <returns>Root node of the simplified expression.</returns>
private SymbolicExpressionTreeNode SimplifySubtraction(SymbolicExpressionTreeNode original) {
  if (original.SubTrees.Count == 1) {
    return Negate(GetSimplifiedTree(original.SubTrees[0]));
  } else {
    // simplify expressions x0..xn
    // make addition (x0,-x1..-xn)
    Trace.Assert(original.SubTrees.Count > 1);
    // Materialize the simplified operands once. The query is consumed twice below and a
    // deferred Select would run GetSimplifiedTree on the first operand a second time.
    var simplifiedTrees = original.SubTrees.Select(x => GetSimplifiedTree(x)).ToList();
    return simplifiedTrees.Take(1)
      .Concat(simplifiedTrees.Skip(1).Select(x => Negate(x)))
      .Aggregate((a, b) => MakeSum(a, b));
  }
}
// Simplifies an addition node: with a single term the simplified term is returned,
// otherwise all simplified terms are combined pairwise from left to right with MakeSum.
private SymbolicExpressionTreeNode SimplifyAddition(SymbolicExpressionTreeNode original) {
  if (original.SubTrees.Count == 1) {
    return GetSimplifiedTree(original.SubTrees[0]);
  }
  Trace.Assert(original.SubTrees.Count > 1);
  var simplifiedTerms = original.SubTrees.Select(term => GetSimplifiedTree(term));
  return simplifiedTerms.Aggregate((left, right) => MakeSum(left, right));
}
#endregion
#region low level tree restructuring
// MakeFraction, MakeProduct and MakeSum take two already simplified trees and create a new simplified tree
// MakeFraction builds a / b; depending on the case it may modify and return the node a.
private SymbolicExpressionTreeNode MakeFraction(SymbolicExpressionTreeNode a, SymbolicExpressionTreeNode b) {
  bool numeratorIsConstant = IsConstant(a);
  bool denominatorIsConstant = IsConstant(b);

  if (numeratorIsConstant && denominatorIsConstant) {
    // both operands are constants => the quotient is a constant as well
    double quotient = ((ConstantTreeNode)a).Value / ((ConstantTreeNode)b).Value;
    return MakeConstant(quotient);
  }
  if (numeratorIsConstant && !((ConstantTreeNode)a).Value.IsAlmost(1.0)) {
    // c / b => 1 / (b * 1/c)
    var one = MakeConstant(1.0);
    var scaledDenominator = MakeProduct(b, Invert(a));
    return MakeFraction(one, scaledDenominator);
  }

  if (denominatorIsConstant) {
    if (IsVariable(a)) {
      // the constant divisor is merged into the weight of the variable
      var divisor = ((ConstantTreeNode)b).Value;
      ((VariableTreeNode)a).Weight /= divisor;
      return a;
    }
    if (IsAddition(a)) {
      // (a0 + .. + an) / c => a0/c + .. + an/c
      return a.SubTrees
        .Select(term => MakeFraction(term, b))
        .Aggregate((left, right) => MakeSum(left, right));
    }
    if (IsMultiplication(a)) {
      // (a0 * .. * an) / c => (a0 * .. * an) * 1/c
      return MakeProduct(a, Invert(b));
    }
  }

  if (IsDivision(a)) {
    Trace.Assert(a.SubTrees.Count == 2);
    if (IsDivision(b)) {
      // (a1 / a2) / (b1 / b2) => (a1 * b2) / (a2 * b1)
      Trace.Assert(b.SubTrees.Count == 2);
      var crossNumerator = MakeProduct(a.SubTrees[0], b.SubTrees[1]);
      var crossDenominator = MakeProduct(a.SubTrees[1], b.SubTrees[0]);
      return MakeFraction(crossNumerator, crossDenominator);
    }
    // (a1 / a2) / b => (a1 / (a2 * b)), for a constant as well as for any other b
    return MakeFraction(a.SubTrees[0], MakeProduct(a.SubTrees[1], b));
  }

  if (IsDivision(b)) {
    // a / (b1 / b2) => (a * b2) / b1
    Trace.Assert(b.SubTrees.Count == 2);
    return MakeFraction(MakeProduct(a, b.SubTrees[1]), b.SubTrees[0]);
  }

  // no rule applies => create a plain division node
  var fraction = divSymbol.CreateTreeNode();
  fraction.AddSubTree(a);
  fraction.AddSubTree(b);
  return fraction;
}
// Builds the simplified sum a + b of two already simplified trees.
// Depending on the case the node a is modified and returned instead of creating a new node.
private SymbolicExpressionTreeNode MakeSum(SymbolicExpressionTreeNode a, SymbolicExpressionTreeNode b) {
  if (IsConstant(a)) {
    if (IsConstant(b)) {
      // both constant => add b to a
      ((ConstantTreeNode)a).Value += ((ConstantTreeNode)b).Value;
      return a;
    }
    // c + x => x + c (b is not constant, the constant goes to the right)
    return MakeSum(b, a);
  }

  // from here on a is not a constant
  if (IsConstant(b) && ((ConstantTreeNode)b).Value.IsAlmost(0.0)) {
    // x + 0 => x
    return a;
  }

  bool aIsSum = IsAddition(a);
  bool bIsSum = IsAddition(b);

  if (aIsSum && bIsSum) {
    // two additions => one addition containing the terms of both
    var merged = addSymbol.CreateTreeNode();
    for (int i = 0; i < a.SubTrees.Count - 1; i++) merged.AddSubTree(a.SubTrees[i]);
    for (int i = 0; i < b.SubTrees.Count - 1; i++) merged.AddSubTree(b.SubTrees[i]);
    var lastOfA = a.SubTrees.Last();
    var lastOfB = b.SubTrees.Last();
    if (IsConstant(lastOfA) && IsConstant(lastOfB)) {
      // both sums end with a constant => keep a single folded constant
      merged.AddSubTree(MakeSum(lastOfA, lastOfB));
    } else if (IsConstant(lastOfA)) {
      // keep the constant of a at the end
      merged.AddSubTree(lastOfB);
      merged.AddSubTree(lastOfA);
    } else {
      merged.AddSubTree(lastOfA);
      merged.AddSubTree(lastOfB);
    }
    MergeVariablesInSum(merged);
    return merged;
  }

  if (bIsSum) {
    // only b is an addition => handle it as the first operand
    return MakeSum(b, a);
  }

  if (aIsSum) {
    var extended = addSymbol.CreateTreeNode();
    if (IsConstant(b)) {
      // append the constant b to the terms of a; an existing trailing constant is folded with b
      for (int i = 0; i < a.SubTrees.Count - 1; i++) extended.AddSubTree(a.SubTrees[i]);
      var lastTerm = a.SubTrees.Last();
      if (IsConstant(lastTerm)) {
        extended.AddSubTree(MakeSum(lastTerm, b));
      } else {
        extended.AddSubTree(lastTerm);
        extended.AddSubTree(b);
      }
      return extended;
    }
    // b is neither constant nor addition => put it in front of the terms of a
    extended.AddSubTree(b);
    foreach (var term in a.SubTrees) {
      extended.AddSubTree(term);
    }
    MergeVariablesInSum(extended);
    return extended;
  }

  // neither operand is an addition => create a new addition node
  var sum = addSymbol.CreateTreeNode();
  sum.AddSubTree(a);
  sum.AddSubTree(b);
  MergeVariablesInSum(sum);
  return sum;
}
/// <summary>
/// Makes sure variable symbols in sums are combined: all variable nodes with the same
/// variable name are replaced by the first node of that group, whose weight becomes the
/// sum of the weights of the group. After the call the sum contains the merged variable
/// nodes first, followed by all other sub-trees in their previous order.
/// Possible improvement: combine sums of products where the products only reference the same variable.
/// </summary>
/// <param name="sum">The addition node whose sub-trees are rearranged in place.</param>
private void MergeVariablesInSum(SymbolicExpressionTreeNode sum) {
  var subtrees = new List<SymbolicExpressionTreeNode>(sum.SubTrees);
  while (sum.SubTrees.Count > 0) sum.RemoveSubTree(0);
  var groupedVarNodes = from node in subtrees.OfType<VariableTreeNode>()
                        group node by node.VariableName into g
                        select g;
  var unchangedSubTrees = subtrees.Where(t => !(t is VariableTreeNode));
  foreach (var variableNodeGroup in groupedVarNodes) {
    var weightSum = variableNodeGroup.Select(t => t.Weight).Sum();
    var representative = variableNodeGroup.First();
    representative.Weight = weightSum;
    sum.AddSubTree(representative);
  }
  foreach (var unchangedSubtree in unchangedSubTrees)
    sum.AddSubTree(unchangedSubtree);
}
/// <summary>
/// Builds the simplified product a * b of two already simplified trees.
/// Depending on the case one of the arguments is modified and returned instead of
/// creating a new node.
/// </summary>
/// <param name="a">First (already simplified) factor.</param>
/// <param name="b">Second (already simplified) factor.</param>
/// <returns>Root node of the simplified product.</returns>
private SymbolicExpressionTreeNode MakeProduct(SymbolicExpressionTreeNode a, SymbolicExpressionTreeNode b) {
  if (IsConstant(a) && IsConstant(b)) {
    // fold constants
    ((ConstantTreeNode)a).Value *= ((ConstantTreeNode)b).Value;
    return a;
  } else if (IsConstant(a)) {
    // a * $ => $ * a
    return MakeProduct(b, a);
  } else if (IsConstant(b) && ((ConstantTreeNode)b).Value.IsAlmost(1.0)) {
    // $ * 1.0 => $
    return a;
  } else if (IsConstant(b) && IsVariable(a)) {
    // multiply constants into variables weights
    ((VariableTreeNode)a).Weight *= ((ConstantTreeNode)b).Value;
    return a;
  } else if (IsConstant(b) && IsAddition(a)) {
    // multiply constants into additions
    return a.SubTrees.Select(x => MakeProduct(x, b)).Aggregate((c, d) => MakeSum(c, d));
  } else if (IsDivision(a) && IsDivision(b)) {
    // (a1 / a2) * (b1 / b2) => (a1 * b1) / (a2 * b2)
    Trace.Assert(a.SubTrees.Count == 2);
    Trace.Assert(b.SubTrees.Count == 2);
    return MakeFraction(MakeProduct(a.SubTrees[0], b.SubTrees[0]), MakeProduct(a.SubTrees[1], b.SubTrees[1]));
  } else if (IsDivision(a)) {
    // (a1 / a2) * b => (a1 * b) / a2
    Trace.Assert(a.SubTrees.Count == 2);
    return MakeFraction(MakeProduct(a.SubTrees[0], b), a.SubTrees[1]);
  } else if (IsDivision(b)) {
    // a * (b1 / b2) => (b1 * a) / b2
    Trace.Assert(b.SubTrees.Count == 2);
    return MakeFraction(MakeProduct(b.SubTrees[0], a), b.SubTrees[1]);
  } else if (IsMultiplication(a) && IsMultiplication(b)) {
    // merge multiplications (make sure constants are merged)
    var mul = mulSymbol.CreateTreeNode();
    for (int i = 0; i < a.SubTrees.Count; i++) mul.AddSubTree(a.SubTrees[i]);
    for (int i = 0; i < b.SubTrees.Count; i++) mul.AddSubTree(b.SubTrees[i]);
    MergeVariablesAndConstantsInProduct(mul);
    return mul;
  } else if (IsMultiplication(b)) {
    return MakeProduct(b, a);
  } else if (IsMultiplication(a)) {
    // a is already an multiplication => append b
    a.AddSubTree(b);
    MergeVariablesAndConstantsInProduct(a);
    return a;
  } else {
    var mul = mulSymbol.CreateTreeNode();
    // Attach via AddSubTree like every other branch instead of adding to the SubTrees list
    // directly. NOTE(review): presumably AddSubTree does extra bookkeeping that a direct
    // list insert would skip - confirm against SymbolicExpressionTreeNode.
    mul.AddSubTree(a);
    mul.AddSubTree(b);
    MergeVariablesAndConstantsInProduct(mul);
    return mul;
  }
}
#endregion
/// <summary>
/// Helper to combine the constant factors in products and to combine variables (powers of 2, 3...).
/// The sub-trees of the product are rebuilt in place in this order:
/// variable nodes grouped by variable name (weight reset to 1.0; a variable that occurs
/// more than once becomes a nested multiplication of clones), then all sub-trees that are
/// neither variable nor constant nodes, then one constant holding the product of all
/// variable weights and constant values (left out if that product is almost 1.0).
/// </summary>
/// <param name="prod">The multiplication node whose sub-trees are rearranged in place.</param>
private void MergeVariablesAndConstantsInProduct(SymbolicExpressionTreeNode prod) {
  var subtrees = new List<SymbolicExpressionTreeNode>(prod.SubTrees);
  while (prod.SubTrees.Count > 0) prod.RemoveSubTree(0);
  var groupedVarNodes = from node in subtrees.OfType<VariableTreeNode>()
                        group node by node.VariableName into g
                        orderby g.Count()
                        select g;
  // evaluated immediately (Aggregate), i.e. before the weights are reset to 1.0 below
  var constantProduct = (from node in subtrees.OfType<VariableTreeNode>()
                         select node.Weight)
                        .Concat(from node in subtrees.OfType<ConstantTreeNode>()
                                select node.Value)
                        .DefaultIfEmpty(1.0)
                        .Aggregate((c1, c2) => c1 * c2);
  var unchangedSubTrees = from tree in subtrees
                          where !(tree is VariableTreeNode)
                          where !(tree is ConstantTreeNode)
                          select tree;
  foreach (var variableNodeGroup in groupedVarNodes) {
    var representative = variableNodeGroup.First();
    representative.Weight = 1.0;
    int occurrences = variableNodeGroup.Count();
    if (occurrences > 1) {
      // x * x * .. => nested multiplication with one clone per occurrence
      var poly = mulSymbol.CreateTreeNode();
      for (int p = 0; p < occurrences; p++) {
        poly.AddSubTree((SymbolicExpressionTreeNode)representative.Clone());
      }
      prod.AddSubTree(poly);
    } else {
      prod.AddSubTree(representative);
    }
  }
  foreach (var unchangedSubtree in unchangedSubTrees)
    prod.AddSubTree(unchangedSubtree);
  if (!constantProduct.IsAlmost(1.0)) {
    prod.AddSubTree(MakeConstant(constantProduct));
  }
}
#region helper functions
/// <summary>
/// x => x * -1
/// Manipulates x in place where possible (constants, variables, additions,
/// multiplications and divisions). For any other node a new product node (x * -1) is
/// created, so callers must always use the returned node.
/// </summary>
/// <param name="x">The (already simplified) tree to negate.</param>
/// <returns>-x</returns>
private SymbolicExpressionTreeNode Negate(SymbolicExpressionTreeNode x) {
  if (IsConstant(x)) {
    ((ConstantTreeNode)x).Value *= -1;
  } else if (IsVariable(x)) {
    var variableTree = (VariableTreeNode)x;
    variableTree.Weight *= -1.0;
  } else if (IsAddition(x)) {
    // (x0 + x1 + .. + xn) * -1 => (-x0 + -x1 + .. + -xn)
    // Negate may return a new node instead of changing its argument, therefore the
    // sub-trees are detached and the returned nodes are attached in their place.
    var subTrees = new List<SymbolicExpressionTreeNode>(x.SubTrees);
    while (x.SubTrees.Count > 0) x.RemoveSubTree(0);
    foreach (var subTree in subTrees) {
      x.AddSubTree(Negate(subTree));
    }
  } else if (IsMultiplication(x) || IsDivision(x)) {
    // x0 * x1 * .. * xn * -1 => x0 * x1 * .. * -xn
    // last is maybe a constant, prefer to negate the constant
    var last = x.SubTrees.Last();
    // detach the last sub-tree and attach whatever Negate returns for it,
    // otherwise the negation is lost when Negate has to create a new node
    x.RemoveSubTree(x.SubTrees.Count - 1);
    x.AddSubTree(Negate(last));
  } else {
    // any other function
    return MakeProduct(x, MakeConstant(-1));
  }
  return x;
}
/// <summary>
/// x => 1/x
/// A constant yields a new constant node, a division a / b yields the fraction b / a
/// and any other node yields the fraction 1 / x.
/// </summary>
/// <param name="x">The (already simplified) tree to invert.</param>
/// <returns>1/x</returns>
private SymbolicExpressionTreeNode Invert(SymbolicExpressionTreeNode x) {
  if (IsConstant(x)) {
    var value = ((ConstantTreeNode)x).Value;
    return MakeConstant(1.0 / value);
  }
  if (IsDivision(x)) {
    Trace.Assert(x.SubTrees.Count == 2);
    // swap numerator and denominator
    var formerDenominator = x.SubTrees[1];
    var formerNumerator = x.SubTrees[0];
    return MakeFraction(formerDenominator, formerNumerator);
  }
  // any other function
  var one = MakeConstant(1);
  return MakeFraction(one, x);
}
// Creates a new constant node via the constant symbol and assigns the given value to it.
private SymbolicExpressionTreeNode MakeConstant(double value) {
  var node = (ConstantTreeNode)constSymbol.CreateTreeNode();
  node.Value = value;
  return node;
}
// Creates a new variable node via the variable symbol and assigns the given weight and variable name to it.
private SymbolicExpressionTreeNode MakeVariable(double weight, string name) {
  VariableTreeNode variableNode = (VariableTreeNode)varSymbol.CreateTreeNode();
  variableNode.Weight = weight;
  variableNode.VariableName = name;
  return variableNode;
}
#endregion
}
}