#region License Information /* HeuristicLab * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL) * * This file is part of HeuristicLab. * * HeuristicLab is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * HeuristicLab is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with HeuristicLab. If not, see . */ #endregion using System; using System.Collections.Generic; using System.Diagnostics.Contracts; using System.Linq; // code for hashing of expressions based on symbolic analysis of monomials and polynomials. // slow, but easy to implement correctly. namespace HeuristicLab.Algorithms.DataAnalysis.MctsSymbolicRegression { internal enum UnaryFunctionType { Log, Exp, Inv }; internal interface Factor { } internal class SymbolFactor : Factor { internal char symbolId; public SymbolFactor(char symbolId) { this.symbolId = symbolId; } public override int GetHashCode() { return symbolId.GetHashCode(); } public override bool Equals(object obj) { SymbolFactor other = obj as SymbolFactor; if (other == null) return false; else return other.symbolId == this.symbolId; } } internal class FunctionFactor : Factor { internal UnaryFunctionType functionType; internal Polynomial argument; public FunctionFactor(UnaryFunctionType functionType, Polynomial argument) { this.functionType = functionType; this.argument = argument; } public override int GetHashCode() { var h = functionType.GetHashCode(); return ((h<<5)+h) ^ argument.GetHashCode(); } public override bool Equals(object obj) { FunctionFactor other = obj as FunctionFactor; if (other == null) return false; return other.functionType == this.functionType && other.argument == this.argument; } } internal class Monomial { internal List factors = new List(); public Monomial(params Factor[] factor) { foreach (var f in factor) factors.Add(f); } public override int GetHashCode() { return factors.OrderBy(ti => ti.GetHashCode()).Aggregate(0, (a, ti) => ((a << 5) + a) ^ ti.GetHashCode()); } public override bool Equals(object obj) { Monomial other = obj as Monomial; if (other == null) return false; if (other.factors.Count != this.factors.Count) return false; return factors.All(ti => other.factors.Contains(ti)) && other.factors.All(ti => factors.Contains(ti)); } public static Monomial Product(Monomial a, Monomial b) { var p = new Monomial(); var invFactorArg = new Polynomial(); var expFactorArg = new Polynomial(); var expFactor = new FunctionFactor(UnaryFunctionType.Exp, expFactorArg); foreach (var aFactor in a.factors.Concat(b.factors)) { // collect all exp and inv factors into one and simplify var funcFactor = aFactor as FunctionFactor; if (funcFactor != null && funcFactor.functionType == UnaryFunctionType.Exp) { expFactorArg.Add(funcFactor.argument); } else if (funcFactor != null && funcFactor.functionType == UnaryFunctionType.Inv) { if (!invFactorArg.terms.Any()) invFactorArg.terms = new HashSet(funcFactor.argument.terms); else invFactorArg.Mul(funcFactor.argument); } else { p.factors.Add(aFactor); } } if (expFactorArg.terms.Any()) { p.factors.Add(expFactor); } if (invFactorArg.terms.Any()) { var invFactor = new FunctionFactor(UnaryFunctionType.Inv, invFactorArg); p.factors.Add(invFactor); } return p; } } internal class Polynomial { internal HashSet terms = new HashSet(); public Polynomial(params Monomial[] term) { foreach (var t in term) terms.Add(t); } public void Add(Polynomial other) { this.terms.UnionWith(other.terms); } public void Mul(Polynomial other) { var myTerms = terms; var otherTerms = other.terms; var newTerms = new HashSet(); foreach (var a in myTerms) { foreach (var b in otherTerms) { newTerms.Add(Monomial.Product(a, b)); } } terms = newTerms; } public override int GetHashCode() { return terms.OrderBy(ti => ti.GetHashCode()).Aggregate(0, (a, ti) => ((a<<5)+a) ^ ti.GetHashCode()); } public override bool Equals(object obj) { Polynomial other = obj as Polynomial; if (other == null) return false; if (other.terms.Count != this.terms.Count) return false; return terms.All(ti => other.terms.Contains(ti)) && other.terms.All(ti => terms.Contains(ti)); } } // calculates a hash-code for expressions. public static class ExprHashSymbolic { const int MaxStackSize = 100; const int MaxVariables = 26; private static SymbolFactor[] varSymbols; private static SymbolFactor zero; private static SymbolFactor one; static ExprHashSymbolic() { const string symbols = "abcdefghijklmnopqrstuvwxyz"; // limited to 26 variables -> TODO varSymbols = new SymbolFactor[MaxVariables]; for (int i = 0; i < MaxVariables; i++) { varSymbols[i] = new SymbolFactor(symbols[i]); } zero = new SymbolFactor('0'); one = new SymbolFactor('1'); } public static int GetHash(byte[] code, int nParams) { return Eval(code, nParams); } // takes an array of code and transforms it into a polynomial for hashing. // slow! private static int Eval(byte[] code, int nParams) { // The hash code calculation already preserves commutativity, associativity and distributivity of operations. // We assume that the structure contains numeric parameters // x = c*x // exp(x) = c*exp(c*x) // log(x) = c*log(x+c) // inv(x) = c/(x+c) // Accordingly, each structure represents a class of functions. // The following expressions should hash to the same value as they represent // equivalent function classes // - x1 + x1 is equivalent to x1 // - exp(x1) * exp(x1) is equivalent to exp(x1) // // The following expressions must not hash to the same value. // - exp(x1) + exp(x1) is different from exp(x1), because c1*exp(c2*x1) + c3*exp(c4*x1) cannot be simplified to c5*exp(c6*x1) for all values of c2 and c4 // - log(x1) + log(x1) is different from log(x1), same as above // - 1/x1 + 1/x1 is different from 1/x1, same as above 1/(x1 + c1) + 1/(x1 + c2) // - TODO: list further exceptions var stack = new Polynomial[MaxStackSize]; int topOfStack = -1; int pc = 0; int nextParamIdx = -1; OpCodes op; short arg; while (true) { ReadNext(code, ref pc, out op, out arg); switch (op) { case OpCodes.Nop: break; case OpCodes.LoadConst0: { ++topOfStack; stack[topOfStack] = new Polynomial(new Monomial(zero)); break; } case OpCodes.LoadConst1: { ++topOfStack; stack[topOfStack] = new Polynomial(new Monomial(one)); break; } case OpCodes.LoadParamN: { ++topOfStack; stack[topOfStack] = new Polynomial(new Monomial(one)); // TODO empty, or unique? break; } case OpCodes.LoadVar: { ++topOfStack; stack[topOfStack] = new Polynomial(new Monomial(varSymbols[arg])); break; } case OpCodes.Add: { stack[topOfStack - 1].Add(stack[topOfStack]); topOfStack--; break; } case OpCodes.Mul: { stack[topOfStack - 1].Mul(stack[topOfStack]); topOfStack--; break; } case OpCodes.Log: { var v1 = stack[topOfStack]; stack[topOfStack] = new Polynomial(new Monomial(new FunctionFactor(UnaryFunctionType.Log, v1))); break; } case OpCodes.Exp: { var v1 = stack[topOfStack]; stack[topOfStack] = new Polynomial(new Monomial(new FunctionFactor(UnaryFunctionType.Exp, v1))); break; } case OpCodes.Inv: { var v1 = stack[topOfStack]; stack[topOfStack] = new Polynomial(new Monomial(new FunctionFactor(UnaryFunctionType.Inv, v1))); break; } case OpCodes.Exit: Contract.Assert(topOfStack == 0); return stack[topOfStack].GetHashCode(); default: throw new InvalidOperationException(); } } } private static void EvalTerms(HashSet terms, double[] stack, ref int topOfStack) { ++topOfStack; stack[topOfStack] = terms.Sum(); terms.Clear(); } private static void ReadNext(byte[] code, ref int pc, out OpCodes op, out short s) { op = (OpCodes)Enum.ToObject(typeof(OpCodes), code[pc++]); s = 0; if (op == OpCodes.LoadVar) { s = (short)((code[pc] << 8) | code[pc + 1]); pc += 2; } } } }