#region License Information
/* HeuristicLab
* Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
*/
#endregion
using System;
using System.Collections.Generic;
using System.Diagnostics.Contracts;
using System.Linq;
namespace HeuristicLab.Algorithms.DataAnalysis.MctsSymbolicRegression {
// This class restricts the set of allowed transitions of the automaton to prevent exploration of duplicate expressions.
// It would be possible to implement this class in such a way that the search never visits a duplicate expression. However,
// it seems very intricate to detect this robustly and in all cases while generating an expression because
// some form of lookahead is necessary.
// Instead the constraint handler only catches the obvious duplicates directly, but does not guarantee that the search always produces a valid expression.
// The ratio of unsuccessful searches that need backtracking should be tracked in the MCTS alg (MctsSymbolicRegressionStatic)
// All changes to this class should be tested through unit tests. It is important that the ConstraintHandler is not too restrictive.
// the constraints are derived from a canonical form for expressions.
// overall we can enforce a limited number of variable references
//
// an expression is a sum of terms t_1 ... t_n where terms are ordered according to a relation t_i (<=)_term t_j for each pair t_i, t_j and i <= j
// a term is a product of factors where factors are ordered according to relation f_i (<=)_factor f_j for each pair f_i,f_j and i <= j
// we want to enforce lower-order terms before higher-order terms in expressions (based on number of variable references)
// factors can have different types (variable, exp, log, inverse)
// (<=)_term [IsSmallerOrEqualTerm(t_i, t_j)]
// 1. NumberOfVarRefs(t_i) < NumberOfVarRefs(t_j) --> true enforce terms with non-decreasing number of var refs
// 2. NumberOfVarRefs(t_i) > NumberOfVarRefs(t_j) --> false
// 3. NumFactors(t_i) > NumFactors(t_j) --> true enforce terms with non-increasing number of factors
// 4. NumFactors(t_i) < NumFactors(t_j) --> false
// 5. for all k factors: Factor(k, t_i) (<=)_factor Factor(k, t_j) --> true // factors must be non-decreasing
// 6. all factors are (=)_factor --> true
// 7. else false
// (<=)_factor [IsSmallerOrEqualFactor(f_i, f_j)]
// 1. FactorType(f_i) < FactorType(f_j) --> true enforce factors with non-decreasing factor type (var < exp < log < inv)
// 2. FactorType(f_i) > FactorType(f_j) --> false
// 3. Compare the two factors specifically
// - variables: varIdx_i <= varIdx_j (only one var reference)
// - exp: number of variable references and then varIdx_i <= varIdx_j for each position
// - log: number of variable references and ...
// - inv: number of variable references and ...
//
// for log and inverse factors we allow all polynomials as argument
// a polynomial is a sum of terms t_1 ... t_n where terms are ordered according to a relation t_i (<=)_poly t_j for each pair t_i, t_j and i <= j
// (<=)_poly [IsSmallerOrEqualPoly(t_i, t_j)]
// 1. NumberOfVarRefs(t_i) < NumberOfVarRefs(t_j) --> true // enforce non-decreasing number of var refs
// 2. NumberOfVarRefs(t_i) > NumberOfVarRefs(t_j) --> false // enforce non-decreasing number of var refs
// 3. for all k variables: VarIdx(k,t_i) > VarIdx(k, t_j) --> false // enforce non-decreasing variable idx
// we store the following to make comparisons:
// - prevTerm (complete & containing all factors)
// - curTerm (incomplete & containing all completed factors)
// - curFactor (incomplete)
/// <summary>
/// Decides which follow states of the automaton are allowed while an expression is generated.
/// The handler keeps track of the previously completed term, the term currently under construction
/// and the factor currently under construction, and compares them using the order relations
/// <see cref="CompareTerms"/> and <see cref="CompareFactors"/>. It also limits the total number of
/// variable references to the value given in the constructor.
/// </summary>
internal class ConstraintHandler {
  // number of variable references added via AddVarToCurrentFactor() since the last Reset()
  private int nVars;
  // upper limit for the number of variable references in the whole expression
  private readonly int maxVariables;
  // set by EndFactor() / EndTerm() when a completed factor / term violates the ordering
  private bool invalidExpression;

  /// <summary>
  /// True as soon as a completed factor or term was found to be smaller than its predecessor.
  /// Once set, <see cref="IsAllowedFollowState"/> returns false until <see cref="Reset"/> is called.
  /// </summary>
  public bool IsInvalidExpression {
    get { return invalidExpression; }
  }

  // the last term closed by EndTerm() (null as long as no term has been completed)
  private TermInformation prevTerm;
  // the term under construction; contains only factors that were closed by EndFactor()
  private TermInformation curTerm;
  // the factor under construction
  private FactorInformation curFactor;

  // a term is described by the list of its (completed) factors
  private class TermInformation {
    // sum of the variable references of all factors (recomputed on every access)
    public int numVarReferences { get { return factors.Sum(f => f.numVarReferences); } }
    public List<FactorInformation> factors = new List<FactorInformation>();
  }

  // a factor is described by its type and the variable states it references
  private class FactorInformation {
    public int numVarReferences = 0;
    public int factorType; // use the state number to represent types

    // for variable factors
    public int variableState = -1;

    // for exp factors
    public List<int> expVariableStates = new List<int>();

    // for log and inv factors (one inner list of variable states for each term of the polynomial)
    public List<List<int>> polyVariableStates = new List<List<int>>();
  }

  /// <summary>
  /// Creates a new constraint handler.
  /// </summary>
  /// <param name="maxVars">Maximum number of variable references allowed in an expression.</param>
  public ConstraintHandler(int maxVars) {
    this.maxVariables = maxVars;
  }

  // the order relations for terms and factors

  /// <summary>
  /// Order relation for terms. Terms are compared by (1) number of variable references (fewer first),
  /// (2) number of factors (more first) and (3) factor by factor using <see cref="CompareFactors"/>.
  /// </summary>
  /// <returns>-1 if a is ordered before b, +1 if a is ordered after b, 0 if both are equal.</returns>
  private static int CompareTerms(TermInformation a, TermInformation b) {
    // numVarReferences is recomputed on each access => read it only once per term
    var aVarRefs = a.numVarReferences;
    var bVarRefs = b.numVarReferences;
    if (aVarRefs < bVarRefs) return -1;
    if (aVarRefs > bVarRefs) return 1;

    if (a.factors.Count > b.factors.Count) return -1;  // terms with more factors should be ordered first
    if (a.factors.Count < b.factors.Count) return +1;

    // both terms have the same number of factors here
    for (int i = 0; i < a.factors.Count; i++) {
      var c = CompareFactors(a.factors[i], b.factors[i]);
      if (c < 0) return -1;
      if (c > 0) return 1;
    }
    // all factors are the same => terms are the same
    return 0;
  }

  /// <summary>
  /// Order relation for factors. Factors are first compared by factor type (the state number).
  /// Factors of the same type are compared by their variable states.
  /// </summary>
  /// <returns>A negative value if a is ordered before b, a positive value if a is ordered after b, 0 if both are equal.</returns>
  private static int CompareFactors(FactorInformation a, FactorInformation b) {
    if (a.factorType < b.factorType) return -1;
    if (a.factorType > b.factorType) return +1;

    // same factor types
    if (a.factorType == Automaton.StateVariableFactorStart) {
      return a.variableState.CompareTo(b.variableState);
    } else if (a.factorType == Automaton.StateExpFactorStart) {
      return CompareStateLists(a.expVariableStates, b.expVariableStates);
    } else {
      // log and inv
      if (a.numVarReferences < b.numVarReferences) return -1;
      if (a.numVarReferences > b.numVarReferences) return +1;
      if (a.polyVariableStates.Count > b.polyVariableStates.Count) return -1; // more terms in the poly should be ordered first
      if (a.polyVariableStates.Count < b.polyVariableStates.Count) return +1;

      // both polynomials have the same number of terms here
      for (int i = 0; i < a.polyVariableStates.Count; i++) {
        var c = CompareStateLists(a.polyVariableStates[i], b.polyVariableStates[i]);
        if (c != 0) return c;
      }
      return 0; // all terms in the polynomial are the same
    }
  }

  /// <summary>
  /// Compares two lists of variable states: the shorter list is ordered first,
  /// lists of the same length are compared element by element.
  /// </summary>
  /// <returns>-1 if a is ordered before b, +1 if a is ordered after b, 0 if both are equal.</returns>
  private static int CompareStateLists(List<int> a, List<int> b) {
    if (a.Count < b.Count) return -1;
    if (a.Count > b.Count) return +1;
    for (int i = 0; i < a.Count; i++) {
      if (a[i] < b[i]) return -1;
      if (a[i] > b[i]) return +1;
    }
    return 0; // all states are the same
  }

  // true if there is no previous term or if enough variable references remain to produce a term
  // with at least as many variable references as the previous term
  private bool IsNewTermAllowed() {
    // next term must have at least as many variable references as the previous term
    return prevTerm == null || nVars + prevTerm.numVarReferences <= maxVariables;
  }

  // true if the current term has no factors yet or if enough variable references remain to add
  // as many variable references as the last completed factor has
  private bool IsNewFactorAllowed() {
    // next factor must have a larger or equal type compared to the previous factor.
    // if the types are the same it must have at least as many variable references.
    // so if the prevFactor is any other than invFactor (last possible type) then we only need to be able to add one variable
    // otherwise we need to be able to add at least as many variables as the previous factor
    return !curTerm.factors.Any() ||
           (nVars + curTerm.factors.Last().numVarReferences <= maxVariables);
  }

  /// <summary>
  /// Checks whether a factor of the type given by <paramref name="followState"/> may be started next
  /// in the current term.
  /// </summary>
  private bool IsAllowedAsNextFactorType(int followState) {
    // IsNewTermAllowed already ensures that we can add a term with enough variable references

    // enforce constraints within terms (compare to prev factor)
    if (curTerm.factors.Any()) {
      var lastFactor = curTerm.factors.Last();
      // enforce non-decreasing factor types
      if (lastFactor.factorType > followState) return false;
      // when the factor type is the same, starting a new factor is only allowed if we can add at least the number of variables of the prev factor
      if (lastFactor.factorType == followState && nVars + lastFactor.numVarReferences > maxVariables) return false;
    }

    // enforce constraints on terms (compare to prev term)
    // meaning that we must ensure non-decreasing terms
    if (prevTerm != null) {
      // a factor type is only allowed if we can then produce a term that is larger or equal to the prev term
      // (1) if the number of variable references still remaining is larger than the number of variable references in the prev term
      //     then it is always possible to build a larger term
      // (2) otherwise we try to build the largest possible term starting from current factors in the term.
      //

      var numVarRefsRemaining = maxVariables - nVars;
      Contract.Assert(!curTerm.factors.Any() || curTerm.factors.Last().numVarReferences <= numVarRefsRemaining);

      if (prevTerm.numVarReferences < numVarRefsRemaining) return true;

      // the candidate factors are appended to curTerm only temporarily for the comparison
      // and are removed again before returning
      var origFactorCount = curTerm.factors.Count;
      int c;

      // variable factors must be handled differently because they can only contain one variable reference
      if (followState == Automaton.StateVariableFactorStart) {
        // append the variable factor and the maximum possible state from the previous factor to create a larger factor
        var varF = CreateLargestPossibleFactor(Automaton.StateVariableFactorStart, 1);
        var maxF = CreateLargestPossibleFactor(prevTerm.factors.Max(f => f.factorType), numVarRefsRemaining - 1);
        // add these factors to the current term
        curTerm.factors.Add(varF);
        curTerm.factors.Add(maxF);
        c = CompareTerms(prevTerm, curTerm);
        // restore term
        curTerm.factors.RemoveRange(origFactorCount, 2);
      } else {
        var newF = CreateLargestPossibleFactor(followState, numVarRefsRemaining);
        // add this factor to the current term
        curTerm.factors.Add(newF);
        c = CompareTerms(prevTerm, curTerm);
        // restore term
        curTerm.factors.RemoveAt(origFactorCount);
      }

      // if the prev term is still larger then this followstate is not allowed
      if (c > 0) {
        return false;
      }
    }
    return true;
  }

  /// <summary>
  /// Creates the largest possible factor of the given kind. All variable states are set to int.MaxValue.
  /// Variable factors always get exactly one variable reference (numVarRefs is ignored for them).
  /// For factor types other than variable, exp, log and inv a factor without variable references is returned.
  /// </summary>
  private FactorInformation CreateLargestPossibleFactor(int factorType, int numVarRefs) {
    var newF = new FactorInformation();
    newF.factorType = factorType;
    if (factorType == Automaton.StateVariableFactorStart) {
      newF.variableState = int.MaxValue;
      newF.numVarReferences = 1;
    } else if (factorType == Automaton.StateExpFactorStart) {
      for (int i = 0; i < numVarRefs; i++)
        newF.expVariableStates.Add(int.MaxValue);
      newF.numVarReferences = numVarRefs;
    } else if (factorType == Automaton.StateInvFactorStart || factorType == Automaton.StateLogFactorStart) {
      // one polynomial term with a single variable reference for each requested variable reference
      for (int i = 0; i < numVarRefs; i++) {
        newF.polyVariableStates.Add(new List<int>());
        newF.polyVariableStates[i].Add(int.MaxValue);
      }
      newF.numVarReferences = numVarRefs;
    }
    return newF;
  }

  // a variable factor is allowed if the term is empty or if the variable state is not smaller than
  // the variableState stored for the last completed factor
  private bool IsAllowedAsNextVariableFactor(int variableState) {
    Contract.Assert(variableState >= Automaton.FirstDynamicState);
    return !curTerm.factors.Any() || curTerm.factors.Last().variableState <= variableState;
  }

  // a variable is allowed within an exp factor if the variable states stay non-decreasing and
  // (when there is a previous factor) the extended factor is not smaller than the previous factor
  private bool IsAllowedAsNextInExp(int variableState) {
    Contract.Assert(variableState >= Automaton.FirstDynamicState);
    if (curFactor.expVariableStates.Any() && curFactor.expVariableStates.Last() > variableState) return false;
    if (curTerm.factors.Any()) {
      // try and compare with prev factor (the variable is added only temporarily)
      curFactor.numVarReferences++;
      curFactor.expVariableStates.Add(variableState);
      var c = CompareFactors(curTerm.factors.Last(), curFactor);
      curFactor.numVarReferences--;
      curFactor.expVariableStates.RemoveAt(curFactor.expVariableStates.Count - 1);
      return c <= 0;
    }
    return true;
  }

  // a new term in a polynomial is allowed if we can still add as many variable references
  // as the last term of the polynomial contains
  private bool IsNewTermAllowedInPoly() {
    return nVars + curFactor.polyVariableStates.Last().Count <= maxVariables;
  }

  // a variable is allowed within a term of a polynomial if the variable states stay non-decreasing
  private bool IsAllowedAsNextInPoly(int variableState) {
    Contract.Assert(variableState >= Automaton.FirstDynamicState);
    return !curFactor.polyVariableStates.Any() ||
           !curFactor.polyVariableStates.Last().Any() ||
           curFactor.polyVariableStates.Last().Last() <= variableState;
  }

  // a term in a polynomial is complete if it is the only term or
  // if it has at least as many variable references as the term before
  private bool IsTermCompleteInPoly() {
    var nTerms = curFactor.polyVariableStates.Count;
    return nTerms == 1 ||
           curFactor.polyVariableStates[nTerms - 2].Count <= curFactor.polyVariableStates[nTerms - 1].Count;
  }

  // an exp factor is complete if it is the first factor of the term or if it is not smaller than the previous factor
  private bool IsCompleteExp() {
    return !curTerm.factors.Any() || CompareFactors(curTerm.factors.Last(), curFactor) <= 0;
  }

  /// <summary>
  /// Checks whether the transition from <paramref name="currentState"/> to <paramref name="followState"/>
  /// is allowed given the parts of the expression that have been reported to the handler so far.
  /// </summary>
  /// <param name="currentState">The current state of the automaton.</param>
  /// <param name="followState">The candidate follow state.</param>
  /// <returns>True if the transition is allowed; always false after the expression became invalid.</returns>
  public bool IsAllowedFollowState(int currentState, int followState) {
    // an invalid action was taken earlier on => nothing can be done anymore
    if (invalidExpression) return false;
    // states that have no alternative are always allowed
    // some ending states are only allowed if enough variables have been used in the term
    if (
      currentState == Automaton.StateTermStart ||           // no alternative
      currentState == Automaton.StateExpFactorStart ||
      currentState == Automaton.StateLogFactorStart ||
      currentState == Automaton.StateInvFactorStart ||
      followState == Automaton.StateVariableFactorEnd ||    // no alternative
      followState == Automaton.StateExpFEnd ||              // no alternative
      followState == Automaton.StateLogTFEnd ||             // no alternative
      followState == Automaton.StateInvTFEnd ||             // no alternative
      followState == Automaton.StateFactorEnd ||            // always allowed because no alternative
      followState == Automaton.StateExprEnd                 // we could also constrain the minimum number of terms here
      ) return true;

    // starting a new term is only allowed if we can add a term with at least the number of variables of the prev term
    if (followState == Automaton.StateTermStart && !IsNewTermAllowed()) return false;
    if (followState == Automaton.StateFactorStart && !IsNewFactorAllowed()) return false;
    if (currentState == Automaton.StateFactorStart && !IsAllowedAsNextFactorType(followState)) return false;
    // finishing a term is only allowed if it is not smaller than the previous term
    if (followState == Automaton.StateTermEnd && prevTerm != null && CompareTerms(prevTerm, curTerm) > 0) return false;

    // all of these states add at least one variable
    if (
        followState == Automaton.StateVariableFactorStart ||
        followState == Automaton.StateExpFactorStart || followState == Automaton.StateExpFStart ||
        followState == Automaton.StateLogFactorStart || followState == Automaton.StateLogTStart ||
        followState == Automaton.StateLogTFStart ||
        followState == Automaton.StateInvFactorStart || followState == Automaton.StateInvTStart ||
        followState == Automaton.StateInvTFStart) {
      if (nVars + 1 > maxVariables) return false;
    }

    if (currentState == Automaton.StateVariableFactorStart && !IsAllowedAsNextVariableFactor(followState)) return false;
    else if (currentState == Automaton.StateExpFStart && !IsAllowedAsNextInExp(followState)) return false;
    else if (followState == Automaton.StateLogTStart && !IsNewTermAllowedInPoly()) return false;
    else if (currentState == Automaton.StateLogTFStart && !IsAllowedAsNextInPoly(followState)) return false;
    else if (followState == Automaton.StateInvTStart && !IsNewTermAllowedInPoly()) return false;
    else if (currentState == Automaton.StateInvTFStart && !IsAllowedAsNextInPoly(followState)) return false;
    // finishing an exponential factor is only allowed when the number of variable references is large enough
    else if (followState == Automaton.StateExpFactorEnd && !IsCompleteExp()) return false;
    // finishing a polynomial (in log or inv) is only allowed when the number of variable references is large enough
    else if (followState == Automaton.StateInvTEnd && !IsTermCompleteInPoly()) return false;
    else if (followState == Automaton.StateLogTEnd && !IsTermCompleteInPoly()) return false;
    else if (nVars > maxVariables) return false;
    else return true;
  }

  /// <summary>
  /// Clears all information about the expression (variable count, terms, factors and the invalid flag).
  /// </summary>
  public void Reset() {
    nVars = 0;
    prevTerm = null;
    curTerm = null;
    curFactor = null;
    invalidExpression = false;
  }

  /// <summary>
  /// Starts a new (empty) current term.
  /// </summary>
  public void StartTerm() {
    curTerm = new TermInformation();
  }

  /// <summary>
  /// Starts a new (empty) current factor.
  /// </summary>
  /// <param name="state">The state number that is stored as the type of the factor.</param>
  public void StartFactor(int state) {
    curFactor = new FactorInformation();
    curFactor.factorType = state;
  }

  /// <summary>
  /// Adds a variable reference to the current factor and increments the total number of variable references.
  /// For log and inv factors the variable is added to the last term of the polynomial,
  /// so <see cref="StartNewTermInPoly"/> must have been called before.
  /// </summary>
  /// <param name="state">The state representing the variable (must not be smaller than Automaton.FirstDynamicState).</param>
  /// <exception cref="InvalidProgramException">The type of the current factor is none of variable, exp, log or inv.</exception>
  public void AddVarToCurrentFactor(int state) {
    Contract.Assert(Automaton.FirstDynamicState <= state);
    Contract.Assert(curTerm != null);
    Contract.Assert(curFactor != null);

    nVars++;
    curFactor.numVarReferences++;

    if (curFactor.factorType == Automaton.StateVariableFactorStart) {
      Contract.Assert(curFactor.variableState < 0); // not set before
      curFactor.variableState = state;
    } else if (curFactor.factorType == Automaton.StateExpFactorStart) {
      curFactor.expVariableStates.Add(state);
    } else if (curFactor.factorType == Automaton.StateLogFactorStart ||
               curFactor.factorType == Automaton.StateInvFactorStart) {
      curFactor.polyVariableStates.Last().Add(state);
    } else throw new InvalidProgramException();
  }

  /// <summary>
  /// Appends a new (empty) term to the polynomial of the current factor.
  /// </summary>
  public void StartNewTermInPoly() {
    curFactor.polyVariableStates.Add(new List<int>());
  }

  /// <summary>
  /// Completes the current factor and adds it to the current term.
  /// The expression is marked as invalid if the factor is smaller than the previous factor of the term.
  /// </summary>
  public void EndFactor() {
    // enforce non-decreasing factors
    if (curTerm.factors.Any() && CompareFactors(curTerm.factors.Last(), curFactor) > 0)
      invalidExpression = true;
    curTerm.factors.Add(curFactor);
    curFactor = null;
  }

  /// <summary>
  /// Completes the current term, which becomes the previous term.
  /// The expression is marked as invalid if the term is smaller than the previous term.
  /// </summary>
  public void EndTerm() {
    // enforce non-decreasing terms (TODO: equal terms should not be allowed)
    if (prevTerm != null && CompareTerms(prevTerm, curTerm) > 0)
      invalidExpression = true;

    prevTerm = curTerm;
    curTerm = null;
  }
}
}