#region License Information
/* HeuristicLab
* Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see .
*/
#endregion
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
namespace HeuristicLab.Algorithms.DataAnalysis.MctsSymbolicRegression {
// This is the core class for generating expressions.
// It represents a finite state automaton, each state transition can be associated with an action (e.g. to produce code).
// The automaton determines the possible structures for expressions.
//
// To understand this code, it is worthwhile to generate a graphical visualization of the automaton (see PrintAutomaton).
// If the code is compiled in debug mode, the automaton produces a Graphviz file into the folder of the application
// whenever an instance of the automaton is constructed.
//
// This class relies on two other classes:
// - CodeGenerator to produce code for a stack-based evaluator and
// - ConstraintHandler to restrict the allowed set of expressions.
//
// The ConstraintHandler extends the automaton and adds semantic restrictions for expressions produced by the automaton.
//
//
internal class Automaton {
// there is a single final state (ExprEnd)
// states with lower values are closer to the final state
// (this is helpful when we try to navigate to the final state)
// we cannot use an enum type here because the set of states is dynamic (including states for variables)
public const int StateExprEnd = 1;
public const int StateTermEnd = 2;
public const int StateFactorEnd = 3;
public const int StateVariableFactorEnd = 4;
public const int StateExpFactorEnd = 5;
public const int StateLogFactorEnd = 6;
public const int StateInvFactorEnd = 7;
public const int StateExpFEnd = 8;
public const int StateLogTEnd = 9;
public const int StateInvTEnd = 10;
public const int StateLogTFEnd = 11;
public const int StateInvTFEnd = 12;
public const int StateLogTFStart = 13;
public const int StateInvTFStart = 14;
public const int StateExpFStart = 15;
public const int StateLogTStart = 16;
public const int StateInvTStart = 17;
public const int StateVariableFactorStart = 18;
public const int StateExpFactorStart = 19;
public const int StateLogFactorStart = 20;
public const int StateInvFactorStart = 21;
public const int StateFactorStart = 22;
public const int StateTermStart = 23;
public const int StateExpr = 24;
public const int FirstDynamicState = 25;
// more states for individual variables are created dynamically
public readonly List stateNames = new List() {
string.Empty,
"ExprEnd",
"TermEnd",
"FactorEnd",
"VariableFactorEnd",
"ExpFactorEnd",
"LogFactorEnd",
"InvFactorEnd",
"ExpFEnd",
"LogTEnd",
"InvTEnd",
"LogTFEnd",
"InfTFEnd",
"LogTFStart",
"InvTFStart",
"ExpFStart",
"LogTStart",
"InvTStart",
"VariableFactorStart",
"ExpFactorStart",
"LogFactorStart",
"InvFactorStart",
"FactorStart",
"TermStart",
"Expr",
};
private const int StartState = StateExpr;
public int CurrentState { get; private set; }
private List[] followStates;
private List[,] actions; // not every follow state is possible but this representation should be efficient
private List[,] actionStrings; // just for printing
private readonly CodeGenerator codeGenerator;
private int numVarRefs;
private int maximumNumberOfVariables;
public Automaton(double[][] vars,
bool allowProdOfVars = true,
bool allowExp = true,
bool allowLog = true,
bool allowInv = true,
bool allowMultipleTerms = false,
int maxNumberOfVariables = 5) {
int nVars = vars.Length;
this.maximumNumberOfVariables = maxNumberOfVariables;
codeGenerator = new CodeGenerator();
BuildAutomaton(nVars, allowProdOfVars, allowExp, allowLog, allowInv, allowMultipleTerms);
Reset();
#if DEBUG
PrintAutomaton();
#endif
}
// Produce postfix notation for expression:
// Expr -> 0 Term { '+' Term } '+' 'exit'
// Term -> c Fact { '*' Fact } '*'
// Fact -> VarFact | ExpFact | LogFact | InvFact
// VarFact -> var_1 ... var_n
// ExpFact -> 1 ExpF { '*' ExpF } '*' c '*' 'exp' // c must be at end to allow scaling in evaluator
// ExpF -> var_1 ... var_n
// LogFact -> 0 LogT { '+' LogT } '+' c '+' 'log' // c must be at end to allow scaling in evaluator
// LogT -> c LogTF { '*' LogTF } '*'
// LogTF -> var_1 ... var_n
// InvFact -> 1 InvT { '+' InvT } '+' 'inv'
// InvT -> (var_1 ... var_n) c '*'
private void BuildAutomaton(int nVars,
bool allowProdOfVars = true,
bool allowExp = true,
bool allowLog = true,
bool allowInv = true,
bool allowMultipleTerms = false) {
int nStates = FirstDynamicState + 4 * nVars;
followStates = new List[nStates];
actions = new List[nStates, nStates];
actionStrings = new List[nStates, nStates];
// Expr -> 0 Term { '+' Term } '+' 'exit'
AddTransition(StateExpr, StateTermStart, () => {
codeGenerator.Reset();
codeGenerator.Emit1(OpCodes.LoadConst0);
numVarRefs = 0;
}, "0");
AddTransition(StateTermEnd, StateExprEnd, () => {
codeGenerator.Emit1(OpCodes.Add);
codeGenerator.Emit1(OpCodes.Exit);
}, "+ exit");
if (allowMultipleTerms)
AddTransition(StateTermEnd, StateTermStart, () => {
codeGenerator.Emit1(OpCodes.Add);
}, "+");
// Term -> c Fact { '*' Fact } '*'
AddTransition(StateTermStart, StateFactorStart,
() => {
codeGenerator.Emit1(OpCodes.LoadParamN);
},
"c");
AddTransition(StateFactorEnd, StateTermEnd,
() => {
codeGenerator.Emit1(OpCodes.Mul);
},
"*");
AddTransition(StateFactorEnd, StateFactorStart,
() => { codeGenerator.Emit1(OpCodes.Mul); },
"*");
// Fact -> VarFact | ExpFact | LogFact | InvFact
if (allowProdOfVars)
AddTransition(StateFactorStart, StateVariableFactorStart, () => {
}, "");
if (allowExp)
AddTransition(StateFactorStart, StateExpFactorStart, () => {
}, "");
if (allowLog)
AddTransition(StateFactorStart, StateLogFactorStart, () => {
}, "");
if (allowInv)
AddTransition(StateFactorStart, StateInvFactorStart, () => {
}, "");
AddTransition(StateVariableFactorEnd, StateFactorEnd);
AddTransition(StateExpFactorEnd, StateFactorEnd);
AddTransition(StateLogFactorEnd, StateFactorEnd);
AddTransition(StateInvFactorEnd, StateFactorEnd);
// VarFact -> var_1 ... var_n
// add dynamic states for each variable
int curDynVarState = FirstDynamicState;
for (int i = 0; i < nVars; i++) {
short varIdx = (short)i;
var varState = curDynVarState;
stateNames.Add("var_1");
AddTransition(StateVariableFactorStart, curDynVarState,
() => {
codeGenerator.Emit2(OpCodes.LoadVar, varIdx);
numVarRefs++;
},
"var_" + varIdx + "");
AddTransition(curDynVarState, StateVariableFactorEnd);
curDynVarState++;
}
// ExpFact -> 1 ExpF { '*' ExpF } '*' c '*' 'exp'
AddTransition(StateExpFactorStart, StateExpFStart,
() => {
codeGenerator.Emit1(OpCodes.LoadConst1);
},
"1");
AddTransition(StateExpFEnd, StateExpFactorEnd,
() => {
codeGenerator.Emit1(OpCodes.LoadParamN);
codeGenerator.Emit1(OpCodes.Mul);
codeGenerator.Emit1(OpCodes.Exp);
},
"c*exp");
AddTransition(StateExpFEnd, StateExpFStart,
() => { },
"");
// ExpF -> var_1 ... var_n
for (int i = 0; i < nVars; i++) {
short varIdx = (short)i;
int varState = curDynVarState;
stateNames.Add("var_2");
AddTransition(StateExpFStart, curDynVarState,
() => {
codeGenerator.Emit2(OpCodes.LoadVar, varIdx);
numVarRefs++;
},
"var_" + varIdx + "");
AddTransition(curDynVarState, StateExpFEnd,
() => {
codeGenerator.Emit1(OpCodes.Mul);
}, "*");
curDynVarState++;
}
// must have c at end because of adjustment of c in evaluator
// LogFact -> 0 LogT { '+' LogT } '+' c '+' 'log'
AddTransition(StateLogFactorStart, StateLogTStart,
() => {
codeGenerator.Emit1(OpCodes.LoadConst0);
},
"0");
AddTransition(StateLogTEnd, StateLogFactorEnd,
() => {
codeGenerator.Emit1(OpCodes.Add);
codeGenerator.Emit1(OpCodes.LoadParamN);
codeGenerator.Emit1(OpCodes.Add);
codeGenerator.Emit1(OpCodes.Log);
},
"+c+log");
AddTransition(StateLogTEnd, StateLogTStart,
() => { codeGenerator.Emit1(OpCodes.Add); },
"+");
// LogT -> c LogTF { '*' LogTF } '*'
AddTransition(StateLogTStart, StateLogTFStart,
() => {
codeGenerator.Emit1(OpCodes.LoadParamN);
},
"c");
AddTransition(StateLogTFEnd, StateLogTEnd,
() => {
codeGenerator.Emit1(OpCodes.Mul);
},
"*");
AddTransition(StateLogTFEnd, StateLogTFStart,
() => {
codeGenerator.Emit1(OpCodes.Mul);
},
"*");
// LogTF -> var_1 ... var_n
for (int i = 0; i < nVars; i++) {
short varIdx = (short)i;
int varState = curDynVarState;
stateNames.Add("var_3");
AddTransition(StateLogTFStart, curDynVarState,
() => {
codeGenerator.Emit2(OpCodes.LoadVar, varIdx);
numVarRefs++;
},
"var_" + varIdx + "");
AddTransition(curDynVarState, StateLogTFEnd);
curDynVarState++;
}
// InvFact -> 1 InvT { '+' InvT } '+' 'inv'
AddTransition(StateInvFactorStart, StateInvTStart,
() => {
codeGenerator.Emit1(OpCodes.LoadConst1);
},
"c");
AddTransition(StateInvTEnd, StateInvFactorEnd,
() => {
codeGenerator.Emit1(OpCodes.Add);
codeGenerator.Emit1(OpCodes.Inv);
},
"+inv");
AddTransition(StateInvTEnd, StateInvTStart,
() => { codeGenerator.Emit1(OpCodes.Add); },
"+");
// InvT -> c InvTF { '*' InvTF } '*'
AddTransition(StateInvTStart, StateInvTFStart,
() => {
codeGenerator.Emit1(OpCodes.LoadParamN);
},
"c");
AddTransition(StateInvTFEnd, StateInvTEnd,
() => {
codeGenerator.Emit1(OpCodes.Mul);
},
"*");
AddTransition(StateInvTFEnd, StateInvTFStart,
() => {
codeGenerator.Emit1(OpCodes.Mul);
},
"*");
// InvTF -> (var_1 ... var_n) c '*'
for (int i = 0; i < nVars; i++) {
short varIdx = (short)i;
int varState = curDynVarState;
stateNames.Add("var_4");
AddTransition(StateInvTFStart, curDynVarState,
() => {
codeGenerator.Emit2(OpCodes.LoadVar, varIdx);
numVarRefs++;
},
"var_" + varIdx + "");
AddTransition(curDynVarState, StateInvTFEnd);
curDynVarState++;
}
followStates[StateExprEnd] = new List(); // no follow states
// order all follow states (the first follow state leads to the final state)
foreach (var list in followStates) {
if (list != null)
list.Sort();
}
}
private void AddTransition(int fromState, int toState) {
if (followStates[fromState] == null) followStates[fromState] = new List();
followStates[fromState].Add(toState);
}
private void AddTransition(int fromState, int toState, Action action, string str) {
if (followStates[fromState] == null) followStates[fromState] = new List();
followStates[fromState].Add(toState);
if (actions[fromState, toState] == null) {
actions[fromState, toState] = new List();
actionStrings[fromState, toState] = new List();
}
actions[fromState, toState].Add(action);
actionStrings[fromState, toState].Add(str);
}
public void FollowStates(int state, ref int[] buf, out int nElements) {
var fs = followStates[state];
int j = 0;
for (int i = 0; i < fs.Count; i++) {
var s = fs[i];
if (IsAllowedFollowState(state, s)) {
buf[j++] = s;
}
}
nElements = j;
}
private bool IsAllowedFollowState(int state, int nextState) {
// any state is allowed if we have not reached the max number of variable references
// otherwise we can only go towards the final state (smaller state numbers)
if (numVarRefs < maximumNumberOfVariables) return true;
else return state > nextState;
}
public void Goto(int targetState) {
Debug.Assert(followStates[CurrentState].Contains(targetState));
if (actions[CurrentState, targetState] != null)
actions[CurrentState, targetState].ForEach(a => a()); // execute all actions
CurrentState = targetState;
}
public bool IsFinalState(int s) {
return s == StateExprEnd && numVarRefs <= maximumNumberOfVariables;
}
public bool IsEvalState(int v) {
return v == StateFactorEnd ||
v == StateLogTFEnd ||
v == StateInvTFEnd ||
v == StateExpFEnd ||
v == StateLogTEnd ||
v == StateInvTEnd ||
v == StateTermEnd
;
}
// Always returns valid code.
// If the method is called in an intermediate state the expression is completed by
// taking the shortest route to the final state.
// After that state of the automaton is restored to the current state.
public void GetCode(out byte[] code, out int nParams) {
int storedState = CurrentState;
int storedPC = codeGenerator.ProgramCounter;
if (CurrentState != StateExprEnd) {
// save state and code,
storedState = CurrentState;
storedPC = codeGenerator.ProgramCounter;
// take shortest route to final state (smaller state values are closer to the final state)
while (CurrentState != StateExprEnd) {
Debug.Assert(followStates[CurrentState][0] == followStates[CurrentState].Min());
var nextState = followStates[CurrentState][0];
Goto(nextState);
}
}
codeGenerator.GetCode(out code, out nParams);
// restore
codeGenerator.ProgramCounter = storedPC;
CurrentState = storedState;
}
public void Reset() {
CurrentState = StartState;
codeGenerator.Reset();
numVarRefs = 0;
}
internal string GetActionString(int fromState, int toState) {
return actionStrings[fromState, toState] != null ? string.Join(" , ", actionStrings[fromState, toState]) : "";
}
#if DEBUG
public void PrintAutomaton() {
using (var writer = new StreamWriter("automaton.gv")) {
writer.WriteLine("digraph {");
// writer.WriteLine("rankdir=LR");
for (int s = 1; s < stateNames.Count; s++) {
for (int i = 0; i < followStates[s].Count; i++) {
if (followStates[s][i] <= 0) continue;
var followS = followStates[s][i];
var label = actionStrings[s, followS] != null ? string.Join(" , ", actionStrings[s, followS]) : "";
writer.WriteLine("{0} -> {1} [ label = \"{2}\" ];", stateNames[s], stateNames[followS], label);
}
}
writer.WriteLine("}");
}
}
#endif
}
}