#region License Information /* HeuristicLab * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL) * * This file is part of HeuristicLab. * * HeuristicLab is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * HeuristicLab is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with HeuristicLab. If not, see . */ #endregion using System; using System.Collections.Generic; using System.Diagnostics; using System.IO; using System.Linq; namespace HeuristicLab.Algorithms.DataAnalysis.MctsSymbolicRegression { // This is the core class for generating expressions. // It represents a finite state automaton, each state transition can be associated with an action (e.g. to produce code). // The automaton determines the possible structures for expressions. // // To understand this code, it is worthwhile to generate a graphical visualization of the automaton (see PrintAutomaton). // If the code is compiled in debug mode, the automaton produces a Graphviz file into the folder of the application // whenever an instance of the automaton is constructed. // // This class relies on two other classes: // - CodeGenerator to produce code for a stack-based evaluator and // - ConstraintHandler to restrict the allowed set of expressions. // // The ConstraintHandler extends the automaton and adds semantic restrictions for expressions produced by the automaton. // // internal class Automaton { // there is a single final state (ExprEnd) // states with lower values are closer to the final state // (this is helpful when we try to navigate to the final state) // we cannot use an enum type here because the set of states is dynamic (including states for variables) public const int StateExprEnd = 1; public const int StateTermEnd = 2; public const int StateFactorEnd = 3; public const int StateVariableFactorEnd = 4; public const int StateExpFactorEnd = 5; public const int StateLogFactorEnd = 6; public const int StateInvFactorEnd = 7; public const int StateExpFEnd = 8; public const int StateLogTEnd = 9; public const int StateInvTEnd = 10; public const int StateLogTFEnd = 11; public const int StateInvTFEnd = 12; public const int StateLogTFStart = 13; public const int StateInvTFStart = 14; public const int StateExpFStart = 15; public const int StateLogTStart = 16; public const int StateInvTStart = 17; public const int StateVariableFactorStart = 18; public const int StateExpFactorStart = 19; public const int StateLogFactorStart = 20; public const int StateInvFactorStart = 21; public const int StateFactorStart = 22; public const int StateTermStart = 23; public const int StateExpr = 24; public const int FirstDynamicState = 25; // more states for individual variables are created dynamically public readonly List stateNames = new List() { string.Empty, "ExprEnd", "TermEnd", "FactorEnd", "VariableFactorEnd", "ExpFactorEnd", "LogFactorEnd", "InvFactorEnd", "ExpFEnd", "LogTEnd", "InvTEnd", "LogTFEnd", "InfTFEnd", "LogTFStart", "InvTFStart", "ExpFStart", "LogTStart", "InvTStart", "VariableFactorStart", "ExpFactorStart", "LogFactorStart", "InvFactorStart", "FactorStart", "TermStart", "Expr", }; private const int StartState = StateExpr; public int CurrentState { get; private set; } private List[] followStates; private List[,] actions; // not every follow state is possible but this representation should be efficient private List[,] actionStrings; // just for printing private readonly CodeGenerator codeGenerator; private int numVarRefs; private int maximumNumberOfVariables; public Automaton(double[][] vars, bool allowProdOfVars = true, bool allowExp = true, bool allowLog = true, bool allowInv = true, bool allowMultipleTerms = false, int maxNumberOfVariables = 5) { int nVars = vars.Length; this.maximumNumberOfVariables = maxNumberOfVariables; codeGenerator = new CodeGenerator(); BuildAutomaton(nVars, allowProdOfVars, allowExp, allowLog, allowInv, allowMultipleTerms); Reset(); #if DEBUG PrintAutomaton(); #endif } // Produce postfix notation for expression: // Expr -> 0 Term { '+' Term } '+' 'exit' // Term -> c Fact { '*' Fact } '*' // Fact -> VarFact | ExpFact | LogFact | InvFact // VarFact -> var_1 ... var_n // ExpFact -> 1 ExpF { '*' ExpF } '*' c '*' 'exp' // c must be at end to allow scaling in evaluator // ExpF -> var_1 ... var_n // LogFact -> 0 LogT { '+' LogT } '+' c '+' 'log' // c must be at end to allow scaling in evaluator // LogT -> c LogTF { '*' LogTF } '*' // LogTF -> var_1 ... var_n // InvFact -> 1 InvT { '+' InvT } '+' 'inv' // InvT -> (var_1 ... var_n) c '*' private void BuildAutomaton(int nVars, bool allowProdOfVars = true, bool allowExp = true, bool allowLog = true, bool allowInv = true, bool allowMultipleTerms = false) { int nStates = FirstDynamicState + 4 * nVars; followStates = new List[nStates]; actions = new List[nStates, nStates]; actionStrings = new List[nStates, nStates]; // Expr -> 0 Term { '+' Term } '+' 'exit' AddTransition(StateExpr, StateTermStart, () => { codeGenerator.Reset(); codeGenerator.Emit1(OpCodes.LoadConst0); numVarRefs = 0; }, "0"); AddTransition(StateTermEnd, StateExprEnd, () => { codeGenerator.Emit1(OpCodes.Add); codeGenerator.Emit1(OpCodes.Exit); }, "+ exit"); if (allowMultipleTerms) AddTransition(StateTermEnd, StateTermStart, () => { codeGenerator.Emit1(OpCodes.Add); }, "+"); // Term -> c Fact { '*' Fact } '*' AddTransition(StateTermStart, StateFactorStart, () => { codeGenerator.Emit1(OpCodes.LoadParamN); }, "c"); AddTransition(StateFactorEnd, StateTermEnd, () => { codeGenerator.Emit1(OpCodes.Mul); }, "*"); AddTransition(StateFactorEnd, StateFactorStart, () => { codeGenerator.Emit1(OpCodes.Mul); }, "*"); // Fact -> VarFact | ExpFact | LogFact | InvFact if (allowProdOfVars) AddTransition(StateFactorStart, StateVariableFactorStart, () => { }, ""); if (allowExp) AddTransition(StateFactorStart, StateExpFactorStart, () => { }, ""); if (allowLog) AddTransition(StateFactorStart, StateLogFactorStart, () => { }, ""); if (allowInv) AddTransition(StateFactorStart, StateInvFactorStart, () => { }, ""); AddTransition(StateVariableFactorEnd, StateFactorEnd); AddTransition(StateExpFactorEnd, StateFactorEnd); AddTransition(StateLogFactorEnd, StateFactorEnd); AddTransition(StateInvFactorEnd, StateFactorEnd); // VarFact -> var_1 ... var_n // add dynamic states for each variable int curDynVarState = FirstDynamicState; for (int i = 0; i < nVars; i++) { short varIdx = (short)i; var varState = curDynVarState; stateNames.Add("var_1"); AddTransition(StateVariableFactorStart, curDynVarState, () => { codeGenerator.Emit2(OpCodes.LoadVar, varIdx); numVarRefs++; }, "var_" + varIdx + ""); AddTransition(curDynVarState, StateVariableFactorEnd); curDynVarState++; } // ExpFact -> 1 ExpF { '*' ExpF } '*' c '*' 'exp' AddTransition(StateExpFactorStart, StateExpFStart, () => { codeGenerator.Emit1(OpCodes.LoadConst1); }, "1"); AddTransition(StateExpFEnd, StateExpFactorEnd, () => { codeGenerator.Emit1(OpCodes.LoadParamN); codeGenerator.Emit1(OpCodes.Mul); codeGenerator.Emit1(OpCodes.Exp); }, "c*exp"); AddTransition(StateExpFEnd, StateExpFStart, () => { }, ""); // ExpF -> var_1 ... var_n for (int i = 0; i < nVars; i++) { short varIdx = (short)i; int varState = curDynVarState; stateNames.Add("var_2"); AddTransition(StateExpFStart, curDynVarState, () => { codeGenerator.Emit2(OpCodes.LoadVar, varIdx); numVarRefs++; }, "var_" + varIdx + ""); AddTransition(curDynVarState, StateExpFEnd, () => { codeGenerator.Emit1(OpCodes.Mul); }, "*"); curDynVarState++; } // must have c at end because of adjustment of c in evaluator // LogFact -> 0 LogT { '+' LogT } '+' c '+' 'log' AddTransition(StateLogFactorStart, StateLogTStart, () => { codeGenerator.Emit1(OpCodes.LoadConst0); }, "0"); AddTransition(StateLogTEnd, StateLogFactorEnd, () => { codeGenerator.Emit1(OpCodes.Add); codeGenerator.Emit1(OpCodes.LoadParamN); codeGenerator.Emit1(OpCodes.Add); codeGenerator.Emit1(OpCodes.Log); }, "+c+log"); AddTransition(StateLogTEnd, StateLogTStart, () => { codeGenerator.Emit1(OpCodes.Add); }, "+"); // LogT -> c LogTF { '*' LogTF } '*' AddTransition(StateLogTStart, StateLogTFStart, () => { codeGenerator.Emit1(OpCodes.LoadParamN); }, "c"); AddTransition(StateLogTFEnd, StateLogTEnd, () => { codeGenerator.Emit1(OpCodes.Mul); }, "*"); AddTransition(StateLogTFEnd, StateLogTFStart, () => { codeGenerator.Emit1(OpCodes.Mul); }, "*"); // LogTF -> var_1 ... var_n for (int i = 0; i < nVars; i++) { short varIdx = (short)i; int varState = curDynVarState; stateNames.Add("var_3"); AddTransition(StateLogTFStart, curDynVarState, () => { codeGenerator.Emit2(OpCodes.LoadVar, varIdx); numVarRefs++; }, "var_" + varIdx + ""); AddTransition(curDynVarState, StateLogTFEnd); curDynVarState++; } // InvFact -> 1 InvT { '+' InvT } '+' 'inv' AddTransition(StateInvFactorStart, StateInvTStart, () => { codeGenerator.Emit1(OpCodes.LoadConst1); }, "c"); AddTransition(StateInvTEnd, StateInvFactorEnd, () => { codeGenerator.Emit1(OpCodes.Add); codeGenerator.Emit1(OpCodes.Inv); }, "+inv"); AddTransition(StateInvTEnd, StateInvTStart, () => { codeGenerator.Emit1(OpCodes.Add); }, "+"); // InvT -> c InvTF { '*' InvTF } '*' AddTransition(StateInvTStart, StateInvTFStart, () => { codeGenerator.Emit1(OpCodes.LoadParamN); }, "c"); AddTransition(StateInvTFEnd, StateInvTEnd, () => { codeGenerator.Emit1(OpCodes.Mul); }, "*"); AddTransition(StateInvTFEnd, StateInvTFStart, () => { codeGenerator.Emit1(OpCodes.Mul); }, "*"); // InvTF -> (var_1 ... var_n) c '*' for (int i = 0; i < nVars; i++) { short varIdx = (short)i; int varState = curDynVarState; stateNames.Add("var_4"); AddTransition(StateInvTFStart, curDynVarState, () => { codeGenerator.Emit2(OpCodes.LoadVar, varIdx); numVarRefs++; }, "var_" + varIdx + ""); AddTransition(curDynVarState, StateInvTFEnd); curDynVarState++; } followStates[StateExprEnd] = new List(); // no follow states // order all follow states (the first follow state leads to the final state) foreach (var list in followStates) { if (list != null) list.Sort(); } } private void AddTransition(int fromState, int toState) { if (followStates[fromState] == null) followStates[fromState] = new List(); followStates[fromState].Add(toState); } private void AddTransition(int fromState, int toState, Action action, string str) { if (followStates[fromState] == null) followStates[fromState] = new List(); followStates[fromState].Add(toState); if (actions[fromState, toState] == null) { actions[fromState, toState] = new List(); actionStrings[fromState, toState] = new List(); } actions[fromState, toState].Add(action); actionStrings[fromState, toState].Add(str); } public void FollowStates(int state, ref int[] buf, out int nElements) { var fs = followStates[state]; int j = 0; for (int i = 0; i < fs.Count; i++) { var s = fs[i]; if (IsAllowedFollowState(state, s)) { buf[j++] = s; } } nElements = j; } private bool IsAllowedFollowState(int state, int nextState) { // any state is allowed if we have not reached the max number of variable references // otherwise we can only go towards the final state (smaller state numbers) if (numVarRefs < maximumNumberOfVariables) return true; else return state > nextState; } public void Goto(int targetState) { Debug.Assert(followStates[CurrentState].Contains(targetState)); if (actions[CurrentState, targetState] != null) actions[CurrentState, targetState].ForEach(a => a()); // execute all actions CurrentState = targetState; } public bool IsFinalState(int s) { return s == StateExprEnd && numVarRefs <= maximumNumberOfVariables; } public bool IsEvalState(int v) { return v == StateFactorEnd || v == StateLogTFEnd || v == StateInvTFEnd || v == StateExpFEnd || v == StateLogTEnd || v == StateInvTEnd || v == StateTermEnd ; } // Always returns valid code. // If the method is called in an intermediate state the expression is completed by // taking the shortest route to the final state. // After that state of the automaton is restored to the current state. public void GetCode(out byte[] code, out int nParams) { int storedState = CurrentState; int storedPC = codeGenerator.ProgramCounter; if (CurrentState != StateExprEnd) { // save state and code, storedState = CurrentState; storedPC = codeGenerator.ProgramCounter; // take shortest route to final state (smaller state values are closer to the final state) while (CurrentState != StateExprEnd) { Debug.Assert(followStates[CurrentState][0] == followStates[CurrentState].Min()); var nextState = followStates[CurrentState][0]; Goto(nextState); } } codeGenerator.GetCode(out code, out nParams); // restore codeGenerator.ProgramCounter = storedPC; CurrentState = storedState; } public void Reset() { CurrentState = StartState; codeGenerator.Reset(); numVarRefs = 0; } internal string GetActionString(int fromState, int toState) { return actionStrings[fromState, toState] != null ? string.Join(" , ", actionStrings[fromState, toState]) : ""; } #if DEBUG public void PrintAutomaton() { using (var writer = new StreamWriter("automaton.gv")) { writer.WriteLine("digraph {"); // writer.WriteLine("rankdir=LR"); for (int s = 1; s < stateNames.Count; s++) { for (int i = 0; i < followStates[s].Count; i++) { if (followStates[s][i] <= 0) continue; var followS = followStates[s][i]; var label = actionStrings[s, followS] != null ? string.Join(" , ", actionStrings[s, followS]) : ""; writer.WriteLine("{0} -> {1} [ label = \"{2}\" ];", stateNames[s], stateNames[followS], label); } } writer.WriteLine("}"); } } #endif } }