Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2796_SymbReg/HeuristicLab.Algorithms.DataAnalysis/3.4/MctsSymbolicRegression/Automaton.cs @ 17511

Last change on this file since 17511 was 15606, checked in by gkronber, 7 years ago

#2796: comments and typos

File size: 16.8 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Diagnostics;
25using System.IO;
26using System.Linq;
27
28namespace HeuristicLab.Algorithms.DataAnalysis.MctsSymbolicRegression {
29  // This is the core class for generating expressions.
30  // It represents a finite state automaton, each state transition can be associated with an action (e.g. to produce code).
31  // The automaton determines the possible structures for expressions.
32  //
33  // To understand this code, it is worthwhile to generate a graphical visualization of the automaton (see PrintAutomaton).
34  // If the code is compiled in debug mode, the automaton produces a Graphviz file into the folder of the application
35  // whenever an instance of the automaton is constructed.
36  //
37  // This class relies on two other classes:
38  // - CodeGenerator to produce code for a stack-based evaluator and
39  // - ConstraintHandler to restrict the allowed set of expressions.
40  //
41  // The ConstraintHandler extends the automaton and adds semantic restrictions for expressions produced by the automaton.
42  //
43  //
44  internal class Automaton {
45
46    // there is a single final state (ExprEnd)
47    // states with lower values are closer to the final state
48    // (this is helpful when we try to navigate to the final state)
49
50    // we cannot use an enum type here because the set of states is dynamic (including states for variables)
51    public const int StateExprEnd = 1;
52    public const int StateTermEnd = 2;
53    public const int StateFactorEnd = 3;
54
55    public const int StateVariableFactorEnd = 4;
56    public const int StateExpFactorEnd = 5;
57    public const int StateLogFactorEnd = 6;
58    public const int StateInvFactorEnd = 7;
59
60    public const int StateExpFEnd = 8;
61    public const int StateLogTEnd = 9;
62    public const int StateInvTEnd = 10;
63
64    public const int StateLogTFEnd = 11;
65    public const int StateInvTFEnd = 12;
66
67    public const int StateLogTFStart = 13;
68    public const int StateInvTFStart = 14;
69
70    public const int StateExpFStart = 15;
71    public const int StateLogTStart = 16;
72    public const int StateInvTStart = 17;
73
74    public const int StateVariableFactorStart = 18;
75    public const int StateExpFactorStart = 19;
76    public const int StateLogFactorStart = 20;
77    public const int StateInvFactorStart = 21;
78
79    public const int StateFactorStart = 22;
80    public const int StateTermStart = 23;
81    public const int StateExpr = 24;
82    public const int FirstDynamicState = 25;
83    // more states for individual variables are created dynamically
84    public readonly List<string> stateNames = new List<string>() {
85       string.Empty,
86      "ExprEnd",
87      "TermEnd",
88      "FactorEnd",
89      "VariableFactorEnd",
90      "ExpFactorEnd",
91      "LogFactorEnd",
92      "InvFactorEnd",
93      "ExpFEnd",
94      "LogTEnd",
95      "InvTEnd",
96      "LogTFEnd",
97      "InfTFEnd",
98      "LogTFStart",
99      "InvTFStart",
100      "ExpFStart",
101      "LogTStart",
102      "InvTStart",
103      "VariableFactorStart",
104      "ExpFactorStart",
105      "LogFactorStart",
106      "InvFactorStart",
107      "FactorStart",
108      "TermStart",
109      "Expr",
110    };
111
112
113    private const int StartState = StateExpr;
114    public int CurrentState { get; private set; }
115
116    private List<int>[] followStates;
117    private List<Action>[,] actions; // not every follow state is possible but this representation should be efficient
118    private List<string>[,] actionStrings; // just for printing
119    private readonly CodeGenerator codeGenerator;
120    private int numVarRefs;
121    private int maximumNumberOfVariables;
122
123    public Automaton(double[][] vars,
124       bool allowProdOfVars = true,
125       bool allowExp = true,
126       bool allowLog = true,
127       bool allowInv = true,
128       bool allowMultipleTerms = false,
129       int maxNumberOfVariables = 5) {
130      int nVars = vars.Length;
131      this.maximumNumberOfVariables = maxNumberOfVariables;
132      codeGenerator = new CodeGenerator();
133      BuildAutomaton(nVars, allowProdOfVars, allowExp, allowLog, allowInv, allowMultipleTerms);
134
135      Reset();
136#if DEBUG
137      PrintAutomaton();
138#endif
139    }
140
141    // Produce postfix notation for expression:
142    // Expr -> 0 Term { '+' Term } '+' 'exit'
143    // Term -> c Fact { '*' Fact } '*'
144    // Fact -> VarFact | ExpFact | LogFact | InvFact
145    // VarFact -> var_1 ... var_n
146    // ExpFact -> 1 ExpF { '*' ExpF } '*' c '*' 'exp' // c must be at end to allow scaling in evaluator
147    // ExpF    -> var_1 ... var_n
148    // LogFact -> 0 LogT { '+' LogT } '+' c '+' 'log' // c must be at end to allow scaling in evaluator
149    // LogT    -> c LogTF { '*' LogTF } '*'
150    // LogTF   -> var_1 ... var_n
151    // InvFact -> 1 InvT { '+' InvT } '+' 'inv'
152    // InvT    -> (var_1 ... var_n) c '*'
153    private void BuildAutomaton(int nVars,
154      bool allowProdOfVars = true,
155       bool allowExp = true,
156       bool allowLog = true,
157       bool allowInv = true,
158       bool allowMultipleTerms = false) {
159
160      int nStates = FirstDynamicState + 4 * nVars;
161      followStates = new List<int>[nStates];
162      actions = new List<Action>[nStates, nStates];
163      actionStrings = new List<string>[nStates, nStates];
164
165      // Expr -> 0 Term { '+' Term } '+' 'exit'
166      AddTransition(StateExpr, StateTermStart, () => {
167        codeGenerator.Reset();
168        codeGenerator.Emit1(OpCodes.LoadConst0);
169        numVarRefs = 0;
170      }, "0");
171      AddTransition(StateTermEnd, StateExprEnd, () => {
172        codeGenerator.Emit1(OpCodes.Add);
173        codeGenerator.Emit1(OpCodes.Exit);
174      }, "+ exit");
175      if (allowMultipleTerms)
176        AddTransition(StateTermEnd, StateTermStart, () => {
177          codeGenerator.Emit1(OpCodes.Add);
178        }, "+");
179
180      // Term -> c Fact { '*' Fact } '*'
181      AddTransition(StateTermStart, StateFactorStart,
182        () => {
183          codeGenerator.Emit1(OpCodes.LoadParamN);
184        },
185        "c");
186      AddTransition(StateFactorEnd, StateTermEnd,
187        () => {
188          codeGenerator.Emit1(OpCodes.Mul);
189        },
190        "*");
191
192      AddTransition(StateFactorEnd, StateFactorStart,
193        () => { codeGenerator.Emit1(OpCodes.Mul); },
194        "*");
195
196
197      // Fact -> VarFact | ExpFact | LogFact | InvFact
198      if (allowProdOfVars)
199        AddTransition(StateFactorStart, StateVariableFactorStart, () => {
200        }, "");
201      if (allowExp)
202        AddTransition(StateFactorStart, StateExpFactorStart, () => {
203        }, "");
204      if (allowLog)
205        AddTransition(StateFactorStart, StateLogFactorStart, () => {
206        }, "");
207      if (allowInv)
208        AddTransition(StateFactorStart, StateInvFactorStart, () => {
209        }, "");
210      AddTransition(StateVariableFactorEnd, StateFactorEnd);
211      AddTransition(StateExpFactorEnd, StateFactorEnd);
212      AddTransition(StateLogFactorEnd, StateFactorEnd);
213      AddTransition(StateInvFactorEnd, StateFactorEnd);
214
215      // VarFact -> var_1 ... var_n
216      // add dynamic states for each variable
217      int curDynVarState = FirstDynamicState;
218      for (int i = 0; i < nVars; i++) {
219        short varIdx = (short)i;
220        var varState = curDynVarState;
221        stateNames.Add("var_1");
222        AddTransition(StateVariableFactorStart, curDynVarState,
223          () => {
224            codeGenerator.Emit2(OpCodes.LoadVar, varIdx);
225            numVarRefs++;
226          },
227          "var_" + varIdx + "");
228        AddTransition(curDynVarState, StateVariableFactorEnd);
229        curDynVarState++;
230      }
231
232      // ExpFact -> 1 ExpF { '*' ExpF } '*' c '*' 'exp'
233      AddTransition(StateExpFactorStart, StateExpFStart,
234        () => {
235          codeGenerator.Emit1(OpCodes.LoadConst1);
236        },
237        "1");
238      AddTransition(StateExpFEnd, StateExpFactorEnd,
239        () => {
240          codeGenerator.Emit1(OpCodes.LoadParamN);
241          codeGenerator.Emit1(OpCodes.Mul);
242          codeGenerator.Emit1(OpCodes.Exp);
243        },
244        "c*exp");
245      AddTransition(StateExpFEnd, StateExpFStart,
246        () => { },
247        "");
248
249      // ExpF    -> var_1 ... var_n
250      for (int i = 0; i < nVars; i++) {
251        short varIdx = (short)i;
252        int varState = curDynVarState;
253        stateNames.Add("var_2");
254        AddTransition(StateExpFStart, curDynVarState,
255          () => {
256            codeGenerator.Emit2(OpCodes.LoadVar, varIdx);
257            numVarRefs++;
258          },
259          "var_" + varIdx + "");
260        AddTransition(curDynVarState, StateExpFEnd,
261          () => {
262            codeGenerator.Emit1(OpCodes.Mul);
263          }, "*");
264        curDynVarState++;
265      }
266
267      // must have c at end because of adjustment of c in evaluator
268      // LogFact -> 0 LogT { '+' LogT } '+' c '+' 'log'
269      AddTransition(StateLogFactorStart, StateLogTStart,
270        () => {
271          codeGenerator.Emit1(OpCodes.LoadConst0);
272        },
273        "0");
274      AddTransition(StateLogTEnd, StateLogFactorEnd,
275        () => {
276          codeGenerator.Emit1(OpCodes.Add);
277          codeGenerator.Emit1(OpCodes.LoadParamN);
278          codeGenerator.Emit1(OpCodes.Add);
279          codeGenerator.Emit1(OpCodes.Log);
280        },
281        "+c+log");
282      AddTransition(StateLogTEnd, StateLogTStart,
283        () => { codeGenerator.Emit1(OpCodes.Add); },
284        "+");
285
286      // LogT    -> c LogTF { '*' LogTF } '*'
287      AddTransition(StateLogTStart, StateLogTFStart,
288        () => {
289          codeGenerator.Emit1(OpCodes.LoadParamN);
290        },
291        "c");
292      AddTransition(StateLogTFEnd, StateLogTEnd,
293        () => {
294          codeGenerator.Emit1(OpCodes.Mul);
295        },
296        "*");
297      AddTransition(StateLogTFEnd, StateLogTFStart,
298        () => {
299          codeGenerator.Emit1(OpCodes.Mul);
300        },
301        "*");
302
303      // LogTF   -> var_1 ... var_n
304      for (int i = 0; i < nVars; i++) {
305        short varIdx = (short)i;
306        int varState = curDynVarState;
307        stateNames.Add("var_3");
308        AddTransition(StateLogTFStart, curDynVarState,
309          () => {
310            codeGenerator.Emit2(OpCodes.LoadVar, varIdx);
311            numVarRefs++;
312          },
313          "var_" + varIdx + "");
314        AddTransition(curDynVarState, StateLogTFEnd);
315        curDynVarState++;
316      }
317
318      // InvFact -> 1 InvT { '+' InvT } '+' 'inv'
319      AddTransition(StateInvFactorStart, StateInvTStart,
320        () => {
321          codeGenerator.Emit1(OpCodes.LoadConst1);
322        },
323        "c");
324      AddTransition(StateInvTEnd, StateInvFactorEnd,
325        () => {
326          codeGenerator.Emit1(OpCodes.Add);
327          codeGenerator.Emit1(OpCodes.Inv);
328        },
329        "+inv");
330      AddTransition(StateInvTEnd, StateInvTStart,
331        () => { codeGenerator.Emit1(OpCodes.Add); },
332        "+");
333
334      // InvT    -> c InvTF { '*' InvTF } '*'
335      AddTransition(StateInvTStart, StateInvTFStart,
336        () => {
337          codeGenerator.Emit1(OpCodes.LoadParamN);
338        },
339        "c");
340      AddTransition(StateInvTFEnd, StateInvTEnd,
341        () => {
342          codeGenerator.Emit1(OpCodes.Mul);
343        },
344        "*");
345      AddTransition(StateInvTFEnd, StateInvTFStart,
346        () => {
347          codeGenerator.Emit1(OpCodes.Mul);
348        },
349        "*");
350
351      // InvTF    -> (var_1 ... var_n) c '*'
352      for (int i = 0; i < nVars; i++) {
353        short varIdx = (short)i;
354        int varState = curDynVarState;
355        stateNames.Add("var_4");
356        AddTransition(StateInvTFStart, curDynVarState,
357          () => {
358            codeGenerator.Emit2(OpCodes.LoadVar, varIdx);
359            numVarRefs++;
360          },
361          "var_" + varIdx + "");
362        AddTransition(curDynVarState, StateInvTFEnd);
363        curDynVarState++;
364      }
365
366      followStates[StateExprEnd] = new List<int>(); // no follow states
367
368      // order all follow states (the first follow state leads to the final state)
369      foreach (var list in followStates) {
370        if (list != null)
371          list.Sort();
372      }
373    }
374
375    private void AddTransition(int fromState, int toState) {
376      if (followStates[fromState] == null) followStates[fromState] = new List<int>();
377      followStates[fromState].Add(toState);
378    }
379    private void AddTransition(int fromState, int toState, Action action, string str) {
380      if (followStates[fromState] == null) followStates[fromState] = new List<int>();
381      followStates[fromState].Add(toState);
382
383      if (actions[fromState, toState] == null) {
384        actions[fromState, toState] = new List<Action>();
385        actionStrings[fromState, toState] = new List<string>();
386      }
387
388      actions[fromState, toState].Add(action);
389      actionStrings[fromState, toState].Add(str);
390    }
391
392    public void FollowStates(int state, ref int[] buf, out int nElements) {
393      var fs = followStates[state];
394      int j = 0;
395      for (int i = 0; i < fs.Count; i++) {
396        var s = fs[i];
397        if (IsAllowedFollowState(state, s)) {
398          buf[j++] = s;
399        }
400      }
401      nElements = j;
402    }
403
404    private bool IsAllowedFollowState(int state, int nextState) {
405      // any state is allowed if we have not reached the max number of variable references
406      // otherwise we can only go towards the final state (smaller state numbers)
407      if (numVarRefs < maximumNumberOfVariables) return true;
408      else return state > nextState;
409    }
410
411    public void Goto(int targetState) {
412      Debug.Assert(followStates[CurrentState].Contains(targetState));
413      if (actions[CurrentState, targetState] != null)
414        actions[CurrentState, targetState].ForEach(a => a()); // execute all actions
415      CurrentState = targetState;
416    }
417
418    public bool IsFinalState(int s) {
419      return s == StateExprEnd && numVarRefs <= maximumNumberOfVariables;
420    }
421
422    public bool IsEvalState(int v) {
423      return v == StateFactorEnd ||
424        v == StateLogTFEnd ||
425        v == StateInvTFEnd ||
426        v == StateExpFEnd ||
427        v == StateLogTEnd ||
428        v == StateInvTEnd ||
429        v == StateTermEnd
430        ;
431    }
432
433
434    // Always returns valid code.
435    // If the method is called in an intermediate state the expression is completed by
436    // taking the shortest route to the final state.
437    // After that state of the automaton is restored to the current state.
438    public void GetCode(out byte[] code, out int nParams) {
439      int storedState = CurrentState;
440      int storedPC = codeGenerator.ProgramCounter;
441
442      if (CurrentState != StateExprEnd) {
443        // save state and code,
444        storedState = CurrentState;
445        storedPC = codeGenerator.ProgramCounter;
446
447        // take shortest route to final state (smaller state values are closer to the final state)
448        while (CurrentState != StateExprEnd) {
449          Debug.Assert(followStates[CurrentState][0] == followStates[CurrentState].Min());
450          var nextState = followStates[CurrentState][0];
451          Goto(nextState);
452        }
453      }
454
455      codeGenerator.GetCode(out code, out nParams);
456
457      // restore
458      codeGenerator.ProgramCounter = storedPC;
459      CurrentState = storedState;
460    }
461
462    public void Reset() {
463      CurrentState = StartState;
464      codeGenerator.Reset();
465      numVarRefs = 0;
466    }
467
468    internal string GetActionString(int fromState, int toState) {
469      return actionStrings[fromState, toState] != null ? string.Join(" , ", actionStrings[fromState, toState]) : "";
470    }
471
472#if DEBUG
473    public void PrintAutomaton() {
474      using (var writer = new StreamWriter("automaton.gv")) {
475        writer.WriteLine("digraph {");
476        // writer.WriteLine("rankdir=LR");
477        for (int s = 1; s < stateNames.Count; s++) {
478          for (int i = 0; i < followStates[s].Count; i++) {
479            if (followStates[s][i] <= 0) continue;
480            var followS = followStates[s][i];
481            var label = actionStrings[s, followS] != null ? string.Join(" , ", actionStrings[s, followS]) : "";
482            writer.WriteLine("{0} -> {1} [ label = \"{2}\" ];", stateNames[s], stateNames[followS], label);
483          }
484        }
485        writer.WriteLine("}");
486      }
487    }
488
489#endif
490  }
491}
Note: See TracBrowser for help on using the repository browser.