Free cookie consent management tool by TermsFeed Policy Generator

source: branches/crossvalidation-2434/HeuristicLab.Algorithms.DataAnalysis/3.4/MctsSymbolicRegression/Automaton.cs @ 15373

Last change on this file since 15373 was 13651, checked in by gkronber, 9 years ago

#2581:

  • added unit tests for the number of different expressions
  • fixed problems in Automaton and constraintHandler that lead to duplicate expressions
  • added possibility for MCTS to handle dead-ends in the search tree (when it is not possible to construct a valid new expression)
  • added statistics on function and gradient evaluations
File size: 15.7 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.IO;
25
26namespace HeuristicLab.Algorithms.DataAnalysis.MctsSymbolicRegression {
27  // this is the core class for generating expressions.
28  // it represents a finite state automaton, each state transition can be associated with an action (e.g. to produce code).
29  // the automaton determines the possible structures for expressions.
30  //
31  // to understand this code it is worthwile to generate a graphical visualization of the automaton (see PrintAutomaton).
32  // If the code is compiled in debug mode the automaton produces a Graphviz file into the folder of the application
33  // whenever an instance of the automaton is constructed.
34  //
35  // This class relies on two other classes:
36  // - CodeGenerator to produce code for a stack-based evaluator and
37  // - ConstraintHandler to restrict the allowed set of expressions.
38  //
39  // The ConstraintHandler extends the automaton and adds semantic restrictions for expressions produced by the automaton.
40  //
41  //
42  internal class Automaton {
43    public const int StateExpr = 1;
44    public const int StateExprEnd = 2;
45    public const int StateTermStart = 3;
46    public const int StateTermEnd = 4;
47    public const int StateFactorStart = 5;
48    public const int StateFactorEnd = 6;
49    public const int StateVariableFactorStart = 7;
50    public const int StateVariableFactorEnd = 8;
51    public const int StateExpFactorStart = 9;
52    public const int StateExpFactorEnd = 10;
53    public const int StateLogFactorStart = 11;
54    public const int StateLogFactorEnd = 12;
55    public const int StateInvFactorStart = 13;
56    public const int StateInvFactorEnd = 14;
57    public const int StateExpFStart = 15;
58    public const int StateExpFEnd = 16;
59    public const int StateLogTStart = 17;
60    public const int StateLogTEnd = 18;
61    public const int StateLogTFStart = 19;
62    public const int StateLogTFEnd = 20;
63    public const int StateInvTStart = 21;
64    public const int StateInvTEnd = 22;
65    public const int StateInvTFStart = 23;
66    public const int StateInvTFEnd = 24;
67    public const int FirstDynamicState = 25;
68    // more states for individual variables are created dynamically
69
70    private const int StartState = StateExpr;
71    public int CurrentState { get; private set; }
72
73    public readonly List<string> stateNames;
74    private List<int>[] followStates;
75    private List<Action>[,] actions; // not every follow state is possible but this representation should be efficient
76    private List<string>[,] actionStrings; // just for printing
77    private readonly CodeGenerator codeGenerator;
78    private readonly ConstraintHandler constraintHandler;
79
80    public Automaton(double[][] vars, int maxVarsInExpression = 100,
81       bool allowProdOfVars = true,
82       bool allowExp = true,
83       bool allowLog = true,
84       bool allowInv = true,
85       bool allowMultipleTerms = false) {
86      int nVars = vars.Length;
87      stateNames = new List<string>() { string.Empty, "Expr", "ExprEnd", "TermStart", "TermEnd", "FactorStart", "FactorEnd", "VarFactorStart", "VarFactorEnd", "ExpFactorStart", "ExpFactorEnd", "LogFactorStart", "LogFactorEnd", "InvFactorStart", "InvFactorEnd", "ExpFStart", "ExpFEnd", "LogTStart", "LogTEnd", "LogTFStart", "LogTFEnd", "InvTStart", "InvTEnd", "InvTFStart", "InvTFEnd" };
88      codeGenerator = new CodeGenerator();
89      constraintHandler = new ConstraintHandler(maxVarsInExpression);
90      BuildAutomaton(nVars, allowProdOfVars, allowExp, allowLog, allowInv, allowMultipleTerms);
91
92      Reset();
93#if DEBUG
94      PrintAutomaton();
95#endif
96    }
97
98    // reverse notation ops
99    // Expr -> c 0 Term { '+' Term } '+' '*' c '+' 'exit'
100    // Term -> c Fact { '*' Fact } '*'
101    // Fact -> VarFact | ExpFact | LogFact | InvFact
102    // VarFact -> var_1 ... var_n
103    // ExpFact -> 1 ExpF { '*' ExpF } '*' c '*' 'exp' // c must be at end to allow scaling in evaluator
104    // ExpF    -> var_1 ... var_n
105    // LogFact -> 0 LogT { '+' LogT } '+' c '+' 'log' // c must be at end to allow scaling in evaluator
106    // LogT    -> c LogTF { '*' LogTF } '*'
107    // LogTF   -> var_1 ... var_n
108    // InvFact -> 1 InvT { '+' InvT } '+' 'inv'
109    // InvT    -> (var_1 ... var_n) c '*'
110    private void BuildAutomaton(int nVars,
111      bool allowProdOfVars = true,
112       bool allowExp = true,
113       bool allowLog = true,
114       bool allowInv = true,
115       bool allowMultipleTerms = false) {
116
117      int nStates = FirstDynamicState + 4 * nVars;
118      followStates = new List<int>[nStates];
119      actions = new List<Action>[nStates, nStates];
120      actionStrings = new List<string>[nStates, nStates];
121
122      // Expr -> c 0 Term { '+' Term } '+' '*' c '+' 'exit'
123      AddTransition(StateExpr, StateTermStart, () => {
124        codeGenerator.Reset();
125        codeGenerator.Emit1(OpCodes.LoadParamN);
126        codeGenerator.Emit1(OpCodes.LoadConst0);
127        constraintHandler.Reset();
128      }, "c 0, Reset");
129      AddTransition(StateTermEnd, StateExprEnd, () => {
130        codeGenerator.Emit1(OpCodes.Add);
131        codeGenerator.Emit1(OpCodes.Mul);
132        codeGenerator.Emit1(OpCodes.LoadParamN);
133        codeGenerator.Emit1(OpCodes.Add);
134        codeGenerator.Emit1(OpCodes.Exit);
135      }, "+*c+ exit");
136      if (allowMultipleTerms)
137        AddTransition(StateTermEnd, StateTermStart, () => {
138          codeGenerator.Emit1(OpCodes.Add);
139        }, "+");
140
141      // Term -> c Fact { '*' Fact } '*'
142      AddTransition(StateTermStart, StateFactorStart,
143        () => {
144          codeGenerator.Emit1(OpCodes.LoadParamN);
145          constraintHandler.StartTerm();
146        },
147        "c, StartTerm");
148      AddTransition(StateFactorEnd, StateTermEnd,
149        () => {
150          codeGenerator.Emit1(OpCodes.Mul);
151          constraintHandler.EndTerm();
152        },
153        "*, EndTerm");
154
155      AddTransition(StateFactorEnd, StateFactorStart,
156        () => { codeGenerator.Emit1(OpCodes.Mul); },
157        "*");
158
159
160      // Fact -> VarFact | ExpFact | LogFact | InvFact
161      if (allowProdOfVars)
162        AddTransition(StateFactorStart, StateVariableFactorStart, () => {
163          constraintHandler.StartFactor(StateVariableFactorStart);
164        }, "StartFactor");
165      if (allowExp)
166        AddTransition(StateFactorStart, StateExpFactorStart, () => {
167          constraintHandler.StartFactor(StateExpFactorStart);
168        }, "StartFactor");
169      if (allowLog)
170        AddTransition(StateFactorStart, StateLogFactorStart, () => {
171          constraintHandler.StartFactor(StateLogFactorStart);
172        }, "StartFactor");
173      if (allowInv)
174        AddTransition(StateFactorStart, StateInvFactorStart, () => {
175          constraintHandler.StartFactor(StateInvFactorStart);
176        }, "StartFactor");
177      AddTransition(StateVariableFactorEnd, StateFactorEnd, () => { constraintHandler.EndFactor(); }, "EndFactor");
178      AddTransition(StateExpFactorEnd, StateFactorEnd, () => { constraintHandler.EndFactor(); }, "EndFactor");
179      AddTransition(StateLogFactorEnd, StateFactorEnd, () => { constraintHandler.EndFactor(); }, "EndFactor");
180      AddTransition(StateInvFactorEnd, StateFactorEnd, () => { constraintHandler.EndFactor(); }, "EndFactor");
181
182      // VarFact -> var_1 ... var_n
183      // add dynamic states for each variable
184      int curDynVarState = FirstDynamicState;
185      for (int i = 0; i < nVars; i++) {
186        short varIdx = (short)i;
187        var varState = curDynVarState;
188        stateNames.Add("var_1");
189        AddTransition(StateVariableFactorStart, curDynVarState,
190          () => {
191            codeGenerator.Emit2(OpCodes.LoadVar, varIdx);
192            constraintHandler.AddVarToCurrentFactor(varState);
193          },
194          "var_" + varIdx + ", AddVar");
195        AddTransition(curDynVarState, StateVariableFactorEnd);
196        curDynVarState++;
197      }
198
199      // ExpFact -> 1 ExpF { '*' ExpF } '*' c '*' 'exp'
200      AddTransition(StateExpFactorStart, StateExpFStart,
201        () => {
202          codeGenerator.Emit1(OpCodes.LoadConst1);
203        },
204        "1");
205      AddTransition(StateExpFEnd, StateExpFactorEnd,
206        () => {
207          codeGenerator.Emit1(OpCodes.Mul);
208          codeGenerator.Emit1(OpCodes.LoadParamN);
209          codeGenerator.Emit1(OpCodes.Mul);
210          codeGenerator.Emit1(OpCodes.Exp);
211        },
212        "*c*exp");
213      AddTransition(StateExpFEnd, StateExpFStart,
214        () => { codeGenerator.Emit1(OpCodes.Mul); },
215        "*");
216
217      // ExpF    -> var_1 ... var_n
218      for (int i = 0; i < nVars; i++) {
219        short varIdx = (short)i;
220        int varState = curDynVarState;
221        stateNames.Add("var_2");
222        AddTransition(StateExpFStart, curDynVarState,
223          () => {
224            codeGenerator.Emit2(OpCodes.LoadVar, varIdx);
225            constraintHandler.AddVarToCurrentFactor(varState);
226          },
227          "var_" + varIdx + ", AddVar");
228        AddTransition(curDynVarState, StateExpFEnd);
229        curDynVarState++;
230      }
231
232      // must have c at end because of adjustment of c in evaluator
233      // LogFact -> 0 LogT { '+' LogT } '+' c '+' 'log'
234      AddTransition(StateLogFactorStart, StateLogTStart,
235        () => {
236          codeGenerator.Emit1(OpCodes.LoadConst0);
237          constraintHandler.StartNewTermInPoly();
238        },
239        "0, StartTermInPoly");
240      AddTransition(StateLogTEnd, StateLogFactorEnd,
241        () => {
242          codeGenerator.Emit1(OpCodes.Add);
243          codeGenerator.Emit1(OpCodes.LoadParamN);
244          codeGenerator.Emit1(OpCodes.Add);
245          codeGenerator.Emit1(OpCodes.Log);
246        },
247        "+c+log");
248      AddTransition(StateLogTEnd, StateLogTStart,
249        () => { codeGenerator.Emit1(OpCodes.Add); },
250        "+");
251
252      // LogT    -> c LogTF { '*' LogTF } '*'
253      AddTransition(StateLogTStart, StateLogTFStart,
254        () => {
255          codeGenerator.Emit1(OpCodes.LoadParamN);
256        },
257        "c");
258      AddTransition(StateLogTFEnd, StateLogTEnd,
259        () => {
260          codeGenerator.Emit1(OpCodes.Mul);
261        },
262        "*");
263      AddTransition(StateLogTFEnd, StateLogTFStart,
264        () => {
265          codeGenerator.Emit1(OpCodes.Mul);
266        },
267        "*");
268
269      // LogTF   -> var_1 ... var_n
270      for (int i = 0; i < nVars; i++) {
271        short varIdx = (short)i;
272        int varState = curDynVarState;
273        stateNames.Add("var_3");
274        AddTransition(StateLogTFStart, curDynVarState,
275          () => {
276            codeGenerator.Emit2(OpCodes.LoadVar, varIdx);
277            constraintHandler.AddVarToCurrentFactor(varState);
278          },
279          "var_" + varIdx + ", AddVar");
280        AddTransition(curDynVarState, StateLogTFEnd);
281        curDynVarState++;
282      }
283
284      // InvFact -> 1 InvT { '+' InvT } '+' 'inv'
285      AddTransition(StateInvFactorStart, StateInvTStart,
286        () => {
287          codeGenerator.Emit1(OpCodes.LoadConst1);
288          constraintHandler.StartNewTermInPoly();
289        },
290        "c, StartTermInPoly");
291      AddTransition(StateInvTEnd, StateInvFactorEnd,
292        () => {
293          codeGenerator.Emit1(OpCodes.Add);
294          codeGenerator.Emit1(OpCodes.Inv);
295        },
296        "+inv");
297      AddTransition(StateInvTEnd, StateInvTStart,
298        () => { codeGenerator.Emit1(OpCodes.Add); },
299        "+");
300
301      // InvT    -> c InvTF { '*' InvTF } '*'
302      AddTransition(StateInvTStart, StateInvTFStart,
303        () => {
304          codeGenerator.Emit1(OpCodes.LoadParamN);
305        },
306        "c");
307      AddTransition(StateInvTFEnd, StateInvTEnd,
308        () => {
309          codeGenerator.Emit1(OpCodes.Mul);
310        },
311        "*");
312      AddTransition(StateInvTFEnd, StateInvTFStart,
313        () => {
314          codeGenerator.Emit1(OpCodes.Mul);
315        },
316        "*");
317
318      // InvTF    -> (var_1 ... var_n) c '*'
319      for (int i = 0; i < nVars; i++) {
320        short varIdx = (short)i;
321        int varState = curDynVarState;
322        stateNames.Add("var_4");
323        AddTransition(StateInvTFStart, curDynVarState,
324          () => {
325            codeGenerator.Emit2(OpCodes.LoadVar, varIdx);
326            constraintHandler.AddVarToCurrentFactor(varState);
327          },
328          "var_" + varIdx + ", AddVar");
329        AddTransition(curDynVarState, StateInvTFEnd);
330        curDynVarState++;
331      }
332
333      followStates[StateExprEnd] = new List<int>(); // no follow states
334    }
335
336    private void AddTransition(int fromState, int toState) {
337      if (followStates[fromState] == null) followStates[fromState] = new List<int>();
338      followStates[fromState].Add(toState);
339    }
340    private void AddTransition(int fromState, int toState, Action action, string str) {
341      if (followStates[fromState] == null) followStates[fromState] = new List<int>();
342      followStates[fromState].Add(toState);
343
344      if (actions[fromState, toState] == null) {
345        actions[fromState, toState] = new List<Action>();
346        actionStrings[fromState, toState] = new List<string>();
347      }
348
349      actions[fromState, toState].Add(action);
350      actionStrings[fromState, toState].Add(str);
351    }
352
353    private readonly int[] followStatesBuf = new int[1000];
354    public void FollowStates(int state, out int[] buf, out int nElements) {
355      // for loop instead of where iterator
356      var fs = followStates[state];
357      int j = 0;
358      for (int i = 0; i < fs.Count; i++) {
359        var s = fs[i];
360        if (constraintHandler.IsAllowedFollowState(state, s)) {
361          followStatesBuf[j++] = s;
362        }
363      }
364      buf = followStatesBuf;
365      nElements = j;
366    }
367
368
369    public void Goto(int targetState) {
370      if (actions[CurrentState, targetState] != null)
371        actions[CurrentState, targetState].ForEach(a => a()); // execute all actions
372      CurrentState = targetState;
373    }
374
375    public bool IsFinalState(int s) {
376      return s == StateExprEnd && !constraintHandler.IsInvalidExpression;
377    }
378
379    public void GetCode(out byte[] code, out int nParams) {
380      codeGenerator.GetCode(out code, out nParams);
381    }
382
383    public void Reset() {
384      CurrentState = StartState;
385      codeGenerator.Reset();
386      constraintHandler.Reset();
387    }
388
389#if DEBUG
390    public void PrintAutomaton() {
391      using (var writer = new StreamWriter("automaton.gv")) {
392        writer.WriteLine("digraph {");
393        // writer.WriteLine("rankdir=LR");
394        int[] fs;
395        int nFs;
396        for (int s = StartState; s < stateNames.Count; s++) {
397          for (int i = 0; i < followStates[s].Count; i++) {
398            if (followStates[s][i] <= 0) continue;
399            var followS = followStates[s][i];
400            var label = actionStrings[s, followS] != null ? string.Join(" , ", actionStrings[s, followS]) : "";
401            writer.WriteLine("{0} -> {1} [ label = \"{2}\" ];", stateNames[s], stateNames[followS], label);
402          }
403        }
404        writer.WriteLine("}");
405      }
406    }
407#endif
408  }
409}
Note: See TracBrowser for help on using the repository browser.