Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2434_crossvalidation/HeuristicLab.Algorithms.DataAnalysis/3.4/MctsSymbolicRegression/ConstraintHandler.cs @ 15647

Last change on this file since 15647 was 13651, checked in by gkronber, 9 years ago

#2581:

  • added unit tests for the number of different expressions
  • fixed problems in Automaton and constraintHandler that lead to duplicate expressions
  • added possibility for MCTS to handle dead-ends in the search tree (when it is not possible to construct a valid new expression)
  • added statistics on function and gradient evaluations
File size: 19.7 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Diagnostics.Contracts;
25using System.Linq;
26
27namespace HeuristicLab.Algorithms.DataAnalysis.MctsSymbolicRegression {
28
29  // This class restricts the set of allowed transitions of the automaton to prevent exploration of duplicate expressions.
30  // It would be possible to implement this class in such a way that the search never visits a duplicate expression. However,
31  // it seems very intricate to detect this robustly and in all cases while generating an expression because
32  // some for of lookahead is necessary.
33  // Instead the constraint handler only catches the obvious duplicates directly, but does not guarantee that the search always produces a valid expression.
34  // The ratio of the number of unsuccessful searches, that need backtracking should be tracked in the MCTS alg (MctsSymbolicRegressionStatic)
35
36  // All changes to this class should be tested through unit tests. It is important that the ConstraintHandler is not too restrictive.
37
38  // the constraints are derived from a canonical form for expressions.
39  // overall we can enforce a limited number of variable references
40  //
41  // an expression is a sum of terms t_1 ... t_n where terms are ordered according to a relation t_i (<=)_term t_j for each pair t_i, t_j and i <= j
42  // a term is a product of factors where factors are ordered according to relation f_i (<=)_factor f_j for each pair f_i,f_j and i <= j
43
44  // we want to enforce lower-order terms before higher-order terms in expressions (based on number of variable references)
45  // factors can have different types (variable, exp, log, inverse)
46
47  // (<=)_term  [IsSmallerOrEqualTerm(t_i, t_j)]
48  //   1.  NumberOfVarRefs(t_i) < NumberOfVarRefs(t_j)  --> true           enforce terms with non-decreasing number of var refs
49  //   2.  NumberOfVarRefs(t_i) > NumberOfVarRefs(t_j)  --> false
50  //   3.  NumFactors(t_i) > NumFactors(t_j)            --> true           enforce terms with non-increasing number of factors
51  //   4.  NumFactors(t_i) < NumFactors(t_j)            --> false
52  //   5.  for all k factors: Factor(k, t_i) (<=)_factor  Factor(k, t_j) --> true // factors must be non-decreasing
53  //   6.  all factors are (=)_factor                   --> true
54  //   7.  else false
55
56  // (<=)_factor  [IsSmallerOrEqualFactor(f_i, f_j)]
57  //   1.  FactorType(t_i) < FactorType(t_j)  --> true           enforce terms with non-decreasing factor type (var < exp < log < inv)
58  //   2.  FactorType(t_i) > FactorType(t_j)  --> false
59  //   3.  Compare the two factors specifically
60  //     - variables: varIdx_i <= varIdx_j (only one var reference)
61  //     - exp: number of variable references and then varIdx_i <= varIdx_j for each position
62  //     - log: number of variable references and ...
63  //     - inv: number of variable references and ...
64  //
65
66  // for log and inverse factors we allow all polynomials as argument
67  // a polynomial is a sum of terms t_1 ... t_n where terms are ordered according to a relation t_i (<=)_poly t_j for each pair t_i, t_j and i <= j
68
69  // (<=)_poly  [IsSmallerOrEqualPoly(t_i, t_j)]
70  //  1. NumberOfVarRefs(t_i) < NumberOfVarRefs(t_j)         --> true // enforce non-decreasing number of var refs
71  //  2. NumberOfVarRefs(t_i) > NumberOfVarRefs(t_j)         --> false // enforce non-decreasing number of var refs
72  //  3. for all k variables: VarIdx(k,t_i) > VarIdx(k, t_j) --> false // enforce non-decreasing variable idx
73
74
75  // we store the following to make comparsions:
76  // - prevTerm (complete & containing all factors)
77  // - curTerm  (incomplete & containing all completed factors)
78  // - curFactor (incomplete)
79  internal class ConstraintHandler {
80    private int nVars;
81    private readonly int maxVariables;
82    private bool invalidExpression;
83
84    public bool IsInvalidExpression {
85      get { return invalidExpression; }
86    }
87
88
89    private TermInformation prevTerm;
90    private TermInformation curTerm;
91    private FactorInformation curFactor;
92
93
94    private class TermInformation {
95      public int numVarReferences { get { return factors.Sum(f => f.numVarReferences); } }
96      public List<FactorInformation> factors = new List<FactorInformation>();
97    }
98
99    private class FactorInformation {
100      public int numVarReferences = 0;
101      public int factorType; // use the state number to represent types
102
103      // for variable factors
104      public int variableState = -1;
105
106      // for exp  factors
107      public List<int> expVariableStates = new List<int>();
108
109      // for log and inv factors
110      public List<List<int>> polyVariableStates = new List<List<int>>();
111    }
112
113
114    public ConstraintHandler(int maxVars) {
115      this.maxVariables = maxVars;
116    }
117
118    // the order relations for terms and factors
119
120    private static int CompareTerms(TermInformation a, TermInformation b) {
121      if (a.numVarReferences < b.numVarReferences) return -1;
122      if (a.numVarReferences > b.numVarReferences) return 1;
123
124      if (a.factors.Count > b.factors.Count) return -1;  // terms with more factors should be ordered first
125      if (a.factors.Count < b.factors.Count) return +1;
126
127      var aFactors = a.factors.GetEnumerator();
128      var bFactors = b.factors.GetEnumerator();
129      while (aFactors.MoveNext() & bFactors.MoveNext()) {
130        var c = CompareFactors(aFactors.Current, bFactors.Current);
131        if (c < 0) return -1;
132        if (c > 0) return 1;
133      }
134      // all factors are the same => terms are the same
135      return 0;
136    }
137
138    private static int CompareFactors(FactorInformation a, FactorInformation b) {
139      if (a.factorType < b.factorType) return -1;
140      if (a.factorType > b.factorType) return +1;
141      // same factor types
142      if (a.factorType == Automaton.StateVariableFactorStart) {
143        return a.variableState.CompareTo(b.variableState);
144      } else if (a.factorType == Automaton.StateExpFactorStart) {
145        return CompareStateLists(a.expVariableStates, b.expVariableStates);
146      } else {
147        if (a.numVarReferences < b.numVarReferences) return -1;
148        if (a.numVarReferences > b.numVarReferences) return +1;
149        if (a.polyVariableStates.Count > b.polyVariableStates.Count) return -1; // more terms in the poly should be ordered first
150        if (a.polyVariableStates.Count < b.polyVariableStates.Count) return +1;
151        // log and inv
152        var aTerms = a.polyVariableStates.GetEnumerator();
153        var bTerms = b.polyVariableStates.GetEnumerator();
154        while (aTerms.MoveNext() & bTerms.MoveNext()) {
155          var c = CompareStateLists(aTerms.Current, bTerms.Current);
156          if (c != 0) return c;
157        }
158        return 0; // all terms in the polynomial are the same
159      }
160    }
161
162    private static int CompareStateLists(List<int> a, List<int> b) {
163      if (a.Count < b.Count) return -1;
164      if (a.Count > b.Count) return +1;
165      for (int i = 0; i < a.Count; i++) {
166        if (a[i] < b[i]) return -1;
167        if (a[i] > b[i]) return +1;
168      }
169      return 0; // all states are the same
170    }
171
172
173    private bool IsNewTermAllowed() {
174      // next term must have at least as many variable references as the previous term
175      return prevTerm == null || nVars + prevTerm.numVarReferences <= maxVariables;
176    }
177
178    private bool IsNewFactorAllowed() {
179      // next factor must have a larger or equal type compared to the previous factor.
180      // if the types are the same it must have at least as many variable references.
181      // so if the prevFactor is any other than invFactor (last possible type) then we only need to be able to add one variable
182      // otherwise we need to be able to add at least as many variables as the previous factor
183      return !curTerm.factors.Any() ||
184             (nVars + curTerm.factors.Last().numVarReferences <= maxVariables);
185    }
186
187    private bool IsAllowedAsNextFactorType(int followState) {
188      // IsNewTermAllowed already ensures that we can add a term with enough variable references
189
190      // enforce constraints within terms (compare to prev factor)
191      if (curTerm.factors.Any()) {
192        // enforce non-decreasing factor types
193        if (curTerm.factors.Last().factorType > followState) return false;
194        // when the factor type is the same, starting a new factor is only allowed if we can add at least the number of variables of the prev factor
195        if (curTerm.factors.Last().factorType == followState && nVars + curTerm.factors.Last().numVarReferences > maxVariables) return false;
196      }
197
198      // enforce constraints on terms (compare to prev term)
199      // meaning that we must ensure non-decreasing terms
200      if (prevTerm != null) {
201        // a factor type is only allowed if we can then produce a term that is larger or equal to the prev term
202        // (1) if we the number of variable references still remaining is larger than the number of variable references in the prev term
203        //     then it is always possible to build a larger term
204        // (2) otherwise we try to build the largest possible term starting from current factors in the term.
205        //     
206
207        var numVarRefsRemaining = maxVariables - nVars;
208        Contract.Assert(!curTerm.factors.Any() || curTerm.factors.Last().numVarReferences <= numVarRefsRemaining);
209
210        if (prevTerm.numVarReferences < numVarRefsRemaining) return true;
211
212        // variable factors must be handled differently because they can only contain one variable reference
213        if (followState == Automaton.StateVariableFactorStart) {
214          // append the variable factor and the maximum possible state from the previous factor to create a larger factor
215          var varF = CreateLargestPossibleFactor(Automaton.StateVariableFactorStart, 1);
216          var maxF = CreateLargestPossibleFactor(prevTerm.factors.Max(f => f.factorType), numVarRefsRemaining - 1);
217          var origFactorCount = curTerm.factors.Count;
218          // add this factor to the current term
219          curTerm.factors.Add(varF);
220          curTerm.factors.Add(maxF);
221          var c = CompareTerms(prevTerm, curTerm);
222          // restore term
223          curTerm.factors.RemoveRange(origFactorCount, 2);
224          // if the prev term is still larger then this followstate is not allowed
225          if (c > 0) {
226            return false;
227          }
228        } else {
229          var newF = CreateLargestPossibleFactor(followState, numVarRefsRemaining);
230
231          var origFactorCount = curTerm.factors.Count;
232          // add this factor to the current term
233          curTerm.factors.Add(newF);
234          var c = CompareTerms(prevTerm, curTerm);
235          // restore term
236          curTerm.factors.RemoveAt(origFactorCount);
237          // if the prev term is still larger then this followstate is not allowed
238          if (c > 0) {
239            return false;
240          }
241        }
242      }
243      return true;
244    }
245
246    // largest possible factor of the given kind
247    private FactorInformation CreateLargestPossibleFactor(int factorType, int numVarRefs) {
248      var newF = new FactorInformation();
249      newF.factorType = factorType;
250      if (factorType == Automaton.StateVariableFactorStart) {
251        newF.variableState = int.MaxValue;
252        newF.numVarReferences = 1;
253      } else if (factorType == Automaton.StateExpFactorStart) {
254        for (int i = 0; i < numVarRefs; i++)
255          newF.expVariableStates.Add(int.MaxValue);
256        newF.numVarReferences = numVarRefs;
257      } else if (factorType == Automaton.StateInvFactorStart || factorType == Automaton.StateLogFactorStart) {
258        for (int i = 0; i < numVarRefs; i++) {
259          newF.polyVariableStates.Add(new List<int>());
260          newF.polyVariableStates[i].Add(int.MaxValue);
261        }
262        newF.numVarReferences = numVarRefs;
263      }
264      return newF;
265    }
266
267    private bool IsAllowedAsNextVariableFactor(int variableState) {
268      Contract.Assert(variableState >= Automaton.FirstDynamicState);
269      return !curTerm.factors.Any() || curTerm.factors.Last().variableState <= variableState;
270    }
271
272    private bool IsAllowedAsNextInExp(int variableState) {
273      Contract.Assert(variableState >= Automaton.FirstDynamicState);
274      if (curFactor.expVariableStates.Any() && curFactor.expVariableStates.Last() > variableState) return false;
275      if (curTerm.factors.Any()) {
276        // try and compare with prev factor     
277        curFactor.numVarReferences++;
278        curFactor.expVariableStates.Add(variableState);
279        var c = CompareFactors(curTerm.factors.Last(), curFactor);
280        curFactor.numVarReferences--;
281        curFactor.expVariableStates.RemoveAt(curFactor.expVariableStates.Count - 1);
282        return c <= 0;
283      }
284      return true;
285    }
286
287    private bool IsNewTermAllowedInPoly() {
288      return nVars + curFactor.polyVariableStates.Last().Count() <= maxVariables;
289    }
290
291    private bool IsAllowedAsNextInPoly(int variableState) {
292      Contract.Assert(variableState >= Automaton.FirstDynamicState);
293      return !curFactor.polyVariableStates.Any() ||
294             !curFactor.polyVariableStates.Last().Any() ||
295              curFactor.polyVariableStates.Last().Last() <= variableState;
296    }
297    private bool IsTermCompleteInPoly() {
298      var nTerms = curFactor.polyVariableStates.Count;
299      return nTerms == 1 ||
300             curFactor.polyVariableStates[nTerms - 2].Count <= curFactor.polyVariableStates[nTerms - 1].Count;
301
302    }
303    private bool IsCompleteExp() {
304      return !curTerm.factors.Any() || CompareFactors(curTerm.factors.Last(), curFactor) <= 0;
305    }
306
307    public bool IsAllowedFollowState(int currentState, int followState) {
308      // an invalid action was taken earlier on => nothing can be done anymore
309      if (invalidExpression) return false;
310      // states that have no alternative are always allowed
311      // some ending states are only allowed if enough variables have been used in the term
312      if (
313        currentState == Automaton.StateTermStart ||           // no alternative
314        currentState == Automaton.StateExpFactorStart ||
315        currentState == Automaton.StateLogFactorStart ||
316        currentState == Automaton.StateInvFactorStart ||
317        followState == Automaton.StateVariableFactorEnd ||    // no alternative
318        followState == Automaton.StateExpFEnd ||              // no alternative
319        followState == Automaton.StateLogTFEnd ||             // no alternative
320        followState == Automaton.StateInvTFEnd ||             // no alternative
321        followState == Automaton.StateFactorEnd ||            // always allowed because no alternative
322        followState == Automaton.StateExprEnd                 // we could also constrain the minimum number of terms here
323      ) return true;
324
325
326      // starting a new term is only allowed if we can add a term with at least the number of variables of the prev term
327      if (followState == Automaton.StateTermStart && !IsNewTermAllowed()) return false;
328      if (followState == Automaton.StateFactorStart && !IsNewFactorAllowed()) return false;
329      if (currentState == Automaton.StateFactorStart && !IsAllowedAsNextFactorType(followState)) return false;
330      if (followState == Automaton.StateTermEnd && prevTerm != null && CompareTerms(prevTerm, curTerm) > 0) return false;
331
332      // all of these states add at least one variable
333      if (
334          followState == Automaton.StateVariableFactorStart ||
335          followState == Automaton.StateExpFactorStart || followState == Automaton.StateExpFStart ||
336          followState == Automaton.StateLogFactorStart || followState == Automaton.StateLogTStart ||
337          followState == Automaton.StateLogTFStart ||
338          followState == Automaton.StateInvFactorStart || followState == Automaton.StateInvTStart ||
339          followState == Automaton.StateInvTFStart) {
340        if (nVars + 1 > maxVariables) return false;
341      }
342
343      if (currentState == Automaton.StateVariableFactorStart && !IsAllowedAsNextVariableFactor(followState)) return false;
344      else if (currentState == Automaton.StateExpFStart && !IsAllowedAsNextInExp(followState)) return false;
345      else if (followState == Automaton.StateLogTStart && !IsNewTermAllowedInPoly()) return false;
346      else if (currentState == Automaton.StateLogTFStart && !IsAllowedAsNextInPoly(followState)) return false;
347      else if (followState == Automaton.StateInvTStart && !IsNewTermAllowedInPoly()) return false;
348      else if (currentState == Automaton.StateInvTFStart && !IsAllowedAsNextInPoly(followState)) return false;
349      // finishing an exponential factor is only allowed when the number of variable references is large enough
350      else if (followState == Automaton.StateExpFactorEnd && !IsCompleteExp()) return false;
351      // finishing a polynomial (in log or inv) is only allowed when the number of variable references is large enough
352      else if (followState == Automaton.StateInvTEnd && !IsTermCompleteInPoly()) return false;
353      else if (followState == Automaton.StateLogTEnd && !IsTermCompleteInPoly()) return false;
354
355      else if (nVars > maxVariables) return false;
356      else return true;
357    }
358
359
360    public void Reset() {
361      nVars = 0;
362      prevTerm = null;
363      curTerm = null;
364      curFactor = null;
365      invalidExpression = false;
366    }
367
368    public void StartTerm() {
369      curTerm = new TermInformation();
370    }
371
372    public void StartFactor(int state) {
373      curFactor = new FactorInformation();
374      curFactor.factorType = state;
375    }
376
377
378    public void AddVarToCurrentFactor(int state) {
379      Contract.Assert(Automaton.FirstDynamicState <= state);
380      Contract.Assert(curTerm != null);
381      Contract.Assert(curFactor != null);
382
383      nVars++;
384      curFactor.numVarReferences++;
385
386      if (curFactor.factorType == Automaton.StateVariableFactorStart) {
387        Contract.Assert(curFactor.variableState < 0); // not set before
388        curFactor.variableState = state;
389      } else if (curFactor.factorType == Automaton.StateExpFactorStart) {
390        curFactor.expVariableStates.Add(state);
391      } else if (curFactor.factorType == Automaton.StateLogFactorStart ||
392                 curFactor.factorType == Automaton.StateInvFactorStart) {
393        curFactor.polyVariableStates.Last().Add(state);
394      } else throw new InvalidProgramException();
395    }
396
397    public void StartNewTermInPoly() {
398      curFactor.polyVariableStates.Add(new List<int>());
399    }
400
401    public void EndFactor() {
402      // enforce non-decreasing factors
403      if (curTerm.factors.Any() && CompareFactors(curTerm.factors.Last(), curFactor) > 0)
404        invalidExpression = true;
405      curTerm.factors.Add(curFactor);
406      curFactor = null;
407    }
408
409    public void EndTerm() {
410      // enforce non-decreasing terms (TODO: equal terms should not be allowed)
411      if (prevTerm != null && CompareTerms(prevTerm, curTerm) > 0)
412        invalidExpression = true;
413      prevTerm = curTerm;
414      curTerm = null;
415    }
416  }
417}
Note: See TracBrowser for help on using the repository browser.