Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Common/ExpressionExtender.cs @ 13771

Last change on this file since 13771 was 12024, checked in by gkronber, 10 years ago

#2283: changed handling of inverse expressions in transformation of expressions to canonical form

File size: 13.7 KB
RevLine 
[11902]1using System;
[11966]2using System.CodeDom;
[11902]3using System.Collections.Generic;
4using System.Diagnostics;
5using System.Linq;
[11966]6using System.Security.Cryptography.X509Certificates;
[11902]7using System.Text;
8using System.Threading.Tasks;
9using HeuristicLab.Common;
10
11namespace HeuristicLab.Algorithms.Bandits {
12  // helper to create canonical forms of expressions
13  // TODO: change symbolicregressionpoly10problem to use this class
14  // this does not support expressions with constants (in transformations we assume constant opt is used)
15  // (e.g. we transform all negative signs to + because it is assumed a negative constant can be produced for the term)
16  public class ExpressionExtender {
17
18    // supports the grammar
19    // G(E):
[12024]20    //     E -> V | V+E | V-E | V*E | V%E | (E)
[11902]21    //     V -> <variables>
22    //     ";
23
[12024]24    // might produce expressions of the form /x (= 1/x)
[12014]25    // the pipe symbol | is used for the constant one (in comparison to other constants)
[11966]26    private string sentence;
27    private int syIdx;
[11902]28
29    public string CanonicalRepresentation(string phrase) {
[11966]30      InitLex(phrase);
31      var e = CanonicalExpr();
[12024]32      return e.ToString();
[11902]33    }
34
[11966]35
36    private void InitLex(string sentence) {
37      this.sentence = sentence;
38      this.syIdx = 0;
39    }
40
41    private char CurSy() {
42      if (syIdx >= sentence.Length) return '\0';
43      return sentence[syIdx];
44    }
45    private void NewSy() {
46      if (syIdx < sentence.Length) syIdx++;
47    }
48
[11972]49    // an expression is a sorted set of terms
[11966]50    // a term is an (ordered) list of factors
[11972]51    // a factor is either a single symbol or an inverted expression
[11966]52    // CanonicalExpression reads multiple terms (expressions) and must merge the terms in the expression (ordering and duplicates)
53    // CanonicalTerm reads multiple factors and must expand factors whenever it reads a combined factor (expression) it produces an expression
54    // CanonicalFactor produces an expression
55
56
57    // canonical expression returns either a single term or a set of terms
58    private Expr CanonicalExpr() {
59      var terms = new List<Expr>();
60      terms.Add(CanonicalTerm());
61      var curSy = CurSy();
[11902]62      while (curSy == '+' || curSy == '-' || curSy == '^') {
63        if (curSy == '+') {
[11966]64          NewSy();
65          terms.Add(CanonicalTerm());
[11902]66        } else if (curSy == '-') {
[11966]67          NewSy();
68          terms.Add(CanonicalTerm()); // minus is the same as plus assuming constant opt
[11902]69        } else {
[11966]70          NewSy();
[11902]71          throw new NotImplementedException();
72          // var e = Expr(variables, constants);
73          // r = Not(r) * e + r * Not(e); // xor = (!x AND y) OR (x AND !y)
74        }
[11966]75        curSy = CurSy();
[11902]76      }
77
[11966]78      return new Expr(terms.SelectMany(t => t.Terms));
[11902]79    }
80
[11966]81    // canonical term returns either a single term (product of factors) or a set of terms
82    private Expr CanonicalTerm() {
[11972]83      var factors = new List<Factor>();
[11966]84      var f = CanonicalFact();
[11902]85      if (f != null) factors.Add(f);
[11966]86      var curSy = CurSy();
[11902]87      while (curSy == '*' || curSy == '%') {
88        if (curSy == '*') {
[11966]89          NewSy();
90          f = CanonicalFact();
[11902]91          if (f != null) factors.Add(f);
92        } else {
[11966]93          NewSy();
94          f = CanonicalFact();
[12014]95          // if there is only one term we can add multiple inverted simple factors instead of the whole inverted expression
96          if (!f.IsSimpleFactor && f.Expr.Terms.Count == 1) {
97            foreach (var invF in f.Expr.Terms.First().Factors) {
[12024]98              if (invF.ToString() == "1") continue;
[12014]99              invF.Invert();
100              factors.Add(invF);
101            }
102          } else {
103            f.Invert();
104            if (f != null) factors.Add(f);
105          }
[11902]106        }
[11966]107        curSy = CurSy();
[11902]108      }
[12014]109
110      factors = CancelFactors(factors).ToList();
[11972]111      return ExpandFactors(factors);
112    }
[11902]113
[11966]114    // canonical fact returns a factor (either a singe variable, or a set of terms)
[11972]115    private Factor CanonicalFact() {
[11966]116      var curSy = CurSy();
[11902]117      if (curSy == '!') {
118        throw new NotSupportedException();
119      } else if (curSy == '(') {
[11966]120        NewSy();
[11972]121        Expr r = CanonicalExpr(); // this is already simplified
[11966]122        if (CurSy() != ')') throw new ArgumentException();
123        NewSy();
[11972]124        return new Factor(r);
[11902]125      } else if (curSy >= 'a' && curSy <= 'z') {
[11966]126        NewSy();
[11972]127        return new Factor(curSy);
[11902]128        //       } else if (curSy >= '0' && curSy <= '9') {
[11966]129      } else if (curSy >= 'A' && curSy <= 'Z') {
130        // treat nonterminals in the same way as variables
131        NewSy();
[11972]132        return new Factor(curSy);
[11966]133
[11902]134      } else throw new ArgumentException("found symbol " + curSy);
135    }
136
[11972]137    // a list of factors (symbols, or expressions, and possibly inverses are read
[12014]138    // a list to factors symbols or expressions and possibly inverses are produced
[11972]139    // all non-inverse expression factors are expanded
140    private Expr ExpandFactors(IEnumerable<Factor> factors) {
141      // if (invFactors.Count > 0) throw new NotImplementedException();
142      Debug.Assert(!factors.First().IsInverse); // the first factor is never an inverted factor
143
144      // each factor could be a list of terms (expression)
145
146      Expr currentFact = null;
147      var firstFactor = factors.First();
[12014]148      if (firstFactor.IsSimpleFactor || firstFactor.IsInverse) currentFact = new Expr(new Term(firstFactor));
[11972]149      else currentFact = firstFactor.Expr;
150
151      foreach (var fact in factors.Skip(1)) {
152        Expr curExpr = null;
153        if (fact.IsSimpleFactor || fact.IsInverse) curExpr = new Expr(new Term(fact));
154        else curExpr = fact.Expr;
155        currentFact = AllProducts(currentFact, curExpr);
156      }
157      return currentFact;
158    }
159
160    private Expr AllProducts(Expr a, Expr b) {
161      var aTerms = a.Terms.ToArray();
162      var bTerms = b.Terms.ToArray();
163      var combs = from aT in aTerms
164                  from bT in bTerms
165                  let factors = CancelFactors(aT.Factors.Concat(bT.Factors))
166                  select new Term(factors);
167      return new Expr(combs);
168    }
169
170    private IEnumerable<Factor> CancelFactors(IEnumerable<Factor> factors) {
171      var factorsArr = factors.ToArray();
172      var results = new List<Factor>(factors);
173      foreach (var f in factorsArr) {
[12024]174        if (f.ToString() == "1") results.Remove(f);
175        if (f.ToString() == "1/(1)") results.Remove(f);
[11972]176        if (f.IsInverse) {
177          // find matching
178          Factor match;
[12014]179          match = factorsArr.FirstOrDefault(other => !other.IsInverse && f.Cancels(other));
[11972]180          if (match != null) {
[12014]181            results.Remove(f);
182            var idx = results.IndexOf(match);
183
[11972]184            results.Remove(match);
[12014]185            if (!results.Any())
[12024]186              results.Insert(idx, new Factor('1')); // when the factor is the last one then insert a one
[12014]187
188            // also mark as cancelled in the factorsArr
189            idx = Array.IndexOf(factorsArr, match);
[12024]190            factorsArr[idx] = new Factor('1');
[11972]191          }
192        }
193      }
[12024]194      if (results.Count == 0) results.Add(new Factor('1'));
[11972]195      return results;
196    }
197
[11966]198    #region term
199    // term can be merged (essentially an ordered list of factors)
200    internal class Term : IComparable<Term> {
[11972]201      private readonly SortedList<Factor, int> factors; // factor symbol and the number of occurrences
[11902]202
[11972]203      public IEnumerable<Factor> Factors {
[11966]204        get {
[11972]205          return factors.SelectMany(p => Enumerable.Repeat(p.Key, p.Value));
[11966]206        }
[11902]207      }
[11966]208
[11972]209      public Term(Factor f) {
210        factors = new SortedList<Factor, int>();
211        factors.Add(f, 1);
[11902]212      }
[11972]213      public Term(IEnumerable<Factor> factors) {
214        this.factors = new SortedList<Factor, int>();
[11966]215        foreach (var f in factors) {
[11972]216          if (this.factors.ContainsKey(f)) this.factors[f] += 1;
217          else this.factors.Add(f, 1);
[11902]218        }
219      }
220
[11966]221      public int CompareTo(Term other) {
[11972]222        if (ContainsNonTerminal(Factors) && !ContainsNonTerminal(other.Factors)) {
[11966]223          return 1;
[11972]224        } else if (!ContainsNonTerminal(Factors) && ContainsNonTerminal(other.Factors)) {
[11966]225          return -1;
226        } else {
[11972]227          var countComp = Factors.Count().CompareTo(other.Factors.Count());
[11966]228          if (countComp != 0) return countComp;
[12024]229          foreach (var pair in Factors.Zip(other.Factors, Tuple.Create)) {
230            var fComp = pair.Item1.CompareTo(pair.Item2);
231            if (fComp != 0) return fComp;
232          }
233          return 0;
[11966]234        }
[11902]235      }
236
237      public override string ToString() {
[11972]238
239        return string.Join("*", Factors);
[11902]240      }
241      public override bool Equals(object obj) {
242        var other = obj as Term;
243        if (other == null) return false;
[11972]244        if (this.Factors.Count() != other.Factors.Count()) return false;
245        if (this.Factors.Zip(other.Factors, Tuple.Create).Any(t => t.Item1 != t.Item2)) return false;
[11966]246        return true;
[11902]247      }
248      public override int GetHashCode() {
249        var h = 31415;
[11972]250        foreach (var v in Factors) {
[11966]251          h ^= v.GetHashCode();
[11902]252        }
253        return h;
254      }
255    }
256    #endregion
[11966]257
[11972]258    #region factor
259    // factors is either a single symbol or an inverted expression
260    internal class Factor : IComparable<Factor> {
261      public bool IsSimpleFactor { get { return Expr == null; } }
262      public char Symbol { get { return symbol; } }
263      public Expr Expr { get { return expr; } }
264      public bool IsInverse { get { return inv; } }
265      private readonly char symbol = '\0';
266      private readonly Expr expr;
267      private bool inv;
268
269      public Factor(char f) {
270        this.symbol = f;
271      }
272      public Factor(Expr expr) {
273        this.expr = expr;
274      }
275
276      public void Invert() {
277        this.inv = !inv;
278      }
[12014]279      public bool Cancels(Factor other) {
280        if (this.inv == other.inv) return false;
281        if (this.Expr != null && other.Expr == null) return false;
282        if (this.Expr == null && other.Expr != null) return false;
283        if (Expr == null) return this.Symbol.Equals(other.symbol);
284        else return this.Expr.CompareTo(other.Expr) == 0;
285      }
[11972]286
287      public int CompareTo(Factor other) {
288        // 1) single symbol factors first
289        // 2) expression factors by expression compare
290        var crit1 = ContainsNonTerminal(this).CompareTo(ContainsNonTerminal(other));
291        if (crit1 != 0) return crit1;
292
293        var crit2 = this.IsInverse.CompareTo(other.IsInverse);
294        if (crit2 != 0) return crit2;
295
296        var crit3 = this.IsSimpleFactor.CompareTo(other.IsSimpleFactor);
297        if (crit3 != 0) return crit3;
298
299        // both are simple or expressions
300        if (IsSimpleFactor) return this.symbol.CompareTo(other.symbol);
301        else return this.Expr.CompareTo(other.Expr);
302      }
303
304      public override string ToString() {
305        var s = Expr == null ? symbol.ToString() : "(" + expr.ToString() + ")";
306        if (IsInverse) {
[12024]307          return "/" + s;
[11972]308        } else return s;
309      }
310      public override bool Equals(object obj) {
311        var other = obj as Factor;
312        if (other == null) return false;
313        if (IsInverse != other.IsInverse) return false;
314        if (this.symbol != other.symbol) return false;
315        if (this.Expr != other.Expr) return false;
316        return true;
317      }
318      public override int GetHashCode() {
319        var h = 31415;
320        h ^= symbol.GetHashCode();
321        if (Expr != null) h ^= Expr.GetHashCode();
322        return h;
323      }
324    }
325    #endregion
326
[11902]327    #region expr
328
[11966]329    internal class Expr : IComparable<Expr> {
330      public readonly SortedSet<Term> Terms; // only set for Kind == Expr
[11972]331      //public bool Inverse;
[11966]332      public Expr(Term t) {
333        Terms = new SortedSet<Term>();
334        Terms.Add(t);
[11902]335      }
[11966]336      public Expr(IEnumerable<Term> exprTerms) {
337        Terms = new SortedSet<Term>();
338        foreach (var t in exprTerms) {
339          Terms.Add(t);
340        }
[11902]341      }
342
[11966]343      public void Merge(Expr other) {
344        this.Terms.UnionWith(other.Terms);
345      }
346
347      public int CompareTo(Expr other) {
348        var sizeComp = this.Terms.Count.CompareTo(other.Terms.Count);
349        if (sizeComp != 0) return sizeComp;
350        // same size => compare terms
351        foreach (var pair in Terms.Zip(other.Terms, Tuple.Create)) {
352          var termComp = pair.Item1.CompareTo(pair.Item2);
353          if (termComp != 0) return termComp;
354        }
355        return 0;
356      }
357
[11902]358      public override string ToString() {
[11972]359        return string.Join("+", Terms);
[11902]360      }
361      public override bool Equals(object obj) {
362        var other = obj as Expr;
363        if (other == null) return false;
364        if (this.Terms.Count() != other.Terms.Count()) return false;
[11966]365        return this.Terms.Intersect(other.Terms).Count() == this.Terms.Count;
[11902]366      }
[12014]367
[11902]368      public override int GetHashCode() {
369        var h = 31415;
[11966]370        if (Terms != null)
371          foreach (var t in Terms) {
372            h ^= t.GetHashCode();
373          }
[11902]374        return h;
375      }
376    }
377    #endregion
[11966]378    internal static bool IsNonTerminal(char symb) {
379      return symb >= 'A' && symb <= 'Z';
380    }
[11972]381    internal static bool ContainsNonTerminal(IEnumerable<Factor> factors) {
382      return factors.Any(ContainsNonTerminal);
[11966]383    }
[11972]384    internal static bool ContainsNonTerminal(Factor f) {
385      if (f.Expr == null) return IsNonTerminal(f.Symbol);
386      else return ContainsNonTerminal(f.Expr);
387    }
[11902]388
[11972]389    private static bool ContainsNonTerminal(Expr expr) {
390      return expr.Terms.Any(ContainsNonTerminal);
391    }
392
393    private static bool ContainsNonTerminal(Term term) {
394      return ContainsNonTerminal(term.Factors);
395    }
[11902]396  }
397}
Note: See TracBrowser for help on using the repository browser.