source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Common/ExpressionExtender.cs @ 12024

Last change on this file since 12024 was 12024, checked in by gkronber, 7 years ago

#2283: changed handling of inverse expressions in transformation of expressions to canonical form

File size: 13.7 KB
Line 
1using System;
2using System.CodeDom;
3using System.Collections.Generic;
4using System.Diagnostics;
5using System.Linq;
6using System.Security.Cryptography.X509Certificates;
7using System.Text;
8using System.Threading.Tasks;
9using HeuristicLab.Common;
10
11namespace HeuristicLab.Algorithms.Bandits {
12  // helper to create canonical forms of expressions
13  // TODO: change symbolicregressionpoly10problem to use this class
14  // this does not support expressions with constants (in transformations we assume constant opt is used)
15  // (e.g. we transform all negative signs to + because it is assumed a negative constant can be produced for the term)
16  public class ExpressionExtender {
17
18    // supports the grammar
19    // G(E):
20    //     E -> V | V+E | V-E | V*E | V%E | (E)
21    //     V -> <variables>
22    //     ";
23
24    // might produce expressions of the form /x (= 1/x)
25    // the pipe symbol | is used for the constant one (in comparison to other constants)
26    private string sentence;
27    private int syIdx;
28
29    public string CanonicalRepresentation(string phrase) {
30      InitLex(phrase);
31      var e = CanonicalExpr();
32      return e.ToString();
33    }
34
35
36    private void InitLex(string sentence) {
37      this.sentence = sentence;
38      this.syIdx = 0;
39    }
40
41    private char CurSy() {
42      if (syIdx >= sentence.Length) return '\0';
43      return sentence[syIdx];
44    }
45    private void NewSy() {
46      if (syIdx < sentence.Length) syIdx++;
47    }
48
49    // an expression is a sorted set of terms
50    // a term is an (ordered) list of factors
51    // a factor is either a single symbol or an inverted expression
52    // CanonicalExpression reads multiple terms (expressions) and must merge the terms in the expression (ordering and duplicates)
53    // CanonicalTerm reads multiple factors and must expand factors whenever it reads a combined factor (expression) it produces an expression
54    // CanonicalFactor produces an expression
55
56
57    // canonical expression returns either a single term or a set of terms
58    private Expr CanonicalExpr() {
59      var terms = new List<Expr>();
60      terms.Add(CanonicalTerm());
61      var curSy = CurSy();
62      while (curSy == '+' || curSy == '-' || curSy == '^') {
63        if (curSy == '+') {
64          NewSy();
65          terms.Add(CanonicalTerm());
66        } else if (curSy == '-') {
67          NewSy();
68          terms.Add(CanonicalTerm()); // minus is the same as plus assuming constant opt
69        } else {
70          NewSy();
71          throw new NotImplementedException();
72          // var e = Expr(variables, constants);
73          // r = Not(r) * e + r * Not(e); // xor = (!x AND y) OR (x AND !y)
74        }
75        curSy = CurSy();
76      }
77
78      return new Expr(terms.SelectMany(t => t.Terms));
79    }
80
81    // canonical term returns either a single term (product of factors) or a set of terms
82    private Expr CanonicalTerm() {
83      var factors = new List<Factor>();
84      var f = CanonicalFact();
85      if (f != null) factors.Add(f);
86      var curSy = CurSy();
87      while (curSy == '*' || curSy == '%') {
88        if (curSy == '*') {
89          NewSy();
90          f = CanonicalFact();
91          if (f != null) factors.Add(f);
92        } else {
93          NewSy();
94          f = CanonicalFact();
95          // if there is only one term we can add multiple inverted simple factors instead of the whole inverted expression
96          if (!f.IsSimpleFactor && f.Expr.Terms.Count == 1) {
97            foreach (var invF in f.Expr.Terms.First().Factors) {
98              if (invF.ToString() == "1") continue;
99              invF.Invert();
100              factors.Add(invF);
101            }
102          } else {
103            f.Invert();
104            if (f != null) factors.Add(f);
105          }
106        }
107        curSy = CurSy();
108      }
109
110      factors = CancelFactors(factors).ToList();
111      return ExpandFactors(factors);
112    }
113
114    // canonical fact returns a factor (either a singe variable, or a set of terms)
115    private Factor CanonicalFact() {
116      var curSy = CurSy();
117      if (curSy == '!') {
118        throw new NotSupportedException();
119      } else if (curSy == '(') {
120        NewSy();
121        Expr r = CanonicalExpr(); // this is already simplified
122        if (CurSy() != ')') throw new ArgumentException();
123        NewSy();
124        return new Factor(r);
125      } else if (curSy >= 'a' && curSy <= 'z') {
126        NewSy();
127        return new Factor(curSy);
128        //       } else if (curSy >= '0' && curSy <= '9') {
129      } else if (curSy >= 'A' && curSy <= 'Z') {
130        // treat nonterminals in the same way as variables
131        NewSy();
132        return new Factor(curSy);
133
134      } else throw new ArgumentException("found symbol " + curSy);
135    }
136
137    // a list of factors (symbols, or expressions, and possibly inverses are read
138    // a list to factors symbols or expressions and possibly inverses are produced
139    // all non-inverse expression factors are expanded
140    private Expr ExpandFactors(IEnumerable<Factor> factors) {
141      // if (invFactors.Count > 0) throw new NotImplementedException();
142      Debug.Assert(!factors.First().IsInverse); // the first factor is never an inverted factor
143
144      // each factor could be a list of terms (expression)
145
146      Expr currentFact = null;
147      var firstFactor = factors.First();
148      if (firstFactor.IsSimpleFactor || firstFactor.IsInverse) currentFact = new Expr(new Term(firstFactor));
149      else currentFact = firstFactor.Expr;
150
151      foreach (var fact in factors.Skip(1)) {
152        Expr curExpr = null;
153        if (fact.IsSimpleFactor || fact.IsInverse) curExpr = new Expr(new Term(fact));
154        else curExpr = fact.Expr;
155        currentFact = AllProducts(currentFact, curExpr);
156      }
157      return currentFact;
158    }
159
160    private Expr AllProducts(Expr a, Expr b) {
161      var aTerms = a.Terms.ToArray();
162      var bTerms = b.Terms.ToArray();
163      var combs = from aT in aTerms
164                  from bT in bTerms
165                  let factors = CancelFactors(aT.Factors.Concat(bT.Factors))
166                  select new Term(factors);
167      return new Expr(combs);
168    }
169
170    private IEnumerable<Factor> CancelFactors(IEnumerable<Factor> factors) {
171      var factorsArr = factors.ToArray();
172      var results = new List<Factor>(factors);
173      foreach (var f in factorsArr) {
174        if (f.ToString() == "1") results.Remove(f);
175        if (f.ToString() == "1/(1)") results.Remove(f);
176        if (f.IsInverse) {
177          // find matching
178          Factor match;
179          match = factorsArr.FirstOrDefault(other => !other.IsInverse && f.Cancels(other));
180          if (match != null) {
181            results.Remove(f);
182            var idx = results.IndexOf(match);
183
184            results.Remove(match);
185            if (!results.Any())
186              results.Insert(idx, new Factor('1')); // when the factor is the last one then insert a one
187
188            // also mark as cancelled in the factorsArr
189            idx = Array.IndexOf(factorsArr, match);
190            factorsArr[idx] = new Factor('1');
191          }
192        }
193      }
194      if (results.Count == 0) results.Add(new Factor('1'));
195      return results;
196    }
197
198    #region term
199    // term can be merged (essentially an ordered list of factors)
200    internal class Term : IComparable<Term> {
201      private readonly SortedList<Factor, int> factors; // factor symbol and the number of occurrences
202
203      public IEnumerable<Factor> Factors {
204        get {
205          return factors.SelectMany(p => Enumerable.Repeat(p.Key, p.Value));
206        }
207      }
208
209      public Term(Factor f) {
210        factors = new SortedList<Factor, int>();
211        factors.Add(f, 1);
212      }
213      public Term(IEnumerable<Factor> factors) {
214        this.factors = new SortedList<Factor, int>();
215        foreach (var f in factors) {
216          if (this.factors.ContainsKey(f)) this.factors[f] += 1;
217          else this.factors.Add(f, 1);
218        }
219      }
220
221      public int CompareTo(Term other) {
222        if (ContainsNonTerminal(Factors) && !ContainsNonTerminal(other.Factors)) {
223          return 1;
224        } else if (!ContainsNonTerminal(Factors) && ContainsNonTerminal(other.Factors)) {
225          return -1;
226        } else {
227          var countComp = Factors.Count().CompareTo(other.Factors.Count());
228          if (countComp != 0) return countComp;
229          foreach (var pair in Factors.Zip(other.Factors, Tuple.Create)) {
230            var fComp = pair.Item1.CompareTo(pair.Item2);
231            if (fComp != 0) return fComp;
232          }
233          return 0;
234        }
235      }
236
237      public override string ToString() {
238
239        return string.Join("*", Factors);
240      }
241      public override bool Equals(object obj) {
242        var other = obj as Term;
243        if (other == null) return false;
244        if (this.Factors.Count() != other.Factors.Count()) return false;
245        if (this.Factors.Zip(other.Factors, Tuple.Create).Any(t => t.Item1 != t.Item2)) return false;
246        return true;
247      }
248      public override int GetHashCode() {
249        var h = 31415;
250        foreach (var v in Factors) {
251          h ^= v.GetHashCode();
252        }
253        return h;
254      }
255    }
256    #endregion
257
258    #region factor
259    // factors is either a single symbol or an inverted expression
260    internal class Factor : IComparable<Factor> {
261      public bool IsSimpleFactor { get { return Expr == null; } }
262      public char Symbol { get { return symbol; } }
263      public Expr Expr { get { return expr; } }
264      public bool IsInverse { get { return inv; } }
265      private readonly char symbol = '\0';
266      private readonly Expr expr;
267      private bool inv;
268
269      public Factor(char f) {
270        this.symbol = f;
271      }
272      public Factor(Expr expr) {
273        this.expr = expr;
274      }
275
276      public void Invert() {
277        this.inv = !inv;
278      }
279      public bool Cancels(Factor other) {
280        if (this.inv == other.inv) return false;
281        if (this.Expr != null && other.Expr == null) return false;
282        if (this.Expr == null && other.Expr != null) return false;
283        if (Expr == null) return this.Symbol.Equals(other.symbol);
284        else return this.Expr.CompareTo(other.Expr) == 0;
285      }
286
287      public int CompareTo(Factor other) {
288        // 1) single symbol factors first
289        // 2) expression factors by expression compare
290        var crit1 = ContainsNonTerminal(this).CompareTo(ContainsNonTerminal(other));
291        if (crit1 != 0) return crit1;
292
293        var crit2 = this.IsInverse.CompareTo(other.IsInverse);
294        if (crit2 != 0) return crit2;
295
296        var crit3 = this.IsSimpleFactor.CompareTo(other.IsSimpleFactor);
297        if (crit3 != 0) return crit3;
298
299        // both are simple or expressions
300        if (IsSimpleFactor) return this.symbol.CompareTo(other.symbol);
301        else return this.Expr.CompareTo(other.Expr);
302      }
303
304      public override string ToString() {
305        var s = Expr == null ? symbol.ToString() : "(" + expr.ToString() + ")";
306        if (IsInverse) {
307          return "/" + s;
308        } else return s;
309      }
310      public override bool Equals(object obj) {
311        var other = obj as Factor;
312        if (other == null) return false;
313        if (IsInverse != other.IsInverse) return false;
314        if (this.symbol != other.symbol) return false;
315        if (this.Expr != other.Expr) return false;
316        return true;
317      }
318      public override int GetHashCode() {
319        var h = 31415;
320        h ^= symbol.GetHashCode();
321        if (Expr != null) h ^= Expr.GetHashCode();
322        return h;
323      }
324    }
325    #endregion
326
327    #region expr
328
329    internal class Expr : IComparable<Expr> {
330      public readonly SortedSet<Term> Terms; // only set for Kind == Expr
331      //public bool Inverse;
332      public Expr(Term t) {
333        Terms = new SortedSet<Term>();
334        Terms.Add(t);
335      }
336      public Expr(IEnumerable<Term> exprTerms) {
337        Terms = new SortedSet<Term>();
338        foreach (var t in exprTerms) {
339          Terms.Add(t);
340        }
341      }
342
343      public void Merge(Expr other) {
344        this.Terms.UnionWith(other.Terms);
345      }
346
347      public int CompareTo(Expr other) {
348        var sizeComp = this.Terms.Count.CompareTo(other.Terms.Count);
349        if (sizeComp != 0) return sizeComp;
350        // same size => compare terms
351        foreach (var pair in Terms.Zip(other.Terms, Tuple.Create)) {
352          var termComp = pair.Item1.CompareTo(pair.Item2);
353          if (termComp != 0) return termComp;
354        }
355        return 0;
356      }
357
358      public override string ToString() {
359        return string.Join("+", Terms);
360      }
361      public override bool Equals(object obj) {
362        var other = obj as Expr;
363        if (other == null) return false;
364        if (this.Terms.Count() != other.Terms.Count()) return false;
365        return this.Terms.Intersect(other.Terms).Count() == this.Terms.Count;
366      }
367
368      public override int GetHashCode() {
369        var h = 31415;
370        if (Terms != null)
371          foreach (var t in Terms) {
372            h ^= t.GetHashCode();
373          }
374        return h;
375      }
376    }
377    #endregion
378    internal static bool IsNonTerminal(char symb) {
379      return symb >= 'A' && symb <= 'Z';
380    }
381    internal static bool ContainsNonTerminal(IEnumerable<Factor> factors) {
382      return factors.Any(ContainsNonTerminal);
383    }
384    internal static bool ContainsNonTerminal(Factor f) {
385      if (f.Expr == null) return IsNonTerminal(f.Symbol);
386      else return ContainsNonTerminal(f.Expr);
387    }
388
389    private static bool ContainsNonTerminal(Expr expr) {
390      return expr.Terms.Any(ContainsNonTerminal);
391    }
392
393    private static bool ContainsNonTerminal(Term term) {
394      return ContainsNonTerminal(term.Factors);
395    }
396  }
397}
Note: See TracBrowser for help on using the repository browser.