source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Common/ExpressionExtender.cs @ 12014

Last change on this file since 12014 was 12014, checked in by gkronber, 7 years ago

#2283: worked on transformation of arithmetic expressions to canonical form

File size: 14.0 KB
Line 
1using System;
2using System.CodeDom;
3using System.Collections.Generic;
4using System.Diagnostics;
5using System.Linq;
6using System.Security.Cryptography.X509Certificates;
7using System.Text;
8using System.Threading.Tasks;
9using HeuristicLab.Common;
10
11namespace HeuristicLab.Algorithms.Bandits {
12  // helper to create canonical forms of expressions
13  // NOTE: Not implemented yet (see unit test for divisions)
14  // TODO: change symbolicregressionpoly10problem to use this class
15  // this does not support expressions with constants (in transformations we assume constant opt is used)
16  // (e.g. we transform all negative signs to + because it is assumed a negative constant can be produced for the term)
17  public class ExpressionExtender {
18
19    // supports the grammar
20    // G(E):
21    //     E -> V | V+E | V-E | V*E | V%E | V/E | (E)
22    //     V -> <variables>
23    //     ";
24
25    // might produce expressions of the form |/x
26    // the pipe symbol | is used for the constant one (in comparison to other constants)
27    private string sentence;
28    private int syIdx;
29
30    public string CanonicalRepresentation(string phrase) {
31      InitLex(phrase);
32      var e = CanonicalExpr();
33      return e.ToString();
34    }
35
36
37    private void InitLex(string sentence) {
38      this.sentence = sentence;
39      this.syIdx = 0;
40    }
41
42    private char CurSy() {
43      if (syIdx >= sentence.Length) return '\0';
44      return sentence[syIdx];
45    }
46    private void NewSy() {
47      if (syIdx < sentence.Length) syIdx++;
48    }
49
50    // an expression is a sorted set of terms
51    // a term is an (ordered) list of factors
52    // a factor is either a single symbol or an inverted expression
53    // CanonicalExpression reads multiple terms (expressions) and must merge the terms in the expression (ordering and duplicates)
54    // CanonicalTerm reads multiple factors and must expand factors whenever it reads a combined factor (expression) it produces an expression
55    // CanonicalFactor produces an expression
56
57
58    // canonical expression returns either a single term or a set of terms
59    private Expr CanonicalExpr() {
60      var terms = new List<Expr>();
61      terms.Add(CanonicalTerm());
62      var curSy = CurSy();
63      while (curSy == '+' || curSy == '-' || curSy == '^') {
64        if (curSy == '+') {
65          NewSy();
66          terms.Add(CanonicalTerm());
67        } else if (curSy == '-') {
68          NewSy();
69          terms.Add(CanonicalTerm()); // minus is the same as plus assuming constant opt
70        } else {
71          NewSy();
72          throw new NotImplementedException();
73          // var e = Expr(variables, constants);
74          // r = Not(r) * e + r * Not(e); // xor = (!x AND y) OR (x AND !y)
75        }
76        curSy = CurSy();
77      }
78
79      return new Expr(terms.SelectMany(t => t.Terms));
80    }
81
82    // canonical term returns either a single term (product of factors) or a set of terms
83    private Expr CanonicalTerm() {
84      var factors = new List<Factor>();
85      var f = CanonicalFact();
86      if (f != null) factors.Add(f);
87      var curSy = CurSy();
88      while (curSy == '*' || curSy == '%') {
89        if (curSy == '*') {
90          NewSy();
91          f = CanonicalFact();
92          if (f != null) factors.Add(f);
93        } else {
94          NewSy();
95          f = CanonicalFact();
96          // if there is only one term we can add multiple inverted simple factors instead of the whole inverted expression
97          if (!f.IsSimpleFactor && f.Expr.Terms.Count == 1) {
98            foreach (var invF in f.Expr.Terms.First().Factors) {
99              if (invF.ToString() == "|") continue;
100              invF.Invert();
101              factors.Add(invF);
102            }
103          } else {
104            f.Invert();
105            if (f != null) factors.Add(f);
106          }
107        }
108        curSy = CurSy();
109      }
110
111      factors = CancelFactors(factors).ToList();
112      return ExpandFactors(factors);
113    }
114
115    // canonical fact returns a factor (either a singe variable, or a set of terms)
116    private Factor CanonicalFact() {
117      var curSy = CurSy();
118      if (curSy == '!') {
119        throw new NotSupportedException();
120      } else if (curSy == '(') {
121        NewSy();
122        Expr r = CanonicalExpr(); // this is already simplified
123        if (CurSy() != ')') throw new ArgumentException();
124        NewSy();
125        return new Factor(r);
126      } else if (curSy >= 'a' && curSy <= 'z') {
127        NewSy();
128        return new Factor(curSy);
129        //       } else if (curSy >= '0' && curSy <= '9') {
130      } else if (curSy >= 'A' && curSy <= 'Z') {
131        // treat nonterminals in the same way as variables
132        NewSy();
133        return new Factor(curSy);
134
135      } else throw new ArgumentException("found symbol " + curSy);
136    }
137
138    // a list of factors (symbols, or expressions, and possibly inverses are read
139    // a list to factors symbols or expressions and possibly inverses are produced
140    // all non-inverse expression factors are expanded
141    private Expr ExpandFactors(IEnumerable<Factor> factors) {
142      // if (invFactors.Count > 0) throw new NotImplementedException();
143      Debug.Assert(!factors.First().IsInverse); // the first factor is never an inverted factor
144
145      // each factor could be a list of terms (expression)
146
147      Expr currentFact = null;
148      var firstFactor = factors.First();
149      if (firstFactor.IsSimpleFactor || firstFactor.IsInverse) currentFact = new Expr(new Term(firstFactor));
150      else currentFact = firstFactor.Expr;
151
152      foreach (var fact in factors.Skip(1)) {
153        Expr curExpr = null;
154        if (fact.IsSimpleFactor || fact.IsInverse) curExpr = new Expr(new Term(fact));
155        else curExpr = fact.Expr;
156        currentFact = AllProducts(currentFact, curExpr);
157      }
158      return currentFact;
159    }
160
161    private Expr AllProducts(Expr a, Expr b) {
162      var aTerms = a.Terms.ToArray();
163      var bTerms = b.Terms.ToArray();
164      var combs = from aT in aTerms
165                  from bT in bTerms
166                  let factors = CancelFactors(aT.Factors.Concat(bT.Factors))
167                  select new Term(factors);
168      return new Expr(combs);
169    }
170
171    private IEnumerable<Factor> CancelFactors(IEnumerable<Factor> factors) {
172      var factorsArr = factors.ToArray();
173      var results = new List<Factor>(factors);
174      foreach (var f in factorsArr) {
175        if (f.ToString() == "|") results.Remove(f);
176        if (f.ToString() == "|/(|)") results.Remove(f);
177        if (f.IsInverse) {
178          // find matching
179          Factor match;
180          match = factorsArr.FirstOrDefault(other => !other.IsInverse && f.Cancels(other));
181          if (match != null) {
182            results.Remove(f);
183            var idx = results.IndexOf(match);
184
185            results.Remove(match);
186            if (!results.Any())
187              results.Insert(idx, new Factor('|')); // when the factor is the last one then insert a one
188
189            // also mark as cancelled in the factorsArr
190            idx = Array.IndexOf(factorsArr, match);
191            factorsArr[idx] = new Factor('|');
192          }
193        }
194      }
195      // remove all unnecessary "1* factors"
196
197      if (results.Count == 0) results.Add(new Factor('|'));
198      return results;
199    }
200
201    #region term
202    // term can be merged (essentially an ordered list of factors)
203    internal class Term : IComparable<Term> {
204      private readonly SortedList<Factor, int> factors; // factor symbol and the number of occurrences
205
206      public IEnumerable<Factor> Factors {
207        get {
208          return factors.SelectMany(p => Enumerable.Repeat(p.Key, p.Value));
209        }
210      }
211
212      public Term(Factor f) {
213        factors = new SortedList<Factor, int>();
214        factors.Add(f, 1);
215      }
216      public Term(IEnumerable<Factor> factors) {
217        this.factors = new SortedList<Factor, int>();
218        foreach (var f in factors) {
219          if (this.factors.ContainsKey(f)) this.factors[f] += 1;
220          else this.factors.Add(f, 1);
221        }
222      }
223
224      public int CompareTo(Term other) {
225        if (ContainsNonTerminal(Factors) && !ContainsNonTerminal(other.Factors)) {
226          return 1;
227        } else if (!ContainsNonTerminal(Factors) && ContainsNonTerminal(other.Factors)) {
228          return -1;
229        } else {
230          var countComp = Factors.Count().CompareTo(other.Factors.Count());
231          if (countComp != 0) return countComp;
232          return string.Join("", Factors).CompareTo(string.Join("", other.Factors));
233        }
234      }
235
236      public override string ToString() {
237
238        return string.Join("*", Factors);
239      }
240      public override bool Equals(object obj) {
241        var other = obj as Term;
242        if (other == null) return false;
243        if (this.Factors.Count() != other.Factors.Count()) return false;
244        if (this.Factors.Zip(other.Factors, Tuple.Create).Any(t => t.Item1 != t.Item2)) return false;
245        return true;
246      }
247      public override int GetHashCode() {
248        var h = 31415;
249        foreach (var v in Factors) {
250          h ^= v.GetHashCode();
251        }
252        return h;
253      }
254    }
255    #endregion
256
257    #region factor
258    // factors is either a single symbol or an inverted expression
259    internal class Factor : IComparable<Factor> {
260      public bool IsSimpleFactor { get { return Expr == null; } }
261      public char Symbol { get { return symbol; } }
262      public Expr Expr { get { return expr; } }
263      public bool IsInverse { get { return inv; } }
264      private readonly char symbol = '\0';
265      private readonly Expr expr;
266      private bool inv;
267
268      public Factor(char f) {
269        this.symbol = f;
270      }
271      public Factor(Expr expr) {
272        this.expr = expr;
273      }
274
275      public void Invert() {
276        this.inv = !inv;
277      }
278      public bool Cancels(Factor other) {
279        if (this.inv == other.inv) return false;
280        if (this.Expr != null && other.Expr == null) return false;
281        if (this.Expr == null && other.Expr != null) return false;
282        if (Expr == null) return this.Symbol.Equals(other.symbol);
283        else return this.Expr.CompareTo(other.Expr) == 0;
284      }
285
286      public int CompareTo(Factor other) {
287        // 1) single symbol factors first
288        // 2) expression factors by expression compare
289        var crit1 = ContainsNonTerminal(this).CompareTo(ContainsNonTerminal(other));
290        if (crit1 != 0) return crit1;
291
292        var crit2 = this.IsInverse.CompareTo(other.IsInverse);
293        if (crit2 != 0) return crit2;
294
295        var crit3 = this.IsSimpleFactor.CompareTo(other.IsSimpleFactor);
296        if (crit3 != 0) return crit3;
297
298        // both are simple or expressions
299        if (IsSimpleFactor) return this.symbol.CompareTo(other.symbol);
300        else return this.Expr.CompareTo(other.Expr);
301      }
302
303      public override string ToString() {
304        var s = Expr == null ? symbol.ToString() : "(" + expr.ToString() + ")";
305        if (IsInverse) {
306          return "|/" + s;
307        } else return s;
308      }
309      public override bool Equals(object obj) {
310        var other = obj as Factor;
311        if (other == null) return false;
312        if (IsInverse != other.IsInverse) return false;
313        if (this.symbol != other.symbol) return false;
314        if (this.Expr != other.Expr) return false;
315        return true;
316      }
317      public override int GetHashCode() {
318        var h = 31415;
319        h ^= symbol.GetHashCode();
320        if (Expr != null) h ^= Expr.GetHashCode();
321        return h;
322      }
323    }
324    #endregion
325
326    #region expr
327
328    internal class Expr : IComparable<Expr> {
329      public readonly SortedSet<Term> Terms; // only set for Kind == Expr
330      //public bool Inverse;
331      public Expr(Term t) {
332        Terms = new SortedSet<Term>();
333        Terms.Add(t);
334      }
335      public Expr(IEnumerable<Term> exprTerms) {
336        Terms = new SortedSet<Term>();
337        foreach (var t in exprTerms) {
338          Terms.Add(t);
339        }
340      }
341
342      public void Merge(Expr other) {
343        this.Terms.UnionWith(other.Terms);
344      }
345
346      public int CompareTo(Expr other) {
347        var sizeComp = this.Terms.Count.CompareTo(other.Terms.Count);
348        if (sizeComp != 0) return sizeComp;
349        // same size => compare terms
350        foreach (var pair in Terms.Zip(other.Terms, Tuple.Create)) {
351          var termComp = pair.Item1.CompareTo(pair.Item2);
352          if (termComp != 0) return termComp;
353        }
354        return 0;
355      }
356
357      public override string ToString() {
358        // if (Inverse && Terms.Count > 1)
359        //   return "(" + string.Join("+", Terms) + ")";
360        // else if (Inverse && Terms.Count == 1) {
361        //   return Terms.First().ToString().Replace("%", "#").Replace("*", "%").Replace("#", "*");
362        // } else
363        return string.Join("+", Terms);
364      }
365      public override bool Equals(object obj) {
366        var other = obj as Expr;
367        if (other == null) return false;
368        if (this.Terms.Count() != other.Terms.Count()) return false;
369        return this.Terms.Intersect(other.Terms).Count() == this.Terms.Count;
370      }
371
372      public override int GetHashCode() {
373        var h = 31415;
374        if (Terms != null)
375          foreach (var t in Terms) {
376            h ^= t.GetHashCode();
377          }
378        return h;
379      }
380    }
381    #endregion
382    internal static bool IsNonTerminal(char symb) {
383      return symb >= 'A' && symb <= 'Z';
384    }
385    internal static bool ContainsNonTerminal(IEnumerable<Factor> factors) {
386      return factors.Any(ContainsNonTerminal);
387    }
388    internal static bool ContainsNonTerminal(Factor f) {
389      if (f.Expr == null) return IsNonTerminal(f.Symbol);
390      else return ContainsNonTerminal(f.Expr);
391    }
392
393    private static bool ContainsNonTerminal(Expr expr) {
394      return expr.Terms.Any(ContainsNonTerminal);
395    }
396
397    private static bool ContainsNonTerminal(Term term) {
398      return ContainsNonTerminal(term.Factors);
399    }
400  }
401}
Note: See TracBrowser for help on using the repository browser.