Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Common/ExpressionExtender.cs @ 11972

Last change on this file since 11972 was 11972, checked in by gkronber, 9 years ago

#2283 new experiments

File size: 13.7 KB
Line 
1using System;
2using System.CodeDom;
3using System.Collections.Generic;
4using System.Diagnostics;
5using System.Linq;
6using System.Security.Cryptography.X509Certificates;
7using System.Text;
8using System.Threading.Tasks;
9using HeuristicLab.Common;
10
11namespace HeuristicLab.Algorithms.Bandits {
12  // helper to create canonical forms of expressions
13  // TODO: change symbolicregressionpoly10problem to use this class
14  // this does not support expressions with constants (in transformations we assume constant opt is used)
15  // (e.g. we transform all negative signs to + because it is assumed a negative constant can be produced for the term)
16  public class ExpressionExtender {
17
18    // supports the grammar
19    // G(E):
20    //     E -> V | V+E | V-E | V*E | V%E | (E)
21    //     V -> <variables>
22    //     ";
23
24    // transformations:
25    // - transform c*b*a to a*b*c (lexicographic ordering for factors, cached)
26    // - transfrom - to + (assumed optimized constants for terms)
27
28    private string sentence;
29    private int syIdx;
30
31    public string CanonicalRepresentation(string phrase) {
32      InitLex(phrase);
33      var e = CanonicalExpr();
34      return e.ToString();
35    }
36
37
38    private void InitLex(string sentence) {
39      this.sentence = sentence;
40      this.syIdx = 0;
41    }
42
43    private char CurSy() {
44      if (syIdx >= sentence.Length) return '\0';
45      return sentence[syIdx];
46    }
47    private void NewSy() {
48      if (syIdx < sentence.Length) syIdx++;
49    }
50
51    // an expression is a sorted set of terms
52    // a term is an (ordered) list of factors
53    // a factor is either a single symbol or an inverted expression
54    // CanonicalExpression reads multiple terms (expressions) and must merge the terms in the expression (ordering and duplicates)
55    // CanonicalTerm reads multiple factors and must expand factors whenever it reads a combined factor (expression) it produces an expression
56    // CanonicalFactor produces an expression
57
58
59    // canonical expression returns either a single term or a set of terms
60    private Expr CanonicalExpr() {
61      var terms = new List<Expr>();
62      terms.Add(CanonicalTerm());
63      var curSy = CurSy();
64      while (curSy == '+' || curSy == '-' || curSy == '^') {
65        if (curSy == '+') {
66          NewSy();
67          terms.Add(CanonicalTerm());
68        } else if (curSy == '-') {
69          NewSy();
70          terms.Add(CanonicalTerm()); // minus is the same as plus assuming constant opt
71        } else {
72          NewSy();
73          throw new NotImplementedException();
74          // var e = Expr(variables, constants);
75          // r = Not(r) * e + r * Not(e); // xor = (!x AND y) OR (x AND !y)
76        }
77        curSy = CurSy();
78      }
79
80      return new Expr(terms.SelectMany(t => t.Terms));
81    }
82
83    // canonical term returns either a single term (product of factors) or a set of terms
84    private Expr CanonicalTerm() {
85      var factors = new List<Factor>();
86      var f = CanonicalFact();
87      if (f != null) factors.Add(f);
88      var curSy = CurSy();
89      while (curSy == '*' || curSy == '%') {
90        if (curSy == '*') {
91          NewSy();
92          f = CanonicalFact();
93          if (f != null) factors.Add(f);
94        } else {
95          NewSy();
96          f = CanonicalFact();
97          f.Invert();
98          if (f != null) factors.Add(f);
99        }
100        curSy = CurSy();
101      }
102      return ExpandFactors(factors);
103    }
104
105    /*
106    private List<Factor> CancelFactors(Term t) {
107      var nonInvF = t.Factors.Where(f => !f.IsInverse).ToArray();
108      var invF = t.Factors.Where(f => f.IsInverse).ToArray();
109
110      var result = new List<Factor>();
111      foreach (var f in nonInvF) {
112        if (!invF.Contains(f)) {
113          result.Add(f);
114        }
115      }
116      if (result.Count == 0) result.Add(new Factor('1'));
117      return result;
118    }
119    */
120    // canonical fact returns a factor (either a singe variable, or a set of terms)
121    private Factor CanonicalFact() {
122      var curSy = CurSy();
123      if (curSy == '!') {
124        throw new NotSupportedException();
125      } else if (curSy == '(') {
126        NewSy();
127        Expr r = CanonicalExpr(); // this is already simplified
128        if (CurSy() != ')') throw new ArgumentException();
129        NewSy();
130        return new Factor(r);
131      } else if (curSy >= 'a' && curSy <= 'z') {
132        NewSy();
133        return new Factor(curSy);
134        //       } else if (curSy >= '0' && curSy <= '9') {
135      } else if (curSy >= 'A' && curSy <= 'Z') {
136        // treat nonterminals in the same way as variables
137        NewSy();
138        return new Factor(curSy);
139
140      } else throw new ArgumentException("found symbol " + curSy);
141    }
142
143    // a list of factors (symbols, or expressions, and possibly inverses are read
144    // a lis to factors symbols or expressions and possibly inverses are produced
145    // all non-inverse expression factors are expanded
146    private Expr ExpandFactors(IEnumerable<Factor> factors) {
147      // if (invFactors.Count > 0) throw new NotImplementedException();
148      Debug.Assert(!factors.First().IsInverse); // the first factor is never an inverted factor
149
150      // each factor could be a list of terms (expression)
151
152      Expr currentFact = null;
153      var firstFactor = factors.First();
154      if (firstFactor.IsSimpleFactor) currentFact = new Expr(new Term(firstFactor));
155      else currentFact = firstFactor.Expr;
156
157      foreach (var fact in factors.Skip(1)) {
158        Expr curExpr = null;
159        if (fact.IsSimpleFactor || fact.IsInverse) curExpr = new Expr(new Term(fact));
160        else curExpr = fact.Expr;
161        currentFact = AllProducts(currentFact, curExpr);
162      }
163      return currentFact;
164    }
165
166    private Expr AllProducts(Expr a, Expr b) {
167      var aTerms = a.Terms.ToArray();
168      var bTerms = b.Terms.ToArray();
169      var combs = from aT in aTerms
170                  from bT in bTerms
171                  let factors = CancelFactors(aT.Factors.Concat(bT.Factors))
172                  select new Term(factors);
173      return new Expr(combs);
174    }
175
176    private IEnumerable<Factor> CancelFactors(IEnumerable<Factor> factors) {
177      var factorsArr = factors.ToArray();
178      var results = new List<Factor>(factors);
179      foreach (var f in factorsArr) {
180        if (f.ToString() == "1") results.Remove(f);
181        if (f.IsInverse) {
182          // find matching
183          Factor match;
184          if (f.IsSimpleFactor) match = factorsArr.FirstOrDefault(other => !other.IsInverse && f.Symbol.Equals(other.Symbol));
185          else match = factorsArr.FirstOrDefault(other => !other.IsInverse && f.Expr.Equals(other.Expr));
186          if (match != null) {
187            results.Remove(match);
188            results.Remove(f);
189          }
190        }
191      }
192      if (results.Count == 0) results.Add(new Factor('1'));
193      return results;
194    }
195
196    #region term
197    // term can be merged (essentially an ordered list of factors)
198    internal class Term : IComparable<Term> {
199      private readonly SortedList<Factor, int> factors; // factor symbol and the number of occurrences
200
201      public IEnumerable<Factor> Factors {
202        get {
203          return factors.SelectMany(p => Enumerable.Repeat(p.Key, p.Value));
204        }
205      }
206
207      public Term(Factor f) {
208        factors = new SortedList<Factor, int>();
209        factors.Add(f, 1);
210      }
211      public Term(IEnumerable<Factor> factors) {
212        this.factors = new SortedList<Factor, int>();
213        foreach (var f in factors) {
214          if (this.factors.ContainsKey(f)) this.factors[f] += 1;
215          else this.factors.Add(f, 1);
216        }
217      }
218
219      public int CompareTo(Term other) {
220        if (ContainsNonTerminal(Factors) && !ContainsNonTerminal(other.Factors)) {
221          return 1;
222        } else if (!ContainsNonTerminal(Factors) && ContainsNonTerminal(other.Factors)) {
223          return -1;
224        } else {
225          var countComp = Factors.Count().CompareTo(other.Factors.Count());
226          if (countComp != 0) return countComp;
227          return string.Join("", Factors).CompareTo(string.Join("", other.Factors));
228        }
229      }
230
231      public override string ToString() {
232
233        return string.Join("*", Factors);
234      }
235      public override bool Equals(object obj) {
236        var other = obj as Term;
237        if (other == null) return false;
238        if (this.Factors.Count() != other.Factors.Count()) return false;
239        if (this.Factors.Zip(other.Factors, Tuple.Create).Any(t => t.Item1 != t.Item2)) return false;
240        return true;
241      }
242      public override int GetHashCode() {
243        var h = 31415;
244        foreach (var v in Factors) {
245          h ^= v.GetHashCode();
246        }
247        return h;
248      }
249    }
250    #endregion
251
252    #region factor
253    // factors is either a single symbol or an inverted expression
254    internal class Factor : IComparable<Factor> {
255      public bool IsSimpleFactor { get { return Expr == null; } }
256      public char Symbol { get { return symbol; } }
257      public Expr Expr { get { return expr; } }
258      public bool IsInverse { get { return inv; } }
259      private readonly char symbol = '\0';
260      private readonly Expr expr;
261      private bool inv;
262
263      public Factor(char f) {
264        this.symbol = f;
265      }
266      public Factor(Expr expr) {
267        this.expr = expr;
268      }
269
270      public void Invert() {
271        this.inv = !inv;
272      }
273
274      public int CompareTo(Factor other) {
275        // 1) single symbol factors first
276        // 2) expression factors by expression compare
277        var crit1 = ContainsNonTerminal(this).CompareTo(ContainsNonTerminal(other));
278        if (crit1 != 0) return crit1;
279
280        var crit2 = this.IsInverse.CompareTo(other.IsInverse);
281        if (crit2 != 0) return crit2;
282
283        var crit3 = this.IsSimpleFactor.CompareTo(other.IsSimpleFactor);
284        if (crit3 != 0) return crit3;
285
286        // both are simple or expressions
287        if (IsSimpleFactor) return this.symbol.CompareTo(other.symbol);
288        else return this.Expr.CompareTo(other.Expr);
289      }
290
291      public override string ToString() {
292        var s = Expr == null ? symbol.ToString() : "(" + expr.ToString() + ")";
293        if (IsInverse) {
294          return "1/" + s;
295        } else return s;
296      }
297      public override bool Equals(object obj) {
298        var other = obj as Factor;
299        if (other == null) return false;
300        if (IsInverse != other.IsInverse) return false;
301        if (this.symbol != other.symbol) return false;
302        if (this.Expr != other.Expr) return false;
303        return true;
304      }
305      public override int GetHashCode() {
306        var h = 31415;
307        h ^= symbol.GetHashCode();
308        if (Expr != null) h ^= Expr.GetHashCode();
309        return h;
310      }
311    }
312    #endregion
313
314    #region expr
315
316    internal class Expr : IComparable<Expr> {
317      public readonly SortedSet<Term> Terms; // only set for Kind == Expr
318      //public bool Inverse;
319      public Expr(Term t) {
320        Terms = new SortedSet<Term>();
321        Terms.Add(t);
322      }
323      public Expr(IEnumerable<Term> exprTerms) {
324        Terms = new SortedSet<Term>();
325        foreach (var t in exprTerms) {
326          Terms.Add(t);
327        }
328      }
329
330      public void Merge(Expr other) {
331        this.Terms.UnionWith(other.Terms);
332      }
333
334      public int CompareTo(Expr other) {
335        var sizeComp = this.Terms.Count.CompareTo(other.Terms.Count);
336        if (sizeComp != 0) return sizeComp;
337        // same size => compare terms
338        foreach (var pair in Terms.Zip(other.Terms, Tuple.Create)) {
339          var termComp = pair.Item1.CompareTo(pair.Item2);
340          if (termComp != 0) return termComp;
341        }
342        return 0;
343      }
344
345      public override string ToString() {
346        // if (Inverse && Terms.Count > 1)
347        //   return "(" + string.Join("+", Terms) + ")";
348        // else if (Inverse && Terms.Count == 1) {
349        //   return Terms.First().ToString().Replace("%", "#").Replace("*", "%").Replace("#", "*");
350        // } else
351        return string.Join("+", Terms);
352      }
353      public override bool Equals(object obj) {
354        var other = obj as Expr;
355        if (other == null) return false;
356        //if (this.Inverse != other.Inverse) return false;
357        if (this.Terms.Count() != other.Terms.Count()) return false;
358        return this.Terms.Intersect(other.Terms).Count() == this.Terms.Count;
359      }
360      public override int GetHashCode() {
361        var h = 31415;
362        if (Terms != null)
363          foreach (var t in Terms) {
364            h ^= t.GetHashCode();
365          }
366        return h;
367      }
368    }
369    #endregion
370    internal static bool IsNonTerminal(char symb) {
371      return symb >= 'A' && symb <= 'Z';
372    }
373    internal static bool ContainsNonTerminal(IEnumerable<Factor> factors) {
374      return factors.Any(ContainsNonTerminal);
375    }
376    internal static bool ContainsNonTerminal(Factor f) {
377      if (f.Expr == null) return IsNonTerminal(f.Symbol);
378      else return ContainsNonTerminal(f.Expr);
379    }
380
381    private static bool ContainsNonTerminal(Expr expr) {
382      return expr.Terms.Any(ContainsNonTerminal);
383    }
384
385    private static bool ContainsNonTerminal(Term term) {
386      return ContainsNonTerminal(term.Factors);
387    }
388
389
390    /*
391
392    internal class FactorComparer : IComparer<char> {
393      public int Compare(char x, char y) {
394        if (IsNonTerminal(x) && !IsNonTerminal(y)) {
395          return 1;
396        } else if (!IsNonTerminal(x) && IsNonTerminal(y)) {
397          return -1;
398        } else if (IsNonTerminal(x) && IsNonTerminal(y)) {
399          return x.CompareTo(y);
400        } else {
401          return x.CompareTo(y);
402        }
403      }
404    }*/
405  }
406}
Note: See TracBrowser for help on using the repository browser.