Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Common/ExpressionExtender.cs @ 12005

Last change on this file since 12005 was 11981, checked in by gkronber, 10 years ago

#2283: cleanup and included HeuristicLab.dlls to create a self-contained branch

File size: 13.8 KB
Line 
1using System;
2using System.CodeDom;
3using System.Collections.Generic;
4using System.Diagnostics;
5using System.Linq;
6using System.Security.Cryptography.X509Certificates;
7using System.Text;
8using System.Threading.Tasks;
9using HeuristicLab.Common;
10
11namespace HeuristicLab.Algorithms.Bandits {
12  // helper to create canonical forms of expressions
13  // NOTE: Not implemented yet (see unit test for divisions)
14  // TODO: change symbolicregressionpoly10problem to use this class
15  // this does not support expressions with constants (in transformations we assume constant opt is used)
16  // (e.g. we transform all negative signs to + because it is assumed a negative constant can be produced for the term)
17  public class ExpressionExtender {
18
19    // supports the grammar
20    // G(E):
21    //     E -> V | V+E | V-E | V*E | V%E | (E)
22    //     V -> <variables>
23    //     ";
24
25    // transformations:
26    // - transform c*b*a to a*b*c (lexicographic ordering for factors, cached)
27    // - transfrom - to + (assumed optimized constants for terms)
28
29    private string sentence;
30    private int syIdx;
31
32    public string CanonicalRepresentation(string phrase) {
33      throw new NotImplementedException();
34      InitLex(phrase);
35      var e = CanonicalExpr();
36      return e.ToString();
37    }
38
39
40    private void InitLex(string sentence) {
41      this.sentence = sentence;
42      this.syIdx = 0;
43    }
44
45    private char CurSy() {
46      if (syIdx >= sentence.Length) return '\0';
47      return sentence[syIdx];
48    }
49    private void NewSy() {
50      if (syIdx < sentence.Length) syIdx++;
51    }
52
53    // an expression is a sorted set of terms
54    // a term is an (ordered) list of factors
55    // a factor is either a single symbol or an inverted expression
56    // CanonicalExpression reads multiple terms (expressions) and must merge the terms in the expression (ordering and duplicates)
57    // CanonicalTerm reads multiple factors and must expand factors whenever it reads a combined factor (expression) it produces an expression
58    // CanonicalFactor produces an expression
59
60
61    // canonical expression returns either a single term or a set of terms
62    private Expr CanonicalExpr() {
63      var terms = new List<Expr>();
64      terms.Add(CanonicalTerm());
65      var curSy = CurSy();
66      while (curSy == '+' || curSy == '-' || curSy == '^') {
67        if (curSy == '+') {
68          NewSy();
69          terms.Add(CanonicalTerm());
70        } else if (curSy == '-') {
71          NewSy();
72          terms.Add(CanonicalTerm()); // minus is the same as plus assuming constant opt
73        } else {
74          NewSy();
75          throw new NotImplementedException();
76          // var e = Expr(variables, constants);
77          // r = Not(r) * e + r * Not(e); // xor = (!x AND y) OR (x AND !y)
78        }
79        curSy = CurSy();
80      }
81
82      return new Expr(terms.SelectMany(t => t.Terms));
83    }
84
85    // canonical term returns either a single term (product of factors) or a set of terms
86    private Expr CanonicalTerm() {
87      var factors = new List<Factor>();
88      var f = CanonicalFact();
89      if (f != null) factors.Add(f);
90      var curSy = CurSy();
91      while (curSy == '*' || curSy == '%') {
92        if (curSy == '*') {
93          NewSy();
94          f = CanonicalFact();
95          if (f != null) factors.Add(f);
96        } else {
97          NewSy();
98          f = CanonicalFact();
99          f.Invert();
100          if (f != null) factors.Add(f);
101        }
102        curSy = CurSy();
103      }
104      return ExpandFactors(factors);
105    }
106
107    /*
108    private List<Factor> CancelFactors(Term t) {
109      var nonInvF = t.Factors.Where(f => !f.IsInverse).ToArray();
110      var invF = t.Factors.Where(f => f.IsInverse).ToArray();
111
112      var result = new List<Factor>();
113      foreach (var f in nonInvF) {
114        if (!invF.Contains(f)) {
115          result.Add(f);
116        }
117      }
118      if (result.Count == 0) result.Add(new Factor('1'));
119      return result;
120    }
121    */
122    // canonical fact returns a factor (either a singe variable, or a set of terms)
123    private Factor CanonicalFact() {
124      var curSy = CurSy();
125      if (curSy == '!') {
126        throw new NotSupportedException();
127      } else if (curSy == '(') {
128        NewSy();
129        Expr r = CanonicalExpr(); // this is already simplified
130        if (CurSy() != ')') throw new ArgumentException();
131        NewSy();
132        return new Factor(r);
133      } else if (curSy >= 'a' && curSy <= 'z') {
134        NewSy();
135        return new Factor(curSy);
136        //       } else if (curSy >= '0' && curSy <= '9') {
137      } else if (curSy >= 'A' && curSy <= 'Z') {
138        // treat nonterminals in the same way as variables
139        NewSy();
140        return new Factor(curSy);
141
142      } else throw new ArgumentException("found symbol " + curSy);
143    }
144
145    // a list of factors (symbols, or expressions, and possibly inverses are read
146    // a lis to factors symbols or expressions and possibly inverses are produced
147    // all non-inverse expression factors are expanded
148    private Expr ExpandFactors(IEnumerable<Factor> factors) {
149      // if (invFactors.Count > 0) throw new NotImplementedException();
150      Debug.Assert(!factors.First().IsInverse); // the first factor is never an inverted factor
151
152      // each factor could be a list of terms (expression)
153
154      Expr currentFact = null;
155      var firstFactor = factors.First();
156      if (firstFactor.IsSimpleFactor) currentFact = new Expr(new Term(firstFactor));
157      else currentFact = firstFactor.Expr;
158
159      foreach (var fact in factors.Skip(1)) {
160        Expr curExpr = null;
161        if (fact.IsSimpleFactor || fact.IsInverse) curExpr = new Expr(new Term(fact));
162        else curExpr = fact.Expr;
163        currentFact = AllProducts(currentFact, curExpr);
164      }
165      return currentFact;
166    }
167
168    private Expr AllProducts(Expr a, Expr b) {
169      var aTerms = a.Terms.ToArray();
170      var bTerms = b.Terms.ToArray();
171      var combs = from aT in aTerms
172                  from bT in bTerms
173                  let factors = CancelFactors(aT.Factors.Concat(bT.Factors))
174                  select new Term(factors);
175      return new Expr(combs);
176    }
177
178    private IEnumerable<Factor> CancelFactors(IEnumerable<Factor> factors) {
179      var factorsArr = factors.ToArray();
180      var results = new List<Factor>(factors);
181      foreach (var f in factorsArr) {
182        if (f.ToString() == "1") results.Remove(f);
183        if (f.IsInverse) {
184          // find matching
185          Factor match;
186          if (f.IsSimpleFactor) match = factorsArr.FirstOrDefault(other => !other.IsInverse && f.Symbol.Equals(other.Symbol));
187          else match = factorsArr.FirstOrDefault(other => !other.IsInverse && f.Expr.Equals(other.Expr));
188          if (match != null) {
189            results.Remove(match);
190            results.Remove(f);
191          }
192        }
193      }
194      if (results.Count == 0) results.Add(new Factor('1'));
195      return results;
196    }
197
198    #region term
199    // term can be merged (essentially an ordered list of factors)
200    internal class Term : IComparable<Term> {
201      private readonly SortedList<Factor, int> factors; // factor symbol and the number of occurrences
202
203      public IEnumerable<Factor> Factors {
204        get {
205          return factors.SelectMany(p => Enumerable.Repeat(p.Key, p.Value));
206        }
207      }
208
209      public Term(Factor f) {
210        factors = new SortedList<Factor, int>();
211        factors.Add(f, 1);
212      }
213      public Term(IEnumerable<Factor> factors) {
214        this.factors = new SortedList<Factor, int>();
215        foreach (var f in factors) {
216          if (this.factors.ContainsKey(f)) this.factors[f] += 1;
217          else this.factors.Add(f, 1);
218        }
219      }
220
221      public int CompareTo(Term other) {
222        if (ContainsNonTerminal(Factors) && !ContainsNonTerminal(other.Factors)) {
223          return 1;
224        } else if (!ContainsNonTerminal(Factors) && ContainsNonTerminal(other.Factors)) {
225          return -1;
226        } else {
227          var countComp = Factors.Count().CompareTo(other.Factors.Count());
228          if (countComp != 0) return countComp;
229          return string.Join("", Factors).CompareTo(string.Join("", other.Factors));
230        }
231      }
232
233      public override string ToString() {
234
235        return string.Join("*", Factors);
236      }
237      public override bool Equals(object obj) {
238        var other = obj as Term;
239        if (other == null) return false;
240        if (this.Factors.Count() != other.Factors.Count()) return false;
241        if (this.Factors.Zip(other.Factors, Tuple.Create).Any(t => t.Item1 != t.Item2)) return false;
242        return true;
243      }
244      public override int GetHashCode() {
245        var h = 31415;
246        foreach (var v in Factors) {
247          h ^= v.GetHashCode();
248        }
249        return h;
250      }
251    }
252    #endregion
253
254    #region factor
255    // factors is either a single symbol or an inverted expression
256    internal class Factor : IComparable<Factor> {
257      public bool IsSimpleFactor { get { return Expr == null; } }
258      public char Symbol { get { return symbol; } }
259      public Expr Expr { get { return expr; } }
260      public bool IsInverse { get { return inv; } }
261      private readonly char symbol = '\0';
262      private readonly Expr expr;
263      private bool inv;
264
265      public Factor(char f) {
266        this.symbol = f;
267      }
268      public Factor(Expr expr) {
269        this.expr = expr;
270      }
271
272      public void Invert() {
273        this.inv = !inv;
274      }
275
276      public int CompareTo(Factor other) {
277        // 1) single symbol factors first
278        // 2) expression factors by expression compare
279        var crit1 = ContainsNonTerminal(this).CompareTo(ContainsNonTerminal(other));
280        if (crit1 != 0) return crit1;
281
282        var crit2 = this.IsInverse.CompareTo(other.IsInverse);
283        if (crit2 != 0) return crit2;
284
285        var crit3 = this.IsSimpleFactor.CompareTo(other.IsSimpleFactor);
286        if (crit3 != 0) return crit3;
287
288        // both are simple or expressions
289        if (IsSimpleFactor) return this.symbol.CompareTo(other.symbol);
290        else return this.Expr.CompareTo(other.Expr);
291      }
292
293      public override string ToString() {
294        var s = Expr == null ? symbol.ToString() : "(" + expr.ToString() + ")";
295        if (IsInverse) {
296          return "1/" + s;
297        } else return s;
298      }
299      public override bool Equals(object obj) {
300        var other = obj as Factor;
301        if (other == null) return false;
302        if (IsInverse != other.IsInverse) return false;
303        if (this.symbol != other.symbol) return false;
304        if (this.Expr != other.Expr) return false;
305        return true;
306      }
307      public override int GetHashCode() {
308        var h = 31415;
309        h ^= symbol.GetHashCode();
310        if (Expr != null) h ^= Expr.GetHashCode();
311        return h;
312      }
313    }
314    #endregion
315
316    #region expr
317
318    internal class Expr : IComparable<Expr> {
319      public readonly SortedSet<Term> Terms; // only set for Kind == Expr
320      //public bool Inverse;
321      public Expr(Term t) {
322        Terms = new SortedSet<Term>();
323        Terms.Add(t);
324      }
325      public Expr(IEnumerable<Term> exprTerms) {
326        Terms = new SortedSet<Term>();
327        foreach (var t in exprTerms) {
328          Terms.Add(t);
329        }
330      }
331
332      public void Merge(Expr other) {
333        this.Terms.UnionWith(other.Terms);
334      }
335
336      public int CompareTo(Expr other) {
337        var sizeComp = this.Terms.Count.CompareTo(other.Terms.Count);
338        if (sizeComp != 0) return sizeComp;
339        // same size => compare terms
340        foreach (var pair in Terms.Zip(other.Terms, Tuple.Create)) {
341          var termComp = pair.Item1.CompareTo(pair.Item2);
342          if (termComp != 0) return termComp;
343        }
344        return 0;
345      }
346
347      public override string ToString() {
348        // if (Inverse && Terms.Count > 1)
349        //   return "(" + string.Join("+", Terms) + ")";
350        // else if (Inverse && Terms.Count == 1) {
351        //   return Terms.First().ToString().Replace("%", "#").Replace("*", "%").Replace("#", "*");
352        // } else
353        return string.Join("+", Terms);
354      }
355      public override bool Equals(object obj) {
356        var other = obj as Expr;
357        if (other == null) return false;
358        //if (this.Inverse != other.Inverse) return false;
359        if (this.Terms.Count() != other.Terms.Count()) return false;
360        return this.Terms.Intersect(other.Terms).Count() == this.Terms.Count;
361      }
362      public override int GetHashCode() {
363        var h = 31415;
364        if (Terms != null)
365          foreach (var t in Terms) {
366            h ^= t.GetHashCode();
367          }
368        return h;
369      }
370    }
371    #endregion
372    internal static bool IsNonTerminal(char symb) {
373      return symb >= 'A' && symb <= 'Z';
374    }
375    internal static bool ContainsNonTerminal(IEnumerable<Factor> factors) {
376      return factors.Any(ContainsNonTerminal);
377    }
378    internal static bool ContainsNonTerminal(Factor f) {
379      if (f.Expr == null) return IsNonTerminal(f.Symbol);
380      else return ContainsNonTerminal(f.Expr);
381    }
382
383    private static bool ContainsNonTerminal(Expr expr) {
384      return expr.Terms.Any(ContainsNonTerminal);
385    }
386
387    private static bool ContainsNonTerminal(Term term) {
388      return ContainsNonTerminal(term.Factors);
389    }
390
391
392    /*
393
394    internal class FactorComparer : IComparer<char> {
395      public int Compare(char x, char y) {
396        if (IsNonTerminal(x) && !IsNonTerminal(y)) {
397          return 1;
398        } else if (!IsNonTerminal(x) && IsNonTerminal(y)) {
399          return -1;
400        } else if (IsNonTerminal(x) && IsNonTerminal(y)) {
401          return x.CompareTo(y);
402        } else {
403          return x.CompareTo(y);
404        }
405      }
406    }*/
407  }
408}
Note: See TracBrowser for help on using the repository browser.