using System; using System.CodeDom; using System.Collections.Generic; using System.Diagnostics; using System.Linq; using System.Security.Cryptography.X509Certificates; using System.Text; using System.Threading.Tasks; using HeuristicLab.Common; namespace HeuristicLab.Algorithms.Bandits { // helper to create canonical forms of expressions // TODO: change symbolicregressionpoly10problem to use this class // this does not support expressions with constants (in transformations we assume constant opt is used) // (e.g. we transform all negative signs to + because it is assumed a negative constant can be produced for the term) public class ExpressionExtender { // supports the grammar // G(E): // E -> V | V+E | V-E | V*E | V%E | (E) // V -> // "; // might produce expressions of the form /x (= 1/x) // the pipe symbol | is used for the constant one (in comparison to other constants) private string sentence; private int syIdx; public string CanonicalRepresentation(string phrase) { InitLex(phrase); var e = CanonicalExpr(); return e.ToString(); } private void InitLex(string sentence) { this.sentence = sentence; this.syIdx = 0; } private char CurSy() { if (syIdx >= sentence.Length) return '\0'; return sentence[syIdx]; } private void NewSy() { if (syIdx < sentence.Length) syIdx++; } // an expression is a sorted set of terms // a term is an (ordered) list of factors // a factor is either a single symbol or an inverted expression // CanonicalExpression reads multiple terms (expressions) and must merge the terms in the expression (ordering and duplicates) // CanonicalTerm reads multiple factors and must expand factors whenever it reads a combined factor (expression) it produces an expression // CanonicalFactor produces an expression // canonical expression returns either a single term or a set of terms private Expr CanonicalExpr() { var terms = new List(); terms.Add(CanonicalTerm()); var curSy = CurSy(); while (curSy == '+' || curSy == '-' || curSy == '^') { if (curSy == '+') { NewSy(); terms.Add(CanonicalTerm()); } else if (curSy == '-') { NewSy(); terms.Add(CanonicalTerm()); // minus is the same as plus assuming constant opt } else { NewSy(); throw new NotImplementedException(); // var e = Expr(variables, constants); // r = Not(r) * e + r * Not(e); // xor = (!x AND y) OR (x AND !y) } curSy = CurSy(); } return new Expr(terms.SelectMany(t => t.Terms)); } // canonical term returns either a single term (product of factors) or a set of terms private Expr CanonicalTerm() { var factors = new List(); var f = CanonicalFact(); if (f != null) factors.Add(f); var curSy = CurSy(); while (curSy == '*' || curSy == '%') { if (curSy == '*') { NewSy(); f = CanonicalFact(); if (f != null) factors.Add(f); } else { NewSy(); f = CanonicalFact(); // if there is only one term we can add multiple inverted simple factors instead of the whole inverted expression if (!f.IsSimpleFactor && f.Expr.Terms.Count == 1) { foreach (var invF in f.Expr.Terms.First().Factors) { if (invF.ToString() == "1") continue; invF.Invert(); factors.Add(invF); } } else { f.Invert(); if (f != null) factors.Add(f); } } curSy = CurSy(); } factors = CancelFactors(factors).ToList(); return ExpandFactors(factors); } // canonical fact returns a factor (either a singe variable, or a set of terms) private Factor CanonicalFact() { var curSy = CurSy(); if (curSy == '!') { throw new NotSupportedException(); } else if (curSy == '(') { NewSy(); Expr r = CanonicalExpr(); // this is already simplified if (CurSy() != ')') throw new ArgumentException(); NewSy(); return new Factor(r); } else if (curSy >= 'a' && curSy <= 'z') { NewSy(); return new Factor(curSy); // } else if (curSy >= '0' && curSy <= '9') { } else if (curSy >= 'A' && curSy <= 'Z') { // treat nonterminals in the same way as variables NewSy(); return new Factor(curSy); } else throw new ArgumentException("found symbol " + curSy); } // a list of factors (symbols, or expressions, and possibly inverses are read // a list to factors symbols or expressions and possibly inverses are produced // all non-inverse expression factors are expanded private Expr ExpandFactors(IEnumerable factors) { // if (invFactors.Count > 0) throw new NotImplementedException(); Debug.Assert(!factors.First().IsInverse); // the first factor is never an inverted factor // each factor could be a list of terms (expression) Expr currentFact = null; var firstFactor = factors.First(); if (firstFactor.IsSimpleFactor || firstFactor.IsInverse) currentFact = new Expr(new Term(firstFactor)); else currentFact = firstFactor.Expr; foreach (var fact in factors.Skip(1)) { Expr curExpr = null; if (fact.IsSimpleFactor || fact.IsInverse) curExpr = new Expr(new Term(fact)); else curExpr = fact.Expr; currentFact = AllProducts(currentFact, curExpr); } return currentFact; } private Expr AllProducts(Expr a, Expr b) { var aTerms = a.Terms.ToArray(); var bTerms = b.Terms.ToArray(); var combs = from aT in aTerms from bT in bTerms let factors = CancelFactors(aT.Factors.Concat(bT.Factors)) select new Term(factors); return new Expr(combs); } private IEnumerable CancelFactors(IEnumerable factors) { var factorsArr = factors.ToArray(); var results = new List(factors); foreach (var f in factorsArr) { if (f.ToString() == "1") results.Remove(f); if (f.ToString() == "1/(1)") results.Remove(f); if (f.IsInverse) { // find matching Factor match; match = factorsArr.FirstOrDefault(other => !other.IsInverse && f.Cancels(other)); if (match != null) { results.Remove(f); var idx = results.IndexOf(match); results.Remove(match); if (!results.Any()) results.Insert(idx, new Factor('1')); // when the factor is the last one then insert a one // also mark as cancelled in the factorsArr idx = Array.IndexOf(factorsArr, match); factorsArr[idx] = new Factor('1'); } } } if (results.Count == 0) results.Add(new Factor('1')); return results; } #region term // term can be merged (essentially an ordered list of factors) internal class Term : IComparable { private readonly SortedList factors; // factor symbol and the number of occurrences public IEnumerable Factors { get { return factors.SelectMany(p => Enumerable.Repeat(p.Key, p.Value)); } } public Term(Factor f) { factors = new SortedList(); factors.Add(f, 1); } public Term(IEnumerable factors) { this.factors = new SortedList(); foreach (var f in factors) { if (this.factors.ContainsKey(f)) this.factors[f] += 1; else this.factors.Add(f, 1); } } public int CompareTo(Term other) { if (ContainsNonTerminal(Factors) && !ContainsNonTerminal(other.Factors)) { return 1; } else if (!ContainsNonTerminal(Factors) && ContainsNonTerminal(other.Factors)) { return -1; } else { var countComp = Factors.Count().CompareTo(other.Factors.Count()); if (countComp != 0) return countComp; foreach (var pair in Factors.Zip(other.Factors, Tuple.Create)) { var fComp = pair.Item1.CompareTo(pair.Item2); if (fComp != 0) return fComp; } return 0; } } public override string ToString() { return string.Join("*", Factors); } public override bool Equals(object obj) { var other = obj as Term; if (other == null) return false; if (this.Factors.Count() != other.Factors.Count()) return false; if (this.Factors.Zip(other.Factors, Tuple.Create).Any(t => t.Item1 != t.Item2)) return false; return true; } public override int GetHashCode() { var h = 31415; foreach (var v in Factors) { h ^= v.GetHashCode(); } return h; } } #endregion #region factor // factors is either a single symbol or an inverted expression internal class Factor : IComparable { public bool IsSimpleFactor { get { return Expr == null; } } public char Symbol { get { return symbol; } } public Expr Expr { get { return expr; } } public bool IsInverse { get { return inv; } } private readonly char symbol = '\0'; private readonly Expr expr; private bool inv; public Factor(char f) { this.symbol = f; } public Factor(Expr expr) { this.expr = expr; } public void Invert() { this.inv = !inv; } public bool Cancels(Factor other) { if (this.inv == other.inv) return false; if (this.Expr != null && other.Expr == null) return false; if (this.Expr == null && other.Expr != null) return false; if (Expr == null) return this.Symbol.Equals(other.symbol); else return this.Expr.CompareTo(other.Expr) == 0; } public int CompareTo(Factor other) { // 1) single symbol factors first // 2) expression factors by expression compare var crit1 = ContainsNonTerminal(this).CompareTo(ContainsNonTerminal(other)); if (crit1 != 0) return crit1; var crit2 = this.IsInverse.CompareTo(other.IsInverse); if (crit2 != 0) return crit2; var crit3 = this.IsSimpleFactor.CompareTo(other.IsSimpleFactor); if (crit3 != 0) return crit3; // both are simple or expressions if (IsSimpleFactor) return this.symbol.CompareTo(other.symbol); else return this.Expr.CompareTo(other.Expr); } public override string ToString() { var s = Expr == null ? symbol.ToString() : "(" + expr.ToString() + ")"; if (IsInverse) { return "/" + s; } else return s; } public override bool Equals(object obj) { var other = obj as Factor; if (other == null) return false; if (IsInverse != other.IsInverse) return false; if (this.symbol != other.symbol) return false; if (this.Expr != other.Expr) return false; return true; } public override int GetHashCode() { var h = 31415; h ^= symbol.GetHashCode(); if (Expr != null) h ^= Expr.GetHashCode(); return h; } } #endregion #region expr internal class Expr : IComparable { public readonly SortedSet Terms; // only set for Kind == Expr //public bool Inverse; public Expr(Term t) { Terms = new SortedSet(); Terms.Add(t); } public Expr(IEnumerable exprTerms) { Terms = new SortedSet(); foreach (var t in exprTerms) { Terms.Add(t); } } public void Merge(Expr other) { this.Terms.UnionWith(other.Terms); } public int CompareTo(Expr other) { var sizeComp = this.Terms.Count.CompareTo(other.Terms.Count); if (sizeComp != 0) return sizeComp; // same size => compare terms foreach (var pair in Terms.Zip(other.Terms, Tuple.Create)) { var termComp = pair.Item1.CompareTo(pair.Item2); if (termComp != 0) return termComp; } return 0; } public override string ToString() { return string.Join("+", Terms); } public override bool Equals(object obj) { var other = obj as Expr; if (other == null) return false; if (this.Terms.Count() != other.Terms.Count()) return false; return this.Terms.Intersect(other.Terms).Count() == this.Terms.Count; } public override int GetHashCode() { var h = 31415; if (Terms != null) foreach (var t in Terms) { h ^= t.GetHashCode(); } return h; } } #endregion internal static bool IsNonTerminal(char symb) { return symb >= 'A' && symb <= 'Z'; } internal static bool ContainsNonTerminal(IEnumerable factors) { return factors.Any(ContainsNonTerminal); } internal static bool ContainsNonTerminal(Factor f) { if (f.Expr == null) return IsNonTerminal(f.Symbol); else return ContainsNonTerminal(f.Expr); } private static bool ContainsNonTerminal(Expr expr) { return expr.Terms.Any(ContainsNonTerminal); } private static bool ContainsNonTerminal(Term term) { return ContainsNonTerminal(term.Factors); } } }