[11895] | 1 | using System;
|
---|
| 2 | using System.Collections.Generic;
|
---|
| 3 | using System.Diagnostics.Eventing.Reader;
|
---|
| 4 | using System.Linq;
|
---|
| 5 | using System.Runtime.CompilerServices;
|
---|
| 6 | using System.Runtime.Remoting.Messaging;
|
---|
| 7 | using System.Text;
|
---|
| 8 | using System.Threading.Tasks;
|
---|
| 9 | using AutoDiff;
|
---|
| 10 | using HeuristicLab.Common;
|
---|
| 11 |
|
---|
| 12 | namespace HeuristicLab.Problems.GrammaticalOptimization {
|
---|
| 13 | public class ExpressionCompiler {
|
---|
| 14 | private string sentence;
|
---|
| 15 | private int syIdx;
|
---|
| 16 | // compiles sentences from L(G(Expr)) using AutoDiff
|
---|
| 17 | // does not compile to IL it is only necessary to calculate gradients for parameter optimization
|
---|
| 18 | // Expr -> Term { ('+' | '-' | '^' ) Term }
|
---|
[12014] | 19 | // Term -> Fact { ('*' | '%' | '/') Fact }
|
---|
| 20 | // Fact -> '!' Expr | '(' Expr ')' | Var | const | one
|
---|
[11895] | 21 | // Var -> 'a'..'z'
|
---|
| 22 | // const -> '0' .. '9'
|
---|
[12014] | 23 | // one -> | // pipe symbol for constant one instead of ERC 1
|
---|
[11895] | 24 |
|
---|
| 25 | // constants are completely ignored, instead we introduce a multiplicative constant factor for each term and an additive constant term for each expression
|
---|
| 26 |
|
---|
| 27 | // for boolean problems the symbol * representes the AND operator, the symbol + represents the OR operator, ! = NOT, ^ = XOR
|
---|
| 28 |
|
---|
[11972] | 29 | public void Compile(string sentence, out Term f, Variable[] variables, out Variable[] constants) {
|
---|
[11895] | 30 | InitLex(sentence);
|
---|
| 31 | var constantsList = new List<Variable>();
|
---|
[11972] | 32 | f = Expr(variables, constantsList);
|
---|
[11895] | 33 | constants = constantsList.ToArray();
|
---|
| 34 | }
|
---|
| 35 |
|
---|
| 36 | private void InitLex(string sentence) {
|
---|
| 37 | this.sentence = sentence;
|
---|
| 38 | this.syIdx = 0;
|
---|
| 39 | }
|
---|
| 40 |
|
---|
| 41 | private char CurSy() {
|
---|
| 42 | if (syIdx >= sentence.Length) return '\0';
|
---|
| 43 | return sentence[syIdx];
|
---|
| 44 | }
|
---|
| 45 | private void NewSy() {
|
---|
| 46 | if (syIdx < sentence.Length) syIdx++;
|
---|
| 47 | }
|
---|
| 48 |
|
---|
| 49 | // helper for xor
|
---|
| 50 | private Term Not(Term x) {
|
---|
| 51 | return 1.0 - x;
|
---|
| 52 | }
|
---|
| 53 |
|
---|
| 54 | private Term Expr(IList<Variable> variables, IList<Variable> constants) {
|
---|
| 55 | var terms = new List<Term>();
|
---|
| 56 | terms.Add(Term(variables, constants));
|
---|
| 57 | var curSy = CurSy();
|
---|
| 58 | while (curSy == '+' || curSy == '-' || curSy == '^') {
|
---|
| 59 | if (curSy == '+') {
|
---|
| 60 | NewSy();
|
---|
| 61 | terms.Add(Term(variables, constants));
|
---|
| 62 | } else if (curSy == '-') {
|
---|
| 63 | NewSy();
|
---|
| 64 | terms.Add(-Term(variables, constants)); // minus
|
---|
| 65 | } else {
|
---|
| 66 | NewSy();
|
---|
| 67 | throw new NotImplementedException();
|
---|
| 68 | // var e = Expr(variables, constants);
|
---|
| 69 | // r = Not(r) * e + r * Not(e); // xor = (!x AND y) OR (x AND !y)
|
---|
| 70 | }
|
---|
| 71 | curSy = CurSy();
|
---|
| 72 | }
|
---|
| 73 |
|
---|
| 74 | // at this point we might have multiple constants in the list of terms but we only need one of them
|
---|
| 75 | var firstConstant = terms.OfType<Variable>().FirstOrDefault(); // if a term is only a variable then it must be a constant (otherwise (for real variables) it would be a product of two constants)
|
---|
| 76 | if (firstConstant == null) {
|
---|
| 77 | // no constant found => add a constant offset for each full expression
|
---|
| 78 | var offset = new Variable();
|
---|
| 79 | constants.Add(offset);
|
---|
| 80 | terms.Add(offset);
|
---|
| 81 | } else {
|
---|
| 82 | // there is already a constant => remove all others if there are more
|
---|
| 83 | var otherConsts = terms.OfType<Variable>().Where(t => t != firstConstant).ToArray();
|
---|
| 84 | foreach (var otherConst in otherConsts) {
|
---|
| 85 | terms.Remove(otherConst);
|
---|
| 86 | constants.Remove(otherConst);
|
---|
| 87 | }
|
---|
| 88 | }
|
---|
| 89 | return TermBuilder.Sum(terms);
|
---|
| 90 | }
|
---|
| 91 |
|
---|
| 92 | private Term Term(IList<Variable> variables, IList<Variable> constants) {
|
---|
| 93 | var factors = new List<Term>();
|
---|
| 94 | var f = Fact(variables, constants);
|
---|
| 95 | if (f != null) factors.Add(f);
|
---|
| 96 | var curSy = CurSy();
|
---|
[12024] | 97 | while (curSy == '*' || curSy == '%') { // division and protected division symbols are handled in the same way
|
---|
[11895] | 98 | if (curSy == '*') {
|
---|
| 99 | NewSy();
|
---|
| 100 | f = Fact(variables, constants); // fact might return null (for constants)
|
---|
| 101 | if (f != null) factors.Add(f);
|
---|
| 102 | } else {
|
---|
| 103 | NewSy();
|
---|
| 104 | f = Fact(variables, constants);
|
---|
| 105 | if (f != null) factors.Add(1.0 / f); // cannot use protected division here
|
---|
| 106 | }
|
---|
| 107 | curSy = CurSy();
|
---|
| 108 | }
|
---|
| 109 |
|
---|
| 110 | // generate a constant factor (scale) for each full term
|
---|
| 111 | // since we ignore constants in the string it could be possible that factors is still empty at this point
|
---|
| 112 | var scale = new Variable();
|
---|
| 113 | constants.Add(scale);
|
---|
| 114 | factors.Add(scale);
|
---|
| 115 |
|
---|
| 116 | if (factors.Count == 1) return factors[0];
|
---|
| 117 | else if (factors.Count == 2) return factors[0] * factors[1];
|
---|
| 118 | else return TermBuilder.Product(factors[0], factors[1], factors.Skip(2).ToArray());
|
---|
| 119 | }
|
---|
| 120 |
|
---|
| 121 | private Term Fact(IList<Variable> variables, IList<Variable> constants) {
|
---|
| 122 | Term r = null;
|
---|
| 123 | var curSy = CurSy();
|
---|
| 124 | if (curSy == '!') {
|
---|
| 125 | NewSy();
|
---|
| 126 | r = Not(Expr(variables, constants));
|
---|
| 127 | } else if (curSy == '(') {
|
---|
| 128 | NewSy();
|
---|
| 129 | r = Expr(variables, constants);
|
---|
| 130 | if (CurSy() != ')') throw new ArgumentException();
|
---|
| 131 | NewSy();
|
---|
| 132 | } else if (curSy >= 'a' && curSy <= 'z') {
|
---|
[11972] | 133 | var varIdx = (byte)curSy - (byte)'a';
|
---|
| 134 | if (varIdx >= variables.Count) throw new ArgumentException();
|
---|
| 135 | r = variables[varIdx];
|
---|
[11895] | 136 | NewSy();
|
---|
[12024] | 137 | } else if (curSy == '/') {
|
---|
| 138 | // /-symbol used in the expressionextender to represent inverse (1/x).
|
---|
[12014] | 139 | // this is necessary because we also use symbols 0..9 as indices for ERCs
|
---|
| 140 | NewSy();
|
---|
[12024] | 141 | r = 1.0 / Fact(variables, constants);
|
---|
[11895] | 142 | } else if (curSy >= '0' && curSy <= '9') {
|
---|
| 143 | int o = (byte)curSy - (byte)'0';
|
---|
| 144 | //int o = Convert.ToByte(CurSy()) - Convert.ToByte('a');
|
---|
| 145 | if (o < 0 || o >= 10) throw new ArgumentException();
|
---|
| 146 |
|
---|
| 147 | // completely ignore existing constants
|
---|
| 148 |
|
---|
| 149 | // var newConst = new Variable();
|
---|
| 150 | // constants.Add(newConst);
|
---|
| 151 | // r = newConst;
|
---|
| 152 | NewSy();
|
---|
| 153 | } else throw new ArgumentException();
|
---|
| 154 | return r;
|
---|
| 155 | }
|
---|
| 156 |
|
---|
| 157 | }
|
---|
| 158 | }
|
---|