1 | using System;
|
---|
2 | using System.Collections.Generic;
|
---|
3 | using System.Diagnostics.Eventing.Reader;
|
---|
4 | using System.Linq;
|
---|
5 | using System.Runtime.CompilerServices;
|
---|
6 | using System.Runtime.Remoting.Messaging;
|
---|
7 | using System.Text;
|
---|
8 | using System.Threading.Tasks;
|
---|
9 | using AutoDiff;
|
---|
10 | using HeuristicLab.Common;
|
---|
11 |
|
---|
12 | namespace HeuristicLab.Problems.GrammaticalOptimization {
|
---|
13 | public class ExpressionCompiler {
|
---|
14 | private string sentence;
|
---|
15 | private int syIdx;
|
---|
16 | // compiles sentences from L(G(Expr)) using AutoDiff
|
---|
17 | // does not compile to IL it is only necessary to calculate gradients for parameter optimization
|
---|
18 | // Expr -> Term { ('+' | '-' | '^' ) Term }
|
---|
19 | // Term -> Fact { ('*' | '%' | '/') Fact }
|
---|
20 | // Fact -> '!' Expr | '(' Expr ')' | Var | const | one
|
---|
21 | // Var -> 'a'..'z'
|
---|
22 | // const -> '0' .. '9'
|
---|
23 | // one -> | // pipe symbol for constant one instead of ERC 1
|
---|
24 |
|
---|
25 | // constants are completely ignored, instead we introduce a multiplicative constant factor for each term and an additive constant term for each expression
|
---|
26 |
|
---|
27 | // for boolean problems the symbol * representes the AND operator, the symbol + represents the OR operator, ! = NOT, ^ = XOR
|
---|
28 |
|
---|
29 | public void Compile(string sentence, out Term f, Variable[] variables, out Variable[] constants) {
|
---|
30 | InitLex(sentence);
|
---|
31 | var constantsList = new List<Variable>();
|
---|
32 | f = Expr(variables, constantsList);
|
---|
33 | constants = constantsList.ToArray();
|
---|
34 | }
|
---|
35 |
|
---|
36 | private void InitLex(string sentence) {
|
---|
37 | this.sentence = sentence;
|
---|
38 | this.syIdx = 0;
|
---|
39 | }
|
---|
40 |
|
---|
41 | private char CurSy() {
|
---|
42 | if (syIdx >= sentence.Length) return '\0';
|
---|
43 | return sentence[syIdx];
|
---|
44 | }
|
---|
45 | private void NewSy() {
|
---|
46 | if (syIdx < sentence.Length) syIdx++;
|
---|
47 | }
|
---|
48 |
|
---|
49 | // helper for xor
|
---|
50 | private Term Not(Term x) {
|
---|
51 | return 1.0 - x;
|
---|
52 | }
|
---|
53 |
|
---|
54 | private Term Expr(IList<Variable> variables, IList<Variable> constants) {
|
---|
55 | var terms = new List<Term>();
|
---|
56 | terms.Add(Term(variables, constants));
|
---|
57 | var curSy = CurSy();
|
---|
58 | while (curSy == '+' || curSy == '-' || curSy == '^') {
|
---|
59 | if (curSy == '+') {
|
---|
60 | NewSy();
|
---|
61 | terms.Add(Term(variables, constants));
|
---|
62 | } else if (curSy == '-') {
|
---|
63 | NewSy();
|
---|
64 | terms.Add(-Term(variables, constants)); // minus
|
---|
65 | } else {
|
---|
66 | NewSy();
|
---|
67 | throw new NotImplementedException();
|
---|
68 | // var e = Expr(variables, constants);
|
---|
69 | // r = Not(r) * e + r * Not(e); // xor = (!x AND y) OR (x AND !y)
|
---|
70 | }
|
---|
71 | curSy = CurSy();
|
---|
72 | }
|
---|
73 |
|
---|
74 | // at this point we might have multiple constants in the list of terms but we only need one of them
|
---|
75 | var firstConstant = terms.OfType<Variable>().FirstOrDefault(); // if a term is only a variable then it must be a constant (otherwise (for real variables) it would be a product of two constants)
|
---|
76 | if (firstConstant == null) {
|
---|
77 | // no constant found => add a constant offset for each full expression
|
---|
78 | var offset = new Variable();
|
---|
79 | constants.Add(offset);
|
---|
80 | terms.Add(offset);
|
---|
81 | } else {
|
---|
82 | // there is already a constant => remove all others if there are more
|
---|
83 | var otherConsts = terms.OfType<Variable>().Where(t => t != firstConstant).ToArray();
|
---|
84 | foreach (var otherConst in otherConsts) {
|
---|
85 | terms.Remove(otherConst);
|
---|
86 | constants.Remove(otherConst);
|
---|
87 | }
|
---|
88 | }
|
---|
89 | return TermBuilder.Sum(terms);
|
---|
90 | }
|
---|
91 |
|
---|
92 | private Term Term(IList<Variable> variables, IList<Variable> constants) {
|
---|
93 | var factors = new List<Term>();
|
---|
94 | var f = Fact(variables, constants);
|
---|
95 | if (f != null) factors.Add(f);
|
---|
96 | var curSy = CurSy();
|
---|
97 | while (curSy == '*' || curSy == '%') { // division and protected division symbols are handled in the same way
|
---|
98 | if (curSy == '*') {
|
---|
99 | NewSy();
|
---|
100 | f = Fact(variables, constants); // fact might return null (for constants)
|
---|
101 | if (f != null) factors.Add(f);
|
---|
102 | } else {
|
---|
103 | NewSy();
|
---|
104 | f = Fact(variables, constants);
|
---|
105 | if (f != null) factors.Add(1.0 / f); // cannot use protected division here
|
---|
106 | }
|
---|
107 | curSy = CurSy();
|
---|
108 | }
|
---|
109 |
|
---|
110 | // generate a constant factor (scale) for each full term
|
---|
111 | // since we ignore constants in the string it could be possible that factors is still empty at this point
|
---|
112 | var scale = new Variable();
|
---|
113 | constants.Add(scale);
|
---|
114 | factors.Add(scale);
|
---|
115 |
|
---|
116 | if (factors.Count == 1) return factors[0];
|
---|
117 | else if (factors.Count == 2) return factors[0] * factors[1];
|
---|
118 | else return TermBuilder.Product(factors[0], factors[1], factors.Skip(2).ToArray());
|
---|
119 | }
|
---|
120 |
|
---|
121 | private Term Fact(IList<Variable> variables, IList<Variable> constants) {
|
---|
122 | Term r = null;
|
---|
123 | var curSy = CurSy();
|
---|
124 | if (curSy == '!') {
|
---|
125 | NewSy();
|
---|
126 | r = Not(Expr(variables, constants));
|
---|
127 | } else if (curSy == '(') {
|
---|
128 | NewSy();
|
---|
129 | r = Expr(variables, constants);
|
---|
130 | if (CurSy() != ')') throw new ArgumentException();
|
---|
131 | NewSy();
|
---|
132 | } else if (curSy >= 'a' && curSy <= 'z') {
|
---|
133 | var varIdx = (byte)curSy - (byte)'a';
|
---|
134 | if (varIdx >= variables.Count) throw new ArgumentException();
|
---|
135 | r = variables[varIdx];
|
---|
136 | NewSy();
|
---|
137 | } else if (curSy == '/') {
|
---|
138 | // /-symbol used in the expressionextender to represent inverse (1/x).
|
---|
139 | // this is necessary because we also use symbols 0..9 as indices for ERCs
|
---|
140 | NewSy();
|
---|
141 | r = 1.0 / Fact(variables, constants);
|
---|
142 | } else if (curSy >= '0' && curSy <= '9') {
|
---|
143 | int o = (byte)curSy - (byte)'0';
|
---|
144 | //int o = Convert.ToByte(CurSy()) - Convert.ToByte('a');
|
---|
145 | if (o < 0 || o >= 10) throw new ArgumentException();
|
---|
146 |
|
---|
147 | // completely ignore existing constants
|
---|
148 |
|
---|
149 | // var newConst = new Variable();
|
---|
150 | // constants.Add(newConst);
|
---|
151 | // r = newConst;
|
---|
152 | NewSy();
|
---|
153 | } else throw new ArgumentException();
|
---|
154 | return r;
|
---|
155 | }
|
---|
156 |
|
---|
157 | }
|
---|
158 | }
|
---|