[15714] | 1 | using System.Collections;
|
---|
| 2 | using System.Collections.Generic;
|
---|
| 3 | using System.ComponentModel;
|
---|
| 4 | using System.Diagnostics;
|
---|
| 5 | using System.Linq;
|
---|
| 6 | using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration;
|
---|
| 7 | using HeuristicLab.Common;
|
---|
[15722] | 8 | using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
|
---|
| 9 | using HeuristicLab.Problems.DataAnalysis.Symbolic;
|
---|
[15714] | 10 |
|
---|
| 11 | namespace HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration {
|
---|
[15712] | 12 | public class Grammar {
|
---|
| 13 |
|
---|
| 14 | public Symbol StartSymbol;
|
---|
| 15 |
|
---|
[15714] | 16 | #region Symbols
|
---|
[15722] | 17 |
|
---|
[15714] | 18 | public VariableSymbol Var;
|
---|
| 19 |
|
---|
| 20 | public NonterminalSymbol Expr;
|
---|
| 21 | public NonterminalSymbol Term;
|
---|
| 22 | public NonterminalSymbol Factor;
|
---|
| 23 |
|
---|
| 24 | public TerminalSymbol Addition;
|
---|
| 25 | public TerminalSymbol Multiplication;
|
---|
[15722] | 26 |
|
---|
[15714] | 27 | #endregion
|
---|
| 28 |
|
---|
| 29 |
|
---|
[15722] | 30 | #region HL Symbols for Parsing ExpressionTrees
|
---|
| 31 |
|
---|
| 32 | private TypeCoherentExpressionGrammar symbolicExpressionGrammar;
|
---|
| 33 |
|
---|
| 34 | private ISymbol constSy;
|
---|
| 35 | private ISymbol varSy;
|
---|
| 36 |
|
---|
| 37 | private ISymbol addSy;
|
---|
| 38 | private ISymbol mulSy;
|
---|
| 39 | private ISymbol logSy;
|
---|
| 40 | private ISymbol expSy;
|
---|
| 41 | private ISymbol divSy;
|
---|
| 42 |
|
---|
| 43 | private ISymbol rootSy;
|
---|
| 44 | private ISymbol startSy;
|
---|
| 45 |
|
---|
| 46 | #endregion
|
---|
| 47 |
|
---|
[15712] | 48 | public Grammar(string[] variables) {
|
---|
| 49 | #region Define Symbols
|
---|
[15714] | 50 | Var = new VariableSymbol("var", variables);
|
---|
[15712] | 51 |
|
---|
[15714] | 52 | Expr = new NonterminalSymbol("Expr");
|
---|
| 53 | Term = new NonterminalSymbol("Term");
|
---|
| 54 | Factor = new NonterminalSymbol("Factor");
|
---|
[15712] | 55 |
|
---|
[15714] | 56 | Addition = new TerminalSymbol("+");
|
---|
| 57 | Multiplication = new TerminalSymbol("*");
|
---|
[15712] | 58 | #endregion
|
---|
| 59 |
|
---|
| 60 |
|
---|
[15723] | 61 | #region Production rules
|
---|
| 62 | // order of production is important, since they are accessed via index
|
---|
| 63 | // in memoization.
|
---|
[15714] | 64 | StartSymbol = Expr;
|
---|
[15712] | 65 |
|
---|
[15714] | 66 | Expr.AddProduction(Term, Expr, Addition);
|
---|
| 67 | Expr.AddProduction(Term);
|
---|
[15712] | 68 |
|
---|
[15714] | 69 | Term.AddProduction(Factor, Term, Multiplication);
|
---|
| 70 | Term.AddProduction(Factor);
|
---|
[15712] | 71 |
|
---|
[15714] | 72 | Factor.AddProduction(Var);
|
---|
[15712] | 73 | #endregion
|
---|
[15722] | 74 |
|
---|
| 75 | #region Parsing to SymbolicExpressionTree
|
---|
| 76 | symbolicExpressionGrammar = new TypeCoherentExpressionGrammar();
|
---|
| 77 | symbolicExpressionGrammar.ConfigureAsDefaultRegressionGrammar();
|
---|
| 78 |
|
---|
| 79 | constSy = symbolicExpressionGrammar.Symbols.OfType<Constant>().First();
|
---|
| 80 | varSy = symbolicExpressionGrammar.Symbols.OfType<Variable>().First();
|
---|
| 81 | addSy = symbolicExpressionGrammar.AllowedSymbols.OfType<Addition>().First();
|
---|
| 82 | mulSy = symbolicExpressionGrammar.AllowedSymbols.OfType<Multiplication>().First();
|
---|
| 83 | logSy = symbolicExpressionGrammar.AllowedSymbols.OfType<Logarithm>().First();
|
---|
| 84 | expSy = symbolicExpressionGrammar.AllowedSymbols.OfType<Exponential>().First();
|
---|
| 85 | divSy = symbolicExpressionGrammar.AllowedSymbols.OfType<Division>().First();
|
---|
| 86 |
|
---|
| 87 | rootSy = symbolicExpressionGrammar.AllowedSymbols.OfType<ProgramRootSymbol>().First();
|
---|
| 88 | startSy = symbolicExpressionGrammar.AllowedSymbols.OfType<StartSymbol>().First();
|
---|
| 89 |
|
---|
| 90 | #endregion
|
---|
[15712] | 91 | }
|
---|
[15714] | 92 |
|
---|
[15723] | 93 | /*
|
---|
| 94 | #region Memoize subtrees
|
---|
| 95 |
|
---|
| 96 | public void MemoizeSubtrees(SymbolString sentence) {
|
---|
| 97 | Stack<TerminalSymbol> parseStack = new Stack<TerminalSymbol>(sentence.OfType<TerminalSymbol>());
|
---|
| 98 |
|
---|
| 99 | // Parse root symbol "+"
|
---|
| 100 | MemoizeSubtreeExpression(parseStack);
|
---|
| 101 | }
|
---|
| 102 |
|
---|
| 103 | private SymbolString MemoizeSubtreeExpression(Stack<TerminalSymbol> parseStack) {
|
---|
| 104 | SymbolString subtree = new SymbolString();
|
---|
| 105 |
|
---|
| 106 | if (ReferenceEquals(parseStack.Peek(), Addition)) {
|
---|
| 107 | subtree.Add(parseStack.Pop());
|
---|
| 108 | subtree.InsertRange(0, MemoizeSubtreeExpression(parseStack));
|
---|
| 109 | subtree.InsertRange(0, MemoizeSubtreeTerm(parseStack));
|
---|
| 110 |
|
---|
| 111 | Expr.Alternatives[0].GeneratedSentences.Add(subtree);
|
---|
| 112 | } else {
|
---|
| 113 | subtree.InsertRange(0, MemoizeSubtreeTerm(parseStack));
|
---|
| 114 |
|
---|
| 115 | Expr.Alternatives[1].GeneratedSentences.Add(subtree);
|
---|
| 116 | }
|
---|
| 117 |
|
---|
| 118 | return subtree;
|
---|
| 119 | }
|
---|
| 120 |
|
---|
| 121 | private SymbolString MemoizeSubtreeTerm(Stack<TerminalSymbol> parseStack) {
|
---|
| 122 | SymbolString subtree = new SymbolString();
|
---|
| 123 |
|
---|
| 124 | if (ReferenceEquals(parseStack.Peek(), Multiplication)) {
|
---|
| 125 | subtree.Add(parseStack.Pop());
|
---|
| 126 | subtree.InsertRange(0, MemoizeSubtreeTerm(parseStack));
|
---|
| 127 | subtree.InsertRange(0, MemoizeSubtreeFactor(parseStack));
|
---|
| 128 |
|
---|
| 129 | Term.Alternatives[0].GeneratedSentences.Add(subtree);
|
---|
| 130 | } else {
|
---|
| 131 | subtree.InsertRange(0, MemoizeSubtreeFactor(parseStack));
|
---|
| 132 |
|
---|
| 133 | Term.Alternatives[1].GeneratedSentences.Add(subtree);
|
---|
| 134 | }
|
---|
| 135 |
|
---|
| 136 | return subtree;
|
---|
| 137 | }
|
---|
| 138 |
|
---|
| 139 | private SymbolString MemoizeSubtreeFactor(Stack<TerminalSymbol> parseStack) {
|
---|
| 140 | SymbolString subtree = new SymbolString(MemoizeSubtreeVar(parseStack));
|
---|
| 141 |
|
---|
| 142 | Factor.Alternatives[0].GeneratedSentences.Add(subtree);
|
---|
| 143 | return subtree;
|
---|
| 144 | }
|
---|
| 145 |
|
---|
| 146 | private SymbolString MemoizeSubtreeVar(Stack<TerminalSymbol> parseStack) {
|
---|
| 147 | SymbolString subtree = new SymbolString(parseStack.Pop().ToEnumerable());
|
---|
| 148 |
|
---|
| 149 | // ... not really
|
---|
| 150 | //Var.Alternatives[0].GeneratedSentences.Add(subtree);
|
---|
| 151 | return subtree;
|
---|
| 152 | }
|
---|
| 153 |
|
---|
| 154 |
|
---|
| 155 | #endregion
|
---|
| 156 | */
|
---|
[15724] | 157 |
|
---|
[15723] | 158 | #region Hashing
|
---|
[15714] | 159 | public int CalcHashCode(SymbolString sentence) {
|
---|
| 160 | Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!");
|
---|
| 161 | Debug.Assert(sentence.All(s => s is TerminalSymbol), "Trying to evaluate symbol sequence with nonterminalsymbols!");
|
---|
| 162 |
|
---|
| 163 | Stack<TerminalSymbol> parseStack = new Stack<TerminalSymbol>(sentence.OfType<TerminalSymbol>());
|
---|
| 164 |
|
---|
| 165 | int[] subtreeHashes = GetSubtreeHashes(parseStack.Pop(), parseStack);
|
---|
| 166 | return AggregateHashes(subtreeHashes);
|
---|
| 167 | }
|
---|
| 168 |
|
---|
| 169 | private int[] GetSubtreeHashes(TerminalSymbol currentSymbol, Stack<TerminalSymbol> parseStack) {
|
---|
| 170 | List<int> childHashes = null;
|
---|
| 171 |
|
---|
| 172 | if (Var.VariableTerminalSymbols.Contains(currentSymbol)) {
|
---|
| 173 | childHashes = currentSymbol.StringRepresentation.GetHashCode().ToEnumerable().ToList();
|
---|
| 174 |
|
---|
[15722] | 175 | } else if (ReferenceEquals(currentSymbol, Multiplication)) {
|
---|
| 176 | // MULTIPLICATION
|
---|
[15714] | 177 | childHashes = new List<int>();
|
---|
| 178 |
|
---|
| 179 | // First subtree
|
---|
| 180 | TerminalSymbol firstSubtreeRoot = parseStack.Pop();
|
---|
| 181 | int[] firstSubtreeHashes = GetSubtreeHashes(firstSubtreeRoot, parseStack);
|
---|
| 182 |
|
---|
| 183 | if (ReferenceEquals(firstSubtreeRoot, Multiplication)) {
|
---|
| 184 | childHashes.AddRange(firstSubtreeHashes);
|
---|
| 185 | } else {
|
---|
| 186 | childHashes.Add(AggregateHashes(firstSubtreeHashes));
|
---|
| 187 | }
|
---|
| 188 |
|
---|
| 189 | // Second subtree
|
---|
| 190 | TerminalSymbol secondSubtreeRoot = parseStack.Pop();
|
---|
| 191 | int[] secondSubtreeHashes = GetSubtreeHashes(secondSubtreeRoot, parseStack);
|
---|
| 192 |
|
---|
| 193 | if (ReferenceEquals(secondSubtreeRoot, Multiplication)) {
|
---|
| 194 | childHashes.AddRange(secondSubtreeHashes);
|
---|
| 195 | } else {
|
---|
| 196 | childHashes.Add(AggregateHashes(secondSubtreeHashes));
|
---|
| 197 | }
|
---|
| 198 |
|
---|
| 199 | // Sort due to commutativity
|
---|
| 200 | childHashes.Sort();
|
---|
| 201 |
|
---|
| 202 |
|
---|
| 203 |
|
---|
[15722] | 204 | } else if (ReferenceEquals(currentSymbol, Addition)) {
|
---|
| 205 | // ADDITION
|
---|
[15714] | 206 | HashSet<int> uniqueChildHashes = new HashSet<int>();
|
---|
| 207 |
|
---|
| 208 | TerminalSymbol firstSubtreeRoot = parseStack.Pop();
|
---|
| 209 | int[] firstSubtreeHashes = GetSubtreeHashes(firstSubtreeRoot, parseStack);
|
---|
| 210 |
|
---|
| 211 | // First subtree
|
---|
| 212 | if (ReferenceEquals(firstSubtreeRoot, Addition)) {
|
---|
| 213 | uniqueChildHashes.UnionWith(firstSubtreeHashes);
|
---|
| 214 | } else {
|
---|
| 215 | uniqueChildHashes.Add(AggregateHashes(firstSubtreeHashes));
|
---|
| 216 | }
|
---|
| 217 |
|
---|
| 218 | // Second subtree
|
---|
| 219 | TerminalSymbol secondSubtreeRoot = parseStack.Pop();
|
---|
| 220 | int[] secondSubtreeHashes = GetSubtreeHashes(secondSubtreeRoot, parseStack);
|
---|
| 221 |
|
---|
| 222 | if (ReferenceEquals(secondSubtreeRoot, Addition)) {
|
---|
| 223 | uniqueChildHashes.UnionWith(secondSubtreeHashes);
|
---|
| 224 | } else {
|
---|
| 225 | uniqueChildHashes.Add(AggregateHashes(secondSubtreeHashes));
|
---|
| 226 | }
|
---|
| 227 |
|
---|
| 228 | childHashes = uniqueChildHashes.ToList();
|
---|
| 229 | childHashes.Sort();
|
---|
| 230 | }
|
---|
| 231 |
|
---|
| 232 | Debug.Assert(childHashes != null, "Sentence not hashable!");
|
---|
| 233 | return childHashes.ToArray();
|
---|
| 234 | }
|
---|
| 235 |
|
---|
| 236 | private int AggregateHashes(IEnumerable<int> hashes) {
|
---|
| 237 | return hashes.Aggregate(0, (result, ti) => ((result << 5) + result) ^ ti.GetHashCode());
|
---|
| 238 | }
|
---|
[15722] | 239 | #endregion
|
---|
| 240 |
|
---|
| 241 | #region Parse to SymbolicExpressionTree
|
---|
| 242 | public SymbolicExpressionTree ParseSymbolicExpressionTree(SymbolString sentence) {
|
---|
| 243 | Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!");
|
---|
| 244 | Debug.Assert(sentence.All(s => s is TerminalSymbol), "Trying to evaluate symbol sequence with nonterminalsymbols!");
|
---|
| 245 |
|
---|
| 246 | symbolicExpressionGrammar.ConfigureAsDefaultRegressionGrammar();
|
---|
| 247 |
|
---|
| 248 | var rootNode = rootSy.CreateTreeNode();
|
---|
| 249 | var startNode = startSy.CreateTreeNode();
|
---|
| 250 | rootNode.AddSubtree(startNode);
|
---|
| 251 |
|
---|
| 252 | Stack<TerminalSymbol> parseStack = new Stack<TerminalSymbol>(sentence.OfType<TerminalSymbol>());
|
---|
| 253 | startNode.AddSubtree(ParseSymbolicExpressionTree(parseStack));
|
---|
| 254 |
|
---|
| 255 | return new SymbolicExpressionTree(rootNode);
|
---|
| 256 | }
|
---|
| 257 |
|
---|
| 258 | public ISymbolicExpressionTreeNode ParseSymbolicExpressionTree(Stack<TerminalSymbol> parseStack) {
|
---|
| 259 | TerminalSymbol currentSymbol = parseStack.Pop();
|
---|
| 260 |
|
---|
| 261 | ISymbolicExpressionTreeNode parsedSubTree = null;
|
---|
| 262 |
|
---|
| 263 | if (ReferenceEquals(currentSymbol, Addition)) {
|
---|
| 264 | parsedSubTree = addSy.CreateTreeNode();
|
---|
| 265 | parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // left part
|
---|
| 266 | parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // right part
|
---|
| 267 |
|
---|
| 268 | } else if (ReferenceEquals(currentSymbol, Multiplication)) {
|
---|
| 269 | parsedSubTree = mulSy.CreateTreeNode();
|
---|
| 270 | parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // left part
|
---|
| 271 | parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // right part
|
---|
| 272 |
|
---|
| 273 | } else if (Var.VariableTerminalSymbols.Contains(currentSymbol)) {
|
---|
| 274 | VariableTreeNode varNode = (VariableTreeNode)varSy.CreateTreeNode();
|
---|
| 275 | varNode.Weight = 1.0;
|
---|
| 276 | varNode.VariableName = currentSymbol.StringRepresentation;
|
---|
| 277 | parsedSubTree = varNode;
|
---|
| 278 | }
|
---|
| 279 |
|
---|
| 280 | Debug.Assert(parsedSubTree != null);
|
---|
| 281 | return parsedSubTree;
|
---|
| 282 | }
|
---|
| 283 | #endregion
|
---|
[15724] | 284 |
|
---|
| 285 | #region Parse to Infix string
|
---|
| 286 |
|
---|
| 287 | public SymbolString PostfixToInfixParser(SymbolString phrase) {
|
---|
| 288 | Stack<Symbol> parseStack = new Stack<Symbol>(phrase);
|
---|
| 289 |
|
---|
| 290 | return PostfixToInfixSubtreeParser(parseStack);
|
---|
| 291 | }
|
---|
| 292 |
|
---|
| 293 | private SymbolString PostfixToInfixSubtreeParser(Stack<Symbol> parseStack) {
|
---|
| 294 | Symbol head = parseStack.Pop();
|
---|
| 295 |
|
---|
| 296 | SymbolString result = new SymbolString();
|
---|
| 297 |
|
---|
| 298 | if (ReferenceEquals(head, Addition) || ReferenceEquals(head, Multiplication)) {
|
---|
| 299 | result.AddRange(PostfixToInfixSubtreeParser(parseStack));
|
---|
| 300 | result.Add(head);
|
---|
| 301 | result.AddRange(PostfixToInfixSubtreeParser(parseStack));
|
---|
| 302 | } else {
|
---|
| 303 | result.Add(head);
|
---|
| 304 | }
|
---|
| 305 | return result;
|
---|
| 306 | }
|
---|
| 307 |
|
---|
| 308 | #endregion
|
---|
[15712] | 309 | }
|
---|
| 310 | }
|
---|