[15725] | 1 | using System;
|
---|
[15714] | 2 | using System.Collections.Generic;
|
---|
| 3 | using System.Diagnostics;
|
---|
| 4 | using System.Linq;
|
---|
| 5 | using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration;
|
---|
| 6 | using HeuristicLab.Common;
|
---|
[15722] | 7 | using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
|
---|
| 8 | using HeuristicLab.Problems.DataAnalysis.Symbolic;
|
---|
[15714] | 9 |
|
---|
| 10 | namespace HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration {
|
---|
[15712] | 11 | public class Grammar {
|
---|
| 12 |
|
---|
| 13 | public Symbol StartSymbol;
|
---|
| 14 |
|
---|
[15714] | 15 | #region Symbols
|
---|
[15722] | 16 |
|
---|
[15714] | 17 | public VariableSymbol Var;
|
---|
| 18 |
|
---|
| 19 | public NonterminalSymbol Expr;
|
---|
| 20 | public NonterminalSymbol Term;
|
---|
| 21 | public NonterminalSymbol Factor;
|
---|
| 22 |
|
---|
| 23 | public TerminalSymbol Addition;
|
---|
| 24 | public TerminalSymbol Multiplication;
|
---|
[15722] | 25 |
|
---|
[15714] | 26 | #endregion
|
---|
| 27 |
|
---|
| 28 |
|
---|
[15722] | 29 | #region HL Symbols for Parsing ExpressionTrees
|
---|
| 30 |
|
---|
| 31 | private TypeCoherentExpressionGrammar symbolicExpressionGrammar;
|
---|
| 32 |
|
---|
| 33 | private ISymbol constSy;
|
---|
| 34 | private ISymbol varSy;
|
---|
| 35 |
|
---|
| 36 | private ISymbol addSy;
|
---|
| 37 | private ISymbol mulSy;
|
---|
| 38 | private ISymbol logSy;
|
---|
| 39 | private ISymbol expSy;
|
---|
| 40 | private ISymbol divSy;
|
---|
| 41 |
|
---|
| 42 | private ISymbol rootSy;
|
---|
| 43 | private ISymbol startSy;
|
---|
| 44 |
|
---|
| 45 | #endregion
|
---|
| 46 |
|
---|
[15712] | 47 | public Grammar(string[] variables) {
|
---|
| 48 | #region Define Symbols
|
---|
[15714] | 49 | Var = new VariableSymbol("var", variables);
|
---|
[15712] | 50 |
|
---|
[15714] | 51 | Expr = new NonterminalSymbol("Expr");
|
---|
| 52 | Term = new NonterminalSymbol("Term");
|
---|
| 53 | Factor = new NonterminalSymbol("Factor");
|
---|
[15712] | 54 |
|
---|
[15714] | 55 | Addition = new TerminalSymbol("+");
|
---|
| 56 | Multiplication = new TerminalSymbol("*");
|
---|
[15712] | 57 | #endregion
|
---|
| 58 |
|
---|
| 59 |
|
---|
[15723] | 60 | #region Production rules
|
---|
| 61 | // order of production is important, since they are accessed via index
|
---|
| 62 | // in memoization.
|
---|
[15714] | 63 | StartSymbol = Expr;
|
---|
[15712] | 64 |
|
---|
[15714] | 65 | Expr.AddProduction(Term, Expr, Addition);
|
---|
| 66 | Expr.AddProduction(Term);
|
---|
[15712] | 67 |
|
---|
[15714] | 68 | Term.AddProduction(Factor, Term, Multiplication);
|
---|
| 69 | Term.AddProduction(Factor);
|
---|
[15712] | 70 |
|
---|
[15714] | 71 | Factor.AddProduction(Var);
|
---|
[15712] | 72 | #endregion
|
---|
[15722] | 73 |
|
---|
| 74 | #region Parsing to SymbolicExpressionTree
|
---|
| 75 | symbolicExpressionGrammar = new TypeCoherentExpressionGrammar();
|
---|
| 76 | symbolicExpressionGrammar.ConfigureAsDefaultRegressionGrammar();
|
---|
| 77 |
|
---|
| 78 | constSy = symbolicExpressionGrammar.Symbols.OfType<Constant>().First();
|
---|
| 79 | varSy = symbolicExpressionGrammar.Symbols.OfType<Variable>().First();
|
---|
| 80 | addSy = symbolicExpressionGrammar.AllowedSymbols.OfType<Addition>().First();
|
---|
| 81 | mulSy = symbolicExpressionGrammar.AllowedSymbols.OfType<Multiplication>().First();
|
---|
| 82 | logSy = symbolicExpressionGrammar.AllowedSymbols.OfType<Logarithm>().First();
|
---|
| 83 | expSy = symbolicExpressionGrammar.AllowedSymbols.OfType<Exponential>().First();
|
---|
| 84 | divSy = symbolicExpressionGrammar.AllowedSymbols.OfType<Division>().First();
|
---|
| 85 |
|
---|
| 86 | rootSy = symbolicExpressionGrammar.AllowedSymbols.OfType<ProgramRootSymbol>().First();
|
---|
| 87 | startSy = symbolicExpressionGrammar.AllowedSymbols.OfType<StartSymbol>().First();
|
---|
| 88 |
|
---|
| 89 | #endregion
|
---|
[15712] | 90 | }
|
---|
[15714] | 91 |
|
---|
[15723] | 92 | #region Hashing
|
---|
[15714] | 93 | public int CalcHashCode(SymbolString sentence) {
|
---|
| 94 | Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!");
|
---|
[15734] | 95 | // Debug.Assert(sentence.All(s => s is TerminalSymbol), "Trying to evaluate symbol sequence with nonterminalsymbols!");
|
---|
[15714] | 96 |
|
---|
[15734] | 97 | Stack<Symbol> parseStack = new Stack<Symbol>(sentence);
|
---|
[15714] | 98 |
|
---|
[15734] | 99 | Symbol peek = parseStack.Peek();
|
---|
[15725] | 100 | int[] subtreeHashes = GetSubtreeHashes(parseStack);
|
---|
| 101 | return AggregateHashes(peek, subtreeHashes);
|
---|
[15714] | 102 | }
|
---|
| 103 |
|
---|
[15734] | 104 | private int[] GetSubtreeHashes(Stack<Symbol> parseStack) {
|
---|
| 105 | Symbol currentSymbol = parseStack.Pop();
|
---|
[15714] | 106 |
|
---|
[15725] | 107 | // VARIABLE
|
---|
[15734] | 108 | // if (Var.VariableTerminalSymbols.Contains(currentSymbol)) {
|
---|
| 109 | // return currentSymbol.StringRepresentation.GetHashCode().ToEnumerable().ToArray();
|
---|
| 110 | // }
|
---|
[15714] | 111 |
|
---|
[15725] | 112 | // MULTIPLICATION
|
---|
| 113 | if (ReferenceEquals(currentSymbol, Multiplication)) {
|
---|
| 114 | List<int> childHashes = new List<int>();
|
---|
[15714] | 115 |
|
---|
| 116 | // First subtree
|
---|
[15725] | 117 | if (ReferenceEquals(parseStack.Peek(), Multiplication)) {
|
---|
| 118 | childHashes.AddRange(GetSubtreeHashes(parseStack));
|
---|
[15714] | 119 | } else {
|
---|
[15725] | 120 | childHashes.Add(AggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack)));
|
---|
[15714] | 121 | }
|
---|
| 122 | // Second subtree
|
---|
[15725] | 123 | if (ReferenceEquals(parseStack.Peek(), Multiplication)) {
|
---|
| 124 | childHashes.AddRange(GetSubtreeHashes(parseStack));
|
---|
[15714] | 125 | } else {
|
---|
[15725] | 126 | childHashes.Add(AggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack)));
|
---|
[15714] | 127 | }
|
---|
| 128 |
|
---|
| 129 | // Sort due to commutativity
|
---|
| 130 | childHashes.Sort();
|
---|
[15725] | 131 | return childHashes.ToArray();
|
---|
| 132 | }
|
---|
[15714] | 133 |
|
---|
[15725] | 134 | // ADDITION
|
---|
| 135 | if (ReferenceEquals(currentSymbol, Addition)) {
|
---|
[15714] | 136 | HashSet<int> uniqueChildHashes = new HashSet<int>();
|
---|
| 137 |
|
---|
| 138 | // First subtree
|
---|
[15725] | 139 | if (ReferenceEquals(parseStack.Peek(), Addition)) {
|
---|
| 140 | uniqueChildHashes.UnionWith(GetSubtreeHashes(parseStack));
|
---|
[15714] | 141 | } else {
|
---|
[15725] | 142 | var peek = parseStack.Peek();
|
---|
| 143 | uniqueChildHashes.Add(AggregateHashes(peek, GetSubtreeHashes(parseStack)));
|
---|
[15714] | 144 | }
|
---|
| 145 | // Second subtree
|
---|
[15725] | 146 | if (ReferenceEquals(parseStack.Peek(), Addition)) {
|
---|
| 147 | uniqueChildHashes.UnionWith(GetSubtreeHashes(parseStack));
|
---|
[15714] | 148 | } else {
|
---|
[15725] | 149 | var peek = parseStack.Peek();
|
---|
| 150 | uniqueChildHashes.Add(AggregateHashes(peek, GetSubtreeHashes(parseStack)));
|
---|
[15714] | 151 | }
|
---|
| 152 |
|
---|
[15725] | 153 | var result = uniqueChildHashes.ToList();
|
---|
| 154 | result.Sort();
|
---|
| 155 | return result.ToArray();
|
---|
[15714] | 156 | }
|
---|
[15734] | 157 |
|
---|
[15746] | 158 | // var or nonterminal symbol
|
---|
[15734] | 159 | return currentSymbol.StringRepresentation.GetHashCode().ToEnumerable().ToArray();
|
---|
[15714] | 160 | }
|
---|
| 161 |
|
---|
[15734] | 162 | private int AggregateHashes(Symbol operatorSym, IEnumerable<int> hashes) {
|
---|
[15725] | 163 | // If multiple subtrees are "merged" (e.g. added, multiplied, etc.), consider the executed operation
|
---|
| 164 | var hashesArray = hashes.ToArray();
|
---|
[15734] | 165 | int start = hashesArray.Length > 1 ? operatorSym.StringRepresentation.GetHashCode() : 0;
|
---|
[15725] | 166 | return hashesArray.Aggregate(start, (result, ti) => ((result << 5) + result) ^ ti.GetHashCode());
|
---|
[15714] | 167 | }
|
---|
[15722] | 168 | #endregion
|
---|
| 169 |
|
---|
| 170 | #region Parse to SymbolicExpressionTree
|
---|
| 171 | public SymbolicExpressionTree ParseSymbolicExpressionTree(SymbolString sentence) {
|
---|
| 172 | Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!");
|
---|
| 173 | Debug.Assert(sentence.All(s => s is TerminalSymbol), "Trying to evaluate symbol sequence with nonterminalsymbols!");
|
---|
| 174 |
|
---|
| 175 | symbolicExpressionGrammar.ConfigureAsDefaultRegressionGrammar();
|
---|
| 176 |
|
---|
| 177 | var rootNode = rootSy.CreateTreeNode();
|
---|
| 178 | var startNode = startSy.CreateTreeNode();
|
---|
| 179 | rootNode.AddSubtree(startNode);
|
---|
| 180 |
|
---|
| 181 | Stack<TerminalSymbol> parseStack = new Stack<TerminalSymbol>(sentence.OfType<TerminalSymbol>());
|
---|
| 182 | startNode.AddSubtree(ParseSymbolicExpressionTree(parseStack));
|
---|
| 183 |
|
---|
| 184 | return new SymbolicExpressionTree(rootNode);
|
---|
| 185 | }
|
---|
| 186 |
|
---|
| 187 | public ISymbolicExpressionTreeNode ParseSymbolicExpressionTree(Stack<TerminalSymbol> parseStack) {
|
---|
| 188 | TerminalSymbol currentSymbol = parseStack.Pop();
|
---|
| 189 |
|
---|
| 190 | ISymbolicExpressionTreeNode parsedSubTree = null;
|
---|
| 191 |
|
---|
| 192 | if (ReferenceEquals(currentSymbol, Addition)) {
|
---|
| 193 | parsedSubTree = addSy.CreateTreeNode();
|
---|
| 194 | parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // left part
|
---|
| 195 | parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // right part
|
---|
| 196 |
|
---|
| 197 | } else if (ReferenceEquals(currentSymbol, Multiplication)) {
|
---|
| 198 | parsedSubTree = mulSy.CreateTreeNode();
|
---|
| 199 | parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // left part
|
---|
| 200 | parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // right part
|
---|
| 201 |
|
---|
| 202 | } else if (Var.VariableTerminalSymbols.Contains(currentSymbol)) {
|
---|
| 203 | VariableTreeNode varNode = (VariableTreeNode)varSy.CreateTreeNode();
|
---|
| 204 | varNode.Weight = 1.0;
|
---|
| 205 | varNode.VariableName = currentSymbol.StringRepresentation;
|
---|
| 206 | parsedSubTree = varNode;
|
---|
| 207 | }
|
---|
| 208 |
|
---|
| 209 | Debug.Assert(parsedSubTree != null);
|
---|
| 210 | return parsedSubTree;
|
---|
| 211 | }
|
---|
| 212 | #endregion
|
---|
[15724] | 213 |
|
---|
| 214 | #region Parse to Infix string
|
---|
| 215 |
|
---|
| 216 | public SymbolString PostfixToInfixParser(SymbolString phrase) {
|
---|
| 217 | Stack<Symbol> parseStack = new Stack<Symbol>(phrase);
|
---|
| 218 |
|
---|
| 219 | return PostfixToInfixSubtreeParser(parseStack);
|
---|
| 220 | }
|
---|
| 221 |
|
---|
| 222 | private SymbolString PostfixToInfixSubtreeParser(Stack<Symbol> parseStack) {
|
---|
| 223 | Symbol head = parseStack.Pop();
|
---|
| 224 |
|
---|
| 225 | SymbolString result = new SymbolString();
|
---|
| 226 |
|
---|
| 227 | if (ReferenceEquals(head, Addition) || ReferenceEquals(head, Multiplication)) {
|
---|
[15725] | 228 | // right part
|
---|
| 229 | SymbolString rightPart = PostfixToInfixSubtreeParser(parseStack);
|
---|
| 230 | SymbolString leftPart = PostfixToInfixSubtreeParser(parseStack);
|
---|
| 231 |
|
---|
| 232 | result.AddRange(leftPart);
|
---|
[15724] | 233 | result.Add(head);
|
---|
[15725] | 234 | result.AddRange(rightPart);
|
---|
[15724] | 235 | } else {
|
---|
| 236 | result.Add(head);
|
---|
| 237 | }
|
---|
| 238 | return result;
|
---|
| 239 | }
|
---|
| 240 |
|
---|
| 241 | #endregion
|
---|
[15712] | 242 | }
|
---|
| 243 | }
|
---|