[15725] | 1 | using System;
|
---|
[15714] | 2 | using System.Collections.Generic;
|
---|
| 3 | using System.Diagnostics;
|
---|
| 4 | using System.Linq;
|
---|
| 5 | using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration;
|
---|
| 6 | using HeuristicLab.Common;
|
---|
[15722] | 7 | using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
|
---|
| 8 | using HeuristicLab.Problems.DataAnalysis.Symbolic;
|
---|
[15714] | 9 |
|
---|
| 10 | namespace HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration {
|
---|
[15712] | 11 | public class Grammar {
|
---|
| 12 |
|
---|
| 13 | public Symbol StartSymbol;
|
---|
| 14 |
|
---|
[15714] | 15 | #region Symbols
|
---|
[15722] | 16 |
|
---|
[15714] | 17 | public VariableSymbol Var;
|
---|
| 18 |
|
---|
| 19 | public NonterminalSymbol Expr;
|
---|
| 20 | public NonterminalSymbol Term;
|
---|
| 21 | public NonterminalSymbol Factor;
|
---|
[15772] | 22 | public NonterminalSymbol LogFactor;
|
---|
| 23 | public NonterminalSymbol ExpFactor;
|
---|
| 24 | public NonterminalSymbol SinFactor;
|
---|
[15714] | 25 |
|
---|
[15772] | 26 | public NonterminalSymbol SimpleExpr;
|
---|
| 27 | public NonterminalSymbol SimpleTerm;
|
---|
| 28 |
|
---|
[15714] | 29 | public TerminalSymbol Addition;
|
---|
| 30 | public TerminalSymbol Multiplication;
|
---|
[15772] | 31 | public TerminalSymbol Log;
|
---|
| 32 | public TerminalSymbol Exp;
|
---|
| 33 | public TerminalSymbol Sin;
|
---|
[15722] | 34 |
|
---|
[15772] | 35 | // For infix notation
|
---|
| 36 | public TerminalSymbol OpeningBracket;
|
---|
| 37 | public TerminalSymbol ClosingBracket;
|
---|
| 38 |
|
---|
[15714] | 39 | #endregion
|
---|
| 40 |
|
---|
| 41 |
|
---|
[15722] | 42 | #region HL Symbols for Parsing ExpressionTrees
|
---|
| 43 |
|
---|
| 44 | private TypeCoherentExpressionGrammar symbolicExpressionGrammar;
|
---|
| 45 |
|
---|
| 46 | private ISymbol constSy;
|
---|
| 47 | private ISymbol varSy;
|
---|
| 48 |
|
---|
| 49 | private ISymbol addSy;
|
---|
| 50 | private ISymbol mulSy;
|
---|
| 51 | private ISymbol logSy;
|
---|
| 52 | private ISymbol expSy;
|
---|
| 53 | private ISymbol divSy;
|
---|
[15772] | 54 | private ISymbol sinSy;
|
---|
[15722] | 55 |
|
---|
| 56 | private ISymbol rootSy;
|
---|
| 57 | private ISymbol startSy;
|
---|
| 58 |
|
---|
| 59 | #endregion
|
---|
| 60 |
|
---|
[15712] | 61 | public Grammar(string[] variables) {
|
---|
| 62 | #region Define Symbols
|
---|
[15714] | 63 | Var = new VariableSymbol("var", variables);
|
---|
[15712] | 64 |
|
---|
[15714] | 65 | Expr = new NonterminalSymbol("Expr");
|
---|
| 66 | Term = new NonterminalSymbol("Term");
|
---|
| 67 | Factor = new NonterminalSymbol("Factor");
|
---|
[15772] | 68 | LogFactor = new NonterminalSymbol("LogFactor");
|
---|
| 69 | ExpFactor = new NonterminalSymbol("ExpFactor");
|
---|
| 70 | SinFactor = new NonterminalSymbol("SinFactor");
|
---|
[15712] | 71 |
|
---|
[15772] | 72 | SimpleExpr = new NonterminalSymbol("SimpleExpr");
|
---|
| 73 | SimpleTerm = new NonterminalSymbol("SimpleTerm");
|
---|
| 74 |
|
---|
[15714] | 75 | Addition = new TerminalSymbol("+");
|
---|
| 76 | Multiplication = new TerminalSymbol("*");
|
---|
[15772] | 77 | Log = new TerminalSymbol("log");
|
---|
| 78 | Exp = new TerminalSymbol("exp");
|
---|
| 79 | Sin = new TerminalSymbol("sin");
|
---|
| 80 |
|
---|
| 81 | OpeningBracket = new TerminalSymbol("(");
|
---|
| 82 | ClosingBracket = new TerminalSymbol(")");
|
---|
[15712] | 83 | #endregion
|
---|
| 84 |
|
---|
| 85 |
|
---|
[15723] | 86 | #region Production rules
|
---|
| 87 | // order of production is important, since they are accessed via index
|
---|
| 88 | // in memoization.
|
---|
[15714] | 89 | StartSymbol = Expr;
|
---|
[15712] | 90 |
|
---|
[15714] | 91 | Expr.AddProduction(Term, Expr, Addition);
|
---|
| 92 | Expr.AddProduction(Term);
|
---|
[15712] | 93 |
|
---|
[15714] | 94 | Term.AddProduction(Factor, Term, Multiplication);
|
---|
| 95 | Term.AddProduction(Factor);
|
---|
[15712] | 96 |
|
---|
[15714] | 97 | Factor.AddProduction(Var);
|
---|
[15772] | 98 | Factor.AddProduction(LogFactor);
|
---|
| 99 | Factor.AddProduction(ExpFactor);
|
---|
| 100 | Factor.AddProduction(SinFactor);
|
---|
| 101 |
|
---|
| 102 | LogFactor.AddProduction(SimpleExpr, Log);
|
---|
| 103 | ExpFactor.AddProduction(SimpleTerm, Exp);
|
---|
| 104 | SinFactor.AddProduction(SimpleExpr, Sin);
|
---|
| 105 |
|
---|
| 106 | SimpleExpr.AddProduction(SimpleTerm, SimpleExpr, Addition);
|
---|
| 107 | SimpleExpr.AddProduction(SimpleTerm);
|
---|
| 108 |
|
---|
| 109 | SimpleTerm.AddProduction(Var, SimpleTerm, Multiplication);
|
---|
| 110 | SimpleTerm.AddProduction(Var);
|
---|
[15712] | 111 | #endregion
|
---|
[15722] | 112 |
|
---|
| 113 | #region Parsing to SymbolicExpressionTree
|
---|
| 114 | symbolicExpressionGrammar = new TypeCoherentExpressionGrammar();
|
---|
| 115 | symbolicExpressionGrammar.ConfigureAsDefaultRegressionGrammar();
|
---|
| 116 |
|
---|
| 117 | constSy = symbolicExpressionGrammar.Symbols.OfType<Constant>().First();
|
---|
| 118 | varSy = symbolicExpressionGrammar.Symbols.OfType<Variable>().First();
|
---|
[15772] | 119 | addSy = symbolicExpressionGrammar.Symbols.OfType<Addition>().First();
|
---|
| 120 | mulSy = symbolicExpressionGrammar.Symbols.OfType<Multiplication>().First();
|
---|
| 121 | logSy = symbolicExpressionGrammar.Symbols.OfType<Logarithm>().First();
|
---|
| 122 | expSy = symbolicExpressionGrammar.Symbols.OfType<Exponential>().First();
|
---|
| 123 | divSy = symbolicExpressionGrammar.Symbols.OfType<Division>().First();
|
---|
| 124 | sinSy = symbolicExpressionGrammar.Symbols.OfType<Sine>().First();
|
---|
[15722] | 125 |
|
---|
[15772] | 126 | rootSy = symbolicExpressionGrammar.Symbols.OfType<ProgramRootSymbol>().First();
|
---|
| 127 | startSy = symbolicExpressionGrammar.Symbols.OfType<StartSymbol>().First();
|
---|
[15722] | 128 |
|
---|
| 129 | #endregion
|
---|
[15712] | 130 | }
|
---|
[15714] | 131 |
|
---|
[15723] | 132 | #region Hashing
|
---|
[15714] | 133 | public int CalcHashCode(SymbolString sentence) {
|
---|
| 134 | Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!");
|
---|
[15734] | 135 | // Debug.Assert(sentence.All(s => s is TerminalSymbol), "Trying to evaluate symbol sequence with nonterminalsymbols!");
|
---|
[15714] | 136 |
|
---|
[15734] | 137 | Stack<Symbol> parseStack = new Stack<Symbol>(sentence);
|
---|
[15714] | 138 |
|
---|
[15734] | 139 | Symbol peek = parseStack.Peek();
|
---|
[15725] | 140 | int[] subtreeHashes = GetSubtreeHashes(parseStack);
|
---|
| 141 | return AggregateHashes(peek, subtreeHashes);
|
---|
[15714] | 142 | }
|
---|
| 143 |
|
---|
[15734] | 144 | private int[] GetSubtreeHashes(Stack<Symbol> parseStack) {
|
---|
| 145 | Symbol currentSymbol = parseStack.Pop();
|
---|
[15714] | 146 |
|
---|
[15725] | 147 | // MULTIPLICATION
|
---|
| 148 | if (ReferenceEquals(currentSymbol, Multiplication)) {
|
---|
| 149 | List<int> childHashes = new List<int>();
|
---|
[15714] | 150 |
|
---|
| 151 | // First subtree
|
---|
[15725] | 152 | if (ReferenceEquals(parseStack.Peek(), Multiplication)) {
|
---|
| 153 | childHashes.AddRange(GetSubtreeHashes(parseStack));
|
---|
[15714] | 154 | } else {
|
---|
[15725] | 155 | childHashes.Add(AggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack)));
|
---|
[15714] | 156 | }
|
---|
| 157 | // Second subtree
|
---|
[15725] | 158 | if (ReferenceEquals(parseStack.Peek(), Multiplication)) {
|
---|
| 159 | childHashes.AddRange(GetSubtreeHashes(parseStack));
|
---|
[15714] | 160 | } else {
|
---|
[15725] | 161 | childHashes.Add(AggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack)));
|
---|
[15714] | 162 | }
|
---|
| 163 |
|
---|
| 164 | // Sort due to commutativity
|
---|
| 165 | childHashes.Sort();
|
---|
[15725] | 166 | return childHashes.ToArray();
|
---|
| 167 | }
|
---|
[15714] | 168 |
|
---|
[15725] | 169 | // ADDITION
|
---|
| 170 | if (ReferenceEquals(currentSymbol, Addition)) {
|
---|
[15714] | 171 | HashSet<int> uniqueChildHashes = new HashSet<int>();
|
---|
| 172 |
|
---|
| 173 | // First subtree
|
---|
[15725] | 174 | if (ReferenceEquals(parseStack.Peek(), Addition)) {
|
---|
| 175 | uniqueChildHashes.UnionWith(GetSubtreeHashes(parseStack));
|
---|
[15714] | 176 | } else {
|
---|
[15725] | 177 | var peek = parseStack.Peek();
|
---|
| 178 | uniqueChildHashes.Add(AggregateHashes(peek, GetSubtreeHashes(parseStack)));
|
---|
[15714] | 179 | }
|
---|
| 180 | // Second subtree
|
---|
[15725] | 181 | if (ReferenceEquals(parseStack.Peek(), Addition)) {
|
---|
| 182 | uniqueChildHashes.UnionWith(GetSubtreeHashes(parseStack));
|
---|
[15714] | 183 | } else {
|
---|
[15725] | 184 | var peek = parseStack.Peek();
|
---|
| 185 | uniqueChildHashes.Add(AggregateHashes(peek, GetSubtreeHashes(parseStack)));
|
---|
[15714] | 186 | }
|
---|
| 187 |
|
---|
[15725] | 188 | var result = uniqueChildHashes.ToList();
|
---|
| 189 | result.Sort();
|
---|
| 190 | return result.ToArray();
|
---|
[15714] | 191 | }
|
---|
[15734] | 192 |
|
---|
[15772] | 193 | // LOG, EXP, SIN
|
---|
| 194 | if (ReferenceEquals(currentSymbol, Log) || ReferenceEquals(currentSymbol, Exp) ||
|
---|
| 195 | ReferenceEquals(currentSymbol, Sin)) {
|
---|
| 196 | return AggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack)).ToEnumerable().ToArray();
|
---|
| 197 | }
|
---|
| 198 |
|
---|
[15746] | 199 | // var or nonterminal symbol
|
---|
[15734] | 200 | return currentSymbol.StringRepresentation.GetHashCode().ToEnumerable().ToArray();
|
---|
[15714] | 201 | }
|
---|
| 202 |
|
---|
[15734] | 203 | private int AggregateHashes(Symbol operatorSym, IEnumerable<int> hashes) {
|
---|
[15725] | 204 | var hashesArray = hashes.ToArray();
|
---|
[15772] | 205 |
|
---|
| 206 | int start;
|
---|
| 207 | if (ReferenceEquals(operatorSym, Addition) && hashesArray.Count() <= 1) {
|
---|
| 208 | start = 0;
|
---|
| 209 | } else {
|
---|
| 210 | start = operatorSym.StringRepresentation.GetHashCode();
|
---|
| 211 | }
|
---|
| 212 |
|
---|
[15725] | 213 | return hashesArray.Aggregate(start, (result, ti) => ((result << 5) + result) ^ ti.GetHashCode());
|
---|
[15714] | 214 | }
|
---|
[15722] | 215 | #endregion
|
---|
| 216 |
|
---|
| 217 | #region Parse to SymbolicExpressionTree
|
---|
| 218 | public SymbolicExpressionTree ParseSymbolicExpressionTree(SymbolString sentence) {
|
---|
| 219 | Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!");
|
---|
| 220 | Debug.Assert(sentence.All(s => s is TerminalSymbol), "Trying to evaluate symbol sequence with nonterminalsymbols!");
|
---|
| 221 |
|
---|
| 222 | symbolicExpressionGrammar.ConfigureAsDefaultRegressionGrammar();
|
---|
| 223 |
|
---|
| 224 | var rootNode = rootSy.CreateTreeNode();
|
---|
| 225 | var startNode = startSy.CreateTreeNode();
|
---|
| 226 | rootNode.AddSubtree(startNode);
|
---|
| 227 |
|
---|
| 228 | Stack<TerminalSymbol> parseStack = new Stack<TerminalSymbol>(sentence.OfType<TerminalSymbol>());
|
---|
| 229 | startNode.AddSubtree(ParseSymbolicExpressionTree(parseStack));
|
---|
| 230 |
|
---|
| 231 | return new SymbolicExpressionTree(rootNode);
|
---|
| 232 | }
|
---|
| 233 |
|
---|
| 234 | public ISymbolicExpressionTreeNode ParseSymbolicExpressionTree(Stack<TerminalSymbol> parseStack) {
|
---|
| 235 | TerminalSymbol currentSymbol = parseStack.Pop();
|
---|
| 236 |
|
---|
| 237 | ISymbolicExpressionTreeNode parsedSubTree = null;
|
---|
| 238 |
|
---|
| 239 | if (ReferenceEquals(currentSymbol, Addition)) {
|
---|
| 240 | parsedSubTree = addSy.CreateTreeNode();
|
---|
| 241 | parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // left part
|
---|
| 242 | parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // right part
|
---|
| 243 |
|
---|
| 244 | } else if (ReferenceEquals(currentSymbol, Multiplication)) {
|
---|
| 245 | parsedSubTree = mulSy.CreateTreeNode();
|
---|
| 246 | parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // left part
|
---|
| 247 | parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // right part
|
---|
| 248 |
|
---|
[15772] | 249 | } else if (ReferenceEquals(currentSymbol, Log)) {
|
---|
| 250 | parsedSubTree = logSy.CreateTreeNode();
|
---|
| 251 | parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
|
---|
| 252 |
|
---|
| 253 | } else if (ReferenceEquals(currentSymbol, Exp)) {
|
---|
| 254 | parsedSubTree = expSy.CreateTreeNode();
|
---|
| 255 | parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
|
---|
| 256 |
|
---|
| 257 | } else if (ReferenceEquals(currentSymbol, Sin)) {
|
---|
| 258 | parsedSubTree = sinSy.CreateTreeNode();
|
---|
| 259 | parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
|
---|
| 260 |
|
---|
[15722] | 261 | } else if (Var.VariableTerminalSymbols.Contains(currentSymbol)) {
|
---|
| 262 | VariableTreeNode varNode = (VariableTreeNode)varSy.CreateTreeNode();
|
---|
| 263 | varNode.Weight = 1.0;
|
---|
| 264 | varNode.VariableName = currentSymbol.StringRepresentation;
|
---|
| 265 | parsedSubTree = varNode;
|
---|
| 266 | }
|
---|
| 267 |
|
---|
| 268 | Debug.Assert(parsedSubTree != null);
|
---|
| 269 | return parsedSubTree;
|
---|
| 270 | }
|
---|
| 271 | #endregion
|
---|
[15724] | 272 |
|
---|
| 273 | #region Parse to Infix string
|
---|
| 274 |
|
---|
| 275 | public SymbolString PostfixToInfixParser(SymbolString phrase) {
|
---|
| 276 | Stack<Symbol> parseStack = new Stack<Symbol>(phrase);
|
---|
| 277 |
|
---|
| 278 | return PostfixToInfixSubtreeParser(parseStack);
|
---|
| 279 | }
|
---|
| 280 |
|
---|
| 281 | private SymbolString PostfixToInfixSubtreeParser(Stack<Symbol> parseStack) {
|
---|
| 282 | Symbol head = parseStack.Pop();
|
---|
| 283 |
|
---|
| 284 | SymbolString result = new SymbolString();
|
---|
| 285 |
|
---|
| 286 | if (ReferenceEquals(head, Addition) || ReferenceEquals(head, Multiplication)) {
|
---|
[15725] | 287 | // right part
|
---|
| 288 | SymbolString rightPart = PostfixToInfixSubtreeParser(parseStack);
|
---|
| 289 | SymbolString leftPart = PostfixToInfixSubtreeParser(parseStack);
|
---|
| 290 |
|
---|
| 291 | result.AddRange(leftPart);
|
---|
[15724] | 292 | result.Add(head);
|
---|
[15725] | 293 | result.AddRange(rightPart);
|
---|
[15772] | 294 |
|
---|
| 295 | } else if (ReferenceEquals(head, Log) || ReferenceEquals(head, Exp) || ReferenceEquals(head, Sin)) {
|
---|
| 296 | result.Add(head);
|
---|
| 297 | result.Add(OpeningBracket);
|
---|
| 298 | result.AddRange(PostfixToInfixSubtreeParser(parseStack));
|
---|
| 299 | result.Add(ClosingBracket);
|
---|
| 300 |
|
---|
[15724] | 301 | } else {
|
---|
| 302 | result.Add(head);
|
---|
| 303 | }
|
---|
| 304 | return result;
|
---|
| 305 | }
|
---|
| 306 |
|
---|
| 307 | #endregion
|
---|
[15712] | 308 | }
|
---|
| 309 | }
|
---|