- Timestamp:
- 12/27/21 09:39:42 (3 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Importer/InfixExpressionParser.cs
r18132 r18169 40 40 /// 41 41 /// S = Expr EOF 42 /// Expr = ['-' | '+']Term { '+' Term | '-' Term }42 /// Expr = Term { '+' Term | '-' Term } 43 43 /// Term = Fact { '*' Fact | '/' Fact } 44 44 /// Fact = SimpleFact [ '^' SimpleFact ] … … 49 49 /// | VarExpr 50 50 /// | number 51 /// | ['+' | '-'] SimpleFact 51 52 /// ArgList = Expr { ',' Expr } 52 53 /// VarExpr = varId OptFactorPart 53 /// OptFactorPart = [ ('=' varVal | '[' ['+' | '-' ] number {',' ['+' | '-' ]number } ']' ) ]54 /// OptFactorPart = [ ('=' varVal | '[' ['+' | '-' ] number {',' ['+' | '-' ] number } ']' ) ] 54 55 /// varId = ident | ' ident ' | " ident " 55 56 /// varVal = ident | ' ident ' | " ident " … … 83 84 84 85 private Number number = new Number(); 85 private Constant constant = new Constant();86 private Constant minusOne = new Constant() { Value = -1 }; 86 87 private Variable variable = new Variable(); 87 88 private BinaryFactorVariable binaryFactorVar = new BinaryFactorVariable(); … … 162 163 int pos = 0; 163 164 while (true) { 164 while (pos < str.Length && Char.IsWhiteSpace(str[pos])) pos++;165 while (pos < str.Length && char.IsWhiteSpace(str[pos])) pos++; 165 166 if (pos >= str.Length) { 166 167 yield return new Token { TokenType = TokenType.End, strVal = "" }; … … 168 169 } 169 170 if (char.IsDigit(str[pos])) { 170 // read number (=> read until white space or o perator or comma)171 // read number (=> read until white space or other symbol) 171 172 var sb = new StringBuilder(); 172 173 sb.Append(str[pos]); … … 269 270 } else if (str[pos] == '<') { 270 271 pos++; 271 yield return new Token { TokenType = TokenType.LeftAngleBracket, strVal = "<"};272 yield return new Token { TokenType = TokenType.LeftAngleBracket, strVal = "<" }; 272 273 } else if (str[pos] == '>') { 273 274 pos++; 274 yield return new Token { TokenType = TokenType.RightAngleBracket, strVal = ">"};275 yield return new Token { TokenType = TokenType.RightAngleBracket, strVal = ">" }; 275 276 } else { 276 277 throw new ArgumentException("Invalid character: " + str[pos]); … … 289 290 } 290 291 291 /// Expr = ['-' | '+']Term { '+' Term | '-' Term }292 /// Expr = Term { '+' Term | '-' Term } 292 293 private ISymbolicExpressionTreeNode ParseExpr(Queue<Token> tokens) { 294 // build tree from bottom to top and left to right 295 // a + b - c => ((a + b) - c) 296 // a - b - c => ((a - b) - c) 297 // and then flatten as far as possible 298 var left = ParseTerm(tokens); 299 293 300 var next = tokens.Peek(); 294 var posTerms = new List<ISymbolicExpressionTreeNode>();295 var negTerms = new List<ISymbolicExpressionTreeNode>();296 bool negateFirstTerm = false;297 if (next.TokenType == TokenType.Operator && (next.strVal == "+" || next.strVal == "-")) {298 tokens.Dequeue();299 if (next.strVal == "-")300 negateFirstTerm = true;301 }302 var t = ParseTerm(tokens);303 if (negateFirstTerm) negTerms.Add(t);304 else posTerms.Add(t);305 306 next = tokens.Peek();307 301 while (next.strVal == "+" || next.strVal == "-") { 308 302 switch (next.strVal) { 309 303 case "+": { 310 304 tokens.Dequeue(); 311 var term = ParseTerm(tokens); 312 posTerms.Add(term); 305 var right = ParseTerm(tokens); 306 var op = GetSymbol("+").CreateTreeNode(); 307 op.AddSubtree(left); 308 op.AddSubtree(right); 309 left = op; 313 310 break; 314 311 } 315 312 case "-": { 316 313 tokens.Dequeue(); 317 var term = ParseTerm(tokens); 318 negTerms.Add(term); 314 var right = ParseTerm(tokens); 315 var op = GetSymbol("-").CreateTreeNode(); 316 op.AddSubtree(left); 317 op.AddSubtree(right); 318 left = op; 319 319 break; 320 320 } … … 323 323 } 324 324 325 var sum = GetSymbol("+").CreateTreeNode(); 326 foreach (var posTerm in posTerms) sum.AddSubtree(posTerm); 327 if (negTerms.Any()) { 328 if (negTerms.Count == 1) { 329 var sub = GetSymbol("-").CreateTreeNode(); 330 sub.AddSubtree(negTerms.Single()); 331 sum.AddSubtree(sub); 332 } else { 333 var sumNeg = GetSymbol("+").CreateTreeNode(); 334 foreach (var negTerm in negTerms) sumNeg.AddSubtree(negTerm); 335 336 var constNode = (NumberTreeNode)number.CreateTreeNode(); 337 constNode.Value = -1.0; 338 var prod = GetSymbol("*").CreateTreeNode(); 339 prod.AddSubtree(constNode); 340 prod.AddSubtree(sumNeg); 341 342 sum.AddSubtree(prod); 343 } 344 } 345 if (sum.SubtreeCount == 1) return sum.Subtrees.First(); 346 else return sum; 325 FoldLeftRecursive(left); 326 return left; 347 327 } 348 328 … … 355 335 /// Term = Fact { '*' Fact | '/' Fact } 356 336 private ISymbolicExpressionTreeNode ParseTerm(Queue<Token> tokens) { 357 var factors = new List<ISymbolicExpressionTreeNode>(); 358 var firstFactor = ParseFact(tokens); 359 factors.Add(firstFactor); 337 // build tree from bottom to top and left to right 338 // a / b * c => ((a / b) * c) 339 // a / b / c => ((a / b) / c) 340 // and then flatten as far as possible 341 342 var left = ParseFact(tokens); 360 343 361 344 var next = tokens.Peek(); … … 364 347 case "*": { 365 348 tokens.Dequeue(); 366 var fact = ParseFact(tokens); 367 factors.Add(fact); 349 var right = ParseFact(tokens); 350 351 var op = GetSymbol("*").CreateTreeNode(); 352 op.AddSubtree(left); 353 op.AddSubtree(right); 354 left = op; 368 355 break; 369 356 } 370 357 case "/": { 371 358 tokens.Dequeue(); 372 var invFact = ParseFact(tokens); 373 var divNode = GetSymbol("/").CreateTreeNode(); // 1/x 374 divNode.AddSubtree(invFact); 375 factors.Add(divNode); 359 var right = ParseFact(tokens); 360 var op = GetSymbol("/").CreateTreeNode(); 361 op.AddSubtree(left); 362 op.AddSubtree(right); 363 left = op; 376 364 break; 377 365 } … … 380 368 next = tokens.Peek(); 381 369 } 382 if (factors.Count == 1) return factors.First(); 383 else { 384 var prod = GetSymbol("*").CreateTreeNode(); 385 foreach (var f in factors) prod.AddSubtree(f); 386 return prod; 370 // remove all nodes where the child op is the same as the parent op 371 // (a * b) * c) => (a * b * c) 372 // (a / b) / c) => (a / b / c) 373 374 FoldLeftRecursive(left); 375 return left; 376 } 377 378 private void FoldLeftRecursive(ISymbolicExpressionTreeNode parent) { 379 if (parent.SubtreeCount > 0) { 380 var child = parent.GetSubtree(0); 381 FoldLeftRecursive(child); 382 if(parent.Symbol == child.Symbol) { 383 parent.RemoveSubtree(0); 384 for(int i=0;i<child.SubtreeCount; i++) { 385 parent.InsertSubtree(i, child.GetSubtree(i)); 386 } 387 } 387 388 } 388 389 } … … 409 410 /// | funcId '(' ArgList ') 410 411 /// | VarExpr 412 /// | '<' 'num' [ '=' [ '+' | '-' ] number ] '>' 411 413 /// | number 414 /// | ['+' | '-' ] SimpleFact 412 415 /// ArgList = Expr { ',' Expr } 413 416 /// VarExpr = varId OptFactorPart … … 433 436 if (tokens.Peek().TokenType == TokenType.LeftPar) { 434 437 // function identifier or LAG 435 var funcId = idTok.strVal.ToUpperInvariant(); 436 437 var funcNode = GetSymbol(funcId).CreateTreeNode(); 438 var lPar = tokens.Dequeue(); 439 if (lPar.TokenType != TokenType.LeftPar) 440 throw new ArgumentException("expected ("); 441 442 // handle 'lag' specifically 443 if (funcNode.Symbol is LaggedVariable) { 444 var varId = tokens.Dequeue(); 445 if (varId.TokenType != TokenType.Identifier) throw new ArgumentException("Identifier expected. Format for lagged variables: \"lag(x, -1)\""); 446 var comma = tokens.Dequeue(); 447 if (comma.TokenType != TokenType.Comma) throw new ArgumentException("',' expected, Format for lagged variables: \"lag(x, -1)\""); 448 double sign = 1.0; 449 if (tokens.Peek().strVal == "+" || tokens.Peek().strVal == "-") { 450 // read sign 451 var signTok = tokens.Dequeue(); 452 if (signTok.strVal == "-") sign = -1.0; 453 } 454 var lagToken = tokens.Dequeue(); 455 if (lagToken.TokenType != TokenType.Number) throw new ArgumentException("Number expected, Format for lagged variables: \"lag(x, -1)\""); 456 if (!lagToken.doubleVal.IsAlmost(Math.Round(lagToken.doubleVal))) 457 throw new ArgumentException("Time lags must be integer values"); 458 var laggedVarNode = funcNode as LaggedVariableTreeNode; 459 laggedVarNode.VariableName = varId.strVal; 460 laggedVarNode.Lag = (int)Math.Round(sign * lagToken.doubleVal); 461 laggedVarNode.Weight = 1.0; 438 return ParseFunctionOrLaggedVariable(tokens, idTok); 439 } else { 440 return ParseVariable(tokens, idTok); 441 } 442 } else if (next.TokenType == TokenType.LeftAngleBracket) { 443 // '<' 'num' [ '=' ['+'|'-'] number ] '>' 444 return ParseNumber(tokens); 445 } else if(next.TokenType == TokenType.Operator && (next.strVal == "-" || next.strVal == "+")) { 446 // ['+' | '-' ] SimpleFact 447 if (tokens.Dequeue().strVal == "-") { 448 var arg = ParseSimpleFact(tokens); 449 if (arg is NumberTreeNode numNode) { 450 numNode.Value *= -1; 451 return numNode; 452 } else if (arg is ConstantTreeNode constNode) { 453 var constSy = new Constant { Value = -constNode.Value }; 454 return constSy.CreateTreeNode(); 455 } else if (arg is VariableTreeNode varNode) { 456 varNode.Weight *= -1; 457 return varNode; 462 458 } else { 463 // functions 464 var args = ParseArgList(tokens); 465 // check number of arguments 466 if (funcNode.Symbol.MinimumArity > args.Length || funcNode.Symbol.MaximumArity < args.Length) { 467 throw new ArgumentException(string.Format("Symbol {0} requires between {1} and {2} arguments.", funcId, 468 funcNode.Symbol.MinimumArity, funcNode.Symbol.MaximumArity)); 469 } 470 foreach (var arg in args) funcNode.AddSubtree(arg); 459 var mul = GetSymbol("*").CreateTreeNode(); 460 var neg = minusOne.CreateTreeNode(); 461 mul.AddSubtree(neg); 462 mul.AddSubtree(arg); 463 return mul; 471 464 } 472 473 var rPar = tokens.Dequeue();474 if (rPar.TokenType != TokenType.RightPar)475 throw new ArgumentException("expected )");476 477 478 return funcNode;479 465 } else { 480 // variable 481 if (tokens.Peek().TokenType == TokenType.Eq) { 482 // binary factor 483 tokens.Dequeue(); // skip Eq 484 var valTok = tokens.Dequeue(); 485 if (valTok.TokenType != TokenType.Identifier) throw new ArgumentException("expected identifier"); 486 var binFactorNode = (BinaryFactorVariableTreeNode)binaryFactorVar.CreateTreeNode(); 487 binFactorNode.Weight = 1.0; 488 binFactorNode.VariableName = idTok.strVal; 489 binFactorNode.VariableValue = valTok.strVal; 490 return binFactorNode; 491 } else if (tokens.Peek().TokenType == TokenType.LeftBracket) { 492 // factor variable 493 var factorVariableNode = (FactorVariableTreeNode)factorVar.CreateTreeNode(); 494 factorVariableNode.VariableName = idTok.strVal; 495 496 tokens.Dequeue(); // skip [ 497 var weights = new List<double>(); 498 // at least one weight is necessary 499 var sign = 1.0; 500 if (tokens.Peek().TokenType == TokenType.Operator) { 501 var opToken = tokens.Dequeue(); 502 if (opToken.strVal == "+") sign = 1.0; 503 else if (opToken.strVal == "-") sign = -1.0; 504 else throw new ArgumentException(); 505 } 506 if (tokens.Peek().TokenType != TokenType.Number) throw new ArgumentException("number expected"); 507 var weightTok = tokens.Dequeue(); 508 weights.Add(sign * weightTok.doubleVal); 509 while (tokens.Peek().TokenType == TokenType.Comma) { 510 // skip comma 511 tokens.Dequeue(); 512 if (tokens.Peek().TokenType == TokenType.Operator) { 513 var opToken = tokens.Dequeue(); 514 if (opToken.strVal == "+") sign = 1.0; 515 else if (opToken.strVal == "-") sign = -1.0; 516 else throw new ArgumentException(); 517 } 518 weightTok = tokens.Dequeue(); 519 if (weightTok.TokenType != TokenType.Number) throw new ArgumentException("number expected"); 520 weights.Add(sign * weightTok.doubleVal); 521 } 522 var rightBracketToken = tokens.Dequeue(); 523 if (rightBracketToken.TokenType != TokenType.RightBracket) throw new ArgumentException("closing bracket ] expected"); 524 factorVariableNode.Weights = weights.ToArray(); 525 return factorVariableNode; 526 } else { 527 // variable 528 var varNode = (VariableTreeNode)variable.CreateTreeNode(); 529 varNode.Weight = 1.0; 530 varNode.VariableName = idTok.strVal; 531 return varNode; 532 } 533 } 534 } else if (next.TokenType == TokenType.LeftAngleBracket) { 535 Token numberTok = null; 536 var leftAngleBracket = tokens.Dequeue(); 537 if (leftAngleBracket.TokenType != TokenType.LeftAngleBracket) 538 throw new ArgumentException("opening bracket < expected"); 539 540 var idTok = tokens.Dequeue(); 541 if (idTok.TokenType != TokenType.Identifier || idTok.strVal.ToLower() != "num") 542 throw new ArgumentException("string 'num' expected"); 543 544 if (tokens.Peek().TokenType == TokenType.Eq) { 545 var equalTok = tokens.Dequeue(); 546 if (tokens.Peek().TokenType != TokenType.Number) 547 throw new ArgumentException("No value for number specified."); 548 549 numberTok = tokens.Dequeue(); 550 } 551 552 var rightAngleBracket = tokens.Dequeue(); 553 if (rightAngleBracket.TokenType != TokenType.RightAngleBracket) 554 throw new ArgumentException("closing bracket > expected"); 555 var numNode = (NumberTreeNode)number.CreateTreeNode(); 556 if (numberTok != null) numNode.Value = numberTok.doubleVal; 557 return numNode; 466 return ParseSimpleFact(tokens); 467 } 558 468 } else if (next.TokenType == TokenType.Number) { 469 // number 559 470 var numTok = tokens.Dequeue(); 560 var constSy = new Constant { Value = numTok.doubleVal};471 var constSy = new Constant { Value = numTok.doubleVal }; 561 472 return constSy.CreateTreeNode(); 562 /*var constNode = (ConstantTreeNode)constant.CreateTreeNode();563 constNode.Value = numTok.doubleVal;564 return constNode;*/565 473 } else { 566 474 throw new ArgumentException(string.Format("unexpected token in expression {0}", next.strVal)); 567 475 } 476 } 477 478 private ISymbolicExpressionTreeNode ParseNumber(Queue<Token> tokens) { 479 // we distinguish parameters and constants. The values of parameters can be changed. 480 // a parameter is written as '<' 'num' [ '=' ['+'|'-'] number ] '>' with an optional initialization 481 Token numberTok = null; 482 var leftAngleBracket = tokens.Dequeue(); 483 if (leftAngleBracket.TokenType != TokenType.LeftAngleBracket) 484 throw new ArgumentException("opening bracket < expected"); 485 486 var idTok = tokens.Dequeue(); 487 if (idTok.TokenType != TokenType.Identifier || idTok.strVal.ToLower() != "num") 488 throw new ArgumentException("string 'num' expected"); 489 490 var numNode = (NumberTreeNode)number.CreateTreeNode(); 491 492 if (tokens.Peek().TokenType == TokenType.Eq) { 493 tokens.Dequeue(); // skip "=" 494 var next = tokens.Peek(); 495 if (next.strVal != "+" && next.strVal != "-" && next.TokenType != TokenType.Number) 496 throw new ArgumentException("Expected '+', '-' or number."); 497 498 var sign = 1.0; 499 if (next.strVal == "+" || next.strVal == "-") { 500 if (tokens.Dequeue().strVal == "-") sign = -1.0; 501 } 502 if(tokens.Peek().TokenType != TokenType.Number) { 503 throw new ArgumentException("Expected number."); 504 } 505 numberTok = tokens.Dequeue(); 506 numNode.Value = sign * numberTok.doubleVal; 507 } 508 509 var rightAngleBracket = tokens.Dequeue(); 510 if (rightAngleBracket.TokenType != TokenType.RightAngleBracket) 511 throw new ArgumentException("closing bracket > expected"); 512 513 return numNode; 514 } 515 516 private ISymbolicExpressionTreeNode ParseVariable(Queue<Token> tokens, Token idTok) { 517 // variable 518 if (tokens.Peek().TokenType == TokenType.Eq) { 519 // binary factor 520 tokens.Dequeue(); // skip Eq 521 var valTok = tokens.Dequeue(); 522 if (valTok.TokenType != TokenType.Identifier) throw new ArgumentException("expected identifier"); 523 var binFactorNode = (BinaryFactorVariableTreeNode)binaryFactorVar.CreateTreeNode(); 524 binFactorNode.Weight = 1.0; 525 binFactorNode.VariableName = idTok.strVal; 526 binFactorNode.VariableValue = valTok.strVal; 527 return binFactorNode; 528 } else if (tokens.Peek().TokenType == TokenType.LeftBracket) { 529 // factor variable 530 var factorVariableNode = (FactorVariableTreeNode)factorVar.CreateTreeNode(); 531 factorVariableNode.VariableName = idTok.strVal; 532 533 tokens.Dequeue(); // skip [ 534 var weights = new List<double>(); 535 // at least one weight is necessary 536 var sign = 1.0; 537 if (tokens.Peek().TokenType == TokenType.Operator) { 538 var opToken = tokens.Dequeue(); 539 if (opToken.strVal == "+") sign = 1.0; 540 else if (opToken.strVal == "-") sign = -1.0; 541 else throw new ArgumentException(); 542 } 543 if (tokens.Peek().TokenType != TokenType.Number) throw new ArgumentException("number expected"); 544 var weightTok = tokens.Dequeue(); 545 weights.Add(sign * weightTok.doubleVal); 546 while (tokens.Peek().TokenType == TokenType.Comma) { 547 // skip comma 548 tokens.Dequeue(); 549 if (tokens.Peek().TokenType == TokenType.Operator) { 550 var opToken = tokens.Dequeue(); 551 if (opToken.strVal == "+") sign = 1.0; 552 else if (opToken.strVal == "-") sign = -1.0; 553 else throw new ArgumentException(); 554 } 555 weightTok = tokens.Dequeue(); 556 if (weightTok.TokenType != TokenType.Number) throw new ArgumentException("number expected"); 557 weights.Add(sign * weightTok.doubleVal); 558 } 559 var rightBracketToken = tokens.Dequeue(); 560 if (rightBracketToken.TokenType != TokenType.RightBracket) throw new ArgumentException("closing bracket ] expected"); 561 factorVariableNode.Weights = weights.ToArray(); 562 return factorVariableNode; 563 } else { 564 // variable 565 var varNode = (VariableTreeNode)variable.CreateTreeNode(); 566 varNode.Weight = 1.0; 567 varNode.VariableName = idTok.strVal; 568 return varNode; 569 } 570 } 571 572 private ISymbolicExpressionTreeNode ParseFunctionOrLaggedVariable(Queue<Token> tokens, Token idTok) { 573 var funcId = idTok.strVal.ToUpperInvariant(); 574 575 var funcNode = GetSymbol(funcId).CreateTreeNode(); 576 var lPar = tokens.Dequeue(); 577 if (lPar.TokenType != TokenType.LeftPar) 578 throw new ArgumentException("expected ("); 579 580 // handle 'lag' specifically 581 if (funcNode.Symbol is LaggedVariable) { 582 ParseLaggedVariable(tokens, funcNode); 583 } else { 584 // functions 585 var args = ParseArgList(tokens); 586 // check number of arguments 587 if (funcNode.Symbol.MinimumArity > args.Length || funcNode.Symbol.MaximumArity < args.Length) { 588 throw new ArgumentException(string.Format("Symbol {0} requires between {1} and {2} arguments.", funcId, 589 funcNode.Symbol.MinimumArity, funcNode.Symbol.MaximumArity)); 590 } 591 foreach (var arg in args) funcNode.AddSubtree(arg); 592 } 593 594 var rPar = tokens.Dequeue(); 595 if (rPar.TokenType != TokenType.RightPar) 596 throw new ArgumentException("expected )"); 597 598 599 return funcNode; 600 } 601 602 private static void ParseLaggedVariable(Queue<Token> tokens, ISymbolicExpressionTreeNode funcNode) { 603 var varId = tokens.Dequeue(); 604 if (varId.TokenType != TokenType.Identifier) throw new ArgumentException("Identifier expected. Format for lagged variables: \"lag(x, -1)\""); 605 var comma = tokens.Dequeue(); 606 if (comma.TokenType != TokenType.Comma) throw new ArgumentException("',' expected, Format for lagged variables: \"lag(x, -1)\""); 607 double sign = 1.0; 608 if (tokens.Peek().strVal == "+" || tokens.Peek().strVal == "-") { 609 // read sign 610 var signTok = tokens.Dequeue(); 611 if (signTok.strVal == "-") sign = -1.0; 612 } 613 var lagToken = tokens.Dequeue(); 614 if (lagToken.TokenType != TokenType.Number) throw new ArgumentException("Number expected, Format for lagged variables: \"lag(x, -1)\""); 615 if (!lagToken.doubleVal.IsAlmost(Math.Round(lagToken.doubleVal))) 616 throw new ArgumentException("Time lags must be integer values"); 617 var laggedVarNode = funcNode as LaggedVariableTreeNode; 618 laggedVarNode.VariableName = varId.strVal; 619 laggedVarNode.Lag = (int)Math.Round(sign * lagToken.doubleVal); 620 laggedVarNode.Weight = 1.0; 568 621 } 569 622
Note: See TracChangeset
for help on using the changeset viewer.