Changeset 14761 for branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Importer/InfixExpressionParser.cs
- Timestamp:
- 03/18/17 12:17:13 (8 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Importer/InfixExpressionParser.cs
r14351 r14761 39 39 /// 40 40 /// 41 /// S = Expr EOF 42 /// Expr = ['-' | '+'] Term { '+' Term | '-' Term } 43 /// Term = Fact { '*' Fact | '/' Fact } 44 /// Fact = '(' Expr ')' | funcId '(' Expr ')' | VarExpr | number 45 /// VarExpr = varId [ '=' varVal] 46 /// varId = ident | ' ident ' | " ident " 47 /// varVal = ident | ' ident ' | " ident " 48 /// ident = '_' | letter { '_' | letter | digit } 41 /// S = Expr EOF 42 /// Expr = ['-' | '+'] Term { '+' Term | '-' Term } 43 /// Term = Fact { '*' Fact | '/' Fact } 44 /// Fact = '(' Expr ')' 45 /// | 'LAG' '(' varId ',' ['+' | '-' ] number ')' 46 /// | funcId '(' ArgList ')' 47 /// | VarExpr | number 48 /// ArgList = Expr { ',' Expr } 49 /// VarExpr = varId OptFactorPart 50 /// OptFactorPart = [ ('=' varVal | '[' number {',' number } ']' ) ] 51 /// varId = ident | ' ident ' | " ident " 52 /// varVal = ident | ' ident ' | " ident " 53 /// ident = '_' | letter { '_' | letter | digit } 49 54 /// </summary> 50 55 public sealed class InfixExpressionParser { 51 private enum TokenType { Operator, Identifier, Number, LeftPar, RightPar, Comma, Eq, End, NA };56 private enum TokenType { Operator, Identifier, Number, LeftPar, RightPar, LeftBracket, RightBracket, Comma, Eq, End, NA }; 52 57 private class Token { 53 58 internal double doubleVal; … … 77 82 private Variable variable = new Variable(); 78 83 private BinaryFactorVariable binaryFactorVar = new BinaryFactorVariable(); 84 private FactorVariable factorVar = new FactorVariable(); 79 85 80 86 private ProgramRootSymbol programRootSymbol = new ProgramRootSymbol(); … … 126 132 127 133 128 foreach 134 foreach(var kvp in dict) { 129 135 knownSymbols.Add(kvp.Key, kvp.Value); 130 136 } … … 145 151 private IEnumerable<Token> GetAllTokens(string str) { 146 152 int pos = 0; 147 while 148 while 149 if 153 while(true) { 154 while(pos < str.Length && Char.IsWhiteSpace(str[pos])) pos++; 155 if(pos >= str.Length) { 150 156 yield return new Token { TokenType = TokenType.End, strVal = "" }; 151 157 yield break; 152 158 } 153 if 159 if(char.IsDigit(str[pos])) { 154 160 // read number (=> read until white space or operator or comma) 155 161 var sb = new StringBuilder(); 156 162 sb.Append(str[pos]); 157 163 pos++; 158 while 164 while(pos < str.Length && !char.IsWhiteSpace(str[pos]) 159 165 && (str[pos] != '+' || str[pos - 1] == 'e' || str[pos - 1] == 'E') // continue reading exponents 160 166 && (str[pos] != '-' || str[pos - 1] == 'e' || str[pos - 1] == 'E') … … 162 168 && str[pos] != '/' 163 169 && str[pos] != ')' 170 && str[pos] != ']' 164 171 && str[pos] != ',') { 165 172 sb.Append(str[pos]); … … 167 174 } 168 175 double dblVal; 169 if 176 if(double.TryParse(sb.ToString(), NumberStyles.Float, CultureInfo.InvariantCulture, out dblVal)) 170 177 yield return new Token { TokenType = TokenType.Number, strVal = sb.ToString(), doubleVal = dblVal }; 171 178 else yield return new Token { TokenType = TokenType.NA, strVal = sb.ToString() }; 172 } else if 179 } else if(char.IsLetter(str[pos]) || str[pos] == '_') { 173 180 // read ident 174 181 var sb = new StringBuilder(); 175 182 sb.Append(str[pos]); 176 183 pos++; 177 while 184 while(pos < str.Length && 178 185 (char.IsLetter(str[pos]) || str[pos] == '_' || char.IsDigit(str[pos]))) { 179 186 sb.Append(str[pos]); … … 181 188 } 182 189 yield return new Token { TokenType = TokenType.Identifier, strVal = sb.ToString() }; 183 } else if 190 } else if(str[pos] == '"') { 184 191 // read to next " 185 192 pos++; 186 193 var sb = new StringBuilder(); 187 while 194 while(pos < str.Length && str[pos] != '"') { 188 195 sb.Append(str[pos]); 189 196 pos++; 190 197 } 191 if 198 if(pos < str.Length && str[pos] == '"') { 192 199 pos++; // skip " 193 200 yield return new Token { TokenType = TokenType.Identifier, strVal = sb.ToString() }; … … 195 202 yield return new Token { TokenType = TokenType.NA }; 196 203 197 } else if 204 } else if(str[pos] == '\'') { 198 205 // read to next ' 199 206 pos++; 200 207 var sb = new StringBuilder(); 201 while 208 while(pos < str.Length && str[pos] != '\'') { 202 209 sb.Append(str[pos]); 203 210 pos++; 204 211 } 205 if 212 if(pos < str.Length && str[pos] == '\'') { 206 213 pos++; // skip ' 207 214 yield return new Token { TokenType = TokenType.Identifier, strVal = sb.ToString() }; 208 215 } else 209 216 yield return new Token { TokenType = TokenType.NA }; 210 } else if 217 } else if(str[pos] == '+') { 211 218 pos++; 212 219 yield return new Token { TokenType = TokenType.Operator, strVal = "+" }; 213 } else if 220 } else if(str[pos] == '-') { 214 221 pos++; 215 222 yield return new Token { TokenType = TokenType.Operator, strVal = "-" }; 216 } else if 223 } else if(str[pos] == '/') { 217 224 pos++; 218 225 yield return new Token { TokenType = TokenType.Operator, strVal = "/" }; 219 } else if 226 } else if(str[pos] == '*') { 220 227 pos++; 221 228 yield return new Token { TokenType = TokenType.Operator, strVal = "*" }; 222 } else if 229 } else if(str[pos] == '(') { 223 230 pos++; 224 231 yield return new Token { TokenType = TokenType.LeftPar, strVal = "(" }; 225 } else if 232 } else if(str[pos] == ')') { 226 233 pos++; 227 234 yield return new Token { TokenType = TokenType.RightPar, strVal = ")" }; 228 } else if (str[pos] == '=') { 235 } else if(str[pos] == '[') { 236 pos++; 237 yield return new Token { TokenType = TokenType.LeftBracket, strVal = "[" }; 238 } else if(str[pos] == ']') { 239 pos++; 240 yield return new Token { TokenType = TokenType.RightBracket, strVal = "]" }; 241 } else if(str[pos] == '=') { 229 242 pos++; 230 243 yield return new Token { TokenType = TokenType.Eq, strVal = "=" }; 231 } else if 244 } else if(str[pos] == ',') { 232 245 pos++; 233 246 yield return new Token { TokenType = TokenType.Comma, strVal = "," }; … … 237 250 } 238 251 } 239 240 // S = Expr EOF 241 // Expr = ['-' | '+'] Term { '+' Term | '-' Term } 242 // Term = Fact { '*' Fact | '/' Fact } 243 // Fact = '(' Expr ')' | funcId '(' ArgList ')' | varId | number 244 // ArgList = Expr { ',' Expr } 252 /// S = Expr EOF 245 253 private ISymbolicExpressionTreeNode ParseS(Queue<Token> tokens) { 246 254 var expr = ParseExpr(tokens); 247 255 248 256 var endTok = tokens.Dequeue(); 249 if 257 if(endTok.TokenType != TokenType.End) 250 258 throw new ArgumentException(string.Format("Expected end of expression (got {0})", endTok.strVal)); 251 259 252 260 return expr; 253 261 } 262 263 /// Expr = ['-' | '+'] Term { '+' Term | '-' Term } 254 264 private ISymbolicExpressionTreeNode ParseExpr(Queue<Token> tokens) { 255 265 var next = tokens.Peek(); … … 257 267 var negTerms = new List<ISymbolicExpressionTreeNode>(); 258 268 bool negateFirstTerm = false; 259 if 269 if(next.TokenType == TokenType.Operator && (next.strVal == "+" || next.strVal == "-")) { 260 270 tokens.Dequeue(); 261 if 271 if(next.strVal == "-") 262 272 negateFirstTerm = true; 263 273 } 264 274 var t = ParseTerm(tokens); 265 if 275 if(negateFirstTerm) negTerms.Add(t); 266 276 else posTerms.Add(t); 267 277 268 278 next = tokens.Peek(); 269 while 270 switch 279 while(next.strVal == "+" || next.strVal == "-") { 280 switch(next.strVal) { 271 281 case "+": { 272 282 tokens.Dequeue(); … … 286 296 287 297 var sum = GetSymbol("+").CreateTreeNode(); 288 foreach 289 if 290 if 298 foreach(var posTerm in posTerms) sum.AddSubtree(posTerm); 299 if(negTerms.Any()) { 300 if(negTerms.Count == 1) { 291 301 var sub = GetSymbol("-").CreateTreeNode(); 292 302 sub.AddSubtree(negTerms.Single()); … … 294 304 } else { 295 305 var sumNeg = GetSymbol("+").CreateTreeNode(); 296 foreach 306 foreach(var negTerm in negTerms) sumNeg.AddSubtree(negTerm); 297 307 298 308 var constNode = (ConstantTreeNode)constant.CreateTreeNode(); … … 305 315 } 306 316 } 307 if 317 if(sum.SubtreeCount == 1) return sum.Subtrees.First(); 308 318 else return sum; 309 319 } … … 311 321 private ISymbol GetSymbol(string tok) { 312 322 var symb = knownSymbols.GetByFirst(tok).FirstOrDefault(); 313 if 323 if(symb == null) throw new ArgumentException(string.Format("Unknown token {0} found.", tok)); 314 324 return symb; 315 325 } 316 326 317 // Term= Fact { '*' Fact | '/' Fact }327 /// Term = Fact { '*' Fact | '/' Fact } 318 328 private ISymbolicExpressionTreeNode ParseTerm(Queue<Token> tokens) { 319 329 var factors = new List<ISymbolicExpressionTreeNode>(); … … 322 332 323 333 var next = tokens.Peek(); 324 while 325 switch 334 while(next.strVal == "*" || next.strVal == "/") { 335 switch(next.strVal) { 326 336 case "*": { 327 337 tokens.Dequeue(); … … 342 352 next = tokens.Peek(); 343 353 } 344 if 354 if(factors.Count == 1) return factors.First(); 345 355 else { 346 356 var prod = GetSymbol("*").CreateTreeNode(); 347 foreach 357 foreach(var f in factors) prod.AddSubtree(f); 348 358 return prod; 349 359 } 350 360 } 351 361 352 // Fact = '(' Expr ')' | 'LAG' '(' varId ',' ['+' | '-' ] number ')' | funcId '(' Expr ')' | varId [ = valId ] | number 362 /// Fact = '(' Expr ')' 363 /// | 'LAG' '(' varId ',' ['+' | '-' ] number ')' 364 /// | funcId '(' ArgList ')' 365 /// | VarExpr | number 366 /// ArgList = Expr { ',' Expr } 367 /// VarExpr = varId OptFactorPart 368 /// OptFactorPart = [ ('=' varVal | '[' number {',' number } ']' ) ] 369 /// varId = ident | ' ident ' | " ident " 370 /// varVal = ident | ' ident ' | " ident " 371 /// ident = '_' | letter { '_' | letter | digit } 353 372 private ISymbolicExpressionTreeNode ParseFact(Queue<Token> tokens) { 354 373 var next = tokens.Peek(); 355 if 374 if(next.TokenType == TokenType.LeftPar) { 356 375 tokens.Dequeue(); 357 376 var expr = ParseExpr(tokens); 358 377 var rPar = tokens.Dequeue(); 359 if 378 if(rPar.TokenType != TokenType.RightPar) 360 379 throw new ArgumentException("expected )"); 361 380 return expr; 362 } else if 381 } else if(next.TokenType == TokenType.Identifier) { 363 382 var idTok = tokens.Dequeue(); 364 if 365 // function identifier 383 if(tokens.Peek().TokenType == TokenType.LeftPar) { 384 // function identifier or LAG 366 385 var funcId = idTok.strVal.ToUpperInvariant(); 367 386 368 387 var funcNode = GetSymbol(funcId).CreateTreeNode(); 369 388 var lPar = tokens.Dequeue(); 370 if 389 if(lPar.TokenType != TokenType.LeftPar) 371 390 throw new ArgumentException("expected ("); 372 391 373 392 // handle 'lag' specifically 374 if 393 if(funcNode.Symbol is LaggedVariable) { 375 394 var varId = tokens.Dequeue(); 376 if 395 if(varId.TokenType != TokenType.Identifier) throw new ArgumentException("Identifier expected. Format for lagged variables: \"lag(x, -1)\""); 377 396 var comma = tokens.Dequeue(); 378 if 397 if(comma.TokenType != TokenType.Comma) throw new ArgumentException("',' expected, Format for lagged variables: \"lag(x, -1)\""); 379 398 double sign = 1.0; 380 if 399 if(tokens.Peek().strVal == "+" || tokens.Peek().strVal == "-") { 381 400 // read sign 382 401 var signTok = tokens.Dequeue(); 383 if 402 if(signTok.strVal == "-") sign = -1.0; 384 403 } 385 404 var lagToken = tokens.Dequeue(); 386 if 387 if 405 if(lagToken.TokenType != TokenType.Number) throw new ArgumentException("Number expected, Format for lagged variables: \"lag(x, -1)\""); 406 if(!lagToken.doubleVal.IsAlmost(Math.Round(lagToken.doubleVal))) 388 407 throw new ArgumentException("Time lags must be integer values"); 389 408 var laggedVarNode = funcNode as LaggedVariableTreeNode; … … 395 414 var args = ParseArgList(tokens); 396 415 // check number of arguments 397 if 416 if(funcNode.Symbol.MinimumArity > args.Length || funcNode.Symbol.MaximumArity < args.Length) { 398 417 throw new ArgumentException(string.Format("Symbol {0} requires between {1} and {2} arguments.", funcId, 399 418 funcNode.Symbol.MinimumArity, funcNode.Symbol.MaximumArity)); 400 419 } 401 foreach 420 foreach(var arg in args) funcNode.AddSubtree(arg); 402 421 } 403 422 404 423 var rPar = tokens.Dequeue(); 405 if 424 if(rPar.TokenType != TokenType.RightPar) 406 425 throw new ArgumentException("expected )"); 407 426 … … 409 428 } else { 410 429 // variable 411 if 430 if(tokens.Peek().TokenType == TokenType.Eq) { 412 431 // binary factor 413 432 tokens.Dequeue(); // skip Eq 414 433 var valTok = tokens.Dequeue(); 415 if 434 if(valTok.TokenType != TokenType.Identifier) throw new ArgumentException("expected identifier"); 416 435 var binFactorNode = (BinaryFactorVariableTreeNode)binaryFactorVar.CreateTreeNode(); 417 436 binFactorNode.Weight = 1.0; … … 419 438 binFactorNode.VariableValue = valTok.strVal; 420 439 return binFactorNode; 440 } else if(tokens.Peek().TokenType == TokenType.LeftBracket) { 441 // factor variable 442 var factorVariableNode = (FactorVariableTreeNode) factorVar.CreateTreeNode(); 443 factorVariableNode.VariableName = idTok.strVal; 444 445 tokens.Dequeue(); // skip [ 446 var weights = new List<double>(); 447 // at least one weight is necessary 448 if(tokens.Peek().TokenType != TokenType.Number) throw new ArgumentException("number expected"); 449 var weightTok = tokens.Dequeue(); 450 weights.Add(weightTok.doubleVal); 451 while(tokens.Peek().TokenType == TokenType.Comma) { 452 // skip comma 453 tokens.Dequeue(); 454 weightTok = tokens.Dequeue(); 455 if(weightTok.TokenType != TokenType.Number) throw new ArgumentException("number expected"); 456 weights.Add(weightTok.doubleVal); 457 } 458 var rightBracketToken = tokens.Dequeue(); 459 if(rightBracketToken.TokenType != TokenType.RightBracket) throw new ArgumentException("closing bracket ] expected"); 460 factorVariableNode.Weights = weights.ToArray(); 461 return factorVariableNode; 421 462 } else { 422 463 // variable … … 427 468 } 428 469 } 429 } else if 470 } else if(next.TokenType == TokenType.Number) { 430 471 var numTok = tokens.Dequeue(); 431 472 var constNode = (ConstantTreeNode)constant.CreateTreeNode(); … … 441 482 var exprList = new List<ISymbolicExpressionTreeNode>(); 442 483 exprList.Add(ParseExpr(tokens)); 443 while 484 while(tokens.Peek().TokenType != TokenType.RightPar) { 444 485 var comma = tokens.Dequeue(); 445 if 486 if(comma.TokenType != TokenType.Comma) throw new ArgumentException("expected ',' "); 446 487 exprList.Add(ParseExpr(tokens)); 447 488 }
Note: See TracChangeset
for help on using the changeset viewer.