[6152] | 1 | package ec.util; |
---|
| 2 | import java.util.regex.*; |
---|
| 3 | |
---|
| 4 | /* |
---|
| 5 | * Lexer.java |
---|
| 6 | * |
---|
| 7 | * Created: Sun Dec 5 11:33:43 EST 2010 |
---|
| 8 | * By: Sean Luke |
---|
| 9 | * |
---|
| 10 | */ |
---|
| 11 | |
---|
/**
 * A simple regular-expression-based String tokenizer.  You provide Lexer with a String
 * or other CharSequence as input, plus an array of regular expressions.  Each time you
 * call nextToken(...), the Lexer tries each regular expression, in order, at the current
 * position in the input.  The first one that matches there is the winner: its matched
 * text is returned as the token and the current position moves to just past it.
 *
 * <p>The expressions are compiled with the default flags (in particular, not DOTALL).
 *
 * <p>Note that a rule which is able to match the empty string yields a zero-length
 * token and leaves the position where it was, so as long as the input is unchanged
 * the next call will produce that same empty token again.  Rules should therefore
 * consume at least one character.
 */

public class Lexer
{
    /** An index which indicates that no further tokens were found.  This could be due to the end of the
        input or due to text which none of the regular expressions match.  To tell the two apart, compare
        getMatchingPosition() against the length of the input. */
    public static final int FAILURE = -1;

    /** The text being tokenized. */
    CharSequence input;
    /** Offset in the input at which the next token must begin. */
    int position = 0;
    /** One matcher per regular expression, in the same order as regexps. */
    Matcher[] matchers;
    /** The regular expressions, in priority order. */
    String[] regexps;
    /** Index of the rule which matched the most recent token, or FAILURE. */
    int matchingIndex = FAILURE;

    /**
     * Builds a Lexer for the given input with the provided regular expressions.  The regular
     * expressions will be checked in order against the input, and the first one which matches
     * will be assumed to be the token.
     *
     * @param input the text to tokenize
     * @param regexps the regular expressions, in priority order; the array is copied, so
     *        later changes to it by the caller have no effect on this Lexer
     * @throws NullPointerException if regexps is null
     * @throws PatternSyntaxException if one of the regular expressions cannot be compiled
     */
    public Lexer(CharSequence input, String[] regexps)
    {
        // Keep a private copy: the matchers are compiled once, right here, so a later
        // change to the caller's array would otherwise make getMatchingRule() report a
        // string which is not the rule that actually matched.
        this.regexps = regexps.clone();
        matchers = new Matcher[this.regexps.length];
        for (int i = 0; i < this.regexps.length; i++)
        {
            matchers[i] = Pattern.compile(this.regexps[i]).matcher(input); // not DOTALL
        }
        this.input = input;
    }

    /**
     * Returns the next token as a string.  On success the current position advances to just
     * past the token and getMatchingIndex() reports which rule matched.  On failure the
     * position is left alone and getMatchingIndex() reports FAILURE.
     *
     * @param trim if true, the token is trimmed of leading and trailing whitespace before
     *        it is returned (the position still advances past the untrimmed match)
     * @return the text of the token, or null if no regular expression matches at the
     *         current position
     */
    public String nextToken(boolean trim)
    {
        final int end = input.length();
        for (int i = 0; i < matchers.length; i++)
        {
            Matcher matcher = matchers[i];
            // Restrict the matcher to the unread remainder of the input; lookingAt() then
            // only accepts a match which begins exactly at the current position.
            matcher.region(position, end);
            if (matcher.lookingAt())
            {
                position = matcher.end();
                matchingIndex = i;
                String token = matcher.group();
                return trim ? token.trim() : token;
            }
        }
        // we failed
        matchingIndex = FAILURE;
        return null;
    }

    /**
     * Returns the next token as a string.  The string is first trimmed of whitespace.
     *
     * @return the trimmed token, or null if no regular expression matches at the current position
     */
    public String nextToken()
    {
        return nextToken(true);
    }

    /**
     * Returns the index of the regular expression which matched the most recent token.
     *
     * @return the index into the array of regular expressions, or FAILURE if the most recent
     *         call to nextToken(...) found no token or nextToken(...) has not been called yet
     */
    public int getMatchingIndex()
    {
        return matchingIndex;
    }

    /**
     * Returns the regular expression which matched the most recent token.
     *
     * @return the regular expression, or null if getMatchingIndex() is FAILURE
     */
    public String getMatchingRule()
    {
        if (matchingIndex == FAILURE)
        {
            return null;
        }
        return regexps[matchingIndex];
    }

    /**
     * Returns the position in the input just beyond the most recent token.  This is also
     * where the next call to nextToken(...) will start looking.
     *
     * @return the offset into the input; 0 if no token has been matched yet
     */
    public int getMatchingPosition()
    {
        return position;
    }
}