1 | package ec.gp.ge; |
---|
2 | import java.io.*; |
---|
3 | import java.util.*; |
---|
4 | import ec.*; |
---|
5 | import ec.gp.GPFunctionSet; |
---|
6 | import ec.gp.GPNode; |
---|
7 | import ec.util.*; |
---|
8 | |
---|
9 | /* |
---|
10 | * GrammarParser.java |
---|
11 | * |
---|
12 | * Created: Sun Dec 5 11:33:43 EST 2010 |
---|
13 | * By: Houston Mooers, with modifications by Sean Luke |
---|
14 | * |
---|
15 | */ |
---|
16 | |
---|
17 | /** |
---|
18 | * A GrammarParser is the basic class for parsing a GE ruleset into a parse graph of GrammarNodes. |
---|
19 | * This parse graph is then later used to produce a GPIndividual from a GEIndividual in GESpecies. |
---|
20 | * It is assumed that the root will represent the first rule given in the grammar. |
---|
21 | * |
---|
22 | */ |
---|
23 | |
---|
24 | |
---|
25 | |
---|
26 | public class GrammarParser implements Prototype |
---|
27 | { |
---|
28 | public static final String P_PARSER = "parser"; |
---|
29 | |
---|
30 | // The parsed rules, hashed by name |
---|
31 | HashMap rules = new HashMap(); |
---|
32 | // The resulting parse graph |
---|
33 | GrammarRuleNode root = null; |
---|
34 | |
---|
35 | /** The default regular expressions for tokens in the parser. If you'd |
---|
36 | like to change minor features of the regular expressions, override the |
---|
37 | getRegexes() method in a subclass to return a different array. Note that |
---|
38 | if you INSERT a new regular expression into the middle of these, the values |
---|
39 | of the various token constants ("LPAREN", "RULE", etc.) will be wrong, so you |
---|
40 | will need to override or modify the methods which use them.*/ |
---|
41 | public static final String[] DEFAULT_REGEXES = new String[] |
---|
42 | { |
---|
43 | "\\p{Blank}*#[^\\n\\r]*", // COMMENT: matches a #foo up to but not including the newline. Should appear first. |
---|
44 | "\\p{Blank}*\\(", // LPAREN: matches a ( |
---|
45 | "\\p{Blank}*\\)", // RPAREN: matches a ) |
---|
46 | "\\p{Blank}*<[^<>()\\p{Space}]*>", // RULE: matches a rule of the form <foo>. No <, >, (, ), |, or spaces may appear in foo |
---|
47 | "\\p{Blank}*[|]", // PIPE: matches a | |
---|
48 | "\\p{Blank}*::=", // EQUALS: matches a :== |
---|
49 | "\\p{Blank}*::=", // NUMERIC_CONSTANT: does nothing right now, so set to be identical to EQUALS. Reserved for future use. |
---|
50 | "\\p{Blank}*::=", // BOOLEAN_CONSTANT: does nothing right now, so set to be identical to EQUALS. Reserved for future use. |
---|
51 | "\\p{Blank}*::=", // STRING_CONSTANT: does nothing right now, so set to be identical to EQUALS. Reserved for future use. |
---|
52 | "\\p{Blank}*[^<>()|\\p{Space}]+", // FUNCTION (must appear after RULE and PIPE): matches a rule of the form foo. No <, >, (, ), |, or spaces may appear in foo, and foo must have at least one character. |
---|
53 | }; |
---|
54 | |
---|
55 | protected static final int COMMENT = 0; |
---|
56 | protected static final int LPAREN = 1; |
---|
57 | protected static final int RPAREN = 2; |
---|
58 | protected static final int RULE = 3; |
---|
59 | protected static final int PIPE = 4; |
---|
60 | protected static final int EQUALS = 5; |
---|
61 | // the following three are reserved for future use |
---|
62 | protected static final int NUMERIC_CONSTANT = 6; |
---|
63 | protected static final int BOOLEAN_CONSTANT = 7; |
---|
64 | protected static final int STRING_CONSTANT = 8; |
---|
65 | // and now we continue with our regularly scheduled program |
---|
66 | protected static final int FUNCTION = 9; |
---|
67 | |
---|
68 | /** Returns the regular expressions to use for tokenizing these rules. By default DEFAULT_REGEXES are returned. */ |
---|
69 | public String[] getRegexes() { return DEFAULT_REGEXES; } |
---|
70 | |
---|
71 | public Parameter defaultBase() |
---|
72 | { |
---|
73 | return GEDefaults.base().push(P_PARSER); |
---|
74 | } |
---|
75 | |
---|
76 | public void setup(EvolutionState state, Parameter base) |
---|
77 | { |
---|
78 | } |
---|
79 | |
---|
80 | public Object clone() |
---|
81 | { |
---|
82 | try |
---|
83 | { |
---|
84 | GrammarParser other = (GrammarParser) (super.clone()); |
---|
85 | other.rules = (HashMap)(rules.clone()); |
---|
86 | // we'll pointer-copy the root |
---|
87 | return other; |
---|
88 | } |
---|
89 | catch (CloneNotSupportedException e) { return null; } // never happens |
---|
90 | } |
---|
91 | |
---|
92 | // Returns a rule from the hashmap. If one does not exist, creates a rule with the |
---|
93 | // given head and stores, then returns that. |
---|
94 | GrammarRuleNode getRule(HashMap rules, String head) |
---|
95 | { |
---|
96 | if (rules.containsKey(head)) |
---|
97 | return (GrammarRuleNode)(rules.get(head)); |
---|
98 | else |
---|
99 | { |
---|
100 | GrammarRuleNode node = new GrammarRuleNode(head); |
---|
101 | rules.put(head, node); |
---|
102 | return node; |
---|
103 | } |
---|
104 | } |
---|
105 | |
---|
106 | // Parses a rule, one rule per line, from the lexer. Adds to the existing hashmap if there's already a rule there. |
---|
107 | GrammarRuleNode parseRule(EvolutionState state, Lexer lexer, GPFunctionSet gpfs) |
---|
108 | { |
---|
109 | GrammarRuleNode retResult = null; |
---|
110 | |
---|
111 | String token = lexer.nextToken(); |
---|
112 | if(lexer.getMatchingIndex() == COMMENT) return null; //ignore the comment |
---|
113 | if(lexer.getMatchingIndex() == RULE) //rule head, good, as expected... |
---|
114 | { |
---|
115 | lexer.nextToken(); |
---|
116 | if(lexer.getMatchingIndex() != EQUALS) |
---|
117 | { |
---|
118 | state.output.fatal("GE Grammar Error: Expecting equal sign after rule head: " + token); |
---|
119 | } |
---|
120 | retResult = getRule(rules, token); |
---|
121 | parseProductions(state, retResult, lexer, gpfs); |
---|
122 | } |
---|
123 | else |
---|
124 | { |
---|
125 | state.output.fatal("GE Grammar Error - Unexpected token: Expecting rule head.: " + token); |
---|
126 | } |
---|
127 | return retResult; |
---|
128 | // IMPLEMENTED |
---|
129 | // Need to parse the rule using a recursive descent parser |
---|
130 | // If there was an error, then try to call state.output.error(...). |
---|
131 | // |
---|
132 | // Don't merge into any existing rule -- I do that in parseRules below. Instead, just pull out |
---|
133 | // rules and hang them into your "new rule" as necessary. |
---|
134 | // Use getRule(rules, "<rulename>") to extract the rule representing the current rule name which you |
---|
135 | // can hang inside there as necessary. |
---|
136 | // |
---|
137 | // If you have to you can call state.output.fatal(...) which will terminate the program, |
---|
138 | // but piling up some errors might be useful. I'll handle the exitIfErors() in parseRules below |
---|
139 | // |
---|
140 | // Return null if there was no rule to parse (blank line or all comments) but no errors. |
---|
141 | // Also return null if you called state.output.error(...). |
---|
142 | } |
---|
143 | |
---|
144 | // Parses each of a rule's production choices. |
---|
145 | void parseProductions(EvolutionState state, GrammarRuleNode retResult, Lexer lexer, GPFunctionSet gpfs) |
---|
146 | { |
---|
147 | GrammarFunctionNode grammarfuncnode; |
---|
148 | do |
---|
149 | { |
---|
150 | String token = lexer.nextToken(); |
---|
151 | if(lexer.getMatchingIndex() == RULE) |
---|
152 | { |
---|
153 | retResult.addChoice(getRule(rules, token)); |
---|
154 | token = lexer.nextToken(); |
---|
155 | } |
---|
156 | else |
---|
157 | { |
---|
158 | if(lexer.getMatchingIndex() != LPAREN) //first expect '(' |
---|
159 | { |
---|
160 | state.output.fatal("GE Grammar Error - Unexpected token for rule: " + retResult.getHead() + "Expecting '('."); |
---|
161 | } |
---|
162 | token = lexer.nextToken(); |
---|
163 | if(lexer.getMatchingIndex() != FUNCTION) //now expecting function |
---|
164 | { |
---|
165 | state.output.fatal("GE Grammar Error - Expecting a function name after first '(' for rule: " + retResult.getHead() + " Error: " + token); |
---|
166 | } |
---|
167 | else |
---|
168 | { |
---|
169 | if (!(gpfs.nodesByName.containsKey(token))) |
---|
170 | { |
---|
171 | state.output.fatal("GPNode " + token + " is not defined in the function set."); |
---|
172 | } |
---|
173 | grammarfuncnode = new GrammarFunctionNode(gpfs, token); |
---|
174 | token = lexer.nextToken(); |
---|
175 | while(lexer.getMatchingIndex() != RPAREN) |
---|
176 | { |
---|
177 | if(lexer.getMatchingIndex() != RULE) //this better be the name of a rule node |
---|
178 | { |
---|
179 | state.output.fatal("GE Grammar Error - Expecting a rule name as argument for function definition: " + grammarfuncnode.getHead() + " Error on : " + token); |
---|
180 | } |
---|
181 | grammarfuncnode.addArgument(getRule(rules, token)); |
---|
182 | token = lexer.nextToken(); |
---|
183 | } |
---|
184 | retResult.addChoice(grammarfuncnode); |
---|
185 | } |
---|
186 | //after right paren, should see either '|' or newline |
---|
187 | token = lexer.nextToken(); |
---|
188 | if(lexer.getMatchingIndex() != PIPE && lexer.getMatchingIndex() != Lexer.FAILURE) |
---|
189 | { |
---|
190 | state.output.fatal("GE Grammar Error - Expecting either '|' delimiter or newline. Error on : " + token); |
---|
191 | } |
---|
192 | } |
---|
193 | } |
---|
194 | while(lexer.getMatchingIndex() == PIPE); |
---|
195 | } |
---|
196 | |
---|
197 | /** Parses the rules from a grammar and returns the resulting GrammarRuleNode root. */ |
---|
198 | public GrammarRuleNode parseRules(EvolutionState state, BufferedReader reader, GPFunctionSet gpfs) |
---|
199 | { |
---|
200 | rules = new HashMap(); |
---|
201 | try |
---|
202 | { |
---|
203 | String line; |
---|
204 | while ((line = reader.readLine()) != null) |
---|
205 | { |
---|
206 | GrammarRuleNode rule = parseRule(state, new Lexer(line.trim(), DEFAULT_REGEXES), gpfs); |
---|
207 | if (rule != null && root == null) root = rule; |
---|
208 | } |
---|
209 | } |
---|
210 | catch (IOException e) { } // do nothing |
---|
211 | state.output.exitIfErrors(); |
---|
212 | return root; |
---|
213 | } |
---|
214 | |
---|
215 | public String toString() |
---|
216 | { |
---|
217 | String ret = "Grammar["; |
---|
218 | Iterator i = rules.values().iterator(); |
---|
219 | while(i.hasNext()) |
---|
220 | { |
---|
221 | ret = ret +"\n" + i.next(); |
---|
222 | } |
---|
223 | return ret + "\n\t]"; |
---|
224 | } |
---|
225 | |
---|
226 | /** |
---|
227 | * Checks that all grammar rules in ruleshashmap have at least one possible production |
---|
228 | * @return true if grammar rules are properly defined, false otherwise |
---|
229 | */ |
---|
230 | public boolean validateRules() |
---|
231 | { |
---|
232 | boolean isok = true; |
---|
233 | Iterator i = rules.values().iterator(); |
---|
234 | while(i.hasNext()) |
---|
235 | { |
---|
236 | GrammarRuleNode rule = (GrammarRuleNode)(i.next()); |
---|
237 | if(rule.getNumChoices() < 1) |
---|
238 | { |
---|
239 | System.out.println("Grammar is bad! - Rule not defined: " + rule); |
---|
240 | isok = false; |
---|
241 | } |
---|
242 | } |
---|
243 | if (isok) { System.out.println("All rules appear properly defined!"); return true; } |
---|
244 | return false; |
---|
245 | } |
---|
246 | |
---|
247 | /** A simple testing fcility. */ |
---|
248 | public static void main(String args[]) throws FileNotFoundException |
---|
249 | { |
---|
250 | // make a dummy EvolutionState that just has an output for testing |
---|
251 | EvolutionState state = new EvolutionState(); |
---|
252 | state.output = new Output(true); |
---|
253 | state.output.addLog(ec.util.Log.D_STDOUT,false); |
---|
254 | state.output.addLog(ec.util.Log.D_STDERR,true); |
---|
255 | |
---|
256 | GrammarParser gp = new GrammarParser(); |
---|
257 | gp.parseRules(state, new BufferedReader(new FileReader(new File(args[0]))), null); |
---|
258 | gp.validateRules(); |
---|
259 | System.err.println(gp); |
---|
260 | } |
---|
261 | } |
---|