
source: branches/OKBJavaConnector/ECJClient/src/ec/gp/ge/GrammarParser.java @ 6152

Last change on this file since 6152 was 6152, checked in by bfarka, 13 years ago

added ecj and custom statistics to communicate with the okb services #1441

File size: 10.9 KB
package ec.gp.ge;
import java.io.*;
import java.util.*;
import ec.*;
import ec.gp.GPFunctionSet;
import ec.gp.GPNode;
import ec.util.*;

/*
 * GrammarParser.java
 *
 * Created: Sun Dec  5 11:33:43 EST 2010
 * By: Houston Mooers, with modifications by Sean Luke
 *
 */

/**
 * A GrammarParser is the basic class for parsing a GE ruleset into a parse graph of GrammarNodes.
 * This parse graph is then later used to produce a GPIndividual from a GEIndividual in GESpecies.
 * It is assumed that the root will represent the first rule given in the grammar.
 *
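 * <p>For illustration only (the rule and function names here are hypothetical), a grammar
 * in the format this parser expects consists of one rule per line, each looking roughly like
 *
 * <p><tt>&lt;exp&gt; ::= (add &lt;exp&gt; &lt;exp&gt;) | (mul &lt;exp&gt; &lt;exp&gt;) | (x)</tt>
 *
 * <p>Each parenthesized choice names a function that must appear, by name, in the
 * GPFunctionSet handed to parseRules(); a terminal is simply a zero-argument function
 * such as <tt>(x)</tt>.  A bare <tt>&lt;rule&gt;</tt> may also appear as a choice, and
 * lines beginning with # are comments.
 *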
 */



public class GrammarParser implements Prototype
    {
    public static final String P_PARSER = "parser";

    // The parsed rules, hashed by name
    HashMap rules = new HashMap();
    // The resulting parse graph
    GrammarRuleNode root = null;

    /** The default regular expressions for tokens in the parser.  If you'd
        like to change minor features of the regular expressions, override the
        getRegexes() method in a subclass to return a different array.  Note that
        if you INSERT a new regular expression into the middle of these, the values
        of the various token constants ("LPAREN", "RULE", etc.) will be wrong, so you
        will need to override or modify the methods which use them.*/
    public static final String[] DEFAULT_REGEXES = new String[]
    {
    "\\p{Blank}*#[^\\n\\r]*",               // COMMENT: matches a #foo up to but not including the newline.  Should appear first.
    "\\p{Blank}*\\(",                       // LPAREN: matches a (
    "\\p{Blank}*\\)",                       // RPAREN: matches a )
46    "\\p{Blank}*<[^<>()\\p{Space}]*>",      // RULE: matches a rule of the form <foo>.  No <, >, (, ), |, or spaces may appear in foo
47    "\\p{Blank}*[|]",                       // PIPE: matches a |
48    "\\p{Blank}*::=",                       // EQUALS: matches a :==
49    "\\p{Blank}*::=",                       // NUMERIC_CONSTANT: does nothing right now, so set to be identical to EQUALS.  Reserved for future use.
50    "\\p{Blank}*::=",                       // BOOLEAN_CONSTANT: does nothing right now, so set to be identical to EQUALS.  Reserved for future use.
51    "\\p{Blank}*::=",                       // STRING_CONSTANT: does nothing right now, so set to be identical to EQUALS.  Reserved for future use.
52    "\\p{Blank}*[^<>()|\\p{Space}]+",       // FUNCTION (must appear after RULE and PIPE): matches a rule of the form foo.  No <, >, (, ), |, or spaces may appear in foo, and foo must have at least one character.
    };

    protected static final int COMMENT = 0;
    protected static final int LPAREN = 1;
    protected static final int RPAREN = 2;
    protected static final int RULE = 3;
    protected static final int PIPE = 4;
    protected static final int EQUALS = 5;
    // the following three are reserved for future use
    protected static final int NUMERIC_CONSTANT = 6;
    protected static final int BOOLEAN_CONSTANT = 7;
    protected static final int STRING_CONSTANT = 8;
    // and now we continue with our regularly scheduled program
    protected static final int FUNCTION = 9;
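
    // Illustrative tokenization (hypothetical rule and function names), assuming the
    // default regexes above: the line
    //
    //     <exp> ::= (add <exp> <var>) | <var>
    //
    // is read as the token sequence RULE <exp>, EQUALS, LPAREN, FUNCTION add,
    // RULE <exp>, RULE <var>, RPAREN, PIPE, RULE <var>.  A line consisting only
    // of "# some text" matches COMMENT and is ignored by parseRule() below.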

    /** Returns the regular expressions to use for tokenizing these rules.  By default DEFAULT_REGEXES are returned. */
    public String[] getRegexes() { return DEFAULT_REGEXES; }
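
    // A minimal sketch of how a subclass might use this hook (hypothetical subclass code,
    // assuming the regex positions -- and hence the token constants above -- are preserved):
    //
    //     public String[] getRegexes()
    //         {
    //         String[] regexes = (String[]) (DEFAULT_REGEXES.clone());
    //         regexes[COMMENT] = "\\p{Blank}*(#|//)[^\\n\\r]*";   // also treat // as a comment
    //         return regexes;
    //         }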

    public Parameter defaultBase()
        {
        return GEDefaults.base().push(P_PARSER);
        }

    public void setup(EvolutionState state, Parameter base)
        {
        }

    public Object clone()
        {
        try
            {
            GrammarParser other = (GrammarParser) (super.clone());
            other.rules = (HashMap)(rules.clone());
            // we'll pointer-copy the root
            return other;
            }
        catch (CloneNotSupportedException e) { return null; } // never happens
        }

    // Returns a rule from the hashmap.  If one does not exist, creates a rule with the
    // given head and stores, then returns that.
    GrammarRuleNode getRule(HashMap rules, String head)
        {
        if (rules.containsKey(head))
            return (GrammarRuleNode)(rules.get(head));
        else
            {
            GrammarRuleNode node = new GrammarRuleNode(head);
            rules.put(head, node);
            return node;
            }
        }

    // Parses a rule, one rule per line, from the lexer.  Adds to the existing hashmap if there's already a rule there.
    GrammarRuleNode parseRule(EvolutionState state, Lexer lexer, GPFunctionSet gpfs)
        {
        GrammarRuleNode retResult = null;

        String token = lexer.nextToken();
        if(lexer.getMatchingIndex() == COMMENT) return null; //ignore the comment
        if(lexer.getMatchingIndex() == RULE) //rule head, good, as expected...
            {
            lexer.nextToken();
            if(lexer.getMatchingIndex() != EQUALS)
                {
                state.output.fatal("GE Grammar Error: Expecting equal sign after rule head: " + token);
                }
            retResult = getRule(rules, token);
            parseProductions(state, retResult, lexer, gpfs);
            }
        else
            {
125            state.output.fatal("GE Grammar Error - Unexpected token: Expecting rule head.: " + token);
            }
        return retResult;
        // IMPLEMENTED
        // Need to parse the rule using a recursive descent parser
        // If there was an error, then try to call state.output.error(...).
        //
        // Don't merge into any existing rule -- I do that in parseRules below.  Instead, just pull out
        // rules and hang them into your "new rule" as necessary.
        // Use getRule(rules, "<rulename>") to extract the rule representing the current rule name which you
        // can hang inside there as necessary.
        //
        // If you have to you can call state.output.fatal(...) which will terminate the program,
        // but piling up some errors might be useful.  I'll handle the exitIfErrors() in parseRules below
        //
        // Return null if there was no rule to parse (blank line or all comments) but no errors.
        // Also return null if you called state.output.error(...).
        }

    // Parses each of a rule's production choices.
    void parseProductions(EvolutionState state, GrammarRuleNode retResult, Lexer lexer, GPFunctionSet gpfs)
        {
        GrammarFunctionNode grammarfuncnode;
        do
            {
            String token = lexer.nextToken();
            if(lexer.getMatchingIndex() == RULE)
                {
                retResult.addChoice(getRule(rules, token));
                token = lexer.nextToken();
                }
            else
                {
                if(lexer.getMatchingIndex() != LPAREN) //first expect '('
                    {
                    state.output.fatal("GE Grammar Error - Unexpected token for rule: " + retResult.getHead() + ". Expecting '('.");
                    }
                token = lexer.nextToken();
                if(lexer.getMatchingIndex() != FUNCTION) //now expecting function
                    {
                    state.output.fatal("GE Grammar Error - Expecting a function name after first '(' for rule: " + retResult.getHead() + " Error: " + token);
                    }
                else
                    {
                    if (!(gpfs.nodesByName.containsKey(token)))
                        {
                        state.output.fatal("GPNode " + token + " is not defined in the function set.");
                        }
                    grammarfuncnode = new GrammarFunctionNode(gpfs, token);
                    token = lexer.nextToken();
                    while(lexer.getMatchingIndex() != RPAREN)
                        {
                        if(lexer.getMatchingIndex() != RULE) //this better be the name of a rule node
                            {
                            state.output.fatal("GE Grammar Error - Expecting a rule name as argument for function definition: " + grammarfuncnode.getHead() + " Error on : " + token);
                            }
                        grammarfuncnode.addArgument(getRule(rules, token));
                        token = lexer.nextToken();
                        }
                    retResult.addChoice(grammarfuncnode);
                    }
                //after right paren, should see either '|' or newline
                token = lexer.nextToken();
                if(lexer.getMatchingIndex() != PIPE && lexer.getMatchingIndex() != Lexer.FAILURE)
                    {
                    state.output.fatal("GE Grammar Error - Expecting either '|' delimiter or newline. Error on : " + token);
                    }
                }
            }
        while(lexer.getMatchingIndex() == PIPE);
        }
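
    // Illustrative result (hypothetical names): given the line
    //
    //     <exp> ::= (add <exp> <var>) | <var>
    //
    // parseRule() and parseProductions() build a GrammarRuleNode for <exp> with two
    // choices: a GrammarFunctionNode for "add" whose two arguments are the rule nodes
    // for <exp> and <var>, and the GrammarRuleNode for <var> itself.  "add" must exist
    // by name in the GPFunctionSet's nodesByName map, or parsing stops with a fatal error.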

    /** Parses the rules from a grammar and returns the resulting GrammarRuleNode root. */
    public GrammarRuleNode parseRules(EvolutionState state, BufferedReader reader, GPFunctionSet gpfs)
        {
        rules = new HashMap();
        try
            {
            String line;
            while ((line = reader.readLine()) != null)
                {
                GrammarRuleNode rule = parseRule(state, new Lexer(line.trim(), getRegexes()), gpfs);  // use getRegexes() so a subclass's custom regexes take effect
                if (rule != null && root == null) root = rule;
                }
            }
        catch (IOException e) { } // do nothing
        state.output.exitIfErrors();
        return root;
        }

    public String toString()
        {
        String ret = "Grammar[";
        Iterator i = rules.values().iterator();
        while(i.hasNext())
            {
            ret = ret +"\n" + i.next();
            }
        return ret + "\n\t]";
        }

    /**
     * Checks that all grammar rules in the rules hashmap have at least one production choice.
     * @return true if all grammar rules are properly defined, false otherwise
     */
    public boolean validateRules()
        {
        boolean isok = true;
        Iterator i = rules.values().iterator();
        while(i.hasNext())
            {
            GrammarRuleNode rule = (GrammarRuleNode)(i.next());
            if(rule.getNumChoices() < 1)
                {
                System.out.println("Grammar is bad! - Rule not defined: " + rule);
                isok = false;
                }
            }
        if (isok) { System.out.println("All rules appear properly defined!"); return true; }
        return false;
        }
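
    // For example (hypothetical names), a grammar containing the line
    // "<exp> ::= (add <exp> <num>)" but no line defining <num> leaves the <num>
    // GrammarRuleNode (created on demand by getRule()) with zero choices, so
    // validateRules() reports it and returns false.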

    /** A simple testing facility. */
    public static void main(String args[]) throws FileNotFoundException
        {
        // make a dummy EvolutionState that just has an output for testing
        EvolutionState state = new EvolutionState();
        state.output = new Output(true);
        state.output.addLog(ec.util.Log.D_STDOUT,false);
        state.output.addLog(ec.util.Log.D_STDERR,true);

        GrammarParser gp = new GrammarParser();
        gp.parseRules(state, new BufferedReader(new FileReader(new File(args[0]))), null);
        gp.validateRules();
        System.err.println(gp);
        }
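
    // The test facility above can be run from the command line, for example
    // (jar name and classpath are assumptions -- adjust to your build):
    //
    //     java -cp ecj.jar ec.gp.ge.GrammarParser my.grammar
    //
    // main() parses the grammar file named in args[0] (with a null function set),
    // validates the rules, and prints the resulting parse graph.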
    }