
source: branches/OKBJavaConnector/ECJClient/src/ec/gp/ge/GrammarParser.java @ 6152

Last change on this file since 6152 was 6152, checked in by bfarka, 13 years ago

added ecj and custom statistics to communicate with the okb services #1441

File size: 10.9 KB
package ec.gp.ge;
import java.io.*;
import java.util.*;
import ec.*;
import ec.gp.GPFunctionSet;
import ec.gp.GPNode;
import ec.util.*;

/*
 * GrammarParser.java
 *
 * Created: Sun Dec  5 11:33:43 EST 2010
 * By: Houston Mooers, with modifications by Sean Luke
 *
 */

/**
 * A GrammarParser is the basic class for parsing a GE ruleset into a parse graph of GrammarNodes.
 * This parse graph is then later used to produce a GPIndividual from a GEIndividual in GESpecies.
 * It is assumed that the root will represent the first rule given in the grammar.
 *
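 * <p>For illustration only (the rule and function names here are hypothetical), a grammar
 * in the format this parser expects consists of one rule per line, each looking roughly like
 *
 * <p><tt>&lt;exp&gt; ::= (add &lt;exp&gt; &lt;exp&gt;) | (mul &lt;exp&gt; &lt;exp&gt;) | (x)</tt>
 *
 * <p>Each parenthesized choice names a function that must appear, by name, in the
 * GPFunctionSet handed to parseRules(); a terminal is simply a zero-argument function
 * such as <tt>(x)</tt>.  A bare <tt>&lt;rule&gt;</tt> may also appear as a choice, and
 * lines beginning with # are comments.
 *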
 */



public class GrammarParser implements Prototype
    {
    public static final String P_PARSER = "parser";

    // The parsed rules, hashed by name
    HashMap rules = new HashMap();
    // The resulting parse graph
    GrammarRuleNode root = null;

    /** The default regular expressions for tokens in the parser.  If you'd
        like to change minor features of the regular expressions, override the
        getRegexes() method in a subclass to return a different array.  Note that
        if you INSERT a new regular expression into the middle of these, the values
        of the various token constants ("LPAREN", "RULE", etc.) will be wrong, so you
        will need to override or modify the methods which use them.*/
    public static final String[] DEFAULT_REGEXES = new String[]
    {
    "\\p{Blank}*#[^\\n\\r]*",               // COMMENT: matches a #foo up to but not including the newline.  Should appear first.
    "\\p{Blank}*\\(",                       // LPAREN: matches a (
    "\\p{Blank}*\\)",                       // RPAREN: matches a )
46    "\\p{Blank}*<[^<>()\\p{Space}]*>",      // RULE: matches a rule of the form <foo>.  No <, >, (, ), |, or spaces may appear in foo
47    "\\p{Blank}*[|]",                       // PIPE: matches a |
48    "\\p{Blank}*::=",                       // EQUALS: matches a :==
49    "\\p{Blank}*::=",                       // NUMERIC_CONSTANT: does nothing right now, so set to be identical to EQUALS.  Reserved for future use.
50    "\\p{Blank}*::=",                       // BOOLEAN_CONSTANT: does nothing right now, so set to be identical to EQUALS.  Reserved for future use.
51    "\\p{Blank}*::=",                       // STRING_CONSTANT: does nothing right now, so set to be identical to EQUALS.  Reserved for future use.
52    "\\p{Blank}*[^<>()|\\p{Space}]+",       // FUNCTION (must appear after RULE and PIPE): matches a rule of the form foo.  No <, >, (, ), |, or spaces may appear in foo, and foo must have at least one character.
    };

    protected static final int COMMENT = 0;
    protected static final int LPAREN = 1;
    protected static final int RPAREN = 2;
    protected static final int RULE = 3;
    protected static final int PIPE = 4;
    protected static final int EQUALS = 5;
    // the following three are reserved for future use
    protected static final int NUMERIC_CONSTANT = 6;
    protected static final int BOOLEAN_CONSTANT = 7;
    protected static final int STRING_CONSTANT = 8;
    // and now we continue with our regularly scheduled program
    protected static final int FUNCTION = 9;
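
    // Illustrative tokenization (hypothetical rule and function names), assuming the
    // default regexes above: the line
    //
    //     <exp> ::= (add <exp> <var>) | <var>
    //
    // is read as the token sequence RULE <exp>, EQUALS, LPAREN, FUNCTION add,
    // RULE <exp>, RULE <var>, RPAREN, PIPE, RULE <var>.  A line consisting only
    // of "# some text" matches COMMENT and is ignored by parseRule() below.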

    /** Returns the regular expressions to use for tokenizing these rules.  By default DEFAULT_REGEXES are returned. */
    public String[] getRegexes() { return DEFAULT_REGEXES; }
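
    // A minimal sketch of how a subclass might use this hook (hypothetical subclass code,
    // assuming the regex positions -- and hence the token constants above -- are preserved):
    //
    //     public String[] getRegexes()
    //         {
    //         String[] regexes = (String[]) (DEFAULT_REGEXES.clone());
    //         regexes[COMMENT] = "\\p{Blank}*(#|//)[^\\n\\r]*";   // also treat // as a comment
    //         return regexes;
    //         }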

    public Parameter defaultBase()
        {
        return GEDefaults.base().push(P_PARSER);
        }

    public void setup(EvolutionState state, Parameter base)
        {
        }

    public Object clone()
        {
        try
            {
            GrammarParser other = (GrammarParser) (super.clone());
            other.rules = (HashMap)(rules.clone());
            // we'll pointer-copy the root
            return other;
            }
        catch (CloneNotSupportedException e) { return null; } // never happens
        }

    // Returns a rule from the hashmap.  If one does not exist, creates a rule with the
    // given head and stores, then returns that.
    GrammarRuleNode getRule(HashMap rules, String head)
        {
        if (rules.containsKey(head))
            return (GrammarRuleNode)(rules.get(head));
        else
            {
            GrammarRuleNode node = new GrammarRuleNode(head);
            rules.put(head, node);
            return node;
            }
        }

    // Parses a rule, one rule per line, from the lexer.  Adds to the existing hashmap if there's already a rule there.
    GrammarRuleNode parseRule(EvolutionState state, Lexer lexer, GPFunctionSet gpfs)
        {
        GrammarRuleNode retResult = null;

        String token = lexer.nextToken();
        if(lexer.getMatchingIndex() == COMMENT) return null; //ignore the comment
        if(lexer.getMatchingIndex() == RULE) //rule head, good, as expected...
            {
            lexer.nextToken();
            if(lexer.getMatchingIndex() != EQUALS)
                {
                state.output.fatal("GE Grammar Error: Expecting equal sign after rule head: " + token);
                }
            retResult = getRule(rules, token);
            parseProductions(state, retResult, lexer, gpfs);
            }
        else
            {
125            state.output.fatal("GE Grammar Error - Unexpected token: Expecting rule head.: " + token);
            }
        return retResult;
        // IMPLEMENTED
        // Need to parse the rule using a recursive descent parser
        // If there was an error, then try to call state.output.error(...).
        //
        // Don't merge into any existing rule -- I do that in parseRules below.  Instead, just pull out
        // rules and hang them into your "new rule" as necessary.
        // Use getRule(rules, "<rulename>") to extract the rule representing the current rule name which you
        // can hang inside there as necessary.
        //
        // If you have to you can call state.output.fatal(...) which will terminate the program,
        // but piling up some errors might be useful.  I'll handle the exitIfErrors() in parseRules below
        //
        // Return null if there was no rule to parse (blank line or all comments) but no errors.
        // Also return null if you called state.output.error(...).
        }

    // Parses each of a rule's production choices.
    void parseProductions(EvolutionState state, GrammarRuleNode retResult, Lexer lexer, GPFunctionSet gpfs)
        {
        GrammarFunctionNode grammarfuncnode;
        do
            {
            String token = lexer.nextToken();
            if(lexer.getMatchingIndex() == RULE)
                {
                retResult.addChoice(getRule(rules, token));
                token = lexer.nextToken();
                }
            else
                {
                if(lexer.getMatchingIndex() != LPAREN) //first expect '('
                    {
                    state.output.fatal("GE Grammar Error - Unexpected token for rule: " + retResult.getHead() + ". Expecting '('.");
                    }
                token = lexer.nextToken();
                if(lexer.getMatchingIndex() != FUNCTION) //now expecting function
                    {
                    state.output.fatal("GE Grammar Error - Expecting a function name after first '(' for rule: " + retResult.getHead() + " Error: " + token);
                    }
                else
                    {
                    if (!(gpfs.nodesByName.containsKey(token)))
                        {
                        state.output.fatal("GPNode " + token + " is not defined in the function set.");
                        }
                    grammarfuncnode = new GrammarFunctionNode(gpfs, token);
                    token = lexer.nextToken();
                    while(lexer.getMatchingIndex() != RPAREN)
                        {
                        if(lexer.getMatchingIndex() != RULE) //this better be the name of a rule node
                            {
                            state.output.fatal("GE Grammar Error - Expecting a rule name as argument for function definition: " + grammarfuncnode.getHead() + " Error on : " + token);
                            }
                        grammarfuncnode.addArgument(getRule(rules, token));
                        token = lexer.nextToken();
                        }
                    retResult.addChoice(grammarfuncnode);
                    }
                //after right paren, should see either '|' or newline
                token = lexer.nextToken();
                if(lexer.getMatchingIndex() != PIPE && lexer.getMatchingIndex() != Lexer.FAILURE)
                    {
                    state.output.fatal("GE Grammar Error - Expecting either '|' delimiter or newline. Error on : " + token);
                    }
                }
            }
        while(lexer.getMatchingIndex() == PIPE);
        }
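
    // Illustrative result (hypothetical names): given the line
    //
    //     <exp> ::= (add <exp> <var>) | <var>
    //
    // parseRule() and parseProductions() build a GrammarRuleNode for <exp> with two
    // choices: a GrammarFunctionNode for "add" whose two arguments are the rule nodes
    // for <exp> and <var>, and the GrammarRuleNode for <var> itself.  "add" must exist
    // by name in the GPFunctionSet's nodesByName map, or parsing stops with a fatal error.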

    /** Parses the rules from a grammar and returns the resulting GrammarRuleNode root. */
    public GrammarRuleNode parseRules(EvolutionState state, BufferedReader reader, GPFunctionSet gpfs)
        {
        rules = new HashMap();
        try
            {
            String line;
            while ((line = reader.readLine()) != null)
                {
                GrammarRuleNode rule = parseRule(state, new Lexer(line.trim(), getRegexes()), gpfs);  // use getRegexes() so a subclass's custom regexes take effect
                if (rule != null && root == null) root = rule;
                }
            }
        catch (IOException e) { } // do nothing
        state.output.exitIfErrors();
        return root;
        }

    public String toString()
        {
        String ret = "Grammar[";
        Iterator i = rules.values().iterator();
        while(i.hasNext())
            {
            ret = ret +"\n" + i.next();
            }
        return ret + "\n\t]";
        }

    /**
     * Checks that all grammar rules in the rules hashmap have at least one production choice.
     * @return true if all grammar rules are properly defined, false otherwise
     */
    public boolean validateRules()
        {
        boolean isok = true;
        Iterator i = rules.values().iterator();
        while(i.hasNext())
            {
            GrammarRuleNode rule = (GrammarRuleNode)(i.next());
            if(rule.getNumChoices() < 1)
                {
                System.out.println("Grammar is bad! - Rule not defined: " + rule);
                isok = false;
                }
            }
        if (isok) { System.out.println("All rules appear properly defined!"); return true; }
        return false;
        }
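
    // For example (hypothetical names), a grammar containing the line
    // "<exp> ::= (add <exp> <num>)" but no line defining <num> leaves the <num>
    // GrammarRuleNode (created on demand by getRule()) with zero choices, so
    // validateRules() reports it and returns false.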

    /** A simple testing facility. */
    public static void main(String args[]) throws FileNotFoundException
        {
        // make a dummy EvolutionState that just has an output for testing
        EvolutionState state = new EvolutionState();
        state.output = new Output(true);
        state.output.addLog(ec.util.Log.D_STDOUT,false);
        state.output.addLog(ec.util.Log.D_STDERR,true);

        GrammarParser gp = new GrammarParser();
        gp.parseRules(state, new BufferedReader(new FileReader(new File(args[0]))), null);
        gp.validateRules();
        System.err.println(gp);
        }
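
    // The test facility above can be run from the command line, for example
    // (jar name and classpath are assumptions -- adjust to your build):
    //
    //     java -cp ecj.jar ec.gp.ge.GrammarParser my.grammar
    //
    // main() parses the grammar file named in args[0] (with a null function set),
    // validates the rules, and prints the resulting parse graph.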
    }