Free cookie consent management tool by TermsFeed Policy Generator

source: branches/OKBJavaConnector/ECJClient/src/ec/gp/ge/GESpecies.java @ 9449

Last change on this file since 9449 was 6152, checked in by bfarka, 14 years ago

added ecj and custom statistics to communicate with the okb services #1441

File size: 16.8 KB
Line 
1/*
2  Copyright 20010 by Sean Luke and George Mason University
3  Licensed under the Academic Free License version 3.0
4  See the file "LICENSE" for more information
5*/
6package ec.gp.ge;
7
8import java.io.BufferedReader;
9import java.io.File;
10import java.io.FileNotFoundException;
11import java.io.FileReader;
12import java.io.IOException;
13import java.util.ArrayList;
14import java.util.HashMap;
15import java.util.List;
16
17import ec.gp.*;
18import ec.*;
19import ec.vector.*;
20import ec.util.*;
21
22import java.util.Arrays;
23import java.util.regex.Matcher;
24import java.util.regex.Pattern;
25
26/*
27 * GESpecies.java
28 *
29 * Created: Sun Dec  5 11:33:43 EST 2010
30 * By: Eric Kangas, Joseph Zelibor III, Houston Mooers, and Sean Luke
31 *
32 */
33
34/**
35 * <p>GESpecies generates GPIndividuals from GEIndividuals through the application of a grammar parse graph
36 * computed by the GrammarParser.
37 *
38 * <p>GESpecies uses a <b>GrammarParser</b> to do its dirty work.  This parser's job is to take a grammar (in the form of a BufferedReader)
39 * and convert it to a tree of GrammarNodes which define the parse graph of the grammar. The GESpecies then interprets his parse graph
40 * according to the values in the GEIndividual to produce the equivalent GPIndividual, which is then evaluated.
41 *
42 * <p>To do this, GESpecies relies on a subsidiary GPSpecies which defines the GPIndividual and various GPFunctionSets from which to
43 * build the parser.  This is a grand hack -- the GPSpecies does not know it's being used this way, and so we must provide various dummy
44 * parameters to keep the GPSpecies happy even though they'll never be used.
45 *
46 * <p>If you are daring, you can replace the GrammarParser with one of your own to customize the parse structure and grammar.
47 *
48 * <p><b>ECJ's Default GE Grammar</b>  GE traditionally can use any grammar, and builds parse graphs from that.  For simplicity, and in order to
49 * remain as compatable as possible with ECJ's existing GP facilities (and GP tradition), ECJ only uses a single Lisp-like grammar which
50 * generates standard ECJ trees.  This doesn't lose much in generality as the grammar is quite genral.
51 *
52 * <p>The grammar assumes that expansion points are enclosed in &lt;> and functions are enclosed in ().  For example:
53 *
54 * <p><tt>
55 * # This is a comment
56 * &lt;prog> ::= &lt;op><br>
57 * &lt;op> ::= (if-food-ahead &lt;op> &lt;op>)<br>
58 * &lt;op> ::=  (progn2 &lt;op> &lt;op>)<br>
59 * &lt;op> ::= (progn3 &lt;op> &lt;op> &lt;op>)<br>
60 * &lt;op> ::= (left) | (right) | (move)<br>
61 * </tt>
62 *
63 * <p>alternatively the grammar could also be writen in the following format:
64 *
65 * <p><tt>
66 * &lt;prog> ::= &lt;op><br>
67 * &lt;op> ::= (if-food-ahead &lt;op> &lt;op>) | (progn2 &lt;op> &lt;op>) | (progn3 &lt;op> &lt;op> &lt;op>) | (left) | (right) | (move)<br>
68 * </tt>
69 *
70 * <p>Note that you can use several lines to define the same grammar rule: for example, <tt>&lt;op></tt> was defined by several lines when
71 * it could have consisted of several elements separated by vertical pipes ( <tt>|</tt> ).  Either way is fine, or a combination of both.
72 *
73 * <p>GPNodes are included in the grammar by using their name.  This includes ERCs, ADFs, ADMs, and ADFArguments, which should all work just fine.
74 * For example, since most ERC GPNodes are simply named "ERC", if you have only one ERC GPNode in your function set, you can just use <tt>(ERC)</tt>
75 * in your grammar.
76 *
77 * <p>Once the gammar file has been created and setup has been run trees can the be created using the genome (chromosome) of a GEIndividual.
78 * A genome of an individual is an array of random integers each of which are one byte long.  These numbers are used when a decision point
79 * (a rule having more that one choice) is reached within the grammar.  Once a particular gene (index) in the genome has been used it will
80 * not be used again (this may change) when creating the tree.
81 *
82 * <p>For example:<br>
83 * number of chromosomes used = 0<br>
84 * genome = {23, 654, 86}<br>
85 * the current rule we are considering is &lt;op>.<br>
86 * %lt;op> can map into one of the following: (if-food-ahead &lt;op> &lt;op>) | (progn2 &lt;op> &lt;op>) | (progn3 &lt;op> &lt;op> &lt;op>)
87 * | (left) | (right) | (move)<br>
88 * Since the rule &lt;op> has more than one choice that it can map to, we must consult the genome to decide which choice to take.  In this case
89 * the number of chromosomes used is 0 so genome[0] is used and number of chromosomes used is incremented.  Since values in the genome can
90 * be negitive values they are offset by 128 (max negitive of a byte) giving us a value from 0-255.  A modulus is performed on this resulting
91 * number by the number of choices present for the given rule.  In the above example since we are using genome[0] the resulting operation would
92 * look like: 23+128=151, number of choices for &lt;op> = 6, 151%6=1 so we use choices[1] which is: (progn2 &lt;op> &lt;op>).  If all the genes
93 * in a genome are used and the tree is still incompete an invalid tree error is returned.
94 *
95 * <p>Each node in the tree is a GPNode and trees are constructed depth first.
96 *
97 *
98 * <p><b>Parameters</b><br>
99 * <table>
100 * <tr><td valign=top><i>base.</i><tt>file</tt><br>
101 * <font size=-1>String</font></td>
102 * <td valign=top>(the file is where the rules of the gammar are stored)</td></tr>
103 *
104 * <tr><td valign=top><i>base.</i><tt>gp-species</tt><br>
105 * <font size=-1>classname, inherits and != ec.gp.GPSpecies</font></td>
106 * <td valign=top>(the GPSpecies subservient to the GESpecies)</td></tr>
107 *
108 * <tr><td valign=top><i>base.</i><tt>parser</tt><br>
109 * <font size=-1>classname, inherits and != ec.gp.ge.GrammarParser</font></td>
110 * <td valign=top>(the GrammarParser used by the GESpecies)</td></tr>
111 *
112 * </table>
113 *
114 * <p><b>Default Base</b><br>
115 * ec.gp.ge.GESpecies
116 *
117 * @author Joseph Zelibor III, Eric Kangas, Houston Mooers, and Sean Luke
118 * @version 1.0
119 */
120 
121public class GESpecies extends IntegerVectorSpecies
122    {
123    public static final String P_GESPECIES = "species";
124    public static final String P_FILE = "file";
125    public static final String P_GPSPECIES = "gp-species";
126    public static final String P_PARSER = "parser";
127       
128    /* Return value which denotes that the tree has grown too large. */
129    public static final int BIG_TREE_ERROR = -1;
130       
131    /** The GPSpecies subsidiary to GESpecies. */
132    public GPSpecies gpspecies;
133
134    /** All the ERCs created so far. */
135    public HashMap ERCBank;
136
137    /** The parsed grammars. */
138    public GrammarRuleNode[] grammar;
139
140    /** The prototypical parser used to parse the grammars. */
141    public GrammarParser parser_prototype;
142
143    public void setup(final EvolutionState state, final Parameter base)
144        {
145        super.setup(state, base);
146
147        Parameter p = base;
148        Parameter def = defaultBase();
149
150        p = base.push(P_GPSPECIES);
151        gpspecies = (GPSpecies) (state.parameters.getInstanceForParameterEq(p, def.push(P_GPSPECIES), GPSpecies.class));
152        gpspecies.setup(state, p);
153
154        // check to make sure that our individual prototype is a GPIndividual
155        if (!(i_prototype instanceof ByteVectorIndividual))
156            {
157            state.output.fatal("The Individual class for the Species " + getClass().getName() + " is must be a subclass of ec.gp.ge.GEIndividual.", base);
158            }
159
160        ERCBank = new HashMap();
161
162        // load the grammars, one per ADF tree
163        GPIndividual gpi = (GPIndividual) (gpspecies.i_prototype);
164        GPTree[] trees = gpi.trees;
165        int numGrammars = trees.length;
166
167        parser_prototype = (GrammarParser) (state.parameters.getInstanceForParameterEq(base.push(P_PARSER), def.push(P_PARSER), GrammarParser.class));
168
169        grammar = new GrammarRuleNode[numGrammars];
170        for(int i = 0; i < numGrammars; i++)
171            {
172            p = base.push(P_FILE);
173            def = defaultBase();
174                       
175            File grammarFile = state.parameters.getFile(p, def.push(P_FILE).push("" + i));
176            if(grammarFile == null)
177                {
178                state.output.fatal("Error retrieving grammar file(s): " + def.toString() + "."+ P_FILE + "." + i + " is undefined.");
179                }
180
181            try
182                {
183                GPFunctionSet gpfs = trees[i].constraints((GPInitializer) state.initializer).functionset;
184                GrammarParser grammarparser = (GrammarParser)(parser_prototype.clone());
185                grammar[i] = grammarparser.parseRules(state, new BufferedReader(new FileReader(grammarFile)), gpfs);
186                }
187            catch (FileNotFoundException e)
188                {
189                state.output.fatal("Error retrieving grammar file(s): " + def.toString() + "."+ P_FILE + "." + i + " does not exist or cannot be opened.");
190                }
191            }
192        }
193
194
195    /**
196     * creates all of an individual's trees
197     * @param state Evolution state
198     * @param trees array of trees for the individual
199     * @param ind the GEIndividual
200     * @param threadnum tread number
201     * @return number of chromosomes consumed
202     */
203    public int makeTrees(EvolutionState state, GEIndividual ind, GPTree[] trees, int threadnum)
204        {
205        int position = 0;
206
207        for (int i = 0; i < trees.length; i++)
208            {
209            //cannot complete one of the trees with the given chromosome
210            if(position < 0)
211                return BIG_TREE_ERROR;
212
213            position = makeTree(state, ind, trees[i], position, i, threadnum);
214            }
215
216        return position;
217        }
218
219    /**
220     * makeTree, edits the tree that its given by adding a root (and all subtrees attached)
221     * @param state
222     * @param ind
223     * @param tree
224     * @param position
225     * @param treeNum
226     * @param threadnum
227     * @return the number of chromosomes used, or an BIG_TREE_ERROR sentinel value.
228     */
229    public int makeTree(EvolutionState state, GEIndividual ind, GPTree tree, int position, int treeNum, int threadnum)
230        {
231        int[] countNumberOfChromosomesUsed = {  position  };  // hack, use an array to pass an extra value
232        byte[] genome = ind.genome;
233        GPFunctionSet gpfs = tree.constraints((GPInitializer) state.initializer).functionset;
234        GPNode root;
235
236        try // get the tree, or return an error.
237            {
238            root = makeSubtree(countNumberOfChromosomesUsed, genome, state, gpfs, grammar[treeNum], treeNum, threadnum);
239            }
240        catch (BigTreeException e)
241            {
242            return BIG_TREE_ERROR;
243            }
244
245        if(root == null)
246            {
247            state.output.fatal("Invalid tree: tree #" + treeNum);
248            }
249
250        root.parent = tree;
251        tree.child = root;
252        return countNumberOfChromosomesUsed[0];
253        }
254
255    // thrown by makeSubtree when chromosome is not large enough for the generated tree.
256    class BigTreeException extends RuntimeException { static final long serialVersionUID = 1L; }
257
258    GPNode makeSubtree(int[] index, byte[] genome, EvolutionState es, GPFunctionSet gpfs, GrammarRuleNode rule, int treeNum, int threadnum)
259        {
260        //have we exceeded the length of the genome?  No point in going further.
261        if (index[0] >= genome.length)
262            {
263            throw new BigTreeException();
264            }
265
266        //expand the rule with the chromosome to get a body element
267        int i;
268
269        //key for ERC hashtable look ups is the current index within the genome
270        int key = genome[index[0]];
271
272        //non existant rule got passed in
273        if (rule == null)
274            {
275            es.output.fatal("An undefined rule exists within the grammar.");
276            }
277
278        //more than one rule to consider, pick one based off the genome, and consume the current gene
279        if (rule.getNumChoices() > 1)
280            {
281            //casting to an int should be ok since the biggest these genes can be is a byte
282            i = ((genome[index[0]]) - ((int)(this.minGene(index[0])))) % rule.getNumChoices();
283            index[0]++;
284            }
285        //only 1 rule to consider
286        else
287            {
288            i = 0;
289            }               
290        GrammarNode choice = rule.getChoice(i);         
291
292        // if body is another rule head
293        //look up rule
294        if(choice instanceof GrammarRuleNode)
295            {
296            GrammarRuleNode nextrule = (GrammarRuleNode) choice;
297            return makeSubtree(index, genome, es, gpfs, nextrule, treeNum, threadnum);
298            }                               
299        else //handle functions
300            {
301            GrammarFunctionNode funcgrammarnode = (GrammarFunctionNode) choice;
302
303            GPNode validNode = funcgrammarnode.getGPNodePrototype();
304
305            int numChildren = validNode.children.length;
306            //index 0 is the node itself
307            int numChildrenInGrammar = funcgrammarnode.getNumArguments();
308
309            //does the grammar contain the correct amount of children that the GPNode requires
310            if (numChildren != numChildrenInGrammar)
311                {
312                es.output.fatal("GPNode " + validNode.toStringForHumans() + " requires " + numChildren + " children.  "
313                    + numChildrenInGrammar + " children found in the grammar.");
314                }
315
316            //check to see if it is an ERC node
317            if (validNode instanceof ERC)
318                {               
319                validNode = obtainERC(es, key, genome, threadnum, validNode);
320                }
321            //non ERC node
322            else
323                {
324                validNode = validNode.lightClone();
325                }
326
327            //get the rest.
328            for (int j = 0, childNumber = 0; j < funcgrammarnode.getNumArguments(); j++)
329                {
330                //get and link children to the current GPNode
331                validNode.children[childNumber] = makeSubtree(index, genome, es, gpfs, (GrammarRuleNode)funcgrammarnode.getArgument(j), treeNum, threadnum);
332                if (validNode.children[childNumber] == null)
333                    {
334                    return null;
335                    }
336                childNumber++;
337                }
338            return validNode;
339            }
340        }
341
342    /** Loads an ERC from the ERCBank given the value in the genome.  If there is no such ERC, then one is created and randomized, then added to the bank.
343        The point of this mechanism is to enable ERCs to appear in multiple places in a GPTree. */
344    public GPNode obtainERC(EvolutionState state, int key, byte[] genome, int threadnum, GPNode node)
345        {
346        ArrayList ERCList = (ArrayList) (ERCBank.get(new Integer(key)));
347
348        if (ERCList == null)
349            {
350            ERCList = new ArrayList();
351            ERCBank.put(new Integer(key), ERCList);
352            }
353
354        GPNode dummy = null;
355
356        // search array list for an ERC of the same type we want
357        for (int i = 0; i < ERCList.size(); i++)
358            {
359            dummy = (GPNode) ERCList.get(i);
360
361            // ERC was found inside the arraylist
362            if (dummy.nodeEquivalentTo(node))
363                {
364                return dummy.lightClone();
365                }
366            }
367
368        // erc was not found in the array list lets make one
369        node = node.lightClone();
370        node.resetNode(state, threadnum);
371        ERCList.add(node);
372
373        return node;
374        }
375
376    public Object clone()
377        {
378        GESpecies other = (GESpecies) (super.clone());
379        other.gpspecies = (GPSpecies) (gpspecies.clone());
380        return other;
381        }
382
383    public Parameter defaultBase()
384        {
385        return GEDefaults.base().push(P_GESPECIES);
386        }
387
388    /** Returns the number of elements consumed from the GEIndividual array to produce
389        the tree, else returns -1 if an error occurs, specifically if all elements were
390        consumed and the tree had still not been completed. */
391    public int consumed(EvolutionState state, GEIndividual ind, int threadnum)
392        {
393        // create a dummy individual
394        GPIndividual newind = ((GPIndividual) (gpspecies.i_prototype)).lightClone();
395
396        // do the mapping and return the number consumed
397        return makeTrees(state, ind, newind.trees, threadnum);
398        }
399
400    /** Returns a dummy GPIndividual with a single tree which was built by mapping
401        over the elements of the given GEIndividual.  Null is returned if an error occurs,
402        specifically, if all elements were consumed and the tree had still not been completed. */
403    public GPIndividual map(EvolutionState state, GEIndividual ind, int threadnum)
404        {
405        // create a dummy individual
406        GPIndividual newind = ((GPIndividual) (gpspecies.i_prototype)).lightClone();
407
408        // Do NOT initialize its trees
409
410        // Set the fitness to the ByteVectorIndividual's fitness
411        newind.fitness = ind.fitness;
412        newind.evaluated = false;
413
414        // Set the species to me
415        newind.species = gpspecies;
416
417        // do the mapping
418        if (makeTrees(state, ind, newind.trees, threadnum) < 0)  // error
419            return null;
420        else
421            return newind;
422        }
423    }
Note: See TracBrowser for help on using the repository browser.