1 | /* |
---|
2 | Copyright 20010 by Sean Luke and George Mason University |
---|
3 | Licensed under the Academic Free License version 3.0 |
---|
4 | See the file "LICENSE" for more information |
---|
5 | */ |
---|
6 | package ec.gp.ge; |
---|
7 | |
---|
8 | import java.io.BufferedReader; |
---|
9 | import java.io.File; |
---|
10 | import java.io.FileNotFoundException; |
---|
11 | import java.io.FileReader; |
---|
12 | import java.io.IOException; |
---|
13 | import java.util.ArrayList; |
---|
14 | import java.util.HashMap; |
---|
15 | import java.util.List; |
---|
16 | |
---|
17 | import ec.gp.*; |
---|
18 | import ec.*; |
---|
19 | import ec.vector.*; |
---|
20 | import ec.util.*; |
---|
21 | |
---|
22 | import java.util.Arrays; |
---|
23 | import java.util.regex.Matcher; |
---|
24 | import java.util.regex.Pattern; |
---|
25 | |
---|
26 | /* |
---|
27 | * GESpecies.java |
---|
28 | * |
---|
29 | * Created: Sun Dec 5 11:33:43 EST 2010 |
---|
30 | * By: Eric Kangas, Joseph Zelibor III, Houston Mooers, and Sean Luke |
---|
31 | * |
---|
32 | */ |
---|
33 | |
---|
34 | /** |
---|
35 | * <p>GESpecies generates GPIndividuals from GEIndividuals through the application of a grammar parse graph |
---|
36 | * computed by the GrammarParser. |
---|
37 | * |
---|
38 | * <p>GESpecies uses a <b>GrammarParser</b> to do its dirty work. This parser's job is to take a grammar (in the form of a BufferedReader) |
---|
39 | * and convert it to a tree of GrammarNodes which define the parse graph of the grammar. The GESpecies then interprets his parse graph |
---|
40 | * according to the values in the GEIndividual to produce the equivalent GPIndividual, which is then evaluated. |
---|
41 | * |
---|
42 | * <p>To do this, GESpecies relies on a subsidiary GPSpecies which defines the GPIndividual and various GPFunctionSets from which to |
---|
43 | * build the parser. This is a grand hack -- the GPSpecies does not know it's being used this way, and so we must provide various dummy |
---|
44 | * parameters to keep the GPSpecies happy even though they'll never be used. |
---|
45 | * |
---|
46 | * <p>If you are daring, you can replace the GrammarParser with one of your own to customize the parse structure and grammar. |
---|
47 | * |
---|
48 | * <p><b>ECJ's Default GE Grammar</b> GE traditionally can use any grammar, and builds parse graphs from that. For simplicity, and in order to |
---|
49 | * remain as compatable as possible with ECJ's existing GP facilities (and GP tradition), ECJ only uses a single Lisp-like grammar which |
---|
50 | * generates standard ECJ trees. This doesn't lose much in generality as the grammar is quite genral. |
---|
51 | * |
---|
52 | * <p>The grammar assumes that expansion points are enclosed in <> and functions are enclosed in (). For example: |
---|
53 | * |
---|
54 | * <p><tt> |
---|
55 | * # This is a comment |
---|
56 | * <prog> ::= <op><br> |
---|
57 | * <op> ::= (if-food-ahead <op> <op>)<br> |
---|
58 | * <op> ::= (progn2 <op> <op>)<br> |
---|
59 | * <op> ::= (progn3 <op> <op> <op>)<br> |
---|
60 | * <op> ::= (left) | (right) | (move)<br> |
---|
61 | * </tt> |
---|
62 | * |
---|
63 | * <p>alternatively the grammar could also be writen in the following format: |
---|
64 | * |
---|
65 | * <p><tt> |
---|
66 | * <prog> ::= <op><br> |
---|
67 | * <op> ::= (if-food-ahead <op> <op>) | (progn2 <op> <op>) | (progn3 <op> <op> <op>) | (left) | (right) | (move)<br> |
---|
68 | * </tt> |
---|
69 | * |
---|
70 | * <p>Note that you can use several lines to define the same grammar rule: for example, <tt><op></tt> was defined by several lines when |
---|
71 | * it could have consisted of several elements separated by vertical pipes ( <tt>|</tt> ). Either way is fine, or a combination of both. |
---|
72 | * |
---|
73 | * <p>GPNodes are included in the grammar by using their name. This includes ERCs, ADFs, ADMs, and ADFArguments, which should all work just fine. |
---|
74 | * For example, since most ERC GPNodes are simply named "ERC", if you have only one ERC GPNode in your function set, you can just use <tt>(ERC)</tt> |
---|
75 | * in your grammar. |
---|
76 | * |
---|
77 | * <p>Once the gammar file has been created and setup has been run trees can the be created using the genome (chromosome) of a GEIndividual. |
---|
78 | * A genome of an individual is an array of random integers each of which are one byte long. These numbers are used when a decision point |
---|
79 | * (a rule having more that one choice) is reached within the grammar. Once a particular gene (index) in the genome has been used it will |
---|
80 | * not be used again (this may change) when creating the tree. |
---|
81 | * |
---|
82 | * <p>For example:<br> |
---|
83 | * number of chromosomes used = 0<br> |
---|
84 | * genome = {23, 654, 86}<br> |
---|
85 | * the current rule we are considering is <op>.<br> |
---|
86 | * %lt;op> can map into one of the following: (if-food-ahead <op> <op>) | (progn2 <op> <op>) | (progn3 <op> <op> <op>) |
---|
87 | * | (left) | (right) | (move)<br> |
---|
88 | * Since the rule <op> has more than one choice that it can map to, we must consult the genome to decide which choice to take. In this case |
---|
89 | * the number of chromosomes used is 0 so genome[0] is used and number of chromosomes used is incremented. Since values in the genome can |
---|
90 | * be negitive values they are offset by 128 (max negitive of a byte) giving us a value from 0-255. A modulus is performed on this resulting |
---|
91 | * number by the number of choices present for the given rule. In the above example since we are using genome[0] the resulting operation would |
---|
92 | * look like: 23+128=151, number of choices for <op> = 6, 151%6=1 so we use choices[1] which is: (progn2 <op> <op>). If all the genes |
---|
93 | * in a genome are used and the tree is still incompete an invalid tree error is returned. |
---|
94 | * |
---|
95 | * <p>Each node in the tree is a GPNode and trees are constructed depth first. |
---|
96 | * |
---|
97 | * |
---|
98 | * <p><b>Parameters</b><br> |
---|
99 | * <table> |
---|
100 | * <tr><td valign=top><i>base.</i><tt>file</tt><br> |
---|
101 | * <font size=-1>String</font></td> |
---|
102 | * <td valign=top>(the file is where the rules of the gammar are stored)</td></tr> |
---|
103 | * |
---|
104 | * <tr><td valign=top><i>base.</i><tt>gp-species</tt><br> |
---|
105 | * <font size=-1>classname, inherits and != ec.gp.GPSpecies</font></td> |
---|
106 | * <td valign=top>(the GPSpecies subservient to the GESpecies)</td></tr> |
---|
107 | * |
---|
108 | * <tr><td valign=top><i>base.</i><tt>parser</tt><br> |
---|
109 | * <font size=-1>classname, inherits and != ec.gp.ge.GrammarParser</font></td> |
---|
110 | * <td valign=top>(the GrammarParser used by the GESpecies)</td></tr> |
---|
111 | * |
---|
112 | * </table> |
---|
113 | * |
---|
114 | * <p><b>Default Base</b><br> |
---|
115 | * ec.gp.ge.GESpecies |
---|
116 | * |
---|
117 | * @author Joseph Zelibor III, Eric Kangas, Houston Mooers, and Sean Luke |
---|
118 | * @version 1.0 |
---|
119 | */ |
---|
120 | |
---|
121 | public class GESpecies extends IntegerVectorSpecies |
---|
122 | { |
---|
123 | public static final String P_GESPECIES = "species"; |
---|
124 | public static final String P_FILE = "file"; |
---|
125 | public static final String P_GPSPECIES = "gp-species"; |
---|
126 | public static final String P_PARSER = "parser"; |
---|
127 | |
---|
128 | /* Return value which denotes that the tree has grown too large. */ |
---|
129 | public static final int BIG_TREE_ERROR = -1; |
---|
130 | |
---|
131 | /** The GPSpecies subsidiary to GESpecies. */ |
---|
132 | public GPSpecies gpspecies; |
---|
133 | |
---|
134 | /** All the ERCs created so far. */ |
---|
135 | public HashMap ERCBank; |
---|
136 | |
---|
137 | /** The parsed grammars. */ |
---|
138 | public GrammarRuleNode[] grammar; |
---|
139 | |
---|
140 | /** The prototypical parser used to parse the grammars. */ |
---|
141 | public GrammarParser parser_prototype; |
---|
142 | |
---|
143 | public void setup(final EvolutionState state, final Parameter base) |
---|
144 | { |
---|
145 | super.setup(state, base); |
---|
146 | |
---|
147 | Parameter p = base; |
---|
148 | Parameter def = defaultBase(); |
---|
149 | |
---|
150 | p = base.push(P_GPSPECIES); |
---|
151 | gpspecies = (GPSpecies) (state.parameters.getInstanceForParameterEq(p, def.push(P_GPSPECIES), GPSpecies.class)); |
---|
152 | gpspecies.setup(state, p); |
---|
153 | |
---|
154 | // check to make sure that our individual prototype is a GPIndividual |
---|
155 | if (!(i_prototype instanceof ByteVectorIndividual)) |
---|
156 | { |
---|
157 | state.output.fatal("The Individual class for the Species " + getClass().getName() + " is must be a subclass of ec.gp.ge.GEIndividual.", base); |
---|
158 | } |
---|
159 | |
---|
160 | ERCBank = new HashMap(); |
---|
161 | |
---|
162 | // load the grammars, one per ADF tree |
---|
163 | GPIndividual gpi = (GPIndividual) (gpspecies.i_prototype); |
---|
164 | GPTree[] trees = gpi.trees; |
---|
165 | int numGrammars = trees.length; |
---|
166 | |
---|
167 | parser_prototype = (GrammarParser) (state.parameters.getInstanceForParameterEq(base.push(P_PARSER), def.push(P_PARSER), GrammarParser.class)); |
---|
168 | |
---|
169 | grammar = new GrammarRuleNode[numGrammars]; |
---|
170 | for(int i = 0; i < numGrammars; i++) |
---|
171 | { |
---|
172 | p = base.push(P_FILE); |
---|
173 | def = defaultBase(); |
---|
174 | |
---|
175 | File grammarFile = state.parameters.getFile(p, def.push(P_FILE).push("" + i)); |
---|
176 | if(grammarFile == null) |
---|
177 | { |
---|
178 | state.output.fatal("Error retrieving grammar file(s): " + def.toString() + "."+ P_FILE + "." + i + " is undefined."); |
---|
179 | } |
---|
180 | |
---|
181 | try |
---|
182 | { |
---|
183 | GPFunctionSet gpfs = trees[i].constraints((GPInitializer) state.initializer).functionset; |
---|
184 | GrammarParser grammarparser = (GrammarParser)(parser_prototype.clone()); |
---|
185 | grammar[i] = grammarparser.parseRules(state, new BufferedReader(new FileReader(grammarFile)), gpfs); |
---|
186 | } |
---|
187 | catch (FileNotFoundException e) |
---|
188 | { |
---|
189 | state.output.fatal("Error retrieving grammar file(s): " + def.toString() + "."+ P_FILE + "." + i + " does not exist or cannot be opened."); |
---|
190 | } |
---|
191 | } |
---|
192 | } |
---|
193 | |
---|
194 | |
---|
195 | /** |
---|
196 | * creates all of an individual's trees |
---|
197 | * @param state Evolution state |
---|
198 | * @param trees array of trees for the individual |
---|
199 | * @param ind the GEIndividual |
---|
200 | * @param threadnum tread number |
---|
201 | * @return number of chromosomes consumed |
---|
202 | */ |
---|
203 | public int makeTrees(EvolutionState state, GEIndividual ind, GPTree[] trees, int threadnum) |
---|
204 | { |
---|
205 | int position = 0; |
---|
206 | |
---|
207 | for (int i = 0; i < trees.length; i++) |
---|
208 | { |
---|
209 | //cannot complete one of the trees with the given chromosome |
---|
210 | if(position < 0) |
---|
211 | return BIG_TREE_ERROR; |
---|
212 | |
---|
213 | position = makeTree(state, ind, trees[i], position, i, threadnum); |
---|
214 | } |
---|
215 | |
---|
216 | return position; |
---|
217 | } |
---|
218 | |
---|
219 | /** |
---|
220 | * makeTree, edits the tree that its given by adding a root (and all subtrees attached) |
---|
221 | * @param state |
---|
222 | * @param ind |
---|
223 | * @param tree |
---|
224 | * @param position |
---|
225 | * @param treeNum |
---|
226 | * @param threadnum |
---|
227 | * @return the number of chromosomes used, or an BIG_TREE_ERROR sentinel value. |
---|
228 | */ |
---|
229 | public int makeTree(EvolutionState state, GEIndividual ind, GPTree tree, int position, int treeNum, int threadnum) |
---|
230 | { |
---|
231 | int[] countNumberOfChromosomesUsed = { position }; // hack, use an array to pass an extra value |
---|
232 | byte[] genome = ind.genome; |
---|
233 | GPFunctionSet gpfs = tree.constraints((GPInitializer) state.initializer).functionset; |
---|
234 | GPNode root; |
---|
235 | |
---|
236 | try // get the tree, or return an error. |
---|
237 | { |
---|
238 | root = makeSubtree(countNumberOfChromosomesUsed, genome, state, gpfs, grammar[treeNum], treeNum, threadnum); |
---|
239 | } |
---|
240 | catch (BigTreeException e) |
---|
241 | { |
---|
242 | return BIG_TREE_ERROR; |
---|
243 | } |
---|
244 | |
---|
245 | if(root == null) |
---|
246 | { |
---|
247 | state.output.fatal("Invalid tree: tree #" + treeNum); |
---|
248 | } |
---|
249 | |
---|
250 | root.parent = tree; |
---|
251 | tree.child = root; |
---|
252 | return countNumberOfChromosomesUsed[0]; |
---|
253 | } |
---|
254 | |
---|
255 | // thrown by makeSubtree when chromosome is not large enough for the generated tree. |
---|
256 | class BigTreeException extends RuntimeException { static final long serialVersionUID = 1L; } |
---|
257 | |
---|
258 | GPNode makeSubtree(int[] index, byte[] genome, EvolutionState es, GPFunctionSet gpfs, GrammarRuleNode rule, int treeNum, int threadnum) |
---|
259 | { |
---|
260 | //have we exceeded the length of the genome? No point in going further. |
---|
261 | if (index[0] >= genome.length) |
---|
262 | { |
---|
263 | throw new BigTreeException(); |
---|
264 | } |
---|
265 | |
---|
266 | //expand the rule with the chromosome to get a body element |
---|
267 | int i; |
---|
268 | |
---|
269 | //key for ERC hashtable look ups is the current index within the genome |
---|
270 | int key = genome[index[0]]; |
---|
271 | |
---|
272 | //non existant rule got passed in |
---|
273 | if (rule == null) |
---|
274 | { |
---|
275 | es.output.fatal("An undefined rule exists within the grammar."); |
---|
276 | } |
---|
277 | |
---|
278 | //more than one rule to consider, pick one based off the genome, and consume the current gene |
---|
279 | if (rule.getNumChoices() > 1) |
---|
280 | { |
---|
281 | //casting to an int should be ok since the biggest these genes can be is a byte |
---|
282 | i = ((genome[index[0]]) - ((int)(this.minGene(index[0])))) % rule.getNumChoices(); |
---|
283 | index[0]++; |
---|
284 | } |
---|
285 | //only 1 rule to consider |
---|
286 | else |
---|
287 | { |
---|
288 | i = 0; |
---|
289 | } |
---|
290 | GrammarNode choice = rule.getChoice(i); |
---|
291 | |
---|
292 | // if body is another rule head |
---|
293 | //look up rule |
---|
294 | if(choice instanceof GrammarRuleNode) |
---|
295 | { |
---|
296 | GrammarRuleNode nextrule = (GrammarRuleNode) choice; |
---|
297 | return makeSubtree(index, genome, es, gpfs, nextrule, treeNum, threadnum); |
---|
298 | } |
---|
299 | else //handle functions |
---|
300 | { |
---|
301 | GrammarFunctionNode funcgrammarnode = (GrammarFunctionNode) choice; |
---|
302 | |
---|
303 | GPNode validNode = funcgrammarnode.getGPNodePrototype(); |
---|
304 | |
---|
305 | int numChildren = validNode.children.length; |
---|
306 | //index 0 is the node itself |
---|
307 | int numChildrenInGrammar = funcgrammarnode.getNumArguments(); |
---|
308 | |
---|
309 | //does the grammar contain the correct amount of children that the GPNode requires |
---|
310 | if (numChildren != numChildrenInGrammar) |
---|
311 | { |
---|
312 | es.output.fatal("GPNode " + validNode.toStringForHumans() + " requires " + numChildren + " children. " |
---|
313 | + numChildrenInGrammar + " children found in the grammar."); |
---|
314 | } |
---|
315 | |
---|
316 | //check to see if it is an ERC node |
---|
317 | if (validNode instanceof ERC) |
---|
318 | { |
---|
319 | validNode = obtainERC(es, key, genome, threadnum, validNode); |
---|
320 | } |
---|
321 | //non ERC node |
---|
322 | else |
---|
323 | { |
---|
324 | validNode = validNode.lightClone(); |
---|
325 | } |
---|
326 | |
---|
327 | //get the rest. |
---|
328 | for (int j = 0, childNumber = 0; j < funcgrammarnode.getNumArguments(); j++) |
---|
329 | { |
---|
330 | //get and link children to the current GPNode |
---|
331 | validNode.children[childNumber] = makeSubtree(index, genome, es, gpfs, (GrammarRuleNode)funcgrammarnode.getArgument(j), treeNum, threadnum); |
---|
332 | if (validNode.children[childNumber] == null) |
---|
333 | { |
---|
334 | return null; |
---|
335 | } |
---|
336 | childNumber++; |
---|
337 | } |
---|
338 | return validNode; |
---|
339 | } |
---|
340 | } |
---|
341 | |
---|
342 | /** Loads an ERC from the ERCBank given the value in the genome. If there is no such ERC, then one is created and randomized, then added to the bank. |
---|
343 | The point of this mechanism is to enable ERCs to appear in multiple places in a GPTree. */ |
---|
344 | public GPNode obtainERC(EvolutionState state, int key, byte[] genome, int threadnum, GPNode node) |
---|
345 | { |
---|
346 | ArrayList ERCList = (ArrayList) (ERCBank.get(new Integer(key))); |
---|
347 | |
---|
348 | if (ERCList == null) |
---|
349 | { |
---|
350 | ERCList = new ArrayList(); |
---|
351 | ERCBank.put(new Integer(key), ERCList); |
---|
352 | } |
---|
353 | |
---|
354 | GPNode dummy = null; |
---|
355 | |
---|
356 | // search array list for an ERC of the same type we want |
---|
357 | for (int i = 0; i < ERCList.size(); i++) |
---|
358 | { |
---|
359 | dummy = (GPNode) ERCList.get(i); |
---|
360 | |
---|
361 | // ERC was found inside the arraylist |
---|
362 | if (dummy.nodeEquivalentTo(node)) |
---|
363 | { |
---|
364 | return dummy.lightClone(); |
---|
365 | } |
---|
366 | } |
---|
367 | |
---|
368 | // erc was not found in the array list lets make one |
---|
369 | node = node.lightClone(); |
---|
370 | node.resetNode(state, threadnum); |
---|
371 | ERCList.add(node); |
---|
372 | |
---|
373 | return node; |
---|
374 | } |
---|
375 | |
---|
376 | public Object clone() |
---|
377 | { |
---|
378 | GESpecies other = (GESpecies) (super.clone()); |
---|
379 | other.gpspecies = (GPSpecies) (gpspecies.clone()); |
---|
380 | return other; |
---|
381 | } |
---|
382 | |
---|
383 | public Parameter defaultBase() |
---|
384 | { |
---|
385 | return GEDefaults.base().push(P_GESPECIES); |
---|
386 | } |
---|
387 | |
---|
388 | /** Returns the number of elements consumed from the GEIndividual array to produce |
---|
389 | the tree, else returns -1 if an error occurs, specifically if all elements were |
---|
390 | consumed and the tree had still not been completed. */ |
---|
391 | public int consumed(EvolutionState state, GEIndividual ind, int threadnum) |
---|
392 | { |
---|
393 | // create a dummy individual |
---|
394 | GPIndividual newind = ((GPIndividual) (gpspecies.i_prototype)).lightClone(); |
---|
395 | |
---|
396 | // do the mapping and return the number consumed |
---|
397 | return makeTrees(state, ind, newind.trees, threadnum); |
---|
398 | } |
---|
399 | |
---|
400 | /** Returns a dummy GPIndividual with a single tree which was built by mapping |
---|
401 | over the elements of the given GEIndividual. Null is returned if an error occurs, |
---|
402 | specifically, if all elements were consumed and the tree had still not been completed. */ |
---|
403 | public GPIndividual map(EvolutionState state, GEIndividual ind, int threadnum) |
---|
404 | { |
---|
405 | // create a dummy individual |
---|
406 | GPIndividual newind = ((GPIndividual) (gpspecies.i_prototype)).lightClone(); |
---|
407 | |
---|
408 | // Do NOT initialize its trees |
---|
409 | |
---|
410 | // Set the fitness to the ByteVectorIndividual's fitness |
---|
411 | newind.fitness = ind.fitness; |
---|
412 | newind.evaluated = false; |
---|
413 | |
---|
414 | // Set the species to me |
---|
415 | newind.species = gpspecies; |
---|
416 | |
---|
417 | // do the mapping |
---|
418 | if (makeTrees(state, ind, newind.trees, threadnum) < 0) // error |
---|
419 | return null; |
---|
420 | else |
---|
421 | return newind; |
---|
422 | } |
---|
423 | } |
---|