1 | using System;
|
---|
2 | using System.Collections.Generic;
|
---|
3 | using System.Diagnostics;
|
---|
4 | using System.Linq;
|
---|
5 | using System.Text;
|
---|
6 | using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
|
---|
7 |
|
---|
8 | namespace HeuristicLab.Problems.GrammaticalOptimization {
|
---|
9 | public class RoyalTreeProblem : ISymbolicExpressionTreeProblem {
|
---|
10 | // Punch: "How Effective Are Multiple Populations in Genetic Programming", 1998
|
---|
11 | // In fact, for a new GP system it is dicult to judge whether it is performing
|
---|
12 | // as intended or not, since the programs it generates are not necessarily identical to
|
---|
13 | // those generated by other GP systems. This raised two questions: what constitutes
|
---|
14 | // a "standard" problem in GP, and how do we rate the performance of a system on
|
---|
15 | // such a problem.
|
---|
16 | // One of the goals of this research was to create a benchmark problem to test how
|
---|
17 | // well a particular GP configuration would perform as compared to other configurations.
|
---|
18 | // ...
|
---|
19 | // We were interested in addressing the same issues, showing how GP could address
|
---|
20 | // difficult problems as well as providing a tunable benchmark for comparing GP con-
|
---|
21 | // figurations. Furthermore, we were interested in creating and using this benchmark
|
---|
22 | // to test the effectiveness of coarse-grain parallel GP's as compared to single population
|
---|
23 | // GP's.
|
---|
24 | // ...
|
---|
25 | // A suitable benchmark problem which has only a single, unique tree as an answer can remove this obstruction.
|
---|
26 |
|
---|
27 | private readonly IGrammar grammar;
|
---|
28 | private readonly char targetRootSymbol;
|
---|
29 | public string Name { get { return "RoyalTree"; } }
|
---|
30 | // the number of levels determines the number of non-terminal symbols
|
---|
31 | // non-terminal A has only one argument, non-terminal B has two arguments ...
|
---|
32 | // optimal level-e tree has quality 122880,
|
---|
33 | // level-a (a) tree has
|
---|
34 | // level-f (6) tree "is extremely difficult"
|
---|
35 | // level-g (7) tree "never succeeded"
|
---|
36 | public RoyalTreeProblem(int levels) {
|
---|
37 | if (levels < 1 || levels > 7) throw new ArgumentException();
|
---|
38 | var sentenceSymbol = 'S';
|
---|
39 | var terminalSymbols = new char[] { 'x', 'y', 'z' };
|
---|
40 | // originally non-terminal-symbols (but for the sequential search grammar these are only alternatives
|
---|
41 | var nonTerminalSymbols = Enumerable.Range(0, levels).Select(lvl => (char)(lvl + (byte)'A')).ToArray(); // [A, B ... ]
|
---|
42 | var arities = Enumerable.Range(1, levels).ToArray(); // [1, 2, ... levels]
|
---|
43 | this.targetRootSymbol = char.ToLower(nonTerminalSymbols.Last());
|
---|
44 | {
|
---|
45 | // grammar for sequential search
|
---|
46 | // S -> x | y | z | aS | bSS | cSSS ...
|
---|
47 |
|
---|
48 | var rules = new List<Tuple<char, string>>();
|
---|
49 | foreach (var symb in terminalSymbols) {
|
---|
50 | rules.Add(Tuple.Create('S', symb.ToString()));
|
---|
51 | }
|
---|
52 | for (int ntIdx = 0; ntIdx < nonTerminalSymbols.Length; ntIdx++) {
|
---|
53 | var opSymb = char.ToLower(nonTerminalSymbols[ntIdx]);
|
---|
54 | var remChain = string.Join("", Enumerable.Repeat('S', arities[ntIdx]));
|
---|
55 | rules.Add(Tuple.Create('S', opSymb + remChain));
|
---|
56 | }
|
---|
57 |
|
---|
58 | this.grammar = new Grammar(sentenceSymbol, terminalSymbols.Concat(nonTerminalSymbols.Select(char.ToLower)), new char[] { 'S' }, rules);
|
---|
59 | }
|
---|
60 |
|
---|
61 | {
|
---|
62 | // grammar for tree-based GP
|
---|
63 | // S -> x | y | z | A | B | C ...
|
---|
64 | // A -> S
|
---|
65 | // B -> SS
|
---|
66 | // C -> SSS
|
---|
67 | var rules = new List<Tuple<char, string>>();
|
---|
68 | foreach (var symb in terminalSymbols.Concat(nonTerminalSymbols)) {
|
---|
69 | rules.Add(Tuple.Create('S', symb.ToString()));
|
---|
70 | }
|
---|
71 |
|
---|
72 | for (int ntIdx = 0; ntIdx < nonTerminalSymbols.Length; ntIdx++) {
|
---|
73 | var ntSymb = nonTerminalSymbols[ntIdx];
|
---|
74 | var alt = string.Join("", Enumerable.Repeat('S', arities[ntIdx]));
|
---|
75 | rules.Add(Tuple.Create(ntSymb, alt));
|
---|
76 | }
|
---|
77 | this.TreeBasedGPGrammar = new Grammar(sentenceSymbol, terminalSymbols, nonTerminalSymbols.Concat(new char[] { 'S' }), rules);
|
---|
78 | }
|
---|
79 |
|
---|
80 | maxSizeForLevel = new int[8];
|
---|
81 | optimalSentencesForLevel = new string[8];
|
---|
82 | optimalQualityForLevel = new double[8];
|
---|
83 | maxSizeForLevel[0] = 1; // lvl-0 (x | y)
|
---|
84 | maxSizeForLevel[1] = 2; // lvl-1 (e.g. ax)
|
---|
85 | optimalSentencesForLevel[0] = "x";
|
---|
86 | optimalQualityForLevel[0] = 1;
|
---|
87 | //optimalSentencesForLevel[1] = "ax";
|
---|
88 | //optimalQualityForLevel[1] = 4;
|
---|
89 |
|
---|
90 | for (int i = 1; i < levels; i++) {
|
---|
91 | maxSizeForLevel[i] = i * maxSizeForLevel[i - 1] + 1; // e.g. lvl-2: baxax
|
---|
92 |
|
---|
93 | var opSymb = char.ToLower(nonTerminalSymbols[i - 1]);
|
---|
94 | optimalSentencesForLevel[i] = opSymb +
|
---|
95 | string.Join("",
|
---|
96 | Enumerable.Repeat(optimalSentencesForLevel[i - 1], arities[i - 1]));
|
---|
97 |
|
---|
98 | optimalQualityForLevel[i] = Evaluate(optimalSentencesForLevel[i]);
|
---|
99 | }
|
---|
100 |
|
---|
101 | // from the paper
|
---|
102 | Debug.Assert(maxSizeForLevel[5] == 326);
|
---|
103 | Debug.Assert(maxSizeForLevel[6] == 1957);
|
---|
104 | }
|
---|
105 |
|
---|
106 | private readonly string[] optimalSentencesForLevel;
|
---|
107 | private readonly double[] optimalQualityForLevel;
|
---|
108 | private readonly int[] maxSizeForLevel;
|
---|
109 |
|
---|
110 | public double BestKnownQuality(int maxLen) {
|
---|
111 | // search for corresponding level
|
---|
112 | int level = 0;
|
---|
113 | while (level+1 < maxSizeForLevel.Length && maxSizeForLevel[level + 1] < maxLen) level++;
|
---|
114 | // ASSERT: maxSizeForLevel[level + 1] >= maxLen
|
---|
115 | return optimalQualityForLevel[level];
|
---|
116 | }
|
---|
117 |
|
---|
118 | public IGrammar Grammar {
|
---|
119 | get { return grammar; }
|
---|
120 | }
|
---|
121 |
|
---|
122 | public double Evaluate(string sentence) {
|
---|
123 | int pos = 0;
|
---|
124 | bool perfect;
|
---|
125 | return Evaluate(sentence, ref pos, out perfect);
|
---|
126 | }
|
---|
127 |
|
---|
128 | private double Evaluate(string sentence, ref int pos, out bool perfect) {
|
---|
129 | const double completeBonus = 2.0;
|
---|
130 | double t = 0.0;
|
---|
131 | double weight = 0.0;
|
---|
132 | double sum = 0.0;
|
---|
133 | bool correctRoot = false;
|
---|
134 | bool allPerfect = true, tPerfect;
|
---|
135 | int arity;
|
---|
136 | switch (sentence[pos]) {
|
---|
137 | case 'a':
|
---|
138 | pos++;
|
---|
139 | correctRoot = (sentence[pos] == 'x');
|
---|
140 | t = Evaluate(sentence, ref pos, out tPerfect); // ignores the perfect flag (false for terminals)
|
---|
141 | weight = GetWeight(correctRoot, true);
|
---|
142 | sum = weight * t;
|
---|
143 | perfect = correctRoot;
|
---|
144 | if (correctRoot) sum *= completeBonus;
|
---|
145 | return sum;
|
---|
146 | case 'b': {
|
---|
147 | arity = 2;
|
---|
148 | pos++;
|
---|
149 | for (int i = 0; i < arity; i++) {
|
---|
150 | correctRoot = (sentence[pos] == 'a');
|
---|
151 | t = Evaluate(sentence, ref pos, out tPerfect);
|
---|
152 | weight = GetWeight(correctRoot, tPerfect);
|
---|
153 | allPerfect &= correctRoot & tPerfect;
|
---|
154 | sum += weight * t;
|
---|
155 | }
|
---|
156 | perfect = allPerfect;
|
---|
157 | if (allPerfect) sum *= completeBonus;
|
---|
158 | return sum;
|
---|
159 | }
|
---|
160 | case 'c': {
|
---|
161 | arity = 3;
|
---|
162 | sum = 0.0;
|
---|
163 | pos++;
|
---|
164 | for (int i = 0; i < arity; i++) {
|
---|
165 | correctRoot = (sentence[pos] == 'b');
|
---|
166 | t = Evaluate(sentence, ref pos, out tPerfect);
|
---|
167 | weight = GetWeight(correctRoot, tPerfect);
|
---|
168 | allPerfect &= correctRoot & tPerfect;
|
---|
169 | sum += weight * t;
|
---|
170 | }
|
---|
171 | perfect = allPerfect;
|
---|
172 | if (allPerfect) sum *= completeBonus;
|
---|
173 | return sum;
|
---|
174 | }
|
---|
175 | case 'd': {
|
---|
176 | arity = 4;
|
---|
177 | sum = 0.0;
|
---|
178 | pos++;
|
---|
179 | for (int i = 0; i < arity; i++) {
|
---|
180 | correctRoot = (sentence[pos] == 'c');
|
---|
181 | t = Evaluate(sentence, ref pos, out tPerfect);
|
---|
182 | weight = GetWeight(correctRoot, tPerfect);
|
---|
183 | allPerfect &= correctRoot & tPerfect;
|
---|
184 | sum += weight * t;
|
---|
185 | }
|
---|
186 | perfect = allPerfect;
|
---|
187 | if (allPerfect) sum *= completeBonus;
|
---|
188 | return sum;
|
---|
189 | }
|
---|
190 | case 'e': {
|
---|
191 | arity = 5;
|
---|
192 | sum = 0.0;
|
---|
193 | pos++;
|
---|
194 | for (int i = 0; i < arity; i++) {
|
---|
195 | correctRoot = (sentence[pos] == 'd');
|
---|
196 | t = Evaluate(sentence, ref pos, out tPerfect);
|
---|
197 | weight = GetWeight(correctRoot, tPerfect);
|
---|
198 | allPerfect &= correctRoot & tPerfect;
|
---|
199 | sum += weight * t;
|
---|
200 | }
|
---|
201 | perfect = allPerfect;
|
---|
202 | if (allPerfect) sum *= completeBonus;
|
---|
203 | return sum;
|
---|
204 | }
|
---|
205 | case 'f': {
|
---|
206 | arity = 6;
|
---|
207 | sum = 0.0;
|
---|
208 | pos++;
|
---|
209 | for (int i = 0; i < arity; i++) {
|
---|
210 | correctRoot = (sentence[pos] == 'e');
|
---|
211 | t = Evaluate(sentence, ref pos, out tPerfect);
|
---|
212 | weight = GetWeight(correctRoot, tPerfect);
|
---|
213 | allPerfect &= correctRoot & tPerfect;
|
---|
214 | sum += weight * t;
|
---|
215 | }
|
---|
216 | perfect = allPerfect;
|
---|
217 | if (allPerfect) sum *= completeBonus;
|
---|
218 | return sum;
|
---|
219 | }
|
---|
220 | case 'g': {
|
---|
221 | arity = 7;
|
---|
222 | sum = 0.0;
|
---|
223 | pos++;
|
---|
224 | for (int i = 0; i < arity; i++) {
|
---|
225 | correctRoot = (sentence[pos] == 'f');
|
---|
226 | t = Evaluate(sentence, ref pos, out tPerfect);
|
---|
227 | weight = GetWeight(correctRoot, tPerfect);
|
---|
228 | allPerfect &= correctRoot & tPerfect;
|
---|
229 | sum += weight * t;
|
---|
230 | }
|
---|
231 | perfect = allPerfect;
|
---|
232 | if (allPerfect) sum *= completeBonus;
|
---|
233 | return sum;
|
---|
234 | }
|
---|
235 | case 'x':
|
---|
236 | pos++;
|
---|
237 | perfect = false;
|
---|
238 | return 1.0;
|
---|
239 | case 'y':
|
---|
240 | case 'z':
|
---|
241 | pos++;
|
---|
242 | perfect = false;
|
---|
243 | return 0.0;
|
---|
244 | default: throw new ArgumentException();
|
---|
245 | }
|
---|
246 | }
|
---|
247 |
|
---|
248 | private double GetWeight(bool correctRoot, bool perfect) {
|
---|
249 | const double fullBonus = 2.0;
|
---|
250 | const double partialBonus = 1.0;
|
---|
251 | const double penalty = 0.33;
|
---|
252 | if (correctRoot && perfect) return fullBonus;
|
---|
253 | else if (correctRoot) return partialBonus;
|
---|
254 | else return penalty;
|
---|
255 | }
|
---|
256 |
|
---|
257 | public string CanonicalRepresentation(string phrase) {
|
---|
258 | throw new NotImplementedException();
|
---|
259 | return phrase;
|
---|
260 | }
|
---|
261 |
|
---|
262 | public IEnumerable<Feature> GetFeatures(string phrase) {
|
---|
263 | throw new NotImplementedException();
|
---|
264 | }
|
---|
265 | public bool IsOptimalPhrase(string phrase) {
|
---|
266 | throw new NotImplementedException();
|
---|
267 | }
|
---|
268 |
|
---|
269 | public IGrammar TreeBasedGPGrammar { get; private set; }
|
---|
270 | public string ConvertTreeToSentence(ISymbolicExpressionTree tree) {
|
---|
271 | var sb = new StringBuilder();
|
---|
272 | foreach (var s in tree.Root.GetSubtree(0).GetSubtree(0).IterateNodesPrefix()) {
|
---|
273 | if (s.Symbol.Name == "S") continue;
|
---|
274 | sb.Append(s.Symbol.Name.ToLower());
|
---|
275 | }
|
---|
276 | return sb.ToString();
|
---|
277 | }
|
---|
278 | }
|
---|
279 | }
|
---|