[11659] | 1 | using System;
|
---|
| 2 | using System.Collections.Generic;
|
---|
[11865] | 3 | using System.Diagnostics;
|
---|
[11659] | 4 | using System.Linq;
|
---|
| 5 | using System.Text;
|
---|
[11865] | 6 | using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
|
---|
[11659] | 7 |
|
---|
| 8 | namespace HeuristicLab.Problems.GrammaticalOptimization {
|
---|
[11865] | 9 | public class RoyalTreeProblem : ISymbolicExpressionTreeProblem {
|
---|
[11659] | 10 | // Punch: "How Effective Are Multiple Populations in Genetic Programming", 1998
|
---|
[11865] | 11 | // In fact, for a new GP system it is dicult to judge whether it is performing
|
---|
| 12 | // as intended or not, since the programs it generates are not necessarily identical to
|
---|
| 13 | // those generated by other GP systems. This raised two questions: what constitutes
|
---|
| 14 | // a "standard" problem in GP, and how do we rate the performance of a system on
|
---|
| 15 | // such a problem.
|
---|
| 16 | // One of the goals of this research was to create a benchmark problem to test how
|
---|
| 17 | // well a particular GP configuration would perform as compared to other configurations.
|
---|
| 18 | // ...
|
---|
| 19 | // We were interested in addressing the same issues, showing how GP could address
|
---|
| 20 | // difficult problems as well as providing a tunable benchmark for comparing GP con-
|
---|
| 21 | // figurations. Furthermore, we were interested in creating and using this benchmark
|
---|
| 22 | // to test the effectiveness of coarse-grain parallel GP's as compared to single population
|
---|
| 23 | // GP's.
|
---|
| 24 | // ...
|
---|
| 25 | // A suitable benchmark problem which has only a single, unique tree as an answer can remove this obstruction.
|
---|
[11659] | 26 |
|
---|
| 27 | private readonly IGrammar grammar;
|
---|
[11865] | 28 | private readonly char targetRootSymbol;
|
---|
[12099] | 29 | public string Name { get { return "RoyalTree"; } }
|
---|
[11865] | 30 | // the number of levels determines the number of non-terminal symbols
|
---|
| 31 | // non-terminal A has only one argument, non-terminal B has two arguments ...
|
---|
| 32 | // optimal level-e tree has quality 122880,
|
---|
| 33 | // level-a (a) tree has
|
---|
| 34 | // level-f (6) tree "is extremely difficult"
|
---|
| 35 | // level-g (7) tree "never succeeded"
|
---|
| 36 | public RoyalTreeProblem(int levels) {
|
---|
| 37 | if (levels < 1 || levels > 7) throw new ArgumentException();
|
---|
| 38 | var sentenceSymbol = 'S';
|
---|
| 39 | var terminalSymbols = new char[] { 'x', 'y', 'z' };
|
---|
| 40 | // originally non-terminal-symbols (but for the sequential search grammar these are only alternatives
|
---|
| 41 | var nonTerminalSymbols = Enumerable.Range(0, levels).Select(lvl => (char)(lvl + (byte)'A')).ToArray(); // [A, B ... ]
|
---|
| 42 | var arities = Enumerable.Range(1, levels).ToArray(); // [1, 2, ... levels]
|
---|
| 43 | this.targetRootSymbol = char.ToLower(nonTerminalSymbols.Last());
|
---|
| 44 | {
|
---|
| 45 | // grammar for sequential search
|
---|
| 46 | // S -> x | y | z | aS | bSS | cSSS ...
|
---|
| 47 |
|
---|
| 48 | var rules = new List<Tuple<char, string>>();
|
---|
| 49 | foreach (var symb in terminalSymbols) {
|
---|
| 50 | rules.Add(Tuple.Create('S', symb.ToString()));
|
---|
| 51 | }
|
---|
| 52 | for (int ntIdx = 0; ntIdx < nonTerminalSymbols.Length; ntIdx++) {
|
---|
| 53 | var opSymb = char.ToLower(nonTerminalSymbols[ntIdx]);
|
---|
| 54 | var remChain = string.Join("", Enumerable.Repeat('S', arities[ntIdx]));
|
---|
| 55 | rules.Add(Tuple.Create('S', opSymb + remChain));
|
---|
| 56 | }
|
---|
| 57 |
|
---|
| 58 | this.grammar = new Grammar(sentenceSymbol, terminalSymbols.Concat(nonTerminalSymbols.Select(char.ToLower)), new char[] { 'S' }, rules);
|
---|
| 59 | }
|
---|
| 60 |
|
---|
| 61 | {
|
---|
| 62 | // grammar for tree-based GP
|
---|
| 63 | // S -> x | y | z | A | B | C ...
|
---|
| 64 | // A -> S
|
---|
| 65 | // B -> SS
|
---|
| 66 | // C -> SSS
|
---|
| 67 | var rules = new List<Tuple<char, string>>();
|
---|
| 68 | foreach (var symb in terminalSymbols.Concat(nonTerminalSymbols)) {
|
---|
| 69 | rules.Add(Tuple.Create('S', symb.ToString()));
|
---|
| 70 | }
|
---|
| 71 |
|
---|
| 72 | for (int ntIdx = 0; ntIdx < nonTerminalSymbols.Length; ntIdx++) {
|
---|
| 73 | var ntSymb = nonTerminalSymbols[ntIdx];
|
---|
| 74 | var alt = string.Join("", Enumerable.Repeat('S', arities[ntIdx]));
|
---|
| 75 | rules.Add(Tuple.Create(ntSymb, alt));
|
---|
| 76 | }
|
---|
| 77 | this.TreeBasedGPGrammar = new Grammar(sentenceSymbol, terminalSymbols, nonTerminalSymbols.Concat(new char[] { 'S' }), rules);
|
---|
| 78 | }
|
---|
| 79 |
|
---|
| 80 | maxSizeForLevel = new int[8];
|
---|
| 81 | optimalSentencesForLevel = new string[8];
|
---|
| 82 | optimalQualityForLevel = new double[8];
|
---|
| 83 | maxSizeForLevel[0] = 1; // lvl-0 (x | y)
|
---|
| 84 | maxSizeForLevel[1] = 2; // lvl-1 (e.g. ax)
|
---|
| 85 | optimalSentencesForLevel[0] = "x";
|
---|
| 86 | optimalQualityForLevel[0] = 1;
|
---|
| 87 | //optimalSentencesForLevel[1] = "ax";
|
---|
| 88 | //optimalQualityForLevel[1] = 4;
|
---|
| 89 |
|
---|
[11972] | 90 | for (int i = 1; i < levels; i++) {
|
---|
[11865] | 91 | maxSizeForLevel[i] = i * maxSizeForLevel[i - 1] + 1; // e.g. lvl-2: baxax
|
---|
| 92 |
|
---|
| 93 | var opSymb = char.ToLower(nonTerminalSymbols[i - 1]);
|
---|
| 94 | optimalSentencesForLevel[i] = opSymb +
|
---|
| 95 | string.Join("",
|
---|
| 96 | Enumerable.Repeat(optimalSentencesForLevel[i - 1], arities[i - 1]));
|
---|
| 97 |
|
---|
| 98 | optimalQualityForLevel[i] = Evaluate(optimalSentencesForLevel[i]);
|
---|
| 99 | }
|
---|
| 100 |
|
---|
| 101 | // from the paper
|
---|
| 102 | Debug.Assert(maxSizeForLevel[5] == 326);
|
---|
| 103 | Debug.Assert(maxSizeForLevel[6] == 1957);
|
---|
[11659] | 104 | }
|
---|
| 105 |
|
---|
[11865] | 106 | private readonly string[] optimalSentencesForLevel;
|
---|
| 107 | private readonly double[] optimalQualityForLevel;
|
---|
| 108 | private readonly int[] maxSizeForLevel;
|
---|
| 109 |
|
---|
[11732] | 110 | public double BestKnownQuality(int maxLen) {
|
---|
[11865] | 111 | // search for corresponding level
|
---|
| 112 | int level = 0;
|
---|
[11972] | 113 | while (level+1 < maxSizeForLevel.Length && maxSizeForLevel[level + 1] < maxLen) level++;
|
---|
[11865] | 114 | // ASSERT: maxSizeForLevel[level + 1] >= maxLen
|
---|
| 115 | return optimalQualityForLevel[level];
|
---|
[11659] | 116 | }
|
---|
| 117 |
|
---|
| 118 | public IGrammar Grammar {
|
---|
| 119 | get { return grammar; }
|
---|
| 120 | }
|
---|
| 121 |
|
---|
| 122 | public double Evaluate(string sentence) {
|
---|
[11865] | 123 | int pos = 0;
|
---|
| 124 | bool perfect;
|
---|
| 125 | return Evaluate(sentence, ref pos, out perfect);
|
---|
[11659] | 126 | }
|
---|
[11865] | 127 |
|
---|
| 128 | private double Evaluate(string sentence, ref int pos, out bool perfect) {
|
---|
| 129 | const double completeBonus = 2.0;
|
---|
| 130 | double t = 0.0;
|
---|
| 131 | double weight = 0.0;
|
---|
| 132 | double sum = 0.0;
|
---|
| 133 | bool correctRoot = false;
|
---|
| 134 | bool allPerfect = true, tPerfect;
|
---|
| 135 | int arity;
|
---|
| 136 | switch (sentence[pos]) {
|
---|
| 137 | case 'a':
|
---|
| 138 | pos++;
|
---|
| 139 | correctRoot = (sentence[pos] == 'x');
|
---|
| 140 | t = Evaluate(sentence, ref pos, out tPerfect); // ignores the perfect flag (false for terminals)
|
---|
| 141 | weight = GetWeight(correctRoot, true);
|
---|
| 142 | sum = weight * t;
|
---|
| 143 | perfect = correctRoot;
|
---|
| 144 | if (correctRoot) sum *= completeBonus;
|
---|
| 145 | return sum;
|
---|
| 146 | case 'b': {
|
---|
| 147 | arity = 2;
|
---|
| 148 | pos++;
|
---|
| 149 | for (int i = 0; i < arity; i++) {
|
---|
| 150 | correctRoot = (sentence[pos] == 'a');
|
---|
| 151 | t = Evaluate(sentence, ref pos, out tPerfect);
|
---|
| 152 | weight = GetWeight(correctRoot, tPerfect);
|
---|
| 153 | allPerfect &= correctRoot & tPerfect;
|
---|
| 154 | sum += weight * t;
|
---|
| 155 | }
|
---|
| 156 | perfect = allPerfect;
|
---|
| 157 | if (allPerfect) sum *= completeBonus;
|
---|
| 158 | return sum;
|
---|
| 159 | }
|
---|
| 160 | case 'c': {
|
---|
| 161 | arity = 3;
|
---|
| 162 | sum = 0.0;
|
---|
| 163 | pos++;
|
---|
| 164 | for (int i = 0; i < arity; i++) {
|
---|
| 165 | correctRoot = (sentence[pos] == 'b');
|
---|
| 166 | t = Evaluate(sentence, ref pos, out tPerfect);
|
---|
| 167 | weight = GetWeight(correctRoot, tPerfect);
|
---|
| 168 | allPerfect &= correctRoot & tPerfect;
|
---|
| 169 | sum += weight * t;
|
---|
| 170 | }
|
---|
| 171 | perfect = allPerfect;
|
---|
| 172 | if (allPerfect) sum *= completeBonus;
|
---|
| 173 | return sum;
|
---|
| 174 | }
|
---|
| 175 | case 'd': {
|
---|
| 176 | arity = 4;
|
---|
| 177 | sum = 0.0;
|
---|
| 178 | pos++;
|
---|
| 179 | for (int i = 0; i < arity; i++) {
|
---|
| 180 | correctRoot = (sentence[pos] == 'c');
|
---|
| 181 | t = Evaluate(sentence, ref pos, out tPerfect);
|
---|
| 182 | weight = GetWeight(correctRoot, tPerfect);
|
---|
| 183 | allPerfect &= correctRoot & tPerfect;
|
---|
| 184 | sum += weight * t;
|
---|
| 185 | }
|
---|
| 186 | perfect = allPerfect;
|
---|
| 187 | if (allPerfect) sum *= completeBonus;
|
---|
| 188 | return sum;
|
---|
| 189 | }
|
---|
| 190 | case 'e': {
|
---|
| 191 | arity = 5;
|
---|
| 192 | sum = 0.0;
|
---|
| 193 | pos++;
|
---|
| 194 | for (int i = 0; i < arity; i++) {
|
---|
| 195 | correctRoot = (sentence[pos] == 'd');
|
---|
| 196 | t = Evaluate(sentence, ref pos, out tPerfect);
|
---|
| 197 | weight = GetWeight(correctRoot, tPerfect);
|
---|
| 198 | allPerfect &= correctRoot & tPerfect;
|
---|
| 199 | sum += weight * t;
|
---|
| 200 | }
|
---|
| 201 | perfect = allPerfect;
|
---|
| 202 | if (allPerfect) sum *= completeBonus;
|
---|
| 203 | return sum;
|
---|
| 204 | }
|
---|
| 205 | case 'f': {
|
---|
| 206 | arity = 6;
|
---|
| 207 | sum = 0.0;
|
---|
| 208 | pos++;
|
---|
| 209 | for (int i = 0; i < arity; i++) {
|
---|
| 210 | correctRoot = (sentence[pos] == 'e');
|
---|
| 211 | t = Evaluate(sentence, ref pos, out tPerfect);
|
---|
| 212 | weight = GetWeight(correctRoot, tPerfect);
|
---|
| 213 | allPerfect &= correctRoot & tPerfect;
|
---|
| 214 | sum += weight * t;
|
---|
| 215 | }
|
---|
| 216 | perfect = allPerfect;
|
---|
| 217 | if (allPerfect) sum *= completeBonus;
|
---|
| 218 | return sum;
|
---|
| 219 | }
|
---|
| 220 | case 'g': {
|
---|
| 221 | arity = 7;
|
---|
| 222 | sum = 0.0;
|
---|
| 223 | pos++;
|
---|
| 224 | for (int i = 0; i < arity; i++) {
|
---|
| 225 | correctRoot = (sentence[pos] == 'f');
|
---|
| 226 | t = Evaluate(sentence, ref pos, out tPerfect);
|
---|
| 227 | weight = GetWeight(correctRoot, tPerfect);
|
---|
| 228 | allPerfect &= correctRoot & tPerfect;
|
---|
| 229 | sum += weight * t;
|
---|
| 230 | }
|
---|
| 231 | perfect = allPerfect;
|
---|
| 232 | if (allPerfect) sum *= completeBonus;
|
---|
| 233 | return sum;
|
---|
| 234 | }
|
---|
| 235 | case 'x':
|
---|
| 236 | pos++;
|
---|
| 237 | perfect = false;
|
---|
| 238 | return 1.0;
|
---|
| 239 | case 'y':
|
---|
| 240 | case 'z':
|
---|
| 241 | pos++;
|
---|
| 242 | perfect = false;
|
---|
| 243 | return 0.0;
|
---|
| 244 | default: throw new ArgumentException();
|
---|
| 245 | }
|
---|
| 246 | }
|
---|
| 247 |
|
---|
| 248 | private double GetWeight(bool correctRoot, bool perfect) {
|
---|
| 249 | const double fullBonus = 2.0;
|
---|
| 250 | const double partialBonus = 1.0;
|
---|
| 251 | const double penalty = 0.33;
|
---|
| 252 | if (correctRoot && perfect) return fullBonus;
|
---|
| 253 | else if (correctRoot) return partialBonus;
|
---|
| 254 | else return penalty;
|
---|
| 255 | }
|
---|
| 256 |
|
---|
[11832] | 257 | public string CanonicalRepresentation(string phrase) {
|
---|
[11793] | 258 | throw new NotImplementedException();
|
---|
[11832] | 259 | return phrase;
|
---|
[11727] | 260 | }
|
---|
[11659] | 261 |
|
---|
[11865] | 262 | public IEnumerable<Feature> GetFeatures(string phrase) {
|
---|
[11832] | 263 | throw new NotImplementedException();
|
---|
| 264 | }
|
---|
[12893] | 265 | public bool IsOptimalPhrase(string phrase) {
|
---|
| 266 | throw new NotImplementedException();
|
---|
| 267 | }
|
---|
[11865] | 268 |
|
---|
| 269 | public IGrammar TreeBasedGPGrammar { get; private set; }
|
---|
| 270 | public string ConvertTreeToSentence(ISymbolicExpressionTree tree) {
|
---|
| 271 | var sb = new StringBuilder();
|
---|
| 272 | foreach (var s in tree.Root.GetSubtree(0).GetSubtree(0).IterateNodesPrefix()) {
|
---|
| 273 | if (s.Symbol.Name == "S") continue;
|
---|
| 274 | sb.Append(s.Symbol.Name.ToLower());
|
---|
| 275 | }
|
---|
| 276 | return sb.ToString();
|
---|
| 277 | }
|
---|
[11659] | 278 | }
|
---|
| 279 | }
|
---|