1 | using System;
|
---|
2 | using System.Collections.Generic;
|
---|
3 | using System.Diagnostics;
|
---|
4 | using System.Linq;
|
---|
5 | using System.Text;
|
---|
6 | using System.Text.RegularExpressions;
|
---|
7 | using HeuristicLab.Common;
|
---|
8 | using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
|
---|
9 |
|
---|
10 | namespace HeuristicLab.Problems.GrammaticalOptimization {
|
---|
11 |
|
---|
12 | // Predicting Prime Numbers Using Cartesian Genetic Programming, James Alfred Walker and Julian Francis Miller
|
---|
13 | // "Euler’s polynomial was the inspiration behind one of the GECCO competitions
|
---|
14 | // in 2006. The aim of the GECCO Prime Prediction competition (and some
|
---|
15 | // of the work in this paper) was to produce a polynomial f(i) with integer coeffi-
|
---|
16 | // cients, such that given an integer input, i, it produces the i
|
---|
17 | // th prime number, p(i),
|
---|
18 | // for the largest possible value of i. For example, f(1) = 2, f(2) = 3, f(3) = 5,
|
---|
19 | // f(4) = 7. Therefore, the function f(i) must produce consecutive prime numbers
|
---|
20 | // for consecutive values of i. The requirement that the polynomial must not only
|
---|
21 | // produce primes for consecutive input values, but also that the primes themselves
|
---|
22 | // must be consecutive, makes the problem considerably more challenging
|
---|
23 | // than mathematicians have previously considered. The two approaches described
|
---|
24 | // in Section 4.2 were entered in the GECCO Prime Prediction competition and
|
---|
25 | // were ranked second overall. The winning entry evolved floating point co-efficients
|
---|
26 | // of a polynomial using a Genetic Algorithm (GA), where the output of the polynomial
|
---|
27 | // was rounded to produce the prime numbers for consecutive values of i.
|
---|
28 | // However, the winning entry was only able to predict correctly a few consecutive
|
---|
29 | // prime numbers (9 in total). Unfortunately, the details regarding this have not
|
---|
30 | // been published."
|
---|
31 |
|
---|
32 | public class PrimePolynomialProblem : ISymbolicExpressionTreeProblem {
|
---|
33 | // C represents Koza-style ERC (the symbol is the index of the ERC), the values are initialized below
|
---|
34 | private const string grammarString = @"
|
---|
35 | G(E):
|
---|
36 | E -> a | C | a+E | a-E | a*E | (E) | C+E | C-E | C*E
|
---|
37 | C -> 0..9
|
---|
38 | ";
|
---|
39 |
|
---|
40 | // S .. Sum (+), N .. Neg. sum (-), P .. Product (*), D .. Division (%)
|
---|
41 | private const string treeGrammarString = @"
|
---|
42 | G(E):
|
---|
43 | E -> a | C | S | N | P | D
|
---|
44 | S -> EE | EEE
|
---|
45 | N -> EE | EEE
|
---|
46 | P -> EE | EEE
|
---|
47 | D -> EE
|
---|
48 | C -> 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9
|
---|
49 | ";
|
---|
50 |
|
---|
51 | private readonly IGrammar grammar;
|
---|
52 | private double[] erc;
|
---|
53 | private double[] vars;
|
---|
54 | // source http://primes.utm.edu/lists/small/1000.txt
|
---|
55 | private double[] primes = new double[]
|
---|
56 | {
|
---|
57 | 2, 3, 5, 7, 11, 13, 17, 19, 23, 29,
|
---|
58 | 31, 37, 41, 43, 47, 53, 59, 61, 67, 71,
|
---|
59 | 73, 79, 83, 89, 97, 101, 103, 107, 109, 113,
|
---|
60 | 127, 131, 137, 139, 149, 151, 157, 163, 167, 173,
|
---|
61 | 179, 181, 191, 193, 197, 199, 211, 223, 227, 229,
|
---|
62 | 233, 239, 241, 251, 257, 263, 269, 271, 277, 281,
|
---|
63 | 283, 293, 307, 311, 313, 317, 331, 337, 347, 349,
|
---|
64 | 353, 359, 367, 373, 379, 383, 389, 397, 401, 409,
|
---|
65 | 419, 421, 431, 433, 439, 443, 449, 457, 461, 463,
|
---|
66 | 467, 479, 487, 491, 499, 503, 509, 521, 523, 541,
|
---|
67 | 547, 557, 563, 569, 571, 577, 587, 593, 599, 601,
|
---|
68 | 607, 613, 617, 619, 631, 641, 643, 647, 653, 659,
|
---|
69 | 661, 673, 677, 683, 691, 701, 709, 719, 727, 733,
|
---|
70 | 739, 743, 751, 757, 761, 769, 773, 787, 797, 809,
|
---|
71 | 811, 821, 823, 827, 829, 839, 853, 857, 859, 863,
|
---|
72 | 877, 881, 883, 887, 907, 911, 919, 929, 937, 941,
|
---|
73 | 947, 953, 967, 971, 977, 983, 991, 997, 1009, 1013,
|
---|
74 | 1019, 1021, 1031, 1033, 1039, 1049, 1051, 1061, 1063, 1069,
|
---|
75 | 1087, 1091, 1093, 1097, 1103, 1109, 1117, 1123, 1129, 1151,
|
---|
76 | 1153, 1163, 1171, 1181, 1187, 1193, 1201, 1213, 1217, 1223,
|
---|
77 | 1229, 1231, 1237, 1249, 1259, 1277, 1279, 1283, 1289, 1291,
|
---|
78 | 1297, 1301, 1303, 1307, 1319, 1321, 1327, 1361, 1367, 1373,
|
---|
79 | 1381, 1399, 1409, 1423, 1427, 1429, 1433, 1439, 1447, 1451,
|
---|
80 | 1453, 1459, 1471, 1481, 1483, 1487, 1489, 1493, 1499, 1511,
|
---|
81 | 1523, 1531, 1543, 1549, 1553, 1559, 1567, 1571, 1579, 1583,
|
---|
82 | 1597, 1601, 1607, 1609, 1613, 1619, 1621, 1627, 1637, 1657,
|
---|
83 | 1663, 1667, 1669, 1693, 1697, 1699, 1709, 1721, 1723, 1733,
|
---|
84 | 1741, 1747, 1753, 1759, 1777, 1783, 1787, 1789, 1801, 1811,
|
---|
85 | 1823, 1831, 1847, 1861, 1867, 1871, 1873, 1877, 1879, 1889,
|
---|
86 | 1901, 1907, 1913, 1931, 1933, 1949, 1951, 1973, 1979, 1987,
|
---|
87 | 1993, 1997, 1999, 2003, 2011, 2017, 2027, 2029, 2039, 2053,
|
---|
88 | 2063, 2069, 2081, 2083, 2087, 2089, 2099, 2111, 2113, 2129,
|
---|
89 | 2131, 2137, 2141, 2143, 2153, 2161, 2179, 2203, 2207, 2213,
|
---|
90 | 2221, 2237, 2239, 2243, 2251, 2267, 2269, 2273, 2281, 2287,
|
---|
91 | 2293, 2297, 2309, 2311, 2333, 2339, 2341, 2347, 2351, 2357,
|
---|
92 | 2371, 2377, 2381, 2383, 2389, 2393, 2399, 2411, 2417, 2423,
|
---|
93 | 2437, 2441, 2447, 2459, 2467, 2473, 2477, 2503, 2521, 2531,
|
---|
94 | 2539, 2543, 2549, 2551, 2557, 2579, 2591, 2593, 2609, 2617,
|
---|
95 | 2621, 2633, 2647, 2657, 2659, 2663, 2671, 2677, 2683, 2687,
|
---|
96 | 2689, 2693, 2699, 2707, 2711, 2713, 2719, 2729, 2731, 2741,
|
---|
97 | 2749, 2753, 2767, 2777, 2789, 2791, 2797, 2801, 2803, 2819,
|
---|
98 | 2833, 2837, 2843, 2851, 2857, 2861, 2879, 2887, 2897, 2903,
|
---|
99 | 2909, 2917, 2927, 2939, 2953, 2957, 2963, 2969, 2971, 2999,
|
---|
100 | 3001, 3011, 3019, 3023, 3037, 3041, 3049, 3061, 3067, 3079,
|
---|
101 | 3083, 3089, 3109, 3119, 3121, 3137, 3163, 3167, 3169, 3181,
|
---|
102 | 3187, 3191, 3203, 3209, 3217, 3221, 3229, 3251, 3253, 3257,
|
---|
103 | 3259, 3271, 3299, 3301, 3307, 3313, 3319, 3323, 3329, 3331,
|
---|
104 | 3343, 3347, 3359, 3361, 3371, 3373, 3389, 3391, 3407, 3413,
|
---|
105 | 3433, 3449, 3457, 3461, 3463, 3467, 3469, 3491, 3499, 3511,
|
---|
106 | 3517, 3527, 3529, 3533, 3539, 3541, 3547, 3557, 3559, 3571,
|
---|
107 | 3581, 3583, 3593, 3607, 3613, 3617, 3623, 3631, 3637, 3643,
|
---|
108 | 3659, 3671, 3673, 3677, 3691, 3697, 3701, 3709, 3719, 3727,
|
---|
109 | 3733, 3739, 3761, 3767, 3769, 3779, 3793, 3797, 3803, 3821,
|
---|
110 | 3823, 3833, 3847, 3851, 3853, 3863, 3877, 3881, 3889, 3907,
|
---|
111 | 3911, 3917, 3919, 3923, 3929, 3931, 3943, 3947, 3967, 3989,
|
---|
112 | 4001, 4003, 4007, 4013, 4019, 4021, 4027, 4049, 4051, 4057,
|
---|
113 | 4073, 4079, 4091, 4093, 4099, 4111, 4127, 4129, 4133, 4139,
|
---|
114 | 4153, 4157, 4159, 4177, 4201, 4211, 4217, 4219, 4229, 4231,
|
---|
115 | 4241, 4243, 4253, 4259, 4261, 4271, 4273, 4283, 4289, 4297,
|
---|
116 | 4327, 4337, 4339, 4349, 4357, 4363, 4373, 4391, 4397, 4409,
|
---|
117 | 4421, 4423, 4441, 4447, 4451, 4457, 4463, 4481, 4483, 4493,
|
---|
118 | 4507, 4513, 4517, 4519, 4523, 4547, 4549, 4561, 4567, 4583,
|
---|
119 | 4591, 4597, 4603, 4621, 4637, 4639, 4643, 4649, 4651, 4657,
|
---|
120 | 4663, 4673, 4679, 4691, 4703, 4721, 4723, 4729, 4733, 4751,
|
---|
121 | 4759, 4783, 4787, 4789, 4793, 4799, 4801, 4813, 4817, 4831,
|
---|
122 | 4861, 4871, 4877, 4889, 4903, 4909, 4919, 4931, 4933, 4937,
|
---|
123 | 4943, 4951, 4957, 4967, 4969, 4973, 4987, 4993, 4999, 5003,
|
---|
124 | 5009, 5011, 5021, 5023, 5039, 5051, 5059, 5077, 5081, 5087,
|
---|
125 | 5099, 5101, 5107, 5113, 5119, 5147, 5153, 5167, 5171, 5179,
|
---|
126 | 5189, 5197, 5209, 5227, 5231, 5233, 5237, 5261, 5273, 5279,
|
---|
127 | 5281, 5297, 5303, 5309, 5323, 5333, 5347, 5351, 5381, 5387,
|
---|
128 | 5393, 5399, 5407, 5413, 5417, 5419, 5431, 5437, 5441, 5443,
|
---|
129 | 5449, 5471, 5477, 5479, 5483, 5501, 5503, 5507, 5519, 5521,
|
---|
130 | 5527, 5531, 5557, 5563, 5569, 5573, 5581, 5591, 5623, 5639,
|
---|
131 | 5641, 5647, 5651, 5653, 5657, 5659, 5669, 5683, 5689, 5693,
|
---|
132 | 5701, 5711, 5717, 5737, 5741, 5743, 5749, 5779, 5783, 5791,
|
---|
133 | 5801, 5807, 5813, 5821, 5827, 5839, 5843, 5849, 5851, 5857,
|
---|
134 | 5861, 5867, 5869, 5879, 5881, 5897, 5903, 5923, 5927, 5939,
|
---|
135 | 5953, 5981, 5987, 6007, 6011, 6029, 6037, 6043, 6047, 6053,
|
---|
136 | 6067, 6073, 6079, 6089, 6091, 6101, 6113, 6121, 6131, 6133,
|
---|
137 | 6143, 6151, 6163, 6173, 6197, 6199, 6203, 6211, 6217, 6221,
|
---|
138 | 6229, 6247, 6257, 6263, 6269, 6271, 6277, 6287, 6299, 6301,
|
---|
139 | 6311, 6317, 6323, 6329, 6337, 6343, 6353, 6359, 6361, 6367,
|
---|
140 | 6373, 6379, 6389, 6397, 6421, 6427, 6449, 6451, 6469, 6473,
|
---|
141 | 6481, 6491, 6521, 6529, 6547, 6551, 6553, 6563, 6569, 6571,
|
---|
142 | 6577, 6581, 6599, 6607, 6619, 6637, 6653, 6659, 6661, 6673,
|
---|
143 | 6679, 6689, 6691, 6701, 6703, 6709, 6719, 6733, 6737, 6761,
|
---|
144 | 6763, 6779, 6781, 6791, 6793, 6803, 6823, 6827, 6829, 6833,
|
---|
145 | 6841, 6857, 6863, 6869, 6871, 6883, 6899, 6907, 6911, 6917,
|
---|
146 | 6947, 6949, 6959, 6961, 6967, 6971, 6977, 6983, 6991, 6997,
|
---|
147 | 7001, 7013, 7019, 7027, 7039, 7043, 7057, 7069, 7079, 7103,
|
---|
148 | 7109, 7121, 7127, 7129, 7151, 7159, 7177, 7187, 7193, 7207,
|
---|
149 | 7211, 7213, 7219, 7229, 7237, 7243, 7247, 7253, 7283, 7297,
|
---|
150 | 7307, 7309, 7321, 7331, 7333, 7349, 7351, 7369, 7393, 7411,
|
---|
151 | 7417, 7433, 7451, 7457, 7459, 7477, 7481, 7487, 7489, 7499,
|
---|
152 | 7507, 7517, 7523, 7529, 7537, 7541, 7547, 7549, 7559, 7561,
|
---|
153 | 7573, 7577, 7583, 7589, 7591, 7603, 7607, 7621, 7639, 7643,
|
---|
154 | 7649, 7669, 7673, 7681, 7687, 7691, 7699, 7703, 7717, 7723,
|
---|
155 | 7727, 7741, 7753, 7757, 7759, 7789, 7793, 7817, 7823, 7829,
|
---|
156 | 7841, 7853, 7867, 7873, 7877, 7879, 7883, 7901, 7907, 7919,
|
---|
157 | };
|
---|
158 | private bool[] isPrime = new bool[7920];
|
---|
159 | public string Name { get { return "PrimePolynomial"; } }
|
---|
160 | public PrimePolynomialProblem() {
|
---|
161 | this.grammar = new Grammar(grammarString);
|
---|
162 | this.TreeBasedGPGrammar = new Grammar(treeGrammarString);
|
---|
163 |
|
---|
164 | // values according to the paper: Monte-Carlo Expression Discovery, Cazenave, 2013
|
---|
165 | erc = Enumerable.Range(1, 10).Select(x => (double)x).ToArray(); // the numbers 1..10;
|
---|
166 | vars = new double[] { 0 }.Concat(primes.Take(25)).ToArray(); // the idx and all primes < 100;
|
---|
167 |
|
---|
168 | // for faster access;
|
---|
169 | for (int i = 0; i < isPrime.Length; i++) {
|
---|
170 | isPrime[i] = primes.Contains(i);
|
---|
171 | }
|
---|
172 |
|
---|
173 | }
|
---|
174 |
|
---|
175 | public double BestKnownQuality(int maxLen) {
|
---|
176 | // estimate
|
---|
177 | return 100;
|
---|
178 | }
|
---|
179 |
|
---|
180 | public IGrammar Grammar {
|
---|
181 | get { return grammar; }
|
---|
182 | }
|
---|
183 |
|
---|
184 | // according to above mentioned paper: ".. number of different primes it generations in a row for integer values of x starting at zero and increasing by one at each step"
|
---|
185 | public double Evaluate(string sentence) {
|
---|
186 | var interpreter = new ExpressionInterpreter();
|
---|
187 | double[] vars = new[] { 0.0 };
|
---|
188 | int i = 0;
|
---|
189 | var primesFound = new HashSet<int>();
|
---|
190 | for (; i <= primes.Length; i++) {
|
---|
191 | vars[0] = i;
|
---|
192 | var result = (int)interpreter.Interpret(sentence, vars, erc);
|
---|
193 | if (result < 0 || result >= isPrime.Length || !isPrime[result]) {
|
---|
194 | break;
|
---|
195 | }
|
---|
196 | primesFound.Add(result);
|
---|
197 | }
|
---|
198 | return primesFound.Count;
|
---|
199 | }
|
---|
200 |
|
---|
201 | public string CanonicalRepresentation(string phrase) {
|
---|
202 | return phrase;
|
---|
203 | }
|
---|
204 |
|
---|
205 | public IEnumerable<Feature> GetFeatures(string phrase)
|
---|
206 | {
|
---|
207 | return new Feature[] {new Feature(phrase, 1.0),};
|
---|
208 | }
|
---|
209 | public IGrammar TreeBasedGPGrammar { get; private set; }
|
---|
210 | public string ConvertTreeToSentence(ISymbolicExpressionTree tree) {
|
---|
211 | var sb = new StringBuilder();
|
---|
212 |
|
---|
213 | TreeToSentence(tree.Root.GetSubtree(0).GetSubtree(0), sb);
|
---|
214 |
|
---|
215 | return sb.ToString();
|
---|
216 | }
|
---|
217 | public bool IsOptimalPhrase(string phrase) {
|
---|
218 | // throw new NotImplementedException();
|
---|
219 | return false; // TODO
|
---|
220 | }
|
---|
221 |
|
---|
222 |
|
---|
223 | private void TreeToSentence(ISymbolicExpressionTreeNode treeNode, StringBuilder sb) {
|
---|
224 | if (treeNode.SubtreeCount == 0) {
|
---|
225 | // terminal
|
---|
226 | sb.Append(treeNode.Symbol.Name);
|
---|
227 | } else {
|
---|
228 | string op = string.Empty;
|
---|
229 | switch (treeNode.Symbol.Name) {
|
---|
230 | case "S": op = "+"; break;
|
---|
231 | case "N": op = "-"; break;
|
---|
232 | case "P": op = "*"; break;
|
---|
233 | case "D": op = "%"; break;
|
---|
234 | default: {
|
---|
235 | Debug.Assert(treeNode.SubtreeCount == 1);
|
---|
236 | break;
|
---|
237 | }
|
---|
238 | }
|
---|
239 | // nonterminal
|
---|
240 | if (treeNode.SubtreeCount > 1) sb.Append("(");
|
---|
241 | TreeToSentence(treeNode.Subtrees.First(), sb);
|
---|
242 | foreach (var subTree in treeNode.Subtrees.Skip(1)) {
|
---|
243 | sb.Append(op);
|
---|
244 | TreeToSentence(subTree, sb);
|
---|
245 | }
|
---|
246 | if (treeNode.SubtreeCount > 1) sb.Append(")");
|
---|
247 | }
|
---|
248 | }
|
---|
249 | }
|
---|
250 | }
|
---|