Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Problems.GrammaticalOptimization/Problems/SymbolicRegressionPoly10Problem.cs @ 13847

Last change on this file since 13847 was 12893, checked in by gkronber, 9 years ago

#2283: experiments on grammatical optimization algorithms (maxreward instead of avg reward, ...)

File size: 15.5 KB
Line 
1using System;
2using System.Collections.Concurrent;
3using System.Collections.Generic;
4using System.Collections.Specialized;
5using System.Diagnostics;
6using System.Linq;
7using System.Net;
8using System.Runtime.InteropServices;
9using System.Security;
10using System.Security.AccessControl;
11using System.Text;
12using HeuristicLab.Common;
13using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
14
15namespace HeuristicLab.Problems.GrammaticalOptimization {
16  public class SymbolicRegressionPoly10Problem : ISymbolicExpressionTreeProblem {
17    //    private const string grammarString = @"
18    //    G(E):
19    //    E -> V | V+E | V-E | V*E | (E)
20    //    V -> a .. j
21    //    ";
22    //private const string grammarString = @"
23    //G(E):
24    //E -> a | b | c | d | e | f | g | h | i | j | a+E | b+E | c+E | d+E | e+E | f+E | g+E | h+E | i+E | j+E | a*E | b*E | c*E | d*E | e*E | f*E | g*E | h*E | i*E | j*E
25    //";
26    private const string grammarString = @"
27    G(E):
28    E -> a | b | c | d | e | f | g | h | i | j | a+E | b+E | c+E | d+E | e+E | f+E | g+E | h+E | i+E | j+E | a*E | b*E | c*E | d*E | e*E | f*E | g*E | h*E | i*E | j*E 
29    ";
30
31    // for tree-based GP in HL we need a different grammar for the same language
32    // E = expr, S = sum, P = product
33    private const string hlGrammarString = @"
34    G(E):
35    E -> S | P | a | b | c | d | e | f | g | h | i | j
36    S -> EE | EEE
37    P -> EE | EEE
38    ";
39    // mininmal tree for the optimal expr (40 nodes)
40    // E S
41    //     E S
42    //         E P
43    //           E a E b
44    //         E P
45    //           E c E d
46    //         E P
47    //           E e E f
48    //     E S
49    //         E P
50    //           E a E g E i
51    //         E P
52    //           E c E f E j
53
54    public IGrammar TreeBasedGPGrammar { get; private set; }
55
56    private readonly IGrammar grammar;
57
58    private readonly int N;
59    private readonly double[][] x;
60    private readonly double[] y;
61    public string Name { get { return "SymbolicRegressionPoly10"; } }
62
63    public SymbolicRegressionPoly10Problem(int n = 500) {
64      this.grammar = new Grammar(grammarString);
65      this.TreeBasedGPGrammar = new Grammar(hlGrammarString);
66
67      this.N = n;
68      this.x = new double[N][];
69      this.y = new double[N];
70
71      GenerateData();
72    }
73
74    private void GenerateData() {
75      // generate data with fixed seed to make sure that data is always the same
76      var rand = new System.Random(31415);
77      for (int i = 0; i < N; i++) {
78        x[i] = new double[10];
79        for (int j = 0; j < 10; j++) {
80          x[i][j] = rand.NextDouble() * 2 - 1;
81        }
82        // poly-10 no noise
83        /* a*b + c*d + e*f + a*g*i + c*f*j */
84        y[i] = x[i][0] * x[i][1] +
85               x[i][2] * x[i][3] +
86               x[i][4] * x[i][5] +
87               x[i][0] * x[i][6] * x[i][8] +
88               x[i][2] * x[i][5] * x[i][9];
89      }
90    }
91
92    public double BestKnownQuality(int maxLen) {
93      // for now only an upper bound is returned, ideally we have an R² of 1.0
94      // the optimal R² can only be reached for sentences of at least 23 symbols
95      return 1.0;
96    }
97
98    public IGrammar Grammar {
99      get { return grammar; }
100    }
101
102    private static double[] erc = new double[] { 0.0, 1.0 };
103    public double Evaluate(string sentence) {
104      var interpreter = new ExpressionInterpreter();
105      return Math.Round(HeuristicLab.Common.Extensions.RSq(y, Enumerable.Range(0, N).Select(i => interpreter.Interpret(sentence, x[i], erc)).ToArray()), 3);
106    }
107
108
109    // most-recently-used caching (with limited capacity) for canonical representations
110    MostRecentlyUsedCache<string, string> canonicalPhraseCache = new MostRecentlyUsedCache<string, string>(100000);
111    // right now only + and * is supported
112    public string CanonicalRepresentation(string phrase) {
113      string canonicalPhrase;
114      if (!canonicalPhraseCache.TryGetValue(phrase, out canonicalPhrase)) {
115        var terms = phrase.Split('+');
116        var canonicalTerms = new SortedSet<string>();
117        // only the last term might contain a NT-symbol. make sure this term is added at the end
118        for (int i = 0; i < terms.Length - 1; i++) {
119          canonicalTerms.Add(CanonicalTerm(terms[i]));
120        }
121
122        var sb = new StringBuilder(phrase.Length);
123        foreach (var t in canonicalTerms)
124          sb.Append(t).Append('+');
125
126        sb.Append(CanonicalTerm(terms[terms.Length - 1]));
127        canonicalPhrase = sb.ToString();
128        canonicalPhraseCache.Add(phrase, canonicalPhrase);
129      }
130      return canonicalPhrase;
131    }
132
133    // cache the canonical form of terms for performance reasons
134    private Dictionary<string, string> canonicalTermDictionary = new Dictionary<string, string>();
135    private string CanonicalTerm(string term) {
136      string canonicalTerm;
137      if (!canonicalTermDictionary.TryGetValue(term, out canonicalTerm)) {
138        // add
139        var chars = term.ToCharArray();
140        Array.Sort(chars);
141        var sb = new StringBuilder(chars.Length);
142        // we want to have the up-case characters last
143        for (int i = chars.Length - 1; i > 0; i--) {
144          if (chars[i] != '*') {
145            sb.Append(chars[i]);
146            if (chars[i - 1] != '*') sb.Append('*');
147          }
148        }
149        if (chars[0] != '*') sb.Append(chars[0]); // last term
150        canonicalTerm = sb.ToString();
151        canonicalTermDictionary.Add(term, canonicalTerm);
152      }
153      return canonicalTerm;
154    }
155
156    private double[] varIds = new double[] { };
157
158    // splits the phrase into terms and creates (sparse) term-occurrance features
159    public IEnumerable<Feature> GetFeatures(string phrase) {
160      //if (phrase.EndsWith("E")) phrase = phrase.TrimEnd('*', '+', 'E');
161      //yield return new Feature("$$$", 1.0); // const
162      //var canonicalTerms = new HashSet<string>();
163      //foreach (string t in phrase.Split('+')) {
164      //  canonicalTerms.Add(CanonicalTerm(t));
165      //}
166      //return canonicalTerms.Select(entry => new Feature(entry, 1.0));
167      //.Concat(new Feature[] { new Feature(CanonicalRepresentation(phrase), 1.0) });
168
169
170      //if (phrase.EndsWith("E")) phrase = phrase.TrimEnd('*', '+', 'E');
171      //var len = 5;
172      //var start = Math.Max(0, phrase.Length - len);
173      //var end = Math.Min(phrase.Length, start + len);
174      //string f = phrase.Substring(start, end - start);
175      //yield return new Feature(f, 1.0);
176      //
177
178      //var terms = phrase.Split('+');
179      //foreach (var t in terms.Distinct()) yield return new Feature(t, 1.0);
180      //
181      //for (int i = 0; i < terms.Length; i++) {
182      //  for (int j = i + 1; j < terms.Length; j++) {
183      //    yield return new Feature(terms[i] + " " + terms[j], 1.0);
184      //  }
185      //}
186      var t = new string[] { "a", "b", "c", "d", "e", "f", "g", "h", "i", "j" };
187      for (int t0Idx = 0; t0Idx < t.Length - 1; t0Idx++) {
188        for (int t1Idx = t0Idx; t1Idx < t.Length; t1Idx++) {
189          var a = t[t0Idx] + "*" + t[t1Idx];
190          var b = t[t1Idx] + "*" + t[t0Idx];
191          yield return new Feature(a, phrase.Contains(a) || phrase.Contains(b) ? 1 : 0);
192        }
193      }
194
195      // var substrings = new HashSet<string>();
196      // for (int start = 0; start <= phrase.Length - 2; start += 2) {
197      //   var s = phrase.Substring(start, 3);
198      //   substrings.Add(s);
199      // }
200      //
201      // var list = new List<string>(substrings);
202      //
203      // for (int i = 0; i < list.Count; i++) {
204      //   yield return new Feature(list[i], 1.0);
205      //   //for (int j = i+1; j < list.Count; j++) {
206      //   //  yield return new Feature(list[i] + " " + list[j], 1.0);
207      //   //}
208      // }
209
210      //
211      // for (int len = 1; len <= phrase.Length; len += 2) {
212      //   var start = Math.Max(0, phrase.Length - len);
213      //   var end = Math.Min(phrase.Length, start + len);
214      //   string f = phrase.Substring(start, end - start);
215      //   yield return new Feature(f, 1.0);
216      //
217      // }
218
219      //var partialInterpreter = new PartialExpressionInterpreter();
220      //var vars = new double[] { 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, };
221      //var s = partialInterpreter.Interpret(phrase, vars);
222      ////if (s.Any())
223      ////  return new Feature[] { new Feature(s.Pop().ToString(), 1.0), };
224      ////else
225      ////  return new Feature[] { new Feature("$", 1.0), };
226      //return s.Select(f => new Feature(f.ToString(), 1.0));
227    }
228
229    public string ConvertTreeToSentence(ISymbolicExpressionTree tree) {
230      var sb = new StringBuilder();
231
232      TreeToSentence(tree.Root.GetSubtree(0).GetSubtree(0), sb);
233
234      return sb.ToString();
235    }
236
237    private static string[][] optimalTerms = new string[][]
238      {
239        new string[] { "a*b","b*a",},
240        new string[] { "c*d","d*c",},
241        new string[] { "e*f","f*e",},
242        new string[] { "a*g*i","a*i*g","g*a*i","g*i*a","i*a*g","i*g*a"},
243        new string[] { "c*j*f","c*f*j","j*c*f","j*f*c","f*c*j","f*j*c"},
244      };
245
246    private static int[][] permute5 = new int[][]
247      {
248        new int[] { 4, 3, 2, 0, 1 },
249        new int[] { 4, 3, 2, 1, 0 },
250        new int[] { 4, 3, 0, 2, 1 },
251        new int[] { 4, 3, 1, 2, 0 },
252        new int[] { 4, 3, 0, 1, 2 },
253        new int[] { 4, 3, 1, 0, 2 },
254        new int[] { 4, 2, 3, 0, 1 },
255        new int[] { 4, 2, 3, 1, 0 },
256        new int[] { 4, 0, 3, 2, 1 },
257        new int[] { 4, 1, 3, 2, 0 },
258        new int[] { 4, 0, 3, 1, 2 },
259        new int[] { 4, 1, 3, 0, 2 },
260        new int[] { 4, 2, 0, 3, 1 },
261        new int[] { 4, 2, 1, 3, 0 },
262        new int[] { 4, 0, 2, 3, 1 },
263        new int[] { 4, 1, 2, 3, 0 },
264        new int[] { 4, 0, 1, 3, 2 },
265        new int[] { 4, 1, 0, 3, 2 },
266        new int[] { 4, 2, 0, 1, 3 },
267        new int[] { 4, 2, 1, 0, 3 },
268        new int[] { 4, 0, 2, 1, 3 },
269        new int[] { 4, 1, 2, 0, 3 },
270        new int[] { 4, 0, 1, 2, 3 },
271        new int[] { 4, 1, 0, 2, 3 },
272        new int[] { 3, 4, 2, 0, 1 },
273        new int[] { 3, 4, 2, 1, 0 },
274        new int[] { 3, 4, 0, 2, 1 },
275        new int[] { 3, 4, 1, 2, 0 },
276        new int[] { 3, 4, 0, 1, 2 },
277        new int[] { 3, 4, 1, 0, 2 },
278        new int[] { 2, 4, 3, 0, 1 },
279        new int[] { 2, 4, 3, 1, 0 },
280        new int[] { 0, 4, 3, 2, 1 },
281        new int[] { 1, 4, 3, 2, 0 },
282        new int[] { 0, 4, 3, 1, 2 },
283        new int[] { 1, 4, 3, 0, 2 },
284        new int[] { 2, 4, 0, 3, 1 },
285        new int[] { 2, 4, 1, 3, 0 },
286        new int[] { 0, 4, 2, 3, 1 },
287        new int[] { 1, 4, 2, 3, 0 },
288        new int[] { 0, 4, 1, 3, 2 },
289        new int[] { 1, 4, 0, 3, 2 },
290        new int[] { 2, 4, 0, 1, 3 },
291        new int[] { 2, 4, 1, 0, 3 },
292        new int[] { 0, 4, 2, 1, 3 },
293        new int[] { 1, 4, 2, 0, 3 },
294        new int[] { 0, 4, 1, 2, 3 },
295        new int[] { 1, 4, 0, 2, 3 },
296        new int[] { 3, 2, 4, 0, 1 },
297        new int[] { 3, 2, 4, 1, 0 },
298        new int[] { 3, 0, 4, 2, 1 },
299        new int[] { 3, 1, 4, 2, 0 },
300        new int[] { 3, 0, 4, 1, 2 },
301        new int[] { 3, 1, 4, 0, 2 },
302        new int[] { 2, 3, 4, 0, 1 },
303        new int[] { 2, 3, 4, 1, 0 },
304        new int[] { 0, 3, 4, 2, 1 },
305        new int[] { 1, 3, 4, 2, 0 },
306        new int[] { 0, 3, 4, 1, 2 },
307        new int[] { 1, 3, 4, 0, 2 },
308        new int[] { 2, 0, 4, 3, 1 },
309        new int[] { 2, 1, 4, 3, 0 },
310        new int[] { 0, 2, 4, 3, 1 },
311        new int[] { 1, 2, 4, 3, 0 },
312        new int[] { 0, 1, 4, 3, 2 },
313        new int[] { 1, 0, 4, 3, 2 },
314        new int[] { 2, 0, 4, 1, 3 },
315        new int[] { 2, 1, 4, 0, 3 },
316        new int[] { 0, 2, 4, 1, 3 },
317        new int[] { 1, 2, 4, 0, 3 },
318        new int[] { 0, 1, 4, 2, 3 },
319        new int[] { 1, 0, 4, 2, 3 },
320        new int[] { 3, 2, 0, 4, 1 },
321        new int[] { 3, 2, 1, 4, 0 },
322        new int[] { 3, 0, 2, 4, 1 },
323        new int[] { 3, 1, 2, 4, 0 },
324        new int[] { 3, 0, 1, 4, 2 },
325        new int[] { 3, 1, 0, 4, 2 },
326        new int[] { 2, 3, 0, 4, 1 },
327        new int[] { 2, 3, 1, 4, 0 },
328        new int[] { 0, 3, 2, 4, 1 },
329        new int[] { 1, 3, 2, 4, 0 },
330        new int[] { 0, 3, 1, 4, 2 },
331        new int[] { 1, 3, 0, 4, 2 },
332        new int[] { 2, 0, 3, 4, 1 },
333        new int[] { 2, 1, 3, 4, 0 },
334        new int[] { 0, 2, 3, 4, 1 },
335        new int[] { 1, 2, 3, 4, 0 },
336        new int[] { 0, 1, 3, 4, 2 },
337        new int[] { 1, 0, 3, 4, 2 },
338        new int[] { 2, 0, 1, 4, 3 },
339        new int[] { 2, 1, 0, 4, 3 },
340        new int[] { 0, 2, 1, 4, 3 },
341        new int[] { 1, 2, 0, 4, 3 },
342        new int[] { 0, 1, 2, 4, 3 },
343        new int[] { 1, 0, 2, 4, 3 },
344        new int[] { 3, 2, 0, 1, 4 },
345        new int[] { 3, 2, 1, 0, 4 },
346        new int[] { 3, 0, 2, 1, 4 },
347        new int[] { 3, 1, 2, 0, 4 },
348        new int[] { 3, 0, 1, 2, 4 },
349        new int[] { 3, 1, 0, 2, 4 },
350        new int[] { 2, 3, 0, 1, 4 },
351        new int[] { 2, 3, 1, 0, 4 },
352        new int[] { 0, 3, 2, 1, 4 },
353        new int[] { 1, 3, 2, 0, 4 },
354        new int[] { 0, 3, 1, 2, 4 },
355        new int[] { 1, 3, 0, 2, 4 },
356        new int[] { 2, 0, 3, 1, 4 },
357        new int[] { 2, 1, 3, 0, 4 },
358        new int[] { 0, 2, 3, 1, 4 },
359        new int[] { 1, 2, 3, 0, 4 },
360        new int[] { 0, 1, 3, 2, 4 },
361        new int[] { 1, 0, 3, 2, 4 },
362        new int[] { 2, 0, 1, 3, 4 },
363        new int[] { 2, 1, 0, 3, 4 },
364        new int[] { 0, 2, 1, 3, 4 },
365        new int[] { 1, 2, 0, 3, 4 },
366        new int[] { 0, 1, 2, 3, 4 },
367        new int[] { 1, 0, 2, 3, 4 },
368      };
369
370
371    private static HashSet<string>[] optimalSentences;
372    static SymbolicRegressionPoly10Problem() {
373      optimalSentences = Enumerable.Range(0, 24).Select(i => new HashSet<string>()).ToArray();
374
375      foreach (var t0Idx in new[] { 0, 1 })
376        foreach (var t1Idx in new[] { 0, 1 })
377          foreach (var t2Idx in new[] { 0, 1 })
378            foreach (var t3Idx in new[] { 0, 1, 2, 3, 4, 5 })
379              foreach (var t4Idx in new[] { 0, 1, 2, 3, 4, 5 }) {
380                foreach (var p in permute5) {
381                  int[] idx = new int[] { t0Idx, t1Idx, t2Idx, t3Idx, t4Idx };
382                  optimalSentences[23].Add(string.Join("+", p.Select(pi => optimalTerms[pi][idx[pi]])));
383                }
384              }
385      for (int i = 0; i < 23; i += 2) {
386        var newElements = new HashSet<string>();
387        foreach (var sentence in optimalSentences[23]) {
388          newElements.Add(sentence.Substring(0, i) + "E");
389        }
390        foreach (var e in newElements) optimalSentences[i + 1].Add(e);
391      }
392    }
393
394    public bool IsOptimalPhrase(string phrase) {
395      return optimalSentences[phrase.Length].Contains(phrase);
396    }
397
398    private void TreeToSentence(ISymbolicExpressionTreeNode treeNode, StringBuilder sb) {
399      if (treeNode.SubtreeCount == 0) {
400        // terminal
401        sb.Append(treeNode.Symbol.Name);
402      } else {
403        string op = string.Empty;
404        switch (treeNode.Symbol.Name) {
405          case "S": op = "+"; break;
406          case "P": op = "*"; break;
407          default: {
408              Debug.Assert(treeNode.SubtreeCount == 1);
409              break;
410            }
411        }
412        // nonterminal
413        if (op == "+") sb.Append("(");
414        TreeToSentence(treeNode.Subtrees.First(), sb);
415        foreach (var subTree in treeNode.Subtrees.Skip(1)) {
416          sb.Append(op);
417          TreeToSentence(subTree, sb);
418        }
419        if (op == "+") sb.Append(")");
420      }
421    }
422  }
423}
Note: See TracBrowser for help on using the repository browser.