Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/Problems/SymbolicRegressionPoly10Problem.cs @ 11832

Last change on this file since 11832 was 11832, checked in by gkronber, 9 years ago

linear value function approximation and good results for poly-10 benchmark

File size: 4.7 KB
RevLine 
[11659]1using System;
[11799]2using System.Collections.Concurrent;
[11659]3using System.Collections.Generic;
[11799]4using System.Collections.Specialized;
[11659]5using System.Linq;
[11770]6using System.Net;
[11659]7using System.Security;
8using System.Security.AccessControl;
9using System.Text;
[11727]10using HeuristicLab.Common;
[11659]11
12namespace HeuristicLab.Problems.GrammaticalOptimization {
13  public class SymbolicRegressionPoly10Problem : IProblem {
[11727]14    //    private const string grammarString = @"
15    //    G(E):
16    //    E -> V | V+E | V-E | V*E | (E)
17    //    V -> a .. j
18    //    ";
[11659]19    private const string grammarString = @"
[11727]20    G(E):
[11747]21    E -> a | b | c | d | e | f | g | h | i | j | a+E | b+E | c+E | d+E | e+E | f+E | g+E | h+E | i+E | j+E | a*E | b*E | c*E | d*E | e*E | f*E | g*E | h*E | i*E | j*E
[11727]22    ";
[11659]23
[11708]24
[11659]25    private readonly IGrammar grammar;
26
27    private readonly int N;
28    private readonly double[][] x;
29    private readonly double[] y;
30
31    public SymbolicRegressionPoly10Problem() {
32      this.grammar = new Grammar(grammarString);
33
34      this.N = 500;
35      this.x = new double[N][];
36      this.y = new double[N];
37
38      GenerateData();
39    }
40
41    private void GenerateData() {
42      // generate data with fixed seed to make sure that data is always the same
43      var rand = new Random(31415);
44      for (int i = 0; i < N; i++) {
45        x[i] = new double[10];
46        for (int j = 0; j < 10; j++) {
47          x[i][j] = rand.NextDouble() * 2 - 1;
48        }
49        // poly-10 no noise
[11730]50        /* a*b + c*d + e*f + a*g*i + c*f*j */
[11659]51        y[i] = x[i][0] * x[i][1] +
52               x[i][2] * x[i][3] +
53               x[i][4] * x[i][5] +
54               x[i][0] * x[i][6] * x[i][8] +
55               x[i][2] * x[i][5] * x[i][9];
56      }
57    }
58
[11732]59    public double BestKnownQuality(int maxLen) {
[11659]60      // for now only an upper bound is returned, ideally we have an R² of 1.0
61      // the optimal R² can only be reached for sentences of at least 23 symbols
62      return 1.0;
63    }
64
65    public IGrammar Grammar {
66      get { return grammar; }
67    }
68
69    public double Evaluate(string sentence) {
[11742]70      var interpreter = new ExpressionInterpreter();
[11732]71      return HeuristicLab.Common.Extensions.RSq(y, Enumerable.Range(0, N).Select(i => interpreter.Interpret(sentence, x[i])).ToArray());
[11659]72    }
73
74
75
[11799]76    // most-recently-used caching (with limited capacity) for canonical representations
77    MostRecentlyUsedCache<string, string> canonicalPhraseCache = new MostRecentlyUsedCache<string, string>(100000);
[11727]78    // right now only + and * is supported
[11745]79    public string CanonicalRepresentation(string phrase) {
[11799]80      string canonicalPhrase;
81      if (!canonicalPhraseCache.TryGetValue(phrase, out canonicalPhrase)) {
82        var terms = phrase.Split('+');
83        var canonicalTerms = new SortedSet<string>();
84        // only the last term might contain a NT-symbol. make sure this term is added at the end
85        for (int i = 0; i < terms.Length - 1; i++) {
86          canonicalTerms.Add(CanonicalTerm(terms[i]));
87        }
[11745]88
[11799]89        var sb = new StringBuilder(phrase.Length);
90        foreach (var t in canonicalTerms)
91          sb.Append(t).Append('+');
92
93        sb.Append(CanonicalTerm(terms[terms.Length - 1]));
94        canonicalPhrase = sb.ToString();
95        canonicalPhraseCache.Add(phrase, canonicalPhrase);
96      }
97      return canonicalPhrase;
[11727]98    }
[11745]99
[11799]100    // cache the canonical form of terms for performance reasons
101    private Dictionary<string, string> canonicalTermDictionary = new Dictionary<string, string>();
[11745]102    private string CanonicalTerm(string term) {
[11799]103      string canonicalTerm;
104      if (!canonicalTermDictionary.TryGetValue(term, out canonicalTerm)) {
105        // add
106        var chars = term.ToCharArray();
107        Array.Sort(chars);
108        var sb = new StringBuilder(chars.Length);
109        // we want to have the up-case characters last
[11806]110        for (int i = chars.Length - 1; i > 0; i--) {
111          if (chars[i] != '*') {
112            sb.Append(chars[i]);
113            if (chars[i - 1] != '*') sb.Append('*');
114          }
[11799]115        }
[11806]116        if (chars[0] != '*') sb.Append(chars[0]); // last term
[11799]117        canonicalTerm = sb.ToString();
118        canonicalTermDictionary.Add(term, canonicalTerm);
119      }
120      return canonicalTerm;
[11745]121    }
[11832]122
123    // splits the phrase into terms and creates (sparse) term-occurrance features
124    public IEnumerable<Feature> GetFeatures(string phrase) {
125      var canonicalTerms = new HashSet<string>();
126      foreach (string t in phrase.Split('+')) {
127        canonicalTerms.Add(CanonicalTerm(t));
128      }
129      return canonicalTerms.Select(entry => new Feature(entry, 1.0)).Concat(new Feature[] { new Feature(CanonicalRepresentation(phrase), 1.0) });
130    }
131
[11659]132  }
133}
Note: See TracBrowser for help on using the repository browser.