Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/Problems/SymbolicRegressionPoly10Problem.cs @ 12099

Last change on this file since 12099 was 12099, checked in by gkronber, 9 years ago

#2283: name for all problems (for output), new unit test, and added testsettings file

File size: 6.6 KB
Line 
1using System;
2using System.Collections.Concurrent;
3using System.Collections.Generic;
4using System.Collections.Specialized;
5using System.Diagnostics;
6using System.Linq;
7using System.Net;
8using System.Security;
9using System.Security.AccessControl;
10using System.Text;
11using HeuristicLab.Common;
12using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
13
14namespace HeuristicLab.Problems.GrammaticalOptimization {
15  public class SymbolicRegressionPoly10Problem : ISymbolicExpressionTreeProblem {
16    //    private const string grammarString = @"
17    //    G(E):
18    //    E -> V | V+E | V-E | V*E | (E)
19    //    V -> a .. j
20    //    ";
21    private const string grammarString = @"
22    G(E):
23    E -> a | b | c | d | e | f | g | h | i | j | a+E | b+E | c+E | d+E | e+E | f+E | g+E | h+E | i+E | j+E | a*E | b*E | c*E | d*E | e*E | f*E | g*E | h*E | i*E | j*E
24    ";
25
26    // for tree-based GP in HL we need a different grammar for the same language
27    // E = expr, S = sum, P = product
28    private const string hlGrammarString = @"
29    G(E):
30    E -> S | P | a | b | c | d | e | f | g | h | i | j
31    S -> EE | EEE
32    P -> EE | EEE
33    ";
34    // mininmal tree for the optimal expr (40 nodes)
35    // E S
36    //     E S
37    //         E P
38    //           E a E b
39    //         E P
40    //           E c E d
41    //         E P
42    //           E e E f
43    //     E S
44    //         E P
45    //           E a E g E i
46    //         E P
47    //           E c E f E j
48
49    public IGrammar TreeBasedGPGrammar { get; private set; }
50
51    private readonly IGrammar grammar;
52
53    private readonly int N;
54    private readonly double[][] x;
55    private readonly double[] y;
56    public string Name { get { return "SymbolicRegressionPoly10"; } }
57
58    public SymbolicRegressionPoly10Problem() {
59      this.grammar = new Grammar(grammarString);
60      this.TreeBasedGPGrammar = new Grammar(hlGrammarString);
61
62      this.N = 500;
63      this.x = new double[N][];
64      this.y = new double[N];
65
66      GenerateData();
67    }
68
69    private void GenerateData() {
70      // generate data with fixed seed to make sure that data is always the same
71      var rand = new Random(31415);
72      for (int i = 0; i < N; i++) {
73        x[i] = new double[10];
74        for (int j = 0; j < 10; j++) {
75          x[i][j] = rand.NextDouble() * 2 - 1;
76        }
77        // poly-10 no noise
78        /* a*b + c*d + e*f + a*g*i + c*f*j */
79        y[i] = x[i][0] * x[i][1] +
80               x[i][2] * x[i][3] +
81               x[i][4] * x[i][5] +
82               x[i][0] * x[i][6] * x[i][8] +
83               x[i][2] * x[i][5] * x[i][9];
84      }
85    }
86
87    public double BestKnownQuality(int maxLen) {
88      // for now only an upper bound is returned, ideally we have an R² of 1.0
89      // the optimal R² can only be reached for sentences of at least 23 symbols
90      return 1.0;
91    }
92
93    public IGrammar Grammar {
94      get { return grammar; }
95    }
96
97    public double Evaluate(string sentence) {
98      var interpreter = new ExpressionInterpreter();
99      return HeuristicLab.Common.Extensions.RSq(y, Enumerable.Range(0, N).Select(i => interpreter.Interpret(sentence, x[i])).ToArray());
100    }
101
102
103    // most-recently-used caching (with limited capacity) for canonical representations
104    MostRecentlyUsedCache<string, string> canonicalPhraseCache = new MostRecentlyUsedCache<string, string>(100000);
105    // right now only + and * is supported
106    public string CanonicalRepresentation(string phrase) {
107      string canonicalPhrase;
108      if (!canonicalPhraseCache.TryGetValue(phrase, out canonicalPhrase)) {
109        var terms = phrase.Split('+');
110        var canonicalTerms = new SortedSet<string>();
111        // only the last term might contain a NT-symbol. make sure this term is added at the end
112        for (int i = 0; i < terms.Length - 1; i++) {
113          canonicalTerms.Add(CanonicalTerm(terms[i]));
114        }
115
116        var sb = new StringBuilder(phrase.Length);
117        foreach (var t in canonicalTerms)
118          sb.Append(t).Append('+');
119
120        sb.Append(CanonicalTerm(terms[terms.Length - 1]));
121        canonicalPhrase = sb.ToString();
122        canonicalPhraseCache.Add(phrase, canonicalPhrase);
123      }
124      return canonicalPhrase;
125    }
126
127    // cache the canonical form of terms for performance reasons
128    private Dictionary<string, string> canonicalTermDictionary = new Dictionary<string, string>();
129    private string CanonicalTerm(string term) {
130      string canonicalTerm;
131      if (!canonicalTermDictionary.TryGetValue(term, out canonicalTerm)) {
132        // add
133        var chars = term.ToCharArray();
134        Array.Sort(chars);
135        var sb = new StringBuilder(chars.Length);
136        // we want to have the up-case characters last
137        for (int i = chars.Length - 1; i > 0; i--) {
138          if (chars[i] != '*') {
139            sb.Append(chars[i]);
140            if (chars[i - 1] != '*') sb.Append('*');
141          }
142        }
143        if (chars[0] != '*') sb.Append(chars[0]); // last term
144        canonicalTerm = sb.ToString();
145        canonicalTermDictionary.Add(term, canonicalTerm);
146      }
147      return canonicalTerm;
148    }
149
150    // splits the phrase into terms and creates (sparse) term-occurrance features
151    public IEnumerable<Feature> GetFeatures(string phrase) {
152      var canonicalTerms = new HashSet<string>();
153      foreach (string t in phrase.Split('+')) {
154        canonicalTerms.Add(CanonicalTerm(t));
155      }
156      return canonicalTerms.Select(entry => new Feature(entry, 1.0))
157        .Concat(new Feature[] { new Feature(CanonicalRepresentation(phrase), 1.0) });
158    }
159
160    public string ConvertTreeToSentence(ISymbolicExpressionTree tree) {
161      var sb = new StringBuilder();
162
163      TreeToSentence(tree.Root.GetSubtree(0).GetSubtree(0), sb);
164
165      return sb.ToString();
166    }
167
168    private void TreeToSentence(ISymbolicExpressionTreeNode treeNode, StringBuilder sb) {
169      if (treeNode.SubtreeCount == 0) {
170        // terminal
171        sb.Append(treeNode.Symbol.Name);
172      } else {
173        string op = string.Empty;
174        switch (treeNode.Symbol.Name) {
175          case "S": op = "+"; break;
176          case "P": op = "*"; break;
177          default: {
178              Debug.Assert(treeNode.SubtreeCount == 1);
179              break;
180            }
181        }
182        // nonterminal
183        if (op == "+") sb.Append("(");
184        TreeToSentence(treeNode.Subtrees.First(), sb);
185        foreach (var subTree in treeNode.Subtrees.Skip(1)) {
186          sb.Append(op);
187          TreeToSentence(subTree, sb);
188        }
189        if (op == "+") sb.Append(")");
190      }
191    }
192  }
193}
Note: See TracBrowser for help on using the repository browser.