Context Navigation

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/Problems/SymbolicRegressionPoly10Problem.cs @ 12099

Visit:

Last change on this file since 12099 was 12099, checked in by gkronber, 9 years ago
#2283: name for all problems (for output), new unit test, and added testsettings file
File size: 6.6 KB

Line
1	using System;
2	using System.Collections.Concurrent;
3	using System.Collections.Generic;
4	using System.Collections.Specialized;
5	using System.Diagnostics;
6	using System.Linq;
7	using System.Net;
8	using System.Security;
9	using System.Security.AccessControl;
10	using System.Text;
11	using HeuristicLab.Common;
12	using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
13
14	namespace HeuristicLab.Problems.GrammaticalOptimization {
15	public class SymbolicRegressionPoly10Problem : ISymbolicExpressionTreeProblem {
16	// private const string grammarString = @"
17	// G(E):
18	// E -> V \| V+E \| V-E \| V*E \| (E)
19	// V -> a .. j
20	// ";
21	private const string grammarString = @"
22	G(E):
23	E -> a \| b \| c \| d \| e \| f \| g \| h \| i \| j \| a+E \| b+E \| c+E \| d+E \| e+E \| f+E \| g+E \| h+E \| i+E \| j+E \| aE \| bE \| cE \| dE \| eE \| fE \| gE \| hE \| iE \| jE
24	";
25
26	// for tree-based GP in HL we need a different grammar for the same language
27	// E = expr, S = sum, P = product
28	private const string hlGrammarString = @"
29	G(E):
30	E -> S \| P \| a \| b \| c \| d \| e \| f \| g \| h \| i \| j
31	S -> EE \| EEE
32	P -> EE \| EEE
33	";
34	// mininmal tree for the optimal expr (40 nodes)
35	// E S
36	// E S
37	// E P
38	// E a E b
39	// E P
40	// E c E d
41	// E P
42	// E e E f
43	// E S
44	// E P
45	// E a E g E i
46	// E P
47	// E c E f E j
48
49	public IGrammar TreeBasedGPGrammar { get; private set; }
50
51	private readonly IGrammar grammar;
52
53	private readonly int N;
54	private readonly double[][] x;
55	private readonly double[] y;
56	public string Name { get { return "SymbolicRegressionPoly10"; } }
57
58	public SymbolicRegressionPoly10Problem() {
59	this.grammar = new Grammar(grammarString);
60	this.TreeBasedGPGrammar = new Grammar(hlGrammarString);
61
62	this.N = 500;
63	this.x = new double[N][];
64	this.y = new double[N];
65
66	GenerateData();
67	}
68
69	private void GenerateData() {
70	// generate data with fixed seed to make sure that data is always the same
71	var rand = new Random(31415);
72	for (int i = 0; i < N; i++) {
73	x[i] = new double[10];
74	for (int j = 0; j < 10; j++) {
75	x[i][j] = rand.NextDouble() * 2 - 1;
76	}
77	// poly-10 no noise
78	/* ab + cd + ef + agi + cfj /
79	y[i] = x[i][0] * x[i][1] +
80	x[i][2] * x[i][3] +
81	x[i][4] * x[i][5] +
82	x[i][0] * x[i][6] * x[i][8] +
83	x[i][2] * x[i][5] * x[i][9];
84	}
85	}
86
87	public double BestKnownQuality(int maxLen) {
88	// for now only an upper bound is returned, ideally we have an R² of 1.0
89	// the optimal R² can only be reached for sentences of at least 23 symbols
90	return 1.0;
91	}
92
93	public IGrammar Grammar {
94	get { return grammar; }
95	}
96
97	public double Evaluate(string sentence) {
98	var interpreter = new ExpressionInterpreter();
99	return HeuristicLab.Common.Extensions.RSq(y, Enumerable.Range(0, N).Select(i => interpreter.Interpret(sentence, x[i])).ToArray());
100	}
101
102
103	// most-recently-used caching (with limited capacity) for canonical representations
104	MostRecentlyUsedCache<string, string> canonicalPhraseCache = new MostRecentlyUsedCache<string, string>(100000);
105	// right now only + and * is supported
106	public string CanonicalRepresentation(string phrase) {
107	string canonicalPhrase;
108	if (!canonicalPhraseCache.TryGetValue(phrase, out canonicalPhrase)) {
109	var terms = phrase.Split('+');
110	var canonicalTerms = new SortedSet<string>();
111	// only the last term might contain a NT-symbol. make sure this term is added at the end
112	for (int i = 0; i < terms.Length - 1; i++) {
113	canonicalTerms.Add(CanonicalTerm(terms[i]));
114	}
115
116	var sb = new StringBuilder(phrase.Length);
117	foreach (var t in canonicalTerms)
118	sb.Append(t).Append('+');
119
120	sb.Append(CanonicalTerm(terms[terms.Length - 1]));
121	canonicalPhrase = sb.ToString();
122	canonicalPhraseCache.Add(phrase, canonicalPhrase);
123	}
124	return canonicalPhrase;
125	}
126
127	// cache the canonical form of terms for performance reasons
128	private Dictionary<string, string> canonicalTermDictionary = new Dictionary<string, string>();
129	private string CanonicalTerm(string term) {
130	string canonicalTerm;
131	if (!canonicalTermDictionary.TryGetValue(term, out canonicalTerm)) {
132	// add
133	var chars = term.ToCharArray();
134	Array.Sort(chars);
135	var sb = new StringBuilder(chars.Length);
136	// we want to have the up-case characters last
137	for (int i = chars.Length - 1; i > 0; i--) {
138	if (chars[i] != '*') {
139	sb.Append(chars[i]);
140	if (chars[i - 1] != '') sb.Append('');
141	}
142	}
143	if (chars[0] != '*') sb.Append(chars[0]); // last term
144	canonicalTerm = sb.ToString();
145	canonicalTermDictionary.Add(term, canonicalTerm);
146	}
147	return canonicalTerm;
148	}
149
150	// splits the phrase into terms and creates (sparse) term-occurrance features
151	public IEnumerable<Feature> GetFeatures(string phrase) {
152	var canonicalTerms = new HashSet<string>();
153	foreach (string t in phrase.Split('+')) {
154	canonicalTerms.Add(CanonicalTerm(t));
155	}
156	return canonicalTerms.Select(entry => new Feature(entry, 1.0))
157	.Concat(new Feature[] { new Feature(CanonicalRepresentation(phrase), 1.0) });
158	}
159
160	public string ConvertTreeToSentence(ISymbolicExpressionTree tree) {
161	var sb = new StringBuilder();
162
163	TreeToSentence(tree.Root.GetSubtree(0).GetSubtree(0), sb);
164
165	return sb.ToString();
166	}
167
168	private void TreeToSentence(ISymbolicExpressionTreeNode treeNode, StringBuilder sb) {
169	if (treeNode.SubtreeCount == 0) {
170	// terminal
171	sb.Append(treeNode.Symbol.Name);
172	} else {
173	string op = string.Empty;
174	switch (treeNode.Symbol.Name) {
175	case "S": op = "+"; break;
176	case "P": op = "*"; break;
177	default: {
178	Debug.Assert(treeNode.SubtreeCount == 1);
179	break;
180	}
181	}
182	// nonterminal
183	if (op == "+") sb.Append("(");
184	TreeToSentence(treeNode.Subtrees.First(), sb);
185	foreach (var subTree in treeNode.Subtrees.Skip(1)) {
186	sb.Append(op);
187	TreeToSentence(subTree, sb);
188	}
189	if (op == "+") sb.Append(")");
190	}
191	}
192	}
193	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences