[11659] | 1 | using System;
|
---|
| 2 | using System.Collections.Generic;
|
---|
| 3 | using System.Linq;
|
---|
| 4 | using System.Security;
|
---|
| 5 | using System.Security.AccessControl;
|
---|
| 6 | using System.Text;
|
---|
| 7 |
|
---|
| 8 | namespace HeuristicLab.Problems.GrammaticalOptimization {
|
---|
| 9 | public class SymbolicRegressionPoly10Problem : IProblem {
|
---|
| 10 | // length of the longest palindrome in the sentence + number of different symbols occurring in the palindrome
|
---|
| 11 | private const string grammarString = @"
|
---|
| 12 | G(E):
|
---|
| 13 | E -> V | V+E | V-E | V*E | V/E | (E)
|
---|
| 14 | V -> a .. j
|
---|
| 15 | ";
|
---|
| 16 |
|
---|
| 17 | private readonly IGrammar grammar;
|
---|
| 18 | private readonly ExpressionInterpreter interpreter;
|
---|
| 19 |
|
---|
| 20 | private readonly int N;
|
---|
| 21 | private readonly double[][] x;
|
---|
| 22 | private readonly double[] y;
|
---|
| 23 |
|
---|
| 24 | public SymbolicRegressionPoly10Problem() {
|
---|
| 25 | this.grammar = new Grammar(grammarString);
|
---|
| 26 | this.interpreter = new ExpressionInterpreter();
|
---|
| 27 |
|
---|
| 28 | this.N = 500;
|
---|
| 29 | this.x = new double[N][];
|
---|
| 30 | this.y = new double[N];
|
---|
| 31 |
|
---|
| 32 | GenerateData();
|
---|
| 33 | }
|
---|
| 34 |
|
---|
| 35 | private void GenerateData() {
|
---|
| 36 | // generate data with fixed seed to make sure that data is always the same
|
---|
| 37 | var rand = new Random(31415);
|
---|
| 38 | for (int i = 0; i < N; i++) {
|
---|
| 39 | x[i] = new double[10];
|
---|
| 40 | for (int j = 0; j < 10; j++) {
|
---|
| 41 | x[i][j] = rand.NextDouble() * 2 - 1;
|
---|
| 42 | }
|
---|
| 43 | // poly-10 no noise
|
---|
| 44 | y[i] = x[i][0] * x[i][1] +
|
---|
| 45 | x[i][2] * x[i][3] +
|
---|
| 46 | x[i][4] * x[i][5] +
|
---|
| 47 | x[i][0] * x[i][6] * x[i][8] +
|
---|
| 48 | x[i][2] * x[i][5] * x[i][9];
|
---|
| 49 | }
|
---|
| 50 | }
|
---|
| 51 |
|
---|
| 52 | public double GetBestKnownQuality(int maxLen) {
|
---|
| 53 | // for now only an upper bound is returned, ideally we have an R² of 1.0
|
---|
| 54 | // the optimal R² can only be reached for sentences of at least 23 symbols
|
---|
| 55 | return 1.0;
|
---|
| 56 | }
|
---|
| 57 |
|
---|
| 58 | public IGrammar Grammar {
|
---|
| 59 | get { return grammar; }
|
---|
| 60 | }
|
---|
| 61 |
|
---|
| 62 | public double Evaluate(string sentence) {
|
---|
| 63 | return RSq(y, Enumerable.Range(0, N).Select(i => interpreter.Interpret(sentence, x[i])).ToArray());
|
---|
| 64 | }
|
---|
| 65 |
|
---|
| 66 | private double RSq(IEnumerable<double> xs, IEnumerable<double> ys) {
|
---|
| 67 | // two pass implementation, but we don't care
|
---|
| 68 | var meanX = xs.Average();
|
---|
| 69 | var meanY = ys.Average();
|
---|
| 70 |
|
---|
| 71 | var s = 0.0;
|
---|
| 72 | var ssX = 0.0;
|
---|
| 73 | var ssY = 0.0;
|
---|
| 74 | foreach (var p in xs.Zip(ys, (x, y) => new { x, y })) {
|
---|
| 75 | s += (p.x - meanX) * (p.y - meanY);
|
---|
| 76 | ssX += Math.Pow(p.x - meanX, 2);
|
---|
| 77 | ssY += Math.Pow(p.y - meanY, 2);
|
---|
| 78 | }
|
---|
| 79 |
|
---|
| 80 | if (s.IsAlmost(0)) return 0;
|
---|
| 81 | if (ssX.IsAlmost(0) || ssY.IsAlmost(0)) return 0;
|
---|
| 82 | return s * s / (ssX * ssY);
|
---|
| 83 | }
|
---|
| 84 | }
|
---|
| 85 | }
|
---|