source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.SymbReg/SymbolicRegressionProblem.cs @ 11742

Last change on this file since 11742 was 11742, checked in by gkronber, 7 years ago

#2283 refactoring

File size: 2.9 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Linq;
4using System.Security;
5using System.Security.AccessControl;
6using System.Text;
7using HeuristicLab.Common;
8using HeuristicLab.Problems.DataAnalysis;
9using HeuristicLab.Problems.Instances;
10using HeuristicLab.Problems.Instances.DataAnalysis;
11
12namespace HeuristicLab.Problems.GrammaticalOptimization.SymbReg {
13  // provides bridge to HL regression problem instances
14  public class SymbolicRegressionProblem : IProblem {
15    private const string grammarString = @"
16        G(E):
17        E -> V | V+E | V-E | V*E | (E)
18        V -> <variables>
19        ";
20
21
22    private readonly IGrammar grammar;
23    private readonly ExpressionInterpreter interpreter;
24
25    private readonly int N;
26    private readonly double[][] x;
27    private readonly double[] y;
28    private readonly int d;
29
30
31    public SymbolicRegressionProblem(string partOfName) {
32      var instanceProvider = new RegressionRealWorldInstanceProvider();
33      var dds = instanceProvider.GetDataDescriptors().OfType<RegressionDataDescriptor>();
34
35      var problemData = instanceProvider.LoadData(dds.Single(ds => ds.Name.Contains(partOfName)));
36
37      this.N = problemData.TrainingIndices.Count();
38      this.d = problemData.AllowedInputVariables.Count();
39      if (d > 26) throw new NotSupportedException(); // we only allow single-character terminal symbols so far
40      this.x = new double[N][];
41      this.y = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices).ToArray();
42
43      int i = 0;
44      foreach (var r in problemData.TrainingIndices) {
45        x[i] = new double[d];
46        int j = 0;
47        foreach (var inputVariable in problemData.AllowedInputVariables) {
48          x[i][j++] = problemData.Dataset.GetDoubleValue(inputVariable, r);
49        }
50        i++;
51      }
52
53      char firstVar = 'a';
54      char lastVar = Convert.ToChar(Convert.ToByte('a') + d - 1);
55      this.grammar = new Grammar(grammarString.Replace("<variables>", firstVar + " .. " + lastVar));
56      this.interpreter = new ExpressionInterpreter();
57
58    }
59
60
61    public double BestKnownQuality(int maxLen) {
62      // for now only an upper bound is returned, ideally we have an R² of 1.0
63      return 1.0;
64    }
65
66    public IGrammar Grammar {
67      get { return grammar; }
68    }
69
70    public double Evaluate(string sentence) {
71      return HeuristicLab.Common.Extensions.RSq(y, Enumerable.Range(0, N).Select(i => interpreter.Interpret(sentence, x[i])));
72    }
73
74
75    // right now only + and * is supported
76    public string CanonicalRepresentation(string terminalPhrase) {
77      return terminalPhrase;
78      //var terms = terminalPhrase.Split('+');
79      //return string.Join("+", terms.Select(term => string.Join("", term.Replace("*", "").OrderBy(ch => ch)))
80      //  .OrderBy(term => term));
81    }
82  }
83}
Note: See TracBrowser for help on using the repository browser.