using System;
using System.Collections.Generic;
using System.Linq;
using System.Security;
using System.Security.AccessControl;
using System.Text;
using HeuristicLab.Common;
using HeuristicLab.Problems.DataAnalysis;
using HeuristicLab.Problems.Instances;
using HeuristicLab.Problems.Instances.DataAnalysis;

namespace HeuristicLab.Problems.GrammaticalOptimization.SymbReg {
  /// <summary>
  /// Provides a bridge to HL regression problem instances: loads one real-world
  /// regression data set, exposes a small expression grammar over its input
  /// variables, and scores sentences of that grammar by their R² against the
  /// training targets.
  /// </summary>
  public class SymbolicRegressionProblem : IProblem {
    // Token inside grammarString that the constructor replaces with the
    // terminal range (e.g. "a .. c"). Must match the text used in grammarString.
    // NOTE(review): the placeholder text was reconstructed; the reviewed copy
    // called Replace("") which throws ArgumentException. Confirm the name.
    private const string VariablesPlaceholder = "<Variables>";

    // Grammar template: E is the sentence symbol, V stands for one input variable.
    // NOTE(review): laid out with one production per line on the assumption that
    // Grammar parses its input line by line - confirm against Grammar.
    private const string grammarString = @"
        G(E):
        E -> V | V+E | V-E | V*E | V/E | (E)
        V -> <Variables>
        ";

    private readonly IGrammar grammar;
    private readonly ExpressionInterpreter interpreter;
    private readonly int N;        // number of training rows
    private readonly double[][] x; // x[row][variable]: input values of the training rows
    private readonly double[] y;   // target values of the training rows
    private readonly int d;        // number of allowed input variables

    /// <summary>
    /// Loads the real-world regression instance whose name contains
    /// <paramref name="partOfName"/> and builds the grammar and interpreter for it.
    /// </summary>
    /// <param name="partOfName">Substring identifying exactly one data descriptor by name.</param>
    /// <exception cref="ArgumentNullException"><paramref name="partOfName"/> is null.</exception>
    /// <exception cref="InvalidOperationException">
    /// Zero or more than one descriptor name contains <paramref name="partOfName"/> (thrown by Single).
    /// </exception>
    /// <exception cref="NotSupportedException">The instance has more than 26 allowed input variables.</exception>
    public SymbolicRegressionProblem(string partOfName) {
      if (partOfName == null) throw new ArgumentNullException("partOfName");

      var instanceProvider = new RegressionRealWorldInstanceProvider();
      // NOTE(review): the type argument was missing in the reviewed copy;
      // IDataDescriptor is assumed because only Name is needed below - confirm.
      var dds = instanceProvider.GetDataDescriptors().OfType<IDataDescriptor>();
      var problemData = instanceProvider.LoadData(dds.Single(ds => ds.Name.Contains(partOfName)));

      this.N = problemData.TrainingIndices.Count();
      this.d = problemData.AllowedInputVariables.Count();
      // we only allow single-character terminal symbols so far ('a' .. 'z')
      if (d > 26) {
        throw new NotSupportedException("At most 26 input variables are supported, but the instance has " + d + ".");
      }

      this.x = new double[N][];
      this.y = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices).ToArray();

      // copy the training partition into a dense row-major array
      int i = 0;
      foreach (var r in problemData.TrainingIndices) {
        x[i] = new double[d];
        int j = 0;
        foreach (var inputVariable in problemData.AllowedInputVariables) {
          x[i][j++] = problemData.Dataset.GetDoubleValue(inputVariable, r);
        }
        i++;
      }

      // the k-th allowed input variable is represented by the k-th lowercase letter
      char firstVar = 'a';
      char lastVar = (char)('a' + d - 1);
      this.grammar = new Grammar(grammarString.Replace(VariablesPlaceholder, firstVar + " .. " + lastVar));
      this.interpreter = new ExpressionInterpreter();
    }

    /// <summary>
    /// Returns the best quality that can be reached.
    /// </summary>
    /// <param name="maxLen">Maximum sentence length; not used by this implementation.</param>
    /// <returns>Always 1.0.</returns>
    public double BestKnownQuality(int maxLen) {
      // for now only an upper bound is returned, ideally we have an R² of 1.0
      return 1.0;
    }

    /// <summary>The grammar built for the loaded instance.</summary>
    public IGrammar Grammar {
      get { return grammar; }
    }

    /// <summary>
    /// Interprets <paramref name="sentence"/> on every training row and returns the
    /// RSq value between the target values and the interpreted outputs.
    /// </summary>
    /// <param name="sentence">Expression to evaluate.</param>
    public double Evaluate(string sentence) {
      var predictions = Enumerable.Range(0, N).Select(i => interpreter.Interpret(sentence, x[i]));
      return HeuristicLab.Common.Extensions.RSq(y, predictions);
    }

    /// <summary>
    /// Normalizes a phrase so that reorderings of the same sum of products map to the
    /// same string: the phrase is split at '+', '*' is removed from each term, the
    /// characters inside each term are sorted, and the terms are sorted and re-joined
    /// with '+'.
    /// </summary>
    /// <param name="terminalPhrase">Phrase to normalize.</param>
    /// <returns>The normalized phrase.</returns>
    // right now only + and * is supported
    public string CanonicalRepresentation(string terminalPhrase) {
      var terms = terminalPhrase.Split('+');
      var sortedTerms = terms
        .Select(term => string.Join("", term.Replace("*", "").OrderBy(ch => ch)))
        .OrderBy(term => term);
      return string.Join("+", sortedTerms);
    }
  }
}