using System;
using System.Linq;
using HeuristicLab.Core;
using HeuristicLab.Problems.DataAnalysis;

namespace RegressionProblemInstances {
  /// <summary>
  /// General descriptor for regression problem instances. It produces a data set by
  /// sampling input vectors from a supplied random number generator and evaluating a
  /// given target expression on each of them.
  /// </summary>
  public class RegressionProblemInstanceDescriptor : IRegressionProblemInstanceDescriptor {
    private readonly IRandom rand;
    private readonly int dim;
    private readonly Func<double[], double> func;
    private readonly int numRows;

    /// <summary>Name of the problem instance; also used as the name of the generated data.</summary>
    public string Name { get; }

    /// <summary>Description of the problem instance.</summary>
    public string Description { get; }

    /// <summary>
    /// Creates a descriptor for a problem instance defined by a target expression.
    /// </summary>
    /// <param name="name">Name of the instance.</param>
    /// <param name="desc">Description of the instance.</param>
    /// <param name="rand">Random number generator used to sample the input values.</param>
    /// <param name="numRows">Number of rows to generate; must not be negative.</param>
    /// <param name="dim">Number of input variables; must not be negative.</param>
    /// <param name="func">Target expression y = f(x), evaluated on an input vector of length <paramref name="dim"/>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="rand"/> or <paramref name="func"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="numRows"/> or <paramref name="dim"/> is negative.</exception>
    public RegressionProblemInstanceDescriptor(string name, string desc, IRandom rand, int numRows, int dim, Func<double[], double> func) {
      // fail at construction time instead of deep inside GenerateData()
      if (rand == null) throw new ArgumentNullException(nameof(rand));
      if (func == null) throw new ArgumentNullException(nameof(func));
      if (numRows < 0) throw new ArgumentOutOfRangeException(nameof(numRows), "The number of rows must not be negative.");
      if (dim < 0) throw new ArgumentOutOfRangeException(nameof(dim), "The number of input variables must not be negative.");

      this.Name = name;
      this.Description = desc;
      this.rand = rand;
      this.numRows = numRows;
      this.dim = dim;
      this.func = func;
    }

    /// <summary>
    /// Generates the problem data: <c>numRows</c> rows with inputs "x1".."x{dim}" and the target "y".
    /// </summary>
    /// <remarks>
    /// Rows for which the target expression evaluates to NaN or infinity are discarded and
    /// re-sampled. Consequently this method does not terminate if the expression never
    /// produces a finite value for the sampled inputs.
    /// </remarks>
    /// <returns>Regression problem data named after this descriptor.</returns>
    public IRegressionProblemData GenerateData() {
      // materialize once; the variable names are enumerated for the dataset and for the problem data
      var inputs = Enumerable.Range(1, dim).Select(idx => "x" + idx).ToArray();
      var target = "y";

      // generate data
      var x = new double[dim]; // buffer for evaluating the expression (the same instance is passed on every call)
      var data = new double[numRows, dim + 1];

      int row = 0;
      while (row < numRows) {
        for (int col = 0; col < dim; col++) {
          // we use the supplied PRNG to generate x independently and identically distributed
          // the PRNG could use any kind of probability distribution
          x[col] = rand.NextDouble();
          data[row, col] = x[col];
        }

        double y = func(x); // y = f(x)
        data[row, dim] = y;

        // only accept reasonable values; otherwise the same row is overwritten in the next iteration
        if (!double.IsInfinity(y) && !double.IsNaN(y)) {
          row++;
        }
      }

      var ds = new Dataset(inputs.Concat(new string[] { target }), data);
      ds.Name = Name;

      var probData = new RegressionProblemData(ds, inputs, target);
      probData.Name = Name;
      return probData;
    }
  }
}