#region License Information
/* HeuristicLab
* Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
*/
#endregion
using System;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Random;
namespace HeuristicLab.Problems.Instances.DataAnalysis {
public class FriedmanRandomFunction : ArtificialRegressionDataDescriptor {
private readonly int nTrainingSamples;
private readonly int nTestSamples;
private readonly int numberOfFeatures;
private readonly double noiseRatio;
private readonly IRandom random;
public override string Name { get { return string.Format("FriedmanRandomFunction-{0:0%} ({1} dim)", noiseRatio, numberOfFeatures); } }
public override string Description {
get {
return "The data are generated using the random function generator described in 'Friedman: Greedy Function Approximation: A Gradient Boosting Machine, 1999'.";
}
}
public FriedmanRandomFunction(int numberOfFeatures, double noiseRatio,
IRandom rand)
: this(500, 5000, numberOfFeatures, noiseRatio, rand) { }
public FriedmanRandomFunction(int nTrainingSamples, int nTestSamples,
int numberOfFeatures, double noiseRatio, IRandom rand) {
this.nTrainingSamples = nTrainingSamples;
this.nTestSamples = nTestSamples;
this.noiseRatio = noiseRatio;
this.random = rand;
this.numberOfFeatures = numberOfFeatures;
}
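// Example usage (a sketch, not part of the original file; assumes HeuristicLab.Random's MersenneTwister
// and the GenerateRegressionData() factory method provided by ArtificialRegressionDataDescriptor):
//   var descriptor = new FriedmanRandomFunction(numberOfFeatures: 10, noiseRatio: 0.1, rand: new MersenneTwister(42));
//   var problemData = descriptor.GenerateRegressionData();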
protected override string TargetVariable { get { return "Y"; } }
protected override string[] VariableNames {
get { return AllowedInputVariables.Concat(new string[] { "Y" }).ToArray(); }
}
protected override string[] AllowedInputVariables {
get {
return Enumerable.Range(1, numberOfFeatures)
.Select(i => string.Format("X{0:000}", i))
.ToArray();
}
}
protected override int TrainingPartitionStart { get { return 0; } }
protected override int TrainingPartitionEnd { get { return nTrainingSamples; } }
protected override int TestPartitionStart { get { return nTrainingSamples; } }
protected override int TestPartitionEnd { get { return nTrainingSamples + nTestSamples; } }
protected override List<List<double>> GenerateValues() {
List<List<double>> data = new List<List<double>>();
var nrand = new NormalDistributedRandom(random, 0, 1);
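// each input variable X_j is sampled i.i.d. from the standard normal distribution N(0, 1),
// covering both the training and the test partition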
for (int c = 0; c < numberOfFeatures; c++) {
var datai = Enumerable.Range(0, TestPartitionEnd).Select(_ => nrand.NextDouble()).ToList();
data.Add(datai);
}
var y = GenerateRandomFunction(random, data);
//var targetSigma = y.StandardDeviation();
//var noisePrng = new NormalDistributedRandom(random, 0, targetSigma * Math.Sqrt(noiseRatio / (1.0 - noiseRatio)));
//data.Add(y.Select(t => t + noisePrng.NextDouble()).ToList());
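// noise model: scaling the noise to sigma_noise = sigma_y * sqrt(r / (1 - r)) makes the noise account
// for exactly the fraction r = noiseRatio of the total variance, since sigma_noise^2 / (sigma_y^2 + sigma_noise^2) = r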
data.Add(ValueGenerator.GenerateNoise(y, random, noiseRatio));
return data;
}
// as described in Greedy Function Approximation paper
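// The target is a sum of nTerms randomly parameterized basis functions,
//   y(x) = sum_{l=1..nTerms} a_l * g_l(z_l),
// where a_l ~ U(-1, 1), z_l is a randomly selected subset of n_l input variables, and each g_l is a
// random quadratic form over the mean-shifted selected variables (see SampleRandomFunction below).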
private IEnumerable<double> GenerateRandomFunction(IRandom rand, List<List<double>> xs, int nTerms = 20) {
int nRows = xs.First().Count;
var gz = new List<double[]>();
for (int i = 0; i < nTerms; i++) {
// alpha ~ U(-1, 1)
double alpha = rand.NextDouble() * 2 - 1;
double r = -Math.Log(1.0 - rand.NextDouble()) * 2.0; // r is exponentially distributed with mean 2 (inverse CDF sampling)
int nl = (int)Math.Floor(1.5 + r); // number of selected variables, typically between three and four
var selectedVars = xs.Shuffle(rand).Take(nl).ToArray(); // random subset of the input variables
gz.Add(SampleRandomFunction(rand, selectedVars)
.Select(f => alpha * f)
.ToArray());
}
// sum up
return Enumerable.Range(0, nRows)
.Select(r => gz.Sum(gzi => gzi[r]));
}
private IEnumerable<double> SampleRandomFunction(IRandom random, List<double>[] xs) {
int nl = xs.Length;
// mu is drawn uniformly from [-1, 1) (the paper samples mu from the same distribution as x)
double[] mu = Enumerable.Range(0, nl).Select(_ => random.NextDouble() * 2 - 1).ToArray();
var condNum = 4.0 / 0.01; // condition number implied by the largest and smallest eigenvalues given in the paper
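// alglib.spdmatrixrndcond generates a random symmetric positive definite nl x nl matrix with the
// requested condition number; it is used as the matrix V of the quadratic form below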
// temporarily use different random number generator in alglib
var curRand = alglib.math.rndobject;
alglib.math.rndobject = new System.Random(random.Next());
alglib.spdmatrixrndcond(nl, condNum, out var v);
// restore
alglib.math.rndobject = curRand;
int nRows = xs.First().Count;
var z = new double[nl];
var y = new double[nl];
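// per row: z = x - mu, y = V * z, and the returned term value is the quadratic form s = z' * V * z
// (note: the paper applies exp(-s / 2) to obtain a Gaussian basis function; this code returns s directly)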
for (int i = 0; i < nRows; i++) {
for (int j = 0; j < nl; j++) z[j] = xs[j][i] - mu[j];
alglib.rmatrixmv(nl, nl, v, 0, 0, 0, z, 0, ref y, 0);
// dot prod
var s = 0.0;
for (int j = 0; j < nl; j++) s += z[j] * y[j];
yield return s;
}
}
}
}