Changeset 14872


Ignore:
Timestamp:
04/14/17 17:53:30 (7 months ago)
Author:
gkronber
Message:

#2699: made a number of changes mainly to RBF regression model

Location:
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4
Files:
7 deleted
12 edited
1 moved

Legend:

Unmodified
Added
Removed
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj

    r14870 r14872  
    369369    <Compile Include="Plugin.cs" />
    370370    <Compile Include="Properties\AssemblyInfo.cs" />
    371     <Compile Include="RadialBasisFunctions\Distances\AngularDistance.cs" />
    372     <Compile Include="RadialBasisFunctions\Distances\DistanceBase.cs" />
    373     <Compile Include="RadialBasisFunctions\Distances\EuclidianDistance.cs" />
    374     <Compile Include="RadialBasisFunctions\Distances\ManhattanDistance.cs" />
    375     <Compile Include="RadialBasisFunctions\Interfaces\IDistance.cs" />
    376     <Compile Include="RadialBasisFunctions\Interfaces\IKernelFunction.cs" />
    377371    <Compile Include="RadialBasisFunctions\KernelFunctions\CicularKernel.cs" />
    378372    <Compile Include="RadialBasisFunctions\KernelFunctions\GaussianKernel.cs" />
    379     <Compile Include="RadialBasisFunctions\KernelFunctions\InverseMultiquadraticKernel .cs" />
     373    <Compile Include="RadialBasisFunctions\KernelFunctions\InverseMultiquadraticKernel.cs" />
    380374    <Compile Include="RadialBasisFunctions\KernelFunctions\KernelBase.cs" />
    381375    <Compile Include="RadialBasisFunctions\KernelFunctions\LaplacianKernel.cs" />
     
    383377    <Compile Include="RadialBasisFunctions\KernelFunctions\PolysplineKernel.cs" />
    384378    <Compile Include="RadialBasisFunctions\KernelFunctions\ThinPlatePolysplineKernel.cs" />
    385     <Compile Include="RadialBasisFunctions\MatrixUtilities.cs" />
    386379    <Compile Include="RadialBasisFunctions\RadialBasisFunctionModel.cs" />
    387     <Compile Include="RadialBasisFunctions\RadialBasisFunctionRegressionSolution.cs" />
    388380    <Compile Include="RadialBasisFunctions\RadialBasisRegression.cs" />
    389381    <Compile Include="RandomForest\RandomForestClassificationSolution.cs" />
     
    442434    </BootstrapperPackage>
    443435  </ItemGroup>
     436  <ItemGroup>
     437    <Folder Include="RadialBasisFunctions\Distances\" />
     438    <Folder Include="RadialBasisFunctions\Interfaces\" />
     439  </ItemGroup>
    444440  <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
    445441  <!-- To modify your build process, add your task inside one of the targets below and uncomment it.
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/RadialBasisFunctions/KernelFunctions/CicularKernel.cs

    r14386 r14872  
    3030  [StorableClass]
    3131  [Item("CircularKernel", "A circular kernel function")]
    32   public class CircularKernel<T> : KernelBase<T> {
     32  public class CircularKernel : KernelBase {
    3333
    3434    #region HLConstructors & Boilerplate
     
    3737    [StorableHook(HookType.AfterDeserialization)]
    3838    private void AfterDeserialization() { }
    39     protected CircularKernel(CircularKernel<T> original, Cloner cloner) : base(original, cloner) { }
     39    protected CircularKernel(CircularKernel original, Cloner cloner) : base(original, cloner) { }
    4040    public CircularKernel() {
    41       Parameters.Add(new FixedValueParameter<DoubleValue>(BetaParameterName, "The beta in the kernelfunction 2*pi*(acos(-d)-d*(1-n²)^(0.5)) where n = ||x-c|| and d = n/beta", new DoubleValue(2)));
     41      Parameters.Add(new FixedValueParameter<DoubleValue>(BetaParameterName, "The beta in the kernel function 2*pi*(acos(-d)-d*(1-n²)^(0.5)) where n = ||x-c|| and d = n/beta", new DoubleValue(2)));
    4242    }
    4343    public override IDeepCloneable Clone(Cloner cloner) {
    44       return new CircularKernel<T>(this, cloner);
     44      return new CircularKernel(this, cloner);
    4545    }
    4646    #endregion
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/RadialBasisFunctions/KernelFunctions/GaussianKernel.cs

    r14386 r14872  
    3030  [StorableClass]
    3131  [Item("GaussianKernel", "A kernel function that uses Gaussian function")]
    32   public class GaussianKernel<T> : KernelBase<T> {
     32  public class GaussianKernel : KernelBase {
    3333
    3434    #region HLConstructors & Boilerplate
     
    3737    [StorableHook(HookType.AfterDeserialization)]
    3838    private void AfterDeserialization() { }
    39     protected GaussianKernel(GaussianKernel<T> original, Cloner cloner) : base(original, cloner) { }
     39    protected GaussianKernel(GaussianKernel original, Cloner cloner) : base(original, cloner) { }
    4040    public GaussianKernel() {
    4141      Parameters.Add(new FixedValueParameter<DoubleValue>(BetaParameterName, "The beta in the kernelfunction exp(-||x-c||/beta²)", new DoubleValue(2)));
    4242    }
    4343    public override IDeepCloneable Clone(Cloner cloner) {
    44       return new GaussianKernel<T>(this, cloner);
     44      return new GaussianKernel(this, cloner);
    4545    }
    4646    #endregion
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/RadialBasisFunctions/KernelFunctions/InverseMultiquadraticKernel.cs

    r14871 r14872  
    3030  [StorableClass]
    3131  [Item("InverseMultiquadraticKernel", "A kernel function that uses the inverse multiquadratic function")]
    32   public class InverseMultiquadraticKernel<T> : KernelBase<T> {
     32  public class InverseMultiquadraticKernel : KernelBase {
    3333    #region HLConstructors & Boilerplate
    3434    [StorableConstructor]
     
    3636    [StorableHook(HookType.AfterDeserialization)]
    3737    private void AfterDeserialization() { }
    38     protected InverseMultiquadraticKernel(InverseMultiquadraticKernel<T> original, Cloner cloner) : base(original, cloner) { }
     38    protected InverseMultiquadraticKernel(InverseMultiquadraticKernel original, Cloner cloner) : base(original, cloner) { }
    3939    public InverseMultiquadraticKernel() {
    40       Parameters.Add(new FixedValueParameter<DoubleValue>(BetaParameterName, "The beta in the kernelfunction sqrt(1+||x-c||^2/beta)", new DoubleValue(2)));
     40      Parameters.Add(new FixedValueParameter<DoubleValue>(BetaParameterName, "The beta in the kernel function 1 / sqrt(1+||x-c||^2/beta)", new DoubleValue(2)));
    4141    }
    4242    public override IDeepCloneable Clone(Cloner cloner) {
    43       return new InverseMultiquadraticKernel<T>(this, cloner);
     43      return new InverseMultiquadraticKernel(this, cloner);
    4444    }
    4545    #endregion
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/RadialBasisFunctions/KernelFunctions/KernelBase.cs

    r14386 r14872  
    3131namespace HeuristicLab.Algorithms.DataAnalysis {
    3232  [StorableClass]
    33   public abstract class KernelBase<T> : ParameterizedNamedItem, IKernelFunction<T> {
     33  public abstract class KernelBase : ParameterizedNamedItem, ICovarianceFunction {
    3434
    3535    #region Parameternames
     
    3838    #endregion
    3939    #region Parameterproperties
    40     public ValueParameter<IDistance<T>> DistanceParameter
    41     {
    42       get { return Parameters[DistanceParameterName] as ValueParameter<IDistance<T>>; }
     40    public ValueParameter<IDistance> DistanceParameter {
     41      get { return Parameters[DistanceParameterName] as ValueParameter<IDistance>; }
    4342    }
    4443
    45     public IFixedValueParameter<DoubleValue> BetaParameter
    46     {
     44    public IFixedValueParameter<DoubleValue> BetaParameter {
    4745      get { return Parameters[BetaParameterName] as FixedValueParameter<DoubleValue>; }
    4846    }
     
    5048    #endregion
    5149    #region Properties
    52     public IDistance<T> Distance
    53     {
     50    public IDistance Distance {
    5451      get { return DistanceParameter.Value; }
    5552    }
    5653
    57     public double Beta
    58     {
     54    public double Beta {
    5955      get { return BetaParameter.Value.Value; }
    6056    }
     
    6258    #endregion
    6359
    64     #region HLConstructors & Boilerplate
    6560    [StorableConstructor]
    6661    protected KernelBase(bool deserializing) : base(deserializing) { }
     
    6863    private void AfterDeserialization() { }
    6964
    70     protected KernelBase(KernelBase<T> original, Cloner cloner)
     65    protected KernelBase(KernelBase original, Cloner cloner)
    7166      : base(original, cloner) { }
    7267
    7368    protected KernelBase() {
    74       Parameters.Add(new ValueParameter<IDistance<T>>(DistanceParameterName, "The distance function used for kernel calculation"));
    75       DistanceParameter.Value = new EuclidianDistance() as IDistance<T>;
     69      Parameters.Add(new ValueParameter<IDistance>(DistanceParameterName, "The distance function used for kernel calculation"));
     70      DistanceParameter.Value = new EuclideanDistance();
    7671    }
    77     #endregion
    7872
    79     public double Get(T a, T b) {
     73    public double Get(object a, object b) {
    8074      return Get(Distance.Get(a, b));
    8175    }
     
    9387    public ParameterizedCovarianceFunction GetParameterizedCovarianceFunction(double[] p, int[] columnIndices) {
    9488      if (p == null || p.Length != 1) throw new ArgumentException("Illegal parametrization");
    95       var myClone = (KernelBase<T>)Clone(new Cloner());
     89      var myClone = (KernelBase)Clone(new Cloner());
    9690      myClone.BetaParameter.Value.Value = p[0];
    9791      var cov = new ParameterizedCovarianceFunction {
     
    107101    protected double GetNorm(double[,] x, double[,] xt, int i, int j, int[] columnIndices) {
    108102      var dist = Distance as IDistance<IEnumerable<double>>;
    109       if (dist == null) throw new ArgumentException("The Distance needs to apply to double-Vectors");
     103      if (dist == null) throw new ArgumentException("The distance needs to apply to double vectors");
    110104      var r1 = new IndexedEnumerable(x, i, columnIndices);
    111105      var r2 = new IndexedEnumerable(xt, j, columnIndices);
     
    142136      }
    143137
    144       public double Current
    145       {
     138      public double Current {
    146139        get { return data[row, column.Current]; }
    147140      }
    148141
    149       object IEnumerator.Current
    150       {
    151         get
    152         {
     142      object IEnumerator.Current {
     143        get {
    153144          return data[row, column.Current];
    154145        }
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/RadialBasisFunctions/KernelFunctions/LaplacianKernel.cs

    r14386 r14872  
    2929namespace HeuristicLab.Algorithms.DataAnalysis {
    3030  [StorableClass]
    31   [Item("LaplacianKernel", "A kernel function that uses an exponential function.\nIt is equvalent to the Gaussiankernel but less suseptible to improper values of beta")]
    32   public class LaplacianKernel<T> : KernelBase<T> {
    33 
    34     #region HLConstructors & Boilerplate
     31  [Item("LaplacianKernel", "A kernel function that uses an exponential function.\nIt is equvalent to the Gaussian kernel but less susceptible to improper values of beta")]
     32  public class LaplacianKernel : KernelBase {
    3533    [StorableConstructor]
    3634    protected LaplacianKernel(bool deserializing) : base(deserializing) { }
    3735    [StorableHook(HookType.AfterDeserialization)]
    3836    private void AfterDeserialization() { }
    39     protected LaplacianKernel(LaplacianKernel<T> original, Cloner cloner) : base(original, cloner) { }
     37    protected LaplacianKernel(LaplacianKernel original, Cloner cloner) : base(original, cloner) { }
    4038    public LaplacianKernel() {
    41       Parameters.Add(new FixedValueParameter<DoubleValue>(BetaParameterName, "The beta in the kernelfunction exp(-||x-c||/beta)", new DoubleValue(2)));
     39      Parameters.Add(new FixedValueParameter<DoubleValue>(BetaParameterName, "The beta in the kernel function exp(-||x-c||/beta)", new DoubleValue(2)));
    4240    }
    4341    public override IDeepCloneable Clone(Cloner cloner) {
    44       return new LaplacianKernel<T>(this, cloner);
     42      return new LaplacianKernel(this, cloner);
    4543    }
    46     #endregion
    4744
    4845    protected override double Get(double norm) {
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/RadialBasisFunctions/KernelFunctions/MultiquadraticKernel.cs

    r14386 r14872  
    3030  [StorableClass]
    3131  [Item("MultiquadraticKernel", "A kernel function that uses the multiquadratic function")]
    32   public class MultiquadraticKernel<T> : KernelBase<T> {
     32  public class MultiquadraticKernel : KernelBase {
    3333
    3434    #region HLConstructors & Boilerplate
     
    3737    [StorableHook(HookType.AfterDeserialization)]
    3838    private void AfterDeserialization() { }
    39     protected MultiquadraticKernel(MultiquadraticKernel<T> original, Cloner cloner)
     39    protected MultiquadraticKernel(MultiquadraticKernel original, Cloner cloner)
    4040                : base(original, cloner) { }
    4141
    4242    public MultiquadraticKernel() {
    43       Parameters.Add(new FixedValueParameter<DoubleValue>(BetaParameterName, "The beta in the kernelfunction sqrt(1+||x-c||²/beta)", new DoubleValue(2)));
     43      Parameters.Add(new FixedValueParameter<DoubleValue>(BetaParameterName, "The beta in the kernel function sqrt(1+||x-c||²/beta)", new DoubleValue(2)));
    4444    }
    4545    public override IDeepCloneable Clone(Cloner cloner) {
    46       return new MultiquadraticKernel<T>(this, cloner);
     46      return new MultiquadraticKernel(this, cloner);
    4747    }
    4848    #endregion
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/RadialBasisFunctions/KernelFunctions/PolysplineKernel.cs

    r14386 r14872  
    3030  [StorableClass]
    3131  [Item("PolysplineKernel", "A kernel function that uses the multiquadratic function")]
    32   public class PolysplineKernel<T> : KernelBase<T> {
     32  public class PolysplineKernel : KernelBase {
    3333
    3434    #region HLConstructors & Boilerplate
     
    3737    [StorableHook(HookType.AfterDeserialization)]
    3838    private void AfterDeserialization() { }
    39     protected PolysplineKernel(PolysplineKernel<T> original, Cloner cloner)
     39    protected PolysplineKernel(PolysplineKernel original, Cloner cloner)
    4040                : base(original, cloner) { }
    4141    public PolysplineKernel() {
     
    4343    }
    4444    public override IDeepCloneable Clone(Cloner cloner) {
    45       return new PolysplineKernel<T>(this, cloner);
     45      return new PolysplineKernel(this, cloner);
    4646    }
    4747    #endregion
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/RadialBasisFunctions/KernelFunctions/ThinPlatePolysplineKernel.cs

    r14386 r14872  
    3030  [StorableClass]
    3131  [Item("ThinPlatePolysplineKernel", "A kernel function that uses the ThinPlatePolyspline function")]
    32   public class ThinPlatePolysplineKernel<T> : KernelBase<T> {
     32  public class ThinPlatePolysplineKernel : KernelBase {
    3333    #region HLConstructors & Boilerplate
    3434    [StorableConstructor]
     
    3636    [StorableHook(HookType.AfterDeserialization)]
    3737    private void AfterDeserialization() { }
    38     protected ThinPlatePolysplineKernel(ThinPlatePolysplineKernel<T> original, Cloner cloner) : base(original, cloner) { }
     38    protected ThinPlatePolysplineKernel(ThinPlatePolysplineKernel original, Cloner cloner) : base(original, cloner) { }
    3939    public ThinPlatePolysplineKernel() {
    4040      Parameters.Add(new FixedValueParameter<DoubleValue>(BetaParameterName, "The Beta in the kernelfunction ||x-c||^(2*Beta)*log(||x-c||^Beta)", new DoubleValue(1)));
    4141    }
    4242    public override IDeepCloneable Clone(Cloner cloner) {
    43       return new ThinPlatePolysplineKernel<T>(this, cloner);
     43      return new ThinPlatePolysplineKernel(this, cloner);
    4444    }
    4545    #endregion
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/RadialBasisFunctions/RadialBasisFunctionModel.cs

    r14386 r14872  
    1 #region License Information
     1#region License Information
    22/* HeuristicLab
    33 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     
    2222using System;
    2323using System.Collections.Generic;
     24using System.Diagnostics;
    2425using System.Linq;
    2526using HeuristicLab.Common;
    2627using HeuristicLab.Core;
    27 using HeuristicLab.Data;
    2828using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    2929using HeuristicLab.Problems.DataAnalysis;
     
    3131namespace HeuristicLab.Algorithms.DataAnalysis {
    3232  /// <summary>
    33   /// Represents a Radial Basis Function regression model.
     33  /// Represents an RBF regression model.
    3434  /// </summary>
    3535  [StorableClass]
    36   [Item("RBFModel", "Represents a Gaussian process posterior.")]
     36  [Item("RBFModel", "An RBF regression model")]
    3737  public sealed class RadialBasisFunctionModel : RegressionModel, IConfidenceRegressionModel {
    38     public override IEnumerable<string> VariablesUsedForPrediction
    39     {
     38    public override IEnumerable<string> VariablesUsedForPrediction {
    4039      get { return allowedInputVariables; }
    4140    }
    4241
    4342    [Storable]
    44     private string[] allowedInputVariables;
    45     public string[] AllowedInputVariables
    46     {
     43    private readonly string[] allowedInputVariables;
     44    public string[] AllowedInputVariables {
    4745      get { return allowedInputVariables; }
    4846    }
    4947
    5048    [Storable]
    51     private double[] alpha;
    52     [Storable]
    53     private IDataset trainingDataset; // it is better to store the original training dataset completely because this is more efficient in persistence
    54     [Storable]
    55     private int[] trainingRows;
    56     [Storable]
    57     private IKernelFunction<double[]> kernel;
    58     [Storable]
    59     private DoubleMatrix gramInvert;
    60 
     49    private readonly double[] alpha;
     50
     51    [Storable]
      52    private readonly double[,] trainX; // extracted (and, if scaling is enabled, scaled) training inputs: one row per training sample, one column per allowed input variable
     53
     54    [Storable]
     55    private readonly ITransformation<double>[] scaling;
     56
     57    [Storable]
     58    private readonly ICovarianceFunction kernel;
     59
     60    private double[,] gramInvert; // not storable as it can be large (recreate after deserialization as required)
     61
     62    [Storable]
     63    private readonly double meanOffset; // implementation works for zero-mean target variables
    6164
    6265    [StorableConstructor]
     
    6568      : base(original, cloner) {
    6669      // shallow copies of arrays because they cannot be modified
    67       trainingRows = original.trainingRows;
    6870      allowedInputVariables = original.allowedInputVariables;
    6971      alpha = original.alpha;
    70       trainingDataset = original.trainingDataset;
    71       kernel = original.kernel;
    72     }
    73     public RadialBasisFunctionModel(IDataset dataset, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<int> rows, IKernelFunction<double[]> kernel)
     72      trainX = original.trainX;
     73      gramInvert = original.gramInvert;
     74      scaling = original.scaling;
     75
     76      meanOffset = original.meanOffset;
     77      if (original.kernel != null)
     78        kernel = cloner.Clone(original.kernel);
     79    }
     80    public RadialBasisFunctionModel(IDataset dataset, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<int> rows,
     81      bool scaleInputs, ICovarianceFunction kernel)
    7482      : base(targetVariable) {
     83      if (kernel.GetNumberOfParameters(allowedInputVariables.Count()) > 0) throw new ArgumentException("All parameters in the kernel function must be specified.");
    7584      name = ItemName;
    7685      description = ItemDescription;
    7786      this.allowedInputVariables = allowedInputVariables.ToArray();
    78       trainingRows = rows.ToArray();
    79       trainingDataset = dataset;
    80       this.kernel = (IKernelFunction<double[]>)kernel.Clone();
     87      var trainingRows = rows.ToArray();
     88      this.kernel = (ICovarianceFunction)kernel.Clone();
    8189      try {
    82         var data = ExtractData(dataset, trainingRows);
    83         var qualities = dataset.GetDoubleValues(targetVariable, trainingRows).ToArray();
     90        if (scaleInputs)
     91          scaling = CreateScaling(dataset, trainingRows);
     92        trainX = ExtractData(dataset, trainingRows, scaling);
     93        var y = dataset.GetDoubleValues(targetVariable, trainingRows).ToArray();
     94        meanOffset = y.Average();
     95        for (int i = 0; i < y.Length; i++) y[i] -= meanOffset;
    8496        int info;
     97        // TODO: improve efficiency by decomposing matrix once instead of solving the system and then inverting the matrix
    8598        alglib.densesolverlsreport denseSolveRep;
    86         var gr = BuildGramMatrix(data);
    87         alglib.rmatrixsolvels(gr, data.Length + 1, data.Length + 1, qualities.Concat(new[] { 0.0 }).ToArray(), 0.0, out info, out denseSolveRep, out alpha);
    88         if (info != 1) throw new ArgumentException("Could not create Model.");
    89         gramInvert = new DoubleMatrix(gr).Invert();
    90       }
    91       catch (alglib.alglibexception ae) {
     99        gramInvert = BuildGramMatrix(trainX);
     100        int n = trainX.GetLength(0);
     101        alglib.rmatrixsolvels(gramInvert, n, n, y, 0.0, out info, out denseSolveRep, out alpha);
     102        if (info != 1) throw new ArgumentException("Could not create model.");
     103
     104        alglib.matinvreport report;
     105        alglib.rmatrixinverse(ref gramInvert, out info, out report);
     106        if (info != 1) throw new ArgumentException("Could not invert matrix. Is it quadratic symmetric positive definite?");
     107
     108      } catch (alglib.alglibexception ae) {
    92109        // wrap exception so that calling code doesn't have to know about alglib implementation
    93         throw new ArgumentException("There was a problem in the calculation of the RBF process model", ae);
    94       }
    95     }
    96     private double[][] ExtractData(IDataset dataset, IEnumerable<int> rows) {
    97       return rows.Select(r => allowedInputVariables.Select(v => dataset.GetDoubleValue(v, r)).ToArray()).ToArray();
     110        throw new ArgumentException("There was a problem in the calculation of the RBF model", ae);
     111      }
     112    }
     113
     114    private ITransformation<double>[] CreateScaling(IDataset dataset, int[] rows) {
     115      var trans = new ITransformation<double>[allowedInputVariables.Length];
     116      int i = 0;
     117      foreach (var variable in allowedInputVariables) {
     118        var lin = new LinearTransformation(allowedInputVariables);
     119        var max = dataset.GetDoubleValues(variable, rows).Max();
     120        var min = dataset.GetDoubleValues(variable, rows).Min();
     121        lin.Multiplier = 1.0 / (max - min);
     122        lin.Addend = -min / (max - min);
     123        trans[i] = lin;
     124        i++;
     125      }
     126      return trans;
     127    }
     128
     129    private double[,] ExtractData(IDataset dataset, IEnumerable<int> rows, ITransformation<double>[] scaling = null) {
     130      double[][] variables;
     131      if (scaling != null) {
     132        variables =
     133          allowedInputVariables.Select((var, i) => scaling[i].Apply(dataset.GetDoubleValues(var, rows)).ToArray())
     134            .ToArray();
     135      } else {
     136        variables =
     137        allowedInputVariables.Select(var => dataset.GetDoubleValues(var, rows).ToArray()).ToArray();
     138      }
     139      int n = variables.First().Length;
     140      var res = new double[n, variables.Length];
     141      for (int r = 0; r < n; r++)
     142        for (int c = 0; c < variables.Length; c++) {
     143          res[r, c] = variables[c][r];
     144        }
     145      return res;
    98146    }
    99147
     
    104152    #region IRegressionModel Members
    105153    public override IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
    106       var solutions = ExtractData(dataset, rows);
    107       var data = ExtractData(trainingDataset, trainingRows);
    108       return solutions.Select(solution => alpha.Zip(data, (a, d) => a * kernel.Get(solution, d)).Sum() + 1 * alpha[alpha.Length - 1]).ToArray();
     154      var newX = ExtractData(dataset, rows, scaling);
     155      var dim = newX.GetLength(1);
     156      var cov = kernel.GetParameterizedCovarianceFunction(new double[0], Enumerable.Range(0, dim).ToArray());
     157
     158      var pred = new double[newX.GetLength(0)];
     159      for (int i = 0; i < pred.Length; i++) {
     160        double sum = meanOffset;
     161        for (int j = 0; j < alpha.Length; j++) {
     162          sum += alpha[j] * cov.CrossCovariance(trainX, newX, j, i);
     163        }
     164        pred[i] = sum;
     165      }
     166      return pred;
    109167    }
    110168    public override IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
    111       return new RadialBasisFunctionRegressionSolution(this, new RegressionProblemData(problemData));
     169      return new ConfidenceRegressionSolution(this, new RegressionProblemData(problemData));
    112170    }
    113171    #endregion
    114172
    115173    public IEnumerable<double> GetEstimatedVariances(IDataset dataset, IEnumerable<int> rows) {
    116       var data = ExtractData(trainingDataset, trainingRows);
    117       return ExtractData(dataset, rows).Select(x => GetVariance(x, data));
     174      if (gramInvert == null) CreateAndInvertGramMatrix();
     175      int n = gramInvert.GetLength(0);
     176      var newData = ExtractData(dataset, rows, scaling);
     177      var dim = newData.GetLength(1);
     178      var cov = kernel.GetParameterizedCovarianceFunction(new double[0], Enumerable.Range(0, dim).ToArray());
     179
     180      // TODO perf (matrix matrix multiplication)
     181      for (int i = 0; i < newData.GetLength(0); i++) {
     182        double[] p = new double[n];
     183
     184        for (int j = 0; j < trainX.GetLength(0); j++) {
     185          p[j] = cov.CrossCovariance(trainX, newData, j, i);
     186        }
     187
     188        var Ap = new double[n];
     189        alglib.ablas.rmatrixmv(n, n, gramInvert, 0, 0, 0, p, 0, ref Ap, 0);
     190        var res = 0.0;
     191        // dot product
     192        for (int j = 0; j < p.Length; j++) res += p[j] * Ap[j];
     193        yield return res > 0 ? res : 0;
     194      }
    118195    }
    119196    public double LeaveOneOutCrossValidationRootMeanSquaredError() {
    120       return Math.Sqrt(alpha.Select((t, i) => t / gramInvert[i, i]).Sum(d => d * d) / gramInvert.Rows);
    121     }
    122 
     197      if (gramInvert == null) CreateAndInvertGramMatrix();
     198      var n = gramInvert.GetLength(0);
     199      var s = 1.0 / n;
     200
     201      var sum = 0.0;
     202      for (int i = 0; i < alpha.Length; i++) {
     203        var x = alpha[i] / gramInvert[i, i];
     204        sum += x * x;
     205      }
     206      sum *= s;
     207      return Math.Sqrt(sum);
     208    }
     209
     210    private void CreateAndInvertGramMatrix() {
     211      try {
     212        gramInvert = BuildGramMatrix(trainX);
     213        int info = 0;
     214        alglib.matinvreport report;
     215        alglib.rmatrixinverse(ref gramInvert, out info, out report);
     216        if (info != 1)
     217          throw new ArgumentException("Could not invert matrix. Is it quadratic symmetric positive definite?");
     218      } catch (alglib.alglibexception) {
     219        // wrap exception so that calling code doesn't have to know about alglib implementation
     220        throw new ArgumentException("Could not invert matrix. Is it quadratic symmetric positive definite?");
     221      }
     222    }
    123223    #region helpers
    124     private double[,] BuildGramMatrix(double[][] data) {
    125       var size = data.Length + 1;
    126       var gram = new double[size, size];
    127       for (var i = 0; i < size; i++)
    128         for (var j = i; j < size; j++) {
    129           if (j == size - 1 && i == size - 1) gram[i, j] = 0;
    130           else if (j == size - 1 || i == size - 1) gram[j, i] = gram[i, j] = 1;
    131           else gram[j, i] = gram[i, j] = kernel.Get(data[i], data[j]); //symmteric Matrix --> half of the work
     224    private double[,] BuildGramMatrix(double[,] data) {
     225      var n = data.GetLength(0);
     226      var dim = data.GetLength(1);
     227      var cov = kernel.GetParameterizedCovarianceFunction(new double[0], Enumerable.Range(0, dim).ToArray());
     228      var gram = new double[n, n];
     229      for (var i = 0; i < n; i++)
     230        for (var j = i; j < n; j++) {
     231          gram[j, i] = gram[i, j] = cov.Covariance(data, i, j); // symmetric matrix --> half of the work
    132232        }
    133233      return gram;
    134234    }
    135     private double GetVariance(double[] solution, IEnumerable<double[]> data) {
    136       var phiT = data.Select(x => kernel.Get(x, solution)).Concat(new[] { 1.0 }).ToColumnVector();
    137       var res = phiT.Transpose().Mul(gramInvert.Mul(phiT))[0, 0];
    138       return res > 0 ? res : 0;
    139     }
     235
    140236    #endregion
    141237  }
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/RadialBasisFunctions/RadialBasisRegression.cs

    r14386 r14872  
    2222using System;
    2323using System.Linq;
     24using System.Threading;
    2425using HeuristicLab.Common;
    2526using HeuristicLab.Core;
     
    3435  /// Radial basis function regression data analysis algorithm.
    3536  /// </summary>
    36   [Item("Radial Basis Function Regression (RBF-R)", "Radial basis function regression data analysis algorithm (uses for ALGLIB).")]
     37  [Item("Radial Basis Function Regression (RBF-R)", "Radial basis function regression data analysis algorithm.")]
    3738  [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 100)]
    3839  [StorableClass]
    39   public sealed class RadialBasisRegression : FixedDataAnalysisAlgorithm<IRegressionProblem> {
    40     private const string RadialBasisRegressionModelResultName = "RBF regression solution";
     40  public sealed class RadialBasisRegression : BasicAlgorithm {
     41    private const string RBFRegressionSolutionResultName = "RBF regression solution";
    4142
     43    public override bool SupportsPause {
     44      get { return false; }
     45    }
     46    public override Type ProblemType {
     47      get { return typeof(IRegressionProblem); }
     48    }
     49    public new IRegressionProblem Problem {
     50      get { return (IRegressionProblem)base.Problem; }
     51      set { base.Problem = value; }
     52    }
    4253
    43     #region Parameternames
    44     private const string Kernelname = "Kernel";
     54    #region parameter names
     55    private const string KernelParameterName = "Kernel";
     56    private const string ScaleInputVariablesParameterName = "ScaleInputVariables";
    4557    #endregion
    4658
    47     #region Paramterproperties
    48     public ValueParameter<IKernelFunction<double[]>> KernelParameter
    49     {
    50       get { return Parameters[Kernelname] as ValueParameter<IKernelFunction<double[]>>; }
     59    #region parameter properties
     60    public ValueParameter<ICovarianceFunction> KernelParameter {
     61      get { return (ValueParameter<ICovarianceFunction>)Parameters[KernelParameterName]; }
     62    }
     63
     64    public IFixedValueParameter<BoolValue> ScaleInputVariablesParameter {
     65      get { return (IFixedValueParameter<BoolValue>)Parameters[ScaleInputVariablesParameterName]; }
    5166    }
    5267    #endregion
    5368
    54     #region Properties
    55     public IKernelFunction<double[]> Kernel
    56     {
     69    #region properties
     70    public ICovarianceFunction Kernel {
    5771      get { return KernelParameter.Value; }
     72    }
     73
     74    public bool ScaleInputVariables {
     75      get { return ScaleInputVariablesParameter.Value.Value; }
     76      set { ScaleInputVariablesParameter.Value.Value = value; }
    5877    }
    5978
     
    6786    public RadialBasisRegression() {
    6887      Problem = new RegressionProblem();
    69       Parameters.Add(new ValueParameter<IKernelFunction<double[]>>(Kernelname, "The radial basis function"));
    70       var kernel = new PolysplineKernel<double[]>();
     88      Parameters.Add(new ValueParameter<ICovarianceFunction>(KernelParameterName, "The radial basis function"));
     89      Parameters.Add(new FixedValueParameter<BoolValue>(ScaleInputVariablesParameterName, "Set to true if the input variables should be scaled to the interval [0..1]", new BoolValue(true)));
     90      var kernel = new GaussianKernel();
    7191      KernelParameter.Value = kernel;
    72       kernel.BetaParameter.Value.Value = 1;
    7392    }
    7493    [StorableHook(HookType.AfterDeserialization)]
     
    7998    }
    8099
    81     #region regression
    82     protected override void Run() {
    83       double loocvrmse, cvRmsError;
    84       var solution = CreateRadialBasisRegressionSolution(Problem.ProblemData, Kernel, out loocvrmse, out cvRmsError);
    85       Results.Add(new Result(RadialBasisRegressionModelResultName, "The RBF regression solution.", solution));
    86       Results.Add(new Result("LOOCVRMSE", "The root of the mean of squared errors of a leave-one-out-cross-validation on the trainingsset (This is not the RSME on the trainingset)", new DoubleValue(loocvrmse)));
    87       Results.Add(new Result("Estimated root mean square error (cross-validation)", "The estimated root of the mean of squared errors of the linear regression solution via cross validation.", new DoubleValue(cvRmsError)));
     100    protected override void Run(CancellationToken cancellationToken) {
     101      double loocvrmse, rmsError;
     102      var solution = CreateRadialBasisRegressionSolution(Problem.ProblemData, Kernel, ScaleInputVariables, out loocvrmse, out rmsError);
     103      Results.Add(new Result(RBFRegressionSolutionResultName, "The RBF regression solution.", solution));
     104      Results.Add(new Result("LOOCVRMSE", "The root mean squared error of a leave-one-out-cross-validation on the training set", new DoubleValue(loocvrmse)));
     105      Results.Add(new Result("RMSE (test)", "The root mean squared error of the solution on the test set.", new DoubleValue(rmsError)));
    88106    }
    89107
    90     public static IConfidenceRegressionSolution CreateRadialBasisRegressionSolution(IRegressionProblemData problemData, IKernelFunction<double[]> kernel, out double loocvRmsError, out double cvRmsError) {
    91       var model = new RadialBasisFunctionModel(problemData.Dataset, problemData.TargetVariable, problemData.AllowedInputVariables, problemData.TrainingIndices, kernel);
     108    public static IRegressionSolution CreateRadialBasisRegressionSolution(IRegressionProblemData problemData, ICovarianceFunction kernel, bool scaleInputs, out double loocvRmsError, out double rmsError) {
     109      var model = new RadialBasisFunctionModel(problemData.Dataset, problemData.TargetVariable, problemData.AllowedInputVariables, problemData.TrainingIndices, scaleInputs, kernel);
    92110      loocvRmsError = model.LeaveOneOutCrossValidationRootMeanSquaredError();
    93       cvRmsError = Math.Sqrt(model.GetEstimatedValues(problemData.Dataset, problemData.TestIndices)
     111      rmsError = Math.Sqrt(model.GetEstimatedValues(problemData.Dataset, problemData.TestIndices)
    94112        .Zip(problemData.TargetVariableTestValues, (a, b) => (a - b) * (a - b))
    95         .Sum());
    96       var solution = (RadialBasisFunctionRegressionSolution)model.CreateRegressionSolution((IRegressionProblemData)problemData.Clone());
    97       solution.Model.Name = "Radial Basis Regression Model";
    98       solution.Name = "Radial Basis Regression Solution";
     113        .Average());
     114      var solution = model.CreateRegressionSolution((IRegressionProblemData)problemData.Clone());
     115      solution.Model.Name = "RBF Regression Model";
     116      solution.Name = "RBF Regression Solution";
    99117      return solution;
    100118    }
    101     #endregion
    102 
    103     #region helpers
    104     #endregion
    105119  }
    106120}
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Distances/DistanceBase.cs

    r14767 r14872  
    1919 */
    2020#endregion
    21 
     21using System.Collections;
    2222using System.Collections.Generic;
    2323using HeuristicLab.Common;
     
    4242    }
    4343
    44     private class DistanceComparer : IComparer<T> {
     44    private class DistanceComparer : IComparer<T>, IComparer {
    4545      private readonly T item;
    4646      private readonly IDistance<T> dist;
     
    5454        return dist.Get(x, item).CompareTo(dist.Get(y, item));
    5555      }
     56
     57      public int Compare(object x, object y) {
     58        return Compare((T)x, (T)y);
     59      }
     60    }
     61
     62    public double Get(object x, object y) {
     63      return Get((T)x, (T)y);
     64    }
     65
     66    public IComparer GetDistanceComparer(object item) {
     67      return new DistanceComparer((T)item, this);
    5668    }
    5769  }
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/Interfaces/IDistance.cs

    r14767 r14872  
    1919 */
    2020#endregion
    21 
     21using System.Collections;
    2222using System.Collections.Generic;
    2323using HeuristicLab.Core;
    2424
    2525namespace HeuristicLab.Algorithms.DataAnalysis {
    26   public interface IDistance<in T> : IItem {
     26  public interface IDistance<in T> : IDistance {
    2727    /// <summary>
    2828    /// Calculates a distance measure between two objects.
     
    4141    IComparer<T> GetDistanceComparer(T item);
    4242  }
     43
     44  public interface IDistance : IItem {
     45    double Get(object x, object y);
     46    IComparer GetDistanceComparer(object item);
     47  }
    4348}
Note: See TracChangeset for help on using the changeset viewer.