Changeset 15967


Ignore:
Timestamp:
06/20/18 09:53:28 (11 months ago)
Author:
bwerth
Message:

#2847 added logistic dampening and some minor changes

Location:
branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4
Files:
1 added
8 edited

Legend:

Unmodified
Added
Removed
  • branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj

    r15830 r15967  
    372372    <Compile Include="M5Regression\Interfaces\ISplitter.cs" />
    373373    <Compile Include="M5Regression\LeafModels\ComponentReducedLinearModel.cs" />
    374     <Compile Include="M5Regression\LeafModels\DampenedLinearModel.cs" />
     374    <Compile Include="M5Regression\LeafModels\DampenedModel.cs" />
    375375    <Compile Include="M5Regression\LeafModels\PreconstructedLinearModel.cs" />
    376376    <Compile Include="M5Regression\LeafTypes\ComplexLeaf.cs" />
     
    380380    <Compile Include="M5Regression\LeafTypes\GaussianProcessLeaf.cs" />
    381381    <Compile Include="M5Regression\LeafTypes\LinearLeaf.cs" />
    382     <Compile Include="M5Regression\LeafTypes\LogisticLeaf.cs" />
    383382    <Compile Include="M5Regression\LeafTypes\M5Leaf.cs" />
    384383    <Compile Include="M5Regression\LeafTypes\M5regLeaf.cs" />
  • branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/LeafModels/ComponentReducedLinearModel.cs

    r15830 r15967  
    2828namespace HeuristicLab.Algorithms.DataAnalysis {
    2929  [StorableClass]
    30   public class ComponentReducedLinearModel : RegressionModel, IConfidenceRegressionModel {
     30  public class ComponentReducedLinearModel : RegressionModel {
    3131    [Storable]
    32     private IConfidenceRegressionModel Model;
     32    private IRegressionModel Model;
    3333    [Storable]
    3434    private PrincipleComponentTransformation Pca;
     
    4040      Pca = cloner.Clone(original.Pca);
    4141    }
    42     public ComponentReducedLinearModel(string targetVariable, IConfidenceRegressionModel model, PrincipleComponentTransformation pca) : base(targetVariable) {
     42    public ComponentReducedLinearModel(string targetVariable, IRegressionModel model, PrincipleComponentTransformation pca) : base(targetVariable) {
    4343      Model = model;
    4444      Pca = pca;
     
    5656    }
    5757    public override IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
    58       return new ConfidenceRegressionSolution(this, problemData);
    59     }
    60     public IEnumerable<double> GetEstimatedVariances(IDataset dataset, IEnumerable<int> rows) {
    61       var data = ReduceDataset(dataset, rows.ToArray());
    62       return Model.GetEstimatedVariances(Pca.TransformDataset(data), Enumerable.Range(0, data.Rows));
     58      return new RegressionSolution(this, problemData);
    6359    }
    6460
  • branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/LeafModels/PreconstructedLinearModel.cs

    r15833 r15967  
    3131  //multidimensional extension of http://www2.stat.duke.edu/~tjl13/s101/slides/unit6lec3H.pdf
    3232  [StorableClass]
    33   internal sealed class PreconstructedLinearModel : RegressionModel, IConfidenceRegressionModel {
     33  internal sealed class PreconstructedLinearModel : RegressionModel {
    3434    [Storable]
    3535    public Dictionary<string, double> Coefficients { get; private set; }
    3636    [Storable]
    3737    public double Intercept { get; private set; }
    38     [Storable]
    39     private Dictionary<string, double> Means { get; set; }
    40     [Storable]
    41     private Dictionary<string, double> Variances { get; set; }
    42     [Storable]
    43     private double ResidualVariance { get; set; }
    44     [Storable]
    45     private int SampleSize { get; set; }
    4638
    4739    public override IEnumerable<string> VariablesUsedForPrediction {
     
    5446      if (original.Coefficients != null) Coefficients = original.Coefficients.ToDictionary(x => x.Key, x => x.Value);
    5547      Intercept = original.Intercept;
    56       if (original.Means != null) Means = original.Means.ToDictionary(x => x.Key, x => x.Value);
    57       if (original.Variances != null) Variances = original.Variances.ToDictionary(x => x.Key, x => x.Value);
    58       ResidualVariance = original.ResidualVariance;
    59       SampleSize = original.SampleSize;
    6048    }
    61     public PreconstructedLinearModel(Dictionary<string, double> means, Dictionary<string, double> variances, Dictionary<string, double> coefficients, double intercept, string targetvariable, double residualVariance = 0, double sampleSize = 0) : base(targetvariable) {
     49    public PreconstructedLinearModel(Dictionary<string, double> coefficients, double intercept, string targetvariable) : base(targetvariable) {
    6250      Coefficients = coefficients;
    6351      Intercept = intercept;
    64       Variances = variances;
    65       Means = means;
    66       ResidualVariance = 0;
    67       SampleSize = 0;
    6852    }
    6953    public PreconstructedLinearModel(double intercept, string targetvariable) : base(targetvariable) {
    7054      Coefficients = new Dictionary<string, double>();
    7155      Intercept = intercept;
    72       Variances = new Dictionary<string, double>();
    73       ResidualVariance = 0;
    74       SampleSize = 0;
    7556    }
    7657    public override IDeepCloneable Clone(Cloner cloner) {
     
    7960    #endregion
    8061
    81     public static PreconstructedLinearModel CreateConfidenceLinearModel(IRegressionProblemData pd, out double rmse, out double cvRmse) {
    82       rmse = double.NaN;
    83       cvRmse = double.NaN;
    84       return AlternativeCalculation(pd);
     62    public static PreconstructedLinearModel CreateLinearModel(IRegressionProblemData pd, out double rmse) {
     63      return AlternativeCalculation(pd, out rmse);
    8564    }
    8665
    87     private static PreconstructedLinearModel ClassicCalculation(IRegressionProblemData pd, out double rmse, out double cvRmse) {
     66    private static PreconstructedLinearModel ClassicCalculation(IRegressionProblemData pd) {
    8867      var inputMatrix = pd.Dataset.ToArray(pd.AllowedInputVariables.Concat(new[] {
    8968        pd.TargetVariable
     
    9877      alglib.lrbuild(inputMatrix, inputMatrix.GetLength(0), nFeatures, out retVal, out lm, out ar);
    9978      if (retVal != 1) throw new ArgumentException("Error in calculation of linear regression solution");
    100       rmse = ar.rmserror;
    101       cvRmse = ar.cvrmserror;
    10279
    10380      alglib.lrunpack(lm, out coefficients, out nFeatures);
    104 
    105 
    106       var means = pd.AllowedInputVariables.ToDictionary(n => n, n => pd.Dataset.GetDoubleValues(n).Average());
    107       var variances = pd.AllowedInputVariables.ToDictionary(n => n, n => pd.Dataset.GetDoubleValues(n).Variance());
    10881      var coeffs = pd.AllowedInputVariables.Zip(coefficients, (s, d) => new {s, d}).ToDictionary(x => x.s, x => x.d);
    109       var res = new PreconstructedLinearModel(means, variances, coeffs, coefficients[nFeatures], pd.TargetVariable);
    110 
    111       res.ResidualVariance = pd.TargetVariableValues.Zip(res.GetEstimatedValues(pd.Dataset, pd.TrainingIndices), (x, y) => x - y).Variance();
    112       res.SampleSize = pd.TrainingIndices.Count();
     82      var res = new PreconstructedLinearModel(coeffs, coefficients[nFeatures], pd.TargetVariable);
    11383      return res;
    11484    }
    11585
    116     private static PreconstructedLinearModel AlternativeCalculation(IRegressionProblemData pd) {
    117       var means = pd.AllowedInputVariables.ToDictionary(n1 => n1, n1 => pd.Dataset.GetDoubleValues(n1).Average());
    118       var variances = pd.AllowedInputVariables.ToDictionary(n1 => n1, n1 => pd.Dataset.GetDoubleValues(n1).Variance());
    119       var cmean = pd.TargetVariableTrainingValues.Average();
     86    private static PreconstructedLinearModel AlternativeCalculation(IRegressionProblemData pd, out double rmse) {
    12087      var variables = pd.AllowedInputVariables.ToList();
    12188      var n = variables.Count;
    12289      var m = pd.TrainingIndices.Count();
    12390
    124       //Set up X^T and y
     91      //Set up X^T
    12592      var inTr = new double[n + 1, m];
    12693      for (var i = 0; i < n; i++) {
    127         var v = variables[i];
    128         var vdata = pd.Dataset.GetDoubleValues(v, pd.TrainingIndices).ToArray();
     94        var vdata = pd.Dataset.GetDoubleValues(variables[i], pd.TrainingIndices).ToArray();
    12995        for (var j = 0; j < m; j++) inTr[i, j] = vdata[j];
    13096      }
    131 
    13297      for (var i = 0; i < m; i++) inTr[n, i] = 1;
    13398
     99      //Set up y
    134100      var y = new double[m, 1];
    135101      var ydata = pd.TargetVariableTrainingValues.ToArray();
     
    138104      //Perform linear regression
    139105      var aTy = new double[n + 1, 1];
    140       alglib.rmatrixgemm(n + 1, 1, m, 1, inTr, 0, 0, 0, y, 0, 0, 0, 0, ref aTy, 0, 0); //aTy = inTr * y;
    141106      var aTa = new double[n + 1, n + 1];
    142       alglib.rmatrixgemm(n + 1, n + 1, m, 1, inTr, 0, 0, 0, inTr, 0, 0, 1, 0, ref aTa, 0, 0); //aTa = inTr * t(inTr) +aTa //
    143       alglib.spdmatrixcholesky(ref aTa, n + 1, true);
     107      var aTyVector = new double[n + 1];
    144108      int info;
    145109      alglib.densesolverreport report;
    146110      double[] coefficients;
    147       var aTyVector = new double[n + 1];
     111
     112      //Perform linear regression
     113      alglib.rmatrixgemm(n + 1, 1, m, 1, inTr, 0, 0, 0, y, 0, 0, 0, 0, ref aTy, 0, 0); //aTy = inTr * y;
     114      alglib.rmatrixgemm(n + 1, n + 1, m, 1, inTr, 0, 0, 0, inTr, 0, 0, 1, 0, ref aTa, 0, 0); //aTa = inTr * t(inTr) +aTa //
     115      alglib.spdmatrixcholesky(ref aTa, n + 1, true);
    148116      for (var i = 0; i < n + 1; i++) aTyVector[i] = aTy[i, 0];
    149117      alglib.spdmatrixcholeskysolve(aTa, n + 1, true, aTyVector, out info, out report, out coefficients);
    150       double rmse, cvrmse;
    151       if (info != 1) return ClassicCalculation(pd, out rmse, out cvrmse);
    152118
    153       //extract coefficients
    154       var intercept = coefficients[n];
    155       var coeffs = new Dictionary<string, double>();
    156       for (var i = 0; i < n; i++) coeffs.Add(variables[i], coefficients[i]);
     119      //if cholesky calculation fails fall back to classic linear regression
     120      if (info != 1) {
     121        alglib.linearmodel lm;
     122        alglib.lrreport ar;
     123        int retVal;
     124        var inputMatrix = pd.Dataset.ToArray(pd.AllowedInputVariables.Concat(new[] {
     125          pd.TargetVariable
     126        }), pd.AllIndices);
     127        alglib.lrbuild(inputMatrix, inputMatrix.GetLength(0), n, out retVal, out lm, out ar);
     128        if (retVal != 1) throw new ArgumentException("Error in calculation of linear regression solution");
     129        alglib.lrunpack(lm, out coefficients, out n);
     130      }
    157131
    158       return new PreconstructedLinearModel(means, variances, coeffs, intercept, pd.TargetVariable);
     132      var coeffs = Enumerable.Range(0, n).ToDictionary(i => variables[i], i => coefficients[i]);
     133      var model = new PreconstructedLinearModel(coeffs, coefficients[n], pd.TargetVariable);
     134      rmse = pd.TrainingIndices.Select(i => pd.Dataset.GetDoubleValue(pd.TargetVariable, i) - model.GetEstimatedValue(pd.Dataset, i)).Sum(r => r * r) / m;
     135      rmse = Math.Sqrt(rmse);
     136      return model;
    159137    }
    160138
     
    167145    }
    168146
    169     public IEnumerable<double> GetEstimatedVariances(IDataset dataset, IEnumerable<int> rows) {
    170       return rows.Select(i => GetEstimatedVariance(dataset, i));
    171     }
    172 
    173147    #region helpers
    174148    private double GetEstimatedValue(IDataset dataset, int row) {
    175149      return Intercept + (Coefficients.Count == 0 ? 0 : Coefficients.Sum(s => s.Value * dataset.GetDoubleValue(s.Key, row)));
    176150    }
    177     private double GetEstimatedVariance(IDataset dataset, int row) {
    178       if (SampleSize == 0) return 0.0;
    179       var sum = (from var in Variances let d = dataset.GetDoubleValue(var.Key, row) - Means[var.Key] select d * d / var.Value).Sum();
    180       var res = ResidualVariance * (SampleSize - 1) / (SampleSize - 2) * (1.0 / SampleSize + sum / (SampleSize - 1));
    181       if (double.IsInfinity(res) || double.IsNaN(res)) return 0.0;
    182       return Math.Sqrt(res);
    183     }
    184151    #endregion
    185152  }
  • branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/LeafTypes/ComponentReductionLinearLeaf.cs

    r15830 r15967  
    5757    #region IModelType
    5858    public override bool ProvidesConfidence {
    59       get { return true; }
     59      get { return false; }
    6060    }
    6161    public override IRegressionModel Build(IRegressionProblemData pd, IRandom random,
     
    7171        foreach (var v in pd2.InputVariables.CheckedItems.ToArray())
    7272          pd2.InputVariables.SetItemCheckedState(v.Value, inputs.Contains(v.Value.Value));
    73         double cvRmse;
    7473        double rmse;
    75         var model = PreconstructedLinearModel.CreateConfidenceLinearModel(pd2, out rmse, out cvRmse);
    76         if (cvRmse > bestCvrmse) continue;
     74        var model = PreconstructedLinearModel.CreateLinearModel(pd2, out rmse);
     75        if (rmse > bestCvrmse) continue;
    7776        bestModel = new ComponentReducedLinearModel(pd2.TargetVariable, model, pca);
    7877        noParameters = i + 1;
    79         bestCvrmse = cvRmse;
     78        bestCvrmse = rmse;
    8079      }
    8180      return bestModel;
  • branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/LeafTypes/LeafBase.cs

    r15830 r15967  
    2626using HeuristicLab.Common;
    2727using HeuristicLab.Core;
     28using HeuristicLab.Data;
     29using HeuristicLab.Parameters;
    2830using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    2931using HeuristicLab.Problems.DataAnalysis;
     
    3335  public abstract class LeafBase : ParameterizedNamedItem, ILeafModel {
    3436    public const string LeafBuildingStateVariableName = "LeafBuildingState";
     37    public const string UseDampeningParameterName = "UseDampening";
     38    private const string DampeningParameterName = "DampeningStrenght";
     39
     40    public IFixedValueParameter<DoubleValue> DampeningParameter {
     41      get { return Parameters[DampeningParameterName] as IFixedValueParameter<DoubleValue>; }
     42    }
     43    public IFixedValueParameter<BoolValue> UseDampeningParameter {
     44      get { return (IFixedValueParameter<BoolValue>)Parameters[UseDampeningParameterName]; }
     45    }
     46
     47    public bool UseDampening {
     48      get { return UseDampeningParameter.Value.Value; }
     49    }
     50    public double Dampening {
     51      get { return DampeningParameter.Value.Value; }
     52    }
    3553
    3654    #region Constructors & Cloning
     
    3856    protected LeafBase(bool deserializing) : base(deserializing) { }
    3957    protected LeafBase(LeafBase original, Cloner cloner) : base(original, cloner) { }
    40     public LeafBase() { }
     58    protected LeafBase() {
     59      Parameters.Add(new FixedValueParameter<BoolValue>(UseDampeningParameterName, "Whether logistic dampening should be used to prevent extreme extrapolation", new BoolValue(false)));
     60      Parameters.Add(new FixedValueParameter<DoubleValue>(DampeningParameterName, "Determines the strenght of the logistic dampening. Must be > 0.0. Larger numbers make more conservative predictions.", new DoubleValue(1.5)));
     61    }
    4162    #endregion
    4263
     
    7394      int numP;
    7495      var model = Build(pd, parameters.Random, cancellation, out numP);
     96      if (UseDampening && Dampening > 0.0) {
     97        model = DampenedModel.DampenModel(model, pd, Dampening);
     98      }
     99
    75100      numParams = numP;
    76101      cancellation.ThrowIfCancellationRequested();
  • branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/LeafTypes/LinearLeaf.cs

    r15830 r15967  
    4444    #region IModelType
    4545    public override bool ProvidesConfidence {
    46       get { return true; }
     46      get { return false; }
    4747    }
    4848    public override IRegressionModel Build(IRegressionProblemData pd, IRandom random, CancellationToken cancellationToken, out int noParameters) {
     
    5050      double rmse, cvRmse;
    5151      noParameters = pd.AllowedInputVariables.Count() + 1;
    52       var res = PreconstructedLinearModel.CreateConfidenceLinearModel(pd, out rmse, out cvRmse);
    53       return res;
     52      var res = LinearRegression.CreateLinearRegressionSolution(pd, out rmse, out cvRmse);
     53      return res.Model;
    5454    }
    5555
  • branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/LeafTypes/M5Leaf.cs

    r15830 r15967  
    112112      pd2.TrainingPartition.Start = pd.TrainingPartition.Start;
    113113
    114       return new PreconstructedLinearModel(means, variances, coeffs, intercept, pd.TargetVariable);
     114      return new PreconstructedLinearModel(coeffs, intercept, pd.TargetVariable);
    115115    }
    116116
     
    124124      //
    125125      //  double x1, x2;
    126       //  var lm = PreconstructedLinearModel.CreateConfidenceLinearModel(pd2, out x1, out x2);
     126      //  var lm = PreconstructedLinearModel.CreateLinearModel(pd2, out x1, out x2);
    127127      //  intercept = lm.Intercept;
    128128      //  return lm.Coefficients;
  • branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/M5Regression.cs

    r15833 r15967  
    220220      var pruningRows = (IntArray)stateScope.Variables[PruningSetVariableName].Value;
    221221      if (1 > trainingRows.Length)
    222         return new PreconstructedLinearModel(new Dictionary<string, double>(), new Dictionary<string, double>(), new Dictionary<string, double>(), 0, regressionTreeParams.TargetVariable);
     222        return new PreconstructedLinearModel(new Dictionary<string, double>(), 0, regressionTreeParams.TargetVariable);
    223223      if (regressionTreeParams.MinLeafSize > trainingRows.Length) {
    224224        var targets = regressionTreeParams.Data.GetDoubleValues(regressionTreeParams.TargetVariable).ToArray();
    225         return new PreconstructedLinearModel(new Dictionary<string, double>(), new Dictionary<string, double>(), new Dictionary<string, double>(), targets.Average(), regressionTreeParams.TargetVariable, targets.Variance(), targets.Length);
     225        return new PreconstructedLinearModel(new Dictionary<string, double>(), targets.Average(), regressionTreeParams.TargetVariable);
    226226      }
    227227      model.Build(trainingRows.ToArray(), pruningRows.ToArray(), stateScope, results, cancellationToken);
Note: See TracChangeset for help on using the changeset viewer.