Changeset 15967 for branches/M5Regression
- Timestamp: 06/20/18 09:53:28 (7 years ago)
- Location: branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4
- Files: 1 added, 8 edited
Legend:
- Unmodified
- Added
- Removed
branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj
r15830 r15967 372 372 <Compile Include="M5Regression\Interfaces\ISplitter.cs" /> 373 373 <Compile Include="M5Regression\LeafModels\ComponentReducedLinearModel.cs" /> 374 <Compile Include="M5Regression\LeafModels\Dampened LinearModel.cs" />374 <Compile Include="M5Regression\LeafModels\DampenedModel.cs" /> 375 375 <Compile Include="M5Regression\LeafModels\PreconstructedLinearModel.cs" /> 376 376 <Compile Include="M5Regression\LeafTypes\ComplexLeaf.cs" /> … … 380 380 <Compile Include="M5Regression\LeafTypes\GaussianProcessLeaf.cs" /> 381 381 <Compile Include="M5Regression\LeafTypes\LinearLeaf.cs" /> 382 <Compile Include="M5Regression\LeafTypes\LogisticLeaf.cs" />383 382 <Compile Include="M5Regression\LeafTypes\M5Leaf.cs" /> 384 383 <Compile Include="M5Regression\LeafTypes\M5regLeaf.cs" /> -
branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/LeafModels/ComponentReducedLinearModel.cs
r15830 r15967 28 28 namespace HeuristicLab.Algorithms.DataAnalysis { 29 29 [StorableClass] 30 public class ComponentReducedLinearModel : RegressionModel , IConfidenceRegressionModel{30 public class ComponentReducedLinearModel : RegressionModel { 31 31 [Storable] 32 private I ConfidenceRegressionModel Model;32 private IRegressionModel Model; 33 33 [Storable] 34 34 private PrincipleComponentTransformation Pca; … … 40 40 Pca = cloner.Clone(original.Pca); 41 41 } 42 public ComponentReducedLinearModel(string targetVariable, I ConfidenceRegressionModel model, PrincipleComponentTransformation pca) : base(targetVariable) {42 public ComponentReducedLinearModel(string targetVariable, IRegressionModel model, PrincipleComponentTransformation pca) : base(targetVariable) { 43 43 Model = model; 44 44 Pca = pca; … … 56 56 } 57 57 public override IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) { 58 return new ConfidenceRegressionSolution(this, problemData); 59 } 60 public IEnumerable<double> GetEstimatedVariances(IDataset dataset, IEnumerable<int> rows) { 61 var data = ReduceDataset(dataset, rows.ToArray()); 62 return Model.GetEstimatedVariances(Pca.TransformDataset(data), Enumerable.Range(0, data.Rows)); 58 return new RegressionSolution(this, problemData); 63 59 } 64 60 -
branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/LeafModels/PreconstructedLinearModel.cs
r15833 r15967 31 31 //mulitdimensional extension of http://www2.stat.duke.edu/~tjl13/s101/slides/unit6lec3H.pdf 32 32 [StorableClass] 33 internal sealed class PreconstructedLinearModel : RegressionModel , IConfidenceRegressionModel{33 internal sealed class PreconstructedLinearModel : RegressionModel { 34 34 [Storable] 35 35 public Dictionary<string, double> Coefficients { get; private set; } 36 36 [Storable] 37 37 public double Intercept { get; private set; } 38 [Storable]39 private Dictionary<string, double> Means { get; set; }40 [Storable]41 private Dictionary<string, double> Variances { get; set; }42 [Storable]43 private double ResidualVariance { get; set; }44 [Storable]45 private int SampleSize { get; set; }46 38 47 39 public override IEnumerable<string> VariablesUsedForPrediction { … … 54 46 if (original.Coefficients != null) Coefficients = original.Coefficients.ToDictionary(x => x.Key, x => x.Value); 55 47 Intercept = original.Intercept; 56 if (original.Means != null) Means = original.Means.ToDictionary(x => x.Key, x => x.Value);57 if (original.Variances != null) Variances = original.Variances.ToDictionary(x => x.Key, x => x.Value);58 ResidualVariance = original.ResidualVariance;59 SampleSize = original.SampleSize;60 48 } 61 public PreconstructedLinearModel(Dictionary<string, double> means, Dictionary<string, double> variances, Dictionary<string, double> coefficients, double intercept, string targetvariable, double residualVariance = 0, double sampleSize = 0) : base(targetvariable) {49 public PreconstructedLinearModel(Dictionary<string, double> coefficients, double intercept, string targetvariable) : base(targetvariable) { 62 50 Coefficients = coefficients; 63 51 Intercept = intercept; 64 Variances = variances;65 Means = means;66 ResidualVariance = 0;67 SampleSize = 0;68 52 } 69 53 public PreconstructedLinearModel(double intercept, string targetvariable) : base(targetvariable) { 70 54 Coefficients = new Dictionary<string, double>(); 71 55 Intercept = 
intercept; 72 Variances = new Dictionary<string, double>();73 ResidualVariance = 0;74 SampleSize = 0;75 56 } 76 57 public override IDeepCloneable Clone(Cloner cloner) { … … 79 60 #endregion 80 61 81 public static PreconstructedLinearModel CreateConfidenceLinearModel(IRegressionProblemData pd, out double rmse, out double cvRmse) { 82 rmse = double.NaN; 83 cvRmse = double.NaN; 84 return AlternativeCalculation(pd); 62 public static PreconstructedLinearModel CreateLinearModel(IRegressionProblemData pd, out double rmse) { 63 return AlternativeCalculation(pd, out rmse); 85 64 } 86 65 87 private static PreconstructedLinearModel ClassicCalculation(IRegressionProblemData pd , out double rmse, out double cvRmse) {66 private static PreconstructedLinearModel ClassicCalculation(IRegressionProblemData pd) { 88 67 var inputMatrix = pd.Dataset.ToArray(pd.AllowedInputVariables.Concat(new[] { 89 68 pd.TargetVariable … … 98 77 alglib.lrbuild(inputMatrix, inputMatrix.GetLength(0), nFeatures, out retVal, out lm, out ar); 99 78 if (retVal != 1) throw new ArgumentException("Error in calculation of linear regression solution"); 100 rmse = ar.rmserror;101 cvRmse = ar.cvrmserror;102 79 103 80 alglib.lrunpack(lm, out coefficients, out nFeatures); 104 105 106 var means = pd.AllowedInputVariables.ToDictionary(n => n, n => pd.Dataset.GetDoubleValues(n).Average());107 var variances = pd.AllowedInputVariables.ToDictionary(n => n, n => pd.Dataset.GetDoubleValues(n).Variance());108 81 var coeffs = pd.AllowedInputVariables.Zip(coefficients, (s, d) => new {s, d}).ToDictionary(x => x.s, x => x.d); 109 var res = new PreconstructedLinearModel(means, variances, coeffs, coefficients[nFeatures], pd.TargetVariable); 110 111 res.ResidualVariance = pd.TargetVariableValues.Zip(res.GetEstimatedValues(pd.Dataset, pd.TrainingIndices), (x, y) => x - y).Variance(); 112 res.SampleSize = pd.TrainingIndices.Count(); 82 var res = new PreconstructedLinearModel(coeffs, coefficients[nFeatures], pd.TargetVariable); 113 83 
return res; 114 84 } 115 85 116 private static PreconstructedLinearModel AlternativeCalculation(IRegressionProblemData pd) { 117 var means = pd.AllowedInputVariables.ToDictionary(n1 => n1, n1 => pd.Dataset.GetDoubleValues(n1).Average()); 118 var variances = pd.AllowedInputVariables.ToDictionary(n1 => n1, n1 => pd.Dataset.GetDoubleValues(n1).Variance()); 119 var cmean = pd.TargetVariableTrainingValues.Average(); 86 private static PreconstructedLinearModel AlternativeCalculation(IRegressionProblemData pd, out double rmse) { 120 87 var variables = pd.AllowedInputVariables.ToList(); 121 88 var n = variables.Count; 122 89 var m = pd.TrainingIndices.Count(); 123 90 124 //Set up X^T and y91 //Set up X^T 125 92 var inTr = new double[n + 1, m]; 126 93 for (var i = 0; i < n; i++) { 127 var v = variables[i]; 128 var vdata = pd.Dataset.GetDoubleValues(v, pd.TrainingIndices).ToArray(); 94 var vdata = pd.Dataset.GetDoubleValues(variables[i], pd.TrainingIndices).ToArray(); 129 95 for (var j = 0; j < m; j++) inTr[i, j] = vdata[j]; 130 96 } 131 132 97 for (var i = 0; i < m; i++) inTr[n, i] = 1; 133 98 99 //Set up y 134 100 var y = new double[m, 1]; 135 101 var ydata = pd.TargetVariableTrainingValues.ToArray(); … … 138 104 //Perform linear regression 139 105 var aTy = new double[n + 1, 1]; 140 alglib.rmatrixgemm(n + 1, 1, m, 1, inTr, 0, 0, 0, y, 0, 0, 0, 0, ref aTy, 0, 0); //aTy = inTr * y;141 106 var aTa = new double[n + 1, n + 1]; 142 alglib.rmatrixgemm(n + 1, n + 1, m, 1, inTr, 0, 0, 0, inTr, 0, 0, 1, 0, ref aTa, 0, 0); //aTa = inTr * t(inTr) +aTa // 143 alglib.spdmatrixcholesky(ref aTa, n + 1, true); 107 var aTyVector = new double[n + 1]; 144 108 int info; 145 109 alglib.densesolverreport report; 146 110 double[] coefficients; 147 var aTyVector = new double[n + 1]; 111 112 //Perform linear regression 113 alglib.rmatrixgemm(n + 1, 1, m, 1, inTr, 0, 0, 0, y, 0, 0, 0, 0, ref aTy, 0, 0); //aTy = inTr * y; 114 alglib.rmatrixgemm(n + 1, n + 1, m, 1, inTr, 0, 0, 0, inTr, 0, 0, 1, 0, 
ref aTa, 0, 0); //aTa = inTr * t(inTr) +aTa // 115 alglib.spdmatrixcholesky(ref aTa, n + 1, true); 148 116 for (var i = 0; i < n + 1; i++) aTyVector[i] = aTy[i, 0]; 149 117 alglib.spdmatrixcholeskysolve(aTa, n + 1, true, aTyVector, out info, out report, out coefficients); 150 double rmse, cvrmse;151 if (info != 1) return ClassicCalculation(pd, out rmse, out cvrmse);152 118 153 //extract coefficients 154 var intercept = coefficients[n]; 155 var coeffs = new Dictionary<string, double>(); 156 for (var i = 0; i < n; i++) coeffs.Add(variables[i], coefficients[i]); 119 //if cholesky calculation fails fall bakc to classic linear regresseion 120 if (info != 1) { 121 alglib.linearmodel lm; 122 alglib.lrreport ar; 123 int retVal; 124 var inputMatrix = pd.Dataset.ToArray(pd.AllowedInputVariables.Concat(new[] { 125 pd.TargetVariable 126 }), pd.AllIndices); 127 alglib.lrbuild(inputMatrix, inputMatrix.GetLength(0), n, out retVal, out lm, out ar); 128 if (retVal != 1) throw new ArgumentException("Error in calculation of linear regression solution"); 129 alglib.lrunpack(lm, out coefficients, out n); 130 } 157 131 158 return new PreconstructedLinearModel(means, variances, coeffs, intercept, pd.TargetVariable); 132 var coeffs = Enumerable.Range(0, n).ToDictionary(i => variables[i], i => coefficients[i]); 133 var model = new PreconstructedLinearModel(coeffs, coefficients[n], pd.TargetVariable); 134 rmse = pd.TrainingIndices.Select(i => pd.Dataset.GetDoubleValue(pd.TargetVariable, i) - model.GetEstimatedValue(pd.Dataset, i)).Sum(r => r * r) / m; 135 rmse = Math.Sqrt(rmse); 136 return model; 159 137 } 160 138 … … 167 145 } 168 146 169 public IEnumerable<double> GetEstimatedVariances(IDataset dataset, IEnumerable<int> rows) {170 return rows.Select(i => GetEstimatedVariance(dataset, i));171 }172 173 147 #region helpers 174 148 private double GetEstimatedValue(IDataset dataset, int row) { 175 149 return Intercept + (Coefficients.Count == 0 ? 
0 : Coefficients.Sum(s => s.Value * dataset.GetDoubleValue(s.Key, row))); 176 150 } 177 private double GetEstimatedVariance(IDataset dataset, int row) {178 if (SampleSize == 0) return 0.0;179 var sum = (from var in Variances let d = dataset.GetDoubleValue(var.Key, row) - Means[var.Key] select d * d / var.Value).Sum();180 var res = ResidualVariance * (SampleSize - 1) / (SampleSize - 2) * (1.0 / SampleSize + sum / (SampleSize - 1));181 if (double.IsInfinity(res) || double.IsNaN(res)) return 0.0;182 return Math.Sqrt(res);183 }184 151 #endregion 185 152 } -
branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/LeafTypes/ComponentReductionLinearLeaf.cs
r15830 r15967 57 57 #region IModelType 58 58 public override bool ProvidesConfidence { 59 get { return true; }59 get { return false; } 60 60 } 61 61 public override IRegressionModel Build(IRegressionProblemData pd, IRandom random, … … 71 71 foreach (var v in pd2.InputVariables.CheckedItems.ToArray()) 72 72 pd2.InputVariables.SetItemCheckedState(v.Value, inputs.Contains(v.Value.Value)); 73 double cvRmse;74 73 double rmse; 75 var model = PreconstructedLinearModel.Create ConfidenceLinearModel(pd2, out rmse, out cvRmse);76 if ( cvRmse > bestCvrmse) continue;74 var model = PreconstructedLinearModel.CreateLinearModel(pd2, out rmse); 75 if (rmse > bestCvrmse) continue; 77 76 bestModel = new ComponentReducedLinearModel(pd2.TargetVariable, model, pca); 78 77 noParameters = i + 1; 79 bestCvrmse = cvRmse;78 bestCvrmse = rmse; 80 79 } 81 80 return bestModel; -
branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/LeafTypes/LeafBase.cs
r15830 r15967 26 26 using HeuristicLab.Common; 27 27 using HeuristicLab.Core; 28 using HeuristicLab.Data; 29 using HeuristicLab.Parameters; 28 30 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; 29 31 using HeuristicLab.Problems.DataAnalysis; … … 33 35 public abstract class LeafBase : ParameterizedNamedItem, ILeafModel { 34 36 public const string LeafBuildingStateVariableName = "LeafBuildingState"; 37 public const string UseDampeningParameterName = "UseDampening"; 38 private const string DampeningParameterName = "DampeningStrenght"; 39 40 public IFixedValueParameter<DoubleValue> DampeningParameter { 41 get { return Parameters[DampeningParameterName] as IFixedValueParameter<DoubleValue>; } 42 } 43 public IFixedValueParameter<BoolValue> UseDampeningParameter { 44 get { return (IFixedValueParameter<BoolValue>)Parameters[UseDampeningParameterName]; } 45 } 46 47 public bool UseDampening { 48 get { return UseDampeningParameter.Value.Value; } 49 } 50 public double Dampening { 51 get { return DampeningParameter.Value.Value; } 52 } 35 53 36 54 #region Constructors & Cloning … … 38 56 protected LeafBase(bool deserializing) : base(deserializing) { } 39 57 protected LeafBase(LeafBase original, Cloner cloner) : base(original, cloner) { } 40 public LeafBase() { } 58 protected LeafBase() { 59 Parameters.Add(new FixedValueParameter<BoolValue>(UseDampeningParameterName, "Whether logistic dampening should be used to prevent extreme extrapolation", new BoolValue(false))); 60 Parameters.Add(new FixedValueParameter<DoubleValue>(DampeningParameterName, "Determines the strenght of the logistic dampening. Must be > 0.0. 
Larger numbers make more conservative predictions.", new DoubleValue(1.5))); 61 } 41 62 #endregion 42 63 … … 73 94 int numP; 74 95 var model = Build(pd, parameters.Random, cancellation, out numP); 96 if (UseDampening && Dampening > 0.0) { 97 model = DampenedModel.DampenModel(model, pd, Dampening); 98 } 99 75 100 numParams = numP; 76 101 cancellation.ThrowIfCancellationRequested(); -
branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/LeafTypes/LinearLeaf.cs
r15830 r15967 44 44 #region IModelType 45 45 public override bool ProvidesConfidence { 46 get { return true; }46 get { return false; } 47 47 } 48 48 public override IRegressionModel Build(IRegressionProblemData pd, IRandom random, CancellationToken cancellationToken, out int noParameters) { … … 50 50 double rmse, cvRmse; 51 51 noParameters = pd.AllowedInputVariables.Count() + 1; 52 var res = PreconstructedLinearModel.CreateConfidenceLinearModel(pd, out rmse, out cvRmse);53 return res ;52 var res = LinearRegression.CreateLinearRegressionSolution(pd, out rmse, out cvRmse); 53 return res.Model; 54 54 } 55 55 -
branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/LeafTypes/M5Leaf.cs
r15830 r15967 112 112 pd2.TrainingPartition.Start = pd.TrainingPartition.Start; 113 113 114 return new PreconstructedLinearModel( means, variances,coeffs, intercept, pd.TargetVariable);114 return new PreconstructedLinearModel(coeffs, intercept, pd.TargetVariable); 115 115 } 116 116 … … 124 124 // 125 125 // double x1, x2; 126 // var lm = PreconstructedLinearModel.Create ConfidenceLinearModel(pd2, out x1, out x2);126 // var lm = PreconstructedLinearModel.CreateLinearModel(pd2, out x1, out x2); 127 127 // intercept = lm.Intercept; 128 128 // return lm.Coefficients; -
branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/M5Regression.cs
r15833 r15967 220 220 var pruningRows = (IntArray)stateScope.Variables[PruningSetVariableName].Value; 221 221 if (1 > trainingRows.Length) 222 return new PreconstructedLinearModel(new Dictionary<string, double>(), new Dictionary<string, double>(), new Dictionary<string, double>(),0, regressionTreeParams.TargetVariable);222 return new PreconstructedLinearModel(new Dictionary<string, double>(), 0, regressionTreeParams.TargetVariable); 223 223 if (regressionTreeParams.MinLeafSize > trainingRows.Length) { 224 224 var targets = regressionTreeParams.Data.GetDoubleValues(regressionTreeParams.TargetVariable).ToArray(); 225 return new PreconstructedLinearModel(new Dictionary<string, double>(), new Dictionary<string, double>(), new Dictionary<string, double>(), targets.Average(), regressionTreeParams.TargetVariable, targets.Variance(), targets.Length);225 return new PreconstructedLinearModel(new Dictionary<string, double>(), targets.Average(), regressionTreeParams.TargetVariable); 226 226 } 227 227 model.Build(trainingRows.ToArray(), pruningRows.ToArray(), stateScope, results, cancellationToken);
Note: See TracChangeset for help on using the changeset viewer.