- Timestamp:
- 03/08/18 10:57:03 (7 years ago)
- Location:
- branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/GBM/GradientBoostingRegressionAlgorithm.cs
r14523 r15833 45 45 [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 350)] 46 46 public class GradientBoostingRegressionAlgorithm : FixedDataAnalysisAlgorithm<IRegressionProblem> { 47 48 47 #region ParameterNames 49 50 48 private const string IterationsParameterName = "Iterations"; 51 49 private const string NuParameterName = "Nu"; … … 58 56 private const string StoreRunsParameterName = "StoreRuns"; 59 57 private const string RegressionAlgorithmSolutionResultParameterName = "RegressionAlgorithmResult"; 60 61 58 #endregion 62 59 63 60 #region ParameterProperties 64 65 61 public IFixedValueParameter<IntValue> IterationsParameter { 66 62 get { return (IFixedValueParameter<IntValue>)Parameters[IterationsParameterName]; } … … 102 98 get { return (IFixedValueParameter<BoolValue>)Parameters[StoreRunsParameterName]; } 103 99 } 104 105 100 #endregion 106 101 107 102 #region Properties 108 109 103 public int Iterations { 110 104 get { return IterationsParameter.Value.Value; } … … 155 149 set { RegressionAlgorithmSolutionResultParameter.Value.Value = value; } 156 150 } 157 158 151 #endregion 159 152 160 153 [StorableConstructor] 161 154 protected GradientBoostingRegressionAlgorithm(bool deserializing) 162 : base(deserializing) { 163 } 155 : base(deserializing) { } 164 156 165 157 protected GradientBoostingRegressionAlgorithm(GradientBoostingRegressionAlgorithm original, Cloner cloner) 166 : base(original, cloner) { 167 } 158 : base(original, cloner) { } 168 159 169 160 public override IDeepCloneable Clone(Cloner cloner) { … … 232 223 var problemData = Problem.ProblemData; 233 224 var targetVarName = problemData.TargetVariable; 234 var activeVariables = problemData.AllowedInputVariables.Concat(new string[] { problemData.TargetVariable});225 var activeVariables = problemData.AllowedInputVariables.Concat(new string[] {problemData.TargetVariable}); 235 226 var modifiableDataset = new ModifiableDataset( 236 227 activeVariables, … … 252 243 List<IRegressionModel> models = new List<IRegressionModel>(); 253 244 try { 254 255 245 // Loop until iteration limit reached or canceled. 256 246 for (int i = 0; i < Iterations; i++) { … … 258 248 259 249 modifiableDataset.RemoveVariable(targetVarName); 260 modifiableDataset.AddVariable(targetVarName, curY.Concat(curYTest) );250 modifiableDataset.AddVariable(targetVarName, curY.Concat(curYTest).ToList()); 261 251 262 252 SampleTrainingData(rand, modifiableDataset, rRows, problemData.Dataset, curY, problemData.TargetVariable, problemData.TrainingIndices); // all training indices from the original problem data are allowed … … 301 291 302 292 models.Add(model); 303 304 305 293 } 306 294 … … 363 351 alglib.lrunpack(lm, out coefficients, out features); 364 352 365 var ensembleModel = new RegressionEnsembleModel(models, coefficients.Take(models.Count)) { AverageModelEstimates = false};353 var ensembleModel = new RegressionEnsembleModel(models, coefficients.Take(models.Count)) {AverageModelEstimates = false}; 366 354 var ensembleSolution = (IRegressionEnsembleSolution)ensembleModel.CreateRegressionSolution(problemData); 367 355 return ensembleSolution; … … 442 430 prob.ProblemDataParameter.Value = problemData; 443 431 return true; 444 } else return false; 432 } 433 else return false; 445 434 } 446 435 … … 478 467 // NaN evaluations would not be critical but are problematic if we want to combine all symbolic models into a single symbolic model 479 468 if (symbRegSol == null || 480 (symbRegSol.TrainingLowerEstimationLimitHits == 0 && symbRegSol.TrainingUpperEstimationLimitHits == 0 &&481 symbRegSol.TestLowerEstimationLimitHits == 0 && symbRegSol.TestUpperEstimationLimitHits == 0) &&482 symbRegSol.TrainingNaNEvaluations == 0 && symbRegSol.TestNaNEvaluations == 0) {469 (symbRegSol.TrainingLowerEstimationLimitHits == 0 && symbRegSol.TrainingUpperEstimationLimitHits == 0 && 470 symbRegSol.TestLowerEstimationLimitHits == 0 && symbRegSol.TestUpperEstimationLimitHits == 0) && 471 symbRegSol.TrainingNaNEvaluations == 0 && symbRegSol.TestNaNEvaluations == 0) { 483 472 model = sol.Model; 484 473 } … … 499 488 ((BoolValue)paramItem.Parameters["SetSeedRandomly"].ActualValue).Value = false; 500 489 ((IntValue)paramItem.Parameters["Seed"].ActualValue).Value = seed; 501 } else { 490 } 491 else { 502 492 throw new ArgumentException("Base learner does not have a seed parameter (algorithm {0})", alg.Name); 503 493 } 504 505 494 } 506 495 } -
branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/LeafModels/PreconstructedLinearModel.cs
r15830 r15833 59 59 SampleSize = original.SampleSize; 60 60 } 61 public PreconstructedLinearModel(Dictionary<string, double> means, Dictionary<string, double> variances, Dictionary<string, double> coefficients, double intercept, string targetvariable ) : base(targetvariable) {61 public PreconstructedLinearModel(Dictionary<string, double> means, Dictionary<string, double> variances, Dictionary<string, double> coefficients, double intercept, string targetvariable, double residualVariance = 0, double sampleSize = 0) : base(targetvariable) { 62 62 Coefficients = coefficients; 63 63 Intercept = intercept; … … 178 178 if (SampleSize == 0) return 0.0; 179 179 var sum = (from var in Variances let d = dataset.GetDoubleValue(var.Key, row) - Means[var.Key] select d * d / var.Value).Sum(); 180 var res = ResidualVariance * ( 1.0 / SampleSize + sum / (SampleSize - 1));180 var res = ResidualVariance * (SampleSize - 1) / (SampleSize - 2) * (1.0 / SampleSize + sum / (SampleSize - 1)); 181 181 if (double.IsInfinity(res) || double.IsNaN(res)) return 0.0; 182 return res;182 return Math.Sqrt(res); 183 183 } 184 184 #endregion -
branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/M5Regression.cs
r15830 r15833 148 148 #region Static Interface 149 149 public static IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData, IRandom random, ILeafModel leafModel = null, ISplitter splitter = null, IPruning pruning = null, 150 bool useHoldout = false, double holdoutSize = 0.2, int minimumLeafSize = 4, bool generateRules = false, ResultCollection results = null, CancellationToken? cancellationToken = null) {150 bool useHoldout = false, double holdoutSize = 0.2, int minimumLeafSize = 1, bool generateRules = false, ResultCollection results = null, CancellationToken? cancellationToken = null) { 151 151 if (leafModel == null) leafModel = new LinearLeaf(); 152 152 if (splitter == null) splitter = new M5Splitter(); … … 170 170 171 171 #region Helpers 172 private static IScope InitializeScope(IRandom random, IRegressionProblemData problemData, IPruning pruning, int minLeafSize, ILeafModel leafModel, ISplitter splitter, bool GenerateRules, bool useHoldout, double holdoutSize) {172 private static IScope InitializeScope(IRandom random, IRegressionProblemData problemData, IPruning pruning, int minLeafSize, ILeafModel leafModel, ISplitter splitter, bool generateRules, bool useHoldout, double holdoutSize) { 173 173 var stateScope = new Scope("RegressionTreeStateScope"); 174 174 … … 196 196 //store unbuilt model 197 197 IItem model; 198 if ( GenerateRules) {198 if (generateRules) { 199 199 model = RegressionRuleSetModel.CreateRuleModel(problemData.TargetVariable, regressionTreeParams); 200 200 RegressionRuleSetModel.Initialize(stateScope); … … 215 215 216 216 private static IRegressionModel Build(IScope stateScope, ResultCollection results, CancellationToken cancellationToken) { 217 var regressionTreeParams = (RegressionTreeParameters)stateScope.Variables[RegressionTreeParameterVariableName].Value; 217 218 var model = (IM5Model)stateScope.Variables[ModelVariableName].Value; 218 219 var trainingRows = (IntArray)stateScope.Variables[TrainingSetVariableName].Value; 219 220 var pruningRows = (IntArray)stateScope.Variables[PruningSetVariableName].Value; 221 if (1 > trainingRows.Length) 222 return new PreconstructedLinearModel(new Dictionary<string, double>(), new Dictionary<string, double>(), new Dictionary<string, double>(), 0, regressionTreeParams.TargetVariable); 223 if (regressionTreeParams.MinLeafSize > trainingRows.Length) { 224 var targets = regressionTreeParams.Data.GetDoubleValues(regressionTreeParams.TargetVariable).ToArray(); 225 return new PreconstructedLinearModel(new Dictionary<string, double>(), new Dictionary<string, double>(), new Dictionary<string, double>(), targets.Average(), regressionTreeParams.TargetVariable, targets.Variance(), targets.Length); 226 } 220 227 model.Build(trainingRows.ToArray(), pruningRows.ToArray(), stateScope, results, cancellationToken); 221 228 return model;
Note: See TracChangeset
for help on using the changeset viewer.