Changeset 16847 for branches/2847_M5Regression
- Timestamp:
- 04/19/19 13:06:11 (6 years ago)
- Location:
- branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression
- Files:
-
- 31 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/Interfaces/ILeafModel.cs
r15830 r16847 1 1 #region License Information 2 2 /* HeuristicLab 3 * Copyright (C) 2002-201 7Heuristic and Evolutionary Algorithms Laboratory (HEAL)3 * Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL) 4 4 * 5 5 * This file is part of HeuristicLab. … … 24 24 using HeuristicLab.Core; 25 25 using HeuristicLab.Problems.DataAnalysis; 26 using HEAL.Attic; 26 27 27 28 namespace HeuristicLab.Algorithms.DataAnalysis { 29 [StorableType("2A4CB43C-51EB-47AF-AEDD-9B84B27D318B")] 28 30 public interface ILeafModel : IParameterizedNamedItem { 29 31 bool ProvidesConfidence { get; } 30 // IRegressionModel Build(IRegressionProblemData pd, IRandom random, CancellationToken cancellationToken, out int noParameters);31 32 int MinLeafSize(IRegressionProblemData pd); 32 33 -
branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/Interfaces/IM5Model.cs
r15830 r16847 1 1 #region License Information 2 2 /* HeuristicLab 3 * Copyright (C) 2002-201 7Heuristic and Evolutionary Algorithms Laboratory (HEAL)3 * Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL) 4 4 * 5 5 * This file is part of HeuristicLab. … … 25 25 using HeuristicLab.Optimization; 26 26 using HeuristicLab.Problems.DataAnalysis; 27 using HEAL.Attic; 27 28 28 29 namespace HeuristicLab.Algorithms.DataAnalysis { 30 [StorableType("A5399E6A-6A4D-4616-A1CD-CE12FE670F12")] 29 31 public interface IM5Model : IRegressionModel { 30 32 void Build(IReadOnlyList<int> trainingRows, IReadOnlyList<int> pruningRows, IScope stateScope, ResultCollection results, CancellationToken cancellationToken); -
branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/Interfaces/IPruning.cs
r15830 r16847 23 23 using HeuristicLab.Core; 24 24 using HeuristicLab.Problems.DataAnalysis; 25 using HEAL.Attic; 25 26 26 27 namespace HeuristicLab.Algorithms.DataAnalysis { 28 [StorableType("5CAADC16-DCF2-4562-A4DF-C3D5BA1E02A5")] 27 29 public interface IPruning : IParameterizedNamedItem { 28 30 int MinLeafSize(IRegressionProblemData pd, ILeafModel leafModel); -
branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/Interfaces/ISplitter.cs
r15830 r16847 22 22 using System.Threading; 23 23 using HeuristicLab.Core; 24 using H euristicLab.Problems.DataAnalysis;24 using HEAL.Attic; 25 25 26 26 namespace HeuristicLab.Algorithms.DataAnalysis { 27 [StorableType("A6FB3D68-B298-4C89-9FD9-2D1415D131F5")] 27 28 public interface ISplitter : IParameterizedNamedItem { 28 29 void Initialize(IScope states); -
branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/LeafModels/ComponentReducedLinearModel.cs
r15967 r16847 23 23 using System.Linq; 24 24 using HeuristicLab.Common; 25 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;26 25 using HeuristicLab.Problems.DataAnalysis; 26 using HEAL.Attic; 27 27 28 28 namespace HeuristicLab.Algorithms.DataAnalysis { 29 [Storable Class]29 [StorableType("4E5B8317-648D-4A5A-A3F7-A1A5BEB9AA69")] 30 30 public class ComponentReducedLinearModel : RegressionModel { 31 31 [Storable] … … 35 35 36 36 [StorableConstructor] 37 private ComponentReducedLinearModel( bool deserializing) : base(deserializing) { }37 private ComponentReducedLinearModel(StorableConstructorFlag _) : base(_) { } 38 38 private ComponentReducedLinearModel(ComponentReducedLinearModel original, Cloner cloner) : base(original, cloner) { 39 39 Model = cloner.Clone(original.Model); 40 40 Pca = cloner.Clone(original.Pca); 41 41 } 42 42 43 public ComponentReducedLinearModel(string targetVariable, IRegressionModel model, PrincipleComponentTransformation pca) : base(targetVariable) { 43 44 Model = model; 44 45 Pca = pca; 45 46 } 47 46 48 public override IDeepCloneable Clone(Cloner cloner) { 47 49 return new ComponentReducedLinearModel(this, cloner); … … 51 53 get { return Model.VariablesUsedForPrediction; } 52 54 } 55 53 56 public override IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) { 54 57 var data = ReduceDataset(dataset, rows.ToArray()); 55 58 return Model.GetEstimatedValues(Pca.TransformDataset(data), Enumerable.Range(0, data.Rows)); 56 59 } 60 57 61 public override IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) { 58 62 return new RegressionSolution(this, problemData); -
branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/LeafModels/DampenedModel.cs
r15967 r16847 24 24 using System.Linq; 25 25 using HeuristicLab.Common; 26 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;27 26 using HeuristicLab.Problems.DataAnalysis; 27 using HEAL.Attic; 28 28 29 29 namespace HeuristicLab.Algorithms.DataAnalysis { 30 30 //mulitdimensional extension of http://www2.stat.duke.edu/~tjl13/s101/slides/unit6lec3H.pdf 31 [Storable Class]31 [StorableType("42E9766F-207F-47B1-890C-D5DFCF469838")] 32 32 public class DampenedModel : RegressionModel { 33 33 [Storable] … … 41 41 42 42 [StorableConstructor] 43 protected DampenedModel( bool deserializing) : base(deserializing) { }43 protected DampenedModel(StorableConstructorFlag _) : base(_) { } 44 44 protected DampenedModel(DampenedModel original, Cloner cloner) : base(original, cloner) { 45 45 Model = cloner.Clone(original.Model); … … 69 69 get { return Model.VariablesUsedForPrediction; } 70 70 } 71 71 72 public override IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) { 72 73 var slow = Sigmoid(-Dampening); … … 79 80 } 80 81 } 82 81 83 public override IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) { 82 84 return new RegressionSolution(this, problemData); … … 93 95 return ((x - oMin) / d) * nd + nMin; 94 96 } 97 95 98 private static double Sigmoid(double x) { 96 99 return 1 / (1 + Math.Exp(-x)); … … 98 101 99 102 100 [Storable Class]103 [StorableType("CCC93BEC-8796-4D8E-AC58-DD175073A79B")] 101 104 private sealed class ConfidenceDampenedModel : DampenedModel, IConfidenceRegressionModel { 102 105 #region HLConstructors 103 106 [StorableConstructor] 104 private ConfidenceDampenedModel( bool deserializing) : base(deserializing) { }107 private ConfidenceDampenedModel(StorableConstructorFlag _) : base(_) { } 105 108 private ConfidenceDampenedModel(ConfidenceDampenedModel original, Cloner cloner) : base(original, cloner) { } 106 109 public ConfidenceDampenedModel(IConfidenceRegressionModel model, 
IRegressionProblemData pd, double dampening) : base(model, pd, dampening) { } -
branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/LeafModels/PreconstructedLinearModel.cs
r15967 r16847 22 22 using System; 23 23 using System.Collections.Generic; 24 using System.Diagnostics;25 24 using System.Linq; 26 25 using HeuristicLab.Common; 27 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;28 26 using HeuristicLab.Problems.DataAnalysis; 27 using HEAL.Attic; 29 28 30 29 namespace HeuristicLab.Algorithms.DataAnalysis { 31 30 //mulitdimensional extension of http://www2.stat.duke.edu/~tjl13/s101/slides/unit6lec3H.pdf 32 [Storable Class]31 [StorableType("15F2295C-28C1-48C3-8DCB-9470823C6734")] 33 32 internal sealed class PreconstructedLinearModel : RegressionModel { 34 33 [Storable] … … 40 39 get { return Coefficients.Keys; } 41 40 } 41 42 42 #region HLConstructors 43 43 [StorableConstructor] 44 private PreconstructedLinearModel( bool deserializing) : base(deserializing) { }44 private PreconstructedLinearModel(StorableConstructorFlag _) : base(_) { } 45 45 private PreconstructedLinearModel(PreconstructedLinearModel original, Cloner cloner) : base(original, cloner) { 46 46 if (original.Coefficients != null) Coefficients = original.Coefficients.ToDictionary(x => x.Key, x => x.Value); … … 48 48 } 49 49 public PreconstructedLinearModel(Dictionary<string, double> coefficients, double intercept, string targetvariable) : base(targetvariable) { 50 Coefficients = coefficients;50 Coefficients = new Dictionary<string, double>(coefficients); 51 51 Intercept = intercept; 52 52 } … … 117 117 alglib.spdmatrixcholeskysolve(aTa, n + 1, true, aTyVector, out info, out report, out coefficients); 118 118 119 //if cholesky calculation fails fall ba kcto classic linear regresseion119 //if cholesky calculation fails fall back to classic linear regresseion 120 120 if (info != 1) { 121 121 alglib.linearmodel lm; -
branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/LeafTypes/ComplexLeaf.cs
r15830 r16847 26 26 using HeuristicLab.Core; 27 27 using HeuristicLab.Parameters; 28 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;29 28 using HeuristicLab.Problems.DataAnalysis; 29 using HEAL.Attic; 30 30 31 31 namespace HeuristicLab.Algorithms.DataAnalysis { 32 [Storable Class]33 [Item("ComplexLeaf", "A leaf type that uses an arbitr iary RegressionAlgorithm to create leaf models")]32 [StorableType("F34A0ED9-2CF6-4DEE-850D-08790663B66D")] 33 [Item("ComplexLeaf", "A leaf type that uses an arbitrary RegressionAlgorithm to create leaf models")] 34 34 public class ComplexLeaf : LeafBase { 35 35 public const string RegressionParameterName = "Regression"; 36 36 public IValueParameter<IDataAnalysisAlgorithm<IRegressionProblem>> RegressionParameter { 37 get { return Parameters[RegressionParameterName] as IValueParameter<IDataAnalysisAlgorithm<IRegressionProblem>>; }37 get { return (IValueParameter<IDataAnalysisAlgorithm<IRegressionProblem>>)Parameters[RegressionParameterName]; } 38 38 } 39 39 public IDataAnalysisAlgorithm<IRegressionProblem> Regression { 40 40 get { return RegressionParameter.Value; } 41 set { RegressionParameter.Value = value; } 41 42 } 42 43 43 44 #region Constructors & Cloning 44 45 [StorableConstructor] 45 private ComplexLeaf( bool deserializing) : base(deserializing) { }46 private ComplexLeaf(StorableConstructorFlag _) : base(_) { } 46 47 private ComplexLeaf(ComplexLeaf original, Cloner cloner) : base(original, cloner) { } 47 48 public ComplexLeaf() { … … 61 62 if (pd.Dataset.Rows < MinLeafSize(pd)) throw new ArgumentException("The number of training instances is too small to create a linear model"); 62 63 noParameters = pd.Dataset.Rows + 1; 63 Regression.Problem = new RegressionProblem { ProblemData = pd};64 Regression.Problem = new RegressionProblem { ProblemData = pd }; 64 65 var res = RegressionTreeUtilities.RunSubAlgorithm(Regression, random.Next(), cancellationToken); 65 66 var t = res.Select(x => 
x.Value).OfType<IRegressionSolution>().FirstOrDefault(); -
branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/LeafTypes/ComponentReductionLinearLeaf.cs
r15967 r16847 28 28 using HeuristicLab.Data; 29 29 using HeuristicLab.Parameters; 30 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;31 30 using HeuristicLab.Problems.DataAnalysis; 31 using HEAL.Attic; 32 32 33 33 namespace HeuristicLab.Algorithms.DataAnalysis { 34 [Storable Class]34 [StorableType("5730B54C-7A8B-4CA7-8F37-7FF3F9848CD2")] 35 35 [Item("ComponentReductionLinearLeaf", "A leaf type that uses principle component analysis to create smaller linear models as leaf models")] 36 36 public class ComponentReductionLinearLeaf : LeafBase { 37 public const string N oComponentsParameterName = "NoComponents";38 public IFixedValueParameter<IntValue> N oComponentsParameter {39 get { return Parameters[NoComponentsParameterName] as IFixedValueParameter<IntValue>; }37 public const string NumberOfComponentsParameterName = "NoComponents"; 38 public IFixedValueParameter<IntValue> NumberOfCompontentsParameter { 39 get { return (IFixedValueParameter<IntValue>)Parameters[NumberOfComponentsParameterName]; } 40 40 } 41 public int NoComponents { 42 get { return NoComponentsParameter.Value.Value; } 41 public int NumberOfComponents { 42 get { return NumberOfCompontentsParameter.Value.Value; } 43 set { NumberOfCompontentsParameter.Value.Value = value; } 43 44 } 44 45 45 46 #region Constructors & Cloning 46 47 [StorableConstructor] 47 protected ComponentReductionLinearLeaf( bool deserializing) : base(deserializing) { }48 protected ComponentReductionLinearLeaf(StorableConstructorFlag _) : base(_) { } 48 49 protected ComponentReductionLinearLeaf(ComponentReductionLinearLeaf original, Cloner cloner) : base(original, cloner) { } 49 50 public ComponentReductionLinearLeaf() { 50 Parameters.Add(new FixedValueParameter<IntValue>(N oComponentsParameterName, "The maximum number of principle components used", new IntValue(10)));51 Parameters.Add(new FixedValueParameter<IntValue>(NumberOfComponentsParameterName, "The maximum number of principle components used (default=10)", 
new IntValue(10))); 51 52 } 52 53 public override IDeepCloneable Clone(Cloner cloner) { … … 59 60 get { return false; } 60 61 } 62 61 63 public override IRegressionModel Build(IRegressionProblemData pd, IRandom random, 62 CancellationToken cancellationToken, out int n oParameters) {63 var pca = PrincipleComponentTransformation.CreateProjection(pd.Dataset, pd.TrainingIndices, pd.AllowedInputVariables, true);64 CancellationToken cancellationToken, out int numberOfParameters) { 65 var pca = PrincipleComponentTransformation.CreateProjection(pd.Dataset, pd.TrainingIndices, pd.AllowedInputVariables, normalize: true); 64 66 var pcdata = pca.TransformProblemData(pd); 65 67 ComponentReducedLinearModel bestModel = null; 66 68 var bestCvrmse = double.MaxValue; 67 n oParameters = 1;68 for (var i = 1; i <= Math.Min(N oComponents, pd.AllowedInputVariables.Count()); i++) {69 numberOfParameters = 1; 70 for (var i = 1; i <= Math.Min(NumberOfComponents, pd.AllowedInputVariables.Count()); i++) { 69 71 var pd2 = (IRegressionProblemData)pcdata.Clone(); 70 72 var inputs = new HashSet<string>(pca.ComponentNames.Take(i)); … … 75 77 if (rmse > bestCvrmse) continue; 76 78 bestModel = new ComponentReducedLinearModel(pd2.TargetVariable, model, pca); 77 n oParameters = i + 1;79 numberOfParameters = i + 1; 78 80 bestCvrmse = rmse; 79 81 } … … 82 84 83 85 public override int MinLeafSize(IRegressionProblemData pd) { 84 return N oComponents + 2;86 return NumberOfComponents + 2; 85 87 } 86 88 #endregion -
branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/LeafTypes/ConstantLeaf.cs
r15830 r16847 25 25 using HeuristicLab.Common; 26 26 using HeuristicLab.Core; 27 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;28 27 using HeuristicLab.Problems.DataAnalysis; 28 using HEAL.Attic; 29 29 30 30 namespace HeuristicLab.Algorithms.DataAnalysis { 31 [Storable Class]31 [StorableType("F3E94907-C5FF-4658-A870-8013C61DD2E1")] 32 32 [Item("ConstantLeaf", "A leaf type that uses constant models as leaf models")] 33 33 public class ConstantLeaf : LeafBase { 34 34 #region Constructors & Cloning 35 35 [StorableConstructor] 36 protected ConstantLeaf( bool deserializing) : base(deserializing) { }36 protected ConstantLeaf(StorableConstructorFlag _) : base(_) { } 37 37 protected ConstantLeaf(ConstantLeaf original, Cloner cloner) : base(original, cloner) { } 38 38 public ConstantLeaf() { } … … 46 46 get { return false; } 47 47 } 48 public override IRegressionModel Build(IRegressionProblemData pd, IRandom random, CancellationToken cancellationToken, out int n oParameters) {48 public override IRegressionModel Build(IRegressionProblemData pd, IRandom random, CancellationToken cancellationToken, out int numberOfParameters) { 49 49 if (pd.Dataset.Rows < MinLeafSize(pd)) throw new ArgumentException("The number of training instances is too small to create a linear model"); 50 n oParameters = 1;50 numberOfParameters = 1; 51 51 return new PreconstructedLinearModel(pd.Dataset.GetDoubleValues(pd.TargetVariable).Average(), pd.TargetVariable); 52 52 } -
branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/LeafTypes/GaussianProcessLeaf.cs
r15830 r16847 27 27 using HeuristicLab.Data; 28 28 using HeuristicLab.Parameters; 29 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;30 29 using HeuristicLab.Problems.DataAnalysis; 30 using HEAL.Attic; 31 31 32 32 namespace HeuristicLab.Algorithms.DataAnalysis { 33 [Storable Class]34 [Item("GaussianProcessLeaf", "A leaf type that uses gaussian process models as leaf models.")]33 [StorableType("852B9F7D-9C2B-4574-BB71-EE70106EA809")] 34 [Item("GaussianProcessLeaf", "A leaf type that uses Gaussian process models as leaf models.")] 35 35 public class GaussianProcessLeaf : LeafBase { 36 36 #region ParameterNames … … 41 41 #region ParameterProperties 42 42 public IFixedValueParameter<IntValue> TriesParameter { 43 get { return Parameters[TriesParameterName] as IFixedValueParameter<IntValue>; }43 get { return (IFixedValueParameter<IntValue>)Parameters[TriesParameterName]; } 44 44 } 45 public I FixedValueParameter<GaussianProcessRegression> RegressionParameter {46 get { return Parameters[RegressionParameterName] as IFixedValueParameter<GaussianProcessRegression>; }45 public IValueParameter<GaussianProcessRegression> RegressionParameter { 46 get { return (IValueParameter<GaussianProcessRegression>)Parameters[RegressionParameterName]; } 47 47 } 48 48 #endregion … … 51 51 public int Tries { 52 52 get { return TriesParameter.Value.Value; } 53 set { TriesParameter.Value.Value = value; } 53 54 } 54 55 public GaussianProcessRegression Regression { 55 56 get { return RegressionParameter.Value; } 57 set { RegressionParameter.Value = value; } 56 58 } 57 59 #endregion … … 59 61 #region Constructors & Cloning 60 62 [StorableConstructor] 61 protected GaussianProcessLeaf( bool deserializing) : base(deserializing) { }63 protected GaussianProcessLeaf(StorableConstructorFlag _) : base(_) { } 62 64 protected GaussianProcessLeaf(GaussianProcessLeaf original, Cloner cloner) : base(original, cloner) { } 63 65 public GaussianProcessLeaf() { … … 66 68 
gp.MeanFunctionParameter.Value = new MeanLinear(); 67 69 68 Parameters.Add(new FixedValueParameter<IntValue>(TriesParameterName, "Number of re petitions", new IntValue(10)));69 Parameters.Add(new FixedValueParameter<GaussianProcessRegression>(RegressionParameterName, "The algorithm creating GPmodels", gp));70 Parameters.Add(new FixedValueParameter<IntValue>(TriesParameterName, "Number of restarts (default = 10)", new IntValue(10))); 71 Parameters.Add(new ValueParameter<GaussianProcessRegression>(RegressionParameterName, "The algorithm creating Gaussian process models", gp)); 70 72 } 71 73 public override IDeepCloneable Clone(Cloner cloner) { … … 78 80 get { return true; } 79 81 } 80 public override IRegressionModel Build(IRegressionProblemData pd, IRandom random, CancellationToken cancellationToken, out int noParameters) { 81 if (pd.Dataset.Rows < MinLeafSize(pd)) throw new ArgumentException("The number of training instances is too small to create a gaussian process model"); 82 Regression.Problem = new RegressionProblem {ProblemData = pd}; 82 83 public override IRegressionModel Build(IRegressionProblemData pd, IRandom random, CancellationToken cancellationToken, out int numberOfParameters) { 84 if (pd.Dataset.Rows < MinLeafSize(pd)) throw new ArgumentException("The number of training instances is too small to create a Gaussian process model"); 85 Regression.Problem = new RegressionProblem { ProblemData = pd }; 83 86 var cvscore = double.MaxValue; 84 87 GaussianProcessRegressionSolution sol = null; … … 93 96 } 94 97 Regression.Runs.Clear(); 95 if (sol == null) throw new ArgumentException("Could not create Gaussian Process model");98 if (sol == null) throw new ArgumentException("Could not create Gaussian process model"); 96 99 97 n oParameters = pd.Dataset.Rows + 198 + Regression.CovarianceFunction.GetNumberOfParameters(pd.AllowedInputVariables.Count())99 + Regression.MeanFunction.GetNumberOfParameters(pd.AllowedInputVariables.Count());100 numberOfParameters = 
pd.Dataset.Rows + 1 101 + Regression.CovarianceFunction.GetNumberOfParameters(pd.AllowedInputVariables.Count()) 102 + Regression.MeanFunction.GetNumberOfParameters(pd.AllowedInputVariables.Count()); 100 103 return sol.Model; 101 104 } -
branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/LeafTypes/LeafBase.cs
r15967 r16847 20 20 #endregion 21 21 22 using System;23 22 using System.Collections.Generic; 24 23 using System.Linq; … … 28 27 using HeuristicLab.Data; 29 28 using HeuristicLab.Parameters; 30 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;31 29 using HeuristicLab.Problems.DataAnalysis; 30 using HEAL.Attic; 32 31 33 32 namespace HeuristicLab.Algorithms.DataAnalysis { 34 [Storable Class]33 [StorableType("F3A9CCD4-975F-4F55-BE24-3A3E932591F6")] 35 34 public abstract class LeafBase : ParameterizedNamedItem, ILeafModel { 36 35 public const string LeafBuildingStateVariableName = "LeafBuildingState"; 37 36 public const string UseDampeningParameterName = "UseDampening"; 38 p rivate const string DampeningParameterName = "DampeningStrenght";37 public const string DampeningParameterName = "DampeningStrength"; 39 38 40 39 public IFixedValueParameter<DoubleValue> DampeningParameter { 41 get { return Parameters[DampeningParameterName] as IFixedValueParameter<DoubleValue>; }40 get { return (IFixedValueParameter<DoubleValue>)Parameters[DampeningParameterName]; } 42 41 } 43 42 public IFixedValueParameter<BoolValue> UseDampeningParameter { … … 47 46 public bool UseDampening { 48 47 get { return UseDampeningParameter.Value.Value; } 48 set { UseDampeningParameter.Value.Value = value; } 49 49 } 50 50 public double Dampening { 51 51 get { return DampeningParameter.Value.Value; } 52 set { DampeningParameter.Value.Value = value; } 52 53 } 53 54 54 55 #region Constructors & Cloning 55 56 [StorableConstructor] 56 protected LeafBase( bool deserializing) : base(deserializing) { }57 protected LeafBase(StorableConstructorFlag _) : base(_) { } 57 58 protected LeafBase(LeafBase original, Cloner cloner) : base(original, cloner) { } 58 59 protected LeafBase() { 59 Parameters.Add(new FixedValueParameter<BoolValue>(UseDampeningParameterName, "Whether logistic dampening should be used to prevent extreme extrapolation ", new BoolValue(false)));60 Parameters.Add(new 
FixedValueParameter<DoubleValue>(DampeningParameterName, "Determines the streng ht of the logistic dampening. Must be > 0.0. Larger numbers make more conservative predictions.", new DoubleValue(1.5)));60 Parameters.Add(new FixedValueParameter<BoolValue>(UseDampeningParameterName, "Whether logistic dampening should be used to prevent extreme extrapolation (default=false)", new BoolValue(false))); 61 Parameters.Add(new FixedValueParameter<DoubleValue>(DampeningParameterName, "Determines the strength of logistic dampening. Must be > 0.0. Larger numbers lead to more conservative predictions. (default=1.5)", new DoubleValue(1.5))); 61 62 } 62 63 #endregion … … 65 66 public abstract bool ProvidesConfidence { get; } 66 67 public abstract int MinLeafSize(IRegressionProblemData pd); 68 67 69 public void Initialize(IScope states) { 68 70 states.Variables.Add(new Variable(LeafBuildingStateVariableName, new LeafBuildingState())); 69 71 } 72 70 73 public void Build(RegressionNodeTreeModel tree, IReadOnlyList<int> trainingRows, IScope stateScope, CancellationToken cancellationToken) { 71 74 var parameters = (RegressionTreeParameters)stateScope.Variables[M5Regression.RegressionTreeParameterVariableName].Value; … … 86 89 } 87 90 88 public IRegressionModel BuildModel(IReadOnlyList<int> rows, RegressionTreeParameters parameters, CancellationToken cancellation, out int num Params) {91 public IRegressionModel BuildModel(IReadOnlyList<int> rows, RegressionTreeParameters parameters, CancellationToken cancellation, out int numberOfParameters) { 89 92 var reducedData = RegressionTreeUtilities.ReduceDataset(parameters.Data, rows, parameters.AllowedInputVariables.ToArray(), parameters.TargetVariable); 90 93 var pd = new RegressionProblemData(reducedData, parameters.AllowedInputVariables.ToArray(), parameters.TargetVariable); … … 98 101 } 99 102 100 num Params = numP;103 numberOfParameters = numP; 101 104 cancellation.ThrowIfCancellationRequested(); 102 105 return model; 103 106 } 104 107 
105 public abstract IRegressionModel Build(IRegressionProblemData pd, IRandom random, CancellationToken cancellationToken, out int n oParameters);108 public abstract IRegressionModel Build(IRegressionProblemData pd, IRandom random, CancellationToken cancellationToken, out int numberOfParameters); 106 109 #endregion 107 110 108 [Storable Class]111 [StorableType("495243C0-6C15-4328-B30D-FFBFA0F54DCB")] 109 112 public class LeafBuildingState : Item { 110 113 [Storable] … … 115 118 //State.Code values denote the current action (for pausing) 116 119 //0...nothing has been done; 117 //1...building Models;120 //1...building models; 118 121 [Storable] 119 122 public int Code = 0; … … 121 124 #region HLConstructors & Cloning 122 125 [StorableConstructor] 123 protected LeafBuildingState( bool deserializing) : base(deserializing) { }126 protected LeafBuildingState(StorableConstructorFlag _) : base(_) { } 124 127 protected LeafBuildingState(LeafBuildingState original, Cloner cloner) : base(original, cloner) { 125 128 nodeQueue = new Queue<RegressionNodeModel>(original.nodeQueue.Select(cloner.Clone)); -
branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/LeafTypes/LinearLeaf.cs
r15967 r16847 25 25 using HeuristicLab.Common; 26 26 using HeuristicLab.Core; 27 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;28 27 using HeuristicLab.Problems.DataAnalysis; 28 using HEAL.Attic; 29 29 30 30 namespace HeuristicLab.Algorithms.DataAnalysis { 31 [Storable Class]31 [StorableType("347CA25D-FB37-4C4F-9B61-9D79288B2B28")] 32 32 [Item("LinearLeaf", "A leaf type that uses linear models as leaf models. This is the standard for M5' regression")] 33 33 public class LinearLeaf : LeafBase { 34 34 #region Constructors & Cloning 35 35 [StorableConstructor] 36 protected LinearLeaf( bool deserializing) : base(deserializing) { }36 protected LinearLeaf(StorableConstructorFlag _) : base(_) { } 37 37 protected LinearLeaf(LinearLeaf original, Cloner cloner) : base(original, cloner) { } 38 38 public LinearLeaf() { } … … 46 46 get { return false; } 47 47 } 48 public override IRegressionModel Build(IRegressionProblemData pd, IRandom random, CancellationToken cancellationToken, out int n oParameters) {48 public override IRegressionModel Build(IRegressionProblemData pd, IRandom random, CancellationToken cancellationToken, out int numberOfParameters) { 49 49 if (pd.Dataset.Rows < MinLeafSize(pd)) throw new ArgumentException("The number of training instances is too small to create a linear model"); 50 50 double rmse, cvRmse; 51 n oParameters = pd.AllowedInputVariables.Count() + 1;52 var res = LinearRegression.Create LinearRegressionSolution(pd, out rmse, out cvRmse);51 numberOfParameters = pd.AllowedInputVariables.Count() + 1; 52 var res = LinearRegression.CreateSolution(pd, out rmse, out cvRmse); 53 53 return res.Model; 54 54 } -
branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/LeafTypes/M5Leaf.cs
r15967 r16847 26 26 using HeuristicLab.Common; 27 27 using HeuristicLab.Core; 28 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;29 28 using HeuristicLab.Problems.DataAnalysis; 29 using HEAL.Attic; 30 30 31 31 namespace HeuristicLab.Algorithms.DataAnalysis { 32 [Storable Class]33 [Item("M5Leaf", "A leaf type that uses linear models as leaf models. This is the standard for M5' regression")]32 [StorableType("58517042-5318-4087-B098-AC75F0208BA0")] 33 [Item("M5Leaf", "A leaf type that uses regularized linear models with feature selection as leaf models.")] 34 34 public class M5Leaf : LeafBase { 35 35 #region Constructors & Cloning 36 36 [StorableConstructor] 37 private M5Leaf( bool deserializing) : base(deserializing) { }37 private M5Leaf(StorableConstructorFlag _) : base(_) { } 38 38 private M5Leaf(M5Leaf original, Cloner cloner) : base(original, cloner) { } 39 39 public M5Leaf() { } … … 47 47 get { return false; } 48 48 } 49 public override IRegressionModel Build(IRegressionProblemData pd, IRandom random, CancellationToken cancellationToken, out int n oParameters) {49 public override IRegressionModel Build(IRegressionProblemData pd, IRandom random, CancellationToken cancellationToken, out int numberOfParameters) { 50 50 if (pd.Dataset.Rows == 0) throw new ArgumentException("The number of training instances is too small to create an M5 leaf model"); 51 51 52 52 if (pd.Dataset.Rows == 1) 53 return new ConstantLeaf().Build(pd, random, cancellationToken, out n oParameters);53 return new ConstantLeaf().Build(pd, random, cancellationToken, out numberOfParameters); 54 54 55 55 var means = pd.AllowedInputVariables.ToDictionary(n => n, n => pd.Dataset.GetDoubleValues(n, pd.TrainingIndices).Average()); … … 57 57 var used = pd.AllowedInputVariables.Where(v => !variances[v].IsAlmost(0.0)).ToList(); 58 58 59 var classMean = pd.TargetVariableTrainingValues.Average();60 var classVar= pd.TargetVariableTrainingValues.Variance();59 var targetMean = 
pd.TargetVariableTrainingValues.Average(); 60 var targetVariance = pd.TargetVariableTrainingValues.Variance(); 61 61 62 var model = FindBestModel(variances, means, classMean, classVar, pd, used);63 n oParameters = 1 + model.Coefficients.Count;62 var model = FindBestModel(variances, means, targetMean, targetVariance, pd, used); 63 numberOfParameters = 1 + model.Coefficients.Count; 64 64 return model; 65 65 } … … 69 69 #endregion 70 70 71 private static PreconstructedLinearModel FindBestModel(Dictionary<string, double> variances, Dictionary<string, double> means, double cMean, double cVar, IRegressionProblemData pd, IList<string> variables) {71 private static PreconstructedLinearModel FindBestModel(Dictionary<string, double> variances, Dictionary<string, double> means, double yMean, double yVariance, IRegressionProblemData pd, IList<string> variables) { 72 72 Dictionary<string, double> coeffs; 73 73 double intercept; 74 74 do { 75 coeffs = DoRegression(pd, variables, variances, means, cMean, 1.0e-8, out intercept);76 variables = DeselectColinear(variances, coeffs, cVar, pd, variables);75 coeffs = DoRegression(pd, variables, variances, means, yMean, 1.0e-8, out intercept); 76 variables = DeselectColinear(variances, coeffs, yVariance, pd, variables); 77 77 } 78 78 while (coeffs.Count != variables.Count); … … 88 88 improved = false; 89 89 currentNumAttributes--; 90 // Find attribute with smallest SC 91 var candidate = variables.ToDictionary(v => v, v => Math.Abs(coeffs[v] * Math.Sqrt(variances[v] / cVar)))90 // Find attribute with smallest SC (variance-scaled coefficient) 91 var candidate = variables.ToDictionary(v => v, v => Math.Abs(coeffs[v] * Math.Sqrt(variances[v] / yVariance))) 92 92 .OrderBy(x => x.Value).Select(x => x.Key).First(); 93 93 94 94 var currVariables = variables.Where(v => !v.Equals(candidate)).ToList(); 95 95 var currentIntercept = 0.0; 96 var currentCoeffs = DoRegression(pd, currVariables, variances, means, cMean, 1.0e-8, out currentIntercept);96 
var currentCoeffs = DoRegression(pd, currVariables, variances, means, yMean, 1.0e-8, out currentIntercept); 97 97 var currentMse = CalculateSE(currentCoeffs, currentIntercept, pd, currVariables); 98 98 var currentAkaike = currentMse / fullMse * (numInst - numAtts) + 2 * currentNumAttributes; … … 115 115 } 116 116 117 private static Dictionary<string, double> DoRegression(IRegressionProblemData pd, IList<string> variables, Dictionary<string, double> variances, Dictionary<string, double> means, double cmean, double ridge, out double intercept) { 118 //if (pd.TrainingIndices.Count() > variables.Count) { 119 // var pd2 = new RegressionProblemData(pd.Dataset, variables, pd.TargetVariable); 120 // pd2.TestPartition.End = pd.TestPartition.End; 121 // pd2.TestPartition.Start = pd.TestPartition.Start; 122 // pd2.TrainingPartition.End = pd.TrainingPartition.End; 123 // pd2.TrainingPartition.Start = pd.TrainingPartition.Start; 124 // 125 // double x1, x2; 126 // var lm = PreconstructedLinearModel.CreateLinearModel(pd2, out x1, out x2); 127 // intercept = lm.Intercept; 128 // return lm.Coefficients; 117 private static Dictionary<string, double> DoRegression(IRegressionProblemData pd, IList<string> variables, Dictionary<string, double> variances, Dictionary<string, double> means, double yMean, double ridge, out double intercept) { 129 118 130 119 var n = variables.Count; … … 182 171 if (coefficients == null) throw new ArgumentException("No linear model could be built"); 183 172 184 intercept = cmean;173 intercept = yMean; 185 174 var res = new Dictionary<string, double>(); 186 175 for (var i = 0; i < n; i++) { … … 193 182 } 194 183 195 private static IList<string> DeselectColinear(Dictionary<string, double> variances, Dictionary<string, double> coeffs, double cVar, IRegressionProblemData pd, IList<string> variables) {196 var candidates = variables.ToDictionary(v => v, v => Math.Abs(coeffs[v] * Math.Sqrt(variances[v] / cVar))).Where(x => x.Value > 1.5).OrderBy(x => 
-x.Value).ToList();184 private static IList<string> DeselectColinear(Dictionary<string, double> variances, Dictionary<string, double> coeffs, double yVariance, IRegressionProblemData pd, IList<string> variables) { 185 var candidates = variables.ToDictionary(v => v, v => Math.Abs(coeffs[v] * Math.Sqrt(variances[v] / yVariance))).Where(x => x.Value > 1.5).OrderBy(x => -x.Value).ToList(); 197 186 if (candidates.Count == 0) return variables; 198 187 var c = candidates.First().Key; -
branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/LeafTypes/M5regLeaf.cs
r15830 r16847 26 26 using HeuristicLab.Common; 27 27 using HeuristicLab.Core; 28 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;29 28 using HeuristicLab.Problems.DataAnalysis; 29 using HEAL.Attic; 30 30 31 31 namespace HeuristicLab.Algorithms.DataAnalysis { 32 [Storable Class]33 [Item("M5regLeaf", "A leaf type that uses linear models as leaf models. This is the standard for M5' regression")]32 [StorableType("0AED959D-78C3-4927-BDCF-473D0AEE32AA")] 33 [Item("M5regLeaf", "A leaf type that uses regularized linear models as leaf models.")] 34 34 public class M5regLeaf : LeafBase { 35 35 #region Constructors & Cloning 36 36 [StorableConstructor] 37 private M5regLeaf( bool deserializing) : base(deserializing) { }37 private M5regLeaf(StorableConstructorFlag _) : base(_) { } 38 38 private M5regLeaf(M5regLeaf original, Cloner cloner) : base(original, cloner) { } 39 39 public M5regLeaf() { } … … 47 47 get { return true; } 48 48 } 49 public override IRegressionModel Build(IRegressionProblemData pd, IRandom random, CancellationToken cancellationToken, out int noParameters) { 49 50 public override IRegressionModel Build(IRegressionProblemData pd, IRandom random, CancellationToken cancellationToken, out int numberOfParameters) { 50 51 if (pd.Dataset.Rows < MinLeafSize(pd)) throw new ArgumentException("The number of training instances is too small to create a linear model"); 51 n oParameters = pd.AllowedInputVariables.Count() + 1;52 numberOfParameters = pd.AllowedInputVariables.Count() + 1; 52 53 53 54 double x1, x2; 54 55 var coeffs = ElasticNetLinearRegression.CalculateModelCoefficients(pd, 1, 0.2, out x1, out x2); 55 n oParameters = coeffs.Length;56 numberOfParameters = coeffs.Length; 56 57 return ElasticNetLinearRegression.CreateSymbolicSolution(coeffs, pd).Model; 57 58 } -
branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/M5Regression.cs
r15967 r16847 1 using System; 1 #region License Information 2 /* HeuristicLab 3 * Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL) 4 * 5 * This file is part of HeuristicLab. 6 * 7 * HeuristicLab is free software: you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation, either version 3 of the License, or 10 * (at your option) any later version. 11 * 12 * HeuristicLab is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>. 19 */ 20 #endregion 21 22 using System; 2 23 using System.Collections.Generic; 3 24 using System.Linq; … … 9 30 using HeuristicLab.Optimization; 10 31 using HeuristicLab.Parameters; 11 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;12 32 using HeuristicLab.PluginInfrastructure; 13 33 using HeuristicLab.Problems.DataAnalysis; 14 34 using HeuristicLab.Random; 35 using HEAL.Attic; 15 36 16 37 namespace HeuristicLab.Algorithms.DataAnalysis { 17 [Storable Class]38 [StorableType("FC8D8E5A-D16D-41BB-91CF-B2B35D17ADD7")] 18 39 [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 95)] 19 40 [Item("M5RegressionTree", "A M5 regression tree / rule set")] … … 28 49 public const string TrainingSetVariableName = "TrainingSet"; 29 50 30 #region Parameter name51 #region Parameter names 31 52 private const string GenerateRulesParameterName = "GenerateRules"; 32 53 private const string HoldoutSizeParameterName = "HoldoutSize"; 33 private const string Split erParameterName = "Splitter";54 private const string SplitterParameterName = "Splitter"; 34 55 private const string 
MinimalNodeSizeParameterName = "MinimalNodeSize"; 35 56 private const string LeafModelParameterName = "LeafModel"; … … 47 68 get { return (IFixedValueParameter<PercentValue>)Parameters[HoldoutSizeParameterName]; } 48 69 } 49 public IConstrainedValueParameter<ISplitter> ImpurityParameter {50 get { return (IConstrainedValueParameter<ISplitter>)Parameters[Split erParameterName]; }70 public IConstrainedValueParameter<ISplitter> SplitterParameter { 71 get { return (IConstrainedValueParameter<ISplitter>)Parameters[SplitterParameterName]; } 51 72 } 52 73 public IFixedValueParameter<IntValue> MinimalNodeSizeParameter { … … 73 94 public bool GenerateRules { 74 95 get { return GenerateRulesParameter.Value.Value; } 96 set { GenerateRulesParameter.Value.Value = value; } 75 97 } 76 98 public double HoldoutSize { 77 99 get { return HoldoutSizeParameter.Value.Value; } 100 set { HoldoutSizeParameter.Value.Value = value; } 78 101 } 79 102 public ISplitter Splitter { 80 get { return ImpurityParameter.Value; } 103 get { return SplitterParameter.Value; } 104 // no setter because this is a constrained parameter 81 105 } 82 106 public int MinimalNodeSize { 83 107 get { return MinimalNodeSizeParameter.Value.Value; } 108 set { MinimalNodeSizeParameter.Value.Value = value; } 84 109 } 85 110 public ILeafModel LeafModel { … … 91 116 public int Seed { 92 117 get { return SeedParameter.Value.Value; } 118 set { SeedParameter.Value.Value = value; } 93 119 } 94 120 public bool SetSeedRandomly { 95 121 get { return SetSeedRandomlyParameter.Value.Value; } 122 set { SetSeedRandomlyParameter.Value.Value = value; } 96 123 } 97 124 public bool UseHoldout { 98 125 get { return UseHoldoutParameter.Value.Value; } 126 set { UseHoldoutParameter.Value.Value = value; } 99 127 } 100 128 #endregion … … 107 135 #region Constructors and Cloning 108 136 [StorableConstructor] 109 private M5Regression( bool deserializing) : base(deserializing) { }137 private M5Regression(StorableConstructorFlag _) : base(_) { } 110 
138 private M5Regression(M5Regression original, Cloner cloner) : base(original, cloner) { 111 139 stateScope = cloner.Clone(stateScope); … … 114 142 var modelSet = new ItemSet<ILeafModel>(ApplicationManager.Manager.GetInstances<ILeafModel>()); 115 143 var pruningSet = new ItemSet<IPruning>(ApplicationManager.Manager.GetInstances<IPruning>()); 116 var impuritySet = new ItemSet<ISplitter>(ApplicationManager.Manager.GetInstances<ISplitter>());117 Parameters.Add(new FixedValueParameter<BoolValue>(GenerateRulesParameterName, "Whether a set of rules or a decision tree shall be created ", new BoolValue(false)));118 Parameters.Add(new FixedValueParameter<PercentValue>(HoldoutSizeParameterName, "How much of the training set shall be reserved for pruning ", new PercentValue(0.2)));119 Parameters.Add(new ConstrainedValueParameter<ISplitter>(Split erParameterName, "The type of split function used to create node splits", impuritySet, impuritySet.OfType<M5Splitter>().First()));120 Parameters.Add(new FixedValueParameter<IntValue>(MinimalNodeSizeParameterName, "The minimal number of samples in a leaf node ", new IntValue(1)));121 Parameters.Add(new ConstrainedValueParameter<ILeafModel>(LeafModelParameterName, "The type of model used for the nodes ", modelSet, modelSet.OfType<LinearLeaf>().First()));122 Parameters.Add(new ConstrainedValueParameter<IPruning>(PruningTypeParameterName, "The type of pruning used ", pruningSet, pruningSet.OfType<ComplexityPruning>().First()));144 var splitterSet = new ItemSet<ISplitter>(ApplicationManager.Manager.GetInstances<ISplitter>()); 145 Parameters.Add(new FixedValueParameter<BoolValue>(GenerateRulesParameterName, "Whether a set of rules or a decision tree shall be created (default=false)", new BoolValue(false))); 146 Parameters.Add(new FixedValueParameter<PercentValue>(HoldoutSizeParameterName, "How much of the training set shall be reserved for pruning (default=20%).", new PercentValue(0.2))); 147 Parameters.Add(new 
ConstrainedValueParameter<ISplitter>(SplitterParameterName, "The type of split function used to create node splits (default='M5Splitter').", splitterSet, splitterSet.OfType<M5Splitter>().First())); 148 Parameters.Add(new FixedValueParameter<IntValue>(MinimalNodeSizeParameterName, "The minimal number of samples in a leaf node (default=1).", new IntValue(1))); 149 Parameters.Add(new ConstrainedValueParameter<ILeafModel>(LeafModelParameterName, "The type of model used for the nodes (default='LinearLeaf').", modelSet, modelSet.OfType<LinearLeaf>().First())); 150 Parameters.Add(new ConstrainedValueParameter<IPruning>(PruningTypeParameterName, "The type of pruning used (default='ComplexityPruning').", pruningSet, pruningSet.OfType<ComplexityPruning>().First())); 123 151 Parameters.Add(new FixedValueParameter<IntValue>(SeedParameterName, "The random seed used to initialize the new pseudo random number generator.", new IntValue(0))); 124 152 Parameters.Add(new FixedValueParameter<BoolValue>(SetSeedRandomlyParameterName, "True if the random seed should be set to a random value, otherwise false.", new BoolValue(true))); 125 Parameters.Add(new FixedValueParameter<BoolValue>(UseHoldoutParameterName, "True if a holdout set should be generated, false if splitting and pruning shall be performed on the same data ", new BoolValue(false)));153 Parameters.Add(new FixedValueParameter<BoolValue>(UseHoldoutParameterName, "True if a holdout set should be generated, false if splitting and pruning shall be performed on the same data (default=false).", new BoolValue(false))); 126 154 Problem = new RegressionProblem(); 127 155 } … … 134 162 base.Initialize(cancellationToken); 135 163 var random = new MersenneTwister(); 136 if (SetSeedRandomly) Seed Parameter.Value.Value = new System.Random().Next();164 if (SetSeedRandomly) Seed = RandomSeedGenerator.GetSeed(); 137 165 random.Reset(Seed); 138 166 stateScope = InitializeScope(random, Problem.ProblemData, Pruning, MinimalNodeSize, LeafModel, 
Splitter, GenerateRules, UseHoldout, HoldoutSize); -
branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/M5Utilities/PrincipleComponentTransformation.cs
r15470 r16847 24 24 using System.Linq; 25 25 using HeuristicLab.Common; 26 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;27 26 using HeuristicLab.Problems.DataAnalysis; 27 using HEAL.Attic; 28 28 29 29 namespace HeuristicLab.Algorithms.DataAnalysis { 30 [Storable Class]30 [StorableType("A2DDC528-BAA7-445F-98E1-5F895CE2FD5C")] 31 31 public class PrincipleComponentTransformation : IDeepCloneable { 32 32 #region Properties … … 48 48 #region HLConstructors 49 49 [StorableConstructor] 50 protected PrincipleComponentTransformation( bool deserializing) { }50 protected PrincipleComponentTransformation(StorableConstructorFlag _) { } 51 51 protected PrincipleComponentTransformation(PrincipleComponentTransformation original, Cloner cloner) { 52 52 if (original.Variances != null) Variances = original.Variances.ToArray(); -
branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/M5Utilities/RegressionTreeAnalyzer.cs
r15830 r16847 28 28 using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding; 29 29 using HeuristicLab.Optimization; 30 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;31 30 using HeuristicLab.Problems.DataAnalysis; 31 using HEAL.Attic; 32 32 33 33 namespace HeuristicLab.Algorithms.DataAnalysis { … … 243 243 244 244 245 [Storable Class]245 [StorableType("D5540C63-310B-4D6F-8A3D-6C1A08DE7F80")] 246 246 private class TextSymbol : Symbol { 247 247 [StorableConstructor] 248 private TextSymbol( bool deserializing) : base(deserializing) { }248 private TextSymbol(StorableConstructorFlag _) : base(_) { } 249 249 private TextSymbol(Symbol original, Cloner cloner) : base(original, cloner) { } 250 250 public TextSymbol(string name) : base(name, "") { -
branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/M5Utilities/RegressionTreeParameters.cs
r15830 r16847 24 24 using HeuristicLab.Common; 25 25 using HeuristicLab.Core; 26 using HeuristicLab.Optimization;27 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;28 26 using HeuristicLab.Problems.DataAnalysis; 27 using HEAL.Attic; 29 28 30 29 namespace HeuristicLab.Algorithms.DataAnalysis { 30 [StorableType("A6293516-C146-469D-B248-31B866A1D94F")] 31 31 public class RegressionTreeParameters : Item { 32 32 private readonly ISplitter splitter; … … 66 66 #region Constructors & Cloning 67 67 [StorableConstructor] 68 private RegressionTreeParameters( bool deserializing) : base(deserializing) { }68 private RegressionTreeParameters(StorableConstructorFlag _) : base(_) { } 69 69 private RegressionTreeParameters(RegressionTreeParameters original, Cloner cloner) : base(original, cloner) { 70 70 problemData = cloner.Clone(original.problemData); -
branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/MetaModels/RegressionNodeModel.cs
r16069 r16847 23 23 using System.Collections.Generic; 24 24 using System.Linq; 25 using HeuristicLab.Common; 26 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; 25 using HeuristicLab.Common; 27 26 using HeuristicLab.Problems.DataAnalysis; 27 using HEAL.Attic; 28 28 29 29 namespace HeuristicLab.Algorithms.DataAnalysis { 30 [Storable Class]30 [StorableType("C20C7DF1-CE33-4CCD-88D3-E145CFE239AC")] 31 31 public class RegressionNodeModel : RegressionModel { 32 32 #region Properties … … 65 65 #region HLConstructors 66 66 [StorableConstructor] 67 protected RegressionNodeModel( bool deserializing) : base(deserializing) { }67 protected RegressionNodeModel(StorableConstructorFlag _) : base(_) { } 68 68 protected RegressionNodeModel(RegressionNodeModel original, Cloner cloner) : base(original, cloner) { 69 69 IsLeaf = original.IsLeaf; … … 147 147 #endregion 148 148 149 [Storable Class]149 [StorableType("1FF9E216-6AF1-4282-A7EF-3FA0C1DB29C8")] 150 150 private sealed class ConfidenceRegressionNodeModel : RegressionNodeModel, IConfidenceRegressionModel { 151 151 #region HLConstructors 152 152 [StorableConstructor] 153 private ConfidenceRegressionNodeModel( bool deserializing) : base(deserializing) { }153 private ConfidenceRegressionNodeModel(StorableConstructorFlag _) : base(_) { } 154 154 private ConfidenceRegressionNodeModel(ConfidenceRegressionNodeModel original, Cloner cloner) : base(original, cloner) { } 155 155 public ConfidenceRegressionNodeModel(string targetAttr) : base(targetAttr) { } -
branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/MetaModels/RegressionNodeTreeModel.cs
r16069 r16847 27 27 using HeuristicLab.Core; 28 28 using HeuristicLab.Optimization; 29 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;30 29 using HeuristicLab.Problems.DataAnalysis; 30 using HEAL.Attic; 31 31 32 32 namespace HeuristicLab.Algorithms.DataAnalysis { 33 [Storable Class]33 [StorableType("FAF1F955-82F3-4824-9759-9D2846E831AE")] 34 34 public class RegressionNodeTreeModel : RegressionModel, IM5Model { 35 35 public const string NumCurrentLeafsResultName = "Number of current leafs"; … … 42 42 #region HLConstructors & Cloning 43 43 [StorableConstructor] 44 protected RegressionNodeTreeModel( bool deserializing) : base(deserializing) { }44 protected RegressionNodeTreeModel(StorableConstructorFlag _) : base(_) { } 45 45 protected RegressionNodeTreeModel(RegressionNodeTreeModel original, Cloner cloner) : base(original, cloner) { 46 46 Root = cloner.Clone(original.Root); … … 97 97 #endregion 98 98 99 public void BuildModel less(IReadOnlyList<int> trainingRows, IReadOnlyList<int> pruningRows, IScope statescope, ResultCollection results, CancellationToken cancellationToken) {99 public void BuildModel(IReadOnlyList<int> trainingRows, IReadOnlyList<int> pruningRows, IScope statescope, ResultCollection results, CancellationToken cancellationToken) { 100 100 var regressionTreeParams = (RegressionTreeParameters)statescope.Variables[M5Regression.RegressionTreeParameterVariableName].Value; 101 101 //start with one node … … 109 109 } 110 110 111 [Storable Class]111 [StorableType("E84ACC40-5694-4E40-A947-190673643206")] 112 112 private sealed class ConfidenceRegressionNodeTreeModel : RegressionNodeTreeModel, IConfidenceRegressionModel { 113 113 #region HLConstructors & Cloning 114 114 [StorableConstructor] 115 private ConfidenceRegressionNodeTreeModel( bool deserializing) : base(deserializing) { }115 private ConfidenceRegressionNodeTreeModel(StorableConstructorFlag _) : base(_) { } 116 116 private 
ConfidenceRegressionNodeTreeModel(ConfidenceRegressionNodeTreeModel original, Cloner cloner) : base(original, cloner) { } 117 117 public ConfidenceRegressionNodeTreeModel(string targetVariable) : base(targetVariable) { } -
branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/MetaModels/RegressionRuleModel.cs
r15830 r16847 28 28 using HeuristicLab.Core; 29 29 using HeuristicLab.Optimization; 30 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;31 30 using HeuristicLab.Problems.DataAnalysis; 31 using HEAL.Attic; 32 32 33 33 namespace HeuristicLab.Algorithms.DataAnalysis { 34 [Storable Class]34 [StorableType("425AF262-A756-4E9A-B76F-4D2480BEA4FD")] 35 35 public class RegressionRuleModel : RegressionModel, IM5Model { 36 36 #region Properties … … 49 49 #region HLConstructors 50 50 [StorableConstructor] 51 protected RegressionRuleModel( bool deserializing) : base(deserializing) { }51 protected RegressionRuleModel(StorableConstructorFlag _) : base(_) { } 52 52 protected RegressionRuleModel(RegressionRuleModel original, Cloner cloner) : base(original, cloner) { 53 53 if (original.SplitAttributes != null) SplitAttributes = original.SplitAttributes.ToArray(); … … 88 88 //build tree and select node with maximum coverage 89 89 var tree = RegressionNodeTreeModel.CreateTreeModel(regressionTreeParams.TargetVariable, regressionTreeParams); 90 tree.BuildModel less(trainingRows, pruningRows, statescope, results, cancellationToken);90 tree.BuildModel(trainingRows, pruningRows, statescope, results, cancellationToken); 91 91 var nodeModel = tree.Root.EnumerateNodes().Where(x => x.IsLeaf).MaxItems(x => x.NumSamples).First(); 92 92 … … 95 95 var reops = new List<Comparison>(); 96 96 97 //extract Splits97 //extract splits 98 98 for (var temp = nodeModel; temp.Parent != null; temp = temp.Parent) { 99 99 satts.Add(temp.Parent.SplitAttribute); … … 137 137 } 138 138 139 [Storable Class]139 [StorableType("7302AA30-9F58-42F3-BF6A-ECF1536508AB")] 140 140 private sealed class ConfidenceRegressionRuleModel : RegressionRuleModel, IConfidenceRegressionModel { 141 141 #region HLConstructors 142 142 [StorableConstructor] 143 private ConfidenceRegressionRuleModel( bool deserializing) : base(deserializing) { }143 private ConfidenceRegressionRuleModel(StorableConstructorFlag _) : base(_) { 
} 144 144 private ConfidenceRegressionRuleModel(ConfidenceRegressionRuleModel original, Cloner cloner) : base(original, cloner) { } 145 145 public ConfidenceRegressionRuleModel(string targetAttr) : base(targetAttr) { } … … 159 159 } 160 160 161 [StorableType("152DECE4-2692-4D53-B290-974806ADCD72")] 161 162 internal enum Comparison { 162 163 LessEqual, -
branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/MetaModels/RegressionRuleSetModel.cs
r15830 r16847 28 28 using HeuristicLab.Data; 29 29 using HeuristicLab.Optimization; 30 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;31 30 using HeuristicLab.Problems.DataAnalysis; 31 using HEAL.Attic; 32 32 33 33 namespace HeuristicLab.Algorithms.DataAnalysis { 34 [Storable Class]34 [StorableType("7B4D9AE9-0456-4029-80A6-CCB5E33CE356")] 35 35 public class RegressionRuleSetModel : RegressionModel, IM5Model { 36 36 private const string NumRulesResultName = "Number of rules"; … … 45 45 #region HLConstructors & Cloning 46 46 [StorableConstructor] 47 protected RegressionRuleSetModel( bool deserializing) : base(deserializing) { }47 protected RegressionRuleSetModel(StorableConstructorFlag _) : base(_) { } 48 48 protected RegressionRuleSetModel(RegressionRuleSetModel original, Cloner cloner) : base(original, cloner) { 49 49 if (original.Rules != null) Rules = original.Rules.Select(cloner.Clone).ToList(); … … 123 123 #endregion 124 124 125 [Storable Class]125 [StorableType("E114F3C9-3C1F-443D-8270-0E10CE12F2A0")] 126 126 public class RuleSetState : Item { 127 127 [Storable] … … 140 140 #region HLConstructors & Cloning 141 141 [StorableConstructor] 142 protected RuleSetState( bool deserializing) : base(deserializing) { }142 protected RuleSetState(StorableConstructorFlag _) : base(_) { } 143 143 protected RuleSetState(RuleSetState original, Cloner cloner) : base(original, cloner) { 144 144 Rules = original.Rules.Select(cloner.Clone).ToList(); … … 155 155 } 156 156 157 [Storable Class]157 [StorableType("52E7992B-94CC-4960-AA82-1A399BE735C6")] 158 158 private sealed class ConfidenceRegressionRuleSetModel : RegressionRuleSetModel, IConfidenceRegressionModel { 159 159 #region HLConstructors & Cloning 160 160 [StorableConstructor] 161 private ConfidenceRegressionRuleSetModel( bool deserializing) : base(deserializing) { }161 private ConfidenceRegressionRuleSetModel(StorableConstructorFlag _) : base(_) { } 162 162 private 
ConfidenceRegressionRuleSetModel(ConfidenceRegressionRuleSetModel original, Cloner cloner) : base(original, cloner) { } 163 163 public ConfidenceRegressionRuleSetModel(string targetVariable) : base(targetVariable) { } -
branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/Pruning/ComplexityPruning.cs
r15830 r16847 28 28 using HeuristicLab.Data; 29 29 using HeuristicLab.Parameters; 30 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;31 30 using HeuristicLab.Problems.DataAnalysis; 31 using HEAL.Attic; 32 32 33 33 namespace HeuristicLab.Algorithms.DataAnalysis { 34 [StorableType("B643D965-D13F-415D-8589-3F3527460347")] 34 35 public class ComplexityPruning : ParameterizedNamedItem, IPruning { 35 36 public const string PruningStateVariableName = "PruningState"; 36 37 37 p rivateconst string PruningStrengthParameterName = "PruningStrength";38 p rivateconst string PruningDecayParameterName = "PruningDecay";39 p rivateconst string FastPruningParameterName = "FastPruning";38 public const string PruningStrengthParameterName = "PruningStrength"; 39 public const string PruningDecayParameterName = "PruningDecay"; 40 public const string FastPruningParameterName = "FastPruning"; 40 41 41 42 public IFixedValueParameter<DoubleValue> PruningStrengthParameter { … … 51 52 public double PruningStrength { 52 53 get { return PruningStrengthParameter.Value.Value; } 54 set { PruningStrengthParameter.Value.Value = value; } 53 55 } 54 56 public double PruningDecay { 55 57 get { return PruningDecayParameter.Value.Value; } 58 set { PruningDecayParameter.Value.Value = value; } 56 59 } 57 60 public bool FastPruning { 58 61 get { return FastPruningParameter.Value.Value; } 62 set { FastPruningParameter.Value.Value = value; } 59 63 } 60 64 61 65 #region Constructors & Cloning 62 66 [StorableConstructor] 63 protected ComplexityPruning( bool deserializing) : base(deserializing) { }67 protected ComplexityPruning(StorableConstructorFlag _) : base(_) { } 64 68 protected ComplexityPruning(ComplexityPruning original, Cloner cloner) : base(original, cloner) { } 65 69 public ComplexityPruning() { 66 Parameters.Add(new FixedValueParameter<DoubleValue>(PruningStrengthParameterName, "The strength of the pruning. 
Higher values force the algorithm to create simpler models ", new DoubleValue(2.0)));67 Parameters.Add(new FixedValueParameter<DoubleValue>(PruningDecayParameterName, "Pruning decay allows nodes higher up in the tree to be more stable .", new DoubleValue(1.0)));68 Parameters.Add(new FixedValueParameter<BoolValue>(FastPruningParameterName, "Accelerate Pruning by using linear models instead of leaf models", new BoolValue(true)));70 Parameters.Add(new FixedValueParameter<DoubleValue>(PruningStrengthParameterName, "The strength of the pruning. Higher values force the algorithm to create simpler models (default=2.0).", new DoubleValue(2.0))); 71 Parameters.Add(new FixedValueParameter<DoubleValue>(PruningDecayParameterName, "Pruning decay allows nodes higher up in the tree to be more stable (default=1.0).", new DoubleValue(1.0))); 72 Parameters.Add(new FixedValueParameter<BoolValue>(FastPruningParameterName, "Accelerate pruning by using linear models instead of leaf models (default=true).", new BoolValue(true))); 69 73 } 70 74 public override IDeepCloneable Clone(Cloner cloner) { … … 136 140 } 137 141 138 139 142 private static void UpdateThreshold(RegressionNodeTreeModel tree, PruningState state) { 140 143 if (state.Code == 2) { … … 161 164 } 162 165 163 164 166 private static void BuildPruningModel(RegressionNodeModel regressionNode, ILeafModel leaf, IReadOnlyList<int> trainingRows, IReadOnlyList<int> pruningRows, PruningState state, RegressionTreeParameters regressionTreeParams, CancellationToken cancellationToken) { 165 167 //create regressionProblemdata from pruning data 166 var vars = regressionTreeParams.AllowedInputVariables.Concat(new[] { regressionTreeParams.TargetVariable}).ToArray();168 var vars = regressionTreeParams.AllowedInputVariables.Concat(new[] { regressionTreeParams.TargetVariable }).ToArray(); 167 169 var reducedData = new Dataset(vars, vars.Select(x => regressionTreeParams.Data.GetDoubleValues(x, pruningRows).ToList())); 168 170 var pd = new 
RegressionProblemData(reducedData, regressionTreeParams.AllowedInputVariables, regressionTreeParams.TargetVariable); … … 179 181 state.modelErrors.Add(regressionNode, rmsModel); 180 182 state.modelComplexities.Add(regressionNode, numModelParams); 181 if (regressionNode.IsLeaf) { state.nodeComplexities[regressionNode] = state.modelComplexities[regressionNode]; } 182 else { state.nodeComplexities.Add(regressionNode, state.nodeComplexities[regressionNode.Left] + state.nodeComplexities[regressionNode.Right] + 1); } 183 if (regressionNode.IsLeaf) { state.nodeComplexities[regressionNode] = state.modelComplexities[regressionNode]; } else { state.nodeComplexities.Add(regressionNode, state.nodeComplexities[regressionNode.Left] + state.nodeComplexities[regressionNode.Right] + 1); } 183 184 } 184 185 … … 200 201 } 201 202 202 [Storable Class]203 [StorableType("EAD60C7E-2C58-45C4-9697-6F735F518CFD")] 203 204 public class PruningState : Item { 204 205 [Storable] … … 230 231 #region HLConstructors & Cloning 231 232 [StorableConstructor] 232 protected PruningState( bool deserializing) : base(deserializing) { }233 protected PruningState(StorableConstructorFlag _) : base(_) { } 233 234 protected PruningState(PruningState original, Cloner cloner) : base(original, cloner) { 234 235 modelComplexities = original.modelComplexities.ToDictionary(x => cloner.Clone(x.Key), x => x.Value); -
branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/Pruning/NoPruning.cs
r15830 r16847 24 24 using HeuristicLab.Common; 25 25 using HeuristicLab.Core; 26 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;27 26 using HeuristicLab.Problems.DataAnalysis; 27 using HEAL.Attic; 28 28 29 29 namespace HeuristicLab.Algorithms.DataAnalysis { 30 [Storable Class]30 [StorableType("D67152AA-3533-45D2-B77B-4A0742FB4B92")] 31 31 [Item("NoPruning", "No pruning")] 32 32 public class NoPruning : ParameterizedNamedItem, IPruning { 33 33 #region Constructors & Cloning 34 34 [StorableConstructor] 35 private NoPruning( bool deserializing) : base(deserializing) { }35 private NoPruning(StorableConstructorFlag _) : base(_) { } 36 36 private NoPruning(NoPruning original, Cloner cloner) : base(original, cloner) { } 37 37 public NoPruning() { } -
branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/Spliting/CorrelationImpuritiyCalculator.cs
r15830 r16847 28 28 /// <summary> 29 29 /// Helper class for incremental split calculation. 30 /// Used while moving a potential Spliter along the ordered training Instances30 /// Used while moving a potential splitter along the ordered training instances 31 31 /// </summary> 32 internal class Correl ationImpuritiyCalculator {32 internal class CorreleationImpurityCalculator { 33 33 #region state 34 34 //Data … … 58 58 59 59 #region Constructors 60 public Correl ationImpuritiyCalculator(int partition, IEnumerable<double> atts, IEnumerable<double> targets, double order) {60 public CorreleationImpurityCalculator(int partition, IEnumerable<double> atts, IEnumerable<double> targets, double order) { 61 61 if (order <= 0) throw new ArgumentException("Splitter order must be larger than 0"); 62 62 this.order = order; -
branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/Spliting/CorrelationSplitter.cs
r15830 r16847 25 25 using HeuristicLab.Data; 26 26 using HeuristicLab.Parameters; 27 using H euristicLab.Persistence.Default.CompositeSerializers.Storable;27 using HEAL.Attic; 28 28 29 29 namespace HeuristicLab.Algorithms.DataAnalysis { 30 [Storable Class]30 [StorableType("EC3A5009-EE84-4E1A-A537-20F6F1224842")] 31 31 [Item("CorrelationSplitter", "An experimental split selector that uses correlation coefficients")] 32 32 public class CorrelationSplitter : SplitterBase { … … 37 37 public double Order { 38 38 get { return OrderParameter.Value.Value; } 39 set { OrderParameter.Value.Value = value; } 39 40 } 40 41 41 42 #region Constructors & Cloning 42 43 [StorableConstructor] 43 private CorrelationSplitter( bool deserializing) { }44 private CorrelationSplitter(StorableConstructorFlag _) { } 44 45 private CorrelationSplitter(CorrelationSplitter original, Cloner cloner) : base(original, cloner) { } 45 46 public CorrelationSplitter() { 46 Parameters.Add(new FixedValueParameter<DoubleValue>(OrderParameterName, "The exponent in the split calculation ssrLeft^(1/Order)+ssrRight^(1/Order) .", new DoubleValue(1)));47 Parameters.Add(new FixedValueParameter<DoubleValue>(OrderParameterName, "The exponent in the split calculation ssrLeft^(1/Order)+ssrRight^(1/Order) (default=1.0).", new DoubleValue(1))); 47 48 } 48 49 public override IDeepCloneable Clone(Cloner cloner) { … … 65 66 if (start >= length) return; 66 67 67 var imp = new Correl ationImpuritiyCalculator(minLeafSize, attValues, targetValues, Order);68 var imp = new CorreleationImpurityCalculator(minLeafSize, attValues, targetValues, Order); 68 69 maxImpurity = imp.Impurity; 69 70 splitValues.Add(imp.SplitValue); -
branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/Spliting/M5Splitter.cs
r15830 r16847 26 26 using HeuristicLab.Data; 27 27 using HeuristicLab.Parameters; 28 using H euristicLab.Persistence.Default.CompositeSerializers.Storable;28 using HEAL.Attic; 29 29 30 30 namespace HeuristicLab.Algorithms.DataAnalysis { 31 [Storable Class]32 [Item("M5Splitter", "A split selector that uses the ratio between Variances^(1/Order) to determine good splits ")]31 [StorableType("502B1429-7A28-45C1-A60A-93E72CB3AF4A")] 32 [Item("M5Splitter", "A split selector that uses the ratio between Variances^(1/Order) to determine good splits.")] 33 33 public class M5Splitter : SplitterBase { 34 34 public const string OrderParameterName = "Order"; … … 38 38 public double Order { 39 39 get { return OrderParameter.Value.Value; } 40 set { OrderParameter.Value.Value = value; } 40 41 } 41 42 42 43 #region Constructors & Cloning 43 44 [StorableConstructor] 44 private M5Splitter( bool deserializing) { }45 private M5Splitter(StorableConstructorFlag _) { } 45 46 private M5Splitter(M5Splitter original, Cloner cloner) : base(original, cloner) { } 46 47 public M5Splitter() { 47 Parameters.Add(new FixedValueParameter<DoubleValue>(OrderParameterName, "The exponent in the split calculation sum (x_i - x_avg)^Order .", new DoubleValue(5)));48 Parameters.Add(new FixedValueParameter<DoubleValue>(OrderParameterName, "The exponent in the split calculation sum (x_i - x_avg)^Order (default=5).", new DoubleValue(5))); 48 49 } 49 50 public override IDeepCloneable Clone(Cloner cloner) { … … 67 68 var imp = new OrderImpurityCalculator(part + 1, targetValues, Order); 68 69 69 //if (imp.Impurity > maxImpurity && !attValues[part - 1].IsAlmost(attValues[part])) {70 // maxImpurity = imp.Impurity;71 // splitValue = (attValues[part - 1] + attValues[part]) / 2;72 // position = part;73 //}74 75 70 for (var i = low + len; i < high - len; i++) { 76 71 imp.Increment(targetValues[i], OrderImpurityCalculator.IncrementType.Left); -
branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/Spliting/OrderImpurityCalculator.cs
r15830 r16847 28 28 /// <summary> 29 29 /// Helper class for incremental split calculation. 30 /// Used while moving a potential Splitter along the ordered training Instances30 /// Used while moving a potential splitter along the ordered training instances 31 31 /// </summary> 32 32 internal class OrderImpurityCalculator { -
branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/Spliting/SplitterBase.cs
r15830 r16847 26 26 using HeuristicLab.Common; 27 27 using HeuristicLab.Core; 28 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;29 28 using HeuristicLab.Problems.DataAnalysis; 29 using HEAL.Attic; 30 30 31 31 namespace HeuristicLab.Algorithms.DataAnalysis { 32 [Storable Class]33 [StorableType("22DCCF28-8943-4622-BBD3-B2AB04F28C36")] 33 [Item("SplitterBase", "A split selector that uses the ratio between Variances^(1/Order) to determine good splits")]32 [Item("SplitterBase", "Abstract base class for splitters")] 34 34 public abstract class SplitterBase : ParameterizedNamedItem, ISplitter { 35 35 public const string SplittingStateVariableName = "RuleSetState"; … … 37 37 #region Constructors & Cloning 38 38 [StorableConstructor] 39 protected SplitterBase( bool deserializing) { }39 protected SplitterBase(StorableConstructorFlag _) { } 40 40 protected SplitterBase(SplitterBase original, Cloner cloner) : base(original, cloner) { } 41 41 public SplitterBase() { } … … 46 46 states.Variables.Add(new Variable(SplittingStateVariableName, new SplittingState())); 47 47 } 48 48 49 public void Split(RegressionNodeTreeModel tree, IReadOnlyList<int> trainingRows, IScope stateScope, CancellationToken cancellationToken) { 49 50 var regressionTreeParams = (RegressionTreeParameters)stateScope.Variables[M5Regression.RegressionTreeParameterVariableName].Value; … … 87 88 if (splitData.Dataset.Rows < minLeafSize) return false; 88 89 89 // find best Attribute for the Splitter90 // find best attribute for the splitter 90 91 foreach (var attr in splitData.AllowedInputVariables) { 91 92 int pos; … … 109 110 #endregion 110 111 111 [Storable Class]112 [StorableType("BC1149FD-370E-4F3A-92F5-6E519736D09A")] 112 113 public class SplittingState : Item { 113 114 [Storable] … … 124 125 #region HLConstructors & Cloning 125 126 [StorableConstructor] 126 protected SplittingState( bool deserializing) : base(deserializing) { }127 protected SplittingState(StorableConstructorFlag _) : base(_) { } 127 128 protected SplittingState(SplittingState original, Cloner cloner) : base(original, cloner) { 128 129 nodeQueue = new Queue<RegressionNodeModel>(original.nodeQueue.Select(cloner.Clone)); -
branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/Spliting/UnivariateOnlineLR.cs
r15830 r16847 28 28 /// <summary> 29 29 /// Helper class for incremental split calculation. 30 /// Used while moving a potential Spliter along the ordered training Instances30 /// Used while moving a potential splitter along the ordered training instances 31 31 /// </summary> 32 32 internal class UnivariateOnlineLR {
Note: See TracChangeset for help on using the changeset viewer.