Changeset 15470
- Timestamp: 11/10/17 12:56:36 (7 years ago)
- Location: branches/M5Regression
- Files: 4 added, 5 deleted, 15 edited
Legend:
- Unmodified
- Added
- Removed
branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/FixedDataAnalysisAlgorithm.cs
r15430 r15470 28 28 namespace HeuristicLab.Algorithms.DataAnalysis { 29 29 [StorableClass] 30 public abstract class FixedDataAnalysisAlgorithm<T> : BasicAlgorithm where T : class, IDataAnalysisProblem {30 public abstract class FixedDataAnalysisAlgorithm<T> : BasicAlgorithm, IDataAnalysisAlgorithm<T> where T : class, IDataAnalysisProblem { 31 31 #region Properties 32 32 public override Type ProblemType { … … 34 34 } 35 35 public new T Problem { 36 get { return (T) base.Problem; }36 get { return (T) base.Problem; } 37 37 set { base.Problem = value; } 38 38 } 39 39 #endregion 40 40 41 public override bool SupportsPause { get { return false; } } 41 public override bool SupportsPause { 42 get { return false; } 43 } 42 44 43 45 [StorableConstructor] … … 45 47 protected FixedDataAnalysisAlgorithm(FixedDataAnalysisAlgorithm<T> original, Cloner cloner) : base(original, cloner) { } 46 48 public FixedDataAnalysisAlgorithm() : base() { } 47 48 49 } 49 50 } -
branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj
r15430 r15470 361 361 <Compile Include="Linear\MultinomialLogitModel.cs" /> 362 362 <Compile Include="Linear\Scaling.cs" /> 363 <Compile Include="M5Regression\Interfaces\IImpurityCalculator.cs" /> 364 <Compile Include="M5Regression\Interfaces\IImpurityType.cs" /> 363 <Compile Include="M5Regression\Interfaces\ISplitType.cs" /> 365 364 <Compile Include="M5Regression\Interfaces\IM5MetaModel.cs" /> 366 365 <Compile Include="M5Regression\Interfaces\ILeafType.cs" /> … … 377 376 <Compile Include="M5Regression\M5Utilities\M5CreationParameters.cs" /> 378 377 <Compile Include="M5Regression\M5Utilities\M5UpdateParameters.cs" /> 379 <Compile Include="M5Regression\M5Utilities\SplitInfo.cs" />380 378 <Compile Include="M5Regression\MetaModels\ComponentReducedLinearModel.cs" /> 381 379 <Compile Include="M5Regression\MetaModels\M5NodeModel.cs" /> … … 392 390 <Compile Include="M5Regression\Pruning\M5LeafPruning.cs" /> 393 391 <Compile Include="M5Regression\Spliting\OrderImpurityCalculator.cs" /> 394 <Compile Include="M5Regression\Spliting\Order ImpurityType.cs" />392 <Compile Include="M5Regression\Spliting\OrderSplitType.cs" /> 395 393 <Compile Include="Nca\Initialization\INcaInitializer.cs" /> 396 394 <Compile Include="Nca\Initialization\LdaInitializer.cs" /> … … 423 421 <Compile Include="NeuralNetwork\NeuralNetworkRegressionSolution.cs" /> 424 422 <Compile Include="NonlinearRegression\NonlinearRegression.cs" /> 425 <Compile Include="PCA\PrincipleComponentAnalysis.cs" /> 426 <Compile Include="PCA\PrincipleComponentAnalysisStatic.cs" /> 423 <Compile Include="M5Regression\M5Utilities\PrincipleComponentTransformation.cs" /> 427 424 <Compile Include="Plugin.cs" /> 428 425 <Compile Include="Properties\AssemblyInfo.cs" /> -
branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/LeafTypes/ComponentReductionLinearLeaf.cs
r15430 r15470 58 58 public IConfidenceRegressionModel BuildModel(IRegressionProblemData pd, IRandom random, 59 59 CancellationToken cancellation, out int noParameters) { 60 var pca = PrincipleComponent AnalysisStatic.Create(pd.Dataset, pd.TrainingIndices, pd.AllowedInputVariables, true);61 var pcdata = pca. ProjectProblem(pd);60 var pca = PrincipleComponentTransformation.CreateProjection(pd.Dataset, pd.TrainingIndices, pd.AllowedInputVariables, true); 61 var pcdata = pca.TransformProblemData(pd); 62 62 ComponentReducedLinearModel bestModel = null; 63 63 var bestCvrmse = double.MaxValue; … … 65 65 for (var i = 1; i <= Math.Min(NoComponents, pd.AllowedInputVariables.Count()); i++) { 66 66 var pd2 = (IRegressionProblemData) pcdata.Clone(); 67 var inputs = new HashSet<string>(pca. Names.Take(i));67 var inputs = new HashSet<string>(pca.ComponentNames.Take(i)); 68 68 foreach (var v in pd2.InputVariables.CheckedItems.ToArray()) 69 69 pd2.InputVariables.SetItemCheckedState(v.Value, inputs.Contains(v.Value.Value)); -
branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/M5Regression.cs
r15430 r15470 20 20 #region Parametername 21 21 private const string GenerateRulesParameterName = "GenerateRules"; 22 private const string ImpurityParameterName = " Impurity";22 private const string ImpurityParameterName = "Split"; 23 23 private const string MinimalNodeSizeParameterName = "MinimalNodeSize"; 24 24 private const string ModelTypeParameterName = "ModelType"; … … 32 32 get { return Parameters[GenerateRulesParameterName] as IFixedValueParameter<BoolValue>; } 33 33 } 34 public IConstrainedValueParameter<I ImpurityType> ImpurityParameter {35 get { return Parameters[ImpurityParameterName] as IConstrainedValueParameter<I ImpurityType>; }34 public IConstrainedValueParameter<ISplitType> ImpurityParameter { 35 get { return Parameters[ImpurityParameterName] as IConstrainedValueParameter<ISplitType>; } 36 36 } 37 37 public IFixedValueParameter<IntValue> MinimalNodeSizeParameter { … … 56 56 get { return GenerateRulesParameter.Value.Value; } 57 57 } 58 public I ImpurityType Impurity{58 public ISplitType Split { 59 59 get { return ImpurityParameter.Value; } 60 60 } … … 83 83 var modelSet = new ItemSet<ILeafType<IRegressionModel>>(ApplicationManager.Manager.GetInstances<ILeafType<IRegressionModel>>()); 84 84 var pruningSet = new ItemSet<IPruningType>(ApplicationManager.Manager.GetInstances<IPruningType>()); 85 var impuritySet = new ItemSet<I ImpurityType>(ApplicationManager.Manager.GetInstances<IImpurityType>());85 var impuritySet = new ItemSet<ISplitType>(ApplicationManager.Manager.GetInstances<ISplitType>()); 86 86 Parameters.Add(new FixedValueParameter<BoolValue>(GenerateRulesParameterName, "Whether a set of rules or a decision tree shall be created", new BoolValue(true))); 87 Parameters.Add(new ConstrainedValueParameter<I ImpurityType>(ImpurityParameterName, "The type of impurity function used to create node splits", impuritySet, impuritySet.OfType<OrderImpurityType>().First()));87 Parameters.Add(new ConstrainedValueParameter<ISplitType>(ImpurityParameterName, 
"The type of split function used to create node splits", impuritySet, impuritySet.OfType<OrderSplitType>().First())); 88 88 Parameters.Add(new FixedValueParameter<IntValue>(MinimalNodeSizeParameterName, "The minimal number of samples in a leaf node", new IntValue(1))); 89 89 Parameters.Add(new ConstrainedValueParameter<ILeafType<IRegressionModel>>(ModelTypeParameterName, "The type of model used for the nodes", modelSet, modelSet.OfType<LinearLeaf>().First())); … … 102 102 if (SetSeedRandomly) SeedParameter.Value.Value = new System.Random().Next(); 103 103 random.Reset(Seed); 104 var solution = CreateM5RegressionSolution(Problem.ProblemData, random, LeafType, Impurity, PruningType, cancellationToken, MinimalNodeSize, GenerateRules, Results);104 var solution = CreateM5RegressionSolution(Problem.ProblemData, random, LeafType, Split, PruningType, cancellationToken, MinimalNodeSize, GenerateRules, Results); 105 105 AnalyzeSolution(solution); 106 106 } … … 108 108 #region Static Interface 109 109 public static IRegressionSolution CreateM5RegressionSolution(IRegressionProblemData problemData, IRandom random, 110 ILeafType<IRegressionModel> leafType = null, I ImpurityType impurityType = null, IPruningType pruningType = null,110 ILeafType<IRegressionModel> leafType = null, ISplitType splitType = null, IPruningType pruningType = null, 111 111 CancellationToken? 
cancellationToken = null, int minNumInstances = 4, bool generateRules = false, ResultCollection results = null) { 112 112 //set default values 113 113 if (leafType == null) leafType = new LinearLeaf(); 114 if ( impurityType == null) impurityType = new OrderImpurityType();114 if (splitType == null) splitType = new OrderSplitType(); 115 115 if (cancellationToken == null) cancellationToken = CancellationToken.None; 116 116 if (pruningType == null) pruningType = new M5LeafPruning(); … … 130 130 131 131 //create & build Model 132 var m5Params = new M5CreationParameters(pruningType, minNumInstances, leafType, pd, random, impurityType, results);132 var m5Params = new M5CreationParameters(pruningType, minNumInstances, leafType, pd, random, splitType, results); 133 133 134 134 IReadOnlyList<int> t, h; … … 148 148 149 149 public static void UpdateM5Model(M5TreeModel model, IRegressionProblemData problemData, IRandom random, 150 ILeafType<IRegressionModel> leafType = null, CancellationToken? cancellationToken = null , ResultCollection results = null) {151 UpdateM5Model(model as IM5MetaModel, problemData, random, leafType, cancellationToken , results);150 ILeafType<IRegressionModel> leafType = null, CancellationToken? cancellationToken = null) { 151 UpdateM5Model(model as IM5MetaModel, problemData, random, leafType, cancellationToken); 152 152 } 153 153 154 154 public static void UpdateM5Model(M5RuleSetModel model, IRegressionProblemData problemData, IRandom random, 155 ILeafType<IRegressionModel> leafType = null, CancellationToken? cancellationToken = null , ResultCollection results = null) {156 UpdateM5Model(model as IM5MetaModel, problemData, random, leafType, cancellationToken , results);155 ILeafType<IRegressionModel> leafType = null, CancellationToken? 
cancellationToken = null) { 156 UpdateM5Model(model as IM5MetaModel, problemData, random, leafType, cancellationToken); 157 157 } 158 158 159 159 private static void UpdateM5Model(IM5MetaModel model, IRegressionProblemData problemData, IRandom random, 160 ILeafType<IRegressionModel> leafType = null, CancellationToken? cancellationToken = null , ResultCollection results = null) {160 ILeafType<IRegressionModel> leafType = null, CancellationToken? cancellationToken = null) { 161 161 if (cancellationToken == null) cancellationToken = CancellationToken.None; 162 var m5Params = new M5UpdateParameters(leafType, problemData, random , results);162 var m5Params = new M5UpdateParameters(leafType, problemData, random); 163 163 model.UpdateModel(problemData.TrainingIndices.ToList(), m5Params, cancellationToken.Value); 164 164 } … … 175 175 } 176 176 else { 177 Results.Add(M5Analyzer.CreateRulesResult((M5RuleSetModel) solution.Model, Problem.ProblemData, "M5TreeResult", 0,true));177 Results.Add(M5Analyzer.CreateRulesResult((M5RuleSetModel) solution.Model, Problem.ProblemData, "M5TreeResult", true)); 178 178 frequencies = M5Analyzer.GetRuleVariableFrequences((M5RuleSetModel) solution.Model); 179 179 Results.Add(M5Analyzer.CreateCoverageDiagram((M5RuleSetModel) solution.Model, Problem.ProblemData)); -
branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/M5Utilities/M5Analyzer.cs
r15430 r15470 29 29 namespace HeuristicLab.Algorithms.DataAnalysis { 30 30 internal static class M5Analyzer { 31 private const string LeftResultName = "Left";32 private const string RightResultName = "Right";33 31 private const string ConditionResultName = "Condition"; 34 32 private const string CoverResultName = "Covered Instances"; 35 33 private const string CoverageDiagramResultName = "Coverage"; 36 private const string NodeModelResultName = "NodeModel";37 private const string NodeSizeResultName = "NodeSize";38 34 private const string RuleModelResultName = "RuleModel"; 39 35 … … 41 37 var res = ruleSetModel.VariablesUsedForPrediction.ToDictionary(x => x, x => 0); 42 38 foreach (var rule in ruleSetModel.Rules) 43 44 39 foreach (var att in rule.SplitAtts) 40 res[att]++; 45 41 return res; 46 42 } … … 57 53 var list = new List<int>(); 58 54 GetLeafDepths(treeModel.Root, 0, list); 59 var row = new DataRow("Depths", "", list.Select(x => (double) x)) {60 VisualProperties = { ChartType = DataRowVisualProperties.DataRowChartType.Histogram}55 var row = new DataRow("Depths", "", list.Select(x => (double) x)) { 56 VisualProperties = {ChartType = DataRowVisualProperties.DataRowChartType.Histogram} 61 57 }; 62 58 var hist = new DataTable("LeafDepths"); … … 65 61 } 66 62 67 public static Result CreateRulesResult(M5RuleSetModel ruleSetModel, IRegressionProblemData pd, string resultName, int maxDepth,bool displayModels) {63 public static Result CreateRulesResult(M5RuleSetModel ruleSetModel, IRegressionProblemData pd, string resultName, bool displayModels) { 68 64 var res = new ResultCollection(); 69 65 var i = 0; 70 66 foreach (var rule in ruleSetModel.Rules) 71 res.Add(new Result("Rule" + i++, CreateRulesResult(rule, pd, maxDepth,displayModels, out pd)));67 res.Add(new Result("Rule" + i++, CreateRulesResult(rule, pd, displayModels, out pd))); 72 68 return new Result(resultName, res); 73 69 } … … 101 97 } 102 98 103 private static ResultCollection CreateRulesResult(M5NodeModel 
nodeModel, IRegressionProblemData pd, IList<int> rows, int maxDepth, bool displayModels) { 104 var res = new ResultCollection(); 105 if (!nodeModel.IsLeaf) { 106 res.Add(new Result(ConditionResultName, new StringValue(nodeModel.SplitAttr + " <= " + nodeModel.SplitValue))); 107 var assignment = pd.Dataset.GetDoubleValues(nodeModel.SplitAttr, rows).Select(x => x <= nodeModel.SplitValue).ToArray(); 108 var leftRows = Enumerable.Range(0, assignment.Length).Where(i => assignment[i]).Select(i => rows[i]).ToList(); 109 var rightRows = Enumerable.Range(0, assignment.Length).Where(i => !assignment[i]).Select(i => rows[i]).ToList(); 110 if (nodeModel.Left != null && maxDepth > 0) res.Add(new Result(LeftResultName, CreateRulesResult(nodeModel.Left, pd, leftRows, maxDepth - 1, displayModels))); 111 if (nodeModel.Right != null && maxDepth > 0) res.Add(new Result(RightResultName, CreateRulesResult(nodeModel.Right, pd, rightRows, maxDepth - 1, displayModels))); 112 } 113 if (nodeModel.NodeModel != null && displayModels) res.Add(new Result(NodeModelResultName, nodeModel.NodeModel.CreateRegressionSolution(pd))); 114 res.Add(new Result(NodeSizeResultName, new IntValue(rows.Count))); 115 return res; 116 } 117 118 private static ResultCollection CreateRulesResult(M5RuleModel m5RuleModel, IRegressionProblemData pd, int maxDepth, bool displayModels, out IRegressionProblemData notCovered) { 119 99 private static ResultCollection CreateRulesResult(M5RuleModel m5RuleModel, IRegressionProblemData pd, bool displayModels, out IRegressionProblemData notCovered) { 120 100 var training = pd.TrainingIndices.Where(x => !m5RuleModel.Covers(pd.Dataset, x)).ToArray(); 121 101 var test = pd.TestIndices.Where(x => !m5RuleModel.Covers(pd.Dataset, x)).ToArray(); … … 134 114 var res = new ResultCollection { 135 115 new Result(ConditionResultName, new StringValue(m5RuleModel.ToCompactString())), 136 new Result(CoverResultName, new IntValue(pd.TrainingIndices.Count() -training.Length))116 new 
Result(CoverResultName, new IntValue(pd.TrainingIndices.Count() - training.Length)) 137 117 }; 138 118 if (displayModels) res.Add(new Result(RuleModelResultName, m5RuleModel.CreateRegressionSolution(covered))); 139 119 return res; 140 }141 142 private static IEnumerable<double> Cumulate(this IEnumerable<double> values) {143 double sum = 0.0;144 foreach (var value in values) {145 sum += value;146 yield return sum;147 }148 120 } 149 121 … … 161 133 } 162 134 } 163 -
branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/M5Utilities/M5CreationParameters.cs
r15430 r15470 28 28 namespace HeuristicLab.Algorithms.DataAnalysis { 29 29 internal class M5CreationParameters { 30 private readonly I ImpurityType Impurity1;30 private readonly ISplitType Impurity1; 31 31 private readonly IPruningType Pruningtype1; 32 32 private readonly ILeafType<IRegressionModel> LeafType1; … … 35 35 private readonly IRandom Random1; 36 36 private readonly ResultCollection Results1; 37 public I ImpurityType Impurity{37 public ISplitType Split { 38 38 get { return Impurity1; } 39 39 } … … 71 71 72 72 public M5CreationParameters(IPruningType pruning, int minleafSize, ILeafType<IRegressionModel> modeltype, 73 IRegressionProblemData problemData, IRandom random, I ImpurityType impurity, ResultCollection results) {74 Impurity1 = impurity;73 IRegressionProblemData problemData, IRandom random, ISplitType split, ResultCollection results) { 74 Impurity1 = split; 75 75 Pruningtype1 = pruning; 76 76 ProblemData1 = problemData; -
branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/M5Utilities/M5UpdateParameters.cs
r15430 r15470 30 30 private readonly IRegressionProblemData ProblemData1; 31 31 private readonly IRandom Random1; 32 private readonly ResultCollection Results1;33 32 public ILeafType<IRegressionModel> LeafType { 34 33 get { return LeafType1; } … … 40 39 get { return Random1; } 41 40 } 42 public ResultCollection Results {43 get { return Results1; }44 }45 46 public IEnumerable<string> AllowedInputVariables {47 get { return ProblemData.AllowedInputVariables; }48 }49 public string TargetVariable {50 get { return ProblemData.TargetVariable; }51 }52 41 public IDataset Data { 53 42 get { return ProblemData.Dataset; } 54 43 } 55 44 56 public M5UpdateParameters(ILeafType<IRegressionModel> modeltype, IRegressionProblemData problemData, IRandom random , ResultCollection results) {45 public M5UpdateParameters(ILeafType<IRegressionModel> modeltype, IRegressionProblemData problemData, IRandom random) { 57 46 ProblemData1 = problemData; 58 47 Random1 = random; 59 48 LeafType1 = modeltype; 60 Results1 = results;61 49 } 62 50 } -
branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/MetaModels/ComponentReducedLinearModel.cs
r15430 r15470 21 21 22 22 using System.Collections.Generic; 23 using System.Linq; 23 24 using HeuristicLab.Common; 24 25 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; … … 31 32 private IConfidenceRegressionModel Model; 32 33 [Storable] 33 private PrincipleComponent AnalysisStaticPca;34 private PrincipleComponentTransformation Pca; 34 35 35 36 [StorableConstructor] … … 39 40 Pca = cloner.Clone(original.Pca); 40 41 } 41 public ComponentReducedLinearModel(string targetVariable, IConfidenceRegressionModel model, PrincipleComponent AnalysisStaticpca) : base(targetVariable) {42 public ComponentReducedLinearModel(string targetVariable, IConfidenceRegressionModel model, PrincipleComponentTransformation pca) : base(targetVariable) { 42 43 Model = model; 43 44 Pca = pca; … … 51 52 } 52 53 public override IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) { 53 return Model.GetEstimatedValues(Pca.ProjectDataset(dataset), rows); 54 var data = ReduceDataset(dataset, rows.ToArray()); 55 return Model.GetEstimatedValues(Pca.TransformDataset(data), Enumerable.Range(0, data.Rows)); 54 56 } 55 57 public override IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) { … … 57 59 } 58 60 public IEnumerable<double> GetEstimatedVariances(IDataset dataset, IEnumerable<int> rows) { 59 return Model.GetEstimatedVariances(Pca.ProjectDataset(dataset), rows); 61 var data = ReduceDataset(dataset, rows.ToArray()); 62 return Model.GetEstimatedVariances(Pca.TransformDataset(data), Enumerable.Range(0, data.Rows)); 63 } 64 65 private IDataset ReduceDataset(IDataset data, IReadOnlyList<int> rows) { 66 return new Dataset(data.DoubleVariables, data.DoubleVariables.Select(v => data.GetDoubleValues(v, rows).ToList())); 60 67 } 61 68 } -
branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/MetaModels/M5NodeModel.cs
r15430 r15470 56 56 [Storable] 57 57 private IReadOnlyList<string> Variables { get; set; } 58 59 private const double DevFraction = 0.05;60 58 #endregion 61 59 … … 110 108 Left = null; 111 109 NodeModel = null; 112 IsLeaf = m5CreationParams.Data.GetDoubleValues(TargetVariable, rows).StandardDeviation() < globalStdDev * DevFraction; 110 SplitAttr = null; 111 SplitValue = double.NaN; 112 string attr; 113 double splitValue; 114 //IsLeaf = m5CreationParams.Data.GetDoubleValues(TargetVariable, rows).StandardDeviation() < globalStdDev * DevFraction; 115 //if (IsLeaf) return; 116 IsLeaf = !m5CreationParams.Split.Split(new RegressionProblemData(ReduceDataset(m5CreationParams.Data, rows), Variables, TargetVariable), m5CreationParams.MinLeafSize, out attr, out splitValue); 113 117 if (IsLeaf) return; 114 //Split(rows, m5Params, globalStdDev); 115 var bestSplit = new SplitInfo(); 116 var currentSplit = new SplitInfo(); 117 118 //find best Attribute for the Split 119 foreach (var attr in m5CreationParams.AllowedInputVariables) { 120 var sortedData = m5CreationParams.Data.GetDoubleValues(attr, rows).Zip(m5CreationParams.Data.GetDoubleValues(TargetVariable, rows), Tuple.Create).OrderBy(x => x.Item1).ToArray(); 121 currentSplit.AttributeSplit(attr, sortedData.Select(x => x.Item1).ToArray(), sortedData.Select(x => x.Item2).ToArray(), m5CreationParams); 122 if (!currentSplit.MaxImpurity.IsAlmost(bestSplit.MaxImpurity) && currentSplit.MaxImpurity > bestSplit.MaxImpurity) 123 bestSplit = (SplitInfo) currentSplit.Clone(); 124 } 125 126 //if no suitable split exists => leafNode 127 if (bestSplit.SplitAttr == null || bestSplit.Position < 1 || bestSplit.Position > rows.Count - 1) { 118 119 //split Dataset 120 IReadOnlyList<int> leftRows, rightRows; 121 SplitRows(rows, m5CreationParams.Data, attr, splitValue, out leftRows, out rightRows); 122 123 if (leftRows.Count < m5CreationParams.MinLeafSize || rightRows.Count < m5CreationParams.MinLeafSize) { 128 124 IsLeaf = true; 129 125 return; 
130 126 } 131 132 SplitAttr = bestSplit.SplitAttr; 133 SplitValue = bestSplit.SplitValue; 134 135 //split Dataset 136 IReadOnlyList<int> leftRows, rightRows; 137 SplitRows(rows, m5CreationParams.Data, SplitAttr, SplitValue, out leftRows, out rightRows); 138 139 if (leftRows.Count < m5CreationParams.MinLeafSize || rightRows.Count < m5CreationParams.MinLeafSize) { 140 IsLeaf = true; 141 SplitAttr = null; 142 SplitValue = double.NaN; 143 return; 144 } 127 SplitAttr = attr; 128 SplitValue = splitValue; 145 129 146 130 //create subtrees … … 220 204 221 205 private void BuildModel(IReadOnlyList<int> rows, IDataset data, IRandom random, ILeafType<IRegressionModel> leafType, CancellationToken cancellation) { 222 var reducedData = new Dataset(VariablesUsedForPrediction.Concat(new[] {TargetVariable}), VariablesUsedForPrediction.Concat(new[] {TargetVariable}).Select(x => data.GetDoubleValues(x, rows).ToList()));206 var reducedData = ReduceDataset(data, rows); 223 207 var pd = new RegressionProblemData(reducedData, VariablesUsedForPrediction, TargetVariable); 224 208 pd.TrainingPartition.Start = 0; … … 229 213 NodeModelParams = noparams; 230 214 cancellation.ThrowIfCancellationRequested(); 215 } 216 217 private IDataset ReduceDataset(IDataset data, IReadOnlyList<int> rows) { 218 return new Dataset(VariablesUsedForPrediction.Concat(new[] {TargetVariable}), VariablesUsedForPrediction.Concat(new[] {TargetVariable}).Select(x => data.GetDoubleValues(x, rows).ToList())); 231 219 } 232 220 -
branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/MetaModels/PreconstructedLinearModel.cs
r15430 r15470 24 24 using System.Linq; 25 25 using HeuristicLab.Common; 26 using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;27 26 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; 28 27 using HeuristicLab.Problems.DataAnalysis; 29 using HeuristicLab.Problems.DataAnalysis.Symbolic;30 using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;31 28 32 29 namespace HeuristicLab.Algorithms.DataAnalysis { … … 98 95 } 99 96 100 101 97 public override IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) { 102 98 return rows.Select(row => GetEstimatedValue(dataset, row)); -
branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/Pruning/HoldoutLeafPruning.cs
r15430 r15470 30 30 namespace HeuristicLab.Algorithms.DataAnalysis { 31 31 [StorableClass] 32 [Item("HoldoutLeafPruning", "Postpruning via a holdout set ")]32 [Item("HoldoutLeafPruning", "Postpruning via a holdout set. Pruning is done using the model type of the leaf models")] 33 33 public class HoldoutLeafPruning : PruningBase { 34 34 private const string HoldoutSizeParameterName = "HoldoutSize"; -
branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/Pruning/M5LinearPruning.cs
r15430 r15470 34 34 private M5LinearPruning(bool deserializing) : base(deserializing) { } 35 35 private M5LinearPruning(M5LinearPruning original, Cloner cloner) : base(original, cloner) { } 36 public M5LinearPruning() : base(){ }36 public M5LinearPruning() { } 37 37 public override IDeepCloneable Clone(Cloner cloner) { 38 38 return new M5LinearPruning(this, cloner); -
branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/Pruning/NoPruning.cs
r15430 r15470 34 34 private NoPruning(bool deserializing) : base(deserializing) { } 35 35 private NoPruning(NoPruning original, Cloner cloner) : base(original, cloner) { } 36 public NoPruning() : base() { } 36 public NoPruning() { 37 PruningStrengthParameter.Hidden = true; 38 } 37 39 public override IDeepCloneable Clone(Cloner cloner) { 38 40 return new NoPruning(this, cloner); -
branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/Pruning/PruningBase.cs
r15430 r15470 45 45 protected PruningBase(bool deserializing) : base(deserializing) { } 46 46 protected PruningBase(PruningBase original, Cloner cloner) : base(original, cloner) { } 47 protected PruningBase() : base(){47 protected PruningBase() { 48 48 Parameters.Add(new FixedValueParameter<DoubleValue>(PruningStrengthParameterName, "The strength of the pruning. Higher values force the algorithm to create simpler models", new DoubleValue(2.0))); 49 49 } -
branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/Spliting/OrderImpurityCalculator.cs
r15430 r15470 27 27 namespace HeuristicLab.Algorithms.DataAnalysis { 28 28 /// <summary> 29 /// Helper class for incremental impuritycalculation.29 /// Helper class for incremental split calculation. 30 30 /// Used while moving a potential Split along the ordered training Instances 31 31 /// </summary> 32 internal class OrderImpurityCalculator : IImpurityCalculator { 32 internal class OrderImpurityCalculator { 33 internal enum IncrementType { 34 Left, 35 Right, 36 None 37 } 38 33 39 #region Properties 34 40 private double SqSumLeft { get; set; } … … 99 105 VarRight = NoRight <= 0 ? 0 : Math.Abs(NoRight * SqSumRight - SumRight * SumRight) / (NoRight * NoRight); 100 106 101 if (Order <= 0) throw new ArgumentException(" Impurityorder must be larger than 0");107 if (Order <= 0) throw new ArgumentException("Split order must be larger than 0"); 102 108 if (Order.IsAlmost(1)) { 103 109 y = VarTotal; … … 111 117 } 112 118 var t = NoRight + NoLeft; 113 if (NoLeft <= 0.0 || NoRight <= 0.0) Impurity = double.MinValue; // Impurity= 0;114 else Impurity = y - NoLeft / t * yl - NoRight / t * yr; // Impurity= y - NoLeft / NoRight * yl - NoRight / NoLeft * yr119 if (NoLeft <= 0.0 || NoRight <= 0.0) Impurity = double.MinValue; //Split = 0; 120 else Impurity = y - NoLeft / t * yl - NoRight / t * yr; // Split = y - NoLeft / NoRight * yl - NoRight / NoLeft * yr 115 121 } 116 122 #endregion
Note: See TracChangeset for help on using the changeset viewer.