Changeset 15470


Ignore:
Timestamp: 11/10/17 12:56:36 (21 months ago)
Author: bwerth
Message: #2847 worked on M5Regression

Location: branches/M5Regression
Files: 4 added, 5 deleted, 15 edited

Legend:

Unmodified
Added
Removed
  • branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/FixedDataAnalysisAlgorithm.cs

    r15430 r15470  
    2828namespace HeuristicLab.Algorithms.DataAnalysis {
    2929  [StorableClass]
    30   public abstract class FixedDataAnalysisAlgorithm<T> : BasicAlgorithm where T : class, IDataAnalysisProblem {
     30  public abstract class FixedDataAnalysisAlgorithm<T> : BasicAlgorithm, IDataAnalysisAlgorithm<T> where T : class, IDataAnalysisProblem {
    3131    #region Properties
    3232    public override Type ProblemType {
     
    3434    }
    3535    public new T Problem {
    36       get { return (T)base.Problem; }
     36      get { return (T) base.Problem; }
    3737      set { base.Problem = value; }
    3838    }
    3939    #endregion
    4040
    41     public override bool SupportsPause { get { return false; } }
     41    public override bool SupportsPause {
     42      get { return false; }
     43    }
    4244
    4345    [StorableConstructor]
     
    4547    protected FixedDataAnalysisAlgorithm(FixedDataAnalysisAlgorithm<T> original, Cloner cloner) : base(original, cloner) { }
    4648    public FixedDataAnalysisAlgorithm() : base() { }
    47 
    4849  }
    4950}
  • branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj

    r15430 r15470  
    361361    <Compile Include="Linear\MultinomialLogitModel.cs" />
    362362    <Compile Include="Linear\Scaling.cs" />
    363     <Compile Include="M5Regression\Interfaces\IImpurityCalculator.cs" />
    364     <Compile Include="M5Regression\Interfaces\IImpurityType.cs" />
     363    <Compile Include="M5Regression\Interfaces\ISplitType.cs" />
    365364    <Compile Include="M5Regression\Interfaces\IM5MetaModel.cs" />
    366365    <Compile Include="M5Regression\Interfaces\ILeafType.cs" />
     
    377376    <Compile Include="M5Regression\M5Utilities\M5CreationParameters.cs" />
    378377    <Compile Include="M5Regression\M5Utilities\M5UpdateParameters.cs" />
    379     <Compile Include="M5Regression\M5Utilities\SplitInfo.cs" />
    380378    <Compile Include="M5Regression\MetaModels\ComponentReducedLinearModel.cs" />
    381379    <Compile Include="M5Regression\MetaModels\M5NodeModel.cs" />
     
    392390    <Compile Include="M5Regression\Pruning\M5LeafPruning.cs" />
    393391    <Compile Include="M5Regression\Spliting\OrderImpurityCalculator.cs" />
    394     <Compile Include="M5Regression\Spliting\OrderImpurityType.cs" />
     392    <Compile Include="M5Regression\Spliting\OrderSplitType.cs" />
    395393    <Compile Include="Nca\Initialization\INcaInitializer.cs" />
    396394    <Compile Include="Nca\Initialization\LdaInitializer.cs" />
     
    423421    <Compile Include="NeuralNetwork\NeuralNetworkRegressionSolution.cs" />
    424422    <Compile Include="NonlinearRegression\NonlinearRegression.cs" />
    425     <Compile Include="PCA\PrincipleComponentAnalysis.cs" />
    426     <Compile Include="PCA\PrincipleComponentAnalysisStatic.cs" />
     423    <Compile Include="M5Regression\M5Utilities\PrincipleComponentTransformation.cs" />
    427424    <Compile Include="Plugin.cs" />
    428425    <Compile Include="Properties\AssemblyInfo.cs" />
  • branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/LeafTypes/ComponentReductionLinearLeaf.cs

    r15430 r15470  
    5858    public IConfidenceRegressionModel BuildModel(IRegressionProblemData pd, IRandom random,
    5959      CancellationToken cancellation, out int noParameters) {
    60       var pca = PrincipleComponentAnalysisStatic.Create(pd.Dataset, pd.TrainingIndices, pd.AllowedInputVariables, true);
    61       var pcdata = pca.ProjectProblem(pd);
     60      var pca = PrincipleComponentTransformation.CreateProjection(pd.Dataset, pd.TrainingIndices, pd.AllowedInputVariables, true);
     61      var pcdata = pca.TransformProblemData(pd);
    6262      ComponentReducedLinearModel bestModel = null;
    6363      var bestCvrmse = double.MaxValue;
     
    6565      for (var i = 1; i <= Math.Min(NoComponents, pd.AllowedInputVariables.Count()); i++) {
    6666        var pd2 = (IRegressionProblemData) pcdata.Clone();
    67         var inputs = new HashSet<string>(pca.Names.Take(i));
     67        var inputs = new HashSet<string>(pca.ComponentNames.Take(i));
    6868        foreach (var v in pd2.InputVariables.CheckedItems.ToArray())
    6969          pd2.InputVariables.SetItemCheckedState(v.Value, inputs.Contains(v.Value.Value));
  • branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/M5Regression.cs

    r15430 r15470  
    2020    #region Parametername
    2121    private const string GenerateRulesParameterName = "GenerateRules";
    22     private const string ImpurityParameterName = "Impurity";
     22    private const string ImpurityParameterName = "Split";
    2323    private const string MinimalNodeSizeParameterName = "MinimalNodeSize";
    2424    private const string ModelTypeParameterName = "ModelType";
     
    3232      get { return Parameters[GenerateRulesParameterName] as IFixedValueParameter<BoolValue>; }
    3333    }
    34     public IConstrainedValueParameter<IImpurityType> ImpurityParameter {
    35       get { return Parameters[ImpurityParameterName] as IConstrainedValueParameter<IImpurityType>; }
     34    public IConstrainedValueParameter<ISplitType> ImpurityParameter {
     35      get { return Parameters[ImpurityParameterName] as IConstrainedValueParameter<ISplitType>; }
    3636    }
    3737    public IFixedValueParameter<IntValue> MinimalNodeSizeParameter {
     
    5656      get { return GenerateRulesParameter.Value.Value; }
    5757    }
    58     public IImpurityType Impurity {
     58    public ISplitType Split {
    5959      get { return ImpurityParameter.Value; }
    6060    }
     
    8383      var modelSet = new ItemSet<ILeafType<IRegressionModel>>(ApplicationManager.Manager.GetInstances<ILeafType<IRegressionModel>>());
    8484      var pruningSet = new ItemSet<IPruningType>(ApplicationManager.Manager.GetInstances<IPruningType>());
    85       var impuritySet = new ItemSet<IImpurityType>(ApplicationManager.Manager.GetInstances<IImpurityType>());
     85      var impuritySet = new ItemSet<ISplitType>(ApplicationManager.Manager.GetInstances<ISplitType>());
    8686      Parameters.Add(new FixedValueParameter<BoolValue>(GenerateRulesParameterName, "Whether a set of rules or a decision tree shall be created", new BoolValue(true)));
    87       Parameters.Add(new ConstrainedValueParameter<IImpurityType>(ImpurityParameterName, "The type of impurity function used to create node splits", impuritySet, impuritySet.OfType<OrderImpurityType>().First()));
     87      Parameters.Add(new ConstrainedValueParameter<ISplitType>(ImpurityParameterName, "The type of split function used to create node splits", impuritySet, impuritySet.OfType<OrderSplitType>().First()));
    8888      Parameters.Add(new FixedValueParameter<IntValue>(MinimalNodeSizeParameterName, "The minimal number of samples in a leaf node", new IntValue(1)));
    8989      Parameters.Add(new ConstrainedValueParameter<ILeafType<IRegressionModel>>(ModelTypeParameterName, "The type of model used for the nodes", modelSet, modelSet.OfType<LinearLeaf>().First()));
     
    102102      if (SetSeedRandomly) SeedParameter.Value.Value = new System.Random().Next();
    103103      random.Reset(Seed);
    104       var solution = CreateM5RegressionSolution(Problem.ProblemData, random, LeafType, Impurity, PruningType, cancellationToken, MinimalNodeSize, GenerateRules, Results);
     104      var solution = CreateM5RegressionSolution(Problem.ProblemData, random, LeafType, Split, PruningType, cancellationToken, MinimalNodeSize, GenerateRules, Results);
    105105      AnalyzeSolution(solution);
    106106    }
     
    108108    #region Static Interface
    109109    public static IRegressionSolution CreateM5RegressionSolution(IRegressionProblemData problemData, IRandom random,
    110       ILeafType<IRegressionModel> leafType = null, IImpurityType impurityType = null, IPruningType pruningType = null,
     110      ILeafType<IRegressionModel> leafType = null, ISplitType splitType = null, IPruningType pruningType = null,
    111111      CancellationToken? cancellationToken = null, int minNumInstances = 4, bool generateRules = false, ResultCollection results = null) {
    112112      //set default values
    113113      if (leafType == null) leafType = new LinearLeaf();
    114       if (impurityType == null) impurityType = new OrderImpurityType();
     114      if (splitType == null) splitType = new OrderSplitType();
    115115      if (cancellationToken == null) cancellationToken = CancellationToken.None;
    116116      if (pruningType == null) pruningType = new M5LeafPruning();
     
    130130
    131131      //create & build Model
    132       var m5Params = new M5CreationParameters(pruningType, minNumInstances, leafType, pd, random, impurityType, results);
     132      var m5Params = new M5CreationParameters(pruningType, minNumInstances, leafType, pd, random, splitType, results);
    133133
    134134      IReadOnlyList<int> t, h;
     
    148148
    149149    public static void UpdateM5Model(M5TreeModel model, IRegressionProblemData problemData, IRandom random,
    150       ILeafType<IRegressionModel> leafType = null, CancellationToken? cancellationToken = null, ResultCollection results = null) {
    151       UpdateM5Model(model as IM5MetaModel, problemData, random, leafType, cancellationToken, results);
     150      ILeafType<IRegressionModel> leafType = null, CancellationToken? cancellationToken = null) {
     151      UpdateM5Model(model as IM5MetaModel, problemData, random, leafType, cancellationToken);
    152152    }
    153153
    154154    public static void UpdateM5Model(M5RuleSetModel model, IRegressionProblemData problemData, IRandom random,
    155       ILeafType<IRegressionModel> leafType = null, CancellationToken? cancellationToken = null, ResultCollection results = null) {
    156       UpdateM5Model(model as IM5MetaModel, problemData, random, leafType, cancellationToken, results);
     155      ILeafType<IRegressionModel> leafType = null, CancellationToken? cancellationToken = null) {
     156      UpdateM5Model(model as IM5MetaModel, problemData, random, leafType, cancellationToken);
    157157    }
    158158
    159159    private static void UpdateM5Model(IM5MetaModel model, IRegressionProblemData problemData, IRandom random,
    160       ILeafType<IRegressionModel> leafType = null, CancellationToken? cancellationToken = null, ResultCollection results = null) {
     160      ILeafType<IRegressionModel> leafType = null, CancellationToken? cancellationToken = null) {
    161161      if (cancellationToken == null) cancellationToken = CancellationToken.None;
    162       var m5Params = new M5UpdateParameters(leafType, problemData, random, results);
     162      var m5Params = new M5UpdateParameters(leafType, problemData, random);
    163163      model.UpdateModel(problemData.TrainingIndices.ToList(), m5Params, cancellationToken.Value);
    164164    }
     
    175175      }
    176176      else {
    177         Results.Add(M5Analyzer.CreateRulesResult((M5RuleSetModel) solution.Model, Problem.ProblemData, "M5TreeResult", 0, true));
     177        Results.Add(M5Analyzer.CreateRulesResult((M5RuleSetModel) solution.Model, Problem.ProblemData, "M5TreeResult", true));
    178178        frequencies = M5Analyzer.GetRuleVariableFrequences((M5RuleSetModel) solution.Model);
    179179        Results.Add(M5Analyzer.CreateCoverageDiagram((M5RuleSetModel) solution.Model, Problem.ProblemData));
  • branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/M5Utilities/M5Analyzer.cs

    r15430 r15470  
    2929namespace HeuristicLab.Algorithms.DataAnalysis {
    3030  internal static class M5Analyzer {
    31     private const string LeftResultName = "Left";
    32     private const string RightResultName = "Right";
    3331    private const string ConditionResultName = "Condition";
    3432    private const string CoverResultName = "Covered Instances";
    3533    private const string CoverageDiagramResultName = "Coverage";
    36     private const string NodeModelResultName = "NodeModel";
    37     private const string NodeSizeResultName = "NodeSize";
    3834    private const string RuleModelResultName = "RuleModel";
    3935
     
    4137      var res = ruleSetModel.VariablesUsedForPrediction.ToDictionary(x => x, x => 0);
    4238      foreach (var rule in ruleSetModel.Rules)
    43         foreach (var att in rule.SplitAtts)
    44           res[att]++;
     39      foreach (var att in rule.SplitAtts)
     40        res[att]++;
    4541      return res;
    4642    }
     
    5753      var list = new List<int>();
    5854      GetLeafDepths(treeModel.Root, 0, list);
    59       var row = new DataRow("Depths", "", list.Select(x => (double)x)) {
    60         VisualProperties = { ChartType = DataRowVisualProperties.DataRowChartType.Histogram }
     55      var row = new DataRow("Depths", "", list.Select(x => (double) x)) {
     56        VisualProperties = {ChartType = DataRowVisualProperties.DataRowChartType.Histogram}
    6157      };
    6258      var hist = new DataTable("LeafDepths");
     
    6561    }
    6662
    67     public static Result CreateRulesResult(M5RuleSetModel ruleSetModel, IRegressionProblemData pd, string resultName, int maxDepth, bool displayModels) {
     63    public static Result CreateRulesResult(M5RuleSetModel ruleSetModel, IRegressionProblemData pd, string resultName, bool displayModels) {
    6864      var res = new ResultCollection();
    6965      var i = 0;
    7066      foreach (var rule in ruleSetModel.Rules)
    71         res.Add(new Result("Rule" + i++, CreateRulesResult(rule, pd, maxDepth, displayModels, out pd)));
     67        res.Add(new Result("Rule" + i++, CreateRulesResult(rule, pd, displayModels, out pd)));
    7268      return new Result(resultName, res);
    7369    }
     
    10197    }
    10298
    103     private static ResultCollection CreateRulesResult(M5NodeModel nodeModel, IRegressionProblemData pd, IList<int> rows, int maxDepth, bool displayModels) {
    104       var res = new ResultCollection();
    105       if (!nodeModel.IsLeaf) {
    106         res.Add(new Result(ConditionResultName, new StringValue(nodeModel.SplitAttr + " <= " + nodeModel.SplitValue)));
    107         var assignment = pd.Dataset.GetDoubleValues(nodeModel.SplitAttr, rows).Select(x => x <= nodeModel.SplitValue).ToArray();
    108         var leftRows = Enumerable.Range(0, assignment.Length).Where(i => assignment[i]).Select(i => rows[i]).ToList();
    109         var rightRows = Enumerable.Range(0, assignment.Length).Where(i => !assignment[i]).Select(i => rows[i]).ToList();
    110         if (nodeModel.Left != null && maxDepth > 0) res.Add(new Result(LeftResultName, CreateRulesResult(nodeModel.Left, pd, leftRows, maxDepth - 1, displayModels)));
    111         if (nodeModel.Right != null && maxDepth > 0) res.Add(new Result(RightResultName, CreateRulesResult(nodeModel.Right, pd, rightRows, maxDepth - 1, displayModels)));
    112       }
    113       if (nodeModel.NodeModel != null && displayModels) res.Add(new Result(NodeModelResultName, nodeModel.NodeModel.CreateRegressionSolution(pd)));
    114       res.Add(new Result(NodeSizeResultName, new IntValue(rows.Count)));
    115       return res;
    116     }
    117 
    118     private static ResultCollection CreateRulesResult(M5RuleModel m5RuleModel, IRegressionProblemData pd, int maxDepth, bool displayModels, out IRegressionProblemData notCovered) {
    119 
     99    private static ResultCollection CreateRulesResult(M5RuleModel m5RuleModel, IRegressionProblemData pd, bool displayModels, out IRegressionProblemData notCovered) {
    120100      var training = pd.TrainingIndices.Where(x => !m5RuleModel.Covers(pd.Dataset, x)).ToArray();
    121101      var test = pd.TestIndices.Where(x => !m5RuleModel.Covers(pd.Dataset, x)).ToArray();
     
    134114      var res = new ResultCollection {
    135115        new Result(ConditionResultName, new StringValue(m5RuleModel.ToCompactString())),
    136         new Result(CoverResultName, new IntValue(pd.TrainingIndices.Count()-training.Length))
     116        new Result(CoverResultName, new IntValue(pd.TrainingIndices.Count() - training.Length))
    137117      };
    138118      if (displayModels) res.Add(new Result(RuleModelResultName, m5RuleModel.CreateRegressionSolution(covered)));
    139119      return res;
    140     }
    141 
    142     private static IEnumerable<double> Cumulate(this IEnumerable<double> values) {
    143       double sum = 0.0;
    144       foreach (var value in values) {
    145         sum += value;
    146         yield return sum;
    147       }
    148120    }
    149121
     
    161133  }
    162134}
    163 
  • branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/M5Utilities/M5CreationParameters.cs

    r15430 r15470  
    2828namespace HeuristicLab.Algorithms.DataAnalysis {
    2929  internal class M5CreationParameters {
    30     private readonly IImpurityType Impurity1;
     30    private readonly ISplitType Impurity1;
    3131    private readonly IPruningType Pruningtype1;
    3232    private readonly ILeafType<IRegressionModel> LeafType1;
     
    3535    private readonly IRandom Random1;
    3636    private readonly ResultCollection Results1;
    37     public IImpurityType Impurity {
     37    public ISplitType Split {
    3838      get { return Impurity1; }
    3939    }
     
    7171
    7272    public M5CreationParameters(IPruningType pruning, int minleafSize, ILeafType<IRegressionModel> modeltype,
    73       IRegressionProblemData problemData, IRandom random, IImpurityType impurity, ResultCollection results) {
    74       Impurity1 = impurity;
     73      IRegressionProblemData problemData, IRandom random, ISplitType split, ResultCollection results) {
     74      Impurity1 = split;
    7575      Pruningtype1 = pruning;
    7676      ProblemData1 = problemData;
  • branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/M5Utilities/M5UpdateParameters.cs

    r15430 r15470  
    3030    private readonly IRegressionProblemData ProblemData1;
    3131    private readonly IRandom Random1;
    32     private readonly ResultCollection Results1;
    3332    public ILeafType<IRegressionModel> LeafType {
    3433      get { return LeafType1; }
     
    4039      get { return Random1; }
    4140    }
    42     public ResultCollection Results {
    43       get { return Results1; }
    44     }
    45 
    46     public IEnumerable<string> AllowedInputVariables {
    47       get { return ProblemData.AllowedInputVariables; }
    48     }
    49     public string TargetVariable {
    50       get { return ProblemData.TargetVariable; }
    51     }
    5241    public IDataset Data {
    5342      get { return ProblemData.Dataset; }
    5443    }
    5544
    56     public M5UpdateParameters(ILeafType<IRegressionModel> modeltype, IRegressionProblemData problemData, IRandom random, ResultCollection results) {
     45    public M5UpdateParameters(ILeafType<IRegressionModel> modeltype, IRegressionProblemData problemData, IRandom random) {
    5746      ProblemData1 = problemData;
    5847      Random1 = random;
    5948      LeafType1 = modeltype;
    60       Results1 = results;
    6149    }
    6250  }
  • branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/MetaModels/ComponentReducedLinearModel.cs

    r15430 r15470  
    2121
    2222using System.Collections.Generic;
     23using System.Linq;
    2324using HeuristicLab.Common;
    2425using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
     
    3132    private IConfidenceRegressionModel Model;
    3233    [Storable]
    33     private PrincipleComponentAnalysisStatic Pca;
     34    private PrincipleComponentTransformation Pca;
    3435
    3536    [StorableConstructor]
     
    3940      Pca = cloner.Clone(original.Pca);
    4041    }
    41     public ComponentReducedLinearModel(string targetVariable, IConfidenceRegressionModel model, PrincipleComponentAnalysisStatic pca) : base(targetVariable) {
     42    public ComponentReducedLinearModel(string targetVariable, IConfidenceRegressionModel model, PrincipleComponentTransformation pca) : base(targetVariable) {
    4243      Model = model;
    4344      Pca = pca;
     
    5152    }
    5253    public override IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
    53       return Model.GetEstimatedValues(Pca.ProjectDataset(dataset), rows);
     54      var data = ReduceDataset(dataset, rows.ToArray());
     55      return Model.GetEstimatedValues(Pca.TransformDataset(data), Enumerable.Range(0, data.Rows));
    5456    }
    5557    public override IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
     
    5759    }
    5860    public IEnumerable<double> GetEstimatedVariances(IDataset dataset, IEnumerable<int> rows) {
    59       return Model.GetEstimatedVariances(Pca.ProjectDataset(dataset), rows);
     61      var data = ReduceDataset(dataset, rows.ToArray());
     62      return Model.GetEstimatedVariances(Pca.TransformDataset(data), Enumerable.Range(0, data.Rows));
     63    }
     64
     65    private IDataset ReduceDataset(IDataset data, IReadOnlyList<int> rows) {
     66      return new Dataset(data.DoubleVariables, data.DoubleVariables.Select(v => data.GetDoubleValues(v, rows).ToList()));
    6067    }
    6168  }
  • branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/MetaModels/M5NodeModel.cs

    r15430 r15470  
    5656    [Storable]
    5757    private IReadOnlyList<string> Variables { get; set; }
    58 
    59     private const double DevFraction = 0.05;
    6058    #endregion
    6159
     
    110108      Left = null;
    111109      NodeModel = null;
    112       IsLeaf = m5CreationParams.Data.GetDoubleValues(TargetVariable, rows).StandardDeviation() < globalStdDev * DevFraction;
     110      SplitAttr = null;
     111      SplitValue = double.NaN;
     112      string attr;
     113      double splitValue;
     114      //IsLeaf = m5CreationParams.Data.GetDoubleValues(TargetVariable, rows).StandardDeviation() < globalStdDev * DevFraction;
     115      //if (IsLeaf) return;
     116      IsLeaf = !m5CreationParams.Split.Split(new RegressionProblemData(ReduceDataset(m5CreationParams.Data, rows), Variables, TargetVariable), m5CreationParams.MinLeafSize, out attr, out splitValue);
    113117      if (IsLeaf) return;
    114       //Split(rows, m5Params, globalStdDev);
    115       var bestSplit = new SplitInfo();
    116       var currentSplit = new SplitInfo();
    117 
    118       //find best Attribute for the Split
    119       foreach (var attr in m5CreationParams.AllowedInputVariables) {
    120         var sortedData = m5CreationParams.Data.GetDoubleValues(attr, rows).Zip(m5CreationParams.Data.GetDoubleValues(TargetVariable, rows), Tuple.Create).OrderBy(x => x.Item1).ToArray();
    121         currentSplit.AttributeSplit(attr, sortedData.Select(x => x.Item1).ToArray(), sortedData.Select(x => x.Item2).ToArray(), m5CreationParams);
    122         if (!currentSplit.MaxImpurity.IsAlmost(bestSplit.MaxImpurity) && currentSplit.MaxImpurity > bestSplit.MaxImpurity)
    123           bestSplit = (SplitInfo) currentSplit.Clone();
    124       }
    125 
    126       //if no suitable split exists => leafNode
    127       if (bestSplit.SplitAttr == null || bestSplit.Position < 1 || bestSplit.Position > rows.Count - 1) {
     118
     119      //split Dataset
     120      IReadOnlyList<int> leftRows, rightRows;
     121      SplitRows(rows, m5CreationParams.Data, attr, splitValue, out leftRows, out rightRows);
     122
     123      if (leftRows.Count < m5CreationParams.MinLeafSize || rightRows.Count < m5CreationParams.MinLeafSize) {
    128124        IsLeaf = true;
    129125        return;
    130126      }
    131 
    132       SplitAttr = bestSplit.SplitAttr;
    133       SplitValue = bestSplit.SplitValue;
    134 
    135       //split Dataset
    136       IReadOnlyList<int> leftRows, rightRows;
    137       SplitRows(rows, m5CreationParams.Data, SplitAttr, SplitValue, out leftRows, out rightRows);
    138 
    139       if (leftRows.Count < m5CreationParams.MinLeafSize || rightRows.Count < m5CreationParams.MinLeafSize) {
    140         IsLeaf = true;
    141         SplitAttr = null;
    142         SplitValue = double.NaN;
    143         return;
    144       }
     127      SplitAttr = attr;
     128      SplitValue = splitValue;
    145129
    146130      //create subtrees
     
    220204
    221205    private void BuildModel(IReadOnlyList<int> rows, IDataset data, IRandom random, ILeafType<IRegressionModel> leafType, CancellationToken cancellation) {
    222       var reducedData = new Dataset(VariablesUsedForPrediction.Concat(new[] {TargetVariable}), VariablesUsedForPrediction.Concat(new[] {TargetVariable}).Select(x => data.GetDoubleValues(x, rows).ToList()));
     206      var reducedData = ReduceDataset(data, rows);
    223207      var pd = new RegressionProblemData(reducedData, VariablesUsedForPrediction, TargetVariable);
    224208      pd.TrainingPartition.Start = 0;
     
    229213      NodeModelParams = noparams;
    230214      cancellation.ThrowIfCancellationRequested();
     215    }
     216
     217    private IDataset ReduceDataset(IDataset data, IReadOnlyList<int> rows) {
     218      return new Dataset(VariablesUsedForPrediction.Concat(new[] {TargetVariable}), VariablesUsedForPrediction.Concat(new[] {TargetVariable}).Select(x => data.GetDoubleValues(x, rows).ToList()));
    231219    }
    232220
  • branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/MetaModels/PreconstructedLinearModel.cs

    r15430 r15470  
    2424using System.Linq;
    2525using HeuristicLab.Common;
    26 using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
    2726using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    2827using HeuristicLab.Problems.DataAnalysis;
    29 using HeuristicLab.Problems.DataAnalysis.Symbolic;
    30 using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;
    3128
    3229namespace HeuristicLab.Algorithms.DataAnalysis {
     
    9895    }
    9996
    100 
    10197    public override IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
    10298      return rows.Select(row => GetEstimatedValue(dataset, row));
  • branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/Pruning/HoldoutLeafPruning.cs

    r15430 r15470  
    3030namespace HeuristicLab.Algorithms.DataAnalysis {
    3131  [StorableClass]
    32   [Item("HoldoutLeafPruning", "Postpruning via a holdout set")]
     32  [Item("HoldoutLeafPruning", "Postpruning via a holdout set. Pruning is done using the model type of the leaf models")]
    3333  public class HoldoutLeafPruning : PruningBase {
    3434    private const string HoldoutSizeParameterName = "HoldoutSize";
  • branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/Pruning/M5LinearPruning.cs

    r15430 r15470  
    3434    private M5LinearPruning(bool deserializing) : base(deserializing) { }
    3535    private M5LinearPruning(M5LinearPruning original, Cloner cloner) : base(original, cloner) { }
    36     public M5LinearPruning() : base() { }
     36    public M5LinearPruning() { }
    3737    public override IDeepCloneable Clone(Cloner cloner) {
    3838      return new M5LinearPruning(this, cloner);
  • branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/Pruning/NoPruning.cs

    r15430 r15470  
    3434    private NoPruning(bool deserializing) : base(deserializing) { }
    3535    private NoPruning(NoPruning original, Cloner cloner) : base(original, cloner) { }
    36     public NoPruning() : base() { }
     36    public NoPruning() {
     37      PruningStrengthParameter.Hidden = true;
     38    }
    3739    public override IDeepCloneable Clone(Cloner cloner) {
    3840      return new NoPruning(this, cloner);
  • branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/Pruning/PruningBase.cs

    r15430 r15470  
    4545    protected PruningBase(bool deserializing) : base(deserializing) { }
    4646    protected PruningBase(PruningBase original, Cloner cloner) : base(original, cloner) { }
    47     protected PruningBase() : base() {
     47    protected PruningBase() {
    4848      Parameters.Add(new FixedValueParameter<DoubleValue>(PruningStrengthParameterName, "The strength of the pruning. Higher values force the algorithm to create simpler models", new DoubleValue(2.0)));
    4949    }
  • branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/Spliting/OrderImpurityCalculator.cs

    r15430 r15470  
    2727namespace HeuristicLab.Algorithms.DataAnalysis {
    2828  /// <summary>
    29   /// Helper class for incremental impurity calculation.
     29  /// Helper class for incremental split calculation.
    3030  /// Used while moving a potential Split along the ordered training Instances
    3131  /// </summary>
    32   internal class OrderImpurityCalculator : IImpurityCalculator {
     32  internal class OrderImpurityCalculator {
     33    internal enum IncrementType {
     34      Left,
     35      Right,
     36      None
     37    }
     38
    3339    #region Properties
    3440    private double SqSumLeft { get; set; }
     
    99105      VarRight = NoRight <= 0 ? 0 : Math.Abs(NoRight * SqSumRight - SumRight * SumRight) / (NoRight * NoRight);
    100106
    101       if (Order <= 0) throw new ArgumentException("Impurity order must be larger than 0");
     107      if (Order <= 0) throw new ArgumentException("Split order must be larger than 0");
    102108      if (Order.IsAlmost(1)) {
    103109        y = VarTotal;
     
    111117      }
    112118      var t = NoRight + NoLeft;
    113       if (NoLeft <= 0.0 || NoRight <= 0.0) Impurity = double.MinValue; //Impurity = 0;
    114       else Impurity = y - NoLeft / t * yl - NoRight / t * yr; //  Impurity = y - NoLeft / NoRight * yl - NoRight / NoLeft * yr
     119      if (NoLeft <= 0.0 || NoRight <= 0.0) Impurity = double.MinValue; //Split = 0;
     120      else Impurity = y - NoLeft / t * yl - NoRight / t * yr; //  Split = y - NoLeft / NoRight * yl - NoRight / NoLeft * yr
    115121    }
    116122    #endregion
Note: See TracChangeset for help on using the changeset viewer.