Changeset 15830
- Timestamp:
- 03/08/18 08:46:40 (7 years ago)
- Location:
- branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4
- Files:
-
- 23 added
- 13 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj
r15614 r15830 43 43 <DebugType>full</DebugType> 44 44 <Optimize>false</Optimize> 45 <OutputPath>..\..\..\..\trunk\ sources\bin\</OutputPath>45 <OutputPath>..\..\..\..\trunk\bin\</OutputPath> 46 46 <DefineConstants>DEBUG;TRACE</DefineConstants> 47 47 <ErrorReport>prompt</ErrorReport> … … 54 54 <DebugType>pdbonly</DebugType> 55 55 <Optimize>true</Optimize> 56 <OutputPath>..\..\..\..\trunk\ sources\bin\</OutputPath>56 <OutputPath>..\..\..\..\trunk\bin\</OutputPath> 57 57 <DefineConstants>TRACE</DefineConstants> 58 58 <ErrorReport>prompt</ErrorReport> … … 65 65 <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|x86' "> 66 66 <DebugSymbols>true</DebugSymbols> 67 <OutputPath>..\..\..\..\trunk\ sources\bin\</OutputPath>67 <OutputPath>..\..\..\..\trunk\bin\</OutputPath> 68 68 <DefineConstants>DEBUG;TRACE</DefineConstants> 69 69 <DebugType>full</DebugType> … … 74 74 </PropertyGroup> 75 75 <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|x86' "> 76 <OutputPath>..\..\..\..\trunk\ sources\bin\</OutputPath>76 <OutputPath>..\..\..\..\trunk\bin\</OutputPath> 77 77 <DefineConstants>TRACE</DefineConstants> 78 78 <DocumentationFile> … … 87 87 <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|x64' "> 88 88 <DebugSymbols>true</DebugSymbols> 89 <OutputPath>..\..\..\..\trunk\ sources\bin\</OutputPath>89 <OutputPath>..\..\..\..\trunk\bin\</OutputPath> 90 90 <DefineConstants>DEBUG;TRACE</DefineConstants> 91 91 <DebugType>full</DebugType> … … 96 96 </PropertyGroup> 97 97 <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|x64' "> 98 <OutputPath>..\..\..\..\trunk\ sources\bin\</OutputPath>98 <OutputPath>..\..\..\..\trunk\bin\</OutputPath> 99 99 <DefineConstants>TRACE</DefineConstants> 100 100 <DocumentationFile> … … 108 108 </PropertyGroup> 109 109 <ItemGroup> 110 <Reference Include="ALGLIB-3.7.0, Version=3.7.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> 111 
<HintPath>..\..\bin\ALGLIB-3.7.0.dll</HintPath> 112 <Private>False</Private> 113 </Reference> 114 <Reference Include="HeuristicLab.Algorithms.GradientDescent-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> 115 <SpecificVersion>False</SpecificVersion> 116 <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.Algorithms.GradientDescent-3.3.dll</HintPath> 110 <Reference Include="ALGLIB-3.7.0"> 111 <HintPath>..\..\..\..\trunk\bin\ALGLIB-3.7.0.dll</HintPath> 112 </Reference> 113 <Reference Include="HeuristicLab.Algorithms.DataAnalysis.Glmnet-3.4, Version=3.4.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> 114 <SpecificVersion>False</SpecificVersion> 115 <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Algorithms.DataAnalysis.Glmnet-3.4.dll</HintPath> 116 </Reference> 117 <Reference Include="HeuristicLab.Algorithms.GradientDescent-3.3"> 118 <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Algorithms.GradientDescent-3.3.dll</HintPath> 117 119 </Reference> 118 120 <Reference Include="HeuristicLab.Algorithms.OffspringSelectionGeneticAlgorithm-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> 119 121 <SpecificVersion>False</SpecificVersion> 120 <HintPath>..\..\..\..\trunk\ sources\bin\HeuristicLab.Algorithms.OffspringSelectionGeneticAlgorithm-3.3.dll</HintPath>122 <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Algorithms.OffspringSelectionGeneticAlgorithm-3.3.dll</HintPath> 121 123 </Reference> 122 124 <Reference Include="HeuristicLab.Analysis-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> 123 125 <SpecificVersion>False</SpecificVersion> 124 <HintPath>..\..\..\..\trunk\ sources\bin\HeuristicLab.Analysis-3.3.dll</HintPath>126 <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Analysis-3.3.dll</HintPath> 125 127 </Reference> 126 128 <Reference Include="HeuristicLab.Collections-3.3, Version=3.3.0.0, 
Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> 127 129 <SpecificVersion>False</SpecificVersion> 128 <HintPath>..\..\..\..\trunk\ sources\bin\HeuristicLab.Collections-3.3.dll</HintPath>130 <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Collections-3.3.dll</HintPath> 129 131 </Reference> 130 132 <Reference Include="HeuristicLab.Common-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> 131 133 <SpecificVersion>False</SpecificVersion> 132 <HintPath>..\..\..\..\trunk\ sources\bin\HeuristicLab.Common-3.3.dll</HintPath>134 <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Common-3.3.dll</HintPath> 133 135 </Reference> 134 136 <Reference Include="HeuristicLab.Common.Resources-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> 135 137 <SpecificVersion>False</SpecificVersion> 136 <HintPath>..\..\..\..\trunk\ sources\bin\HeuristicLab.Common.Resources-3.3.dll</HintPath>138 <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Common.Resources-3.3.dll</HintPath> 137 139 </Reference> 138 140 <Reference Include="HeuristicLab.Core-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> 139 141 <SpecificVersion>False</SpecificVersion> 140 <HintPath>..\..\..\..\trunk\ sources\bin\HeuristicLab.Core-3.3.dll</HintPath>142 <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Core-3.3.dll</HintPath> 141 143 </Reference> 142 144 <Reference Include="HeuristicLab.Data-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> 143 145 <SpecificVersion>False</SpecificVersion> 144 <HintPath>..\..\..\..\trunk\ sources\bin\HeuristicLab.Data-3.3.dll</HintPath>146 <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Data-3.3.dll</HintPath> 145 147 </Reference> 146 148 <Reference Include="HeuristicLab.Encodings.PermutationEncoding-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, 
processorArchitecture=MSIL"> 147 149 <SpecificVersion>False</SpecificVersion> 148 <HintPath>..\..\..\..\trunk\ sources\bin\HeuristicLab.Encodings.PermutationEncoding-3.3.dll</HintPath>150 <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Encodings.PermutationEncoding-3.3.dll</HintPath> 149 151 </Reference> 150 152 <Reference Include="HeuristicLab.Encodings.RealVectorEncoding-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> 151 153 <SpecificVersion>False</SpecificVersion> 152 <HintPath>..\..\..\..\trunk\ sources\bin\HeuristicLab.Encodings.RealVectorEncoding-3.3.dll</HintPath>154 <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Encodings.RealVectorEncoding-3.3.dll</HintPath> 153 155 </Reference> 154 156 <Reference Include="HeuristicLab.Encodings.SymbolicExpressionTreeEncoding-3.4, Version=3.4.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> 155 157 <SpecificVersion>False</SpecificVersion> 156 <HintPath>..\..\..\..\trunk\ sources\bin\HeuristicLab.Encodings.SymbolicExpressionTreeEncoding-3.4.dll</HintPath>158 <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Encodings.SymbolicExpressionTreeEncoding-3.4.dll</HintPath> 157 159 </Reference> 158 160 <Reference Include="HeuristicLab.LibSVM-3.12, Version=3.12.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> 159 161 <SpecificVersion>False</SpecificVersion> 160 <HintPath>..\..\..\..\trunk\ sources\bin\HeuristicLab.LibSVM-3.12.dll</HintPath>162 <HintPath>..\..\..\..\trunk\bin\HeuristicLab.LibSVM-3.12.dll</HintPath> 161 163 </Reference> 162 164 <Reference Include="HeuristicLab.Operators-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> 163 165 <SpecificVersion>False</SpecificVersion> 164 <HintPath>..\..\..\..\trunk\ sources\bin\HeuristicLab.Operators-3.3.dll</HintPath>166 <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Operators-3.3.dll</HintPath> 165 167 </Reference> 166 168 
<Reference Include="HeuristicLab.Optimization-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> 167 169 <SpecificVersion>False</SpecificVersion> 168 <HintPath>..\..\..\..\trunk\ sources\bin\HeuristicLab.Optimization-3.3.dll</HintPath>170 <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Optimization-3.3.dll</HintPath> 169 171 </Reference> 170 172 <Reference Include="HeuristicLab.Parameters-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> 171 173 <SpecificVersion>False</SpecificVersion> 172 <HintPath>..\..\..\..\trunk\ sources\bin\HeuristicLab.Parameters-3.3.dll</HintPath>174 <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Parameters-3.3.dll</HintPath> 173 175 </Reference> 174 176 <Reference Include="HeuristicLab.Persistence-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> 175 177 <SpecificVersion>False</SpecificVersion> 176 <HintPath>..\..\..\..\trunk\ sources\bin\HeuristicLab.Persistence-3.3.dll</HintPath>178 <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Persistence-3.3.dll</HintPath> 177 179 </Reference> 178 180 <Reference Include="HeuristicLab.PluginInfrastructure-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> 179 181 <SpecificVersion>False</SpecificVersion> 180 <HintPath>..\..\..\..\trunk\ sources\bin\HeuristicLab.PluginInfrastructure-3.3.dll</HintPath>182 <HintPath>..\..\..\..\trunk\bin\HeuristicLab.PluginInfrastructure-3.3.dll</HintPath> 181 183 </Reference> 182 184 <Reference Include="HeuristicLab.Problems.DataAnalysis-3.4, Version=3.4.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> 183 185 <SpecificVersion>False</SpecificVersion> 184 <HintPath>..\..\..\..\trunk\ sources\bin\HeuristicLab.Problems.DataAnalysis-3.4.dll</HintPath>186 <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Problems.DataAnalysis-3.4.dll</HintPath> 185 187 
</Reference> 186 188 <Reference Include="HeuristicLab.Problems.DataAnalysis.Symbolic-3.4, Version=3.4.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> 187 189 <SpecificVersion>False</SpecificVersion> 188 <HintPath>..\..\..\..\trunk\ sources\bin\HeuristicLab.Problems.DataAnalysis.Symbolic-3.4.dll</HintPath>190 <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Problems.DataAnalysis.Symbolic-3.4.dll</HintPath> 189 191 </Reference> 190 192 <Reference Include="HeuristicLab.Problems.DataAnalysis.Symbolic.Classification-3.4, Version=3.4.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> 191 193 <SpecificVersion>False</SpecificVersion> 192 <HintPath>..\..\..\..\trunk\ sources\bin\HeuristicLab.Problems.DataAnalysis.Symbolic.Classification-3.4.dll</HintPath>194 <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Problems.DataAnalysis.Symbolic.Classification-3.4.dll</HintPath> 193 195 </Reference> 194 196 <Reference Include="HeuristicLab.Problems.DataAnalysis.Symbolic.Regression-3.4, Version=3.4.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> 195 197 <SpecificVersion>False</SpecificVersion> 196 <HintPath>..\..\..\..\trunk\ sources\bin\HeuristicLab.Problems.DataAnalysis.Symbolic.Regression-3.4.dll</HintPath>198 <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Problems.DataAnalysis.Symbolic.Regression-3.4.dll</HintPath> 197 199 </Reference> 198 200 <Reference Include="HeuristicLab.Problems.DataAnalysis.Symbolic.TimeSeriesPrognosis-3.4, Version=3.4.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> 199 201 <SpecificVersion>False</SpecificVersion> 200 <HintPath>..\..\..\..\trunk\ sources\bin\HeuristicLab.Problems.DataAnalysis.Symbolic.TimeSeriesPrognosis-3.4.dll</HintPath>202 <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Problems.DataAnalysis.Symbolic.TimeSeriesPrognosis-3.4.dll</HintPath> 201 203 </Reference> 202 204 <Reference 
Include="HeuristicLab.Problems.Instances-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> 203 205 <SpecificVersion>False</SpecificVersion> 204 <HintPath>..\..\..\..\trunk\ sources\bin\HeuristicLab.Problems.Instances-3.3.dll</HintPath>206 <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Problems.Instances-3.3.dll</HintPath> 205 207 </Reference> 206 208 <Reference Include="HeuristicLab.Random-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> 207 209 <SpecificVersion>False</SpecificVersion> 208 <HintPath>..\..\..\..\trunk\ sources\bin\HeuristicLab.Random-3.3.dll</HintPath>210 <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Random-3.3.dll</HintPath> 209 211 </Reference> 210 212 <Reference Include="HeuristicLab.Selection-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> 211 213 <SpecificVersion>False</SpecificVersion> 212 <HintPath>..\..\..\..\trunk\ sources\bin\HeuristicLab.Selection-3.3.dll</HintPath>214 <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Selection-3.3.dll</HintPath> 213 215 </Reference> 214 216 <Reference Include="LibSVM-3.12, Version=3.12.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> … … 365 367 <Compile Include="Linear\MultinomialLogitModel.cs" /> 366 368 <Compile Include="Linear\Scaling.cs" /> 367 <Compile Include="M5Regression\Interfaces\ISpliter.cs" />368 369 <Compile Include="M5Regression\Interfaces\IM5Model.cs" /> 369 370 <Compile Include="M5Regression\Interfaces\ILeafModel.cs" /> 370 371 <Compile Include="M5Regression\Interfaces\IPruning.cs" /> 372 <Compile Include="M5Regression\Interfaces\ISplitter.cs" /> 373 <Compile Include="M5Regression\LeafModels\ComponentReducedLinearModel.cs" /> 374 <Compile Include="M5Regression\LeafModels\DampenedLinearModel.cs" /> 375 <Compile Include="M5Regression\LeafModels\PreconstructedLinearModel.cs" /> 371 376 <Compile 
Include="M5Regression\LeafTypes\ComplexLeaf.cs" /> 372 377 <Compile Include="M5Regression\LeafTypes\ComponentReductionLinearLeaf.cs" /> 378 <Compile Include="M5Regression\LeafTypes\LeafBase.cs" /> 373 379 <Compile Include="M5Regression\LeafTypes\ConstantLeaf.cs" /> 374 380 <Compile Include="M5Regression\LeafTypes\GaussianProcessLeaf.cs" /> 375 381 <Compile Include="M5Regression\LeafTypes\LinearLeaf.cs" /> 376 382 <Compile Include="M5Regression\LeafTypes\LogisticLeaf.cs" /> 383 <Compile Include="M5Regression\LeafTypes\M5Leaf.cs" /> 384 <Compile Include="M5Regression\LeafTypes\M5regLeaf.cs" /> 377 385 <Compile Include="M5Regression\M5Regression.cs" /> 378 <Compile Include="M5Regression\M5Utilities\M5StaticUtilities.cs" /> 379 <Compile Include="M5Regression\M5Utilities\M5Analyzer.cs" /> 380 <Compile Include="M5Regression\M5Utilities\M5Parameters.cs" /> 381 <Compile Include="M5Regression\MetaModels\ComponentReducedLinearModel.cs" /> 382 <Compile Include="M5Regression\MetaModels\M5NodeModel.cs" /> 383 <Compile Include="M5Regression\MetaModels\M5RuleModel.cs" /> 384 <Compile Include="M5Regression\MetaModels\M5RuleSetModel.cs" /> 385 <Compile Include="M5Regression\MetaModels\M5TreeModel.cs" /> 386 <Compile Include="M5Regression\MetaModels\DampenedLinearModel.cs" /> 387 <Compile Include="M5Regression\MetaModels\PreconstructedLinearModel.cs" /> 388 <Compile Include="M5Regression\Pruning\M5LinearBottomUpPruning.cs" /> 389 <Compile Include="M5Regression\Pruning\BottomUpPruningBase.cs" /> 386 <Compile Include="M5Regression\M5Utilities\RegressionTreeUtilities.cs" /> 387 <Compile Include="M5Regression\M5Utilities\RegressionTreeAnalyzer.cs" /> 388 <Compile Include="M5Regression\M5Utilities\RegressionTreeParameters.cs" /> 389 <Compile Include="M5Regression\MetaModels\RegressionNodeModel.cs" /> 390 <Compile Include="M5Regression\MetaModels\RegressionRuleModel.cs" /> 391 <Compile Include="M5Regression\MetaModels\RegressionRuleSetModel.cs" /> 392 <Compile 
Include="M5Regression\MetaModels\RegressionNodeTreeModel.cs" /> 393 <Compile Include="M5Regression\Pruning\ComplexityPruning.cs" /> 390 394 <Compile Include="M5Regression\Pruning\NoPruning.cs" /> 391 <Compile Include="M5Regression\Pruning\M5LeafBottomUpPruning.cs" /> 395 <Compile Include="M5Regression\Spliting\CorrelationImpuritiyCalculator.cs" /> 396 <Compile Include="M5Regression\Spliting\CorrelationSplitter.cs" /> 397 <Compile Include="M5Regression\Spliting\SplitterBase.cs" /> 398 <Compile Include="M5Regression\Spliting\M5Splitter.cs" /> 399 <Compile Include="M5Regression\Spliting\NeumaierSum.cs" /> 392 400 <Compile Include="M5Regression\Spliting\OrderImpurityCalculator.cs" /> 393 <Compile Include="M5Regression\Spliting\OptimumSearchingSpliter.cs" /> 394 <Compile Include="M5Regression\Spliting\M5Spliter.cs" /> 401 <Compile Include="M5Regression\Spliting\UnivariateOnlineLR.cs" /> 395 402 <Compile Include="Nca\Initialization\INcaInitializer.cs" /> 396 403 <Compile Include="Nca\Initialization\LdaInitializer.cs" /> -
branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/Interfaces/ILeafModel.cs
r15614 r15830 20 20 #endregion 21 21 22 using System.Collections.Generic; 22 23 using System.Threading; 23 24 using HeuristicLab.Core; … … 27 28 public interface ILeafModel : IParameterizedNamedItem { 28 29 bool ProvidesConfidence { get; } 29 IRegressionModel Build(IRegressionProblemData pd, IRandom random, CancellationToken cancellationToken, out int noParameters); 30 // IRegressionModel Build(IRegressionProblemData pd, IRandom random, CancellationToken cancellationToken, out int noParameters); 30 31 int MinLeafSize(IRegressionProblemData pd); 32 33 void Initialize(IScope states); 34 void Build(RegressionNodeTreeModel tree, IReadOnlyList<int> trainingRows, IScope stateScope, CancellationToken cancellationToken); 35 IRegressionModel BuildModel(IReadOnlyList<int> rows, RegressionTreeParameters parameters, CancellationToken cancellation, out int numParams); 31 36 } 32 37 } -
branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/Interfaces/IM5Model.cs
r15614 r15830 22 22 using System.Collections.Generic; 23 23 using System.Threading; 24 using HeuristicLab.Core; 25 using HeuristicLab.Optimization; 24 26 using HeuristicLab.Problems.DataAnalysis; 25 27 26 28 namespace HeuristicLab.Algorithms.DataAnalysis { 27 internal interface IM5Model : IRegressionModel { 28 void Build(IReadOnlyList<int> trainingRows, IReadOnlyList<int> pruningRows, M5Parameters m5Params, CancellationToken cancellationToken); 29 void Update(IReadOnlyList<int> rows, M5Parameters m5Parameters, CancellationToken cancellationToken); 29 public interface IM5Model : IRegressionModel { 30 void Build(IReadOnlyList<int> trainingRows, IReadOnlyList<int> pruningRows, IScope stateScope, ResultCollection results, CancellationToken cancellationToken); 31 void Update(IReadOnlyList<int> rows, IScope stateScope, CancellationToken cancellationToken); 30 32 } 31 33 } -
branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/Interfaces/IPruning.cs
r15614 r15830 20 20 #endregion 21 21 using System.Collections.Generic; 22 using System.Threading; 22 23 using HeuristicLab.Core; 23 24 using HeuristicLab.Problems.DataAnalysis; … … 26 27 public interface IPruning : IParameterizedNamedItem { 27 28 int MinLeafSize(IRegressionProblemData pd, ILeafModel leafModel); 29 30 void Initialize(IScope states); 31 void Prune(RegressionNodeTreeModel treeModel, IReadOnlyList<int> trainingRows, IReadOnlyList<int> pruningRows, IScope scope, CancellationToken cancellationToken); 28 32 } 29 33 } -
branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/LeafTypes/ComplexLeaf.cs
r15614 r15830 32 32 [StorableClass] 33 33 [Item("ComplexLeaf", "A leaf type that uses an arbitriary RegressionAlgorithm to create leaf models")] 34 public class ComplexLeaf : ParameterizedNamedItem, ILeafModel { 34 public class ComplexLeaf : LeafBase { 35 35 public const string RegressionParameterName = "Regression"; 36 36 public IValueParameter<IDataAnalysisAlgorithm<IRegressionProblem>> RegressionParameter { … … 55 55 56 56 #region IModelType 57 public bool ProvidesConfidence { 57 public override bool ProvidesConfidence { 58 58 get { return false; } 59 59 } 60 public IRegressionModel Build(IRegressionProblemData pd, IRandom random, CancellationToken cancellationToken, out int noParameters) { 60 public override IRegressionModel Build(IRegressionProblemData pd, IRandom random, CancellationToken cancellationToken, out int noParameters) { 61 61 if (pd.Dataset.Rows < MinLeafSize(pd)) throw new ArgumentException("The number of training instances is too small to create a linear model"); 62 62 noParameters = pd.Dataset.Rows + 1; 63 63 Regression.Problem = new RegressionProblem {ProblemData = pd}; 64 var res = M5StaticUtilities.RunSubAlgorithm(Regression, random.Next(), cancellationToken); 64 var res = RegressionTreeUtilities.RunSubAlgorithm(Regression, random.Next(), cancellationToken); 65 65 var t = res.Select(x => x.Value).OfType<IRegressionSolution>().FirstOrDefault(); 66 66 if (t == null) throw new ArgumentException("No RegressionSolution was provided by the algorithm"); 67 67 return t.Model; 68 68 } 69 70 public int MinLeafSize(IRegressionProblemData pd) { 69 public override int MinLeafSize(IRegressionProblemData pd) { 71 70 return 3; 72 71 } -
branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/LeafTypes/ComponentReductionLinearLeaf.cs
r15614 r15830 34 34 [StorableClass] 35 35 [Item("ComponentReductionLinearLeaf", "A leaf type that uses principle component analysis to create smaller linear models as leaf models")] 36 public class ComponentReductionLinearLeaf : ParameterizedNamedItem, ILeafModel { 36 public class ComponentReductionLinearLeaf : LeafBase { 37 37 public const string NoComponentsParameterName = "NoComponents"; 38 38 public IFixedValueParameter<IntValue> NoComponentsParameter { … … 45 45 #region Constructors & Cloning 46 46 [StorableConstructor] 47 private ComponentReductionLinearLeaf(bool deserializing) : base(deserializing) { } 48 private ComponentReductionLinearLeaf(ComponentReductionLinearLeaf original, Cloner cloner) : base(original, cloner) { } 47 protected ComponentReductionLinearLeaf(bool deserializing) : base(deserializing) { } 48 protected ComponentReductionLinearLeaf(ComponentReductionLinearLeaf original, Cloner cloner) : base(original, cloner) { } 49 49 public ComponentReductionLinearLeaf() { 50 50 Parameters.Add(new FixedValueParameter<IntValue>(NoComponentsParameterName, "The maximum number of principle components used", new IntValue(10))); … … 56 56 57 57 #region IModelType 58 public bool ProvidesConfidence { 58 public override bool ProvidesConfidence { 59 59 get { return true; } 60 60 } 61 public IRegressionModel Build(IRegressionProblemData pd, IRandom random, 61 public override IRegressionModel Build(IRegressionProblemData pd, IRandom random, 62 62 CancellationToken cancellationToken, out int noParameters) { 63 63 var pca = PrincipleComponentTransformation.CreateProjection(pd.Dataset, pd.TrainingIndices, pd.AllowedInputVariables, true); … … 82 82 } 83 83 84 public int MinLeafSize(IRegressionProblemData pd) { 84 public override int MinLeafSize(IRegressionProblemData pd) { 85 85 return NoComponents + 2; 86 86 } -
branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/LeafTypes/ConstantLeaf.cs
r15614 r15830 31 31 [StorableClass] 32 32 [Item("ConstantLeaf", "A leaf type that uses constant models as leaf models")] 33 public class ConstantLeaf : ParameterizedNamedItem, ILeafModel { 33 public class ConstantLeaf : LeafBase { 34 34 #region Constructors & Cloning 35 35 [StorableConstructor] 36 private ConstantLeaf(bool deserializing) : base(deserializing) { } 37 private ConstantLeaf(ConstantLeaf original, Cloner cloner) : base(original, cloner) { } 36 protected ConstantLeaf(bool deserializing) : base(deserializing) { } 37 protected ConstantLeaf(ConstantLeaf original, Cloner cloner) : base(original, cloner) { } 38 38 public ConstantLeaf() { } 39 39 public override IDeepCloneable Clone(Cloner cloner) { … … 43 43 44 44 #region IModelType 45 public bool ProvidesConfidence { 45 public override bool ProvidesConfidence { 46 46 get { return false; } 47 47 } 48 public IRegressionModel Build(IRegressionProblemData pd, IRandom random, CancellationToken cancellationToken, out int noParameters) { 48 public override IRegressionModel Build(IRegressionProblemData pd, IRandom random, CancellationToken cancellationToken, out int noParameters) { 49 49 if (pd.Dataset.Rows < MinLeafSize(pd)) throw new ArgumentException("The number of training instances is too small to create a linear model"); 50 50 noParameters = 1; … … 52 52 } 53 53 54 public int MinLeafSize(IRegressionProblemData pd) { 54 public override int MinLeafSize(IRegressionProblemData pd) { 55 55 return 0; 56 56 } -
branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/LeafTypes/GaussianProcessLeaf.cs
r15614 r15830 33 33 [StorableClass] 34 34 [Item("GaussianProcessLeaf", "A leaf type that uses gaussian process models as leaf models.")] 35 public class GaussianProcessLeaf : ParameterizedNamedItem, ILeafModel { 35 public class GaussianProcessLeaf : LeafBase { 36 36 #region ParameterNames 37 37 public const string TriesParameterName = "Tries"; … … 59 59 #region Constructors & Cloning 60 60 [StorableConstructor] 61 private GaussianProcessLeaf(bool deserializing) : base(deserializing) { } 62 private GaussianProcessLeaf(GaussianProcessLeaf original, Cloner cloner) : base(original, cloner) { } 61 protected GaussianProcessLeaf(bool deserializing) : base(deserializing) { } 62 protected GaussianProcessLeaf(GaussianProcessLeaf original, Cloner cloner) : base(original, cloner) { } 63 63 public GaussianProcessLeaf() { 64 64 var gp = new GaussianProcessRegression(); … … 75 75 76 76 #region IModelType 77 public bool ProvidesConfidence { 77 public override bool ProvidesConfidence { 78 78 get { return true; } 79 79 } 80 public IRegressionModel Build(IRegressionProblemData pd, IRandom random, CancellationToken cancellationToken, out int noParameters) { 80 public override IRegressionModel Build(IRegressionProblemData pd, IRandom random, CancellationToken cancellationToken, out int noParameters) { 81 81 if (pd.Dataset.Rows < MinLeafSize(pd)) throw new ArgumentException("The number of training instances is too small to create a gaussian process model"); 82 82 Regression.Problem = new RegressionProblem {ProblemData = pd}; … … 85 85 86 86 for (var i = 0; i < Tries; i++) { 87 var res = M5StaticUtilities.RunSubAlgorithm(Regression, random.Next(), cancellationToken); 87 var res = RegressionTreeUtilities.RunSubAlgorithm(Regression, random.Next(), cancellationToken); 88 88 var t = res.Select(x => x.Value).OfType<GaussianProcessRegressionSolution>().FirstOrDefault(); 89 89 var score = ((DoubleValue)res["Negative log pseudo-likelihood (LOO-CV)"].Value).Value; … … 101 101 } 102 102 103 public int
MinLeafSize(IRegressionProblemData pd) { 103 public override int MinLeafSize(IRegressionProblemData pd) { 104 104 return 3; 105 105 } -
branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/LeafTypes/LinearLeaf.cs
r15614 r15830 31 31 [StorableClass] 32 32 [Item("LinearLeaf", "A leaf type that uses linear models as leaf models. This is the standard for M5' regression")] 33 public class LinearLeaf : ParameterizedNamedItem, ILeafModel { 33 public class LinearLeaf : LeafBase { 34 34 #region Constructors & Cloning 35 35 [StorableConstructor] 36 private LinearLeaf(bool deserializing) : base(deserializing) { } 37 private LinearLeaf(LinearLeaf original, Cloner cloner) : base(original, cloner) { } 36 protected LinearLeaf(bool deserializing) : base(deserializing) { } 37 protected LinearLeaf(LinearLeaf original, Cloner cloner) : base(original, cloner) { } 38 38 public LinearLeaf() { } 39 39 public override IDeepCloneable Clone(Cloner cloner) { … … 43 43 44 44 #region IModelType 45 public bool ProvidesConfidence { 45 public override bool ProvidesConfidence { 46 46 get { return true; } 47 47 } 48 public IRegressionModel Build(IRegressionProblemData pd, IRandom random, CancellationToken cancellationToken, out int noParameters) { 48 public override IRegressionModel Build(IRegressionProblemData pd, IRandom random, CancellationToken cancellationToken, out int noParameters) { 49 49 if (pd.Dataset.Rows < MinLeafSize(pd)) throw new ArgumentException("The number of training instances is too small to create a linear model"); 50 50 double rmse, cvRmse; 51 51 noParameters = pd.AllowedInputVariables.Count() + 1; 52 return PreconstructedLinearModel.CreateConfidenceLinearModel(pd, out rmse, out cvRmse); 52 var res = PreconstructedLinearModel.CreateConfidenceLinearModel(pd, out rmse, out cvRmse); 53 return res; 53 54 } 54 55 55 public int MinLeafSize(IRegressionProblemData pd) { 56 return pd.AllowedInputVariables.Count() + 2; 56 public override int MinLeafSize(IRegressionProblemData pd) { 57 return pd.AllowedInputVariables.Count() == 1 ? 2 : pd.AllowedInputVariables.Count() + 2; 57 58 } 58 59 #endregion -
branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/LeafTypes/LogisticLeaf.cs
r15614 r15830 33 33 [StorableClass] 34 34 [Item("LogisticLeaf", "A leaf type that uses linear models with a logistic dampening as leaf models. Dampening reduces prediction values far outside the observed target values.")] 35 public class LogisticLeaf : ParameterizedNamedItem, ILeafModel { 35 public class LogisticLeaf : LeafBase { 36 36 private const string DampeningParameterName = "Dampening"; 37 37 public IFixedValueParameter<DoubleValue> DampeningParameter { … … 55 55 56 56 #region IModelType 57 public bool ProvidesConfidence { 57 public override bool ProvidesConfidence { 58 58 get { return true; } 59 59 } 60 public IRegressionModel Build(IRegressionProblemData pd, IRandom random, CancellationToken cancellationToken, out int noParameters) { 61 if (pd.Dataset.Rows < MinLeafSize(pd)) throw new ArgumentException("The number of training instances is too small to create a linear model"); 62 double rmse, cvRmse; 63 noParameters = pd.AllowedInputVariables.Count() + 1; 64 return new DampenedLinearModel(PreconstructedLinearModel.CreateConfidenceLinearModel(pd, out rmse, out cvRmse), pd, Dampening); 60 public override IRegressionModel Build(IRegressionProblemData pd, IRandom random, CancellationToken cancellationToken, out int noParameters) { 61 var res = (IConfidenceRegressionModel)new LinearLeaf().Build(pd, random, cancellationToken, out noParameters); 62 return new DampenedLinearModel(res, pd, Dampening); 65 63 } 66 64 67 public int MinLeafSize(IRegressionProblemData pd) { 65 public override int MinLeafSize(IRegressionProblemData pd) { 68 66 return pd.AllowedInputVariables.Count() + 2; 69 67 } -
branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/M5Regression.cs
r15614 r15830 19 19 [Item("M5RegressionTree", "A M5 regression tree / rule set")] 20 20 public sealed class M5Regression : FixedDataAnalysisAlgorithm<IRegressionProblem> { 21 public override bool SupportsPause { 22 get { return true; } 23 } 24 25 public const string RegressionTreeParameterVariableName = "RegressionTreeParameters"; 26 public const string ModelVariableName = "Model"; 27 public const string PruningSetVariableName = "PruningSet"; 28 public const string TrainingSetVariableName = "TrainingSet"; 29 21 30 #region Parametername 22 31 private const string GenerateRulesParameterName = "GenerateRules"; 23 32 private const string HoldoutSizeParameterName = "HoldoutSize"; 24 private const string SpliterParameterName = "Split er";33 private const string SpliterParameterName = "Splitter"; 25 34 private const string MinimalNodeSizeParameterName = "MinimalNodeSize"; 26 35 private const string LeafModelParameterName = "LeafModel"; … … 38 47 get { return (IFixedValueParameter<PercentValue>)Parameters[HoldoutSizeParameterName]; } 39 48 } 40 public IConstrainedValueParameter<ISplit er> ImpurityParameter {41 get { return (IConstrainedValueParameter<ISplit er>)Parameters[SpliterParameterName]; }49 public IConstrainedValueParameter<ISplitter> ImpurityParameter { 50 get { return (IConstrainedValueParameter<ISplitter>)Parameters[SpliterParameterName]; } 42 51 } 43 52 public IFixedValueParameter<IntValue> MinimalNodeSizeParameter { … … 68 77 get { return HoldoutSizeParameter.Value.Value; } 69 78 } 70 public ISplit er Split{79 public ISplitter Splitter { 71 80 get { return ImpurityParameter.Value; } 72 81 } … … 89 98 get { return UseHoldoutParameter.Value.Value; } 90 99 } 100 #endregion 101 102 #region State 103 [Storable] 104 private IScope stateScope; 91 105 #endregion 92 106 … … 94 108 [StorableConstructor] 95 109 private M5Regression(bool deserializing) : base(deserializing) { } 96 private M5Regression(M5Regression original, Cloner cloner) : base(original, cloner) { } 110 
private M5Regression(M5Regression original, Cloner cloner) : base(original, cloner) { 111 /* copy the source instance's state scope; this instance's own stateScope field has not been assigned yet */ stateScope = cloner.Clone(original.stateScope); 112 } 97 113 public M5Regression() { 98 114 var modelSet = new ItemSet<ILeafModel>(ApplicationManager.Manager.GetInstances<ILeafModel>()); 99 115 var pruningSet = new ItemSet<IPruning>(ApplicationManager.Manager.GetInstances<IPruning>()); 100 var impuritySet = new ItemSet<ISplit er>(ApplicationManager.Manager.GetInstances<ISpliter>());116 var impuritySet = new ItemSet<ISplitter>(ApplicationManager.Manager.GetInstances<ISplitter>()); 101 117 Parameters.Add(new FixedValueParameter<BoolValue>(GenerateRulesParameterName, "Whether a set of rules or a decision tree shall be created", new BoolValue(false))); 102 118 Parameters.Add(new FixedValueParameter<PercentValue>(HoldoutSizeParameterName, "How much of the training set shall be reserved for pruning", new PercentValue(0.2))); 103 Parameters.Add(new ConstrainedValueParameter<ISplit er>(SpliterParameterName, "The type of split function used to create node splits", impuritySet, impuritySet.OfType<M5Spliter>().First()));119 Parameters.Add(new ConstrainedValueParameter<ISplitter>(SpliterParameterName, "The type of split function used to create node splits", impuritySet, impuritySet.OfType<M5Splitter>().First())); 104 120 Parameters.Add(new FixedValueParameter<IntValue>(MinimalNodeSizeParameterName, "The minimal number of samples in a leaf node", new IntValue(1))); 105 121 Parameters.Add(new ConstrainedValueParameter<ILeafModel>(LeafModelParameterName, "The type of model used for the nodes", modelSet, modelSet.OfType<LinearLeaf>().First())); 106 Parameters.Add(new ConstrainedValueParameter<IPruning>(PruningTypeParameterName, "The type of pruning used", pruningSet, pruningSet.OfType< M5LinearBottomUpPruning>().First()));122 Parameters.Add(new ConstrainedValueParameter<IPruning>(PruningTypeParameterName, "The type of pruning used", pruningSet, pruningSet.OfType<ComplexityPruning>().First())); 107 123 
Parameters.Add(new FixedValueParameter<IntValue>(SeedParameterName, "The random seed used to initialize the new pseudo random number generator.", new IntValue(0))); 108 124 Parameters.Add(new FixedValueParameter<BoolValue>(SetSeedRandomlyParameterName, "True if the random seed should be set to a random value, otherwise false.", new BoolValue(true))); … … 115 131 #endregion 116 132 117 protected override void Run(CancellationToken cancellationToken) { 133 protected override void Initialize(CancellationToken cancellationToken) { 134 base.Initialize(cancellationToken); 118 135 var random = new MersenneTwister(); 119 136 if (SetSeedRandomly) SeedParameter.Value.Value = new System.Random().Next(); 120 137 random.Reset(Seed); 121 var solution = CreateM5RegressionSolution(Problem.ProblemData, random, LeafModel, Split, Pruning, UseHoldout, HoldoutSize, MinimalNodeSize, GenerateRules, Results, cancellationToken); 122 AnalyzeSolution(solution); 138 stateScope = InitializeScope(random, Problem.ProblemData, Pruning, MinimalNodeSize, LeafModel, Splitter, GenerateRules, UseHoldout, HoldoutSize); 139 stateScope.Variables.Add(new Variable("Algorithm", this)); 140 Results.AddOrUpdateResult("StateScope", stateScope); 141 } 142 143 protected override void Run(CancellationToken cancellationToken) { 144 var model = Build(stateScope, Results, cancellationToken); 145 AnalyzeSolution(model.CreateRegressionSolution(Problem.ProblemData), Results, Problem.ProblemData); 123 146 } 124 147 125 148 #region Static Interface 126 public static IRegressionSolution CreateM5RegressionSolution(IRegressionProblemData problemData, IRandom random, 127 ILeafModel leafModel = null, ISpliter spliter = null, IPruning pruning = null, 128 bool useHoldout = false, double holdoutSize = 0.2, int minNumInstances = 4, bool generateRules = false, ResultCollection results = null, CancellationToken? 
cancellationToken = null) { 129 //set default values 149 public static IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData, IRandom random, ILeafModel leafModel = null, ISplitter splitter = null, IPruning pruning = null, 150 bool useHoldout = false, double holdoutSize = 0.2, int minimumLeafSize = 4, bool generateRules = false, ResultCollection results = null, CancellationToken? cancellationToken = null) { 130 151 if (leafModel == null) leafModel = new LinearLeaf(); 131 if (split er == null) spliter = new M5Spliter();152 if (splitter == null) splitter = new M5Splitter(); 132 153 if (cancellationToken == null) cancellationToken = CancellationToken.None; 133 if (pruning == null) pruning = new M5LeafBottomUpPruning(); 134 154 if (pruning == null) pruning = new ComplexityPruning(); 155 156 var stateScope = InitializeScope(random, problemData, pruning, minimumLeafSize, leafModel, splitter, generateRules, useHoldout, holdoutSize); 157 var model = Build(stateScope, results, cancellationToken.Value); 158 return model.CreateRegressionSolution(problemData); 159 } 160 161 public static void UpdateModel(IM5Model model, IRegressionProblemData problemData, IRandom random, ILeafModel leafModel, CancellationToken? 
cancellationToken = null) { 162 if (cancellationToken == null) cancellationToken = CancellationToken.None; 163 var regressionTreeParameters = new RegressionTreeParameters(leafModel, problemData, random); 164 var scope = new Scope(); 165 scope.Variables.Add(new Variable(RegressionTreeParameterVariableName, regressionTreeParameters)); 166 leafModel.Initialize(scope); 167 model.Update(problemData.TrainingIndices.ToList(), scope, cancellationToken.Value); 168 } 169 #endregion 170 171 #region Helpers 172 private static IScope InitializeScope(IRandom random, IRegressionProblemData problemData, IPruning pruning, int minLeafSize, ILeafModel leafModel, ISplitter splitter, bool GenerateRules, bool useHoldout, double holdoutSize) { 173 var stateScope = new Scope("RegressionTreeStateScope"); 174 175 //reduce RegressionProblemData to AllowedInput & Target column wise and to TrainingSet row wise 135 176 var doubleVars = new HashSet<string>(problemData.Dataset.DoubleVariables); 136 177 var vars = problemData.AllowedInputVariables.Concat(new[] {problemData.TargetVariable}).ToArray(); 137 178 if (vars.Any(v => !doubleVars.Contains(v))) throw new NotSupportedException("M5 regression supports only double valued input or output features."); 138 139 var values = vars.Select(v => problemData.Dataset.GetDoubleValues(v, problemData.TrainingIndices).ToArray()).ToArray(); 140 if (values.Any(v => v.Any(x => double.IsNaN(x) || double.IsInfinity(x)))) 179 var doubles = vars.Select(v => problemData.Dataset.GetDoubleValues(v, problemData.TrainingIndices).ToArray()).ToArray(); 180 if (doubles.Any(v => v.Any(x => double.IsNaN(x) || double.IsInfinity(x)))) 141 181 throw new NotSupportedException("M5 regression does not support NaN or infinity values in the input dataset."); 142 143 var trainingData = new Dataset(vars, values); 182 var trainingData = new Dataset(vars, doubles); 144 183 var pd = new RegressionProblemData(trainingData, problemData.AllowedInputVariables, problemData.TargetVariable); 
145 184 pd.TrainingPartition.End = pd.TestPartition.Start = pd.TestPartition.End = pd.Dataset.Rows; 146 185 pd.TrainingPartition.Start = 0; 147 186 148 //create & build Model 149 var m5Params = new M5Parameters(pruning, minNumInstances, leafModel, pd, random, spliter, results); 150 151 IReadOnlyList<int> trainingRows, pruningRows; 152 GeneratePruningSet(problemData.TrainingIndices.ToArray(), random, useHoldout, holdoutSize, out trainingRows, out pruningRows); 153 154 IM5Model model; 155 if (generateRules) 156 model = M5RuleSetModel.CreateRuleModel(problemData.TargetVariable, m5Params); 157 else 158 model = M5TreeModel.CreateTreeModel(problemData.TargetVariable, m5Params); 159 160 model.Build(trainingRows, pruningRows, m5Params, cancellationToken.Value); 161 return model.CreateRegressionSolution(problemData); 162 } 163 164 public static void UpdateM5Model(IRegressionModel model, IRegressionProblemData problemData, IRandom random, 165 ILeafModel leafModel, CancellationToken? cancellationToken = null) { 166 var m5Model = model as IM5Model; 167 if (m5Model == null) throw new ArgumentException("This type of model can not be updated"); 168 UpdateM5Model(m5Model, problemData, random, leafModel, cancellationToken); 169 } 170 171 private static void UpdateM5Model(IM5Model model, IRegressionProblemData problemData, IRandom random, 172 ILeafModel leafModel = null, CancellationToken? 
cancellationToken = null) { 173 if (cancellationToken == null) cancellationToken = CancellationToken.None; 174 var m5Params = new M5Parameters(leafModel, problemData, random); 175 model.Update(problemData.TrainingIndices.ToList(), m5Params, cancellationToken.Value); 176 } 177 #endregion 178 179 #region Helpers 187 //store regression tree parameters 188 var regressionTreeParams = new RegressionTreeParameters(pruning, minLeafSize, leafModel, pd, random, splitter); 189 stateScope.Variables.Add(new Variable(RegressionTreeParameterVariableName, regressionTreeParams)); 190 191 //initialize tree operators 192 pruning.Initialize(stateScope); 193 splitter.Initialize(stateScope); 194 leafModel.Initialize(stateScope); 195 196 //store unbuilt model 197 IItem model; 198 if (GenerateRules) { 199 model = RegressionRuleSetModel.CreateRuleModel(problemData.TargetVariable, regressionTreeParams); 200 RegressionRuleSetModel.Initialize(stateScope); 201 } 202 else { 203 model = RegressionNodeTreeModel.CreateTreeModel(problemData.TargetVariable, regressionTreeParams); 204 } 205 stateScope.Variables.Add(new Variable(ModelVariableName, model)); 206 207 //store training & pruning indices 208 IReadOnlyList<int> trainingSet, pruningSet; 209 GeneratePruningSet(pd.TrainingIndices.ToArray(), random, useHoldout, holdoutSize, out trainingSet, out pruningSet); 210 stateScope.Variables.Add(new Variable(TrainingSetVariableName, new IntArray(trainingSet.ToArray()))); 211 stateScope.Variables.Add(new Variable(PruningSetVariableName, new IntArray(pruningSet.ToArray()))); 212 213 return stateScope; 214 } 215 216 private static IRegressionModel Build(IScope stateScope, ResultCollection results, CancellationToken cancellationToken) { 217 var model = (IM5Model)stateScope.Variables[ModelVariableName].Value; 218 var trainingRows = (IntArray)stateScope.Variables[TrainingSetVariableName].Value; 219 var pruningRows = (IntArray)stateScope.Variables[PruningSetVariableName].Value; 220 
model.Build(trainingRows.ToArray(), pruningRows.ToArray(), stateScope, results, cancellationToken); 221 return model; 222 } 223 180 224 private static void GeneratePruningSet(IReadOnlyList<int> allrows, IRandom random, bool useHoldout, double holdoutSize, out IReadOnlyList<int> training, out IReadOnlyList<int> pruning) { 181 225 if (!useHoldout) { … … 190 234 } 191 235 192 private void AnalyzeSolution(IRegressionSolution solution) { 193 Results.Add(new Result("RegressionSolution", (IItem)solution.Clone())); 194 195 Dictionary<string, int> frequencies; 196 if (!GenerateRules) { 197 Results.Add(M5Analyzer.CreateLeafDepthHistogram((M5TreeModel)solution.Model)); 198 frequencies = M5Analyzer.GetTreeVariableFrequences((M5TreeModel)solution.Model); 199 } 200 else { 201 Results.Add(M5Analyzer.CreateRulesResult((M5RuleSetModel)solution.Model, Problem.ProblemData, "M5TreeResult", true)); 202 frequencies = M5Analyzer.GetRuleVariableFrequences((M5RuleSetModel)solution.Model); 203 Results.Add(M5Analyzer.CreateCoverageDiagram((M5RuleSetModel)solution.Model, Problem.ProblemData)); 236 private void AnalyzeSolution(IRegressionSolution solution, ResultCollection results, IRegressionProblemData problemData) { 237 results.Add(new Result("RegressionSolution", (IItem)solution.Clone())); 238 239 Dictionary<string, int> frequencies = null; 240 241 var tree = solution.Model as RegressionNodeTreeModel; 242 if (tree != null) { 243 results.Add(RegressionTreeAnalyzer.CreateLeafDepthHistogram(tree)); 244 frequencies = RegressionTreeAnalyzer.GetTreeVariableFrequences(tree); 245 RegressionTreeAnalyzer.AnalyzeNodes(tree, results, problemData); 246 } 247 248 var ruleSet = solution.Model as RegressionRuleSetModel; 249 if (ruleSet != null) { 250 results.Add(RegressionTreeAnalyzer.CreateRulesResult(ruleSet, problemData, "M5Rules", true)); 251 frequencies = RegressionTreeAnalyzer.GetRuleVariableFrequences(ruleSet); 252 results.Add(RegressionTreeAnalyzer.CreateCoverageDiagram(ruleSet, problemData)); 204 
253 } 205 254 206 255 //Variable frequencies 207 var sum = frequencies.Values.Sum(); 208 sum = sum == 0 ? 1 : sum; 209 var impactArray = new DoubleArray(frequencies.Select(i => (double)i.Value / sum).ToArray()) { 210 ElementNames = frequencies.Select(i => i.Key) 211 }; 212 Results.Add(new Result("Variable Frequences", "relative frequencies of variables in rules and tree nodes", impactArray)); 256 if (frequencies != null) { 257 var sum = frequencies.Values.Sum(); 258 sum = sum == 0 ? 1 : sum; 259 var impactArray = new DoubleArray(frequencies.Select(i => (double)i.Value / sum).ToArray()) { 260 ElementNames = frequencies.Select(i => i.Key) 261 }; 262 results.Add(new Result("Variable Frequences", "relative frequencies of variables in rules and tree nodes", impactArray)); 263 } 264 265 var pruning = Pruning as ComplexityPruning; 266 if (pruning != null && tree != null) 267 RegressionTreeAnalyzer.PruningChart(tree, pruning, results); 213 268 } 214 269 #endregion -
branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/Pruning/NoPruning.cs
r15614 r15830 21 21 22 22 using System.Collections.Generic; 23 using System.Threading; 23 24 using HeuristicLab.Common; 24 25 using HeuristicLab.Core; … … 38 39 return new NoPruning(this, cloner); 39 40 } 41 #endregion 42 43 40 44 public int MinLeafSize(IRegressionProblemData pd, ILeafModel leafModel) { 41 45 return 0; 42 46 } 43 #endregion 47 public void Initialize(IScope states) { } 48 49 public void Prune(RegressionNodeTreeModel treeModel, IReadOnlyList<int> trainingRows, IReadOnlyList<int> pruningRows, IScope scope, CancellationToken cancellationToken) { } 44 50 } 45 51 } -
branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/Spliting/OrderImpurityCalculator.cs
r15614 r15830 28 28 /// <summary> 29 29 /// Helper class for incremental split calculation. 30 /// Used while moving a potential Split er along the ordered training Instances30 /// Used while moving a potential Splitter along the ordered training Instances 31 31 /// </summary> 32 32 internal class OrderImpurityCalculator { … … 105 105 VarRight = NoRight <= 0 ? 0 : Math.Abs(NoRight * SqSumRight - SumRight * SumRight) / (NoRight * NoRight); 106 106 107 if (Order <= 0) throw new ArgumentException("Split er order must be larger than 0");107 if (Order <= 0) throw new ArgumentException("Splitter order must be larger than 0"); 108 108 if (Order.IsAlmost(1)) { 109 109 y = VarTotal; … … 116 116 yr = Math.Pow(VarRight, 1.0 / Order); 117 117 } 118 var t = NoRight + NoLeft; 119 if (NoLeft <= 0.0 || NoRight <= 0.0) Impurity = double.MinValue; //Spliter = 0; 120 else Impurity = y - NoLeft / t * yl - NoRight / t * yr; // Spliter = y - NoLeft / NoRight * yl - NoRight / NoLeft * yr 118 if (NoLeft <= 0.0 || NoRight <= 0.0) Impurity = double.MinValue; //Splitter = 0; 119 else Impurity = y - (NoLeft * yl + NoRight * yr) / (NoRight + NoLeft); 121 120 } 122 121 #endregion
Note: See TracChangeset
for help on using the changeset viewer.