Changeset 15830


Ignore:
Timestamp:
03/08/18 08:46:40 (15 months ago)
Author:
bwerth
Message:

#2847: adapted project to the new repository structure; made major changes to interfaces; restructured splitting and pruning

Location:
branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4
Files:
23 added
13 edited

Legend:

Unmodified
Added
Removed
  • branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj

    r15614 r15830  
    4343    <DebugType>full</DebugType>
    4444    <Optimize>false</Optimize>
    45     <OutputPath>..\..\..\..\trunk\sources\bin\</OutputPath>
     45    <OutputPath>..\..\..\..\trunk\bin\</OutputPath>
    4646    <DefineConstants>DEBUG;TRACE</DefineConstants>
    4747    <ErrorReport>prompt</ErrorReport>
     
    5454    <DebugType>pdbonly</DebugType>
    5555    <Optimize>true</Optimize>
    56     <OutputPath>..\..\..\..\trunk\sources\bin\</OutputPath>
     56    <OutputPath>..\..\..\..\trunk\bin\</OutputPath>
    5757    <DefineConstants>TRACE</DefineConstants>
    5858    <ErrorReport>prompt</ErrorReport>
     
    6565  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|x86' ">
    6666    <DebugSymbols>true</DebugSymbols>
    67     <OutputPath>..\..\..\..\trunk\sources\bin\</OutputPath>
     67    <OutputPath>..\..\..\..\trunk\bin\</OutputPath>
    6868    <DefineConstants>DEBUG;TRACE</DefineConstants>
    6969    <DebugType>full</DebugType>
     
    7474  </PropertyGroup>
    7575  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|x86' ">
    76     <OutputPath>..\..\..\..\trunk\sources\bin\</OutputPath>
     76    <OutputPath>..\..\..\..\trunk\bin\</OutputPath>
    7777    <DefineConstants>TRACE</DefineConstants>
    7878    <DocumentationFile>
     
    8787  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|x64' ">
    8888    <DebugSymbols>true</DebugSymbols>
    89     <OutputPath>..\..\..\..\trunk\sources\bin\</OutputPath>
     89    <OutputPath>..\..\..\..\trunk\bin\</OutputPath>
    9090    <DefineConstants>DEBUG;TRACE</DefineConstants>
    9191    <DebugType>full</DebugType>
     
    9696  </PropertyGroup>
    9797  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|x64' ">
    98     <OutputPath>..\..\..\..\trunk\sources\bin\</OutputPath>
     98    <OutputPath>..\..\..\..\trunk\bin\</OutputPath>
    9999    <DefineConstants>TRACE</DefineConstants>
    100100    <DocumentationFile>
     
    108108  </PropertyGroup>
    109109  <ItemGroup>
    110     <Reference Include="ALGLIB-3.7.0, Version=3.7.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
    111       <HintPath>..\..\bin\ALGLIB-3.7.0.dll</HintPath>
    112       <Private>False</Private>
    113     </Reference>
    114     <Reference Include="HeuristicLab.Algorithms.GradientDescent-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
    115       <SpecificVersion>False</SpecificVersion>
    116       <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.Algorithms.GradientDescent-3.3.dll</HintPath>
     110    <Reference Include="ALGLIB-3.7.0">
     111      <HintPath>..\..\..\..\trunk\bin\ALGLIB-3.7.0.dll</HintPath>
     112    </Reference>
     113    <Reference Include="HeuristicLab.Algorithms.DataAnalysis.Glmnet-3.4, Version=3.4.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
     114      <SpecificVersion>False</SpecificVersion>
     115      <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Algorithms.DataAnalysis.Glmnet-3.4.dll</HintPath>
     116    </Reference>
     117    <Reference Include="HeuristicLab.Algorithms.GradientDescent-3.3">
     118      <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Algorithms.GradientDescent-3.3.dll</HintPath>
    117119    </Reference>
    118120    <Reference Include="HeuristicLab.Algorithms.OffspringSelectionGeneticAlgorithm-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
    119121      <SpecificVersion>False</SpecificVersion>
    120       <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.Algorithms.OffspringSelectionGeneticAlgorithm-3.3.dll</HintPath>
     122      <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Algorithms.OffspringSelectionGeneticAlgorithm-3.3.dll</HintPath>
    121123    </Reference>
    122124    <Reference Include="HeuristicLab.Analysis-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
    123125      <SpecificVersion>False</SpecificVersion>
    124       <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.Analysis-3.3.dll</HintPath>
     126      <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Analysis-3.3.dll</HintPath>
    125127    </Reference>
    126128    <Reference Include="HeuristicLab.Collections-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
    127129      <SpecificVersion>False</SpecificVersion>
    128       <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.Collections-3.3.dll</HintPath>
     130      <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Collections-3.3.dll</HintPath>
    129131    </Reference>
    130132    <Reference Include="HeuristicLab.Common-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
    131133      <SpecificVersion>False</SpecificVersion>
    132       <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.Common-3.3.dll</HintPath>
     134      <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Common-3.3.dll</HintPath>
    133135    </Reference>
    134136    <Reference Include="HeuristicLab.Common.Resources-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
    135137      <SpecificVersion>False</SpecificVersion>
    136       <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.Common.Resources-3.3.dll</HintPath>
     138      <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Common.Resources-3.3.dll</HintPath>
    137139    </Reference>
    138140    <Reference Include="HeuristicLab.Core-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
    139141      <SpecificVersion>False</SpecificVersion>
    140       <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.Core-3.3.dll</HintPath>
     142      <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Core-3.3.dll</HintPath>
    141143    </Reference>
    142144    <Reference Include="HeuristicLab.Data-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
    143145      <SpecificVersion>False</SpecificVersion>
    144       <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.Data-3.3.dll</HintPath>
     146      <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Data-3.3.dll</HintPath>
    145147    </Reference>
    146148    <Reference Include="HeuristicLab.Encodings.PermutationEncoding-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
    147149      <SpecificVersion>False</SpecificVersion>
    148       <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.Encodings.PermutationEncoding-3.3.dll</HintPath>
     150      <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Encodings.PermutationEncoding-3.3.dll</HintPath>
    149151    </Reference>
    150152    <Reference Include="HeuristicLab.Encodings.RealVectorEncoding-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
    151153      <SpecificVersion>False</SpecificVersion>
    152       <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.Encodings.RealVectorEncoding-3.3.dll</HintPath>
     154      <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Encodings.RealVectorEncoding-3.3.dll</HintPath>
    153155    </Reference>
    154156    <Reference Include="HeuristicLab.Encodings.SymbolicExpressionTreeEncoding-3.4, Version=3.4.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
    155157      <SpecificVersion>False</SpecificVersion>
    156       <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.Encodings.SymbolicExpressionTreeEncoding-3.4.dll</HintPath>
     158      <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Encodings.SymbolicExpressionTreeEncoding-3.4.dll</HintPath>
    157159    </Reference>
    158160    <Reference Include="HeuristicLab.LibSVM-3.12, Version=3.12.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
    159161      <SpecificVersion>False</SpecificVersion>
    160       <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.LibSVM-3.12.dll</HintPath>
     162      <HintPath>..\..\..\..\trunk\bin\HeuristicLab.LibSVM-3.12.dll</HintPath>
    161163    </Reference>
    162164    <Reference Include="HeuristicLab.Operators-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
    163165      <SpecificVersion>False</SpecificVersion>
    164       <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.Operators-3.3.dll</HintPath>
     166      <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Operators-3.3.dll</HintPath>
    165167    </Reference>
    166168    <Reference Include="HeuristicLab.Optimization-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
    167169      <SpecificVersion>False</SpecificVersion>
    168       <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.Optimization-3.3.dll</HintPath>
     170      <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Optimization-3.3.dll</HintPath>
    169171    </Reference>
    170172    <Reference Include="HeuristicLab.Parameters-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
    171173      <SpecificVersion>False</SpecificVersion>
    172       <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.Parameters-3.3.dll</HintPath>
     174      <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Parameters-3.3.dll</HintPath>
    173175    </Reference>
    174176    <Reference Include="HeuristicLab.Persistence-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
    175177      <SpecificVersion>False</SpecificVersion>
    176       <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.Persistence-3.3.dll</HintPath>
     178      <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Persistence-3.3.dll</HintPath>
    177179    </Reference>
    178180    <Reference Include="HeuristicLab.PluginInfrastructure-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
    179181      <SpecificVersion>False</SpecificVersion>
    180       <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.PluginInfrastructure-3.3.dll</HintPath>
     182      <HintPath>..\..\..\..\trunk\bin\HeuristicLab.PluginInfrastructure-3.3.dll</HintPath>
    181183    </Reference>
    182184    <Reference Include="HeuristicLab.Problems.DataAnalysis-3.4, Version=3.4.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
    183185      <SpecificVersion>False</SpecificVersion>
    184       <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.Problems.DataAnalysis-3.4.dll</HintPath>
     186      <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Problems.DataAnalysis-3.4.dll</HintPath>
    185187    </Reference>
    186188    <Reference Include="HeuristicLab.Problems.DataAnalysis.Symbolic-3.4, Version=3.4.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
    187189      <SpecificVersion>False</SpecificVersion>
    188       <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.Problems.DataAnalysis.Symbolic-3.4.dll</HintPath>
     190      <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Problems.DataAnalysis.Symbolic-3.4.dll</HintPath>
    189191    </Reference>
    190192    <Reference Include="HeuristicLab.Problems.DataAnalysis.Symbolic.Classification-3.4, Version=3.4.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
    191193      <SpecificVersion>False</SpecificVersion>
    192       <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.Problems.DataAnalysis.Symbolic.Classification-3.4.dll</HintPath>
     194      <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Problems.DataAnalysis.Symbolic.Classification-3.4.dll</HintPath>
    193195    </Reference>
    194196    <Reference Include="HeuristicLab.Problems.DataAnalysis.Symbolic.Regression-3.4, Version=3.4.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
    195197      <SpecificVersion>False</SpecificVersion>
    196       <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.Problems.DataAnalysis.Symbolic.Regression-3.4.dll</HintPath>
     198      <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Problems.DataAnalysis.Symbolic.Regression-3.4.dll</HintPath>
    197199    </Reference>
    198200    <Reference Include="HeuristicLab.Problems.DataAnalysis.Symbolic.TimeSeriesPrognosis-3.4, Version=3.4.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
    199201      <SpecificVersion>False</SpecificVersion>
    200       <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.Problems.DataAnalysis.Symbolic.TimeSeriesPrognosis-3.4.dll</HintPath>
     202      <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Problems.DataAnalysis.Symbolic.TimeSeriesPrognosis-3.4.dll</HintPath>
    201203    </Reference>
    202204    <Reference Include="HeuristicLab.Problems.Instances-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
    203205      <SpecificVersion>False</SpecificVersion>
    204       <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.Problems.Instances-3.3.dll</HintPath>
     206      <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Problems.Instances-3.3.dll</HintPath>
    205207    </Reference>
    206208    <Reference Include="HeuristicLab.Random-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
    207209      <SpecificVersion>False</SpecificVersion>
    208       <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.Random-3.3.dll</HintPath>
     210      <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Random-3.3.dll</HintPath>
    209211    </Reference>
    210212    <Reference Include="HeuristicLab.Selection-3.3, Version=3.3.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
    211213      <SpecificVersion>False</SpecificVersion>
    212       <HintPath>..\..\..\..\trunk\sources\bin\HeuristicLab.Selection-3.3.dll</HintPath>
     214      <HintPath>..\..\..\..\trunk\bin\HeuristicLab.Selection-3.3.dll</HintPath>
    213215    </Reference>
    214216    <Reference Include="LibSVM-3.12, Version=3.12.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
     
    365367    <Compile Include="Linear\MultinomialLogitModel.cs" />
    366368    <Compile Include="Linear\Scaling.cs" />
    367     <Compile Include="M5Regression\Interfaces\ISpliter.cs" />
    368369    <Compile Include="M5Regression\Interfaces\IM5Model.cs" />
    369370    <Compile Include="M5Regression\Interfaces\ILeafModel.cs" />
    370371    <Compile Include="M5Regression\Interfaces\IPruning.cs" />
     372    <Compile Include="M5Regression\Interfaces\ISplitter.cs" />
     373    <Compile Include="M5Regression\LeafModels\ComponentReducedLinearModel.cs" />
     374    <Compile Include="M5Regression\LeafModels\DampenedLinearModel.cs" />
     375    <Compile Include="M5Regression\LeafModels\PreconstructedLinearModel.cs" />
    371376    <Compile Include="M5Regression\LeafTypes\ComplexLeaf.cs" />
    372377    <Compile Include="M5Regression\LeafTypes\ComponentReductionLinearLeaf.cs" />
     378    <Compile Include="M5Regression\LeafTypes\LeafBase.cs" />
    373379    <Compile Include="M5Regression\LeafTypes\ConstantLeaf.cs" />
    374380    <Compile Include="M5Regression\LeafTypes\GaussianProcessLeaf.cs" />
    375381    <Compile Include="M5Regression\LeafTypes\LinearLeaf.cs" />
    376382    <Compile Include="M5Regression\LeafTypes\LogisticLeaf.cs" />
     383    <Compile Include="M5Regression\LeafTypes\M5Leaf.cs" />
     384    <Compile Include="M5Regression\LeafTypes\M5regLeaf.cs" />
    377385    <Compile Include="M5Regression\M5Regression.cs" />
    378     <Compile Include="M5Regression\M5Utilities\M5StaticUtilities.cs" />
    379     <Compile Include="M5Regression\M5Utilities\M5Analyzer.cs" />
    380     <Compile Include="M5Regression\M5Utilities\M5Parameters.cs" />
    381     <Compile Include="M5Regression\MetaModels\ComponentReducedLinearModel.cs" />
    382     <Compile Include="M5Regression\MetaModels\M5NodeModel.cs" />
    383     <Compile Include="M5Regression\MetaModels\M5RuleModel.cs" />
    384     <Compile Include="M5Regression\MetaModels\M5RuleSetModel.cs" />
    385     <Compile Include="M5Regression\MetaModels\M5TreeModel.cs" />
    386     <Compile Include="M5Regression\MetaModels\DampenedLinearModel.cs" />
    387     <Compile Include="M5Regression\MetaModels\PreconstructedLinearModel.cs" />
    388     <Compile Include="M5Regression\Pruning\M5LinearBottomUpPruning.cs" />
    389     <Compile Include="M5Regression\Pruning\BottomUpPruningBase.cs" />
     386    <Compile Include="M5Regression\M5Utilities\RegressionTreeUtilities.cs" />
     387    <Compile Include="M5Regression\M5Utilities\RegressionTreeAnalyzer.cs" />
     388    <Compile Include="M5Regression\M5Utilities\RegressionTreeParameters.cs" />
     389    <Compile Include="M5Regression\MetaModels\RegressionNodeModel.cs" />
     390    <Compile Include="M5Regression\MetaModels\RegressionRuleModel.cs" />
     391    <Compile Include="M5Regression\MetaModels\RegressionRuleSetModel.cs" />
     392    <Compile Include="M5Regression\MetaModels\RegressionNodeTreeModel.cs" />
     393    <Compile Include="M5Regression\Pruning\ComplexityPruning.cs" />
    390394    <Compile Include="M5Regression\Pruning\NoPruning.cs" />
    391     <Compile Include="M5Regression\Pruning\M5LeafBottomUpPruning.cs" />
     395    <Compile Include="M5Regression\Spliting\CorrelationImpuritiyCalculator.cs" />
     396    <Compile Include="M5Regression\Spliting\CorrelationSplitter.cs" />
     397    <Compile Include="M5Regression\Spliting\SplitterBase.cs" />
     398    <Compile Include="M5Regression\Spliting\M5Splitter.cs" />
     399    <Compile Include="M5Regression\Spliting\NeumaierSum.cs" />
    392400    <Compile Include="M5Regression\Spliting\OrderImpurityCalculator.cs" />
    393     <Compile Include="M5Regression\Spliting\OptimumSearchingSpliter.cs" />
    394     <Compile Include="M5Regression\Spliting\M5Spliter.cs" />
     401    <Compile Include="M5Regression\Spliting\UnivariateOnlineLR.cs" />
    395402    <Compile Include="Nca\Initialization\INcaInitializer.cs" />
    396403    <Compile Include="Nca\Initialization\LdaInitializer.cs" />
  • branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/Interfaces/ILeafModel.cs

    r15614 r15830  
    2020#endregion
    2121
     22using System.Collections.Generic;
    2223using System.Threading;
    2324using HeuristicLab.Core;
     
    2728  public interface ILeafModel : IParameterizedNamedItem {
    2829    bool ProvidesConfidence { get; }
    29     IRegressionModel Build(IRegressionProblemData pd, IRandom random, CancellationToken cancellationToken, out int noParameters);
     30    // IRegressionModel Build(IRegressionProblemData pd, IRandom random, CancellationToken cancellationToken, out int noParameters);
    3031    int MinLeafSize(IRegressionProblemData pd);
     32
     33    void Initialize(IScope states);
     34    void Build(RegressionNodeTreeModel tree, IReadOnlyList<int> trainingRows, IScope stateScope, CancellationToken cancellationToken);
     35    IRegressionModel BuildModel(IReadOnlyList<int> rows, RegressionTreeParameters parameters, CancellationToken cancellation, out int numParams);
    3136  }
    3237}
  • branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/Interfaces/IM5Model.cs

    r15614 r15830  
    2222using System.Collections.Generic;
    2323using System.Threading;
     24using HeuristicLab.Core;
     25using HeuristicLab.Optimization;
    2426using HeuristicLab.Problems.DataAnalysis;
    2527
    2628namespace HeuristicLab.Algorithms.DataAnalysis {
    27   internal interface IM5Model : IRegressionModel {
    28     void Build(IReadOnlyList<int> trainingRows, IReadOnlyList<int> pruningRows, M5Parameters m5Params, CancellationToken cancellationToken);
    29     void Update(IReadOnlyList<int> rows, M5Parameters m5Parameters, CancellationToken cancellationToken);
     29  public interface IM5Model : IRegressionModel {
     30    void Build(IReadOnlyList<int> trainingRows, IReadOnlyList<int> pruningRows, IScope stateScope, ResultCollection results, CancellationToken cancellationToken);
     31    void Update(IReadOnlyList<int> rows, IScope stateScope, CancellationToken cancellationToken);
    3032  }
    3133}
  • branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/Interfaces/IPruning.cs

    r15614 r15830  
    2020#endregion
    2121using System.Collections.Generic;
     22using System.Threading;
    2223using HeuristicLab.Core;
    2324using HeuristicLab.Problems.DataAnalysis;
     
    2627  public interface IPruning : IParameterizedNamedItem {
    2728    int MinLeafSize(IRegressionProblemData pd, ILeafModel leafModel);
     29
     30    void Initialize(IScope states);
     31    void Prune(RegressionNodeTreeModel treeModel, IReadOnlyList<int> trainingRows, IReadOnlyList<int> pruningRows, IScope scope, CancellationToken cancellationToken);
    2832  }
    2933}
  • branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/LeafTypes/ComplexLeaf.cs

    r15614 r15830  
    3232  [StorableClass]
    3333  [Item("ComplexLeaf", "A leaf type that uses an arbitriary RegressionAlgorithm to create leaf models")]
    34   public class ComplexLeaf : ParameterizedNamedItem, ILeafModel {
     34  public class ComplexLeaf : LeafBase {
    3535    public const string RegressionParameterName = "Regression";
    3636    public IValueParameter<IDataAnalysisAlgorithm<IRegressionProblem>> RegressionParameter {
     
    5555
    5656    #region IModelType
    57     public bool ProvidesConfidence {
     57    public override bool ProvidesConfidence {
    5858      get { return false; }
    5959    }
    60     public IRegressionModel Build(IRegressionProblemData pd, IRandom random, CancellationToken cancellationToken, out int noParameters) {
     60    public override IRegressionModel Build(IRegressionProblemData pd, IRandom random, CancellationToken cancellationToken, out int noParameters) {
    6161      if (pd.Dataset.Rows < MinLeafSize(pd)) throw new ArgumentException("The number of training instances is too small to create a linear model");
    6262      noParameters = pd.Dataset.Rows + 1;
    6363      Regression.Problem = new RegressionProblem {ProblemData = pd};
    64       var res = M5StaticUtilities.RunSubAlgorithm(Regression, random.Next(), cancellationToken);
     64      var res = RegressionTreeUtilities.RunSubAlgorithm(Regression, random.Next(), cancellationToken);
    6565      var t = res.Select(x => x.Value).OfType<IRegressionSolution>().FirstOrDefault();
    6666      if (t == null) throw new ArgumentException("No RegressionSolution was provided by the algorithm");
    6767      return t.Model;
    6868    }
    69 
    70     public int MinLeafSize(IRegressionProblemData pd) {
     69    public override int MinLeafSize(IRegressionProblemData pd) {
    7170      return 3;
    7271    }
  • branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/LeafTypes/ComponentReductionLinearLeaf.cs

    r15614 r15830  
    3434  [StorableClass]
    3535  [Item("ComponentReductionLinearLeaf", "A leaf type that uses principle component analysis to create smaller linear models as leaf models")]
    36   public class ComponentReductionLinearLeaf : ParameterizedNamedItem, ILeafModel {
     36  public class ComponentReductionLinearLeaf : LeafBase {
    3737    public const string NoComponentsParameterName = "NoComponents";
    3838    public IFixedValueParameter<IntValue> NoComponentsParameter {
     
    4545    #region Constructors & Cloning
    4646    [StorableConstructor]
    47     private ComponentReductionLinearLeaf(bool deserializing) : base(deserializing) { }
    48     private ComponentReductionLinearLeaf(ComponentReductionLinearLeaf original, Cloner cloner) : base(original, cloner) { }
     47    protected ComponentReductionLinearLeaf(bool deserializing) : base(deserializing) { }
     48    protected ComponentReductionLinearLeaf(ComponentReductionLinearLeaf original, Cloner cloner) : base(original, cloner) { }
    4949    public ComponentReductionLinearLeaf() {
    5050      Parameters.Add(new FixedValueParameter<IntValue>(NoComponentsParameterName, "The maximum number of principle components used", new IntValue(10)));
     
    5656
    5757    #region IModelType
    58     public bool ProvidesConfidence {
     58    public override bool ProvidesConfidence {
    5959      get { return true; }
    6060    }
    61     public IRegressionModel Build(IRegressionProblemData pd, IRandom random,
     61    public override IRegressionModel Build(IRegressionProblemData pd, IRandom random,
    6262      CancellationToken cancellationToken, out int noParameters) {
    6363      var pca = PrincipleComponentTransformation.CreateProjection(pd.Dataset, pd.TrainingIndices, pd.AllowedInputVariables, true);
     
    8282    }
    8383
    84     public int MinLeafSize(IRegressionProblemData pd) {
     84    public override int MinLeafSize(IRegressionProblemData pd) {
    8585      return NoComponents + 2;
    8686    }
  • branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/LeafTypes/ConstantLeaf.cs

    r15614 r15830  
    3131  [StorableClass]
    3232  [Item("ConstantLeaf", "A leaf type that uses constant models as leaf models")]
    33   public class ConstantLeaf : ParameterizedNamedItem, ILeafModel {
     33  public class ConstantLeaf : LeafBase {
    3434    #region Constructors & Cloning
    3535    [StorableConstructor]
    36     private ConstantLeaf(bool deserializing) : base(deserializing) { }
    37     private ConstantLeaf(ConstantLeaf original, Cloner cloner) : base(original, cloner) { }
     36    protected ConstantLeaf(bool deserializing) : base(deserializing) { }
     37    protected ConstantLeaf(ConstantLeaf original, Cloner cloner) : base(original, cloner) { }
    3838    public ConstantLeaf() { }
    3939    public override IDeepCloneable Clone(Cloner cloner) {
     
    4343
    4444    #region IModelType
    45     public bool ProvidesConfidence {
     45    public override bool ProvidesConfidence {
    4646      get { return false; }
    4747    }
    48     public IRegressionModel Build(IRegressionProblemData pd, IRandom random, CancellationToken cancellationToken, out int noParameters) {
     48    public override IRegressionModel Build(IRegressionProblemData pd, IRandom random, CancellationToken cancellationToken, out int noParameters) {
    4949      if (pd.Dataset.Rows < MinLeafSize(pd)) throw new ArgumentException("The number of training instances is too small to create a linear model");
    5050      noParameters = 1;
     
    5252    }
    5353
    54     public int MinLeafSize(IRegressionProblemData pd) {
     54    public override int MinLeafSize(IRegressionProblemData pd) {
    5555      return 0;
    5656    }
  • branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/LeafTypes/GaussianProcessLeaf.cs

    r15614 r15830  
    3333  [StorableClass]
    3434  [Item("GaussianProcessLeaf", "A leaf type that uses gaussian process models as leaf models.")]
    35   public class GaussianProcessLeaf : ParameterizedNamedItem, ILeafModel {
     35  public class GaussianProcessLeaf : LeafBase {
    3636    #region ParameterNames
    3737    public const string TriesParameterName = "Tries";
     
    5959    #region Constructors & Cloning
    6060    [StorableConstructor]
    61     private GaussianProcessLeaf(bool deserializing) : base(deserializing) { }
    62     private GaussianProcessLeaf(GaussianProcessLeaf original, Cloner cloner) : base(original, cloner) { }
     61    protected GaussianProcessLeaf(bool deserializing) : base(deserializing) { }
     62    protected GaussianProcessLeaf(GaussianProcessLeaf original, Cloner cloner) : base(original, cloner) { }
    6363    public GaussianProcessLeaf() {
    6464      var gp = new GaussianProcessRegression();
     
    7575
    7676    #region IModelType
    77     public bool ProvidesConfidence {
     77    public override bool ProvidesConfidence {
    7878      get { return true; }
    7979    }
    80     public IRegressionModel Build(IRegressionProblemData pd, IRandom random, CancellationToken cancellationToken, out int noParameters) {
     80    public override IRegressionModel Build(IRegressionProblemData pd, IRandom random, CancellationToken cancellationToken, out int noParameters) {
    8181      if (pd.Dataset.Rows < MinLeafSize(pd)) throw new ArgumentException("The number of training instances is too small to create a gaussian process model");
    8282      Regression.Problem = new RegressionProblem {ProblemData = pd};
     
    8585
    8686      for (var i = 0; i < Tries; i++) {
    87         var res = M5StaticUtilities.RunSubAlgorithm(Regression, random.Next(), cancellationToken);
     87        var res = RegressionTreeUtilities.RunSubAlgorithm(Regression, random.Next(), cancellationToken);
    8888        var t = res.Select(x => x.Value).OfType<GaussianProcessRegressionSolution>().FirstOrDefault();
    8989        var score = ((DoubleValue)res["Negative log pseudo-likelihood (LOO-CV)"].Value).Value;
     
    101101    }
    102102
    103     public int MinLeafSize(IRegressionProblemData pd) {
     103    public override int MinLeafSize(IRegressionProblemData pd) {
    104104      return 3;
    105105    }
  • branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/LeafTypes/LinearLeaf.cs

    r15614 r15830  
    3131  [StorableClass]
    3232  [Item("LinearLeaf", "A leaf type that uses linear models as leaf models. This is the standard for M5' regression")]
    33   public class LinearLeaf : ParameterizedNamedItem, ILeafModel {
     33  public class LinearLeaf : LeafBase {
    3434    #region Constructors & Cloning
    3535    [StorableConstructor]
    36     private LinearLeaf(bool deserializing) : base(deserializing) { }
    37     private LinearLeaf(LinearLeaf original, Cloner cloner) : base(original, cloner) { }
     36    protected LinearLeaf(bool deserializing) : base(deserializing) { }
     37    protected LinearLeaf(LinearLeaf original, Cloner cloner) : base(original, cloner) { }
    3838    public LinearLeaf() { }
    3939    public override IDeepCloneable Clone(Cloner cloner) {
     
    4343
    4444    #region IModelType
    45     public bool ProvidesConfidence {
     45    public override bool ProvidesConfidence {
    4646      get { return true; }
    4747    }
    48     public IRegressionModel Build(IRegressionProblemData pd, IRandom random, CancellationToken cancellationToken, out int noParameters) {
     48    public override IRegressionModel Build(IRegressionProblemData pd, IRandom random, CancellationToken cancellationToken, out int noParameters) {
    4949      if (pd.Dataset.Rows < MinLeafSize(pd)) throw new ArgumentException("The number of training instances is too small to create a linear model");
    5050      double rmse, cvRmse;
    5151      noParameters = pd.AllowedInputVariables.Count() + 1;
    52       return PreconstructedLinearModel.CreateConfidenceLinearModel(pd, out rmse, out cvRmse);
     52      var res = PreconstructedLinearModel.CreateConfidenceLinearModel(pd, out rmse, out cvRmse);
     53      return res;
    5354    }
    5455
    55     public int MinLeafSize(IRegressionProblemData pd) {
    56       return pd.AllowedInputVariables.Count() + 2;
     56    public override int MinLeafSize(IRegressionProblemData pd) {
     57      return pd.AllowedInputVariables.Count() == 1 ? 2 : pd.AllowedInputVariables.Count() + 2;
    5758    }
    5859    #endregion
  • branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/LeafTypes/LogisticLeaf.cs

    r15614 r15830  
    3333  [StorableClass]
    3434  [Item("LogisticLeaf", "A leaf type that uses linear models with a logistic dampening as leaf models. Dampening reduces prediction values far outside the observed target values.")]
    35   public class LogisticLeaf : ParameterizedNamedItem, ILeafModel {
     35  public class LogisticLeaf : LeafBase {
    3636    private const string DampeningParameterName = "Dampening";
    3737    public IFixedValueParameter<DoubleValue> DampeningParameter {
     
    5555
    5656    #region IModelType
    57     public bool ProvidesConfidence {
     57    public override bool ProvidesConfidence {
    5858      get { return true; }
    5959    }
    60     public IRegressionModel Build(IRegressionProblemData pd, IRandom random, CancellationToken cancellationToken, out int noParameters) {
    61       if (pd.Dataset.Rows < MinLeafSize(pd)) throw new ArgumentException("The number of training instances is too small to create a linear model");
    62       double rmse, cvRmse;
    63       noParameters = pd.AllowedInputVariables.Count() + 1;
    64       return new DampenedLinearModel(PreconstructedLinearModel.CreateConfidenceLinearModel(pd, out rmse, out cvRmse), pd, Dampening);
     60    public override IRegressionModel Build(IRegressionProblemData pd, IRandom random, CancellationToken cancellationToken, out int noParameters) {
     61      var res = (IConfidenceRegressionModel)new LinearLeaf().Build(pd, random, cancellationToken, out noParameters);
     62      return new DampenedLinearModel(res, pd, Dampening);
    6563    }
    6664
    67     public int MinLeafSize(IRegressionProblemData pd) {
     65    public override int MinLeafSize(IRegressionProblemData pd) {
    6866      return pd.AllowedInputVariables.Count() + 2;
    6967    }
  • branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/M5Regression.cs

    r15614 r15830  
    1919  [Item("M5RegressionTree", "A M5 regression tree / rule set")]
    2020  public sealed class M5Regression : FixedDataAnalysisAlgorithm<IRegressionProblem> {
     21    public override bool SupportsPause {
     22      get { return true; }
     23    }
     24
     25    public const string RegressionTreeParameterVariableName = "RegressionTreeParameters";
     26    public const string ModelVariableName = "Model";
     27    public const string PruningSetVariableName = "PruningSet";
     28    public const string TrainingSetVariableName = "TrainingSet";
     29
    2130    #region Parametername
    2231    private const string GenerateRulesParameterName = "GenerateRules";
    2332    private const string HoldoutSizeParameterName = "HoldoutSize";
    24     private const string SpliterParameterName = "Spliter";
     33    private const string SpliterParameterName = "Splitter";
    2534    private const string MinimalNodeSizeParameterName = "MinimalNodeSize";
    2635    private const string LeafModelParameterName = "LeafModel";
     
    3847      get { return (IFixedValueParameter<PercentValue>)Parameters[HoldoutSizeParameterName]; }
    3948    }
    40     public IConstrainedValueParameter<ISpliter> ImpurityParameter {
    41       get { return (IConstrainedValueParameter<ISpliter>)Parameters[SpliterParameterName]; }
     49    public IConstrainedValueParameter<ISplitter> ImpurityParameter {
     50      get { return (IConstrainedValueParameter<ISplitter>)Parameters[SpliterParameterName]; }
    4251    }
    4352    public IFixedValueParameter<IntValue> MinimalNodeSizeParameter {
     
    6877      get { return HoldoutSizeParameter.Value.Value; }
    6978    }
    70     public ISpliter Split {
     79    public ISplitter Splitter {
    7180      get { return ImpurityParameter.Value; }
    7281    }
     
    8998      get { return UseHoldoutParameter.Value.Value; }
    9099    }
     100    #endregion
     101
     102    #region State
     103    [Storable]
     104    private IScope stateScope;
    91105    #endregion
    92106
     
    94108    [StorableConstructor]
    95109    private M5Regression(bool deserializing) : base(deserializing) { }
    96     private M5Regression(M5Regression original, Cloner cloner) : base(original, cloner) { }
     110    private M5Regression(M5Regression original, Cloner cloner) : base(original, cloner) {
     111      stateScope = cloner.Clone(stateScope);
     112    }
    97113    public M5Regression() {
    98114      var modelSet = new ItemSet<ILeafModel>(ApplicationManager.Manager.GetInstances<ILeafModel>());
    99115      var pruningSet = new ItemSet<IPruning>(ApplicationManager.Manager.GetInstances<IPruning>());
    100       var impuritySet = new ItemSet<ISpliter>(ApplicationManager.Manager.GetInstances<ISpliter>());
     116      var impuritySet = new ItemSet<ISplitter>(ApplicationManager.Manager.GetInstances<ISplitter>());
    101117      Parameters.Add(new FixedValueParameter<BoolValue>(GenerateRulesParameterName, "Whether a set of rules or a decision tree shall be created", new BoolValue(false)));
    102118      Parameters.Add(new FixedValueParameter<PercentValue>(HoldoutSizeParameterName, "How much of the training set shall be reserved for pruning", new PercentValue(0.2)));
    103       Parameters.Add(new ConstrainedValueParameter<ISpliter>(SpliterParameterName, "The type of split function used to create node splits", impuritySet, impuritySet.OfType<M5Spliter>().First()));
     119      Parameters.Add(new ConstrainedValueParameter<ISplitter>(SpliterParameterName, "The type of split function used to create node splits", impuritySet, impuritySet.OfType<M5Splitter>().First()));
    104120      Parameters.Add(new FixedValueParameter<IntValue>(MinimalNodeSizeParameterName, "The minimal number of samples in a leaf node", new IntValue(1)));
    105121      Parameters.Add(new ConstrainedValueParameter<ILeafModel>(LeafModelParameterName, "The type of model used for the nodes", modelSet, modelSet.OfType<LinearLeaf>().First()));
    106       Parameters.Add(new ConstrainedValueParameter<IPruning>(PruningTypeParameterName, "The type of pruning used", pruningSet, pruningSet.OfType<M5LinearBottomUpPruning>().First()));
     122      Parameters.Add(new ConstrainedValueParameter<IPruning>(PruningTypeParameterName, "The type of pruning used", pruningSet, pruningSet.OfType<ComplexityPruning>().First()));
    107123      Parameters.Add(new FixedValueParameter<IntValue>(SeedParameterName, "The random seed used to initialize the new pseudo random number generator.", new IntValue(0)));
    108124      Parameters.Add(new FixedValueParameter<BoolValue>(SetSeedRandomlyParameterName, "True if the random seed should be set to a random value, otherwise false.", new BoolValue(true)));
     
    115131    #endregion
    116132
    117     protected override void Run(CancellationToken cancellationToken) {
     133    protected override void Initialize(CancellationToken cancellationToken) {
     134      base.Initialize(cancellationToken);
    118135      var random = new MersenneTwister();
    119136      if (SetSeedRandomly) SeedParameter.Value.Value = new System.Random().Next();
    120137      random.Reset(Seed);
    121       var solution = CreateM5RegressionSolution(Problem.ProblemData, random, LeafModel, Split, Pruning, UseHoldout, HoldoutSize, MinimalNodeSize, GenerateRules, Results, cancellationToken);
    122       AnalyzeSolution(solution);
     138      stateScope = InitializeScope(random, Problem.ProblemData, Pruning, MinimalNodeSize, LeafModel, Splitter, GenerateRules, UseHoldout, HoldoutSize);
     139      stateScope.Variables.Add(new Variable("Algorithm", this));
     140      Results.AddOrUpdateResult("StateScope", stateScope);
     141    }
     142
     143    protected override void Run(CancellationToken cancellationToken) {
     144      var model = Build(stateScope, Results, cancellationToken);
     145      AnalyzeSolution(model.CreateRegressionSolution(Problem.ProblemData), Results, Problem.ProblemData);
    123146    }
    124147
    125148    #region Static Interface
    126     public static IRegressionSolution CreateM5RegressionSolution(IRegressionProblemData problemData, IRandom random,
    127       ILeafModel leafModel = null, ISpliter spliter = null, IPruning pruning = null,
    128       bool useHoldout = false, double holdoutSize = 0.2, int minNumInstances = 4, bool generateRules = false, ResultCollection results = null, CancellationToken? cancellationToken = null) {
    129       //set default values
     149    public static IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData, IRandom random, ILeafModel leafModel = null, ISplitter splitter = null, IPruning pruning = null,
     150      bool useHoldout = false, double holdoutSize = 0.2, int minimumLeafSize = 4, bool generateRules = false, ResultCollection results = null, CancellationToken? cancellationToken = null) {
    130151      if (leafModel == null) leafModel = new LinearLeaf();
    131       if (spliter == null) spliter = new M5Spliter();
     152      if (splitter == null) splitter = new M5Splitter();
    132153      if (cancellationToken == null) cancellationToken = CancellationToken.None;
    133       if (pruning == null) pruning = new M5LeafBottomUpPruning();
    134 
     154      if (pruning == null) pruning = new ComplexityPruning();
     155
     156      var stateScope = InitializeScope(random, problemData, pruning, minimumLeafSize, leafModel, splitter, generateRules, useHoldout, holdoutSize);
     157      var model = Build(stateScope, results, cancellationToken.Value);
     158      return model.CreateRegressionSolution(problemData);
     159    }
     160
     161    public static void UpdateModel(IM5Model model, IRegressionProblemData problemData, IRandom random, ILeafModel leafModel, CancellationToken? cancellationToken = null) {
     162      if (cancellationToken == null) cancellationToken = CancellationToken.None;
     163      var regressionTreeParameters = new RegressionTreeParameters(leafModel, problemData, random);
     164      var scope = new Scope();
     165      scope.Variables.Add(new Variable(RegressionTreeParameterVariableName, regressionTreeParameters));
     166      leafModel.Initialize(scope);
     167      model.Update(problemData.TrainingIndices.ToList(), scope, cancellationToken.Value);
     168    }
     169    #endregion
     170
     171    #region Helpers
     172    private static IScope InitializeScope(IRandom random, IRegressionProblemData problemData, IPruning pruning, int minLeafSize, ILeafModel leafModel, ISplitter splitter, bool GenerateRules, bool useHoldout, double holdoutSize) {
     173      var stateScope = new Scope("RegressionTreeStateScope");
     174
     175      //reduce RegressionProblemData to AllowedInput & Target column wise and to TrainingSet row wise
    135176      var doubleVars = new HashSet<string>(problemData.Dataset.DoubleVariables);
    136177      var vars = problemData.AllowedInputVariables.Concat(new[] {problemData.TargetVariable}).ToArray();
    137178      if (vars.Any(v => !doubleVars.Contains(v))) throw new NotSupportedException("M5 regression supports only double valued input or output features.");
    138 
    139       var values = vars.Select(v => problemData.Dataset.GetDoubleValues(v, problemData.TrainingIndices).ToArray()).ToArray();
    140       if (values.Any(v => v.Any(x => double.IsNaN(x) || double.IsInfinity(x))))
     179      var doubles = vars.Select(v => problemData.Dataset.GetDoubleValues(v, problemData.TrainingIndices).ToArray()).ToArray();
     180      if (doubles.Any(v => v.Any(x => double.IsNaN(x) || double.IsInfinity(x))))
    141181        throw new NotSupportedException("M5 regression does not support NaN or infinity values in the input dataset.");
    142 
    143       var trainingData = new Dataset(vars, values);
     182      var trainingData = new Dataset(vars, doubles);
    144183      var pd = new RegressionProblemData(trainingData, problemData.AllowedInputVariables, problemData.TargetVariable);
    145184      pd.TrainingPartition.End = pd.TestPartition.Start = pd.TestPartition.End = pd.Dataset.Rows;
    146185      pd.TrainingPartition.Start = 0;
    147186
    148       //create & build Model
    149       var m5Params = new M5Parameters(pruning, minNumInstances, leafModel, pd, random, spliter, results);
    150 
    151       IReadOnlyList<int> trainingRows, pruningRows;
    152       GeneratePruningSet(problemData.TrainingIndices.ToArray(), random, useHoldout, holdoutSize, out trainingRows, out pruningRows);
    153 
    154       IM5Model model;
    155       if (generateRules)
    156         model = M5RuleSetModel.CreateRuleModel(problemData.TargetVariable, m5Params);
    157       else
    158         model = M5TreeModel.CreateTreeModel(problemData.TargetVariable, m5Params);
    159 
    160       model.Build(trainingRows, pruningRows, m5Params, cancellationToken.Value);
    161       return model.CreateRegressionSolution(problemData);
    162     }
    163 
    164     public static void UpdateM5Model(IRegressionModel model, IRegressionProblemData problemData, IRandom random,
    165       ILeafModel leafModel, CancellationToken? cancellationToken = null) {
    166       var m5Model = model as IM5Model;
    167       if (m5Model == null) throw new ArgumentException("This type of model can not be updated");
    168       UpdateM5Model(m5Model, problemData, random, leafModel, cancellationToken);
    169     }
    170 
    171     private static void UpdateM5Model(IM5Model model, IRegressionProblemData problemData, IRandom random,
    172       ILeafModel leafModel = null, CancellationToken? cancellationToken = null) {
    173       if (cancellationToken == null) cancellationToken = CancellationToken.None;
    174       var m5Params = new M5Parameters(leafModel, problemData, random);
    175       model.Update(problemData.TrainingIndices.ToList(), m5Params, cancellationToken.Value);
    176     }
    177     #endregion
    178 
    179     #region Helpers
     187      //store regression tree parameters
     188      var regressionTreeParams = new RegressionTreeParameters(pruning, minLeafSize, leafModel, pd, random, splitter);
     189      stateScope.Variables.Add(new Variable(RegressionTreeParameterVariableName, regressionTreeParams));
     190
     191      //initialize tree operators
     192      pruning.Initialize(stateScope);
     193      splitter.Initialize(stateScope);
     194      leafModel.Initialize(stateScope);
     195
     196      //store unbuilt model
     197      IItem model;
     198      if (GenerateRules) {
     199        model = RegressionRuleSetModel.CreateRuleModel(problemData.TargetVariable, regressionTreeParams);
     200        RegressionRuleSetModel.Initialize(stateScope);
     201      }
     202      else {
     203        model = RegressionNodeTreeModel.CreateTreeModel(problemData.TargetVariable, regressionTreeParams);
     204      }
     205      stateScope.Variables.Add(new Variable(ModelVariableName, model));
     206
     207      //store training & pruning indices
     208      IReadOnlyList<int> trainingSet, pruningSet;
     209      GeneratePruningSet(pd.TrainingIndices.ToArray(), random, useHoldout, holdoutSize, out trainingSet, out pruningSet);
     210      stateScope.Variables.Add(new Variable(TrainingSetVariableName, new IntArray(trainingSet.ToArray())));
     211      stateScope.Variables.Add(new Variable(PruningSetVariableName, new IntArray(pruningSet.ToArray())));
     212
     213      return stateScope;
     214    }
     215
     216    private static IRegressionModel Build(IScope stateScope, ResultCollection results, CancellationToken cancellationToken) {
     217      var model = (IM5Model)stateScope.Variables[ModelVariableName].Value;
     218      var trainingRows = (IntArray)stateScope.Variables[TrainingSetVariableName].Value;
     219      var pruningRows = (IntArray)stateScope.Variables[PruningSetVariableName].Value;
     220      model.Build(trainingRows.ToArray(), pruningRows.ToArray(), stateScope, results, cancellationToken);
     221      return model;
     222    }
     223
    180224    private static void GeneratePruningSet(IReadOnlyList<int> allrows, IRandom random, bool useHoldout, double holdoutSize, out IReadOnlyList<int> training, out IReadOnlyList<int> pruning) {
    181225      if (!useHoldout) {
     
    190234    }
    191235
    192     private void AnalyzeSolution(IRegressionSolution solution) {
    193       Results.Add(new Result("RegressionSolution", (IItem)solution.Clone()));
    194 
    195       Dictionary<string, int> frequencies;
    196       if (!GenerateRules) {
    197         Results.Add(M5Analyzer.CreateLeafDepthHistogram((M5TreeModel)solution.Model));
    198         frequencies = M5Analyzer.GetTreeVariableFrequences((M5TreeModel)solution.Model);
    199       }
    200       else {
    201         Results.Add(M5Analyzer.CreateRulesResult((M5RuleSetModel)solution.Model, Problem.ProblemData, "M5TreeResult", true));
    202         frequencies = M5Analyzer.GetRuleVariableFrequences((M5RuleSetModel)solution.Model);
    203         Results.Add(M5Analyzer.CreateCoverageDiagram((M5RuleSetModel)solution.Model, Problem.ProblemData));
     236    private void AnalyzeSolution(IRegressionSolution solution, ResultCollection results, IRegressionProblemData problemData) {
     237      results.Add(new Result("RegressionSolution", (IItem)solution.Clone()));
     238
     239      Dictionary<string, int> frequencies = null;
     240
     241      var tree = solution.Model as RegressionNodeTreeModel;
     242      if (tree != null) {
     243        results.Add(RegressionTreeAnalyzer.CreateLeafDepthHistogram(tree));
     244        frequencies = RegressionTreeAnalyzer.GetTreeVariableFrequences(tree);
     245        RegressionTreeAnalyzer.AnalyzeNodes(tree, results, problemData);
     246      }
     247
     248      var ruleSet = solution.Model as RegressionRuleSetModel;
     249      if (ruleSet != null) {
     250        results.Add(RegressionTreeAnalyzer.CreateRulesResult(ruleSet, problemData, "M5Rules", true));
     251        frequencies = RegressionTreeAnalyzer.GetRuleVariableFrequences(ruleSet);
     252        results.Add(RegressionTreeAnalyzer.CreateCoverageDiagram(ruleSet, problemData));
    204253      }
    205254
    206255      //Variable frequencies
    207       var sum = frequencies.Values.Sum();
    208       sum = sum == 0 ? 1 : sum;
    209       var impactArray = new DoubleArray(frequencies.Select(i => (double)i.Value / sum).ToArray()) {
    210         ElementNames = frequencies.Select(i => i.Key)
    211       };
    212       Results.Add(new Result("Variable Frequences", "relative frequencies of variables in rules and tree nodes", impactArray));
     256      if (frequencies != null) {
     257        var sum = frequencies.Values.Sum();
     258        sum = sum == 0 ? 1 : sum;
     259        var impactArray = new DoubleArray(frequencies.Select(i => (double)i.Value / sum).ToArray()) {
     260          ElementNames = frequencies.Select(i => i.Key)
     261        };
     262        results.Add(new Result("Variable Frequences", "relative frequencies of variables in rules and tree nodes", impactArray));
     263      }
     264
     265      var pruning = Pruning as ComplexityPruning;
     266      if (pruning != null && tree != null)
     267        RegressionTreeAnalyzer.PruningChart(tree, pruning, results);
    213268    }
    214269    #endregion
  • branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/Pruning/NoPruning.cs

    r15614 r15830  
    2121
    2222using System.Collections.Generic;
     23using System.Threading;
    2324using HeuristicLab.Common;
    2425using HeuristicLab.Core;
     
    3839      return new NoPruning(this, cloner);
    3940    }
     41    #endregion
     42
     43
    4044    public int MinLeafSize(IRegressionProblemData pd, ILeafModel leafModel) {
    4145      return 0;
    4246    }
    43     #endregion
     47    public void Initialize(IScope states) { }
     48
     49    public void Prune(RegressionNodeTreeModel treeModel, IReadOnlyList<int> trainingRows, IReadOnlyList<int> pruningRows, IScope scope, CancellationToken cancellationToken) { }
    4450  }
    4551}
  • branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/Spliting/OrderImpurityCalculator.cs

    r15614 r15830  
    2828  /// <summary>
    2929  /// Helper class for incremental split calculation.
    30   /// Used while moving a potential Spliter along the ordered training Instances
     30  /// Used while moving a potential Splitter along the ordered training Instances
    3131  /// </summary>
    3232  internal class OrderImpurityCalculator {
     
    105105      VarRight = NoRight <= 0 ? 0 : Math.Abs(NoRight * SqSumRight - SumRight * SumRight) / (NoRight * NoRight);
    106106
    107       if (Order <= 0) throw new ArgumentException("Spliter order must be larger than 0");
     107      if (Order <= 0) throw new ArgumentException("Splitter order must be larger than 0");
    108108      if (Order.IsAlmost(1)) {
    109109        y = VarTotal;
     
    116116        yr = Math.Pow(VarRight, 1.0 / Order);
    117117      }
    118       var t = NoRight + NoLeft;
    119       if (NoLeft <= 0.0 || NoRight <= 0.0) Impurity = double.MinValue; //Spliter = 0;
    120       else Impurity = y - NoLeft / t * yl - NoRight / t * yr; //  Spliter = y - NoLeft / NoRight * yl - NoRight / NoLeft * yr
     118      if (NoLeft <= 0.0 || NoRight <= 0.0) Impurity = double.MinValue; //Splitter = 0;
     119      else Impurity = y - (NoLeft * yl + NoRight * yr) / (NoRight + NoLeft);
    121120    }
    122121    #endregion
Note: See TracChangeset for help on using the changeset viewer.