Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
06/28/16 13:33:17 (9 years ago)
Author:
mkommend
Message:

#2604:

  • Base classes for data analysis, classification, and regression models
  • Added target variable to classification and regression models
  • Switched parameter order in data analysis solutions (model, problemdata)
Location:
trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4
Files:
3 added
7 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/HeuristicLab.Problems.DataAnalysis-3.4.csproj

    r13766 r13941  
    132132    <Compile Include="Implementation\Classification\ConstantClassificationSolution.cs" />
    133133    <Compile Include="Implementation\Classification\DiscriminantFunctionClassificationSolutionBase.cs" />
     134    <Compile Include="Implementation\Classification\ClassificationModel.cs" />
    134135    <Compile Include="Implementation\Clustering\ClusteringProblem.cs" />
    135136    <Compile Include="Implementation\Clustering\ClusteringProblemData.cs" />
    136137    <Compile Include="Implementation\Clustering\ClusteringSolution.cs" />
    137138    <Compile Include="Implementation\ConstantModel.cs" />
     139    <Compile Include="Implementation\DataAnalysisModel.cs" />
    138140    <Compile Include="Implementation\Regression\ConstantRegressionModel.cs" />
    139141    <Compile Include="Implementation\Regression\ConstantRegressionSolution.cs" />
     
    143145    </Compile>
    144146    <Compile Include="Implementation\Regression\RegressionEnsembleSolution.cs" />
     147    <Compile Include="Implementation\Regression\RegressionModel.cs" />
    145148    <Compile Include="Implementation\Regression\RegressionSolutionVariableImpactsCalculator.cs" />
    146149    <Compile Include="Implementation\TimeSeriesPrognosis\Models\ConstantTimeSeriesPrognosisModel.cs" />
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationEnsembleModel.cs

    r13921 r13941  
    3232  [StorableClass]
    3333  [Item("ClassificationEnsembleModel", "A classification model that contains an ensemble of multiple classification models")]
    34   public class ClassificationEnsembleModel : NamedItem, IClassificationEnsembleModel {
    35     public IEnumerable<string> VariablesUsedForPrediction {
     34  public class ClassificationEnsembleModel : ClassificationModel, IClassificationEnsembleModel {
     35    public override IEnumerable<string> VariablesUsedForPrediction {
    3636      get { return models.SelectMany(x => x.VariablesUsedForPrediction).Distinct().OrderBy(x => x); }
    37     }
    38 
    39     public string TargetVariable {
    40       get { return models.First().TargetVariable; }
    4137    }
    4238
     
    5652    public ClassificationEnsembleModel() : this(Enumerable.Empty<IClassificationModel>()) { }
    5753    public ClassificationEnsembleModel(IEnumerable<IClassificationModel> models)
    58       : base() {
     54      : base(string.Empty) {
    5955      this.name = ItemName;
    6056      this.description = ItemDescription;
    6157      this.models = new List<IClassificationModel>(models);
     58
     59      if (this.models.Any()) this.TargetVariable = this.models.First().TargetVariable;
    6260    }
    6361
     
    6664    }
    6765
    68     #region IClassificationEnsembleModel Members
    6966    public void Add(IClassificationModel model) {
     67      if (string.IsNullOrEmpty(TargetVariable)) TargetVariable = model.TargetVariable;
    7068      models.Add(model);
    7169    }
    7270    public void Remove(IClassificationModel model) {
    7371      models.Remove(model);
     72      if (!models.Any()) TargetVariable = string.Empty;
    7473    }
    7574
     
    8584    }
    8685
    87     #endregion
    8886
    89     #region IClassificationModel Members
    90 
    91     public IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {
     87    public override IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {
    9288      foreach (var estimatedValuesVector in GetEstimatedClassValueVectors(dataset, rows)) {
    9389        // return the class which is most often occurring
     
    10197    }
    10298
    103     IClassificationSolution IClassificationModel.CreateClassificationSolution(IClassificationProblemData problemData) {
     99    public override IClassificationSolution CreateClassificationSolution(IClassificationProblemData problemData) {
    104100      return new ClassificationEnsembleSolution(models, new ClassificationEnsembleProblemData(problemData));
    105101    }
    106     #endregion
     102
     103
    107104  }
    108105}
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/DiscriminantFunctionClassificationModel.cs

    r13921 r13941  
    3333  [StorableClass]
    3434  [Item("DiscriminantFunctionClassificationModel", "Represents a classification model that uses a discriminant function and classification thresholds.")]
    35   public class DiscriminantFunctionClassificationModel : NamedItem, IDiscriminantFunctionClassificationModel {
    36     public IEnumerable<string> VariablesUsedForPrediction {
     35  public class DiscriminantFunctionClassificationModel : ClassificationModel, IDiscriminantFunctionClassificationModel {
     36    public override IEnumerable<string> VariablesUsedForPrediction {
    3737      get { return model.VariablesUsedForPrediction; }
    3838    }
    39 
    40     public string TargetVariable { get { return model.TargetVariable; } }
    4139
    4240    [Storable]
     
    7977
    8078    public DiscriminantFunctionClassificationModel(IRegressionModel model, IDiscriminantFunctionThresholdCalculator thresholdCalculator)
    81       : base() {
     79      : base(model.TargetVariable) {
    8280      this.name = ItemName;
    8381      this.description = ItemDescription;
     82
    8483      this.model = model;
    8584      this.classValues = new double[0];
     
    121120    }
    122121
    123     public IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {
     122    public override IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {
    124123      if (!Thresholds.Any() && !ClassValues.Any()) throw new ArgumentException("No thresholds and class values were set for the current classification model.");
    125124      foreach (var x in GetEstimatedValues(dataset, rows)) {
     
    141140    #endregion
    142141
    143     public virtual IDiscriminantFunctionClassificationSolution CreateDiscriminantFunctionClassificationSolution(IClassificationProblemData problemData) {
     142    public override IClassificationSolution CreateClassificationSolution(IClassificationProblemData problemData) {
     143      return CreateDiscriminantFunctionClassificationSolution(problemData);
     144    }
     145    public virtual IDiscriminantFunctionClassificationSolution CreateDiscriminantFunctionClassificationSolution(
     146      IClassificationProblemData problemData) {
    144147      return new DiscriminantFunctionClassificationSolution(this, new ClassificationProblemData(problemData));
    145     }
    146 
    147     public virtual IClassificationSolution CreateClassificationSolution(IClassificationProblemData problemData) {
    148       return CreateDiscriminantFunctionClassificationSolution(problemData);
    149148    }
    150149  }
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/ConstantModel.cs

    r13921 r13941  
    3131  [StorableClass]
    3232  [Item("Constant Model", "A model that always returns the same constant value regardless of the presented input data.")]
    33   public class ConstantModel : NamedItem, IRegressionModel, IClassificationModel, ITimeSeriesPrognosisModel, IStringConvertibleValue {
    34     public IEnumerable<string> VariablesUsedForPrediction { get { return Enumerable.Empty<string>(); } }
     33  public class ConstantModel : RegressionModel, IClassificationModel, ITimeSeriesPrognosisModel, IStringConvertibleValue {
     34    public override IEnumerable<string> VariablesUsedForPrediction { get { return Enumerable.Empty<string>(); } }
    3535
    36     [Storable]
    37     private readonly string targetVariable;
    38     public string TargetVariable {
    39       get { return targetVariable; }
    40     }
    4136
    4237    [Storable]
     
    5247      : base(original, cloner) {
    5348      this.constant = original.constant;
    54       this.targetVariable = original.targetVariable;
    5549    }
    5650
     
    5852
    5953    public ConstantModel(double constant, string targetVariable = "Target")
    60       : base() {
     54      : base(targetVariable) {
    6155      this.name = ItemName;
    6256      this.description = ItemDescription;
    6357      this.constant = constant;
    6458      this.ReadOnly = true; // changing a constant regression model is not supported
    65       this.targetVariable = targetVariable;
    6659    }
    6760
    68     public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
     61    public override IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
    6962      return rows.Select(row => Constant);
    7063    }
     
    7669    }
    7770
    78     public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
     71    public override IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
    7972      return new ConstantRegressionSolution(this, new RegressionProblemData(problemData));
    8073    }
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/ConstantRegressionModel.cs

    r13921 r13941  
    3232  [Item("Constant Regression Model", "A model that always returns the same constant value regardless of the presented input data.")]
    3333  [Obsolete]
    34   public class ConstantRegressionModel : NamedItem, IRegressionModel, IStringConvertibleValue {
    35     public IEnumerable<string> VariablesUsedForPrediction { get { return Enumerable.Empty<string>(); } }
    36 
    37     [Storable]
    38     private readonly string targetVariable;
    39     public string TargetVariable {
    40       get { return targetVariable; }
    41     }
     34  public class ConstantRegressionModel : RegressionModel, IStringConvertibleValue {
     35    public override IEnumerable<string> VariablesUsedForPrediction { get { return Enumerable.Empty<string>(); } }
    4236
    4337    [Storable]
     
    5347      : base(original, cloner) {
    5448      this.constant = original.constant;
    55       this.targetVariable = original.targetVariable;
    5649    }
    5750
     
    5952
    6053    public ConstantRegressionModel(double constant, string targetVariable = "Target")
    61       : base() {
     54      : base(targetVariable) {
    6255      this.name = ItemName;
    6356      this.description = ItemDescription;
    6457      this.constant = constant;
    6558      this.ReadOnly = true; // changing a constant regression model is not supported
    66       this.targetVariable = targetVariable;
    6759    }
    6860
    69     public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
     61    public override IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
    7062      return rows.Select(row => Constant);
    7163    }
    7264
    73     public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
     65    public override IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
    7466      return new ConstantRegressionSolution(new ConstantModel(constant), new RegressionProblemData(problemData));
    7567    }
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionEnsembleModel.cs

    r13921 r13941  
    3333  [StorableClass]
    3434  [Item("RegressionEnsembleModel", "A regression model that contains an ensemble of multiple regression models")]
    35   public sealed class RegressionEnsembleModel : NamedItem, IRegressionEnsembleModel {
    36     public IEnumerable<string> VariablesUsedForPrediction {
     35  public sealed class RegressionEnsembleModel : RegressionModel, IRegressionEnsembleModel {
     36    public override IEnumerable<string> VariablesUsedForPrediction {
    3737      get { return models.SelectMany(x => x.VariablesUsedForPrediction).Distinct().OrderBy(x => x); }
    3838    }
     
    4141    public IEnumerable<IRegressionModel> Models {
    4242      get { return new List<IRegressionModel>(models); }
    43     }
    44 
    45     [Storable]
    46     private readonly string target;
    47     public string TargetVariable {
    48       get { return models.First().TargetVariable; }
    4943    }
    5044
     
    109103    public RegressionEnsembleModel(IEnumerable<IRegressionModel> models) : this(models, models.Select(m => 1.0)) { }
    110104    public RegressionEnsembleModel(IEnumerable<IRegressionModel> models, IEnumerable<double> modelWeights)
    111       : base() {
     105      : base(string.Empty) {
    112106      this.name = ItemName;
    113107      this.description = ItemDescription;
    114108
    115 
    116109      this.models = new List<IRegressionModel>(models);
    117110      this.modelWeights = new List<double>(modelWeights);
     111
     112      if (this.models.Any()) this.TargetVariable = this.models.First().TargetVariable;
    118113    }
    119114
    120115    public void Add(IRegressionModel model) {
     116      if (string.IsNullOrEmpty(TargetVariable)) TargetVariable = model.TargetVariable;
    121117      Add(model, 1.0);
    122118    }
    123119    public void Add(IRegressionModel model, double weight) {
     120      if (string.IsNullOrEmpty(TargetVariable)) TargetVariable = model.TargetVariable;
     121
    124122      models.Add(model);
    125123      modelWeights.Add(weight);
     
    131129    }
    132130    public void AddRange(IEnumerable<IRegressionModel> models, IEnumerable<double> weights) {
     131      if (string.IsNullOrEmpty(TargetVariable)) TargetVariable = models.First().TargetVariable;
     132
    133133      this.models.AddRange(models);
    134134      modelWeights.AddRange(weights);
     
    140140      models.RemoveAt(index);
    141141      modelWeights.RemoveAt(index);
     142
     143      if (!models.Any()) TargetVariable = string.Empty;
    142144      OnChanged();
    143145    }
     
    148150        modelWeights.RemoveAt(index);
    149151      }
     152
     153      if (!models.Any()) TargetVariable = string.Empty;
    150154      OnChanged();
    151155    }
     
    174178    }
    175179
    176     public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
     180    public override IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
    177181      double weightsSum = modelWeights.Sum();
    178182      var summedEstimates = from estimatedValuesVector in GetEstimatedValueVectors(dataset, rows)
     
    222226
    223227
    224     public RegressionEnsembleSolution CreateRegressionSolution(IRegressionProblemData problemData) {
     228    public override IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
    225229      return new RegressionEnsembleSolution(this, new RegressionEnsembleProblemData(problemData));
    226     }
    227     IRegressionSolution IRegressionModel.CreateRegressionSolution(IRegressionProblemData problemData) {
    228       return CreateRegressionSolution(problemData);
    229230    }
    230231  }
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/TimeSeriesPrognosis/Models/TimeSeriesPrognosisAutoRegressiveModel.cs

    r13921 r13941  
    3030  [StorableClass]
    3131  [Item("Autoregressive TimeSeries Model", "A linear autoregressive time series model used to predict future values.")]
    32   public class TimeSeriesPrognosisAutoRegressiveModel : NamedItem, ITimeSeriesPrognosisModel {
    33     public IEnumerable<string> VariablesUsedForPrediction {
     32  public class TimeSeriesPrognosisAutoRegressiveModel : RegressionModel, ITimeSeriesPrognosisModel {
     33    public override IEnumerable<string> VariablesUsedForPrediction {
    3434      get { return Enumerable.Empty<string>(); } // what to return here?
    3535    }
     
    3939    [Storable]
    4040    public double Constant { get; private set; }
    41     [Storable]
    42     public string TargetVariable { get; private set; }
    4341
    4442    public int TimeOffset { get { return Phi.Length; } }
     
    5048      this.Phi = (double[])original.Phi.Clone();
    5149      this.Constant = original.Constant;
    52       this.TargetVariable = original.TargetVariable;
    5350    }
    5451    public override IDeepCloneable Clone(Cloner cloner) {
     
    5653    }
    5754    public TimeSeriesPrognosisAutoRegressiveModel(string targetVariable, double[] phi, double constant)
    58       : base("AR(1) Model") {
     55      : base(targetVariable, "AR(1) Model") {
    5956      Phi = (double[])phi.Clone();
    6057      Constant = constant;
    61       TargetVariable = targetVariable;
    6258    }
    6359
     
    9591    }
    9692
    97     public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
     93    public override IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
    9894      var targetVariables = dataset.GetReadOnlyDoubleValues(TargetVariable);
    9995      foreach (int row in rows) {
     
    115111      return new TimeSeriesPrognosisSolution(this, new TimeSeriesPrognosisProblemData(problemData));
    116112    }
    117     public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
     113    public override IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
    118114      throw new NotSupportedException();
    119115    }
Note: See TracChangeset for help on using the changeset viewer.