Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.DataPreprocessing/3.4/ProblemDataCreator.cs @ 12053

Last change on this file since 12053 was 12012, checked in by ascheibe, 10 years ago

#2212 merged r12008, r12009, r12010 back into trunk

File size: 4.8 KB
RevLine 
[10310]1#region License Information
2/* HeuristicLab
[12012]3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
[10310]4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
[10383]22using System;
[10536]23using System.Collections.Generic;
[11382]24using System.Linq;
[10982]25using HeuristicLab.Common;
[10310]26using HeuristicLab.Problems.DataAnalysis;
27
namespace HeuristicLab.DataPreprocessing {
  /// <summary>
  /// Creates a new data analysis problem data object from the data held by an
  /// <see cref="IPreprocessingContext"/>, using the context's existing problem data
  /// as the template for problem type, target variable and input variable selection.
  /// </summary>
  public class ProblemDataCreator {

    private readonly IPreprocessingContext context;

    // Calls ExportToDataset() on every access; nothing is cached here.
    private Dataset ExportedDataset {
      get {
        return context.Data.ExportToDataset();
      }
    }

    private IList<ITransformation> Transformations { get { return context.Data.Transformations; } }

    /// <summary>
    /// Initializes the creator with the preprocessing context it reads from.
    /// </summary>
    /// <param name="context">Context supplying the preprocessed data and the old problem data.</param>
    public ProblemDataCreator(IPreprocessingContext context) {
      this.context = context;
    }

    /// <summary>
    /// Creates problem data of the same kind (regression, classification or clustering)
    /// as the context's current problem data, copies the training and test partitions
    /// from the preprocessed data and carries over the checked state of the input variables.
    /// </summary>
    /// <returns>
    /// The new problem data, or <c>null</c> when the preprocessed data has no rows or no columns.
    /// </returns>
    /// <exception cref="NotImplementedException">
    /// The context's problem data is none of the supported problem data types.
    /// </exception>
    public IDataAnalysisProblemData CreateProblemData() {
      if (context.Data.Rows == 0 || context.Data.Columns == 0) return null;

      var oldProblemData = context.ProblemData;
      IDataAnalysisProblemData problemData;

      if (oldProblemData is RegressionProblemData) {
        problemData = CreateRegressionData((RegressionProblemData)oldProblemData);
      } else if (oldProblemData is ClassificationProblemData) {
        problemData = CreateClassificationData((ClassificationProblemData)oldProblemData);
      } else if (oldProblemData is ClusteringProblemData) {
        problemData = CreateClusteringData((ClusteringProblemData)oldProblemData);
      } else {
        throw new NotImplementedException("The type of the DataAnalysisProblemData is not supported.");
      }

      SetTrainingAndTestPartition(problemData);

      // set the input variables to the correct checked state
      var inputVariables = problemData.InputVariables.ToDictionary(x => x.Value, x => x);
      foreach (var variable in oldProblemData.InputVariables) {
        // An old input variable may have no counterpart in the new problem data;
        // indexing the dictionary with its name would throw a KeyNotFoundException,
        // so such variables are skipped.
        if (!inputVariables.ContainsKey(variable.Value)) continue;

        bool @checked = oldProblemData.InputVariables.ItemChecked(variable);
        problemData.InputVariables.SetItemCheckedState(inputVariables[variable.Value], @checked);
      }

      return problemData;
    }

    /// <summary>
    /// Creates regression problem data from the exported dataset, keeping the old target variable.
    /// </summary>
    private IDataAnalysisProblemData CreateRegressionData(RegressionProblemData oldProblemData) {
      var targetVariable = oldProblemData.TargetVariable;
      // target variable must be double and must exist in the new dataset
      return new RegressionProblemData(ExportedDataset, GetDoubleInputVariables(targetVariable), targetVariable, Transformations);
    }

    /// <summary>
    /// Creates classification problem data from the exported dataset, keeping the old target variable.
    /// </summary>
    private IDataAnalysisProblemData CreateClassificationData(ClassificationProblemData oldProblemData) {
      var targetVariable = oldProblemData.TargetVariable;
      // target variable must be double and must exist in the new dataset
      return new ClassificationProblemData(ExportedDataset, GetDoubleInputVariables(targetVariable), targetVariable, Transformations);
    }

    /// <summary>
    /// Creates clustering problem data from the exported dataset. The empty string is passed
    /// as the target variable name when collecting the input variables.
    /// </summary>
    private IDataAnalysisProblemData CreateClusteringData(ClusteringProblemData oldProblemData) {
      return new ClusteringProblemData(ExportedDataset, GetDoubleInputVariables(String.Empty), Transformations);
    }

    /// <summary>
    /// Copies the start and end of the training and test partitions from the
    /// preprocessed data to <paramref name="problemData"/>.
    /// </summary>
    private void SetTrainingAndTestPartition(IDataAnalysisProblemData problemData) {
      var ppData = context.Data;

      problemData.TrainingPartition.Start = ppData.TrainingPartition.Start;
      problemData.TrainingPartition.End = ppData.TrainingPartition.End;
      problemData.TestPartition.Start = ppData.TestPartition.Start;
      problemData.TestPartition.End = ppData.TestPartition.End;
    }

    /// <summary>
    /// Collects the names of all columns of type double that are not the target variable
    /// and that pass <see cref="IsNotConstantInputVariable"/>.
    /// </summary>
    /// <param name="targetVariable">Name of the column to exclude from the result.</param>
    /// <returns>The selected variable names in column order.</returns>
    private IEnumerable<string> GetDoubleInputVariables(string targetVariable) {
      var variableNames = new List<string>();
      for (int i = 0; i < context.Data.Columns; ++i) {
        var variableName = context.Data.GetVariableName(i);
        // The type check comes first so GetValues<double> is only called on double columns.
        if (context.Data.VariableHasType<double>(i)
          && variableName != targetVariable
          && IsNotConstantInputVariable(context.Data.GetValues<double>(i))) {

          variableNames.Add(variableName);
        }
      }
      return variableNames;
    }

    /// <summary>
    /// Returns true when the training partition spans more than one row, or otherwise
    /// when the range of <paramref name="list"/> is greater than zero.
    /// </summary>
    // NOTE(review): because of the short-circuiting ||, every column is reported as
    // non-constant whenever the training partition has more than one row, regardless
    // of its values - confirm that this is the intended behaviour.
    private bool IsNotConstantInputVariable(IList<double> list) {
      return context.Data.TrainingPartition.End - context.Data.TrainingPartition.Start > 1 || list.Range() > 0;
    }
  }
}
Note: See TracBrowser for help on using the repository browser.