Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2906_Transformations/HeuristicLab.DataPreprocessing/3.4/ProblemDataCreator.cs @ 15870

Last change on this file since 15870 was 15870, checked in by pfleck, 6 years ago

#2906

  • Implemented for classification, clustering, etc.
  • Simplified Transformation interfaces (read-only, ...).
  • Started moving transformation logic out of ProblemData.
File size: 6.9 KB
Line 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Problems.DataAnalysis;
27
namespace HeuristicLab.DataPreprocessing {
  /// <summary>
  /// Creates a new problem data object from the data held by a <see cref="PreprocessingContext"/>,
  /// taking over settings (target variable, horizons, positive class, allowed inputs) from a
  /// previously used problem data instance.
  /// </summary>
  public class ProblemDataCreator {
    private readonly PreprocessingContext context;

    // Every access calls ExportToDataset() again, so read it once per created problem data.
    private Dataset ExportedDataset {
      get { return context.Data.ExportToDataset(); }
    }

    private IList<PreprocessingTransformation> Transformations {
      get { return context.Data.Transformations; }
    }

    /// <summary>
    /// Creates a new creator working on the given preprocessing context.
    /// </summary>
    /// <param name="context">The preprocessing context whose data is exported; must not be null.</param>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="context"/> is null.</exception>
    public ProblemDataCreator(PreprocessingContext context) {
      // Fail here instead of with a NullReferenceException on first use.
      if (context == null) throw new ArgumentNullException("context");
      this.context = context;
    }

    /// <summary>
    /// Creates problem data of the same kind as <paramref name="oldProblemData"/> from the current
    /// preprocessing data, then copies the partitions of the preprocessing data and restores the
    /// checked state of the input variables.
    /// </summary>
    /// <param name="oldProblemData">The problem data whose type and settings are taken over.</param>
    /// <returns>The new problem data, or null if the preprocessing data has no rows or no columns.</returns>
    /// <exception cref="NotImplementedException">
    /// Thrown when <paramref name="oldProblemData"/> is none of the supported problem data types
    /// (this includes null, because none of the type checks match).
    /// </exception>
    public IDataAnalysisProblemData CreateProblemData(IDataAnalysisProblemData oldProblemData) {
      if (context.Data.Rows == 0 || context.Data.Columns == 0) return null;

      IDataAnalysisProblemData problemData;

      // NOTE(review): time series is tested before regression; presumably because
      // TimeSeriesPrognosisProblemData derives from RegressionProblemData - confirm before reordering.
      if (oldProblemData is TimeSeriesPrognosisProblemData) {
        problemData = CreateTimeSeriesPrognosisData((TimeSeriesPrognosisProblemData)oldProblemData);
      } else if (oldProblemData is RegressionProblemData) {
        problemData = CreateRegressionData((RegressionProblemData)oldProblemData);
      } else if (oldProblemData is ClassificationProblemData) {
        problemData = CreateClassificationData((ClassificationProblemData)oldProblemData);
      } else if (oldProblemData is ClusteringProblemData) {
        problemData = CreateClusteringData((ClusteringProblemData)oldProblemData);
      } else {
        throw new NotImplementedException("The type of the DataAnalysisProblemData is not supported.");
      }

      SetTrainingAndTestPartition(problemData);
      // set the input variables to the correct checked state
      SetAllowedInputVariables(problemData, oldProblemData.AllowedInputVariables);

      return problemData;
    }

    // Builds time series problem data and takes over the training and test horizon of the old instance.
    private IDataAnalysisProblemData CreateTimeSeriesPrognosisData(TimeSeriesPrognosisProblemData oldProblemData) {
      var targetVariable = ResolveTargetVariable(oldProblemData.TargetVariable);
      var inputVariables = GetDoubleInputVariables(targetVariable);
      var newProblemData = new TimeSeriesPrognosisProblemData(ExportedDataset, inputVariables, targetVariable, CreateDataAnalysisTransformation()) {
        TrainingHorizon = oldProblemData.TrainingHorizon,
        TestHorizon = oldProblemData.TestHorizon
      };
      return newProblemData;
    }

    // Builds regression problem data; the old target is first mapped through the current transformations.
    private IDataAnalysisProblemData CreateRegressionData(RegressionProblemData oldProblemData) {
      // TODO: transformations (additional inputs, target changed)
      var transformedTarget = RegressionTransformationModel.GetTransformedTragetVariable(oldProblemData.TargetVariable, CreateDataAnalysisTransformation());
      var targetVariable = ResolveTargetVariable(transformedTarget);
      var inputVariables = GetDoubleInputVariables(targetVariable);
      var newProblemData = new RegressionProblemData(ExportedDataset, inputVariables, targetVariable, CreateDataAnalysisTransformation());
      return newProblemData;
    }

    // Builds classification problem data and takes over the positive class of the old instance.
    private IDataAnalysisProblemData CreateClassificationData(ClassificationProblemData oldProblemData) {
      var targetVariable = ResolveTargetVariable(oldProblemData.TargetVariable);
      var inputVariables = GetDoubleInputVariables(targetVariable);
      var newProblemData = new ClassificationProblemData(ExportedDataset, inputVariables, targetVariable, CreateDataAnalysisTransformation()) {
        PositiveClass = oldProblemData.PositiveClass
      };
      return newProblemData;
    }

    // Builds clustering problem data; there is no target, so no variable is excluded by name.
    // The old problem data is not read here; the parameter only mirrors the sibling factory methods.
    private IDataAnalysisProblemData CreateClusteringData(ClusteringProblemData oldProblemData) {
      return new ClusteringProblemData(ExportedDataset, GetDoubleInputVariables(String.Empty), CreateDataAnalysisTransformation());
    }

    // Returns the given target variable if the preprocessing data still contains it,
    // otherwise falls back to the first variable of the preprocessing data.
    private string ResolveTargetVariable(string targetVariable) {
      if (context.Data.VariableNames.Contains(targetVariable))
        return targetVariable;
      return context.Data.VariableNames.First();
    }

    // Copies the training and test partition bounds of the preprocessing data to the problem data.
    private void SetTrainingAndTestPartition(IDataAnalysisProblemData problemData) {
      var ppData = context.Data;

      problemData.TrainingPartition.Start = ppData.TrainingPartition.Start;
      problemData.TrainingPartition.End = ppData.TrainingPartition.End;
      problemData.TestPartition.Start = ppData.TestPartition.Start;
      problemData.TestPartition.End = ppData.TestPartition.End;
    }

    // Checks exactly those input variables of the problem data whose name is contained in the
    // old allowed inputs after extending them by the problem data's transformations.
    private void SetAllowedInputVariables(IDataAnalysisProblemData problemData, IEnumerable<string> oldInputVariables) {
      // Materialize once into a set: avoids re-evaluating a possibly deferred sequence and a
      // linear search for every input variable.
      var allowedInputs = new HashSet<string>(DataAnalysisProblemData.ExtendInputVariables(oldInputVariables, problemData.Transformations));

      foreach (var input in problemData.InputVariables) {
        problemData.InputVariables.SetItemCheckedState(input, allowedInputs.Contains(input.Value));
      }
    }

    // Collects the names of all double-typed columns that are not the target variable and
    // pass the constant-variable check.
    private IEnumerable<string> GetDoubleInputVariables(string targetVariable) {
      var variableNames = new List<string>();
      for (int i = 0; i < context.Data.Columns; ++i) {
        var variableName = context.Data.GetVariableName(i);
        if (context.Data.VariableHasType<double>(i)
          && variableName != targetVariable
          && IsNotConstantInputVariable(context.Data.GetValues<double>(i))) {

          variableNames.Add(variableName);
        }
      }
      return variableNames;
    }

    // NOTE(review): because of the '||', every variable passes as soon as the training partition
    // spans more than one row; the value range only matters for partitions of at most one row.
    // Confirm that this is intended (and not meant to be '&&') before changing it.
    private bool IsNotConstantInputVariable(IList<double> list) {
      return context.Data.TrainingPartition.End - context.Data.TrainingPartition.Start > 1 || list.Range() > 0;
    }

    // Wraps every preprocessing transformation into a data analysis transformation that holds a
    // clone of the underlying transformation. The result is materialized so that a consumer
    // enumerating it several times sees the same cloned instances instead of fresh clones each time.
    private IEnumerable<IDataAnalysisTransformation> CreateDataAnalysisTransformation() {
      return Transformations
        .Select(x => (IDataAnalysisTransformation)new DataAnalysisTransformation(x.OriginalVariable, x.TransformedVariable, (ITransformation)x.Transformation.Clone()))
        .ToList();
    }
  }
}
Note: See TracBrowser for help on using the repository browser.