Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2906_Transformations/HeuristicLab.DataPreprocessing/3.4/ProblemDataCreator.cs @ 15865

Last change on this file since 15865 was 15865, checked in by pfleck, 6 years ago

#2906 Added PreprocessingTransformation as a custom view-model for transformations in preprocessing.

File size: 6.9 KB
RevLine 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion
21
[10383]22using System;
[10536]23using System.Collections.Generic;
[11382]24using System.Linq;
[10982]25using HeuristicLab.Common;
[10310]26using HeuristicLab.Problems.DataAnalysis;
27
namespace HeuristicLab.DataPreprocessing {
  /// <summary>
  /// Creates a new <see cref="IDataAnalysisProblemData"/> from the data held by a
  /// <see cref="PreprocessingContext"/>, carrying over selected settings of a previous problem data.
  /// </summary>
  public class ProblemDataCreator {
    private readonly PreprocessingContext context;

    // Calls ExportToDataset() on every access; callers below read it once per created problem data.
    private Dataset ExportedDataset {
      get { return context.Data.ExportToDataset(); }
    }

    private IList<PreprocessingTransformation> Transformations {
      get { return context.Data.Transformations; }
    }

    /// <summary>
    /// Initializes a creator that reads from the given preprocessing context.
    /// </summary>
    /// <param name="context">The preprocessing context whose data is exported.</param>
    public ProblemDataCreator(PreprocessingContext context) {
      this.context = context;
    }

    /// <summary>
    /// Creates problem data of the same kind as <paramref name="oldProblemData"/> from the
    /// current preprocessing data, then applies the preprocessing partitions and the checked
    /// state of the input variables.
    /// </summary>
    /// <param name="oldProblemData">The previous problem data; its concrete type selects what is created.</param>
    /// <returns>The new problem data, or <c>null</c> if the preprocessing data has no rows or no columns.</returns>
    /// <exception cref="NotImplementedException">
    /// Thrown if <paramref name="oldProblemData"/> is none of the supported types (this includes <c>null</c>).
    /// </exception>
    public IDataAnalysisProblemData CreateProblemData(IDataAnalysisProblemData oldProblemData) {
      if (context.Data.Rows == 0 || context.Data.Columns == 0) return null;

      IDataAnalysisProblemData problemData;

      // NOTE(review): the order of the type tests is kept as-is; presumably the time-series type
      // derives from the regression type and must therefore be tested first - verify before reordering.
      if (oldProblemData is TimeSeriesPrognosisProblemData) {
        problemData = CreateTimeSeriesPrognosisData((TimeSeriesPrognosisProblemData)oldProblemData);
      } else if (oldProblemData is RegressionProblemData) {
        problemData = CreateRegressionData((RegressionProblemData)oldProblemData);
      } else if (oldProblemData is ClassificationProblemData) {
        problemData = CreateClassificationData((ClassificationProblemData)oldProblemData);
      } else if (oldProblemData is ClusteringProblemData) {
        problemData = CreateClusteringData((ClusteringProblemData)oldProblemData);
      } else {
        throw new NotImplementedException("The type of the DataAnalysisProblemData is not supported.");
      }

      SetTrainingAndTestPartition(problemData);
      // set the input variables to the correct checked state
      SetAllowedInputVariables(problemData, oldProblemData.AllowedInputVariables);

      return problemData;
    }

    /// <summary>
    /// Creates time-series prognosis problem data and copies the training and test horizon
    /// from <paramref name="oldProblemData"/>.
    /// </summary>
    private IDataAnalysisProblemData CreateTimeSeriesPrognosisData(TimeSeriesPrognosisProblemData oldProblemData) {
      var targetVariable = GetExistingVariableOrFirst(oldProblemData.TargetVariable);
      var inputVariables = GetDoubleInputVariables(targetVariable);
      var newProblemData = new TimeSeriesPrognosisProblemData(ExportedDataset, inputVariables, targetVariable, CreateDataAnalysisTransformation()) {
        TrainingHorizon = oldProblemData.TrainingHorizon,
        TestHorizon = oldProblemData.TestHorizon
      };
      return newProblemData;
    }

    /// <summary>
    /// Creates regression problem data. The target variable name is resolved through
    /// <c>RegressionProblemData.GetTransformedTragetVariable</c> using the current transformations.
    /// </summary>
    private IDataAnalysisProblemData CreateRegressionData(RegressionProblemData oldProblemData) {
      // TODO: transformations (additional inputs, target changed)
      // Build the transformation clones once and use the same instances for resolving the
      // target name and for the new problem data.
      var transformations = CreateDataAnalysisTransformation();
      var transformedTarget = RegressionProblemData.GetTransformedTragetVariable(oldProblemData.TargetVariable, transformations);
      var targetVariable = GetExistingVariableOrFirst(transformedTarget);
      var inputVariables = GetDoubleInputVariables(targetVariable);
      return new RegressionProblemData(ExportedDataset, inputVariables, targetVariable, transformations);
    }

    /// <summary>
    /// Creates classification problem data and copies the positive class
    /// from <paramref name="oldProblemData"/>.
    /// </summary>
    private IDataAnalysisProblemData CreateClassificationData(ClassificationProblemData oldProblemData) {
      var targetVariable = GetExistingVariableOrFirst(oldProblemData.TargetVariable);
      var inputVariables = GetDoubleInputVariables(targetVariable);
      var newProblemData = new ClassificationProblemData(ExportedDataset, inputVariables, targetVariable, CreateDataAnalysisTransformation()) {
        PositiveClass = oldProblemData.PositiveClass
      };
      return newProblemData;
    }

    /// <summary>
    /// Creates clustering problem data. An empty target name is passed to the input selection,
    /// so no variable is excluded as target.
    /// </summary>
    private IDataAnalysisProblemData CreateClusteringData(ClusteringProblemData oldProblemData) {
      return new ClusteringProblemData(ExportedDataset, GetDoubleInputVariables(String.Empty), CreateDataAnalysisTransformation());
    }

    /// <summary>
    /// Copies start and end of the training and test partition from the preprocessing data
    /// to <paramref name="problemData"/>.
    /// </summary>
    private void SetTrainingAndTestPartition(IDataAnalysisProblemData problemData) {
      var ppData = context.Data;

      problemData.TrainingPartition.Start = ppData.TrainingPartition.Start;
      problemData.TrainingPartition.End = ppData.TrainingPartition.End;
      problemData.TestPartition.Start = ppData.TestPartition.Start;
      problemData.TestPartition.End = ppData.TestPartition.End;
    }

    /// <summary>
    /// Checks exactly those input variables of <paramref name="problemData"/> whose name is contained
    /// in the old input variables as extended by <c>DataAnalysisProblemData.ExtendInputVariables</c>
    /// with the transformations of <paramref name="problemData"/>; all others are unchecked.
    /// </summary>
    /// <param name="problemData">The problem data whose input variables are updated.</param>
    /// <param name="oldInputVariables">The names of the previously allowed input variables.</param>
    private void SetAllowedInputVariables(IDataAnalysisProblemData problemData, IEnumerable<string> oldInputVariables) {
      // Materialize into a set: the sequence is evaluated once and each lookup is O(1).
      var allowedInputs = new HashSet<string>(DataAnalysisProblemData.ExtendInputVariables(oldInputVariables, problemData.Transformations));

      foreach (var input in problemData.InputVariables) {
        problemData.InputVariables.SetItemCheckedState(input, allowedInputs.Contains(input.Value));
      }
    }

    /// <summary>
    /// Returns <paramref name="variableName"/> if the preprocessing data contains a variable of that
    /// name; otherwise the name of the first variable of the preprocessing data.
    /// </summary>
    private string GetExistingVariableOrFirst(string variableName) {
      if (context.Data.VariableNames.Contains(variableName))
        return variableName;
      return context.Data.VariableNames.First();
    }

    /// <summary>
    /// Collects the names of all columns that are of type double, are not named
    /// <paramref name="targetVariable"/> and pass <see cref="IsNotConstantInputVariable"/>.
    /// </summary>
    /// <param name="targetVariable">The name of the variable to exclude.</param>
    /// <returns>The selected variable names in column order.</returns>
    private IEnumerable<string> GetDoubleInputVariables(string targetVariable) {
      var variableNames = new List<string>();
      for (int i = 0; i < context.Data.Columns; ++i) {
        var variableName = context.Data.GetVariableName(i);
        if (context.Data.VariableHasType<double>(i)
          && variableName != targetVariable
          && IsNotConstantInputVariable(context.Data.GetValues<double>(i))) {

          variableNames.Add(variableName);
        }
      }
      return variableNames;
    }

    /// <summary>
    /// Returns true if the training partition spans more than one row or if the range of
    /// <paramref name="list"/> is greater than zero.
    /// </summary>
    /// <remarks>
    /// NOTE(review): because the two conditions are combined with OR, the value range is only
    /// decisive when the training partition spans at most one row - confirm this is intended.
    /// </remarks>
    private bool IsNotConstantInputVariable(IList<double> list) {
      return context.Data.TrainingPartition.End - context.Data.TrainingPartition.Start > 1 || list.Range() > 0;
    }

    /// <summary>
    /// Creates one <see cref="DataAnalysisTransformation"/> per preprocessing transformation,
    /// each wrapping a clone of the underlying transformation.
    /// </summary>
    /// <returns>
    /// A materialized collection. The clones are created exactly once per call, so repeated
    /// enumeration by a consumer always yields the same instances.
    /// </returns>
    private IEnumerable<IDataAnalysisTransformation> CreateDataAnalysisTransformation() {
      return Transformations
        .Select(x => new DataAnalysisTransformation(x.OriginalVariable, x.TransformedVariable, (ITransformation)x.Transformation.Clone()))
        .ToList();
    }
  }
}
Note: See TracBrowser for help on using the repository browser.