Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2906_Transformations/HeuristicLab.DataPreprocessing/3.4/ProblemDataCreator.cs @ 15884

Last change on this file since 15884 was 15884, checked in by pfleck, 6 years ago

#2906 Refactoring

  • Moved transformation-specific parts out of existing interfaces.
  • Moved all Transformation logic to DataAnalysisTransformation.
  • Simplified (Inverse)Transformation of Dataset/ProblemData/Model/Solution.
File size: 6.1 KB
Rev Line
[10310]1#region License Information
2/* HeuristicLab
[15583]3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
[10310]4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
[10383]22using System;
[10536]23using System.Collections.Generic;
[11382]24using System.Linq;
[10310]25using HeuristicLab.Problems.DataAnalysis;
26
namespace HeuristicLab.DataPreprocessing {
  /// <summary>
  /// Creates a new <see cref="IDataAnalysisProblemData"/> from the data held by a
  /// <see cref="PreprocessingContext"/>, carrying over settings (target variable, partitions,
  /// allowed inputs, type-specific properties) from a previously used problem data instance.
  /// </summary>
  public class ProblemDataCreator {
    private readonly PreprocessingContext context;

    // Note: ExportToDataset() is invoked on every access of this property.
    private Dataset ExportedDataset {
      get { return context.Data.ExportToDataset(); }
    }

    private IList<PreprocessingTransformation> Transformations {
      get { return context.Data.Transformations; }
    }

    /// <summary>
    /// Initializes a creator that reads its data from the given preprocessing context.
    /// </summary>
    /// <param name="context">The preprocessing context whose data is exported.</param>
    public ProblemDataCreator(PreprocessingContext context) {
      this.context = context;
    }

    /// <summary>
    /// Creates problem data of the same kind as <paramref name="oldProblemData"/> from the
    /// preprocessed data, then applies the training/test partitions of the preprocessing data
    /// and the allowed input variables derived from <paramref name="oldProblemData"/>.
    /// </summary>
    /// <param name="oldProblemData">The problem data whose type and settings are carried over.</param>
    /// <returns>
    /// The newly created problem data, or <c>null</c> if the preprocessing data has no rows or no columns.
    /// </returns>
    /// <exception cref="NotImplementedException">
    /// Thrown if <paramref name="oldProblemData"/> is none of the supported problem data types.
    /// </exception>
    public IDataAnalysisProblemData CreateProblemData(IDataAnalysisProblemData oldProblemData) {
      if (context.Data.Rows == 0 || context.Data.Columns == 0) return null;

      // The order of the type checks is kept exactly as it was; the time series check comes first
      // (presumably because of the type hierarchy of the problem data classes -- TODO confirm).
      IDataAnalysisProblemData problemData;
      if (oldProblemData is TimeSeriesPrognosisProblemData) {
        problemData = CreateTimeSeriesPrognosisData((TimeSeriesPrognosisProblemData)oldProblemData);
      } else if (oldProblemData is RegressionProblemData) {
        problemData = CreateRegressionData((RegressionProblemData)oldProblemData);
      } else if (oldProblemData is ClassificationProblemData) {
        problemData = CreateClassificationData((ClassificationProblemData)oldProblemData);
      } else if (oldProblemData is ClusteringProblemData) {
        problemData = CreateClusteringData((ClusteringProblemData)oldProblemData);
      } else {
        throw new NotImplementedException("The type of the DataAnalysisProblemData is not supported.");
      }

      SetTrainingAndTestPartition(problemData, context.Data);
      SetAllowedInputVariables(problemData, oldProblemData);

      return problemData;
    }

    // Creates time series prognosis problem data and copies the training/test horizons of the old instance.
    private IDataAnalysisProblemData CreateTimeSeriesPrognosisData(TimeSeriesPrognosisProblemData oldProblemData) {
      var targetVariable = GetExistingTargetOrFirstVariable(oldProblemData.TargetVariable);
      var newProblemData = new TimeSeriesPrognosisProblemData(ExportedDataset, Enumerable.Empty<string>(), targetVariable, CreateDataAnalysisTransformation()) {
        TrainingHorizon = oldProblemData.TrainingHorizon,
        TestHorizon = oldProblemData.TestHorizon
      };
      return newProblemData;
    }

    // Creates regression problem data. Unlike the other kinds, the old target variable is first
    // passed through DataAnalysisTransformation.GetLastTransitiveVariable together with the current
    // transformations before it is looked up in the preprocessed data.
    private IDataAnalysisProblemData CreateRegressionData(RegressionProblemData oldProblemData) {
      var transitiveTarget = DataAnalysisTransformation.GetLastTransitiveVariable(oldProblemData.TargetVariable, CreateDataAnalysisTransformation());
      var targetVariable = GetExistingTargetOrFirstVariable(transitiveTarget);
      return new RegressionProblemData(ExportedDataset, Enumerable.Empty<string>(), targetVariable, CreateDataAnalysisTransformation());
    }

    // Creates classification problem data and copies the positive class of the old instance.
    private IDataAnalysisProblemData CreateClassificationData(ClassificationProblemData oldProblemData) {
      var targetVariable = GetExistingTargetOrFirstVariable(oldProblemData.TargetVariable);
      var newProblemData = new ClassificationProblemData(ExportedDataset, Enumerable.Empty<string>(), targetVariable, CreateDataAnalysisTransformation()) {
        PositiveClass = oldProblemData.PositiveClass
      };
      return newProblemData;
    }

    // Creates clustering problem data; nothing is read from the old instance.
    private IDataAnalysisProblemData CreateClusteringData(ClusteringProblemData oldProblemData) {
      return new ClusteringProblemData(ExportedDataset, Enumerable.Empty<string>(), CreateDataAnalysisTransformation());
    }

    // Returns the given target variable if the preprocessed data still contains a variable of
    // that name; otherwise falls back to the first variable of the preprocessed data.
    private string GetExistingTargetOrFirstVariable(string targetVariable) {
      if (context.Data.VariableNames.Contains(targetVariable))
        return targetVariable;
      return context.Data.VariableNames.First();
    }

    // Copies the start/end of the training and test partitions from the preprocessing data.
    private static void SetTrainingAndTestPartition(IDataAnalysisProblemData problemData, IPreprocessingData ppData) {
      problemData.TrainingPartition.Start = ppData.TrainingPartition.Start;
      problemData.TrainingPartition.End = ppData.TrainingPartition.End;
      problemData.TestPartition.Start = ppData.TestPartition.Start;
      problemData.TestPartition.End = ppData.TestPartition.End;
    }

    // Sets the checked state of every input variable of the new problem data:
    //   1. checked iff it is among the old allowed inputs extended via the transformations,
    //   2. additionally checked if it is a variable that neither existed in the old dataset
    //      nor results from extending the old variables via the transformations.
    private static void SetAllowedInputVariables(IDataAnalysisProblemData problemData, IDataAnalysisProblemData oldProblemData) {
      // original inputs + extended(transitive) inputs
      // Hash sets give constant-time membership tests inside the loops below.
      var inputs = new HashSet<string>(DataAnalysisTransformation.ExtendVariables(oldProblemData.AllowedInputVariables, problemData.Transformations));
      foreach (var input in problemData.InputVariables) {
        problemData.InputVariables.SetItemCheckedState(input, inputs.Contains(input.Value));
      }

      // new variables that were not created via transformations
      var originalAndVirtualVariables = DataAnalysisTransformation.ExtendVariables(oldProblemData.Dataset.VariableNames, problemData.Transformations);
      var newVariables = new HashSet<string>(problemData.Dataset.VariableNames.Except(originalAndVirtualVariables));
      foreach (var input in problemData.InputVariables) {
        if (newVariables.Contains(input.Value))
          problemData.InputVariables.SetItemCheckedState(input, true);
      }
    }

    // Wraps every preprocessing transformation in a DataAnalysisTransformation holding a clone of
    // the underlying transformation. The result is a deferred LINQ query, so the wrappers (and
    // clones) are created anew each time the returned sequence is enumerated.
    private IEnumerable<IDataAnalysisTransformation> CreateDataAnalysisTransformation() {
      return Transformations.Select(x => new DataAnalysisTransformation(x.OriginalVariable, x.TransformedVariable, (ITransformation)x.Transformation.Clone()));
    }
  }
}
Note: See TracBrowser for help on using the repository browser.