Free cookie consent management tool by TermsFeed Policy Generator

source: branches/3087_Ceres_Integration/HeuristicLab.DataPreprocessing/3.4/ProblemDataCreator.cs

Last change on this file was 18006, checked in by gkronber, 3 years ago

#3087: merged r17784:18004 from trunk to branch to prepare for trunk reintegration (fixed a conflict in CrossValidation.cs)

File size: 6.0 KB
RevLine 
[10310]1#region License Information
2/* HeuristicLab
[17180]3 * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
[10310]4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
[10383]22using System;
[10536]23using System.Collections.Generic;
[11382]24using System.Linq;
[10982]25using HeuristicLab.Common;
[10310]26using HeuristicLab.Problems.DataAnalysis;
27
namespace HeuristicLab.DataPreprocessing {
  /// <summary>
  /// Builds a new <see cref="IDataAnalysisProblemData"/> from the data held by a
  /// <see cref="PreprocessingContext"/>, carrying over settings from a previous problem data instance.
  /// </summary>
  public class ProblemDataCreator {
    private readonly PreprocessingContext context;

    /// <summary>
    /// Dataset exported from the preprocessing data.
    /// Every access calls <c>context.Data.ExportToDataset()</c> again, so read it once per created problem data.
    /// </summary>
    private Dataset ExportedDataset {
      get { return context.Data.ExportToDataset(); }
    }

    /// <summary>Transformations recorded on the preprocessing data.</summary>
    private IList<ITransformation> Transformations {
      get { return context.Data.Transformations; }
    }

    /// <summary>Creates a problem data creator working on the given preprocessing context.</summary>
    /// <param name="context">The context whose <c>Data</c> is read whenever problem data is created.</param>
    public ProblemDataCreator(PreprocessingContext context) {
      this.context = context;
    }

    /// <summary>
    /// Creates problem data of the same kind as <paramref name="oldProblemData"/> from the current preprocessing data.
    /// The training and test partitions are taken from the preprocessing data; the checked state of each input
    /// variable is taken from the equally named input variable of <paramref name="oldProblemData"/>
    /// (input variables without a counterpart are unchecked).
    /// </summary>
    /// <param name="oldProblemData">The previous problem data that determines the type and settings of the result.</param>
    /// <returns>The new problem data, or <c>null</c> if the preprocessing data has no rows or no columns.</returns>
    /// <exception cref="NotImplementedException">
    /// <paramref name="oldProblemData"/> is none of the supported problem data types (this includes <c>null</c>).
    /// </exception>
    public IDataAnalysisProblemData CreateProblemData(IDataAnalysisProblemData oldProblemData) {
      if (context.Data.Rows == 0 || context.Data.Columns == 0) return null;

      IDataAnalysisProblemData problemData;

      // NOTE(review): time series is tested before regression, presumably because
      // TimeSeriesPrognosisProblemData is also a RegressionProblemData - keep this order unless verified otherwise.
      if (oldProblemData is TimeSeriesPrognosisProblemData) {
        problemData = CreateTimeSeriesPrognosisData((TimeSeriesPrognosisProblemData)oldProblemData);
      } else if (oldProblemData is RegressionProblemData) {
        problemData = CreateRegressionData((RegressionProblemData)oldProblemData);
      } else if (oldProblemData is ClassificationProblemData) {
        problemData = CreateClassificationData((ClassificationProblemData)oldProblemData);
      } else if (oldProblemData is ClusteringProblemData) {
        problemData = CreateClusteringData((ClusteringProblemData)oldProblemData);
      } else {
        throw new NotImplementedException("The type of the DataAnalysisProblemData is not supported.");
      }

      SetTrainingAndTestPartition(problemData);

      // set the input variables to the correct checked state
      var oldInputVariables = oldProblemData.InputVariables;
      var oldCheckedStates = oldInputVariables.ToDictionary(x => x.Value, x => oldInputVariables.ItemChecked(x));
      foreach (var variable in problemData.InputVariables) {
        // TryGetValue leaves isChecked == false for variables that did not exist before
        bool isChecked;
        oldCheckedStates.TryGetValue(variable.Value, out isChecked);
        problemData.InputVariables.SetItemCheckedState(variable, isChecked);
      }

      return problemData;
    }

    /// <summary>
    /// Creates time series prognosis problem data and copies the training and test horizon from the old instance.
    /// </summary>
    private IDataAnalysisProblemData CreateTimeSeriesPrognosisData(TimeSeriesPrognosisProblemData oldProblemData) {
      var targetVariable = GetTargetVariable(oldProblemData.TargetVariable);
      var inputVariables = GetDoubleInputVariables(targetVariable);
      var newProblemData = new TimeSeriesPrognosisProblemData(ExportedDataset, inputVariables, targetVariable, Transformations) {
        TrainingHorizon = oldProblemData.TrainingHorizon,
        TestHorizon = oldProblemData.TestHorizon
      };
      return newProblemData;
    }

    /// <summary>Creates regression problem data for the current preprocessing data.</summary>
    private IDataAnalysisProblemData CreateRegressionData(RegressionProblemData oldProblemData) {
      var targetVariable = GetTargetVariable(oldProblemData.TargetVariable);
      var inputVariables = GetDoubleInputVariables(targetVariable);
      var newProblemData = new RegressionProblemData(ExportedDataset, inputVariables, targetVariable, Transformations);
      return newProblemData;
    }

    /// <summary>
    /// Creates classification problem data and copies the positive class from the old instance.
    /// </summary>
    private IDataAnalysisProblemData CreateClassificationData(ClassificationProblemData oldProblemData) {
      var targetVariable = GetTargetVariable(oldProblemData.TargetVariable);
      var inputVariables = GetDoubleInputVariables(targetVariable);
      var newProblemData = new ClassificationProblemData(ExportedDataset, inputVariables, targetVariable, transformations: Transformations) {
        PositiveClass = oldProblemData.PositiveClass
      };
      return newProblemData;
    }

    /// <summary>
    /// Creates clustering problem data; no target variable is excluded from the input variables.
    /// </summary>
    private IDataAnalysisProblemData CreateClusteringData(ClusteringProblemData oldProblemData) {
      return new ClusteringProblemData(ExportedDataset, GetDoubleInputVariables(String.Empty), Transformations);
    }

    /// <summary>
    /// Copies start and end of the training and test partition from the preprocessing data to the problem data.
    /// </summary>
    private void SetTrainingAndTestPartition(IDataAnalysisProblemData problemData) {
      var ppData = context.Data;

      problemData.TrainingPartition.Start = ppData.TrainingPartition.Start;
      problemData.TrainingPartition.End = ppData.TrainingPartition.End;
      problemData.TestPartition.Start = ppData.TestPartition.Start;
      problemData.TestPartition.End = ppData.TestPartition.End;
    }

    /// <summary>
    /// Returns <paramref name="oldTargetVariable"/> if the preprocessing data still contains a variable with that
    /// name; otherwise falls back to the first variable of the preprocessing data.
    /// </summary>
    private string GetTargetVariable(string oldTargetVariable) {
      var variableNames = context.Data.VariableNames;
      return variableNames.Contains(oldTargetVariable) ? oldTargetVariable : variableNames.First();
    }

    /// <summary>
    /// Collects the names of all columns of type <c>double</c>, except <paramref name="targetVariable"/>,
    /// that pass the constant-variable filter.
    /// </summary>
    /// <param name="targetVariable">Name of the variable to exclude; pass an empty string to exclude nothing.</param>
    /// <returns>The names of the accepted columns in column order.</returns>
    private IEnumerable<string> GetDoubleInputVariables(string targetVariable) {
      var data = context.Data;

      // Loop invariant, so it is evaluated once instead of once per column.
      // NOTE(review): because of the "||" below, every double column is accepted as soon as the training
      // partition spans more than one row; the range of the values only matters otherwise.
      // This mirrors the previous behavior - confirm that it is intended.
      bool trainingPartitionHasMultipleRows = data.TrainingPartition.End - data.TrainingPartition.Start > 1;

      var variableNames = new List<string>();
      for (int i = 0; i < data.Columns; ++i) {
        if (!data.VariableHasType<double>(i)) continue;

        var variableName = data.GetVariableName(i);
        if (variableName == targetVariable) continue;

        // The column values are only fetched when the partition check alone does not decide.
        if (trainingPartitionHasMultipleRows || data.GetValues<double>(i).Range() > 0) {
          variableNames.Add(variableName);
        }
      }
      return variableNames;
    }
  }
}
Note: See TracBrowser for help on using the repository browser.