Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.DataPreprocessing/3.4/ProblemDataCreator.cs @ 13502

Last change on this file since 13502 was 13502, checked in by pfleck, 8 years ago

#2559

  • Adapted import and export for preprocessing.
  • Added MenuItem to be able to open Preprocessing without creating a DataAnalysisProblem before.
  • Added coloring in ScatterPlot.
  • Removed IPreprocessingContext interface.
  • Reformatted code:
    • Added missing copyright headers.
    • Corrected namespaces.
    • Deleted unnecessary usings.
    • Applied correct formatting.
File size: 5.1 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Problems.DataAnalysis;
27
28namespace HeuristicLab.DataPreprocessing {
29  public class ProblemDataCreator {
30
31    private readonly PreprocessingContext context;
32
33    private Dataset ExportedDataset {
34      get {
35        return context.Data.ExportToDataset();
36      }
37    }
38
39    private IList<ITransformation> Transformations { get { return context.Data.Transformations; } }
40
41    public ProblemDataCreator(PreprocessingContext context) {
42      this.context = context;
43    }
44
45    public IDataAnalysisProblemData CreateProblemData(IDataAnalysisProblemData oldProblemData) {
46      if (context.Data.Rows == 0 || context.Data.Columns == 0) return null;
47
48      IDataAnalysisProblemData problemData;
49
50      if (oldProblemData is RegressionProblemData) {
51        problemData = CreateRegressionData((RegressionProblemData)oldProblemData);
52      } else if (oldProblemData is ClassificationProblemData) {
53        problemData = CreateClassificationData((ClassificationProblemData)oldProblemData);
54      } else if (oldProblemData is ClusteringProblemData) {
55        problemData = CreateClusteringData((ClusteringProblemData)oldProblemData);
56      } else {
57        throw new NotImplementedException("The type of the DataAnalysisProblemData is not supported.");
58      }
59
60      SetTrainingAndTestPartition(problemData);
61      // set the input variables to the correct checked state
62      var inputVariables = oldProblemData.InputVariables.ToDictionary(x => x.Value, x => x);
63      foreach (var variable in problemData.InputVariables) {
64        bool isChecked = inputVariables.ContainsKey(variable.Value) && oldProblemData.InputVariables.ItemChecked(inputVariables[variable.Value]);
65        problemData.InputVariables.SetItemCheckedState(variable, isChecked);
66      }
67
68      return problemData;
69    }
70
71    private IDataAnalysisProblemData CreateRegressionData(RegressionProblemData oldProblemData) {
72      var targetVariable = oldProblemData.TargetVariable;
73      if (!context.Data.VariableNames.Contains(targetVariable))
74        targetVariable = context.Data.VariableNames.First();
75      var inputVariables = GetDoubleInputVariables(targetVariable);
76      var newProblemData = new RegressionProblemData(ExportedDataset, inputVariables, targetVariable, Transformations);
77      return newProblemData;
78    }
79
80    private IDataAnalysisProblemData CreateClassificationData(ClassificationProblemData oldProblemData) {
81      var targetVariable = oldProblemData.TargetVariable;
82      if (!context.Data.VariableNames.Contains(targetVariable))
83        targetVariable = context.Data.VariableNames.First();
84      var inputVariables = GetDoubleInputVariables(targetVariable);
85      var newProblemData = new ClassificationProblemData(ExportedDataset, inputVariables, targetVariable, Transformations);
86      newProblemData.PositiveClass = oldProblemData.PositiveClass;
87      return newProblemData;
88    }
89
90    private IDataAnalysisProblemData CreateClusteringData(ClusteringProblemData oldProblemData) {
91      return new ClusteringProblemData(ExportedDataset, GetDoubleInputVariables(String.Empty), Transformations);
92    }
93
94    private void SetTrainingAndTestPartition(IDataAnalysisProblemData problemData) {
95      var ppData = context.Data;
96
97      problemData.TrainingPartition.Start = ppData.TrainingPartition.Start;
98      problemData.TrainingPartition.End = ppData.TrainingPartition.End;
99      problemData.TestPartition.Start = ppData.TestPartition.Start;
100      problemData.TestPartition.End = ppData.TestPartition.End;
101    }
102
103    private IEnumerable<string> GetDoubleInputVariables(string targetVariable) {
104      var variableNames = new List<string>();
105      for (int i = 0; i < context.Data.Columns; ++i) {
106        var variableName = context.Data.GetVariableName(i);
107        if (context.Data.VariableHasType<double>(i)
108          && variableName != targetVariable
109          && IsNotConstantInputVariable(context.Data.GetValues<double>(i))) {
110
111          variableNames.Add(variableName);
112        }
113      }
114      return variableNames;
115    }
116
117    private bool IsNotConstantInputVariable(IList<double> list) {
118      return context.Data.TrainingPartition.End - context.Data.TrainingPartition.Start > 1 || list.Range() > 0;
119    }
120  }
121}
Note: See TracBrowser for help on using the repository browser.