Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
04/04/18 17:18:02 (7 years ago)
Author:
pfleck
Message:

#2906 Refactoring

  • Moved transformation-specific parts out of existing interfaces.
  • Moved all transformation logic to DataAnalysisTransformation.
  • Simplified the transformation and inverse transformation of Dataset/ProblemData/Model/Solution.
File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/2906_Transformations/HeuristicLab.DataPreprocessing/3.4/ProblemDataCreator.cs

    r15880 r15884  
    2323using System.Collections.Generic;
    2424using System.Linq;
    25 using HeuristicLab.Common;
    2625using HeuristicLab.Problems.DataAnalysis;
    2726
     
    4645
    4746      IDataAnalysisProblemData problemData;
    48 
    4947      if (oldProblemData is TimeSeriesPrognosisProblemData) {
    5048        problemData = CreateTimeSeriesPrognosisData((TimeSeriesPrognosisProblemData)oldProblemData);
     
    5957      }
    6058
    61       SetTrainingAndTestPartition(problemData);
    62       SetAllowedInputVariables(problemData, oldProblemData.AllowedInputVariables);
    63       // set the input variables to the correct checked state
    64       //var inputVariables = oldProblemData.InputVariables.ToDictionary(x => x.Value, x => x);
    65       //foreach (var variable in problemData.InputVariables) {
    66       //  bool isChecked = inputVariables.ContainsKey(variable.Value) && oldProblemData.InputVariables.ItemChecked(inputVariables[variable.Value]);
    67       //  problemData.InputVariables.SetItemCheckedState(variable, isChecked);
    68       //}
     59      SetTrainingAndTestPartition(problemData, context.Data);
     60      SetAllowedInputVariables(problemData, oldProblemData);
    6961
    7062      return problemData;
     
    7567      if (!context.Data.VariableNames.Contains(targetVariable))
    7668        targetVariable = context.Data.VariableNames.First();
    77       var inputVariables = GetDoubleInputVariables(targetVariable);
    78       var newProblemData = new TimeSeriesPrognosisProblemData(ExportedDataset, inputVariables, targetVariable, CreateDataAnalysisTransformation()) {
     69      var newProblemData = new TimeSeriesPrognosisProblemData(ExportedDataset, Enumerable.Empty<string>(), targetVariable, CreateDataAnalysisTransformation()) {
    7970        TrainingHorizon = oldProblemData.TrainingHorizon,
    8071        TestHorizon = oldProblemData.TestHorizon
     
    8475
    8576    private IDataAnalysisProblemData CreateRegressionData(RegressionProblemData oldProblemData) {
    86       // TODO: transformations (additional inputs, target changed)
    87       var targetVariable = RegressionTransformationModel.GetTransformedTragetVariable(oldProblemData.TargetVariable, CreateDataAnalysisTransformation());
     77      var targetVariable = DataAnalysisTransformation.GetLastTransitiveVariable(oldProblemData.TargetVariable, CreateDataAnalysisTransformation());
    8878      if (!context.Data.VariableNames.Contains(targetVariable))
    8979        targetVariable = context.Data.VariableNames.First();
    90       var inputVariables = GetDoubleInputVariables(targetVariable);
    91       var newProblemData = new RegressionProblemData(ExportedDataset, inputVariables, targetVariable, CreateDataAnalysisTransformation());
     80      var newProblemData = new RegressionProblemData(ExportedDataset, Enumerable.Empty<string>(), targetVariable, CreateDataAnalysisTransformation());
    9281      return newProblemData;
    9382    }
     
    9786      if (!context.Data.VariableNames.Contains(targetVariable))
    9887        targetVariable = context.Data.VariableNames.First();
    99       var inputVariables = GetDoubleInputVariables(targetVariable);
    100       var newProblemData = new ClassificationProblemData(ExportedDataset, inputVariables, targetVariable, CreateDataAnalysisTransformation()) {
     88      var newProblemData = new ClassificationProblemData(ExportedDataset, Enumerable.Empty<string>(), targetVariable, CreateDataAnalysisTransformation()) {
    10189        PositiveClass = oldProblemData.PositiveClass
    10290      };
     
    10593
    10694    private IDataAnalysisProblemData CreateClusteringData(ClusteringProblemData oldProblemData) {
    107       return new ClusteringProblemData(ExportedDataset, GetDoubleInputVariables(String.Empty), CreateDataAnalysisTransformation());
     95      return new ClusteringProblemData(ExportedDataset, Enumerable.Empty<string>(), CreateDataAnalysisTransformation());
    10896    }
    10997
    110     private void SetTrainingAndTestPartition(IDataAnalysisProblemData problemData) {
    111       var ppData = context.Data;
    112 
     98    private static void SetTrainingAndTestPartition(IDataAnalysisProblemData problemData, IPreprocessingData ppData) {
    11399      problemData.TrainingPartition.Start = ppData.TrainingPartition.Start;
    114100      problemData.TrainingPartition.End = ppData.TrainingPartition.End;
     
    117103    }
    118104
    119     void SetAllowedInputVariables(IDataAnalysisProblemData problemData, IEnumerable<string> oldInputVariables) {
    120       var inputs = DataAnalysisTransformationModel.ExtendInputVariables(oldInputVariables, problemData.Transformations);
    121 
     105    private static void SetAllowedInputVariables(IDataAnalysisProblemData problemData, IDataAnalysisProblemData oldProblemData) {
     106      // original inputs + extended(transitive) inputs
     107      var inputs = DataAnalysisTransformation.ExtendVariables(oldProblemData.AllowedInputVariables, problemData.Transformations).ToList();
    122108      foreach (var input in problemData.InputVariables) {
    123109        problemData.InputVariables.SetItemCheckedState(input, inputs.Contains(input.Value));
    124110      }
    125     }
    126111
    127     private IEnumerable<string> GetDoubleInputVariables(string targetVariable) {
    128       var variableNames = new List<string>();
    129       for (int i = 0; i < context.Data.Columns; ++i) {
    130         var variableName = context.Data.GetVariableName(i);
    131         if (context.Data.VariableHasType<double>(i)
    132           && variableName != targetVariable
    133           && IsNotConstantInputVariable(context.Data.GetValues<double>(i))) {
    134 
    135           variableNames.Add(variableName);
    136         }
     112      // new variables that were not created via transformations
     113      var originalAndVirtualVariables = DataAnalysisTransformation.ExtendVariables(oldProblemData.Dataset.VariableNames, problemData.Transformations);
     114      var newVariables = problemData.Dataset.VariableNames.Except(originalAndVirtualVariables).ToList();
     115      foreach (var input in problemData.InputVariables) {
     116        if (newVariables.Contains(input.Value))
     117          problemData.InputVariables.SetItemCheckedState(input, true);
    137118      }
    138       return variableNames;
    139     }
    140 
    141     private bool IsNotConstantInputVariable(IList<double> list) {
    142       return context.Data.TrainingPartition.End - context.Data.TrainingPartition.Start > 1 || list.Range() > 0;
    143119    }
    144120
Note: See TracChangeset for help on using the changeset viewer.