#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Problems.DataAnalysis;

namespace HeuristicLab.DataPreprocessing {
  /// <summary>
  /// Builds a new <see cref="IDataAnalysisProblemData"/> from the data held by a
  /// <see cref="PreprocessingContext"/>, carrying over the settings of a previous
  /// problem data instance (target variable, partitions, checked input variables).
  /// </summary>
  public class ProblemDataCreator {
    private readonly PreprocessingContext context;

    /// <summary>
    /// The preprocessing data exported as a dataset. The export is performed on
    /// every access, so callers should read this property once per problem data.
    /// </summary>
    private Dataset ExportedDataset {
      get { return context.Data.ExportToDataset(); }
    }

    /// <summary>
    /// Creates a new creator working on the given preprocessing context.
    /// </summary>
    /// <param name="context">The context whose <c>Data</c> is exported.</param>
    public ProblemDataCreator(PreprocessingContext context) {
      this.context = context;
    }

    /// <summary>
    /// Creates problem data of the same kind as <paramref name="oldProblemData"/>
    /// from the current preprocessing data, then applies the training/test
    /// partitions of the preprocessing data and the checked state of the input
    /// variables.
    /// </summary>
    /// <param name="oldProblemData">The problem data whose type and settings are carried over.</param>
    /// <returns>
    /// The newly created problem data, or <c>null</c> if the preprocessing data
    /// has no rows or no columns.
    /// </returns>
    /// <exception cref="NotImplementedException">
    /// <paramref name="oldProblemData"/> is none of the supported problem data types.
    /// </exception>
    public IDataAnalysisProblemData CreateProblemData(IDataAnalysisProblemData oldProblemData) {
      if (context.Data.Rows == 0 || context.Data.Columns == 0) {
        return null;
      }

      IDataAnalysisProblemData problemData;
      // NOTE(review): the time series check deliberately comes before the regression
      // check; presumably TimeSeriesPrognosisProblemData is a regression subtype, so
      // the order of these tests must be kept.
      if (oldProblemData is TimeSeriesPrognosisProblemData) {
        problemData = CreateTimeSeriesPrognosisData((TimeSeriesPrognosisProblemData)oldProblemData);
      } else if (oldProblemData is RegressionProblemData) {
        problemData = CreateRegressionData((RegressionProblemData)oldProblemData);
      } else if (oldProblemData is ClassificationProblemData) {
        problemData = CreateClassificationData((ClassificationProblemData)oldProblemData);
      } else if (oldProblemData is ClusteringProblemData) {
        problemData = CreateClusteringData((ClusteringProblemData)oldProblemData);
      } else {
        throw new NotImplementedException("The type of the DataAnalysisProblemData is not supported.");
      }

      SetTrainingAndTestPartition(problemData, context.Data);
      SetAllowedInputVariables(problemData, oldProblemData);

      return problemData;
    }

    /// <summary>
    /// Creates time series prognosis problem data, keeping the old target variable
    /// (or falling back to the first variable if it no longer exists) and the old
    /// training and test horizons.
    /// </summary>
    private IDataAnalysisProblemData CreateTimeSeriesPrognosisData(TimeSeriesPrognosisProblemData oldProblemData) {
      var targetVariable = oldProblemData.TargetVariable;
      if (!context.Data.VariableNames.Contains(targetVariable)) {
        targetVariable = context.Data.VariableNames.First();
      }

      // The allowed inputs start out empty; they are checked later by SetAllowedInputVariables.
      var newProblemData = new TimeSeriesPrognosisProblemData(ExportedDataset, Enumerable.Empty<string>(), targetVariable, CreateDataAnalysisTransformation()) {
        TrainingHorizon = oldProblemData.TrainingHorizon,
        TestHorizon = oldProblemData.TestHorizon
      };
      return newProblemData;
    }

    /// <summary>
    /// Creates regression problem data. The target is the last variable returned by
    /// <c>DataAnalysisTransformation.GetStrictTransitiveVariables</c> for the old
    /// target, or the first variable of the data if that one does not exist.
    /// </summary>
    private IDataAnalysisProblemData CreateRegressionData(RegressionProblemData oldProblemData) {
      var targetVariable = DataAnalysisTransformation.GetStrictTransitiveVariables(oldProblemData.TargetVariable, CreateDataAnalysisTransformation(), false).Last();
      if (!context.Data.VariableNames.Contains(targetVariable)) {
        targetVariable = context.Data.VariableNames.First();
      }

      // A separate set of clones is handed to the problem data so it does not share
      // transformation instances with the lookup above.
      var newProblemData = new RegressionProblemData(ExportedDataset, Enumerable.Empty<string>(), targetVariable, CreateDataAnalysisTransformation());
      return newProblemData;
    }

    /// <summary>
    /// Creates classification problem data, keeping the old target variable (or
    /// falling back to the first variable if it no longer exists) and the old
    /// positive class.
    /// </summary>
    private IDataAnalysisProblemData CreateClassificationData(ClassificationProblemData oldProblemData) {
      var targetVariable = oldProblemData.TargetVariable;
      if (!context.Data.VariableNames.Contains(targetVariable)) {
        targetVariable = context.Data.VariableNames.First();
      }

      var newProblemData = new ClassificationProblemData(ExportedDataset, Enumerable.Empty<string>(), targetVariable, CreateDataAnalysisTransformation()) {
        PositiveClass = oldProblemData.PositiveClass
      };
      return newProblemData;
    }

    /// <summary>
    /// Creates clustering problem data from the exported dataset. Nothing is read
    /// from <paramref name="oldProblemData"/>.
    /// </summary>
    private IDataAnalysisProblemData CreateClusteringData(ClusteringProblemData oldProblemData) {
      return new ClusteringProblemData(ExportedDataset, Enumerable.Empty<string>(), CreateDataAnalysisTransformation());
    }

    /// <summary>
    /// Copies the start and end of the training and test partitions from the
    /// preprocessing data to the problem data.
    /// </summary>
    private static void SetTrainingAndTestPartition(IDataAnalysisProblemData problemData, IPreprocessingData ppData) {
      problemData.TrainingPartition.Start = ppData.TrainingPartition.Start;
      problemData.TrainingPartition.End = ppData.TrainingPartition.End;
      problemData.TestPartition.Start = ppData.TestPartition.Start;
      problemData.TestPartition.End = ppData.TestPartition.End;
    }

    /// <summary>
    /// Sets the checked state of every input variable of <paramref name="problemData"/>:
    /// a variable is checked if it was an allowed input of the old problem data
    /// (after extension through the transformations) or if it is a new variable
    /// that is neither in the old dataset nor derived from it by a transformation.
    /// </summary>
    private static void SetAllowedInputVariables(IDataAnalysisProblemData problemData, IDataAnalysisProblemData oldProblemData) {
      // original inputs + extended(transitive) inputs
      // Hash sets keep the per-variable membership tests below constant-time.
      var inputs = new HashSet<string>(DataAnalysisTransformation.ExtendVariables(oldProblemData.AllowedInputVariables, problemData.Transformations));
      foreach (var input in problemData.InputVariables) {
        problemData.InputVariables.SetItemCheckedState(input, inputs.Contains(input.Value));
      }

      // new variables that were not created via transformations
      var originalAndVirtualVariables = DataAnalysisTransformation.ExtendVariables(oldProblemData.Dataset.VariableNames, problemData.Transformations);
      var newVariables = new HashSet<string>(problemData.Dataset.VariableNames.Except(originalAndVirtualVariables));
      foreach (var input in problemData.InputVariables) {
        if (newVariables.Contains(input.Value)) {
          problemData.InputVariables.SetItemCheckedState(input, true);
        }
      }
    }

    /// <summary>
    /// Converts the transformations of the preprocessing data into
    /// <see cref="DataAnalysisTransformation"/> instances, cloning each wrapped
    /// transformation.
    /// </summary>
    /// <returns>
    /// A materialized list, so that enumerating the result repeatedly yields the
    /// same clones instead of creating new ones on every enumeration.
    /// </returns>
    private IEnumerable<DataAnalysisTransformation> CreateDataAnalysisTransformation() {
      return context.Data.Transformations
        .Select(x => new DataAnalysisTransformation(x.OriginalVariable, x.TransformedVariable, (ITransformation)x.Transformation.Clone()))
        .ToList();
    }
  }
}