#region License Information
/* HeuristicLab
* Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
*/
#endregion
using System;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Problems.DataAnalysis;
namespace HeuristicLab.DataPreprocessing {
/// <summary>
/// Creates a new problem data instance from the preprocessed data of a
/// <see cref="PreprocessingContext"/>, using an existing problem data instance as the
/// template for the problem type, the target variable and the allowed input variables.
/// </summary>
public class ProblemDataCreator {
  private readonly PreprocessingContext context;

  // Calls ExportToDataset() on the preprocessing data on every access.
  private Dataset ExportedDataset {
    get { return context.Data.ExportToDataset(); }
  }

  /// <summary>
  /// Initializes a creator that reads its data from the given preprocessing context.
  /// </summary>
  /// <param name="context">The preprocessing context to read from; must not be null.</param>
  /// <exception cref="ArgumentNullException">Thrown if <paramref name="context"/> is null.</exception>
  public ProblemDataCreator(PreprocessingContext context) {
    // Fail at construction time instead of with a NullReferenceException on first use.
    if (context == null) throw new ArgumentNullException("context");
    this.context = context;
  }

  /// <summary>
  /// Creates problem data of the same kind as <paramref name="oldProblemData"/> from the
  /// preprocessed data, then copies the training/test partitions from the preprocessing data
  /// and sets the checked state of the input variables.
  /// </summary>
  /// <param name="oldProblemData">The problem data used as the template.</param>
  /// <returns>
  /// The newly created problem data, or null if the preprocessed data has no rows or no columns.
  /// </returns>
  /// <exception cref="NotImplementedException">
  /// Thrown if <paramref name="oldProblemData"/> is none of the supported problem data types.
  /// </exception>
  public IDataAnalysisProblemData CreateProblemData(IDataAnalysisProblemData oldProblemData) {
    if (context.Data.Rows == 0 || context.Data.Columns == 0) return null;

    IDataAnalysisProblemData problemData;
    // NOTE(review): the time series check comes first, presumably because
    // TimeSeriesPrognosisProblemData derives from RegressionProblemData and would otherwise
    // be handled by the regression branch - confirm before reordering.
    if (oldProblemData is TimeSeriesPrognosisProblemData) {
      problemData = CreateTimeSeriesPrognosisData((TimeSeriesPrognosisProblemData)oldProblemData);
    } else if (oldProblemData is RegressionProblemData) {
      problemData = CreateRegressionData((RegressionProblemData)oldProblemData);
    } else if (oldProblemData is ClassificationProblemData) {
      problemData = CreateClassificationData((ClassificationProblemData)oldProblemData);
    } else if (oldProblemData is ClusteringProblemData) {
      problemData = CreateClusteringData((ClusteringProblemData)oldProblemData);
    } else {
      throw new NotImplementedException("The type of the DataAnalysisProblemData is not supported.");
    }

    SetTrainingAndTestPartition(problemData, context.Data);
    SetAllowedInputVariables(problemData, oldProblemData);
    return problemData;
  }

  // Creates time series prognosis data; keeps the old target variable if it still exists in the
  // preprocessed data (otherwise the first variable is used) and copies both horizons.
  private IDataAnalysisProblemData CreateTimeSeriesPrognosisData(TimeSeriesPrognosisProblemData oldProblemData) {
    var targetVariable = oldProblemData.TargetVariable;
    if (!context.Data.VariableNames.Contains(targetVariable))
      targetVariable = context.Data.VariableNames.First();

    // The allowed inputs are passed empty here; CreateProblemData sets them afterwards.
    var newProblemData = new TimeSeriesPrognosisProblemData(ExportedDataset, Enumerable.Empty<string>(), targetVariable, CreateDataAnalysisTransformation()) {
      TrainingHorizon = oldProblemData.TrainingHorizon,
      TestHorizon = oldProblemData.TestHorizon
    };
    return newProblemData;
  }

  // Creates regression data. Unlike the other problem types, the target variable is taken from
  // the result of GetStrictTransitiveVariables for the old target; if that name does not exist
  // in the preprocessed data, the first variable is used.
  private IDataAnalysisProblemData CreateRegressionData(RegressionProblemData oldProblemData) {
    // NOTE(review): Last() throws InvalidOperationException on an empty sequence - confirm that
    // GetStrictTransitiveVariables always yields at least one variable name.
    var targetVariable = DataAnalysisTransformation.GetStrictTransitiveVariables(oldProblemData.TargetVariable, CreateDataAnalysisTransformation(), false).Last();
    if (!context.Data.VariableNames.Contains(targetVariable))
      targetVariable = context.Data.VariableNames.First();

    // The allowed inputs are passed empty here; CreateProblemData sets them afterwards.
    var newProblemData = new RegressionProblemData(ExportedDataset, Enumerable.Empty<string>(), targetVariable, CreateDataAnalysisTransformation());
    return newProblemData;
  }

  // Creates classification data; keeps the old target variable if it still exists in the
  // preprocessed data (otherwise the first variable is used) and copies the positive class.
  private IDataAnalysisProblemData CreateClassificationData(ClassificationProblemData oldProblemData) {
    var targetVariable = oldProblemData.TargetVariable;
    if (!context.Data.VariableNames.Contains(targetVariable))
      targetVariable = context.Data.VariableNames.First();

    // The allowed inputs are passed empty here; CreateProblemData sets them afterwards.
    var newProblemData = new ClassificationProblemData(ExportedDataset, Enumerable.Empty<string>(), targetVariable, CreateDataAnalysisTransformation()) {
      PositiveClass = oldProblemData.PositiveClass
    };
    return newProblemData;
  }

  // Creates clustering data; no values are read from the old problem data.
  private IDataAnalysisProblemData CreateClusteringData(ClusteringProblemData oldProblemData) {
    return new ClusteringProblemData(ExportedDataset, Enumerable.Empty<string>(), CreateDataAnalysisTransformation());
  }

  // Copies start and end of the training and test partitions from the preprocessing data.
  private static void SetTrainingAndTestPartition(IDataAnalysisProblemData problemData, IPreprocessingData ppData) {
    problemData.TrainingPartition.Start = ppData.TrainingPartition.Start;
    problemData.TrainingPartition.End = ppData.TrainingPartition.End;
    problemData.TestPartition.Start = ppData.TestPartition.Start;
    problemData.TestPartition.End = ppData.TestPartition.End;
  }

  // Sets the checked state of every input variable of the new problem data. A variable ends up
  // checked if it is one of the old allowed inputs (extended via the transformations) or if it
  // is a new variable that was not created via transformations; otherwise it is unchecked.
  private static void SetAllowedInputVariables(IDataAnalysisProblemData problemData, IDataAnalysisProblemData oldProblemData) {
    // original inputs + extended (transitive) inputs
    var inputs = new HashSet<string>(DataAnalysisTransformation.ExtendVariables(oldProblemData.AllowedInputVariables, problemData.Transformations));

    // new variables that were not created via transformations
    var originalAndVirtualVariables = DataAnalysisTransformation.ExtendVariables(oldProblemData.Dataset.VariableNames, problemData.Transformations);
    var newVariables = new HashSet<string>(problemData.Dataset.VariableNames.Except(originalAndVirtualVariables));

    // Hash sets keep each membership test cheap; one pass yields the same final checked state
    // as setting the extended inputs first and then re-checking the new variables.
    foreach (var input in problemData.InputVariables) {
      var name = input.Value;
      problemData.InputVariables.SetItemCheckedState(input, inputs.Contains(name) || newVariables.Contains(name));
    }
  }

  // Projects the preprocessing transformations into DataAnalysisTransformation objects, each
  // wrapping a clone of the underlying transformation. The sequence is lazily evaluated, so
  // the clones are created anew on every enumeration.
  // NOTE(review): the generic type arguments were missing in this file; string (variable names)
  // and DataAnalysisTransformation were chosen from how the values are used here - confirm
  // against the problem data constructor signatures.
  private IEnumerable<DataAnalysisTransformation> CreateDataAnalysisTransformation() {
    return context.Data.Transformations.Select(x => new DataAnalysisTransformation(x.OriginalVariable, x.TransformedVariable, (ITransformation)x.Transformation.Clone()));
  }
}
}