#region License Information
/* HeuristicLab
* Copyright (C) 2002-2013 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
*/
#endregion
using System;
using System.Collections.Generic;
using HeuristicLab.Problems.DataAnalysis;
namespace HeuristicLab.DataPreprocessing {
/// <summary>
/// Builds a new <see cref="IDataAnalysisProblemData"/> instance from the data held by an
/// <see cref="IPreprocessingContext"/>. The concrete type of the new problem data mirrors
/// the type of the problem data currently attached to the context's problem.
/// </summary>
internal class ProblemDataCreator {
  private readonly IPreprocessingContext context;

  /// <summary>
  /// Creates a new creator bound to the given preprocessing context.
  /// </summary>
  /// <param name="context">The context whose data and problem are read when creating problem data.</param>
  /// <exception cref="ArgumentNullException">Thrown if <paramref name="context"/> is null.</exception>
  public ProblemDataCreator(IPreprocessingContext context) {
    // Fail fast here instead of with a NullReferenceException in CreateProblemData.
    if (context == null) throw new ArgumentNullException("context");
    this.context = context;
  }

  /// <summary>
  /// Exports the context's data to a dataset and wraps it in problem data of the same kind
  /// (regression, classification or clustering) as the context's current problem data.
  /// The training and test partitions are copied from the context's data.
  /// </summary>
  /// <returns>The newly created problem data.</returns>
  /// <exception cref="NotImplementedException">
  /// Thrown if the current problem data is none of the supported types.
  /// </exception>
  public IDataAnalysisProblemData CreateProblemData() {
    var oldProblemData = context.Problem.ProblemData;

    // The dataset is exported and the variable names are read before the type dispatch,
    // exactly as before, so both happen even when the type turns out to be unsupported.
    var dataSet = context.Data.ExportToDataset();
    var inputVariables = context.Data.VariableNames;

    // The checks run in the order regression, classification, clustering.
    var regressionData = oldProblemData as RegressionProblemData;
    var classificationData = oldProblemData as ClassificationProblemData;
    var clusteringData = oldProblemData as ClusteringProblemData;

    IDataAnalysisProblemData problemData;
    if (regressionData != null) {
      problemData = CreateRegressionData(regressionData, dataSet, inputVariables);
    } else if (classificationData != null) {
      problemData = CreateClassificationData(classificationData, dataSet, inputVariables);
    } else if (clusteringData != null) {
      problemData = CreateClusteringData(clusteringData, dataSet, inputVariables);
    } else {
      throw new NotImplementedException("The type of the DataAnalysisProblemData is not supported.");
    }

    SetTrainingAndTestPartition(problemData);
    return problemData;
  }

  /// <summary>
  /// Creates regression problem data over <paramref name="dataSet"/>, reusing the target
  /// variable of <paramref name="oldProblemData"/>.
  /// </summary>
  /// <remarks>
  /// The target variable must be double and must exist in the new dataset; this method
  /// does not check that itself.
  /// </remarks>
  private IDataAnalysisProblemData CreateRegressionData(RegressionProblemData oldProblemData, Dataset dataSet, IEnumerable<string> inputVariables) {
    var targetVariable = oldProblemData.TargetVariable;
    return new RegressionProblemData(dataSet, inputVariables, targetVariable);
  }

  /// <summary>
  /// Creates classification problem data over <paramref name="dataSet"/>, reusing the target
  /// variable of <paramref name="oldProblemData"/>.
  /// </summary>
  /// <remarks>
  /// The target variable must be double and must exist in the new dataset; this method
  /// does not check that itself.
  /// </remarks>
  private IDataAnalysisProblemData CreateClassificationData(ClassificationProblemData oldProblemData, Dataset dataSet, IEnumerable<string> inputVariables) {
    var targetVariable = oldProblemData.TargetVariable;
    return new ClassificationProblemData(dataSet, inputVariables, targetVariable);
  }

  /// <summary>
  /// Creates clustering problem data over <paramref name="dataSet"/>.
  /// <paramref name="oldProblemData"/> is not read; the parameter only keeps the signature
  /// parallel to the regression and classification helpers.
  /// </summary>
  private IDataAnalysisProblemData CreateClusteringData(ClusteringProblemData oldProblemData, Dataset dataSet, IEnumerable<string> inputVariables) {
    return new ClusteringProblemData(dataSet, inputVariables);
  }

  /// <summary>
  /// Copies the start and end of the training and test partitions from the context's data
  /// to <paramref name="problemData"/>.
  /// </summary>
  private void SetTrainingAndTestPartition(IDataAnalysisProblemData problemData) {
    var ppData = context.Data;

    problemData.TrainingPartition.Start = ppData.TrainingPartition.Start;
    problemData.TrainingPartition.End = ppData.TrainingPartition.End;

    problemData.TestPartition.Start = ppData.TestPartition.Start;
    problemData.TestPartition.End = ppData.TestPartition.End;
  }
}
}