#region License Information
/* HeuristicLab
* Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
*/
#endregion
using System;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
namespace HeuristicLab.Problems.DataAnalysis {
[Item("Transformation", "A transformation applied to a DataAnalysisProblemData")]
[StorableClass]
public sealed class DataAnalysisTransformation : ParameterizedNamedItem, IDataAnalysisTransformation {
#region Parameter Properties
/// <summary>
/// Accessor for the "Original Variable" entry of the parameter collection.
/// </summary>
private IFixedValueParameter OriginalVariableParameter =>
  (IFixedValueParameter)Parameters["Original Variable"];
/// <summary>
/// Accessor for the "Transformed Variable" entry of the parameter collection.
/// </summary>
private IFixedValueParameter TransformedVariableParameter =>
  (IFixedValueParameter)Parameters["Transformed Variable"];
/// <summary>
/// Accessor for the "Transformation" entry of the parameter collection.
/// </summary>
private ValueParameter TransformationParameter =>
  (ValueParameter)Parameters["Transformation"];
#endregion
#region Properties
/// <summary>
/// The variable name stored in the "Original Variable" parameter.
/// </summary>
public string OriginalVariable => OriginalVariableParameter.Value.Value;
/// <summary>
/// The variable name stored in the "Transformed Variable" parameter.
/// </summary>
public string TransformedVariable => TransformedVariableParameter.Value.Value;
/// <summary>
/// The transformation object stored in the "Transformation" parameter.
/// </summary>
public ITransformation Transformation => TransformationParameter.Value;
#endregion
#region Constructor, Cloning & Persistence
/// <summary>
/// Creates a transformation entry and registers its three parameters:
/// "Original Variable", "Transformed Variable" and "Transformation".
/// </summary>
/// <param name="originalVariable">Name stored (as a read-only string value) in "Original Variable".</param>
/// <param name="transformedVariable">Name stored (as a read-only string value) in "Transformed Variable".</param>
/// <param name="transformation">The transformation stored in the "Transformation" parameter.</param>
public DataAnalysisTransformation(string originalVariable, string transformedVariable, ITransformation transformation)
  : base() {
  // Both variable names are wrapped in read-only values before they are registered.
  var originalName = new StringValue(originalVariable).AsReadOnly();
  Parameters.Add(new FixedValueParameter("Original Variable", originalName));

  var transformedName = new StringValue(transformedVariable).AsReadOnly();
  Parameters.Add(new FixedValueParameter("Transformed Variable", transformedName));

  // TODO: should be readonly/fixed
  Parameters.Add(new ValueParameter("Transformation", transformation));
}
/// <summary>
/// Cloning constructor; all copying is delegated to the base class.
/// </summary>
/// <param name="original">The instance to copy.</param>
/// <param name="cloner">The cloner forwarded to the base constructor.</param>
private DataAnalysisTransformation(DataAnalysisTransformation original, Cloner cloner)
  : base(original, cloner) {
}
/// <summary>
/// Creates a copy of this instance through the private cloning constructor.
/// </summary>
/// <param name="cloner">The cloner passed on to the cloning constructor.</param>
/// <returns>A new <see cref="DataAnalysisTransformation"/> built from this instance.</returns>
public override IDeepCloneable Clone(Cloner cloner) => new DataAnalysisTransformation(this, cloner);
/// <summary>
/// Constructor marked with <c>[StorableConstructor]</c>; it only forwards the
/// <paramref name="deserializing"/> flag to the base class.
/// </summary>
/// <param name="deserializing">Flag forwarded to the base constructor.</param>
[StorableConstructor]
private DataAnalysisTransformation(bool deserializing)
  : base(deserializing) { }

/// <summary>
/// After-deserialization hook. It is intentionally empty: it gives the
/// <c>[StorableHook]</c> attribute a dedicated method to attach to. Without it, the
/// attribute would decorate the next member declared in the class (the
/// <c>ToString()</c> override), which is not meant to be a persistence hook.
/// </summary>
[StorableHook(HookType.AfterDeserialization)]
private void AfterDeserialization() {
  // Nothing to restore or convert at the moment.
}
#endregion
// Text form: the transformation followed by "(original -> transformed)" variable names.
public override string ToString() =>
  $"{Transformation} ({OriginalVariable} -> {TransformedVariable})";
#region Transformation
#region Variable Extension & Reduction
/// <summary>
/// originals => originals plus extended: adds to the given variables every variable that
/// <see cref="GetTransitiveVariables"/> reaches by following the transformations in forward direction.
/// </summary>
/// <param name="variables">The variable names to start from.</param>
/// <param name="transformations">The transformations, processed in the given order.</param>
/// <returns>The start variables together with all variables reached through the transformations.</returns>
public static IEnumerable ExtendVariables(IEnumerable variables, IEnumerable transformations) {
  var extended = GetTransitiveVariables(variables, transformations, inverse: false);
  return extended;
}
/// <summary>
/// extended => originals: maps every given variable back through the transformations
/// (inverse direction) and collects the resulting names without duplicates.
/// </summary>
/// <param name="variables">The (possibly transformed) variable names.</param>
/// <param name="transformations">The transformations to walk backwards.</param>
/// <returns>The set of names obtained by tracing each variable back.</returns>
public static IEnumerable ReduceVariables(IEnumerable variables, IEnumerable transformations) {
  var reduced = new HashSet();
  foreach (var name in variables) {
    var tracedBack = GetLastTransitiveVariable(name, transformations, inverse: true);
    reduced.Add(tracedBack);
  }
  return reduced;
}
/// <summary>
/// Collects all variables reachable from the given ones by walking the transformations once.
/// A transformation contributes its target only if its source is already in the reachable
/// set at the moment the transformation is visited.
/// </summary>
/// <param name="variables">The variable names to start from.</param>
/// <param name="transformations">The transformations to walk.</param>
/// <param name="inverse">
/// If true, the transformations are visited in reverse order and each one is followed
/// from its transformed variable to its original variable.
/// </param>
/// <returns>The start variables plus every variable reached during the walk.</returns>
public static IEnumerable GetTransitiveVariables(IEnumerable variables, IEnumerable transformations, bool inverse = false) {
  var reachable = new HashSet(variables);
  var ordered = inverse ? transformations.Reverse() : transformations;

  foreach (var step in ordered) {
    string from, to;
    if (inverse) {
      from = step.TransformedVariable;
      to = step.OriginalVariable;
    } else {
      from = step.OriginalVariable;
      to = step.TransformedVariable;
    }

    if (reachable.Contains(from)) {
      reachable.Add(to);
    }
  }
  return reachable;
}
/// <summary>
/// Follows a single variable through the transformations and returns the name it ends up as.
/// Whenever the currently tracked name equals a transformation's source, tracking continues
/// with that transformation's target.
/// </summary>
/// <param name="variable">The variable name to start from.</param>
/// <param name="transformations">The transformations to walk.</param>
/// <param name="inverse">
/// If true, the transformations are visited in reverse order and each one is followed
/// from its transformed variable to its original variable.
/// </param>
/// <returns>The last name reached; the input name if no transformation matched.</returns>
public static string GetLastTransitiveVariable(string variable, IEnumerable transformations, bool inverse = false) {
  var ordered = inverse ? transformations.Reverse() : transformations;
  var current = variable;

  foreach (var step in ordered) {
    string from, to;
    if (inverse) {
      from = step.TransformedVariable;
      to = step.OriginalVariable;
    } else {
      from = step.OriginalVariable;
      to = step.TransformedVariable;
    }

    if (current == from) {
      current = to;
    }
  }
  return current;
}
#endregion
#region Transform Dataset
/// <summary>
/// Applies the transformations, in the given order, to a modifiable version of the dataset.
/// The result of each transformation is written to its transformed variable: an existing
/// variable of that name is replaced, otherwise a new variable is added.
/// </summary>
/// <param name="dataset">The dataset to transform; it is cast to <c>Dataset</c>.</param>
/// <param name="transformations">The transformations to apply.</param>
/// <returns>The modifiable dataset containing the transformed values.</returns>
public static IDataset Transform(IDataset dataset, IEnumerable transformations) {
  var result = ((Dataset)dataset).ToModifiable();

  foreach (var step in transformations) {
    var operation = (ITransformation)step.Transformation;
    var targetName = step.TransformedVariable;
    var input = result.GetDoubleValues(step.OriginalVariable);

    // The validity check is deliberately not repeated here:
    //if (!trans.Check(originalData, out string errorMessage))
    //  throw new InvalidOperationException($"Cannot estimate Values, Transformation is invalid: {errorMessage}");
    // TODO: check was already called before configure (in preprocessing)
    // TODO: newly specified data might not pass the check but it does not matter because the data is not configured with
    // e.g. impact calculation -> replacement=most common -> originalMean is zero

    var output = operation.Apply(input).ToList();

    bool targetExists = result.VariableNames.Contains(targetName);
    if (targetExists) {
      result.ReplaceVariable(targetName, output);
    } else {
      result.AddVariable(targetName, output);
    }
  }

  return result; // TODO: to regular dataset?
}
/// <summary>
/// Undoes the transformations on a modifiable version of the dataset, visiting them last to first.
/// A transformation is only inverted if its original variable was overwritten, i.e. if it is the
/// transformed variable of the current transformation or of one visited before it in this walk.
/// </summary>
/// <param name="dataset">The transformed dataset; it is cast to <c>Dataset</c>.</param>
/// <param name="transformations">The transformations that were applied.</param>
/// <param name="removeVirtualVariables">
/// If true, every variable of <paramref name="dataset"/> that is not returned by
/// <see cref="ReduceVariables"/> is removed from the result.
/// </param>
/// <returns>The modifiable dataset with restored values.</returns>
public static IDataset InverseTransform(IDataset dataset, IEnumerable transformations, bool removeVirtualVariables = true) {
  var restored = ((Dataset)dataset).ToModifiable();

  // Popping from the stack yields the transformations in reverse order.
  var pending = new Stack(transformations);
  while (pending.Count > 0) {
    var step = pending.Pop();
    var operation = (ITransformation)step.Transformation;

    // Everything that is no longer on the stack: the current step and the ones popped earlier.
    var visited = transformations.Except(pending);
    bool originalWasChanged = visited.Any(t => t.TransformedVariable == step.OriginalVariable);
    if (!originalWasChanged) continue; // original column was never overwritten, nothing to invert

    var transformedData = restored.GetDoubleValues(step.TransformedVariable);
    var originalData = operation.InverseApply(transformedData).ToList();
    restored.ReplaceVariable(step.OriginalVariable, originalData);
  }

  if (!removeVirtualVariables) return restored; // TODO: to regular dataset?

  // Variable names are taken from the input dataset, not from the copy being modified.
  var originalVariables = ReduceVariables(dataset.VariableNames, transformations);
  foreach (var virtualVariable in dataset.VariableNames.Except(originalVariables)) {
    restored.RemoveVariable(virtualVariable);
  }

  return restored; // TODO: to regular dataset?
}
#endregion
#region Transform ProblemData
/// <summary>
/// Builds new problem data whose dataset has the problem data's own transformations applied
/// and whose allowed input variables are extended by the variables the transformations produce.
/// </summary>
/// <param name="problemData">The problem data to transform.</param>
/// <returns>New problem data created by <c>CreateNewProblemData</c> in forward direction.</returns>
public static IDataAnalysisProblemData ApplyTransformations(IDataAnalysisProblemData problemData) {
  var transformations = problemData.Transformations;
  var transformedDataset = Transform(problemData.Dataset, transformations);
  var inputs = ExtendVariables(problemData.AllowedInputVariables, transformations);
  return CreateNewProblemData(problemData, transformedDataset, inputs, inverse: false);
}
/// <summary>
/// Builds new problem data whose dataset has the problem data's own transformations undone
/// and whose allowed input variables are reduced to the names they trace back to.
/// </summary>
/// <param name="problemData">The transformed problem data.</param>
/// <returns>New problem data created by <c>CreateNewProblemData</c> in inverse direction.</returns>
public static IDataAnalysisProblemData InverseApplyTransformations(IDataAnalysisProblemData problemData) {
  var transformations = problemData.Transformations;
  var restoredDataset = InverseTransform(problemData.Dataset, transformations);
  var inputs = ReduceVariables(problemData.AllowedInputVariables, transformations);
  return CreateNewProblemData(problemData, restoredDataset, inputs, inverse: true);
}
/// <summary>
/// Creates regression or classification problem data for the given dataset and inputs and
/// copies the training and test partition bounds from the source problem data.
/// For regression, the target variable is mapped through the transformations with
/// <see cref="GetLastTransitiveVariable"/>; for classification, the target variable is passed on unchanged.
/// </summary>
/// <param name="problemData">The source problem data (transformations, target, partitions).</param>
/// <param name="dataset">The dataset of the new problem data.</param>
/// <param name="inputs">The allowed input variables of the new problem data.</param>
/// <param name="inverse">Direction used when mapping the regression target variable.</param>
/// <returns>The newly created problem data.</returns>
/// <exception cref="NotSupportedException">
/// The problem data is neither regression nor classification problem data.
/// </exception>
private static IDataAnalysisProblemData CreateNewProblemData(IDataAnalysisProblemData problemData, IDataset dataset, IEnumerable inputs, bool inverse = false) {
  IDataAnalysisProblemData created;
  switch (problemData) {
    case IRegressionProblemData regression:
      var mappedTarget = GetLastTransitiveVariable(regression.TargetVariable, problemData.Transformations, inverse);
      created = new RegressionProblemData(dataset, inputs, mappedTarget, problemData.Transformations);
      break;
    case IClassificationProblemData classification:
      created = new ClassificationProblemData(dataset, inputs, classification.TargetVariable, problemData.Transformations);
      break;
    default:
      throw new NotSupportedException("Type of ProblemData not supported");
  }

  // Carry over the partitions of the source problem data.
  var sourceTraining = problemData.TrainingPartition;
  created.TrainingPartition.Start = sourceTraining.Start;
  created.TrainingPartition.End = sourceTraining.End;

  var sourceTest = problemData.TestPartition;
  created.TestPartition.Start = sourceTest.Start;
  created.TestPartition.End = sourceTest.End;

  return created;
}
#endregion
#region Transform Model
/// <summary>
/// Wraps a model into the transformation model that matches its kind. The kinds are tested in
/// this order: time series prognosis, regression, classification, clustering.
/// </summary>
/// <param name="model">The model to wrap; must not already be a transformation model.</param>
/// <param name="transformations">The transformations handed to the wrapping model.</param>
/// <returns>The transformation model wrapping <paramref name="model"/>.</returns>
/// <exception cref="InvalidOperationException">The model already is a transformation model.</exception>
/// <exception cref="NotSupportedException">The model is of none of the supported kinds.</exception>
public static IDataAnalysisTransformationModel CreateTransformationIntegratedModel(IDataAnalysisModel model, IEnumerable transformations) {
  if (model is IDataAnalysisTransformationModel)
    throw new InvalidOperationException("Model already is a transformation model.");

  // The time series check comes before the regression check, as in the original case order.
  if (model is ITimeSeriesPrognosisModel timeSeriesPrognosisModel)
    return new TimeSeriesPrognosisTransformationModel(timeSeriesPrognosisModel, transformations);
  if (model is IRegressionModel regressionModel)
    return new RegressionTransformationModel(regressionModel, transformations);
  if (model is IClassificationModel classificationModel)
    return new ClassificationTransformationModel(classificationModel, transformations);
  if (model is IClusteringModel clusteringModel)
    return new ClusteringTransformationModel(clusteringModel, transformations);

  throw new NotSupportedException("Type of the model is not supported;");
}
/// <summary>
/// Returns the original model wrapped by a transformation model.
/// </summary>
/// <param name="transformationModel">The model to unwrap; must be a transformation model.</param>
/// <param name="transformations">Not used by this method.</param>
/// <returns>The <c>OriginalModel</c> of the transformation model.</returns>
/// <exception cref="InvalidOperationException">The model is not a transformation model.</exception>
public static IDataAnalysisModel RestoreTrainedModel(IDataAnalysisModel transformationModel, IEnumerable transformations) {
  if (transformationModel is IDataAnalysisTransformationModel model)
    return model.OriginalModel;

  throw new InvalidOperationException("Cannot restore because model is not a TransformationModel");
}
#endregion
#region Transform Solution
/// <summary>
/// Converts a solution between its two representations, using the transformations of its problem data.
/// If the solution's model is a transformation model, the wrapped original model is restored and the
/// problem data is transformed (original -> transformed). Otherwise the model is wrapped into a
/// transformation model and the problem data is inverse-transformed (transformed -> original).
/// </summary>
/// <param name="solution">The solution to convert.</param>
/// <returns>A new solution created from the converted model and problem data.</returns>
public static IDataAnalysisSolution TransformSolution(IDataAnalysisSolution solution) {
  var transformations = solution.ProblemData.Transformations;

  // TODO: what if model is a integrated sym-reg model?
  bool isTransformationModel = solution.Model is IDataAnalysisTransformationModel;

  IDataAnalysisModel model;
  IDataAnalysisProblemData data;
  if (isTransformationModel) {
    model = RestoreTrainedModel(solution.Model, transformations);
    data = ApplyTransformations(solution.ProblemData); // original -> transformed
  } else {
    model = CreateTransformationIntegratedModel(solution.Model, transformations);
    data = InverseApplyTransformations(solution.ProblemData); // transformed -> original
  }

  return CreateSolution(model, data);
}
/// <summary>
/// Lets the model create a solution for the given problem data. The model kinds are tested in this
/// order: time series prognosis, regression, classification; the problem data is cast to the
/// matching problem data interface. There is no clustering case in this method.
/// </summary>
/// <param name="model">The model that creates the solution.</param>
/// <param name="problemData">The problem data handed to the model.</param>
/// <returns>The solution created by the model.</returns>
/// <exception cref="NotSupportedException">The model is of none of the handled kinds.</exception>
private static IDataAnalysisSolution CreateSolution(IDataAnalysisModel model, IDataAnalysisProblemData problemData) {
  // The time series check comes before the regression check, as in the original case order.
  if (model is ITimeSeriesPrognosisModel timeSeriesPrognosisModel)
    return timeSeriesPrognosisModel.CreateTimeSeriesPrognosisSolution((ITimeSeriesPrognosisProblemData)problemData);
  if (model is IRegressionModel regressionModel)
    return regressionModel.CreateRegressionSolution((IRegressionProblemData)problemData);
  if (model is IClassificationModel classificationModel)
    return classificationModel.CreateClassificationSolution((IClassificationProblemData)problemData);

  throw new NotSupportedException("Cannot create Solution of the model type.");
}
#endregion
#endregion
}
}