#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;

namespace HeuristicLab.Problems.DataAnalysis {
  /// <summary>
  /// Describes a single transformation step: the variable it reads
  /// (<see cref="OriginalVariable"/>), the variable it writes
  /// (<see cref="TransformedVariable"/>) and the transformation itself. The static members
  /// apply ordered sequences of such steps to variable lists, datasets, problem data,
  /// models and solutions.
  /// </summary>
  /// <remarks>
  /// NOTE(review): the generic type arguments in this file (StringValue, ITransformation,
  /// string, IDataAnalysisTransformation, ITransformation&lt;double&gt;) were reconstructed from
  /// how the values are used here; confirm them against the declarations of
  /// IDataAnalysisTransformation, ITransformation and IDataAnalysisProblemData.Transformations.
  /// </remarks>
  [Item("Transformation", "A transformation applied to a DataAnalysisProblemData")]
  [StorableClass]
  public sealed class DataAnalysisTransformation : ParameterizedNamedItem, IDataAnalysisTransformation {
    #region Parameter Properties
    private IFixedValueParameter<StringValue> OriginalVariableParameter {
      get { return (IFixedValueParameter<StringValue>)Parameters["Original Variable"]; }
    }

    private IFixedValueParameter<StringValue> TransformedVariableParameter {
      get { return (IFixedValueParameter<StringValue>)Parameters["Transformed Variable"]; }
    }

    private ValueParameter<ITransformation> TransformationParameter {
      get { return (ValueParameter<ITransformation>)Parameters["Transformation"]; }
    }
    #endregion

    #region Properties
    /// <summary>Name of the variable the transformation reads from.</summary>
    public string OriginalVariable {
      get { return OriginalVariableParameter.Value.Value; }
    }

    /// <summary>Name of the variable the transformation writes to.</summary>
    public string TransformedVariable {
      get { return TransformedVariableParameter.Value.Value; }
    }

    /// <summary>The transformation stored in the "Transformation" parameter.</summary>
    public ITransformation Transformation {
      get { return TransformationParameter.Value; }
    }
    #endregion

    #region Constructor, Cloning & Persistence
    /// <summary>
    /// Creates a transformation description and registers its three parameters.
    /// The two variable names are stored as read-only string values.
    /// </summary>
    /// <param name="originalVariable">Name of the variable that is read.</param>
    /// <param name="transformedVariable">Name of the variable that is written.</param>
    /// <param name="transformation">The transformation to apply.</param>
    public DataAnalysisTransformation(string originalVariable, string transformedVariable, ITransformation transformation)
      : base() {
      Parameters.Add(new FixedValueParameter<StringValue>("Original Variable", new StringValue(originalVariable).AsReadOnly()));
      Parameters.Add(new FixedValueParameter<StringValue>("Transformed Variable", new StringValue(transformedVariable).AsReadOnly()));
      Parameters.Add(new ValueParameter<ITransformation>("Transformation", transformation)); // TODO: should be readonly/fixed
    }

    private DataAnalysisTransformation(DataAnalysisTransformation original, Cloner cloner)
      : base(original, cloner) {
    }

    /// <summary>Creates a clone of this instance via the cloning constructor.</summary>
    public override IDeepCloneable Clone(Cloner cloner) {
      return new DataAnalysisTransformation(this, cloner);
    }

    [StorableConstructor]
    private DataAnalysisTransformation(bool deserializing)
      : base(deserializing) {
    }

    // The hook attribute previously had no method of its own and therefore ended up
    // decorating ToString(). It now sits on an explicit (currently empty) hook method.
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
    }
    #endregion

    /// <summary>Returns "&lt;transformation&gt; (&lt;original&gt; -&gt; &lt;transformed&gt;)".</summary>
    public override string ToString() {
      return $"{Transformation} ({OriginalVariable} -> {TransformedVariable})";
    }

    #region Transformation
    #region Variable Extension & Reduction
    /// <summary>
    /// originals =&gt; include extended: returns the given variables plus every variable that
    /// is reachable from them by following the transformations in forward direction.
    /// </summary>
    public static IEnumerable<string> ExtendVariables(IEnumerable<string> variables, IEnumerable<IDataAnalysisTransformation> transformations) {
      return GetTransitiveVariables(variables, transformations);
    }

    /// <summary>
    /// extended =&gt; originals: maps every given variable back through the transformations
    /// (in inverse direction) and returns the distinct set of resulting variable names.
    /// </summary>
    public static IEnumerable<string> ReduceVariables(IEnumerable<string> variables, IEnumerable<IDataAnalysisTransformation> transformations) {
      var originalVariables = new HashSet<string>();
      foreach (var variable in variables)
        originalVariables.Add(GetLastTransitiveVariable(variable, transformations, inverse: true));
      return originalVariables;
    }

    /// <summary>
    /// Collects all variables reachable from <paramref name="variables"/>. The transformations
    /// are visited once, in order (reversed when <paramref name="inverse"/> is true); whenever
    /// the source of a step is already reachable, its target becomes reachable as well.
    /// </summary>
    /// <param name="variables">The start variables; they are always part of the result.</param>
    /// <param name="transformations">The ordered transformation steps.</param>
    /// <param name="inverse">
    /// If true, steps are followed from transformed variable to original variable,
    /// starting with the last step.
    /// </param>
    public static IEnumerable<string> GetTransitiveVariables(IEnumerable<string> variables, IEnumerable<IDataAnalysisTransformation> transformations, bool inverse = false) {
      var reachableVariables = new HashSet<string>(variables);
      if (inverse) transformations = transformations.Reverse();

      foreach (var transformation in transformations) {
        var source = inverse ? transformation.TransformedVariable : transformation.OriginalVariable;
        var target = inverse ? transformation.OriginalVariable : transformation.TransformedVariable;
        if (reachableVariables.Contains(source))
          reachableVariables.Add(target);
      }

      return reachableVariables;
    }

    /// <summary>
    /// Follows a single variable through the transformation steps and returns the name it
    /// ends up with. Each step whose source equals the current name replaces it by the
    /// step's target.
    /// </summary>
    /// <param name="variable">The variable name to start from.</param>
    /// <param name="transformations">The ordered transformation steps.</param>
    /// <param name="inverse">
    /// If true, steps are followed from transformed variable to original variable,
    /// starting with the last step.
    /// </param>
    public static string GetLastTransitiveVariable(string variable, IEnumerable<IDataAnalysisTransformation> transformations, bool inverse = false) {
      if (inverse) transformations = transformations.Reverse();

      foreach (var transformation in transformations) {
        var source = inverse ? transformation.TransformedVariable : transformation.OriginalVariable;
        var target = inverse ? transformation.OriginalVariable : transformation.TransformedVariable;
        if (variable == source)
          variable = target;
      }

      return variable;
    }
    #endregion

    #region Transform Dataset
    /// <summary>
    /// Applies the transformations, in order, to a modifiable version of the dataset. For
    /// each step the values of the original variable are transformed and stored under the
    /// transformed variable's name, replacing an existing variable of that name or adding
    /// a new one.
    /// </summary>
    /// <param name="dataset">The source dataset; it is cast to <c>Dataset</c>.</param>
    /// <param name="transformations">The ordered transformation steps.</param>
    /// <returns>The modifiable dataset holding the transformed values.</returns>
    public static IDataset Transform(IDataset dataset, IEnumerable<IDataAnalysisTransformation> transformations) {
      var modifiableDataset = ((Dataset)dataset).ToModifiable();

      foreach (var transformation in transformations) {
        var trans = (ITransformation<double>)transformation.Transformation;

        var originalData = modifiableDataset.GetDoubleValues(transformation.OriginalVariable);
        //if (!trans.Check(originalData, out string errorMessage))
        //  throw new InvalidOperationException($"Cannot estimate Values, Transformation is invalid: {errorMessage}");
        // TODO: check was already called before configure (in preprocessing)
        // TODO: newly specified data might not pass the check but it does not matter because the data is not configured with
        //       e.g. impact calculation -> replacement=most common -> originalMean is zero

        var transformedData = trans.Apply(originalData).ToList();
        if (modifiableDataset.VariableNames.Contains(transformation.TransformedVariable))
          modifiableDataset.ReplaceVariable(transformation.TransformedVariable, transformedData);
        else
          modifiableDataset.AddVariable(transformation.TransformedVariable, transformedData);
      }

      return modifiableDataset; // TODO: to regular dataset?
    }

    /// <summary>
    /// Reverts the transformations on a modifiable version of the dataset. The steps are
    /// processed from last to first. A step is inverted only if its original variable was
    /// written by that step itself or by a later step; in that case the values of the
    /// transformed variable are inverse-transformed and stored under the original name.
    /// </summary>
    /// <param name="dataset">The transformed dataset; it is cast to <c>Dataset</c>.</param>
    /// <param name="transformations">The ordered transformation steps (forward order).</param>
    /// <param name="removeVirtualVariables">
    /// If true, every variable of <paramref name="dataset"/> that is not contained in the
    /// result of <see cref="ReduceVariables"/> is removed from the returned dataset.
    /// </param>
    /// <returns>The modifiable dataset holding the restored values.</returns>
    public static IDataset InverseTransform(IDataset dataset, IEnumerable<IDataAnalysisTransformation> transformations, bool removeVirtualVariables = true) {
      var modifiableDataset = ((Dataset)dataset).ToModifiable();

      // Materialize once: the sequence is walked backwards and reused below.
      var transformationList = transformations.ToList();

      // Names written by the current step or by any step after it.
      var overwrittenVariables = new HashSet<string>();
      for (int i = transformationList.Count - 1; i >= 0; i--) {
        var transformation = transformationList[i];
        var trans = (ITransformation<double>)transformation.Transformation;

        overwrittenVariables.Add(transformation.TransformedVariable);
        bool originalWasChanged = overwrittenVariables.Contains(transformation.OriginalVariable);
        if (!originalWasChanged) continue;

        var transformedData = modifiableDataset.GetDoubleValues(transformation.TransformedVariable);
        var originalData = trans.InverseApply(transformedData).ToList();
        modifiableDataset.ReplaceVariable(transformation.OriginalVariable, originalData);
      }

      if (removeVirtualVariables) {
        var originalVariables = ReduceVariables(dataset.VariableNames, transformationList);
        // Snapshot the names before removing anything, so that no lazily evaluated
        // query is being enumerated while variables are removed.
        var virtualVariables = dataset.VariableNames.Except(originalVariables).ToList();
        foreach (var virtualVariable in virtualVariables)
          modifiableDataset.RemoveVariable(virtualVariable);
      }

      return modifiableDataset; // TODO: to regular dataset?
    }
    #endregion

    #region Transform ProblemData
    /// <summary>
    /// Creates new problem data whose dataset has the problem data's transformations
    /// applied and whose input variables are extended by the transformed variables.
    /// </summary>
    public static IDataAnalysisProblemData ApplyTransformations(IDataAnalysisProblemData problemData) {
      var newDataset = Transform(problemData.Dataset, problemData.Transformations);
      var extendedInputs = ExtendVariables(problemData.AllowedInputVariables, problemData.Transformations);

      return CreateNewProblemData(problemData, newDataset, extendedInputs, inverse: false);
    }

    /// <summary>
    /// Creates new problem data whose dataset has the problem data's transformations
    /// reverted and whose input variables are reduced to the original variables.
    /// </summary>
    public static IDataAnalysisProblemData InverseApplyTransformations(IDataAnalysisProblemData problemData) {
      var newDataset = InverseTransform(problemData.Dataset, problemData.Transformations);
      var reducedInputs = ReduceVariables(problemData.AllowedInputVariables, problemData.Transformations);

      return CreateNewProblemData(problemData, newDataset, reducedInputs, inverse: true);
    }

    /// <summary>
    /// Builds regression or classification problem data for the given dataset and inputs
    /// and copies the training and test partition bounds from the source problem data.
    /// For regression the target variable is mapped through the transformations; for
    /// classification the target variable is taken over unchanged.
    /// </summary>
    /// <exception cref="NotSupportedException">
    /// The problem data is neither regression nor classification problem data.
    /// </exception>
    private static IDataAnalysisProblemData CreateNewProblemData(IDataAnalysisProblemData problemData, IDataset dataset, IEnumerable<string> inputs, bool inverse = false) {
      // NOTE(review): time-series problem data presumably matches the regression branch and
      // is rebuilt as plain RegressionProblemData, while CreateSolution casts the result to
      // ITimeSeriesPrognosisProblemData for time-series models - confirm.
      IDataAnalysisProblemData newProblemData;
      if (problemData is IRegressionProblemData regressionProblemData) {
        var newTargetVariable = GetLastTransitiveVariable(regressionProblemData.TargetVariable, problemData.Transformations, inverse);
        newProblemData = new RegressionProblemData(dataset, inputs, newTargetVariable, problemData.Transformations);
      } else if (problemData is IClassificationProblemData classificationProblemData) {
        newProblemData = new ClassificationProblemData(dataset, inputs, classificationProblemData.TargetVariable, problemData.Transformations);
      } else {
        throw new NotSupportedException("Type of ProblemData not supported");
      }

      newProblemData.TrainingPartition.Start = problemData.TrainingPartition.Start;
      newProblemData.TrainingPartition.End = problemData.TrainingPartition.End;
      newProblemData.TestPartition.Start = problemData.TestPartition.Start;
      newProblemData.TestPartition.End = problemData.TestPartition.End;

      return newProblemData;
    }
    #endregion

    #region Transform Model
    /// <summary>
    /// Wraps a model into the transformation model matching its type (time series,
    /// regression, classification or clustering; checked in that order).
    /// </summary>
    /// <exception cref="InvalidOperationException">The model already is a transformation model.</exception>
    /// <exception cref="NotSupportedException">The model type has no matching wrapper.</exception>
    public static IDataAnalysisTransformationModel CreateTransformationIntegratedModel(IDataAnalysisModel model, IEnumerable<IDataAnalysisTransformation> transformations) {
      if (model is IDataAnalysisTransformationModel)
        throw new InvalidOperationException("Model already is a transformation model.");

      switch (model) {
        case ITimeSeriesPrognosisModel timeSeriesPrognosisModel:
          return new TimeSeriesPrognosisTransformationModel(timeSeriesPrognosisModel, transformations);
        case IRegressionModel regressionModel:
          return new RegressionTransformationModel(regressionModel, transformations);
        case IClassificationModel classificationModel:
          return new ClassificationTransformationModel(classificationModel, transformations);
        case IClusteringModel clusteringModel:
          return new ClusteringTransformationModel(clusteringModel, transformations);
        default:
          throw new NotSupportedException("Type of the model is not supported;");
      }
    }

    /// <summary>
    /// Returns the original model wrapped by a transformation model.
    /// </summary>
    /// <param name="transformationModel">The transformation model to unwrap.</param>
    /// <param name="transformations">Not used by this method.</param>
    /// <exception cref="InvalidOperationException">The model is not a transformation model.</exception>
    public static IDataAnalysisModel RestoreTrainedModel(IDataAnalysisModel transformationModel, IEnumerable<IDataAnalysisTransformation> transformations) {
      if (!(transformationModel is IDataAnalysisTransformationModel model))
        throw new InvalidOperationException("Cannot restore because model is not a TransformationModel");
      return model.OriginalModel;
    }
    #endregion

    #region Transform Solution
    /// <summary>
    /// Toggles a solution between its two representations. If the solution's model is a
    /// transformation model, the wrapped model is restored and the problem data gets the
    /// transformations applied; otherwise the model is wrapped into a transformation model
    /// and the transformations are reverted on the problem data.
    /// </summary>
    public static IDataAnalysisSolution TransformSolution(IDataAnalysisSolution solution) {
      var transformations = solution.ProblemData.Transformations;

      // TODO: what if model is a integrated sym-reg model?
      bool isTransformationModel = solution.Model is IDataAnalysisTransformationModel;

      var model = isTransformationModel
        ? RestoreTrainedModel(solution.Model, transformations)
        : CreateTransformationIntegratedModel(solution.Model, transformations);

      var data = isTransformationModel
        ? ApplyTransformations(solution.ProblemData) // original -> transformed
        : InverseApplyTransformations(solution.ProblemData); // transformed -> original

      return CreateSolution(model, data);
    }

    /// <summary>
    /// Creates the solution matching the model type (time series, regression or
    /// classification; checked in that order). Other model types, including clustering
    /// models, end up in the default branch.
    /// </summary>
    /// <exception cref="NotSupportedException">No solution can be created for the model type.</exception>
    private static IDataAnalysisSolution CreateSolution(IDataAnalysisModel model, IDataAnalysisProblemData problemData) {
      switch (model) {
        case ITimeSeriesPrognosisModel timeSeriesPrognosisModel:
          return timeSeriesPrognosisModel.CreateTimeSeriesPrognosisSolution((ITimeSeriesPrognosisProblemData)problemData);
        case IRegressionModel regressionModel:
          return regressionModel.CreateRegressionSolution((IRegressionProblemData)problemData);
        case IClassificationModel classificationModel:
          return classificationModel.CreateClassificationSolution((IClassificationProblemData)problemData);
        default:
          throw new NotSupportedException("Cannot create Solution of the model type.");
      }
    }
    #endregion
    #endregion
  }
}