#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion
| 21 |
|
---|
[15884] | 22 | using System;
|
---|
| 23 | using System.Collections.Generic;
|
---|
| 24 | using System.Linq;
|
---|
[15846] | 25 | using HeuristicLab.Common;
|
---|
| 26 | using HeuristicLab.Core;
|
---|
| 27 | using HeuristicLab.Data;
|
---|
| 28 | using HeuristicLab.Parameters;
|
---|
| 29 | using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
|
---|
| 30 |
|
---|
namespace HeuristicLab.Problems.DataAnalysis {
  /// <summary>
  /// Binds an <see cref="ITransformation"/> to the variable it reads
  /// (<see cref="OriginalVariable"/>) and the variable it writes
  /// (<see cref="TransformedVariable"/>). The static members apply or invert
  /// ordered sequences of such transformations on variable name sets,
  /// datasets, problem data, models and solutions.
  /// </summary>
  [Item("Transformation", "A transformation applied to a DataAnalysisProblemData")]
  [StorableClass]
  public sealed class DataAnalysisTransformation : ParameterizedNamedItem, IDataAnalysisTransformation {
    #region Parameter Properties
    // Typed accessors for the three entries registered in the public constructor.
    private IFixedValueParameter<StringValue> OriginalVariableParameter {
      get { return (IFixedValueParameter<StringValue>)Parameters["Original Variable"]; }
    }

    private IFixedValueParameter<StringValue> TransformedVariableParameter {
      get { return (IFixedValueParameter<StringValue>)Parameters["Transformed Variable"]; }
    }

    private ValueParameter<ITransformation> TransformationParameter {
      get { return (ValueParameter<ITransformation>)Parameters["Transformation"]; }
    }
    #endregion

    #region Properties
    /// <summary>Name of the variable the transformation reads from.</summary>
    public string OriginalVariable {
      get { return OriginalVariableParameter.Value.Value; }
    }

    /// <summary>Name of the variable the transformation writes to.</summary>
    public string TransformedVariable {
      get { return TransformedVariableParameter.Value.Value; }
    }

    /// <summary>The transformation that maps the original to the transformed variable.</summary>
    public ITransformation Transformation {
      get { return TransformationParameter.Value; }
    }
    #endregion

    #region Constructor, Cloning & Persistence
    /// <summary>
    /// Creates a transformation entry and registers its three parameters.
    /// </summary>
    /// <param name="originalVariable">Name of the variable that is read.</param>
    /// <param name="transformedVariable">Name of the variable that is written.</param>
    /// <param name="transformation">The transformation to apply.</param>
    public DataAnalysisTransformation(string originalVariable, string transformedVariable, ITransformation transformation)
      : base() {
      // Both variable names are stored through StringValue.AsReadOnly().
      Parameters.Add(new FixedValueParameter<StringValue>("Original Variable", new StringValue(originalVariable).AsReadOnly()));
      Parameters.Add(new FixedValueParameter<StringValue>("Transformed Variable", new StringValue(transformedVariable).AsReadOnly()));
      Parameters.Add(new ValueParameter<ITransformation>("Transformation", transformation)); // TODO: should be readonly/fixed
    }

    // Cloning constructor; all state is handled by the base class.
    private DataAnalysisTransformation(DataAnalysisTransformation original, Cloner cloner)
      : base(original, cloner) { }

    /// <summary>Creates a clone of this instance using the given <paramref name="cloner"/>.</summary>
    public override IDeepCloneable Clone(Cloner cloner) {
      return new DataAnalysisTransformation(this, cloner);
    }

    // Constructor used by the persistence framework.
    [StorableConstructor]
    private DataAnalysisTransformation(bool deserializing)
      : base(deserializing) { }
    #endregion

    /// <summary>
    /// Returns the transformation followed by the variable mapping in the
    /// form "(original -> transformed)".
    /// </summary>
    public override string ToString() {
      return $"{Transformation} ({OriginalVariable} -> {TransformedVariable})";
    }

    #region Transformation

    #region Variable Extension & Reduction
    /// <summary>
    /// originals => include extended: returns the given variables plus every
    /// variable reachable from them by following the transformations forward.
    /// </summary>
    public static IEnumerable<string> ExtendVariables(IEnumerable<string> variables, IEnumerable<IDataAnalysisTransformation> transformations) {
      return GetTransitiveVariables(variables, transformations);
    }

    /// <summary>
    /// extended => originals: maps every given variable back through the
    /// transformations (in reverse order) and returns the distinct results.
    /// </summary>
    public static IEnumerable<string> ReduceVariables(IEnumerable<string> variables, IEnumerable<IDataAnalysisTransformation> transformations) {
      var originalVariables = new HashSet<string>();
      foreach (var variable in variables)
        originalVariables.Add(GetLastTransitiveVariable(variable, transformations, inverse: true));
      return originalVariables;
    }

    /// <summary>
    /// Collects all variables reachable from <paramref name="variables"/> in a
    /// single pass over <paramref name="transformations"/>. The result always
    /// contains the input variables themselves.
    /// </summary>
    /// <param name="variables">The starting set of variable names.</param>
    /// <param name="transformations">The ordered transformations to follow.</param>
    /// <param name="inverse">
    /// If <c>true</c>, the transformations are visited in reverse order and
    /// followed from transformed variable to original variable.
    /// </param>
    public static IEnumerable<string> GetTransitiveVariables(IEnumerable<string> variables, IEnumerable<IDataAnalysisTransformation> transformations, bool inverse = false) {
      var reachableVariables = new HashSet<string>(variables);
      if (inverse) transformations = transformations.Reverse();
      foreach (var transformation in transformations) {
        var source = inverse ? transformation.TransformedVariable : transformation.OriginalVariable;
        var target = inverse ? transformation.OriginalVariable : transformation.TransformedVariable;
        if (reachableVariables.Contains(source))
          reachableVariables.Add(target);
      }

      return reachableVariables;
    }

    /// <summary>
    /// Follows a single variable through <paramref name="transformations"/> in
    /// one pass: whenever the current name equals a transformation's source,
    /// it is replaced by that transformation's target.
    /// </summary>
    /// <param name="variable">The variable name to start from.</param>
    /// <param name="transformations">The ordered transformations to follow.</param>
    /// <param name="inverse">
    /// If <c>true</c>, the transformations are visited in reverse order and
    /// followed from transformed variable to original variable.
    /// </param>
    /// <returns>The name reached after the pass; the input name if nothing matched.</returns>
    public static string GetLastTransitiveVariable(string variable, IEnumerable<IDataAnalysisTransformation> transformations, bool inverse = false) {
      if (inverse) transformations = transformations.Reverse();
      foreach (var transformation in transformations) {
        var source = inverse ? transformation.TransformedVariable : transformation.OriginalVariable;
        var target = inverse ? transformation.OriginalVariable : transformation.TransformedVariable;
        if (variable == source)
          variable = target;
      }

      return variable;
    }
    #endregion

    #region Transform Dataset
    /// <summary>
    /// Applies the transformations in order. For each one the double values of
    /// its original variable are read, transformed, and stored under its
    /// transformed variable (replacing an existing variable of that name or
    /// adding a new one).
    /// </summary>
    /// <param name="dataset">The dataset to transform; it is cast to <c>Dataset</c>.</param>
    /// <param name="transformations">
    /// The ordered transformations; each <c>Transformation</c> is cast to
    /// <c>ITransformation&lt;double&gt;</c>.
    /// </param>
    /// <returns>The modifiable dataset obtained from <c>Dataset.ToModifiable()</c> after all changes.</returns>
    public static IDataset Transform(IDataset dataset, IEnumerable<IDataAnalysisTransformation> transformations) {
      // NOTE(review): presumably ToModifiable() yields a copy so the input stays untouched - confirm.
      var modifiableDataset = ((Dataset)dataset).ToModifiable();

      foreach (var transformation in transformations) {
        var trans = (ITransformation<double>)transformation.Transformation;

        var originalData = modifiableDataset.GetDoubleValues(transformation.OriginalVariable);
        //if (!trans.Check(originalData, out string errorMessage))
        //  throw new InvalidOperationException($"Cannot estimate Values, Transformation is invalid: {errorMessage}");
        // TODO: check was already called before configure (in preprocessing)
        // TODO: newly specified data might not pass the check but it does not matter because the data is not configured with
        // e.g. impact calculation -> replacement=most common -> originalMean is zero

        // Materialize before modifying the dataset the values were read from.
        var transformedData = trans.Apply(originalData).ToList();
        if (modifiableDataset.VariableNames.Contains(transformation.TransformedVariable))
          modifiableDataset.ReplaceVariable(transformation.TransformedVariable, transformedData);
        else
          modifiableDataset.AddVariable(transformation.TransformedVariable, transformedData);
      }

      return modifiableDataset; // TODO: to regular dataset?
    }

    /// <summary>
    /// Reverts the transformations, processing them from last to first, and
    /// optionally removes the variables that exist only because of them.
    /// </summary>
    /// <param name="dataset">The transformed dataset; it is cast to <c>Dataset</c>.</param>
    /// <param name="transformations">
    /// The ordered transformations; each <c>Transformation</c> is cast to
    /// <c>ITransformation&lt;double&gt;</c>.
    /// </param>
    /// <param name="removeVirtualVariables">
    /// If <c>true</c>, every variable of <paramref name="dataset"/> that is not
    /// returned by <see cref="ReduceVariables"/> is removed from the result.
    /// </param>
    /// <returns>The modifiable dataset obtained from <c>Dataset.ToModifiable()</c> after all changes.</returns>
    public static IDataset InverseTransform(IDataset dataset, IEnumerable<IDataAnalysisTransformation> transformations, bool removeVirtualVariables = true) {
      var modifiableDataset = ((Dataset)dataset).ToModifiable();

      // A stack built from a sequence pops its elements in reverse order.
      var transformationsStack = new Stack<IDataAnalysisTransformation>(transformations);
      while (transformationsStack.Any()) {
        var transformation = transformationsStack.Pop();
        var trans = (ITransformation<double>)transformation.Transformation;

        // Everything no longer on the stack: the current transformation and
        // those that follow it in the original order.
        var poppedTransformations = transformations.Except(transformationsStack);
        // The original variable only needs restoring if one of these
        // transformations wrote to it.
        bool originalWasChanged = poppedTransformations.Any(x => x.TransformedVariable == transformation.OriginalVariable);
        if (originalWasChanged) {
          var transformedData = modifiableDataset.GetDoubleValues(transformation.TransformedVariable);

          var originalData = trans.InverseApply(transformedData).ToList();
          modifiableDataset.ReplaceVariable(transformation.OriginalVariable, originalData);
        }
      }

      if (removeVirtualVariables) {
        var originalVariables = ReduceVariables(dataset.VariableNames, transformations);
        var virtualVariables = dataset.VariableNames.Except(originalVariables);
        foreach (var virtualVariable in virtualVariables)
          modifiableDataset.RemoveVariable(virtualVariable);
      }

      return modifiableDataset; // TODO: to regular dataset?
    }
    #endregion

    #region Transform ProblemData
    /// <summary>
    /// Creates new problem data whose dataset has the problem data's
    /// transformations applied and whose inputs are the allowed input
    /// variables extended by the transformed variables.
    /// </summary>
    public static IDataAnalysisProblemData ApplyTransformations(IDataAnalysisProblemData problemData) {
      var newDataset = Transform(problemData.Dataset, problemData.Transformations);
      var extendedInputs = ExtendVariables(problemData.AllowedInputVariables, problemData.Transformations);

      return CreateNewProblemData(problemData, newDataset, extendedInputs, inverse: false);
    }

    /// <summary>
    /// Creates new problem data whose dataset has the problem data's
    /// transformations reverted and whose inputs are the allowed input
    /// variables reduced to their originals.
    /// </summary>
    public static IDataAnalysisProblemData InverseApplyTransformations(IDataAnalysisProblemData problemData) {
      var newDataset = InverseTransform(problemData.Dataset, problemData.Transformations);
      var reducedInputs = ReduceVariables(problemData.AllowedInputVariables, problemData.Transformations);

      return CreateNewProblemData(problemData, newDataset, reducedInputs, inverse: true);
    }

    /// <summary>
    /// Builds regression or classification problem data around
    /// <paramref name="dataset"/> and <paramref name="inputs"/>, keeping the
    /// transformations and the training/test partition bounds of
    /// <paramref name="problemData"/>.
    /// </summary>
    /// <exception cref="NotSupportedException">
    /// The problem data is neither regression nor classification problem data.
    /// </exception>
    private static IDataAnalysisProblemData CreateNewProblemData(IDataAnalysisProblemData problemData, IDataset dataset, IEnumerable<string> inputs, bool inverse = false) {
      // NOTE(review): there is no dedicated time-series branch here although CreateSolution
      // casts to ITimeSeriesPrognosisProblemData - confirm this combination works.
      IDataAnalysisProblemData newProblemData;
      if (problemData is IRegressionProblemData regressionProblemData) {
        // The regression target is mapped through the transformations as well.
        var newTargetVariable = GetLastTransitiveVariable(regressionProblemData.TargetVariable, problemData.Transformations, inverse);
        newProblemData = new RegressionProblemData(dataset, inputs, newTargetVariable, problemData.Transformations);
      } else if (problemData is IClassificationProblemData classificationProblemData) {
        // The classification target is passed on under its current name.
        newProblemData = new ClassificationProblemData(dataset, inputs, classificationProblemData.TargetVariable, problemData.Transformations);
      } else throw new NotSupportedException("Type of ProblemData not supported");

      newProblemData.TrainingPartition.Start = problemData.TrainingPartition.Start;
      newProblemData.TrainingPartition.End = problemData.TrainingPartition.End;
      newProblemData.TestPartition.Start = problemData.TestPartition.Start;
      newProblemData.TestPartition.End = problemData.TestPartition.End;

      return newProblemData;
    }
    #endregion

    #region Transform Model
    /// <summary>
    /// Wraps <paramref name="model"/> in the transformation model matching its
    /// type (time series prognosis, regression, classification or clustering).
    /// </summary>
    /// <exception cref="InvalidOperationException">The model already is a transformation model.</exception>
    /// <exception cref="NotSupportedException">The model type has no matching transformation model.</exception>
    public static IDataAnalysisTransformationModel CreateTransformationIntegratedModel(IDataAnalysisModel model, IEnumerable<IDataAnalysisTransformation> transformations) {
      if (model is IDataAnalysisTransformationModel)
        throw new InvalidOperationException("Model already is a transformation model.");

      // NOTE(review): the case order may matter if these model interfaces derive from each other - confirm before reordering.
      switch (model) {
        case ITimeSeriesPrognosisModel timeSeriesPrognosisModel:
          return new TimeSeriesPrognosisTransformationModel(timeSeriesPrognosisModel, transformations);
        case IRegressionModel regressionModel:
          return new RegressionTransformationModel(regressionModel, transformations);
        case IClassificationModel classificationModel:
          return new ClassificationTransformationModel(classificationModel, transformations);
        case IClusteringModel clusteringModel:
          return new ClusteringTransformationModel(clusteringModel, transformations);
        default:
          throw new NotSupportedException("Type of the model is not supported;");
      }
    }

    /// <summary>
    /// Returns the original model wrapped by a transformation model.
    /// </summary>
    /// <param name="transformationModel">The model to unwrap.</param>
    /// <param name="transformations">Not used by this method.</param>
    /// <exception cref="InvalidOperationException">The model is not a transformation model.</exception>
    public static IDataAnalysisModel RestoreTrainedModel(IDataAnalysisModel transformationModel, IEnumerable<IDataAnalysisTransformation> transformations) {
      if (!(transformationModel is IDataAnalysisTransformationModel model))
        throw new InvalidOperationException("Cannot restore because model is not a TransformationModel");
      return model.OriginalModel;
    }
    #endregion

    #region Transform Solution
    /// <summary>
    /// Switches a solution between its two representations. A solution with a
    /// transformation model yields the original model on transformed problem
    /// data; any other solution yields a transformation-integrated model on
    /// inverse-transformed problem data.
    /// </summary>
    public static IDataAnalysisSolution TransformSolution(IDataAnalysisSolution solution) {
      var transformations = solution.ProblemData.Transformations;

      var model = solution.Model is IDataAnalysisTransformationModel // TODO: what if model is a integrated sym-reg model?
        ? RestoreTrainedModel(solution.Model, transformations)
        : CreateTransformationIntegratedModel(solution.Model, transformations);

      var data = solution.Model is IDataAnalysisTransformationModel
        ? ApplyTransformations(solution.ProblemData) // original -> transformed
        : InverseApplyTransformations(solution.ProblemData); // transformed -> original

      return CreateSolution(model, data);
    }

    /// <summary>
    /// Creates the solution matching the model type; the problem data is cast
    /// to the problem data interface of that solution type.
    /// </summary>
    /// <exception cref="NotSupportedException">
    /// The model is not a time series prognosis, regression or classification model.
    /// </exception>
    private static IDataAnalysisSolution CreateSolution(IDataAnalysisModel model, IDataAnalysisProblemData problemData) {
      // NOTE(review): clustering models are accepted by CreateTransformationIntegratedModel
      // but have no branch here - confirm whether that is intended.
      switch (model) {
        case ITimeSeriesPrognosisModel timeSeriesPrognosisModel:
          return timeSeriesPrognosisModel.CreateTimeSeriesPrognosisSolution((ITimeSeriesPrognosisProblemData)problemData);
        case IRegressionModel regressionModel:
          return regressionModel.CreateRegressionSolution((IRegressionProblemData)problemData);
        case IClassificationModel classificationModel:
          return classificationModel.CreateClassificationSolution((IClassificationProblemData)problemData);
        default:
          throw new NotSupportedException("Cannot create Solution of the model type.");
      }
    }
    #endregion

    #endregion
  }
}
---|