source: branches/2906_Transformations/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/DataAnalysisTransformation.cs @ 15884

Last change on this file since 15884 was 15884, checked in by pfleck, 4 years ago

#2906 Refactoring

  • Moved transformation-specific parts out of existing interfaces.
  • Moved all Transformation logic to DataAnalysisTransformation.
  • Simplified (Inverse)Transformation of Dataset/ProblemData/Model/Solution.
File size: 13.2 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.Parameters;
29using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
30
31namespace HeuristicLab.Problems.DataAnalysis {
32  [Item("Transformation", "A transformation applied to a DataAnalysisProblemData")]
33  [StorableClass]
34  public sealed class DataAnalysisTransformation : ParameterizedNamedItem, IDataAnalysisTransformation {
35    #region Parameter Properties
36    private IFixedValueParameter<StringValue> OriginalVariableParameter {
37      get { return (IFixedValueParameter<StringValue>)Parameters["Original Variable"]; }
38    }
39
40    private IFixedValueParameter<StringValue> TransformedVariableParameter {
41      get { return (IFixedValueParameter<StringValue>)Parameters["Transformed Variable"]; }
42    }
43
44    private ValueParameter<ITransformation> TransformationParameter {
45      get { return (ValueParameter<ITransformation>)Parameters["Transformation"]; }
46    }
47    #endregion
48
49    #region Properties
50    public string OriginalVariable {
51      get { return OriginalVariableParameter.Value.Value; }
52    }
53
54    public string TransformedVariable {
55      get { return TransformedVariableParameter.Value.Value; }
56    }
57
58    public ITransformation Transformation {
59      get { return TransformationParameter.Value; }
60    }
61    #endregion
62
63    #region Constructor, Cloning & Persistence
64    public DataAnalysisTransformation(string originalVariable, string transformedVariable, ITransformation transformation)
65      : base() {
66      Parameters.Add(new FixedValueParameter<StringValue>("Original Variable", new StringValue(originalVariable).AsReadOnly()));
67      Parameters.Add(new FixedValueParameter<StringValue>("Transformed Variable", new StringValue(transformedVariable).AsReadOnly()));
68      Parameters.Add(new ValueParameter<ITransformation>("Transformation", transformation)); // TODO: should be readonly/fixed
69    }
70
71    private DataAnalysisTransformation(DataAnalysisTransformation original, Cloner cloner)
72      : base(original, cloner) { }
73
74    public override IDeepCloneable Clone(Cloner cloner) {
75      return new DataAnalysisTransformation(this, cloner);
76    }
77
78    [StorableConstructor]
79    private DataAnalysisTransformation(bool deserializing)
80      : base(deserializing) { }
81
82    [StorableHook(HookType.AfterDeserialization)]
83    #endregion
84
85    public override string ToString() {
86      return $"{Transformation} ({OriginalVariable} -> {TransformedVariable})";
87    }
88
89    #region Transformation
90
91    #region Variable Extension & Reduction
92    // originals => include extended
93    public static IEnumerable<string> ExtendVariables(IEnumerable<string> variables, IEnumerable<IDataAnalysisTransformation> transformations) {
94      return GetTransitiveVariables(variables, transformations);
95    }
96
97    // extended => originals
98    public static IEnumerable<string> ReduceVariables(IEnumerable<string> variables, IEnumerable<IDataAnalysisTransformation> transformations) {
99      var originalVariables = new HashSet<string>();
100      foreach (var variable in variables)
101        originalVariables.Add(GetLastTransitiveVariable(variable, transformations, inverse: true));
102      return originalVariables;
103    }
104
105    public static IEnumerable<string> GetTransitiveVariables(IEnumerable<string> variables, IEnumerable<IDataAnalysisTransformation> transformations, bool inverse = false) {
106      var reachableVariables = new HashSet<string>(variables);
107      if (inverse) transformations = transformations.Reverse();
108      foreach (var transformation in transformations) {
109        var source = inverse ? transformation.TransformedVariable : transformation.OriginalVariable;
110        var target = inverse ? transformation.OriginalVariable : transformation.TransformedVariable;
111        if (reachableVariables.Contains(source))
112          reachableVariables.Add(target);
113      }
114
115      return reachableVariables;
116    }
117
118    public static string GetLastTransitiveVariable(string variable, IEnumerable<IDataAnalysisTransformation> transformations, bool inverse = false) {
119      if (inverse) transformations = transformations.Reverse();
120      foreach (var transformation in transformations) {
121        var source = inverse ? transformation.TransformedVariable : transformation.OriginalVariable;
122        var target = inverse ? transformation.OriginalVariable : transformation.TransformedVariable;
123        if (variable == source)
124          variable = target;
125      }
126
127      return variable;
128    }
129    #endregion
130
131    #region Transform Dataset
132    public static IDataset Transform(IDataset dataset, IEnumerable<IDataAnalysisTransformation> transformations) {
133      var modifiableDataset = ((Dataset)dataset).ToModifiable();
134
135      foreach (var transformation in transformations) {
136        var trans = (ITransformation<double>)transformation.Transformation;
137
138        var originalData = modifiableDataset.GetDoubleValues(transformation.OriginalVariable);
139        //if (!trans.Check(originalData, out string errorMessage))
140        //  throw new InvalidOperationException($"Cannot estimate Values, Transformation is invalid: {errorMessage}");
141        // TODO: check was already called before configure (in preprocessing)
142        // TODO: newly specified data might not pass the check but it does not matter because the data is not configured with
143        // e.g. impact calculation -> replacement=most common -> originalMean is zero
144
145        var transformedData = trans.Apply(originalData).ToList();
146        if (modifiableDataset.VariableNames.Contains(transformation.TransformedVariable))
147          modifiableDataset.ReplaceVariable(transformation.TransformedVariable, transformedData);
148        else
149          modifiableDataset.AddVariable(transformation.TransformedVariable, transformedData);
150      }
151
152      return modifiableDataset; // TODO: to regular dataset?
153    }
154
155    public static IDataset InverseTransform(IDataset dataset, IEnumerable<IDataAnalysisTransformation> transformations, bool removeVirtualVariables = true) {
156      var modifiableDataset = ((Dataset)dataset).ToModifiable();
157
158      var transformationsStack = new Stack<IDataAnalysisTransformation>(transformations);
159      while (transformationsStack.Any()) {
160        var transformation = transformationsStack.Pop();
161        var trans = (ITransformation<double>)transformation.Transformation;
162
163        var prevTransformations = transformations.Except(transformationsStack);
164        bool originalWasChanged = prevTransformations.Any(x => x.TransformedVariable == transformation.OriginalVariable);
165        if (originalWasChanged) {
166          var transformedData = modifiableDataset.GetDoubleValues(transformation.TransformedVariable);
167
168          var originalData = trans.InverseApply(transformedData).ToList();
169          modifiableDataset.ReplaceVariable(transformation.OriginalVariable, originalData);
170        }
171      }
172
173      if (removeVirtualVariables) {
174        var originalVariables = ReduceVariables(dataset.VariableNames, transformations);
175        var virtualVariables = dataset.VariableNames.Except(originalVariables);
176        foreach (var virtualVariable in virtualVariables)
177          modifiableDataset.RemoveVariable(virtualVariable);
178      }
179
180      return modifiableDataset; // TODO: to regular dataset?
181    }
182    #endregion
183
184    #region Transform ProblemData
185    public static IDataAnalysisProblemData ApplyTransformations(IDataAnalysisProblemData problemData) {
186      var newDataset = Transform(problemData.Dataset, problemData.Transformations);
187      var extendedInputs = ExtendVariables(problemData.AllowedInputVariables, problemData.Transformations);
188
189      return CreateNewProblemData(problemData, newDataset, extendedInputs, inverse: false);
190    }
191
192    public static IDataAnalysisProblemData InverseApplyTransformations(IDataAnalysisProblemData problemData) {
193      var newDataset = InverseTransform(problemData.Dataset, problemData.Transformations);
194      var reducedInputs = ReduceVariables(problemData.AllowedInputVariables, problemData.Transformations);
195
196      return CreateNewProblemData(problemData, newDataset, reducedInputs, inverse: true);
197    }
198
199    private static IDataAnalysisProblemData CreateNewProblemData(IDataAnalysisProblemData problemData, IDataset dataset, IEnumerable<string> inputs, bool inverse = false) {
200      IDataAnalysisProblemData newProblemData;
201      if (problemData is IRegressionProblemData regressionProblemData) {
202        var newTargetVariable = GetLastTransitiveVariable(regressionProblemData.TargetVariable, problemData.Transformations, inverse);
203        newProblemData = new RegressionProblemData(dataset, inputs, newTargetVariable, problemData.Transformations);
204      } else if (problemData is IClassificationProblemData classificationProblemData) {
205        newProblemData = new ClassificationProblemData(dataset, inputs, classificationProblemData.TargetVariable, problemData.Transformations);
206      } else throw new NotSupportedException("Type of ProblemData not supported");
207
208      newProblemData.TrainingPartition.Start = problemData.TrainingPartition.Start;
209      newProblemData.TrainingPartition.End = problemData.TrainingPartition.End;
210      newProblemData.TestPartition.Start = problemData.TestPartition.Start;
211      newProblemData.TestPartition.End = problemData.TestPartition.End;
212
213      return newProblemData;
214    }
215    #endregion
216
217    #region Transform Model
218    public static IDataAnalysisTransformationModel CreateTransformationIntegratedModel(IDataAnalysisModel model, IEnumerable<IDataAnalysisTransformation> transformations) {
219      if (model is IDataAnalysisTransformationModel)
220        throw new InvalidOperationException("Model already is a transformation model.");
221
222      switch (model) {
223        case ITimeSeriesPrognosisModel timeSeriesPrognosisModel:
224          return new TimeSeriesPrognosisTransformationModel(timeSeriesPrognosisModel, transformations);
225        case IRegressionModel regressionModel:
226          return new RegressionTransformationModel(regressionModel, transformations);
227        case IClassificationModel classificationModel:
228          return new ClassificationTransformationModel(classificationModel, transformations);
229        case IClusteringModel clusteringModel:
230          return new ClusteringTransformationModel(clusteringModel, transformations);
231        default:
232          throw new NotSupportedException("Type of the model is not supported;");
233      }
234    }
235
236    public static IDataAnalysisModel RestoreTrainedModel(IDataAnalysisModel transformationModel, IEnumerable<IDataAnalysisTransformation> transformations) {
237      if (!(transformationModel is IDataAnalysisTransformationModel model))
238        throw new InvalidOperationException("Cannot restore because model is not a TransformationModel");
239      return model.OriginalModel;
240    }
241    #endregion
242
243    #region Transform Solution
244    public static IDataAnalysisSolution TransformSolution(IDataAnalysisSolution solution) {
245      var transformations = solution.ProblemData.Transformations;
246
247      var model = solution.Model is IDataAnalysisTransformationModel // TODO: what if model is a integrated sym-reg model?
248        ? RestoreTrainedModel(solution.Model, transformations)
249        : CreateTransformationIntegratedModel(solution.Model, transformations);
250
251      var data = solution.Model is IDataAnalysisTransformationModel
252        ? ApplyTransformations(solution.ProblemData) // original -> transformed
253        : InverseApplyTransformations(solution.ProblemData); // transformed -> original
254
255      return CreateSolution(model, data);
256    }
257
258    private static IDataAnalysisSolution CreateSolution(IDataAnalysisModel model, IDataAnalysisProblemData problemData) {
259      switch (model) {
260        case ITimeSeriesPrognosisModel timeSeriesPrognosisModel:
261          return timeSeriesPrognosisModel.CreateTimeSeriesPrognosisSolution((ITimeSeriesPrognosisProblemData)problemData);
262        case IRegressionModel regressionModel:
263          return regressionModel.CreateRegressionSolution((IRegressionProblemData)problemData);
264        case IClassificationModel classificationModel:
265          return classificationModel.CreateClassificationSolution((IClassificationProblemData)problemData);
266        default:
267          throw new NotSupportedException("Cannot create Solution of the model type.");
268      }
269    }
270    #endregion
271
272    #endregion
273  }
274}
Note: See TracBrowser for help on using the repository browser.