Changeset 15884


Ignore:
Timestamp:
04/04/18 17:18:02 (12 months ago)
Author:
pfleck
Message:

#2906 Refactoring

  • Moved transformation-specific parts out of existing interfaces.
  • Moved all Transformation logic to DataAnalysisTransformation.
  • Simplified (Inverse)Transformation of Dataset/ProblemData/Model/Solution.
Location:
branches/2906_Transformations
Files:
18 edited

Legend:

Unmodified
Added
Removed
  • branches/2906_Transformations/HeuristicLab.DataPreprocessing.Views/3.4/PreprocessingCheckedVariablesView.cs

    r15583 r15884  
    4242    protected PreprocessingCheckedVariablesView() {
    4343      InitializeComponent();
     44
     45      // TODO: fix auto-size of columns
     46      //for (int i = 0; i < variablesListView.Columns.Count; i++) {
     47      //  variablesListView.Columns[i].Width = -1;
     48      //  variablesListView.Columns[i].AutoResize(ColumnHeaderAutoResizeStyle.ColumnContent);
     49      //}
    4450    }
    4551
  • branches/2906_Transformations/HeuristicLab.DataPreprocessing/3.4/ProblemDataCreator.cs

    r15880 r15884  
    2323using System.Collections.Generic;
    2424using System.Linq;
    25 using HeuristicLab.Common;
    2625using HeuristicLab.Problems.DataAnalysis;
    2726
     
    4645
    4746      IDataAnalysisProblemData problemData;
    48 
    4947      if (oldProblemData is TimeSeriesPrognosisProblemData) {
    5048        problemData = CreateTimeSeriesPrognosisData((TimeSeriesPrognosisProblemData)oldProblemData);
     
    5957      }
    6058
    61       SetTrainingAndTestPartition(problemData);
    62       SetAllowedInputVariables(problemData, oldProblemData.AllowedInputVariables);
    63       // set the input variables to the correct checked state
    64       //var inputVariables = oldProblemData.InputVariables.ToDictionary(x => x.Value, x => x);
    65       //foreach (var variable in problemData.InputVariables) {
    66       //  bool isChecked = inputVariables.ContainsKey(variable.Value) && oldProblemData.InputVariables.ItemChecked(inputVariables[variable.Value]);
    67       //  problemData.InputVariables.SetItemCheckedState(variable, isChecked);
    68       //}
     59      SetTrainingAndTestPartition(problemData, context.Data);
     60      SetAllowedInputVariables(problemData, oldProblemData);
    6961
    7062      return problemData;
     
    7567      if (!context.Data.VariableNames.Contains(targetVariable))
    7668        targetVariable = context.Data.VariableNames.First();
    77       var inputVariables = GetDoubleInputVariables(targetVariable);
    78       var newProblemData = new TimeSeriesPrognosisProblemData(ExportedDataset, inputVariables, targetVariable, CreateDataAnalysisTransformation()) {
     69      var newProblemData = new TimeSeriesPrognosisProblemData(ExportedDataset, Enumerable.Empty<string>(), targetVariable, CreateDataAnalysisTransformation()) {
    7970        TrainingHorizon = oldProblemData.TrainingHorizon,
    8071        TestHorizon = oldProblemData.TestHorizon
     
    8475
    8576    private IDataAnalysisProblemData CreateRegressionData(RegressionProblemData oldProblemData) {
    86       // TODO: transformations (additional inputs, target changed)
    87       var targetVariable = RegressionTransformationModel.GetTransformedTragetVariable(oldProblemData.TargetVariable, CreateDataAnalysisTransformation());
     77      var targetVariable = DataAnalysisTransformation.GetLastTransitiveVariable(oldProblemData.TargetVariable, CreateDataAnalysisTransformation());
    8878      if (!context.Data.VariableNames.Contains(targetVariable))
    8979        targetVariable = context.Data.VariableNames.First();
    90       var inputVariables = GetDoubleInputVariables(targetVariable);
    91       var newProblemData = new RegressionProblemData(ExportedDataset, inputVariables, targetVariable, CreateDataAnalysisTransformation());
     80      var newProblemData = new RegressionProblemData(ExportedDataset, Enumerable.Empty<string>(), targetVariable, CreateDataAnalysisTransformation());
    9281      return newProblemData;
    9382    }
     
    9786      if (!context.Data.VariableNames.Contains(targetVariable))
    9887        targetVariable = context.Data.VariableNames.First();
    99       var inputVariables = GetDoubleInputVariables(targetVariable);
    100       var newProblemData = new ClassificationProblemData(ExportedDataset, inputVariables, targetVariable, CreateDataAnalysisTransformation()) {
     88      var newProblemData = new ClassificationProblemData(ExportedDataset, Enumerable.Empty<string>(), targetVariable, CreateDataAnalysisTransformation()) {
    10189        PositiveClass = oldProblemData.PositiveClass
    10290      };
     
    10593
    10694    private IDataAnalysisProblemData CreateClusteringData(ClusteringProblemData oldProblemData) {
    107       return new ClusteringProblemData(ExportedDataset, GetDoubleInputVariables(String.Empty), CreateDataAnalysisTransformation());
     95      return new ClusteringProblemData(ExportedDataset, Enumerable.Empty<string>(), CreateDataAnalysisTransformation());
    10896    }
    10997
    110     private void SetTrainingAndTestPartition(IDataAnalysisProblemData problemData) {
    111       var ppData = context.Data;
    112 
     98    private static void SetTrainingAndTestPartition(IDataAnalysisProblemData problemData, IPreprocessingData ppData) {
    11399      problemData.TrainingPartition.Start = ppData.TrainingPartition.Start;
    114100      problemData.TrainingPartition.End = ppData.TrainingPartition.End;
     
    117103    }
    118104
    119     void SetAllowedInputVariables(IDataAnalysisProblemData problemData, IEnumerable<string> oldInputVariables) {
    120       var inputs = DataAnalysisTransformationModel.ExtendInputVariables(oldInputVariables, problemData.Transformations);
    121 
     105    private static void SetAllowedInputVariables(IDataAnalysisProblemData problemData, IDataAnalysisProblemData oldProblemData) {
     106      // original inputs + extended (transitive) inputs
     107      var inputs = DataAnalysisTransformation.ExtendVariables(oldProblemData.AllowedInputVariables, problemData.Transformations).ToList();
    122108      foreach (var input in problemData.InputVariables) {
    123109        problemData.InputVariables.SetItemCheckedState(input, inputs.Contains(input.Value));
    124110      }
    125     }
    126111
    127     private IEnumerable<string> GetDoubleInputVariables(string targetVariable) {
    128       var variableNames = new List<string>();
    129       for (int i = 0; i < context.Data.Columns; ++i) {
    130         var variableName = context.Data.GetVariableName(i);
    131         if (context.Data.VariableHasType<double>(i)
    132           && variableName != targetVariable
    133           && IsNotConstantInputVariable(context.Data.GetValues<double>(i))) {
    134 
    135           variableNames.Add(variableName);
    136         }
     112      // new variables that were not created via transformations
     113      var originalAndVirtualVariables = DataAnalysisTransformation.ExtendVariables(oldProblemData.Dataset.VariableNames, problemData.Transformations);
     114      var newVariables = problemData.Dataset.VariableNames.Except(originalAndVirtualVariables).ToList();
     115      foreach (var input in problemData.InputVariables) {
     116        if (newVariables.Contains(input.Value))
     117          problemData.InputVariables.SetItemCheckedState(input, true);
    137118      }
    138       return variableNames;
    139     }
    140 
    141     private bool IsNotConstantInputVariable(IList<double> list) {
    142       return context.Data.TrainingPartition.End - context.Data.TrainingPartition.Start > 1 || list.Range() > 0;
    143119    }
    144120
  • branches/2906_Transformations/HeuristicLab.Problems.DataAnalysis.Views/3.4/DataAnalysisTransformationModelView.Designer.cs

    r15870 r15884  
    4545    /// </summary>
    4646    private void InitializeComponent() {
    47       this.originalModelTabPage = new System.Windows.Forms.TabPage();
     47      this.trainedModelTabPage = new System.Windows.Forms.TabPage();
    4848      this.modelViewHost = new HeuristicLab.MainForm.WindowsForms.ViewHost();
    4949      this.tabControl = new System.Windows.Forms.TabControl();
     
    5252      this.targetTransformationsTabPage = new System.Windows.Forms.TabPage();
    5353      this.targetTransformationsViewHost = new HeuristicLab.MainForm.WindowsForms.ViewHost();
    54       this.originalModelTabPage.SuspendLayout();
     54      this.trainedModelTabPage.SuspendLayout();
    5555      this.tabControl.SuspendLayout();
    5656      this.inputTransformationsTabPage.SuspendLayout();
     
    5858      this.SuspendLayout();
    5959      //
    60       // originalModelTabPage
     60      // trainedModelTabPage
    6161      //
    62       this.originalModelTabPage.Controls.Add(this.modelViewHost);
    63       this.originalModelTabPage.Location = new System.Drawing.Point(4, 22);
    64       this.originalModelTabPage.Name = "originalModelTabPage";
    65       this.originalModelTabPage.Padding = new System.Windows.Forms.Padding(3);
    66       this.originalModelTabPage.Size = new System.Drawing.Size(486, 392);
    67       this.originalModelTabPage.TabIndex = 0;
    68       this.originalModelTabPage.Text = "Original Model";
    69       this.originalModelTabPage.UseVisualStyleBackColor = true;
     62      this.trainedModelTabPage.Controls.Add(this.modelViewHost);
     63      this.trainedModelTabPage.Location = new System.Drawing.Point(4, 22);
     64      this.trainedModelTabPage.Name = "trainedModelTabPage";
     65      this.trainedModelTabPage.Padding = new System.Windows.Forms.Padding(3);
     66      this.trainedModelTabPage.Size = new System.Drawing.Size(486, 392);
     67      this.trainedModelTabPage.TabIndex = 0;
     68      this.trainedModelTabPage.Text = "Trained Model";
     69      this.trainedModelTabPage.UseVisualStyleBackColor = true;
    7070      //
    7171      // modelViewHost
     
    8888            | System.Windows.Forms.AnchorStyles.Left)
    8989            | System.Windows.Forms.AnchorStyles.Right)));
    90       this.tabControl.Controls.Add(this.originalModelTabPage);
     90      this.tabControl.Controls.Add(this.trainedModelTabPage);
    9191      this.tabControl.Controls.Add(this.inputTransformationsTabPage);
    9292      this.tabControl.Controls.Add(this.targetTransformationsTabPage);
     
    154154      this.Name = "DataAnalysisTransformationModelView";
    155155      this.Size = new System.Drawing.Size(500, 424);
    156       this.originalModelTabPage.ResumeLayout(false);
     156      this.trainedModelTabPage.ResumeLayout(false);
    157157      this.tabControl.ResumeLayout(false);
    158158      this.inputTransformationsTabPage.ResumeLayout(false);
     
    164164    #endregion
    165165
    166     private System.Windows.Forms.TabPage originalModelTabPage;
     166    private System.Windows.Forms.TabPage trainedModelTabPage;
    167167    private MainForm.WindowsForms.ViewHost modelViewHost;
    168168    private System.Windows.Forms.TabControl tabControl;
  • branches/2906_Transformations/HeuristicLab.Problems.DataAnalysis.Views/3.4/DataAnalysisTransformationModelView.cs

    r15870 r15884  
    1818 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
    1919 */
    20 #endregion           
     20#endregion
    2121
    2222using HeuristicLab.Core.Views;
     
    2525namespace HeuristicLab.Problems.DataAnalysis.Views {
    2626  [View("Transformation Model View")]
    27   [Content(typeof(IDataAnalysisTransformationModel), true)]
     27  [Content(typeof(DataAnalysisTransformationModel), true)]
    2828  public partial class DataAnalysisTransformationModelView : ItemView {
    2929
    30     public new IDataAnalysisTransformationModel Content {
    31       get { return (IDataAnalysisTransformationModel)base.Content; }
     30    public new DataAnalysisTransformationModel Content {
     31      get { return (DataAnalysisTransformationModel)base.Content; }
    3232      set { base.Content = value; }
    3333    }
  • branches/2906_Transformations/HeuristicLab.Problems.DataAnalysis.Views/3.4/Solution Views/DataAnalysisSolutionView.cs

    r15870 r15884  
    4040
    4141    private const string BacktransformButtonText = "Integrate Transformations";
    42     private const string ReapplyTransformationsButtonText = "Restore Original Model";
     42    private const string ReapplyTransformationsButtonText = "Restore Trained Model";
    4343
    4444    public DataAnalysisSolutionView() {
     
    9393
    9494      transformButton.Text = Content.Model is IDataAnalysisTransformationModel
    95         ? ReapplyTransformationsButtonText : BacktransformButtonText;
     95        ? ReapplyTransformationsButtonText
     96        : BacktransformButtonText;
    9697    }
    9798
     
    237238        problemData = param.Value as DataAnalysisProblemData;
    238239      }
     240
    239241      if (problemData == null) return;
    240242
     
    258260
    259261    private void transformButton_Click(object sender, EventArgs e) {
    260       var transformedSolution = CreateTransformedSolution(Content);
     262      var transformedSolution = DataAnalysisTransformation.TransformSolution(Content);
    261263      MainFormManager.MainForm.ShowContent(transformedSolution);
    262264    }
    263 
    264     private static IDataAnalysisSolution CreateTransformedSolution(IDataAnalysisSolution solution) {
    265       if (solution.Model is IRegressionTransformationModel regressionTransformationModel && !(solution.Model is ITimeSeriesPrognosisTransformationModel)) {
    266         var originalData = (IRegressionProblemData)((IRegressionSolution)solution).ProblemData.Transform();
    267         return regressionTransformationModel.OriginalModel.CreateRegressionSolution(originalData);
    268       } else if (solution.Model is IClassificationTransformationModel classificationTransformationModel) {
    269         var originalData = (IClassificationProblemData)((IClassificationSolution)solution).ProblemData.Transform();
    270         return classificationTransformationModel.OriginalModel.CreateClassificationSolution(originalData);
    271       } else if (solution.Model is IRegressionModel regressionModel && !(solution.Model is ITimeSeriesPrognosisModel)) {
    272         var transformationModel = new RegressionTransformationModel(regressionModel, solution.ProblemData.Transformations);
    273         var transformedData = (IRegressionProblemData)((IRegressionSolution)solution).ProblemData.InverseTransform();
    274         return transformationModel.CreateRegressionSolution(transformedData);
    275       } else if (solution.Model is IClassificationModel classificationModel) {
    276         var transformationModel = new ClassificationTransformationModel(classificationModel, solution.ProblemData.Transformations);
    277         var transformedData = (IClassificationProblemData)((IClassificationSolution)solution).ProblemData.InverseTransform();
    278         return transformationModel.CreateClassificationSolution(transformedData);
    279       } else throw new NotSupportedException();
    280     }
    281 
    282     /*if (Content.Model is IDataAnalysisTransformationModel transformationModel) {
    283       var originalModel = transformationModel.InverseTransform();
    284       originalModel.CreateSolution();
    285     } else {
    286       var originalModel = Content.Model;
    287       var transformationModel = originalModel.Transform();
    288       transformationModel.CreateSolution();
    289     }*/
    290 
    291     ////Content.Model.Transform()
    292 
    293     //var transformedModel = new DataAnalysisTransformationModel(Content.Model, Content.ProblemData.Transformations);
    294     //var originalProblemData = (IRegressionProblemData)Content.ProblemData.InverseTransform();
    295     //var transformedSolution = new TransformedRegressionSolution(transformedModel, originalProblemData);
    296     //MainFormManager.MainForm.ShowContent(transformedSolution);
    297265  }
    298266}
  • branches/2906_Transformations/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationTransformationModel.cs

    r15870 r15884  
    3535    }
    3636
    37 
    3837    #region Constructor, Cloning & Persistence
    3938    public ClassificationTransformationModel(IClassificationModel originalModel, IEnumerable<IDataAnalysisTransformation> transformations)
    4039      : base(originalModel, transformations) {
    41       if (CalculateTransitiveVariables(new[] { originalModel.TargetVariable }, transformations).Any())
     40      if (DataAnalysisTransformation.GetTransitiveVariables(new[] { originalModel.TargetVariable }, transformations).Any())
    4241        throw new NotSupportedException("Classification with a transformed target variable is not allowed");
    4342    }
     
    5756
    5857    public virtual IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {
    59       return OriginalModel.GetEstimatedClassValues(Transform(dataset, InputTransformations), rows);
     58      var transformedInput = DataAnalysisTransformation.Transform(dataset, InputTransformations);
     59      return OriginalModel.GetEstimatedClassValues(transformedInput, rows);
    6060    }
    6161
  • branches/2906_Transformations/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Clustering/ClusteringTransformationModel.cs

    r15870 r15884  
    5353
    5454    public IEnumerable<int> GetClusterValues(IDataset dataset, IEnumerable<int> rows) {
    55       return OriginalModel.GetClusterValues(Transform(dataset, InputTransformations), rows);
     55      var transformedInputs = DataAnalysisTransformation.Transform(dataset, InputTransformations);
     56      return OriginalModel.GetClusterValues(transformedInputs, rows);
    5657    }
    5758  }
  • branches/2906_Transformations/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/DataAnalysisProblemData.cs

    r15879 r15884  
    241241      }
    242242    }
    243 
    244     public virtual IDataAnalysisProblemData Transform() {
    245       var newDataset = DataAnalysisTransformationModel.Transform(Dataset, Transformations);
    246 
    247       var extendedInputs = DataAnalysisTransformationModel.ExtendInputVariables(AllowedInputVariables, Transformations);
    248       var checkedInputs = new CheckedItemList<StringValue>(newDataset.VariableNames.Select(x => new StringValue(x)));
    249       foreach (var input in checkedInputs) checkedInputs.SetItemCheckedState(input, extendedInputs.Contains(input.Value));
    250 
    251       // TODO: Cannot create concrete instance here (maybe derived Create-method?)
    252       var cloner = new Cloner();
    253       cloner.RegisterClonedObject(Dataset, newDataset);
    254       cloner.RegisterClonedObject(InputVariables, checkedInputs.AsReadOnly());
    255       // TODO: valid values for target are not extended
    256 
    257       return cloner.Clone(this);
    258     }
    259 
    260     public virtual IDataAnalysisProblemData InverseTransform() {
    261       var newDataset = InverseTransform(Dataset, Transformations);
    262 
    263       var checkedInputs = new CheckedItemList<StringValue>(newDataset.VariableNames.Select(x => new StringValue(x)));
    264       foreach (var input in checkedInputs) checkedInputs.SetItemCheckedState(input, AllowedInputVariables.Contains(input.Value));
    265 
    266       // TODO: Cannot create concrete instance here (maybe derived Create-method?)
    267       var cloner = new Cloner();
    268       cloner.RegisterClonedObject(Dataset, newDataset);
    269       cloner.RegisterClonedObject(InputVariables, checkedInputs.AsReadOnly());
    270       // TODO: check valid target values
    271 
    272       return cloner.Clone(this);
    273     }
    274 
    275     public static IDataset InverseTransform(IDataset dataset, IEnumerable<IDataAnalysisTransformation> transformations, bool removeVirtualVariables = true) {
    276       var modifiableDataset = ((Dataset)dataset).ToModifiable();
    277 
    278       var transformationsStack = new Stack<IDataAnalysisTransformation>(transformations);
    279       while (transformationsStack.Any()) {
    280         var transformation = transformationsStack.Pop();
    281         var trans = (ITransformation<double>)transformation.Transformation;
    282 
    283         var prevTransformations = transformations.Except(transformationsStack);
    284         bool originalWasChanged = prevTransformations.Any(x => x.TransformedVariable == transformation.OriginalVariable);
    285         if (originalWasChanged) {
    286           var transformedData = modifiableDataset.GetDoubleValues(transformation.TransformedVariable);
    287 
    288           var originalData = trans.InverseApply(transformedData).ToList();
    289           modifiableDataset.ReplaceVariable(transformation.OriginalVariable, originalData);
    290         }
    291 
    292         bool transformedVariablePending = transformationsStack.Any(x => x.OriginalVariable == transformation.TransformedVariable || x.TransformedVariable == transformation.TransformedVariable);
    293         if (removeVirtualVariables && !transformedVariablePending)
    294           modifiableDataset.RemoveVariable(transformation.TransformedVariable);
    295       }
    296 
    297       return modifiableDataset;
    298     }
    299243  }
    300244}
  • branches/2906_Transformations/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/DataAnalysisTransformation.cs

    r15879 r15884  
    2020#endregion
    2121
     22using System;
     23using System.Collections.Generic;
     24using System.Linq;
    2225using HeuristicLab.Common;
    2326using HeuristicLab.Core;
     
    6770
    6871    private DataAnalysisTransformation(DataAnalysisTransformation original, Cloner cloner)
    69       : base(original, cloner) {
    70     }
     72      : base(original, cloner) { }
    7173
    7274    public override IDeepCloneable Clone(Cloner cloner) {
     
    8486      return $"{Transformation} ({OriginalVariable} -> {TransformedVariable})";
    8587    }
     88
     89    #region Transformation
     90
     91    #region Variable Extension & Reduction
     92    // originals => include extended
     93    public static IEnumerable<string> ExtendVariables(IEnumerable<string> variables, IEnumerable<IDataAnalysisTransformation> transformations) {
     94      return GetTransitiveVariables(variables, transformations);
     95    }
     96
     97    // extended => originals
     98    public static IEnumerable<string> ReduceVariables(IEnumerable<string> variables, IEnumerable<IDataAnalysisTransformation> transformations) {
     99      var originalVariables = new HashSet<string>();
     100      foreach (var variable in variables)
     101        originalVariables.Add(GetLastTransitiveVariable(variable, transformations, inverse: true));
     102      return originalVariables;
     103    }
     104
     105    public static IEnumerable<string> GetTransitiveVariables(IEnumerable<string> variables, IEnumerable<IDataAnalysisTransformation> transformations, bool inverse = false) {
     106      var reachableVariables = new HashSet<string>(variables);
     107      if (inverse) transformations = transformations.Reverse();
     108      foreach (var transformation in transformations) {
     109        var source = inverse ? transformation.TransformedVariable : transformation.OriginalVariable;
     110        var target = inverse ? transformation.OriginalVariable : transformation.TransformedVariable;
     111        if (reachableVariables.Contains(source))
     112          reachableVariables.Add(target);
     113      }
     114
     115      return reachableVariables;
     116    }
     117
     118    public static string GetLastTransitiveVariable(string variable, IEnumerable<IDataAnalysisTransformation> transformations, bool inverse = false) {
     119      if (inverse) transformations = transformations.Reverse();
     120      foreach (var transformation in transformations) {
     121        var source = inverse ? transformation.TransformedVariable : transformation.OriginalVariable;
     122        var target = inverse ? transformation.OriginalVariable : transformation.TransformedVariable;
     123        if (variable == source)
     124          variable = target;
     125      }
     126
     127      return variable;
     128    }
     129    #endregion
     130
     131    #region Transform Dataset
     132    public static IDataset Transform(IDataset dataset, IEnumerable<IDataAnalysisTransformation> transformations) {
     133      var modifiableDataset = ((Dataset)dataset).ToModifiable();
     134
     135      foreach (var transformation in transformations) {
     136        var trans = (ITransformation<double>)transformation.Transformation;
     137
     138        var originalData = modifiableDataset.GetDoubleValues(transformation.OriginalVariable);
     139        //if (!trans.Check(originalData, out string errorMessage))
     140        //  throw new InvalidOperationException($"Cannot estimate Values, Transformation is invalid: {errorMessage}");
     141        // TODO: check was already called before configure (in preprocessing)
     142        // TODO: newly specified data might not pass the check but it does not matter because the data is not configured with
     143        // e.g. impact calculation -> replacement=most common -> originalMean is zero
     144
     145        var transformedData = trans.Apply(originalData).ToList();
     146        if (modifiableDataset.VariableNames.Contains(transformation.TransformedVariable))
     147          modifiableDataset.ReplaceVariable(transformation.TransformedVariable, transformedData);
     148        else
     149          modifiableDataset.AddVariable(transformation.TransformedVariable, transformedData);
     150      }
     151
     152      return modifiableDataset; // TODO: to regular dataset?
     153    }
     154
     155    public static IDataset InverseTransform(IDataset dataset, IEnumerable<IDataAnalysisTransformation> transformations, bool removeVirtualVariables = true) {
     156      var modifiableDataset = ((Dataset)dataset).ToModifiable();
     157
     158      var transformationsStack = new Stack<IDataAnalysisTransformation>(transformations);
     159      while (transformationsStack.Any()) {
     160        var transformation = transformationsStack.Pop();
     161        var trans = (ITransformation<double>)transformation.Transformation;
     162
     163        var prevTransformations = transformations.Except(transformationsStack);
     164        bool originalWasChanged = prevTransformations.Any(x => x.TransformedVariable == transformation.OriginalVariable);
     165        if (originalWasChanged) {
     166          var transformedData = modifiableDataset.GetDoubleValues(transformation.TransformedVariable);
     167
     168          var originalData = trans.InverseApply(transformedData).ToList();
     169          modifiableDataset.ReplaceVariable(transformation.OriginalVariable, originalData);
     170        }
     171      }
     172
     173      if (removeVirtualVariables) {
     174        var originalVariables = ReduceVariables(dataset.VariableNames, transformations);
     175        var virtualVariables = dataset.VariableNames.Except(originalVariables);
     176        foreach (var virtualVariable in virtualVariables)
     177          modifiableDataset.RemoveVariable(virtualVariable);
     178      }
     179
     180      return modifiableDataset; // TODO: to regular dataset?
     181    }
     182    #endregion
     183
     184    #region Transform ProblemData
     185    public static IDataAnalysisProblemData ApplyTransformations(IDataAnalysisProblemData problemData) {
     186      var newDataset = Transform(problemData.Dataset, problemData.Transformations);
     187      var extendedInputs = ExtendVariables(problemData.AllowedInputVariables, problemData.Transformations);
     188
     189      return CreateNewProblemData(problemData, newDataset, extendedInputs, inverse: false);
     190    }
     191
     192    public static IDataAnalysisProblemData InverseApplyTransformations(IDataAnalysisProblemData problemData) {
     193      var newDataset = InverseTransform(problemData.Dataset, problemData.Transformations);
     194      var reducedInputs = ReduceVariables(problemData.AllowedInputVariables, problemData.Transformations);
     195
     196      return CreateNewProblemData(problemData, newDataset, reducedInputs, inverse: true);
     197    }
     198
     199    private static IDataAnalysisProblemData CreateNewProblemData(IDataAnalysisProblemData problemData, IDataset dataset, IEnumerable<string> inputs, bool inverse = false) {
     200      IDataAnalysisProblemData newProblemData;
     201      if (problemData is IRegressionProblemData regressionProblemData) {
     202        var newTargetVariable = GetLastTransitiveVariable(regressionProblemData.TargetVariable, problemData.Transformations, inverse);
     203        newProblemData = new RegressionProblemData(dataset, inputs, newTargetVariable, problemData.Transformations);
     204      } else if (problemData is IClassificationProblemData classificationProblemData) {
     205        newProblemData = new ClassificationProblemData(dataset, inputs, classificationProblemData.TargetVariable, problemData.Transformations);
     206      } else throw new NotSupportedException("Type of ProblemData not supported");
     207
     208      newProblemData.TrainingPartition.Start = problemData.TrainingPartition.Start;
     209      newProblemData.TrainingPartition.End = problemData.TrainingPartition.End;
     210      newProblemData.TestPartition.Start = problemData.TestPartition.Start;
     211      newProblemData.TestPartition.End = problemData.TestPartition.End;
     212
     213      return newProblemData;
     214    }
     215    #endregion
     216
     217    #region Transform Model
     218    public static IDataAnalysisTransformationModel CreateTransformationIntegratedModel(IDataAnalysisModel model, IEnumerable<IDataAnalysisTransformation> transformations) {
     219      if (model is IDataAnalysisTransformationModel)
     220        throw new InvalidOperationException("Model already is a transformation model.");
     221
     222      switch (model) {
     223        case ITimeSeriesPrognosisModel timeSeriesPrognosisModel:
     224          return new TimeSeriesPrognosisTransformationModel(timeSeriesPrognosisModel, transformations);
     225        case IRegressionModel regressionModel:
     226          return new RegressionTransformationModel(regressionModel, transformations);
     227        case IClassificationModel classificationModel:
     228          return new ClassificationTransformationModel(classificationModel, transformations);
     229        case IClusteringModel clusteringModel:
     230          return new ClusteringTransformationModel(clusteringModel, transformations);
     231        default:
     232          throw new NotSupportedException("Type of the model is not supported;");
     233      }
     234    }
     235
     236    public static IDataAnalysisModel RestoreTrainedModel(IDataAnalysisModel transformationModel, IEnumerable<IDataAnalysisTransformation> transformations) { // unwraps a transformation model; the transformations parameter is not read in this body
     237      if (!(transformationModel is IDataAnalysisTransformationModel model)) // pattern variable 'model' is assigned only when the type test succeeds
     238        throw new InvalidOperationException("Cannot restore because model is not a TransformationModel");
     239      return model.OriginalModel; // the wrapped model is returned as-is
     240    }
     241    #endregion
     242
     243    #region Transform Solution
     244    public static IDataAnalysisSolution TransformSolution(IDataAnalysisSolution solution) { // builds a new solution in the opposite representation: unwraps a transformation model, or wraps a plain one
     245      var transformations = solution.ProblemData.Transformations; // the transformations are taken from the solution's own problem data
     246
     247      var model = solution.Model is IDataAnalysisTransformationModel // TODO: what if model is an integrated sym-reg model?
     248        ? RestoreTrainedModel(solution.Model, transformations) // already wrapped -> return its OriginalModel
     249        : CreateTransformationIntegratedModel(solution.Model, transformations); // plain model -> wrap it together with the transformations
     250
     251      var data = solution.Model is IDataAnalysisTransformationModel // same type test as above, so model and data are switched in matching directions
     252        ? ApplyTransformations(solution.ProblemData) // original -> transformed
     253        : InverseApplyTransformations(solution.ProblemData); // transformed -> original
     254
     255      return CreateSolution(model, data); // throws NotSupportedException for model types CreateSolution has no case for
     256    }
     257
     258    private static IDataAnalysisSolution CreateSolution(IDataAnalysisModel model, IDataAnalysisProblemData problemData) { // dispatches on the model's runtime type to the matching Create*Solution call
     259      switch (model) { // first matching case wins; the time-series case is tested before the regression case
     260        case ITimeSeriesPrognosisModel timeSeriesPrognosisModel:
     261          return timeSeriesPrognosisModel.CreateTimeSeriesPrognosisSolution((ITimeSeriesPrognosisProblemData)problemData); // direct cast: problem data of a non-matching type fails here at runtime
     262        case IRegressionModel regressionModel:
     263          return regressionModel.CreateRegressionSolution((IRegressionProblemData)problemData);
     264        case IClassificationModel classificationModel:
     265          return classificationModel.CreateClassificationSolution((IClassificationProblemData)problemData);
     266        default: // NOTE(review): there is no IClusteringModel case here although CreateTransformationIntegratedModel has one - confirm this is intended
     267          throw new NotSupportedException("Cannot create Solution of the model type.");
     268      }
     269    }
     270    #endregion
     271
     272    #endregion
    86273  }
    87274}
  • branches/2906_Transformations/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/DataAnalysisTransformationModel.cs

    r15880 r15884  
    3535    public IDataAnalysisModel OriginalModel { get; protected set; }
    3636
     37    IEnumerable<IDataAnalysisTransformation> IDataAnalysisTransformationModel.InputTransformations { // explicit interface implementation: satisfies the IEnumerable-typed interface member
     38      get { return InputTransformations; } // forwards to the class's own ReadOnlyItemList-typed InputTransformations property
     39    }
     40
    3741    [Storable]
    3842    public ReadOnlyItemList<IDataAnalysisTransformation> InputTransformations { get; protected set; }
     
    5559
    5660    public override IEnumerable<string> VariablesUsedForPrediction {
    57       get { return OriginalModel.VariablesUsedForPrediction; /* TODO: reduce extend-inputs */}
     61      get { return DataAnalysisTransformation.ReduceVariables(OriginalModel.VariablesUsedForPrediction, InputTransformations); }
    5862    }
    5963
    6064    #region Constructor, Cloning & Persistence
    6165    protected DataAnalysisTransformationModel(IDataAnalysisModel originalModel, IEnumerable<IDataAnalysisTransformation> transformations)
    62       : base(originalModel.Name) {
     66      : base("Transformation Model " + originalModel.Name) {
    6367      OriginalModel = originalModel;
    64       var transitiveInputs = CalculateTransitiveVariables(originalModel.VariablesUsedForPrediction, transformations);
     68      var transitiveInputs = DataAnalysisTransformation.GetTransitiveVariables(originalModel.VariablesUsedForPrediction, transformations, inverse: true);
    6569      InputTransformations = new ItemList<IDataAnalysisTransformation>(transformations.Where(t => transitiveInputs.Contains(t.OriginalVariable))).AsReadOnly();
    6670      TargetTransformations = new ReadOnlyItemList<IDataAnalysisTransformation>();
     
    8084    #endregion
    8185
    82     // extended -> include originals
    83     public static ISet<string> CalculateTransitiveVariables(IEnumerable<string> inputVariables, IEnumerable<IDataAnalysisTransformation> transformations) {
    84       var transitiveInputs = new HashSet<string>(inputVariables);
    85 
    86       foreach (var transformation in transformations.Reverse()) {
    87         if (transitiveInputs.Contains(transformation.TransformedVariable)) {
    88           transitiveInputs.Add(transformation.OriginalVariable);
    89         }
    90       }
    91 
    92       return transitiveInputs;
    93     }
    94     // originals => include extended
    95     public static IEnumerable<string> ExtendInputVariables(IEnumerable<string> oldInputVariables, IEnumerable<IDataAnalysisTransformation> transformations) {
    96       var inputs = new HashSet<string>(oldInputVariables);
    97 
    98       foreach (var transformation in transformations) {
    99         if (inputs.Contains(transformation.OriginalVariable))
    100           inputs.Add(transformation.TransformedVariable);
    101       }
    102 
    103       return inputs;
    104     }
    105     [Obsolete]
    106     public static IEnumerable<string> RemoveVirtualVariables(IEnumerable<string> variables, IEnumerable<IDataAnalysisTransformation> transformations) {
    107       var remainingVariables = new HashSet<string>(variables);
    108 
    109       var transformationsStack = new Stack<IDataAnalysisTransformation>(transformations);
    110 
    111       while (transformationsStack.Any()) {
    112         var transformation = transformationsStack.Pop();
    113 
    114 
    115         bool transformedVariablePending = transformationsStack.Any(x => x.OriginalVariable == transformation.TransformedVariable);
    116         if (!transformedVariablePending)
    117           remainingVariables.Remove(transformation.TransformedVariable);
    118       }
    119 
    120       return remainingVariables;
    121     }
    122 
    123     public static IDataset Transform(IDataset dataset, IEnumerable<IDataAnalysisTransformation> transformations) {
    124       var modifiableDataset = ((Dataset)dataset).ToModifiable();
    125 
    126       foreach (var transformation in transformations) {
    127         var trans = (ITransformation<double>)transformation.Transformation;
    128 
    129         var originalData = modifiableDataset.GetDoubleValues(transformation.OriginalVariable);
    130         if (!trans.Check(originalData, out string errorMessage))
    131           throw new InvalidOperationException($"Cannot estimate Values, Transformation is invalid: {errorMessage}");
    132 
    133         var transformedData = trans.Apply(originalData).ToList();
    134         if (modifiableDataset.VariableNames.Contains(transformation.TransformedVariable))
    135           modifiableDataset.ReplaceVariable(transformation.TransformedVariable, transformedData);
    136         else
    137           modifiableDataset.AddVariable(transformation.TransformedVariable, transformedData);
    138       }
    139 
    140       return modifiableDataset;
    141     }
    142 
    14386    #region Events
    14487    public event EventHandler TargetVariableChanged;
  • branches/2906_Transformations/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionProblemData.cs

    r15870 r15884  
    180180      base.AdjustProblemDataProperties(problemData);
    181181    }
    182 
    183     public override IDataAnalysisProblemData Transform() {
    184       var transformedProblemData = (IRegressionProblemData)base.Transform();
    185       var targetVariable = RegressionTransformationModel.GetTransformedTragetVariable(TargetVariable, Transformations);
    186       transformedProblemData.TargetVariable = targetVariable;
    187       return transformedProblemData;
    188     }
    189 
    190     public override IDataAnalysisProblemData InverseTransform() {
    191       var transformedProblemData = (IRegressionProblemData)base.InverseTransform();
    192       var targetVariable = RegressionTransformationModel.GetOriginalTragetVariable(TargetVariable, Transformations);
    193       transformedProblemData.TargetVariable = targetVariable;
    194       return transformedProblemData;
    195     }
    196182  }
    197183}
  • branches/2906_Transformations/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionTransformationModel.cs

    r15870 r15884  
    2020#endregion
    2121
    22 using System;
    2322using System.Collections.Generic;
    2423using System.Linq;
     
    3635    }
    3736
     37    IEnumerable<IDataAnalysisTransformation> IRegressionTransformationModel.TargetTransformations { // explicit interface implementation: satisfies the IEnumerable-typed interface member
     38      get { return TargetTransformations; } // forwards to the TargetTransformations member of this class (assigned in the constructor)
     39    }
     40
    3841    #region Constructor, Cloning & Persistence
    3942    public RegressionTransformationModel(IRegressionModel originalModel, IEnumerable<IDataAnalysisTransformation> transformations)
    4043      : base(originalModel, transformations) {
    41       var transitiveTargets = CalculateTransitiveVariables(new[] { originalModel.TargetVariable }, transformations);
     44      var transitiveTargets = DataAnalysisTransformation.GetTransitiveVariables(new[] { originalModel.TargetVariable }, transformations, inverse: true).ToList();
    4245      TargetTransformations = new ItemList<IDataAnalysisTransformation>(transformations.Where(t => transitiveTargets.Contains(t.OriginalVariable))).AsReadOnly();
    43       TargetVariable = GetOriginalTragetVariable(originalModel.TargetVariable, TargetTransformations);
     46      TargetVariable = DataAnalysisTransformation.GetLastTransitiveVariable(originalModel.TargetVariable, TargetTransformations, inverse: true);
    4447    }
    4548
    4649    protected RegressionTransformationModel(RegressionTransformationModel original, Cloner cloner)
    4750      : base(original, cloner) {
    48      
    4951    }
    5052
     
    5961
    6062    public virtual IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
    61       var estimates = OriginalModel.GetEstimatedValues(Transform(dataset, InputTransformations), rows);
    62       return InverseTransform(estimates, TargetTransformations, OriginalModel.TargetVariable);
     63      var transformedInput = DataAnalysisTransformation.Transform(dataset, InputTransformations);
     64      var estimates = OriginalModel.GetEstimatedValues(transformedInput, rows);
     65      return InverseTransform(estimates, TargetTransformations);
    6366    }
    6467
     
    6770    }
    6871
    69     public static IEnumerable<double> InverseTransform(IEnumerable<double> data, IEnumerable<IDataAnalysisTransformation> transformations, string targetVariable) {
    70       var estimates = data.ToList();
    71 
    72       foreach (var transformation in transformations.Reverse()) {
    73         if (transformation.TransformedVariable == targetVariable) {
    74           var trans = (ITransformation<double>)transformation.Transformation;
    75 
    76           estimates = trans.InverseApply(estimates).ToList();
    77 
    78           // setup next iteration
    79           targetVariable = transformation.OriginalVariable;
    80         }
     72    protected static IEnumerable<double> InverseTransform(IEnumerable<double> data, IEnumerable<IDataAnalysisTransformation> transformations) {
     73      foreach (var transformation in transformations.Reverse()) { // TargetTransformations contains only relevant transformations
     74        var trans = (ITransformation<double>)transformation.Transformation;
     75        data = trans.InverseApply(data).ToList();
    8176      }
    82 
    83       return estimates;
    84     }
    85 
    86     public static string GetTransformedTragetVariable(string originalTarget, IEnumerable<IDataAnalysisTransformation> transformations) {
    87       var transformedTarget = originalTarget;
    88       foreach (var transformation in transformations) {
    89         if (transformation.OriginalVariable == transformedTarget)
    90           transformedTarget = transformation.TransformedVariable;
    91       }
    92       return transformedTarget;
    93     }
    94 
    95     public static string GetOriginalTragetVariable(string transformedTarget, IEnumerable<IDataAnalysisTransformation> transformations) {
    96       var originalTarget = transformedTarget;
    97       foreach (var transformation in transformations.Reverse()) {
    98         if (transformation.TransformedVariable == originalTarget)
    99           originalTarget = transformation.OriginalVariable;
    100       }
    101       return originalTarget;
     77      return data;
    10278    }
    10379  }
  • branches/2906_Transformations/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/TimeSeriesPrognosis/Models/TimeSeriesPrognosisTransformationModel.cs

    r15870 r15884  
    5353    #endregion
    5454
     55
    5556    public IEnumerable<IEnumerable<double>> GetPrognosedValues(IDataset dataset, IEnumerable<int> rows, IEnumerable<int> horizons) {
    56       var estimates = OriginalModel.GetPrognosedValues(Transform(dataset, InputTransformations), rows, horizons);
    57       return estimates.Select(x => InverseTransform(x, TargetTransformations, TargetVariable)); // TODO: transform the whole horizon?
     57      var transformedInput = DataAnalysisTransformation.Transform(dataset, InputTransformations);
     58      var estimates = OriginalModel.GetPrognosedValues(transformedInput, rows, horizons);
     59      return estimates.Select(x => InverseTransform(x, TargetTransformations));
    5860    }
    5961
  • branches/2906_Transformations/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Transformations/LinearTransformation.cs

    r15865 r15884  
    2020#endregion
    2121
     22using System;
    2223using System.Collections.Generic;
    2324using System.Linq;
     
    4647    public double Slope {
    4748      get { return SlopeParameter.Value.Value; }
    48       set { SlopeParameter.Value.Value = value; } //TODO: check for != 0?
     49      set { SlopeParameter.Value.Value = value; }
    4950    }
    5051
     
    8586
    8687    public static IEnumerable<double> Apply(IEnumerable<double> data, double slope = 1.0, double intercept = 0.0) { // maps every value x to slope * x + intercept
     88      if (slope.IsAlmost(0.0)) // checked when Apply is called, before any value is projected; IsAlmost is presumably a tolerance-based comparison - confirm
     89        throw new InvalidOperationException($"Cannot transform with a {nameof(slope)} of zero because inverse transformation would be invalid.");
     90
    8791      return data.Select(x => slope * x + intercept); // LINQ Select is deferred: values are computed while the result is enumerated
    8892    }
    8993
    9094    public static IEnumerable<double> InverseApply(IEnumerable<double> data, double slope = 1.0, double intercept = 0.0) { // maps every value x to (x - intercept) / slope, undoing Apply for the same slope and intercept
     95      if (slope.IsAlmost(0.0)) // guards the division below; checked when InverseApply is called
     96        throw new InvalidOperationException($"Cannot inverse transform with a {nameof(slope)} of zero.");
     97
    9198      return data.Select(x => (x - intercept) / slope); // LINQ Select is deferred: values are computed while the result is enumerated
    9299    }
  • branches/2906_Transformations/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Transformations/ZNormalizationTransformation.cs

    r15865 r15884  
    2020#endregion
    2121
     22using System;
    2223using System.Collections.Generic;
    2324using System.Linq;
     
    9798
    9899    public override void Configure(IEnumerable<double> data) {
    99       OriginalMean = data.Average();
    100       OriginalStandardDeviation = data.StandardDeviationPop();
     100      Configure(data, out double originalMean, out double originalStandardDeviation);
     101      OriginalMean = originalMean;
     102      OriginalStandardDeviation = originalStandardDeviation;
    101103      base.Configure(data);
    102104    }
    103105
    104106    public override IEnumerable<double> Apply(IEnumerable<double> data) {
    105       if (double.IsNaN(OriginalMean) || double.IsNaN(OriginalStandardDeviation)) //TODO isConfigured field?
    106         Configure(data);
     107      if (double.IsNaN(OriginalMean) || double.IsNaN(OriginalStandardDeviation))
     108        throw new InvalidOperationException("Transformation is not configured");
    107109
    108110      return Apply(data, TargetMean, TargetStandardDeviation, OriginalMean, OriginalStandardDeviation);
     
    110112
    111113    public override IEnumerable<double> InverseApply(IEnumerable<double> data) { // instance overload: validates the stored statistics, then delegates to the static InverseApply
     114      if (double.IsNaN(OriginalMean) || double.IsNaN(OriginalStandardDeviation)) // a NaN in either original statistic is treated as "not configured"
     115        throw new InvalidOperationException("Transformation is not configured");
     116
    112117      return InverseApply(data, TargetMean, TargetStandardDeviation, OriginalMean, OriginalStandardDeviation); // passes the target and original statistics held by this instance
    113118    }
    114119
     120
     121    public static void Configure(IEnumerable<double> data, out double originalMean, out double originalStandardDeviation) { // computes both statistics from data and returns them through the out parameters
     122      originalMean = data.Average(); // LINQ Average throws InvalidOperationException when data is empty
     123      originalStandardDeviation = data.StandardDeviationPop(); // presumably the population standard deviation, judging by the helper's name - confirm; data is passed to a second call here
     124    }
    115125
    116126    public static IEnumerable<double> Apply(IEnumerable<double> data, double targetMean, double targetStandardDeviation, double originalMean = double.NaN, double originalStandardDeviation = double.NaN) {
  • branches/2906_Transformations/HeuristicLab.Problems.DataAnalysis/3.4/Interfaces/IDataAnalysisProblemData.cs

    r15856 r15884  
    4444
    4545    IEnumerable<IDataAnalysisTransformation> Transformations { get; }
    46     IDataAnalysisProblemData Transform();
    47     IDataAnalysisProblemData InverseTransform();
    4846
    4947    bool IsTrainingSample(int index);
  • branches/2906_Transformations/HeuristicLab.Problems.DataAnalysis/3.4/Interfaces/IDataAnalysisTransformationModel.cs

    r15870 r15884  
    2020#endregion
    2121
    22 using HeuristicLab.Core;
     22using System.Collections.Generic;
    2323
    2424namespace HeuristicLab.Problems.DataAnalysis {
     
    2828    /// <summary>
    2929    /// Transformations that are used on the model's inputs.
    30     /// </summary>                                                                     
    31     ReadOnlyItemList<IDataAnalysisTransformation> InputTransformations { get; }  // TODO: IReadonlyList<>?
    32 
    33     ReadOnlyItemList<IDataAnalysisTransformation> TargetTransformations { get; } // TODO: to Regression/Classification interface?
     30    /// </summary>
     31    IEnumerable<IDataAnalysisTransformation> InputTransformations { get; }
    3432  }
    3533}
  • branches/2906_Transformations/HeuristicLab.Problems.DataAnalysis/3.4/Interfaces/Regression/IRegressionTransformationModel.cs

    r15870 r15884  
    2020#endregion
    2121
    22 using HeuristicLab.Core;
     22using System.Collections.Generic;
    2323
    2424namespace HeuristicLab.Problems.DataAnalysis {
    2525  public interface IRegressionTransformationModel : IDataAnalysisTransformationModel, IRegressionModel { // a transformation model that is also usable as a regression model
    2626    new IRegressionModel OriginalModel { get; } // 'new' hides an inherited OriginalModel member - presumably the IDataAnalysisModel-typed one, confirm - with a regression-typed one
     27
     28    IEnumerable<IDataAnalysisTransformation> TargetTransformations { get; } // transformations for the target variable; RegressionTransformationModel inverse-applies them to the estimates
    2729  }
    2830}
Note: See TracChangeset for help on using the changeset viewer.