#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using HeuristicLab.Collections;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;

namespace HeuristicLab.Problems.DataAnalysis {
  /// <summary>
  /// Base class for problem data of data analysis problems. Bundles a dataset, the list of
  /// (checked) input variables, a training and a test partition, and a list of transformations
  /// as parameters and raises <see cref="Changed"/> whenever one of them changes.
  /// </summary>
  // NOTE(review): the generic type arguments in this file (ReadOnlyCheckedItemList<StringValue>,
  // ReadOnlyItemList<IDataAnalysisTransformation>, IndexedItem<StringValue>, ITransformation<double>, ...)
  // were restored from usage; confirm them against the referenced HeuristicLab assemblies.
  [StorableClass]
  public abstract class DataAnalysisProblemData : ParameterizedNamedItem, IDataAnalysisProblemData {
    protected const string DatasetParameterName = "Dataset";
    protected const string InputVariablesParameterName = "InputVariables";
    protected const string TrainingPartitionParameterName = "TrainingPartition";
    protected const string TestPartitionParameterName = "TestPartition";
    protected const string TransformationsParameterName = "Transformations";

    #region parameter properties
    //mkommend: inserted parameter caching due to performance reasons
    private IFixedValueParameter<Dataset> datasetParameter;
    /// <summary>Parameter holding the dataset; looked up once and cached afterwards.</summary>
    public IFixedValueParameter<Dataset> DatasetParameter {
      get {
        if (datasetParameter == null) datasetParameter = (IFixedValueParameter<Dataset>)Parameters[DatasetParameterName];
        return datasetParameter;
      }
    }

    private IFixedValueParameter<ReadOnlyCheckedItemList<StringValue>> inputVariablesParameter;
    /// <summary>Parameter holding the checked list of input variables; looked up once and cached afterwards.</summary>
    public IFixedValueParameter<ReadOnlyCheckedItemList<StringValue>> InputVariablesParameter {
      get {
        if (inputVariablesParameter == null) inputVariablesParameter = (IFixedValueParameter<ReadOnlyCheckedItemList<StringValue>>)Parameters[InputVariablesParameterName];
        return inputVariablesParameter;
      }
    }

    private IFixedValueParameter<IntRange> trainingPartitionParameter;
    /// <summary>Parameter holding the training partition; looked up once and cached afterwards.</summary>
    public IFixedValueParameter<IntRange> TrainingPartitionParameter {
      get {
        if (trainingPartitionParameter == null) trainingPartitionParameter = (IFixedValueParameter<IntRange>)Parameters[TrainingPartitionParameterName];
        return trainingPartitionParameter;
      }
    }

    private IFixedValueParameter<IntRange> testPartitionParameter;
    /// <summary>Parameter holding the test partition; looked up once and cached afterwards.</summary>
    public IFixedValueParameter<IntRange> TestPartitionParameter {
      get {
        if (testPartitionParameter == null) testPartitionParameter = (IFixedValueParameter<IntRange>)Parameters[TestPartitionParameterName];
        return testPartitionParameter;
      }
    }

    /// <summary>
    /// Parameter holding the transformations. Deliberately not cached: the deserialization hook
    /// may remove and re-add this parameter.
    /// </summary>
    public IFixedValueParameter<ReadOnlyItemList<IDataAnalysisTransformation>> TransformationsParameter {
      get { return (IFixedValueParameter<ReadOnlyItemList<IDataAnalysisTransformation>>)Parameters[TransformationsParameterName]; }
    }
    #endregion

    #region properties
    protected bool isEmpty = false;
    public bool IsEmpty {
      get { return isEmpty; }
    }

    public IDataset Dataset {
      get { return DatasetParameter.Value; }
    }

    public ICheckedItemList<StringValue> InputVariables {
      get { return InputVariablesParameter.Value; }
    }

    /// <summary>Names of all input variables that are currently checked.</summary>
    public IEnumerable<string> AllowedInputVariables {
      get { return InputVariables.CheckedItems.Select(x => x.Value.Value); }
    }

    /// <summary>Values of the allowed input variables for the training rows.</summary>
    public double[,] AllowedInputsTrainingValues {
      get { return Dataset.ToArray(AllowedInputVariables, TrainingIndices); }
    }

    /// <summary>Values of the allowed input variables for the test rows.</summary>
    public double[,] AllowedInputsTestValues {
      get { return Dataset.ToArray(AllowedInputVariables, TestIndices); }
    }

    public IntRange TrainingPartition {
      get { return TrainingPartitionParameter.Value; }
    }

    public IntRange TestPartition {
      get { return TestPartitionParameter.Value; }
    }

    /// <summary>All row indices of the dataset, i.e. 0 .. Dataset.Rows - 1.</summary>
    public virtual IEnumerable<int> AllIndices {
      get { return Enumerable.Range(0, Dataset.Rows); }
    }

    /// <summary>
    /// Row indices inside the training partition that satisfy <see cref="IsTrainingSample"/>.
    /// An inverted partition (End &lt; Start) yields no indices.
    /// </summary>
    public virtual IEnumerable<int> TrainingIndices {
      get {
        return Enumerable.Range(TrainingPartition.Start, Math.Max(0, TrainingPartition.End - TrainingPartition.Start))
                         .Where(IsTrainingSample);
      }
    }

    /// <summary>
    /// Row indices inside the test partition that satisfy <see cref="IsTestSample"/>.
    /// An inverted partition (End &lt; Start) yields no indices.
    /// </summary>
    public virtual IEnumerable<int> TestIndices {
      get {
        return Enumerable.Range(TestPartition.Start, Math.Max(0, TestPartition.End - TestPartition.Start))
                         .Where(IsTestSample);
      }
    }

    public IEnumerable<IDataAnalysisTransformation> Transformations {
      get { return TransformationsParameter.Value; }
    }

    /// <summary>
    /// A row is a training sample if it is a valid dataset row, lies in [TrainingPartition.Start, TrainingPartition.End)
    /// and does not lie in [TestPartition.Start, TestPartition.End); rows in both partitions count as test only.
    /// </summary>
    public virtual bool IsTrainingSample(int index) {
      return index >= 0 && index < Dataset.Rows &&
             TrainingPartition.Start <= index && index < TrainingPartition.End &&
             (index < TestPartition.Start || TestPartition.End <= index);
    }

    /// <summary>
    /// A row is a test sample if it is a valid dataset row and lies in [TestPartition.Start, TestPartition.End).
    /// </summary>
    public virtual bool IsTestSample(int index) {
      return index >= 0 && index < Dataset.Rows &&
             TestPartition.Start <= index && index < TestPartition.End;
    }
    #endregion

    /// <summary>Cloning constructor; copies the empty flag and re-registers the change handlers on the clone.</summary>
    protected DataAnalysisProblemData(DataAnalysisProblemData original, Cloner cloner)
      : base(original, cloner) {
      isEmpty = original.isEmpty;
      RegisterEventHandlers();
    }

    [StorableConstructor]
    protected DataAnalysisProblemData(bool deserializing) : base(deserializing) { }

    /// <summary>
    /// Backwards-compatibility hook: replaces a transformations parameter of the legacy type and
    /// adds a hidden, empty transformations parameter if none is present, then registers the change handlers.
    /// </summary>
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      // Guard the indexer with ContainsKey so that data stored without a transformations parameter
      // falls through to the Add below instead of failing on the lookup.
      // NOTE(review): the legacy element type (ITransformation) is an assumption - confirm.
      if (Parameters.ContainsKey(TransformationsParameterName) &&
          Parameters[TransformationsParameterName] is FixedValueParameter<ReadOnlyItemList<ITransformation>>)
        Parameters.Remove(TransformationsParameterName);

      if (!Parameters.ContainsKey(TransformationsParameterName))
        Parameters.Add(new FixedValueParameter<ReadOnlyItemList<IDataAnalysisTransformation>>(TransformationsParameterName, new ItemList<IDataAnalysisTransformation>().AsReadOnly()) { Hidden = true });

      RegisterEventHandlers();
    }

    /// <summary>
    /// Creates problem data for <paramref name="dataset"/>. All double and string variables of the dataset
    /// become input variables; those contained in <paramref name="allowedInputVariables"/> are checked.
    /// The first half of the rows is used as training partition, the second half as test partition.
    /// </summary>
    /// <param name="dataset">The dataset; must not be null.</param>
    /// <param name="allowedInputVariables">Names of the variables that are checked initially; must not be null.</param>
    /// <param name="transformations">Optional transformations; null is treated as an empty list.</param>
    /// <exception cref="ArgumentNullException">dataset or allowedInputVariables is null.</exception>
    /// <exception cref="ArgumentException">An allowed input variable is not a double or string variable of the dataset.</exception>
    protected DataAnalysisProblemData(IDataset dataset, IEnumerable<string> allowedInputVariables, IEnumerable<IDataAnalysisTransformation> transformations = null) {
      // The single-string ArgumentNullException constructor takes the parameter name, not the message.
      if (dataset == null) throw new ArgumentNullException("dataset", "The dataset must not be null.");
      if (allowedInputVariables == null) throw new ArgumentNullException("allowedInputVariables", "The allowed input variables must not be null.");

      // Materialize once: the sequence is needed for the validation and for every membership test below.
      var allowedVariables = new HashSet<string>(allowedInputVariables);

      if (allowedVariables.Except(dataset.DoubleVariables).Except(dataset.StringVariables).Any())
        throw new ArgumentException("All allowed input variables must be present in the dataset and of type double or string.");

      var variables = dataset.VariableNames.Where(variable => dataset.VariableHasType<double>(variable) || dataset.VariableHasType<string>(variable));
      var inputVariables = new CheckedItemList<StringValue>(variables.Select(x => new StringValue(x)));
      foreach (StringValue x in inputVariables)
        inputVariables.SetItemCheckedState(x, allowedVariables.Contains(x.Value));

      int trainingPartitionStart = 0;
      int trainingPartitionEnd = dataset.Rows / 2;
      int testPartitionStart = dataset.Rows / 2;
      int testPartitionEnd = dataset.Rows;

      var transformationsList = new ItemList<IDataAnalysisTransformation>(transformations ?? Enumerable.Empty<IDataAnalysisTransformation>());

      Parameters.Add(new FixedValueParameter<Dataset>(DatasetParameterName, "", (Dataset)dataset));
      Parameters.Add(new FixedValueParameter<ReadOnlyCheckedItemList<StringValue>>(InputVariablesParameterName, "", inputVariables.AsReadOnly()));
      Parameters.Add(new FixedValueParameter<IntRange>(TrainingPartitionParameterName, "", new IntRange(trainingPartitionStart, trainingPartitionEnd)));
      Parameters.Add(new FixedValueParameter<IntRange>(TestPartitionParameterName, "", new IntRange(testPartitionStart, testPartitionEnd)));
      // The transformations parameter is only shown when there is at least one transformation.
      Parameters.Add(new FixedValueParameter<ReadOnlyItemList<IDataAnalysisTransformation>>(TransformationsParameterName, "", transformationsList.AsReadOnly()) { Hidden = transformationsList.Count == 0 });

      ((ValueParameter<Dataset>)DatasetParameter).ReactOnValueToStringChangedAndValueItemImageChanged = false;
      RegisterEventHandlers();
    }

    /// <summary>Subscribes to the change events of all parameters/values so that <see cref="Changed"/> is raised.</summary>
    private void RegisterEventHandlers() {
      DatasetParameter.ValueChanged += new EventHandler(Parameter_ValueChanged);
      InputVariables.CheckedItemsChanged += new CollectionItemsChangedEventHandler<IndexedItem<StringValue>>(InputVariables_CheckedItemsChanged);
      TrainingPartition.ValueChanged += new EventHandler(Parameter_ValueChanged);
      TestPartition.ValueChanged += new EventHandler(Parameter_ValueChanged);
      TransformationsParameter.ValueChanged += new EventHandler(Parameter_ValueChanged);
    }

    private void InputVariables_CheckedItemsChanged(object sender, CollectionItemsChangedEventArgs<IndexedItem<StringValue>> e) {
      OnChanged();
    }

    private void Parameter_ValueChanged(object sender, EventArgs e) {
      OnChanged();
    }

    /// <summary>Raised when the dataset, the checked input variables, a partition or the transformations change.</summary>
    public event EventHandler Changed;
    protected virtual void OnChanged() {
      // Copy to a local so a concurrent unsubscribe cannot null the delegate between check and call.
      var listeners = Changed;
      if (listeners != null) listeners(this, EventArgs.Empty);
    }

    /// <summary>
    /// Checks whether every allowed input variable of this instance is present among the input
    /// variables of <paramref name="problemData"/>.
    /// </summary>
    /// <param name="problemData">The problem data to check against; must not be null.</param>
    /// <param name="errorMessage">One line per missing input variable, or the empty string if compatible.</param>
    /// <returns>true if no allowed input variable is missing; otherwise false.</returns>
    /// <exception cref="ArgumentNullException">problemData is null.</exception>
    protected virtual bool IsProblemDataCompatible(IDataAnalysisProblemData problemData, out string errorMessage) {
      errorMessage = string.Empty;
      if (problemData == null) throw new ArgumentNullException("problemData", "The provided problemData is null.");

      //check allowed input variables
      StringBuilder message = new StringBuilder();
      var variables = new HashSet<string>(problemData.InputVariables.Select(x => x.Value));
      foreach (var item in AllowedInputVariables) {
        if (!variables.Contains(item))
          message.AppendLine("Input variable '" + item + "' is not present in the new problem data.");
      }

      if (message.Length != 0) {
        errorMessage = message.ToString();
        return false;
      }
      return true;
    }

    /// <summary>
    /// Adopts the checked state of the input variables from <paramref name="problemData"/>. Variables
    /// of this instance that do not exist in <paramref name="problemData"/> are unchecked.
    /// </summary>
    /// <exception cref="ArgumentNullException">problemData is null.</exception>
    /// <exception cref="ArgumentException">problemData is not a <see cref="DataAnalysisProblemData"/>.</exception>
    /// <exception cref="InvalidOperationException">problemData is not compatible with this instance.</exception>
    public virtual void AdjustProblemDataProperties(IDataAnalysisProblemData problemData) {
      // Without this guard a null argument would fail on problemData.GetType() below.
      if (problemData == null) throw new ArgumentNullException("problemData", "The provided problemData is null.");

      DataAnalysisProblemData data = problemData as DataAnalysisProblemData;
      if (data == null) throw new ArgumentException("The problem data is not a data analysis problem data. Instead a " + problemData.GetType().GetPrettyName() + " was provided.", "problemData");

      string errorMessage;
      if (!data.IsProblemDataCompatible(this, out errorMessage)) {
        throw new InvalidOperationException(errorMessage);
      }

      foreach (var inputVariable in InputVariables) {
        var variable = data.InputVariables.FirstOrDefault(i => i.Value == inputVariable.Value);
        InputVariables.SetItemCheckedState(inputVariable, variable != null && data.InputVariables.ItemChecked(variable));
      }
    }

    /// <summary>
    /// Returns a clone of this problem data in which the dataset is replaced by the transformed dataset and
    /// the input variables are replaced by the variables of the transformed dataset (checked if they are
    /// part of the extended allowed inputs).
    /// </summary>
    public virtual IDataAnalysisProblemData Transform() {
      var newDataset = DataAnalysisTransformationModel.Transform(Dataset, Transformations);
      var extendedInputs = DataAnalysisTransformationModel.ExtendInputVariables(AllowedInputVariables, Transformations);
      var checkedInputs = new CheckedItemList<StringValue>(newDataset.VariableNames.Select(x => new StringValue(x)));
      foreach (var input in checkedInputs) checkedInputs.SetItemCheckedState(input, extendedInputs.Contains(input.Value));

      // TODO: Cannot create concrete instance here (maybe derived Create-method?)
      // Pre-registering the replacements makes the cloner substitute them for the originals.
      var cloner = new Cloner();
      cloner.RegisterClonedObject(Dataset, newDataset);
      cloner.RegisterClonedObject(InputVariables, checkedInputs.AsReadOnly());
      // TODO: valid values for target are not extended

      return cloner.Clone(this);
    }

    /// <summary>
    /// Returns a clone of this problem data in which the dataset is replaced by the inversely transformed
    /// dataset and the input variables are replaced by the variables of that dataset (checked if they are
    /// currently allowed).
    /// </summary>
    public virtual IDataAnalysisProblemData InverseTransform() {
      var newDataset = InverseTransform(Dataset, Transformations);
      // Evaluate the allowed inputs once instead of re-running the query for every variable.
      var allowedInputs = new HashSet<string>(AllowedInputVariables);
      var checkedInputs = new CheckedItemList<StringValue>(newDataset.VariableNames.Select(x => new StringValue(x)));
      foreach (var input in checkedInputs) checkedInputs.SetItemCheckedState(input, allowedInputs.Contains(input.Value));

      // TODO: Cannot create concrete instance here (maybe derived Create-method?)
      var cloner = new Cloner();
      cloner.RegisterClonedObject(Dataset, newDataset);
      cloner.RegisterClonedObject(InputVariables, checkedInputs.AsReadOnly());
      // TODO: check valid target values

      return cloner.Clone(this);
    }

    /// <summary>
    /// Undoes <paramref name="transformations"/> on a modifiable copy of <paramref name="dataset"/>,
    /// processing them in reverse order.
    /// </summary>
    /// <param name="dataset">The transformed dataset; cast to <see cref="Dataset"/>.</param>
    /// <param name="transformations">The transformations in the order they were applied.</param>
    /// <param name="removeVirtualVariables">
    /// If true, a transformed variable is removed once no remaining transformation refers to it.
    /// </param>
    /// <returns>The modified copy of the dataset.</returns>
    public static IDataset InverseTransform(IDataset dataset, IEnumerable<IDataAnalysisTransformation> transformations, bool removeVirtualVariables = true) {
      var modifiableDataset = ((Dataset)dataset).ToModifiable();

      // Materialize once; the sequence is used for the stack and again in every loop iteration.
      var allTransformations = transformations.ToList();
      var transformationsStack = new Stack<IDataAnalysisTransformation>(allTransformations);
      while (transformationsStack.Any()) {
        var transformation = transformationsStack.Pop();
        var trans = (ITransformation<double>)transformation.Transformation;

        // Everything no longer on the stack, i.e. the current transformation and those already undone.
        var prevTransformations = allTransformations.Except(transformationsStack);
        bool originalWasChanged = prevTransformations.Any(x => x.TransformedVariable == transformation.OriginalVariable);
        if (originalWasChanged) {
          var transformedData = modifiableDataset.GetDoubleValues(transformation.TransformedVariable);

          var originalData = trans.InverseApply(transformedData).ToList();
          modifiableDataset.ReplaceVariable(transformation.OriginalVariable, originalData);
        }

        // Keep the transformed variable as long as a not yet undone transformation reads or writes it.
        bool transformedVariablePending = transformationsStack.Any(x => x.OriginalVariable == transformation.TransformedVariable || x.TransformedVariable == transformation.TransformedVariable);
        if (removeVirtualVariables && !transformedVariablePending)
          modifiableDataset.RemoveVariable(transformation.TransformedVariable);
      }

      return modifiableDataset;
    }
  }
}