#region License Information /* HeuristicLab * Copyright (C) 2002-2014 Heuristic and Evolutionary Algorithms Laboratory (HEAL) * * This file is part of HeuristicLab. * * HeuristicLab is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * HeuristicLab is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with HeuristicLab. If not, see . */ #endregion using System; using System.Collections.Generic; using System.Linq; using System.Text; using HeuristicLab.Collections; using HeuristicLab.Common; using HeuristicLab.Core; using HeuristicLab.Data; using HeuristicLab.Parameters; using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; namespace HeuristicLab.Problems.DataAnalysis { [StorableClass] public abstract class DataAnalysisProblemData : ParameterizedNamedItem, IDataAnalysisProblemData { protected const string DatasetParameterName = "Dataset"; protected const string InputVariablesParameterName = "InputVariables"; protected const string TrainingPartitionParameterName = "TrainingPartition"; protected const string TestPartitionParameterName = "TestPartition"; protected const string TransformationsParameterName = "Transformations"; #region parameter properites public IFixedValueParameter DatasetParameter { get { return (IFixedValueParameter)Parameters[DatasetParameterName]; } } public IFixedValueParameter> InputVariablesParameter { get { return (IFixedValueParameter>)Parameters[InputVariablesParameterName]; } } public IFixedValueParameter TrainingPartitionParameter { get { return (IFixedValueParameter)Parameters[TrainingPartitionParameterName]; } } public IFixedValueParameter TestPartitionParameter { get { return (IFixedValueParameter)Parameters[TestPartitionParameterName]; } } public IFixedValueParameter> TransformationsParameter { get { return (IFixedValueParameter>)Parameters[TransformationsParameterName]; } } #endregion #region properties protected bool isEmpty = false; public bool IsEmpty { get { return isEmpty; } } public Dataset Dataset { get { return DatasetParameter.Value; } } public ICheckedItemList InputVariables { get { return InputVariablesParameter.Value; } } public IEnumerable AllowedInputVariables { get { return InputVariables.CheckedItems.Select(x => x.Value.Value); } } public IntRange TrainingPartition { get { return TrainingPartitionParameter.Value; } } public IntRange TestPartition { get { return TestPartitionParameter.Value; } } public virtual IEnumerable TrainingIndices { get { return Enumerable.Range(TrainingPartition.Start, Math.Max(0, TrainingPartition.End - TrainingPartition.Start)) .Where(IsTrainingSample); } } public virtual IEnumerable TestIndices { get { return Enumerable.Range(TestPartition.Start, Math.Max(0, TestPartition.End - TestPartition.Start)) .Where(IsTestSample); } } public IEnumerable Transformations { get { return TransformationsParameter.Value; } } public virtual bool IsTrainingSample(int index) { return index >= 0 && index < Dataset.Rows && TrainingPartition.Start <= index && index < TrainingPartition.End && (index < TestPartition.Start || TestPartition.End <= index); } public virtual bool IsTestSample(int index) { return index >= 0 && index < Dataset.Rows && TestPartition.Start <= index && index < TestPartition.End; } #endregion protected DataAnalysisProblemData(DataAnalysisProblemData original, Cloner cloner) : base(original, cloner) { isEmpty = original.isEmpty; RegisterEventHandlers(); } [StorableConstructor] protected DataAnalysisProblemData(bool deserializing) : base(deserializing) { } [StorableHook(HookType.AfterDeserialization)] private void AfterDeserialization() { if (!Parameters.ContainsKey(TransformationsParameterName)) { Parameters.Add(new FixedValueParameter>(TransformationsParameterName, "", new ItemList().AsReadOnly())); TransformationsParameter.Hidden = true; } RegisterEventHandlers(); } protected DataAnalysisProblemData(Dataset dataset, IEnumerable allowedInputVariables, IEnumerable transformations = null) { if (dataset == null) throw new ArgumentNullException("The dataset must not be null."); if (allowedInputVariables == null) throw new ArgumentNullException("The allowedInputVariables must not be null."); if (allowedInputVariables.Except(dataset.DoubleVariables).Any()) throw new ArgumentException("All allowed input variables must be present in the dataset and of type double."); var inputVariables = new CheckedItemList(dataset.DoubleVariables.Select(x => new StringValue(x))); foreach (StringValue x in inputVariables) inputVariables.SetItemCheckedState(x, allowedInputVariables.Contains(x.Value)); int trainingPartitionStart = 0; int trainingPartitionEnd = dataset.Rows / 2; int testPartitionStart = dataset.Rows / 2; int testPartitionEnd = dataset.Rows; var transformationsList = new ItemList(transformations ?? Enumerable.Empty()); Parameters.Add(new FixedValueParameter(DatasetParameterName, "", dataset)); Parameters.Add(new FixedValueParameter>(InputVariablesParameterName, "", inputVariables.AsReadOnly())); Parameters.Add(new FixedValueParameter(TrainingPartitionParameterName, "", new IntRange(trainingPartitionStart, trainingPartitionEnd))); Parameters.Add(new FixedValueParameter(TestPartitionParameterName, "", new IntRange(testPartitionStart, testPartitionEnd))); Parameters.Add(new FixedValueParameter>(TransformationsParameterName, "", transformationsList.AsReadOnly())); TransformationsParameter.Hidden = true; ((ValueParameter)DatasetParameter).ReactOnValueToStringChangedAndValueItemImageChanged = false; RegisterEventHandlers(); } private void RegisterEventHandlers() { DatasetParameter.ValueChanged += new EventHandler(Parameter_ValueChanged); InputVariables.CheckedItemsChanged += new CollectionItemsChangedEventHandler>(InputVariables_CheckedItemsChanged); TrainingPartition.ValueChanged += new EventHandler(Parameter_ValueChanged); TestPartition.ValueChanged += new EventHandler(Parameter_ValueChanged); TransformationsParameter.ValueChanged += new EventHandler(Parameter_ValueChanged); } private void InputVariables_CheckedItemsChanged(object sender, CollectionItemsChangedEventArgs> e) { OnChanged(); } private void Parameter_ValueChanged(object sender, EventArgs e) { OnChanged(); } public event EventHandler Changed; protected virtual void OnChanged() { var listeners = Changed; if (listeners != null) listeners(this, EventArgs.Empty); } protected virtual bool IsProblemDataCompatible(IDataAnalysisProblemData problemData, out string errorMessage) { errorMessage = string.Empty; if (problemData == null) throw new ArgumentNullException("problemData", "The provided problemData is null."); //check allowed input variables StringBuilder message = new StringBuilder(); var variables = new HashSet(problemData.InputVariables.Select(x => x.Value)); foreach (var item in AllowedInputVariables) { if (!variables.Contains(item)) message.AppendLine("Input variable '" + item + "' is not present in the new problem data."); } if (message.Length != 0) { errorMessage = message.ToString(); return false; } return true; } public virtual void AdjustProblemDataProperties(IDataAnalysisProblemData problemData) { DataAnalysisProblemData data = problemData as DataAnalysisProblemData; if (data == null) throw new ArgumentException("The problem data is not a data analysis problem data. Instead a " + problemData.GetType().GetPrettyName() + " was provided.", "problemData"); string errorMessage; if (!data.IsProblemDataCompatible(this, out errorMessage)) { throw new InvalidOperationException(errorMessage); } foreach (var inputVariable in InputVariables) { var variable = data.InputVariables.FirstOrDefault(i => i.Value == inputVariable.Value); InputVariables.SetItemCheckedState(inputVariable, variable != null && data.InputVariables.ItemChecked(variable)); } TrainingPartition.Start = TrainingPartition.End = 0; TestPartition.Start = 0; TestPartition.End = Dataset.Rows; } } }