#region License Information /* HeuristicLab * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL) * * This file is part of HeuristicLab. * * HeuristicLab is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * HeuristicLab is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with HeuristicLab. If not, see . */ #endregion using System; using System.Collections.Generic; using System.Linq; using HeuristicLab.Common; using HeuristicLab.Core; using HeuristicLab.Data; using HeuristicLab.Parameters; using HEAL.Attic; namespace HeuristicLab.Problems.DataAnalysis { [StorableType("EE612297-B1AF-42D2-BF21-AF9A2D42791C")] [Item("RegressionProblemData", "Represents an item containing all data defining a regression problem.")] public class RegressionProblemData : DataAnalysisProblemData, IRegressionProblemData, IStorableContent { protected const string TargetVariableParameterName = "TargetVariable"; protected const string ScaleInputsParameterName = "Scale Inputs"; public string Filename { get; set; } #region default data private static double[,] kozaF1 = new double[,] { {2.017885919, -1.449165046}, {1.30060506, -1.344523885}, {1.147134798, -1.317989331}, {0.877182504, -1.266142284}, {0.852562452, -1.261020794}, {0.431095788, -1.158793317}, {0.112586002, -1.050908405}, {0.04594507, -1.021989402}, {0.042572879, -1.020438113}, {-0.074027291, -0.959859562}, {-0.109178553, -0.938094706}, {-0.259721109, -0.803635355}, {-0.272991057, -0.387519561}, {-0.161978191, -0.193611001}, {-0.102489983, -0.114215349}, {-0.01469968, -0.014918985}, {-0.008863365, -0.008942626}, {0.026751057, 0.026054094}, {0.166922436, 0.14309643}, {0.176953808, 0.1504144}, {0.190233418, 0.159916534}, {0.199800708, 0.166635331}, {0.261502822, 0.207600348}, {0.30182879, 0.232370249}, {0.83763905, 0.468046718} }; private static readonly Dataset defaultDataset; private static readonly IEnumerable defaultAllowedInputVariables; private static readonly string defaultTargetVariable; private static readonly RegressionProblemData emptyProblemData; public static RegressionProblemData EmptyProblemData { get { return emptyProblemData; } } static RegressionProblemData() { defaultDataset = new Dataset(new string[] { "y", "x" }, kozaF1); defaultDataset.Name = "Fourth-order Polynomial Function Benchmark Dataset"; defaultDataset.Description = "f(x) = x^4 + x^3 + x^2 + x^1"; defaultAllowedInputVariables = new List() { "x" }; defaultTargetVariable = "y"; var problemData = new RegressionProblemData(); problemData.Parameters.Clear(); problemData.Name = "Empty Regression ProblemData"; problemData.Description = "This ProblemData acts as place holder before the correct problem data is loaded."; problemData.isEmpty = true; problemData.Parameters.Add(new FixedValueParameter(DatasetParameterName, "", new Dataset())); problemData.Parameters.Add(new FixedValueParameter>(InputVariablesParameterName, "")); problemData.Parameters.Add(new FixedValueParameter(TrainingPartitionParameterName, "", (IntRange)new IntRange(0, 0).AsReadOnly())); problemData.Parameters.Add(new FixedValueParameter(TestPartitionParameterName, "", (IntRange)new IntRange(0, 0).AsReadOnly())); problemData.Parameters.Add(new ConstrainedValueParameter(TargetVariableParameterName, new ItemSet())); emptyProblemData = problemData; } #endregion public IConstrainedValueParameter TargetVariableParameter { get { return (IConstrainedValueParameter)Parameters[TargetVariableParameterName]; } } public string TargetVariable { get { return TargetVariableParameter.Value.Value; } set { if (value == null) throw new ArgumentNullException("targetVariable", "The provided value for the targetVariable is null."); if (value == TargetVariable) return; var matchingParameterValue = TargetVariableParameter.ValidValues.FirstOrDefault(v => v.Value == value); if (matchingParameterValue == null) throw new ArgumentException("The provided value is not valid as the targetVariable.", "targetVariable"); TargetVariableParameter.Value = matchingParameterValue; } } public IFixedValueParameter ScaleInputsParameter { get { return (IFixedValueParameter)Parameters[ScaleInputsParameterName]; } } public bool ScaleInputs { get { return ScaleInputsParameter.Value.Value; } set { ScaleInputsParameter.Value.Value = value; } } public IEnumerable TargetVariableValues { get { return Dataset.GetDoubleValues(TargetVariable); } } public IEnumerable TargetVariableTrainingValues { get { return Dataset.GetDoubleValues(TargetVariable, TrainingIndices); } } public IEnumerable TargetVariableTestValues { get { return Dataset.GetDoubleValues(TargetVariable, TestIndices); } } [StorableConstructor] protected RegressionProblemData(StorableConstructorFlag _) : base(_) { } [StorableHook(HookType.AfterDeserialization)] private void AfterDeserialization() { if (!Parameters.ContainsKey(ScaleInputsParameterName)) { Parameters.Add(new FixedValueParameter(ScaleInputsParameterName, "If enabled input features are scaled by a standard transformation (µ=0, σ=1)", new BoolValue(false))); } RegisterParameterEvents(); } protected RegressionProblemData(RegressionProblemData original, Cloner cloner) : base(original, cloner) { RegisterParameterEvents(); } public override IDeepCloneable Clone(Cloner cloner) { if (this == emptyProblemData) return emptyProblemData; return new RegressionProblemData(this, cloner); } public RegressionProblemData() : this(defaultDataset, defaultAllowedInputVariables, defaultTargetVariable) { } public RegressionProblemData(IRegressionProblemData regressionProblemData) : this(regressionProblemData.Dataset, regressionProblemData.AllowedInputVariables, regressionProblemData.TargetVariable) { TrainingPartition.Start = regressionProblemData.TrainingPartition.Start; TrainingPartition.End = regressionProblemData.TrainingPartition.End; TestPartition.Start = regressionProblemData.TestPartition.Start; TestPartition.End = regressionProblemData.TestPartition.End; } public RegressionProblemData(IDataset dataset, IEnumerable allowedInputVariables, string targetVariable, IEnumerable transformations = null) : base(dataset, allowedInputVariables, transformations ?? Enumerable.Empty()) { var variables = InputVariables.Select(x => x.AsReadOnly()).ToList(); Parameters.Add(new ConstrainedValueParameter(TargetVariableParameterName, new ItemSet(variables), variables.Where(x => x.Value == targetVariable).First())); Parameters.Add(new FixedValueParameter(ScaleInputsParameterName, "If enabled input features are scaled by a standard transformation (µ=0, σ=1)", new BoolValue(false))); RegisterParameterEvents(); } private void RegisterParameterEvents() { TargetVariableParameter.ValueChanged += new EventHandler(TargetVariableParameter_ValueChanged); ScaleInputsParameter.Value.ValueChanged += new EventHandler(ScaleInputsParameter_ValueChanged); } private void TargetVariableParameter_ValueChanged(object sender, EventArgs e) { OnChanged(); } private void ScaleInputsParameter_ValueChanged(object sender, EventArgs e) { var transformations = new ShiftStandardDistributionTransformation(Dataset.DoubleVariables); transformations.Mean = 0; transformations.StandardDeviation = 1; var scaling = Transformation.CreateTransformations(transformations, Dataset, TrainingIndices, AllowedInputVariables); var scaledVariables = AllowedInputVariables.Select((var, i) => new { Variable = var, Data = scaling[i].Apply(Dataset.GetDoubleValues(var)).ToArray() }); var newDataset = ((Dataset)Dataset).ToModifiable(); foreach (var v in scaledVariables) { newDataset.ReplaceVariable(v.Variable, v.Data); } if (!Parameters.ContainsKey("Scaled Dataset")) Parameters.Add(new FixedValueParameter("Scaled Dataset", newDataset.ToDataset())); } } }