#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;

namespace HeuristicLab.Problems.DataAnalysis {
  /// <summary>
  /// Bundles the dataset, the selected input and target variables and the
  /// training/test partition boundaries of a data analysis problem, and raises
  /// <see cref="ProblemDataChanged"/> whenever one of them changes.
  /// </summary>
  /// <remarks>
  /// Partition end indices are exclusive: <see cref="TrainingIndizes"/> and
  /// <see cref="TestIndizes"/> enumerate <c>[start, end)</c>.
  /// </remarks>
  [Item("DataAnalysisProblemData", "Represents an item containing all data defining a data analysis problem.")]
  [StorableClass]
  public class DataAnalysisProblemData : ParameterizedNamedItem, IStorableContent {
    /// <summary>
    /// While true, <see cref="OnProblemDataChanged"/> neither validates nor notifies.
    /// Code that sets this flag is expected to reset it and call
    /// <see cref="OnProblemDataChanged"/> once the update is complete.
    /// </summary>
    protected bool suppressEvents = false;

    #region IStorableContent Members
    public string Filename { get; set; }
    #endregion

    #region default data
    // y = x^4 + x^3 + x^2 + x
    // The default constructor labels the columns { "y", "x" }.
    private static double[,] kozaF1 = new double[,] {
      {2.017885919, -1.449165046},
      {1.30060506, -1.344523885},
      {1.147134798, -1.317989331},
      {0.877182504, -1.266142284},
      {0.852562452, -1.261020794},
      {0.431095788, -1.158793317},
      {0.112586002, -1.050908405},
      {0.04594507, -1.021989402},
      {0.042572879, -1.020438113},
      {-0.074027291, -0.959859562},
      {-0.109178553, -0.938094706},
      {-0.259721109, -0.803635355},
      {-0.272991057, -0.387519561},
      {-0.161978191, -0.193611001},
      {-0.102489983, -0.114215349},
      {-0.01469968, -0.014918985},
      {-0.008863365, -0.008942626},
      {0.026751057, 0.026054094},
      {0.166922436, 0.14309643},
      {0.176953808, 0.1504144},
      {0.190233418, 0.159916534},
      {0.199800708, 0.166635331},
      {0.261502822, 0.207600348},
      {0.30182879, 0.232370249},
      {0.83763905, 0.468046718}
    };
    #endregion

    #region parameter properties
    public IValueParameter<Dataset> DatasetParameter {
      get { return (IValueParameter<Dataset>)Parameters["Dataset"]; }
    }
    public IValueParameter<StringValue> TargetVariableParameter {
      get { return (IValueParameter<StringValue>)Parameters["TargetVariable"]; }
    }
    public IValueParameter<ICheckedItemList<StringValue>> InputVariablesParameter {
      get { return (IValueParameter<ICheckedItemList<StringValue>>)Parameters["InputVariables"]; }
    }
    public IValueParameter<IntValue> TrainingSamplesStartParameter {
      get { return (IValueParameter<IntValue>)Parameters["TrainingSamplesStart"]; }
    }
    public IValueParameter<IntValue> TrainingSamplesEndParameter {
      get { return (IValueParameter<IntValue>)Parameters["TrainingSamplesEnd"]; }
    }
    public IValueParameter<IntValue> TestSamplesStartParameter {
      get { return (IValueParameter<IntValue>)Parameters["TestSamplesStart"]; }
    }
    public IValueParameter<IntValue> TestSamplesEndParameter {
      get { return (IValueParameter<IntValue>)Parameters["TestSamplesEnd"]; }
    }
    public IValueParameter<PercentValue> ValidationPercentageParameter {
      get { return (IValueParameter<PercentValue>)Parameters["ValidationPercentage"]; }
    }
    #endregion

    #region properties
    // Every setter below ignores an assignment of the instance that is already
    // stored, rejects null, and detaches this object's handlers from the old
    // value before replacing it; the parameter's ValueChanged handler then
    // attaches them to the new value.

    /// <summary>Gets or sets the dataset.</summary>
    /// <exception cref="ArgumentNullException">The assigned value is null.</exception>
    public Dataset Dataset {
      get { return DatasetParameter.Value; }
      set {
        if (value != Dataset) {
          if (value == null) throw new ArgumentNullException("value");
          DatasetParameter.Value = value;
        }
      }
    }

    /// <summary>Gets or sets the target variable.</summary>
    /// <exception cref="ArgumentNullException">The assigned value is null.</exception>
    public StringValue TargetVariable {
      get { return TargetVariableParameter.Value; }
      set {
        if (value != TargetVariableParameter.Value) {
          if (value == null) throw new ArgumentNullException("value");
          if (TargetVariable != null) DeregisterStringValueEventHandlers(TargetVariable);
          TargetVariableParameter.Value = value;
        }
      }
    }

    /// <summary>Gets or sets the checked list of input variables.</summary>
    /// <exception cref="ArgumentNullException">The assigned value is null.</exception>
    public ICheckedItemList<StringValue> InputVariables {
      get { return InputVariablesParameter.Value; }
      set {
        if (value != InputVariables) {
          if (value == null) throw new ArgumentNullException("value");
          if (InputVariables != null) DeregisterInputVariablesEventHandlers();
          InputVariablesParameter.Value = value;
        }
      }
    }

    /// <summary>Gets or sets the first (inclusive) row index of the training partition.</summary>
    /// <exception cref="ArgumentNullException">The assigned value is null.</exception>
    public IntValue TrainingSamplesStart {
      get { return TrainingSamplesStartParameter.Value; }
      set {
        if (value != TrainingSamplesStart) {
          if (value == null) throw new ArgumentNullException("value");
          if (TrainingSamplesStart != null) DeregisterValueTypeEventHandlers(TrainingSamplesStart);
          TrainingSamplesStartParameter.Value = value;
        }
      }
    }

    /// <summary>Gets or sets the end (exclusive) row index of the training partition.</summary>
    /// <exception cref="ArgumentNullException">The assigned value is null.</exception>
    public IntValue TrainingSamplesEnd {
      get { return TrainingSamplesEndParameter.Value; }
      set {
        if (value != TrainingSamplesEnd) {
          if (value == null) throw new ArgumentNullException("value");
          if (TrainingSamplesEnd != null) DeregisterValueTypeEventHandlers(TrainingSamplesEnd);
          TrainingSamplesEndParameter.Value = value;
        }
      }
    }

    /// <summary>Gets or sets the first (inclusive) row index of the test partition.</summary>
    /// <exception cref="ArgumentNullException">The assigned value is null.</exception>
    public IntValue TestSamplesStart {
      get { return TestSamplesStartParameter.Value; }
      set {
        if (value != TestSamplesStart) {
          if (value == null) throw new ArgumentNullException("value");
          if (TestSamplesStart != null) DeregisterValueTypeEventHandlers(TestSamplesStart);
          TestSamplesStartParameter.Value = value;
        }
      }
    }

    /// <summary>Gets or sets the end (exclusive) row index of the test partition.</summary>
    /// <exception cref="ArgumentNullException">The assigned value is null.</exception>
    public IntValue TestSamplesEnd {
      get { return TestSamplesEndParameter.Value; }
      set {
        if (value != TestSamplesEnd) {
          if (value == null) throw new ArgumentNullException("value");
          if (TestSamplesEnd != null) DeregisterValueTypeEventHandlers(TestSamplesEnd);
          TestSamplesEndParameter.Value = value;
        }
      }
    }

    /// <summary>
    /// Gets or sets the relative amount of the training samples that should be
    /// used as validation set.
    /// </summary>
    /// <exception cref="ArgumentNullException">The assigned value is null.</exception>
    /// <exception cref="ArgumentException">The assigned value is outside [0, 1].</exception>
    public PercentValue ValidationPercentage {
      get { return ValidationPercentageParameter.Value; }
      set {
        if (value != ValidationPercentage) {
          if (value == null) throw new ArgumentNullException("value");
          if (value.Value < 0 || value.Value > 1) throw new ArgumentException("ValidationPercentage must be between 0 and 1.");
          if (ValidationPercentage != null) DeregisterValueTypeEventHandlers(ValidationPercentage);
          ValidationPercentageParameter.Value = value;
        }
      }
    }

    /// <summary>
    /// Row indices in <c>[TrainingSamplesStart, TrainingSamplesEnd)</c> that lie
    /// inside the dataset and outside <c>[TestSamplesStart, TestSamplesEnd)</c>.
    /// </summary>
    public IEnumerable<int> TrainingIndizes {
      get {
        return Enumerable.Range(TrainingSamplesStart.Value, TrainingSamplesEnd.Value - TrainingSamplesStart.Value)
                         .Where(i => i >= 0 && i < Dataset.Rows && (i < TestSamplesStart.Value || TestSamplesEnd.Value <= i));
      }
    }

    /// <summary>
    /// Row indices in <c>[TestSamplesStart, TestSamplesEnd)</c> that lie inside the dataset.
    /// </summary>
    public IEnumerable<int> TestIndizes {
      get {
        return Enumerable.Range(TestSamplesStart.Value, TestSamplesEnd.Value - TestSamplesStart.Value)
                         .Where(i => i >= 0 && i < Dataset.Rows);
      }
    }
    #endregion

    /// <summary>
    /// Creates problem data for the built-in 25-row sample dataset with input "x",
    /// target "y", training rows [0, 15) and test rows [15, 25).
    /// </summary>
    public DataAnalysisProblemData()
      : base() {
      var inputVariables = new CheckedItemList<StringValue>();
      StringValue inputVariable = new StringValue("x");
      inputVariables.Add(inputVariable);
      StringValue targetVariable = new StringValue("y");
      var validTargetVariables = new ItemSet<StringValue>();
      validTargetVariables.Add(targetVariable);

      Parameters.Add(new ValueParameter<Dataset>("Dataset", new Dataset(new string[] { "y", "x" }, kozaF1)));
      Parameters.Add(new ValueParameter<ICheckedItemList<StringValue>>("InputVariables", inputVariables.AsReadOnly()));
      Parameters.Add(new ConstrainedValueParameter<StringValue>("TargetVariable", validTargetVariables, targetVariable));
      Parameters.Add(new ValueParameter<IntValue>("TrainingSamplesStart", new IntValue(0)));
      Parameters.Add(new ValueParameter<IntValue>("TrainingSamplesEnd", new IntValue(15)));
      Parameters.Add(new ValueParameter<IntValue>("TestSamplesStart", new IntValue(15)));
      Parameters.Add(new ValueParameter<IntValue>("TestSamplesEnd", new IntValue(25)));
      Parameters.Add(new ValueParameter<PercentValue>("ValidationPercentage", "The relative amount of the training samples that should be used as validation set.", new PercentValue(0.5)));

      RegisterParameterEventHandlers();
      RegisterParameterValueEventHandlers();
    }

    /// <summary>
    /// Creates problem data for the given dataset and partition boundaries.
    /// Every variable of <paramref name="dataset"/> becomes a valid target choice.
    /// </summary>
    /// <param name="dataset">The dataset to analyze.</param>
    /// <param name="inputVariables">Names of the input variables.</param>
    /// <param name="targetVariable">Name of the target variable.</param>
    /// <param name="trainingSamplesStart">First (inclusive) training row.</param>
    /// <param name="trainingSamplesEnd">End (exclusive) training row.</param>
    /// <param name="testSamplesStart">First (inclusive) test row.</param>
    /// <param name="testSamplesEnd">End (exclusive) test row.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="dataset"/> or <paramref name="inputVariables"/> is null.
    /// </exception>
    public DataAnalysisProblemData(Dataset dataset, IEnumerable<string> inputVariables, string targetVariable,
      int trainingSamplesStart, int trainingSamplesEnd, int testSamplesStart, int testSamplesEnd) {
      if (dataset == null) throw new ArgumentNullException("dataset");
      if (inputVariables == null) throw new ArgumentNullException("inputVariables");

      var inputVariablesList = new CheckedItemList<StringValue>(inputVariables.Select(x => new StringValue(x)).ToList());
      StringValue targetVariableValue = new StringValue(targetVariable);
      var validTargetVariables = new ItemSet<StringValue>();
      // The target's own StringValue instance is added separately below so the
      // parameter value is one of the valid values.
      foreach (var variable in dataset.VariableNames)
        if (variable != targetVariable)
          validTargetVariables.Add(new StringValue(variable));
      validTargetVariables.Add(targetVariableValue);

      Parameters.Add(new ValueParameter<Dataset>("Dataset", dataset));
      Parameters.Add(new ValueParameter<ICheckedItemList<StringValue>>("InputVariables", inputVariablesList.AsReadOnly()));
      Parameters.Add(new ConstrainedValueParameter<StringValue>("TargetVariable", validTargetVariables, targetVariableValue));
      Parameters.Add(new ValueParameter<IntValue>("TrainingSamplesStart", new IntValue(trainingSamplesStart)));
      Parameters.Add(new ValueParameter<IntValue>("TrainingSamplesEnd", new IntValue(trainingSamplesEnd)));
      Parameters.Add(new ValueParameter<IntValue>("TestSamplesStart", new IntValue(testSamplesStart)));
      Parameters.Add(new ValueParameter<IntValue>("TestSamplesEnd", new IntValue(testSamplesEnd)));
      Parameters.Add(new ValueParameter<PercentValue>("ValidationPercentage", "The relative amount of the training samples that should be used as validation set.", new PercentValue(0.5)));

      RegisterParameterEventHandlers();
      RegisterParameterValueEventHandlers();
    }

    [StorableConstructor]
    protected DataAnalysisProblemData(bool deserializing) : base(deserializing) { }

    /// <summary>
    /// Adds the "ValidationPercentage" parameter when the deserialized instance
    /// lacks it, then re-attaches all event handlers.
    /// </summary>
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserializationHook() {
      if (!Parameters.ContainsKey("ValidationPercentage"))
        Parameters.Add(new ValueParameter<PercentValue>("ValidationPercentage", "The relative amount of the training samples that should be used as validation set.", new PercentValue(0.5)));

      RegisterParameterEventHandlers();
      RegisterParameterValueEventHandlers();
    }

    #region events
    public event EventHandler ProblemDataChanged;

    /// <summary>
    /// Validates the partition settings and raises <see cref="ProblemDataChanged"/>.
    /// </summary>
    /// <remarks>
    /// Does nothing while <see cref="suppressEvents"/> is set, so that a multi-step
    /// update is validated only once it is complete. Otherwise at most one
    /// out-of-range setting is corrected per call; the correction is presumably
    /// reported through the corrected item's ValueChanged event, which leads back
    /// into this method for the next check (assumption about IntValue/PercentValue -
    /// confirm against HeuristicLab.Data).
    /// </remarks>
    /// <exception cref="ArgumentException">No training samples are available.</exception>
    protected virtual void OnProblemDataChanged(EventArgs e) {
      // Validating intermediate states would compare new boundaries against stale
      // ones and overwrite values that are about to become valid.
      if (suppressEvents) return;

      // End indices are exclusive, so the largest legal end is Dataset.Rows.
      if (TrainingSamplesStart.Value < 0) TrainingSamplesStart.Value = 0;
      else if (TestSamplesStart.Value < 0) TestSamplesStart.Value = 0;
      else if (TrainingSamplesEnd.Value > Dataset.Rows) TrainingSamplesEnd.Value = Dataset.Rows;
      else if (TestSamplesEnd.Value > Dataset.Rows) TestSamplesEnd.Value = Dataset.Rows;
      else if (TrainingSamplesStart.Value > TrainingSamplesEnd.Value) TrainingSamplesStart.Value = TrainingSamplesEnd.Value;
      else if (TestSamplesStart.Value > TestSamplesEnd.Value) TestSamplesStart.Value = TestSamplesEnd.Value;
      else if (ValidationPercentage.Value < 0) ValidationPercentage.Value = 0;
      else if (ValidationPercentage.Value > 1) ValidationPercentage.Value = 1;
      else if (!TrainingIndizes.Any()) throw new ArgumentException("No training samples are available.");
      else {
        var listeners = ProblemDataChanged;
        if (listeners != null) listeners(this, e);
      }
    }

    /// <summary>Subscribes to ValueChanged of every parameter.</summary>
    private void RegisterParameterEventHandlers() {
      DatasetParameter.ValueChanged += new EventHandler(DatasetParameter_ValueChanged);
      InputVariablesParameter.ValueChanged += new EventHandler(InputVariablesParameter_ValueChanged);
      TargetVariableParameter.ValueChanged += new EventHandler(TargetVariableParameter_ValueChanged);
      TrainingSamplesStartParameter.ValueChanged += new EventHandler(TrainingSamplesStartParameter_ValueChanged);
      TrainingSamplesEndParameter.ValueChanged += new EventHandler(TrainingSamplesEndParameter_ValueChanged);
      TestSamplesStartParameter.ValueChanged += new EventHandler(TestSamplesStartParameter_ValueChanged);
      TestSamplesEndParameter.ValueChanged += new EventHandler(TestSamplesEndParameter_ValueChanged);
      ValidationPercentageParameter.ValueChanged += new EventHandler(ValidationPercentageParameter_ValueChanged);
    }

    /// <summary>Subscribes to the change events of the current parameter values.</summary>
    private void RegisterParameterValueEventHandlers() {
      RegisterInputVariablesEventHandlers();
      if (TargetVariable != null) RegisterStringValueEventHandlers(TargetVariable);
      RegisterValueTypeEventHandlers(TrainingSamplesStart);
      RegisterValueTypeEventHandlers(TrainingSamplesEnd);
      RegisterValueTypeEventHandlers(TestSamplesStart);
      RegisterValueTypeEventHandlers(TestSamplesEnd);
      RegisterValueTypeEventHandlers(ValidationPercentage);
    }

    #region parameter value changed event handlers
    // Each handler attaches to the newly assigned value object and then reports the change.
    private void DatasetParameter_ValueChanged(object sender, EventArgs e) {
      OnProblemDataChanged(EventArgs.Empty);
    }
    private void InputVariablesParameter_ValueChanged(object sender, EventArgs e) {
      RegisterInputVariablesEventHandlers();
      OnProblemDataChanged(EventArgs.Empty);
    }
    private void TargetVariableParameter_ValueChanged(object sender, EventArgs e) {
      // A null target is neither tracked nor reported.
      if (TargetVariable != null) {
        RegisterStringValueEventHandlers(TargetVariable);
        OnProblemDataChanged(EventArgs.Empty);
      }
    }
    private void TrainingSamplesStartParameter_ValueChanged(object sender, EventArgs e) {
      RegisterValueTypeEventHandlers(TrainingSamplesStart);
      OnProblemDataChanged(EventArgs.Empty);
    }
    private void TrainingSamplesEndParameter_ValueChanged(object sender, EventArgs e) {
      RegisterValueTypeEventHandlers(TrainingSamplesEnd);
      OnProblemDataChanged(EventArgs.Empty);
    }
    private void TestSamplesStartParameter_ValueChanged(object sender, EventArgs e) {
      RegisterValueTypeEventHandlers(TestSamplesStart);
      OnProblemDataChanged(EventArgs.Empty);
    }
    private void TestSamplesEndParameter_ValueChanged(object sender, EventArgs e) {
      RegisterValueTypeEventHandlers(TestSamplesEnd);
      OnProblemDataChanged(EventArgs.Empty);
    }
    private void ValidationPercentageParameter_ValueChanged(object sender, EventArgs e) {
      RegisterValueTypeEventHandlers(ValidationPercentage);
      OnProblemDataChanged(EventArgs.Empty);
    }
    #endregion

    /// <summary>Subscribes to the collection events of the input variable list and to each item's ValueChanged.</summary>
    private void RegisterInputVariablesEventHandlers() {
      InputVariables.CollectionReset += InputVariables_CollectionReset;
      InputVariables.ItemsAdded += InputVariables_ItemsAdded;
      InputVariables.ItemsRemoved += InputVariables_ItemsRemoved;
      InputVariables.CheckedItemsChanged += InputVariables_CheckedItemsChanged;
      foreach (var item in InputVariables) {
        item.ValueChanged += new EventHandler(InputVariable_ValueChanged);
      }
    }

    /// <summary>Reverses <see cref="RegisterInputVariablesEventHandlers"/> for the current list.</summary>
    private void DeregisterInputVariablesEventHandlers() {
      InputVariables.CollectionReset -= InputVariables_CollectionReset;
      InputVariables.ItemsAdded -= InputVariables_ItemsAdded;
      InputVariables.ItemsRemoved -= InputVariables_ItemsRemoved;
      InputVariables.CheckedItemsChanged -= InputVariables_CheckedItemsChanged;
      foreach (var item in InputVariables) {
        item.ValueChanged -= new EventHandler(InputVariable_ValueChanged);
      }
    }

    private void InputVariables_CheckedItemsChanged(object sender, HeuristicLab.Collections.CollectionItemsChangedEventArgs<HeuristicLab.Collections.IndexedItem<StringValue>> e) {
      OnProblemDataChanged(e);
    }

    private void InputVariables_ItemsRemoved(object sender, HeuristicLab.Collections.CollectionItemsChangedEventArgs<HeuristicLab.Collections.IndexedItem<StringValue>> e) {
      foreach (var indexedItem in e.Items)
        indexedItem.Value.ValueChanged -= new EventHandler(InputVariable_ValueChanged);
      OnProblemDataChanged(e);
    }

    private void InputVariables_ItemsAdded(object sender, HeuristicLab.Collections.CollectionItemsChangedEventArgs<HeuristicLab.Collections.IndexedItem<StringValue>> e) {
      foreach (var indexedItem in e.Items)
        indexedItem.Value.ValueChanged += new EventHandler(InputVariable_ValueChanged);
      OnProblemDataChanged(e);
    }

    private void InputVariables_CollectionReset(object sender, HeuristicLab.Collections.CollectionItemsChangedEventArgs<HeuristicLab.Collections.IndexedItem<StringValue>> e) {
      foreach (var indexedItem in e.OldItems)
        indexedItem.Value.ValueChanged -= new EventHandler(InputVariable_ValueChanged);
      // Track the replacement items as well, mirroring InputVariables_ItemsAdded;
      // otherwise later edits to them would go unnoticed.
      foreach (var indexedItem in e.Items)
        indexedItem.Value.ValueChanged += new EventHandler(InputVariable_ValueChanged);
      OnProblemDataChanged(e);
    }

    private void InputVariable_ValueChanged(object sender, EventArgs e) {
      OnProblemDataChanged(e);
    }

    #region helper
    private void RegisterValueTypeEventHandlers<T>(ValueTypeValue<T> value) where T : struct {
      value.ValueChanged += new EventHandler(value_ValueChanged);
    }

    private void DeregisterValueTypeEventHandlers<T>(ValueTypeValue<T> value) where T : struct {
      value.ValueChanged -= new EventHandler(value_ValueChanged);
    }

    private void RegisterStringValueEventHandlers(StringValue value) {
      value.ValueChanged += new EventHandler(value_ValueChanged);
    }

    private void DeregisterStringValueEventHandlers(StringValue value) {
      value.ValueChanged -= new EventHandler(value_ValueChanged);
    }

    private void value_ValueChanged(object sender, EventArgs e) {
      OnProblemDataChanged(e);
    }
    #endregion
    #endregion

    /// <summary>
    /// Replaces the dataset with the contents of a CSV file. The first variable
    /// becomes the target and is unchecked in the input list; the first half of
    /// the rows becomes the training partition and the rest the test partition.
    /// </summary>
    /// <param name="fileName">Path of the file handed to <c>CsvFileParser</c>.</param>
    public virtual void ImportFromFile(string fileName) {
      var csvFileParser = new CsvFileParser();
      csvFileParser.Parse(fileName);

      suppressEvents = true;
      try {
        Name = "Data imported from " + Path.GetFileName(fileName);
        Dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
        Dataset.Name = Path.GetFileName(fileName);

        var variableNames = Dataset.VariableNames.Select(x => new StringValue(x).AsReadOnly()).ToList();
        var targetParameter = (ConstrainedValueParameter<StringValue>)TargetVariableParameter;
        targetParameter.ValidValues.Clear();
        foreach (var variableName in variableNames)
          targetParameter.ValidValues.Add(variableName);
        TargetVariable = variableNames.First();

        InputVariables = new CheckedItemList<StringValue>(variableNames).AsReadOnly();
        InputVariables.SetItemCheckedState(variableNames.First(), false);

        int middle = (int)(csvFileParser.Rows * 0.5);
        TrainingSamplesStart = new IntValue(0);
        TrainingSamplesEnd = new IntValue(middle);
        TestSamplesStart = new IntValue(middle);
        TestSamplesEnd = new IntValue(csvFileParser.Rows);
      }
      finally {
        // Always re-enable notifications, even when the import fails half-way.
        suppressEvents = false;
      }
      // Validate the final state and notify listeners exactly once.
      OnProblemDataChanged(EventArgs.Empty);
    }

    /// <summary>Clones this item and attaches the event handlers on the clone.</summary>
    public override IDeepCloneable Clone(Cloner cloner) {
      DataAnalysisProblemData clone = (DataAnalysisProblemData)base.Clone(cloner);
      clone.RegisterParameterEventHandlers();
      clone.RegisterParameterValueEventHandlers();
      return clone;
    }
  }
}