Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/DataAnalysisProblemData.cs @ 4769

Last change on this file since 4769 was 4722, checked in by swagner, 14 years ago

Merged cloning refactoring branch back into trunk (#922)

File size: 20.5 KB
RevLine 
[3294]1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
[4068]24using System.IO;
[3294]25using System.Linq;
[3376]26using HeuristicLab.Common;
[3294]27using HeuristicLab.Core;
[4068]28using HeuristicLab.Data;
29using HeuristicLab.Parameters;
[3294]30using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
31
[3373]32namespace HeuristicLab.Problems.DataAnalysis {
33  [Item("DataAnalysisProblemData", "Represents an item containing all data defining a data analysis problem.")]
[3294]34  [StorableClass]
[4451]35  public class DataAnalysisProblemData : ParameterizedNamedItem, IStorableContent {
[4393]36    protected bool suppressEvents = false;
[4468]37    #region IStorableContent Members
38    public string Filename { get; set; }
39    #endregion
[3723]40    #region default data
41    // y = x^4 + x^3 + x^2 + x
[4022]42    private static double[,] kozaF1 = new double[,] {
[3723]43{2.017885919, -1.449165046},
44{1.30060506,  -1.344523885},
45{1.147134798, -1.317989331},
46{0.877182504, -1.266142284},
47{0.852562452, -1.261020794},
48{0.431095788, -1.158793317},
49{0.112586002, -1.050908405},
50{0.04594507,  -1.021989402},
51{0.042572879, -1.020438113},
52{-0.074027291,  -0.959859562},
53{-0.109178553,  -0.938094706},
54{-0.259721109,  -0.803635355},
55{-0.272991057,  -0.387519561},
56{-0.161978191,  -0.193611001},
57{-0.102489983,  -0.114215349},
58{-0.01469968, -0.014918985},
59{-0.008863365,  -0.008942626},
60{0.026751057, 0.026054094},
61{0.166922436, 0.14309643},
62{0.176953808, 0.1504144},
63{0.190233418, 0.159916534},
64{0.199800708, 0.166635331},
65{0.261502822, 0.207600348},
66{0.30182879,  0.232370249},
67{0.83763905,  0.468046718}
68    };
69    #endregion
[3545]70    #region parameter properties
71    public IValueParameter<Dataset> DatasetParameter {
72      get { return (IValueParameter<Dataset>)Parameters["Dataset"]; }
[3442]73    }
[3545]74    public IValueParameter<StringValue> TargetVariableParameter {
75      get { return (IValueParameter<StringValue>)Parameters["TargetVariable"]; }
[3442]76    }
[3628]77    public IValueParameter<ICheckedItemList<StringValue>> InputVariablesParameter {
78      get { return (IValueParameter<ICheckedItemList<StringValue>>)Parameters["InputVariables"]; }
[3442]79    }
[3545]80    public IValueParameter<IntValue> TrainingSamplesStartParameter {
81      get { return (IValueParameter<IntValue>)Parameters["TrainingSamplesStart"]; }
[3442]82    }
[3545]83    public IValueParameter<IntValue> TrainingSamplesEndParameter {
84      get { return (IValueParameter<IntValue>)Parameters["TrainingSamplesEnd"]; }
[3442]85    }
[3545]86    public IValueParameter<IntValue> TestSamplesStartParameter {
87      get { return (IValueParameter<IntValue>)Parameters["TestSamplesStart"]; }
[3442]88    }
[3545]89    public IValueParameter<IntValue> TestSamplesEndParameter {
90      get { return (IValueParameter<IntValue>)Parameters["TestSamplesEnd"]; }
[3442]91    }
[4468]92    public IValueParameter<PercentValue> ValidationPercentageParameter {
93      get { return (IValueParameter<PercentValue>)Parameters["ValidationPercentage"]; }
94    }
[3442]95    #endregion
96
[3294]97    #region properties
98    public Dataset Dataset {
[4468]99      get { return DatasetParameter.Value; }
[3309]100      set {
[3442]101        if (value != Dataset) {
[3309]102          if (value == null) throw new ArgumentNullException();
[3545]103          DatasetParameter.Value = value;
[3309]104        }
105      }
[3294]106    }
107    public StringValue TargetVariable {
[4468]108      get { return TargetVariableParameter.Value; }
[3442]109      set {
[3545]110        if (value != TargetVariableParameter.Value) {
[3442]111          if (value == null) throw new ArgumentNullException();
112          if (TargetVariable != null) DeregisterStringValueEventHandlers(TargetVariable);
[3545]113          TargetVariableParameter.Value = value;
[3442]114        }
115      }
[3294]116    }
[3628]117    public ICheckedItemList<StringValue> InputVariables {
[4468]118      get { return InputVariablesParameter.Value; }
[3294]119      set {
[3442]120        if (value != InputVariables) {
[3294]121          if (value == null) throw new ArgumentNullException();
[3442]122          if (InputVariables != null) DeregisterInputVariablesEventHandlers();
[3545]123          InputVariablesParameter.Value = value;
[3294]124        }
125      }
126    }
127    public IntValue TrainingSamplesStart {
[4468]128      get { return TrainingSamplesStartParameter.Value; }
[3442]129      set {
130        if (value != TrainingSamplesStart) {
131          if (value == null) throw new ArgumentNullException();
132          if (TrainingSamplesStart != null) DeregisterValueTypeEventHandlers(TrainingSamplesStart);
[3545]133          TrainingSamplesStartParameter.Value = value;
[3442]134        }
135      }
[3294]136    }
137    public IntValue TrainingSamplesEnd {
[4468]138      get { return TrainingSamplesEndParameter.Value; }
[3442]139      set {
140        if (value != TrainingSamplesEnd) {
141          if (value == null) throw new ArgumentNullException();
142          if (TrainingSamplesEnd != null) DeregisterValueTypeEventHandlers(TrainingSamplesEnd);
[3545]143          TrainingSamplesEndParameter.Value = value;
[3442]144        }
145      }
[3294]146    }
147    public IntValue TestSamplesStart {
[4468]148      get { return TestSamplesStartParameter.Value; }
[3442]149      set {
150        if (value != TestSamplesStart) {
151          if (value == null) throw new ArgumentNullException();
152          if (TestSamplesStart != null) DeregisterValueTypeEventHandlers(TestSamplesStart);
[3545]153          TestSamplesStartParameter.Value = value;
[3442]154        }
155      }
[3294]156    }
157    public IntValue TestSamplesEnd {
[4468]158      get { return TestSamplesEndParameter.Value; }
[3442]159      set {
160        if (value != TestSamplesEnd) {
161          if (value == null) throw new ArgumentNullException();
162          if (TestSamplesEnd != null) DeregisterValueTypeEventHandlers(TestSamplesEnd);
[3545]163          TestSamplesEndParameter.Value = value;
[3442]164        }
165      }
[3294]166    }
[4468]167    public PercentValue ValidationPercentage {
168      get { return ValidationPercentageParameter.Value; }
169      set {
170        if (value != ValidationPercentage) {
171          if (value == null) throw new ArgumentNullException();
172          if (value.Value < 0 || value.Value > 1) throw new ArgumentException("ValidationPercentage must be between 0 and 1.");
173          if (ValidationPercentage != null) DeregisterValueTypeEventHandlers(ValidationPercentage);
174          ValidationPercentageParameter.Value = value;
175        }
176      }
177    }
[3294]178
[4468]179    public IEnumerable<int> TrainingIndizes {
180      get {
181        return Enumerable.Range(TrainingSamplesStart.Value, TrainingSamplesEnd.Value - TrainingSamplesStart.Value)
[4543]182                         .Where(i => i >= 0 && i < Dataset.Rows && (i < TestSamplesStart.Value || TestSamplesEnd.Value <= i));
[4468]183      }
184    }
185    public IEnumerable<int> TestIndizes {
186      get {
187        return Enumerable.Range(TestSamplesStart.Value, TestSamplesEnd.Value - TestSamplesStart.Value)
[4543]188           .Where(i => i >= 0 && i < Dataset.Rows);
[4468]189      }
190    }
[4451]191    #endregion
192
[4722]193    [StorableConstructor]
194    protected DataAnalysisProblemData(bool deserializing) : base(deserializing) { }
195    protected DataAnalysisProblemData(DataAnalysisProblemData original, Cloner cloner)
196      : base(original, cloner) {
197      RegisterParameterEventHandlers();
198      RegisterParameterValueEventHandlers();
199    }
[3373]200    public DataAnalysisProblemData()
[3294]201      : base() {
[3723]202      var inputVariables = new CheckedItemList<StringValue>();
203      StringValue inputVariable = new StringValue("x");
204      inputVariables.Add(inputVariable);
205      StringValue targetVariable = new StringValue("y");
206      var validTargetVariables = new ItemSet<StringValue>();
207      validTargetVariables.Add(targetVariable);
208      Parameters.Add(new ValueParameter<Dataset>("Dataset", new Dataset(new string[] { "y", "x" }, kozaF1)));
209      Parameters.Add(new ValueParameter<ICheckedItemList<StringValue>>("InputVariables", inputVariables.AsReadOnly()));
210      Parameters.Add(new ConstrainedValueParameter<StringValue>("TargetVariable", validTargetVariables, targetVariable));
211      Parameters.Add(new ValueParameter<IntValue>("TrainingSamplesStart", new IntValue(0)));
212      Parameters.Add(new ValueParameter<IntValue>("TrainingSamplesEnd", new IntValue(15)));
213      Parameters.Add(new ValueParameter<IntValue>("TestSamplesStart", new IntValue(15)));
214      Parameters.Add(new ValueParameter<IntValue>("TestSamplesEnd", new IntValue(25)));
[4468]215      Parameters.Add(new ValueParameter<PercentValue>("ValidationPercentage", "The relative amount of the training samples that should be used as validation set.", new PercentValue(0.5)));
[3545]216      RegisterParameterEventHandlers();
217      RegisterParameterValueEventHandlers();
[3294]218    }
219
[4022]220    public DataAnalysisProblemData(Dataset dataset, IEnumerable<string> inputVariables, string targetVariable,
221      int trainingSamplesStart, int trainingSamplesEnd, int testSamplesStart, int testSamplesEnd) {
[4393]222      var inputVariablesList = new CheckedItemList<StringValue>(inputVariables.Select(x => new StringValue(x)).ToList());
[4022]223      StringValue targetVariableValue = new StringValue(targetVariable);
224      var validTargetVariables = new ItemSet<StringValue>();
225      foreach (var variable in dataset.VariableNames)
226        if (variable != targetVariable)
227          validTargetVariables.Add(new StringValue(variable));
228      validTargetVariables.Add(targetVariableValue);
229      Parameters.Add(new ValueParameter<Dataset>("Dataset", dataset));
230      Parameters.Add(new ValueParameter<ICheckedItemList<StringValue>>("InputVariables", inputVariablesList.AsReadOnly()));
231      Parameters.Add(new ConstrainedValueParameter<StringValue>("TargetVariable", validTargetVariables, targetVariableValue));
232      Parameters.Add(new ValueParameter<IntValue>("TrainingSamplesStart", new IntValue(trainingSamplesStart)));
233      Parameters.Add(new ValueParameter<IntValue>("TrainingSamplesEnd", new IntValue(trainingSamplesEnd)));
234      Parameters.Add(new ValueParameter<IntValue>("TestSamplesStart", new IntValue(testSamplesStart)));
235      Parameters.Add(new ValueParameter<IntValue>("TestSamplesEnd", new IntValue(testSamplesEnd)));
[4468]236      Parameters.Add(new ValueParameter<PercentValue>("ValidationPercentage", "The relative amount of the training samples that should be used as validation set.", new PercentValue(0.5)));
[4022]237      RegisterParameterEventHandlers();
238      RegisterParameterValueEventHandlers();
239    }
[3545]240
[4722]241    public override IDeepCloneable Clone(Cloner cloner) {
242      return new DataAnalysisProblemData(this, cloner);
243    }
[3294]244
[3442]245    [StorableHook(HookType.AfterDeserialization)]
[4722]246    private void AfterDeserialization() {
[4468]247      if (!Parameters.ContainsKey("ValidationPercentage"))
248        Parameters.Add(new ValueParameter<PercentValue>("ValidationPercentage", "The relative amount of the training samples that should be used as validation set.", new PercentValue(0.5)));
249
[3545]250      RegisterParameterEventHandlers();
251      RegisterParameterValueEventHandlers();
[3442]252    }
253
[3294]254    #region events
[3545]255    public event EventHandler ProblemDataChanged;
256    protected virtual void OnProblemDataChanged(EventArgs e) {
[4473]257      if (TrainingSamplesStart.Value < 0) TrainingSamplesStart.Value = 0;
258      else if (TestSamplesStart.Value < 0) TestSamplesStart.Value = 0;
259      else if (TrainingSamplesEnd.Value > Dataset.Rows - 1) TrainingSamplesEnd.Value = Dataset.Rows - 1;
260      else if (TestSamplesEnd.Value > Dataset.Rows - 1) TestSamplesEnd.Value = Dataset.Rows - 1;
261      else if (TrainingSamplesStart.Value > TrainingSamplesEnd.Value) TrainingSamplesStart.Value = TestSamplesEnd.Value;
262      else if (TestSamplesStart.Value > TestSamplesEnd.Value) TestSamplesStart.Value = TestSamplesEnd.Value;
263      else if (ValidationPercentage.Value < 0) ValidationPercentage.Value = 0;
264      else if (ValidationPercentage.Value > 1) ValidationPercentage.Value = 1;
265      else if (!TrainingIndizes.Any()) throw new ArgumentException("No training samples are available.");
266      else if (!suppressEvents) {
[3545]267        var listeners = ProblemDataChanged;
268        if (listeners != null) listeners(this, e);
269      }
270    }
271
272    private void RegisterParameterEventHandlers() {
273      DatasetParameter.ValueChanged += new EventHandler(DatasetParameter_ValueChanged);
274      InputVariablesParameter.ValueChanged += new EventHandler(InputVariablesParameter_ValueChanged);
275      TargetVariableParameter.ValueChanged += new EventHandler(TargetVariableParameter_ValueChanged);
276      TrainingSamplesStartParameter.ValueChanged += new EventHandler(TrainingSamplesStartParameter_ValueChanged);
277      TrainingSamplesEndParameter.ValueChanged += new EventHandler(TrainingSamplesEndParameter_ValueChanged);
278      TestSamplesStartParameter.ValueChanged += new EventHandler(TestSamplesStartParameter_ValueChanged);
279      TestSamplesEndParameter.ValueChanged += new EventHandler(TestSamplesEndParameter_ValueChanged);
[4468]280      ValidationPercentageParameter.ValueChanged += new EventHandler(ValidationPercentageParameter_ValueChanged);
[3545]281    }
282
283    private void RegisterParameterValueEventHandlers() {
[3442]284      RegisterInputVariablesEventHandlers();
[3545]285      if (TargetVariable != null) RegisterStringValueEventHandlers(TargetVariable);
[3442]286      RegisterValueTypeEventHandlers(TrainingSamplesStart);
287      RegisterValueTypeEventHandlers(TrainingSamplesEnd);
288      RegisterValueTypeEventHandlers(TestSamplesStart);
289      RegisterValueTypeEventHandlers(TestSamplesEnd);
[4468]290      RegisterValueTypeEventHandlers(ValidationPercentage);
[3442]291    }
292
[3309]293
[3545]294    #region parameter value changed event handlers
[4250]295    private void DatasetParameter_ValueChanged(object sender, EventArgs e) {
[3545]296      OnProblemDataChanged(EventArgs.Empty);
[3309]297    }
[4250]298    private void InputVariablesParameter_ValueChanged(object sender, EventArgs e) {
[4291]299      RegisterInputVariablesEventHandlers();
[3545]300      OnProblemDataChanged(EventArgs.Empty);
[3442]301    }
[4250]302    private void TargetVariableParameter_ValueChanged(object sender, EventArgs e) {
[4295]303      if (TargetVariable != null) {
304        RegisterStringValueEventHandlers(TargetVariable);
305        OnProblemDataChanged(EventArgs.Empty);
306      }
[3442]307    }
[4250]308    private void TrainingSamplesStartParameter_ValueChanged(object sender, EventArgs e) {
[4291]309      RegisterValueTypeEventHandlers(TrainingSamplesStart);
[3545]310      OnProblemDataChanged(EventArgs.Empty);
[3442]311    }
[4250]312    private void TrainingSamplesEndParameter_ValueChanged(object sender, EventArgs e) {
[4291]313      RegisterValueTypeEventHandlers(TrainingSamplesEnd);
[3545]314      OnProblemDataChanged(EventArgs.Empty);
[3442]315    }
[4250]316    private void TestSamplesStartParameter_ValueChanged(object sender, EventArgs e) {
[4291]317      RegisterValueTypeEventHandlers(TestSamplesStart);
[3545]318      OnProblemDataChanged(EventArgs.Empty);
319    }
[4250]320    private void TestSamplesEndParameter_ValueChanged(object sender, EventArgs e) {
[4291]321      RegisterValueTypeEventHandlers(TestSamplesEnd);
[3545]322      OnProblemDataChanged(EventArgs.Empty);
323    }
[4468]324    private void ValidationPercentageParameter_ValueChanged(object sender, EventArgs e) {
325      RegisterValueTypeEventHandlers(ValidationPercentage);
326      OnProblemDataChanged(EventArgs.Empty);
327    }
[3545]328    #endregion
[3442]329
330    private void RegisterInputVariablesEventHandlers() {
[3651]331      InputVariables.CollectionReset += new HeuristicLab.Collections.CollectionItemsChangedEventHandler<HeuristicLab.Collections.IndexedItem<StringValue>>(InputVariables_CollectionReset);
[3599]332      InputVariables.ItemsAdded += new HeuristicLab.Collections.CollectionItemsChangedEventHandler<HeuristicLab.Collections.IndexedItem<StringValue>>(InputVariables_ItemsAdded);
333      InputVariables.ItemsRemoved += new HeuristicLab.Collections.CollectionItemsChangedEventHandler<HeuristicLab.Collections.IndexedItem<StringValue>>(InputVariables_ItemsRemoved);
334      InputVariables.CheckedItemsChanged += new HeuristicLab.Collections.CollectionItemsChangedEventHandler<HeuristicLab.Collections.IndexedItem<StringValue>>(InputVariables_CheckedItemsChanged);
[4250]335      foreach (var item in InputVariables) {
[3545]336        item.ValueChanged += new EventHandler(InputVariable_ValueChanged);
[4250]337      }
[3442]338    }
339
[3599]340    private void DeregisterInputVariablesEventHandlers() {
341      InputVariables.CollectionReset -= new HeuristicLab.Collections.CollectionItemsChangedEventHandler<HeuristicLab.Collections.IndexedItem<StringValue>>(InputVariables_CollectionReset);
342      InputVariables.ItemsAdded -= new HeuristicLab.Collections.CollectionItemsChangedEventHandler<HeuristicLab.Collections.IndexedItem<StringValue>>(InputVariables_ItemsAdded);
343      InputVariables.ItemsRemoved -= new HeuristicLab.Collections.CollectionItemsChangedEventHandler<HeuristicLab.Collections.IndexedItem<StringValue>>(InputVariables_ItemsRemoved);
344      InputVariables.CheckedItemsChanged -= new HeuristicLab.Collections.CollectionItemsChangedEventHandler<HeuristicLab.Collections.IndexedItem<StringValue>>(InputVariables_CheckedItemsChanged);
345      foreach (var item in InputVariables) {
346        item.ValueChanged -= new EventHandler(InputVariable_ValueChanged);
347      }
348    }
[3651]349
[3599]350    private void InputVariables_CheckedItemsChanged(object sender, HeuristicLab.Collections.CollectionItemsChangedEventArgs<HeuristicLab.Collections.IndexedItem<StringValue>> e) {
[3559]351      OnProblemDataChanged(e);
352    }
[3599]353    private void InputVariables_ItemsRemoved(object sender, HeuristicLab.Collections.CollectionItemsChangedEventArgs<HeuristicLab.Collections.IndexedItem<StringValue>> e) {
354      foreach (var indexedItem in e.Items)
355        indexedItem.Value.ValueChanged -= new EventHandler(InputVariable_ValueChanged);
[3545]356      OnProblemDataChanged(e);
[3442]357    }
[3599]358    private void InputVariables_ItemsAdded(object sender, HeuristicLab.Collections.CollectionItemsChangedEventArgs<HeuristicLab.Collections.IndexedItem<StringValue>> e) {
359      foreach (var indexedItem in e.Items)
360        indexedItem.Value.ValueChanged += new EventHandler(InputVariable_ValueChanged);
[3442]361      OnProblemDataChanged(e);
362    }
[3599]363    private void InputVariables_CollectionReset(object sender, HeuristicLab.Collections.CollectionItemsChangedEventArgs<HeuristicLab.Collections.IndexedItem<StringValue>> e) {
364      foreach (var indexedItem in e.OldItems)
365        indexedItem.Value.ValueChanged -= new EventHandler(InputVariable_ValueChanged);
[3442]366      OnProblemDataChanged(e);
367    }
[4250]368    private void InputVariable_ValueChanged(object sender, EventArgs e) {
[3442]369      OnProblemDataChanged(e);
370    }
[4250]371
[3545]372    #region helper
373    private void RegisterValueTypeEventHandlers<T>(ValueTypeValue<T> value) where T : struct {
374      value.ValueChanged += new EventHandler(value_ValueChanged);
375    }
376    private void DeregisterValueTypeEventHandlers<T>(ValueTypeValue<T> value) where T : struct {
377      value.ValueChanged -= new EventHandler(value_ValueChanged);
[3442]378    }
[3545]379    private void RegisterStringValueEventHandlers(StringValue value) {
380      value.ValueChanged += new EventHandler(value_ValueChanged);
381    }
382    private void DeregisterStringValueEventHandlers(StringValue value) {
383      value.ValueChanged -= new EventHandler(value_ValueChanged);
384    }
385
[4250]386    private void value_ValueChanged(object sender, EventArgs e) {
387      OnProblemDataChanged(e);
388    }
[3294]389    #endregion
[3545]390    #endregion
[3294]391
392    public virtual void ImportFromFile(string fileName) {
393      var csvFileParser = new CsvFileParser();
394      csvFileParser.Parse(fileName);
[3545]395      suppressEvents = true;
[3373]396      Name = "Data imported from " + Path.GetFileName(fileName);
[3294]397      Dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
398      Dataset.Name = Path.GetFileName(fileName);
[3545]399      var variableNames = Dataset.VariableNames.Select(x => new StringValue(x).AsReadOnly()).ToList();
400      ((ConstrainedValueParameter<StringValue>)TargetVariableParameter).ValidValues.Clear();
401      foreach (var variableName in variableNames)
402        ((ConstrainedValueParameter<StringValue>)TargetVariableParameter).ValidValues.Add(variableName);
403      TargetVariable = variableNames.First();
[3628]404      InputVariables = new CheckedItemList<StringValue>(variableNames).AsReadOnly();
[3651]405      InputVariables.SetItemCheckedState(variableNames.First(), false);
[3442]406      int middle = (int)(csvFileParser.Rows * 0.5);
[3294]407      TrainingSamplesStart = new IntValue(0);
[3442]408      TrainingSamplesEnd = new IntValue(middle);
409      TestSamplesStart = new IntValue(middle);
[3294]410      TestSamplesEnd = new IntValue(csvFileParser.Rows);
[3545]411      suppressEvents = false;
412      OnProblemDataChanged(EventArgs.Empty);
[3294]413    }
414  }
415}
Note: See TracBrowser for help on using the repository browser.