Free cookie consent management tool by TermsFeed Policy Generator

source: branches/PersistenceSpeedUp/HeuristicLab.Problems.DataAnalysis/3.3/DataAnalysisProblemData.cs @ 13792

Last change on this file since 13792 was 5809, checked in by mkommend, 14 years ago

#1418: Reintegrated branch into trunk.

File size: 20.8 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2011 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.IO;
25using System.Linq;
26using HeuristicLab.Common;
27using HeuristicLab.Core;
28using HeuristicLab.Data;
29using HeuristicLab.Parameters;
30using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
31using HeuristicLab.PluginInfrastructure;
32
33namespace HeuristicLab.Problems.DataAnalysis {
34  [Item("DataAnalysisProblemData", "Represents an item containing all data defining a data analysis problem.")]
35  [StorableClass]
36  [NonDiscoverableType]
37  public class DataAnalysisProblemData : ParameterizedNamedItem, IStorableContent {
38    protected bool suppressEvents = false;
39    #region IStorableContent Members
40    public string Filename { get; set; }
41    #endregion
42    #region default data
43    // y = x^4 + x^3 + x^2 + x
44    private static double[,] kozaF1 = new double[,] {
45{2.017885919, -1.449165046},
46{1.30060506,  -1.344523885},
47{1.147134798, -1.317989331},
48{0.877182504, -1.266142284},
49{0.852562452, -1.261020794},
50{0.431095788, -1.158793317},
51{0.112586002, -1.050908405},
52{0.04594507,  -1.021989402},
53{0.042572879, -1.020438113},
54{-0.074027291,  -0.959859562},
55{-0.109178553,  -0.938094706},
56{-0.259721109,  -0.803635355},
57{-0.272991057,  -0.387519561},
58{-0.161978191,  -0.193611001},
59{-0.102489983,  -0.114215349},
60{-0.01469968, -0.014918985},
61{-0.008863365,  -0.008942626},
62{0.026751057, 0.026054094},
63{0.166922436, 0.14309643},
64{0.176953808, 0.1504144},
65{0.190233418, 0.159916534},
66{0.199800708, 0.166635331},
67{0.261502822, 0.207600348},
68{0.30182879,  0.232370249},
69{0.83763905,  0.468046718}
70    };
71    #endregion
72    #region parameter properties
73    public ValueParameter<Dataset> DatasetParameter {
74      get { return (ValueParameter<Dataset>)Parameters["Dataset"]; }
75    }
76    public IValueParameter<StringValue> TargetVariableParameter {
77      get { return (IValueParameter<StringValue>)Parameters["TargetVariable"]; }
78    }
79    public IValueParameter<ICheckedItemList<StringValue>> InputVariablesParameter {
80      get { return (IValueParameter<ICheckedItemList<StringValue>>)Parameters["InputVariables"]; }
81    }
82    public IValueParameter<IntValue> TrainingSamplesStartParameter {
83      get { return (IValueParameter<IntValue>)Parameters["TrainingSamplesStart"]; }
84    }
85    public IValueParameter<IntValue> TrainingSamplesEndParameter {
86      get { return (IValueParameter<IntValue>)Parameters["TrainingSamplesEnd"]; }
87    }
88    public IValueParameter<IntValue> TestSamplesStartParameter {
89      get { return (IValueParameter<IntValue>)Parameters["TestSamplesStart"]; }
90    }
91    public IValueParameter<IntValue> TestSamplesEndParameter {
92      get { return (IValueParameter<IntValue>)Parameters["TestSamplesEnd"]; }
93    }
94    public IValueParameter<PercentValue> ValidationPercentageParameter {
95      get { return (IValueParameter<PercentValue>)Parameters["ValidationPercentage"]; }
96    }
97    #endregion
98
99    #region properties
100    public Dataset Dataset {
101      get { return DatasetParameter.Value; }
102      set {
103        if (value != Dataset) {
104          if (value == null) throw new ArgumentNullException();
105          DatasetParameter.Value = value;
106        }
107      }
108    }
109    public StringValue TargetVariable {
110      get { return TargetVariableParameter.Value; }
111      set {
112        if (value != TargetVariableParameter.Value) {
113          if (value == null) throw new ArgumentNullException();
114          if (TargetVariable != null) DeregisterStringValueEventHandlers(TargetVariable);
115          TargetVariableParameter.Value = value;
116        }
117      }
118    }
119    public ICheckedItemList<StringValue> InputVariables {
120      get { return InputVariablesParameter.Value; }
121      set {
122        if (value != InputVariables) {
123          if (value == null) throw new ArgumentNullException();
124          if (InputVariables != null) DeregisterInputVariablesEventHandlers();
125          InputVariablesParameter.Value = value;
126        }
127      }
128    }
129    public IntValue TrainingSamplesStart {
130      get { return TrainingSamplesStartParameter.Value; }
131      set {
132        if (value != TrainingSamplesStart) {
133          if (value == null) throw new ArgumentNullException();
134          if (TrainingSamplesStart != null) DeregisterValueTypeEventHandlers(TrainingSamplesStart);
135          TrainingSamplesStartParameter.Value = value;
136        }
137      }
138    }
139    public IntValue TrainingSamplesEnd {
140      get { return TrainingSamplesEndParameter.Value; }
141      set {
142        if (value != TrainingSamplesEnd) {
143          if (value == null) throw new ArgumentNullException();
144          if (TrainingSamplesEnd != null) DeregisterValueTypeEventHandlers(TrainingSamplesEnd);
145          TrainingSamplesEndParameter.Value = value;
146        }
147      }
148    }
149    public IntValue TestSamplesStart {
150      get { return TestSamplesStartParameter.Value; }
151      set {
152        if (value != TestSamplesStart) {
153          if (value == null) throw new ArgumentNullException();
154          if (TestSamplesStart != null) DeregisterValueTypeEventHandlers(TestSamplesStart);
155          TestSamplesStartParameter.Value = value;
156        }
157      }
158    }
159    public IntValue TestSamplesEnd {
160      get { return TestSamplesEndParameter.Value; }
161      set {
162        if (value != TestSamplesEnd) {
163          if (value == null) throw new ArgumentNullException();
164          if (TestSamplesEnd != null) DeregisterValueTypeEventHandlers(TestSamplesEnd);
165          TestSamplesEndParameter.Value = value;
166        }
167      }
168    }
169    public PercentValue ValidationPercentage {
170      get { return ValidationPercentageParameter.Value; }
171      set {
172        if (value != ValidationPercentage) {
173          if (value == null) throw new ArgumentNullException();
174          if (value.Value < 0 || value.Value > 1) throw new ArgumentException("ValidationPercentage must be between 0 and 1.");
175          if (ValidationPercentage != null) DeregisterValueTypeEventHandlers(ValidationPercentage);
176          ValidationPercentageParameter.Value = value;
177        }
178      }
179    }
180
181    public IEnumerable<int> TrainingIndizes {
182      get {
183        return Enumerable.Range(TrainingSamplesStart.Value, TrainingSamplesEnd.Value - TrainingSamplesStart.Value)
184                         .Where(i => i >= 0 && i < Dataset.Rows && (i < TestSamplesStart.Value || TestSamplesEnd.Value <= i));
185      }
186    }
187    public IEnumerable<int> TestIndizes {
188      get {
189        return Enumerable.Range(TestSamplesStart.Value, TestSamplesEnd.Value - TestSamplesStart.Value)
190           .Where(i => i >= 0 && i < Dataset.Rows);
191      }
192    }
193    #endregion
194
195    [StorableConstructor]
196    protected DataAnalysisProblemData(bool deserializing) : base(deserializing) { }
197    protected DataAnalysisProblemData(DataAnalysisProblemData original, Cloner cloner)
198      : base(original, cloner) {
199      RegisterParameterEventHandlers();
200      RegisterParameterValueEventHandlers();
201    }
202    public DataAnalysisProblemData()
203      : base() {
204      var inputVariables = new CheckedItemList<StringValue>();
205      StringValue inputVariable = new StringValue("x");
206      inputVariables.Add(inputVariable);
207      StringValue targetVariable = new StringValue("y");
208      var validTargetVariables = new ItemSet<StringValue>();
209      validTargetVariables.Add(targetVariable);
210      Parameters.Add(new ValueParameter<Dataset>("Dataset", new Dataset(new string[] { "y", "x" }, kozaF1)));
211      Parameters.Add(new ValueParameter<ICheckedItemList<StringValue>>("InputVariables", inputVariables.AsReadOnly()));
212      Parameters.Add(new ConstrainedValueParameter<StringValue>("TargetVariable", validTargetVariables, targetVariable));
213      Parameters.Add(new ValueParameter<IntValue>("TrainingSamplesStart", new IntValue(0)));
214      Parameters.Add(new ValueParameter<IntValue>("TrainingSamplesEnd", new IntValue(15)));
215      Parameters.Add(new ValueParameter<IntValue>("TestSamplesStart", new IntValue(15)));
216      Parameters.Add(new ValueParameter<IntValue>("TestSamplesEnd", new IntValue(25)));
217      Parameters.Add(new ValueParameter<PercentValue>("ValidationPercentage", "The relative amount of the training samples that should be used as validation set.", new PercentValue(0.5)));
218
219      DatasetParameter.ReactOnValueToStringChangedAndValueItemImageChanged = false;
220      RegisterParameterEventHandlers();
221      RegisterParameterValueEventHandlers();
222    }
223
224    public DataAnalysisProblemData(Dataset dataset, IEnumerable<string> inputVariables, string targetVariable,
225      int trainingSamplesStart, int trainingSamplesEnd, int testSamplesStart, int testSamplesEnd) {
226      var inputVariablesList = new CheckedItemList<StringValue>(inputVariables.Select(x => new StringValue(x)).ToList());
227      StringValue targetVariableValue = new StringValue(targetVariable);
228      var validTargetVariables = new ItemSet<StringValue>();
229      foreach (var variable in dataset.VariableNames)
230        if (variable != targetVariable)
231          validTargetVariables.Add(new StringValue(variable));
232      validTargetVariables.Add(targetVariableValue);
233      Parameters.Add(new ValueParameter<Dataset>("Dataset", dataset));
234      Parameters.Add(new ValueParameter<ICheckedItemList<StringValue>>("InputVariables", inputVariablesList.AsReadOnly()));
235      Parameters.Add(new ConstrainedValueParameter<StringValue>("TargetVariable", validTargetVariables, targetVariableValue));
236      Parameters.Add(new ValueParameter<IntValue>("TrainingSamplesStart", new IntValue(trainingSamplesStart)));
237      Parameters.Add(new ValueParameter<IntValue>("TrainingSamplesEnd", new IntValue(trainingSamplesEnd)));
238      Parameters.Add(new ValueParameter<IntValue>("TestSamplesStart", new IntValue(testSamplesStart)));
239      Parameters.Add(new ValueParameter<IntValue>("TestSamplesEnd", new IntValue(testSamplesEnd)));
240      Parameters.Add(new ValueParameter<PercentValue>("ValidationPercentage", "The relative amount of the training samples that should be used as validation set.", new PercentValue(0.5)));
241
242      DatasetParameter.ReactOnValueToStringChangedAndValueItemImageChanged = false;
243      RegisterParameterEventHandlers();
244      RegisterParameterValueEventHandlers();
245    }
246
247    public override IDeepCloneable Clone(Cloner cloner) {
248      return new DataAnalysisProblemData(this, cloner);
249    }
250
251    [StorableHook(HookType.AfterDeserialization)]
252    private void AfterDeserialization() {
253      if (!Parameters.ContainsKey("ValidationPercentage"))
254        Parameters.Add(new ValueParameter<PercentValue>("ValidationPercentage", "The relative amount of the training samples that should be used as validation set.", new PercentValue(0.5)));
255
256      DatasetParameter.ReactOnValueToStringChangedAndValueItemImageChanged = false;
257      RegisterParameterEventHandlers();
258      RegisterParameterValueEventHandlers();
259    }
260
261    #region events
262    public event EventHandler ProblemDataChanged;
263    protected virtual void OnProblemDataChanged(EventArgs e) {
264      if (TrainingSamplesStart.Value < 0) TrainingSamplesStart.Value = 0;
265      else if (TestSamplesStart.Value < 0) TestSamplesStart.Value = 0;
266      else if (TrainingSamplesEnd.Value > Dataset.Rows - 1) TrainingSamplesEnd.Value = Dataset.Rows - 1;
267      else if (TestSamplesEnd.Value > Dataset.Rows - 1) TestSamplesEnd.Value = Dataset.Rows - 1;
268      else if (TrainingSamplesStart.Value > TrainingSamplesEnd.Value) TrainingSamplesStart.Value = TestSamplesEnd.Value;
269      else if (TestSamplesStart.Value > TestSamplesEnd.Value) TestSamplesStart.Value = TestSamplesEnd.Value;
270      else if (ValidationPercentage.Value < 0) ValidationPercentage.Value = 0;
271      else if (ValidationPercentage.Value > 1) ValidationPercentage.Value = 1;
272      else if (!TrainingIndizes.Any()) throw new ArgumentException("No training samples are available.");
273      else if (!suppressEvents) {
274        var listeners = ProblemDataChanged;
275        if (listeners != null) listeners(this, e);
276      }
277    }
278
279    private void RegisterParameterEventHandlers() {
280      DatasetParameter.ValueChanged += new EventHandler(DatasetParameter_ValueChanged);
281      InputVariablesParameter.ValueChanged += new EventHandler(InputVariablesParameter_ValueChanged);
282      TargetVariableParameter.ValueChanged += new EventHandler(TargetVariableParameter_ValueChanged);
283      TrainingSamplesStartParameter.ValueChanged += new EventHandler(TrainingSamplesStartParameter_ValueChanged);
284      TrainingSamplesEndParameter.ValueChanged += new EventHandler(TrainingSamplesEndParameter_ValueChanged);
285      TestSamplesStartParameter.ValueChanged += new EventHandler(TestSamplesStartParameter_ValueChanged);
286      TestSamplesEndParameter.ValueChanged += new EventHandler(TestSamplesEndParameter_ValueChanged);
287      ValidationPercentageParameter.ValueChanged += new EventHandler(ValidationPercentageParameter_ValueChanged);
288    }
289
290    private void RegisterParameterValueEventHandlers() {
291      RegisterInputVariablesEventHandlers();
292      if (TargetVariable != null) RegisterStringValueEventHandlers(TargetVariable);
293      RegisterValueTypeEventHandlers(TrainingSamplesStart);
294      RegisterValueTypeEventHandlers(TrainingSamplesEnd);
295      RegisterValueTypeEventHandlers(TestSamplesStart);
296      RegisterValueTypeEventHandlers(TestSamplesEnd);
297      RegisterValueTypeEventHandlers(ValidationPercentage);
298    }
299
300
301    #region parameter value changed event handlers
302    private void DatasetParameter_ValueChanged(object sender, EventArgs e) {
303      OnProblemDataChanged(EventArgs.Empty);
304    }
305    private void InputVariablesParameter_ValueChanged(object sender, EventArgs e) {
306      RegisterInputVariablesEventHandlers();
307      OnProblemDataChanged(EventArgs.Empty);
308    }
309    private void TargetVariableParameter_ValueChanged(object sender, EventArgs e) {
310      if (TargetVariable != null) {
311        RegisterStringValueEventHandlers(TargetVariable);
312        OnProblemDataChanged(EventArgs.Empty);
313      }
314    }
315    private void TrainingSamplesStartParameter_ValueChanged(object sender, EventArgs e) {
316      RegisterValueTypeEventHandlers(TrainingSamplesStart);
317      OnProblemDataChanged(EventArgs.Empty);
318    }
319    private void TrainingSamplesEndParameter_ValueChanged(object sender, EventArgs e) {
320      RegisterValueTypeEventHandlers(TrainingSamplesEnd);
321      OnProblemDataChanged(EventArgs.Empty);
322    }
323    private void TestSamplesStartParameter_ValueChanged(object sender, EventArgs e) {
324      RegisterValueTypeEventHandlers(TestSamplesStart);
325      OnProblemDataChanged(EventArgs.Empty);
326    }
327    private void TestSamplesEndParameter_ValueChanged(object sender, EventArgs e) {
328      RegisterValueTypeEventHandlers(TestSamplesEnd);
329      OnProblemDataChanged(EventArgs.Empty);
330    }
331    private void ValidationPercentageParameter_ValueChanged(object sender, EventArgs e) {
332      RegisterValueTypeEventHandlers(ValidationPercentage);
333      OnProblemDataChanged(EventArgs.Empty);
334    }
335    #endregion
336
337    private void RegisterInputVariablesEventHandlers() {
338      InputVariables.CollectionReset += new HeuristicLab.Collections.CollectionItemsChangedEventHandler<HeuristicLab.Collections.IndexedItem<StringValue>>(InputVariables_CollectionReset);
339      InputVariables.ItemsAdded += new HeuristicLab.Collections.CollectionItemsChangedEventHandler<HeuristicLab.Collections.IndexedItem<StringValue>>(InputVariables_ItemsAdded);
340      InputVariables.ItemsRemoved += new HeuristicLab.Collections.CollectionItemsChangedEventHandler<HeuristicLab.Collections.IndexedItem<StringValue>>(InputVariables_ItemsRemoved);
341      InputVariables.CheckedItemsChanged += new HeuristicLab.Collections.CollectionItemsChangedEventHandler<HeuristicLab.Collections.IndexedItem<StringValue>>(InputVariables_CheckedItemsChanged);
342      foreach (var item in InputVariables) {
343        item.ValueChanged += new EventHandler(InputVariable_ValueChanged);
344      }
345    }
346
347    private void DeregisterInputVariablesEventHandlers() {
348      InputVariables.CollectionReset -= new HeuristicLab.Collections.CollectionItemsChangedEventHandler<HeuristicLab.Collections.IndexedItem<StringValue>>(InputVariables_CollectionReset);
349      InputVariables.ItemsAdded -= new HeuristicLab.Collections.CollectionItemsChangedEventHandler<HeuristicLab.Collections.IndexedItem<StringValue>>(InputVariables_ItemsAdded);
350      InputVariables.ItemsRemoved -= new HeuristicLab.Collections.CollectionItemsChangedEventHandler<HeuristicLab.Collections.IndexedItem<StringValue>>(InputVariables_ItemsRemoved);
351      InputVariables.CheckedItemsChanged -= new HeuristicLab.Collections.CollectionItemsChangedEventHandler<HeuristicLab.Collections.IndexedItem<StringValue>>(InputVariables_CheckedItemsChanged);
352      foreach (var item in InputVariables) {
353        item.ValueChanged -= new EventHandler(InputVariable_ValueChanged);
354      }
355    }
356
357    private void InputVariables_CheckedItemsChanged(object sender, HeuristicLab.Collections.CollectionItemsChangedEventArgs<HeuristicLab.Collections.IndexedItem<StringValue>> e) {
358      OnProblemDataChanged(e);
359    }
360    private void InputVariables_ItemsRemoved(object sender, HeuristicLab.Collections.CollectionItemsChangedEventArgs<HeuristicLab.Collections.IndexedItem<StringValue>> e) {
361      foreach (var indexedItem in e.Items)
362        indexedItem.Value.ValueChanged -= new EventHandler(InputVariable_ValueChanged);
363      OnProblemDataChanged(e);
364    }
365    private void InputVariables_ItemsAdded(object sender, HeuristicLab.Collections.CollectionItemsChangedEventArgs<HeuristicLab.Collections.IndexedItem<StringValue>> e) {
366      foreach (var indexedItem in e.Items)
367        indexedItem.Value.ValueChanged += new EventHandler(InputVariable_ValueChanged);
368      OnProblemDataChanged(e);
369    }
370    private void InputVariables_CollectionReset(object sender, HeuristicLab.Collections.CollectionItemsChangedEventArgs<HeuristicLab.Collections.IndexedItem<StringValue>> e) {
371      foreach (var indexedItem in e.OldItems)
372        indexedItem.Value.ValueChanged -= new EventHandler(InputVariable_ValueChanged);
373      OnProblemDataChanged(e);
374    }
375    private void InputVariable_ValueChanged(object sender, EventArgs e) {
376      OnProblemDataChanged(e);
377    }
378
379    #region helper
380    private void RegisterValueTypeEventHandlers<T>(ValueTypeValue<T> value) where T : struct {
381      value.ValueChanged += new EventHandler(value_ValueChanged);
382    }
383    private void DeregisterValueTypeEventHandlers<T>(ValueTypeValue<T> value) where T : struct {
384      value.ValueChanged -= new EventHandler(value_ValueChanged);
385    }
386    private void RegisterStringValueEventHandlers(StringValue value) {
387      value.ValueChanged += new EventHandler(value_ValueChanged);
388    }
389    private void DeregisterStringValueEventHandlers(StringValue value) {
390      value.ValueChanged -= new EventHandler(value_ValueChanged);
391    }
392
393    private void value_ValueChanged(object sender, EventArgs e) {
394      OnProblemDataChanged(e);
395    }
396    #endregion
397    #endregion
398
399    public virtual void ImportFromFile(string fileName) {
400      var csvFileParser = new TableFileParser();
401      csvFileParser.Parse(fileName);
402      suppressEvents = true;
403      Name = "Data imported from " + Path.GetFileName(fileName);
404      Dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
405      Dataset.Name = Path.GetFileName(fileName);
406      var variableNames = Dataset.VariableNames.Select(x => new StringValue(x).AsReadOnly()).ToList();
407      ((ConstrainedValueParameter<StringValue>)TargetVariableParameter).ValidValues.Clear();
408      foreach (var variableName in variableNames)
409        ((ConstrainedValueParameter<StringValue>)TargetVariableParameter).ValidValues.Add(variableName);
410      TargetVariable = variableNames.First();
411      InputVariables = new CheckedItemList<StringValue>(variableNames).AsReadOnly();
412      InputVariables.SetItemCheckedState(variableNames.First(), false);
413      int middle = (int)(csvFileParser.Rows * 0.5);
414      TrainingSamplesEnd = new IntValue(middle);
415      TrainingSamplesStart = new IntValue(0);
416      TestSamplesEnd = new IntValue(csvFileParser.Rows);
417      TestSamplesStart = new IntValue(middle);
418      suppressEvents = false;
419      OnProblemDataChanged(EventArgs.Empty);
420    }
421  }
422}
Note: See TracBrowser for help on using the repository browser.