Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis/3.3/DataAnalysisProblemData.cs @ 6866

Last change on this file since 6866 was 5275, checked in by gkronber, 13 years ago

Merged changes from trunk to data analysis exploration branch and added fractional distance metric evaluator. #1142

File size: 20.8 KB
Line 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion
21
22using System;
23using System.Collections.Generic;
24using System.IO;
25using System.Linq;
26using HeuristicLab.Common;
27using HeuristicLab.Core;
28using HeuristicLab.Data;
29using HeuristicLab.Parameters;
30using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
31
namespace HeuristicLab.Problems.DataAnalysis {
  /// <summary>
  /// Bundles everything that defines a data analysis problem: the dataset, the target
  /// variable, the checked list of input variables, the training and test partitions and
  /// the validation percentage. Each of these is held as a parameter, and changes to a
  /// parameter or to its current value are funnelled into <see cref="ProblemDataChanged"/>.
  /// </summary>
  /// <remarks>
  /// Partition boundaries are half-open: a partition covers the rows
  /// <c>[start, end)</c>, as implemented by <see cref="TrainingIndizes"/> and
  /// <see cref="TestIndizes"/>.
  /// </remarks>
  [Item("DataAnalysisProblemData", "Represents an item containing all data defining a data analysis problem.")]
  [StorableClass]
  public class DataAnalysisProblemData : ParameterizedNamedItem, IStorableContent {
    // Description shared by every place that creates the "ValidationPercentage" parameter.
    private const string ValidationPercentageDescription = "The relative amount of the training samples that should be used as validation set.";

    /// <summary>
    /// While <c>true</c>, <see cref="OnProblemDataChanged"/> still repairs inconsistent
    /// values but does not raise <see cref="ProblemDataChanged"/>.
    /// </summary>
    protected bool suppressEvents = false;

    #region IStorableContent Members
    public string Filename { get; set; }
    #endregion

    #region default data
    // y = x^4 + x^3 + x^2 + x
    // Each row is { y, x }, matching the variable names passed in the default constructor.
    private static readonly double[,] kozaF1 = new double[,] {
{2.017885919, -1.449165046},
{1.30060506,  -1.344523885},
{1.147134798, -1.317989331},
{0.877182504, -1.266142284},
{0.852562452, -1.261020794},
{0.431095788, -1.158793317},
{0.112586002, -1.050908405},
{0.04594507,  -1.021989402},
{0.042572879, -1.020438113},
{-0.074027291,  -0.959859562},
{-0.109178553,  -0.938094706},
{-0.259721109,  -0.803635355},
{-0.272991057,  -0.387519561},
{-0.161978191,  -0.193611001},
{-0.102489983,  -0.114215349},
{-0.01469968, -0.014918985},
{-0.008863365,  -0.008942626},
{0.026751057, 0.026054094},
{0.166922436, 0.14309643},
{0.176953808, 0.1504144},
{0.190233418, 0.159916534},
{0.199800708, 0.166635331},
{0.261502822, 0.207600348},
{0.30182879,  0.232370249},
{0.83763905,  0.468046718}
    };
    #endregion

    #region parameter properties
    public ValueParameter<Dataset> DatasetParameter {
      get { return (ValueParameter<Dataset>)Parameters["Dataset"]; }
    }
    public IValueParameter<StringValue> TargetVariableParameter {
      get { return (IValueParameter<StringValue>)Parameters["TargetVariable"]; }
    }
    public IValueParameter<ICheckedItemList<StringValue>> InputVariablesParameter {
      get { return (IValueParameter<ICheckedItemList<StringValue>>)Parameters["InputVariables"]; }
    }
    public IValueParameter<IntValue> TrainingSamplesStartParameter {
      get { return (IValueParameter<IntValue>)Parameters["TrainingSamplesStart"]; }
    }
    public IValueParameter<IntValue> TrainingSamplesEndParameter {
      get { return (IValueParameter<IntValue>)Parameters["TrainingSamplesEnd"]; }
    }
    public IValueParameter<IntValue> TestSamplesStartParameter {
      get { return (IValueParameter<IntValue>)Parameters["TestSamplesStart"]; }
    }
    public IValueParameter<IntValue> TestSamplesEndParameter {
      get { return (IValueParameter<IntValue>)Parameters["TestSamplesEnd"]; }
    }
    public IValueParameter<PercentValue> ValidationPercentageParameter {
      get { return (IValueParameter<PercentValue>)Parameters["ValidationPercentage"]; }
    }
    #endregion

    #region properties
    // All setters below follow the same pattern: ignore an assignment of the current
    // instance, reject null, detach this object's handlers from the value that is being
    // replaced, then store the new value in the parameter. The parameter's ValueChanged
    // handler attaches the handlers to the new value.

    /// <summary>Gets or sets the dataset.</summary>
    /// <exception cref="ArgumentNullException">The assigned value is <c>null</c>.</exception>
    public Dataset Dataset {
      get { return DatasetParameter.Value; }
      set {
        if (value != Dataset) {
          if (value == null) throw new ArgumentNullException("value");
          DatasetParameter.Value = value;
        }
      }
    }
    /// <summary>Gets or sets the name of the target variable.</summary>
    /// <exception cref="ArgumentNullException">The assigned value is <c>null</c>.</exception>
    public StringValue TargetVariable {
      get { return TargetVariableParameter.Value; }
      set {
        if (value != TargetVariableParameter.Value) {
          if (value == null) throw new ArgumentNullException("value");
          if (TargetVariable != null) DeregisterStringValueEventHandlers(TargetVariable);
          TargetVariableParameter.Value = value;
        }
      }
    }
    /// <summary>Gets or sets the checked list of input variable names.</summary>
    /// <exception cref="ArgumentNullException">The assigned value is <c>null</c>.</exception>
    public ICheckedItemList<StringValue> InputVariables {
      get { return InputVariablesParameter.Value; }
      set {
        if (value != InputVariables) {
          if (value == null) throw new ArgumentNullException("value");
          if (InputVariables != null) DeregisterInputVariablesEventHandlers();
          InputVariablesParameter.Value = value;
        }
      }
    }
    /// <summary>Gets or sets the first row (inclusive) of the training partition.</summary>
    /// <exception cref="ArgumentNullException">The assigned value is <c>null</c>.</exception>
    public IntValue TrainingSamplesStart {
      get { return TrainingSamplesStartParameter.Value; }
      set {
        if (value != TrainingSamplesStart) {
          if (value == null) throw new ArgumentNullException("value");
          if (TrainingSamplesStart != null) DeregisterValueTypeEventHandlers(TrainingSamplesStart);
          TrainingSamplesStartParameter.Value = value;
        }
      }
    }
    /// <summary>Gets or sets the end row (exclusive) of the training partition.</summary>
    /// <exception cref="ArgumentNullException">The assigned value is <c>null</c>.</exception>
    public IntValue TrainingSamplesEnd {
      get { return TrainingSamplesEndParameter.Value; }
      set {
        if (value != TrainingSamplesEnd) {
          if (value == null) throw new ArgumentNullException("value");
          if (TrainingSamplesEnd != null) DeregisterValueTypeEventHandlers(TrainingSamplesEnd);
          TrainingSamplesEndParameter.Value = value;
        }
      }
    }
    /// <summary>Gets or sets the first row (inclusive) of the test partition.</summary>
    /// <exception cref="ArgumentNullException">The assigned value is <c>null</c>.</exception>
    public IntValue TestSamplesStart {
      get { return TestSamplesStartParameter.Value; }
      set {
        if (value != TestSamplesStart) {
          if (value == null) throw new ArgumentNullException("value");
          if (TestSamplesStart != null) DeregisterValueTypeEventHandlers(TestSamplesStart);
          TestSamplesStartParameter.Value = value;
        }
      }
    }
    /// <summary>Gets or sets the end row (exclusive) of the test partition.</summary>
    /// <exception cref="ArgumentNullException">The assigned value is <c>null</c>.</exception>
    public IntValue TestSamplesEnd {
      get { return TestSamplesEndParameter.Value; }
      set {
        if (value != TestSamplesEnd) {
          if (value == null) throw new ArgumentNullException("value");
          if (TestSamplesEnd != null) DeregisterValueTypeEventHandlers(TestSamplesEnd);
          TestSamplesEndParameter.Value = value;
        }
      }
    }
    /// <summary>
    /// Gets or sets the relative amount of the training samples that should be used as
    /// validation set.
    /// </summary>
    /// <exception cref="ArgumentNullException">The assigned value is <c>null</c>.</exception>
    /// <exception cref="ArgumentException">The assigned value is outside of [0, 1].</exception>
    public PercentValue ValidationPercentage {
      get { return ValidationPercentageParameter.Value; }
      set {
        if (value != ValidationPercentage) {
          if (value == null) throw new ArgumentNullException("value");
          if (value.Value < 0 || value.Value > 1) throw new ArgumentException("ValidationPercentage must be between 0 and 1.");
          if (ValidationPercentage != null) DeregisterValueTypeEventHandlers(ValidationPercentage);
          ValidationPercentageParameter.Value = value;
        }
      }
    }

    /// <summary>
    /// Gets the row indices in <c>[TrainingSamplesStart, TrainingSamplesEnd)</c> that exist
    /// in the dataset and do not fall into <c>[TestSamplesStart, TestSamplesEnd)</c>.
    /// The sequence is evaluated lazily against the current parameter values.
    /// </summary>
    public IEnumerable<int> TrainingIndizes {
      get {
        return Enumerable.Range(TrainingSamplesStart.Value, TrainingSamplesEnd.Value - TrainingSamplesStart.Value)
                         .Where(i => i >= 0 && i < Dataset.Rows && (i < TestSamplesStart.Value || TestSamplesEnd.Value <= i));
      }
    }
    /// <summary>
    /// Gets the row indices in <c>[TestSamplesStart, TestSamplesEnd)</c> that exist in the
    /// dataset. The sequence is evaluated lazily against the current parameter values.
    /// </summary>
    public IEnumerable<int> TestIndizes {
      get {
        return Enumerable.Range(TestSamplesStart.Value, TestSamplesEnd.Value - TestSamplesStart.Value)
           .Where(i => i >= 0 && i < Dataset.Rows);
      }
    }
    #endregion

    [StorableConstructor]
    protected DataAnalysisProblemData(bool deserializing) : base(deserializing) { }
    /// <summary>Cloning constructor; re-attaches the event handlers on the cloned parameters.</summary>
    protected DataAnalysisProblemData(DataAnalysisProblemData original, Cloner cloner)
      : base(original, cloner) {
      RegisterParameterEventHandlers();
      RegisterParameterValueEventHandlers();
    }
    /// <summary>
    /// Creates problem data for the built-in example dataset (variables "y" and "x",
    /// 25 rows), using rows [0, 15) for training and rows [15, 25) for testing.
    /// </summary>
    public DataAnalysisProblemData()
      : base() {
      var inputVariables = new CheckedItemList<StringValue>();
      StringValue inputVariable = new StringValue("x");
      inputVariables.Add(inputVariable);
      StringValue targetVariable = new StringValue("y");
      var validTargetVariables = new ItemSet<StringValue>();
      validTargetVariables.Add(targetVariable);
      Parameters.Add(new ValueParameter<Dataset>("Dataset", new Dataset(new string[] { "y", "x" }, kozaF1)));
      Parameters.Add(new ValueParameter<ICheckedItemList<StringValue>>("InputVariables", inputVariables.AsReadOnly()));
      Parameters.Add(new ConstrainedValueParameter<StringValue>("TargetVariable", validTargetVariables, targetVariable));
      Parameters.Add(new ValueParameter<IntValue>("TrainingSamplesStart", new IntValue(0)));
      Parameters.Add(new ValueParameter<IntValue>("TrainingSamplesEnd", new IntValue(15)));
      Parameters.Add(new ValueParameter<IntValue>("TestSamplesStart", new IntValue(15)));
      Parameters.Add(new ValueParameter<IntValue>("TestSamplesEnd", new IntValue(25)));
      Parameters.Add(new ValueParameter<PercentValue>("ValidationPercentage", ValidationPercentageDescription, new PercentValue(0.5)));

      DatasetParameter.ReactOnValueToStringChangedAndValueItemImageChanged = false;
      RegisterParameterEventHandlers();
      RegisterParameterValueEventHandlers();
    }

    /// <summary>
    /// Creates problem data for the given dataset and partitions. Every variable of the
    /// dataset becomes a valid choice for the target variable; the validation percentage
    /// is initialized to 0.5.
    /// </summary>
    /// <param name="dataset">The dataset to analyze.</param>
    /// <param name="inputVariables">Names of the input variables.</param>
    /// <param name="targetVariable">Name of the target variable.</param>
    /// <param name="trainingSamplesStart">First row (inclusive) of the training partition.</param>
    /// <param name="trainingSamplesEnd">End row (exclusive) of the training partition.</param>
    /// <param name="testSamplesStart">First row (inclusive) of the test partition.</param>
    /// <param name="testSamplesEnd">End row (exclusive) of the test partition.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="dataset"/> or <paramref name="inputVariables"/> is <c>null</c>.
    /// </exception>
    public DataAnalysisProblemData(Dataset dataset, IEnumerable<string> inputVariables, string targetVariable,
      int trainingSamplesStart, int trainingSamplesEnd, int testSamplesStart, int testSamplesEnd) {
      if (dataset == null) throw new ArgumentNullException("dataset");
      if (inputVariables == null) throw new ArgumentNullException("inputVariables");

      var inputVariablesList = new CheckedItemList<StringValue>(inputVariables.Select(x => new StringValue(x)).ToList());
      StringValue targetVariableValue = new StringValue(targetVariable);
      var validTargetVariables = new ItemSet<StringValue>();
      // The target variable is added as the very instance that becomes the parameter
      // value; all other variables get fresh StringValue instances.
      foreach (var variable in dataset.VariableNames)
        if (variable != targetVariable)
          validTargetVariables.Add(new StringValue(variable));
      validTargetVariables.Add(targetVariableValue);
      Parameters.Add(new ValueParameter<Dataset>("Dataset", dataset));
      Parameters.Add(new ValueParameter<ICheckedItemList<StringValue>>("InputVariables", inputVariablesList.AsReadOnly()));
      Parameters.Add(new ConstrainedValueParameter<StringValue>("TargetVariable", validTargetVariables, targetVariableValue));
      Parameters.Add(new ValueParameter<IntValue>("TrainingSamplesStart", new IntValue(trainingSamplesStart)));
      Parameters.Add(new ValueParameter<IntValue>("TrainingSamplesEnd", new IntValue(trainingSamplesEnd)));
      Parameters.Add(new ValueParameter<IntValue>("TestSamplesStart", new IntValue(testSamplesStart)));
      Parameters.Add(new ValueParameter<IntValue>("TestSamplesEnd", new IntValue(testSamplesEnd)));
      Parameters.Add(new ValueParameter<PercentValue>("ValidationPercentage", ValidationPercentageDescription, new PercentValue(0.5)));

      DatasetParameter.ReactOnValueToStringChangedAndValueItemImageChanged = false;
      RegisterParameterEventHandlers();
      RegisterParameterValueEventHandlers();
    }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new DataAnalysisProblemData(this, cloner);
    }

    /// <summary>
    /// Adds the "ValidationPercentage" parameter when the deserialized instance does not
    /// contain it, then re-attaches all event handlers.
    /// </summary>
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      if (!Parameters.ContainsKey("ValidationPercentage"))
        Parameters.Add(new ValueParameter<PercentValue>("ValidationPercentage", ValidationPercentageDescription, new PercentValue(0.5)));

      DatasetParameter.ReactOnValueToStringChangedAndValueItemImageChanged = false;
      RegisterParameterEventHandlers();
      RegisterParameterValueEventHandlers();
    }

    #region events
    /// <summary>Raised after a change once all partition values are consistent.</summary>
    public event EventHandler ProblemDataChanged;

    /// <summary>
    /// Repairs at most one inconsistent value per call, or raises
    /// <see cref="ProblemDataChanged"/> when nothing needs repair and events are not
    /// suppressed.
    /// </summary>
    /// <remarks>
    /// The branches are mutually exclusive. A repairing branch only assigns a corrected
    /// value and relies on the value-changed handlers registered by this class to call
    /// this method again (presumably the value types raise ValueChanged on assignment;
    /// that is defined outside this file).
    /// </remarks>
    /// <exception cref="ArgumentException">No training samples are available.</exception>
    protected virtual void OnProblemDataChanged(EventArgs e) {
      if (TrainingSamplesStart.Value < 0) TrainingSamplesStart.Value = 0;
      else if (TestSamplesStart.Value < 0) TestSamplesStart.Value = 0;
      // End indices are exclusive, so Dataset.Rows itself is the largest valid end.
      else if (TrainingSamplesEnd.Value > Dataset.Rows) TrainingSamplesEnd.Value = Dataset.Rows;
      else if (TestSamplesEnd.Value > Dataset.Rows) TestSamplesEnd.Value = Dataset.Rows;
      // An inverted partition is collapsed onto its own end.
      else if (TrainingSamplesStart.Value > TrainingSamplesEnd.Value) TrainingSamplesStart.Value = TrainingSamplesEnd.Value;
      else if (TestSamplesStart.Value > TestSamplesEnd.Value) TestSamplesStart.Value = TestSamplesEnd.Value;
      else if (ValidationPercentage.Value < 0) ValidationPercentage.Value = 0;
      else if (ValidationPercentage.Value > 1) ValidationPercentage.Value = 1;
      else if (!TrainingIndizes.Any()) throw new ArgumentException("No training samples are available.");
      else if (!suppressEvents) {
        var listeners = ProblemDataChanged;
        if (listeners != null) listeners(this, e);
      }
    }

    // Subscribes to ValueChanged of every parameter, i.e. to replacement of a value object.
    private void RegisterParameterEventHandlers() {
      DatasetParameter.ValueChanged += new EventHandler(DatasetParameter_ValueChanged);
      InputVariablesParameter.ValueChanged += new EventHandler(InputVariablesParameter_ValueChanged);
      TargetVariableParameter.ValueChanged += new EventHandler(TargetVariableParameter_ValueChanged);
      TrainingSamplesStartParameter.ValueChanged += new EventHandler(TrainingSamplesStartParameter_ValueChanged);
      TrainingSamplesEndParameter.ValueChanged += new EventHandler(TrainingSamplesEndParameter_ValueChanged);
      TestSamplesStartParameter.ValueChanged += new EventHandler(TestSamplesStartParameter_ValueChanged);
      TestSamplesEndParameter.ValueChanged += new EventHandler(TestSamplesEndParameter_ValueChanged);
      ValidationPercentageParameter.ValueChanged += new EventHandler(ValidationPercentageParameter_ValueChanged);
    }

    // Subscribes to the change events of the current value objects of the parameters.
    private void RegisterParameterValueEventHandlers() {
      RegisterInputVariablesEventHandlers();
      if (TargetVariable != null) RegisterStringValueEventHandlers(TargetVariable);
      RegisterValueTypeEventHandlers(TrainingSamplesStart);
      RegisterValueTypeEventHandlers(TrainingSamplesEnd);
      RegisterValueTypeEventHandlers(TestSamplesStart);
      RegisterValueTypeEventHandlers(TestSamplesEnd);
      RegisterValueTypeEventHandlers(ValidationPercentage);
    }


    #region parameter value changed event handlers
    // Each handler attaches this object's handlers to the parameter's new value and then
    // reports the change.
    private void DatasetParameter_ValueChanged(object sender, EventArgs e) {
      OnProblemDataChanged(EventArgs.Empty);
    }
    private void InputVariablesParameter_ValueChanged(object sender, EventArgs e) {
      RegisterInputVariablesEventHandlers();
      OnProblemDataChanged(EventArgs.Empty);
    }
    private void TargetVariableParameter_ValueChanged(object sender, EventArgs e) {
      if (TargetVariable != null) {
        RegisterStringValueEventHandlers(TargetVariable);
        OnProblemDataChanged(EventArgs.Empty);
      }
    }
    private void TrainingSamplesStartParameter_ValueChanged(object sender, EventArgs e) {
      RegisterValueTypeEventHandlers(TrainingSamplesStart);
      OnProblemDataChanged(EventArgs.Empty);
    }
    private void TrainingSamplesEndParameter_ValueChanged(object sender, EventArgs e) {
      RegisterValueTypeEventHandlers(TrainingSamplesEnd);
      OnProblemDataChanged(EventArgs.Empty);
    }
    private void TestSamplesStartParameter_ValueChanged(object sender, EventArgs e) {
      RegisterValueTypeEventHandlers(TestSamplesStart);
      OnProblemDataChanged(EventArgs.Empty);
    }
    private void TestSamplesEndParameter_ValueChanged(object sender, EventArgs e) {
      RegisterValueTypeEventHandlers(TestSamplesEnd);
      OnProblemDataChanged(EventArgs.Empty);
    }
    private void ValidationPercentageParameter_ValueChanged(object sender, EventArgs e) {
      RegisterValueTypeEventHandlers(ValidationPercentage);
      OnProblemDataChanged(EventArgs.Empty);
    }
    #endregion

    // Subscribes to the collection events of the current input variable list and to
    // ValueChanged of each item it contains.
    private void RegisterInputVariablesEventHandlers() {
      InputVariables.CollectionReset += new HeuristicLab.Collections.CollectionItemsChangedEventHandler<HeuristicLab.Collections.IndexedItem<StringValue>>(InputVariables_CollectionReset);
      InputVariables.ItemsAdded += new HeuristicLab.Collections.CollectionItemsChangedEventHandler<HeuristicLab.Collections.IndexedItem<StringValue>>(InputVariables_ItemsAdded);
      InputVariables.ItemsRemoved += new HeuristicLab.Collections.CollectionItemsChangedEventHandler<HeuristicLab.Collections.IndexedItem<StringValue>>(InputVariables_ItemsRemoved);
      InputVariables.CheckedItemsChanged += new HeuristicLab.Collections.CollectionItemsChangedEventHandler<HeuristicLab.Collections.IndexedItem<StringValue>>(InputVariables_CheckedItemsChanged);
      foreach (var item in InputVariables) {
        item.ValueChanged += new EventHandler(InputVariable_ValueChanged);
      }
    }

    // Exact mirror of RegisterInputVariablesEventHandlers for the current list.
    private void DeregisterInputVariablesEventHandlers() {
      InputVariables.CollectionReset -= new HeuristicLab.Collections.CollectionItemsChangedEventHandler<HeuristicLab.Collections.IndexedItem<StringValue>>(InputVariables_CollectionReset);
      InputVariables.ItemsAdded -= new HeuristicLab.Collections.CollectionItemsChangedEventHandler<HeuristicLab.Collections.IndexedItem<StringValue>>(InputVariables_ItemsAdded);
      InputVariables.ItemsRemoved -= new HeuristicLab.Collections.CollectionItemsChangedEventHandler<HeuristicLab.Collections.IndexedItem<StringValue>>(InputVariables_ItemsRemoved);
      InputVariables.CheckedItemsChanged -= new HeuristicLab.Collections.CollectionItemsChangedEventHandler<HeuristicLab.Collections.IndexedItem<StringValue>>(InputVariables_CheckedItemsChanged);
      foreach (var item in InputVariables) {
        item.ValueChanged -= new EventHandler(InputVariable_ValueChanged);
      }
    }

    private void InputVariables_CheckedItemsChanged(object sender, HeuristicLab.Collections.CollectionItemsChangedEventArgs<HeuristicLab.Collections.IndexedItem<StringValue>> e) {
      OnProblemDataChanged(e);
    }
    private void InputVariables_ItemsRemoved(object sender, HeuristicLab.Collections.CollectionItemsChangedEventArgs<HeuristicLab.Collections.IndexedItem<StringValue>> e) {
      foreach (var indexedItem in e.Items)
        indexedItem.Value.ValueChanged -= new EventHandler(InputVariable_ValueChanged);
      OnProblemDataChanged(e);
    }
    private void InputVariables_ItemsAdded(object sender, HeuristicLab.Collections.CollectionItemsChangedEventArgs<HeuristicLab.Collections.IndexedItem<StringValue>> e) {
      foreach (var indexedItem in e.Items)
        indexedItem.Value.ValueChanged += new EventHandler(InputVariable_ValueChanged);
      OnProblemDataChanged(e);
    }
    private void InputVariables_CollectionReset(object sender, HeuristicLab.Collections.CollectionItemsChangedEventArgs<HeuristicLab.Collections.IndexedItem<StringValue>> e) {
      foreach (var indexedItem in e.OldItems)
        indexedItem.Value.ValueChanged -= new EventHandler(InputVariable_ValueChanged);
      // The items present after the reset must be observed as well, otherwise later
      // changes of their values would go unnoticed.
      foreach (var indexedItem in e.Items)
        indexedItem.Value.ValueChanged += new EventHandler(InputVariable_ValueChanged);
      OnProblemDataChanged(e);
    }
    private void InputVariable_ValueChanged(object sender, EventArgs e) {
      OnProblemDataChanged(e);
    }

    #region helper
    private void RegisterValueTypeEventHandlers<T>(ValueTypeValue<T> value) where T : struct {
      value.ValueChanged += new EventHandler(value_ValueChanged);
    }
    private void DeregisterValueTypeEventHandlers<T>(ValueTypeValue<T> value) where T : struct {
      value.ValueChanged -= new EventHandler(value_ValueChanged);
    }
    private void RegisterStringValueEventHandlers(StringValue value) {
      value.ValueChanged += new EventHandler(value_ValueChanged);
    }
    private void DeregisterStringValueEventHandlers(StringValue value) {
      value.ValueChanged -= new EventHandler(value_ValueChanged);
    }

    private void value_ValueChanged(object sender, EventArgs e) {
      OnProblemDataChanged(e);
    }
    #endregion
    #endregion

    /// <summary>
    /// Replaces the problem data with the contents of a table file. The first variable of
    /// the file becomes the target variable and is unchecked in the input variable list;
    /// the first half of the rows is used for training and the rest for testing.
    /// <see cref="ProblemDataChanged"/> is suppressed during the update and raised once
    /// at the end.
    /// </summary>
    /// <param name="fileName">Path of the file to import.</param>
    public virtual void ImportFromFile(string fileName) {
      var csvFileParser = new TableFileParser();
      csvFileParser.Parse(fileName);
      suppressEvents = true;
      try {
        Name = "Data imported from " + Path.GetFileName(fileName);
        Dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
        Dataset.Name = Path.GetFileName(fileName);
        var variableNames = Dataset.VariableNames.Select(x => new StringValue(x).AsReadOnly()).ToList();
        var targetVariableParameter = (ConstrainedValueParameter<StringValue>)TargetVariableParameter;
        targetVariableParameter.ValidValues.Clear();
        foreach (var variableName in variableNames)
          targetVariableParameter.ValidValues.Add(variableName);
        TargetVariable = variableNames.First();
        InputVariables = new CheckedItemList<StringValue>(variableNames).AsReadOnly();
        InputVariables.SetItemCheckedState(variableNames.First(), false);
        int middle = (int)(csvFileParser.Rows * 0.5);
        TrainingSamplesEnd = new IntValue(middle);
        TrainingSamplesStart = new IntValue(0);
        TestSamplesEnd = new IntValue(csvFileParser.Rows);
        TestSamplesStart = new IntValue(middle);
      }
      finally {
        // Never leave the instance permanently muted when the import fails half-way.
        suppressEvents = false;
      }
      OnProblemDataChanged(EventArgs.Empty);
    }
  }
}
Note: See TracBrowser for help on using the repository browser.