Free cookie consent management tool by TermsFeed Policy Generator

source: branches/LearningClassifierSystems/HeuristicLab.Problems.DecisionListClassification/3.3/DecisionListClassificationProblemData.cs @ 9605

Last change on this file since 9605 was 9605, checked in by sforsten, 11 years ago

#1980:

  • set plugin dependencies
  • added smart initialization
  • added hierarchical selection
  • fixed major and minor default rule
  • fixed several smaller bugs
  • some refactoring has been done
File size: 14.8 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2012 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Collections;
26using HeuristicLab.Common;
27using HeuristicLab.Core;
28using HeuristicLab.Data;
29using HeuristicLab.Encodings.DecisionList;
30using HeuristicLab.Optimization.Operators.LCS;
31using HeuristicLab.Parameters;
32using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
33using HeuristicLab.Problems.DataAnalysis;
34
35namespace HeuristicLab.Problems.DecisionListClassification {
36  [StorableClass]
37  [Item("DecisionListClassificationProblemData", "")]
38  public class DecisionListClassificationProblemData : ParameterizedNamedItem, IDecisionListClassificationProblemData {
39
40    #region default data
41    public static string[] defaultVariableNames = new string[] { "a", "b", "c", "d", "e", "f", "g" };
42    public static double[,] defaultData = new double[,]{
43      {0,0,1,1,0,0,0},
44      {0,1,1,1,0,0,0},
45      {0,0,1,0,0,0,1},
46      {1,0,1,0,1,1,0},
47      {0,0,1,1,0,0,0},
48      {0,1,1,1,0,0,0},
49      {0,0,1,0,0,0,1},
50      {1,0,1,0,1,1,0}
51    };
52    #endregion
53
54    #region parameter properites
55    public IFixedValueParameter<Dataset> DatasetParameter {
56      get { return (IFixedValueParameter<Dataset>)Parameters["Dataset"]; }
57    }
58    public IFixedValueParameter<ReadOnlyCheckedItemList<StringValue>> ConditionVariablesParameter {
59      get { return (IFixedValueParameter<ReadOnlyCheckedItemList<StringValue>>)Parameters["ConditionVariables"]; }
60    }
61    public IConstrainedValueParameter<StringValue> TargetVariableParameter {
62      get { return (IConstrainedValueParameter<StringValue>)Parameters["TargetVariable"]; }
63    }
64    public IFixedValueParameter<IntRange> TrainingPartitionParameter {
65      get { return (IFixedValueParameter<IntRange>)Parameters["TrainingPartition"]; }
66    }
67    public IFixedValueParameter<IntRange> TestPartitionParameter {
68      get { return (IFixedValueParameter<IntRange>)Parameters["TestPartition"]; }
69    }
70    public IValueParameter<Rule> SampleRuleParameter {
71      get { return (IValueParameter<Rule>)Parameters["SampleRule"]; }
72    }
73    public IFixedValueParameter<IntValue> MaxIntervalsParameter {
74      get { return (IFixedValueParameter<IntValue>)Parameters["MaxIntervals"]; }
75    }
76    #endregion
77
78    #region properties
79    public Dataset Dataset {
80      get { return DatasetParameter.Value; }
81    }
82    public ICheckedItemList<StringValue> ConditionVariables {
83      get { return ConditionVariablesParameter.Value; }
84    }
85    public StringValue TargetVariable {
86      get { return TargetVariableParameter.Value; }
87    }
88    public IEnumerable<string> AllowedConditionVariables {
89      get { return ConditionVariables.CheckedItems.Select(x => x.Value.Value); }
90    }
91    //public IEnumerable<string> AllowedTargetVariables {
92    //  get { return ActionVariables.CheckedItems.Select(x => x.Value.Value); }
93    //}
94    public IntRange TrainingPartition {
95      get { return TrainingPartitionParameter.Value; }
96    }
97    public IntRange TestPartition {
98      get { return TestPartitionParameter.Value; }
99    }
100    public IEnumerable<int> TrainingIndices {
101      get {
102        return Enumerable.Range(TrainingPartition.Start, Math.Max(0, TrainingPartition.End - TrainingPartition.Start))
103                         .Where(IsTrainingSample);
104      }
105    }
106    public IEnumerable<int> TestIndices {
107      get {
108        return Enumerable.Range(TestPartition.Start, Math.Max(0, TestPartition.End - TestPartition.Start))
109           .Where(IsTestSample);
110      }
111    }
112    public bool IsTrainingSample(int index) {
113      return index >= 0 && index < Dataset.Rows &&
114        TrainingPartition.Start <= index && index < TrainingPartition.End &&
115        (index < TestPartition.Start || TestPartition.End <= index);
116    }
117    public bool IsTestSample(int index) {
118      return index >= 0 && index < Dataset.Rows &&
119             TestPartition.Start <= index && index < TestPartition.End;
120    }
121    public int Classes {
122      get { return SampleRuleParameter.Value.Action.Possibilities; }
123    }
124    #endregion
125
126    [StorableHook(HookType.AfterDeserialization)]
127    private void AfterDeserialization() {
128      RegisterParameterEvents();
129    }
130    [StorableConstructor]
131    protected DecisionListClassificationProblemData(bool deserializing) : base(deserializing) { }
132    protected DecisionListClassificationProblemData(DecisionListClassificationProblemData original, Cloner cloner)
133      : base(original, cloner) {
134      RegisterParameterEvents();
135    }
136    public DecisionListClassificationProblemData(Dataset dataset, IEnumerable<string> allowedConditionVariables, string targetVariable) {
137      if (dataset == null) throw new ArgumentNullException("The dataset must not be null.");
138      if (allowedConditionVariables == null) throw new ArgumentNullException("The allowedActionVariables must not be null.");
139
140      var validTargetVariableValues = CheckVariablesForPossibleTargetVariables(dataset);
141      if (!validTargetVariableValues.Any()) {
142        throw new ArgumentException("No valid target variable found.");
143      }
144      // var validTargetVariableValues = dataset.VariableNames.Select(x => new StringValue(x));
145      var target = validTargetVariableValues.Where(x => x.Value == targetVariable).DefaultIfEmpty(validTargetVariableValues.First()).First();
146
147      var conditionVariables = new CheckedItemList<StringValue>(dataset.VariableNames.Select(x => new StringValue(x)));
148      foreach (StringValue x in conditionVariables) {
149        conditionVariables.SetItemCheckedState(x, allowedConditionVariables.Contains(x.Value));
150      }
151      if (!targetVariable.Equals(target.Value)) {
152        var helper = conditionVariables.FirstOrDefault(x => x.Value.Equals(target.Value));
153        if (helper != null) {
154          conditionVariables.SetItemCheckedState(helper, false);
155        }
156        helper = conditionVariables.FirstOrDefault(x => x.Value.Equals(targetVariable));
157        if (helper != null) {
158          conditionVariables.SetItemCheckedState(helper, true);
159        }
160      }
161
162      int trainingPartitionStart = 0;
163      int trainingPartitionEnd = dataset.Rows / 2;
164      int testPartitionStart = dataset.Rows / 2;
165      int testPartitionEnd = dataset.Rows;
166
167      Parameters.Add(new FixedValueParameter<Dataset>("Dataset", "", dataset));
168      Parameters.Add(new ConstrainedValueParameter<StringValue>("TargetVariable", new ItemSet<StringValue>(validTargetVariableValues), target));
169      Parameters.Add(new FixedValueParameter<ReadOnlyCheckedItemList<StringValue>>("ConditionVariables", "", conditionVariables.AsReadOnly()));
170      Parameters.Add(new FixedValueParameter<IntRange>("TrainingPartition", "", new IntRange(trainingPartitionStart, trainingPartitionEnd)));
171      Parameters.Add(new FixedValueParameter<IntRange>("TestPartition", "", new IntRange(testPartitionStart, testPartitionEnd)));
172      Parameters.Add(new FixedValueParameter<IntValue>("MaxIntervals", "", new IntValue(5)));
173      Parameters.Add(new ValueParameter<Rule>("SampleRule", "", CreateSampleRule(dataset, conditionVariables.CheckedItems.Select(x => x.Value.Value), target.Value)));
174
175      ((ValueParameter<Dataset>)DatasetParameter).ReactOnValueToStringChangedAndValueItemImageChanged = false;
176
177      RegisterParameterEvents();
178    }
179
180    private IEnumerable<StringValue> CheckVariablesForPossibleTargetVariables(DataAnalysis.Dataset dataset) {
181      var possibleTargets = new List<StringValue>();
182      foreach (var variable in dataset.VariableNames) {
183        var variableValues = dataset.GetValues(variable);
184        if (variableValues is List<string>) {
185          possibleTargets.Add(new StringValue(variable));
186        } else if (variableValues is List<double>) {
187          var doubleValues = (variableValues as List<double>).Distinct();
188          if (doubleValues.All(x => x % 1 == 0)) {
189            possibleTargets.Add(new StringValue(variable));
190          }
191        }
192      }
193      return possibleTargets;
194    }
195    public override IDeepCloneable Clone(Cloner cloner) {
196      return new DecisionListClassificationProblemData(this, cloner);
197    }
198
199    private Rule CreateSampleRule(Dataset dataset, IEnumerable<string> conditionVariables, string target) {
200      IEnumerable<HeuristicLab.Encodings.DecisionList.IVariable> condition = GetConditionOfDataSet(dataset, conditionVariables);
201      IAction action = GetActionOfDataset(dataset, target);
202      return new Rule(condition, action);
203    }
204
205    private IAction GetActionOfDataset(DataAnalysis.Dataset dataset, string target) {
206      IAction action = null;
207      var variableValues = dataset.GetValues(target);
208      if (variableValues is List<string>) {
209        action = new StringAction(target, (variableValues as List<string>).Distinct().ToList());
210      } else if (variableValues is List<double>) {
211        var doubleValues = (variableValues as List<double>).Distinct();
212        if (doubleValues.All(x => x % 1 == 0)) {
213          action = new IntAction(target, doubleValues.Select(x => Convert.ToInt32(x)).ToList());
214        } else {
215          throw new ArgumentException("The target variable contains double values. Therefore it can't be used as target");
216        }
217      } else {
218        throw new ArgumentException("There is no matching variable type for the values in the dataset");
219      }
220      return action;
221    }
222
223    private IEnumerable<Encodings.DecisionList.IVariable> GetConditionOfDataSet(DataAnalysis.Dataset dataset, IEnumerable<string> conditionVariables) {
224      var condition = new List<HeuristicLab.Encodings.DecisionList.IVariable>();
225      foreach (var variableName in conditionVariables) {
226        var variableValues = dataset.GetValues(variableName);
227        HeuristicLab.Encodings.DecisionList.IVariable variable;
228        if (variableValues is List<string>) {
229          variable = new StringVariable(variableName, (variableValues as List<string>).Distinct().ToList());
230        } else if (variableValues is List<double>) {
231          var doubleValues = (variableValues as List<double>).Distinct();
232          if (doubleValues.All(x => x % 1 == 0)) {
233            // ToList call is necessary, because otherwise it wouldn't be possible to serialize it
234            variable = new IntVariable(variableName, doubleValues.Select(x => Convert.ToInt32(x)).ToList());
235          } else {
236            variable = new DoubleVariable(variableName, MaxIntervalsParameter.Value.Value);
237          }
238        } else {
239          throw new ArgumentException("There is no matching variable type for the values in the dataset");
240        }
241        condition.Add(variable);
242      }
243      return condition;
244    }
245
246    public IEnumerable<IGAssistInput> FetchInput(IEnumerable<int> rows) {
247      foreach (var row in rows) {
248        yield return FetchInput(row);
249      }
250    }
251
252    protected IDictionary<int, DecisionListInput> fetchInputCache = new Dictionary<int, DecisionListInput>();
253    public IGAssistInput FetchInput(int row) {
254      if (!fetchInputCache.ContainsKey(row)) {
255        DecisionListInput input = new DecisionListInput();
256        var variableNames = SampleRuleParameter.Value.Variables.Keys.ToList();
257        variableNames.Add(SampleRuleParameter.Value.Action.VariableName);
258        foreach (var variableName in variableNames) {
259          input.InputDictionary.Add(variableName, Dataset.GetValue(row, variableName));
260        }
261        fetchInputCache.Add(row, input);
262      }
263      return fetchInputCache[row];
264    }
265
266    public IEnumerable<IGAssistNiche> FetchAction(IEnumerable<int> rows) {
267      foreach (var row in rows) {
268        yield return FetchAction(row);
269      }
270    }
271    protected IDictionary<int, IAction> fetchActionCache = new Dictionary<int, IAction>();
272    public IGAssistNiche FetchAction(int row) {
273      if (!fetchActionCache.ContainsKey(row)) {
274        var action = (IAction)SampleRuleParameter.Value.Action.Clone();
275        action.SetTo(Dataset.GetValue(row, action.VariableName));
276        fetchActionCache.Add(row, action);
277      }
278      return fetchActionCache[row];
279    }
280
281    protected IList<IGAssistNiche> possibleNiches;
282    public IEnumerable<IGAssistNiche> GetPossibleNiches() {
283      if (possibleNiches == null) {
284        possibleNiches = new List<IGAssistNiche>();
285        for (int i = 0; i < Dataset.Rows; i++) {
286          var action = FetchAction(i);
287          if (!possibleNiches.Any(x => x.SameNiche(action))) {
288            possibleNiches.Add(action);
289          }
290        }
291      }
292      return possibleNiches;
293    }
294
295    public event EventHandler Changed;
296
297    #region events
298    private void RegisterParameterEvents() {
299      ConditionVariablesParameter.ValueChanged += new EventHandler(VariablesChanged);
300      ConditionVariablesParameter.Value.CheckedItemsChanged += new CollectionItemsChangedEventHandler<IndexedItem<StringValue>>(VariablesChanged);
301      TargetVariableParameter.ValueChanged += new EventHandler(VariablesChanged);
302    }
303    private void DeregisterParameterEvents() {
304      TargetVariableParameter.ValueChanged += new EventHandler(VariablesChanged);
305      ConditionVariablesParameter.Value.CheckedItemsChanged += new CollectionItemsChangedEventHandler<IndexedItem<StringValue>>(VariablesChanged);
306      ConditionVariablesParameter.ValueChanged += new EventHandler(VariablesChanged);
307    }
308    private void Value_CheckedItemsChanged(object sender, CollectionItemsChangedEventArgs<IndexedItem<StringValue>> e) {
309      VariablesChanged();
310    }
311    private void VariablesChanged(object sender, EventArgs e) {
312      VariablesChanged();
313    }
314
315    private void VariablesChanged() {
316      SampleRuleParameter.Value = CreateSampleRule(Dataset, AllowedConditionVariables, TargetVariable.Value);
317    }
318    #endregion
319
320    #region IDataAnalysisProblemData Members
321    public bool IsEmpty {
322      get { return true; }
323    }
324    public ICheckedItemList<StringValue> InputVariables {
325      get { return ConditionVariables; }
326    }
327    public IEnumerable<string> AllowedInputVariables {
328      get { return AllowedConditionVariables; }
329    }
330    #endregion
331  }
332}
Note: See TracBrowser for help on using the repository browser.