Free cookie consent management tool by TermsFeed Policy Generator

source: branches/LearningClassifierSystems/HeuristicLab.Problems.DecisionListClassification/3.3/DecisionListClassificationProblemData.cs @ 9352

Last change on this file since 9352 was 9352, checked in by sforsten, 11 years ago

#1980:

  • added DecisionListView
  • added event handlers in *ProblemData
  • renamed project Problems.XCS.Views to Problems.LCS.Views and Problems.Instances.ConditionActionClassification to Problems.Instances.LCS
  • integrated niching in GAssist and added NichingTournamentSelector
  • minor code improvements and property changes
File size: 13.4 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2012 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Collections;
26using HeuristicLab.Common;
27using HeuristicLab.Core;
28using HeuristicLab.Data;
29using HeuristicLab.Encodings.DecisionList;
30using HeuristicLab.Optimization.Operators.LCS;
31using HeuristicLab.Parameters;
32using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
33using HeuristicLab.Problems.DataAnalysis;
34
35namespace HeuristicLab.Problems.DecisionListClassification {
36  [StorableClass]
37  [Item("DecisionListClassificationProblemData", "")]
38  public class DecisionListClassificationProblemData : ParameterizedNamedItem, IDecisionListClassificationProblemData {
39
40    #region default data
41    public static string[] defaultVariableNames = new string[] { "a", "b", "c", "d", "e", "f", "g" };
42    public static double[,] defaultData = new double[,]{
43      {0,0,1,1,0,0,0},
44      {0,1,1,1,0,0,0},
45      {0,0,1,0,0,0,1},
46      {1,0,1,0,1,1,0}
47    };
48    #endregion
49
50    #region parameter properites
51    public IFixedValueParameter<Dataset> DatasetParameter {
52      get { return (IFixedValueParameter<Dataset>)Parameters["Dataset"]; }
53    }
54    public IFixedValueParameter<ReadOnlyCheckedItemList<StringValue>> ConditionVariablesParameter {
55      get { return (IFixedValueParameter<ReadOnlyCheckedItemList<StringValue>>)Parameters["ConditionVariables"]; }
56    }
57    public IConstrainedValueParameter<StringValue> TargetVariableParameter {
58      get { return (IConstrainedValueParameter<StringValue>)Parameters["TargetVariable"]; }
59    }
60    public IFixedValueParameter<IntRange> TrainingPartitionParameter {
61      get { return (IFixedValueParameter<IntRange>)Parameters["TrainingPartition"]; }
62    }
63    public IFixedValueParameter<IntRange> TestPartitionParameter {
64      get { return (IFixedValueParameter<IntRange>)Parameters["TestPartition"]; }
65    }
66    public IValueParameter<Rule> SampleRuleParameter {
67      get { return (IValueParameter<Rule>)Parameters["SampleRule"]; }
68    }
69    public IFixedValueParameter<IntValue> MaxIntervalsParameter {
70      get { return (IFixedValueParameter<IntValue>)Parameters["MaxIntervals"]; }
71    }
72    #endregion
73
74    #region properties
75    public Dataset Dataset {
76      get { return DatasetParameter.Value; }
77    }
78    public ICheckedItemList<StringValue> ConditionVariables {
79      get { return ConditionVariablesParameter.Value; }
80    }
81    public StringValue TargetVariable {
82      get { return TargetVariableParameter.Value; }
83    }
84    public IEnumerable<string> AllowedConditionVariables {
85      get { return ConditionVariables.CheckedItems.Select(x => x.Value.Value); }
86    }
87    //public IEnumerable<string> AllowedTargetVariables {
88    //  get { return ActionVariables.CheckedItems.Select(x => x.Value.Value); }
89    //}
90    public IntRange TrainingPartition {
91      get { return TrainingPartitionParameter.Value; }
92    }
93    public IntRange TestPartition {
94      get { return TestPartitionParameter.Value; }
95    }
96    public IEnumerable<int> TrainingIndices {
97      get {
98        return Enumerable.Range(TrainingPartition.Start, Math.Max(0, TrainingPartition.End - TrainingPartition.Start))
99                         .Where(IsTrainingSample);
100      }
101    }
102    public IEnumerable<int> TestIndices {
103      get {
104        return Enumerable.Range(TestPartition.Start, Math.Max(0, TestPartition.End - TestPartition.Start))
105           .Where(IsTestSample);
106      }
107    }
108    public bool IsTrainingSample(int index) {
109      return index >= 0 && index < Dataset.Rows &&
110        TrainingPartition.Start <= index && index < TrainingPartition.End &&
111        (index < TestPartition.Start || TestPartition.End <= index);
112    }
113    public bool IsTestSample(int index) {
114      return index >= 0 && index < Dataset.Rows &&
115             TestPartition.Start <= index && index < TestPartition.End;
116    }
117    public int Classes {
118      get { return SampleRuleParameter.Value.Action.Possibilities; }
119    }
120    #endregion
121
122    [StorableHook(HookType.AfterDeserialization)]
123    private void AfterDeserialization() {
124      RegisterParameterEvents();
125    }
126    [StorableConstructor]
127    protected DecisionListClassificationProblemData(bool deserializing) : base(deserializing) { }
128    protected DecisionListClassificationProblemData(DecisionListClassificationProblemData original, Cloner cloner)
129      : base(original, cloner) {
130      RegisterParameterEvents();
131    }
132    public DecisionListClassificationProblemData(Dataset dataset, IEnumerable<string> allowedConditionVariables, string targetVariable) {
133      if (dataset == null) throw new ArgumentNullException("The dataset must not be null.");
134      if (allowedConditionVariables == null) throw new ArgumentNullException("The allowedActionVariables must not be null.");
135
136      if (allowedConditionVariables.Except(dataset.DoubleVariables).Any())
137        throw new ArgumentException("All allowed condition variables must be present in the dataset and of type double.");
138
139      //var validTargetVariableValues = CheckVariablesForPossibleTargetVariables(dataset).Select(x => new StringValue(x).AsReadOnly()).ToList();
140      var validTargetVariableValues = dataset.VariableNames.Select(x => new StringValue(x));
141      var target = validTargetVariableValues.Where(x => x.Value == targetVariable).DefaultIfEmpty(validTargetVariableValues.First()).First();
142
143      var conditionVariables = new CheckedItemList<StringValue>(dataset.DoubleVariables.Select(x => new StringValue(x)));
144      foreach (StringValue x in conditionVariables) {
145        conditionVariables.SetItemCheckedState(x, allowedConditionVariables.Contains(x.Value));
146      }
147
148      int trainingPartitionStart = 0;
149      int trainingPartitionEnd = dataset.Rows / 2;
150      int testPartitionStart = dataset.Rows / 2;
151      int testPartitionEnd = dataset.Rows;
152
153      Parameters.Add(new FixedValueParameter<Dataset>("Dataset", "", dataset));
154      Parameters.Add(new ConstrainedValueParameter<StringValue>("TargetVariable", new ItemSet<StringValue>(validTargetVariableValues), target));
155      Parameters.Add(new FixedValueParameter<ReadOnlyCheckedItemList<StringValue>>("ConditionVariables", "", conditionVariables.AsReadOnly()));
156      Parameters.Add(new FixedValueParameter<IntRange>("TrainingPartition", "", new IntRange(trainingPartitionStart, trainingPartitionEnd)));
157      Parameters.Add(new FixedValueParameter<IntRange>("TestPartition", "", new IntRange(testPartitionStart, testPartitionEnd)));
158      Parameters.Add(new FixedValueParameter<IntValue>("MaxIntervals", "", new IntValue(5)));
159      Parameters.Add(new ValueParameter<Rule>("SampleRule", "", CreateSampleRule(dataset, conditionVariables.CheckedItems.Select(x => x.Value.Value), target.Value)));
160
161      ((ValueParameter<Dataset>)DatasetParameter).ReactOnValueToStringChangedAndValueItemImageChanged = false;
162
163      RegisterParameterEvents();
164    }
165    public override IDeepCloneable Clone(Cloner cloner) {
166      return new DecisionListClassificationProblemData(this, cloner);
167    }
168
169    private Rule CreateSampleRule(Dataset dataset, IEnumerable<string> conditionVariables, string target) {
170      IEnumerable<HeuristicLab.Encodings.DecisionList.IVariable> condition = GetConditionOfDataSet(dataset, conditionVariables);
171      IAction action = GetActionOfDataset(dataset, target);
172      return new Rule(condition, action);
173    }
174
175    private IAction GetActionOfDataset(DataAnalysis.Dataset dataset, string target) {
176      IAction action = null;
177      var variableValues = dataset.GetValues(target);
178      if (variableValues is List<string>) {
179        action = new StringAction(target, (variableValues as List<string>).Distinct().ToList());
180      } else if (variableValues is List<double>) {
181        var doubleValues = (variableValues as List<double>).Distinct();
182        if (doubleValues.All(x => x % 1 == 0)) {
183          action = new IntAction(target, doubleValues.Select(x => Convert.ToInt32(x)).ToList());
184        } else {
185          throw new ArgumentException("The target variable contains double values. Therefore it can't be used as target");
186        }
187      } else {
188        throw new ArgumentException("There is no matching variable type for the values in the dataset");
189      }
190      return action;
191    }
192
193    private IEnumerable<Encodings.DecisionList.IVariable> GetConditionOfDataSet(DataAnalysis.Dataset dataset, IEnumerable<string> conditionVariables) {
194      var condition = new List<HeuristicLab.Encodings.DecisionList.IVariable>();
195      foreach (var variableName in conditionVariables) {
196        var variableValues = dataset.GetValues(variableName);
197        HeuristicLab.Encodings.DecisionList.IVariable variable;
198        if (variableValues is List<string>) {
199          variable = new StringVariable(variableName, (variableValues as List<string>).Distinct().ToList());
200        } else if (variableValues is List<double>) {
201          var doubleValues = (variableValues as List<double>).Distinct();
202          if (doubleValues.All(x => x % 1 == 0)) {
203            // ToList call is necessary, because otherwise it wouldn't be possible to serialize it
204            variable = new IntVariable(variableName, doubleValues.Select(x => Convert.ToInt32(x)).ToList());
205          } else {
206            variable = new DoubleVariable(variableName, MaxIntervalsParameter.Value.Value);
207          }
208        } else {
209          throw new ArgumentException("There is no matching variable type for the values in the dataset");
210        }
211        condition.Add(variable);
212      }
213      return condition;
214    }
215
216    public IEnumerable<DecisionListInput> FetchInput(IEnumerable<int> rows) {
217      foreach (var row in rows) {
218        yield return FetchInput(row);
219      }
220    }
221
222    protected IDictionary<int, DecisionListInput> fetchInputCache = new Dictionary<int, DecisionListInput>();
223    public DecisionListInput FetchInput(int row) {
224      if (!fetchInputCache.ContainsKey(row)) {
225        DecisionListInput input = new DecisionListInput();
226        var variableNames = SampleRuleParameter.Value.Variables.Keys.ToList();
227        variableNames.Add(SampleRuleParameter.Value.Action.VariableName);
228        foreach (var variableName in variableNames) {
229          input.InputDictionary.Add(variableName, Dataset.GetValue(row, variableName));
230        }
231        fetchInputCache.Add(row, input);
232      }
233      return fetchInputCache[row];
234    }
235
236    public IEnumerable<IAction> FetchAction(IEnumerable<int> rows) {
237      foreach (var row in rows) {
238        yield return FetchAction(row);
239      }
240    }
241    protected IDictionary<int, IAction> fetchActionCache = new Dictionary<int, IAction>();
242    public IAction FetchAction(int row) {
243      if (!fetchActionCache.ContainsKey(row)) {
244        var action = (IAction)SampleRuleParameter.Value.Action.Clone();
245        action.SetTo(Dataset.GetValue(row, action.VariableName));
246        fetchActionCache.Add(row, action);
247      }
248      return fetchActionCache[row];
249    }
250
251    protected IList<IGAssistNiche> possibleNiches;
252    public IEnumerable<IGAssistNiche> GetPossibleNiches() {
253      if (possibleNiches == null) {
254        possibleNiches = new List<IGAssistNiche>();
255        for (int i = 0; i < Dataset.Rows; i++) {
256          var action = FetchAction(i);
257          if (!possibleNiches.Any(x => x.SameNiche(action))) {
258            possibleNiches.Add(action);
259          }
260        }
261      }
262      return possibleNiches;
263    }
264
265    public event EventHandler Changed;
266
267    #region events
268    private void RegisterParameterEvents() {
269      ConditionVariablesParameter.ValueChanged += new EventHandler(VariablesChanged);
270      ConditionVariablesParameter.Value.CheckedItemsChanged += new CollectionItemsChangedEventHandler<IndexedItem<StringValue>>(VariablesChanged);
271      TargetVariableParameter.ValueChanged += new EventHandler(VariablesChanged);
272    }
273    private void DeregisterParameterEvents() {
274      TargetVariableParameter.ValueChanged += new EventHandler(VariablesChanged);
275      ConditionVariablesParameter.Value.CheckedItemsChanged += new CollectionItemsChangedEventHandler<IndexedItem<StringValue>>(VariablesChanged);
276      ConditionVariablesParameter.ValueChanged += new EventHandler(VariablesChanged);
277    }
278    private void Value_CheckedItemsChanged(object sender, CollectionItemsChangedEventArgs<IndexedItem<StringValue>> e) {
279      VariablesChanged();
280    }
281    private void VariablesChanged(object sender, EventArgs e) {
282      VariablesChanged();
283    }
284
285    private void VariablesChanged() {
286      SampleRuleParameter.Value = CreateSampleRule(Dataset, AllowedConditionVariables, TargetVariable.Value);
287    }
288    #endregion
289  }
290}
Note: See TracBrowser for help on using the repository browser.