Free cookie consent management tool by TermsFeed Policy Generator

source: branches/LearningClassifierSystems/HeuristicLab.Problems.DecisionListClassification/3.3/DecisionListClassificationProblemData.cs @ 9468

Last change on this file since 9468 was 9468, checked in by sforsten, 11 years ago

#1980: removed condition that all condition variables have to be double variables

File size: 13.5 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2012 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Collections;
26using HeuristicLab.Common;
27using HeuristicLab.Core;
28using HeuristicLab.Data;
29using HeuristicLab.Encodings.DecisionList;
30using HeuristicLab.Optimization.Operators.LCS;
31using HeuristicLab.Parameters;
32using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
33using HeuristicLab.Problems.DataAnalysis;
34
35namespace HeuristicLab.Problems.DecisionListClassification {
36  [StorableClass]
37  [Item("DecisionListClassificationProblemData", "")]
38  public class DecisionListClassificationProblemData : ParameterizedNamedItem, IDecisionListClassificationProblemData {
39
40    #region default data
41    public static string[] defaultVariableNames = new string[] { "a", "b", "c", "d", "e", "f", "g" };
42    public static double[,] defaultData = new double[,]{
43      {0,0,1,1,0,0,0},
44      {0,1,1,1,0,0,0},
45      {0,0,1,0,0,0,1},
46      {1,0,1,0,1,1,0}
47    };
48    #endregion
49
50    #region parameter properites
51    public IFixedValueParameter<Dataset> DatasetParameter {
52      get { return (IFixedValueParameter<Dataset>)Parameters["Dataset"]; }
53    }
54    public IFixedValueParameter<ReadOnlyCheckedItemList<StringValue>> ConditionVariablesParameter {
55      get { return (IFixedValueParameter<ReadOnlyCheckedItemList<StringValue>>)Parameters["ConditionVariables"]; }
56    }
57    public IConstrainedValueParameter<StringValue> TargetVariableParameter {
58      get { return (IConstrainedValueParameter<StringValue>)Parameters["TargetVariable"]; }
59    }
60    public IFixedValueParameter<IntRange> TrainingPartitionParameter {
61      get { return (IFixedValueParameter<IntRange>)Parameters["TrainingPartition"]; }
62    }
63    public IFixedValueParameter<IntRange> TestPartitionParameter {
64      get { return (IFixedValueParameter<IntRange>)Parameters["TestPartition"]; }
65    }
66    public IValueParameter<Rule> SampleRuleParameter {
67      get { return (IValueParameter<Rule>)Parameters["SampleRule"]; }
68    }
69    public IFixedValueParameter<IntValue> MaxIntervalsParameter {
70      get { return (IFixedValueParameter<IntValue>)Parameters["MaxIntervals"]; }
71    }
72    #endregion
73
74    #region properties
75    public Dataset Dataset {
76      get { return DatasetParameter.Value; }
77    }
78    public ICheckedItemList<StringValue> ConditionVariables {
79      get { return ConditionVariablesParameter.Value; }
80    }
81    public StringValue TargetVariable {
82      get { return TargetVariableParameter.Value; }
83    }
84    public IEnumerable<string> AllowedConditionVariables {
85      get { return ConditionVariables.CheckedItems.Select(x => x.Value.Value); }
86    }
87    //public IEnumerable<string> AllowedTargetVariables {
88    //  get { return ActionVariables.CheckedItems.Select(x => x.Value.Value); }
89    //}
90    public IntRange TrainingPartition {
91      get { return TrainingPartitionParameter.Value; }
92    }
93    public IntRange TestPartition {
94      get { return TestPartitionParameter.Value; }
95    }
96    public IEnumerable<int> TrainingIndices {
97      get {
98        return Enumerable.Range(TrainingPartition.Start, Math.Max(0, TrainingPartition.End - TrainingPartition.Start))
99                         .Where(IsTrainingSample);
100      }
101    }
102    public IEnumerable<int> TestIndices {
103      get {
104        return Enumerable.Range(TestPartition.Start, Math.Max(0, TestPartition.End - TestPartition.Start))
105           .Where(IsTestSample);
106      }
107    }
108    public bool IsTrainingSample(int index) {
109      return index >= 0 && index < Dataset.Rows &&
110        TrainingPartition.Start <= index && index < TrainingPartition.End &&
111        (index < TestPartition.Start || TestPartition.End <= index);
112    }
113    public bool IsTestSample(int index) {
114      return index >= 0 && index < Dataset.Rows &&
115             TestPartition.Start <= index && index < TestPartition.End;
116    }
117    public int Classes {
118      get { return SampleRuleParameter.Value.Action.Possibilities; }
119    }
120    #endregion
121
122    [StorableHook(HookType.AfterDeserialization)]
123    private void AfterDeserialization() {
124      RegisterParameterEvents();
125    }
126    [StorableConstructor]
127    protected DecisionListClassificationProblemData(bool deserializing) : base(deserializing) { }
128    protected DecisionListClassificationProblemData(DecisionListClassificationProblemData original, Cloner cloner)
129      : base(original, cloner) {
130      RegisterParameterEvents();
131    }
132    public DecisionListClassificationProblemData(Dataset dataset, IEnumerable<string> allowedConditionVariables, string targetVariable) {
133      if (dataset == null) throw new ArgumentNullException("The dataset must not be null.");
134      if (allowedConditionVariables == null) throw new ArgumentNullException("The allowedActionVariables must not be null.");
135
136      //var validTargetVariableValues = CheckVariablesForPossibleTargetVariables(dataset).Select(x => new StringValue(x).AsReadOnly()).ToList();
137      var validTargetVariableValues = dataset.VariableNames.Select(x => new StringValue(x));
138      var target = validTargetVariableValues.Where(x => x.Value == targetVariable).DefaultIfEmpty(validTargetVariableValues.First()).First();
139
140      var conditionVariables = new CheckedItemList<StringValue>(dataset.DoubleVariables.Select(x => new StringValue(x)));
141      foreach (StringValue x in conditionVariables) {
142        conditionVariables.SetItemCheckedState(x, allowedConditionVariables.Contains(x.Value));
143      }
144
145      int trainingPartitionStart = 0;
146      int trainingPartitionEnd = dataset.Rows / 2;
147      int testPartitionStart = dataset.Rows / 2;
148      int testPartitionEnd = dataset.Rows;
149
150      Parameters.Add(new FixedValueParameter<Dataset>("Dataset", "", dataset));
151      Parameters.Add(new ConstrainedValueParameter<StringValue>("TargetVariable", new ItemSet<StringValue>(validTargetVariableValues), target));
152      Parameters.Add(new FixedValueParameter<ReadOnlyCheckedItemList<StringValue>>("ConditionVariables", "", conditionVariables.AsReadOnly()));
153      Parameters.Add(new FixedValueParameter<IntRange>("TrainingPartition", "", new IntRange(trainingPartitionStart, trainingPartitionEnd)));
154      Parameters.Add(new FixedValueParameter<IntRange>("TestPartition", "", new IntRange(testPartitionStart, testPartitionEnd)));
155      Parameters.Add(new FixedValueParameter<IntValue>("MaxIntervals", "", new IntValue(5)));
156      Parameters.Add(new ValueParameter<Rule>("SampleRule", "", CreateSampleRule(dataset, conditionVariables.CheckedItems.Select(x => x.Value.Value), target.Value)));
157
158      ((ValueParameter<Dataset>)DatasetParameter).ReactOnValueToStringChangedAndValueItemImageChanged = false;
159
160      RegisterParameterEvents();
161    }
162    public override IDeepCloneable Clone(Cloner cloner) {
163      return new DecisionListClassificationProblemData(this, cloner);
164    }
165
166    private Rule CreateSampleRule(Dataset dataset, IEnumerable<string> conditionVariables, string target) {
167      IEnumerable<HeuristicLab.Encodings.DecisionList.IVariable> condition = GetConditionOfDataSet(dataset, conditionVariables);
168      IAction action = GetActionOfDataset(dataset, target);
169      return new Rule(condition, action);
170    }
171
172    private IAction GetActionOfDataset(DataAnalysis.Dataset dataset, string target) {
173      IAction action = null;
174      var variableValues = dataset.GetValues(target);
175      if (variableValues is List<string>) {
176        action = new StringAction(target, (variableValues as List<string>).Distinct().ToList());
177      } else if (variableValues is List<double>) {
178        var doubleValues = (variableValues as List<double>).Distinct();
179        if (doubleValues.All(x => x % 1 == 0)) {
180          action = new IntAction(target, doubleValues.Select(x => Convert.ToInt32(x)).ToList());
181        } else {
182          throw new ArgumentException("The target variable contains double values. Therefore it can't be used as target");
183        }
184      } else {
185        throw new ArgumentException("There is no matching variable type for the values in the dataset");
186      }
187      return action;
188    }
189
190    private IEnumerable<Encodings.DecisionList.IVariable> GetConditionOfDataSet(DataAnalysis.Dataset dataset, IEnumerable<string> conditionVariables) {
191      var condition = new List<HeuristicLab.Encodings.DecisionList.IVariable>();
192      foreach (var variableName in conditionVariables) {
193        var variableValues = dataset.GetValues(variableName);
194        HeuristicLab.Encodings.DecisionList.IVariable variable;
195        if (variableValues is List<string>) {
196          variable = new StringVariable(variableName, (variableValues as List<string>).Distinct().ToList());
197        } else if (variableValues is List<double>) {
198          var doubleValues = (variableValues as List<double>).Distinct();
199          if (doubleValues.All(x => x % 1 == 0)) {
200            // ToList call is necessary, because otherwise it wouldn't be possible to serialize it
201            variable = new IntVariable(variableName, doubleValues.Select(x => Convert.ToInt32(x)).ToList());
202          } else {
203            variable = new DoubleVariable(variableName, MaxIntervalsParameter.Value.Value);
204          }
205        } else {
206          throw new ArgumentException("There is no matching variable type for the values in the dataset");
207        }
208        condition.Add(variable);
209      }
210      return condition;
211    }
212
213    public IEnumerable<IGAssistInput> FetchInput(IEnumerable<int> rows) {
214      foreach (var row in rows) {
215        yield return FetchInput(row);
216      }
217    }
218
219    protected IDictionary<int, DecisionListInput> fetchInputCache = new Dictionary<int, DecisionListInput>();
220    public IGAssistInput FetchInput(int row) {
221      if (!fetchInputCache.ContainsKey(row)) {
222        DecisionListInput input = new DecisionListInput();
223        var variableNames = SampleRuleParameter.Value.Variables.Keys.ToList();
224        variableNames.Add(SampleRuleParameter.Value.Action.VariableName);
225        foreach (var variableName in variableNames) {
226          input.InputDictionary.Add(variableName, Dataset.GetValue(row, variableName));
227        }
228        fetchInputCache.Add(row, input);
229      }
230      return fetchInputCache[row];
231    }
232
233    public IEnumerable<IGAssistNiche> FetchAction(IEnumerable<int> rows) {
234      foreach (var row in rows) {
235        yield return FetchAction(row);
236      }
237    }
238    protected IDictionary<int, IAction> fetchActionCache = new Dictionary<int, IAction>();
239    public IGAssistNiche FetchAction(int row) {
240      if (!fetchActionCache.ContainsKey(row)) {
241        var action = (IAction)SampleRuleParameter.Value.Action.Clone();
242        action.SetTo(Dataset.GetValue(row, action.VariableName));
243        fetchActionCache.Add(row, action);
244      }
245      return fetchActionCache[row];
246    }
247
248    protected IList<IGAssistNiche> possibleNiches;
249    public IEnumerable<IGAssistNiche> GetPossibleNiches() {
250      if (possibleNiches == null) {
251        possibleNiches = new List<IGAssistNiche>();
252        for (int i = 0; i < Dataset.Rows; i++) {
253          var action = FetchAction(i);
254          if (!possibleNiches.Any(x => x.SameNiche(action))) {
255            possibleNiches.Add(action);
256          }
257        }
258      }
259      return possibleNiches;
260    }
261
262    public event EventHandler Changed;
263
264    #region events
265    private void RegisterParameterEvents() {
266      ConditionVariablesParameter.ValueChanged += new EventHandler(VariablesChanged);
267      ConditionVariablesParameter.Value.CheckedItemsChanged += new CollectionItemsChangedEventHandler<IndexedItem<StringValue>>(VariablesChanged);
268      TargetVariableParameter.ValueChanged += new EventHandler(VariablesChanged);
269    }
270    private void DeregisterParameterEvents() {
271      TargetVariableParameter.ValueChanged += new EventHandler(VariablesChanged);
272      ConditionVariablesParameter.Value.CheckedItemsChanged += new CollectionItemsChangedEventHandler<IndexedItem<StringValue>>(VariablesChanged);
273      ConditionVariablesParameter.ValueChanged += new EventHandler(VariablesChanged);
274    }
275    private void Value_CheckedItemsChanged(object sender, CollectionItemsChangedEventArgs<IndexedItem<StringValue>> e) {
276      VariablesChanged();
277    }
278    private void VariablesChanged(object sender, EventArgs e) {
279      VariablesChanged();
280    }
281
282    private void VariablesChanged() {
283      SampleRuleParameter.Value = CreateSampleRule(Dataset, AllowedConditionVariables, TargetVariable.Value);
284    }
285    #endregion
286
287    #region IDataAnalysisProblemData Members
288    public bool IsEmpty {
289      get { return true; }
290    }
291    public ICheckedItemList<StringValue> InputVariables {
292      get { return ConditionVariables; }
293    }
294    public IEnumerable<string> AllowedInputVariables {
295      get { return AllowedConditionVariables; }
296    }
297    #endregion
298  }
299}
Note: See TracBrowser for help on using the repository browser.