#region License Information
/* HeuristicLab
* Copyright (C) 2002-2012 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
*/
#endregion
using System;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Collections;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.DecisionList;
using HeuristicLab.Optimization.Operators.LCS;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.Problems.DataAnalysis;
namespace HeuristicLab.Problems.DecisionListClassification {
/// <summary>
/// Problem data for decision list classification. Bundles a dataset, the checked
/// condition variables, the target variable, the training/test partitions and a
/// sample rule derived from them; all of these are stored as parameters.
/// </summary>
[StorableClass]
[Item("DecisionListClassificationProblemData", "")]
public class DecisionListClassificationProblemData : ParameterizedNamedItem, IDecisionListClassificationProblemData {
  #region default data
  /// <summary>Variable names matching the seven columns of <see cref="defaultData"/>.</summary>
  public static string[] defaultVariableNames = new string[] { "a", "b", "c", "d", "e", "f", "g" };

  /// <summary>Small example data matrix (8 rows, 7 columns).</summary>
  public static double[,] defaultData = new double[,]{
    {0,0,1,1,0,0,0},
    {0,1,1,1,0,0,0},
    {0,0,1,0,0,0,1},
    {1,0,1,0,1,1,0},
    {0,0,1,1,0,0,0},
    {0,1,1,1,0,0,0},
    {0,0,1,0,0,0,1},
    {1,0,1,0,1,1,0}
  };
  #endregion

  #region parameter properties
  public IFixedValueParameter<Dataset> DatasetParameter {
    get { return (IFixedValueParameter<Dataset>)Parameters["Dataset"]; }
  }
  public IFixedValueParameter<ReadOnlyCheckedItemList<StringValue>> ConditionVariablesParameter {
    get { return (IFixedValueParameter<ReadOnlyCheckedItemList<StringValue>>)Parameters["ConditionVariables"]; }
  }
  public IConstrainedValueParameter<StringValue> TargetVariableParameter {
    get { return (IConstrainedValueParameter<StringValue>)Parameters["TargetVariable"]; }
  }
  public IFixedValueParameter<IntRange> TrainingPartitionParameter {
    get { return (IFixedValueParameter<IntRange>)Parameters["TrainingPartition"]; }
  }
  public IFixedValueParameter<IntRange> TestPartitionParameter {
    get { return (IFixedValueParameter<IntRange>)Parameters["TestPartition"]; }
  }
  public IValueParameter<Rule> SampleRuleParameter {
    get { return (IValueParameter<Rule>)Parameters["SampleRule"]; }
  }
  public IFixedValueParameter<IntValue> MaxIntervalsParameter {
    get { return (IFixedValueParameter<IntValue>)Parameters["MaxIntervals"]; }
  }
  #endregion

  #region properties
  public Dataset Dataset {
    get { return DatasetParameter.Value; }
  }
  public ICheckedItemList<StringValue> ConditionVariables {
    get { return ConditionVariablesParameter.Value; }
  }
  public StringValue TargetVariable {
    get { return TargetVariableParameter.Value; }
  }
  /// <summary>Names of all condition variables that are currently checked.</summary>
  public IEnumerable<string> AllowedConditionVariables {
    get { return ConditionVariables.CheckedItems.Select(x => x.Value.Value); }
  }
  public IntRange TrainingPartition {
    get { return TrainingPartitionParameter.Value; }
  }
  public IntRange TestPartition {
    get { return TestPartitionParameter.Value; }
  }
  /// <summary>
  /// Row indices of the training partition that satisfy <see cref="IsTrainingSample"/>.
  /// An inverted partition (End &lt; Start) yields no indices.
  /// </summary>
  public IEnumerable<int> TrainingIndices {
    get {
      return Enumerable.Range(TrainingPartition.Start, Math.Max(0, TrainingPartition.End - TrainingPartition.Start))
                       .Where(IsTrainingSample);
    }
  }
  /// <summary>
  /// Row indices of the test partition that satisfy <see cref="IsTestSample"/>.
  /// An inverted partition (End &lt; Start) yields no indices.
  /// </summary>
  public IEnumerable<int> TestIndices {
    get {
      return Enumerable.Range(TestPartition.Start, Math.Max(0, TestPartition.End - TestPartition.Start))
                       .Where(IsTestSample);
    }
  }
  /// <summary>
  /// True if <paramref name="index"/> is a valid dataset row, lies inside the training
  /// partition and does not lie inside the test partition.
  /// </summary>
  public bool IsTrainingSample(int index) {
    return index >= 0 && index < Dataset.Rows &&
           TrainingPartition.Start <= index && index < TrainingPartition.End &&
           (index < TestPartition.Start || TestPartition.End <= index);
  }
  /// <summary>
  /// True if <paramref name="index"/> is a valid dataset row and lies inside the test partition.
  /// </summary>
  public bool IsTestSample(int index) {
    return index >= 0 && index < Dataset.Rows &&
           TestPartition.Start <= index && index < TestPartition.End;
  }
  /// <summary>Number of possibilities reported by the action of the current sample rule.</summary>
  public int Classes {
    get { return SampleRuleParameter.Value.Action.Possibilities; }
  }
  #endregion

  [StorableHook(HookType.AfterDeserialization)]
  private void AfterDeserialization() {
    RegisterParameterEvents();
  }

  [StorableConstructor]
  protected DecisionListClassificationProblemData(bool deserializing) : base(deserializing) { }

  protected DecisionListClassificationProblemData(DecisionListClassificationProblemData original, Cloner cloner)
    : base(original, cloner) {
    RegisterParameterEvents();
  }

  /// <summary>
  /// Creates problem data for the given dataset.
  /// </summary>
  /// <param name="dataset">The dataset; must not be null.</param>
  /// <param name="allowedConditionVariables">Names of the variables that are initially checked as condition variables; must not be null.</param>
  /// <param name="targetVariable">
  /// Requested target variable. If it is not among the valid target variables of the dataset,
  /// the first valid target variable is used instead.
  /// </param>
  /// <exception cref="ArgumentNullException">If <paramref name="dataset"/> or <paramref name="allowedConditionVariables"/> is null.</exception>
  /// <exception cref="ArgumentException">If the dataset contains no variable that can be used as target.</exception>
  public DecisionListClassificationProblemData(Dataset dataset, IEnumerable<string> allowedConditionVariables, string targetVariable) {
    // ArgumentNullException(string) interprets its single argument as the parameter name,
    // therefore the two-argument overload is used to pass name and message separately.
    if (dataset == null) throw new ArgumentNullException("dataset", "The dataset must not be null.");
    if (allowedConditionVariables == null) throw new ArgumentNullException("allowedConditionVariables", "The allowedConditionVariables must not be null.");

    var validTargetVariableValues = CheckVariablesForPossibleTargetVariables(dataset);
    if (!validTargetVariableValues.Any()) {
      throw new ArgumentException("No valid target variable found.");
    }

    // fall back to the first valid target if the requested one is not usable
    var target = validTargetVariableValues.FirstOrDefault(x => x.Value == targetVariable)
                 ?? validTargetVariableValues.First();

    // enumerate the caller's sequence only once and get constant-time lookups
    var allowedNames = new HashSet<string>(allowedConditionVariables);
    var conditionVariables = new CheckedItemList<StringValue>(dataset.VariableNames.Select(x => new StringValue(x)));
    foreach (StringValue x in conditionVariables) {
      conditionVariables.SetItemCheckedState(x, allowedNames.Contains(x.Value));
    }

    // The requested target was replaced by the fallback: the fallback must not be used as a
    // condition, while the rejected variable is made available as a condition instead.
    // The != operator is used (instead of targetVariable.Equals) so that a null targetVariable
    // takes the fallback path rather than throwing a NullReferenceException.
    if (targetVariable != target.Value) {
      var helper = conditionVariables.FirstOrDefault(x => x.Value.Equals(target.Value));
      if (helper != null) {
        conditionVariables.SetItemCheckedState(helper, false);
      }
      helper = conditionVariables.FirstOrDefault(x => x.Value.Equals(targetVariable));
      if (helper != null) {
        conditionVariables.SetItemCheckedState(helper, true);
      }
    }

    // first half of the rows is used for training, second half for testing
    int trainingPartitionStart = 0;
    int trainingPartitionEnd = dataset.Rows / 2;
    int testPartitionStart = dataset.Rows / 2;
    int testPartitionEnd = dataset.Rows;

    Parameters.Add(new FixedValueParameter<Dataset>("Dataset", "", dataset));
    Parameters.Add(new ConstrainedValueParameter<StringValue>("TargetVariable", new ItemSet<StringValue>(validTargetVariableValues), target));
    Parameters.Add(new FixedValueParameter<ReadOnlyCheckedItemList<StringValue>>("ConditionVariables", "", conditionVariables.AsReadOnly()));
    Parameters.Add(new FixedValueParameter<IntRange>("TrainingPartition", "", new IntRange(trainingPartitionStart, trainingPartitionEnd)));
    Parameters.Add(new FixedValueParameter<IntRange>("TestPartition", "", new IntRange(testPartitionStart, testPartitionEnd)));
    Parameters.Add(new FixedValueParameter<IntValue>("MaxIntervals", "", new IntValue(5)));
    // must be added after "MaxIntervals", because CreateSampleRule reads MaxIntervalsParameter
    Parameters.Add(new ValueParameter<Rule>("SampleRule", "", CreateSampleRule(dataset, conditionVariables.CheckedItems.Select(x => x.Value.Value), target.Value)));

    ((ValueParameter<Dataset>)DatasetParameter).ReactOnValueToStringChangedAndValueItemImageChanged = false;
    RegisterParameterEvents();
  }

  /// <summary>True if every value is a whole number (NaN does not count as a whole number).</summary>
  private static bool AreAllIntegral(IEnumerable<double> values) {
    return values.All(x => x % 1 == 0);
  }

  /// <summary>
  /// Collects all variables that can serve as target: string variables and double
  /// variables that contain only whole numbers.
  /// </summary>
  private IEnumerable<StringValue> CheckVariablesForPossibleTargetVariables(DataAnalysis.Dataset dataset) {
    var possibleTargets = new List<StringValue>();
    foreach (var variable in dataset.VariableNames) {
      var variableValues = dataset.GetValues(variable);
      var doubleValues = variableValues as List<double>;
      if (variableValues is List<string> || (doubleValues != null && AreAllIntegral(doubleValues))) {
        possibleTargets.Add(new StringValue(variable));
      }
    }
    return possibleTargets;
  }

  public override IDeepCloneable Clone(Cloner cloner) {
    return new DecisionListClassificationProblemData(this, cloner);
  }

  /// <summary>Builds a rule whose condition and action describe the given variables of the dataset.</summary>
  private Rule CreateSampleRule(Dataset dataset, IEnumerable<string> conditionVariables, string target) {
    IEnumerable<HeuristicLab.Encodings.DecisionList.IVariable> condition = GetConditionOfDataSet(dataset, conditionVariables);
    IAction action = GetActionOfDataset(dataset, target);
    return new Rule(condition, action);
  }

  /// <summary>
  /// Creates the action for the target variable from its distinct values.
  /// </summary>
  /// <exception cref="ArgumentException">
  /// If the target values are neither strings nor doubles, or if they are doubles that are not all whole numbers.
  /// </exception>
  private IAction GetActionOfDataset(DataAnalysis.Dataset dataset, string target) {
    var variableValues = dataset.GetValues(target);

    var stringValues = variableValues as List<string>;
    if (stringValues != null) {
      return new StringAction(target, stringValues.Distinct().ToList());
    }

    var doubleValues = variableValues as List<double>;
    if (doubleValues == null) {
      throw new ArgumentException("There is no matching variable type for the values in the dataset");
    }

    // materialized once, because the values are checked and afterwards converted
    var distinctValues = doubleValues.Distinct().ToList();
    if (!AreAllIntegral(distinctValues)) {
      throw new ArgumentException("The target variable contains double values. Therefore it can't be used as target");
    }
    return new IntAction(target, distinctValues.Select(x => Convert.ToInt32(x)).ToList());
  }

  /// <summary>
  /// Creates one variable description per condition variable: a string variable for string
  /// values, an int variable for whole-numbered doubles and a double variable otherwise.
  /// </summary>
  /// <exception cref="ArgumentException">If the values of a variable are neither strings nor doubles.</exception>
  private IEnumerable<HeuristicLab.Encodings.DecisionList.IVariable> GetConditionOfDataSet(DataAnalysis.Dataset dataset, IEnumerable<string> conditionVariables) {
    var condition = new List<HeuristicLab.Encodings.DecisionList.IVariable>();
    foreach (var variableName in conditionVariables) {
      var variableValues = dataset.GetValues(variableName);
      var stringValues = variableValues as List<string>;
      var doubleValues = variableValues as List<double>;

      HeuristicLab.Encodings.DecisionList.IVariable variable;
      if (stringValues != null) {
        variable = new StringVariable(variableName, stringValues.Distinct().ToList());
      } else if (doubleValues != null) {
        var distinctValues = doubleValues.Distinct().ToList();
        if (AreAllIntegral(distinctValues)) {
          // ToList call is necessary, because otherwise it wouldn't be possible to serialize it
          variable = new IntVariable(variableName, distinctValues.Select(x => Convert.ToInt32(x)).ToList());
        } else {
          variable = new DoubleVariable(variableName, MaxIntervalsParameter.Value.Value);
        }
      } else {
        throw new ArgumentException("There is no matching variable type for the values in the dataset");
      }
      condition.Add(variable);
    }
    return condition;
  }

  /// <summary>Lazily fetches the inputs of the given rows (see <see cref="FetchInput(int)"/>).</summary>
  public IEnumerable<IGAssistInput> FetchInput(IEnumerable<int> rows) {
    foreach (var row in rows) {
      yield return FetchInput(row);
    }
  }

  /// <summary>Inputs already created by <see cref="FetchInput(int)"/>, keyed by row.</summary>
  protected IDictionary<int, IGAssistInput> fetchInputCache = new Dictionary<int, IGAssistInput>();

  /// <summary>
  /// Returns the input of a row, containing the dataset values of all variables of the
  /// sample rule plus its action variable. Results are cached per row.
  /// </summary>
  public IGAssistInput FetchInput(int row) {
    IGAssistInput cached;
    if (fetchInputCache.TryGetValue(row, out cached)) {
      return cached;
    }

    var sampleRule = SampleRuleParameter.Value;
    var variableNames = sampleRule.Variables.Keys.ToList();
    variableNames.Add(sampleRule.Action.VariableName);

    var input = new DecisionListInput();
    foreach (var variableName in variableNames) {
      input.InputDictionary.Add(variableName, Dataset.GetValue(row, variableName));
    }
    fetchInputCache.Add(row, input);
    return input;
  }

  /// <summary>Lazily fetches the actions of the given rows (see <see cref="FetchAction(int)"/>).</summary>
  public IEnumerable<IGAssistNiche> FetchAction(IEnumerable<int> rows) {
    foreach (var row in rows) {
      yield return FetchAction(row);
    }
  }

  /// <summary>Actions already created by <see cref="FetchAction(int)"/>, keyed by row.</summary>
  protected IDictionary<int, IGAssistNiche> fetchActionCache = new Dictionary<int, IGAssistNiche>();

  /// <summary>
  /// Returns a clone of the sample rule's action that is set to the dataset value of the
  /// action variable in the given row. Results are cached per row.
  /// </summary>
  public IGAssistNiche FetchAction(int row) {
    IGAssistNiche cached;
    if (fetchActionCache.TryGetValue(row, out cached)) {
      return cached;
    }

    var action = (IAction)SampleRuleParameter.Value.Action.Clone();
    action.SetTo(Dataset.GetValue(row, action.VariableName));
    fetchActionCache.Add(row, action);
    return action;
  }

  /// <summary>Niches collected by <see cref="GetPossibleNiches"/>; null until first requested.</summary>
  protected IList<IGAssistNiche> possibleNiches;

  /// <summary>
  /// Returns one action per distinct niche that occurs in the rows of the dataset.
  /// The list is computed on first use and reused afterwards.
  /// </summary>
  public IEnumerable<IGAssistNiche> GetPossibleNiches() {
    if (possibleNiches == null) {
      possibleNiches = new List<IGAssistNiche>();
      for (int i = 0; i < Dataset.Rows; i++) {
        var action = FetchAction(i);
        if (!possibleNiches.Any(x => x.SameNiche(action))) {
          possibleNiches.Add(action);
        }
      }
    }
    return possibleNiches;
  }

  // NOTE(review): this event is never raised inside this class - confirm whether
  // subscribers expect a notification when the variables change.
  public event EventHandler Changed;

  #region events
  private void RegisterParameterEvents() {
    ConditionVariablesParameter.ValueChanged += new EventHandler(VariablesChanged);
    ConditionVariablesParameter.Value.CheckedItemsChanged += new CollectionItemsChangedEventHandler<IndexedItem<StringValue>>(VariablesChanged);
    TargetVariableParameter.ValueChanged += new EventHandler(VariablesChanged);
  }
  private void DeregisterParameterEvents() {
    // handlers have to be removed with -=; += would subscribe them a second time
    TargetVariableParameter.ValueChanged -= new EventHandler(VariablesChanged);
    ConditionVariablesParameter.Value.CheckedItemsChanged -= new CollectionItemsChangedEventHandler<IndexedItem<StringValue>>(VariablesChanged);
    ConditionVariablesParameter.ValueChanged -= new EventHandler(VariablesChanged);
  }
  private void Value_CheckedItemsChanged(object sender, CollectionItemsChangedEventArgs<IndexedItem<StringValue>> e) {
    VariablesChanged();
  }
  private void VariablesChanged(object sender, EventArgs e) {
    VariablesChanged();
  }
  /// <summary>
  /// Rebuilds the sample rule for the current variable selection and discards everything
  /// that was derived from the previous sample rule.
  /// </summary>
  private void VariablesChanged() {
    SampleRuleParameter.Value = CreateSampleRule(Dataset, AllowedConditionVariables, TargetVariable.Value);
    // Cached inputs, actions and niches were built from the old sample rule and would be
    // stale now. They are cleared only after the new rule was created successfully.
    fetchInputCache.Clear();
    fetchActionCache.Clear();
    possibleNiches = null;
  }
  #endregion

  #region IDataAnalysisProblemData Members
  // NOTE(review): always reports true, even though a dataset is always present - confirm
  // that this is intended.
  public bool IsEmpty {
    get { return true; }
  }
  public ICheckedItemList<StringValue> InputVariables {
    get { return ConditionVariables; }
  }
  public IEnumerable<string> AllowedInputVariables {
    get { return AllowedConditionVariables; }
  }
  #endregion
}
}