#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2012 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Collections;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.DecisionList;
using HeuristicLab.Optimization.Operators.LCS;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.Problems.DataAnalysis;

namespace HeuristicLab.Problems.DecisionListClassification {
  /// <summary>
  /// Problem data for decision list classification: wraps a dataset together with the
  /// selected condition (input) variables, the target variable, the training/test
  /// partitions and a "sample rule" that describes the variables and the action of
  /// the data.
  /// </summary>
  /// <remarks>
  /// NOTE(review): the generic type arguments in this file (e.g. on the parameter
  /// properties, the caches and the List type checks) were reconstructed from how the
  /// values are used; confirm them against the declarations of the referenced
  /// interfaces and base classes.
  /// </remarks>
  [StorableClass]
  [Item("DecisionListClassificationProblemData", "")]
  public class DecisionListClassificationProblemData : ParameterizedNamedItem, IDecisionListClassificationProblemData {

    #region default data
    public static string[] defaultVariableNames = new string[] { "a", "b", "c", "d", "e", "f", "g" };
    public static double[,] defaultData = new double[,]{
      {0,0,1,1,0,0,0},
      {0,1,1,1,0,0,0},
      {0,0,1,0,0,0,1},
      {1,0,1,0,1,1,0},
      {0,0,1,1,0,0,0},
      {0,1,1,1,0,0,0},
      {0,0,1,0,0,0,1},
      {1,0,1,0,1,1,0}
    };
    #endregion

    #region parameter properties
    /// <summary>Parameter stored under the key "Dataset".</summary>
    public IFixedValueParameter<Dataset> DatasetParameter {
      get { return (IFixedValueParameter<Dataset>)Parameters["Dataset"]; }
    }
    /// <summary>Parameter stored under the key "ConditionVariables".</summary>
    public IFixedValueParameter<ReadOnlyCheckedItemList<StringValue>> ConditionVariablesParameter {
      get { return (IFixedValueParameter<ReadOnlyCheckedItemList<StringValue>>)Parameters["ConditionVariables"]; }
    }
    /// <summary>Parameter stored under the key "TargetVariable".</summary>
    public IConstrainedValueParameter<StringValue> TargetVariableParameter {
      get { return (IConstrainedValueParameter<StringValue>)Parameters["TargetVariable"]; }
    }
    /// <summary>Parameter stored under the key "TrainingPartition".</summary>
    public IFixedValueParameter<IntRange> TrainingPartitionParameter {
      get { return (IFixedValueParameter<IntRange>)Parameters["TrainingPartition"]; }
    }
    /// <summary>Parameter stored under the key "TestPartition".</summary>
    public IFixedValueParameter<IntRange> TestPartitionParameter {
      get { return (IFixedValueParameter<IntRange>)Parameters["TestPartition"]; }
    }
    /// <summary>Parameter stored under the key "SampleRule".</summary>
    public IValueParameter<Rule> SampleRuleParameter {
      get { return (IValueParameter<Rule>)Parameters["SampleRule"]; }
    }
    /// <summary>Parameter stored under the key "MaxIntervals".</summary>
    public IFixedValueParameter<IntValue> MaxIntervalsParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters["MaxIntervals"]; }
    }
    #endregion

    #region properties
    public Dataset Dataset {
      get { return DatasetParameter.Value; }
    }
    public ICheckedItemList<StringValue> ConditionVariables {
      get { return ConditionVariablesParameter.Value; }
    }
    public StringValue TargetVariable {
      get { return TargetVariableParameter.Value; }
    }
    /// <summary>Names of all condition variables that are currently checked.</summary>
    public IEnumerable<string> AllowedConditionVariables {
      get { return ConditionVariables.CheckedItems.Select(x => x.Value.Value); }
    }

    public IntRange TrainingPartition {
      get { return TrainingPartitionParameter.Value; }
    }
    public IntRange TestPartition {
      get { return TestPartitionParameter.Value; }
    }

    /// <summary>
    /// Row indices of the training partition that satisfy <see cref="IsTrainingSample"/>.
    /// An inverted partition (End &lt; Start) yields no indices.
    /// </summary>
    public IEnumerable<int> TrainingIndices {
      get {
        return Enumerable.Range(TrainingPartition.Start, Math.Max(0, TrainingPartition.End - TrainingPartition.Start))
                         .Where(IsTrainingSample);
      }
    }
    /// <summary>
    /// Row indices of the test partition that satisfy <see cref="IsTestSample"/>.
    /// An inverted partition (End &lt; Start) yields no indices.
    /// </summary>
    public IEnumerable<int> TestIndices {
      get {
        return Enumerable.Range(TestPartition.Start, Math.Max(0, TestPartition.End - TestPartition.Start))
                         .Where(IsTestSample);
      }
    }

    /// <summary>
    /// A row is a training sample when it exists in the dataset, lies inside the
    /// training partition [Start, End) and does not lie inside the test partition.
    /// </summary>
    public bool IsTrainingSample(int index) {
      return index >= 0 && index < Dataset.Rows &&
             TrainingPartition.Start <= index && index < TrainingPartition.End &&
             (index < TestPartition.Start || TestPartition.End <= index);
    }

    /// <summary>
    /// A row is a test sample when it exists in the dataset and lies inside the
    /// test partition [Start, End).
    /// </summary>
    public bool IsTestSample(int index) {
      return index >= 0 && index < Dataset.Rows &&
             TestPartition.Start <= index && index < TestPartition.End;
    }

    /// <summary>Number of possibilities of the sample rule's action.</summary>
    public int Classes {
      get { return SampleRuleParameter.Value.Action.Possibilities; }
    }
    #endregion

    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      RegisterParameterEvents();
    }
    [StorableConstructor]
    protected DecisionListClassificationProblemData(bool deserializing) : base(deserializing) { }
    protected DecisionListClassificationProblemData(DecisionListClassificationProblemData original, Cloner cloner)
      : base(original, cloner) {
      RegisterParameterEvents();
    }

    /// <summary>
    /// Creates the problem data for <paramref name="dataset"/>.
    /// </summary>
    /// <param name="dataset">The data; must not be null.</param>
    /// <param name="allowedConditionVariables">Names of the variables that are initially checked as condition variables; must not be null.</param>
    /// <param name="targetVariable">
    /// Name of the requested target variable. If it is not among the valid target
    /// variables of the dataset, the first valid target variable is used instead.
    /// </param>
    /// <exception cref="ArgumentNullException">dataset or allowedConditionVariables is null.</exception>
    /// <exception cref="ArgumentException">The dataset contains no variable usable as target.</exception>
    public DecisionListClassificationProblemData(Dataset dataset, IEnumerable<string> allowedConditionVariables, string targetVariable) {
      // The single-string constructor of ArgumentNullException takes the parameter name,
      // not the message, so name and message are passed separately.
      if (dataset == null) throw new ArgumentNullException("dataset", "The dataset must not be null.");
      if (allowedConditionVariables == null) throw new ArgumentNullException("allowedConditionVariables", "The allowedConditionVariables must not be null.");

      var validTargetVariableValues = CheckVariablesForPossibleTargetVariables(dataset);
      if (!validTargetVariableValues.Any()) {
        throw new ArgumentException("No valid target variable found.");
      }
      var target = validTargetVariableValues.Where(x => x.Value == targetVariable).DefaultIfEmpty(validTargetVariableValues.First()).First();

      // Materialize once: the sequence is consulted for every dataset variable.
      var allowedNames = new HashSet<string>(allowedConditionVariables);
      var conditionVariables = new CheckedItemList<StringValue>(dataset.VariableNames.Select(x => new StringValue(x)));
      foreach (StringValue x in conditionVariables) {
        conditionVariables.SetItemCheckedState(x, allowedNames.Contains(x.Value));
      }

      // The requested target was replaced by a fallback: the fallback must not be used
      // as a condition, and the requested variable (if it exists) becomes one instead.
      // string.Equals is used so that a null targetVariable takes the fallback path
      // instead of throwing a NullReferenceException.
      if (!string.Equals(targetVariable, target.Value)) {
        var helper = conditionVariables.FirstOrDefault(x => x.Value.Equals(target.Value));
        if (helper != null) {
          conditionVariables.SetItemCheckedState(helper, false);
        }
        helper = conditionVariables.FirstOrDefault(x => x.Value.Equals(targetVariable));
        if (helper != null) {
          conditionVariables.SetItemCheckedState(helper, true);
        }
      }

      // First half of the rows is used for training, second half for testing.
      int trainingPartitionStart = 0;
      int trainingPartitionEnd = dataset.Rows / 2;
      int testPartitionStart = dataset.Rows / 2;
      int testPartitionEnd = dataset.Rows;

      Parameters.Add(new FixedValueParameter<Dataset>("Dataset", "", dataset));
      Parameters.Add(new ConstrainedValueParameter<StringValue>("TargetVariable", new ItemSet<StringValue>(validTargetVariableValues), target));
      Parameters.Add(new FixedValueParameter<ReadOnlyCheckedItemList<StringValue>>("ConditionVariables", "", conditionVariables.AsReadOnly()));
      Parameters.Add(new FixedValueParameter<IntRange>("TrainingPartition", "", new IntRange(trainingPartitionStart, trainingPartitionEnd)));
      Parameters.Add(new FixedValueParameter<IntRange>("TestPartition", "", new IntRange(testPartitionStart, testPartitionEnd)));
      // "MaxIntervals" has to be added before the sample rule is created, because
      // GetConditionOfDataSet reads MaxIntervalsParameter for double variables.
      Parameters.Add(new FixedValueParameter<IntValue>("MaxIntervals", "", new IntValue(5)));
      Parameters.Add(new ValueParameter<Rule>("SampleRule", "", CreateSampleRule(dataset, conditionVariables.CheckedItems.Select(x => x.Value.Value), target.Value)));

      ((ValueParameter<Dataset>)DatasetParameter).ReactOnValueToStringChangedAndValueItemImageChanged = false;

      RegisterParameterEvents();
    }

    /// <summary>
    /// Collects the variables of <paramref name="dataset"/> that can serve as target:
    /// string variables, and double variables whose values are all whole numbers.
    /// </summary>
    private IEnumerable<StringValue> CheckVariablesForPossibleTargetVariables(DataAnalysis.Dataset dataset) {
      var possibleTargets = new List<StringValue>();
      foreach (var variable in dataset.VariableNames) {
        var variableValues = dataset.GetValues(variable);
        if (variableValues is List<string>) {
          possibleTargets.Add(new StringValue(variable));
        } else if (variableValues is List<double>) {
          var doubleValues = (variableValues as List<double>).Distinct();
          if (doubleValues.All(x => x % 1 == 0)) {
            possibleTargets.Add(new StringValue(variable));
          }
        }
      }
      return possibleTargets;
    }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new DecisionListClassificationProblemData(this, cloner);
    }

    /// <summary>
    /// Builds a rule whose condition consists of one variable per entry of
    /// <paramref name="conditionVariables"/> and whose action is derived from
    /// <paramref name="target"/>.
    /// </summary>
    private Rule CreateSampleRule(Dataset dataset, IEnumerable<string> conditionVariables, string target) {
      IEnumerable<HeuristicLab.Encodings.DecisionList.IVariable> condition = GetConditionOfDataSet(dataset, conditionVariables);
      IAction action = GetActionOfDataset(dataset, target);
      return new Rule(condition, action);
    }

    /// <summary>
    /// Creates the action for <paramref name="target"/>: a StringAction for string
    /// values, an IntAction for double values that are all whole numbers.
    /// </summary>
    /// <exception cref="ArgumentException">
    /// The target contains fractional double values or values of an unsupported type.
    /// </exception>
    private IAction GetActionOfDataset(DataAnalysis.Dataset dataset, string target) {
      IAction action = null;
      var variableValues = dataset.GetValues(target);
      if (variableValues is List<string>) {
        action = new StringAction(target, (variableValues as List<string>).Distinct().ToList());
      } else if (variableValues is List<double>) {
        var doubleValues = (variableValues as List<double>).Distinct();
        if (doubleValues.All(x => x % 1 == 0)) {
          action = new IntAction(target, doubleValues.Select(x => Convert.ToInt32(x)).ToList());
        } else {
          throw new ArgumentException("The target variable contains double values. Therefore it can't be used as target");
        }
      } else {
        throw new ArgumentException("There is no matching variable type for the values in the dataset");
      }
      return action;
    }

    /// <summary>
    /// Creates one decision list variable per name in <paramref name="conditionVariables"/>:
    /// StringVariable for string values, IntVariable for whole-numbered double values and
    /// DoubleVariable (configured with the "MaxIntervals" parameter value) otherwise.
    /// </summary>
    /// <exception cref="ArgumentException">A variable has values of an unsupported type.</exception>
    private IEnumerable<HeuristicLab.Encodings.DecisionList.IVariable> GetConditionOfDataSet(DataAnalysis.Dataset dataset, IEnumerable<string> conditionVariables) {
      var condition = new List<HeuristicLab.Encodings.DecisionList.IVariable>();
      foreach (var variableName in conditionVariables) {
        var variableValues = dataset.GetValues(variableName);
        HeuristicLab.Encodings.DecisionList.IVariable variable;
        if (variableValues is List<string>) {
          variable = new StringVariable(variableName, (variableValues as List<string>).Distinct().ToList());
        } else if (variableValues is List<double>) {
          var doubleValues = (variableValues as List<double>).Distinct();
          if (doubleValues.All(x => x % 1 == 0)) {
            // ToList call is necessary, because otherwise it wouldn't be possible to serialize it
            variable = new IntVariable(variableName, doubleValues.Select(x => Convert.ToInt32(x)).ToList());
          } else {
            variable = new DoubleVariable(variableName, MaxIntervalsParameter.Value.Value);
          }
        } else {
          throw new ArgumentException("There is no matching variable type for the values in the dataset");
        }
        condition.Add(variable);
      }
      return condition;
    }

    /// <summary>Lazily yields the input of every row in <paramref name="rows"/>.</summary>
    public IEnumerable<IGAssistInput> FetchInput(IEnumerable<int> rows) {
      foreach (var row in rows) {
        yield return FetchInput(row);
      }
    }

    // Per-row cache of the inputs created by FetchInput(int); cleared whenever the sample rule is rebuilt.
    protected IDictionary<int, IGAssistInput> fetchInputCache = new Dictionary<int, IGAssistInput>();

    /// <summary>
    /// Returns the input of <paramref name="row"/>: the dataset values of all variables of
    /// the sample rule plus the value of the action variable. Results are cached per row.
    /// </summary>
    public IGAssistInput FetchInput(int row) {
      IGAssistInput cached;
      if (!fetchInputCache.TryGetValue(row, out cached)) {
        var input = new DecisionListInput();
        var variableNames = SampleRuleParameter.Value.Variables.Keys.ToList();
        variableNames.Add(SampleRuleParameter.Value.Action.VariableName);
        foreach (var variableName in variableNames) {
          input.InputDictionary.Add(variableName, Dataset.GetValue(row, variableName));
        }
        fetchInputCache.Add(row, input);
        cached = input;
      }
      return cached;
    }

    /// <summary>Lazily yields the action of every row in <paramref name="rows"/>.</summary>
    public IEnumerable<IGAssistNiche> FetchAction(IEnumerable<int> rows) {
      foreach (var row in rows) {
        yield return FetchAction(row);
      }
    }

    // Per-row cache of the actions created by FetchAction(int); cleared whenever the sample rule is rebuilt.
    protected IDictionary<int, IGAssistNiche> fetchActionCache = new Dictionary<int, IGAssistNiche>();

    /// <summary>
    /// Returns a clone of the sample rule's action set to the dataset value of the action
    /// variable in <paramref name="row"/>. Results are cached per row.
    /// </summary>
    public IGAssistNiche FetchAction(int row) {
      IGAssistNiche cached;
      if (!fetchActionCache.TryGetValue(row, out cached)) {
        var action = (IAction)SampleRuleParameter.Value.Action.Clone();
        action.SetTo(Dataset.GetValue(row, action.VariableName));
        fetchActionCache.Add(row, action);
        cached = action;
      }
      return cached;
    }

    // Lazily computed by GetPossibleNiches; null means "not computed yet".
    protected IList<IGAssistNiche> possibleNiches;

    /// <summary>
    /// Returns the distinct niches (according to SameNiche) of the actions of all dataset
    /// rows. The list is computed on first use and then reused.
    /// </summary>
    public IEnumerable<IGAssistNiche> GetPossibleNiches() {
      if (possibleNiches == null) {
        possibleNiches = new List<IGAssistNiche>();
        for (int i = 0; i < Dataset.Rows; i++) {
          var action = FetchAction(i);
          if (!possibleNiches.Any(x => x.SameNiche(action))) {
            possibleNiches.Add(action);
          }
        }
      }
      return possibleNiches;
    }

    // NOTE(review): this event is never raised anywhere in this file.
    public event EventHandler Changed;

    #region events
    private void RegisterParameterEvents() {
      ConditionVariablesParameter.ValueChanged += new EventHandler(VariablesChanged);
      ConditionVariablesParameter.Value.CheckedItemsChanged += new CollectionItemsChangedEventHandler<IndexedItem<StringValue>>(VariablesChanged);
      TargetVariableParameter.ValueChanged += new EventHandler(VariablesChanged);
    }
    private void DeregisterParameterEvents() {
      // Unsubscribe with -= (the handlers were previously added a second time with +=).
      TargetVariableParameter.ValueChanged -= new EventHandler(VariablesChanged);
      ConditionVariablesParameter.Value.CheckedItemsChanged -= new CollectionItemsChangedEventHandler<IndexedItem<StringValue>>(VariablesChanged);
      ConditionVariablesParameter.ValueChanged -= new EventHandler(VariablesChanged);
    }
    private void Value_CheckedItemsChanged(object sender, CollectionItemsChangedEventArgs<IndexedItem<StringValue>> e) {
      VariablesChanged();
    }

    private void VariablesChanged(object sender, EventArgs e) {
      VariablesChanged();
    }

    /// <summary>
    /// Rebuilds the sample rule from the current condition variables and target variable
    /// and discards everything that was derived from the previous rule.
    /// </summary>
    private void VariablesChanged() {
      var rule = CreateSampleRule(Dataset, AllowedConditionVariables, TargetVariable.Value);
      // The cached inputs, actions and niches were built from the old rule's variables
      // and action; drop them before the new rule becomes visible.
      fetchInputCache.Clear();
      fetchActionCache.Clear();
      possibleNiches = null;
      SampleRuleParameter.Value = rule;
    }
    #endregion

    #region IDataAnalysisProblemData Members
    // NOTE(review): always reports true, regardless of the dataset content - confirm this is intended.
    public bool IsEmpty {
      get { return true; }
    }

    public ICheckedItemList<StringValue> InputVariables {
      get { return ConditionVariables; }
    }

    public IEnumerable<string> AllowedInputVariables {
      get { return AllowedConditionVariables; }
    }
    #endregion
  }
}