#region License Information /* HeuristicLab * Copyright (C) 2002-2008 Heuristic and Evolutionary Algorithms Laboratory (HEAL) * * This file is part of HeuristicLab. * * HeuristicLab is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * HeuristicLab is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with HeuristicLab. If not, see . */ #endregion using System; using System.Collections.Generic; using System.Text; using System.Xml; using HeuristicLab.Core; using HeuristicLab.Data; using HeuristicLab.DataAnalysis; using System.Linq; namespace HeuristicLab.Modeling { public class VariableQualityImpactCalculator : OperatorBase { public VariableQualityImpactCalculator() : base() { AddVariableInfo(new VariableInfo("Predictor", "The predictor used to evaluate the model", typeof(IPredictor), VariableKind.In)); AddVariableInfo(new VariableInfo("Dataset", "Dataset", typeof(Dataset), VariableKind.In)); AddVariableInfo(new VariableInfo("TargetVariable", "TargetVariable", typeof(StringData), VariableKind.In)); AddVariableInfo(new VariableInfo("InputVariableNames", "Names of used variables in the model (optional)", typeof(ItemList), VariableKind.In)); AddVariableInfo(new VariableInfo("SamplesStart", "SamplesStart", typeof(IntData), VariableKind.In)); AddVariableInfo(new VariableInfo("SamplesEnd", "SamplesEnd", typeof(IntData), VariableKind.In)); AddVariableInfo(new VariableInfo(ModelingResult.VariableQualityImpact.ToString(), "VariableQualityImpacts", typeof(ItemList), VariableKind.New)); } public override string Description { get { return @"Calculates the impact of all allowed input variables on the quality of the model using evaluator supplied as suboperator."; } } public override IOperation Apply(IScope scope) { IPredictor predictor = GetVariableValue("Predictor", scope, true); Dataset dataset = GetVariableValue("Dataset", scope, true); string targetVariableName = GetVariableValue("TargetVariable", scope, true).Data; int targetVariable = dataset.GetVariableIndex(targetVariableName); ItemList inputVariableNames = GetVariableValue>("InputVariableNames", scope, true, false); int start = GetVariableValue("SamplesStart", scope, true).Data; int end = GetVariableValue("SamplesEnd", scope, true).Data; Dictionary qualityImpacts; if (inputVariableNames == null) qualityImpacts = Calculate(dataset, predictor, targetVariableName, start, end); else qualityImpacts = Calculate(dataset, predictor, targetVariableName, inputVariableNames.Select(iv => iv.Data), start, end); ItemList variableImpacts = new ItemList(); foreach (KeyValuePair p in qualityImpacts) { if (p.Key != targetVariableName) { ItemList row = new ItemList(); row.Add(new StringData(p.Key)); row.Add(new DoubleData(p.Value)); variableImpacts.Add(row); } } scope.AddVariable(new Variable(scope.TranslateName(ModelingResult.VariableQualityImpact.ToString()), variableImpacts)); return null; } public static Dictionary Calculate(Dataset dataset, IPredictor predictor, string targetVariableName, int start, int end) { return Calculate(dataset, predictor, targetVariableName, null, start, end); } public static Dictionary Calculate(Dataset dataset, IPredictor predictor, string targetVariableName, IEnumerable inputVariableNames, int start, int end) { Dictionary evaluationImpacts = new Dictionary(); Dataset dirtyDataset = (Dataset)dataset.Clone(); IPredictor dirtyPredictor = (IPredictor)predictor.Clone(); double[] predictedValues = predictor.Predict(dataset, start, end); double[] targetValues = dataset.GetVariableValues(targetVariableName, start, end); double oldMSE = CalculateMSE(targetValues, predictedValues); double newMSE; double mean; IEnumerable oldValues; IEnumerable variables; if (inputVariableNames != null) variables = inputVariableNames; else variables = dataset.VariableNames; foreach (string variableName in variables) { if (dataset.CountMissingValues(variableName, start, end) < (end - start) && dataset.GetRange(variableName, start, end) > 0.0 && variableName != targetVariableName) { mean = dataset.GetMean(variableName, start, end); oldValues = dirtyDataset.ReplaceVariableValues(variableName, Enumerable.Repeat(mean, end - start), start, end); predictedValues = dirtyPredictor.Predict(dirtyDataset, start, end); newMSE = CalculateMSE(predictedValues, targetValues); evaluationImpacts[variableName] = newMSE / oldMSE; dirtyDataset.ReplaceVariableValues(variableName, oldValues, start, end); } else { evaluationImpacts[variableName] = 1.0; } } return evaluationImpacts; } private static double CalculateImpact(double referenceValue, double newValue) { return newValue / referenceValue; } private static double CalculateMSE(double[] referenceValues, double[] newValues) { try { return SimpleMSEEvaluator.Calculate(Matrix.Create(referenceValues, newValues)); } catch (ArgumentException) { return double.PositiveInfinity; } } } }