#region License Information
/* HeuristicLab
* Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
*/
#endregion
using System.Collections.Generic;
using System.Linq;
using System.Windows.Forms;
using HeuristicLab.Common;
using HeuristicLab.Data;
using HeuristicLab.MainForm;
using HeuristicLab.MainForm.WindowsForms;
using HeuristicLab.Optimization;
using System;
using HeuristicLab.Problems.DataAnalysis.Regression.Symbolic;
using HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols;
using HeuristicLab.Problems.DataAnalysis.Evaluators;
namespace HeuristicLab.Problems.DataAnalysis.Views {
/// <summary>
/// View for a <see cref="RunCollection"/> that shows, for every run containing a
/// "Best solution (on validation set)" result, an approximated permutation impact of each
/// variable referenced by that solution, followed by per-variable summary statistics.
/// </summary>
[Content(typeof(RunCollection), false)]
[View("RunCollection Monte-Carlo Variable Impact View")]
public partial class RunCollectionMonteCarloVariableImpactView : AsynchronousContentView {
  // Key of the run result that holds the solution whose variables are analyzed.
  private const string validationBestModelResultName = "Best solution (on validation set)";

  public RunCollectionMonteCarloVariableImpactView() {
    InitializeComponent();
  }

  /// <summary>
  /// The displayed run collection (strongly typed wrapper around the base content).
  /// </summary>
  public new RunCollection Content {
    get { return (RunCollection)base.Content; }
    set { base.Content = value; }
  }

  /// <summary>
  /// Subscribes to the collection events that require the matrix to be recalculated.
  /// </summary>
  protected override void RegisterContentEvents() {
    base.RegisterContentEvents();
    this.Content.ItemsAdded += Content_ItemsAdded;
    this.Content.ItemsRemoved += Content_ItemsRemoved;
    this.Content.CollectionReset += Content_CollectionReset;
  }

  /// <summary>
  /// Removes the subscriptions made in <see cref="RegisterContentEvents"/>.
  /// </summary>
  protected override void DeregisterContentEvents() {
    // Must call the base *de*registration; calling base.RegisterContentEvents() here
    // would re-register the base handlers every time the content is detached.
    base.DeregisterContentEvents();
    this.Content.ItemsAdded -= Content_ItemsAdded;
    this.Content.ItemsRemoved -= Content_ItemsRemoved;
    this.Content.CollectionReset -= Content_CollectionReset;
  }

  protected override void OnContentChanged() {
    base.OnContentChanged();
    this.UpdateData();
  }

  private void Content_ItemsAdded(object sender, HeuristicLab.Collections.CollectionItemsChangedEventArgs<IRun> e) {
    this.UpdateData();
  }

  private void Content_ItemsRemoved(object sender, HeuristicLab.Collections.CollectionItemsChangedEventArgs<IRun> e) {
    this.UpdateData();
  }

  private void Content_CollectionReset(object sender, HeuristicLab.Collections.CollectionItemsChangedEventArgs<IRun> e) {
    this.UpdateData();
  }

  // Recomputes the whole matrix and hands it to the embedded matrix view.
  private void UpdateData() {
    matrixView.Content = CalculateVariableImpactMatrix();
  }

  /// <summary>
  /// Builds the variable impact matrix: one row per variable referenced by any solution,
  /// one column per run that has a validation-best solution, followed by the columns
  /// "Median Impact", "Mean Impact", "StdDev" and "pValue".
  /// </summary>
  /// <returns>
  /// The matrix, or <c>null</c> when no content is set. When no run contains a solution
  /// (or no solution references a variable) the matrix has zero rows.
  /// </returns>
  public DoubleMatrix CalculateVariableImpactMatrix() {
    if (Content == null) return null;

    // Runs and their solutions are kept in two parallel lists so that the column written
    // for a solution is always the column labelled with its run name.
    List<IRun> runsWithSolutions = Content
      .Where(run => run.Results.ContainsKey(validationBestModelResultName))
      .ToList();
    List<SymbolicRegressionSolution> solutions = runsWithSolutions
      .Select(run => (SymbolicRegressionSolution)run.Results[validationBestModelResultName])
      .ToList();
    int solutionCount = solutions.Count;

    // Materialized once per solution so the expression tree is not walked repeatedly.
    List<List<string>> variableReferences = solutions
      .Select(solution => GetVariableReferences(solution).Distinct().ToList())
      .ToList();
    List<string> variableNames = variableReferences
      .SelectMany(names => names)
      .Distinct()
      .ToList();

    List<string> statistics = new List<string> { "Median Impact", "Mean Impact", "StdDev", "pValue" };
    List<string> columnNames = runsWithSolutions.Select(run => run.Name).ToList();
    columnNames.AddRange(statistics);

    DoubleMatrix matrix = new DoubleMatrix(variableNames.Count, columnNames.Count);
    matrix.SortableView = true;
    matrix.RowNames = variableNames;
    matrix.ColumnNames = columnNames;

    // Nothing to evaluate; the reference-row selection below would throw on an empty sequence.
    if (variableNames.Count == 0) return matrix;

    Dictionary<string, int> rowOfVariable = new Dictionary<string, int>();
    for (int i = 0; i < variableNames.Count; i++) {
      rowOfVariable[variableNames[i]] = i;
    }

    Random random = new Random();
    for (int column = 0; column < solutionCount; column++) {
      foreach (string variableName in variableReferences[column]) {
        matrix[rowOfVariable[variableName], column] =
          ApproximatePermutationImpact(random, variableName, solutions[column]);
      }
    }

    // Only the per-solution columns carry impact values; the trailing statistic columns are
    // still zero at this point and must not be included in the statistics.
    List<List<double>> variableImpactValues = Enumerable.Range(0, variableNames.Count)
      .Select(row => GetRowValues(matrix, row).Take(solutionCount).ToList())
      .ToList();

    // The variable with the lowest mean impact serves as reference for the signed-rank test.
    List<double> referenceValues = variableImpactValues
      .OrderBy(values => values.Average())
      .First();

    for (int row = 0; row < variableNames.Count; row++) {
      List<double> rowValues = variableImpactValues[row];
      matrix[row, solutionCount] = rowValues.Median();
      matrix[row, solutionCount + 1] = rowValues.Average();
      matrix[row, solutionCount + 2] = rowValues.StandardDeviation();

      // Paired differences between this variable's impacts and the reference variable's.
      double[] z = new double[rowValues.Count];
      for (int i = 0; i < z.Length; i++) {
        z[i] = rowValues[i] - referenceValues[i];
      }
      double bothTails = 0.0, leftTail = 0.0, rightTail = 0.0;
      alglib.wsr.wilcoxonsignedranktest(z, z.Length, 0.0, ref bothTails, ref leftTail, ref rightTail);
      matrix[row, solutionCount + 3] = bothTails;
    }
    return matrix;
  }

  // Lazily yields all values of the given matrix row, over every column of the matrix.
  private IEnumerable<double> GetRowValues(DoubleMatrix matrix, int row) {
    return from col in Enumerable.Range(0, matrix.Columns)
           select matrix[row, col];
  }

  // Yields the variable name of every variable node in the solution's expression tree
  // (duplicates included; callers apply Distinct()).
  private IEnumerable<string> GetVariableReferences(SymbolicRegressionSolution solution) {
    return from node in solution.Model.SymbolicExpressionTree.IterateNodesPostfix().OfType<VariableTreeNode>()
           select node.VariableName;
  }

  /// <summary>
  /// Approximates the impact of one variable on a solution: the values of that variable are
  /// randomly permuted across rows (all other columns stay untouched) and the mean squared
  /// error between the original estimates and the estimates on the permuted data is measured.
  /// </summary>
  /// <param name="random">Random source used for the row permutations.</param>
  /// <param name="variableName">Name of the variable whose column is permuted.</param>
  /// <param name="solution">
  /// Solution to evaluate. Its dataset is replaced temporarily and restored before returning.
  /// </param>
  /// <returns>The MSE averaged over 10 independent permutations.</returns>
  private double ApproximatePermutationImpact(Random random, string variableName, SymbolicRegressionSolution solution) {
    const int permutations = 10;
    Dataset originalDataset = solution.ProblemData.Dataset;
    int variableIndex = originalDataset.GetVariableIndex(variableName);
    // Copy the estimates before the dataset is swapped; they are the comparison baseline.
    List<double> originalOutput = new List<double>(solution.EstimatedValues);
    int rows = originalDataset.Rows;
    int columns = originalDataset.Columns;
    List<int> rowIndexPermutation = Enumerable.Range(0, rows).ToList();
    double mseSum = 0.0;
    try {
      for (int rep = 0; rep < permutations; rep++) {
        double[,] manipulatedData = new double[rows, columns];
        Shuffle(random, rowIndexPermutation);
        for (int row = 0; row < rows; row++) {
          for (int column = 0; column < columns; column++) {
            // Only the examined variable takes its value from a permuted row.
            int sourceRow = column == variableIndex ? rowIndexPermutation[row] : row;
            manipulatedData[row, column] = originalDataset[sourceRow, column];
          }
        }
        // Estimates are read after assigning the manipulated dataset to the solution.
        solution.ProblemData.Dataset = new Dataset(originalDataset.VariableNames, manipulatedData);
        mseSum += SimpleMSEEvaluator.Calculate(originalOutput, solution.EstimatedValues);
      }
    }
    finally {
      // Restore even if evaluation throws, so the solution never keeps shuffled data.
      solution.ProblemData.Dataset = originalDataset;
    }
    return mseSum / permutations;
  }

  // In-place Fisher-Yates shuffle of the given list.
  private void Shuffle(Random random, List<int> xs) {
    for (int i = xs.Count; i > 1; i--) {
      int j = random.Next(i);
      int tmp = xs[j];
      xs[j] = xs[i - 1];
      xs[i - 1] = tmp;
    }
  }
}
}