#region License Information
/* HeuristicLab
* Copyright (C) 2002-2011 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
*/
#endregion
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Problems.DataAnalysis;
namespace HeuristicLab.Algorithms.DataAnalysis {
public class SupportVectorMachineUtil {
  /// <summary>
  /// Transforms the selected rows and input variables of <paramref name="dataset"/> into the sparse data structure needed by libSVM.
  /// </summary>
  /// <param name="dataset">The dataset to read the target and input values from.</param>
  /// <param name="targetVariable">The name of the variable whose values form the target vector of the SVM problem.</param>
  /// <param name="inputVariables">The names of the variables that form the node columns; the n-th variable (counting from 1) gets node index n.</param>
  /// <param name="rowIndices">The rows of the dataset that should be contained in the resulting SVM problem.</param>
  /// <returns>A problem data type that can be used to train a support vector machine.</returns>
  public static SVM.Problem CreateSvmProblem(Dataset dataset, string targetVariable, IEnumerable<string> inputVariables, IEnumerable<int> rowIndices) {
    // Materialize the rows once: they are needed for both the target vector and the node matrix,
    // and a lazily evaluated sequence must not be enumerated twice.
    int[] rows = rowIndices.ToArray();
    double[] targetVector = dataset.GetEnumeratedVariableValues(targetVariable, rows).ToArray();

    // Resolve every input variable to its dataset column once instead of once per cell.
    int[] columnIndices = inputVariables.Select(name => dataset.GetVariableIndex(name)).ToArray();

    SVM.Node[][] nodes = new SVM.Node[targetVector.Length][];
    int maxNodeIndex = 0;
    for (int i = 0; i < rows.Length; i++) {
      var rowNodes = new List<SVM.Node>(columnIndices.Length);
      // Columns are visited in input order, so the nodes of a row are sorted ascending by node index.
      for (int col = 0; col < columnIndices.Length; col++) {
        double value = dataset[rows[i], columnIndices[col]];
        // SVM also works with missing values
        // => don't add NaN values in the dataset to the sparse SVM matrix representation
        if (double.IsNaN(value)) continue;

        int nodeIndex = col + 1; // make sure the smallest node index for SVM = 1
        rowNodes.Add(new SVM.Node(nodeIndex, value));
        if (nodeIndex > maxNodeIndex) maxNodeIndex = nodeIndex;
      }
      nodes[i] = rowNodes.ToArray();
    }
    return new SVM.Problem(targetVector.Length, targetVector, nodes, maxNodeIndex);
  }
}
}