#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2011 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Problems.DataAnalysis;

namespace HeuristicLab.Algorithms.DataAnalysis {
  public class SupportVectorMachineUtil {
    /// <summary>
    /// Transforms the selected rows of <paramref name="dataset"/> into the sparse
    /// data structure needed by libSVM.
    /// </summary>
    /// <remarks>
    /// Each input variable is assigned a 1-based node index according to its position in
    /// <paramref name="inputVariables"/>. Cells whose value is NaN are left out of the
    /// sparse row instead of being stored, so a row may contain fewer nodes than there
    /// are input variables.
    /// </remarks>
    /// <param name="dataset">The dataset to read the target and input values from.</param>
    /// <param name="targetVariable">The name of the variable whose values form the target vector.</param>
    /// <param name="inputVariables">The names of the variables used as SVM features, in node-index order.</param>
    /// <param name="rowIndices">The rows of the dataset that should be contained in the resulting SVM problem.</param>
    /// <returns>A problem data type that can be used to train a support vector machine.</returns>
    public static SVM.Problem CreateSvmProblem(Dataset dataset, string targetVariable, IEnumerable<string> inputVariables, IEnumerable<int> rowIndices) {
      // Materialize the rows once: they are needed for the target vector and again for the
      // feature rows, and a deferred sequence must not be evaluated twice (the two passes
      // could otherwise disagree).
      List<int> rows = rowIndices.ToList();
      double[] targetVector = dataset.GetEnumeratedVariableValues(targetVariable, rows).ToArray();

      // Resolve the dataset column of every input variable once instead of once per cell.
      // Note: this resolves the names even when no rows are selected.
      int[] variableIndices = inputVariables.Select(name => dataset.GetVariableIndex(name)).ToArray();

      SVM.Node[][] nodes = new SVM.Node[targetVector.Length][];
      int maxNodeIndex = 0; // largest node index that actually holds a value
      int svmProblemRowIndex = 0;

      foreach (int row in rows) {
        List<SVM.Node> tempRow = new List<SVM.Node>(variableIndices.Length);
        for (int i = 0; i < variableIndices.Length; i++) {
          double value = dataset[row, variableIndices[i]];
          // SVM also works with missing values
          // => don't add NaN values in the dataset to the sparse SVM matrix representation
          if (!double.IsNaN(value)) {
            int colIndex = i + 1; // make sure the smallest node index for SVM = 1
            // nodes must be sorted in ascending order by column index;
            // iterating the variables in order guarantees that
            tempRow.Add(new SVM.Node(colIndex, value));
            if (colIndex > maxNodeIndex) maxNodeIndex = colIndex;
          }
        }
        nodes[svmProblemRowIndex++] = tempRow.ToArray();
      }

      return new SVM.Problem(targetVector.Length, targetVector, nodes, maxNodeIndex);
    }
  }
}