source: trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/SupportVectorMachine/SupportVectorMachineUtil.cs @ 6002

Last change on this file since 6002 was 6002, checked in by gkronber, 8 years ago

#790 Fixed minor issues in LDA, LR, SVC and SVR to make sure everything works correctly in presence of NaN and infinity values.

File size: 2.8 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2011 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System.Collections.Generic;
23using System.Linq;
24using HeuristicLab.Problems.DataAnalysis;
25
26namespace HeuristicLab.Algorithms.DataAnalysis {
27  public class SupportVectorMachineUtil {
28    /// <summary>
29    /// Transforms <paramref name="problemData"/> into a data structure as needed by libSVM.
30    /// </summary>
31    /// <param name="problemData">The problem data to transform</param>
32    /// <param name="rowIndices">The rows of the dataset that should be contained in the resulting SVM-problem</param>
33    /// <returns>A problem data type that can be used to train a support vector machine.</returns>
34    public static SVM.Problem CreateSvmProblem(Dataset dataset, string targetVariable, IEnumerable<string> inputVariables, IEnumerable<int> rowIndices) {
35      double[] targetVector =
36        dataset.GetEnumeratedVariableValues(targetVariable, rowIndices)
37        .ToArray();
38
39      SVM.Node[][] nodes = new SVM.Node[targetVector.Length][];
40      List<SVM.Node> tempRow;
41      int maxNodeIndex = 0;
42      int svmProblemRowIndex = 0;
43      List<string> inputVariablesList = inputVariables.ToList();
44      foreach (int row in rowIndices) {
45        tempRow = new List<SVM.Node>();
46        int colIndex = 1; // make sure the smallest node index for SVM = 1
47        foreach (var inputVariable in inputVariablesList) {
48          double value = dataset[row, dataset.GetVariableIndex(inputVariable)];
49          // SVM also works with missing values
50          // => don't add NaN values in the dataset to the sparse SVM matrix representation
51          if (!double.IsNaN(value)) {
52            tempRow.Add(new SVM.Node(colIndex, value)); // nodes must be sorted in ascending ordered by column index
53            if (colIndex > maxNodeIndex) maxNodeIndex = colIndex;
54          }
55          colIndex++;
56        }
57        nodes[svmProblemRowIndex++] = tempRow.ToArray();
58      }
59
60      return new SVM.Problem(targetVector.Length, targetVector, nodes, maxNodeIndex);
61    }
62  }
63}
Note: See TracBrowser for help on using the repository browser.