Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.SupportVectorMachines/3.2/SVMHelper.cs @ 2417

Last change on this file since 2417 was 2417, checked in by gkronber, 15 years ago

Fixed problem with inconsistent SVM predictor output (node indexes in support vectors and problems have to be in ascending order). #744

File size: 2.6 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Linq;
4using System.Text;
5using HeuristicLab.Core;
6using HeuristicLab.Data;
7using HeuristicLab.DataAnalysis;
8
namespace HeuristicLab.SupportVectorMachines {
  /// <summary>
  /// Static helpers that convert a range of rows of a <see cref="Dataset"/> into an
  /// <see cref="SVM.Problem"/> (target vector plus one sparse node array per row).
  /// </summary>
  public class SVMHelper {

    /// <summary>
    /// Creates an SVM problem in which every dataset column is mapped to itself.
    /// </summary>
    /// <param name="dataset">Dataset to read values from.</param>
    /// <param name="targetVariable">Column index of the target variable; never emitted as an input feature.</param>
    /// <param name="start">Index of the first row to read.</param>
    /// <param name="end">Row index one past the last row to read (<c>end - start</c> rows are read).</param>
    /// <param name="minTimeOffset">Smallest row offset (relative to the current row) used to read input values.</param>
    /// <param name="maxTimeOffset">Largest row offset (relative to the current row) used to read input values.</param>
    /// <returns>The SVM problem built from the given rows.</returns>
    public static SVM.Problem CreateSVMProblem(Dataset dataset, int targetVariable, int start, int end, int minTimeOffset, int maxTimeOffset) {
      // Identity mapping: key and value are both the column index.
      Dictionary<int, int> identityMapping = Enumerable.Range(0, dataset.Columns).ToDictionary<int, int>(x => x);
      return CreateSVMProblem(dataset, targetVariable, identityMapping, start, end, minTimeOffset, maxTimeOffset);
    }

    /// <summary>
    /// Creates an SVM problem from the rows <paramref name="start"/> .. <paramref name="end"/> - 1.
    /// </summary>
    /// <remarks>
    /// A dataset column becomes an input feature only if it is not the target variable, has an entry in
    /// <paramref name="columnMapping"/>, and <c>dataset.GetRange(column, start, end)</c> is not zero.
    /// For each such column one node per time offset is produced; its index is
    /// <c>columnMapping[column] * (maxTimeOffset - minTimeOffset + 1) + (timeOffset - minTimeOffset)</c>.
    /// NaN input values produce no node. Rows whose target value is NaN are left out of the problem.
    /// </remarks>
    /// <param name="dataset">Dataset to read values from.</param>
    /// <param name="targetVariable">Column index of the target variable.</param>
    /// <param name="columnMapping">Maps dataset column indexes to the base feature index used for node indexes.</param>
    /// <param name="start">Index of the first row to read.</param>
    /// <param name="end">Row index one past the last row to read (<c>end - start</c> rows are read).</param>
    /// <param name="minTimeOffset">Smallest row offset (relative to the current row) used to read input values.</param>
    /// <param name="maxTimeOffset">Largest row offset (relative to the current row) used to read input values.</param>
    /// <returns>
    /// A problem holding the non-NaN target values, the matching node arrays (sorted by ascending node index)
    /// and the largest node index that was generated.
    /// </returns>
    public static SVM.Problem CreateSVMProblem(Dataset dataset, int targetVariable, Dictionary<int, int> columnMapping, int start, int end, int minTimeOffset, int maxTimeOffset) {
      int rowCount = end - start;
      int offsetsPerColumn = maxTimeOffset - minTimeOffset + 1;

      // Column eligibility does not depend on the row, so it is decided once up front.
      // inputColumns[i] is a dataset column, firstNodeIndexes[i] the node index of its smallest time offset.
      List<int> inputColumns = new List<int>();
      List<int> firstNodeIndexes = new List<int>();
      for (int col = 0; col < dataset.Columns; col++) {
        if (col == targetVariable) {
          continue;
        }
        int mappedColumn;
        if (!columnMapping.TryGetValue(col, out mappedColumn)) {
          continue;
        }
        // Columns whose range over the selected rows is exactly zero are skipped.
        if (dataset.GetRange(col, start, end) == 0) {
          continue;
        }
        inputColumns.Add(col);
        firstNodeIndexes.Add(mappedColumn * offsetsPerColumn);
      }

      // Read every target value once; the NaN entries are needed below to decide which rows to drop.
      double[] allTargets = new double[rowCount];
      for (int row = 0; row < rowCount; row++) {
        allTargets[row] = dataset.GetValue(start + row, targetVariable);
      }
      double[] targetVector = allTargets.Where(x => !double.IsNaN(x)).ToArray();

      SVM.Node[][] nodes = new SVM.Node[targetVector.Length][];
      int maxColumns = 0;
      int addedRows = 0;
      for (int row = 0; row < rowCount; row++) {
        // Nodes are collected for every row, including rows that are dropped afterwards,
        // so maxColumns reflects all rows in the range (unchanged from the previous behavior).
        List<SVM.Node> rowNodes = new List<SVM.Node>();
        for (int i = 0; i < inputColumns.Count; i++) {
          int col = inputColumns[i];
          for (int timeOffset = minTimeOffset; timeOffset <= maxTimeOffset; timeOffset++) {
            double value = dataset.GetValue(start + row + timeOffset, col);
            if (double.IsNaN(value)) {
              continue;
            }
            int nodeIndex = firstNodeIndexes[i] + (timeOffset - minTimeOffset);
            rowNodes.Add(new SVM.Node(nodeIndex, value));
            if (nodeIndex > maxColumns) {
              maxColumns = nodeIndex;
            }
          }
        }

        if (!double.IsNaN(allTargets[row])) {
          // The mapping need not be monotonic in the column index, so sort explicitly:
          // node indexes have to be in ascending order.
          nodes[addedRows] = rowNodes.OrderBy(x => x.Index).ToArray();
          addedRows++;
        }
      }

      return new SVM.Problem(targetVector.Length, targetVector, nodes, maxColumns);
    }
  }
}
Note: See TracBrowser for help on using the repository browser.