1 | using System;
|
---|
2 | using System.Collections.Generic;
|
---|
3 | using System.Linq;
|
---|
4 | using System.Text;
|
---|
5 | using HeuristicLab.Core;
|
---|
6 | using HeuristicLab.Data;
|
---|
7 | using HeuristicLab.DataAnalysis;
|
---|
8 | using HeuristicLab.Common;
|
---|
9 |
|
---|
10 | namespace HeuristicLab.SupportVectorMachines {
|
---|
/// <summary>
/// Helper routines that convert rows of a <see cref="Dataset"/> into the sparse
/// node representation expected by <c>SVM.Problem</c>.
/// </summary>
public class SVMHelper {
  /// <summary>
  /// Builds an <c>SVM.Problem</c> from the dataset rows in the half-open range [start, end).
  /// </summary>
  /// <remarks>
  /// Rows whose target value is NaN are left out of both the target vector and the node matrix.
  /// Input variables for which <see cref="IsUsefulColumn"/> returns false contribute no nodes,
  /// and NaN input values are omitted, so rows may have different lengths.
  /// The feature index of a value is
  /// <c>k * (maxTimeOffset - minTimeOffset + 1) + (timeOffset - minTimeOffset)</c>,
  /// where <c>k</c> is the 1-based position of the variable in <paramref name="inputVariables"/>
  /// (variables that are not useful still consume a position).
  /// </remarks>
  /// <param name="dataset">Source of all values.</param>
  /// <param name="targetVariableIndex">Column index of the target variable in <paramref name="dataset"/>.</param>
  /// <param name="inputVariables">Names of the input variables; resolved through <c>dataset.GetVariableIndex</c>.</param>
  /// <param name="start">First row (inclusive).</param>
  /// <param name="end">Last row (exclusive).</param>
  /// <param name="minTimeOffset">Smallest row offset (relative to the current row) from which input values are read.</param>
  /// <param name="maxTimeOffset">Largest row offset (relative to the current row) from which input values are read.</param>
  /// <returns>The problem built from the target vector, the node rows and the largest feature index that was generated.</returns>
  public static SVM.Problem CreateSVMProblem(Dataset dataset, int targetVariableIndex, IEnumerable<string> inputVariables, int start, int end, int minTimeOffset, int maxTimeOffset) {
    int rowCount = end - start;
    int offsetCount = maxTimeOffset - minTimeOffset + 1;

    var targetVector = (from row in Enumerable.Range(start, rowCount)
                        let val = dataset.GetValue(row, targetVariableIndex)
                        where !double.IsNaN(val)
                        select val).ToArray();

    // Column index and usefulness depend only on (dataset, variable, start, end), not on the
    // current row. Resolve them once here instead of re-scanning every column for every row.
    // The two lists are parallel: usefulColumns[i] is the dataset column and
    // firstFeatureIndices[i] the feature index of that variable at minTimeOffset.
    var usefulColumns = new List<int>();
    var firstFeatureIndices = new List<int>();
    if (rowCount > 0) { // with no rows nothing is looked up, so variable names are not resolved at all
      int nodeIndex = 0;
      foreach (var inputVariable in inputVariables) {
        ++nodeIndex; // positions are 1-based and advance for non-useful variables as well
        int col = dataset.GetVariableIndex(inputVariable);
        if (IsUsefulColumn(dataset, col, start, end)) {
          usefulColumns.Add(col);
          firstFeatureIndices.Add(nodeIndex * offsetCount);
        }
      }
    }

    SVM.Node[][] nodes = new SVM.Node[targetVector.Length][];
    int addedRows = 0;
    int maxColumns = 0;
    for (int row = 0; row < rowCount; row++) {
      int datasetRow = start + row;
      var tempRow = new List<SVM.Node>();
      for (int i = 0; i < usefulColumns.Count; i++) {
        int col = usefulColumns[i];
        int firstFeatureIndex = firstFeatureIndices[i];
        for (int timeOffset = minTimeOffset; timeOffset <= maxTimeOffset; timeOffset++) {
          int sourceRow = datasetRow + timeOffset;
          // offsets that point outside the dataset simply produce no node
          if (sourceRow < 0 || sourceRow >= dataset.Rows) continue;
          double value = dataset.GetValue(sourceRow, col);
          if (double.IsNaN(value)) continue;
          int actualColumn = firstFeatureIndex + (timeOffset - minTimeOffset);
          tempRow.Add(new SVM.Node(actualColumn, value));
          // NOTE: the maximum is tracked over all rows, including rows dropped below
          // because of a NaN target value.
          if (actualColumn > maxColumns) maxColumns = actualColumn;
        }
      }
      // keep the node matrix aligned with targetVector, which contains non-NaN targets only
      if (!double.IsNaN(dataset.GetValue(datasetRow, targetVariableIndex))) {
        nodes[addedRows] = tempRow.ToArray();
        addedRows++;
      }
    }

    return new SVM.Problem(targetVector.Length, targetVector, nodes, maxColumns);
  }

  /// <summary>
  /// Checks whether the column has at least two different values that are neither NaN nor infinite
  /// within the rows [start, end).
  /// </summary>
  /// <param name="dataset">Source of the values.</param>
  /// <param name="col">Column index to inspect.</param>
  /// <param name="start">First row (inclusive).</param>
  /// <param name="end">Last row (exclusive).</param>
  /// <returns>
  /// true as soon as two distinct finite values were seen; false for an empty range and for columns
  /// that are constant or contain only NaN/infinite values.
  /// </returns>
  private static bool IsUsefulColumn(Dataset dataset, int col, int start, int end) {
    bool hasReference = false;
    double reference = 0.0;
    for (int i = start; i < end; i++) {
      double x = dataset.GetValue(i, col);
      // NaN and +/-Infinity never count as values. Comparing only finite values avoids
      // reporting a column as useful just because its first entry is missing.
      if (double.IsNaN(x) || double.IsInfinity(x)) continue;
      if (!hasReference) {
        reference = x;
        hasReference = true;
      } else if (x != reference) {
        // exact inequality is intended: any second distinct value makes the column non-constant
        return true;
      }
    }
    return false;
  }
}
|
---|
68 | }
|
---|