[1808] | 1 | using System;
|
---|
| 2 | using System.Collections.Generic;
|
---|
| 3 | using System.Linq;
|
---|
| 4 | using System.Text;
|
---|
| 5 | using HeuristicLab.Core;
|
---|
| 6 | using HeuristicLab.Data;
|
---|
| 7 | using HeuristicLab.DataAnalysis;
|
---|
[2421] | 8 | using HeuristicLab.Common;
|
---|
[1808] | 9 |
|
---|
| 10 | namespace HeuristicLab.SupportVectorMachines {
|
---|
/// <summary>
/// Static helpers that turn a range of rows of a <see cref="Dataset"/> into an <c>SVM.Problem</c>.
/// </summary>
public class SVMHelper {
  /// <summary>
  /// Builds an <c>SVM.Problem</c> from the dataset rows in the half-open range [start, end).
  /// </summary>
  /// <remarks>
  /// Rows whose target value is NaN are left out of both the target vector and the node rows.
  /// For every input variable and every time offset in [minTimeOffset, maxTimeOffset] the value at
  /// row + offset is added as a node unless that row lies outside the dataset or the value is NaN.
  /// Input variables for which <see cref="IsUsefulColumn"/> returns false contribute no nodes, but
  /// they still consume their block of node indices so the index of a variable does not depend on
  /// which other variables were dropped.
  /// </remarks>
  /// <param name="dataset">Dataset the values are read from.</param>
  /// <param name="targetVariableIndex">Column index of the target variable.</param>
  /// <param name="inputVariables">Names of the input variables; resolved via <c>dataset.GetVariableIndex</c>.</param>
  /// <param name="start">First row (inclusive).</param>
  /// <param name="end">Last row (exclusive).</param>
  /// <param name="minTimeOffset">Smallest row offset applied to each input variable.</param>
  /// <param name="maxTimeOffset">Largest row offset applied to each input variable.</param>
  /// <returns>
  /// The problem constructed from the number of kept rows, the target values, the node rows and the
  /// largest node index that was produced (0 if no node was produced).
  /// </returns>
  public static SVM.Problem CreateSVMProblem(Dataset dataset, int targetVariableIndex, IEnumerable<string> inputVariables, int start, int end, int minTimeOffset, int maxTimeOffset) {
    int rowCount = end - start;
    // number of node indices reserved per input variable (one per time offset)
    int windowSize = maxTimeOffset - minTimeOffset + 1;

    var targetVector = (from row in Enumerable.Range(start, rowCount)
                        let val = dataset.GetValue(row, targetVariableIndex)
                        where !double.IsNaN(val)
                        select val).ToArray();

    // Resolve the input variables once instead of once per row: neither the column index nor the
    // usefulness of a column depends on the row that is currently processed.
    // The lookup is skipped for an empty row range because no row would ever use the result.
    List<int> usefulColumns = new List<int>();
    List<int> firstNodeIndices = new List<int>();
    if (rowCount > 0) {
      int nodeIndex = 0;
      foreach (var inputVariable in inputVariables) {
        // incremented for every variable (useful or not) to keep node indices stable
        ++nodeIndex;
        int col = dataset.GetVariableIndex(inputVariable);
        if (IsUsefulColumn(dataset, col, start, end)) {
          usefulColumns.Add(col);
          firstNodeIndices.Add(nodeIndex * windowSize);
        }
      }
    }

    SVM.Node[][] nodes = new SVM.Node[targetVector.Length][];
    int addedRows = 0;
    int maxColumns = 0;
    for (int row = 0; row < rowCount; row++) {
      List<SVM.Node> tempRow = new List<SVM.Node>();
      for (int i = 0; i < usefulColumns.Count; i++) {
        for (int timeOffset = minTimeOffset; timeOffset <= maxTimeOffset; timeOffset++) {
          int sourceRow = start + row + timeOffset;
          // offsets may point before the first or after the last row of the dataset
          if (sourceRow >= 0 && sourceRow < dataset.Rows) {
            double value = dataset.GetValue(sourceRow, usefulColumns[i]);
            if (!double.IsNaN(value)) {
              int actualColumn = firstNodeIndices[i] + (timeOffset - minTimeOffset);
              tempRow.Add(new SVM.Node(actualColumn, value));
              // tracked for all rows, including rows that are dropped below
              if (actualColumn > maxColumns) maxColumns = actualColumn;
            }
          }
        }
      }
      // keep the row only if its target is defined; this mirrors the filter used for targetVector
      if (!double.IsNaN(dataset.GetValue(start + row, targetVariableIndex))) {
        nodes[addedRows] = tempRow.ToArray();
        addedRows++;
      }
    }

    return new SVM.Problem(targetVector.Length, targetVector, nodes, maxColumns);
  }

  /// <summary>
  /// Checks if the column has at least two different non-NaN and non-Infinity values
  /// in the rows [start, end).
  /// </summary>
  /// <param name="dataset">Dataset the values are read from.</param>
  /// <param name="col">Index of the column to inspect.</param>
  /// <param name="start">First row (inclusive).</param>
  /// <param name="end">Last row (exclusive).</param>
  /// <returns>
  /// True as soon as two different finite values have been seen; false for an empty range, a
  /// constant column, or a column without any finite value.
  /// </returns>
  private static bool IsUsefulColumn(Dataset dataset, int col, int start, int end) {
    double min = double.PositiveInfinity;
    double max = double.NegativeInfinity;
    for (int i = start; i < end; i++) {
      double x = dataset.GetValue(i, col);
      if (!double.IsNaN(x) && !double.IsInfinity(x)) {
        min = Math.Min(min, x);
        max = Math.Max(max, x);
        // Compare only after a finite value has been folded in. Before that min/max still hold
        // their +Infinity/-Infinity sentinels, which differ and would wrongly report the column
        // as useful when it starts with NaN or an infinite value.
        if (min != max) return true;
      }
    }
    return false;
  }
}
|
---|
| 68 | }
|
---|