Free cookie consent management tool by TermsFeed Policy Generator

source: branches/Persistence Test/HeuristicLab.SupportVectorMachines/3.2/SVMHelper.cs @ 3043

Last change on this file since 3043 was 2421, checked in by gkronber, 15 years ago

Fixed bugs related to time-series prognosis with SVMs, and fixed an exception when trying to save time-series models to the database. #776 (Error when trying to save time-series prognosis predictors to the database)

File size: 2.7 KB
Line 
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.DataAnalysis;
using HeuristicLab.Common;

namespace HeuristicLab.SupportVectorMachines {
  /// <summary>
  /// Static helpers that convert rows of a <c>Dataset</c> into the sparse
  /// <c>SVM.Problem</c> representation (one array of <c>SVM.Node</c> per sample).
  /// </summary>
  public class SVMHelper {
    /// <summary>
    /// Builds an <c>SVM.Problem</c> from the dataset rows <paramref name="start"/> (inclusive)
    /// to <paramref name="end"/> (exclusive).
    /// </summary>
    /// <remarks>
    /// Rows whose target value is NaN are left out of the problem. For every remaining row one
    /// <c>SVM.Node</c> is created per input variable and per time offset, provided that the
    /// shifted row lies inside the dataset and the value is not NaN. Input variables that
    /// <see cref="IsUsefulColumn"/> rejects for the row range contribute no nodes, but they still
    /// reserve their node indices so that the numbering of the other variables does not shift.
    /// The node index of a value is
    /// <c>position * (maxTimeOffset - minTimeOffset + 1) + (timeOffset - minTimeOffset)</c>,
    /// where <c>position</c> is the 1-based position of the variable in
    /// <paramref name="inputVariables"/>.
    /// </remarks>
    /// <param name="dataset">Dataset the values are read from.</param>
    /// <param name="targetVariableIndex">Column index of the target variable.</param>
    /// <param name="inputVariables">Names of the input variables; enumerated exactly once.</param>
    /// <param name="start">First row of the range (inclusive).</param>
    /// <param name="end">End of the row range (exclusive).</param>
    /// <param name="minTimeOffset">Smallest row offset (inclusive) applied to the input variables.</param>
    /// <param name="maxTimeOffset">Largest row offset (inclusive) applied to the input variables.</param>
    /// <returns>
    /// A problem containing the non-NaN target values, the matching node arrays and the largest
    /// node index that was encountered.
    /// </returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="end"/> is smaller than <paramref name="start"/>.
    /// </exception>
    public static SVM.Problem CreateSVMProblem(Dataset dataset, int targetVariableIndex, IEnumerable<string> inputVariables, int start, int end, int minTimeOffset, int maxTimeOffset) {
      if (end < start) {
        throw new ArgumentOutOfRangeException("end", "end must not be smaller than start.");
      }

      int rowCount = end - start;
      int windowSize = maxTimeOffset - minTimeOffset + 1;

      // Resolving the dataset column and scanning it for usefulness does not depend on the
      // current row, so it is done once up front instead of once per row.
      var usefulColumns = inputVariables
        .Select((name, position) => new {
          BaseNodeIndex = (position + 1) * windowSize,
          DatasetColumn = dataset.GetVariableIndex(name)
        })
        .Where(column => IsUsefulColumn(dataset, column.DatasetColumn, start, end))
        .ToArray();

      var targets = new List<double>(rowCount);
      var samples = new List<SVM.Node[]>(rowCount);
      int maxColumns = 0;
      for (int row = start; row < end; row++) {
        var nodes = new List<SVM.Node>();
        foreach (var column in usefulColumns) {
          for (int timeOffset = minTimeOffset; timeOffset <= maxTimeOffset; timeOffset++) {
            int sourceRow = row + timeOffset;
            // shifted rows outside of the dataset simply yield no node
            if (sourceRow < 0 || sourceRow >= dataset.Rows) continue;

            double value = dataset.GetValue(sourceRow, column.DatasetColumn);
            if (double.IsNaN(value)) continue;

            int actualColumn = column.BaseNodeIndex + (timeOffset - minTimeOffset);
            nodes.Add(new SVM.Node(actualColumn, value));
            // maxColumns deliberately also covers rows that are dropped below because of a NaN target
            if (actualColumn > maxColumns) maxColumns = actualColumn;
          }
        }

        // targets and samples are appended together so that they always stay aligned
        double target = dataset.GetValue(row, targetVariableIndex);
        if (!double.IsNaN(target)) {
          targets.Add(target);
          samples.Add(nodes.ToArray());
        }
      }

      double[] targetVector = targets.ToArray();
      return new SVM.Problem(targetVector.Length, targetVector, samples.ToArray(), maxColumns);
    }

    /// <summary>
    /// Checks if the column has at least two different non-NaN and non-Infinity values
    /// in the rows <paramref name="start"/> (inclusive) to <paramref name="end"/> (exclusive).
    /// </summary>
    /// <param name="dataset">Dataset the values are read from.</param>
    /// <param name="col">Index of the column to check.</param>
    /// <param name="start">First row of the range (inclusive).</param>
    /// <param name="end">End of the row range (exclusive).</param>
    /// <returns>
    /// <c>true</c> as soon as two distinct finite values have been seen; <c>false</c> if the range
    /// is empty, holds no finite value or holds only one distinct finite value.
    /// </returns>
    private static bool IsUsefulColumn(Dataset dataset, int col, int start, int end) {
      double min = double.PositiveInfinity;
      double max = double.NegativeInfinity;
      for (int i = start; i < end; i++) {
        double x = dataset.GetValue(i, col);
        if (double.IsNaN(x) || double.IsInfinity(x)) continue;

        if (x < min) min = x;
        if (x > max) max = x;
        // Only compare after a finite value has been recorded: the initial sentinels
        // (+Infinity / -Infinity) differ from each other and would otherwise report a column
        // that starts with NaN or Infinity as useful. Exact comparison is intended here, any
        // two distinct values are sufficient.
        if (min != max) return true;
      }
      return false;
    }
  }
}
Note: See TracBrowser for help on using the repository browser.