Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
10/08/09 16:22:35 (15 years ago)
Author:
gkronber
Message:

Fixed bugs related to time-series prognosis with SVMs, and fixed an exception that occurred when trying to save time-series models to the database. #776 (Error when trying to save time-series prognosis predictors to the database)

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.SupportVectorMachines/3.2/SVMHelper.cs

    r2417 r2421  
    66using HeuristicLab.Data;
    77using HeuristicLab.DataAnalysis;
     8using HeuristicLab.Common;
    89
    910namespace HeuristicLab.SupportVectorMachines {
    1011  public class SVMHelper {
     12    public static SVM.Problem CreateSVMProblem(Dataset dataset, int targetVariableIndex, IEnumerable<string> inputVariables, int start, int end, int minTimeOffset, int maxTimeOffset) {
     13      int rowCount = end - start;
    1114
    12     public static SVM.Problem CreateSVMProblem(Dataset dataset, int targetVariable, int start, int end, int minTimeOffset, int maxTimeOffset) {
    13       return CreateSVMProblem(dataset, targetVariable, Enumerable.Range(0, dataset.Columns).ToDictionary<int, int>(x => x), start, end, minTimeOffset, maxTimeOffset);
    14     }
     15      var targetVector = (from row in Enumerable.Range(start, rowCount)
     16                          let val = dataset.GetValue(row, targetVariableIndex)
     17                          where !double.IsNaN(val)
     18                          select val).ToArray();
    1519
    16     public static SVM.Problem CreateSVMProblem(Dataset dataset, int targetVariable, Dictionary<int, int> columnMapping, int start, int end, int minTimeOffset, int maxTimeOffset) {
    17       int rowCount = end - start;
    18       List<int> skippedFeatures = new List<int>();
    19       for (int i = 0; i < dataset.Columns; i++) {
    20         if (i != targetVariable) {
    21           if (dataset.GetRange(i, start, end) == 0)
    22             skippedFeatures.Add(i);
    23         }
    24       }
    25 
    26       int maxColumns = 0;
    27 
    28       double[] targetVector = new double[rowCount];
    29       for (int i = 0; i < rowCount; i++) {
    30         double value = dataset.GetValue(start + i, targetVariable);
    31         targetVector[i] = value;
    32       }
    33       targetVector = targetVector.Where(x => !double.IsNaN(x)).ToArray();
    3420
    3521      SVM.Node[][] nodes = new SVM.Node[targetVector.Length][];
    3622      List<SVM.Node> tempRow;
    3723      int addedRows = 0;
    38       int timeOffsetBase = columnMapping.Count;
     24      int maxColumns = 0;
    3925      for (int row = 0; row < rowCount; row++) {
    4026        tempRow = new List<SVM.Node>();
    41         for (int col = 0; col < dataset.Columns; col++) {
    42           if (!skippedFeatures.Contains(col) && col != targetVariable && columnMapping.ContainsKey(col)) {
     27        int nodeIndex = 0;
     28        foreach (var inputVariable in inputVariables) {
     29          ++nodeIndex;
     30          int col = dataset.GetVariableIndex(inputVariable);
     31          if (IsUsefulColumn(dataset, col, start, end)) {
    4332            for (int timeOffset = minTimeOffset; timeOffset <= maxTimeOffset; timeOffset++) {
    44               int actualColumn = columnMapping[col] * (maxTimeOffset - minTimeOffset + 1) + (timeOffset - minTimeOffset);
    45               double value = dataset.GetValue(start + row + timeOffset, col);
    46               if (!double.IsNaN(value)) {
    47                 tempRow.Add(new SVM.Node(actualColumn, value));
    48                 if (actualColumn > maxColumns) maxColumns = actualColumn;
     33              int actualColumn = nodeIndex * (maxTimeOffset - minTimeOffset + 1) + (timeOffset - minTimeOffset);
     34              if (start + row + timeOffset >= 0 && start + row + timeOffset < dataset.Rows) {
     35                double value = dataset.GetValue(start + row + timeOffset, col);
     36                if (!double.IsNaN(value)) {
     37                  tempRow.Add(new SVM.Node(actualColumn, value));
     38                  if (actualColumn > maxColumns) maxColumns = actualColumn;
     39                }
    4940              }
    5041            }
    5142          }
    5243        }
    53         if (!double.IsNaN(dataset.GetValue(start + row, targetVariable))) {
    54           nodes[addedRows] = tempRow.OrderBy(x => x.Index).ToArray();
     44        if (!double.IsNaN(dataset.GetValue(start + row, targetVariableIndex))) {
     45          nodes[addedRows] = tempRow.ToArray();
    5546          addedRows++;
    5647        }
     
    5950      return new SVM.Problem(targetVector.Length, targetVector, nodes, maxColumns);
    6051    }
     52
     53    // checks if the column has at least two different non-NaN and non-Infinity values
     54    private static bool IsUsefulColumn(Dataset dataset, int col, int start, int end) {
     55      double min = double.PositiveInfinity;
     56      double max = double.NegativeInfinity;
     57      for (int i = start; i < end; i++) {
     58        double x = dataset.GetValue(i, col);
     59        if (!double.IsNaN(x) && !double.IsInfinity(x)) {
     60          min = Math.Min(min, x);
     61          max = Math.Max(max, x);
     62        }
     63        if (min != max) return true;
     64      }
     65      return false;
     66    }
    6167  }
    6268}
Note: See TracChangeset for help on using the changeset viewer.