Changeset 2421


Ignore:
Timestamp:
10/08/09 16:22:35 (13 years ago)
Author:
gkronber
Message:

Fixed bugs related to time-series prognosis with SVMs, and fixed an exception that occurred when trying to save time-series models to the database. #776 (Error when trying to save time-series prognosis predictors to the database)

Location:
trunk/sources
Files:
7 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Modeling/3.2/AnalyzerModel.cs

    r2371 r2421  
    112112
    113113    public IEnumerable<KeyValuePair<ModelingResult, double>> GetVariableResults(string variableName) {
    114       return variableResults[variableName];
     114      if (variableResults.ContainsKey(variableName))
     115        return variableResults[variableName];
     116      else return new KeyValuePair<ModelingResult, double>[] { };
    115117    }
    116118
  • trunk/sources/HeuristicLab.SupportVectorMachines/3.2/HeuristicLab.SupportVectorMachines-3.2.csproj

    r2361 r2421  
    112112  </ItemGroup>
    113113  <ItemGroup>
     114    <ProjectReference Include="..\..\HeuristicLab.Common\3.2\HeuristicLab.Common-3.2.csproj">
     115      <Project>{1FC004FC-59AF-4249-B1B6-FF25873A20E4}</Project>
     116      <Name>HeuristicLab.Common-3.2</Name>
     117    </ProjectReference>
    114118    <ProjectReference Include="..\..\HeuristicLab.Core\3.2\HeuristicLab.Core-3.2.csproj">
    115119      <Project>{F43B59AB-2B8C-4570-BC1E-15592086517C}</Project>
  • trunk/sources/HeuristicLab.SupportVectorMachines/3.2/Predictor.cs

    r2418 r2421  
    3939    }
    4040
    41     private Dictionary<string, int> variableNames = new Dictionary<string, int>();
     41    private List<string> variableNames;
    4242    private string targetVariable;
    4343    private int minTimeOffset;
     
    5252    public Predictor() : base() { } // for persistence
    5353
    54     public Predictor(SVMModel model, string targetVariable, Dictionary<string, int> variableNames) :
     54    public Predictor(SVMModel model, string targetVariable, IEnumerable<string> variableNames) :
    5555      this(model, targetVariable, variableNames, 0, 0) {
    5656    }
    5757
    58     public Predictor(SVMModel model, string targetVariable, Dictionary<string, int> variableNames, int minTimeOffset, int maxTimeOffset)
     58    public Predictor(SVMModel model, string targetVariable, IEnumerable<string> variableNames, int minTimeOffset, int maxTimeOffset)
    5959      : base() {
    6060      this.svmModel = model;
    6161      this.targetVariable = targetVariable;
    62       this.variableNames = variableNames;
     62      this.variableNames = new List<string>(variableNames);
    6363      this.minTimeOffset = minTimeOffset;
    6464      this.maxTimeOffset = maxTimeOffset;
     
    7070      RangeTransform transform = svmModel.RangeTransform;
    7171      Model model = svmModel.Model;
    72       // maps columns of the current input dataset to the columns that were originally used in training
    73       Dictionary<int, int> newIndex = new Dictionary<int, int>();
    74       foreach (var pair in variableNames) {
    75         newIndex[input.GetVariableIndex(pair.Key)] = pair.Value;
    76       }
    7772
    78 
    79       Problem p = SVMHelper.CreateSVMProblem(input, input.GetVariableIndex(targetVariable), newIndex,
     73      Problem p = SVMHelper.CreateSVMProblem(input, input.GetVariableIndex(targetVariable), variableNames,
    8074        start, end, minTimeOffset, maxTimeOffset);
    8175      Problem scaledProblem = transform.Scale(p);
     
    8882        if (double.IsNaN(input.GetValue(resultRow, targetVariableIndex)))
    8983          result[resultRow] = UpperPredictionLimit;
    90         else
     84        else if (resultRow + maxTimeOffset < 0) {
     85          result[resultRow] = UpperPredictionLimit;
     86          problemRow++;
     87        } else {
    9188          result[resultRow] = Math.Max(Math.Min(SVM.Prediction.Predict(model, scaledProblem.X[problemRow++]), UpperPredictionLimit), LowerPredictionLimit);
     89        }
    9290      }
    9391      return result;
     
    9593
    9694    public override IEnumerable<string> GetInputVariables() {
    97       return from pair in variableNames
    98              where pair.Key != targetVariable
    99              orderby pair.Value
    100              select pair.Key;
     95      return variableNames;
    10196    }
    10297
     
    109104      clone.svmModel = (SVMModel)Auxiliary.Clone(svmModel, clonedObjects);
    110105      clone.targetVariable = targetVariable;
    111       clone.variableNames = new Dictionary<string, int>(variableNames);
     106      clone.variableNames = new List<string>(variableNames);
    112107      clone.minTimeOffset = minTimeOffset;
    113108      clone.maxTimeOffset = maxTimeOffset;
     
    128123      node.AppendChild(PersistenceManager.Persist(svmModel, document, persistedObjects));
    129124      XmlNode variablesNode = document.CreateElement("Variables");
    130       foreach (var pair in variableNames) {
    131         XmlNode pairNode = document.CreateElement("Variable");
     125      foreach (var variableName in variableNames) {
     126        XmlNode variableNameNode = document.CreateElement("Variable");
    132127        XmlAttribute nameAttr = document.CreateAttribute("Name");
    133         XmlAttribute indexAttr = document.CreateAttribute("Index");
    134         nameAttr.Value = pair.Key;
    135         indexAttr.Value = XmlConvert.ToString(pair.Value);
    136         pairNode.Attributes.Append(nameAttr);
    137         pairNode.Attributes.Append(indexAttr);
    138         variablesNode.AppendChild(pairNode);
     128        nameAttr.Value = variableName;
     129        variableNameNode.Attributes.Append(nameAttr);
     130        variablesNode.AppendChild(variableNameNode);
    139131      }
    140132      node.AppendChild(variablesNode);
     
    149141      if (node.Attributes["MinTimeOffset"] != null) minTimeOffset = XmlConvert.ToInt32(node.Attributes["MinTimeOffset"].Value);
    150142      if (node.Attributes["MaxTimeOffset"] != null) maxTimeOffset = XmlConvert.ToInt32(node.Attributes["MaxTimeOffset"].Value);
    151       variableNames = new Dictionary<string, int>();
     143      variableNames = new List<string>();
    152144      XmlNode variablesNode = node.ChildNodes[1];
    153       foreach (XmlNode pairNode in variablesNode.ChildNodes) {
    154         variableNames[pairNode.Attributes["Name"].Value] = XmlConvert.ToInt32(pairNode.Attributes["Index"].Value);
     145      foreach (XmlNode variableNameNode in variablesNode.ChildNodes) {
     146        variableNames.Add(variableNameNode.Attributes["Name"].Value);
    155147      }
    156148    }
     
    190182      p.maxTimeOffset = int.Parse(maxTimeOffsetLine[1]);
    191183      p.minTimeOffset = int.Parse(minTimeOffsetLine[1]);
    192       int i = 1;
    193184      foreach (string inputVariable in inputVariableLine.Skip(1)) {
    194         p.variableNames[inputVariable.Trim()] = i++;
     185        p.variableNames.Add(inputVariable.Trim());
    195186      }
    196187      p.svmModel = SVMModel.Import(reader);
  • trunk/sources/HeuristicLab.SupportVectorMachines/3.2/PredictorBuilder.cs

    r2373 r2421  
    3636      AddVariableInfo(new VariableInfo("SVMModel", "The SVM model", typeof(SVMModel), VariableKind.In));
    3737      AddVariableInfo(new VariableInfo("TargetVariable", "The target variable", typeof(StringData), VariableKind.In));
    38       AddVariableInfo(new VariableInfo("InputVariables", "The input variable names", typeof(StringData), VariableKind.In));
     38      AddVariableInfo(new VariableInfo("InputVariables", "The input variable names", typeof(ItemList), VariableKind.In));
    3939      AddVariableInfo(new VariableInfo("TrainingSamplesStart", "Start index of the training set", typeof(IntData), VariableKind.In));
    4040      AddVariableInfo(new VariableInfo("TrainingSamplesEnd", "End index of the training set", typeof(IntData), VariableKind.In));
     
    6363      string targetVariableName = ds.GetVariableName(targetVariable);
    6464      ItemList inputVariables = GetVariableValue<ItemList>("InputVariables", scope, true);
    65       Dictionary<string, int> variableNames = new Dictionary<string, int>();
    66       for (int i = 0; i < ds.Columns; i++) variableNames[ds.GetVariableName(i)] = i;
     65      var inputVariableNames = from x in inputVariables
     66                               select ((StringData)x).Data;
    6767
    6868      double mean = ds.GetMean(targetVariable, start, end);
    6969      double range = ds.GetRange(targetVariable, start, end);
    7070
    71       Predictor predictor = new Predictor(model, targetVariableName, variableNames, minTimeOffset, maxTimeOffset);
     71      Predictor predictor = new Predictor(model, targetVariableName, inputVariableNames, minTimeOffset, maxTimeOffset);
    7272      predictor.LowerPredictionLimit = mean - punishmentFactor * range;
    7373      predictor.UpperPredictionLimit = mean + punishmentFactor * range;
  • trunk/sources/HeuristicLab.SupportVectorMachines/3.2/SVMHelper.cs

    r2417 r2421  
    66using HeuristicLab.Data;
    77using HeuristicLab.DataAnalysis;
     8using HeuristicLab.Common;
    89
    910namespace HeuristicLab.SupportVectorMachines {
    1011  public class SVMHelper {
     12    public static SVM.Problem CreateSVMProblem(Dataset dataset, int targetVariableIndex, IEnumerable<string> inputVariables, int start, int end, int minTimeOffset, int maxTimeOffset) {
     13      int rowCount = end - start;
    1114
    12     public static SVM.Problem CreateSVMProblem(Dataset dataset, int targetVariable, int start, int end, int minTimeOffset, int maxTimeOffset) {
    13       return CreateSVMProblem(dataset, targetVariable, Enumerable.Range(0, dataset.Columns).ToDictionary<int, int>(x => x), start, end, minTimeOffset, maxTimeOffset);
    14     }
     15      var targetVector = (from row in Enumerable.Range(start, rowCount)
     16                          let val = dataset.GetValue(row, targetVariableIndex)
     17                          where !double.IsNaN(val)
     18                          select val).ToArray();
    1519
    16     public static SVM.Problem CreateSVMProblem(Dataset dataset, int targetVariable, Dictionary<int, int> columnMapping, int start, int end, int minTimeOffset, int maxTimeOffset) {
    17       int rowCount = end - start;
    18       List<int> skippedFeatures = new List<int>();
    19       for (int i = 0; i < dataset.Columns; i++) {
    20         if (i != targetVariable) {
    21           if (dataset.GetRange(i, start, end) == 0)
    22             skippedFeatures.Add(i);
    23         }
    24       }
    25 
    26       int maxColumns = 0;
    27 
    28       double[] targetVector = new double[rowCount];
    29       for (int i = 0; i < rowCount; i++) {
    30         double value = dataset.GetValue(start + i, targetVariable);
    31         targetVector[i] = value;
    32       }
    33       targetVector = targetVector.Where(x => !double.IsNaN(x)).ToArray();
    3420
    3521      SVM.Node[][] nodes = new SVM.Node[targetVector.Length][];
    3622      List<SVM.Node> tempRow;
    3723      int addedRows = 0;
    38       int timeOffsetBase = columnMapping.Count;
     24      int maxColumns = 0;
    3925      for (int row = 0; row < rowCount; row++) {
    4026        tempRow = new List<SVM.Node>();
    41         for (int col = 0; col < dataset.Columns; col++) {
    42           if (!skippedFeatures.Contains(col) && col != targetVariable && columnMapping.ContainsKey(col)) {
     27        int nodeIndex = 0;
     28        foreach (var inputVariable in inputVariables) {
     29          ++nodeIndex;
     30          int col = dataset.GetVariableIndex(inputVariable);
     31          if (IsUsefulColumn(dataset, col, start, end)) {
    4332            for (int timeOffset = minTimeOffset; timeOffset <= maxTimeOffset; timeOffset++) {
    44               int actualColumn = columnMapping[col] * (maxTimeOffset - minTimeOffset + 1) + (timeOffset - minTimeOffset);
    45               double value = dataset.GetValue(start + row + timeOffset, col);
    46               if (!double.IsNaN(value)) {
    47                 tempRow.Add(new SVM.Node(actualColumn, value));
    48                 if (actualColumn > maxColumns) maxColumns = actualColumn;
     33              int actualColumn = nodeIndex * (maxTimeOffset - minTimeOffset + 1) + (timeOffset - minTimeOffset);
     34              if (start + row + timeOffset >= 0 && start + row + timeOffset < dataset.Rows) {
     35                double value = dataset.GetValue(start + row + timeOffset, col);
     36                if (!double.IsNaN(value)) {
     37                  tempRow.Add(new SVM.Node(actualColumn, value));
     38                  if (actualColumn > maxColumns) maxColumns = actualColumn;
     39                }
    4940              }
    5041            }
    5142          }
    5243        }
    53         if (!double.IsNaN(dataset.GetValue(start + row, targetVariable))) {
    54           nodes[addedRows] = tempRow.OrderBy(x => x.Index).ToArray();
     44        if (!double.IsNaN(dataset.GetValue(start + row, targetVariableIndex))) {
     45          nodes[addedRows] = tempRow.ToArray();
    5546          addedRows++;
    5647        }
     
    5950      return new SVM.Problem(targetVector.Length, targetVector, nodes, maxColumns);
    6051    }
     52
     53    // checks if the column has at least two different non-NaN and non-Infinity values
     54    private static bool IsUsefulColumn(Dataset dataset, int col, int start, int end) {
     55      double min = double.PositiveInfinity;
     56      double max = double.NegativeInfinity;
     57      for (int i = start; i < end; i++) {
     58        double x = dataset.GetValue(i, col);
     59        if (!double.IsNaN(x) && !double.IsInfinity(x)) {
     60          min = Math.Min(min, x);
     61          max = Math.Max(max, x);
     62        }
     63        if (min != max) return true;
     64      }
     65      return false;
     66    }
    6167  }
    6268}
  • trunk/sources/HeuristicLab.SupportVectorMachines/3.2/SupportVectorCreator.cs

    r2415 r2421  
    4141      AddVariableInfo(new VariableInfo("Dataset", "Dataset with all samples on which to apply the function", typeof(Dataset), VariableKind.In));
    4242      AddVariableInfo(new VariableInfo("TargetVariable", "Index of the column of the dataset that holds the target variable", typeof(IntData), VariableKind.In));
     43      AddVariableInfo(new VariableInfo("InputVariables", "List of allowed input variable names", typeof(ItemList), VariableKind.In));
    4344      AddVariableInfo(new VariableInfo("SamplesStart", "Start index of samples in dataset to evaluate", typeof(IntData), VariableKind.In));
    4445      AddVariableInfo(new VariableInfo("SamplesEnd", "End index of samples in dataset to evaluate", typeof(IntData), VariableKind.In));
     
    7071      Dataset dataset = GetVariableValue<Dataset>("Dataset", scope, true);
    7172      int targetVariable = GetVariableValue<IntData>("TargetVariable", scope, true).Data;
     73      ItemList inputVaribales = GetVariableValue<ItemList>("InputVariables", scope, true);
     74      var inputVariableNames = from x in inputVaribales
     75                               select ((StringData)x).Data;
    7276      int start = GetVariableValue<IntData>("SamplesStart", scope, true).Data;
    7377      int end = GetVariableValue<IntData>("SamplesEnd", scope, true).Data;
     
    8993      parameter.Probability = false;
    9094
    91       SVM.Problem problem = SVMHelper.CreateSVMProblem(dataset, targetVariable, start, end, minTimeOffset, maxTimeOffset);
     95      SVM.Problem problem = SVMHelper.CreateSVMProblem(dataset, targetVariable, inputVariableNames, start, end, minTimeOffset, maxTimeOffset);
    9296      SVM.RangeTransform rangeTransform = SVM.RangeTransform.Compute(problem);
    9397      SVM.Problem scaledProblem = rangeTransform.Scale(problem);
  • trunk/sources/HeuristicLab.SupportVectorMachines/3.2/SupportVectorEvaluator.cs

    r2415 r2421  
    3737      AddVariableInfo(new VariableInfo("Dataset", "Dataset with all samples on which to apply the function", typeof(Dataset), VariableKind.In));
    3838      AddVariableInfo(new VariableInfo("TargetVariable", "Index of the column of the dataset that holds the target variable", typeof(IntData), VariableKind.In));
     39      AddVariableInfo(new VariableInfo("InputVariables", "List of allowed input variable names", typeof(ItemList), VariableKind.In));
    3940      AddVariableInfo(new VariableInfo("SamplesStart", "Start index of samples in dataset to evaluate", typeof(IntData), VariableKind.In));
    4041      AddVariableInfo(new VariableInfo("SamplesEnd", "End index of samples in dataset to evaluate", typeof(IntData), VariableKind.In));
     
    4849    public override IOperation Apply(IScope scope) {
    4950      Dataset dataset = GetVariableValue<Dataset>("Dataset", scope, true);
     51      ItemList inputVariables = GetVariableValue<ItemList>("InputVariables", scope, true);
     52      var inputVariableNames = from x in inputVariables
     53                               select ((StringData)x).Data;
    5054      int targetVariable = GetVariableValue<IntData>("TargetVariable", scope, true).Data;
    5155      int start = GetVariableValue<IntData>("SamplesStart", scope, true).Data;
     
    5660      int maxTimeOffset = maxTimeOffsetData == null ? 0 : maxTimeOffsetData.Data;
    5761      SVMModel modelData = GetVariableValue<SVMModel>("SVMModel", scope, true);
    58       SVM.Problem problem = SVMHelper.CreateSVMProblem(dataset, targetVariable, start, end, minTimeOffset, maxTimeOffset);
     62
     63      SVM.Problem problem = SVMHelper.CreateSVMProblem(dataset, targetVariable, inputVariableNames, start, end, minTimeOffset, maxTimeOffset);
    5964      SVM.Problem scaledProblem = modelData.RangeTransform.Scale(problem);
    6065
Note: See TracChangeset for help on using the changeset viewer.