Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
08/05/16 14:25:28 (8 years ago)
Author:
gkronber
Message:

#2650: work in progress..

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/symbreg-factors-2650/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearRegression.cs

    r14185 r14237  
    73 73      IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
    74 74      IEnumerable<int> rows = problemData.TrainingIndices;
    75       double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
     75      var doubleVariables = allowedInputVariables.Where(dataset.VariableHasType<double>);
     76      var factorVariableNames = allowedInputVariables.Where(dataset.VariableHasType<string>);
     77      var factorVariables = from factor in factorVariableNames
     78                            let distinctValues = dataset.GetStringValues(factor, rows).Distinct().ToArray()
     79                            // 1 distinct value => skip (constant)
     80                            // 2 distinct values => only take one of the two values
     81                            // >=3 distinct values => create a binary value for each value
     82                            let reducedValues = distinctValues.Length <= 2
     83                              ? distinctValues.Take(distinctValues.Length - 1)
     84                              : distinctValues
     85                            select new KeyValuePair<string, IEnumerable<string>>(factor, reducedValues);
     86      double[,] binaryMatrix = AlglibUtil.PrepareInputMatrix(dataset, factorVariables, rows);
     87      double[,] doubleVarMatrix = AlglibUtil.PrepareInputMatrix(dataset, doubleVariables.Concat(new string[] { targetVariable }), rows);
     88      var inputMatrix = binaryMatrix.VertCat(doubleVarMatrix);
     89
    76 90      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
    77 91        throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset.");
     
    98 112
    99 113      int col = 0;
    100       foreach (string column in allowedInputVariables) {
     114      foreach (var kvp in factorVariables) {
     115        var varName = kvp.Key;
     116        foreach (var cat in kvp.Value) {
     117          FactorVariableTreeNode vNode =
     118            (FactorVariableTreeNode)new HeuristicLab.Problems.DataAnalysis.Symbolic.FactorVariable().CreateTreeNode();
     119          vNode.VariableName = varName;
     120          vNode.VariableValue = cat;
     121          vNode.Weight = coefficients[col];
     122          addition.AddSubtree(vNode);
     123          col++;
     124        }
     125      }
     126      foreach (string column in doubleVariables) {
    101 127        VariableTreeNode vNode = (VariableTreeNode)new HeuristicLab.Problems.DataAnalysis.Symbolic.Variable().CreateTreeNode();
    102 128        vNode.VariableName = column;
Note: See TracChangeset for help on using the changeset viewer.