Changeset 14237 for branches/symbreg-factors-2650/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearRegression.cs
- Timestamp: 08/05/16 14:25:28 (8 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
branches/symbreg-factors-2650/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearRegression.cs
r14185 r14237 73 73 IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables; 74 74 IEnumerable<int> rows = problemData.TrainingIndices; 75 double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows); 75 var doubleVariables = allowedInputVariables.Where(dataset.VariableHasType<double>); 76 var factorVariableNames = allowedInputVariables.Where(dataset.VariableHasType<string>); 77 var factorVariables = from factor in factorVariableNames 78 let distinctValues = dataset.GetStringValues(factor, rows).Distinct().ToArray() 79 // 1 distinct value => skip (constant) 80 // 2 distinct values => only take one of the two values 81 // >=3 distinct values => create a binary value for each value 82 let reducedValues = distinctValues.Length <= 2 83 ? distinctValues.Take(distinctValues.Length - 1) 84 : distinctValues 85 select new KeyValuePair<string, IEnumerable<string>>(factor, reducedValues); 86 double[,] binaryMatrix = AlglibUtil.PrepareInputMatrix(dataset, factorVariables, rows); 87 double[,] doubleVarMatrix = AlglibUtil.PrepareInputMatrix(dataset, doubleVariables.Concat(new string[] { targetVariable }), rows); 88 var inputMatrix = binaryMatrix.VertCat(doubleVarMatrix); 89 76 90 if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x))) 77 91 throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset."); … … 98 112 99 113 int col = 0; 100 foreach (string column in allowedInputVariables) { 114 foreach (var kvp in factorVariables) { 115 var varName = kvp.Key; 116 foreach (var cat in kvp.Value) { 117 FactorVariableTreeNode vNode = 118 (FactorVariableTreeNode)new HeuristicLab.Problems.DataAnalysis.Symbolic.FactorVariable().CreateTreeNode(); 119 vNode.VariableName = varName; 120 vNode.VariableValue = cat; 121 vNode.Weight = coefficients[col]; 122 addition.AddSubtree(vNode); 123 col++; 124 } 125 } 126 foreach (string 
column in doubleVariables) { 101 127 VariableTreeNode vNode = (VariableTreeNode)new HeuristicLab.Problems.DataAnalysis.Symbolic.Variable().CreateTreeNode(); 102 128 vNode.VariableName = column;
Note: See TracChangeset for help on using the changeset viewer.