- Timestamp:
- 08/05/16 14:25:28 (8 years ago)
- Location:
- branches/symbreg-factors-2650/HeuristicLab.Algorithms.DataAnalysis/3.4
- Files:
-
- 6 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/symbreg-factors-2650/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/GaussianProcessClassificationModelCreator.cs
r14185 r14237 67 67 HyperparameterGradientsParameter.ActualValue = new RealVector(model.HyperparameterGradients); 68 68 return base.Apply(); 69 } catch (ArgumentException) { } catch (alglib.alglibexception) { } 69 } 70 catch (ArgumentException) { } 71 catch (alglib.alglibexception) { } 70 72 NegativeLogLikelihoodParameter.ActualValue = new DoubleValue(1E300); 71 73 HyperparameterGradientsParameter.ActualValue = new RealVector(Hyperparameter.Count()); -
branches/symbreg-factors-2650/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/GradientBoostedTreesAlgorithmStatic.cs
r14185 r14237 148 148 // for custom stepping & termination 149 149 public static IGbmState CreateGbmState(IRegressionProblemData problemData, ILossFunction lossFunction, uint randSeed, int maxSize = 3, double r = 0.66, double m = 0.5, double nu = 0.01) { 150 // check input variables. Only double variables are allowed. 151 var invalidInputs = 152 problemData.AllowedInputVariables.Where(name => !problemData.Dataset.VariableHasType<double>(name)); 153 if (invalidInputs.Any()) 154 throw new NotSupportedException("Gradient tree boosting only supports real-valued variables. Unsupported inputs: " + string.Join(", ", invalidInputs)); 155 150 156 return new GbmState(problemData, lossFunction, randSeed, maxSize, r, m, nu); 151 157 } -
branches/symbreg-factors-2650/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/AlglibUtil.cs
r14185 r14237 20 20 #endregion 21 21 22 using System; 22 23 using System.Collections.Generic; 23 24 using System.Linq; … … 27 28 public static class AlglibUtil { 28 29 public static double[,] PrepareInputMatrix(IDataset dataset, IEnumerable<string> variables, IEnumerable<int> rows) { 29 List<string> variablesList = variables.ToList(); 30 // check input variables. Only double variables are allowed. 31 var invalidInputs = 32 variables.Where(name => !dataset.VariableHasType<double>(name)); 33 if (invalidInputs.Any()) 34 throw new NotSupportedException("Unsupported inputs: " + string.Join(", ", invalidInputs)); 35 30 36 List<int> rowsList = rows.ToList(); 31 32 double[,] matrix = new double[rowsList.Count, variablesList.Count]; 37 double[,] matrix = new double[rowsList.Count, variables.Count()]; 33 38 34 39 int col = 0; … … 45 50 return matrix; 46 51 } 52 47 53 public static double[,] PrepareAndScaleInputMatrix(IDataset dataset, IEnumerable<string> variables, IEnumerable<int> rows, Scaling scaling) { 54 // check input variables. Only double variables are allowed. 55 var invalidInputs = 56 variables.Where(name => !dataset.VariableHasType<double>(name)); 57 if (invalidInputs.Any()) 58 throw new NotSupportedException("Unsupported inputs: " + string.Join(", ", invalidInputs)); 59 48 60 List<string> variablesList = variables.ToList(); 49 61 List<int> rowsList = rows.ToList(); … … 64 76 return matrix; 65 77 } 78 79 /// <summary> 80 /// Prepares a binary data matrix from a number of factors and specified factor values 81 /// </summary> 82 /// <param name="dataset">A dataset that contains the variable values</param> 83 /// <param name="factorVariables">An enumerable of categorical variables (factors). For each variable an enumerable of values must be specified.</param> 84 /// <param name="rows">An enumerable of row indices for the dataset</param> 85 /// <returns></returns> 86 /// <remarks>Factor variables (categorical variables) are split up into multiple binary variables one for each specified value.</remarks> 87 public static double[,] PrepareInputMatrix( 88 IDataset dataset, 89 IEnumerable<KeyValuePair<string, IEnumerable<string>>> factorVariables, 90 IEnumerable<int> rows) { 91 // check input variables. Only string variables are allowed. 92 var invalidInputs = 93 factorVariables.Select(kvp => kvp.Key).Where(name => !dataset.VariableHasType<string>(name)); 94 if (invalidInputs.Any()) 95 throw new NotSupportedException("Unsupported inputs: " + string.Join(", ", invalidInputs)); 96 97 int numBinaryColumns = factorVariables.Sum(kvp => kvp.Value.Count()); 98 99 List<int> rowsList = rows.ToList(); 100 double[,] matrix = new double[rowsList.Count, numBinaryColumns]; 101 102 int col = 0; 103 foreach (var kvp in factorVariables) { 104 var varName = kvp.Key; 105 var cats = kvp.Value; 106 var catCount = cats.Count(); 107 if (catCount == 0) continue; 108 foreach (var cat in cats) { 109 var values = dataset.GetStringValues(varName, rows); 110 int row = 0; 111 foreach (var value in values) { 112 matrix[row, col] = value == cat ? 1 : 0; 113 row++; 114 } 115 col++; 116 } 117 } 118 return matrix; 119 } 66 120 } 67 121 } -
branches/symbreg-factors-2650/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearRegression.cs
r14185 r14237 73 73 IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables; 74 74 IEnumerable<int> rows = problemData.TrainingIndices; 75 double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows); 75 var doubleVariables = allowedInputVariables.Where(dataset.VariableHasType<double>); 76 var factorVariableNames = allowedInputVariables.Where(dataset.VariableHasType<string>); 77 var factorVariables = from factor in factorVariableNames 78 let distinctValues = dataset.GetStringValues(factor, rows).Distinct().ToArray() 79 // 1 distinct value => skip (constant) 80 // 2 distinct values => only take one of the two values 81 // >=3 distinct values => create a binary value for each value 82 let reducedValues = distinctValues.Length <= 2 83 ? distinctValues.Take(distinctValues.Length - 1) 84 : distinctValues 85 select new KeyValuePair<string, IEnumerable<string>>(factor, reducedValues); 86 double[,] binaryMatrix = AlglibUtil.PrepareInputMatrix(dataset, factorVariables, rows); 87 double[,] doubleVarMatrix = AlglibUtil.PrepareInputMatrix(dataset, doubleVariables.Concat(new string[] { targetVariable }), rows); 88 var inputMatrix = binaryMatrix.VertCat(doubleVarMatrix); 89 76 90 if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x))) 77 91 throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset."); … … 98 112 99 113 int col = 0; 100 foreach (string column in allowedInputVariables) { 114 foreach (var kvp in factorVariables) { 115 var varName = kvp.Key; 116 foreach (var cat in kvp.Value) { 117 FactorVariableTreeNode vNode = 118 (FactorVariableTreeNode)new HeuristicLab.Problems.DataAnalysis.Symbolic.FactorVariable().CreateTreeNode(); 119 vNode.VariableName = varName; 120 vNode.VariableValue = cat; 121 vNode.Weight = coefficients[col]; 122 addition.AddSubtree(vNode); 123 col++; 124 } 125 } 126 foreach (string column in doubleVariables) { 101 127 VariableTreeNode vNode = (VariableTreeNode)new HeuristicLab.Problems.DataAnalysis.Symbolic.Variable().CreateTreeNode(); 102 128 vNode.VariableName = column; -
branches/symbreg-factors-2650/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/ModelCreation/NcaModelCreator.cs
r14185 r14237 20 20 #endregion 21 21 22 using System; 22 23 using System.Linq; 23 24 using HeuristicLab.Common; -
branches/symbreg-factors-2650/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourModel.cs
r14185 r14237 104 104 this.allowedInputVariables = allowedInputVariables.ToArray(); 105 105 106 // check input variables. Only double variables are allowed. 107 var invalidInputs = 108 allowedInputVariables.Where(name => !dataset.VariableHasType<double>(name)); 109 if (invalidInputs.Any()) 110 throw new NotSupportedException("Gradient tree boosting only supports real-valued variables. Unsupported inputs: " + string.Join(", ", invalidInputs)); 111 112 106 113 var inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, 107 114 allowedInputVariables.Concat(new string[] { targetVariable }),
Note: See TracChangeset
for help on using the changeset viewer.