Changeset 14240 for branches/symbreg-factors-2650
- Timestamp: 08/05/16 18:44:51 (8 years ago)
- Location: branches/symbreg-factors-2650
- Files: 7 edited
Legend:
- Unmodified
- Added
- Removed
branches/symbreg-factors-2650/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/AlglibUtil.cs
r14237 r14240 118 118 return matrix; 119 119 } 120 121 public static IEnumerable<KeyValuePair<string, IEnumerable<string>>> GetFactorVariableValues(IDataset ds, IEnumerable<string> factorVariables, IEnumerable<int> rows) { 122 return from factor in factorVariables 123 let distinctValues = ds.GetStringValues(factor, rows).Distinct().ToArray() 124 // 1 distinct value => skip (constant) 125 // 2 distinct values => only take one of the two values 126 // >=3 distinct values => create a binary value for each value 127 let reducedValues = distinctValues.Length <= 2 128 ? distinctValues.Take(distinctValues.Length - 1) 129 : distinctValues 130 select new KeyValuePair<string, IEnumerable<string>>(factor, reducedValues); 131 } 120 132 } 121 133 } -
branches/symbreg-factors-2650/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearDiscriminantAnalysis.cs
r14185 r14240 36 36 /// Linear discriminant analysis classification algorithm. 37 37 /// </summary> 38 [Item("Linear Discriminant Analysis ", "Linear discriminant analysis classification algorithm (wrapper for ALGLIB).")]38 [Item("Linear Discriminant Analysis (LDA)", "Linear discriminant analysis classification algorithm (wrapper for ALGLIB).")] 39 39 [Creatable(CreatableAttribute.Categories.DataAnalysisClassification, Priority = 100)] 40 40 [StorableClass] … … 70 70 IEnumerable<int> rows = problemData.TrainingIndices; 71 71 int nClasses = problemData.ClassNames.Count(); 72 double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows); 72 var doubleVariableNames = allowedInputVariables.Where(dataset.VariableHasType<double>).ToArray(); 73 var factorVariableNames = allowedInputVariables.Where(dataset.VariableHasType<string>).ToArray(); 74 double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, doubleVariableNames.Concat(new string[] { targetVariable }), rows); 75 76 var factorVariables = AlglibUtil.GetFactorVariableValues(dataset, factorVariableNames, rows); 77 double[,] factorMatrix = AlglibUtil.PrepareInputMatrix(dataset, factorVariables, rows); 78 79 inputMatrix = factorMatrix.VertCat(inputMatrix); 80 73 81 if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x))) 74 82 throw new NotSupportedException("Linear discriminant analysis does not support NaN or infinity values in the input dataset."); … … 82 90 int info; 83 91 double[] w; 84 alglib.fisherlda(inputMatrix, inputMatrix.GetLength(0), allowedInputVariables.Count(), nClasses, out info, out w);92 alglib.fisherlda(inputMatrix, inputMatrix.GetLength(0), inputMatrix.GetLength(1) - 1, nClasses, out info, out w); 85 93 if (info < 1) throw new ArgumentException("Error in calculation of linear discriminant analysis solution"); 86 94 … … 92 100 93 101 int col = 0; 94 foreach (string column in allowedInputVariables) { 
102 foreach (var kvp in factorVariables) { 103 var varName = kvp.Key; 104 foreach (var cat in kvp.Value) { 105 FactorVariableTreeNode vNode = 106 (FactorVariableTreeNode)new HeuristicLab.Problems.DataAnalysis.Symbolic.FactorVariable().CreateTreeNode(); 107 vNode.VariableName = varName; 108 vNode.VariableValue = cat; 109 vNode.Weight = w[col]; 110 addition.AddSubtree(vNode); 111 col++; 112 } 113 } 114 foreach (string column in doubleVariableNames) { 95 115 VariableTreeNode vNode = (VariableTreeNode)new HeuristicLab.Problems.DataAnalysis.Symbolic.Variable().CreateTreeNode(); 96 116 vNode.VariableName = column; -
branches/symbreg-factors-2650/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearRegression.cs
r14237 r14240 75 75 var doubleVariables = allowedInputVariables.Where(dataset.VariableHasType<double>); 76 76 var factorVariableNames = allowedInputVariables.Where(dataset.VariableHasType<string>); 77 var factorVariables = from factor in factorVariableNames 78 let distinctValues = dataset.GetStringValues(factor, rows).Distinct().ToArray() 79 // 1 distinct value => skip (constant) 80 // 2 distinct values => only take one of the two values 81 // >=3 distinct values => create a binary value for each value 82 let reducedValues = distinctValues.Length <= 2 83 ? distinctValues.Take(distinctValues.Length - 1) 84 : distinctValues 85 select new KeyValuePair<string, IEnumerable<string>>(factor, reducedValues); 77 var factorVariables = AlglibUtil.GetFactorVariableValues(dataset, factorVariableNames, rows); 86 78 double[,] binaryMatrix = AlglibUtil.PrepareInputMatrix(dataset, factorVariables, rows); 87 79 double[,] doubleVarMatrix = AlglibUtil.PrepareInputMatrix(dataset, doubleVariables.Concat(new string[] { targetVariable }), rows); -
branches/symbreg-factors-2650/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/MultinomialLogitClassification.cs
r14185 r14240 68 68 var dataset = problemData.Dataset; 69 69 string targetVariable = problemData.TargetVariable; 70 IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables; 70 var doubleVariableNames = problemData.AllowedInputVariables.Where(dataset.VariableHasType<double>); 71 var factorVariableNames = problemData.AllowedInputVariables.Where(dataset.VariableHasType<string>); 71 72 IEnumerable<int> rows = problemData.TrainingIndices; 72 double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows); 73 double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, doubleVariableNames.Concat(new string[] { targetVariable }), rows); 74 75 var factorVariableValues = AlglibUtil.GetFactorVariableValues(dataset, factorVariableNames, rows); 76 var factorMatrix = AlglibUtil.PrepareInputMatrix(dataset, factorVariableValues, rows); 77 inputMatrix = factorMatrix.VertCat(inputMatrix); 78 73 79 if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x))) 74 80 throw new NotSupportedException("Multinomial logit classification does not support NaN or infinity values in the input dataset."); … … 95 101 relClassError = alglib.mnlrelclserror(lm, inputMatrix, nRows); 96 102 97 MultinomialLogitClassificationSolution solution = new MultinomialLogitClassificationSolution(new MultinomialLogitModel(lm, targetVariable, allowedInputVariables, classValues), (IClassificationProblemData)problemData.Clone());103 MultinomialLogitClassificationSolution solution = new MultinomialLogitClassificationSolution(new MultinomialLogitModel(lm, targetVariable, doubleVariableNames, factorVariableValues, classValues), (IClassificationProblemData)problemData.Clone()); 98 104 return solution; 99 105 } -
branches/symbreg-factors-2650/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/MultinomialLogitModel.cs
r14185 r14240 56 56 [Storable] 57 57 private double[] classValues; 58 [Storable] 59 private List<KeyValuePair<string, IEnumerable<string>>> factorVariables; 60 58 61 [StorableConstructor] 59 62 private MultinomialLogitModel(bool deserializing) … … 68 71 allowedInputVariables = (string[])original.allowedInputVariables.Clone(); 69 72 classValues = (double[])original.classValues.Clone(); 73 this.factorVariables = original.factorVariables.Select(kvp => new KeyValuePair<string, IEnumerable<string>>(kvp.Key, new List<string>(kvp.Value))).ToList(); 70 74 } 71 public MultinomialLogitModel(alglib.logitmodel logitModel, string targetVariable, IEnumerable<string> allowedInputVariables, double[] classValues)75 public MultinomialLogitModel(alglib.logitmodel logitModel, string targetVariable, IEnumerable<string> doubleInputVariables, IEnumerable<KeyValuePair<string, IEnumerable<string>>> factorVariables, double[] classValues) 72 76 : base(targetVariable) { 73 77 this.name = ItemName; 74 78 this.description = ItemDescription; 75 79 this.logitModel = logitModel; 76 this.allowedInputVariables = allowedInputVariables.ToArray(); 80 this.allowedInputVariables = doubleInputVariables.ToArray(); 81 this.factorVariables = factorVariables.Select(kvp => new KeyValuePair<string, IEnumerable<string>>(kvp.Key, new List<string>(kvp.Value))).ToList(); 77 82 this.classValues = (double[])classValues.Clone(); 83 } 84 85 [StorableHook(HookType.AfterDeserialization)] 86 private void AfterDeserialization() { 87 // BackwardsCompatibility3.3 88 #region Backwards compatible code, remove with 3.4 89 factorVariables = new List<KeyValuePair<string, IEnumerable<string>>>(); 90 #endregion 78 91 } 79 92 … … 83 96 84 97 public override IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) { 98 85 99 double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows); 100 double[,] factorData = AlglibUtil.PrepareInputMatrix(dataset, factorVariables, rows); 101 
102 inputData = factorData.VertCat(inputData); 86 103 87 104 int n = inputData.GetLength(0); -
branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Symbols/FactorVariable.cs
r14237 r14240 28 28 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; 29 29 namespace HeuristicLab.Problems.DataAnalysis.Symbolic { 30 // TODO: handle correctly in all interpreters and formatters31 // TODO: view for factor variables (configuration of actually allowed factors)32 // TODO: handle correctly in variable impacts view33 // TODO: handle correctly in Non-linear regression (infix parser and infix formatter)34 // TODO: support in all analyzers which handle variable symbols specifically35 // TODO: handle correctly in gradient views36 // TODO: handle correctly in ERC view (create linear regression model)37 // TODO: handle correctly in classification - solution comparison38 // TODO: allow factor variables in decision trees (and therefore GBT)?39 // TODO: support in more algs?40 // TODO: support in more views?41 30 [StorableClass] 42 31 [Item("FactorVariable", "Represents a categorical variable (comparable to factors as in R).")] -
branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Views/3.4/Classification/ClassificationSolutionComparisonView.cs
r14185 r14240 131 131 zeroR.Name = "ZeroR Classification Solution"; 132 132 newSolutions.Add(zeroR); 133 var oneR = OneR.CreateOneRSolution(problemData); 134 oneR.Name = "OneR Classification Solution"; 135 newSolutions.Add(oneR); 133 try { 134 var oneR = OneR.CreateOneRSolution(problemData); 135 oneR.Name = "OneR Classification Solution"; 136 newSolutions.Add(oneR); 137 } catch (NotSupportedException) { } catch (ArgumentException) { } 136 138 try { 137 139 var lda = LinearDiscriminantAnalysis.CreateLinearDiscriminantAnalysisSolution(problemData);
Note: See TracChangeset for help on using the changeset viewer.