Changeset 16692 for branches/2521_ProblemRefactoring/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear
- Timestamp:
- 03/18/19 17:24:30 (6 years ago)
- Location:
- branches/2521_ProblemRefactoring
- Files:
-
- 1 deleted
- 9 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/2521_ProblemRefactoring
- Property svn:ignore
-
old new 24 24 protoc.exe 25 25 obj 26 .vs
-
- Property svn:mergeinfo changed
- Property svn:ignore
-
branches/2521_ProblemRefactoring/HeuristicLab.Algorithms.DataAnalysis
- Property svn:mergeinfo changed
-
branches/2521_ProblemRefactoring/HeuristicLab.Algorithms.DataAnalysis/3.4
-
Property
svn:mergeinfo
set to
(toggle deleted branches)
/stable/HeuristicLab.Algorithms.DataAnalysis/3.4 merged eligible /branches/1721-RandomForestPersistence/HeuristicLab.Algorithms.DataAnalysis/3.4 10321-10322 /branches/Async/HeuristicLab.Algorithms.DataAnalysis/3.4 13329-15286 /branches/Benchmarking/sources/HeuristicLab.Algorithms.DataAnalysis/3.4 6917-7005 /branches/ClassificationModelComparison/HeuristicLab.Algorithms.DataAnalysis/3.4 9070-13099 /branches/CloningRefactoring/HeuristicLab.Algorithms.DataAnalysis/3.4 4656-4721 /branches/DataAnalysis Refactoring/HeuristicLab.Algorithms.DataAnalysis/3.4 5471-5808 /branches/DataAnalysis SolutionEnsembles/HeuristicLab.Algorithms.DataAnalysis/3.4 5815-6180 /branches/DataAnalysis/HeuristicLab.Algorithms.DataAnalysis/3.4 4458-4459,4462,4464 /branches/DataPreprocessing/HeuristicLab.Algorithms.DataAnalysis/3.4 10085-11101 /branches/GP.Grammar.Editor/HeuristicLab.Algorithms.DataAnalysis/3.4 6284-6795 /branches/GP.Symbols (TimeLag, Diff, Integral)/HeuristicLab.Algorithms.DataAnalysis/3.4 5060 /branches/HeuristicLab.DatasetRefactor/sources/HeuristicLab.Algorithms.DataAnalysis/3.4 11570-12508 /branches/HeuristicLab.Problems.Orienteering/HeuristicLab.Algorithms.DataAnalysis/3.4 11130-12721 /branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4 13819-14091 /branches/HeuristicLab.TimeSeries/HeuristicLab.Algorithms.DataAnalysis/3.4 8116-8789 /branches/LogResidualEvaluator/HeuristicLab.Algorithms.DataAnalysis/3.4 10202-10483 /branches/NET40/sources/HeuristicLab.Algorithms.DataAnalysis/3.4 5138-5162 /branches/ParallelEngine/HeuristicLab.Algorithms.DataAnalysis/3.4 5175-5192 /branches/ProblemInstancesRegressionAndClassification/HeuristicLab.Algorithms.DataAnalysis/3.4 7773-7810 /branches/QAPAlgorithms/HeuristicLab.Algorithms.DataAnalysis/3.4 6350-6627 /branches/Restructure trunk solution/HeuristicLab.Algorithms.DataAnalysis/3.4 6828 /branches/SpectralKernelForGaussianProcesses/HeuristicLab.Algorithms.DataAnalysis/3.4 10204-10479 /branches/SuccessProgressAnalysis/HeuristicLab.Algorithms.DataAnalysis/3.4 5370-5682 /branches/Trunk/HeuristicLab.Algorithms.DataAnalysis/3.4 6829-6865 /branches/VNS/HeuristicLab.Algorithms.DataAnalysis/3.4 5594-5752 /branches/Weighted TSNE/3.4 15451-15531 /branches/histogram/HeuristicLab.Algorithms.DataAnalysis/3.4 5959-6341 /branches/symbreg-factors-2650/HeuristicLab.Algorithms.DataAnalysis/3.4 14232-14825 /trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4 13331-15681
-
Property
svn:mergeinfo
set to
(toggle deleted branches)
-
branches/2521_ProblemRefactoring/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearDiscriminantAnalysis.cs
r12509 r16692 1 1 #region License Information 2 2 /* HeuristicLab 3 * Copyright (C) 2002-201 5Heuristic and Evolutionary Algorithms Laboratory (HEAL)3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL) 4 4 * 5 5 * This file is part of HeuristicLab. … … 23 23 using System.Collections.Generic; 24 24 using System.Linq; 25 using System.Threading; 25 26 using HeuristicLab.Common; 26 27 using HeuristicLab.Core; … … 36 37 /// Linear discriminant analysis classification algorithm. 37 38 /// </summary> 38 [Item("Linear Discriminant Analysis ", "Linear discriminant analysis classification algorithm (wrapper for ALGLIB).")]39 [Item("Linear Discriminant Analysis (LDA)", "Linear discriminant analysis classification algorithm (wrapper for ALGLIB).")] 39 40 [Creatable(CreatableAttribute.Categories.DataAnalysisClassification, Priority = 100)] 40 41 [StorableClass] … … 59 60 60 61 #region Fisher LDA 61 protected override void Run( ) {62 protected override void Run(CancellationToken cancellationToken) { 62 63 var solution = CreateLinearDiscriminantAnalysisSolution(Problem.ProblemData); 63 64 Results.Add(new Result(LinearDiscriminantAnalysisSolutionResultName, "The linear discriminant analysis.", solution)); … … 70 71 IEnumerable<int> rows = problemData.TrainingIndices; 71 72 int nClasses = problemData.ClassNames.Count(); 72 double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows); 73 var doubleVariableNames = allowedInputVariables.Where(dataset.VariableHasType<double>).ToArray(); 74 var factorVariableNames = allowedInputVariables.Where(dataset.VariableHasType<string>).ToArray(); 75 double[,] inputMatrix = dataset.ToArray(doubleVariableNames.Concat(new string[] { targetVariable }), rows); 76 77 var factorVariables = dataset.GetFactorVariableValues(factorVariableNames, rows); 78 var factorMatrix = dataset.ToArray(factorVariables, rows); 79 80 inputMatrix = factorMatrix.HorzCat(inputMatrix); 81 73 82 if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x))) 74 83 throw new NotSupportedException("Linear discriminant analysis does not support NaN or infinity values in the input dataset."); … … 82 91 int info; 83 92 double[] w; 84 alglib.fisherlda(inputMatrix, inputMatrix.GetLength(0), allowedInputVariables.Count(), nClasses, out info, out w);93 alglib.fisherlda(inputMatrix, inputMatrix.GetLength(0), inputMatrix.GetLength(1) - 1, nClasses, out info, out w); 85 94 if (info < 1) throw new ArgumentException("Error in calculation of linear discriminant analysis solution"); 86 95 87 ISymbolicExpressionTree tree = new SymbolicExpressionTree(new ProgramRootSymbol().CreateTreeNode()); 88 ISymbolicExpressionTreeNode startNode = new StartSymbol().CreateTreeNode(); 89 tree.Root.AddSubtree(startNode); 90 ISymbolicExpressionTreeNode addition = new Addition().CreateTreeNode(); 91 startNode.AddSubtree(addition); 96 var nFactorCoeff = factorMatrix.GetLength(1); 97 var tree = LinearModelToTreeConverter.CreateTree(factorVariables, w.Take(nFactorCoeff).ToArray(), 98 doubleVariableNames, w.Skip(nFactorCoeff).Take(doubleVariableNames.Length).ToArray()); 92 99 93 int col = 0; 94 foreach (string column in allowedInputVariables) { 95 VariableTreeNode vNode = (VariableTreeNode)new HeuristicLab.Problems.DataAnalysis.Symbolic.Variable().CreateTreeNode(); 96 vNode.VariableName = column; 97 vNode.Weight = w[col]; 98 addition.AddSubtree(vNode); 99 col++; 100 } 101 102 var model = LinearDiscriminantAnalysis.CreateDiscriminantFunctionModel(tree, new SymbolicDataAnalysisExpressionTreeInterpreter(), problemData, rows); 100 var model = CreateDiscriminantFunctionModel(tree, new SymbolicDataAnalysisExpressionTreeLinearInterpreter(), problemData, rows); 103 101 SymbolicDiscriminantFunctionClassificationSolution solution = new SymbolicDiscriminantFunctionClassificationSolution(model, (IClassificationProblemData)problemData.Clone()); 104 102 … … 111 109 IClassificationProblemData problemData, 112 110 IEnumerable<int> rows) { 113 var model = new SymbolicDiscriminantFunctionClassificationModel( tree, interpreter, new AccuracyMaximizationThresholdCalculator());111 var model = new SymbolicDiscriminantFunctionClassificationModel(problemData.TargetVariable, tree, interpreter, new AccuracyMaximizationThresholdCalculator()); 114 112 model.RecalculateModelParameters(problemData, rows); 115 113 return model; -
branches/2521_ProblemRefactoring/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearRegression.cs
r13238 r16692 1 1 #region License Information 2 2 /* HeuristicLab 3 * Copyright (C) 2002-201 5Heuristic and Evolutionary Algorithms Laboratory (HEAL)3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL) 4 4 * 5 5 * This file is part of HeuristicLab. … … 23 23 using System.Collections.Generic; 24 24 using System.Linq; 25 using System.Threading; 25 26 using HeuristicLab.Common; 26 27 using HeuristicLab.Core; 27 28 using HeuristicLab.Data; 28 using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;29 29 using HeuristicLab.Optimization; 30 30 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; … … 60 60 61 61 #region linear regression 62 protected override void Run( ) {62 protected override void Run(CancellationToken cancellationToken) { 63 63 double rmsError, cvRmsError; 64 64 var solution = CreateLinearRegressionSolution(Problem.ProblemData, out rmsError, out cvRmsError); … … 73 73 IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables; 74 74 IEnumerable<int> rows = problemData.TrainingIndices; 75 double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows); 75 var doubleVariables = allowedInputVariables.Where(dataset.VariableHasType<double>); 76 var factorVariableNames = allowedInputVariables.Where(dataset.VariableHasType<string>); 77 var factorVariables = dataset.GetFactorVariableValues(factorVariableNames, rows); 78 double[,] binaryMatrix = dataset.ToArray(factorVariables, rows); 79 double[,] doubleVarMatrix = dataset.ToArray(doubleVariables.Concat(new string[] { targetVariable }), rows); 80 var inputMatrix = binaryMatrix.HorzCat(doubleVarMatrix); 81 76 82 if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x))) 77 83 throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset."); … … 91 97 alglib.lrunpack(lm, out coefficients, out nFeatures); 92 98 93 ISymbolicExpressionTree tree = new SymbolicExpressionTree(new ProgramRootSymbol().CreateTreeNode()); 94 ISymbolicExpressionTreeNode startNode = new StartSymbol().CreateTreeNode(); 95 tree.Root.AddSubtree(startNode); 96 ISymbolicExpressionTreeNode addition = new Addition().CreateTreeNode(); 97 startNode.AddSubtree(addition); 98 99 int col = 0; 100 foreach (string column in allowedInputVariables) { 101 VariableTreeNode vNode = (VariableTreeNode)new HeuristicLab.Problems.DataAnalysis.Symbolic.Variable().CreateTreeNode(); 102 vNode.VariableName = column; 103 vNode.Weight = coefficients[col]; 104 addition.AddSubtree(vNode); 105 col++; 106 } 107 108 ConstantTreeNode cNode = (ConstantTreeNode)new Constant().CreateTreeNode(); 109 cNode.Value = coefficients[coefficients.Length - 1]; 110 addition.AddSubtree(cNode); 111 112 SymbolicRegressionSolution solution = new SymbolicRegressionSolution(new SymbolicRegressionModel(tree, new SymbolicDataAnalysisExpressionTreeInterpreter()), (IRegressionProblemData)problemData.Clone()); 99 int nFactorCoeff = binaryMatrix.GetLength(1); 100 int nVarCoeff = doubleVariables.Count(); 101 var tree = LinearModelToTreeConverter.CreateTree(factorVariables, coefficients.Take(nFactorCoeff).ToArray(), 102 doubleVariables.ToArray(), coefficients.Skip(nFactorCoeff).Take(nVarCoeff).ToArray(), 103 @const: coefficients[nFeatures]); 104 105 SymbolicRegressionSolution solution = new SymbolicRegressionSolution(new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeLinearInterpreter()), (IRegressionProblemData)problemData.Clone()); 113 106 solution.Model.Name = "Linear Regression Model"; 114 107 solution.Name = "Linear Regression Solution"; -
branches/2521_ProblemRefactoring/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/MultinomialLogitClassification.cs
r13238 r16692 1 1 #region License Information 2 2 /* HeuristicLab 3 * Copyright (C) 2002-201 5Heuristic and Evolutionary Algorithms Laboratory (HEAL)3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL) 4 4 * 5 5 * This file is part of HeuristicLab. … … 23 23 using System.Collections.Generic; 24 24 using System.Linq; 25 using System.Threading; 25 26 using HeuristicLab.Common; 26 27 using HeuristicLab.Core; … … 57 58 58 59 #region logit classification 59 protected override void Run( ) {60 protected override void Run(CancellationToken cancellationToken) { 60 61 double rmsError, relClassError; 61 62 var solution = CreateLogitClassificationSolution(Problem.ProblemData, out rmsError, out relClassError); … … 68 69 var dataset = problemData.Dataset; 69 70 string targetVariable = problemData.TargetVariable; 70 IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables; 71 var doubleVariableNames = problemData.AllowedInputVariables.Where(dataset.VariableHasType<double>); 72 var factorVariableNames = problemData.AllowedInputVariables.Where(dataset.VariableHasType<string>); 71 73 IEnumerable<int> rows = problemData.TrainingIndices; 72 double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows); 74 double[,] inputMatrix = dataset.ToArray(doubleVariableNames.Concat(new string[] { targetVariable }), rows); 75 76 var factorVariableValues = dataset.GetFactorVariableValues(factorVariableNames, rows); 77 var factorMatrix = dataset.ToArray(factorVariableValues, rows); 78 inputMatrix = factorMatrix.HorzCat(inputMatrix); 79 73 80 if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x))) 74 81 throw new NotSupportedException("Multinomial logit classification does not support NaN or infinity values in the input dataset."); … … 95 102 relClassError = alglib.mnlrelclserror(lm, inputMatrix, nRows); 96 103 97 MultinomialLogitClassificationSolution solution = new MultinomialLogitClassificationSolution( (IClassificationProblemData)problemData.Clone(), new MultinomialLogitModel(lm, targetVariable, allowedInputVariables, classValues));104 MultinomialLogitClassificationSolution solution = new MultinomialLogitClassificationSolution(new MultinomialLogitModel(lm, targetVariable, doubleVariableNames, factorVariableValues, classValues), (IClassificationProblemData)problemData.Clone()); 98 105 return solution; 99 106 } -
branches/2521_ProblemRefactoring/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/MultinomialLogitClassificationSolution.cs
r12012 r16692 1 1 #region License Information 2 2 /* HeuristicLab 3 * Copyright (C) 2002-201 5Heuristic and Evolutionary Algorithms Laboratory (HEAL)3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL) 4 4 * 5 5 * This file is part of HeuristicLab. … … 43 43 : base(original, cloner) { 44 44 } 45 public MultinomialLogitClassificationSolution( IClassificationProblemData problemData, MultinomialLogitModel logitModel)45 public MultinomialLogitClassificationSolution(MultinomialLogitModel logitModel, IClassificationProblemData problemData) 46 46 : base(logitModel, problemData) { 47 47 } -
branches/2521_ProblemRefactoring/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/MultinomialLogitModel.cs
r12509 r16692 1 1 #region License Information 2 2 /* HeuristicLab 3 * Copyright (C) 2002-201 5Heuristic and Evolutionary Algorithms Laboratory (HEAL)3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL) 4 4 * 5 5 * This file is part of HeuristicLab. … … 34 34 [StorableClass] 35 35 [Item("Multinomial Logit Model", "Represents a multinomial logit model for classification.")] 36 public sealed class MultinomialLogitModel : NamedItem, IClassificationModel {36 public sealed class MultinomialLogitModel : ClassificationModel { 37 37 38 38 private alglib.logitmodel logitModel; … … 48 48 } 49 49 50 [Storable] 51 private string targetVariable; 50 public override IEnumerable<string> VariablesUsedForPrediction { 51 get { return allowedInputVariables; } 52 } 53 52 54 [Storable] 53 55 private string[] allowedInputVariables; 54 56 [Storable] 55 57 private double[] classValues; 58 [Storable] 59 private List<KeyValuePair<string, IEnumerable<string>>> factorVariables; 60 56 61 [StorableConstructor] 57 62 private MultinomialLogitModel(bool deserializing) … … 64 69 logitModel = new alglib.logitmodel(); 65 70 logitModel.innerobj.w = (double[])original.logitModel.innerobj.w.Clone(); 66 targetVariable = original.targetVariable;67 71 allowedInputVariables = (string[])original.allowedInputVariables.Clone(); 68 72 classValues = (double[])original.classValues.Clone(); 73 this.factorVariables = original.factorVariables.Select(kvp => new KeyValuePair<string, IEnumerable<string>>(kvp.Key, new List<string>(kvp.Value))).ToList(); 69 74 } 70 public MultinomialLogitModel(alglib.logitmodel logitModel, string targetVariable, IEnumerable<string> allowedInputVariables, double[] classValues)71 : base( ) {75 public MultinomialLogitModel(alglib.logitmodel logitModel, string targetVariable, IEnumerable<string> doubleInputVariables, IEnumerable<KeyValuePair<string, IEnumerable<string>>> factorVariables, double[] classValues) 76 : base(targetVariable) { 72 77 this.name = ItemName; 73 78 this.description = ItemDescription; 74 79 this.logitModel = logitModel; 75 this. targetVariable = targetVariable;76 this. allowedInputVariables = allowedInputVariables.ToArray();80 this.allowedInputVariables = doubleInputVariables.ToArray(); 81 this.factorVariables = factorVariables.Select(kvp => new KeyValuePair<string, IEnumerable<string>>(kvp.Key, new List<string>(kvp.Value))).ToList(); 77 82 this.classValues = (double[])classValues.Clone(); 83 } 84 85 [StorableHook(HookType.AfterDeserialization)] 86 private void AfterDeserialization() { 87 // BackwardsCompatibility3.3 88 #region Backwards compatible code, remove with 3.4 89 factorVariables = new List<KeyValuePair<string, IEnumerable<string>>>(); 90 #endregion 78 91 } 79 92 … … 82 95 } 83 96 84 public IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) { 85 double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows); 97 public override IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) { 98 99 double[,] inputData = dataset.ToArray(allowedInputVariables, rows); 100 double[,] factorData = dataset.ToArray(factorVariables, rows); 101 102 inputData = factorData.HorzCat(inputData); 86 103 87 104 int n = inputData.GetLength(0); … … 108 125 } 109 126 110 public MultinomialLogitClassificationSolution CreateClassificationSolution(IClassificationProblemData problemData) { 111 return new MultinomialLogitClassificationSolution(new ClassificationProblemData(problemData), this); 112 } 113 IClassificationSolution IClassificationModel.CreateClassificationSolution(IClassificationProblemData problemData) { 114 return CreateClassificationSolution(problemData); 127 public override IClassificationSolution CreateClassificationSolution(IClassificationProblemData problemData) { 128 return new MultinomialLogitClassificationSolution(this, new ClassificationProblemData(problemData)); 115 129 } 116 130 … … 135 149 } 136 150 #endregion 151 137 152 } 138 153 } -
branches/2521_ProblemRefactoring/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/Scaling.cs
r12509 r16692 1 1 #region License Information 2 2 /* HeuristicLab 3 * Copyright (C) 2002-201 5Heuristic and Evolutionary Algorithms Laboratory (HEAL)3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL) 4 4 * 5 5 * This file is part of HeuristicLab. … … 29 29 30 30 namespace HeuristicLab.Algorithms.DataAnalysis { 31 [Obsolete("Use transformation classes in Problems.DataAnalysis instead")] 31 32 [StorableClass] 32 33 [Item(Name = "Scaling", Description = "Contains information about scaling of variables for data-analysis algorithms.")]
Note: See TracChangeset
for help on using the changeset viewer.