- Timestamp:
- 01/08/19 14:59:31 (6 years ago)
- Location:
- branches/2972_PDPRowSelect
- Files:
-
- 9 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/2972_PDPRowSelect
- Property svn:mergeinfo changed
/trunk (added) merged: 16446,16448-16449,16478,16491,16494,16496,16499,16501-16506,16511-16513
- Property svn:mergeinfo changed
-
branches/2972_PDPRowSelect/HeuristicLab.Algorithms.DataAnalysis
- Property svn:mergeinfo changed
/trunk/HeuristicLab.Algorithms.DataAnalysis (added) merged: 16448-16449,16491
- Property svn:mergeinfo changed
-
branches/2972_PDPRowSelect/HeuristicLab.Algorithms.DataAnalysis/3.4
- Property svn:mergeinfo changed
/trunk/HeuristicLab.Algorithms.DataAnalysis/3.4 (added) merged: 16448-16449,16491
- Property svn:mergeinfo changed
-
branches/2972_PDPRowSelect/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearRegression.cs
r16389 r16518 41 41 [StorableClass] 42 42 public sealed class LinearRegression : FixedDataAnalysisAlgorithm<IRegressionProblem> { 43 private const string LinearRegressionModelResultName = "Linear regression solution"; 43 private const string SolutionResultName = "Linear regression solution"; 44 private const string ConfidenceSolutionResultName = "Solution with prediction intervals"; 44 45 45 46 [StorableConstructor] … … 62 63 protected override void Run(CancellationToken cancellationToken) { 63 64 double rmsError, cvRmsError; 65 // produce both solutions, to allow symbolic manipulation of LR solutions as well 66 // as the calculation of prediction intervals. 67 // There is no clean way to implement the new model class for LR as a symbolic model. 64 68 var solution = CreateSolution(Problem.ProblemData, out rmsError, out cvRmsError); 65 Results.Add(new Result(LinearRegressionModelResultName, "The linear regression solution.", solution)); 69 #pragma warning disable 168, 3021 70 var symbolicSolution = CreateLinearRegressionSolution(Problem.ProblemData, out rmsError, out cvRmsError); 71 #pragma warning restore 168, 3021 72 Results.Add(new Result(SolutionResultName, "The linear regression solution.", symbolicSolution)); 73 Results.Add(new Result(ConfidenceSolutionResultName, "Linear regression solution with parameter covariance matrix " + 74 "and calculation of prediction intervals", solution)); 66 75 Results.Add(new Result("Root mean square error", "The root of the mean of squared errors of the linear regression solution on the training set.", new DoubleValue(rmsError))); 67 76 Results.Add(new Result("Estimated root mean square error (cross-validation)", "The estimated root of the mean of squared errors of the linear regression solution via cross validation.", new DoubleValue(cvRmsError))); … … 88 97 double[] coefficients = new double[nFeatures + 1]; // last coefficient is for the constant 89 98 alglib.lrunpack(lm, out coefficients, out nFeatures); 90 91 int nFactorCoeff = factorVariables.Sum(kvp =>kvp.Value.Count());99 100 int nFactorCoeff = factorVariables.Sum(kvp => kvp.Value.Count()); 92 101 int nVarCoeff = doubleVariables.Count(); 93 102 var tree = LinearModelToTreeConverter.CreateTree(factorVariables, coefficients.Take(nFactorCoeff).ToArray(), … … 132 141 } 133 142 134 private static void PrepareData(IRegressionProblemData problemData, 135 out double[,] inputMatrix, 136 out IEnumerable<string> doubleVariables, 143 private static void PrepareData(IRegressionProblemData problemData, 144 out double[,] inputMatrix, 145 out IEnumerable<string> doubleVariables, 137 146 out IEnumerable<KeyValuePair<string, IEnumerable<string>>> factorVariables) { 138 147 var dataset = problemData.Dataset; -
branches/2972_PDPRowSelect/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearRegressionModel.cs
r16415 r16518 22 22 using System; 23 23 using System.Collections.Generic; 24 using System.Drawing; 24 25 using System.Linq; 25 26 using HeuristicLab.Common; … … 35 36 [Item("Linear Regression Model", "Represents a linear regression model.")] 36 37 public sealed class LinearRegressionModel : RegressionModel, IConfidenceRegressionModel { 38 public static new Image StaticItemImage { 39 get { return HeuristicLab.Common.Resources.VSImageLibrary.Function; } 40 } 37 41 38 42 [Storable] … … 49 53 get; private set; 50 54 } 51 55 52 56 public override IEnumerable<string> VariablesUsedForPrediction { 53 get { return allowedInputVariables.Union(factorVariables.Select(f => f.Key)); }57 get { return doubleVariables.Union(factorVariables.Select(f => f.Key)); } 54 58 } 55 59 56 60 [Storable] 57 private string[] allowedInputVariables;61 private string[] doubleVariables; 58 62 [Storable] 59 63 private List<KeyValuePair<string, IEnumerable<string>>> factorVariables; 64 65 /// <summary> 66 /// Enumerable of variable names used by the model including one-hot-encoded of factor variables. 67 /// </summary> 68 public IEnumerable<string> ParameterNames { 69 get { 70 return factorVariables.SelectMany(kvp => kvp.Value.Select(factorVal => $"{kvp.Key}={factorVal}")) 71 .Concat(doubleVariables) 72 .Concat(new[] { "<const>" }); 73 } 74 } 60 75 61 76 [StorableConstructor] … … 69 84 this.NoiseSigma = original.NoiseSigma; 70 85 71 allowedInputVariables = (string[])original.allowedInputVariables.Clone();86 doubleVariables = (string[])original.doubleVariables.Clone(); 72 87 this.factorVariables = original.factorVariables.Select(kvp => new KeyValuePair<string, IEnumerable<string>>(kvp.Key, new List<string>(kvp.Value))).ToList(); 73 88 } … … 78 93 this.W = new double[w.Length]; 79 94 Array.Copy(w, W, w.Length); 80 this.C = new double[covariance.GetLength(0), covariance.GetLength(1)];95 this.C = new double[covariance.GetLength(0), covariance.GetLength(1)]; 81 96 Array.Copy(covariance, C, covariance.Length); 82 97 this.NoiseSigma = noiseSigma; 83 var stringInputVariables = factorVariables.Select(f => f.Key).Distinct();84 this.allowedInputVariables = doubleInputVariables.ToArray();98 this.doubleVariables = doubleInputVariables.ToArray(); 99 // clone 85 100 this.factorVariables = factorVariables.Select(kvp => new KeyValuePair<string, IEnumerable<string>>(kvp.Key, new List<string>(kvp.Value))).ToList(); 86 101 } … … 95 110 96 111 public override IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) { 97 double[,] inputData = dataset.ToArray( allowedInputVariables, rows);112 double[,] inputData = dataset.ToArray(doubleVariables, rows); 98 113 double[,] factorData = dataset.ToArray(factorVariables, rows); 99 114 … … 114 129 115 130 public IEnumerable<double> GetEstimatedVariances(IDataset dataset, IEnumerable<int> rows) { 116 double[,] inputData = dataset.ToArray( allowedInputVariables, rows);131 double[,] inputData = dataset.ToArray(doubleVariables, rows); 117 132 double[,] factorData = dataset.ToArray(factorVariables, rows); 118 133 … … 123 138 124 139 double[] d = new double[C.GetLength(0)]; 125 140 126 141 for (int row = 0; row < n; row++) { 127 142 for (int column = 0; column < columns; column++) { 128 d[column] = inputData[row, column];143 d[column] = inputData[row, column]; 129 144 } 130 145 d[columns] = 1; 131 146 132 147 double var = 0.0; 133 for (int i=0;i<d.Length;i++) {134 for (int j = 0;j<d.Length;j++) {148 for (int i = 0; i < d.Length; i++) { 149 for (int j = 0; j < d.Length; j++) { 135 150 var += d[i] * C[i, j] * d[j]; 136 151 } 137 152 } 138 yield return var + NoiseSigma *NoiseSigma;153 yield return var + NoiseSigma * NoiseSigma; 139 154 } 140 155 } 141 142 156 143 157 public override IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) { -
branches/2972_PDPRowSelect/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/NcaModel.cs
r15869 r16518 65 65 66 66 var ds = ReduceDataset(dataset, rows); 67 nnModel = new NearestNeighbourModel(ds, Enumerable.Range(0, ds.Rows), k, ds.VariableNames.Last(), ds.VariableNames.Take(transformationMatrix.GetLength(1)), classValues: classValues);67 nnModel = new NearestNeighbourModel(ds, Enumerable.Range(0, ds.Rows), k, false, ds.VariableNames.Last(), ds.VariableNames.Take(transformationMatrix.GetLength(1)), classValues: classValues); 68 68 } 69 69 -
branches/2972_PDPRowSelect/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourClassification.cs
r15583 r16518 1 #region License Information1 #region License Information 2 2 /* HeuristicLab 3 3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL) … … 42 42 private const string NearestNeighbourClassificationModelResultName = "Nearest neighbour classification solution"; 43 43 private const string WeightsParameterName = "Weights"; 44 44 private const string SelfMatchParameterName = "SelfMatch"; 45 45 46 46 #region parameter properties 47 47 public IFixedValueParameter<IntValue> KParameter { 48 48 get { return (IFixedValueParameter<IntValue>)Parameters[KParameterName]; } 49 } 50 public IFixedValueParameter<BoolValue> SelfMatchParameter { 51 get { return (IFixedValueParameter<BoolValue>)Parameters[SelfMatchParameterName]; } 49 52 } 50 53 public IValueParameter<DoubleArray> WeightsParameter { … … 53 56 #endregion 54 57 #region properties 58 public bool SelfMatch { 59 get { return SelfMatchParameter.Value.Value; } 60 set { SelfMatchParameter.Value.Value = value; } 61 } 55 62 public int K { 56 63 get { return KParameter.Value.Value; } … … 73 80 public NearestNeighbourClassification() 74 81 : base() { 82 Parameters.Add(new FixedValueParameter<BoolValue>(SelfMatchParameterName, "Should we use equal points for classification?", new BoolValue(false))); 75 83 Parameters.Add(new FixedValueParameter<IntValue>(KParameterName, "The number of nearest neighbours to consider for regression.", new IntValue(3))); 76 84 Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, "Optional: use weights to specify individual scaling values for all features. If not set the weights are calculated automatically (each feature is scaled to unit variance)")); … … 83 91 if (!Parameters.ContainsKey(WeightsParameterName)) { 84 92 Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, "Optional: use weights to specify individual scaling values for all features. If not set the weights are calculated automatically (each feature is scaled to unit variance)")); 93 } 94 if (!Parameters.ContainsKey(SelfMatchParameterName)) { 95 Parameters.Add(new FixedValueParameter<BoolValue>(SelfMatchParameterName, "Should we use equal points for classification?", new BoolValue(false))); 85 96 } 86 97 #endregion … … 95 106 double[] weights = null; 96 107 if (Weights != null) weights = Weights.CloneAsArray(); 97 var solution = CreateNearestNeighbourClassificationSolution(Problem.ProblemData, K, weights);108 var solution = CreateNearestNeighbourClassificationSolution(Problem.ProblemData, K, SelfMatch, weights); 98 109 Results.Add(new Result(NearestNeighbourClassificationModelResultName, "The nearest neighbour classification solution.", solution)); 99 110 } 100 111 101 public static IClassificationSolution CreateNearestNeighbourClassificationSolution(IClassificationProblemData problemData, int k, double[] weights = null) {112 public static IClassificationSolution CreateNearestNeighbourClassificationSolution(IClassificationProblemData problemData, int k, bool selfMatch = false, double[] weights = null) { 102 113 var problemDataClone = (IClassificationProblemData)problemData.Clone(); 103 return new NearestNeighbourClassificationSolution(Train(problemDataClone, k, weights), problemDataClone);114 return new NearestNeighbourClassificationSolution(Train(problemDataClone, k, selfMatch, weights), problemDataClone); 104 115 } 105 116 106 public static INearestNeighbourModel Train(IClassificationProblemData problemData, int k, double[] weights = null) {117 public static INearestNeighbourModel Train(IClassificationProblemData problemData, int k, bool selfMatch = false, double[] weights = null) { 107 118 return new NearestNeighbourModel(problemData.Dataset, 108 119 problemData.TrainingIndices, 109 120 k, 121 selfMatch, 110 122 problemData.TargetVariable, 111 123 problemData.AllowedInputVariables, -
branches/2972_PDPRowSelect/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourModel.cs
r16243 r16518 1 #region License Information1 #region License Information 2 2 /* HeuristicLab 3 3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL) … … 37 37 38 38 private readonly object kdTreeLockObject = new object(); 39 39 40 private alglib.nearestneighbor.kdtree kdTree; 40 41 public alglib.nearestneighbor.kdtree KDTree { … … 49 50 } 50 51 51 52 52 public override IEnumerable<string> VariablesUsedForPrediction { 53 53 get { return allowedInputVariables; } … … 60 60 [Storable] 61 61 private int k; 62 [Storable(DefaultValue = false)] 63 private bool selfMatch; 62 64 [Storable(DefaultValue = null)] 63 65 private double[] weights; // not set for old versions loaded from disk … … 97 99 kdTree.x = (double[])original.kdTree.x.Clone(); 98 100 kdTree.xy = (double[,])original.kdTree.xy.Clone(); 99 101 selfMatch = original.selfMatch; 100 102 k = original.k; 101 103 isCompatibilityLoaded = original.IsCompatibilityLoaded; … … 110 112 this.classValues = (double[])original.classValues.Clone(); 111 113 } 112 public NearestNeighbourModel(IDataset dataset, IEnumerable<int> rows, int k, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<double> weights = null, double[] classValues = null)114 public NearestNeighbourModel(IDataset dataset, IEnumerable<int> rows, int k, bool selfMatch, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<double> weights = null, double[] classValues = null) 113 115 : base(targetVariable) { 114 116 Name = ItemName; 115 117 Description = ItemDescription; 118 this.selfMatch = selfMatch; 116 119 this.k = k; 117 120 this.allowedInputVariables = allowedInputVariables.ToArray(); … … 132 135 .Select(name => { 133 136 var pop = dataset.GetDoubleValues(name, rows).StandardDeviationPop(); 134 return pop.IsAlmost(0) ? 1.0 : 1.0/pop;137 return pop.IsAlmost(0) ? 1.0 : 1.0 / pop; 135 138 }) 136 139 .Concat(new double[] { 1.0 }) // no scaling for target variable … … 201 204 int numNeighbours; 202 205 lock (kdTreeLockObject) { // gkronber: the following calls change the kdTree data structure 203 numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, false);206 numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, selfMatch); 204 207 alglib.nearestneighbor.kdtreequeryresultsdistances(kdTree, ref dists); 205 208 alglib.nearestneighbor.kdtreequeryresultsxy(kdTree, ref neighbours); 206 209 } 207 210 if (selfMatch) { 211 // weights for neighbours are 1/d. 212 // override distances (=0) of exact matches using 1% of the distance of the next closest non-self-match neighbour -> selfmatches weight 100x more than the next closest neighbor. 213 // if all k neighbours are selfmatches then they all have weight 0.01. 214 double minDist = dists[0] + 1; 215 for (int i = 0; i < numNeighbours; i++) { 216 if ((minDist > dists[i]) && (dists[i] != 0)) { 217 minDist = dists[i]; 218 } 219 } 220 minDist /= 100.0; 221 for (int i = 0; i < numNeighbours; i++) { 222 if (dists[i] == 0) { 223 dists[i] = minDist; 224 } 225 } 226 } 208 227 double distanceWeightedValue = 0.0; 209 228 double distsSum = 0.0; … … 238 257 lock (kdTreeLockObject) { 239 258 // gkronber: the following calls change the kdTree data structure 240 numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, false);259 numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, selfMatch); 241 260 alglib.nearestneighbor.kdtreequeryresultsdistances(kdTree, ref dists); 242 261 alglib.nearestneighbor.kdtreequeryresultsxy(kdTree, ref neighbours); -
branches/2972_PDPRowSelect/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourRegression.cs
r15583 r16518 41 41 private const string NearestNeighbourRegressionModelResultName = "Nearest neighbour regression solution"; 42 42 private const string WeightsParameterName = "Weights"; 43 private const string SelfMatchParameterName = "SelfMatch"; 43 44 44 45 #region parameter properties … … 46 47 get { return (IFixedValueParameter<IntValue>)Parameters[KParameterName]; } 47 48 } 48 49 public IFixedValueParameter<BoolValue> SelfMatchParameter { 50 get { return (IFixedValueParameter<BoolValue>)Parameters[SelfMatchParameterName]; } 51 } 49 52 public IValueParameter<DoubleArray> WeightsParameter { 50 53 get { return (IValueParameter<DoubleArray>)Parameters[WeightsParameterName]; } … … 59 62 } 60 63 } 61 64 public bool SelfMatch { 65 get { return SelfMatchParameter.Value.Value; } 66 set { SelfMatchParameter.Value.Value = value; } 67 } 62 68 public DoubleArray Weights { 63 69 get { return WeightsParameter.Value; } … … 75 81 Parameters.Add(new FixedValueParameter<IntValue>(KParameterName, "The number of nearest neighbours to consider for regression.", new IntValue(3))); 76 82 Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, "Optional: use weights to specify individual scaling values for all features. If not set the weights are calculated automatically (each feature is scaled to unit variance)")); 83 Parameters.Add(new FixedValueParameter<BoolValue>(SelfMatchParameterName, "Should we use equal points for classification?", new BoolValue(false))); 77 84 Problem = new RegressionProblem(); 78 85 } … … 84 91 if (!Parameters.ContainsKey(WeightsParameterName)) { 85 92 Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, "Optional: use weights to specify individual scaling values for all features. If not set the weights are calculated automatically (each feature is scaled to unit variance)")); 93 } 94 if (!Parameters.ContainsKey(SelfMatchParameterName)) { 95 Parameters.Add(new FixedValueParameter<BoolValue>(SelfMatchParameterName, "Should we use equal points for classification?", new BoolValue(false))); 86 96 } 87 97 #endregion … … 96 106 double[] weights = null; 97 107 if (Weights != null) weights = Weights.CloneAsArray(); 98 var solution = CreateNearestNeighbourRegressionSolution(Problem.ProblemData, K, weights);108 var solution = CreateNearestNeighbourRegressionSolution(Problem.ProblemData, K, SelfMatch, weights); 99 109 Results.Add(new Result(NearestNeighbourRegressionModelResultName, "The nearest neighbour regression solution.", solution)); 100 110 } 101 111 102 public static IRegressionSolution CreateNearestNeighbourRegressionSolution(IRegressionProblemData problemData, int k, double[] weights = null) {112 public static IRegressionSolution CreateNearestNeighbourRegressionSolution(IRegressionProblemData problemData, int k, bool selfMatch = false, double[] weights = null) { 103 113 var clonedProblemData = (IRegressionProblemData)problemData.Clone(); 104 return new NearestNeighbourRegressionSolution(Train(problemData, k, weights), clonedProblemData);114 return new NearestNeighbourRegressionSolution(Train(problemData, k, selfMatch, weights), clonedProblemData); 105 115 } 106 116 107 public static INearestNeighbourModel Train(IRegressionProblemData problemData, int k, double[] weights = null) {117 public static INearestNeighbourModel Train(IRegressionProblemData problemData, int k, bool selfMatch = false, double[] weights = null) { 108 118 return new NearestNeighbourModel(problemData.Dataset, 109 119 problemData.TrainingIndices, 110 120 k, 121 selfMatch, 111 122 problemData.TargetVariable, 112 123 problemData.AllowedInputVariables,
Note: See TracChangeset
for help on using the changeset viewer.