- Timestamp: 07/13/21 10:55:09 (3 years ago)
- Location: branches/3087_Ceres_Integration
- Files: 22 edited, 4 copied
Legend:
- Unchanged lines are shown without a prefix
- Added lines are prefixed with +
- Removed lines are prefixed with -
- A line containing only … marks an elided unchanged region
branches/3087_Ceres_Integration
- Property svn:mergeinfo changed

branches/3087_Ceres_Integration/HeuristicLab.Algorithms.DataAnalysis
- Property svn:mergeinfo changed

branches/3087_Ceres_Integration/HeuristicLab.Algorithms.DataAnalysis/3.4
- Property svn:mergeinfo changed
branches/3087_Ceres_Integration/HeuristicLab.Algorithms.DataAnalysis/3.4/CrossValidation.cs
r17833 → r18006

 using HeuristicLab.Problems.DataAnalysis.Symbolic;
 using HeuristicLab.Random;
+using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;
+using HeuristicLab.Problems.DataAnalysis.Symbolic.Classification;

 namespace HeuristicLab.Algorithms.DataAnalysis {
…
     symbolicProblem.FitnessCalculationPartition.End = SamplesEnd.Value;
   }
+
+  // We need to set the estimation limits because they are recalculated by the problem
+  // whenever the data partitions change.
+  // Instead of explicitly handling all types we could also check the parameters-collection
+  // for a parameter with name "EstimationLimits".
+  SetEstimationLimits(problem, new[] { typeof(SymbolicRegressionSingleObjectiveProblem),
+                                       typeof(SymbolicRegressionMultiObjectiveProblem),
+                                       typeof(SymbolicClassificationSingleObjectiveProblem),
+                                       typeof(SymbolicClassificationMultiObjectiveProblem) });
+
   clonedAlgorithm.Prepare();
   clonedAlgorithms.Add(clonedAlgorithm);
…
 foreach (KeyValuePair<string, List<IClassificationSolution>> solutions in resultSolutions) {
   // at least one algorithm (GBT with logistic regression loss) produces a classification solution even though the original problem is a regression problem.
-  var targetVariable = solutions.Value.First().ProblemData.TargetVariable;
   var dataset = (Dataset)Problem.ProblemData.Dataset;
   if (ShuffleSamples.Value) {
…
   }
   var problemData = (IClassificationProblemData)Problem.ProblemData;
-  var problemDataClone = new ClassificationProblemData(dataset, problemData.AllowedInputVariables, targetVariable);
+  var problemDataClone = new ClassificationProblemData(dataset, problemData.AllowedInputVariables, problemData.TargetVariable, problemData.ClassNames, problemData.PositiveClass);
   // set partitions of problem data clone correctly
   problemDataClone.TrainingPartition.Start = SamplesStart.Value; problemDataClone.TrainingPartition.End = SamplesEnd.Value;
…
   }
   #endregion
+
+  #region helper
+
+  private void SetEstimationLimits(IDataAnalysisProblem problem, Type[] types) {
+    foreach (var type in types) {
+      if (type.IsAssignableFrom(problem.GetType())) {
+        var originalLimits = (DoubleLimit)Problem.Parameters["EstimationLimits"].ActualValue; // problem is a clone of Problem
+        var limits = (DoubleLimit)problem.Parameters["EstimationLimits"].ActualValue;
+        limits.Lower = originalLimits.Lower;
+        limits.Upper = originalLimits.Upper;
+      }
+    }
+  }
+
+  #endregion
 }
 }
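The helper's comment sketches a more generic alternative: instead of enumerating the four symbolic problem types, probe the cloned problem's parameter collection for an "EstimationLimits" entry. A minimal sketch of that variant (hypothetical; this method is not part of the changeset):

    // Sketch only: generic variant that keys off the parameter name instead of the
    // concrete problem types (not part of r18006).
    private void SetEstimationLimitsByParameterName(IDataAnalysisProblem problem) {
      const string limitsName = "EstimationLimits"; // name taken from the comment above
      if (!Problem.Parameters.ContainsKey(limitsName) || !problem.Parameters.ContainsKey(limitsName)) return;
      var originalLimits = (DoubleLimit)Problem.Parameters[limitsName].ActualValue; // problem is a clone of Problem
      var limits = (DoubleLimit)problem.Parameters[limitsName].ActualValue;
      limits.Lower = originalLimits.Lower;
      limits.Upper = originalLimits.Upper;
    }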
branches/3087_Ceres_Integration/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/GaussianProcessModel.cs
r17180 → r18006

 
 // cholesky decomposition
-var res = alglib.trfac.spdmatrixcholesky(ref l, n, false);
+var res = alglib.trfac.spdmatrixcholesky(ref l, n, false, null);
 if (!res) throw new ArgumentException("Matrix is not positive semidefinite");
 return l;
…
 
 // for stddev
-alglib.ablas.rmatrixlefttrsm(n, newN, l, 0, 0, false, false, 0, ref sWKs, 0, 0);
+alglib.ablas.rmatrixlefttrsm(n, newN, l, 0, 0, false, false, 0, sWKs, 0, 0, null);
 
 for (int i = 0; i < newN; i++) {
branches/3087_Ceres_Integration/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/GradientBoostedTreesAlgorithm.cs
r17180 → r18006

   new AccuracyMaximizationThresholdCalculator());
 var classificationProblemData = new ClassificationProblemData(problemData.Dataset,
-  problemData.AllowedInputVariables, problemData.TargetVariable, problemData.Transformations);
+  problemData.AllowedInputVariables, problemData.TargetVariable, transformations: problemData.Transformations);
 classificationProblemData.TrainingPartition.Start = Problem.ProblemData.TrainingPartition.Start;
 classificationProblemData.TrainingPartition.End = Problem.ProblemData.TrainingPartition.End;
branches/3087_Ceres_Integration/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj
r17154 → r18006

 </PropertyGroup>
 <ItemGroup>
+  <Reference Include="ALGLIB-3.17.0, Version=3.17.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
+    <SpecificVersion>False</SpecificVersion>
+    <HintPath>..\..\bin\ALGLIB-3.17.0.dll</HintPath>
+    <Aliases>global</Aliases>
+    <Private>False</Private>
+  </Reference>
   <Reference Include="ALGLIB-3.7.0, Version=3.7.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
+    <SpecificVersion>False</SpecificVersion>
     <HintPath>..\..\bin\ALGLIB-3.7.0.dll</HintPath>
+    <Aliases>alglib_3_7</Aliases>
     <Private>False</Private>
   </Reference>
…
   <Compile Include="DoubleArrayExtensions.cs" />
   <Compile Include="FixedDataAnalysisAlgorithm.cs" />
+  <Compile Include="GAM\GeneralizedAdditiveModelAlgorithm.cs" />
+  <Compile Include="GAM\Spline1dModel.cs" />
   <Compile Include="GaussianProcess\CovarianceFunctions\CovarianceSpectralMixture.cs" />
   <Compile Include="GaussianProcess\CovarianceFunctions\CovariancePiecewisePolynomial.cs" />
…
   <Compile Include="NearestNeighbour\NearestNeighbourClassificationSolution.cs" />
   <Compile Include="NearestNeighbour\NearestNeighbourModel.cs" />
+  <Compile Include="NearestNeighbour\NearestNeighbourModelAlglib_3_7.cs" />
   <Compile Include="NearestNeighbour\NearestNeighbourRegression.cs" />
   <Compile Include="NearestNeighbour\NearestNeighbourRegressionSolution.cs" />
…
   <Compile Include="NeuralNetwork\NeuralNetworkClassificationSolution.cs" />
   <Compile Include="NeuralNetwork\NeuralNetworkModel.cs" />
+  <Compile Include="NeuralNetwork\NeuralNetworkModelAlglib_3_7.cs" />
   <Compile Include="NeuralNetwork\NeuralNetworkRegression.cs" />
   <Compile Include="NeuralNetwork\NeuralNetworkRegressionSolution.cs" />
…
   <Compile Include="RandomForest\RandomForestClassification.cs" />
   <Compile Include="RandomForest\RandomForestModel.cs" />
+  <Compile Include="RandomForest\RandomForestModelAlglib_3_7.cs" />
   <Compile Include="RandomForest\RandomForestModelFull.cs" />
   <Compile Include="RandomForest\RandomForestRegression.cs" />
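The project now references the 3.7 and 3.17 ALGLIB assemblies side by side; the new `<Aliases>alglib_3_7</Aliases>` element is what makes that possible, because both assemblies define the same root `alglib` namespace. A minimal sketch of how a source file disambiguates them (type names taken from the diffs below; the class is illustrative):

    extern alias alglib_3_7; // must precede all using directives

    class AliasDemo {
      alglib.decisionforest forest317;           // unqualified name binds to the global alias (ALGLIB 3.17)
      alglib_3_7.alglib.decisionforest forest37; // old 3.7 type, reached through the assembly alias
    }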
branches/3087_Ceres_Integration/HeuristicLab.Algorithms.DataAnalysis/3.4/KernelRidgeRegression/KernelRidgeRegressionModel.cs
r17180 → r18006

 }
 // cholesky decomposition
-var res = alglib.trfac.spdmatrixcholesky(ref l, n, false);
+var res = alglib.trfac.spdmatrixcholesky(ref l, n, false, null);
 if (res == false) { //try lua decomposition if cholesky faild
   int[] pivots;
branches/3087_Ceres_Integration/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/NcaModel.cs
r17180 → r18006

 
 var ds = ReduceDataset(dataset, rows);
-nnModel = new NearestNeighbourModel(ds, Enumerable.Range(0, ds.Rows), k, false, ds.VariableNames.Last(), ds.VariableNames.Take(transformationMatrix.GetLength(1)), classValues: classValues);
+// the new implementation of kNN uses selfmatch=true by default
+nnModel = new NearestNeighbourModelAlglib_3_7(ds, Enumerable.Range(0, ds.Rows), k, false, ds.VariableNames.Last(), ds.VariableNames.Take(transformationMatrix.GetLength(1)), classValues: classValues);
 
branches/3087_Ceres_Integration/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourClassification.cs
r17180 → r18006

 private const string NearestNeighbourClassificationModelResultName = "Nearest neighbour classification solution";
 private const string WeightsParameterName = "Weights";
-private const string SelfMatchParameterName = "SelfMatch";

 #region parameter properties
 public IFixedValueParameter<IntValue> KParameter {
   get { return (IFixedValueParameter<IntValue>)Parameters[KParameterName]; }
-}
-public IFixedValueParameter<BoolValue> SelfMatchParameter {
-  get { return (IFixedValueParameter<BoolValue>)Parameters[SelfMatchParameterName]; }
 }
 public IValueParameter<DoubleArray> WeightsParameter {
…
 #endregion
 #region properties
-public bool SelfMatch {
-  get { return SelfMatchParameter.Value.Value; }
-  set { SelfMatchParameter.Value.Value = value; }
-}
 public int K {
   get { return KParameter.Value.Value; }
…
 public NearestNeighbourClassification()
   : base() {
-  Parameters.Add(new FixedValueParameter<BoolValue>(SelfMatchParameterName, "Should we use equal points for classification?", new BoolValue(false)));
   Parameters.Add(new FixedValueParameter<IntValue>(KParameterName, "The number of nearest neighbours to consider for regression.", new IntValue(3)));
   Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, "Optional: use weights to specify individual scaling values for all features. If not set the weights are calculated automatically (each feature is scaled to unit variance)"));
…
   if (!Parameters.ContainsKey(WeightsParameterName)) {
     Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, "Optional: use weights to specify individual scaling values for all features. If not set the weights are calculated automatically (each feature is scaled to unit variance)"));
-  }
-  if (!Parameters.ContainsKey(SelfMatchParameterName)) {
-    Parameters.Add(new FixedValueParameter<BoolValue>(SelfMatchParameterName, "Should we use equal points for classification?", new BoolValue(false)));
   }
   #endregion
…
   double[] weights = null;
   if (Weights != null) weights = Weights.CloneAsArray();
-  var solution = CreateNearestNeighbourClassificationSolution(Problem.ProblemData, K, SelfMatch, weights);
+  var solution = CreateNearestNeighbourClassificationSolution(Problem.ProblemData, K, weights);
   Results.Add(new Result(NearestNeighbourClassificationModelResultName, "The nearest neighbour classification solution.", solution));
 }

-public static IClassificationSolution CreateNearestNeighbourClassificationSolution(IClassificationProblemData problemData, int k, bool selfMatch = false, double[] weights = null) {
+public static IClassificationSolution CreateNearestNeighbourClassificationSolution(IClassificationProblemData problemData, int k, double[] weights = null) {
   var problemDataClone = (IClassificationProblemData)problemData.Clone();
-  return new NearestNeighbourClassificationSolution(Train(problemDataClone, k, selfMatch, weights), problemDataClone);
+  return new NearestNeighbourClassificationSolution(Train(problemDataClone, k, weights), problemDataClone);
 }

-public static INearestNeighbourModel Train(IClassificationProblemData problemData, int k, bool selfMatch = false, double[] weights = null) {
+public static INearestNeighbourModel Train(IClassificationProblemData problemData, int k, double[] weights = null) {
   return new NearestNeighbourModel(problemData.Dataset,
     problemData.TrainingIndices,
     k,
-    selfMatch,
     problemData.TargetVariable,
     problemData.AllowedInputVariables,
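With the SelfMatch parameter gone, callers train through the simplified static API. A usage sketch (problemData stands for any IClassificationProblemData instance):

    // k = 3; weights omitted, so each feature is scaled to unit variance automatically
    INearestNeighbourModel model = NearestNeighbourClassification.Train(problemData, 3);
    IClassificationSolution solution =
      NearestNeighbourClassification.CreateNearestNeighbourClassificationSolution(problemData, 3);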
branches/3087_Ceres_Integration/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourModel.cs
r17180 → r18006

 /// Represents a nearest neighbour model for regression and classification
 /// </summary>
-[StorableType("A76C0823-3077-4ACE-8A40-E9B717C7DB60")]
+[StorableType("04A07DF6-6EB5-4D29-B7AE-5BE204CAF6BC")]
 [Item("NearestNeighbourModel", "Represents a nearest neighbour model for regression and classification.")]
 public sealed class NearestNeighbourModel : ClassificationModel, INearestNeighbourModel {

-  private readonly object kdTreeLockObject = new object();
-
-  private alglib.nearestneighbor.kdtree kdTree;
-  public alglib.nearestneighbor.kdtree KDTree {
-    get { return kdTree; }
-    set {
-      if (value != kdTree) {
-        if (value == null) throw new ArgumentNullException();
-        kdTree = value;
-        OnChanged(EventArgs.Empty);
-      }
-    }
+  private alglib.knnmodel model;
+  [Storable]
+  private string SerializedModel {
+    get { alglib.knnserialize(model, out var ser); return ser; }
+    set { if (value != null) alglib.knnunserialize(value, out model); }
   }
…
   [Storable]
   private int k;
-  [Storable(DefaultValue = false)]
-  private bool selfMatch;
-  [Storable(DefaultValue = null)]
-  private double[] weights; // not set for old versions loaded from disk
-  [Storable(DefaultValue = null)]
-  private double[] offsets; // not set for old versions loaded from disk
+  [Storable]
+  private double[] weights;
+  [Storable]
+  private double[] offsets;

   [StorableConstructor]
-  private NearestNeighbourModel(StorableConstructorFlag _) : base(_) {
-    kdTree = new alglib.nearestneighbor.kdtree();
-  }
+  private NearestNeighbourModel(StorableConstructorFlag _) : base(_) { }
   private NearestNeighbourModel(NearestNeighbourModel original, Cloner cloner)
     : base(original, cloner) {
-    kdTree = new alglib.nearestneighbor.kdtree();
-    kdTree.approxf = original.kdTree.approxf;
-    kdTree.boxmax = (double[])original.kdTree.boxmax.Clone();
-    kdTree.boxmin = (double[])original.kdTree.boxmin.Clone();
-    kdTree.buf = (double[])original.kdTree.buf.Clone();
-    kdTree.curboxmax = (double[])original.kdTree.curboxmax.Clone();
-    kdTree.curboxmin = (double[])original.kdTree.curboxmin.Clone();
-    kdTree.curdist = original.kdTree.curdist;
-    kdTree.debugcounter = original.kdTree.debugcounter;
-    kdTree.idx = (int[])original.kdTree.idx.Clone();
-    kdTree.kcur = original.kdTree.kcur;
-    kdTree.kneeded = original.kdTree.kneeded;
-    kdTree.n = original.kdTree.n;
-    kdTree.nodes = (int[])original.kdTree.nodes.Clone();
-    kdTree.normtype = original.kdTree.normtype;
-    kdTree.nx = original.kdTree.nx;
-    kdTree.ny = original.kdTree.ny;
-    kdTree.r = (double[])original.kdTree.r.Clone();
-    kdTree.rneeded = original.kdTree.rneeded;
-    kdTree.selfmatch = original.kdTree.selfmatch;
-    kdTree.splits = (double[])original.kdTree.splits.Clone();
-    kdTree.tags = (int[])original.kdTree.tags.Clone();
-    kdTree.x = (double[])original.kdTree.x.Clone();
-    kdTree.xy = (double[,])original.kdTree.xy.Clone();
-    selfMatch = original.selfMatch;
+    if (original.model != null)
+      model = (alglib.knnmodel)original.model.make_copy();
     k = original.k;
-    isCompatibilityLoaded = original.IsCompatibilityLoaded;
-    if (!IsCompatibilityLoaded) {
-      weights = new double[original.weights.Length];
-      Array.Copy(original.weights, weights, weights.Length);
-      offsets = new double[original.offsets.Length];
-      Array.Copy(original.offsets, this.offsets, this.offsets.Length);
-    }
+    weights = new double[original.weights.Length];
+    Array.Copy(original.weights, weights, weights.Length);
+    offsets = new double[original.offsets.Length];
+    Array.Copy(original.offsets, this.offsets, this.offsets.Length);
+
     allowedInputVariables = (string[])original.allowedInputVariables.Clone();
     if (original.classValues != null)
       this.classValues = (double[])original.classValues.Clone();
   }
-  public NearestNeighbourModel(IDataset dataset, IEnumerable<int> rows, int k, bool selfMatch, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<double> weights = null, double[] classValues = null)
+  public NearestNeighbourModel(IDataset dataset, IEnumerable<int> rows, int k, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<double> weights = null, double[] classValues = null)
     : base(targetVariable) {
     Name = ItemName;
     Description = ItemDescription;
-    this.selfMatch = selfMatch;
     this.k = k;
     this.allowedInputVariables = allowedInputVariables.ToArray();
     double[,] inputMatrix;
-    if (IsCompatibilityLoaded) {
-      // no scaling
-      inputMatrix = dataset.ToArray(
-        this.allowedInputVariables.Concat(new string[] { targetVariable }),
-        rows);
+    this.offsets = this.allowedInputVariables
+      .Select(name => dataset.GetDoubleValues(name, rows).Average() * -1)
+      .Concat(new double[] { 0 }) // no offset for target variable
+      .ToArray();
+    if (weights == null) {
+      // automatic determination of weights (all features should have variance = 1)
+      this.weights = this.allowedInputVariables
+        .Select(name => {
+          var pop = dataset.GetDoubleValues(name, rows).StandardDeviationPop();
+          return pop.IsAlmost(0) ? 1.0 : 1.0 / pop;
+        })
+        .Concat(new double[] { 1.0 }) // no scaling for target variable
+        .ToArray();
     } else {
-      this.offsets = this.allowedInputVariables
-        .Select(name => dataset.GetDoubleValues(name, rows).Average() * -1)
-        .Concat(new double[] { 0 }) // no offset for target variable
-        .ToArray();
-      if (weights == null) {
-        // automatic determination of weights (all features should have variance = 1)
-        this.weights = this.allowedInputVariables
-          .Select(name => {
-            var pop = dataset.GetDoubleValues(name, rows).StandardDeviationPop();
-            return pop.IsAlmost(0) ? 1.0 : 1.0 / pop;
-          })
-          .Concat(new double[] { 1.0 }) // no scaling for target variable
-          .ToArray();
-      } else {
-        // user specified weights (+ 1 for target)
-        this.weights = weights.Concat(new double[] { 1.0 }).ToArray();
-        if (this.weights.Length - 1 != this.allowedInputVariables.Length)
-          throw new ArgumentException("The number of elements in the weight vector must match the number of input variables");
-      }
-      inputMatrix = CreateScaledData(dataset, this.allowedInputVariables.Concat(new string[] { targetVariable }), rows, this.offsets, this.weights);
-    }
+      // user specified weights (+ 1 for target)
+      this.weights = weights.Concat(new double[] { 1.0 }).ToArray();
+      if (this.weights.Length - 1 != this.allowedInputVariables.Length)
+        throw new ArgumentException("The number of elements in the weight vector must match the number of input variables");
+    }
+    inputMatrix = CreateScaledData(dataset, this.allowedInputVariables.Concat(new string[] { targetVariable }), rows, this.offsets, this.weights);

     if (inputMatrix.ContainsNanOrInfinity())
       throw new NotSupportedException(
         "Nearest neighbour model does not support NaN or infinity values in the input dataset.");
-
-    this.kdTree = new alglib.nearestneighbor.kdtree();

     var nRows = inputMatrix.GetLength(0);
…
       }
     }
-    alglib.nearestneighbor.kdtreebuild(inputMatrix, nRows, inputMatrix.GetLength(1) - 1, 1, 2, kdTree);
+
+    alglib.knnbuildercreate(out var knnbuilder);
+    if (classValues == null) {
+      alglib.knnbuildersetdatasetreg(knnbuilder, inputMatrix, nRows, nFeatures, nout: 1);
+    } else {
+      alglib.knnbuildersetdatasetcls(knnbuilder, inputMatrix, nRows, nFeatures, classValues.Length);
+    }
+    alglib.knnbuilderbuildknnmodel(knnbuilder, k, 0.0, out model, out var report); // eps=0 (exact k-nn search is performed)
+
   }

…
   public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
     double[,] inputData;
-    if (IsCompatibilityLoaded) {
-      inputData = dataset.ToArray(allowedInputVariables, rows);
-    } else {
-      inputData = CreateScaledData(dataset, allowedInputVariables, rows, offsets, weights);
-    }
+    inputData = CreateScaledData(dataset, allowedInputVariables, rows, offsets, weights);

     int n = inputData.GetLength(0);
     int columns = inputData.GetLength(1);
     double[] x = new double[columns];
-    double[] dists = new double[k];
-    double[,] neighbours = new double[k, columns + 1];

+    alglib.knncreatebuffer(model, out var buf);
+    var y = new double[1];
     for (int row = 0; row < n; row++) {
       for (int column = 0; column < columns; column++) {
         x[column] = inputData[row, column];
       }
-      int numNeighbours;
-      lock (kdTreeLockObject) { // gkronber: the following calls change the kdTree data structure
-        numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, selfMatch);
-        alglib.nearestneighbor.kdtreequeryresultsdistances(kdTree, ref dists);
-        alglib.nearestneighbor.kdtreequeryresultsxy(kdTree, ref neighbours);
-      }
-      if (selfMatch) {
-        // weights for neighbours are 1/d.
-        // override distances (=0) of exact matches using 1% of the distance of the next closest non-self-match neighbour -> selfmatches weight 100x more than the next closest neighbor.
-        // if all k neighbours are selfmatches then they all have weight 0.01.
-        double minDist = dists[0] + 1;
-        for (int i = 0; i < numNeighbours; i++) {
-          if ((minDist > dists[i]) && (dists[i] != 0)) {
-            minDist = dists[i];
-          }
-        }
-        minDist /= 100.0;
-        for (int i = 0; i < numNeighbours; i++) {
-          if (dists[i] == 0) {
-            dists[i] = minDist;
-          }
-        }
-      }
-      double distanceWeightedValue = 0.0;
-      double distsSum = 0.0;
-      for (int i = 0; i < numNeighbours; i++) {
-        distanceWeightedValue += neighbours[i, columns] / dists[i];
-        distsSum += 1.0 / dists[i];
-      }
-      yield return distanceWeightedValue / distsSum;
+      alglib.knntsprocess(model, buf, x, ref y); // thread-safe process
+      yield return y[0];
     }
   }
…
     if (classValues == null) throw new InvalidOperationException("No class values are defined.");
     double[,] inputData;
-    if (IsCompatibilityLoaded) {
-      inputData = dataset.ToArray(allowedInputVariables, rows);
-    } else {
-      inputData = CreateScaledData(dataset, allowedInputVariables, rows, offsets, weights);
-    }
+    inputData = CreateScaledData(dataset, allowedInputVariables, rows, offsets, weights);
+
     int n = inputData.GetLength(0);
     int columns = inputData.GetLength(1);
     double[] x = new double[columns];
-    int[] y = new int[classValues.Length];
-    double[] dists = new double[k];
-    double[,] neighbours = new double[k, columns + 1];

+    alglib.knncreatebuffer(model, out var buf);
+    var y = new double[classValues.Length];
     for (int row = 0; row < n; row++) {
       for (int column = 0; column < columns; column++) {
         x[column] = inputData[row, column];
       }
-      int numNeighbours;
-      lock (kdTreeLockObject) {
-        // gkronber: the following calls change the kdTree data structure
-        numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, selfMatch);
-        alglib.nearestneighbor.kdtreequeryresultsdistances(kdTree, ref dists);
-        alglib.nearestneighbor.kdtreequeryresultsxy(kdTree, ref neighbours);
-      }
-      Array.Clear(y, 0, y.Length);
-      for (int i = 0; i < numNeighbours; i++) {
-        int classValue = (int)Math.Round(neighbours[i, columns]);
-        y[classValue]++;
-      }
-
-      // find class for with the largest probability value
-      int maxProbClassIndex = 0;
-      double maxProb = y[0];
-      for (int i = 1; i < y.Length; i++) {
-        if (maxProb < y[i]) {
-          maxProb = y[i];
-          maxProbClassIndex = i;
-        }
-      }
-      yield return classValues[maxProbClassIndex];
+      alglib.knntsprocess(model, buf, x, ref y); // thread-safe process
+      // find most probably class
+      var maxC = 0;
+      for (int i = 1; i < y.Length; i++)
+        if (maxC < y[i]) maxC = i;
+      yield return classValues[maxC];
     }
   }
…
     return new NearestNeighbourClassificationSolution(this, new ClassificationProblemData(problemData));
   }
-
-  #region events
-  public event EventHandler Changed;
-  private void OnChanged(EventArgs e) {
-    var handlers = Changed;
-    if (handlers != null)
-      handlers(this, e);
-  }
-  #endregion
-
-  // BackwardsCompatibility3.3
-  #region Backwards compatible code, remove with 3.4
-  private bool isCompatibilityLoaded = false; // new kNN models have the value false, kNN models loaded from disc have the value true
-  [Storable(DefaultValue = true)]
-  public bool IsCompatibilityLoaded {
-    get { return isCompatibilityLoaded; }
-    set { isCompatibilityLoaded = value; }
-  }
-  #endregion
-  #region persistence
-  [Storable] public double KDTreeApproxF { get { return kdTree.approxf; } set { kdTree.approxf = value; } }
-  [Storable] public double[] KDTreeBoxMax { get { return kdTree.boxmax; } set { kdTree.boxmax = value; } }
-  [Storable] public double[] KDTreeBoxMin { get { return kdTree.boxmin; } set { kdTree.boxmin = value; } }
-  [Storable] public double[] KDTreeBuf { get { return kdTree.buf; } set { kdTree.buf = value; } }
-  [Storable] public double[] KDTreeCurBoxMax { get { return kdTree.curboxmax; } set { kdTree.curboxmax = value; } }
-  [Storable] public double[] KDTreeCurBoxMin { get { return kdTree.curboxmin; } set { kdTree.curboxmin = value; } }
-  [Storable] public double KDTreeCurDist { get { return kdTree.curdist; } set { kdTree.curdist = value; } }
-  [Storable] public int KDTreeDebugCounter { get { return kdTree.debugcounter; } set { kdTree.debugcounter = value; } }
-  [Storable] public int[] KDTreeIdx { get { return kdTree.idx; } set { kdTree.idx = value; } }
-  [Storable] public int KDTreeKCur { get { return kdTree.kcur; } set { kdTree.kcur = value; } }
-  [Storable] public int KDTreeKNeeded { get { return kdTree.kneeded; } set { kdTree.kneeded = value; } }
-  [Storable] public int KDTreeN { get { return kdTree.n; } set { kdTree.n = value; } }
-  [Storable] public int[] KDTreeNodes { get { return kdTree.nodes; } set { kdTree.nodes = value; } }
-  [Storable] public int KDTreeNormType { get { return kdTree.normtype; } set { kdTree.normtype = value; } }
-  [Storable] public int KDTreeNX { get { return kdTree.nx; } set { kdTree.nx = value; } }
-  [Storable] public int KDTreeNY { get { return kdTree.ny; } set { kdTree.ny = value; } }
-  [Storable] public double[] KDTreeR { get { return kdTree.r; } set { kdTree.r = value; } }
-  [Storable] public double KDTreeRNeeded { get { return kdTree.rneeded; } set { kdTree.rneeded = value; } }
-  [Storable] public bool KDTreeSelfMatch { get { return kdTree.selfmatch; } set { kdTree.selfmatch = value; } }
-  [Storable] public double[] KDTreeSplits { get { return kdTree.splits; } set { kdTree.splits = value; } }
-  [Storable] public int[] KDTreeTags { get { return kdTree.tags; } set { kdTree.tags = value; } }
-  [Storable] public double[] KDTreeX { get { return kdTree.x; } set { kdTree.x = value; } }
-  [Storable] public double[,] KDTreeXY { get { return kdTree.xy; } set { kdTree.xy = value; } }
-  #endregion
 }
 }
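Condensed, the new ALGLIB 3.17 k-NN life cycle used above is: build through a knnbuilder, predict through a per-thread buffer, persist as a string. A self-contained sketch with made-up data (regression, two features; the calls mirror the diff):

    double[,] xy = {            // one row per sample, last column is the target
      { 0.0, 0.0, 1.0 },
      { 1.0, 0.0, 2.0 },
      { 0.0, 1.0, 3.0 },
      { 1.0, 1.0, 4.0 },
    };
    alglib.knnbuildercreate(out var builder);
    alglib.knnbuildersetdatasetreg(builder, xy, 4, 2, nout: 1);                     // 4 points, 2 features, 1 output
    alglib.knnbuilderbuildknnmodel(builder, 2, 0.0, out var model, out var report); // k = 2, eps = 0 -> exact search

    alglib.knncreatebuffer(model, out var buf);                                     // one buffer per thread
    var y = new double[1];
    alglib.knntsprocess(model, buf, new double[] { 0.5, 0.5 }, ref y);              // thread-safe prediction

    alglib.knnserialize(model, out var serialized);                                 // string round trip, as in SerializedModel
    alglib.knnunserialize(serialized, out var restored);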
branches/3087_Ceres_Integration/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourRegression.cs
r17180 → r18006

 private const string NearestNeighbourRegressionModelResultName = "Nearest neighbour regression solution";
 private const string WeightsParameterName = "Weights";
-private const string SelfMatchParameterName = "SelfMatch";

 #region parameter properties
 public IFixedValueParameter<IntValue> KParameter {
   get { return (IFixedValueParameter<IntValue>)Parameters[KParameterName]; }
-}
-public IFixedValueParameter<BoolValue> SelfMatchParameter {
-  get { return (IFixedValueParameter<BoolValue>)Parameters[SelfMatchParameterName]; }
 }
 public IValueParameter<DoubleArray> WeightsParameter {
…
   else KParameter.Value.Value = value;
 }
-}
-public bool SelfMatch {
-  get { return SelfMatchParameter.Value.Value; }
-  set { SelfMatchParameter.Value.Value = value; }
 }
 public DoubleArray Weights {
…
   Parameters.Add(new FixedValueParameter<IntValue>(KParameterName, "The number of nearest neighbours to consider for regression.", new IntValue(3)));
   Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, "Optional: use weights to specify individual scaling values for all features. If not set the weights are calculated automatically (each feature is scaled to unit variance)"));
-  Parameters.Add(new FixedValueParameter<BoolValue>(SelfMatchParameterName, "Should we use equal points for classification?", new BoolValue(false)));
   Problem = new RegressionProblem();
 }
…
   if (!Parameters.ContainsKey(WeightsParameterName)) {
     Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, "Optional: use weights to specify individual scaling values for all features. If not set the weights are calculated automatically (each feature is scaled to unit variance)"));
-  }
-  if (!Parameters.ContainsKey(SelfMatchParameterName)) {
-    Parameters.Add(new FixedValueParameter<BoolValue>(SelfMatchParameterName, "Should we use equal points for classification?", new BoolValue(false)));
   }
   #endregion
…
   double[] weights = null;
   if (Weights != null) weights = Weights.CloneAsArray();
-  var solution = CreateNearestNeighbourRegressionSolution(Problem.ProblemData, K, SelfMatch, weights);
+  var solution = CreateNearestNeighbourRegressionSolution(Problem.ProblemData, K, weights);
   Results.Add(new Result(NearestNeighbourRegressionModelResultName, "The nearest neighbour regression solution.", solution));
 }

-public static IRegressionSolution CreateNearestNeighbourRegressionSolution(IRegressionProblemData problemData, int k, bool selfMatch = false, double[] weights = null) {
+public static IRegressionSolution CreateNearestNeighbourRegressionSolution(IRegressionProblemData problemData, int k, double[] weights = null) {
   var clonedProblemData = (IRegressionProblemData)problemData.Clone();
-  return new NearestNeighbourRegressionSolution(Train(problemData, k, selfMatch, weights), clonedProblemData);
+  return new NearestNeighbourRegressionSolution(Train(problemData, k, weights), clonedProblemData);
 }

-public static INearestNeighbourModel Train(IRegressionProblemData problemData, int k, bool selfMatch = false, double[] weights = null) {
+public static INearestNeighbourModel Train(IRegressionProblemData problemData, int k, double[] weights = null) {
   return new NearestNeighbourModel(problemData.Dataset,
     problemData.TrainingIndices,
     k,
-    selfMatch,
     problemData.TargetVariable,
     problemData.AllowedInputVariables,
branches/3087_Ceres_Integration/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkClassification.cs
r17180 → r18006

   alglib.mlpcreatec2(allowedInputVariables.Count(), nHiddenNodes1, nHiddenNodes2, nClasses, out multiLayerPerceptron);
 } else throw new ArgumentException("Number of layers must be zero, one, or two.", "nLayers");

-alglib.mlpreport rep;
+
 int info;
 // using mlptrainlm instead of mlptraines or mlptrainbfgs because only one parameter is necessary
branches/3087_Ceres_Integration/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkModel.cs
r17180 → r18006

 #endregion

+extern alias alglib_3_7;
 using System;
 using System.Collections.Generic;
…
 /// Represents a neural network model for regression and classification
 /// </summary>
-[StorableType("AEB9B960-FCA6-4A6D-BD5F-27BCE9CC5BEA")]
+[StorableType("DABDBD64-E93B-4F50-A343-C8A92C1C48A4")]
 [Item("NeuralNetworkModel", "Represents a neural network for regression and classification.")]
 public sealed class NeuralNetworkModel : ClassificationModel, INeuralNetworkModel {

   private object mlpLocker = new object();
+
+
+
   private alglib.multilayerperceptron multiLayerPerceptron;
+  [Storable]
+  private string SerializedMultiLayerPerceptron {
+    get { alglib.mlpserialize(multiLayerPerceptron, out var ser); return ser; }
+    set { if (value != null) alglib.mlpunserialize(value, out multiLayerPerceptron); }
+  }

   public override IEnumerable<string> VariablesUsedForPrediction {
…
   private double[] classValues;
   [StorableConstructor]
-  private NeuralNetworkModel(StorableConstructorFlag _) : base(_) {
-    multiLayerPerceptron = new alglib.multilayerperceptron();
-  }
+  private NeuralNetworkModel(StorableConstructorFlag _) : base(_) { }
   private NeuralNetworkModel(NeuralNetworkModel original, Cloner cloner)
     : base(original, cloner) {
-    multiLayerPerceptron = new alglib.multilayerperceptron();
-    multiLayerPerceptron.innerobj.chunks = (double[,])original.multiLayerPerceptron.innerobj.chunks.Clone();
-    multiLayerPerceptron.innerobj.columnmeans = (double[])original.multiLayerPerceptron.innerobj.columnmeans.Clone();
-    multiLayerPerceptron.innerobj.columnsigmas = (double[])original.multiLayerPerceptron.innerobj.columnsigmas.Clone();
-    multiLayerPerceptron.innerobj.derror = (double[])original.multiLayerPerceptron.innerobj.derror.Clone();
-    multiLayerPerceptron.innerobj.dfdnet = (double[])original.multiLayerPerceptron.innerobj.dfdnet.Clone();
-    multiLayerPerceptron.innerobj.neurons = (double[])original.multiLayerPerceptron.innerobj.neurons.Clone();
-    multiLayerPerceptron.innerobj.nwbuf = (double[])original.multiLayerPerceptron.innerobj.nwbuf.Clone();
-    multiLayerPerceptron.innerobj.structinfo = (int[])original.multiLayerPerceptron.innerobj.structinfo.Clone();
-    multiLayerPerceptron.innerobj.weights = (double[])original.multiLayerPerceptron.innerobj.weights.Clone();
-    multiLayerPerceptron.innerobj.x = (double[])original.multiLayerPerceptron.innerobj.x.Clone();
-    multiLayerPerceptron.innerobj.y = (double[])original.multiLayerPerceptron.innerobj.y.Clone();
+    if (original.multiLayerPerceptron != null)
+      multiLayerPerceptron = (alglib.multilayerperceptron)original.multiLayerPerceptron.make_copy();
     allowedInputVariables = (string[])original.allowedInputVariables.Clone();
     if (original.classValues != null)
…
   this.name = ItemName;
   this.description = ItemDescription;
-  this.multiLayerPerceptron = multiLayerPerceptron;
+  this.multiLayerPerceptron = (alglib.multilayerperceptron)multiLayerPerceptron.make_copy();
   this.allowedInputVariables = allowedInputVariables.ToArray();
   if (classValues != null)
…
     x[column] = inputData[row, column];
   }
-  // NOTE: mlpprocess changes data in multiLayerPerceptron and is therefore not thread-save!
+  // NOTE: mlpprocess changes data in multiLayerPerceptron and is therefore not thread-safe!
   lock (mlpLocker) {
     alglib.mlpprocess(multiLayerPerceptron, x, ref y);
…
     x[column] = inputData[row, column];
   }
-  // NOTE: mlpprocess changes data in multiLayerPerceptron and is therefore not thread-save!
+  // NOTE: mlpprocess changes data in multiLayerPerceptron and is therefore not thread-safe!
   lock (mlpLocker) {
     alglib.mlpprocess(multiLayerPerceptron, x, ref y);
…
   return new NeuralNetworkClassificationSolution(this, new ClassificationProblemData(problemData));
 }
-
-#region persistence
-[Storable] private double[,] MultiLayerPerceptronChunks { get { return multiLayerPerceptron.innerobj.chunks; } set { multiLayerPerceptron.innerobj.chunks = value; } }
-[Storable] private double[] MultiLayerPerceptronColumnMeans { get { return multiLayerPerceptron.innerobj.columnmeans; } set { multiLayerPerceptron.innerobj.columnmeans = value; } }
-[Storable] private double[] MultiLayerPerceptronColumnSigmas { get { return multiLayerPerceptron.innerobj.columnsigmas; } set { multiLayerPerceptron.innerobj.columnsigmas = value; } }
-[Storable] private double[] MultiLayerPerceptronDError { get { return multiLayerPerceptron.innerobj.derror; } set { multiLayerPerceptron.innerobj.derror = value; } }
-[Storable] private double[] MultiLayerPerceptronDfdnet { get { return multiLayerPerceptron.innerobj.dfdnet; } set { multiLayerPerceptron.innerobj.dfdnet = value; } }
-[Storable] private double[] MultiLayerPerceptronNeurons { get { return multiLayerPerceptron.innerobj.neurons; } set { multiLayerPerceptron.innerobj.neurons = value; } }
-[Storable] private double[] MultiLayerPerceptronNwbuf { get { return multiLayerPerceptron.innerobj.nwbuf; } set { multiLayerPerceptron.innerobj.nwbuf = value; } }
-[Storable] private int[] MultiLayerPerceptronStuctinfo { get { return multiLayerPerceptron.innerobj.structinfo; } set { multiLayerPerceptron.innerobj.structinfo = value; } }
-[Storable] private double[] MultiLayerPerceptronWeights { get { return multiLayerPerceptron.innerobj.weights; } set { multiLayerPerceptron.innerobj.weights = value; } }
-[Storable] private double[] MultiLayerPerceptronX { get { return multiLayerPerceptron.innerobj.x; } set { multiLayerPerceptron.innerobj.x = value; } }
-[Storable] private double[] MultiLayerPerceptronY { get { return multiLayerPerceptron.innerobj.y; } set { multiLayerPerceptron.innerobj.y = value; } }
-#endregion
 }
 }
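Note the asymmetry to the k-NN and random-forest models in this changeset: alglib.mlpprocess mutates the network's internal state, so predictions still have to be serialized through the lock, whereas the buffer-based *tsprocess calls need no lock as long as each thread owns its buffer. The contrast in a nutshell (illustrative fragment, not a complete method):

    lock (mlpLocker) {                                    // shared mutable state -> lock required
      alglib.mlpprocess(multiLayerPerceptron, x, ref y);
    }
    alglib.knntsprocess(model, buf, x, ref y);            // mutable state lives in the per-thread buffer buf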
branches/3087_Ceres_Integration/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkRegression.cs
r17180 → r18006

 IEnumerable<int> rows = problemData.TrainingIndices;
 double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows);
+int nRows = inputMatrix.GetLength(0);
 if (inputMatrix.ContainsNanOrInfinity())
   throw new NotSupportedException("Neural network regression does not support NaN or infinity values in the input dataset.");
…
   alglib.mlpcreate2(allowedInputVariables.Count(), nHiddenNodes1, nHiddenNodes2, 1, out multiLayerPerceptron);
 } else throw new ArgumentException("Number of layers must be zero, one, or two.", "nLayers");
-alglib.mlpreport rep;
-int nRows = inputMatrix.GetLength(0);

 int info;
 // using mlptrainlm instead of mlptraines or mlptrainbfgs because only one parameter is necessary
-alglib.mlptrainlm(multiLayerPerceptron, inputMatrix, nRows, decay, restarts, out info, out rep);
+alglib.mlptrainlm(multiLayerPerceptron, inputMatrix, nRows, decay, restarts, out info, out _);
 if (info != 2) throw new ArgumentException("Error in calculation of neural network regression solution");
branches/3087_Ceres_Integration/HeuristicLab.Algorithms.DataAnalysis/3.4/Plugin.cs.frame
r17184 → r18006

 [PluginFile("HeuristicLab.Algorithms.DataAnalysis-3.4.dll", PluginFileType.Assembly)]
 [PluginDependency("HeuristicLab.ALGLIB", "3.7.0")]
+[PluginDependency("HeuristicLab.ALGLIB", "3.17")]
 [PluginDependency("HeuristicLab.Algorithms.OffspringSelectionGeneticAlgorithm", "3.3")] // for GBM
 [PluginDependency("HeuristicLab.Algorithms.GradientDescent", "3.3")]
branches/3087_Ceres_Integration/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestClassification.cs
r17180 → r18006

   inputMatrix[row, nColumns - 1] = classIndices[inputMatrix[row, nColumns - 1]];
 }

-alglib.dfreport rep;
-var dForest = RandomForestUtil.CreateRandomForestModel(seed, inputMatrix, nTrees, r, m, nClasses, out rep);
+var dForest = RandomForestUtil.CreateRandomForestModel(seed, inputMatrix, nTrees, r, m, nClasses, out var rep);

 rmsError = rep.rmserror;
…
 outOfBagRelClassificationError = rep.oobrelclserror;

-return new RandomForestModelFull(dForest, problemData.TargetVariable, problemData.AllowedInputVariables, classValues);
+return new RandomForestModelFull(dForest, nTrees, problemData.TargetVariable, problemData.AllowedInputVariables, classValues);
 }
 #endregion
branches/3087_Ceres_Integration/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestModel.cs
r17180 → r18006

 #endregion

+extern alias alglib_3_7;
+
 using System;
 using System.Collections.Generic;
…
 public sealed class RandomForestModel : ClassificationModel, IRandomForestModel {
   // not persisted
-  private alglib.decisionforest randomForest;
-  private alglib.decisionforest RandomForest {
+  private alglib_3_7.alglib.decisionforest randomForest;
+  private alglib_3_7.alglib.decisionforest RandomForest {
     get {
       // recalculate lazily
…
   private RandomForestModel(StorableConstructorFlag _) : base(_) {
     // for backwards compatibility (loading old solutions)
-    randomForest = new alglib.decisionforest();
+    randomForest = new alglib_3_7.alglib.decisionforest();
   }
   private RandomForestModel(RandomForestModel original, Cloner cloner)
     : base(original, cloner) {
-    randomForest = new alglib.decisionforest();
+    randomForest = new alglib_3_7.alglib.decisionforest();
     randomForest.innerobj.bufsize = original.randomForest.innerobj.bufsize;
     randomForest.innerobj.nclasses = original.randomForest.innerobj.nclasses;
…

   // random forest models can only be created through the static factory methods CreateRegressionModel and CreateClassificationModel
-  private RandomForestModel(string targetVariable, alglib.decisionforest randomForest,
+  private RandomForestModel(string targetVariable, alglib_3_7.alglib.decisionforest randomForest,
     int seed, IDataAnalysisProblemData originalTrainingData,
     int nTrees, double r, double m, double[] classValues = null)
…
       x[column] = inputData[row, column];
     }
-    alglib.dfprocess(RandomForest, x, ref y);
+    alglib_3_7.alglib.dfprocess(RandomForest, x, ref y);
     yield return y[0];
   }
…
       x[column] = inputData[row, column];
     }
-    alglib.dforest.dfprocessraw(RandomForest.innerobj, x, ref ys);
+    alglib_3_7.alglib.dforest.dfprocessraw(RandomForest.innerobj, x, ref ys);
     yield return ys.VariancePop();
   }
…
       x[column] = inputData[row, column];
     }
-    alglib.dfprocess(randomForest, x, ref y);
+    alglib_3_7.alglib.dfprocess(randomForest, x, ref y);
     // find class for with the largest probability value
     int maxProbClassIndex = 0;
…
   double[,] inputMatrix = problemData.Dataset.ToArray(variables, trainingIndices);

-  alglib.dfreport rep;
-  var dForest = RandomForestUtil.CreateRandomForestModel(seed, inputMatrix, nTrees, r, m, 1, out rep);
+  var dForest = RandomForestUtil.CreateRandomForestModelAlglib_3_7(seed, inputMatrix, nTrees, r, m, 1, out var rep);

   rmsError = rep.rmserror;
…
   }

-  alglib.dfreport rep;
-  var dForest = RandomForestUtil.CreateRandomForestModel(seed, inputMatrix, nTrees, r, m, nClasses, out rep);
+  var dForest = RandomForestUtil.CreateRandomForestModelAlglib_3_7(seed, inputMatrix, nTrees, r, m, nClasses, out var rep);

   rmsError = rep.rmserror;
branches/3087_Ceres_Integration/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestModelFull.cs
r17180 → r18006

 
 namespace HeuristicLab.Algorithms.DataAnalysis {
-[StorableType("9C797DF0-1169-4381-A732-6DAB90802839")]
+[StorableType("55412E08-DAD4-4C2E-9181-C142E7EA9474")]
 [Item("RandomForestModelFull", "Represents a random forest for regression and classification.")]
 public sealed class RandomForestModelFull : ClassificationModel, IRandomForestModel {
…
   private double[] classValues;

+  public int NumClasses => classValues == null ? 0 : classValues.Length;
+
   [Storable]
   private string[] inputVariables;

+  [Storable]
   public int NumberOfTrees {
-    get { return RandomForestNTrees; }
+    get; private set;
   }
…

   [Storable]
-  private int RandomForestBufSize {
-    get { return randomForest.innerobj.bufsize; }
-    set { randomForest.innerobj.bufsize = value; }
-  }
-  [Storable]
-  private int RandomForestNClasses {
-    get { return randomForest.innerobj.nclasses; }
-    set { randomForest.innerobj.nclasses = value; }
-  }
-  [Storable]
-  private int RandomForestNTrees {
-    get { return randomForest.innerobj.ntrees; }
-    set { randomForest.innerobj.ntrees = value; }
-  }
-  [Storable]
-  private int RandomForestNVars {
-    get { return randomForest.innerobj.nvars; }
-    set { randomForest.innerobj.nvars = value; }
-  }
-  [Storable]
-  private double[] RandomForestTrees {
-    get { return randomForest.innerobj.trees; }
-    set { randomForest.innerobj.trees = value; }
+  private string RandomForestSerialized {
+    get { alglib.dfserialize(randomForest, out var serialized); return serialized; }
+    set { if (value != null) alglib.dfunserialize(value, out randomForest); }
   }

   [StorableConstructor]
-  private RandomForestModelFull(StorableConstructorFlag _) : base(_) {
-    randomForest = new alglib.decisionforest();
-  }
+  private RandomForestModelFull(StorableConstructorFlag _) : base(_) { }

   private RandomForestModelFull(RandomForestModelFull original, Cloner cloner) : base(original, cloner) {
-    randomForest = new alglib.decisionforest();
-    randomForest.innerobj.bufsize = original.randomForest.innerobj.bufsize;
-    randomForest.innerobj.nclasses = original.randomForest.innerobj.nclasses;
-    randomForest.innerobj.ntrees = original.randomForest.innerobj.ntrees;
-    randomForest.innerobj.nvars = original.randomForest.innerobj.nvars;
-    randomForest.innerobj.trees = (double[])original.randomForest.innerobj.trees.Clone();
+    if (original.randomForest != null)
+      randomForest = (alglib.decisionforest)original.randomForest.make_copy();
+    NumberOfTrees = original.NumberOfTrees;

     // following fields are immutable so we don't need to clone them
…
   }

-  public RandomForestModelFull(alglib.decisionforest decisionForest, string targetVariable, IEnumerable<string> inputVariables, IEnumerable<double> classValues = null) : base(targetVariable) {
+  public RandomForestModelFull(alglib.decisionforest decisionForest, int nTrees, string targetVariable, IEnumerable<string> inputVariables, IEnumerable<double> classValues = null) : base(targetVariable) {
     this.name = ItemName;
     this.description = ItemDescription;

-    randomForest = decisionForest;
+    randomForest = (alglib.decisionforest)decisionForest.make_copy();
+    NumberOfTrees = nTrees;

     this.inputVariables = inputVariables.ToArray();
…
   double[] y = new double[1];

+  alglib.dfcreatebuffer(randomForest, out var buf);
   for (int row = 0; row < n; row++) {
     for (int column = 0; column < columns; column++) {
       x[column] = inputData[row, column];
     }
-    alglib.dfprocess(randomForest, x, ref y);
+    alglib.dftsprocess(randomForest, buf, x, ref y); // thread-safe process (as long as separate buffers are used)
     yield return y[0];
   }
…
       x[column] = inputData[row, column];
     }
-    alglib.dforest.dfprocessraw(randomForest.innerobj, x, ref ys);
+    lock (randomForest)
+      alglib.dforest.dfprocessraw(randomForest.innerobj, x, ref ys, null);
     yield return ys.VariancePop();
   }
…
   int columns = inputData.GetLength(1);
   double[] x = new double[columns];
-  double[] y = new double[randomForest.innerobj.nclasses];
-
+  double[] y = new double[NumClasses];
+
+  alglib.dfcreatebuffer(randomForest, out var buf);
   for (int row = 0; row < n; row++) {
     for (int column = 0; column < columns; column++) {
       x[column] = inputData[row, column];
     }
-    alglib.dfprocess(randomForest, x, ref y);
+    alglib.dftsprocess(randomForest, buf, x, ref y);
     // find class for with the largest probability value
     int maxProbClassIndex = 0;
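The payoff of the buffer-taking dftsprocess shows up once predictions run in parallel: each worker owns a buffer, so no lock is needed. A sketch using a hypothetical helper (Parallel.For's thread-local overload supplies one buffer per worker):

    using System.Threading.Tasks;

    static double[] PredictAll(alglib.decisionforest forest, double[][] inputs) {
      var results = new double[inputs.Length];
      Parallel.For(0, inputs.Length,
        () => { alglib.dfcreatebuffer(forest, out var buf); return buf; }, // one buffer per worker thread
        (i, state, buf) => {
          var y = new double[1];
          alglib.dftsprocess(forest, buf, inputs[i], ref y);               // safe: buf is never shared
          results[i] = y[0];
          return buf;
        },
        buf => { });                                                       // nothing to release
      return results;
    }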
branches/3087_Ceres_Integration/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestModelSurrogate.cs
r17278 → r18006

 }

+public override bool IsProblemDataCompatible(IDataAnalysisProblemData problemData, out string errorMessage) {
+  return ActualModel.IsProblemDataCompatible(problemData, out errorMessage);
+}
+
 //RegressionModel methods
 public bool IsProblemDataCompatible(IRegressionProblemData problemData, out string errorMessage) {
branches/3087_Ceres_Integration/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestRegression.cs
r17180 → r18006

 #endregion

+extern alias alglib_3_7;
+using alglib_3_7;
 using System.Collections.Generic;
 using System.Linq;
…
 outOfBagAvgRelError = rep.oobavgrelerror;

-return new RandomForestModelFull(dForest, problemData.TargetVariable, problemData.AllowedInputVariables);
+return new RandomForestModelFull(dForest, nTrees, problemData.TargetVariable, problemData.AllowedInputVariables);
 }
branches/3087_Ceres_Integration/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestUtil.cs
r17180 → r18006

 #endregion

+extern alias alglib_3_7;
+
 using System;
 using System.Collections.Generic;
…

 public static void AssertInputMatrix(double[,] inputMatrix) {
-  if (inputMatrix.ContainsNanOrInfinity())
-    throw new NotSupportedException("Random forest modeling does not support NaN or infinity values in the input dataset.");
+  foreach (var val in inputMatrix) if (double.IsNaN(val))
+    throw new NotSupportedException("Random forest modeling does not support NaN values in the input dataset.");
 }

…
   RandomForestUtil.AssertInputMatrix(inputMatrix);

+  int nRows = inputMatrix.GetLength(0);
+  int nColumns = inputMatrix.GetLength(1);
+
+  alglib.dfbuildercreate(out var dfbuilder);
+  alglib.dfbuildersetdataset(dfbuilder, inputMatrix, nRows, nColumns - 1, nClasses);
+  alglib.dfbuildersetimportancenone(dfbuilder); // do not calculate importance (TODO add this feature)
+  alglib.dfbuildersetrdfalgo(dfbuilder, 0); // only one algorithm supported in version 3.17
+  alglib.dfbuildersetrdfsplitstrength(dfbuilder, 2); // 0 = split at the random position, fastest one
+                                                     // 1 = split at the middle of the range
+                                                     // 2 = strong split at the best point of the range (default)
+  alglib.dfbuildersetrndvarsratio(dfbuilder, m);
+  alglib.dfbuildersetsubsampleratio(dfbuilder, r);
+  alglib.dfbuildersetseed(dfbuilder, seed);
+  alglib.dfbuilderbuildrandomforest(dfbuilder, nTrees, out var dForest, out rep);
+  return dForest;
+}
+
+internal static alglib_3_7.alglib.decisionforest CreateRandomForestModelAlglib_3_7(int seed, double[,] inputMatrix, int nTrees, double r, double m, int nClasses, out alglib_3_7.alglib.dfreport rep) {
+  RandomForestUtil.AssertParameters(r, m);
+  RandomForestUtil.AssertInputMatrix(inputMatrix);
+
   int info = 0;
-  alglib.math.rndobject = new System.Random(seed);
-  var dForest = new alglib.decisionforest();
-  rep = new alglib.dfreport();
+  alglib_3_7.alglib.math.rndobject = new System.Random(seed);
+  var dForest = new alglib_3_7.alglib.decisionforest();
+  rep = new alglib_3_7.alglib.dfreport();
   int nRows = inputMatrix.GetLength(0);
   int nColumns = inputMatrix.GetLength(1);
…
   int nFeatures = Math.Max((int)Math.Round(m * (nColumns - 1)), 1);

-  alglib.dforest.dfbuildinternal(inputMatrix, nRows, nColumns - 1, nClasses, nTrees, sampleSize, nFeatures, alglib.dforest.dfusestrongsplits + alglib.dforest.dfuseevs, ref info, dForest.innerobj, rep.innerobj);
+  alglib_3_7.alglib.dforest.dfbuildinternal(inputMatrix, nRows, nColumns - 1, nClasses, nTrees, sampleSize, nFeatures, alglib_3_7.alglib.dforest.dfusestrongsplits + alglib_3_7.alglib.dforest.dfuseevs, ref info, dForest.innerobj, rep.innerobj);
   if (info != 1) throw new ArgumentException("Error in calculation of random forest model");
   return dForest;
…
   var targetVariable = GetTargetVariableName(problemData);
   foreach (var tuple in partitions) {
-    double rmsError, avgRelError, outOfBagAvgRelError, outOfBagRmsError;
     var trainingRandomForestPartition = tuple.Item1;
     var testRandomForestPartition = tuple.Item2;
-    var model = RandomForestModel.CreateRegressionModel(problemData, trainingRandomForestPartition, nTrees, r, m, seed, out rmsError, out avgRelError, out outOfBagRmsError, out outOfBagAvgRelError);
+    var model = RandomForestRegression.CreateRandomForestRegressionModel(problemData, trainingRandomForestPartition, nTrees, r, m, seed,
+      out var rmsError, out var avgRelError, out var outOfBagRmsError, out var outOfBagAvgRelError);
     var estimatedValues = model.GetEstimatedValues(ds, testRandomForestPartition);
     var targetValues = ds.GetDoubleValues(targetVariable, testRandomForestPartition);
…
   var targetVariable = GetTargetVariableName(problemData);
   foreach (var tuple in partitions) {
-    double rmsError, avgRelError, outOfBagAvgRelError, outOfBagRmsError;
     var trainingRandomForestPartition = tuple.Item1;
     var testRandomForestPartition = tuple.Item2;
-    var model = RandomForestModel.CreateClassificationModel(problemData, trainingRandomForestPartition, nTrees, r, m, seed, out rmsError, out avgRelError, out outOfBagRmsError, out outOfBagAvgRelError);
+    var model = RandomForestClassification.CreateRandomForestClassificationModel(problemData, trainingRandomForestPartition, nTrees, r, m, seed,
+      out var rmsError, out var avgRelError, out var outOfBagRmsError, out var outOfBagAvgRelError);
     var estimatedValues = model.GetEstimatedClassValues(ds, testRandomForestPartition);
     var targetValues = ds.GetDoubleValues(targetVariable, testRandomForestPartition);
…
   var parameters = new RFParameter();
   for (int i = 0; i < setters.Count; ++i) { setters[i](parameters, parameterValues[i]); }
-  double rmsError, outOfBagRmsError, avgRelError, outOfBagAvgRelError;
-  RandomForestModel.CreateRegressionModel(problemData, problemData.TrainingIndices, parameters.N, parameters.R, parameters.M, seed, out rmsError, out outOfBagRmsError, out avgRelError, out outOfBagAvgRelError);
+  RandomForestRegression.CreateRandomForestRegressionModel(problemData, problemData.TrainingIndices, parameters.N, parameters.R, parameters.M, seed,
+    out var rmsError, out var avgRelError, out var outOfBagRmsError, out var outOfBagAvgRelError);

   lock (locker) {
…
   var parameters = new RFParameter();
   for (int i = 0; i < setters.Count; ++i) { setters[i](parameters, parameterValues[i]); }
-  double rmsError, outOfBagRmsError, avgRelError, outOfBagAvgRelError;
-  RandomForestModel.CreateClassificationModel(problemData, problemData.TrainingIndices, parameters.N, parameters.R, parameters.M, seed,
-    out rmsError, out outOfBagRmsError, out avgRelError, out outOfBagAvgRelError);
+  RandomForestClassification.CreateRandomForestClassificationModel(problemData, problemData.TrainingIndices, parameters.N, parameters.R, parameters.M, seed,
+    out var rmsError, out var avgRelError, out var outOfBagRmsError, out var outOfBagAvgRelError);

   lock (locker) {
…
 /// <param name="problemData">The regression problem data</param>
 /// <param name="numberOfFolds">The number of folds for crossvalidation</param>
-/// <param name="shuffleFolds">Specifies whether the folds should be shuffled</param>
 /// <param name="parameterRanges">The ranges for each parameter in the grid search</param>
 /// <param name="seed">The random seed (required by the random forest model)</param>
 /// <param name="maxDegreeOfParallelism">The maximum allowed number of threads (to parallelize the grid search)</param>
 /// <returns>The best parameter values found by the grid search</returns>
-public static RFParameter GridSearch(IRegressionProblemData problemData, int numberOfFolds, bool shuffleFolds, Dictionary<string, IEnumerable<double>> parameterRanges, int seed = 12345, int maxDegreeOfParallelism = 1) {
+public static RFParameter GridSearch(IRegressionProblemData problemData, int numberOfFolds, Dictionary<string, IEnumerable<double>> parameterRanges, int seed = 12345, int maxDegreeOfParallelism = 1) {
   DoubleValue mse = new DoubleValue(Double.MaxValue);
   RFParameter bestParameter = new RFParameter();
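Caller-side, the reworked grid search is invoked without the old shuffleFolds flag. A usage sketch (the range values are invented; the dictionary keys are assumed to match the RFParameter properties N, R and M used by the setters):

    var parameterRanges = new Dictionary<string, IEnumerable<double>> {
      { "N", new double[] { 50, 100, 200 } },   // number of trees
      { "R", new double[] { 0.3, 0.5, 0.7 } },  // ratio of rows sampled per tree
      { "M", new double[] { 0.2, 0.5, 1.0 } },  // ratio of features considered per split
    };
    RFParameter best = RandomForestUtil.GridSearch(problemData, 5, parameterRanges, 12345, 4);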