Changeset 14869
- Timestamp:
- 04/14/17 08:58:45 (8 years ago)
- Location:
- branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4
- Files:
-
- 1 deleted
- 42 edited
- 2 copied
Legend:
- Unmodified
- Added
- Removed
-
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4
-
Property
svn:mergeinfo
set to
(toggle deleted branches)
/stable/HeuristicLab.Algorithms.DataAnalysis/3.4 merged eligible /trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4 merged eligible /branches/1721-RandomForestPersistence/HeuristicLab.Algorithms.DataAnalysis/3.4 10321-10322 /branches/Benchmarking/sources/HeuristicLab.Algorithms.DataAnalysis/3.4 6917-7005 /branches/ClassificationModelComparison/HeuristicLab.Algorithms.DataAnalysis/3.4 9070-13099 /branches/CloningRefactoring/HeuristicLab.Algorithms.DataAnalysis/3.4 4656-4721 /branches/DataAnalysis Refactoring/HeuristicLab.Algorithms.DataAnalysis/3.4 5471-5808 /branches/DataAnalysis SolutionEnsembles/HeuristicLab.Algorithms.DataAnalysis/3.4 5815-6180 /branches/DataAnalysis/HeuristicLab.Algorithms.DataAnalysis/3.4 4458-4459,4462,4464 /branches/DataPreprocessing/HeuristicLab.Algorithms.DataAnalysis/3.4 10085-11101 /branches/GP.Grammar.Editor/HeuristicLab.Algorithms.DataAnalysis/3.4 6284-6795 /branches/GP.Symbols (TimeLag, Diff, Integral)/HeuristicLab.Algorithms.DataAnalysis/3.4 5060 /branches/HeuristicLab.DatasetRefactor/sources/HeuristicLab.Algorithms.DataAnalysis/3.4 11570-12508 /branches/HeuristicLab.Problems.Orienteering/HeuristicLab.Algorithms.DataAnalysis/3.4 11130-12721 /branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4 13819-14091 /branches/HeuristicLab.TimeSeries/HeuristicLab.Algorithms.DataAnalysis/3.4 8116-8789 /branches/LogResidualEvaluator/HeuristicLab.Algorithms.DataAnalysis/3.4 10202-10483 /branches/NET40/sources/HeuristicLab.Algorithms.DataAnalysis/3.4 5138-5162 /branches/ParallelEngine/HeuristicLab.Algorithms.DataAnalysis/3.4 5175-5192 /branches/ProblemInstancesRegressionAndClassification/HeuristicLab.Algorithms.DataAnalysis/3.4 7773-7810 /branches/QAPAlgorithms/HeuristicLab.Algorithms.DataAnalysis/3.4 6350-6627 /branches/Restructure trunk solution/HeuristicLab.Algorithms.DataAnalysis/3.4 6828 /branches/SpectralKernelForGaussianProcesses/HeuristicLab.Algorithms.DataAnalysis/3.4 10204-10479 
/branches/SuccessProgressAnalysis/HeuristicLab.Algorithms.DataAnalysis/3.4 5370-5682 /branches/Trunk/HeuristicLab.Algorithms.DataAnalysis/3.4 6829-6865 /branches/VNS/HeuristicLab.Algorithms.DataAnalysis/3.4 5594-5752 /branches/histogram/HeuristicLab.Algorithms.DataAnalysis/3.4 5959-6341
-
Property
svn:mergeinfo
set to
(toggle deleted branches)
-
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/BaselineClassifiers/OneR.cs
r14185 r14869 20 20 #endregion 21 21 22 using System; 22 23 using System.Collections.Generic; 23 24 using System.Linq; 25 using System.Threading; 24 26 using HeuristicLab.Common; 25 27 using HeuristicLab.Core; … … 58 60 } 59 61 60 protected override void Run( ) {62 protected override void Run(CancellationToken cancellationToken) { 61 63 var solution = CreateOneRSolution(Problem.ProblemData, MinBucketSizeParameter.Value.Value); 62 64 Results.Add(new Result("OneR solution", "The 1R classifier.", solution)); … … 64 66 65 67 public static IClassificationSolution CreateOneRSolution(IClassificationProblemData problemData, int minBucketSize = 6) { 68 var classValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices); 69 var model1 = FindBestDoubleVariableModel(problemData, minBucketSize); 70 var model2 = FindBestFactorModel(problemData); 71 72 if (model1 == null && model2 == null) throw new InvalidProgramException("Could not create OneR solution"); 73 else if (model1 == null) return new OneFactorClassificationSolution(model2, (IClassificationProblemData)problemData.Clone()); 74 else if (model2 == null) return new OneRClassificationSolution(model1, (IClassificationProblemData)problemData.Clone()); 75 else { 76 var model1EstimatedValues = model1.GetEstimatedClassValues(problemData.Dataset, problemData.TrainingIndices); 77 var model1NumCorrect = classValues.Zip(model1EstimatedValues, (a, b) => a.IsAlmost(b)).Count(e => e); 78 79 var model2EstimatedValues = model2.GetEstimatedClassValues(problemData.Dataset, problemData.TrainingIndices); 80 var model2NumCorrect = classValues.Zip(model2EstimatedValues, (a, b) => a.IsAlmost(b)).Count(e => e); 81 82 if (model1NumCorrect > model2NumCorrect) { 83 return new OneRClassificationSolution(model1, (IClassificationProblemData)problemData.Clone()); 84 } else { 85 return new OneFactorClassificationSolution(model2, (IClassificationProblemData)problemData.Clone()); 86 } 87 } 88 } 89 90 private 
static OneRClassificationModel FindBestDoubleVariableModel(IClassificationProblemData problemData, int minBucketSize = 6) { 66 91 var bestClassified = 0; 67 92 List<Split> bestSplits = null; … … 70 95 var classValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices); 71 96 72 foreach (var variable in problemData.AllowedInputVariables) { 97 var allowedInputVariables = problemData.AllowedInputVariables.Where(problemData.Dataset.VariableHasType<double>); 98 99 if (!allowedInputVariables.Any()) return null; 100 101 foreach (var variable in allowedInputVariables) { 73 102 var inputValues = problemData.Dataset.GetDoubleValues(variable, problemData.TrainingIndices); 74 103 var samples = inputValues.Zip(classValues, (i, v) => new Sample(i, v)).OrderBy(s => s.inputValue); 75 104 76 var missingValuesDistribution = samples.Where(s => double.IsNaN(s.inputValue)).GroupBy(s => s.classValue).ToDictionary(s => s.Key, s => s.Count()).MaxItems(s => s.Value).FirstOrDefault(); 105 var missingValuesDistribution = samples 106 .Where(s => double.IsNaN(s.inputValue)).GroupBy(s => s.classValue) 107 .ToDictionary(s => s.Key, s => s.Count()) 108 .MaxItems(s => s.Value) 109 .FirstOrDefault(); 77 110 78 111 //calculate class distributions for all distinct inputValues … … 119 152 while (sample.inputValue >= splits[splitIndex].thresholdValue) 120 153 splitIndex++; 121 correctClassified += sample.classValue == splits[splitIndex].classValue? 1 : 0;154 correctClassified += sample.classValue.IsAlmost(splits[splitIndex].classValue) ? 
1 : 0; 122 155 } 123 156 correctClassified += missingValuesDistribution.Value; … … 133 166 //remove neighboring splits with the same class value 134 167 for (int i = 0; i < bestSplits.Count - 1; i++) { 135 if (bestSplits[i].classValue == bestSplits[i + 1].classValue) {168 if (bestSplits[i].classValue.IsAlmost(bestSplits[i + 1].classValue)) { 136 169 bestSplits.Remove(bestSplits[i]); 137 170 i--; … … 139 172 } 140 173 141 var model = new OneRClassificationModel(problemData.TargetVariable, bestVariable, bestSplits.Select(s => s.thresholdValue).ToArray(), bestSplits.Select(s => s.classValue).ToArray(), bestMissingValuesClass); 142 var solution = new OneRClassificationSolution(model, (IClassificationProblemData)problemData.Clone()); 143 144 return solution; 174 var model = new OneRClassificationModel(problemData.TargetVariable, bestVariable, 175 bestSplits.Select(s => s.thresholdValue).ToArray(), 176 bestSplits.Select(s => s.classValue).ToArray(), bestMissingValuesClass); 177 178 return model; 179 } 180 private static OneFactorClassificationModel FindBestFactorModel(IClassificationProblemData problemData) { 181 var classValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices); 182 var defaultClass = FindMostFrequentClassValue(classValues); 183 // only select string variables 184 var allowedInputVariables = problemData.AllowedInputVariables.Where(problemData.Dataset.VariableHasType<string>); 185 186 if (!allowedInputVariables.Any()) return null; 187 188 OneFactorClassificationModel bestModel = null; 189 var bestModelNumCorrect = 0; 190 191 foreach (var variable in allowedInputVariables) { 192 var variableValues = problemData.Dataset.GetStringValues(variable, problemData.TrainingIndices); 193 var groupedClassValues = variableValues 194 .Zip(classValues, (v, c) => new KeyValuePair<string, double>(v, c)) 195 .GroupBy(kvp => kvp.Key) 196 .ToDictionary(g => g.Key, g => FindMostFrequentClassValue(g.Select(kvp => kvp.Value))); 197 
198 var model = new OneFactorClassificationModel(problemData.TargetVariable, variable, 199 groupedClassValues.Select(kvp => kvp.Key).ToArray(), groupedClassValues.Select(kvp => kvp.Value).ToArray(), defaultClass); 200 201 var modelEstimatedValues = model.GetEstimatedClassValues(problemData.Dataset, problemData.TrainingIndices); 202 var modelNumCorrect = classValues.Zip(modelEstimatedValues, (a, b) => a.IsAlmost(b)).Count(e => e); 203 if (modelNumCorrect > bestModelNumCorrect) { 204 bestModelNumCorrect = modelNumCorrect; 205 bestModel = model; 206 } 207 } 208 209 return bestModel; 210 } 211 212 private static double FindMostFrequentClassValue(IEnumerable<double> classValues) { 213 return classValues.GroupBy(c => c).OrderByDescending(g => g.Count()).Select(g => g.Key).First(); 145 214 } 146 215 -
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/BaselineClassifiers/OneRClassificationModel.cs
r14185 r14869 31 31 [StorableClass] 32 32 [Item("OneR Classification Model", "A model that uses intervals for one variable to determine the class.")] 33 public class OneRClassificationModel : ClassificationModel { 33 public sealed class OneRClassificationModel : ClassificationModel { 34 34 public override IEnumerable<string> VariablesUsedForPrediction { 35 35 get { return new[] { Variable }; } … … 37 37 38 38 [Storable] 39 protected string variable; 39 private string variable; 40 40 public string Variable { 41 41 get { return variable; } … … 43 43 44 44 [Storable] 45 protected double[] splits; 45 private double[] splits; 46 46 public double[] Splits { 47 47 get { return splits; } … … 49 49 50 50 [Storable] 51 protected double[] classes; 51 private double[] classes; 52 52 public double[] Classes { 53 53 get { return classes; } … … 55 55 56 56 [Storable] 57 protected double missingValuesClass; 57 private double missingValuesClass; 58 58 public double MissingValuesClass { 59 59 get { return missingValuesClass; } … … 61 61 62 62 [StorableConstructor] 63 protected OneRClassificationModel(bool deserializing) : base(deserializing) { } 64 protected OneRClassificationModel(OneRClassificationModel original, Cloner cloner) 63 private OneRClassificationModel(bool deserializing) : base(deserializing) { } 64 private OneRClassificationModel(OneRClassificationModel original, Cloner cloner) 65 65 : base(original, cloner) { 66 66 this.variable = (string)original.variable; 67 67 this.splits = (double[])original.splits.Clone(); 68 68 this.classes = (double[])original.classes.Clone(); 69 this.missingValuesClass = original.missingValuesClass; 69 70 } 70 71 public override IDeepCloneable Clone(Cloner cloner) { return new OneRClassificationModel(this, cloner); } -
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/BaselineClassifiers/OneRClassificationSolution.cs
r14185 r14869 28 28 [StorableClass] 29 29 [Item(Name = "OneR Classification Solution", Description = "Represents a OneR classification solution which uses only a single feature with potentially multiple thresholds for class prediction.")] 30 public class OneRClassificationSolution : ClassificationSolution { 30 public sealed class OneRClassificationSolution : ClassificationSolution { 31 31 public new OneRClassificationModel Model { 32 32 get { return (OneRClassificationModel)base.Model; } … … 35 35 36 36 [StorableConstructor] 37 protected OneRClassificationSolution(bool deserializing) : base(deserializing) { } 38 protected OneRClassificationSolution(OneRClassificationSolution original, Cloner cloner) : base(original, cloner) { } 37 private OneRClassificationSolution(bool deserializing) : base(deserializing) { } 38 private OneRClassificationSolution(OneRClassificationSolution original, Cloner cloner) : base(original, cloner) { } 39 39 public OneRClassificationSolution(OneRClassificationModel model, IClassificationProblemData problemData) 40 40 : base(model, problemData) { -
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/BaselineClassifiers/ZeroR.cs
r14185 r14869 21 21 22 22 using System.Linq; 23 using System.Threading; 23 24 using HeuristicLab.Common; 24 25 using HeuristicLab.Core; … … 49 50 } 50 51 51 protected override void Run() { 52 protected override void Run(CancellationToken cancellationToken) { 52 53 var solution = CreateZeroRSolution(Problem.ProblemData); 53 54 Results.Add(new Result("ZeroR solution", "The simplest possible classifier, ZeroR always predicts the majority class.", solution)); -
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/CrossValidation.cs
r14185 r14869 451 451 var aggregatedResults = new List<IResult>(); 452 452 foreach (KeyValuePair<string, List<IClassificationSolution>> solutions in resultSolutions) { 453 // clone manually to correctly clone references between cloned root objects 454 Cloner cloner = new Cloner(); 455 var problemDataClone = (IClassificationProblemData)cloner.Clone(Problem.ProblemData); 453 // at least one algorithm (GBT with logistic regression loss) produces a classification solution even though the original problem is a regression problem. 454 var targetVariable = solutions.Value.First().ProblemData.TargetVariable; 455 var problemDataClone = new ClassificationProblemData(Problem.ProblemData.Dataset, 456 Problem.ProblemData.AllowedInputVariables, targetVariable); 456 457 // set partitions of problem data clone correctly 457 458 problemDataClone.TrainingPartition.Start = SamplesStart.Value; problemDataClone.TrainingPartition.End = SamplesEnd.Value; -
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/FixedDataAnalysisAlgorithm.cs
r14185 r14869 21 21 22 22 using System; 23 using System.Threading;24 using System.Threading.Tasks;25 23 using HeuristicLab.Common; 26 24 using HeuristicLab.Optimization; … … 30 28 namespace HeuristicLab.Algorithms.DataAnalysis { 31 29 [StorableClass] 32 public abstract class FixedDataAnalysisAlgorithm<T> : Algorithm, 33 IDataAnalysisAlgorithm<T>, 34 IStorableContent 35 where T : class, IDataAnalysisProblem { 36 public string Filename { get; set; } 37 30 public abstract class FixedDataAnalysisAlgorithm<T> : BasicAlgorithm where T : class, IDataAnalysisProblem { 38 31 #region Properties 39 32 public override Type ProblemType { … … 44 37 set { base.Problem = value; } 45 38 } 46 [Storable]47 private ResultCollection results;48 public override ResultCollection Results {49 get { return results; }50 }51 39 #endregion 52 40 53 p rivate DateTime lastUpdateTime;41 public override bool SupportsPause { get { return false; } } 54 42 55 43 [StorableConstructor] 56 44 protected FixedDataAnalysisAlgorithm(bool deserializing) : base(deserializing) { } 57 protected FixedDataAnalysisAlgorithm(FixedDataAnalysisAlgorithm<T> original, Cloner cloner) 58 : base(original, cloner) { 59 results = cloner.Clone(original.Results); 60 } 61 public FixedDataAnalysisAlgorithm() 62 : base() { 63 results = new ResultCollection(); 64 } 65 66 public override void Prepare() { 67 if (Problem != null) base.Prepare(); 68 results.Clear(); 69 OnPrepared(); 70 } 71 72 public override void Start() { 73 base.Start(); 74 var cancellationTokenSource = new CancellationTokenSource(); 75 76 OnStarted(); 77 Task task = Task.Factory.StartNew(Run, cancellationTokenSource.Token, cancellationTokenSource.Token); 78 task.ContinueWith(t => { 79 try { 80 t.Wait(); 81 } 82 catch (AggregateException ex) { 83 try { 84 ex.Flatten().Handle(x => x is OperationCanceledException); 85 } 86 catch (AggregateException remaining) { 87 if (remaining.InnerExceptions.Count == 1) OnExceptionOccurred(remaining.InnerExceptions[0]); 88 else 
OnExceptionOccurred(remaining); 89 } 90 } 91 cancellationTokenSource.Dispose(); 92 cancellationTokenSource = null; 93 OnStopped(); 94 }); 95 } 96 private void Run(object state) { 97 CancellationToken cancellationToken = (CancellationToken)state; 98 lastUpdateTime = DateTime.UtcNow; 99 System.Timers.Timer timer = new System.Timers.Timer(250); 100 timer.AutoReset = true; 101 timer.Elapsed += new System.Timers.ElapsedEventHandler(timer_Elapsed); 102 timer.Start(); 103 try { 104 Run(); 105 } 106 finally { 107 timer.Elapsed -= new System.Timers.ElapsedEventHandler(timer_Elapsed); 108 timer.Stop(); 109 ExecutionTime += DateTime.UtcNow - lastUpdateTime; 110 } 111 112 cancellationToken.ThrowIfCancellationRequested(); 113 } 114 protected abstract void Run(); 115 #region Events 116 protected override void OnProblemChanged() { 117 Problem.Reset += new EventHandler(Problem_Reset); 118 base.OnProblemChanged(); 119 } 120 private void timer_Elapsed(object sender, System.Timers.ElapsedEventArgs e) { 121 System.Timers.Timer timer = (System.Timers.Timer)sender; 122 timer.Enabled = false; 123 DateTime now = DateTime.UtcNow; 124 ExecutionTime += now - lastUpdateTime; 125 lastUpdateTime = now; 126 timer.Enabled = true; 127 } 128 #endregion 45 protected FixedDataAnalysisAlgorithm(FixedDataAnalysisAlgorithm<T> original, Cloner cloner) : base(original, cloner) { } 46 public FixedDataAnalysisAlgorithm() : base() { } 129 47 130 48 } -
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/GBM/GradientBoostingRegressionAlgorithm.cs
r14185 r14869 44 44 [StorableClass] 45 45 [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 350)] 46 public class GradientBoostingRegressionAlgorithm : BasicAlgorithm { 47 public override Type ProblemType { 48 get { return typeof(IRegressionProblem); } 49 } 50 51 public new IRegressionProblem Problem { 52 get { return (IRegressionProblem)base.Problem; } 53 set { base.Problem = value; } 54 } 46 public class GradientBoostingRegressionAlgorithm : FixedDataAnalysisAlgorithm<IRegressionProblem> { 55 47 56 48 #region ParameterNames -
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/GaussianProcessBase.cs
r14185 r14869 21 21 #endregion 22 22 23 using System.Linq; 23 24 using HeuristicLab.Algorithms.GradientDescent; 24 25 using HeuristicLab.Common; … … 119 120 120 121 // necessary for BFGS 121 Parameters.Add(new ValueParameter<BoolValue>("Maximization", new BoolValue(false))); 122 Parameters["Maximization"].Hidden = true; 122 Parameters.Add(new FixedValueParameter<BoolValue>("Maximization (BFGS)", new BoolValue(false))); 123 Parameters["Maximization (BFGS)"].Hidden = true; 123 124 124 125 var randomCreator = new HeuristicLab.Random.RandomCreator(); … … 164 165 modelCreator.Successor = updateResults; 165 166 167 updateResults.MaximizationParameter.ActualName = "Maximization (BFGS)"; 166 168 updateResults.StateParameter.ActualName = bfgsInitializer.StateParameter.Name; 167 169 updateResults.QualityParameter.ActualName = NegativeLogLikelihoodParameterName; … … 197 199 // BackwardsCompatibility3.4 198 200 #region Backwards compatible code, remove with 3.5 199 if (!Parameters.ContainsKey("Maximization")) { 200 Parameters.Add(new ValueParameter<BoolValue>("Maximization", new BoolValue(false))); 201 Parameters["Maximization"].Hidden = true; 201 if (Parameters.ContainsKey("Maximization")) { 202 Parameters.Remove("Maximization"); 203 } 204 205 if (!Parameters.ContainsKey("Maximization (BFGS)")) { 206 Parameters.Add(new FixedValueParameter<BoolValue>("Maximization (BFGS)", new BoolValue(false))); 207 Parameters["Maximization (BFGS)"].Hidden = true; 208 OperatorGraph.Operators.OfType<LbfgsUpdateResults>().First().MaximizationParameter.ActualName = "Maximization BFGS"; 202 209 } 203 210 -
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/GaussianProcessClassificationModelCreator.cs
r14185 r14869 67 67 HyperparameterGradientsParameter.ActualValue = new RealVector(model.HyperparameterGradients); 68 68 return base.Apply(); 69 } catch (ArgumentException) { } catch (alglib.alglibexception) { } 69 } catch (ArgumentException) { 70 } catch (alglib.alglibexception) { 71 } 70 72 NegativeLogLikelihoodParameter.ActualValue = new DoubleValue(1E300); 71 73 HyperparameterGradientsParameter.ActualValue = new RealVector(Hyperparameter.Count()); -
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/GaussianProcessModel.cs
r14185 r14869 165 165 try { 166 166 CalculateModel(ds, rows, scaleInputs); 167 } 168 catch (alglib.alglibexception ae) { 167 } catch (alglib.alglibexception ae) { 169 168 // wrap exception so that calling code doesn't have to know about alglib implementation 170 169 throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae); … … 260 259 private static double[,] GetData(IDataset ds, IEnumerable<string> allowedInputs, IEnumerable<int> rows, Scaling scaling) { 261 260 if (scaling != null) { 262 return AlglibUtil.PrepareAndScaleInputMatrix(ds, allowedInputs, rows, scaling); 261 // BackwardsCompatibility3.3 262 #region Backwards compatible code, remove with 3.4 263 // TODO: completely remove Scaling class 264 List<string> variablesList = allowedInputs.ToList(); 265 List<int> rowsList = rows.ToList(); 266 267 double[,] matrix = new double[rowsList.Count, variablesList.Count]; 268 269 int col = 0; 270 foreach (string column in variablesList) { 271 var values = scaling.GetScaledValues(ds, column, rowsList); 272 int row = 0; 273 foreach (var value in values) { 274 matrix[row, col] = value; 275 row++; 276 } 277 col++; 278 } 279 return matrix; 280 #endregion 263 281 } else { 264 return AlglibUtil.PrepareInputMatrix(ds,allowedInputs, rows);282 return ds.ToArray(allowedInputs, rows); 265 283 } 266 284 } … … 334 352 return Enumerable.Range(0, newN) 335 353 .Select(i => ms[i] + Util.ScalarProd(Ks[i], alpha)); 336 } 337 catch (alglib.alglibexception ae) { 354 } catch (alglib.alglibexception ae) { 338 355 // wrap exception so that calling code doesn't have to know about alglib implementation 339 356 throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae); … … 381 398 } 382 399 return kss; 383 } 384 catch (alglib.alglibexception ae) { 400 } catch (alglib.alglibexception ae) { 385 401 // wrap exception so that calling code doesn't have to know about alglib implementation 386 402 throw new 
ArgumentException("There was a problem in the calculation of the Gaussian process model", ae); -
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/GradientBoostedTreesAlgorithm.cs
r14345 r14869 21 21 #endregion 22 22 23 using System;24 23 using System.Linq; 25 24 using System.Threading; … … 38 37 [StorableClass] 39 38 [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 125)] 40 public class GradientBoostedTreesAlgorithm : BasicAlgorithm { 41 public override Type ProblemType { 42 get { return typeof(IRegressionProblem); } 43 } 44 public new IRegressionProblem Problem { 45 get { return (IRegressionProblem)base.Problem; } 46 set { base.Problem = value; } 47 } 48 39 public class GradientBoostedTreesAlgorithm : FixedDataAnalysisAlgorithm<IRegressionProblem> { 49 40 #region ParameterNames 50 41 private const string IterationsParameterName = "Iterations"; … … 204 195 table.Rows.Add(new DataRow("Loss (train)")); 205 196 table.Rows.Add(new DataRow("Loss (test)")); 197 table.Rows["Loss (train)"].VisualProperties.StartIndexZero = true; 198 table.Rows["Loss (test)"].VisualProperties.StartIndexZero = true; 199 206 200 Results.Add(new Result("Qualities", table)); 207 201 var curLoss = new DoubleValue(); … … 263 257 var classificationProblemData = new ClassificationProblemData(problemData.Dataset, 264 258 problemData.AllowedInputVariables, problemData.TargetVariable, problemData.Transformations); 265 classificationModel.RecalculateModelParameters(classificationProblemData, classificationProblemData.TrainingIndices); 259 classificationProblemData.TrainingPartition.Start = Problem.ProblemData.TrainingPartition.Start; 260 classificationProblemData.TrainingPartition.End = Problem.ProblemData.TrainingPartition.End; 261 classificationProblemData.TestPartition.Start = Problem.ProblemData.TestPartition.Start; 262 classificationProblemData.TestPartition.End = Problem.ProblemData.TestPartition.End; 263 264 classificationModel.SetThresholdsAndClassValues(new double[] { double.NegativeInfinity, 0.0 }, new[] { 0.0, 1.0 }); 265 266 266 267 267 var classificationSolution = new DiscriminantFunctionClassificationSolution(classificationModel, 
classificationProblemData); -
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/GradientBoostedTreesAlgorithmStatic.cs
r14185 r14869 148 148 // for custom stepping & termination 149 149 public static IGbmState CreateGbmState(IRegressionProblemData problemData, ILossFunction lossFunction, uint randSeed, int maxSize = 3, double r = 0.66, double m = 0.5, double nu = 0.01) { 150 // check input variables. Only double variables are allowed. 151 var invalidInputs = 152 problemData.AllowedInputVariables.Where(name => !problemData.Dataset.VariableHasType<double>(name)); 153 if (invalidInputs.Any()) 154 throw new NotSupportedException("Gradient tree boosting only supports real-valued variables. Unsupported inputs: " + string.Join(", ", invalidInputs)); 155 150 156 return new GbmState(problemData, lossFunction, randSeed, maxSize, r, m, nu); 151 157 } -
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj
r14500 r14869 189 189 </ItemGroup> 190 190 <ItemGroup> 191 <Compile Include="BaselineClassifiers\OneFactorClassificationModel.cs" /> 192 <Compile Include="BaselineClassifiers\OneFactorClassificationSolution.cs" /> 191 193 <Compile Include="BaselineClassifiers\OneR.cs" /> 192 194 <Compile Include="BaselineClassifiers\OneRClassificationModel.cs" /> … … 311 313 <SubType>Code</SubType> 312 314 </Compile> 313 <Compile Include="Linear\AlglibUtil.cs" />314 <Compile Include="Linear\Scaling.cs" />315 315 <Compile Include="Linear\LinearDiscriminantAnalysis.cs" /> 316 316 <Compile Include="Linear\LinearRegression.cs"> … … 320 320 <Compile Include="Linear\MultinomialLogitClassificationSolution.cs" /> 321 321 <Compile Include="Linear\MultinomialLogitModel.cs" /> 322 <Compile Include="Linear\Scaling.cs" /> 322 323 <Compile Include="MctsSymbolicRegression\Automaton.cs" /> 323 324 <Compile Include="MctsSymbolicRegression\CodeGenerator.cs" /> -
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearDiscriminantAnalysis.cs
r14185 r14869 23 23 using System.Collections.Generic; 24 24 using System.Linq; 25 using System.Threading; 25 26 using HeuristicLab.Common; 26 27 using HeuristicLab.Core; … … 36 37 /// Linear discriminant analysis classification algorithm. 37 38 /// </summary> 38 [Item("Linear Discriminant Analysis ", "Linear discriminant analysis classification algorithm (wrapper for ALGLIB).")]39 [Item("Linear Discriminant Analysis (LDA)", "Linear discriminant analysis classification algorithm (wrapper for ALGLIB).")] 39 40 [Creatable(CreatableAttribute.Categories.DataAnalysisClassification, Priority = 100)] 40 41 [StorableClass] … … 59 60 60 61 #region Fisher LDA 61 protected override void Run( ) {62 protected override void Run(CancellationToken cancellationToken) { 62 63 var solution = CreateLinearDiscriminantAnalysisSolution(Problem.ProblemData); 63 64 Results.Add(new Result(LinearDiscriminantAnalysisSolutionResultName, "The linear discriminant analysis.", solution)); … … 70 71 IEnumerable<int> rows = problemData.TrainingIndices; 71 72 int nClasses = problemData.ClassNames.Count(); 72 double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows); 73 var doubleVariableNames = allowedInputVariables.Where(dataset.VariableHasType<double>).ToArray(); 74 var factorVariableNames = allowedInputVariables.Where(dataset.VariableHasType<string>).ToArray(); 75 double[,] inputMatrix = dataset.ToArray(doubleVariableNames.Concat(new string[] { targetVariable }), rows); 76 77 var factorVariables = dataset.GetFactorVariableValues(factorVariableNames, rows); 78 var factorMatrix = dataset.ToArray(factorVariables, rows); 79 80 inputMatrix = factorMatrix.HorzCat(inputMatrix); 81 73 82 if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x))) 74 83 throw new NotSupportedException("Linear discriminant analysis does not support NaN or infinity values in the input dataset."); … … 82 91 int info; 83 92 double[] 
w; 84 alglib.fisherlda(inputMatrix, inputMatrix.GetLength(0), allowedInputVariables.Count(), nClasses, out info, out w);93 alglib.fisherlda(inputMatrix, inputMatrix.GetLength(0), inputMatrix.GetLength(1) - 1, nClasses, out info, out w); 85 94 if (info < 1) throw new ArgumentException("Error in calculation of linear discriminant analysis solution"); 86 95 87 ISymbolicExpressionTree tree = new SymbolicExpressionTree(new ProgramRootSymbol().CreateTreeNode()); 88 ISymbolicExpressionTreeNode startNode = new StartSymbol().CreateTreeNode(); 89 tree.Root.AddSubtree(startNode); 90 ISymbolicExpressionTreeNode addition = new Addition().CreateTreeNode(); 91 startNode.AddSubtree(addition); 96 var nFactorCoeff = factorMatrix.GetLength(1); 97 var tree = LinearModelToTreeConverter.CreateTree(factorVariables, w.Take(nFactorCoeff).ToArray(), 98 doubleVariableNames, w.Skip(nFactorCoeff).Take(doubleVariableNames.Length).ToArray()); 92 99 93 int col = 0; 94 foreach (string column in allowedInputVariables) { 95 VariableTreeNode vNode = (VariableTreeNode)new HeuristicLab.Problems.DataAnalysis.Symbolic.Variable().CreateTreeNode(); 96 vNode.VariableName = column; 97 vNode.Weight = w[col]; 98 addition.AddSubtree(vNode); 99 col++; 100 } 101 102 var model = LinearDiscriminantAnalysis.CreateDiscriminantFunctionModel(tree, new SymbolicDataAnalysisExpressionTreeInterpreter(), problemData, rows); 100 var model = CreateDiscriminantFunctionModel(tree, new SymbolicDataAnalysisExpressionTreeLinearInterpreter(), problemData, rows); 103 101 SymbolicDiscriminantFunctionClassificationSolution solution = new SymbolicDiscriminantFunctionClassificationSolution(model, (IClassificationProblemData)problemData.Clone()); 104 102 -
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearRegression.cs
r14185 r14869 23 23 using System.Collections.Generic; 24 24 using System.Linq; 25 using System.Threading; 25 26 using HeuristicLab.Common; 26 27 using HeuristicLab.Core; 27 28 using HeuristicLab.Data; 28 using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;29 29 using HeuristicLab.Optimization; 30 30 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; … … 60 60 61 61 #region linear regression 62 protected override void Run( ) {62 protected override void Run(CancellationToken cancellationToken) { 63 63 double rmsError, cvRmsError; 64 64 var solution = CreateLinearRegressionSolution(Problem.ProblemData, out rmsError, out cvRmsError); … … 73 73 IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables; 74 74 IEnumerable<int> rows = problemData.TrainingIndices; 75 double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows); 75 var doubleVariables = allowedInputVariables.Where(dataset.VariableHasType<double>); 76 var factorVariableNames = allowedInputVariables.Where(dataset.VariableHasType<string>); 77 var factorVariables = dataset.GetFactorVariableValues(factorVariableNames, rows); 78 double[,] binaryMatrix = dataset.ToArray(factorVariables, rows); 79 double[,] doubleVarMatrix = dataset.ToArray(doubleVariables.Concat(new string[] { targetVariable }), rows); 80 var inputMatrix = binaryMatrix.HorzCat(doubleVarMatrix); 81 76 82 if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x))) 77 83 throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset."); … … 91 97 alglib.lrunpack(lm, out coefficients, out nFeatures); 92 98 93 ISymbolicExpressionTree tree = new SymbolicExpressionTree(new ProgramRootSymbol().CreateTreeNode()); 94 ISymbolicExpressionTreeNode startNode = new StartSymbol().CreateTreeNode(); 95 tree.Root.AddSubtree(startNode); 96 ISymbolicExpressionTreeNode addition = new 
Addition().CreateTreeNode(); 97 startNode.AddSubtree(addition); 98 99 int col = 0; 100 foreach (string column in allowedInputVariables) { 101 VariableTreeNode vNode = (VariableTreeNode)new HeuristicLab.Problems.DataAnalysis.Symbolic.Variable().CreateTreeNode(); 102 vNode.VariableName = column; 103 vNode.Weight = coefficients[col]; 104 addition.AddSubtree(vNode); 105 col++; 106 } 107 108 ConstantTreeNode cNode = (ConstantTreeNode)new Constant().CreateTreeNode(); 109 cNode.Value = coefficients[coefficients.Length - 1]; 110 addition.AddSubtree(cNode); 111 112 SymbolicRegressionSolution solution = new SymbolicRegressionSolution(new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeInterpreter()), (IRegressionProblemData)problemData.Clone()); 99 int nFactorCoeff = binaryMatrix.GetLength(1); 100 int nVarCoeff = doubleVariables.Count(); 101 var tree = LinearModelToTreeConverter.CreateTree(factorVariables, coefficients.Take(nFactorCoeff).ToArray(), 102 doubleVariables.ToArray(), coefficients.Skip(nFactorCoeff).Take(nVarCoeff).ToArray(), 103 @const: coefficients[nFeatures]); 104 105 SymbolicRegressionSolution solution = new SymbolicRegressionSolution(new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeLinearInterpreter()), (IRegressionProblemData)problemData.Clone()); 113 106 solution.Model.Name = "Linear Regression Model"; 114 107 solution.Name = "Linear Regression Solution"; -
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/MultinomialLogitClassification.cs
r14185 r14869 23 23 using System.Collections.Generic; 24 24 using System.Linq; 25 using System.Threading; 25 26 using HeuristicLab.Common; 26 27 using HeuristicLab.Core; … … 57 58 58 59 #region logit classification 59 protected override void Run( ) {60 protected override void Run(CancellationToken cancellationToken) { 60 61 double rmsError, relClassError; 61 62 var solution = CreateLogitClassificationSolution(Problem.ProblemData, out rmsError, out relClassError); … … 68 69 var dataset = problemData.Dataset; 69 70 string targetVariable = problemData.TargetVariable; 70 IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables; 71 var doubleVariableNames = problemData.AllowedInputVariables.Where(dataset.VariableHasType<double>); 72 var factorVariableNames = problemData.AllowedInputVariables.Where(dataset.VariableHasType<string>); 71 73 IEnumerable<int> rows = problemData.TrainingIndices; 72 double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows); 74 double[,] inputMatrix = dataset.ToArray(doubleVariableNames.Concat(new string[] { targetVariable }), rows); 75 76 var factorVariableValues = dataset.GetFactorVariableValues(factorVariableNames, rows); 77 var factorMatrix = dataset.ToArray(factorVariableValues, rows); 78 inputMatrix = factorMatrix.HorzCat(inputMatrix); 79 73 80 if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x))) 74 81 throw new NotSupportedException("Multinomial logit classification does not support NaN or infinity values in the input dataset."); … … 95 102 relClassError = alglib.mnlrelclserror(lm, inputMatrix, nRows); 96 103 97 MultinomialLogitClassificationSolution solution = new MultinomialLogitClassificationSolution(new MultinomialLogitModel(lm, targetVariable, allowedInputVariables, classValues), (IClassificationProblemData)problemData.Clone());104 MultinomialLogitClassificationSolution solution = new 
MultinomialLogitClassificationSolution(new MultinomialLogitModel(lm, targetVariable, doubleVariableNames, factorVariableValues, classValues), (IClassificationProblemData)problemData.Clone()); 98 105 return solution; 99 106 } -
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/MultinomialLogitClassificationSolution.cs
r14185 r14869 43 43 : base(original, cloner) { 44 44 } 45 public MultinomialLogitClassificationSolution( MultinomialLogitModel logitModel,IClassificationProblemData problemData)45 public MultinomialLogitClassificationSolution(MultinomialLogitModel logitModel, IClassificationProblemData problemData) 46 46 : base(logitModel, problemData) { 47 47 } -
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/MultinomialLogitModel.cs
r14185 r14869 56 56 [Storable] 57 57 private double[] classValues; 58 [Storable] 59 private List<KeyValuePair<string, IEnumerable<string>>> factorVariables; 60 58 61 [StorableConstructor] 59 62 private MultinomialLogitModel(bool deserializing) … … 68 71 allowedInputVariables = (string[])original.allowedInputVariables.Clone(); 69 72 classValues = (double[])original.classValues.Clone(); 73 this.factorVariables = original.factorVariables.Select(kvp => new KeyValuePair<string, IEnumerable<string>>(kvp.Key, new List<string>(kvp.Value))).ToList(); 70 74 } 71 public MultinomialLogitModel(alglib.logitmodel logitModel, string targetVariable, IEnumerable<string> allowedInputVariables, double[] classValues)75 public MultinomialLogitModel(alglib.logitmodel logitModel, string targetVariable, IEnumerable<string> doubleInputVariables, IEnumerable<KeyValuePair<string, IEnumerable<string>>> factorVariables, double[] classValues) 72 76 : base(targetVariable) { 73 77 this.name = ItemName; 74 78 this.description = ItemDescription; 75 79 this.logitModel = logitModel; 76 this.allowedInputVariables = allowedInputVariables.ToArray(); 80 this.allowedInputVariables = doubleInputVariables.ToArray(); 81 this.factorVariables = factorVariables.Select(kvp => new KeyValuePair<string, IEnumerable<string>>(kvp.Key, new List<string>(kvp.Value))).ToList(); 77 82 this.classValues = (double[])classValues.Clone(); 83 } 84 85 [StorableHook(HookType.AfterDeserialization)] 86 private void AfterDeserialization() { 87 // BackwardsCompatibility3.3 88 #region Backwards compatible code, remove with 3.4 89 factorVariables = new List<KeyValuePair<string, IEnumerable<string>>>(); 90 #endregion 78 91 } 79 92 … … 83 96 84 97 public override IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) { 85 double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows); 98 99 double[,] inputData = dataset.ToArray(allowedInputVariables, rows); 100 double[,] factorData 
= dataset.ToArray(factorVariables, rows); 101 102 inputData = factorData.HorzCat(inputData); 86 103 87 104 int n = inputData.GetLength(0); -
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/Scaling.cs
r14185 r14869 29 29 30 30 namespace HeuristicLab.Algorithms.DataAnalysis { 31 [Obsolete("Use transformation classes in Problems.DataAnalysis instead")] 31 32 [StorableClass] 32 33 [Item(Name = "Scaling", Description = "Contains information about scaling of variables for data-analysis algorithms.")] -
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/MctsSymbolicRegression/MctsSymbolicRegressionAlgorithm.cs
r14185 r14869 38 38 [StorableClass] 39 39 [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 250)] 40 public class MctsSymbolicRegressionAlgorithm : BasicAlgorithm { 41 public override Type ProblemType { 42 get { return typeof(IRegressionProblem); } 43 } 44 public new IRegressionProblem Problem { 45 get { return (IRegressionProblem)base.Problem; } 46 set { base.Problem = value; } 47 } 40 public class MctsSymbolicRegressionAlgorithm : FixedDataAnalysisAlgorithm<IRegressionProblem> { 48 41 49 42 #region ParameterNames -
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/Initialization/LdaInitializer.cs
r14185 r14869 44 44 var attributes = data.AllowedInputVariables.Count(); 45 45 46 var ldaDs = AlglibUtil.PrepareInputMatrix(data.Dataset,47 48 46 var ldaDs = data.Dataset.ToArray( 47 data.AllowedInputVariables.Concat(data.TargetVariable.ToEnumerable()), 48 data.TrainingIndices); 49 49 50 50 // map class values to sequential natural numbers (required by alglib) -
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/Initialization/PcaInitializer.cs
r14185 r14869 44 44 var attributes = data.AllowedInputVariables.Count(); 45 45 46 var pcaDs = AlglibUtil.PrepareInputMatrix(data.Dataset,data.AllowedInputVariables, data.TrainingIndices);46 var pcaDs = data.Dataset.ToArray(data.AllowedInputVariables, data.TrainingIndices); 47 47 48 48 int info; -
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/NcaGradientCalculator.cs
r14185 r14869 99 99 } 100 100 101 var data = AlglibUtil.PrepareInputMatrix(problemData.Dataset,problemData.AllowedInputVariables,102 101 var data = problemData.Dataset.ToArray(problemData.AllowedInputVariables, 102 problemData.TrainingIndices); 103 103 var classes = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices).ToArray(); 104 104 -
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/NcaModel.cs
r14185 r14869 86 86 87 87 public double[,] Reduce(IDataset dataset, IEnumerable<int> rows) { 88 var data = AlglibUtil.PrepareInputMatrix(dataset,allowedInputVariables, rows);88 var data = dataset.ToArray(allowedInputVariables, rows); 89 89 90 90 var targets = dataset.GetDoubleValues(TargetVariable, rows).ToArray(); -
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourClassification.cs
r14235 r14869 22 22 using System; 23 23 using System.Linq; 24 using System.Threading; 24 25 using HeuristicLab.Common; 25 26 using HeuristicLab.Core; … … 91 92 92 93 #region nearest neighbour 93 protected override void Run( ) {94 protected override void Run(CancellationToken cancellationToken) { 94 95 double[] weights = null; 95 96 if (Weights != null) weights = Weights.CloneAsArray(); -
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourModel.cs
r14322 r14869 119 119 if (IsCompatibilityLoaded) { 120 120 // no scaling 121 inputMatrix = AlglibUtil.PrepareInputMatrix(dataset,121 inputMatrix = dataset.ToArray( 122 122 this.allowedInputVariables.Concat(new string[] { targetVariable }), 123 123 rows); … … 144 144 if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x))) 145 145 throw new NotSupportedException( 146 "Nearest neighbour classificationdoes not support NaN or infinity values in the input dataset.");146 "Nearest neighbour model does not support NaN or infinity values in the input dataset."); 147 147 148 148 this.kdTree = new alglib.nearestneighbor.kdtree(); … … 167 167 168 168 private static double[,] CreateScaledData(IDataset dataset, IEnumerable<string> variables, IEnumerable<int> rows, double[] offsets, double[] factors) { 169 var x = new double[rows.Count(), variables.Count()]; 170 var colIdx = 0; 171 foreach (var variableName in variables) { 172 var rowIdx = 0; 173 foreach (var val in dataset.GetDoubleValues(variableName, rows)) { 174 x[rowIdx, colIdx] = (val + offsets[colIdx]) * factors[colIdx]; 175 rowIdx++; 176 } 177 colIdx++; 178 } 179 return x; 169 var transforms = 170 variables.Select( 171 (_, colIdx) => 172 new LinearTransformation(variables) { Addend = offsets[colIdx] * factors[colIdx], Multiplier = factors[colIdx] }); 173 return dataset.ToArray(variables, transforms, rows); 180 174 } 181 175 … … 187 181 double[,] inputData; 188 182 if (IsCompatibilityLoaded) { 189 inputData = AlglibUtil.PrepareInputMatrix(dataset,allowedInputVariables, rows);183 inputData = dataset.ToArray(allowedInputVariables, rows); 190 184 } else { 191 185 inputData = CreateScaledData(dataset, allowedInputVariables, rows, offsets, weights); … … 223 217 double[,] inputData; 224 218 if (IsCompatibilityLoaded) { 225 inputData = AlglibUtil.PrepareInputMatrix(dataset,allowedInputVariables, rows);219 inputData = dataset.ToArray(allowedInputVariables, rows); 226 220 } else { 227 221 inputData = 
CreateScaledData(dataset, allowedInputVariables, rows, offsets, weights); -
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourRegression.cs
r14235 r14869 21 21 22 22 using System; 23 using System.Threading; 23 24 using HeuristicLab.Common; 24 25 using HeuristicLab.Core; … … 92 93 93 94 #region nearest neighbour 94 protected override void Run( ) {95 protected override void Run(CancellationToken cancellationToken) { 95 96 double[] weights = null; 96 97 if (Weights != null) weights = Weights.CloneAsArray(); -
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkClassification.cs
r14185 r14869 23 23 using System.Collections.Generic; 24 24 using System.Linq; 25 using System.Threading; 25 26 using HeuristicLab.Common; 26 27 using HeuristicLab.Core; … … 168 169 169 170 #region neural network 170 protected override void Run( ) {171 protected override void Run(CancellationToken cancellationToken) { 171 172 double rmsError, avgRelError, relClassError; 172 173 var solution = CreateNeuralNetworkClassificationSolution(Problem.ProblemData, HiddenLayers, NodesInFirstHiddenLayer, NodesInSecondHiddenLayer, Decay, Restarts, out rmsError, out avgRelError, out relClassError); … … 183 184 IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables; 184 185 IEnumerable<int> rows = problemData.TrainingIndices; 185 double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset,allowedInputVariables.Concat(new string[] { targetVariable }), rows);186 double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows); 186 187 if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x))) 187 188 throw new NotSupportedException("Neural network classification does not support NaN or infinity values in the input dataset."); -
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkEnsembleClassification.cs
r14185 r14869 23 23 using System.Collections.Generic; 24 24 using System.Linq; 25 using System.Threading; 25 26 using HeuristicLab.Common; 26 27 using HeuristicLab.Core; … … 124 125 public NeuralNetworkEnsembleClassification() 125 126 : base() { 126 var validHiddenLayerValues = new ItemSet<IntValue>(new IntValue[] { 127 (IntValue)new IntValue(0).AsReadOnly(), 128 (IntValue)new IntValue(1).AsReadOnly(), 127 var validHiddenLayerValues = new ItemSet<IntValue>(new IntValue[] { 128 (IntValue)new IntValue(0).AsReadOnly(), 129 (IntValue)new IntValue(1).AsReadOnly(), 129 130 (IntValue)new IntValue(2).AsReadOnly() }); 130 131 var selectedHiddenLayerValue = (from v in validHiddenLayerValues … … 154 155 155 156 #region neural network ensemble 156 protected override void Run( ) {157 protected override void Run(CancellationToken cancellationToken) { 157 158 double rmsError, avgRelError, relClassError; 158 159 var solution = CreateNeuralNetworkEnsembleClassificationSolution(Problem.ProblemData, EnsembleSize, HiddenLayers, NodesInFirstHiddenLayer, NodesInSecondHiddenLayer, Decay, Restarts, out rmsError, out avgRelError, out relClassError); … … 169 170 IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables; 170 171 IEnumerable<int> rows = problemData.TrainingIndices; 171 double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset,allowedInputVariables.Concat(new string[] { targetVariable }), rows);172 double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows); 172 173 if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x))) 173 174 throw new NotSupportedException("Neural network ensemble classification does not support NaN or infinity values in the input dataset."); -
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkEnsembleModel.cs
r14185 r14869 91 91 92 92 public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) { 93 double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset,allowedInputVariables, rows);93 double[,] inputData = dataset.ToArray(allowedInputVariables, rows); 94 94 95 95 int n = inputData.GetLength(0); … … 108 108 109 109 public override IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) { 110 double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset,allowedInputVariables, rows);110 double[,] inputData = dataset.ToArray(allowedInputVariables, rows); 111 111 112 112 int n = inputData.GetLength(0); -
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkEnsembleRegression.cs
r14185 r14869 23 23 using System.Collections.Generic; 24 24 using System.Linq; 25 using System.Threading; 25 26 using HeuristicLab.Common; 26 27 using HeuristicLab.Core; … … 154 155 155 156 #region neural network ensemble 156 protected override void Run( ) {157 protected override void Run(CancellationToken cancellationToken) { 157 158 double rmsError, avgRelError; 158 159 var solution = CreateNeuralNetworkEnsembleRegressionSolution(Problem.ProblemData, EnsembleSize, HiddenLayers, NodesInFirstHiddenLayer, NodesInSecondHiddenLayer, Decay, Restarts, out rmsError, out avgRelError); … … 168 169 IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables; 169 170 IEnumerable<int> rows = problemData.TrainingIndices; 170 double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset,allowedInputVariables.Concat(new string[] { targetVariable }), rows);171 double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows); 171 172 if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x))) 172 173 throw new NotSupportedException("Neural network ensemble regression does not support NaN or infinity values in the input dataset."); -
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkModel.cs
r14185 r14869 95 95 96 96 public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) { 97 double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset,allowedInputVariables, rows);97 double[,] inputData = dataset.ToArray(allowedInputVariables, rows); 98 98 99 99 int n = inputData.GetLength(0); … … 112 112 113 113 public override IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) { 114 double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset,allowedInputVariables, rows);114 double[,] inputData = dataset.ToArray( allowedInputVariables, rows); 115 115 116 116 int n = inputData.GetLength(0); -
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkRegression.cs
r14185 r14869 23 23 using System.Collections.Generic; 24 24 using System.Linq; 25 using System.Threading; 25 26 using HeuristicLab.Common; 26 27 using HeuristicLab.Core; … … 170 171 171 172 #region neural network 172 protected override void Run( ) {173 protected override void Run(CancellationToken cancellationToken) { 173 174 double rmsError, avgRelError; 174 175 var solution = CreateNeuralNetworkRegressionSolution(Problem.ProblemData, HiddenLayers, NodesInFirstHiddenLayer, NodesInSecondHiddenLayer, Decay, Restarts, out rmsError, out avgRelError); … … 184 185 IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables; 185 186 IEnumerable<int> rows = problemData.TrainingIndices; 186 double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset,allowedInputVariables.Concat(new string[] { targetVariable }), rows);187 double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows); 187 188 if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x))) 188 189 throw new NotSupportedException("Neural network regression does not support NaN or infinity values in the input dataset."); -
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/NonlinearRegression/NonlinearRegression.cs
r14319 r14869 21 21 22 22 using System; 23 using System.Collections.Generic; 23 24 using System.Linq; 25 using System.Threading; 24 26 using HeuristicLab.Analysis; 25 27 using HeuristicLab.Common; … … 157 159 158 160 #region nonlinear regression 159 protected override void Run( ) {161 protected override void Run(CancellationToken cancellationToken) { 160 162 IRegressionSolution bestSolution = null; 161 163 if (InitializeParametersRandomly) { … … 207 209 var parser = new InfixExpressionParser(); 208 210 var tree = parser.Parse(modelStructure); 211 // parser handles double and string variables equally by creating a VariableTreeNode 212 // post-process to replace VariableTreeNodes by FactorVariableTreeNodes for all string variables 213 var factorSymbol = new FactorVariable(); 214 factorSymbol.VariableNames = 215 problemData.AllowedInputVariables.Where(name => problemData.Dataset.VariableHasType<string>(name)); 216 factorSymbol.AllVariableNames = factorSymbol.VariableNames; 217 factorSymbol.VariableValues = 218 factorSymbol.VariableNames.Select(name => 219 new KeyValuePair<string, Dictionary<string, int>>(name, 220 problemData.Dataset.GetReadOnlyStringValues(name).Distinct() 221 .Select((n, i) => Tuple.Create(n, i)) 222 .ToDictionary(tup => tup.Item1, tup => tup.Item2))); 223 224 foreach (var parent in tree.IterateNodesPrefix().ToArray()) { 225 for (int i = 0; i < parent.SubtreeCount; i++) { 226 var varChild = parent.GetSubtree(i) as VariableTreeNode; 227 var factorVarChild = parent.GetSubtree(i) as FactorVariableTreeNode; 228 if (varChild != null && factorSymbol.VariableNames.Contains(varChild.VariableName)) { 229 parent.RemoveSubtree(i); 230 var factorTreeNode = (FactorVariableTreeNode)factorSymbol.CreateTreeNode(); 231 factorTreeNode.VariableName = varChild.VariableName; 232 factorTreeNode.Weights = 233 factorTreeNode.Symbol.GetVariableValues(factorTreeNode.VariableName).Select(_ => 1.0).ToArray(); 234 // weight = 1.0 for each value 235 parent.InsertSubtree(i, 
factorTreeNode); 236 } else if (factorVarChild != null && factorSymbol.VariableNames.Contains(factorVarChild.VariableName)) { 237 if (factorSymbol.GetVariableValues(factorVarChild.VariableName).Count() != factorVarChild.Weights.Length) 238 throw new ArgumentException( 239 string.Format("Factor variable {0} needs exactly {1} weights", 240 factorVarChild.VariableName, 241 factorSymbol.GetVariableValues(factorVarChild.VariableName).Count())); 242 parent.RemoveSubtree(i); 243 var factorTreeNode = (FactorVariableTreeNode)factorSymbol.CreateTreeNode(); 244 factorTreeNode.VariableName = factorVarChild.VariableName; 245 factorTreeNode.Weights = factorVarChild.Weights; 246 parent.InsertSubtree(i, factorTreeNode); 247 } 248 } 249 } 209 250 210 251 if (!SymbolicRegressionConstantOptimizationEvaluator.CanOptimizeConstants(tree)) throw new ArgumentException("The optimizer does not support the specified model structure."); -
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestClassification.cs
r14185 r14869 20 20 #endregion 21 21 22 using System.Threading; 22 23 using HeuristicLab.Common; 23 24 using HeuristicLab.Core; … … 132 133 133 134 #region random forest 134 protected override void Run( ) {135 protected override void Run(CancellationToken cancellationToken) { 135 136 double rmsError, relClassificationError, outOfBagRmsError, outOfBagRelClassificationError; 136 137 if (SetSeedRandomly) Seed = new System.Random().Next(); -
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestModel.cs
r14368 r14869 139 139 140 140 public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) { 141 double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset,AllowedInputVariables, rows);141 double[,] inputData = dataset.ToArray(AllowedInputVariables, rows); 142 142 AssertInputMatrix(inputData); 143 143 … … 157 157 158 158 public IEnumerable<double> GetEstimatedVariances(IDataset dataset, IEnumerable<int> rows) { 159 double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset,AllowedInputVariables, rows);159 double[,] inputData = dataset.ToArray(AllowedInputVariables, rows); 160 160 AssertInputMatrix(inputData); 161 161 … … 175 175 176 176 public override IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) { 177 double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset,AllowedInputVariables, rows);177 double[,] inputData = dataset.ToArray(AllowedInputVariables, rows); 178 178 AssertInputMatrix(inputData); 179 179 … … 294 294 out double rmsError, out double outOfBagRmsError, out double avgRelError, out double outOfBagAvgRelError) { 295 295 var variables = problemData.AllowedInputVariables.Concat(new string[] { problemData.TargetVariable }); 296 double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(problemData.Dataset,variables, trainingIndices);296 double[,] inputMatrix = problemData.Dataset.ToArray(variables, trainingIndices); 297 297 298 298 alglib.dfreport rep; … … 316 316 317 317 var variables = problemData.AllowedInputVariables.Concat(new string[] { problemData.TargetVariable }); 318 double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(problemData.Dataset,variables, trainingIndices);318 double[,] inputMatrix = problemData.Dataset.ToArray(variables, trainingIndices); 319 319 320 320 var classValues = problemData.ClassValues.ToArray(); -
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestRegression.cs
r14185 r14869 20 20 #endregion 21 21 22 using System.Threading; 22 23 using HeuristicLab.Common; 23 24 using HeuristicLab.Core; … … 131 132 132 133 #region random forest 133 protected override void Run( ) {134 protected override void Run(CancellationToken cancellationToken) { 134 135 double rmsError, avgRelError, outOfBagRmsError, outOfBagAvgRelError; 135 136 if (SetSeedRandomly) Seed = new System.Random().Next(); -
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/SupportVectorMachine/SupportVectorClassification.cs
r14185 r14869 23 23 using System.Collections.Generic; 24 24 using System.Linq; 25 using System.Threading; 25 26 using HeuristicLab.Common; 26 27 using HeuristicLab.Core; … … 143 144 144 145 #region support vector classification 145 protected override void Run( ) {146 protected override void Run(CancellationToken cancellationToken) { 146 147 IClassificationProblemData problemData = Problem.ProblemData; 147 148 IEnumerable<string> selectedInputVariables = problemData.AllowedInputVariables; -
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/SupportVectorMachine/SupportVectorRegression.cs
r14185 r14869 23 23 using System.Collections.Generic; 24 24 using System.Linq; 25 using System.Threading; 25 26 using HeuristicLab.Common; 26 27 using HeuristicLab.Core; … … 151 152 152 153 #region support vector regression 153 protected override void Run( ) {154 protected override void Run(CancellationToken cancellationToken) { 154 155 IRegressionProblemData problemData = Problem.ProblemData; 155 156 IEnumerable<string> selectedInputVariables = problemData.AllowedInputVariables; -
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/TimeSeries/AutoregressiveModeling.cs
r14185 r14869 22 22 using System; 23 23 using System.Linq; 24 using System.Threading; 24 25 using HeuristicLab.Common; 25 26 using HeuristicLab.Core; … … 63 64 } 64 65 65 protected override void Run( ) {66 protected override void Run(CancellationToken cancellationToken) { 66 67 double rmsError, cvRmsError; 67 68 var solution = CreateAutoRegressiveSolution(Problem.ProblemData, TimeOffset, out rmsError, out cvRmsError); … … 114 115 alglib.lrunpack(lm, out coefficients, out nFeatures); 115 116 116 117 ISymbolicExpressionTree tree = new SymbolicExpressionTree(new ProgramRootSymbol().CreateTreeNode()); 118 ISymbolicExpressionTreeNode startNode = new StartSymbol().CreateTreeNode(); 119 tree.Root.AddSubtree(startNode); 120 ISymbolicExpressionTreeNode addition = new Addition().CreateTreeNode(); 121 startNode.AddSubtree(addition); 122 123 for (int i = 0; i < timeOffset; i++) { 124 LaggedVariableTreeNode node = (LaggedVariableTreeNode)new LaggedVariable().CreateTreeNode(); 125 node.VariableName = targetVariable; 126 node.Weight = coefficients[i]; 127 node.Lag = (i + 1) * -1; 128 addition.AddSubtree(node); 129 } 130 131 ConstantTreeNode cNode = (ConstantTreeNode)new Constant().CreateTreeNode(); 132 cNode.Value = coefficients[coefficients.Length - 1]; 133 addition.AddSubtree(cNode); 117 var tree = LinearModelToTreeConverter.CreateTree( 118 variableNames: Enumerable.Repeat(problemData.TargetVariable, nFeatures).ToArray(), 119 lags: Enumerable.Range(0, timeOffset).Select(i => (i + 1) * -1).ToArray(), 120 coefficients: coefficients.Take(nFeatures).ToArray(), 121 @const: coefficients[nFeatures] 122 ); 134 123 135 124 var interpreter = new SymbolicTimeSeriesPrognosisExpressionTreeInterpreter(problemData.TargetVariable); -
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/kMeans/KMeansClustering.cs
r14185 r14869 23 23 using System.Collections.Generic; 24 24 using System.Linq; 25 using System.Threading; 25 26 using HeuristicLab.Common; 26 27 using HeuristicLab.Core; … … 77 78 78 79 #region k-Means clustering 79 protected override void Run( ) {80 protected override void Run(CancellationToken cancellationToken) { 80 81 var solution = CreateKMeansSolution(Problem.ProblemData, K.Value, Restarts.Value); 81 82 Results.Add(new Result(KMeansSolutionResultName, "The k-Means clustering solution.", solution)); … … 89 90 double[,] centers; 90 91 int[] xyc; 91 double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset,allowedInputVariables, rows);92 double[,] inputMatrix = dataset.ToArray(allowedInputVariables, rows); 92 93 if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x))) 93 94 throw new NotSupportedException("k-Means clustering does not support NaN or infinity values in the input dataset.");
Note: See TracChangeset
for help on using the changeset viewer.