Changeset 14869


Ignore:
Timestamp:
04/14/17 08:58:45 (13 days ago)
Author:
gkronber
Message:

#2699: merged changesets from trunk to branch

Location:
branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4
Files:
1 deleted
42 edited
2 copied

Legend:

Unmodified
Added
Removed
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4

    • Property svn:mergeinfo set to (toggle deleted branches)
      /stable/HeuristicLab.Algorithms.DataAnalysis/3.4 merged eligible
      /trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4 merged eligible
      /branches/1721-RandomForestPersistence/HeuristicLab.Algorithms.DataAnalysis/3.4 10321-10322
      /branches/Benchmarking/sources/HeuristicLab.Algorithms.DataAnalysis/3.4 6917-7005
      /branches/ClassificationModelComparison/HeuristicLab.Algorithms.DataAnalysis/3.4 9070-13099
      /branches/CloningRefactoring/HeuristicLab.Algorithms.DataAnalysis/3.4 4656-4721
      /branches/DataAnalysis Refactoring/HeuristicLab.Algorithms.DataAnalysis/3.4 5471-5808
      /branches/DataAnalysis SolutionEnsembles/HeuristicLab.Algorithms.DataAnalysis/3.4 5815-6180
      /branches/DataAnalysis/HeuristicLab.Algorithms.DataAnalysis/3.4 4458-4459, 4462, 4464
      /branches/DataPreprocessing/HeuristicLab.Algorithms.DataAnalysis/3.4 10085-11101
      /branches/GP.Grammar.Editor/HeuristicLab.Algorithms.DataAnalysis/3.4 6284-6795
      /branches/GP.Symbols (TimeLag, Diff, Integral)/HeuristicLab.Algorithms.DataAnalysis/3.4 5060
      /branches/HeuristicLab.DatasetRefactor/sources/HeuristicLab.Algorithms.DataAnalysis/3.4 11570-12508
      /branches/HeuristicLab.Problems.Orienteering/HeuristicLab.Algorithms.DataAnalysis/3.4 11130-12721
      /branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4 13819-14091
      /branches/HeuristicLab.TimeSeries/HeuristicLab.Algorithms.DataAnalysis/3.4 8116-8789
      /branches/LogResidualEvaluator/HeuristicLab.Algorithms.DataAnalysis/3.4 10202-10483
      /branches/NET40/sources/HeuristicLab.Algorithms.DataAnalysis/3.4 5138-5162
      /branches/ParallelEngine/HeuristicLab.Algorithms.DataAnalysis/3.4 5175-5192
      /branches/ProblemInstancesRegressionAndClassification/HeuristicLab.Algorithms.DataAnalysis/3.4 7773-7810
      /branches/QAPAlgorithms/HeuristicLab.Algorithms.DataAnalysis/3.4 6350-6627
      /branches/Restructure trunk solution/HeuristicLab.Algorithms.DataAnalysis/3.4 6828
      /branches/SpectralKernelForGaussianProcesses/HeuristicLab.Algorithms.DataAnalysis/3.4 10204-10479
      /branches/SuccessProgressAnalysis/HeuristicLab.Algorithms.DataAnalysis/3.4 5370-5682
      /branches/Trunk/HeuristicLab.Algorithms.DataAnalysis/3.4 6829-6865
      /branches/VNS/HeuristicLab.Algorithms.DataAnalysis/3.4 5594-5752
      /branches/histogram/HeuristicLab.Algorithms.DataAnalysis/3.4 5959-6341
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/BaselineClassifiers/OneR.cs

    r14185 r14869  
    2020#endregion
    2121
     22using System;
    2223using System.Collections.Generic;
    2324using System.Linq;
     25using System.Threading;
    2426using HeuristicLab.Common;
    2527using HeuristicLab.Core;
     
    5860    }
    5961
    60     protected override void Run() {
     62    protected override void Run(CancellationToken cancellationToken) {
    6163      var solution = CreateOneRSolution(Problem.ProblemData, MinBucketSizeParameter.Value.Value);
    6264      Results.Add(new Result("OneR solution", "The 1R classifier.", solution));
     
    6466
    6567    public static IClassificationSolution CreateOneRSolution(IClassificationProblemData problemData, int minBucketSize = 6) {
     68      var classValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices);
     69      var model1 = FindBestDoubleVariableModel(problemData, minBucketSize);
     70      var model2 = FindBestFactorModel(problemData);
     71
     72      if (model1 == null && model2 == null) throw new InvalidProgramException("Could not create OneR solution");
     73      else if (model1 == null) return new OneFactorClassificationSolution(model2, (IClassificationProblemData)problemData.Clone());
     74      else if (model2 == null) return new OneRClassificationSolution(model1, (IClassificationProblemData)problemData.Clone());
     75      else {
     76        var model1EstimatedValues = model1.GetEstimatedClassValues(problemData.Dataset, problemData.TrainingIndices);
     77        var model1NumCorrect = classValues.Zip(model1EstimatedValues, (a, b) => a.IsAlmost(b)).Count(e => e);
     78
     79        var model2EstimatedValues = model2.GetEstimatedClassValues(problemData.Dataset, problemData.TrainingIndices);
     80        var model2NumCorrect = classValues.Zip(model2EstimatedValues, (a, b) => a.IsAlmost(b)).Count(e => e);
     81
     82        if (model1NumCorrect > model2NumCorrect) {
     83          return new OneRClassificationSolution(model1, (IClassificationProblemData)problemData.Clone());
     84        } else {
     85          return new OneFactorClassificationSolution(model2, (IClassificationProblemData)problemData.Clone());
     86        }
     87      }
     88    }
     89
     90    private static OneRClassificationModel FindBestDoubleVariableModel(IClassificationProblemData problemData, int minBucketSize = 6) {
    6691      var bestClassified = 0;
    6792      List<Split> bestSplits = null;
     
    7095      var classValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices);
    7196
    72       foreach (var variable in problemData.AllowedInputVariables) {
     97      var allowedInputVariables = problemData.AllowedInputVariables.Where(problemData.Dataset.VariableHasType<double>);
     98
     99      if (!allowedInputVariables.Any()) return null;
     100
     101      foreach (var variable in allowedInputVariables) {
    73102        var inputValues = problemData.Dataset.GetDoubleValues(variable, problemData.TrainingIndices);
    74103        var samples = inputValues.Zip(classValues, (i, v) => new Sample(i, v)).OrderBy(s => s.inputValue);
    75104
    76         var missingValuesDistribution = samples.Where(s => double.IsNaN(s.inputValue)).GroupBy(s => s.classValue).ToDictionary(s => s.Key, s => s.Count()).MaxItems(s => s.Value).FirstOrDefault();
     105        var missingValuesDistribution = samples
     106          .Where(s => double.IsNaN(s.inputValue)).GroupBy(s => s.classValue)
     107          .ToDictionary(s => s.Key, s => s.Count())
     108          .MaxItems(s => s.Value)
     109          .FirstOrDefault();
    77110
    78111        //calculate class distributions for all distinct inputValues
     
    119152          while (sample.inputValue >= splits[splitIndex].thresholdValue)
    120153            splitIndex++;
    121           correctClassified += sample.classValue == splits[splitIndex].classValue ? 1 : 0;
     154          correctClassified += sample.classValue.IsAlmost(splits[splitIndex].classValue) ? 1 : 0;
    122155        }
    123156        correctClassified += missingValuesDistribution.Value;
     
    133166      //remove neighboring splits with the same class value
    134167      for (int i = 0; i < bestSplits.Count - 1; i++) {
    135         if (bestSplits[i].classValue == bestSplits[i + 1].classValue) {
     168        if (bestSplits[i].classValue.IsAlmost(bestSplits[i + 1].classValue)) {
    136169          bestSplits.Remove(bestSplits[i]);
    137170          i--;
     
    139172      }
    140173
    141       var model = new OneRClassificationModel(problemData.TargetVariable, bestVariable, bestSplits.Select(s => s.thresholdValue).ToArray(), bestSplits.Select(s => s.classValue).ToArray(), bestMissingValuesClass);
    142       var solution = new OneRClassificationSolution(model, (IClassificationProblemData)problemData.Clone());
    143 
    144       return solution;
     174      var model = new OneRClassificationModel(problemData.TargetVariable, bestVariable,
     175        bestSplits.Select(s => s.thresholdValue).ToArray(),
     176        bestSplits.Select(s => s.classValue).ToArray(), bestMissingValuesClass);
     177
     178      return model;
     179    }
     180    private static OneFactorClassificationModel FindBestFactorModel(IClassificationProblemData problemData) {
     181      var classValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices);
     182      var defaultClass = FindMostFrequentClassValue(classValues);
     183      // only select string variables
     184      var allowedInputVariables = problemData.AllowedInputVariables.Where(problemData.Dataset.VariableHasType<string>);
     185
     186      if (!allowedInputVariables.Any()) return null;
     187
     188      OneFactorClassificationModel bestModel = null;
     189      var bestModelNumCorrect = 0;
     190
     191      foreach (var variable in allowedInputVariables) {
     192        var variableValues = problemData.Dataset.GetStringValues(variable, problemData.TrainingIndices);
     193        var groupedClassValues = variableValues
     194          .Zip(classValues, (v, c) => new KeyValuePair<string, double>(v, c))
     195          .GroupBy(kvp => kvp.Key)
     196          .ToDictionary(g => g.Key, g => FindMostFrequentClassValue(g.Select(kvp => kvp.Value)));
     197
     198        var model = new OneFactorClassificationModel(problemData.TargetVariable, variable,
     199          groupedClassValues.Select(kvp => kvp.Key).ToArray(), groupedClassValues.Select(kvp => kvp.Value).ToArray(), defaultClass);
     200
     201        var modelEstimatedValues = model.GetEstimatedClassValues(problemData.Dataset, problemData.TrainingIndices);
     202        var modelNumCorrect = classValues.Zip(modelEstimatedValues, (a, b) => a.IsAlmost(b)).Count(e => e);
     203        if (modelNumCorrect > bestModelNumCorrect) {
     204          bestModelNumCorrect = modelNumCorrect;
     205          bestModel = model;
     206        }
     207      }
     208
     209      return bestModel;
     210    }
     211
     212    private static double FindMostFrequentClassValue(IEnumerable<double> classValues) {
     213      return classValues.GroupBy(c => c).OrderByDescending(g => g.Count()).Select(g => g.Key).First();
    145214    }
    146215
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/BaselineClassifiers/OneRClassificationModel.cs

    r14185 r14869  
    3131  [StorableClass]
    3232  [Item("OneR Classification Model", "A model that uses intervals for one variable to determine the class.")]
    33   public class OneRClassificationModel : ClassificationModel {
     33  public sealed class OneRClassificationModel : ClassificationModel {
    3434    public override IEnumerable<string> VariablesUsedForPrediction {
    3535      get { return new[] { Variable }; }
     
    3737
    3838    [Storable]
    39     protected string variable;
     39    private string variable;
    4040    public string Variable {
    4141      get { return variable; }
     
    4343
    4444    [Storable]
    45     protected double[] splits;
     45    private double[] splits;
    4646    public double[] Splits {
    4747      get { return splits; }
     
    4949
    5050    [Storable]
    51     protected double[] classes;
     51    private double[] classes;
    5252    public double[] Classes {
    5353      get { return classes; }
     
    5555
    5656    [Storable]
    57     protected double missingValuesClass;
     57    private double missingValuesClass;
    5858    public double MissingValuesClass {
    5959      get { return missingValuesClass; }
     
    6161
    6262    [StorableConstructor]
    63     protected OneRClassificationModel(bool deserializing) : base(deserializing) { }
    64     protected OneRClassificationModel(OneRClassificationModel original, Cloner cloner)
     63    private OneRClassificationModel(bool deserializing) : base(deserializing) { }
     64    private OneRClassificationModel(OneRClassificationModel original, Cloner cloner)
    6565      : base(original, cloner) {
    6666      this.variable = (string)original.variable;
    6767      this.splits = (double[])original.splits.Clone();
    6868      this.classes = (double[])original.classes.Clone();
     69      this.missingValuesClass = original.missingValuesClass;
    6970    }
    7071    public override IDeepCloneable Clone(Cloner cloner) { return new OneRClassificationModel(this, cloner); }
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/BaselineClassifiers/OneRClassificationSolution.cs

    r14185 r14869  
    2828  [StorableClass]
    2929  [Item(Name = "OneR Classification Solution", Description = "Represents a OneR classification solution which uses only a single feature with potentially multiple thresholds for class prediction.")]
    30   public class OneRClassificationSolution : ClassificationSolution {
     30  public sealed class OneRClassificationSolution : ClassificationSolution {
    3131    public new OneRClassificationModel Model {
    3232      get { return (OneRClassificationModel)base.Model; }
     
    3535
    3636    [StorableConstructor]
    37     protected OneRClassificationSolution(bool deserializing) : base(deserializing) { }
    38     protected OneRClassificationSolution(OneRClassificationSolution original, Cloner cloner) : base(original, cloner) { }
     37    private OneRClassificationSolution(bool deserializing) : base(deserializing) { }
     38    private OneRClassificationSolution(OneRClassificationSolution original, Cloner cloner) : base(original, cloner) { }
    3939    public OneRClassificationSolution(OneRClassificationModel model, IClassificationProblemData problemData)
    4040      : base(model, problemData) {
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/BaselineClassifiers/ZeroR.cs

    r14185 r14869  
    2121
    2222using System.Linq;
     23using System.Threading;
    2324using HeuristicLab.Common;
    2425using HeuristicLab.Core;
     
    4950    }
    5051
    51     protected override void Run() {
     52    protected override void Run(CancellationToken cancellationToken) {
    5253      var solution = CreateZeroRSolution(Problem.ProblemData);
    5354      Results.Add(new Result("ZeroR solution", "The simplest possible classifier, ZeroR always predicts the majority class.", solution));
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/CrossValidation.cs

    r14185 r14869  
    451451      var aggregatedResults = new List<IResult>();
    452452      foreach (KeyValuePair<string, List<IClassificationSolution>> solutions in resultSolutions) {
    453         // clone manually to correctly clone references between cloned root objects
    454         Cloner cloner = new Cloner();
    455         var problemDataClone = (IClassificationProblemData)cloner.Clone(Problem.ProblemData);
     453        // at least one algorithm (GBT with logistic regression loss) produces a classification solution even though the original problem is a regression problem.
     454        var targetVariable = solutions.Value.First().ProblemData.TargetVariable;
     455        var problemDataClone = new ClassificationProblemData(Problem.ProblemData.Dataset,
     456          Problem.ProblemData.AllowedInputVariables, targetVariable);
    456457        // set partitions of problem data clone correctly
    457458        problemDataClone.TrainingPartition.Start = SamplesStart.Value; problemDataClone.TrainingPartition.End = SamplesEnd.Value;
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/FixedDataAnalysisAlgorithm.cs

    r14185 r14869  
    2121
    2222using System;
    23 using System.Threading;
    24 using System.Threading.Tasks;
    2523using HeuristicLab.Common;
    2624using HeuristicLab.Optimization;
     
    3028namespace HeuristicLab.Algorithms.DataAnalysis {
    3129  [StorableClass]
    32   public abstract class FixedDataAnalysisAlgorithm<T> : Algorithm,
    33     IDataAnalysisAlgorithm<T>,
    34     IStorableContent
    35     where T : class, IDataAnalysisProblem {
    36     public string Filename { get; set; }
    37 
     30  public abstract class FixedDataAnalysisAlgorithm<T> : BasicAlgorithm where T : class, IDataAnalysisProblem {
    3831    #region Properties
    3932    public override Type ProblemType {
     
    4437      set { base.Problem = value; }
    4538    }
    46     [Storable]
    47     private ResultCollection results;
    48     public override ResultCollection Results {
    49       get { return results; }
    50     }
    5139    #endregion
    5240
    53     private DateTime lastUpdateTime;
     41    public override bool SupportsPause { get { return false; } }
    5442
    5543    [StorableConstructor]
    5644    protected FixedDataAnalysisAlgorithm(bool deserializing) : base(deserializing) { }
    57     protected FixedDataAnalysisAlgorithm(FixedDataAnalysisAlgorithm<T> original, Cloner cloner)
    58       : base(original, cloner) {
    59       results = cloner.Clone(original.Results);
    60     }
    61     public FixedDataAnalysisAlgorithm()
    62       : base() {
    63       results = new ResultCollection();
    64     }
    65 
    66     public override void Prepare() {
    67       if (Problem != null) base.Prepare();
    68       results.Clear();
    69       OnPrepared();
    70     }
    71 
    72     public override void Start() {
    73       base.Start();
    74       var cancellationTokenSource = new CancellationTokenSource();
    75 
    76       OnStarted();
    77       Task task = Task.Factory.StartNew(Run, cancellationTokenSource.Token, cancellationTokenSource.Token);
    78       task.ContinueWith(t => {
    79         try {
    80           t.Wait();
    81         }
    82         catch (AggregateException ex) {
    83           try {
    84             ex.Flatten().Handle(x => x is OperationCanceledException);
    85           }
    86           catch (AggregateException remaining) {
    87             if (remaining.InnerExceptions.Count == 1) OnExceptionOccurred(remaining.InnerExceptions[0]);
    88             else OnExceptionOccurred(remaining);
    89           }
    90         }
    91         cancellationTokenSource.Dispose();
    92         cancellationTokenSource = null;
    93         OnStopped();
    94       });
    95     }
    96     private void Run(object state) {
    97       CancellationToken cancellationToken = (CancellationToken)state;
    98       lastUpdateTime = DateTime.UtcNow;
    99       System.Timers.Timer timer = new System.Timers.Timer(250);
    100       timer.AutoReset = true;
    101       timer.Elapsed += new System.Timers.ElapsedEventHandler(timer_Elapsed);
    102       timer.Start();
    103       try {
    104         Run();
    105       }
    106       finally {
    107         timer.Elapsed -= new System.Timers.ElapsedEventHandler(timer_Elapsed);
    108         timer.Stop();
    109         ExecutionTime += DateTime.UtcNow - lastUpdateTime;
    110       }
    111 
    112       cancellationToken.ThrowIfCancellationRequested();
    113     }
    114     protected abstract void Run();
    115     #region Events
    116     protected override void OnProblemChanged() {
    117       Problem.Reset += new EventHandler(Problem_Reset);
    118       base.OnProblemChanged();
    119     }
    120     private void timer_Elapsed(object sender, System.Timers.ElapsedEventArgs e) {
    121       System.Timers.Timer timer = (System.Timers.Timer)sender;
    122       timer.Enabled = false;
    123       DateTime now = DateTime.UtcNow;
    124       ExecutionTime += now - lastUpdateTime;
    125       lastUpdateTime = now;
    126       timer.Enabled = true;
    127     }
    128     #endregion
     45    protected FixedDataAnalysisAlgorithm(FixedDataAnalysisAlgorithm<T> original, Cloner cloner) : base(original, cloner) { }
     46    public FixedDataAnalysisAlgorithm() : base() { }
    12947
    13048  }
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/GBM/GradientBoostingRegressionAlgorithm.cs

    r14185 r14869  
    4444  [StorableClass]
    4545  [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 350)]
    46   public class GradientBoostingRegressionAlgorithm : BasicAlgorithm {
    47     public override Type ProblemType {
    48       get { return typeof(IRegressionProblem); }
    49     }
    50 
    51     public new IRegressionProblem Problem {
    52       get { return (IRegressionProblem)base.Problem; }
    53       set { base.Problem = value; }
    54     }
     46  public class GradientBoostingRegressionAlgorithm : FixedDataAnalysisAlgorithm<IRegressionProblem> {
    5547
    5648    #region ParameterNames
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/GaussianProcessBase.cs

    r14185 r14869  
    2121#endregion
    2222
     23using System.Linq;
    2324using HeuristicLab.Algorithms.GradientDescent;
    2425using HeuristicLab.Common;
     
    119120
    120121      // necessary for BFGS
    121       Parameters.Add(new ValueParameter<BoolValue>("Maximization", new BoolValue(false)));
    122       Parameters["Maximization"].Hidden = true;
     122      Parameters.Add(new FixedValueParameter<BoolValue>("Maximization (BFGS)", new BoolValue(false)));
     123      Parameters["Maximization (BFGS)"].Hidden = true;
    123124
    124125      var randomCreator = new HeuristicLab.Random.RandomCreator();
     
    164165      modelCreator.Successor = updateResults;
    165166
     167      updateResults.MaximizationParameter.ActualName = "Maximization (BFGS)";
    166168      updateResults.StateParameter.ActualName = bfgsInitializer.StateParameter.Name;
    167169      updateResults.QualityParameter.ActualName = NegativeLogLikelihoodParameterName;
     
    197199      // BackwardsCompatibility3.4
    198200      #region Backwards compatible code, remove with 3.5
    199       if (!Parameters.ContainsKey("Maximization")) {
    200         Parameters.Add(new ValueParameter<BoolValue>("Maximization", new BoolValue(false)));
    201         Parameters["Maximization"].Hidden = true;
     201      if (Parameters.ContainsKey("Maximization")) {
     202        Parameters.Remove("Maximization");
     203      }
     204
     205      if (!Parameters.ContainsKey("Maximization (BFGS)")) {
     206        Parameters.Add(new FixedValueParameter<BoolValue>("Maximization (BFGS)", new BoolValue(false)));
     207        Parameters["Maximization (BFGS)"].Hidden = true;
     208        OperatorGraph.Operators.OfType<LbfgsUpdateResults>().First().MaximizationParameter.ActualName = "Maximization BFGS";
    202209      }
    203210
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/GaussianProcessClassificationModelCreator.cs

    r14185 r14869  
    6767        HyperparameterGradientsParameter.ActualValue = new RealVector(model.HyperparameterGradients);
    6868        return base.Apply();
    69       } catch (ArgumentException) { } catch (alglib.alglibexception) { }
     69      } catch (ArgumentException) {
     70      } catch (alglib.alglibexception) {
     71      }
    7072      NegativeLogLikelihoodParameter.ActualValue = new DoubleValue(1E300);
    7173      HyperparameterGradientsParameter.ActualValue = new RealVector(Hyperparameter.Count());
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/GaussianProcessModel.cs

    r14185 r14869  
    165165      try {
    166166        CalculateModel(ds, rows, scaleInputs);
    167       }
    168       catch (alglib.alglibexception ae) {
     167      } catch (alglib.alglibexception ae) {
    169168        // wrap exception so that calling code doesn't have to know about alglib implementation
    170169        throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);
     
    260259    private static double[,] GetData(IDataset ds, IEnumerable<string> allowedInputs, IEnumerable<int> rows, Scaling scaling) {
    261260      if (scaling != null) {
    262         return AlglibUtil.PrepareAndScaleInputMatrix(ds, allowedInputs, rows, scaling);
     261        // BackwardsCompatibility3.3
     262        #region Backwards compatible code, remove with 3.4
     263        // TODO: completely remove Scaling class
     264        List<string> variablesList = allowedInputs.ToList();
     265        List<int> rowsList = rows.ToList();
     266
     267        double[,] matrix = new double[rowsList.Count, variablesList.Count];
     268
     269        int col = 0;
     270        foreach (string column in variablesList) {
     271          var values = scaling.GetScaledValues(ds, column, rowsList);
     272          int row = 0;
     273          foreach (var value in values) {
     274            matrix[row, col] = value;
     275            row++;
     276          }
     277          col++;
     278        }
     279        return matrix;
     280        #endregion
    263281      } else {
    264         return AlglibUtil.PrepareInputMatrix(ds, allowedInputs, rows);
     282        return ds.ToArray(allowedInputs, rows);
    265283      }
    266284    }
     
    334352        return Enumerable.Range(0, newN)
    335353          .Select(i => ms[i] + Util.ScalarProd(Ks[i], alpha));
    336       }
    337       catch (alglib.alglibexception ae) {
     354      } catch (alglib.alglibexception ae) {
    338355        // wrap exception so that calling code doesn't have to know about alglib implementation
    339356        throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);
     
    381398        }
    382399        return kss;
    383       }
    384       catch (alglib.alglibexception ae) {
     400      } catch (alglib.alglibexception ae) {
    385401        // wrap exception so that calling code doesn't have to know about alglib implementation
    386402        throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/GradientBoostedTreesAlgorithm.cs

    r14345 r14869  
    2121#endregion
    2222
    23 using System;
    2423using System.Linq;
    2524using System.Threading;
     
    3837  [StorableClass]
    3938  [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 125)]
    40   public class GradientBoostedTreesAlgorithm : BasicAlgorithm {
    41     public override Type ProblemType {
    42       get { return typeof(IRegressionProblem); }
    43     }
    44     public new IRegressionProblem Problem {
    45       get { return (IRegressionProblem)base.Problem; }
    46       set { base.Problem = value; }
    47     }
    48 
     39  public class GradientBoostedTreesAlgorithm : FixedDataAnalysisAlgorithm<IRegressionProblem> {
    4940    #region ParameterNames
    5041    private const string IterationsParameterName = "Iterations";
     
    204195      table.Rows.Add(new DataRow("Loss (train)"));
    205196      table.Rows.Add(new DataRow("Loss (test)"));
     197      table.Rows["Loss (train)"].VisualProperties.StartIndexZero = true;
     198      table.Rows["Loss (test)"].VisualProperties.StartIndexZero = true;
     199
    206200      Results.Add(new Result("Qualities", table));
    207201      var curLoss = new DoubleValue();
     
    263257          var classificationProblemData = new ClassificationProblemData(problemData.Dataset,
    264258            problemData.AllowedInputVariables, problemData.TargetVariable, problemData.Transformations);
    265           classificationModel.RecalculateModelParameters(classificationProblemData, classificationProblemData.TrainingIndices);
     259          classificationProblemData.TrainingPartition.Start = Problem.ProblemData.TrainingPartition.Start;
     260          classificationProblemData.TrainingPartition.End = Problem.ProblemData.TrainingPartition.End;
     261          classificationProblemData.TestPartition.Start = Problem.ProblemData.TestPartition.Start;
     262          classificationProblemData.TestPartition.End = Problem.ProblemData.TestPartition.End;
     263
     264          classificationModel.SetThresholdsAndClassValues(new double[] { double.NegativeInfinity, 0.0 }, new[] { 0.0, 1.0 });
     265
    266266
    267267          var classificationSolution = new DiscriminantFunctionClassificationSolution(classificationModel, classificationProblemData);
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/GradientBoostedTreesAlgorithmStatic.cs

    r14185 r14869  
    148148    // for custom stepping & termination
    149149    public static IGbmState CreateGbmState(IRegressionProblemData problemData, ILossFunction lossFunction, uint randSeed, int maxSize = 3, double r = 0.66, double m = 0.5, double nu = 0.01) {
     150      // check input variables. Only double variables are allowed.
     151      var invalidInputs =
     152        problemData.AllowedInputVariables.Where(name => !problemData.Dataset.VariableHasType<double>(name));
     153      if (invalidInputs.Any())
     154        throw new NotSupportedException("Gradient tree boosting only supports real-valued variables. Unsupported inputs: " + string.Join(", ", invalidInputs));
     155
    150156      return new GbmState(problemData, lossFunction, randSeed, maxSize, r, m, nu);
    151157    }
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj

    r14500 r14869  
    189189  </ItemGroup>
    190190  <ItemGroup>
     191    <Compile Include="BaselineClassifiers\OneFactorClassificationModel.cs" />
     192    <Compile Include="BaselineClassifiers\OneFactorClassificationSolution.cs" />
    191193    <Compile Include="BaselineClassifiers\OneR.cs" />
    192194    <Compile Include="BaselineClassifiers\OneRClassificationModel.cs" />
     
    311313      <SubType>Code</SubType>
    312314    </Compile>
    313     <Compile Include="Linear\AlglibUtil.cs" />
    314     <Compile Include="Linear\Scaling.cs" />
    315315    <Compile Include="Linear\LinearDiscriminantAnalysis.cs" />
    316316    <Compile Include="Linear\LinearRegression.cs">
     
    320320    <Compile Include="Linear\MultinomialLogitClassificationSolution.cs" />
    321321    <Compile Include="Linear\MultinomialLogitModel.cs" />
     322    <Compile Include="Linear\Scaling.cs" />
    322323    <Compile Include="MctsSymbolicRegression\Automaton.cs" />
    323324    <Compile Include="MctsSymbolicRegression\CodeGenerator.cs" />
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearDiscriminantAnalysis.cs

    r14185 r14869  
    2323using System.Collections.Generic;
    2424using System.Linq;
     25using System.Threading;
    2526using HeuristicLab.Common;
    2627using HeuristicLab.Core;
     
    3637  /// Linear discriminant analysis classification algorithm.
    3738  /// </summary>
    38   [Item("Linear Discriminant Analysis", "Linear discriminant analysis classification algorithm (wrapper for ALGLIB).")]
     39  [Item("Linear Discriminant Analysis (LDA)", "Linear discriminant analysis classification algorithm (wrapper for ALGLIB).")]
    3940  [Creatable(CreatableAttribute.Categories.DataAnalysisClassification, Priority = 100)]
    4041  [StorableClass]
     
    5960
    6061    #region Fisher LDA
    61     protected override void Run() {
     62    protected override void Run(CancellationToken cancellationToken) {
    6263      var solution = CreateLinearDiscriminantAnalysisSolution(Problem.ProblemData);
    6364      Results.Add(new Result(LinearDiscriminantAnalysisSolutionResultName, "The linear discriminant analysis.", solution));
     
    7071      IEnumerable<int> rows = problemData.TrainingIndices;
    7172      int nClasses = problemData.ClassNames.Count();
    72       double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
     73      var doubleVariableNames = allowedInputVariables.Where(dataset.VariableHasType<double>).ToArray();
     74      var factorVariableNames = allowedInputVariables.Where(dataset.VariableHasType<string>).ToArray();
     75      double[,] inputMatrix = dataset.ToArray(doubleVariableNames.Concat(new string[] { targetVariable }), rows);
     76
     77      var factorVariables = dataset.GetFactorVariableValues(factorVariableNames, rows);
     78      var factorMatrix = dataset.ToArray(factorVariables, rows);
     79
     80      inputMatrix = factorMatrix.HorzCat(inputMatrix);
     81
    7382      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
    7483        throw new NotSupportedException("Linear discriminant analysis does not support NaN or infinity values in the input dataset.");
     
    8291      int info;
    8392      double[] w;
    84       alglib.fisherlda(inputMatrix, inputMatrix.GetLength(0), allowedInputVariables.Count(), nClasses, out info, out w);
     93      alglib.fisherlda(inputMatrix, inputMatrix.GetLength(0), inputMatrix.GetLength(1) - 1, nClasses, out info, out w);
    8594      if (info < 1) throw new ArgumentException("Error in calculation of linear discriminant analysis solution");
    8695
    87       ISymbolicExpressionTree tree = new SymbolicExpressionTree(new ProgramRootSymbol().CreateTreeNode());
    88       ISymbolicExpressionTreeNode startNode = new StartSymbol().CreateTreeNode();
    89       tree.Root.AddSubtree(startNode);
    90       ISymbolicExpressionTreeNode addition = new Addition().CreateTreeNode();
    91       startNode.AddSubtree(addition);
     96      var nFactorCoeff = factorMatrix.GetLength(1);
     97      var tree = LinearModelToTreeConverter.CreateTree(factorVariables, w.Take(nFactorCoeff).ToArray(),
     98        doubleVariableNames, w.Skip(nFactorCoeff).Take(doubleVariableNames.Length).ToArray());
    9299
    93       int col = 0;
    94       foreach (string column in allowedInputVariables) {
    95         VariableTreeNode vNode = (VariableTreeNode)new HeuristicLab.Problems.DataAnalysis.Symbolic.Variable().CreateTreeNode();
    96         vNode.VariableName = column;
    97         vNode.Weight = w[col];
    98         addition.AddSubtree(vNode);
    99         col++;
    100       }
    101 
    102       var model = LinearDiscriminantAnalysis.CreateDiscriminantFunctionModel(tree, new SymbolicDataAnalysisExpressionTreeInterpreter(), problemData, rows);
     100      var model = CreateDiscriminantFunctionModel(tree, new SymbolicDataAnalysisExpressionTreeLinearInterpreter(), problemData, rows);
    103101      SymbolicDiscriminantFunctionClassificationSolution solution = new SymbolicDiscriminantFunctionClassificationSolution(model, (IClassificationProblemData)problemData.Clone());
    104102
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearRegression.cs

    r14185 r14869  
    2323using System.Collections.Generic;
    2424using System.Linq;
     25using System.Threading;
    2526using HeuristicLab.Common;
    2627using HeuristicLab.Core;
    2728using HeuristicLab.Data;
    28 using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
    2929using HeuristicLab.Optimization;
    3030using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
     
    6060
    6161    #region linear regression
    62     protected override void Run() {
     62    protected override void Run(CancellationToken cancellationToken) {
    6363      double rmsError, cvRmsError;
    6464      var solution = CreateLinearRegressionSolution(Problem.ProblemData, out rmsError, out cvRmsError);
     
    7373      IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
    7474      IEnumerable<int> rows = problemData.TrainingIndices;
    75       double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
     75      var doubleVariables = allowedInputVariables.Where(dataset.VariableHasType<double>);
     76      var factorVariableNames = allowedInputVariables.Where(dataset.VariableHasType<string>);
     77      var factorVariables = dataset.GetFactorVariableValues(factorVariableNames, rows);
     78      double[,] binaryMatrix = dataset.ToArray(factorVariables, rows);
     79      double[,] doubleVarMatrix = dataset.ToArray(doubleVariables.Concat(new string[] { targetVariable }), rows);
     80      var inputMatrix = binaryMatrix.HorzCat(doubleVarMatrix);
     81
    7682      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
    7783        throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset.");
     
    9197      alglib.lrunpack(lm, out coefficients, out nFeatures);
    9298
    93       ISymbolicExpressionTree tree = new SymbolicExpressionTree(new ProgramRootSymbol().CreateTreeNode());
    94       ISymbolicExpressionTreeNode startNode = new StartSymbol().CreateTreeNode();
    95       tree.Root.AddSubtree(startNode);
    96       ISymbolicExpressionTreeNode addition = new Addition().CreateTreeNode();
    97       startNode.AddSubtree(addition);
    98 
    99       int col = 0;
    100       foreach (string column in allowedInputVariables) {
    101         VariableTreeNode vNode = (VariableTreeNode)new HeuristicLab.Problems.DataAnalysis.Symbolic.Variable().CreateTreeNode();
    102         vNode.VariableName = column;
    103         vNode.Weight = coefficients[col];
    104         addition.AddSubtree(vNode);
    105         col++;
    106       }
    107 
    108       ConstantTreeNode cNode = (ConstantTreeNode)new Constant().CreateTreeNode();
    109       cNode.Value = coefficients[coefficients.Length - 1];
    110       addition.AddSubtree(cNode);
    111 
    112       SymbolicRegressionSolution solution = new SymbolicRegressionSolution(new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeInterpreter()), (IRegressionProblemData)problemData.Clone());
     99      int nFactorCoeff = binaryMatrix.GetLength(1);
     100      int nVarCoeff = doubleVariables.Count();
     101      var tree = LinearModelToTreeConverter.CreateTree(factorVariables, coefficients.Take(nFactorCoeff).ToArray(),
     102        doubleVariables.ToArray(), coefficients.Skip(nFactorCoeff).Take(nVarCoeff).ToArray(),
     103        @const: coefficients[nFeatures]);
     104     
     105      SymbolicRegressionSolution solution = new SymbolicRegressionSolution(new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeLinearInterpreter()), (IRegressionProblemData)problemData.Clone());
    113106      solution.Model.Name = "Linear Regression Model";
    114107      solution.Name = "Linear Regression Solution";
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/MultinomialLogitClassification.cs

    r14185 r14869  
    2323using System.Collections.Generic;
    2424using System.Linq;
     25using System.Threading;
    2526using HeuristicLab.Common;
    2627using HeuristicLab.Core;
     
    5758
    5859    #region logit classification
    59     protected override void Run() {
     60    protected override void Run(CancellationToken cancellationToken) {
    6061      double rmsError, relClassError;
    6162      var solution = CreateLogitClassificationSolution(Problem.ProblemData, out rmsError, out relClassError);
     
    6869      var dataset = problemData.Dataset;
    6970      string targetVariable = problemData.TargetVariable;
    70       IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
     71      var doubleVariableNames = problemData.AllowedInputVariables.Where(dataset.VariableHasType<double>);
     72      var factorVariableNames = problemData.AllowedInputVariables.Where(dataset.VariableHasType<string>);
    7173      IEnumerable<int> rows = problemData.TrainingIndices;
    72       double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
     74      double[,] inputMatrix = dataset.ToArray(doubleVariableNames.Concat(new string[] { targetVariable }), rows);
     75
     76      var factorVariableValues = dataset.GetFactorVariableValues(factorVariableNames, rows);
     77      var factorMatrix = dataset.ToArray(factorVariableValues, rows);
     78      inputMatrix = factorMatrix.HorzCat(inputMatrix);
     79
    7380      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
    7481        throw new NotSupportedException("Multinomial logit classification does not support NaN or infinity values in the input dataset.");
     
    95102      relClassError = alglib.mnlrelclserror(lm, inputMatrix, nRows);
    96103
    97       MultinomialLogitClassificationSolution solution = new MultinomialLogitClassificationSolution(new MultinomialLogitModel(lm, targetVariable, allowedInputVariables, classValues), (IClassificationProblemData)problemData.Clone());
     104      MultinomialLogitClassificationSolution solution = new MultinomialLogitClassificationSolution(new MultinomialLogitModel(lm, targetVariable, doubleVariableNames, factorVariableValues, classValues), (IClassificationProblemData)problemData.Clone());
    98105      return solution;
    99106    }
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/MultinomialLogitClassificationSolution.cs

    r14185 r14869  
    4343      : base(original, cloner) {
    4444    }
    45     public MultinomialLogitClassificationSolution( MultinomialLogitModel logitModel,IClassificationProblemData problemData)
     45    public MultinomialLogitClassificationSolution(MultinomialLogitModel logitModel, IClassificationProblemData problemData)
    4646      : base(logitModel, problemData) {
    4747    }
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/MultinomialLogitModel.cs

    r14185 r14869  
    5656    [Storable]
    5757    private double[] classValues;
     58    [Storable]
     59    private List<KeyValuePair<string, IEnumerable<string>>> factorVariables;
     60
    5861    [StorableConstructor]
    5962    private MultinomialLogitModel(bool deserializing)
     
    6871      allowedInputVariables = (string[])original.allowedInputVariables.Clone();
    6972      classValues = (double[])original.classValues.Clone();
     73      this.factorVariables = original.factorVariables.Select(kvp => new KeyValuePair<string, IEnumerable<string>>(kvp.Key, new List<string>(kvp.Value))).ToList();
    7074    }
    71     public MultinomialLogitModel(alglib.logitmodel logitModel, string targetVariable, IEnumerable<string> allowedInputVariables, double[] classValues)
     75    public MultinomialLogitModel(alglib.logitmodel logitModel, string targetVariable, IEnumerable<string> doubleInputVariables, IEnumerable<KeyValuePair<string, IEnumerable<string>>> factorVariables, double[] classValues)
    7276      : base(targetVariable) {
    7377      this.name = ItemName;
    7478      this.description = ItemDescription;
    7579      this.logitModel = logitModel;
    76       this.allowedInputVariables = allowedInputVariables.ToArray();
     80      this.allowedInputVariables = doubleInputVariables.ToArray();
     81      this.factorVariables = factorVariables.Select(kvp => new KeyValuePair<string, IEnumerable<string>>(kvp.Key, new List<string>(kvp.Value))).ToList();
    7782      this.classValues = (double[])classValues.Clone();
     83    }
     84
     85    [StorableHook(HookType.AfterDeserialization)]
     86    private void AfterDeserialization() {
     87      // BackwardsCompatibility3.3
     88      #region Backwards compatible code, remove with 3.4
     89      factorVariables = new List<KeyValuePair<string, IEnumerable<string>>>();
     90      #endregion
    7891    }
    7992
     
    8396
    8497    public override IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {
    85       double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
     98
     99      double[,] inputData = dataset.ToArray(allowedInputVariables, rows);
     100      double[,] factorData = dataset.ToArray(factorVariables, rows);
     101
     102      inputData = factorData.HorzCat(inputData);
    86103
    87104      int n = inputData.GetLength(0);
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/Scaling.cs

    r14185 r14869  
    2929
    3030namespace HeuristicLab.Algorithms.DataAnalysis {
     31  [Obsolete("Use transformation classes in Problems.DataAnalysis instead")]
    3132  [StorableClass]
    3233  [Item(Name = "Scaling", Description = "Contains information about scaling of variables for data-analysis algorithms.")]
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/MctsSymbolicRegression/MctsSymbolicRegressionAlgorithm.cs

    r14185 r14869  
    3838  [StorableClass]
    3939  [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 250)]
    40   public class MctsSymbolicRegressionAlgorithm : BasicAlgorithm {
    41     public override Type ProblemType {
    42       get { return typeof(IRegressionProblem); }
    43     }
    44     public new IRegressionProblem Problem {
    45       get { return (IRegressionProblem)base.Problem; }
    46       set { base.Problem = value; }
    47     }
     40  public class MctsSymbolicRegressionAlgorithm : FixedDataAnalysisAlgorithm<IRegressionProblem> {
    4841
    4942    #region ParameterNames
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/Initialization/LdaInitializer.cs

    r14185 r14869  
    4444      var attributes = data.AllowedInputVariables.Count();
    4545
    46       var ldaDs = AlglibUtil.PrepareInputMatrix(data.Dataset,
    47                                                 data.AllowedInputVariables.Concat(data.TargetVariable.ToEnumerable()),
    48                                                 data.TrainingIndices);
     46      var ldaDs = data.Dataset.ToArray(
     47                                       data.AllowedInputVariables.Concat(data.TargetVariable.ToEnumerable()),
     48                                       data.TrainingIndices);
    4949
    5050      // map class values to sequential natural numbers (required by alglib)
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/Initialization/PcaInitializer.cs

    r14185 r14869  
    4444      var attributes = data.AllowedInputVariables.Count();
    4545
    46       var pcaDs = AlglibUtil.PrepareInputMatrix(data.Dataset, data.AllowedInputVariables, data.TrainingIndices);
     46      var pcaDs = data.Dataset.ToArray(data.AllowedInputVariables, data.TrainingIndices);
    4747
    4848      int info;
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/NcaGradientCalculator.cs

    r14185 r14869  
    9999      }
    100100
    101       var data = AlglibUtil.PrepareInputMatrix(problemData.Dataset, problemData.AllowedInputVariables,
    102                                                problemData.TrainingIndices);
     101      var data = problemData.Dataset.ToArray(problemData.AllowedInputVariables,
     102                                             problemData.TrainingIndices);
    103103      var classes = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices).ToArray();
    104104
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/NcaModel.cs

    r14185 r14869  
    8686
    8787    public double[,] Reduce(IDataset dataset, IEnumerable<int> rows) {
    88       var data = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
     88      var data = dataset.ToArray(allowedInputVariables, rows);
    8989
    9090      var targets = dataset.GetDoubleValues(TargetVariable, rows).ToArray();
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourClassification.cs

    r14235 r14869  
    2222using System;
    2323using System.Linq;
     24using System.Threading;
    2425using HeuristicLab.Common;
    2526using HeuristicLab.Core;
     
    9192
    9293    #region nearest neighbour
    93     protected override void Run() {
     94    protected override void Run(CancellationToken cancellationToken) {
    9495      double[] weights = null;
    9596      if (Weights != null) weights = Weights.CloneAsArray();
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourModel.cs

    r14322 r14869  
    119119      if (IsCompatibilityLoaded) {
    120120        // no scaling
    121         inputMatrix = AlglibUtil.PrepareInputMatrix(dataset,
     121        inputMatrix = dataset.ToArray(
    122122          this.allowedInputVariables.Concat(new string[] { targetVariable }),
    123123          rows);
     
    144144      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
    145145        throw new NotSupportedException(
    146           "Nearest neighbour classification does not support NaN or infinity values in the input dataset.");
     146          "Nearest neighbour model does not support NaN or infinity values in the input dataset.");
    147147
    148148      this.kdTree = new alglib.nearestneighbor.kdtree();
     
    167167
    168168    private static double[,] CreateScaledData(IDataset dataset, IEnumerable<string> variables, IEnumerable<int> rows, double[] offsets, double[] factors) {
    169       var x = new double[rows.Count(), variables.Count()];
    170       var colIdx = 0;
    171       foreach (var variableName in variables) {
    172         var rowIdx = 0;
    173         foreach (var val in dataset.GetDoubleValues(variableName, rows)) {
    174           x[rowIdx, colIdx] = (val + offsets[colIdx]) * factors[colIdx];
    175           rowIdx++;
    176         }
    177         colIdx++;
    178       }
    179       return x;
     169      var transforms =
     170        variables.Select(
     171          (_, colIdx) =>
     172            new LinearTransformation(variables) { Addend = offsets[colIdx] * factors[colIdx], Multiplier = factors[colIdx] });
     173      return dataset.ToArray(variables, transforms, rows);
    180174    }
    181175
     
    187181      double[,] inputData;
    188182      if (IsCompatibilityLoaded) {
    189         inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
     183        inputData = dataset.ToArray(allowedInputVariables, rows);
    190184      } else {
    191185        inputData = CreateScaledData(dataset, allowedInputVariables, rows, offsets, weights);
     
    223217      double[,] inputData;
    224218      if (IsCompatibilityLoaded) {
    225         inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
     219        inputData = dataset.ToArray(allowedInputVariables, rows);
    226220      } else {
    227221        inputData = CreateScaledData(dataset, allowedInputVariables, rows, offsets, weights);
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourRegression.cs

    r14235 r14869  
    2121
    2222using System;
     23using System.Threading;
    2324using HeuristicLab.Common;
    2425using HeuristicLab.Core;
     
    9293
    9394    #region nearest neighbour
    94     protected override void Run() {
     95    protected override void Run(CancellationToken cancellationToken) {
    9596      double[] weights = null;
    9697      if (Weights != null) weights = Weights.CloneAsArray();
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkClassification.cs

    r14185 r14869  
    2323using System.Collections.Generic;
    2424using System.Linq;
     25using System.Threading;
    2526using HeuristicLab.Common;
    2627using HeuristicLab.Core;
     
    168169
    169170    #region neural network
    170     protected override void Run() {
     171    protected override void Run(CancellationToken cancellationToken) {
    171172      double rmsError, avgRelError, relClassError;
    172173      var solution = CreateNeuralNetworkClassificationSolution(Problem.ProblemData, HiddenLayers, NodesInFirstHiddenLayer, NodesInSecondHiddenLayer, Decay, Restarts, out rmsError, out avgRelError, out relClassError);
     
    183184      IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
    184185      IEnumerable<int> rows = problemData.TrainingIndices;
    185       double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
     186      double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows);
    186187      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
    187188        throw new NotSupportedException("Neural network classification does not support NaN or infinity values in the input dataset.");
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkEnsembleClassification.cs

    r14185 r14869  
    2323using System.Collections.Generic;
    2424using System.Linq;
     25using System.Threading;
    2526using HeuristicLab.Common;
    2627using HeuristicLab.Core;
     
    124125    public NeuralNetworkEnsembleClassification()
    125126      : base() {
    126       var validHiddenLayerValues = new ItemSet<IntValue>(new IntValue[] { 
    127         (IntValue)new IntValue(0).AsReadOnly(), 
    128         (IntValue)new IntValue(1).AsReadOnly(), 
     127      var validHiddenLayerValues = new ItemSet<IntValue>(new IntValue[] {
     128        (IntValue)new IntValue(0).AsReadOnly(),
     129        (IntValue)new IntValue(1).AsReadOnly(),
    129130        (IntValue)new IntValue(2).AsReadOnly() });
    130131      var selectedHiddenLayerValue = (from v in validHiddenLayerValues
     
    154155
    155156    #region neural network ensemble
    156     protected override void Run() {
     157    protected override void Run(CancellationToken cancellationToken) {
    157158      double rmsError, avgRelError, relClassError;
    158159      var solution = CreateNeuralNetworkEnsembleClassificationSolution(Problem.ProblemData, EnsembleSize, HiddenLayers, NodesInFirstHiddenLayer, NodesInSecondHiddenLayer, Decay, Restarts, out rmsError, out avgRelError, out relClassError);
     
    169170      IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
    170171      IEnumerable<int> rows = problemData.TrainingIndices;
    171       double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
     172      double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows);
    172173      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
    173174        throw new NotSupportedException("Neural network ensemble classification does not support NaN or infinity values in the input dataset.");
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkEnsembleModel.cs

    r14185 r14869  
    9191
    9292    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
    93       double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
     93      double[,] inputData = dataset.ToArray(allowedInputVariables, rows);
    9494
    9595      int n = inputData.GetLength(0);
     
    108108
    109109    public override IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {
    110       double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
     110      double[,] inputData = dataset.ToArray(allowedInputVariables, rows);
    111111
    112112      int n = inputData.GetLength(0);
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkEnsembleRegression.cs

    r14185 r14869  
    2323using System.Collections.Generic;
    2424using System.Linq;
     25using System.Threading;
    2526using HeuristicLab.Common;
    2627using HeuristicLab.Core;
     
    154155
    155156    #region neural network ensemble
    156     protected override void Run() {
     157    protected override void Run(CancellationToken cancellationToken) {
    157158      double rmsError, avgRelError;
    158159      var solution = CreateNeuralNetworkEnsembleRegressionSolution(Problem.ProblemData, EnsembleSize, HiddenLayers, NodesInFirstHiddenLayer, NodesInSecondHiddenLayer, Decay, Restarts, out rmsError, out avgRelError);
     
    168169      IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
    169170      IEnumerable<int> rows = problemData.TrainingIndices;
    170       double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
     171      double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows);
    171172      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
    172173        throw new NotSupportedException("Neural network ensemble regression does not support NaN or infinity values in the input dataset.");
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkModel.cs

    r14185 r14869  
    9595
    9696    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
    97       double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
     97      double[,] inputData = dataset.ToArray(allowedInputVariables, rows);
    9898
    9999      int n = inputData.GetLength(0);
     
    112112
    113113    public override IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {
    114       double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
     114      double[,] inputData = dataset.ToArray( allowedInputVariables, rows);
    115115
    116116      int n = inputData.GetLength(0);
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkRegression.cs

    r14185 r14869  
    2323using System.Collections.Generic;
    2424using System.Linq;
     25using System.Threading;
    2526using HeuristicLab.Common;
    2627using HeuristicLab.Core;
     
    170171
    171172    #region neural network
    172     protected override void Run() {
     173    protected override void Run(CancellationToken cancellationToken) {
    173174      double rmsError, avgRelError;
    174175      var solution = CreateNeuralNetworkRegressionSolution(Problem.ProblemData, HiddenLayers, NodesInFirstHiddenLayer, NodesInSecondHiddenLayer, Decay, Restarts, out rmsError, out avgRelError);
     
    184185      IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
    185186      IEnumerable<int> rows = problemData.TrainingIndices;
    186       double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
     187      double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows);
    187188      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
    188189        throw new NotSupportedException("Neural network regression does not support NaN or infinity values in the input dataset.");
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/NonlinearRegression/NonlinearRegression.cs

    r14319 r14869  
    2121
    2222using System;
     23using System.Collections.Generic;
    2324using System.Linq;
     25using System.Threading;
    2426using HeuristicLab.Analysis;
    2527using HeuristicLab.Common;
     
    157159
    158160    #region nonlinear regression
    159     protected override void Run() {
     161    protected override void Run(CancellationToken cancellationToken) {
    160162      IRegressionSolution bestSolution = null;
    161163      if (InitializeParametersRandomly) {
     
    207209      var parser = new InfixExpressionParser();
    208210      var tree = parser.Parse(modelStructure);
     211      // parser handles double and string variables equally by creating a VariableTreeNode
     212      // post-process to replace VariableTreeNodes by FactorVariableTreeNodes for all string variables
     213      var factorSymbol = new FactorVariable();
     214      factorSymbol.VariableNames =
     215        problemData.AllowedInputVariables.Where(name => problemData.Dataset.VariableHasType<string>(name));
     216      factorSymbol.AllVariableNames = factorSymbol.VariableNames;
     217      factorSymbol.VariableValues =
     218        factorSymbol.VariableNames.Select(name =>
     219        new KeyValuePair<string, Dictionary<string, int>>(name,
     220        problemData.Dataset.GetReadOnlyStringValues(name).Distinct()
     221        .Select((n, i) => Tuple.Create(n, i))
     222        .ToDictionary(tup => tup.Item1, tup => tup.Item2)));
     223
     224      foreach (var parent in tree.IterateNodesPrefix().ToArray()) {
     225        for (int i = 0; i < parent.SubtreeCount; i++) {
     226          var varChild = parent.GetSubtree(i) as VariableTreeNode;
     227          var factorVarChild = parent.GetSubtree(i) as FactorVariableTreeNode;
     228          if (varChild != null && factorSymbol.VariableNames.Contains(varChild.VariableName)) {
     229            parent.RemoveSubtree(i);
     230            var factorTreeNode = (FactorVariableTreeNode)factorSymbol.CreateTreeNode();
     231            factorTreeNode.VariableName = varChild.VariableName;
     232            factorTreeNode.Weights =
     233              factorTreeNode.Symbol.GetVariableValues(factorTreeNode.VariableName).Select(_ => 1.0).ToArray();
     234            // weight = 1.0 for each value
     235            parent.InsertSubtree(i, factorTreeNode);
     236          } else if (factorVarChild != null && factorSymbol.VariableNames.Contains(factorVarChild.VariableName)) {
     237            if (factorSymbol.GetVariableValues(factorVarChild.VariableName).Count() != factorVarChild.Weights.Length)
     238              throw new ArgumentException(
     239                string.Format("Factor variable {0} needs exactly {1} weights",
     240                factorVarChild.VariableName,
     241                factorSymbol.GetVariableValues(factorVarChild.VariableName).Count()));
     242            parent.RemoveSubtree(i);
     243            var factorTreeNode = (FactorVariableTreeNode)factorSymbol.CreateTreeNode();
     244            factorTreeNode.VariableName = factorVarChild.VariableName;
     245            factorTreeNode.Weights = factorVarChild.Weights;
     246            parent.InsertSubtree(i, factorTreeNode);
     247          }
     248        }
     249      }
    209250
    210251      if (!SymbolicRegressionConstantOptimizationEvaluator.CanOptimizeConstants(tree)) throw new ArgumentException("The optimizer does not support the specified model structure.");
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestClassification.cs

    r14185 r14869  
    2020#endregion
    2121
     22using System.Threading;
    2223using HeuristicLab.Common;
    2324using HeuristicLab.Core;
     
    132133
    133134    #region random forest
    134     protected override void Run() {
     135    protected override void Run(CancellationToken cancellationToken) {
    135136      double rmsError, relClassificationError, outOfBagRmsError, outOfBagRelClassificationError;
    136137      if (SetSeedRandomly) Seed = new System.Random().Next();
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestModel.cs

    r14368 r14869  
    139139
    140140    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
    141       double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, AllowedInputVariables, rows);
     141      double[,] inputData = dataset.ToArray(AllowedInputVariables, rows);
    142142      AssertInputMatrix(inputData);
    143143
     
    157157
    158158    public IEnumerable<double> GetEstimatedVariances(IDataset dataset, IEnumerable<int> rows) {
    159       double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, AllowedInputVariables, rows);
     159      double[,] inputData = dataset.ToArray(AllowedInputVariables, rows);
    160160      AssertInputMatrix(inputData);
    161161
     
    175175
    176176    public override IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {
    177       double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, AllowedInputVariables, rows);
     177      double[,] inputData = dataset.ToArray(AllowedInputVariables, rows);
    178178      AssertInputMatrix(inputData);
    179179
     
    294294      out double rmsError, out double outOfBagRmsError, out double avgRelError, out double outOfBagAvgRelError) {
    295295      var variables = problemData.AllowedInputVariables.Concat(new string[] { problemData.TargetVariable });
    296       double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(problemData.Dataset, variables, trainingIndices);
     296      double[,] inputMatrix = problemData.Dataset.ToArray(variables, trainingIndices);
    297297
    298298      alglib.dfreport rep;
     
    316316
    317317      var variables = problemData.AllowedInputVariables.Concat(new string[] { problemData.TargetVariable });
    318       double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(problemData.Dataset, variables, trainingIndices);
     318      double[,] inputMatrix = problemData.Dataset.ToArray(variables, trainingIndices);
    319319
    320320      var classValues = problemData.ClassValues.ToArray();
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestRegression.cs

    r14185 r14869  
    2020#endregion
    2121
     22using System.Threading;
    2223using HeuristicLab.Common;
    2324using HeuristicLab.Core;
     
    131132
    132133    #region random forest
    133     protected override void Run() {
     134    protected override void Run(CancellationToken cancellationToken) {
    134135      double rmsError, avgRelError, outOfBagRmsError, outOfBagAvgRelError;
    135136      if (SetSeedRandomly) Seed = new System.Random().Next();
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/SupportVectorMachine/SupportVectorClassification.cs

    r14185 r14869  
    2323using System.Collections.Generic;
    2424using System.Linq;
     25using System.Threading;
    2526using HeuristicLab.Common;
    2627using HeuristicLab.Core;
     
    143144
    144145    #region support vector classification
    145     protected override void Run() {
     146    protected override void Run(CancellationToken cancellationToken) {
    146147      IClassificationProblemData problemData = Problem.ProblemData;
    147148      IEnumerable<string> selectedInputVariables = problemData.AllowedInputVariables;
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/SupportVectorMachine/SupportVectorRegression.cs

    r14185 r14869  
    2323using System.Collections.Generic;
    2424using System.Linq;
     25using System.Threading;
    2526using HeuristicLab.Common;
    2627using HeuristicLab.Core;
     
    151152
    152153    #region support vector regression
    153     protected override void Run() {
     154    protected override void Run(CancellationToken cancellationToken) {
    154155      IRegressionProblemData problemData = Problem.ProblemData;
    155156      IEnumerable<string> selectedInputVariables = problemData.AllowedInputVariables;
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/TimeSeries/AutoregressiveModeling.cs

    r14185 r14869  
    2222using System;
    2323using System.Linq;
     24using System.Threading;
    2425using HeuristicLab.Common;
    2526using HeuristicLab.Core;
     
    6364    }
    6465
    65     protected override void Run() {
     66    protected override void Run(CancellationToken cancellationToken) {
    6667      double rmsError, cvRmsError;
    6768      var solution = CreateAutoRegressiveSolution(Problem.ProblemData, TimeOffset, out rmsError, out cvRmsError);
     
    114115      alglib.lrunpack(lm, out coefficients, out nFeatures);
    115116
    116 
    117       ISymbolicExpressionTree tree = new SymbolicExpressionTree(new ProgramRootSymbol().CreateTreeNode());
    118       ISymbolicExpressionTreeNode startNode = new StartSymbol().CreateTreeNode();
    119       tree.Root.AddSubtree(startNode);
    120       ISymbolicExpressionTreeNode addition = new Addition().CreateTreeNode();
    121       startNode.AddSubtree(addition);
    122 
    123       for (int i = 0; i < timeOffset; i++) {
    124         LaggedVariableTreeNode node = (LaggedVariableTreeNode)new LaggedVariable().CreateTreeNode();
    125         node.VariableName = targetVariable;
    126         node.Weight = coefficients[i];
    127         node.Lag = (i + 1) * -1;
    128         addition.AddSubtree(node);
    129       }
    130 
    131       ConstantTreeNode cNode = (ConstantTreeNode)new Constant().CreateTreeNode();
    132       cNode.Value = coefficients[coefficients.Length - 1];
    133       addition.AddSubtree(cNode);
     117      var tree = LinearModelToTreeConverter.CreateTree(
     118        variableNames: Enumerable.Repeat(problemData.TargetVariable, nFeatures).ToArray(),
     119        lags: Enumerable.Range(0, timeOffset).Select(i => (i + 1) * -1).ToArray(),
     120        coefficients: coefficients.Take(nFeatures).ToArray(),
     121        @const: coefficients[nFeatures]
     122        );
    134123
    135124      var interpreter = new SymbolicTimeSeriesPrognosisExpressionTreeInterpreter(problemData.TargetVariable);
  • branches/RBFRegression/HeuristicLab.Algorithms.DataAnalysis/3.4/kMeans/KMeansClustering.cs

    r14185 r14869  
    2323using System.Collections.Generic;
    2424using System.Linq;
     25using System.Threading;
    2526using HeuristicLab.Common;
    2627using HeuristicLab.Core;
     
    7778
    7879    #region k-Means clustering
    79     protected override void Run() {
     80    protected override void Run(CancellationToken cancellationToken) {
    8081      var solution = CreateKMeansSolution(Problem.ProblemData, K.Value, Restarts.Value);
    8182      Results.Add(new Result(KMeansSolutionResultName, "The k-Means clustering solution.", solution));
     
    8990      double[,] centers;
    9091      int[] xyc;
    91       double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
     92      double[,] inputMatrix = dataset.ToArray(allowedInputVariables, rows);
    9293      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
    9394        throw new NotSupportedException("k-Means clustering does not support NaN or infinity values in the input dataset.");
Note: See TracChangeset for help on using the changeset viewer.