Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
11/17/16 15:41:33 (8 years ago)
Author:
gkronber
Message:

#2697: reverse merge of r14378, r14390, r14391, r14393, r14394, r14396

Location:
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4
Files:
21 edited
1 copied

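Legend:

Unmodified (line number present in both the old r14393 and the new r14400 column)
Added (line number present only in the new-revision column)
Removed (line number present only in the old-revision column)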
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/GaussianProcessModel.cs

    r14393 r14400  
    165 165      try {
    166 166        CalculateModel(ds, rows, scaleInputs);
    167       } catch (alglib.alglibexception ae) {
     167      }
     168      catch (alglib.alglibexception ae) {
    168 169        // wrap exception so that calling code doesn't have to know about alglib implementation
    169 170        throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);
     
    259 260    private static double[,] GetData(IDataset ds, IEnumerable<string> allowedInputs, IEnumerable<int> rows, Scaling scaling) {
    260 261      if (scaling != null) {
    261         // TODO: completely remove Scaling class
    262         List<ITransformation<double>> transformations = new List<ITransformation<double>>();
    263 
    264         foreach (var varName in allowedInputs) {
    265           double min;
    266           double max;
    267           scaling.GetScalingParameters(varName, out min, out max);
    268           var add = -min / (max - min);
    269           var mult = 1.0 / (max - min);
    270           transformations.Add(new LinearTransformation(allowedInputs) { Addend = add, Multiplier = mult });
    271         }
    272         return ds.ToArray(allowedInputs, transformations, rows);
     262        return AlglibUtil.PrepareAndScaleInputMatrix(ds, allowedInputs, rows, scaling);
    273 263      } else {
    274         return ds.ToArray(allowedInputs, rows);
     264        return AlglibUtil.PrepareInputMatrix(ds, allowedInputs, rows);
    275 265      }
    276 266    }
     
    344 334        return Enumerable.Range(0, newN)
    345 335          .Select(i => ms[i] + Util.ScalarProd(Ks[i], alpha));
    346       } catch (alglib.alglibexception ae) {
     336      }
     337      catch (alglib.alglibexception ae) {
    347 338        // wrap exception so that calling code doesn't have to know about alglib implementation
    348 339        throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);
     
    390 381        }
    391 382        return kss;
    392       } catch (alglib.alglibexception ae) {
     383      }
     384      catch (alglib.alglibexception ae) {
    393 385        // wrap exception so that calling code doesn't have to know about alglib implementation
    394 386        throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj

    r14393 r14400  
    244 244      <SubType>Code</SubType>
    245 245    </Compile>
     246    <Compile Include="Linear\AlglibUtil.cs" />
     247    <Compile Include="Linear\Scaling.cs" />
    246 248    <Compile Include="Linear\LinearDiscriminantAnalysis.cs" />
    247 249    <Compile Include="Linear\LinearRegression.cs">
     
    251 253    <Compile Include="Linear\MultinomialLogitClassificationSolution.cs" />
    252 254    <Compile Include="Linear\MultinomialLogitModel.cs" />
    253     <Compile Include="Linear\Scaling.cs" />
    254 255    <Compile Include="MctsSymbolicRegression\Automaton.cs" />
    255 256    <Compile Include="MctsSymbolicRegression\CodeGenerator.cs" />
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearDiscriminantAnalysis.cs

    r14393 r14400  
    7070      IEnumerable<int> rows = problemData.TrainingIndices;
    7171      int nClasses = problemData.ClassNames.Count();
    72       double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows);
     72      double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
    7373      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
    7474        throw new NotSupportedException("Linear discriminant analysis does not support NaN or infinity values in the input dataset.");
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearRegression.cs

    r14393 r14400  
    73 73      IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
    74 74      IEnumerable<int> rows = problemData.TrainingIndices;
    75       double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows);
     75      double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
    76 76      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
    77 77        throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset.");
     
    81 81      int nRows = inputMatrix.GetLength(0);
    82 82      int nFeatures = inputMatrix.GetLength(1) - 1;
    83       double[] coefficients;
     83      double[] coefficients = new double[nFeatures + 1]; // last coefficient is for the constant
    84 84
    85 85      int retVal = 1;
     
    91 91      alglib.lrunpack(lm, out coefficients, out nFeatures);
    92 92
    93       var tree = LinearModelToTreeConverter.CreateTree(allowedInputVariables.ToArray(),
    94         coefficients.Take(nFeatures).ToArray(), @const: coefficients[nFeatures]);
     93      ISymbolicExpressionTree tree = new SymbolicExpressionTree(new ProgramRootSymbol().CreateTreeNode());
     94      ISymbolicExpressionTreeNode startNode = new StartSymbol().CreateTreeNode();
     95      tree.Root.AddSubtree(startNode);
     96      ISymbolicExpressionTreeNode addition = new Addition().CreateTreeNode();
     97      startNode.AddSubtree(addition);
     98
     99      int col = 0;
     100      foreach (string column in allowedInputVariables) {
     101        VariableTreeNode vNode = (VariableTreeNode)new HeuristicLab.Problems.DataAnalysis.Symbolic.Variable().CreateTreeNode();
     102        vNode.VariableName = column;
     103        vNode.Weight = coefficients[col];
     104        addition.AddSubtree(vNode);
     105        col++;
     106      }
     107
     108      ConstantTreeNode cNode = (ConstantTreeNode)new Constant().CreateTreeNode();
     109      cNode.Value = coefficients[coefficients.Length - 1];
     110      addition.AddSubtree(cNode);
    95 111
    96 112      SymbolicRegressionSolution solution = new SymbolicRegressionSolution(new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeInterpreter()), (IRegressionProblemData)problemData.Clone());
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/MultinomialLogitClassification.cs

    r14393 r14400  
    7070      IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
    7171      IEnumerable<int> rows = problemData.TrainingIndices;
    72       double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows);
     72      double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
    7373      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
    7474        throw new NotSupportedException("Multinomial logit classification does not support NaN or infinity values in the input dataset.");
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/MultinomialLogitModel.cs

    r14393 r14400  
    8383
    8484    public override IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {
    85       double[,] inputData = dataset.ToArray( allowedInputVariables, rows);
     85      double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
    8686
    8787      int n = inputData.GetLength(0);
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/Scaling.cs

    r14393 r14400  
    2929
    3030namespace HeuristicLab.Algorithms.DataAnalysis {
    31   [Obsolete("Use transformation classes in Problems.DataAnalysis instead")]
    3231  [StorableClass]
    3332  [Item(Name = "Scaling", Description = "Contains information about scaling of variables for data-analysis algorithms.")]
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/Initialization/LdaInitializer.cs

    r14393 r14400  
    4444      var attributes = data.AllowedInputVariables.Count();
    4545
    46       var ldaDs = data.Dataset.ToArray(
    47                                        data.AllowedInputVariables.Concat(data.TargetVariable.ToEnumerable()),
    48                                        data.TrainingIndices);
     46      var ldaDs = AlglibUtil.PrepareInputMatrix(data.Dataset,
     47                                                data.AllowedInputVariables.Concat(data.TargetVariable.ToEnumerable()),
     48                                                data.TrainingIndices);
    4949
    5050      // map class values to sequential natural numbers (required by alglib)
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/Initialization/PcaInitializer.cs

    r14393 r14400  
    4444      var attributes = data.AllowedInputVariables.Count();
    4545
    46       var pcaDs = data.Dataset.ToArray(data.AllowedInputVariables, data.TrainingIndices);
     46      var pcaDs = AlglibUtil.PrepareInputMatrix(data.Dataset, data.AllowedInputVariables, data.TrainingIndices);
    4747
    4848      int info;
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/NcaGradientCalculator.cs

    r14393 r14400  
    9999      }
    100100
    101       var data = problemData.Dataset.ToArray(problemData.AllowedInputVariables,
    102                                              problemData.TrainingIndices);
     101      var data = AlglibUtil.PrepareInputMatrix(problemData.Dataset, problemData.AllowedInputVariables,
     102                                               problemData.TrainingIndices);
    103103      var classes = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices).ToArray();
    104104
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/NcaModel.cs

    r14393 r14400  
    8686
    8787    public double[,] Reduce(IDataset dataset, IEnumerable<int> rows) {
    88       var data = dataset.ToArray(allowedInputVariables, rows);
     88      var data = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
    8989
    9090      var targets = dataset.GetDoubleValues(TargetVariable, rows).ToArray();
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourModel.cs

    r14393 r14400  
    119 119      if (IsCompatibilityLoaded) {
    120 120        // no scaling
    121         inputMatrix = dataset.ToArray(
     121        inputMatrix = AlglibUtil.PrepareInputMatrix(dataset,
    122 122          this.allowedInputVariables.Concat(new string[] { targetVariable }),
    123 123          rows);
     
    167 167
    168 168    private static double[,] CreateScaledData(IDataset dataset, IEnumerable<string> variables, IEnumerable<int> rows, double[] offsets, double[] factors) {
    169       var transforms =
    170         variables.Select(
    171           (_, colIdx) =>
    172             new LinearTransformation(variables) { Addend = offsets[colIdx] * factors[colIdx], Multiplier = factors[colIdx] });
    173       return dataset.ToArray(variables, transforms, rows);
     169      var x = new double[rows.Count(), variables.Count()];
     170      var colIdx = 0;
     171      foreach (var variableName in variables) {
     172        var rowIdx = 0;
     173        foreach (var val in dataset.GetDoubleValues(variableName, rows)) {
     174          x[rowIdx, colIdx] = (val + offsets[colIdx]) * factors[colIdx];
     175          rowIdx++;
     176        }
     177        colIdx++;
     178      }
     179      return x;
    174 180    }
    175 181
     
    181 187      double[,] inputData;
    182 188      if (IsCompatibilityLoaded) {
    183         inputData = dataset.ToArray(allowedInputVariables, rows);
     189        inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
    184 190      } else {
    185 191        inputData = CreateScaledData(dataset, allowedInputVariables, rows, offsets, weights);
     
    217 223      double[,] inputData;
    218 224      if (IsCompatibilityLoaded) {
    219         inputData = dataset.ToArray(allowedInputVariables, rows);
     225        inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
    220 226      } else {
    221 227        inputData = CreateScaledData(dataset, allowedInputVariables, rows, offsets, weights);
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkClassification.cs

    r14393 r14400  
    183183      IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
    184184      IEnumerable<int> rows = problemData.TrainingIndices;
    185       double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows);
     185      double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
    186186      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
    187187        throw new NotSupportedException("Neural network classification does not support NaN or infinity values in the input dataset.");
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkEnsembleClassification.cs

    r14393 r14400  
    124124    public NeuralNetworkEnsembleClassification()
    125125      : base() {
    126       var validHiddenLayerValues = new ItemSet<IntValue>(new IntValue[] {
    127         (IntValue)new IntValue(0).AsReadOnly(),
    128         (IntValue)new IntValue(1).AsReadOnly(),
     126      var validHiddenLayerValues = new ItemSet<IntValue>(new IntValue[] { 
     127        (IntValue)new IntValue(0).AsReadOnly(), 
     128        (IntValue)new IntValue(1).AsReadOnly(), 
    129129        (IntValue)new IntValue(2).AsReadOnly() });
    130130      var selectedHiddenLayerValue = (from v in validHiddenLayerValues
     
    169169      IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
    170170      IEnumerable<int> rows = problemData.TrainingIndices;
    171       double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows);
     171      double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
    172172      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
    173173        throw new NotSupportedException("Neural network ensemble classification does not support NaN or infinity values in the input dataset.");
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkEnsembleModel.cs

    r14393 r14400  
    9191
    9292    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
    93       double[,] inputData = dataset.ToArray(allowedInputVariables, rows);
     93      double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
    9494
    9595      int n = inputData.GetLength(0);
     
    108108
    109109    public override IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {
    110       double[,] inputData = dataset.ToArray(allowedInputVariables, rows);
     110      double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
    111111
    112112      int n = inputData.GetLength(0);
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkEnsembleRegression.cs

    r14393 r14400  
    168168      IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
    169169      IEnumerable<int> rows = problemData.TrainingIndices;
    170       double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows);
     170      double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
    171171      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
    172172        throw new NotSupportedException("Neural network ensemble regression does not support NaN or infinity values in the input dataset.");
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkModel.cs

    r14393 r14400  
    9595
    9696    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
    97       double[,] inputData = dataset.ToArray(allowedInputVariables, rows);
     97      double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
    9898
    9999      int n = inputData.GetLength(0);
     
    112112
    113113    public override IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {
    114       double[,] inputData = dataset.ToArray( allowedInputVariables, rows);
     114      double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
    115115
    116116      int n = inputData.GetLength(0);
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkRegression.cs

    r14393 r14400  
    184184      IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
    185185      IEnumerable<int> rows = problemData.TrainingIndices;
    186       double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows);
     186      double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
    187187      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
    188188        throw new NotSupportedException("Neural network regression does not support NaN or infinity values in the input dataset.");
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestModel.cs

    r14393 r14400  
    139139
    140140    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
    141       double[,] inputData = dataset.ToArray(AllowedInputVariables, rows);
     141      double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, AllowedInputVariables, rows);
    142142      AssertInputMatrix(inputData);
    143143
     
    157157
    158158    public IEnumerable<double> GetEstimatedVariances(IDataset dataset, IEnumerable<int> rows) {
    159       double[,] inputData = dataset.ToArray(AllowedInputVariables, rows);
     159      double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, AllowedInputVariables, rows);
    160160      AssertInputMatrix(inputData);
    161161
     
    175175
    176176    public override IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {
    177       double[,] inputData = dataset.ToArray(AllowedInputVariables, rows);
     177      double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, AllowedInputVariables, rows);
    178178      AssertInputMatrix(inputData);
    179179
     
    294294      out double rmsError, out double outOfBagRmsError, out double avgRelError, out double outOfBagAvgRelError) {
    295295      var variables = problemData.AllowedInputVariables.Concat(new string[] { problemData.TargetVariable });
    296       double[,] inputMatrix = problemData.Dataset.ToArray(variables, trainingIndices);
     296      double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(problemData.Dataset, variables, trainingIndices);
    297297
    298298      alglib.dfreport rep;
     
    316316
    317317      var variables = problemData.AllowedInputVariables.Concat(new string[] { problemData.TargetVariable });
    318       double[,] inputMatrix = problemData.Dataset.ToArray(variables, trainingIndices);
     318      double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(problemData.Dataset, variables, trainingIndices);
    319319
    320320      var classValues = problemData.ClassValues.ToArray();
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/TimeSeries/AutoregressiveModeling.cs

    r14391 r14400  
    114 114      alglib.lrunpack(lm, out coefficients, out nFeatures);
    115 115
    116       var tree = LinearModelToTreeConverter.CreateTree(
    117         variableNames: Enumerable.Repeat(problemData.TargetVariable, nFeatures).ToArray(),
    118         lags: Enumerable.Range(0, timeOffset).Select(i => (i + 1) * -1).ToArray(),
    119         coefficients: coefficients.Take(nFeatures).ToArray(),
    120         @const: coefficients[nFeatures]
    121         );
     116
     117      ISymbolicExpressionTree tree = new SymbolicExpressionTree(new ProgramRootSymbol().CreateTreeNode());
     118      ISymbolicExpressionTreeNode startNode = new StartSymbol().CreateTreeNode();
     119      tree.Root.AddSubtree(startNode);
     120      ISymbolicExpressionTreeNode addition = new Addition().CreateTreeNode();
     121      startNode.AddSubtree(addition);
     122
     123      for (int i = 0; i < timeOffset; i++) {
     124        LaggedVariableTreeNode node = (LaggedVariableTreeNode)new LaggedVariable().CreateTreeNode();
     125        node.VariableName = targetVariable;
     126        node.Weight = coefficients[i];
     127        node.Lag = (i + 1) * -1;
     128        addition.AddSubtree(node);
     129      }
     130
     131      ConstantTreeNode cNode = (ConstantTreeNode)new Constant().CreateTreeNode();
     132      cNode.Value = coefficients[coefficients.Length - 1];
     133      addition.AddSubtree(cNode);
    122 134
    123 135      var interpreter = new SymbolicTimeSeriesPrognosisExpressionTreeInterpreter(problemData.TargetVariable);
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/kMeans/KMeansClustering.cs

    r14393 r14400  
    8989      double[,] centers;
    9090      int[] xyc;
    91       double[,] inputMatrix = dataset.ToArray(allowedInputVariables, rows);
     91      double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
    9292      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
    9393        throw new NotSupportedException("k-Means clustering does not support NaN or infinity values in the input dataset.");
Note: See TracChangeset for help on using the changeset viewer.