Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
11/15/16 21:23:43 (7 years ago)
Author:
gkronber
Message:

#2697:

  • removed AlglibUtil.cs and added an extension method .ToArray(names, rows) to IDataset instead.
  • refactored transformation so that it is possible to apply an transformation without resetting the parameters
  • Used transformations instead of Scaling as far as possible.
  • Moved TakeEvery extension method to HL.Common
Location:
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4
Files:
1 deleted
20 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/GaussianProcessModel.cs

    r14185 r14393  
    165165      try {
    166166        CalculateModel(ds, rows, scaleInputs);
    167       }
    168       catch (alglib.alglibexception ae) {
     167      } catch (alglib.alglibexception ae) {
    169168        // wrap exception so that calling code doesn't have to know about alglib implementation
    170169        throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);
     
    260259    private static double[,] GetData(IDataset ds, IEnumerable<string> allowedInputs, IEnumerable<int> rows, Scaling scaling) {
    261260      if (scaling != null) {
    262         return AlglibUtil.PrepareAndScaleInputMatrix(ds, allowedInputs, rows, scaling);
     261        // TODO: completely remove Scaling class
     262        List<ITransformation<double>> transformations = new List<ITransformation<double>>();
     263
     264        foreach (var varName in allowedInputs) {
     265          double min;
     266          double max;
     267          scaling.GetScalingParameters(varName, out min, out max);
     268          var add = -min / (max - min);
     269          var mult = 1.0 / (max - min);
     270          transformations.Add(new LinearTransformation(allowedInputs) { Addend = add, Multiplier = mult });
     271        }
     272        return ds.ToArray(allowedInputs, transformations, rows);
    263273      } else {
    264         return AlglibUtil.PrepareInputMatrix(ds, allowedInputs, rows);
     274        return ds.ToArray(allowedInputs, rows);
    265275      }
    266276    }
     
    334344        return Enumerable.Range(0, newN)
    335345          .Select(i => ms[i] + Util.ScalarProd(Ks[i], alpha));
    336       }
    337       catch (alglib.alglibexception ae) {
     346      } catch (alglib.alglibexception ae) {
    338347        // wrap exception so that calling code doesn't have to know about alglib implementation
    339348        throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);
     
    381390        }
    382391        return kss;
    383       }
    384       catch (alglib.alglibexception ae) {
     392      } catch (alglib.alglibexception ae) {
    385393        // wrap exception so that calling code doesn't have to know about alglib implementation
    386394        throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj

    r14024 r14393  
    244244      <SubType>Code</SubType>
    245245    </Compile>
    246     <Compile Include="Linear\AlglibUtil.cs" />
    247     <Compile Include="Linear\Scaling.cs" />
    248246    <Compile Include="Linear\LinearDiscriminantAnalysis.cs" />
    249247    <Compile Include="Linear\LinearRegression.cs">
     
    253251    <Compile Include="Linear\MultinomialLogitClassificationSolution.cs" />
    254252    <Compile Include="Linear\MultinomialLogitModel.cs" />
     253    <Compile Include="Linear\Scaling.cs" />
    255254    <Compile Include="MctsSymbolicRegression\Automaton.cs" />
    256255    <Compile Include="MctsSymbolicRegression\CodeGenerator.cs" />
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearDiscriminantAnalysis.cs

    r14185 r14393  
    7070      IEnumerable<int> rows = problemData.TrainingIndices;
    7171      int nClasses = problemData.ClassNames.Count();
    72       double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
     72      double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows);
    7373      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
    7474        throw new NotSupportedException("Linear discriminant analysis does not support NaN or infinity values in the input dataset.");
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearRegression.cs

    r14390 r14393  
    7373      IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
    7474      IEnumerable<int> rows = problemData.TrainingIndices;
    75       double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
     75      double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows);
    7676      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
    7777        throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset.");
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/MultinomialLogitClassification.cs

    r14185 r14393  
    7070      IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
    7171      IEnumerable<int> rows = problemData.TrainingIndices;
    72       double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
     72      double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows);
    7373      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
    7474        throw new NotSupportedException("Multinomial logit classification does not support NaN or infinity values in the input dataset.");
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/MultinomialLogitModel.cs

    r14185 r14393  
    8383
    8484    public override IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {
    85       double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
     85      double[,] inputData = dataset.ToArray( allowedInputVariables, rows);
    8686
    8787      int n = inputData.GetLength(0);
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/Scaling.cs

    r14185 r14393  
    2929
    3030namespace HeuristicLab.Algorithms.DataAnalysis {
     31  [Obsolete("Use transformation classes in Problems.DataAnalysis instead")]
    3132  [StorableClass]
    3233  [Item(Name = "Scaling", Description = "Contains information about scaling of variables for data-analysis algorithms.")]
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/Initialization/LdaInitializer.cs

    r14185 r14393  
    4444      var attributes = data.AllowedInputVariables.Count();
    4545
    46       var ldaDs = AlglibUtil.PrepareInputMatrix(data.Dataset,
    47                                                 data.AllowedInputVariables.Concat(data.TargetVariable.ToEnumerable()),
    48                                                 data.TrainingIndices);
     46      var ldaDs = data.Dataset.ToArray(
     47                                       data.AllowedInputVariables.Concat(data.TargetVariable.ToEnumerable()),
     48                                       data.TrainingIndices);
    4949
    5050      // map class values to sequential natural numbers (required by alglib)
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/Initialization/PcaInitializer.cs

    r14185 r14393  
    4444      var attributes = data.AllowedInputVariables.Count();
    4545
    46       var pcaDs = AlglibUtil.PrepareInputMatrix(data.Dataset, data.AllowedInputVariables, data.TrainingIndices);
     46      var pcaDs = data.Dataset.ToArray(data.AllowedInputVariables, data.TrainingIndices);
    4747
    4848      int info;
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/NcaGradientCalculator.cs

    r14185 r14393  
    9999      }
    100100
    101       var data = AlglibUtil.PrepareInputMatrix(problemData.Dataset, problemData.AllowedInputVariables,
    102                                                problemData.TrainingIndices);
     101      var data = problemData.Dataset.ToArray(problemData.AllowedInputVariables,
     102                                             problemData.TrainingIndices);
    103103      var classes = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices).ToArray();
    104104
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/NcaModel.cs

    r14185 r14393  
    8686
    8787    public double[,] Reduce(IDataset dataset, IEnumerable<int> rows) {
    88       var data = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
     88      var data = dataset.ToArray(allowedInputVariables, rows);
    8989
    9090      var targets = dataset.GetDoubleValues(TargetVariable, rows).ToArray();
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourModel.cs

    r14322 r14393  
    119119      if (IsCompatibilityLoaded) {
    120120        // no scaling
    121         inputMatrix = AlglibUtil.PrepareInputMatrix(dataset,
     121        inputMatrix = dataset.ToArray(
    122122          this.allowedInputVariables.Concat(new string[] { targetVariable }),
    123123          rows);
     
    167167
    168168    private static double[,] CreateScaledData(IDataset dataset, IEnumerable<string> variables, IEnumerable<int> rows, double[] offsets, double[] factors) {
    169       var x = new double[rows.Count(), variables.Count()];
    170       var colIdx = 0;
    171       foreach (var variableName in variables) {
    172         var rowIdx = 0;
    173         foreach (var val in dataset.GetDoubleValues(variableName, rows)) {
    174           x[rowIdx, colIdx] = (val + offsets[colIdx]) * factors[colIdx];
    175           rowIdx++;
    176         }
    177         colIdx++;
    178       }
    179       return x;
     169      var transforms =
     170        variables.Select(
     171          (_, colIdx) =>
     172            new LinearTransformation(variables) { Addend = offsets[colIdx] * factors[colIdx], Multiplier = factors[colIdx] });
     173      return dataset.ToArray(variables, transforms, rows);
    180174    }
    181175
     
    187181      double[,] inputData;
    188182      if (IsCompatibilityLoaded) {
    189         inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
     183        inputData = dataset.ToArray(allowedInputVariables, rows);
    190184      } else {
    191185        inputData = CreateScaledData(dataset, allowedInputVariables, rows, offsets, weights);
     
    223217      double[,] inputData;
    224218      if (IsCompatibilityLoaded) {
    225         inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
     219        inputData = dataset.ToArray(allowedInputVariables, rows);
    226220      } else {
    227221        inputData = CreateScaledData(dataset, allowedInputVariables, rows, offsets, weights);
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkClassification.cs

    r14185 r14393  
    183183      IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
    184184      IEnumerable<int> rows = problemData.TrainingIndices;
    185       double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
     185      double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows);
    186186      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
    187187        throw new NotSupportedException("Neural network classification does not support NaN or infinity values in the input dataset.");
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkEnsembleClassification.cs

    r14185 r14393  
    124124    public NeuralNetworkEnsembleClassification()
    125125      : base() {
    126       var validHiddenLayerValues = new ItemSet<IntValue>(new IntValue[] { 
    127         (IntValue)new IntValue(0).AsReadOnly(), 
    128         (IntValue)new IntValue(1).AsReadOnly(), 
     126      var validHiddenLayerValues = new ItemSet<IntValue>(new IntValue[] {
     127        (IntValue)new IntValue(0).AsReadOnly(),
     128        (IntValue)new IntValue(1).AsReadOnly(),
    129129        (IntValue)new IntValue(2).AsReadOnly() });
    130130      var selectedHiddenLayerValue = (from v in validHiddenLayerValues
     
    169169      IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
    170170      IEnumerable<int> rows = problemData.TrainingIndices;
    171       double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
     171      double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows);
    172172      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
    173173        throw new NotSupportedException("Neural network ensemble classification does not support NaN or infinity values in the input dataset.");
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkEnsembleModel.cs

    r14185 r14393  
    9191
    9292    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
    93       double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
     93      double[,] inputData = dataset.ToArray(allowedInputVariables, rows);
    9494
    9595      int n = inputData.GetLength(0);
     
    108108
    109109    public override IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {
    110       double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
     110      double[,] inputData = dataset.ToArray(allowedInputVariables, rows);
    111111
    112112      int n = inputData.GetLength(0);
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkEnsembleRegression.cs

    r14185 r14393  
    168168      IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
    169169      IEnumerable<int> rows = problemData.TrainingIndices;
    170       double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
     170      double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows);
    171171      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
    172172        throw new NotSupportedException("Neural network ensemble regression does not support NaN or infinity values in the input dataset.");
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkModel.cs

    r14185 r14393  
    9595
    9696    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
    97       double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
     97      double[,] inputData = dataset.ToArray(allowedInputVariables, rows);
    9898
    9999      int n = inputData.GetLength(0);
     
    112112
    113113    public override IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {
    114       double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
     114      double[,] inputData = dataset.ToArray( allowedInputVariables, rows);
    115115
    116116      int n = inputData.GetLength(0);
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkRegression.cs

    r14185 r14393  
    184184      IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
    185185      IEnumerable<int> rows = problemData.TrainingIndices;
    186       double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
     186      double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows);
    187187      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
    188188        throw new NotSupportedException("Neural network regression does not support NaN or infinity values in the input dataset.");
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestModel.cs

    r14368 r14393  
    139139
    140140    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
    141       double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, AllowedInputVariables, rows);
     141      double[,] inputData = dataset.ToArray(AllowedInputVariables, rows);
    142142      AssertInputMatrix(inputData);
    143143
     
    157157
    158158    public IEnumerable<double> GetEstimatedVariances(IDataset dataset, IEnumerable<int> rows) {
    159       double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, AllowedInputVariables, rows);
     159      double[,] inputData = dataset.ToArray(AllowedInputVariables, rows);
    160160      AssertInputMatrix(inputData);
    161161
     
    175175
    176176    public override IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {
    177       double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, AllowedInputVariables, rows);
     177      double[,] inputData = dataset.ToArray(AllowedInputVariables, rows);
    178178      AssertInputMatrix(inputData);
    179179
     
    294294      out double rmsError, out double outOfBagRmsError, out double avgRelError, out double outOfBagAvgRelError) {
    295295      var variables = problemData.AllowedInputVariables.Concat(new string[] { problemData.TargetVariable });
    296       double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(problemData.Dataset, variables, trainingIndices);
     296      double[,] inputMatrix = problemData.Dataset.ToArray(variables, trainingIndices);
    297297
    298298      alglib.dfreport rep;
     
    316316
    317317      var variables = problemData.AllowedInputVariables.Concat(new string[] { problemData.TargetVariable });
    318       double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(problemData.Dataset, variables, trainingIndices);
     318      double[,] inputMatrix = problemData.Dataset.ToArray(variables, trainingIndices);
    319319
    320320      var classValues = problemData.ClassValues.ToArray();
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/kMeans/KMeansClustering.cs

    r14185 r14393  
    8989      double[,] centers;
    9090      int[] xyc;
    91       double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
     91      double[,] inputMatrix = dataset.ToArray(allowedInputVariables, rows);
    9292      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
    9393        throw new NotSupportedException("k-Means clustering does not support NaN or infinity values in the input dataset.");
Note: See TracChangeset for help on using the changeset viewer.