1  #region License Information


2  /* HeuristicLab


3  * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)


4  *


5  * This file is part of HeuristicLab.


6  *


7  * HeuristicLab is free software: you can redistribute it and/or modify


8  * it under the terms of the GNU General Public License as published by


9  * the Free Software Foundation, either version 3 of the License, or


10  * (at your option) any later version.


11  *


12  * HeuristicLab is distributed in the hope that it will be useful,


13  * but WITHOUT ANY WARRANTY; without even the implied warranty of


14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the


15  * GNU General Public License for more details.


16  *


17  * You should have received a copy of the GNU General Public License


18  * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.


19  */


20  #endregion


21 


22  using System;


23  using System.Collections.Generic;


24  using System.Linq;


25  using HeuristicLab.Problems.DataAnalysis;


26 


27  namespace HeuristicLab.Algorithms.DataAnalysis {


/// <summary>
/// Helper methods that copy dataset columns into dense <c>double[,]</c> matrices
/// with one matrix row per requested dataset row and one matrix column per input.
/// </summary>
public static class AlglibUtil {
  /// <summary>
  /// Copies the values of the given double variables into a matrix.
  /// </summary>
  /// <param name="dataset">A dataset that contains the variable values</param>
  /// <param name="variables">Names of the variables to copy; each one becomes a matrix column, in the given order</param>
  /// <param name="rows">An enumerable of row indices for the dataset</param>
  /// <returns>A matrix with one row per element of <paramref name="rows"/> and one column per element of <paramref name="variables"/></returns>
  /// <exception cref="NotSupportedException">Thrown when at least one variable is not reported as a double variable by the dataset.</exception>
  public static double[,] PrepareInputMatrix(IDataset dataset, IEnumerable<string> variables, IEnumerable<int> rows) {
    return BuildMatrix(dataset, variables, rows,
      (variable, rowIndices) => dataset.GetDoubleValues(variable, rowIndices));
  }

  /// <summary>
  /// Copies the values of the given double variables into a matrix, obtaining the values
  /// through <paramref name="scaling"/> instead of reading them directly from the dataset.
  /// </summary>
  /// <param name="dataset">A dataset that contains the variable values</param>
  /// <param name="variables">Names of the variables to copy; each one becomes a matrix column, in the given order</param>
  /// <param name="rows">An enumerable of row indices for the dataset</param>
  /// <param name="scaling">The scaling that supplies the values for each variable</param>
  /// <returns>A matrix with one row per element of <paramref name="rows"/> and one column per element of <paramref name="variables"/></returns>
  /// <exception cref="NotSupportedException">Thrown when at least one variable is not reported as a double variable by the dataset.</exception>
  public static double[,] PrepareAndScaleInputMatrix(IDataset dataset, IEnumerable<string> variables, IEnumerable<int> rows, Scaling scaling) {
    return BuildMatrix(dataset, variables, rows,
      (variable, rowIndices) => scaling.GetScaledValues(dataset, variable, rowIndices));
  }

  /// <summary>
  /// Prepares a binary data matrix from a number of factors and specified factor values
  /// </summary>
  /// <param name="dataset">A dataset that contains the variable values</param>
  /// <param name="factorVariables">An enumerable of categorical variables (factors). For each variable an enumerable of values must be specified.</param>
  /// <param name="rows">An enumerable of row indices for the dataset</param>
  /// <returns>
  /// A matrix with one row per element of <paramref name="rows"/> and one column per specified factor value.
  /// Columns are ordered by factor and, within a factor, by the order of its values. An entry is 1 when the
  /// factor has that value in the row and 0 otherwise.
  /// </returns>
  /// <remarks>Factor variables (categorical variables) are split up into multiple binary variables one for each specified value.</remarks>
  /// <exception cref="NotSupportedException">Thrown when at least one factor is not reported as a string variable by the dataset.</exception>
  public static double[,] PrepareInputMatrix(
    IDataset dataset,
    IEnumerable<KeyValuePair<string, IEnumerable<string>>> factorVariables,
    IEnumerable<int> rows) {
    // Materialize the factors once so validation, sizing and filling all see the same sequence.
    var factorsList = factorVariables.ToList();

    // check input variables. Only string variables are allowed.
    ThrowIfAnyUnsupported(
      factorsList.Select(kvp => kvp.Key).Where(name => !dataset.VariableHasType<string>(name)));

    // Materialize the categories only after validation so an invalid factor is still reported first.
    List<List<string>> categoriesPerFactor = factorsList.Select(kvp => kvp.Value.ToList()).ToList();
    int numBinaryColumns = categoriesPerFactor.Sum(cats => cats.Count);

    List<int> rowsList = rows.ToList();
    double[,] matrix = new double[rowsList.Count, numBinaryColumns];

    int col = 0;
    for (int f = 0; f < factorsList.Count; f++) {
      List<string> cats = categoriesPerFactor[f];
      if (cats.Count == 0) {
        continue; // no columns for this factor, so its values are not fetched at all
      }

      // Fetch the factor's values once and fill all of its binary columns in a single pass.
      int row = 0;
      foreach (var value in dataset.GetStringValues(factorsList[f].Key, rowsList)) {
        for (int c = 0; c < cats.Count; c++) {
          matrix[row, col + c] = value == cats[c] ? 1 : 0;
        }
        row++;
      }
      col += cats.Count;
    }
    return matrix;
  }

  /// <summary>
  /// Validates that all variables are double variables and fills a matrix column by column
  /// with the values produced by <paramref name="getColumnValues"/>.
  /// </summary>
  private static double[,] BuildMatrix(
    IDataset dataset,
    IEnumerable<string> variables,
    IEnumerable<int> rows,
    Func<string, IEnumerable<int>, IEnumerable<double>> getColumnValues) {
    // Materialize the inputs once: the matrix dimensions and the fill loops must agree
    // even if the caller passes lazily evaluated or non-repeatable sequences.
    List<string> variablesList = variables.ToList();

    // check input variables. Only double variables are allowed.
    ThrowIfAnyUnsupported(variablesList.Where(name => !dataset.VariableHasType<double>(name)));

    List<int> rowsList = rows.ToList();
    double[,] matrix = new double[rowsList.Count, variablesList.Count];

    for (int col = 0; col < variablesList.Count; col++) {
      int row = 0;
      foreach (double value in getColumnValues(variablesList[col], rowsList)) {
        matrix[row, col] = value;
        row++;
      }
    }

    return matrix;
  }

  /// <summary>
  /// Throws a <see cref="NotSupportedException"/> that lists all given names, if there are any.
  /// </summary>
  private static void ThrowIfAnyUnsupported(IEnumerable<string> invalidInputs) {
    List<string> invalid = invalidInputs.ToList();
    if (invalid.Count > 0) {
      throw new NotSupportedException("Unsupported inputs: " + string.Join(", ", invalid));
    }
  }
}


121  }

