Ignore:
Timestamp:
04/11/17 15:55:44 (18 months ago)
Author:
gkronber
Message:

#2697: applied r14390, r14391, r14393, r14394, r14396 again (resolving conflicts)

Location:
trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4
Files:
8 edited

Legend:

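Unmodified (both the old and the new line number are shown)
Added (only the new line number is shown)
Removed (only the old line number is shown)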
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/DatasetExtensions.cs

    r14400 r14843  
    2020#endregion
    2121
     22using System;
    2223using System.Collections.Generic;
     24using System.Linq;
    2325
    2426namespace HeuristicLab.Problems.DataAnalysis {
    2527  public static class DatasetExtensions {
    26     public static IEnumerable<T> TakeEvery<T>(this IEnumerable<T> xs, int nth) {
    27       int i = 0;
    28       foreach (var x in xs) {
    29         if (i % nth == 0) yield return x;
    30         i++;
     28    public static double[,] ToArray(this IDataset dataset, IEnumerable<string> variables, IEnumerable<int> rows) {
     29      return ToArray(dataset,
     30        variables,
     31        transformations: variables.Select(_ => (ITransformation<double>)null), // no transform
     32        rows: rows);
     33    }
     34    public static double[,] ToArray(this IDataset dataset, IEnumerable<string> variables,
     35      IEnumerable<ITransformation<double>> transformations, IEnumerable<int> rows) {
     36      string[] variablesArr = variables.ToArray();
     37      int[] rowsArr = rows.ToArray();
     38      ITransformation<double>[] transformArr = transformations.ToArray();
     39      if (transformArr.Length != variablesArr.Length)
     40        throw new ArgumentException("Number of variables and number of transformations must match.");
     41
     42      double[,] matrix = new double[rowsArr.Length, variablesArr.Length];
     43
     44      for (int i = 0; i < variablesArr.Length; i++) {
     45        var origValues = dataset.GetDoubleValues(variablesArr[i], rowsArr);
     46        var values = transformArr[i] != null ? transformArr[i].Apply(origValues) : origValues;
     47        int row = 0;
     48        foreach (var value in values) {
     49          matrix[row, i] = value;
     50          row++;
     51        }
    3152      }
     53
     54      return matrix;
     55    }
     56
     57    /// <summary>
     58    /// Prepares a binary data matrix from a number of factors and specified factor values
     59    /// </summary>
     60    /// <param name="dataset">A dataset that contains the variable values</param>
     61    /// <param name="factorVariables">An enumerable of categorical variables (factors). For each variable an enumerable of values must be specified.</param>
     62    /// <param name="rows">An enumerable of row indices for the dataset</param>
     63    /// <returns></returns>
     64    /// <remarks>Factor variables (categorical variables) are split up into multiple binary variables one for each specified value.</remarks>
     65    public static double[,] ToArray(
     66      this IDataset dataset,
     67      IEnumerable<KeyValuePair<string, IEnumerable<string>>> factorVariables,
     68      IEnumerable<int> rows) {
     69      // check input variables. Only string variables are allowed.
     70      var invalidInputs =
     71        factorVariables.Select(kvp => kvp.Key).Where(name => !dataset.VariableHasType<string>(name));
     72      if (invalidInputs.Any())
     73        throw new NotSupportedException("Unsupported inputs: " + string.Join(", ", invalidInputs));
     74
     75      int numBinaryColumns = factorVariables.Sum(kvp => kvp.Value.Count());
     76
     77      List<int> rowsList = rows.ToList();
     78      double[,] matrix = new double[rowsList.Count, numBinaryColumns];
     79
     80      int col = 0;
     81      foreach (var kvp in factorVariables) {
     82        var varName = kvp.Key;
     83        var cats = kvp.Value;
     84        if (!cats.Any()) continue;
     85        foreach (var cat in cats) {
     86          var values = dataset.GetStringValues(varName, rows);
     87          int row = 0;
     88          foreach (var value in values) {
     89            matrix[row, col] = value == cat ? 1 : 0;
     90            row++;
     91          }
     92          col++;
     93        }
     94      }
     95      return matrix;
     96    }
     97
     98    public static IEnumerable<KeyValuePair<string, IEnumerable<string>>> GetFactorVariableValues(
     99      this IDataset ds, IEnumerable<string> factorVariables, IEnumerable<int> rows) {
     100      return from factor in factorVariables
     101             let distinctValues = ds.GetStringValues(factor, rows).Distinct().ToArray()
     102             // 1 distinct value => skip (constant)
     103             // 2 distinct values => only take one of the two values
     104             // >=3 distinct values => create a binary value for each value
     105             let reducedValues = distinctValues.Length <= 2
     106               ? distinctValues.Take(distinctValues.Length - 1)
     107               : distinctValues
     108             select new KeyValuePair<string, IEnumerable<string>>(factor, reducedValues);
    32109    }
    33110  }
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/DataAnalysisProblemData.cs

    r14826 r14843  
    9494    }
    9595
     96    public double[,] AllowedInputsTrainingValues {
     97      get { return Dataset.ToArray(AllowedInputVariables, TrainingIndices); }
     98    }
     99
     100    public double[,] AllowedInputsTestValues { get { return Dataset.ToArray(AllowedInputVariables, TestIndices); } }
    96101    public IntRange TrainingPartition {
    97102      get { return TrainingPartitionParameter.Value; }
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Transformations/LinearTransformation.cs

    r14400 r14843  
    5252    public double Multiplier {
    5353      get { return MultiplierParameter.Value.Value; }
    54       protected set {
     54      set {
    5555        MultiplierParameter.Value.Value = value;
    5656      }
     
    5959    public double Addend {
    6060      get { return AddendParameter.Value.Value; }
    61       protected set {
     61      set {
    6262        AddendParameter.Value.Value = value;
    6363      }
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Transformations/ShiftStandardDistributionTransformation.cs

    r14400 r14843  
    7171
    7272    public override IEnumerable<double> Apply(IEnumerable<double> data) {
    73       ConfigureParameters(data);
    7473      if (OriginalStandardDeviation.IsAlmost(0.0)) {
    7574        return data;
     
    9493    }
    9594
    96     protected void ConfigureParameters(IEnumerable<double> data) {
     95    public override void ConfigureParameters(IEnumerable<double> data) {
    9796      OriginalStandardDeviation = data.StandardDeviation();
    9897      OriginalMean = data.Average();
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Transformations/ShiftToRangeTransformation.cs

    r14400 r14843  
    4444    }
    4545
    46     public override IEnumerable<double> Apply(IEnumerable<double> data) {
    47       ConfigureParameters(data);
    48       return base.Apply(data);
    49     }
    50 
    5146    public override bool Check(IEnumerable<double> data, out string errorMsg) {
    5247      ConfigureParameters(data);
     
    5449    }
    5550
    56     protected void ConfigureParameters(IEnumerable<double> data) {
     51    public override void ConfigureParameters(IEnumerable<double> data) {
    5752      double originalRangeStart = data.Min();
    5853      double originalRangeEnd = data.Max();
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Transformations/Transformation.cs

    r14400 r14843  
    6666    protected Transformation(IEnumerable<string> allowedColumns) : base(allowedColumns) { }
    6767
     68    public virtual void ConfigureParameters(IEnumerable<T> data) {
     69      // override in transformations with parameters
     70    }
     71
    6872    public abstract IEnumerable<T> Apply(IEnumerable<T> data);
     73    public IEnumerable<T> ConfigureAndApply(IEnumerable<T> data) {
     74      ConfigureParameters(data);
     75      return Apply(data);
     76    }
    6977
    7078    public abstract bool Check(IEnumerable<T> data, out string errorMsg);
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Interfaces/IDataAnalysisProblemData.cs

    r14400 r14843  
    3333    IEnumerable<string> AllowedInputVariables { get; }
    3434
     35    double[,] AllowedInputsTrainingValues { get; }
     36    double[,] AllowedInputsTestValues { get; }
     37
    3538    IntRange TrainingPartition { get; }
    3639    IntRange TestPartition { get; }
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Interfaces/ITransformation.cs

    r14400 r14843  
    3030
    3131  public interface ITransformation<T> : ITransformation {
     32    void ConfigureParameters(IEnumerable<T> data);
     33    IEnumerable<T> ConfigureAndApply(IEnumerable<T> data);
    3234    IEnumerable<T> Apply(IEnumerable<T> data);
    3335  }
Note: See TracChangeset for help on using the changeset viewer.