Changeset 15030


Ignore:
Timestamp:
06/08/17 17:02:13 (3 weeks ago)
Author:
jzenisek
Message:

#2719 merged with trunk (revision 15029)

Location:
branches/HeuristicLab.DatastreamAnalysis/HeuristicLab.Problems.DataAnalysis
Files:
14 edited
1 copied

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.DatastreamAnalysis/HeuristicLab.Problems.DataAnalysis

  • branches/HeuristicLab.DatastreamAnalysis/HeuristicLab.Problems.DataAnalysis/3.4/Dataset.cs

    r13761 r15030  
    7474        throw new ArgumentException(message);
    7575      }
    76 
    7776      rows = variableValues.First().Count;
    7877      this.variableNames = new List<string>(variableNames);
     
    115114      foreach (var v in variableNames) {
    116115        if (VariableHasType<double>(v)) {
    117           values.Add(new List<double>((List<double>)variableValues[v]));
     116          values.Add(new List<double>((IList<double>)variableValues[v]));
    118117        } else if (VariableHasType<string>(v)) {
    119           values.Add(new List<string>((List<string>)variableValues[v]));
     118          values.Add(new List<string>((IList<string>)variableValues[v]));
    120119        } else if (VariableHasType<DateTime>(v)) {
    121           values.Add(new List<DateTime>((List<DateTime>)variableValues[v]));
     120          values.Add(new List<DateTime>((IList<DateTime>)variableValues[v]));
    122121        } else {
    123122          throw new ArgumentException("Unknown variable type.");
     
    125124      }
    126125      return new ModifiableDataset(variableNames, values);
     126    }
     127    /// <summary>
     128    /// Shuffle a dataset's rows
     129    /// </summary>
     130    /// <param name="random">Random number generator used for shuffling.</param>
     131    /// <returns>A shuffled copy of the current dataset.</returns>
     132    public Dataset Shuffle(IRandom random) {
     133      var values = variableNames.Select(x => variableValues[x]).ToList();
     134      return new Dataset(variableNames, values.ShuffleLists(random));
    127135    }
    128136
     
    166174    }
    167175    public IEnumerable<string> DoubleVariables {
    168       get { return variableValues.Where(p => p.Value is List<double>).Select(p => p.Key); }
    169     }
     176      get { return variableValues.Where(p => p.Value is IList<double>).Select(p => p.Key); }
     177    }
     178
     179    public IEnumerable<string> StringVariables {
     180      get { return variableValues.Where(p => p.Value is IList<string>).Select(p => p.Key); }
     181    }
     182
    170183    public IEnumerable<double> GetDoubleValues(string variableName) {
    171184      return GetValues<double>(variableName);
     
    180193    public ReadOnlyCollection<double> GetReadOnlyDoubleValues(string variableName) {
    181194      var values = GetValues<double>(variableName);
    182       return values.AsReadOnly();
     195      return new ReadOnlyCollection<double>(values);
    183196    }
    184197    public double GetDoubleValue(string variableName, int row) {
     
    189202      return GetValues<double>(variableName, rows);
    190203    }
     204
     205    public string GetStringValue(string variableName, int row) {
     206      var values = GetValues<string>(variableName);
     207      return values[row];
     208    }
     209
     210    public IEnumerable<string> GetStringValues(string variableName, IEnumerable<int> rows) {
     211      return GetValues<string>(variableName, rows);
     212    }
     213    public ReadOnlyCollection<string> GetReadOnlyStringValues(string variableName) {
     214      var values = GetValues<string>(variableName);
     215      return new ReadOnlyCollection<string>(values);
     216    }
     217
    191218    private IEnumerable<T> GetValues<T>(string variableName, IEnumerable<int> rows) {
    192219      var values = GetValues<T>(variableName);
    193220      return rows.Select(x => values[x]);
    194221    }
    195     private List<T> GetValues<T>(string variableName) {
     222    private IList<T> GetValues<T>(string variableName) {
    196223      IList list;
    197224      if (!variableValues.TryGetValue(variableName, out list))
    198225        throw new ArgumentException("The variable " + variableName + " does not exist in the dataset.");
    199       List<T> values = list as List<T>;
     226      IList<T> values = list as IList<T>;
    200227      if (values == null) throw new ArgumentException("The variable " + variableName + " is not a " + typeof(T) + " variable.");
    201228      return values;
  • branches/HeuristicLab.DatastreamAnalysis/HeuristicLab.Problems.DataAnalysis/3.4/DatasetExtensions.cs

    r14400 r15030  
    2020#endregion
    2121
     22using System;
    2223using System.Collections.Generic;
     24using System.Linq;
    2325
    2426namespace HeuristicLab.Problems.DataAnalysis {
    2527  public static class DatasetExtensions {
    26     public static IEnumerable<T> TakeEvery<T>(this IEnumerable<T> xs, int nth) {
    27       int i = 0;
    28       foreach (var x in xs) {
    29         if (i % nth == 0) yield return x;
    30         i++;
     28    public static double[,] ToArray(this IDataset dataset, IEnumerable<string> variables, IEnumerable<int> rows) {
     29      return ToArray(dataset,
     30        variables,
     31        transformations: variables.Select(_ => (ITransformation<double>)null), // no transform
     32        rows: rows);
     33    }
     34    public static double[,] ToArray(this IDataset dataset, IEnumerable<string> variables,
     35      IEnumerable<ITransformation<double>> transformations, IEnumerable<int> rows) {
     36      string[] variablesArr = variables.ToArray();
     37      int[] rowsArr = rows.ToArray();
     38      ITransformation<double>[] transformArr = transformations.ToArray();
     39      if (transformArr.Length != variablesArr.Length)
     40        throw new ArgumentException("Number of variables and number of transformations must match.");
     41
     42      double[,] matrix = new double[rowsArr.Length, variablesArr.Length];
     43
     44      for (int i = 0; i < variablesArr.Length; i++) {
     45        var origValues = dataset.GetDoubleValues(variablesArr[i], rowsArr);
     46        var values = transformArr[i] != null ? transformArr[i].Apply(origValues) : origValues;
     47        int row = 0;
     48        foreach (var value in values) {
     49          matrix[row, i] = value;
     50          row++;
     51        }
    3152      }
     53
     54      return matrix;
     55    }
     56
     57    /// <summary>
     58    /// Prepares a binary data matrix from a number of factors and specified factor values
     59    /// </summary>
     60    /// <param name="dataset">A dataset that contains the variable values</param>
     61    /// <param name="factorVariables">An enumerable of categorical variables (factors). For each variable an enumerable of values must be specified.</param>
     62    /// <param name="rows">An enumerable of row indices for the dataset</param>
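     63    /// <returns>A matrix with one row per given row index and one column per specified factor value; an entry is 1 if the row's value equals that factor value and 0 otherwise.</returns>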
     64    /// <remarks>Factor variables (categorical variables) are split up into multiple binary variables one for each specified value.</remarks>
     65    public static double[,] ToArray(
     66      this IDataset dataset,
     67      IEnumerable<KeyValuePair<string, IEnumerable<string>>> factorVariables,
     68      IEnumerable<int> rows) {
     69      // check input variables. Only string variables are allowed.
     70      var invalidInputs =
     71        factorVariables.Select(kvp => kvp.Key).Where(name => !dataset.VariableHasType<string>(name));
     72      if (invalidInputs.Any())
     73        throw new NotSupportedException("Unsupported inputs: " + string.Join(", ", invalidInputs));
     74
     75      int numBinaryColumns = factorVariables.Sum(kvp => kvp.Value.Count());
     76
     77      List<int> rowsList = rows.ToList();
     78      double[,] matrix = new double[rowsList.Count, numBinaryColumns];
     79
     80      int col = 0;
     81      foreach (var kvp in factorVariables) {
     82        var varName = kvp.Key;
     83        var cats = kvp.Value;
     84        if (!cats.Any()) continue;
     85        foreach (var cat in cats) {
     86          var values = dataset.GetStringValues(varName, rows);
     87          int row = 0;
     88          foreach (var value in values) {
     89            matrix[row, col] = value == cat ? 1 : 0;
     90            row++;
     91          }
     92          col++;
     93        }
     94      }
     95      return matrix;
     96    }
     97
     98    public static IEnumerable<KeyValuePair<string, IEnumerable<string>>> GetFactorVariableValues(
     99      this IDataset ds, IEnumerable<string> factorVariables, IEnumerable<int> rows) {
     100      return from factor in factorVariables
     101             let distinctValues = ds.GetStringValues(factor, rows).Distinct().ToArray()
     102             // 1 distinct value => skip (constant)
     103             // 2 distinct values => only take one of the two values
     104             // >=3 distinct values => create a binary value for each value
     105             let reducedValues = distinctValues.Length <= 2
     106               ? distinctValues.Take(distinctValues.Length - 1)
     107               : distinctValues
     108             select new KeyValuePair<string, IEnumerable<string>>(factor, reducedValues);
    32109    }
    33110  }
  • branches/HeuristicLab.DatastreamAnalysis/HeuristicLab.Problems.DataAnalysis/3.4/HeuristicLab.Problems.DataAnalysis-3.4.csproj

    r14710 r15030  
    151151  <ItemGroup>
    152152    <Compile Include="DatasetExtensions.cs" />
     153    <Compile Include="DatasetUtil.cs" />
    153154    <Compile Include="DoubleLimit.cs" />
    154155    <Compile Include="Implementation\Classification\ClassificationEnsembleModel.cs">
  • branches/HeuristicLab.DatastreamAnalysis/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/DataAnalysisProblemData.cs

    r14400 r15030  
    4141
    4242    #region parameter properties
     43    //mkommend: inserted parameter caching due to performance reasons
     44    private IFixedValueParameter<Dataset> datasetParameter;
    4345    public IFixedValueParameter<Dataset> DatasetParameter {
    44       get { return (IFixedValueParameter<Dataset>)Parameters[DatasetParameterName]; }
    45     }
     46      get {
     47        if (datasetParameter == null) datasetParameter = (IFixedValueParameter<Dataset>)Parameters[DatasetParameterName];
     48        return datasetParameter;
     49      }
     50    }
     51
     52    private IFixedValueParameter<ReadOnlyCheckedItemList<StringValue>> inputVariablesParameter;
    4653    public IFixedValueParameter<ReadOnlyCheckedItemList<StringValue>> InputVariablesParameter {
    47       get { return (IFixedValueParameter<ReadOnlyCheckedItemList<StringValue>>)Parameters[InputVariablesParameterName]; }
    48     }
     54      get {
     55        if (inputVariablesParameter == null) inputVariablesParameter = (IFixedValueParameter<ReadOnlyCheckedItemList<StringValue>>)Parameters[InputVariablesParameterName];
     56        return inputVariablesParameter;
     57      }
     58    }
     59
     60    private IFixedValueParameter<IntRange> trainingPartitionParameter;
    4961    public IFixedValueParameter<IntRange> TrainingPartitionParameter {
    50       get { return (IFixedValueParameter<IntRange>)Parameters[TrainingPartitionParameterName]; }
    51     }
     62      get {
     63        if (trainingPartitionParameter == null) trainingPartitionParameter = (IFixedValueParameter<IntRange>)Parameters[TrainingPartitionParameterName];
     64        return trainingPartitionParameter;
     65      }
     66    }
     67
     68    private IFixedValueParameter<IntRange> testPartitionParameter;
    5269    public IFixedValueParameter<IntRange> TestPartitionParameter {
    53       get { return (IFixedValueParameter<IntRange>)Parameters[TestPartitionParameterName]; }
    54     }
     70      get {
     71        if (testPartitionParameter == null) testPartitionParameter = (IFixedValueParameter<IntRange>)Parameters[TestPartitionParameterName];
     72        return testPartitionParameter;
     73      }
     74    }
     75
    5576    public IFixedValueParameter<ReadOnlyItemList<ITransformation>> TransformationsParameter {
    5677      get { return (IFixedValueParameter<ReadOnlyItemList<ITransformation>>)Parameters[TransformationsParameterName]; }
     
    7394    }
    7495
     96    public double[,] AllowedInputsTrainingValues {
     97      get { return Dataset.ToArray(AllowedInputVariables, TrainingIndices); }
     98    }
     99
     100    public double[,] AllowedInputsTestValues { get { return Dataset.ToArray(AllowedInputVariables, TestIndices); } }
    75101    public IntRange TrainingPartition {
    76102      get { return TrainingPartitionParameter.Value; }
     
    102128    public virtual bool IsTrainingSample(int index) {
    103129      return index >= 0 && index < Dataset.Rows &&
    104         TrainingPartition.Start <= index && index < TrainingPartition.End &&
    105         (index < TestPartition.Start || TestPartition.End <= index);
     130             TrainingPartition.Start <= index && index < TrainingPartition.End &&
     131             (index < TestPartition.Start || TestPartition.End <= index);
    106132    }
    107133
     
    131157    protected DataAnalysisProblemData(IDataset dataset, IEnumerable<string> allowedInputVariables, IEnumerable<ITransformation> transformations = null) {
    132158      if (dataset == null) throw new ArgumentNullException("The dataset must not be null.");
    133       if (allowedInputVariables == null) throw new ArgumentNullException("The allowedInputVariables must not be null.");
    134 
    135       if (allowedInputVariables.Except(dataset.DoubleVariables).Any())
    136         throw new ArgumentException("All allowed input variables must be present in the dataset and of type double.");
    137 
    138       var inputVariables = new CheckedItemList<StringValue>(dataset.DoubleVariables.Select(x => new StringValue(x)));
     159      if (allowedInputVariables == null) throw new ArgumentNullException("The allowed input variables must not be null.");
     160
     161      if (allowedInputVariables.Except(dataset.DoubleVariables).Except(dataset.StringVariables).Any())
     162        throw new ArgumentException("All allowed input variables must be present in the dataset and of type double or string.");
     163
     164      var variables = dataset.VariableNames.Where(variable => dataset.VariableHasType<double>(variable) || dataset.VariableHasType<string>(variable));
     165      var inputVariables = new CheckedItemList<StringValue>(variables.Select(x => new StringValue(x)));
    139166      foreach (StringValue x in inputVariables)
    140167        inputVariables.SetItemCheckedState(x, allowedInputVariables.Contains(x.Value));
     
    214241        InputVariables.SetItemCheckedState(inputVariable, variable != null && data.InputVariables.ItemChecked(variable));
    215242      }
    216 
    217       TrainingPartition.Start = TrainingPartition.End = 0;
    218       TestPartition.Start = 0;
    219       TestPartition.End = Dataset.Rows;
    220243    }
    221244  }
  • branches/HeuristicLab.DatastreamAnalysis/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Regression/RegressionSolutionVariableImpactsCalculator.cs

    r14463 r15030  
    4242      Noise
    4343    }
    44 
     44    public enum FactorReplacementMethodEnum {
     45      Best,
     46      Mode,
     47      Shuffle
     48    }
    4549    public enum DataPartitionEnum {
    4650      Training,
     
    8892    }
    8993
    90     public static IEnumerable<Tuple<string, double>> CalculateImpacts(IRegressionSolution solution,
     94    public static IEnumerable<Tuple<string, double>> CalculateImpacts(
     95      IRegressionSolution solution,
    9196      DataPartitionEnum data = DataPartitionEnum.Training,
    92       ReplacementMethodEnum replacement = ReplacementMethodEnum.Median) {
     97      ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Median,
     98      FactorReplacementMethodEnum factorReplacementMethod = FactorReplacementMethodEnum.Best) {
    9399
    94100      var problemData = solution.ProblemData;
     
    128134      var allowedInputVariables = dataset.VariableNames.Where(v => inputvariables.Contains(v)).ToList();
    129135
    130       foreach (var inputVariable in allowedInputVariables) {
    131         var newEstimates = EvaluateModelWithReplacedVariable(solution.Model, inputVariable, modifiableDataset, rows, replacement);
     136      // calculate impacts for double variables
     137      foreach (var inputVariable in allowedInputVariables.Where(problemData.Dataset.VariableHasType<double>)) {
     138        var newEstimates = EvaluateModelWithReplacedVariable(solution.Model, inputVariable, modifiableDataset, rows, replacementMethod);
    132139        var newR2 = OnlinePearsonsRCalculator.Calculate(targetValues, newEstimates, out error);
    133140        if (error != OnlineCalculatorError.None) throw new InvalidOperationException("Error during R² calculation with replaced inputs.");
     
    137144        impacts[inputVariable] = impact;
    138145      }
     146
     147      // calculate impacts for string variables
     148      foreach (var inputVariable in allowedInputVariables.Where(problemData.Dataset.VariableHasType<string>)) {
     149        if (factorReplacementMethod == FactorReplacementMethodEnum.Best) {
     150          // try replacing with all possible values and find the best replacement value
     151          var smallestImpact = double.PositiveInfinity;
     152          foreach (var repl in problemData.Dataset.GetStringValues(inputVariable, rows).Distinct()) {
     153            var newEstimates = EvaluateModelWithReplacedVariable(solution.Model, inputVariable, modifiableDataset, rows,
     154              Enumerable.Repeat(repl, dataset.Rows));
     155            var newR2 = OnlinePearsonsRCalculator.Calculate(targetValues, newEstimates, out error);
     156            if (error != OnlineCalculatorError.None)
     157              throw new InvalidOperationException("Error during R² calculation with replaced inputs.");
     158
     159            newR2 = newR2 * newR2;
     160            var impact = originalR2 - newR2;
     161            if (impact < smallestImpact) smallestImpact = impact;
     162          }
     163          impacts[inputVariable] = smallestImpact;
     164        } else {
     165          // for replacement methods shuffle and mode
     166          // calculate impacts for factor variables
     167
     168          var newEstimates = EvaluateModelWithReplacedVariable(solution.Model, inputVariable, modifiableDataset, rows,
     169            factorReplacementMethod);
     170          var newR2 = OnlinePearsonsRCalculator.Calculate(targetValues, newEstimates, out error);
     171          if (error != OnlineCalculatorError.None)
     172            throw new InvalidOperationException("Error during R² calculation with replaced inputs.");
     173
     174          newR2 = newR2 * newR2;
     175          var impact = originalR2 - newR2;
     176          impacts[inputVariable] = impact;
     177        }
     178      } // foreach
    139179      return impacts.OrderByDescending(i => i.Value).Select(i => Tuple.Create(i.Key, i.Value));
    140180    }
     
    184224      }
    185225
    186       dataset.ReplaceVariable(variable, replacementValues);
     226      return EvaluateModelWithReplacedVariable(model, variable, dataset, rows, replacementValues);
     227    }
     228
     229    private static IEnumerable<double> EvaluateModelWithReplacedVariable(
     230      IRegressionModel model, string variable, ModifiableDataset dataset,
     231      IEnumerable<int> rows,
     232      FactorReplacementMethodEnum replacement = FactorReplacementMethodEnum.Shuffle) {
     233      var originalValues = dataset.GetReadOnlyStringValues(variable).ToList();
     234      List<string> replacementValues;
     235      IRandom rand;
     236
     237      switch (replacement) {
     238        case FactorReplacementMethodEnum.Mode:
     239          var mostCommonValue = rows.Select(r => originalValues[r])
     240            .GroupBy(v => v)
     241            .OrderByDescending(g => g.Count())
     242            .First().Key;
     243          replacementValues = Enumerable.Repeat(mostCommonValue, dataset.Rows).ToList();
     244          break;
     245        case FactorReplacementMethodEnum.Shuffle:
     246          // new var has same empirical distribution but the relation to y is broken
     247          rand = new FastRandom(31415);
     248          // prepare a complete column for the dataset
     249          replacementValues = Enumerable.Repeat(string.Empty, dataset.Rows).ToList();
     250          // shuffle only the selected rows
     251          var shuffledValues = rows.Select(r => originalValues[r]).Shuffle(rand).ToList();
     252          int i = 0;
     253          // update column values
     254          foreach (var r in rows) {
     255            replacementValues[r] = shuffledValues[i++];
     256          }
     257          break;
     258        default:
     259          throw new ArgumentException(string.Format("FactorReplacementMethod {0} cannot be handled.", replacement));
     260      }
     261
     262      return EvaluateModelWithReplacedVariable(model, variable, dataset, rows, replacementValues);
     263    }
     264
     265    private static IEnumerable<double> EvaluateModelWithReplacedVariable(IRegressionModel model, string variable,
     266      ModifiableDataset dataset, IEnumerable<int> rows, IEnumerable<double> replacementValues) {
     267      var originalValues = dataset.GetReadOnlyDoubleValues(variable).ToList();
     268      dataset.ReplaceVariable(variable, replacementValues.ToList());
    187269      //mkommend: ToList is used on purpose to avoid lazy evaluation that could result in wrong estimates due to variable replacements
    188270      var estimates = model.GetEstimatedValues(dataset, rows).ToList();
     
    191273      return estimates;
    192274    }
     275    private static IEnumerable<double> EvaluateModelWithReplacedVariable(IRegressionModel model, string variable,
     276      ModifiableDataset dataset, IEnumerable<int> rows, IEnumerable<string> replacementValues) {
     277      var originalValues = dataset.GetReadOnlyStringValues(variable).ToList();
     278      dataset.ReplaceVariable(variable, replacementValues.ToList());
     279      //mkommend: ToList is used on purpose to avoid lazy evaluation that could result in wrong estimates due to variable replacements
     280      var estimates = model.GetEstimatedValues(dataset, rows).ToList();
     281      dataset.ReplaceVariable(variable, originalValues);
     282
     283      return estimates;
     284    }
    193285  }
    194286}
  • branches/HeuristicLab.DatastreamAnalysis/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Transformations/LinearTransformation.cs

    r14400 r15030  
    5252    public double Multiplier {
    5353      get { return MultiplierParameter.Value.Value; }
    54       protected set {
     54      set {
    5555        MultiplierParameter.Value.Value = value;
    5656      }
     
    5959    public double Addend {
    6060      get { return AddendParameter.Value.Value; }
    61       protected set {
     61      set {
    6262        AddendParameter.Value.Value = value;
    6363      }
  • branches/HeuristicLab.DatastreamAnalysis/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Transformations/ShiftStandardDistributionTransformation.cs

    r14400 r15030  
    7171
    7272    public override IEnumerable<double> Apply(IEnumerable<double> data) {
    73       ConfigureParameters(data);
    7473      if (OriginalStandardDeviation.IsAlmost(0.0)) {
    7574        return data;
     
    9493    }
    9594
    96     protected void ConfigureParameters(IEnumerable<double> data) {
     95    public override void ConfigureParameters(IEnumerable<double> data) {
    9796      OriginalStandardDeviation = data.StandardDeviation();
    9897      OriginalMean = data.Average();
  • branches/HeuristicLab.DatastreamAnalysis/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Transformations/ShiftToRangeTransformation.cs

    r14400 r15030  
    4444    }
    4545
    46     public override IEnumerable<double> Apply(IEnumerable<double> data) {
    47       ConfigureParameters(data);
    48       return base.Apply(data);
    49     }
    50 
    5146    public override bool Check(IEnumerable<double> data, out string errorMsg) {
    5247      ConfigureParameters(data);
     
    5449    }
    5550
    56     protected void ConfigureParameters(IEnumerable<double> data) {
     51    public override void ConfigureParameters(IEnumerable<double> data) {
    5752      double originalRangeStart = data.Min();
    5853      double originalRangeEnd = data.Max();
  • branches/HeuristicLab.DatastreamAnalysis/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Transformations/Transformation.cs

    r14400 r15030  
    6666    protected Transformation(IEnumerable<string> allowedColumns) : base(allowedColumns) { }
    6767
     68    public virtual void ConfigureParameters(IEnumerable<T> data) {
     69      // override in transformations with parameters
     70    }
     71
    6872    public abstract IEnumerable<T> Apply(IEnumerable<T> data);
     73    public IEnumerable<T> ConfigureAndApply(IEnumerable<T> data) {
     74      ConfigureParameters(data);
     75      return Apply(data);
     76    }
    6977
    7078    public abstract bool Check(IEnumerable<T> data, out string errorMsg);
  • branches/HeuristicLab.DatastreamAnalysis/HeuristicLab.Problems.DataAnalysis/3.4/Interfaces/IDataAnalysisProblemData.cs

    r14400 r15030  
    3333    IEnumerable<string> AllowedInputVariables { get; }
    3434
     35    double[,] AllowedInputsTrainingValues { get; }
     36    double[,] AllowedInputsTestValues { get; }
     37
    3538    IntRange TrainingPartition { get; }
    3639    IntRange TestPartition { get; }
  • branches/HeuristicLab.DatastreamAnalysis/HeuristicLab.Problems.DataAnalysis/3.4/Interfaces/IDataset.cs

    r14185 r15030  
    3030    IEnumerable<string> VariableNames { get; }
    3131    IEnumerable<string> DoubleVariables { get; }
     32    IEnumerable<string> StringVariables { get; }
     33
     34    bool VariableHasType<T>(string variableName);
    3235
    3336    double GetDoubleValue(string variableName, int row);
     
    3639    ReadOnlyCollection<double> GetReadOnlyDoubleValues(string variableName);
    3740
     41    string GetStringValue(string variableName, int row);
    3842    IEnumerable<string> GetStringValues(string variableName);
     43    IEnumerable<string> GetStringValues(string variableName, IEnumerable<int> rows);
     44    ReadOnlyCollection<string> GetReadOnlyStringValues(string VariableName);
     45
    3946    IEnumerable<DateTime> GetDateTimeValues(string variableName);
    4047  }
  • branches/HeuristicLab.DatastreamAnalysis/HeuristicLab.Problems.DataAnalysis/3.4/Interfaces/ITransformation.cs

    r14400 r15030  
    3030
    3131  public interface ITransformation<T> : ITransformation {
     32    void ConfigureParameters(IEnumerable<T> data);
     33    IEnumerable<T> ConfigureAndApply(IEnumerable<T> data);
    3234    IEnumerable<T> Apply(IEnumerable<T> data);
    3335  }
  • branches/HeuristicLab.DatastreamAnalysis/HeuristicLab.Problems.DataAnalysis/3.4/ModifiableDataset.cs

    r13761 r15030  
    7777    public void ReplaceVariable(string variableName, IList values) {
    7878      if (!variableValues.ContainsKey(variableName))
    79         throw new ArgumentException(string.Format("Variable {0} is not present in the dataset."), variableName);
     79        throw new ArgumentException(string.Format("Variable {0} is not present in the dataset.", variableName));
    8080      if (values.Count != variableValues[variableName].Count)
    8181        throw new ArgumentException("The number of values must coincide with the number of dataset rows.");
Note: See TracChangeset for help on using the changeset viewer.