Changeset 15142 for stable/HeuristicLab.Problems.DataAnalysis
- Timestamp:
- 07/06/17 11:12:18 (7 years ago)
- Location:
- stable/HeuristicLab.Problems.DataAnalysis/3.4
- Files:
-
- 8 edited
Legend:
- Unmodified
- Added
- Removed
-
stable/HeuristicLab.Problems.DataAnalysis/3.4/DatasetExtensions.cs
r14186 r15142 20 20 #endregion 21 21 22 using System; 22 23 using System.Collections.Generic; 24 using System.Linq; 23 25 24 26 namespace HeuristicLab.Problems.DataAnalysis { 25 27 public static class DatasetExtensions { 26 public static IEnumerable<T> TakeEvery<T>(this IEnumerable<T> xs, int nth) { 27 int i = 0; 28 foreach (var x in xs) { 29 if (i % nth == 0) yield return x; 30 i++; 28 public static double[,] ToArray(this IDataset dataset, IEnumerable<string> variables, IEnumerable<int> rows) { 29 return ToArray(dataset, 30 variables, 31 transformations: variables.Select(_ => (ITransformation<double>)null), // no transform 32 rows: rows); 33 } 34 public static double[,] ToArray(this IDataset dataset, IEnumerable<string> variables, 35 IEnumerable<ITransformation<double>> transformations, IEnumerable<int> rows) { 36 string[] variablesArr = variables.ToArray(); 37 int[] rowsArr = rows.ToArray(); 38 ITransformation<double>[] transformArr = transformations.ToArray(); 39 if (transformArr.Length != variablesArr.Length) 40 throw new ArgumentException("Number of variables and number of transformations must match."); 41 42 double[,] matrix = new double[rowsArr.Length, variablesArr.Length]; 43 44 for (int i = 0; i < variablesArr.Length; i++) { 45 var origValues = dataset.GetDoubleValues(variablesArr[i], rowsArr); 46 var values = transformArr[i] != null ? transformArr[i].Apply(origValues) : origValues; 47 int row = 0; 48 foreach (var value in values) { 49 matrix[row, i] = value; 50 row++; 51 } 31 52 } 53 54 return matrix; 55 } 56 57 /// <summary> 58 /// Prepares a binary data matrix from a number of factors and specified factor values 59 /// </summary> 60 /// <param name="dataset">A dataset that contains the variable values</param> 61 /// <param name="factorVariables">An enumerable of categorical variables (factors). 
For each variable an enumerable of values must be specified.</param> 62 /// <param name="rows">An enumerable of row indices for the dataset</param> 63 /// <returns></returns> 64 /// <remarks>Factor variables (categorical variables) are split up into multiple binary variables one for each specified value.</remarks> 65 public static double[,] ToArray( 66 this IDataset dataset, 67 IEnumerable<KeyValuePair<string, IEnumerable<string>>> factorVariables, 68 IEnumerable<int> rows) { 69 // check input variables. Only string variables are allowed. 70 var invalidInputs = 71 factorVariables.Select(kvp => kvp.Key).Where(name => !dataset.VariableHasType<string>(name)); 72 if (invalidInputs.Any()) 73 throw new NotSupportedException("Unsupported inputs: " + string.Join(", ", invalidInputs)); 74 75 int numBinaryColumns = factorVariables.Sum(kvp => kvp.Value.Count()); 76 77 List<int> rowsList = rows.ToList(); 78 double[,] matrix = new double[rowsList.Count, numBinaryColumns]; 79 80 int col = 0; 81 foreach (var kvp in factorVariables) { 82 var varName = kvp.Key; 83 var cats = kvp.Value; 84 if (!cats.Any()) continue; 85 foreach (var cat in cats) { 86 var values = dataset.GetStringValues(varName, rows); 87 int row = 0; 88 foreach (var value in values) { 89 matrix[row, col] = value == cat ? 1 : 0; 90 row++; 91 } 92 col++; 93 } 94 } 95 return matrix; 96 } 97 98 public static IEnumerable<KeyValuePair<string, IEnumerable<string>>> GetFactorVariableValues( 99 this IDataset ds, IEnumerable<string> factorVariables, IEnumerable<int> rows) { 100 return from factor in factorVariables 101 let distinctValues = ds.GetStringValues(factor, rows).Distinct().ToArray() 102 // 1 distinct value => skip (constant) 103 // 2 distinct values => only take one of the two values 104 // >=3 distinct values => create a binary value for each value 105 let reducedValues = distinctValues.Length <= 2 106 ? 
distinctValues.Take(distinctValues.Length - 1) 107 : distinctValues 108 select new KeyValuePair<string, IEnumerable<string>>(factor, reducedValues); 32 109 } 33 110 } -
stable/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/DataAnalysisProblemData.cs
r15140 r15142 94 94 } 95 95 96 public double[,] AllowedInputsTrainingValues { 97 get { return Dataset.ToArray(AllowedInputVariables, TrainingIndices); } 98 } 99 100 public double[,] AllowedInputsTestValues { get { return Dataset.ToArray(AllowedInputVariables, TestIndices); } } 96 101 public IntRange TrainingPartition { 97 102 get { return TrainingPartitionParameter.Value; } -
stable/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Transformations/LinearTransformation.cs
r14186 r15142 52 52 public double Multiplier { 53 53 get { return MultiplierParameter.Value.Value; } 54 protected set { 54 set { 55 55 MultiplierParameter.Value.Value = value; 56 56 } … … 59 59 public double Addend { 60 60 get { return AddendParameter.Value.Value; } 61 protected set { 61 set { 62 62 AddendParameter.Value.Value = value; 63 63 } -
stable/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Transformations/ShiftStandardDistributionTransformation.cs
r12667 r15142 71 71 72 72 public override IEnumerable<double> Apply(IEnumerable<double> data) { 73 ConfigureParameters(data); 74 73 if (OriginalStandardDeviation.IsAlmost(0.0)) { 75 74 return data; … … 94 93 } 95 94 96 protected void ConfigureParameters(IEnumerable<double> data) { 95 public override void ConfigureParameters(IEnumerable<double> data) { 97 96 OriginalStandardDeviation = data.StandardDeviation(); 98 97 OriginalMean = data.Average(); -
stable/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Transformations/ShiftToRangeTransformation.cs
r12667 r15142 44 44 } 45 45 46 public override IEnumerable<double> Apply(IEnumerable<double> data) { 47 ConfigureParameters(data); 48 return base.Apply(data); 49 } 50 51 46 public override bool Check(IEnumerable<double> data, out string errorMsg) { 52 47 ConfigureParameters(data); … … 54 49 } 55 50 56 protected void ConfigureParameters(IEnumerable<double> data) { 51 public override void ConfigureParameters(IEnumerable<double> data) { 57 52 double originalRangeStart = data.Min(); 58 53 double originalRangeEnd = data.Max(); -
stable/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Transformations/Transformation.cs
r14186 r15142 66 66 protected Transformation(IEnumerable<string> allowedColumns) : base(allowedColumns) { } 67 67 68 public virtual void ConfigureParameters(IEnumerable<T> data) { 69 // override in transformations with parameters 70 } 71 68 72 public abstract IEnumerable<T> Apply(IEnumerable<T> data); 73 public IEnumerable<T> ConfigureAndApply(IEnumerable<T> data) { 74 ConfigureParameters(data); 75 return Apply(data); 76 } 69 77 70 78 public abstract bool Check(IEnumerable<T> data, out string errorMsg); -
stable/HeuristicLab.Problems.DataAnalysis/3.4/Interfaces/IDataAnalysisProblemData.cs
r14186 r15142 33 33 IEnumerable<string> AllowedInputVariables { get; } 34 34 35 double[,] AllowedInputsTrainingValues { get; } 36 double[,] AllowedInputsTestValues { get; } 37 35 38 IntRange TrainingPartition { get; } 36 39 IntRange TestPartition { get; } -
stable/HeuristicLab.Problems.DataAnalysis/3.4/Interfaces/ITransformation.cs
r14186 r15142 30 30 31 31 public interface ITransformation<T> : ITransformation { 32 void ConfigureParameters(IEnumerable<T> data); 33 IEnumerable<T> ConfigureAndApply(IEnumerable<T> data); 32 34 IEnumerable<T> Apply(IEnumerable<T> data); 33 35 }
Note: See TracChangeset
for help on using the changeset viewer.