Changeset 16124
- Timestamp:
- 09/06/18 12:19:23 (6 years ago)
- Location:
- stable
- Files:
-
- 7 edited
Legend:
- Unmodified
- Added
- Removed
-
stable
- Property svn:mergeinfo changed
/trunk merged: 15769,15829
- Property svn:mergeinfo changed
-
stable/HeuristicLab.Algorithms.DataAnalysis
- Property svn:mergeinfo changed
/trunk/HeuristicLab.Algorithms.DataAnalysis merged: 15769
- Property svn:mergeinfo changed
-
stable/HeuristicLab.Algorithms.DataAnalysis/3.4
- Property svn:mergeinfo changed
/trunk/HeuristicLab.Algorithms.DataAnalysis/3.4 merged: 15769
- Property svn:mergeinfo changed
-
stable/HeuristicLab.Algorithms.DataAnalysis/3.4/GBM/GradientBoostingRegressionAlgorithm.cs
r15584 r16124 258 258 259 259 modifiableDataset.RemoveVariable(targetVarName); 260 modifiableDataset.AddVariable(targetVarName, curY.Concat(curYTest) );260 modifiableDataset.AddVariable(targetVarName, curY.Concat(curYTest).ToList()); 261 261 262 262 SampleTrainingData(rand, modifiableDataset, rRows, problemData.Dataset, curY, problemData.TargetVariable, problemData.TrainingIndices); // all training indices from the original problem data are allowed -
stable/HeuristicLab.Problems.DataAnalysis
- Property svn:mergeinfo changed
/trunk/HeuristicLab.Problems.DataAnalysis (added) merged: 15769,15829
- Property svn:mergeinfo changed
-
stable/HeuristicLab.Problems.DataAnalysis/3.4/Dataset.cs
r15584 r16124 38 38 protected Dataset(Dataset original, Cloner cloner) 39 39 : base(original, cloner) { 40 // no need to clone the variable values because these can't be modified 40 41 variableValues = new Dictionary<string, IList>(original.variableValues); 41 42 variableNames = new List<string>(original.variableNames); 42 43 rows = original.rows; 43 44 } 45 44 46 public override IDeepCloneable Clone(Cloner cloner) { return new Dataset(this, cloner); } 45 47 … … 58 60 /// <param name="variableValues">The values for the variables (column-oriented storage). Values are not cloned!</param> 59 61 public Dataset(IEnumerable<string> variableNames, IEnumerable<IList> variableValues) 60 : base() { 62 : this(variableNames, variableValues, cloneValues: true) { 63 } 64 65 protected Dataset(IEnumerable<string> variableNames, IEnumerable<IList> variableValues, bool cloneValues = false) { 61 66 Name = "-"; 62 if (!variableNames.Any()) { 67 68 if (variableNames.Any()) { 69 this.variableNames = new List<string>(variableNames); 70 } else { 63 71 this.variableNames = Enumerable.Range(0, variableValues.Count()).Select(x => "Column " + x).ToList(); 64 } else if (variableNames.Count() != variableValues.Count()) { 65 throw new ArgumentException("Number of variable names doesn't match the number of columns of variableValues"); 66 } else if (!variableValues.All(list => list.Count == variableValues.First().Count)) { 67 throw new ArgumentException("The number of values must be equal for every variable"); 68 } else if (variableNames.Distinct().Count() != variableNames.Count()) { 69 var duplicateVariableNames = 70 variableNames.GroupBy(v => v).Where(g => g.Count() > 1).Select(g => g.Key).ToList(); 71 string message = "The dataset cannot contain duplicate variables names: " + Environment.NewLine; 72 foreach (var duplicateVariableName in duplicateVariableNames) 73 message += duplicateVariableName + Environment.NewLine; 74 throw new ArgumentException(message); 75 } 72 } 73 // check if the 
arguments are consistent (no duplicate variables, same number of rows, correct data types, ...) 74 CheckArguments(this.variableNames, variableValues); 75 76 76 rows = variableValues.First().Count; 77 this.variableNames = new List<string>(variableNames); 78 this.variableValues = new Dictionary<string, IList>(this.variableNames.Count); 79 for (int i = 0; i < this.variableNames.Count; i++) { 80 var values = variableValues.ElementAt(i); 81 this.variableValues.Add(this.variableNames[i], values); 77 78 if (cloneValues) { 79 this.variableValues = CloneValues(this.variableNames, variableValues); 80 } else { 81 this.variableValues = new Dictionary<string, IList>(this.variableNames.Count); 82 for (int i = 0; i < this.variableNames.Count; i++) { 83 var variableName = this.variableNames[i]; 84 var values = variableValues.ElementAt(i); 85 this.variableValues.Add(variableName, values); 86 } 82 87 } 83 88 } … … 125 130 return new ModifiableDataset(variableNames, values); 126 131 } 132 127 133 /// <summary> 128 134 /// Shuffle a dataset's rows … … 231 237 return new ReadOnlyCollection<DateTime>(values); 232 238 } 233 234 235 239 private IEnumerable<T> GetValues<T>(string variableName, IEnumerable<int> rows) { 236 240 var values = GetValues<T>(variableName); … … 247 251 public bool VariableHasType<T>(string variableName) { 248 252 return variableValues[variableName] is IList<T>; 253 } 254 protected Type GetVariableType(string variableName) { 255 IList list; 256 variableValues.TryGetValue(variableName, out list); 257 if (list == null) 258 throw new ArgumentException("The variable " + variableName + " does not exist in the dataset."); 259 return GetElementType(list); 260 } 261 protected static Type GetElementType(IList list) { 262 var type = list.GetType(); 263 return type.IsGenericType ? 
type.GetGenericArguments()[0] : type.GetElementType(); 264 } 265 protected static bool IsAllowedType(IList list) { 266 var type = GetElementType(list); 267 return IsAllowedType(type); 268 } 269 protected static bool IsAllowedType(Type type) { 270 return type == typeof(double) || type == typeof(string) || type == typeof(DateTime); 271 } 272 273 protected static void CheckArguments(IEnumerable<string> variableNames, IEnumerable<IList> variableValues) { 274 if (variableNames.Count() != variableValues.Count()) { 275 throw new ArgumentException("Number of variable names doesn't match the number of columns of variableValues"); 276 } else if (!variableValues.All(list => list.Count == variableValues.First().Count)) { 277 throw new ArgumentException("The number of values must be equal for every variable"); 278 } else if (variableNames.Distinct().Count() != variableNames.Count()) { 279 var duplicateVariableNames = 280 variableNames.GroupBy(v => v).Where(g => g.Count() > 1).Select(g => g.Key).ToList(); 281 string message = "The dataset cannot contain duplicate variables names: " + Environment.NewLine; 282 foreach (var duplicateVariableName in duplicateVariableNames) 283 message += duplicateVariableName + Environment.NewLine; 284 throw new ArgumentException(message); 285 } 286 // check if all the variables are supported 287 foreach (var t in variableNames.Zip(variableValues, Tuple.Create)) { 288 var variableName = t.Item1; 289 var values = t.Item2; 290 291 if (!IsAllowedType(values)) { 292 throw new ArgumentException(string.Format("Unsupported type {0} for variable {1}.", GetElementType(values), variableName)); 293 } 294 } 295 } 296 297 protected static Dictionary<string, IList> CloneValues(Dictionary<string, IList> variableValues) { 298 return variableValues.ToDictionary(x => x.Key, x => CloneValues(x.Value)); 299 } 300 301 protected static Dictionary<string, IList> CloneValues(IEnumerable<string> variableNames, IEnumerable<IList> variableValues) { 302 return 
variableNames.Zip(variableValues, Tuple.Create).ToDictionary(x => x.Item1, x => CloneValues(x.Item2)); 303 } 304 305 protected static IList CloneValues(IList values) { 306 var doubleValues = values as IList<double>; 307 if (doubleValues != null) return new List<double>(doubleValues); 308 309 var stringValues = values as IList<string>; 310 if (stringValues != null) return new List<string>(stringValues); 311 312 var dateTimeValues = values as IList<DateTime>; 313 if (dateTimeValues != null) return new List<DateTime>(dateTimeValues); 314 315 throw new ArgumentException(string.Format("Unsupported variable type {0}.", GetElementType(values))); 249 316 } 250 317 -
stable/HeuristicLab.Problems.DataAnalysis/3.4/ModifiableDataset.cs
r15584 r16124 39 39 40 40 private ModifiableDataset(ModifiableDataset original, Cloner cloner) : base(original, cloner) { 41 var variables = variableValues.Keys.ToList(); 42 foreach (var v in variables) { 43 var type = GetVariableType(v); 44 if (type == typeof(DateTime)) { 45 variableValues[v] = GetDateTimeValues(v).ToList(); 46 } else if (type == typeof(double)) { 47 variableValues[v] = GetDoubleValues(v).ToList(); 48 } else if (type == typeof(string)) { 49 variableValues[v] = GetStringValues(v).ToList(); 50 } else { 51 throw new ArgumentException("Unsupported type " + type + " for variable " + v); 52 } 53 } 54 } 41 variableNames = new List<string>(original.variableNames); 42 variableValues = CloneValues(original.variableValues); 43 } 44 55 45 public override IDeepCloneable Clone(Cloner cloner) { return new ModifiableDataset(this, cloner); } 56 public ModifiableDataset() : base() { } 57 58 public ModifiableDataset(IEnumerable<string> variableNames, IEnumerable<IList> variableValues) : base(variableNames, variableValues) { } 46 47 public ModifiableDataset() { } 48 49 public ModifiableDataset(IEnumerable<string> variableNames, IEnumerable<IList> variableValues) : 50 base(variableNames, variableValues, cloneValues: false) { } 59 51 60 52 public void ReplaceRow(int row, IEnumerable<object> values) { … … 105 97 106 98 // adds a new variable to the dataset 107 public void AddVariable <T>(string variableName, IEnumerable<T>values) {99 public void AddVariable(string variableName, IList values) { 108 100 if (variableValues.ContainsKey(variableName)) 109 throw new ArgumentException("Variable " + variableName + " is already present in the dataset."); 110 int count = values.Count(); 111 if (count != rows) 112 throw new ArgumentException("The number of values must exactly match the number of rows in the dataset."); 113 variableValues[variableName] = new List<T>(values); 101 throw new ArgumentException(string.Format("Variable {0} is already present in the dataset.", 
variableName)); 102 103 if (values == null || values.Count == 0) 104 throw new ArgumentException("Cannot add variable with no values."); 105 106 if (!IsAllowedType(values)) 107 throw new ArgumentException(string.Format("Unsupported type {0} for variable {1}.", GetElementType(values), variableName)); 108 109 variableValues[variableName] = values; 114 110 variableNames.Add(variableName); 111 115 112 OnColumnsChanged(); 116 113 OnColumnNamesChanged(); … … 120 117 public void RemoveVariable(string variableName) { 121 118 if (!variableValues.ContainsKey(variableName)) 122 throw new ArgumentException( "The variable " + variableName + " does not exist in the dataset.");119 throw new ArgumentException(string.Format("The variable {0} does not exist in the dataset.", variableName)); 123 120 variableValues.Remove(variableName); 124 121 variableNames.Remove(variableName); … … 128 125 } 129 126 130 // slow, avoid to usethis127 // slow, avoid using this 131 128 public void RemoveRow(int row) { 132 129 foreach (var list in variableValues.Values) … … 151 148 } 152 149 153 private Type GetVariableType(string variableName) {154 IList list;155 variableValues.TryGetValue(variableName, out list);156 if (list == null)157 throw new ArgumentException("The variable " + variableName + " does not exist in the dataset.");158 return list.GetType().GetGenericArguments()[0];159 }160 161 150 bool IStringConvertibleMatrix.SetValue(string value, int rowIndex, int columnIndex) { 162 151 var variableName = variableNames[columnIndex];
Note: See TracChangeset for help on using the changeset viewer.