Changeset 16723 for branches/2521_ProblemRefactoring/HeuristicLab.Problems.DataAnalysis/3.4/Dataset.cs
- Timestamp:
- 03/28/19 16:54:20 (5 years ago)
- Location:
- branches/2521_ProblemRefactoring
- Files:
-
- 4 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/2521_ProblemRefactoring
- Property svn:mergeinfo changed
-
branches/2521_ProblemRefactoring/HeuristicLab.Problems.DataAnalysis
- Property svn:mergeinfo changed
-
branches/2521_ProblemRefactoring/HeuristicLab.Problems.DataAnalysis/3.4
-
Property
svn:mergeinfo
set to
(toggle deleted branches)
/branches/2520_PersistenceReintegration/HeuristicLab.Problems.DataAnalysis/3.4 merged eligible /branches/2839_HiveProjectManagement/HeuristicLab.Problems.DataAnalysis/3.4 merged eligible /branches/2947_ConfigurableIndexedDataTable/HeuristicLab.Problems.DataAnalysis/3.4 merged eligible /branches/2965_CancelablePersistence/HeuristicLab.Problems.DataAnalysis/3.4 merged eligible /stable/HeuristicLab.Problems.DataAnalysis/3.4 merged eligible /trunk/HeuristicLab.Problems.DataAnalysis/3.4 merged eligible /branches/2892_LR-prediction-intervals/HeuristicLab.Problems.DataAnalysis/3.4 15743-16388 /branches/2904_CalculateImpacts/3.4 15808-16421 /branches/2915-AbsoluteSymbol/HeuristicLab.Problems.DataAnalysis/3.4 15943-16355 /branches/2966_interval_calculation/HeuristicLab.Problems.DataAnalysis/3.4 16320-16406 /branches/Async/HeuristicLab.Problems.DataAnalysis/3.4 13329-15286 /branches/Classification-Extensions/HeuristicLab.Problems.DataAnalysis/3.4 11606-11761 /branches/ClassificationModelComparison/HeuristicLab.Problems.DataAnalysis/3.4 9073-13099 /branches/CloningRefactoring/HeuristicLab.Problems.DataAnalysis/3.4 4656-4721 /branches/DataAnalysis Refactoring/HeuristicLab.Problems.DataAnalysis/3.4 5471-5808 /branches/DataAnalysis SolutionEnsembles/HeuristicLab.Problems.DataAnalysis/3.4 5815-6180 /branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis/3.4 4220,4226,4236-4238,4389,4458-4459,4462,4464 /branches/DataAnalysisCSVImport/HeuristicLab.Problems.DataAnalysis/3.4 8713-8875 /branches/DataPreprocessing/HeuristicLab.Problems.DataAnalysis/3.4 10085-11101 /branches/DatasetFeatureCorrelation/HeuristicLab.Problems.DataAnalysis/3.4 8035-8538 /branches/GP.Grammar.Editor/HeuristicLab.Problems.DataAnalysis/3.4 6284-6795 /branches/GP.Symbols (TimeLag, Diff, Integral)/HeuristicLab.Problems.DataAnalysis/3.4 5060 /branches/HeuristicLab.DatasetRefactor/sources/HeuristicLab.Problems.DataAnalysis/3.4 11570-12508 /branches/HeuristicLab.Problems.Orienteering/HeuristicLab.Problems.DataAnalysis/3.4 11130-12721 /branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Problems.DataAnalysis/3.4 13819-14091 /branches/HeuristicLab.TimeSeries/HeuristicLab.Problems.DataAnalysis/3.4 7098-8789 /branches/LogResidualEvaluator/HeuristicLab.Problems.DataAnalysis/3.4 10202-10483 /branches/NET40/sources/HeuristicLab.Problems.DataAnalysis/3.4 5138-5162 /branches/ParallelEngine/HeuristicLab.Problems.DataAnalysis/3.4 5175-5192 /branches/ProblemInstancesRegressionAndClassification/HeuristicLab.Problems.DataAnalysis/3.4 7570-7810 /branches/QAPAlgorithms/HeuristicLab.Problems.DataAnalysis/3.4 6350-6627 /branches/Restructure trunk solution/HeuristicLab.Problems.DataAnalysis/3.4 6828 /branches/SimplifierViewsProgress/HeuristicLab.Problems.DataAnalysis/3.4 15318-15370 /branches/SpectralKernelForGaussianProcesses/HeuristicLab.Problems.DataAnalysis/3.4 10204-10479 /branches/Trunk/HeuristicLab.Problems.DataAnalysis/3.4 6829-6865 /branches/histogram/HeuristicLab.Problems.DataAnalysis/3.4 5959-6341 /branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis/3.4 14232-14825
-
Property
svn:mergeinfo
set to
(toggle deleted branches)
-
branches/2521_ProblemRefactoring/HeuristicLab.Problems.DataAnalysis/3.4/Dataset.cs
r16692 r16723 1 1 #region License Information 2 2 /* HeuristicLab 3 * Copyright (C) 2002-201 8Heuristic and Evolutionary Algorithms Laboratory (HEAL)3 * Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL) 4 4 * 5 5 * This file is part of HeuristicLab. … … 28 28 using HeuristicLab.Core; 29 29 using HeuristicLab.Data; 30 using H euristicLab.Persistence.Default.CompositeSerializers.Storable;30 using HEAL.Attic; 31 31 32 32 namespace HeuristicLab.Problems.DataAnalysis { 33 33 [Item("Dataset", "Represents a dataset containing data that should be analyzed.")] 34 [Storable Class]34 [StorableType("49F4D145-50D7-4497-8D8A-D190CD556CC8")] 35 35 public class Dataset : NamedItem, IDataset { 36 36 [StorableConstructor] 37 protected Dataset( bool deserializing) : base(deserializing) { }37 protected Dataset(StorableConstructorFlag _) : base(_) { } 38 38 protected Dataset(Dataset original, Cloner cloner) 39 39 : base(original, cloner) { 40 // no need to clone the variable values because these can't be modified 40 41 variableValues = new Dictionary<string, IList>(original.variableValues); 41 42 variableNames = new List<string>(original.variableNames); 42 43 rows = original.rows; 43 44 } 45 44 46 public override IDeepCloneable Clone(Cloner cloner) { return new Dataset(this, cloner); } 45 47 … … 58 60 /// <param name="variableValues">The values for the variables (column-oriented storage). Values are not cloned!</param> 59 61 public Dataset(IEnumerable<string> variableNames, IEnumerable<IList> variableValues) 60 : base() { 62 : this(variableNames, variableValues, cloneValues: true) { 63 } 64 65 protected Dataset(IEnumerable<string> variableNames, IEnumerable<IList> variableValues, bool cloneValues = false) { 61 66 Name = "-"; 62 if (!variableNames.Any()) { 67 68 if (variableNames.Any()) { 69 this.variableNames = new List<string>(variableNames); 70 } else { 63 71 this.variableNames = Enumerable.Range(0, variableValues.Count()).Select(x => "Column " + x).ToList(); 64 } else if (variableNames.Count() != variableValues.Count()) { 65 throw new ArgumentException("Number of variable names doesn't match the number of columns of variableValues"); 66 } else if (!variableValues.All(list => list.Count == variableValues.First().Count)) { 67 throw new ArgumentException("The number of values must be equal for every variable"); 68 } else if (variableNames.Distinct().Count() != variableNames.Count()) { 69 var duplicateVariableNames = 70 variableNames.GroupBy(v => v).Where(g => g.Count() > 1).Select(g => g.Key).ToList(); 71 string message = "The dataset cannot contain duplicate variables names: " + Environment.NewLine; 72 foreach (var duplicateVariableName in duplicateVariableNames) 73 message += duplicateVariableName + Environment.NewLine; 74 throw new ArgumentException(message); 75 } 72 } 73 // check if the arguments are consistent (no duplicate variables, same number of rows, correct data types, ...) 74 CheckArguments(this.variableNames, variableValues); 75 76 76 rows = variableValues.First().Count; 77 this.variableNames = new List<string>(variableNames); 78 this.variableValues = new Dictionary<string, IList>(this.variableNames.Count); 79 for (int i = 0; i < this.variableNames.Count; i++) { 80 var values = variableValues.ElementAt(i); 81 this.variableValues.Add(this.variableNames[i], values); 77 78 if (cloneValues) { 79 this.variableValues = CloneValues(this.variableNames, variableValues); 80 } else { 81 this.variableValues = new Dictionary<string, IList>(this.variableNames.Count); 82 for (int i = 0; i < this.variableNames.Count; i++) { 83 var variableName = this.variableNames[i]; 84 var values = variableValues.ElementAt(i); 85 this.variableValues.Add(variableName, values); 86 } 82 87 } 83 88 } … … 111 116 112 117 public ModifiableDataset ToModifiable() { 113 var values = new List<IList>(); 114 foreach (var v in variableNames) { 115 if (VariableHasType<double>(v)) { 116 values.Add(new List<double>((IList<double>)variableValues[v])); 117 } else if (VariableHasType<string>(v)) { 118 values.Add(new List<string>((IList<string>)variableValues[v])); 119 } else if (VariableHasType<DateTime>(v)) { 120 values.Add(new List<DateTime>((IList<DateTime>)variableValues[v])); 121 } else { 122 throw new ArgumentException("Unknown variable type."); 123 } 124 } 125 return new ModifiableDataset(variableNames, values); 126 } 118 return new ModifiableDataset(variableNames, variableNames.Select(v => variableValues[v]), true); 119 } 120 127 121 /// <summary> 128 122 /// Shuffle a dataset's rows … … 135 129 } 136 130 137 protected Dataset(Dataset dataset) : this(dataset.variableNames, dataset.variableValues.Values) { } 131 138 132 139 133 #region Backwards compatible code, remove with 3.5 … … 173 167 } 174 168 } 169 170 public bool ContainsVariable(string variableName) { 171 return variableValues.ContainsKey(variableName); 172 } 175 173 public IEnumerable<string> DoubleVariables { 176 174 get { return variableValues.Where(p => p.Value is IList<double>).Select(p => p.Key); } … … 231 229 return new ReadOnlyCollection<DateTime>(values); 232 230 } 233 234 235 231 private IEnumerable<T> GetValues<T>(string variableName, IEnumerable<int> rows) { 236 232 var values = GetValues<T>(variableName); … … 248 244 return variableValues[variableName] is IList<T>; 249 245 } 246 protected Type GetVariableType(string variableName) { 247 IList list; 248 variableValues.TryGetValue(variableName, out list); 249 if (list == null) 250 throw new ArgumentException("The variable " + variableName + " does not exist in the dataset."); 251 return GetElementType(list); 252 } 253 protected static Type GetElementType(IList list) { 254 var type = list.GetType(); 255 return type.IsGenericType ? type.GetGenericArguments()[0] : type.GetElementType(); 256 } 257 protected static bool IsAllowedType(IList list) { 258 var type = GetElementType(list); 259 return IsAllowedType(type); 260 } 261 protected static bool IsAllowedType(Type type) { 262 return type == typeof(double) || type == typeof(string) || type == typeof(DateTime); 263 } 264 265 protected static void CheckArguments(IEnumerable<string> variableNames, IEnumerable<IList> variableValues) { 266 if (variableNames.Count() != variableValues.Count()) { 267 throw new ArgumentException("Number of variable names doesn't match the number of columns of variableValues"); 268 } else if (!variableValues.All(list => list.Count == variableValues.First().Count)) { 269 throw new ArgumentException("The number of values must be equal for every variable"); 270 } else if (variableNames.Distinct().Count() != variableNames.Count()) { 271 var duplicateVariableNames = 272 variableNames.GroupBy(v => v).Where(g => g.Count() > 1).Select(g => g.Key).ToList(); 273 string message = "The dataset cannot contain duplicate variables names: " + Environment.NewLine; 274 foreach (var duplicateVariableName in duplicateVariableNames) 275 message += duplicateVariableName + Environment.NewLine; 276 throw new ArgumentException(message); 277 } 278 // check if all the variables are supported 279 foreach (var t in variableNames.Zip(variableValues, Tuple.Create)) { 280 var variableName = t.Item1; 281 var values = t.Item2; 282 283 if (!IsAllowedType(values)) { 284 throw new ArgumentException(string.Format("Unsupported type {0} for variable {1}.", GetElementType(values), variableName)); 285 } 286 } 287 } 288 289 protected static Dictionary<string, IList> CloneValues(Dictionary<string, IList> variableValues) { 290 return variableValues.ToDictionary(x => x.Key, x => CloneValues(x.Value)); 291 } 292 293 protected static Dictionary<string, IList> CloneValues(IEnumerable<string> variableNames, IEnumerable<IList> variableValues) { 294 return variableNames.Zip(variableValues, Tuple.Create).ToDictionary(x => x.Item1, x => CloneValues(x.Item2)); 295 } 296 297 protected static IList CloneValues(IList values) { 298 var doubleValues = values as IList<double>; 299 if (doubleValues != null) return new List<double>(doubleValues); 300 301 var stringValues = values as IList<string>; 302 if (stringValues != null) return new List<string>(stringValues); 303 304 var dateTimeValues = values as IList<DateTime>; 305 if (dateTimeValues != null) return new List<DateTime>(dateTimeValues); 306 307 throw new ArgumentException(string.Format("Unsupported variable type {0}.", GetElementType(values))); 308 } 250 309 251 310 #region IStringConvertibleMatrix Members 252 311 [Storable] 253 pr otectedint rows;312 private int rows; 254 313 public int Rows { 255 314 get { return rows; } 315 protected set { rows = value; } 256 316 } 257 317 int IStringConvertibleMatrix.Rows {
Note: See TracChangeset
for help on using the changeset viewer.