- Timestamp:
- 09/14/11 13:59:25 (13 years ago)
- Location:
- branches/PersistenceSpeedUp
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/PersistenceSpeedUp
- Property svn:ignore
-
old new
 12  12  *.psess
 13  13  *.vsp
     14  *.docstates
-
- Property svn:mergeinfo changed
- Property svn:ignore
-
branches/PersistenceSpeedUp/HeuristicLab.Problems.DataAnalysis
- Property svn:mergeinfo changed
-
branches/PersistenceSpeedUp/HeuristicLab.Problems.DataAnalysis/3.4/Dataset.cs
r5847 r6760 21 21 22 22 using System; 23 using System.Collections; 23 24 using System.Collections.Generic; 25 using System.Collections.ObjectModel; 24 26 using System.Linq; 25 27 using HeuristicLab.Common; … … 36 38 private Dataset(Dataset original, Cloner cloner) 37 39 : base(original, cloner) { 38 variableNameToVariableIndexMapping = original.variableNameToVariableIndexMapping; 39 data = original.data; 40 } 41 public override IDeepCloneable Clone(Cloner cloner) { 42 return new Dataset(this, cloner); 43 } 40 variableValues = new Dictionary<string, IList>(original.variableValues); 41 variableNames = new List<string>(original.variableNames); 42 rows = original.rows; 43 } 44 public override IDeepCloneable Clone(Cloner cloner) { return new Dataset(this, cloner); } 44 45 45 46 public Dataset() … … 47 48 Name = "-"; 48 49 VariableNames = Enumerable.Empty<string>(); 49 data = new double[0, 0]; 50 } 51 52 public Dataset(IEnumerable<string> variableNames, double[,] data) 50 variableValues = new Dictionary<string, IList>(); 51 rows = 0; 52 } 53 54 public Dataset(IEnumerable<string> variableNames, IEnumerable<IList> variableValues) 53 55 : base() { 54 56 Name = "-"; 55 if (variableNames.Count() != data.GetLength(1)) { 56 throw new ArgumentException("Number of variable names doesn't match the number of columns of data"); 57 } 58 this.data = (double[,])data.Clone(); 59 VariableNames = variableNames; 60 } 61 62 63 private Dictionary<string, int> variableNameToVariableIndexMapping; 64 private Dictionary<int, string> variableIndexToVariableNameMapping; 57 if (!variableNames.Any()) { 58 this.variableNames = Enumerable.Range(0, variableValues.Count()).Select(x => "Column " + x).ToList(); 59 } else if (variableNames.Count() != variableValues.Count()) { 60 throw new ArgumentException("Number of variable names doesn't match the number of columns of variableValues"); 61 } else if (!variableValues.All(list => list.Count == variableValues.First().Count)) { 62 throw new 
ArgumentException("The number of values must be equal for every variable"); 63 } else if (variableNames.Distinct().Count() != variableNames.Count()) { 64 var duplicateVariableNames = 65 variableNames.GroupBy(v => v).Where(g => g.Count() > 1).Select(g => g.Key).ToList(); 66 string message = "The dataset cannot contain duplicate variables names: " + Environment.NewLine; 67 foreach (var duplicateVariableName in duplicateVariableNames) 68 message += duplicateVariableName + Environment.NewLine; 69 throw new ArgumentException(message); 70 } 71 72 rows = variableValues.First().Count; 73 this.variableNames = new List<string>(variableNames); 74 this.variableValues = new Dictionary<string, IList>(); 75 for (int i = 0; i < this.variableNames.Count; i++) { 76 var values = variableValues.ElementAt(i); 77 IList clonedValues = null; 78 if (values is List<double>) 79 clonedValues = new List<double>(values.Cast<double>()); 80 else if (values is List<string>) 81 clonedValues = new List<string>(values.Cast<string>()); 82 else if (values is List<DateTime>) 83 clonedValues = new List<DateTime>(values.Cast<DateTime>()); 84 else { 85 this.variableNames = new List<string>(); 86 this.variableValues = new Dictionary<string, IList>(); 87 throw new ArgumentException("The variable values must be of type List<double>, List<string> or List<DateTime>"); 88 } 89 this.variableValues.Add(this.variableNames[i], clonedValues); 90 } 91 } 92 93 public Dataset(IEnumerable<string> variableNames, double[,] variableValues) { 94 Name = "-"; 95 if (variableNames.Count() != variableValues.GetLength(1)) { 96 throw new ArgumentException("Number of variable names doesn't match the number of columns of variableValues"); 97 } 98 if (variableNames.Distinct().Count() != variableNames.Count()) { 99 var duplicateVariableNames = variableNames.GroupBy(v => v).Where(g => g.Count() > 1).Select(g => g.Key).ToList(); 100 string message = "The dataset cannot contain duplicate variables names: " + Environment.NewLine; 101 
foreach (var duplicateVariableName in duplicateVariableNames) 102 message += duplicateVariableName + Environment.NewLine; 103 throw new ArgumentException(message); 104 } 105 106 rows = variableValues.GetLength(0); 107 this.variableNames = new List<string>(variableNames); 108 109 this.variableValues = new Dictionary<string, IList>(); 110 for (int col = 0; col < variableValues.GetLength(1); col++) { 111 string columName = this.variableNames[col]; 112 var values = new List<double>(); 113 for (int row = 0; row < variableValues.GetLength(0); row++) { 114 values.Add(variableValues[row, col]); 115 } 116 this.variableValues.Add(columName, values); 117 } 118 } 119 120 #region Backwards compatible code, remove with 3.5 121 private double[,] storableData; 122 //name alias used to suppport backwards compatibility 123 [Storable(Name = "data", AllowOneWay = true)] 124 private double[,] StorableData { set { storableData = value; } } 125 126 [StorableHook(HookType.AfterDeserialization)] 127 private void AfterDeserialization() { 128 if (variableValues == null) { 129 rows = storableData.GetLength(0); 130 variableValues = new Dictionary<string, IList>(); 131 for (int col = 0; col < storableData.GetLength(1); col++) { 132 string columName = variableNames[col]; 133 var values = new List<double>(); 134 for (int row = 0; row < storableData.GetLength(0); row++) { 135 values.Add(storableData[row, col]); 136 } 137 variableValues.Add(columName, values); 138 } 139 storableData = null; 140 } 141 } 142 #endregion 143 144 [Storable(Name = "VariableValues")] 145 private Dictionary<string, IList> variableValues; 146 147 private List<string> variableNames; 65 148 [Storable] 66 149 public IEnumerable<string> VariableNames { 67 get { 68 // convert KeyCollection to an array first for persistence 69 return variableNameToVariableIndexMapping.Keys.ToArray(); 70 } 150 get { return variableNames; } 71 151 private set { 72 if (variableNameToVariableIndexMapping != null) throw new 
InvalidOperationException("VariableNames can only be set once."); 73 this.variableNameToVariableIndexMapping = new Dictionary<string, int>(); 74 this.variableIndexToVariableNameMapping = new Dictionary<int, string>(); 75 int i = 0; 76 foreach (string variableName in value) { 77 this.variableNameToVariableIndexMapping.Add(variableName, i); 78 this.variableIndexToVariableNameMapping.Add(i, variableName); 79 i++; 80 } 81 } 82 } 83 152 if (variableNames != null) throw new InvalidOperationException(); 153 variableNames = new List<string>(value); 154 } 155 } 156 157 public IEnumerable<string> DoubleVariables { 158 get { return variableValues.Where(p => p.Value is List<double>).Select(p => p.Key); } 159 } 160 161 public IEnumerable<double> GetDoubleValues(string variableName) { 162 IList list; 163 if (!variableValues.TryGetValue(variableName, out list)) 164 throw new ArgumentException("The variable " + variableName + " does not exist in the dataset."); 165 List<double> values = list as List<double>; 166 if (values == null) throw new ArgumentException("The variable " + variableName + " is not a double variable."); 167 168 //mkommend yield return used to enable lazy evaluation 169 foreach (double value in values) 170 yield return value; 171 } 172 public ReadOnlyCollection<double> GetReadOnlyDoubleValues(string variableName) { 173 IList list; 174 if (!variableValues.TryGetValue(variableName, out list)) 175 throw new ArgumentException("The variable " + variableName + " does not exist in the dataset."); 176 List<double> values = list as List<double>; 177 if (values == null) throw new ArgumentException("The variable " + variableName + " is not a double variable."); 178 return values.AsReadOnly(); 179 } 180 public double GetDoubleValue(string variableName, int row) { 181 IList list; 182 if (!variableValues.TryGetValue(variableName, out list)) 183 throw new ArgumentException("The variable " + variableName + " does not exist in the dataset."); 184 List<double> values = list as 
List<double>; 185 if (values == null) throw new ArgumentException("The variable " + variableName + " is not a double variable."); 186 return values[row]; 187 } 188 public IEnumerable<double> GetDoubleValues(string variableName, IEnumerable<int> rows) { 189 IList list; 190 if (!variableValues.TryGetValue(variableName, out list)) 191 throw new ArgumentException("The variable " + variableName + " does not exist in the dataset."); 192 List<double> values = list as List<double>; 193 if (values == null) throw new ArgumentException("The varialbe " + variableName + " is not a double variable."); 194 195 foreach (int index in rows) 196 yield return values[index]; 197 } 198 199 #region IStringConvertibleMatrix Members 84 200 [Storable] 85 private double[,] data; 86 private double[,] Data { 87 get { return data; } 88 } 89 90 // elementwise access 91 public double this[int rowIndex, int columnIndex] { 92 get { return data[rowIndex, columnIndex]; } 93 } 94 public double this[string variableName, int rowIndex] { 95 get { 96 int columnIndex = GetVariableIndex(variableName); 97 return data[rowIndex, columnIndex]; 98 } 99 } 100 101 public double[] GetVariableValues(int variableIndex) { 102 return GetVariableValues(variableIndex, 0, Rows); 103 } 104 public double[] GetVariableValues(string variableName) { 105 return GetVariableValues(GetVariableIndex(variableName), 0, Rows); 106 } 107 public double[] GetVariableValues(int variableIndex, int start, int end) { 108 return GetEnumeratedVariableValues(variableIndex, start, end).ToArray(); 109 } 110 public double[] GetVariableValues(string variableName, int start, int end) { 111 return GetVariableValues(GetVariableIndex(variableName), start, end); 112 } 113 114 public IEnumerable<double> GetEnumeratedVariableValues(int variableIndex) { 115 return GetEnumeratedVariableValues(variableIndex, 0, Rows); 116 } 117 public IEnumerable<double> GetEnumeratedVariableValues(int variableIndex, int start, int end) { 118 if (start < 0 || !(start <= 
end)) 119 throw new ArgumentException("Start must be between 0 and end (" + end + ")."); 120 if (end > Rows || end < start) 121 throw new ArgumentException("End must be between start (" + start + ") and dataset rows (" + Rows + ")."); 122 123 for (int i = start; i < end; i++) 124 yield return data[i, variableIndex]; 125 } 126 public IEnumerable<double> GetEnumeratedVariableValues(int variableIndex, IEnumerable<int> rows) { 127 foreach (int row in rows) 128 yield return data[row, variableIndex]; 129 } 130 131 public IEnumerable<double> GetEnumeratedVariableValues(string variableName) { 132 return GetEnumeratedVariableValues(GetVariableIndex(variableName), 0, Rows); 133 } 134 public IEnumerable<double> GetEnumeratedVariableValues(string variableName, int start, int end) { 135 return GetEnumeratedVariableValues(GetVariableIndex(variableName), start, end); 136 } 137 public IEnumerable<double> GetEnumeratedVariableValues(string variableName, IEnumerable<int> rows) { 138 return GetEnumeratedVariableValues(GetVariableIndex(variableName), rows); 139 } 140 141 public string GetVariableName(int variableIndex) { 142 try { 143 return variableIndexToVariableNameMapping[variableIndex]; 144 } 145 catch (KeyNotFoundException ex) { 146 throw new ArgumentException("The variable index " + variableIndex + " was not found.", ex); 147 } 148 } 149 public int GetVariableIndex(string variableName) { 150 try { 151 return variableNameToVariableIndexMapping[variableName]; 152 } 153 catch (KeyNotFoundException ex) { 154 throw new ArgumentException("The variable name " + variableName + " was not found.", ex); 155 } 156 } 157 158 #region IStringConvertibleMatrix Members 201 private int rows; 159 202 public int Rows { 160 get { return data.GetLength(0); }203 get { return rows; } 161 204 set { throw new NotSupportedException(); } 162 205 } 163 206 public int Columns { 164 get { return data.GetLength(1); }207 get { return variableNames.Count; } 165 208 set { throw new NotSupportedException(); } 166 
209 } … … 184 227 185 228 public string GetValue(int rowIndex, int columnIndex) { 186 return data[rowIndex, columnIndex].ToString();229 return variableValues[variableNames[columnIndex]][rowIndex].ToString(); 187 230 } 188 231 public bool SetValue(string value, int rowIndex, int columnIndex) {
Note: See TracChangeset for help on using the changeset viewer.