- Timestamp: 04/09/10 17:28:32 (14 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/Dataset.cs
r3264 r3294 35 35 [StorableClass] 36 36 public sealed class Dataset : NamedItem, IStringConvertibleMatrix { 37 private Dictionary<int, Dictionary<int, double>>[] cachedMeans;38 private Dictionary<int, Dictionary<int, double>>[] cachedRanges;39 private bool cachedValuesInvalidated = true;40 41 37 public Dataset() 42 38 : this(new string[] { "x" }, new double[,] { { 0.0 } }) { … … 53 49 } 54 50 51 [Storable] 55 52 private StringArray variableNames; 56 53 public IEnumerable<string> VariableNames { … … 58 55 } 59 56 57 [Storable] 60 58 private DoubleMatrix data; 61 59 private DoubleMatrix Data { … … 93 91 // access to full columns 94 92 public double[] this[string variableName] { 95 get { return VariableValues(VariableIndex(variableName), 0, data.Rows); }96 } 97 98 public double[] VariableValues(int variableIndex, int start, int end) {93 get { return GetVariableValues(GetVariableIndex(variableName), 0, data.Rows); } 94 } 95 96 public double[] GetVariableValues(int variableIndex, int start, int end) { 99 97 if (start < 0 || !(start <= end)) 100 98 throw new ArgumentException("Start must be between 0 and end (" + end + ")."); … … 108 106 } 109 107 110 public double[] VariableValues(string variableName, int start, int end) {111 return VariableValues(VariableIndex(variableName), start, end);108 public double[] GetVariableValues(string variableName, int start, int end) { 109 return GetVariableValues(GetVariableIndex(variableName), start, end); 112 110 } 113 111 114 112 #region Variable name methods 115 public string VariableName(int variableIndex) {113 public string GetVariableName(int variableIndex) { 116 114 return variableNames[variableIndex]; 117 115 } 118 116 119 public int VariableIndex(string variableName) {117 public int GetVariableIndex(string variableName) { 120 118 for (int i = 0; i < variableNames.Length; i++) { 121 119 if (variableNames[i].Equals(variableName)) return i; … … 125 123 126 124 public void SetVariableName(int variableIndex, string name) { 125 if 
(variableNames.Contains(name)) throw new ArgumentException("The data set already contains a variable with name " + name + "."); 127 126 variableNames[variableIndex] = name; 128 127 } … … 131 130 132 131 #region variable statistics 133 public double Mean(string variableName) { 134 return Mean(VariableIndex(variableName)); 135 } 136 137 public double Mean(string variableName, int start, int end) { 138 return Mean(VariableIndex(variableName), start, end); 139 } 140 141 public double Mean(int variableIndex) { 142 return Mean(variableIndex, 0, data.Rows); 143 } 144 145 public double Mean(int variableIndex, int start, int end) { 146 if (cachedValuesInvalidated) CreateDictionaries(); 147 if (!cachedMeans[variableIndex].ContainsKey(start) || !cachedMeans[variableIndex][start].ContainsKey(end)) { 148 double mean = VariableValues(variableIndex, start, end).Average(); 149 if (!cachedMeans[variableIndex].ContainsKey(start)) cachedMeans[variableIndex][start] = new Dictionary<int, double>(); 150 cachedMeans[variableIndex][start][end] = mean; 151 return mean; 152 } else { 153 return cachedMeans[variableIndex][start][end]; 154 } 155 } 156 157 public double Range(string variableName) { 158 return Range(VariableIndex(variableName)); 159 } 160 161 public double Range(int variableIndex) { 162 return Range(variableIndex, 0, data.Rows); 163 } 164 165 public double Range(string variableName, int start, int end) { 166 return Range(VariableIndex(variableName), start, end); 167 } 168 169 public double Range(int variableIndex, int start, int end) { 170 if (cachedValuesInvalidated) CreateDictionaries(); 171 if (!cachedRanges[variableIndex].ContainsKey(start) || !cachedRanges[variableIndex][start].ContainsKey(end)) { 172 var values = VariableValues(variableIndex, start, end); 173 double range = values.Max() - values.Min(); 174 if (!cachedRanges[variableIndex].ContainsKey(start)) cachedRanges[variableIndex][start] = new Dictionary<int, double>(); 175 cachedRanges[variableIndex][start][end] = 
range; 176 return range; 177 } else { 178 return cachedRanges[variableIndex][start][end]; 179 } 180 } 181 182 public double Max(string variableName) { 183 return Max(VariableIndex(variableName)); 184 } 185 186 public double Max(int variableIndex) { 187 return Max(variableIndex, 0, data.Rows); 188 } 189 190 public double Max(string variableName, int start, int end) { 191 return Max(VariableIndex(variableName), start, end); 192 } 193 194 public double Max(int variableIndex, int start, int end) { 195 return VariableValues(variableIndex, start, end).Max(); 196 } 197 198 public double Min(string variableName) { 199 return Min(VariableIndex(variableName)); 200 } 201 202 public double Min(int variableIndex) { 203 return Min(variableIndex, 0, data.Rows); 204 } 205 206 public double Min(string variableName, int start, int end) { 207 return Min(VariableIndex(variableName), start, end); 208 } 209 210 public double Min(int variableIndex, int start, int end) { 211 return VariableValues(variableIndex, start, end).Min(); 212 } 213 214 public int MissingValues(string variableName) { 215 return MissingValues(VariableIndex(variableName)); 216 } 217 public int MissingValues(int variableIndex) { 218 return MissingValues(variableIndex, 0, data.Rows); 219 } 220 221 public int MissingValues(string variableName, int start, int end) { 222 return MissingValues(VariableIndex(variableName), start, end); 223 } 224 225 public int MissingValues(int variableIndex, int start, int end) { 226 return VariableValues(variableIndex, start, end).Count(x => double.IsNaN(x)); 132 public double GetMean(string variableName) { 133 return GetMean(GetVariableIndex(variableName)); 134 } 135 136 public double GetMean(string variableName, int start, int end) { 137 return GetMean(GetVariableIndex(variableName), start, end); 138 } 139 140 public double GetMean(int variableIndex) { 141 return GetMean(variableIndex, 0, data.Rows); 142 } 143 144 public double GetMean(int variableIndex, int start, int end) { 145 return 
GetVariableValues(variableIndex, start, end).Average(); 146 } 147 148 public double GetRange(string variableName) { 149 return GetRange(GetVariableIndex(variableName)); 150 } 151 152 public double GetRange(int variableIndex) { 153 return GetRange(variableIndex, 0, data.Rows); 154 } 155 156 public double GetRange(string variableName, int start, int end) { 157 return GetRange(GetVariableIndex(variableName), start, end); 158 } 159 160 public double GetRange(int variableIndex, int start, int end) { 161 var values = GetVariableValues(variableIndex, start, end); 162 return values.Max() - values.Min(); 163 } 164 165 public double GetMax(string variableName) { 166 return GetMax(GetVariableIndex(variableName)); 167 } 168 169 public double GetMax(int variableIndex) { 170 return GetMax(variableIndex, 0, data.Rows); 171 } 172 173 public double GetMax(string variableName, int start, int end) { 174 return GetMax(GetVariableIndex(variableName), start, end); 175 } 176 177 public double GetMax(int variableIndex, int start, int end) { 178 return GetVariableValues(variableIndex, start, end).Max(); 179 } 180 181 public double GetMin(string variableName) { 182 return GetMin(GetVariableIndex(variableName)); 183 } 184 185 public double GetMin(int variableIndex) { 186 return GetMin(variableIndex, 0, data.Rows); 187 } 188 189 public double GetMin(string variableName, int start, int end) { 190 return GetMin(GetVariableIndex(variableName), start, end); 191 } 192 193 public double GetMin(int variableIndex, int start, int end) { 194 return GetVariableValues(variableIndex, start, end).Min(); 195 } 196 197 public int GetMissingValues(string variableName) { 198 return GetMissingValues(GetVariableIndex(variableName)); 199 } 200 public int GetMissingValues(int variableIndex) { 201 return GetMissingValues(variableIndex, 0, data.Rows); 202 } 203 204 public int GetMissingValues(string variableName, int start, int end) { 205 return GetMissingValues(GetVariableIndex(variableName), start, end); 206 } 207 
208 public int GetMissingValues(int variableIndex, int start, int end) { 209 return GetVariableValues(variableIndex, start, end).Count(x => double.IsNaN(x)); 227 210 } 228 211 229 212 #endregion 230 231 private void CreateDictionaries() {232 // keep a means and ranges dictionary for each column (possible target variable) of the dataset.233 cachedMeans = new Dictionary<int, Dictionary<int, double>>[data.Columns];234 cachedRanges = new Dictionary<int, Dictionary<int, double>>[data.Columns];235 for (int i = 0; i < data.Columns; i++) {236 cachedMeans[i] = new Dictionary<int, Dictionary<int, double>>();237 cachedRanges[i] = new Dictionary<int, Dictionary<int, double>>();238 }239 cachedValuesInvalidated = false;240 }241 213 242 214 public override IDeepCloneable Clone(Cloner cloner) { … … 250 222 public event EventHandler<EventArgs<int, int>> DataChanged; 251 223 private void OnDataChanged(EventArgs<int, int> e) { 252 cachedValuesInvalidated = true;253 254 224 var listeners = DataChanged; 255 225 if (listeners != null) listeners(this, e); … … 257 227 public event EventHandler Reset; 258 228 private void OnReset(EventArgs e) { 259 cachedValuesInvalidated = true;260 261 229 var listeners = Reset; 262 230 if (listeners != null) listeners(this, e); … … 305 273 } 306 274 } 307 string formatString = new StringBuilder().Append(' #', (int)Math.Log10(value) + 1).ToString(); // >= 100 variables => ###275 string formatString = new StringBuilder().Append('0', (int)Math.Log10(value) + 1).ToString(); // >= 100 variables => ### 308 276 for (int column = 0; column < value; column++) { 309 277 if (column < data.Columns) … … 334 302 public bool SetValue(string value, int rowIndex, int columnIndex) { 335 303 if (rowIndex == 0) { 336 // set variable name 337 variableNames[columnIndex] = value; 338 return true; 304 // check if the variable name is already used 305 if (variableNames.Contains(value)) { 306 return false; 307 } else { 308 variableNames[columnIndex] = value; 309 return true; 310 
} 339 311 } else { 340 312 double v;
Note: See TracChangeset for help on using the changeset viewer.