Changeset 2319 for trunk/sources/HeuristicLab.DataAnalysis
- Timestamp: 09/01/09 11:09:50 (15 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
trunk/sources/HeuristicLab.DataAnalysis/3.2/Dataset.cs
r2311 r2319 31 31 namespace HeuristicLab.DataAnalysis { 32 32 public sealed class Dataset : ItemBase { 33 34 private string name;35 private double[] samples;36 private int rows;37 private int columns;38 33 private Dictionary<int, Dictionary<int, double>>[] cachedMeans; 39 34 private Dictionary<int, Dictionary<int, double>>[] cachedRanges; 40 private double[] scalingFactor;41 private double[] scalingOffset;42 35 private bool cachedValuesInvalidated = true; 43 44 private bool fireChangeEvents = true; 45 public bool FireChangeEvents { 46 get { return fireChangeEvents; } 47 set { fireChangeEvents = value; } 48 } 49 36 37 public Dataset() 38 : this(new double[,] { { 0.0 } }) { 39 } 40 41 public Dataset(double[,] samples) { 42 Name = "-"; 43 Rows = samples.GetLength(0); 44 Columns = samples.GetLength(1); 45 double[] values = new double[Rows * Columns]; 46 int i = 0; 47 for (int row = 0; row < Rows; row++) { 48 for (int column = 0; column < columns; column++) { 49 values[i++] = samples[row, column]; 50 } 51 } 52 Samples = values; 53 fireChangeEvents = true; 54 } 55 56 #region Properties 57 private string name; 50 58 public string Name { 51 59 get { return name; } … … 53 61 } 54 62 63 private int rows; 55 64 public int Rows { 56 65 get { return rows; } … … 58 67 } 59 68 69 private int columns; 60 70 public int Columns { 61 71 get { return columns; } … … 68 78 } 69 79 70 public double[] ScalingFactor { 71 get { return scalingFactor; } 72 set { 73 if (value.Length != scalingFactor.Length) 74 throw new ArgumentException("Length of scaling factor array doesn't match number of variables"); 75 scalingFactor = value; 76 } 77 } 78 public double[] ScalingOffset { 79 get { return scalingOffset; } 80 set { 81 if (value.Length != scalingOffset.Length) 82 throw new ArgumentException("Length of scaling offset array doesn't match number of variables"); 83 scalingOffset = value; 84 } 85 } 86 87 public double GetValue(int i, int j) { 88 return samples[columns * i + j]; 89 } 90 91 public 
void SetValue(int i, int j, double v) { 92 if (v != samples[columns * i + j]) { 93 samples[columns * i + j] = v; 94 cachedValuesInvalidated = true; 95 if (fireChangeEvents) FireChanged(); 96 } 97 } 98 80 private string[] variableNames; 81 public IEnumerable<string> VariableNames { 82 get { return variableNames; } 83 } 84 85 private double[] samples; 99 86 public double[] Samples { 100 87 get { return samples; } … … 113 100 } 114 101 115 private string[] variableNames; 116 public IEnumerable<string> VariableNames { 117 get { return variableNames; } 118 } 119 120 public Dataset() 121 : this(new double[,] { { 0.0 } }) { 122 } 123 124 public Dataset(double[,] samples) { 125 Name = "-"; 126 Rows = samples.GetLength(0); 127 Columns = samples.GetLength(1); 128 double[] values = new double[Rows * Columns]; 129 int i = 0; 130 for (int row = 0; row < Rows; row++) { 131 for (int column = 0; column < columns; column++) { 132 values[i++] = samples[row, column]; 133 } 134 } 135 Samples = values; 136 fireChangeEvents = true; 137 } 138 139 140 public string GetVariableName(int variableIndex) { 141 return variableNames[variableIndex]; 142 } 143 144 public int GetVariableIndex(string variableName) { 145 for (int i = 0; i < variableNames.Length; i++) { 146 if (variableNames[i].Equals(variableName)) return i; 147 } 148 throw new ArgumentException("The variable name " + variableName + " was not found."); 102 private bool fireChangeEvents = true; 103 public bool FireChangeEvents { 104 get { return fireChangeEvents; } 105 set { fireChangeEvents = value; } 106 } 107 108 private double[] scalingFactor; 109 public double[] ScalingFactor { 110 get { return scalingFactor; } 111 set { 112 if (value.Length != scalingFactor.Length) 113 throw new ArgumentException("Length of scaling factor array doesn't match number of variables"); 114 scalingFactor = value; 115 } 116 } 117 118 private double[] scalingOffset; 119 public double[] ScalingOffset { 120 get { return scalingOffset; } 121 set { 122 if 
(value.Length != scalingOffset.Length) 123 throw new ArgumentException("Length of scaling offset array doesn't match number of variables"); 124 scalingOffset = value; 125 } 126 } 127 #endregion 128 129 #region Modify and get values 130 public double GetValue(int i, int j) { 131 return samples[columns * i + j]; 149 132 } 150 133 … … 173 156 } 174 157 158 public void SetValue(int i, int j, double v) { 159 if (v != samples[columns * i + j]) { 160 samples[columns * i + j] = v; 161 cachedValuesInvalidated = true; 162 if (fireChangeEvents) FireChanged(); 163 } 164 } 165 166 public IEnumerable<double> ReplaceVariableValues(int variableIndex, IEnumerable<double> newValues, int start, int end) { 167 double[] oldValues = new double[end - start]; 168 for (int i = 0; i < end - start; i++) oldValues[i] = this.GetValue(i + start, variableIndex); 169 if (newValues.Count() != end - start) throw new ArgumentException("The length of the new values sequence doesn't match the required length (number of replaced values)"); 170 171 int index = start; 172 this.FireChangeEvents = false; 173 foreach (double v in newValues) { 174 this.SetValue(index++, variableIndex, v); 175 } 176 this.FireChangeEvents = true; 177 this.FireChanged(); 178 return oldValues; 179 } 180 181 public IEnumerable<double> ReplaceVariableValues(string variableName, IEnumerable<double> newValues, int start, int end) { 182 return ReplaceVariableValues(this.GetVariableIndex(variableName), newValues, start, end); 183 } 184 #endregion 185 186 #region Variable name methods 187 public string GetVariableName(int variableIndex) { 188 return variableNames[variableIndex]; 189 } 190 191 public int GetVariableIndex(string variableName) { 192 for (int i = 0; i < variableNames.Length; i++) { 193 if (variableNames[i].Equals(variableName)) return i; 194 } 195 throw new ArgumentException("The variable name " + variableName + " was not found."); 196 } 197 175 198 public void SetVariableName(int variableIndex, string name) { 176 199 
variableNames[variableIndex] = name; … … 180 203 return this.variableNames.Contains(variableName); 181 204 } 205 #endregion 182 206 183 207 public override IView CreateView() { 184 208 return new DatasetView(this); 209 } 210 211 212 #region Variable statistics 213 public double GetMean(string variableName) { 214 return GetMean(GetVariableIndex(variableName)); 215 } 216 217 public double GetMean(string variableName, int start, int end) { 218 return GetMean(GetVariableIndex(variableName), start, end); 219 } 220 221 public double GetMean(int column) { 222 return GetMean(column, 0, Rows); 223 } 224 225 public double GetMean(int column, int start, int end) { 226 if (cachedValuesInvalidated) CreateDictionaries(); 227 if (!cachedMeans[column].ContainsKey(start) || !cachedMeans[column][start].ContainsKey(end)) { 228 double[] values = new double[end - start]; 229 for (int sample = start; sample < end; sample++) { 230 values[sample - start] = GetValue(sample, column); 231 } 232 double mean = Statistics.Mean(values); 233 if (!cachedMeans[column].ContainsKey(start)) cachedMeans[column][start] = new Dictionary<int, double>(); 234 cachedMeans[column][start][end] = mean; 235 return mean; 236 } else { 237 return cachedMeans[column][start][end]; 238 } 239 } 240 241 public double GetRange(string variableName) { 242 return GetRange(this.GetVariableIndex(variableName)); 243 } 244 245 public double GetRange(int column) { 246 return GetRange(column, 0, Rows); 247 } 248 249 public double GetRange(string variableName, int start, int end) { 250 return GetRange(this.GetVariableIndex(variableName), start, end); 251 } 252 253 public double GetRange(int column, int start, int end) { 254 if (cachedValuesInvalidated) CreateDictionaries(); 255 if (!cachedRanges[column].ContainsKey(start) || !cachedRanges[column][start].ContainsKey(end)) { 256 double[] values = new double[end - start]; 257 for (int sample = start; sample < end; sample++) { 258 values[sample - start] = GetValue(sample, column); 259 
} 260 double range = Statistics.Range(values); 261 if (!cachedRanges[column].ContainsKey(start)) cachedRanges[column][start]= new Dictionary<int, double>(); 262 cachedRanges[column][start][end] = range; 263 return range; 264 } else { 265 return cachedRanges[column][start][end]; 266 } 267 } 268 269 public double GetMaximum(string variableName) { 270 return GetMaximum(this.GetVariableIndex(variableName)); 271 } 272 273 public double GetMaximum(int column) { 274 return GetMaximum(column, 0, Rows); 275 } 276 277 public double GetMaximum(string variableName, int start, int end) { 278 return GetMaximum(this.GetVariableIndex(variableName), start, end); 279 } 280 281 public double GetMaximum(int column, int start, int end) { 282 double max = Double.NegativeInfinity; 283 for (int i = start; i < end; i++) { 284 double val = GetValue(i, column); 285 if (!double.IsNaN(val) && val > max) max = val; 286 } 287 return max; 288 } 289 290 public double GetMinimum(string variableName) { 291 return GetMinimum(GetVariableIndex(variableName)); 292 } 293 294 public double GetMinimum(int column) { 295 return GetMinimum(column, 0, Rows); 296 } 297 298 public double GetMinimum(string variableName, int start, int end) { 299 return GetMinimum(this.GetVariableIndex(variableName), start, end); 300 } 301 302 public double GetMinimum(int column, int start, int end) { 303 double min = Double.PositiveInfinity; 304 for (int i = start; i < end; i++) { 305 double val = GetValue(i, column); 306 if (!double.IsNaN(val) && val < min) min = val; 307 } 308 return min; 309 } 310 #endregion 311 312 internal void ScaleVariable(int column) { 313 if (scalingFactor[column] == 1.0 && scalingOffset[column] == 0.0) { 314 double min = GetMinimum(column); 315 double max = GetMaximum(column); 316 double range = max - min; 317 if (range == 0) ScaleVariable(column, 1.0, -min); 318 else ScaleVariable(column, 1.0 / range, -min); 319 } 320 cachedValuesInvalidated = true; 321 if (fireChangeEvents) FireChanged(); 322 } 323 
324 internal void ScaleVariable(int column, double factor, double offset) { 325 scalingFactor[column] = factor; 326 scalingOffset[column] = offset; 327 for (int i = 0; i < Rows; i++) { 328 double origValue = samples[i * columns + column]; 329 samples[i * columns + column] = (origValue + offset) * factor; 330 } 331 cachedValuesInvalidated = true; 332 if (fireChangeEvents) FireChanged(); 333 } 334 335 internal void UnscaleVariable(int column) { 336 if (scalingFactor[column] != 1.0 || scalingOffset[column] != 0.0) { 337 for (int i = 0; i < rows; i++) { 338 double scaledValue = samples[i * columns + column]; 339 samples[i * columns + column] = scaledValue / scalingFactor[column] - scalingOffset[column]; 340 } 341 scalingFactor[column] = 1.0; 342 scalingOffset[column] = 0.0; 343 } 344 cachedValuesInvalidated = true; 345 if (fireChangeEvents) FireChanged(); 346 } 347 348 private void CreateDictionaries() { 349 // keep a means and ranges dictionary for each column (possible target variable) of the dataset. 
350 cachedMeans = new Dictionary<int, Dictionary<int, double>>[columns]; 351 cachedRanges = new Dictionary<int, Dictionary<int, double>>[columns]; 352 for (int i = 0; i < columns; i++) { 353 cachedMeans[i] = new Dictionary<int, Dictionary<int, double>>(); 354 cachedRanges[i] = new Dictionary<int, Dictionary<int, double>>(); 355 } 356 cachedValuesInvalidated = false; 185 357 } 186 358 … … 313 485 } 314 486 #endregion 315 316 public double GetMean(int column) {317 return GetMean(column, 0, Rows);318 }319 320 public double GetMean(int column, int from, int to) {321 if (cachedValuesInvalidated) CreateDictionaries();322 if (!cachedMeans[column].ContainsKey(from) || !cachedMeans[column][from].ContainsKey(to)) {323 double[] values = new double[to - from];324 for (int sample = from; sample < to; sample++) {325 values[sample - from] = GetValue(sample, column);326 }327 double mean = Statistics.Mean(values);328 if (!cachedMeans[column].ContainsKey(from)) cachedMeans[column][from] = new Dictionary<int, double>();329 cachedMeans[column][from][to] = mean;330 return mean;331 } else {332 return cachedMeans[column][from][to];333 }334 }335 336 public double GetRange(int column) {337 return GetRange(column, 0, Rows);338 }339 340 public double GetRange(int column, int from, int to) {341 if (cachedValuesInvalidated) CreateDictionaries();342 if (!cachedRanges[column].ContainsKey(from) || !cachedRanges[column][from].ContainsKey(to)) {343 double[] values = new double[to - from];344 for (int sample = from; sample < to; sample++) {345 values[sample - from] = GetValue(sample, column);346 }347 double range = Statistics.Range(values);348 if (!cachedRanges[column].ContainsKey(from)) cachedRanges[column][from] = new Dictionary<int, double>();349 cachedRanges[column][from][to] = range;350 return range;351 } else {352 return cachedRanges[column][from][to];353 }354 }355 356 public double GetMaximum(int column) {357 return GetMaximum(column, 0, Rows);358 }359 360 public double GetMaximum(int column, 
int start, int end) {361 double max = Double.NegativeInfinity;362 for (int i = start; i < end; i++) {363 double val = GetValue(i, column);364 if (!double.IsNaN(val) && val > max) max = val;365 }366 return max;367 }368 369 public double GetMinimum(int column) {370 return GetMinimum(column, 0, Rows);371 }372 373 public double GetMinimum(int column, int start, int end) {374 double min = Double.PositiveInfinity;375 for (int i = start; i < end; i++) {376 double val = GetValue(i, column);377 if (!double.IsNaN(val) && val < min) min = val;378 }379 return min;380 }381 382 internal void ScaleVariable(int column) {383 if (scalingFactor[column] == 1.0 && scalingOffset[column] == 0.0) {384 double min = GetMinimum(column);385 double max = GetMaximum(column);386 double range = max - min;387 if (range == 0) ScaleVariable(column, 1.0, -min);388 else ScaleVariable(column, 1.0 / range, -min);389 }390 cachedValuesInvalidated = true;391 if (fireChangeEvents) FireChanged();392 }393 394 internal void ScaleVariable(int column, double factor, double offset) {395 scalingFactor[column] = factor;396 scalingOffset[column] = offset;397 for (int i = 0; i < Rows; i++) {398 double origValue = samples[i * columns + column];399 samples[i * columns + column] = (origValue + offset) * factor;400 }401 cachedValuesInvalidated = true;402 if (fireChangeEvents) FireChanged();403 }404 405 internal void UnscaleVariable(int column) {406 if (scalingFactor[column] != 1.0 || scalingOffset[column] != 0.0) {407 for (int i = 0; i < rows; i++) {408 double scaledValue = samples[i * columns + column];409 samples[i * columns + column] = scaledValue / scalingFactor[column] - scalingOffset[column];410 }411 scalingFactor[column] = 1.0;412 scalingOffset[column] = 0.0;413 }414 cachedValuesInvalidated = true;415 if (fireChangeEvents) FireChanged();416 }417 418 private void CreateDictionaries() {419 // keep a means and ranges dictionary for each column (possible target variable) of the dataset.420 cachedMeans = new 
Dictionary<int, Dictionary<int, double>>[columns];421 cachedRanges = new Dictionary<int, Dictionary<int, double>>[columns];422 for (int i = 0; i < columns; i++) {423 cachedMeans[i] = new Dictionary<int, Dictionary<int, double>>();424 cachedRanges[i] = new Dictionary<int, Dictionary<int, double>>();425 }426 cachedValuesInvalidated = false;427 }428 487 } 429 488 }
Note: See TracChangeset for help on using the changeset viewer.