Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.DataAnalysis/3.3/Dataset.cs @ 2498

Last change on this file since 2498 was 1914, checked in by epitzer, 16 years ago

Migration of DataAnalysis, GP, GP.StructureIdentification and Modeling to new Persistence-3.3 (#603)

File size: 8.5 KB
RevLine 
[2]1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2008 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Xml;
25using HeuristicLab.Core;
26using HeuristicLab.Data;
27using System.Globalization;
28using System.Text;
[1914]29using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
[2]30
31namespace HeuristicLab.DataAnalysis {
[207]32  public sealed class Dataset : ItemBase {
[2]33
[1914]34    [Storable]
[2]35    private string name;
[1914]36
37    [Storable]
[2]38    private int rows;
[1914]39
40    [Storable]
[333]41    private int columns;
[1914]42
43    [Storable]
44    private string[] variableNames;
45
46    [Storable]
[237]47    private double[] scalingFactor;
[1914]48
49    [Storable]
[237]50    private double[] scalingOffset;
[2]51
[1914]52    [Storable]
53    private double[] samples;
54
55    private Dictionary<int, Dictionary<int, double>>[] cachedMeans;
56    private Dictionary<int, Dictionary<int, double>>[] cachedRanges;
57
58    [Storable]
59    private object CreateDictionaries_Persistence {
60      get { return null; }
61      set { CreateDictionaries(); }
62    }
63
[333]64    public string Name {
65      get { return name; }
66      set { name = value; }
[312]67    }
68
[2]69    public int Rows {
70      get { return rows; }
71      set { rows = value; }
72    }
73
74    public int Columns {
75      get { return columns; }
[1786]76      set {
[1287]77        columns = value;
78        if (variableNames == null || variableNames.Length != columns) {
79          variableNames = new string[columns];
80        }
81      }
[2]82    }
83
[333]84    public double[] ScalingFactor {
85      get { return scalingFactor; }
86    }
87    public double[] ScalingOffset {
88      get { return scalingOffset; }
89    }
90
[2]91    public double GetValue(int i, int j) {
92      return samples[columns * i + j];
93    }
94
95    public void SetValue(int i, int j, double v) {
[1786]96      if (v != samples[columns * i + j]) {
[2]97        samples[columns * i + j] = v;
[232]98        CreateDictionaries();
[2]99        FireChanged();
100      }
101    }
102
103    public double[] Samples {
104      get { return samples; }
[237]105      set {
106        scalingFactor = new double[columns];
107        scalingOffset = new double[columns];
[1786]108        for (int i = 0; i < scalingFactor.Length; i++) {
[237]109          scalingFactor[i] = 1.0;
110          scalingOffset[i] = 0.0;
111        }
[2]112        samples = value;
113        CreateDictionaries();
114        FireChanged();
115      }
116    }
117
118    public Dataset() {
119      Name = "-";
[1287]120      variableNames = new string[] { "Var0" };
[2]121      Columns = 1;
122      Rows = 1;
123      Samples = new double[1];
[237]124      scalingOffset = new double[] { 0.0 };
125      scalingFactor = new double[] { 1.0 };
[2]126    }
127
128    private void CreateDictionaries() {
129      // keep a means and ranges dictionary for each column (possible target variable) of the dataset.
[196]130      cachedMeans = new Dictionary<int, Dictionary<int, double>>[columns];
131      cachedRanges = new Dictionary<int, Dictionary<int, double>>[columns];
[1786]132      for (int i = 0; i < columns; i++) {
[196]133        cachedMeans[i] = new Dictionary<int, Dictionary<int, double>>();
134        cachedRanges[i] = new Dictionary<int, Dictionary<int, double>>();
[2]135      }
136    }
137
[1287]138    public string GetVariableName(int variableIndex) {
139      return variableNames[variableIndex];
140    }
141
142    public void SetVariableName(int variableIndex, string name) {
143      variableNames[variableIndex] = name;
144    }
145
146
[2]147    public override IView CreateView() {
148      return new DatasetView(this);
149    }
150
151    public override object Clone(IDictionary<Guid, object> clonedObjects) {
152      Dataset clone = new Dataset();
153      clonedObjects.Add(Guid, clone);
154      double[] cloneSamples = new double[rows * columns];
155      Array.Copy(samples, cloneSamples, samples.Length);
156      clone.rows = rows;
157      clone.columns = columns;
158      clone.Samples = cloneSamples;
159      clone.Name = Name;
[1287]160      clone.variableNames = new string[variableNames.Length];
161      Array.Copy(variableNames, clone.variableNames, variableNames.Length);
[237]162      Array.Copy(scalingFactor, clone.scalingFactor, columns);
163      Array.Copy(scalingOffset, clone.scalingOffset, columns);
[2]164      return clone;
165    }
166
167    public override string ToString() {
168      return ToString(CultureInfo.CurrentCulture.NumberFormat);
169    }
170
171    private string ToString(NumberFormatInfo format) {
172      StringBuilder builder = new StringBuilder();
[1786]173      for (int row = 0; row < rows; row++) {
174        for (int column = 0; column < columns; column++) {
[2]175          builder.Append(";");
[344]176          builder.Append(samples[row * columns + column].ToString("r", format));
[2]177        }
178      }
[1786]179      if (builder.Length > 0) builder.Remove(0, 1);
[2]180      return builder.ToString();
181    }
182
[132]183    public double GetMean(int column) {
[1784]184      return GetMean(column, 0, Rows);
[132]185    }
[2]186
187    public double GetMean(int column, int from, int to) {
[1786]188      if (!cachedMeans[column].ContainsKey(from) || !cachedMeans[column][from].ContainsKey(to)) {
[1784]189        double[] values = new double[to - from];
[1786]190        for (int sample = from; sample < to; sample++) {
[196]191          values[sample - from] = GetValue(sample, column);
192        }
193        double mean = Statistics.Mean(values);
[1786]194        if (!cachedMeans[column].ContainsKey(from)) cachedMeans[column][from] = new Dictionary<int, double>();
[196]195        cachedMeans[column][from][to] = mean;
196        return mean;
197      } else {
198        return cachedMeans[column][from][to];
[2]199      }
200    }
201
[132]202    public double GetRange(int column) {
[1784]203      return GetRange(column, 0, Rows);
[132]204    }
205
[2]206    public double GetRange(int column, int from, int to) {
[1786]207      if (!cachedRanges[column].ContainsKey(from) || !cachedRanges[column][from].ContainsKey(to)) {
[1784]208        double[] values = new double[to - from];
[1786]209        for (int sample = from; sample < to; sample++) {
[196]210          values[sample - from] = GetValue(sample, column);
211        }
212        double range = Statistics.Range(values);
[1786]213        if (!cachedRanges[column].ContainsKey(from)) cachedRanges[column][from] = new Dictionary<int, double>();
[196]214        cachedRanges[column][from][to] = range;
215        return range;
216      } else {
217        return cachedRanges[column][from][to];
[2]218      }
219    }
[232]220
221    public double GetMaximum(int column) {
222      double max = Double.NegativeInfinity;
[1786]223      for (int i = 0; i < Rows; i++) {
[232]224        double val = GetValue(i, column);
[1786]225        if (!double.IsNaN(val) && val > max) max = val;
[232]226      }
227      return max;
228    }
229
230    public double GetMinimum(int column) {
231      double min = Double.PositiveInfinity;
[1786]232      for (int i = 0; i < Rows; i++) {
[232]233        double val = GetValue(i, column);
[1786]234        if (!double.IsNaN(val) && val < min) min = val;
[232]235      }
236      return min;
237    }
[237]238
239    internal void ScaleVariable(int column) {
[1786]240      if (scalingFactor[column] == 1.0 && scalingOffset[column] == 0.0) {
[237]241        double min = GetMinimum(column);
242        double max = GetMaximum(column);
243        double range = max - min;
[1786]244        if (range == 0) ScaleVariable(column, 1.0, -min);
[312]245        else ScaleVariable(column, 1.0 / range, -min);
[237]246      }
247      CreateDictionaries();
248      FireChanged();
249    }
250
[312]251    internal void ScaleVariable(int column, double factor, double offset) {
252      scalingFactor[column] = factor;
253      scalingOffset[column] = offset;
[1786]254      for (int i = 0; i < Rows; i++) {
[312]255        double origValue = samples[i * columns + column];
256        samples[i * columns + column] = (origValue + offset) * factor;
257      }
258      CreateDictionaries();
259      FireChanged();
260    }
261
[237]262    internal void UnscaleVariable(int column) {
[1786]263      if (scalingFactor[column] != 1.0 || scalingOffset[column] != 0.0) {
264        for (int i = 0; i < rows; i++) {
[237]265          double scaledValue = samples[i * columns + column];
[312]266          samples[i * columns + column] = scaledValue / scalingFactor[column] - scalingOffset[column];
[237]267        }
268        scalingFactor[column] = 1.0;
269        scalingOffset[column] = 0.0;
270      }
271    }
[2]272  }
273}
Note: See TracBrowser for help on using the repository browser.