Free cookie consent management tool by TermsFeed Policy Generator

source: branches/Operator Architecture Refactoring/HeuristicLab.DataAnalysis/3.3/Dataset.cs @ 2577

Last change on this file since 2577 was 1914, checked in by epitzer, 15 years ago

Migration of DataAnalysis, GP, GP.StructureIdentification and Modeling to new Persistence-3.3 (#603)

File size: 8.5 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2008 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Xml;
25using HeuristicLab.Core;
26using HeuristicLab.Data;
27using System.Globalization;
28using System.Text;
29using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
30
namespace HeuristicLab.DataAnalysis {
  /// <summary>
  /// A rectangular table of <see cref="double"/> values kept in one flat array.
  /// The value of row <c>i</c> and column <c>j</c> lives at index <c>columns * i + j</c>.
  /// Each column additionally has a variable name, a scaling factor and a scaling offset,
  /// and per-column caches for means and ranges of row intervals.
  /// </summary>
  public sealed class Dataset : ItemBase {

    [Storable]
    private string name;

    [Storable]
    private int rows;

    [Storable]
    private int columns;

    [Storable]
    private string[] variableNames;

    // scalingFactor[c] / scalingOffset[c] describe the transformation last applied to column c
    // by ScaleVariable: stored = (original + offset) * factor. (1.0, 0.0) means "unscaled".
    [Storable]
    private double[] scalingFactor;

    [Storable]
    private double[] scalingOffset;

    [Storable]
    private double[] samples;

    // One cache per column: cached[column][from][to] holds the statistic of rows [from, to).
    // Not marked [Storable]; rebuilt by CreateDictionaries().
    private Dictionary<int, Dictionary<int, double>>[] cachedMeans;
    private Dictionary<int, Dictionary<int, double>>[] cachedRanges;

    // Carries no data of its own: the getter always yields null and the setter only rebuilds
    // the caches. NOTE(review): presumably exists so that the persistence framework triggers
    // CreateDictionaries() when it assigns [Storable] members on load - confirm against the
    // Storable serializer, including the order in which members are restored.
    [Storable]
    private object CreateDictionaries_Persistence {
      get { return null; }
      set { CreateDictionaries(); }
    }

    /// <summary>Gets or sets the name of the dataset.</summary>
    public string Name {
      get { return name; }
      set { name = value; }
    }

    /// <summary>
    /// Gets or sets the number of rows. Setting it does not resize <see cref="Samples"/>.
    /// </summary>
    public int Rows {
      get { return rows; }
      set { rows = value; }
    }

    /// <summary>
    /// Gets or sets the number of columns. When the new count differs from the length of the
    /// variable-name array, that array is replaced by a new one of matching length (existing
    /// names are not carried over). Setting it does not resize <see cref="Samples"/> or the
    /// scaling arrays; those are sized when <see cref="Samples"/> is assigned.
    /// </summary>
    public int Columns {
      get { return columns; }
      set {
        columns = value;
        if (variableNames == null || variableNames.Length != columns) {
          variableNames = new string[columns];
        }
      }
    }

    /// <summary>Gets the per-column scaling factors (the internal array, not a copy).</summary>
    public double[] ScalingFactor {
      get { return scalingFactor; }
    }

    /// <summary>Gets the per-column scaling offsets (the internal array, not a copy).</summary>
    public double[] ScalingOffset {
      get { return scalingOffset; }
    }

    /// <summary>Returns the value stored at row <paramref name="i"/>, column <paramref name="j"/>.</summary>
    /// <param name="i">Zero-based row index.</param>
    /// <param name="j">Zero-based column index.</param>
    public double GetValue(int i, int j) {
      return samples[columns * i + j];
    }

    /// <summary>
    /// Stores <paramref name="v"/> at row <paramref name="i"/>, column <paramref name="j"/>.
    /// If the stored value compares unequal to <paramref name="v"/>, the statistic caches are
    /// rebuilt and <c>FireChanged()</c> is called; otherwise nothing happens.
    /// </summary>
    /// <param name="i">Zero-based row index.</param>
    /// <param name="j">Zero-based column index.</param>
    /// <param name="v">The new value.</param>
    public void SetValue(int i, int j, double v) {
      int index = columns * i + j;
      if (v != samples[index]) {
        samples[index] = v;
        CreateDictionaries();
        FireChanged();
      }
    }

    /// <summary>
    /// Gets or sets the flat value array (the internal array, not a copy).
    /// Assigning it resets the scaling of every column to factor 1.0 and offset 0.0 (the scaling
    /// arrays are re-created with the current <see cref="Columns"/> length), rebuilds the
    /// statistic caches and calls <c>FireChanged()</c>.
    /// </summary>
    public double[] Samples {
      get { return samples; }
      set {
        scalingFactor = new double[columns];
        scalingOffset = new double[columns];
        for (int i = 0; i < scalingFactor.Length; i++) {
          scalingFactor[i] = 1.0;
          scalingOffset[i] = 0.0;
        }
        samples = value;
        CreateDictionaries();
        FireChanged();
      }
    }

    /// <summary>
    /// Creates a dataset named "-" with a single row and a single column called "Var0".
    /// </summary>
    public Dataset() {
      Name = "-";
      variableNames = new string[] { "Var0" };
      Columns = 1;
      Rows = 1;
      // The Samples setter also initialises scalingFactor to { 1.0 } and scalingOffset to { 0.0 }
      // for the single column, so no separate assignment of the scaling arrays is needed.
      Samples = new double[1];
    }

    /// <summary>
    /// Replaces both statistic caches with empty ones, one dictionary per column.
    /// </summary>
    private void CreateDictionaries() {
      // keep a means and ranges dictionary for each column (possible target variable) of the dataset.
      cachedMeans = new Dictionary<int, Dictionary<int, double>>[columns];
      cachedRanges = new Dictionary<int, Dictionary<int, double>>[columns];
      for (int i = 0; i < columns; i++) {
        cachedMeans[i] = new Dictionary<int, Dictionary<int, double>>();
        cachedRanges[i] = new Dictionary<int, Dictionary<int, double>>();
      }
    }

    /// <summary>
    /// Rebuilds the statistic caches if they are missing or were built for a different column
    /// count (the <see cref="Columns"/> setter does not rebuild them itself).
    /// </summary>
    private void EnsureDictionaries() {
      if (cachedMeans == null || cachedRanges == null ||
          cachedMeans.Length != columns || cachedRanges.Length != columns) {
        CreateDictionaries();
      }
    }

    /// <summary>
    /// Copies the values of <paramref name="column"/> for rows <paramref name="from"/> (inclusive)
    /// to <paramref name="to"/> (exclusive) into a new array.
    /// </summary>
    private double[] GetColumnValues(int column, int from, int to) {
      double[] values = new double[to - from];
      for (int sample = from; sample < to; sample++) {
        values[sample - from] = GetValue(sample, column);
      }
      return values;
    }

    /// <summary>Returns the name of the variable at <paramref name="variableIndex"/>.</summary>
    public string GetVariableName(int variableIndex) {
      return variableNames[variableIndex];
    }

    /// <summary>Sets the name of the variable at <paramref name="variableIndex"/>.</summary>
    public void SetVariableName(int variableIndex, string name) {
      variableNames[variableIndex] = name;
    }


    /// <summary>Creates a <c>DatasetView</c> for this dataset.</summary>
    public override IView CreateView() {
      return new DatasetView(this);
    }

    /// <summary>
    /// Creates a copy of this dataset with its own value, variable-name and scaling arrays and
    /// registers it in <paramref name="clonedObjects"/> under this object's <c>Guid</c>.
    /// </summary>
    /// <param name="clonedObjects">Map of already cloned objects; the new clone is added to it.</param>
    /// <returns>The cloned <see cref="Dataset"/>.</returns>
    public override object Clone(IDictionary<Guid, object> clonedObjects) {
      Dataset clone = new Dataset();
      clonedObjects.Add(Guid, clone);
      double[] cloneSamples = new double[rows * columns];
      Array.Copy(samples, cloneSamples, samples.Length);
      // rows/columns must be in place before Samples is assigned: the setter sizes the clone's
      // scaling arrays and caches from the column count.
      clone.rows = rows;
      clone.columns = columns;
      clone.Samples = cloneSamples;
      clone.Name = Name;
      clone.variableNames = new string[variableNames.Length];
      Array.Copy(variableNames, clone.variableNames, variableNames.Length);
      // The Samples setter reset the clone's scaling to (1.0, 0.0); restore the real values.
      Array.Copy(scalingFactor, clone.scalingFactor, columns);
      Array.Copy(scalingOffset, clone.scalingOffset, columns);
      return clone;
    }

    /// <summary>
    /// Returns all values, row by row, separated by ';' and formatted with the number format
    /// of the current culture.
    /// </summary>
    public override string ToString() {
      return ToString(CultureInfo.CurrentCulture.NumberFormat);
    }

    /// <summary>
    /// Returns all values, row by row, separated by ';' and formatted with the "r" format
    /// specifier and the given number format. Rows are not delimited differently from columns.
    /// </summary>
    private string ToString(NumberFormatInfo format) {
      StringBuilder builder = new StringBuilder();
      for (int row = 0; row < rows; row++) {
        for (int column = 0; column < columns; column++) {
          // Separator goes between values only, never in front of the first one.
          if (builder.Length > 0) builder.Append(";");
          builder.Append(samples[row * columns + column].ToString("r", format));
        }
      }
      return builder.ToString();
    }

    /// <summary>Returns the mean of <paramref name="column"/> over all rows.</summary>
    public double GetMean(int column) {
      return GetMean(column, 0, Rows);
    }

    /// <summary>
    /// Returns the mean of <paramref name="column"/> over rows <paramref name="from"/> (inclusive)
    /// to <paramref name="to"/> (exclusive), as computed by <c>Statistics.Mean</c>.
    /// The result is cached per (column, from, to) until the data changes.
    /// </summary>
    public double GetMean(int column, int from, int to) {
      EnsureDictionaries();
      Dictionary<int, double> meansByEnd;
      bool hasStart = cachedMeans[column].TryGetValue(from, out meansByEnd);
      double mean;
      if (hasStart && meansByEnd.TryGetValue(to, out mean)) return mean;

      // Compute first, register afterwards, so a failing computation leaves the cache untouched.
      mean = Statistics.Mean(GetColumnValues(column, from, to));
      if (!hasStart) {
        meansByEnd = new Dictionary<int, double>();
        cachedMeans[column][from] = meansByEnd;
      }
      meansByEnd[to] = mean;
      return mean;
    }

    /// <summary>Returns the range of <paramref name="column"/> over all rows.</summary>
    public double GetRange(int column) {
      return GetRange(column, 0, Rows);
    }

    /// <summary>
    /// Returns the range of <paramref name="column"/> over rows <paramref name="from"/> (inclusive)
    /// to <paramref name="to"/> (exclusive), as computed by <c>Statistics.Range</c>.
    /// The result is cached per (column, from, to) until the data changes.
    /// </summary>
    public double GetRange(int column, int from, int to) {
      EnsureDictionaries();
      Dictionary<int, double> rangesByEnd;
      bool hasStart = cachedRanges[column].TryGetValue(from, out rangesByEnd);
      double range;
      if (hasStart && rangesByEnd.TryGetValue(to, out range)) return range;

      // Compute first, register afterwards, so a failing computation leaves the cache untouched.
      range = Statistics.Range(GetColumnValues(column, from, to));
      if (!hasStart) {
        rangesByEnd = new Dictionary<int, double>();
        cachedRanges[column][from] = rangesByEnd;
      }
      rangesByEnd[to] = range;
      return range;
    }

    /// <summary>
    /// Returns the largest non-NaN value of <paramref name="column"/>, or
    /// <see cref="Double.NegativeInfinity"/> if there is none.
    /// </summary>
    public double GetMaximum(int column) {
      double max = Double.NegativeInfinity;
      for (int i = 0; i < Rows; i++) {
        double val = GetValue(i, column);
        if (!double.IsNaN(val) && val > max) max = val;
      }
      return max;
    }

    /// <summary>
    /// Returns the smallest non-NaN value of <paramref name="column"/>, or
    /// <see cref="Double.PositiveInfinity"/> if there is none.
    /// </summary>
    public double GetMinimum(int column) {
      double min = Double.PositiveInfinity;
      for (int i = 0; i < Rows; i++) {
        double val = GetValue(i, column);
        if (!double.IsNaN(val) && val < min) min = val;
      }
      return min;
    }

    /// <summary>
    /// If <paramref name="column"/> is currently unscaled (factor 1.0, offset 0.0), shifts its
    /// values so the minimum becomes 0 and, when the range is non-zero, divides by the range so
    /// the maximum becomes 1. A column that already carries a scaling is left as it is.
    /// In both cases the statistic caches are rebuilt and <c>FireChanged()</c> is called.
    /// </summary>
    internal void ScaleVariable(int column) {
      if (scalingFactor[column] == 1.0 && scalingOffset[column] == 0.0) {
        double min = GetMinimum(column);
        double max = GetMaximum(column);
        double range = max - min;
        // A constant column has range 0; only shift it to avoid dividing by zero.
        if (range == 0) ScaleVariable(column, 1.0, -min);
        else ScaleVariable(column, 1.0 / range, -min);
      }
      CreateDictionaries();
      FireChanged();
    }

    /// <summary>
    /// Replaces every value <c>x</c> of <paramref name="column"/> by
    /// <c>(x + offset) * factor</c>, records factor and offset for that column, rebuilds the
    /// statistic caches and calls <c>FireChanged()</c>. Any previously recorded scaling of the
    /// column is overwritten, not combined.
    /// </summary>
    internal void ScaleVariable(int column, double factor, double offset) {
      scalingFactor[column] = factor;
      scalingOffset[column] = offset;
      for (int i = 0; i < Rows; i++) {
        double origValue = samples[i * columns + column];
        samples[i * columns + column] = (origValue + offset) * factor;
      }
      CreateDictionaries();
      FireChanged();
    }

    /// <summary>
    /// Reverts the recorded scaling of <paramref name="column"/>: every value <c>y</c> becomes
    /// <c>y / factor - offset</c> and the column's factor and offset are reset to 1.0 and 0.0.
    /// Because the stored values change, the statistic caches are rebuilt and
    /// <c>FireChanged()</c> is called. Does nothing for a column that is not scaled.
    /// </summary>
    internal void UnscaleVariable(int column) {
      if (scalingFactor[column] == 1.0 && scalingOffset[column] == 0.0) return;

      double factor = scalingFactor[column];
      double offset = scalingOffset[column];
      for (int i = 0; i < rows; i++) {
        double scaledValue = samples[i * columns + column];
        samples[i * columns + column] = scaledValue / factor - offset;
      }
      scalingFactor[column] = 1.0;
      scalingOffset[column] = 0.0;
      // Cached means/ranges were computed from the scaled values and are now stale.
      CreateDictionaries();
      FireChanged();
    }
  }
}
Note: See TracBrowser for help on using the repository browser.