Free cookie consent management tool by TermsFeed Policy Generator

source: branches/3.0/sources/HeuristicLab.DataAnalysis/Dataset.cs @ 134

Last change on this file since 134 was 132, checked in by gkronber, 16 years ago

added methods for GetRange and GetMean that use the full set of rows instead of start and end points (semantically coupled with r128) (ticket #29)

File size: 9.6 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2008 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Xml;
25using HeuristicLab.Core;
26using HeuristicLab.Data;
27using System.Globalization;
28using System.Text;
29
namespace HeuristicLab.DataAnalysis {
  /// <summary>
  /// A named table of double values. The values are kept in one flat array in which the rows are
  /// stored one after the other (the value of row i and column j is at index <c>Columns * i + j</c>).
  /// Means and ranges of row segments of a column are memoized per column.
  /// </summary>
  public class Dataset : ItemBase {

    /// <summary>Format used to persist samples so that parsing yields the same double again.</summary>
    private const string RoundTripFormat = "r";

    /// <summary>Computes a statistic over the rows from..to (both inclusive) of one column.</summary>
    private delegate double SegmentStatistic(int column, int from, int to);

    private string name;
    /// <summary>Gets or sets the name of the dataset.</summary>
    public string Name {
      get { return name; }
      set { name = value; }
    }

    private double[] samples;
    private int rows;

    /// <summary>
    /// Gets or sets the number of rows. Setting it discards all memoized statistics because the
    /// memoization arrays are sized by the row count.
    /// </summary>
    public int Rows {
      get { return rows; }
      set {
        rows = value;
        CreateDictionaries();
      }
    }
    private int columns;

    /// <summary>
    /// Gets or sets the number of columns. Setting it discards all memoized statistics because
    /// there is one memoization dictionary per column.
    /// </summary>
    public int Columns {
      get { return columns; }
      set {
        columns = value;
        CreateDictionaries();
      }
    }

    // One dictionary per column; key = first row of a segment, value = statistic for every
    // possible last row of the segment (index = to - from). NaN marks "not calculated yet".
    private Dictionary<int, double[]>[] ranges;
    private Dictionary<int, double[]>[] means;

    /// <summary>Returns the value stored in row <paramref name="i"/> and column <paramref name="j"/>.</summary>
    public double GetValue(int i, int j) {
      return samples[columns * i + j];
    }

    /// <summary>
    /// Stores <paramref name="v"/> in row <paramref name="i"/> and column <paramref name="j"/>.
    /// If the stored value differs, the memoized statistics of that column are discarded and
    /// the changed event is fired.
    /// </summary>
    public void SetValue(int i, int j, double v) {
      int index = columns * i + j;
      if(v != samples[index]) {
        samples[index] = v;
        // memoized means and ranges of this column are no longer valid
        InvalidateStatistics(j);
        FireChanged();
      }
    }

    /// <summary>
    /// Gets or sets the flat sample array. Setting it discards all memoized statistics and fires
    /// the changed event. Writing into the returned array directly bypasses both.
    /// </summary>
    public double[] Samples {
      get { return samples; }
      set {
        samples = value;
        CreateDictionaries();
        FireChanged();
      }
    }

    private string[] variableNames;
    /// <summary>Gets or sets the names of the variables.</summary>
    public string[] VariableNames {
      get { return variableNames; }
      set { variableNames = value; }
    }

    /// <summary>Creates a dataset named "-" with a single variable "Var0" and one sample.</summary>
    public Dataset() {
      Name = "-";
      VariableNames = new string[] { "Var0" };
      Columns = 1;
      Rows = 1;
      Samples = new double[1];
    }

    // Event handler that discards the memoized statistics.
    // NOTE(review): nothing in this class subscribes this handler - confirm whether it is still needed.
    void samples_Changed(object sender, EventArgs e) {
      CreateDictionaries();
    }

    /// <summary>
    /// (Re-)creates an empty means and ranges dictionary for each column (possible target
    /// variable) of the dataset, which discards everything memoized so far.
    /// </summary>
    private void CreateDictionaries() {
      // a negative column count must not make the allocation throw
      int count = Math.Max(columns, 0);

      means = new Dictionary<int, double[]>[count];
      ranges = new Dictionary<int, double[]>[count];

      for(int i = 0; i < count; i++) {
        means[i] = new Dictionary<int, double[]>();
        ranges[i] = new Dictionary<int, double[]>();
      }
    }

    /// <summary>
    /// Discards the memoized statistics of one column. Falls back to discarding everything when
    /// the column has no dictionaries.
    /// </summary>
    private void InvalidateStatistics(int column) {
      if(means != null && ranges != null && column >= 0 && column < means.Length && column < ranges.Length) {
        means[column].Clear();
        ranges[column].Clear();
      } else {
        CreateDictionaries();
      }
    }

    /// <summary>Creates a <c>DatasetView</c> for this dataset.</summary>
    public override IView CreateView() {
      return new DatasetView(this);
    }

    /// <summary>
    /// Creates a copy of this dataset with its own sample and variable name arrays and registers
    /// it in <paramref name="clonedObjects"/> under the Guid of this dataset.
    /// </summary>
    public override object Clone(IDictionary<Guid, object> clonedObjects) {
      Dataset clone = new Dataset();
      clonedObjects.Add(Guid, clone);
      double[] cloneSamples = new double[rows * columns];
      // never copy more than fits, even if the sample buffer is larger than rows * columns
      Array.Copy(samples, cloneSamples, Math.Min(samples.Length, cloneSamples.Length));
      clone.rows = rows;
      clone.columns = columns;
      clone.Samples = cloneSamples;
      clone.Name = Name;
      clone.VariableNames = (string[])VariableNames.Clone();
      return clone;
    }

    /// <summary>
    /// Writes the dataset to an XML node: name, dimensions and variable names as attributes and
    /// the samples as ';'-separated invariant-culture text.
    /// </summary>
    public override XmlNode GetXmlNode(string name, XmlDocument document, IDictionary<Guid, IStorable> persistedObjects) {
      XmlNode node = base.GetXmlNode(name, document, persistedObjects);
      XmlAttribute problemName = document.CreateAttribute("Name");
      problemName.Value = Name;
      node.Attributes.Append(problemName);
      XmlAttribute dim1 = document.CreateAttribute("Dimension1");
      dim1.Value = rows.ToString(CultureInfo.InvariantCulture.NumberFormat);
      node.Attributes.Append(dim1);
      XmlAttribute dim2 = document.CreateAttribute("Dimension2");
      dim2.Value = columns.ToString(CultureInfo.InvariantCulture.NumberFormat);
      node.Attributes.Append(dim2);

      XmlAttribute variableNamesAttribute = document.CreateAttribute("VariableNames");
      variableNamesAttribute.Value = GetVariableNamesString();
      node.Attributes.Append(variableNamesAttribute);

      // the default format may drop digits; persist with the round-trip format instead
      node.InnerText = ToString(RoundTripFormat, CultureInfo.InvariantCulture.NumberFormat);
      return node;
    }

    /// <summary>
    /// Restores the dataset from an XML node written by <see cref="GetXmlNode"/>. The fields of
    /// this dataset are only overwritten after all samples have been parsed successfully.
    /// </summary>
    /// <exception cref="FormatException">
    /// The number of samples does not match the dimensions or a sample is not a valid double.
    /// </exception>
    public override void Populate(XmlNode node, IDictionary<Guid, IStorable> restoredObjects) {
      base.Populate(node, restoredObjects);
      NumberFormatInfo invariant = CultureInfo.InvariantCulture.NumberFormat;
      string parsedName = node.Attributes["Name"].Value;
      int parsedRows = int.Parse(node.Attributes["Dimension1"].Value, invariant);
      int parsedColumns = int.Parse(node.Attributes["Dimension2"].Value, invariant);
      string[] parsedVariableNames = ParseVariableNamesString(node.Attributes["VariableNames"].Value);

      // Split() returns one (empty) token for an empty string, so an empty dataset is handled separately
      string text = node.InnerText;
      string[] tokens = text.Trim().Length == 0 ? new string[0] : text.Split(';');
      if(tokens.Length != parsedRows * parsedColumns) {
        throw new FormatException("Expected " + (parsedRows * parsedColumns) + " samples but found " + tokens.Length + ".");
      }
      double[] parsedSamples = new double[tokens.Length];
      for(int i = 0; i < tokens.Length; i++) {
        if(!double.TryParse(tokens[i], NumberStyles.Float, invariant, out parsedSamples[i])) {
          throw new FormatException("Can't parse " + tokens[i] + " as double value.");
        }
      }

      Name = parsedName;
      VariableNames = parsedVariableNames;
      rows = parsedRows;
      columns = parsedColumns;
      samples = parsedSamples;
      CreateDictionaries();
    }

    /// <summary>Returns all samples as ';'-separated text formatted for the current culture.</summary>
    public override string ToString() {
      return ToString(null, CultureInfo.CurrentCulture.NumberFormat);
    }

    /// <summary>
    /// Returns all samples row by row as ';'-separated text.
    /// </summary>
    /// <param name="valueFormat">Numeric format string for each sample; null selects the default format.</param>
    /// <param name="format">Culture specific number format.</param>
    private string ToString(string valueFormat, NumberFormatInfo format) {
      StringBuilder builder = new StringBuilder();
      for(int row = 0; row < rows; row++) {
        for(int column = 0; column < columns; column++) {
          if(builder.Length > 0) builder.Append(';');
          builder.Append(samples[row * columns + column].ToString(valueFormat, format));
        }
      }
      return builder.ToString();
    }

    /// <summary>
    /// Returns the variable names separated by "; ". Names that contain ';' cannot be restored
    /// by <see cref="ParseVariableNamesString"/>.
    /// </summary>
    private string GetVariableNamesString() {
      if(variableNames == null) return "";
      return string.Join("; ", variableNames);
    }

    /// <summary>
    /// Splits a ';'-separated list of variable names. Whitespace around each name is removed so
    /// that the "; " separator written by <see cref="GetVariableNamesString"/> does not leave a
    /// leading blank in every name but the first.
    /// </summary>
    private string[] ParseVariableNamesString(string p) {
      string[] tokens = p.Trim().Split(new char[] { ';' }, StringSplitOptions.RemoveEmptyEntries);
      for(int i = 0; i < tokens.Length; i++) {
        tokens[i] = tokens[i].Trim();
      }
      return tokens;
    }

    /// <summary>Returns the mean of all rows of <paramref name="column"/>.</summary>
    public double GetMean(int column) {
      return GetMean(column, 0, Rows - 1);
    }

    /// <summary>
    /// Returns the mean of <paramref name="column"/> over the rows <paramref name="from"/> to
    /// <paramref name="to"/> (both inclusive). The result is memoized because it is called
    /// repeatedly in Evaluators.
    /// </summary>
    public double GetMean(int column, int from, int to) {
      return GetMemoized(means, column, from, to, new SegmentStatistic(CalculateMean));
    }

    private double CalculateMean(int column, int from, int to) {
      return Statistics.Mean(GetColumnValues(column, from, to));
    }

    /// <summary>Returns the range of all rows of <paramref name="column"/>.</summary>
    public double GetRange(int column) {
      return GetRange(column, 0, Rows - 1);
    }

    /// <summary>
    /// Returns the range of <paramref name="column"/> over the rows <paramref name="from"/> to
    /// <paramref name="to"/> (both inclusive). The result is memoized because it is called
    /// repeatedly in Evaluators.
    /// </summary>
    public double GetRange(int column, int from, int to) {
      return GetMemoized(ranges, column, from, to, new SegmentStatistic(CalculateRange));
    }

    private double CalculateRange(int column, int from, int to) {
      return Statistics.Range(GetColumnValues(column, from, to));
    }

    /// <summary>
    /// Looks up the statistic for the given column segment in <paramref name="cache"/> and
    /// calculates and stores it on a miss.
    /// </summary>
    private double GetMemoized(Dictionary<int, double[]>[] cache, int column, int from, int to, SegmentStatistic calculate) {
      Dictionary<int, double[]> columnCache = cache[column];
      double[] fromCache;
      if(!columnCache.TryGetValue(from, out fromCache)) {
        // never saw this from-index => create one slot for every possible to-index
        fromCache = new double[rows - from];
        // NaN (instead of a negative number) marks empty slots, because a mean can be negative
        for(int i = 0; i < fromCache.Length; i++) { fromCache[i] = double.NaN; }
        columnCache[from] = fromCache;
      }
      int slot = to - from;
      if(double.IsNaN(fromCache[slot])) {
        // not yet calculated (or the statistic itself is NaN) => calculate
        fromCache[slot] = calculate(column, from, to);
      }
      return fromCache[slot];
    }

    /// <summary>Copies the values of the rows from..to (both inclusive) of one column into a new array.</summary>
    private double[] GetColumnValues(int column, int from, int to) {
      double[] values = new double[to - from + 1];
      for(int sample = from; sample <= to; sample++) {
        values[sample - from] = GetValue(sample, column);
      }
      return values;
    }
  }
}
Note: See TracBrowser for help on using the repository browser.