Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.DataAnalysis/Dataset.cs @ 196

Last change on this file since 196 was 196, checked in by gkronber, 16 years ago

Fixed caching of mean and range in the dataset, giving another nice speed improvement in GP evaluation.

File size: 8.1 KB
Line 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2008 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Xml;
25using HeuristicLab.Core;
26using HeuristicLab.Data;
27using System.Globalization;
28using System.Text;
29
namespace HeuristicLab.DataAnalysis {
  /// <summary>
  /// A table of double values stored row by row in one flat array
  /// (element (i, j) lives at index <c>Columns * i + j</c>), together with a name,
  /// a list of variable names and per-column caches of mean and range values.
  /// </summary>
  public class Dataset : ItemBase {

    private string name;
    private double[] samples;
    private int rows;
    private int columns;
    private string[] variableNames;

    // One cache per column, indexed as cache[column][from][to]; holds the value that was
    // computed for the rows from..to (both inclusive) of that column.
    private Dictionary<int, Dictionary<int, double>>[] cachedMeans;
    private Dictionary<int, Dictionary<int, double>>[] cachedRanges;

    /// <summary>Gets or sets the name of the dataset.</summary>
    public string Name {
      get { return name; }
      set { name = value; }
    }

    /// <summary>Gets or sets the number of rows.</summary>
    /// <remarks>
    /// The flat index of an element does not depend on the row count, so cached values
    /// stay valid when only this property changes.
    /// </remarks>
    public int Rows {
      get { return rows; }
      set { rows = value; }
    }

    /// <summary>Gets or sets the number of columns.</summary>
    /// <remarks>
    /// A different column count changes which array elements belong to which column and how
    /// many per-column caches are needed, so the caches are rebuilt whenever the value changes.
    /// </remarks>
    public int Columns {
      get { return columns; }
      set {
        if(value != columns) {
          columns = value;
          CreateDictionaries();
        }
      }
    }

    /// <summary>
    /// Gets or sets the flat sample array. Setting it discards all cached means and ranges
    /// and calls <c>FireChanged()</c>.
    /// </summary>
    /// <remarks>
    /// The getter hands out the internal array itself; elements written through that
    /// reference bypass the cache invalidation done by <see cref="SetValue"/>.
    /// </remarks>
    public double[] Samples {
      get { return samples; }
      set {
        samples = value;
        CreateDictionaries();
        FireChanged();
      }
    }

    /// <summary>Gets or sets the variable names (serialized in the "VariableNames" XML attribute).</summary>
    public string[] VariableNames {
      get { return variableNames; }
      set { variableNames = value; }
    }

    /// <summary>
    /// Creates a dataset named "-" with one row, one column, a single variable "Var0"
    /// and a one-element sample array.
    /// </summary>
    public Dataset() {
      Name = "-";
      VariableNames = new string[] { "Var0" };
      Columns = 1;
      Rows = 1;
      Samples = new double[1];
    }

    /// <summary>Returns the value stored at row <paramref name="i"/>, column <paramref name="j"/>.</summary>
    /// <param name="i">Row index.</param>
    /// <param name="j">Column index.</param>
    public double GetValue(int i, int j) {
      return samples[columns * i + j];
    }

    /// <summary>
    /// Stores <paramref name="v"/> at row <paramref name="i"/>, column <paramref name="j"/>.
    /// When the stored value actually changes, the cached means and ranges of the column are
    /// dropped and <c>FireChanged()</c> is called.
    /// </summary>
    /// <param name="i">Row index.</param>
    /// <param name="j">Column index.</param>
    /// <param name="v">New value.</param>
    public void SetValue(int i, int j, double v) {
      int index = columns * i + j;
      if(v != samples[index]) {
        samples[index] = v;
        // Without this, GetMean/GetRange would keep returning values computed from the old sample.
        InvalidateCache(j);
        FireChanged();
      }
    }

    /// <summary>
    /// Drops the cached statistics of one column. Falls back to rebuilding every cache when
    /// the index does not address an existing per-column cache.
    /// </summary>
    private void InvalidateCache(int column) {
      if(cachedMeans == null || cachedRanges == null || column < 0 || column >= cachedMeans.Length) {
        CreateDictionaries();
      } else {
        cachedMeans[column].Clear();
        cachedRanges[column].Clear();
      }
    }

    /// <summary>
    /// Allocates a fresh, empty means cache and ranges cache for each column
    /// (each column is a possible target variable).
    /// </summary>
    private void CreateDictionaries() {
      // A negative column count is treated as "no columns" so that property setters cannot throw here.
      int count = Math.Max(columns, 0);
      cachedMeans = new Dictionary<int, Dictionary<int, double>>[count];
      cachedRanges = new Dictionary<int, Dictionary<int, double>>[count];
      for(int i = 0; i < count; i++) {
        cachedMeans[i] = new Dictionary<int, Dictionary<int, double>>();
        cachedRanges[i] = new Dictionary<int, Dictionary<int, double>>();
      }
    }

    /// <summary>Creates a <c>DatasetView</c> for this dataset.</summary>
    public override IView CreateView() {
      return new DatasetView(this);
    }

    /// <summary>
    /// Creates a copy of this dataset with its own sample array and its own variable-name
    /// array, and registers it in <paramref name="clonedObjects"/> under this object's Guid.
    /// </summary>
    /// <param name="clonedObjects">Map of already cloned objects; the new clone is added to it.</param>
    /// <returns>The cloned dataset.</returns>
    public override object Clone(IDictionary<Guid, object> clonedObjects) {
      Dataset clone = new Dataset();
      clonedObjects.Add(Guid, clone);
      // Dimensions first, so that assigning Samples sizes the clone's caches correctly.
      clone.rows = rows;
      clone.columns = columns;
      // Copy the array as it is; copying into a rows * columns buffer throws when the
      // source array is longer than that.
      clone.Samples = (double[])samples.Clone();
      clone.Name = Name;
      clone.VariableNames = (string[])VariableNames.Clone();
      return clone;
    }

    /// <summary>
    /// Serializes the dataset: name, dimensions and variable names become attributes of the
    /// node, the samples become its inner text (';'-separated, invariant number format).
    /// </summary>
    public override XmlNode GetXmlNode(string name, XmlDocument document, IDictionary<Guid, IStorable> persistedObjects) {
      XmlNode node = base.GetXmlNode(name, document, persistedObjects);
      NumberFormatInfo invariant = CultureInfo.InvariantCulture.NumberFormat;

      XmlAttribute nameAttribute = document.CreateAttribute("Name");
      nameAttribute.Value = Name;
      node.Attributes.Append(nameAttribute);

      XmlAttribute rowsAttribute = document.CreateAttribute("Dimension1");
      rowsAttribute.Value = rows.ToString(invariant);
      node.Attributes.Append(rowsAttribute);

      XmlAttribute columnsAttribute = document.CreateAttribute("Dimension2");
      columnsAttribute.Value = columns.ToString(invariant);
      node.Attributes.Append(columnsAttribute);

      XmlAttribute variableNamesAttribute = document.CreateAttribute("VariableNames");
      variableNamesAttribute.Value = GetVariableNamesString();
      node.Attributes.Append(variableNamesAttribute);

      node.InnerText = ToString(invariant);
      return node;
    }

    /// <summary>
    /// Restores the dataset from a node written by <see cref="GetXmlNode"/>.
    /// Dimensions and samples are only taken over after all of them parsed successfully.
    /// </summary>
    /// <exception cref="FormatException">
    /// The number of values does not equal Dimension1 * Dimension2, or a value is not a valid double.
    /// </exception>
    public override void Populate(XmlNode node, IDictionary<Guid, IStorable> restoredObjects) {
      base.Populate(node, restoredObjects);
      NumberFormatInfo invariant = CultureInfo.InvariantCulture.NumberFormat;

      Name = node.Attributes["Name"].Value;
      int parsedRows = int.Parse(node.Attributes["Dimension1"].Value, invariant);
      int parsedColumns = int.Parse(node.Attributes["Dimension2"].Value, invariant);
      VariableNames = ParseVariableNamesString(node.Attributes["VariableNames"].Value);

      int expected = parsedRows * parsedColumns;
      string text = node.InnerText;
      // Splitting an empty string yields one empty token, which would make an empty
      // dataset (written as empty inner text) impossible to load again.
      string[] tokens;
      if(expected == 0 && text.Trim().Length == 0) {
        tokens = new string[0];
      } else {
        tokens = text.Split(';');
      }
      if(tokens.Length != expected) {
        throw new FormatException("Expected " + expected + " values but found " + tokens.Length + ".");
      }

      double[] parsed = new double[tokens.Length];
      for(int k = 0; k < tokens.Length; k++) {
        if(!double.TryParse(tokens[k], NumberStyles.Float, invariant, out parsed[k])) {
          throw new FormatException("Can't parse " + tokens[k] + " as double value.");
        }
      }

      rows = parsedRows;
      columns = parsedColumns;
      samples = parsed;
      CreateDictionaries();
    }

    /// <summary>Returns all samples as a ';'-separated list using the current culture's number format.</summary>
    public override string ToString() {
      return ToString(CultureInfo.CurrentCulture.NumberFormat);
    }

    /// <summary>Returns the samples of rows 0..Rows-1 in row order, separated by ';'.</summary>
    private string ToString(NumberFormatInfo format) {
      StringBuilder builder = new StringBuilder();
      for(int row = 0; row < rows; row++) {
        for(int column = 0; column < columns; column++) {
          if(builder.Length > 0) builder.Append(';');
          builder.Append(samples[row * columns + column].ToString(format));
        }
      }
      return builder.ToString();
    }

    /// <summary>Joins the variable names with "; " for storage in the XML attribute.</summary>
    private string GetVariableNamesString() {
      // string.Join leaves every name untouched; trimming ';' and ' ' off the end of the
      // joined string would also strip those characters from the last name.
      return string.Join("; ", variableNames);
    }

    /// <summary>
    /// Splits a string written by <see cref="GetVariableNamesString"/> back into the names.
    /// </summary>
    private string[] ParseVariableNamesString(string p) {
      p = p.Trim();
      string[] tokens = p.Split(new char[] { ';' }, StringSplitOptions.RemoveEmptyEntries);
      // The writer separates names with "; ", so every token after the first starts with
      // a blank that is not part of the name.
      for(int i = 0; i < tokens.Length; i++) {
        tokens[i] = tokens[i].Trim();
      }
      return tokens;
    }

    /// <summary>Returns the mean of all rows (0..Rows-1) of <paramref name="column"/>.</summary>
    public double GetMean(int column) {
      return GetMean(column, 0, Rows - 1);
    }

    /// <summary>
    /// Returns the result of <c>Statistics.Mean</c> for the rows <paramref name="from"/>..<paramref name="to"/>
    /// (both inclusive) of <paramref name="column"/>; the result is cached per (column, from, to).
    /// </summary>
    public double GetMean(int column, int from, int to) {
      Dictionary<int, Dictionary<int, double>> cache = cachedMeans[column];
      double mean;
      if(!TryGetCached(cache, from, to, out mean)) {
        mean = Statistics.Mean(GetColumnValues(column, from, to));
        StoreCached(cache, from, to, mean);
      }
      return mean;
    }

    /// <summary>Returns the range of all rows (0..Rows-1) of <paramref name="column"/>.</summary>
    public double GetRange(int column) {
      return GetRange(column, 0, Rows - 1);
    }

    /// <summary>
    /// Returns the result of <c>Statistics.Range</c> for the rows <paramref name="from"/>..<paramref name="to"/>
    /// (both inclusive) of <paramref name="column"/>; the result is cached per (column, from, to).
    /// </summary>
    public double GetRange(int column, int from, int to) {
      Dictionary<int, Dictionary<int, double>> cache = cachedRanges[column];
      double range;
      if(!TryGetCached(cache, from, to, out range)) {
        range = Statistics.Range(GetColumnValues(column, from, to));
        StoreCached(cache, from, to, range);
      }
      return range;
    }

    /// <summary>Copies the values of rows from..to (both inclusive) of one column into a new array.</summary>
    private double[] GetColumnValues(int column, int from, int to) {
      double[] values = new double[to - from + 1];
      for(int sample = from; sample <= to; sample++) {
        values[sample - from] = GetValue(sample, column);
      }
      return values;
    }

    /// <summary>Looks up the value cached for (from, to); returns false when there is none.</summary>
    private static bool TryGetCached(Dictionary<int, Dictionary<int, double>> cache, int from, int to, out double value) {
      Dictionary<int, double> byEnd;
      if(cache.TryGetValue(from, out byEnd)) {
        return byEnd.TryGetValue(to, out value);
      }
      value = 0.0;
      return false;
    }

    /// <summary>Stores a value for (from, to), creating the inner dictionary on first use.</summary>
    private static void StoreCached(Dictionary<int, Dictionary<int, double>> cache, int from, int to, double value) {
      Dictionary<int, double> byEnd;
      if(!cache.TryGetValue(from, out byEnd)) {
        byEnd = new Dictionary<int, double>();
        cache[from] = byEnd;
      }
      byEnd[to] = value;
    }
  }
}
Note: See TracBrowser for help on using the repository browser.