
source: branches/CloningRefactorBranch/HeuristicLab.DataAnalysis/Dataset.cs @ 887

Last change on this file since revision 887 was revision 887, checked in by gkronber, 16 years ago

Refactored cloning in all plugins except: HL.Communication, HL.Hive, HL.GP, HL.Routing, HL.Scheduling, HL.SimOpt, HL.Visualization

#285 (Cloning could be improved by creating objects at the bottom of the cloning chain with 'new' instead of at the top with Activator.CreateInstance())
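
To illustrate the pattern this ticket describes, here is a minimal sketch using a hypothetical ExampleItem class (not a HeuristicLab type): the old approach creates the clone at the top of the cloning chain via Activator.CreateInstance(GetType()), while the refactored approach, which Dataset below follows, creates it at the bottom with 'new' through a copy constructor.

using System;
using System.Collections.Generic;

// Hypothetical stand-in type, shown only to contrast the two cloning patterns.
public class ExampleItem {
  private string name;

  public ExampleItem() { }

  // Refactored pattern: a copy constructor does the field-wise copying ...
  protected ExampleItem(ExampleItem original, IDictionary<Guid, object> clonedObjects) {
    this.name = original.name;
  }

  // ... so Clone can create the object at the bottom of the cloning chain with 'new'.
  public virtual object Clone(IDictionary<Guid, object> clonedObjects) {
    return new ExampleItem(this, clonedObjects);
  }

  // Old pattern for comparison: the object is created at the top of the chain via
  // reflection and each level of the hierarchy copies its fields into it afterwards.
  public virtual object CloneViaActivator(IDictionary<Guid, object> clonedObjects) {
    ExampleItem clone = (ExampleItem)Activator.CreateInstance(this.GetType());
    clone.name = this.name;
    return clone;
  }
}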

File size: 13.2 KB
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2008 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections.Generic;
using System.Xml;
using HeuristicLab.Core;
using HeuristicLab.Data;
using System.Globalization;
using System.Text;

namespace HeuristicLab.DataAnalysis {
  public sealed class Dataset : ItemBase {

    private string name;
    private double[] samples;
    private int rows;
    private int columns;
    private Dictionary<int, Dictionary<int, double>>[] cachedMeans;
    private Dictionary<int, Dictionary<int, double>>[] cachedRanges;
    private double[] scalingFactor;
    private double[] scalingOffset;

    public string Name {
      get { return name; }
      set { name = value; }
    }

    public int Rows {
      get { return rows; }
      set { rows = value; }
    }

    public int Columns {
      get { return columns; }
      set { columns = value; }
    }

    public double[] ScalingFactor {
      get { return scalingFactor; }
    }
    public double[] ScalingOffset {
      get { return scalingOffset; }
    }

    public double GetValue(int i, int j) {
      return samples[columns * i + j];
    }

    public void SetValue(int i, int j, double v) {
      if(v != samples[columns * i + j]) {
        samples[columns * i + j] = v;
        CreateDictionaries();
        FireChanged();
      }
    }

    public double[] Samples {
      get { return samples; }
      set {
        scalingFactor = new double[columns];
        scalingOffset = new double[columns];
        for(int i = 0; i < scalingFactor.Length; i++) {
          scalingFactor[i] = 1.0;
          scalingOffset[i] = 0.0;
        }
        samples = value;
        CreateDictionaries();
        FireChanged();
      }
    }

    private string[] variableNames;
    public string[] VariableNames {
      get { return variableNames; }
      set { variableNames = value; }
    }

    public Dataset() {
      Name = "-";
      VariableNames = new string[] { "Var0" };
      Columns = 1;
      Rows = 1;
      Samples = new double[1];
      scalingOffset = new double[] { 0.0 };
      scalingFactor = new double[] { 1.0 };
    }

    /// <summary>
    /// Copy constructor to create deep clones.
    /// </summary>
    /// <param name="original">The instance to be cloned.</param>
    public Dataset(Dataset original) : this(original, new Dictionary<Guid, object>()) { }
    /// <summary>
    /// Copy constructor to create deep clones reusing already cloned object references.
    /// </summary>
    /// <param name="original">The instance to be cloned.</param>
    /// <param name="clonedObjects">Already cloned objects (for referential integrity).</param>
    protected Dataset(Dataset original, IDictionary<Guid, object> clonedObjects)
      : base(original, clonedObjects) {
      double[] cloneSamples = new double[original.rows * original.columns];
      Array.Copy(original.samples, cloneSamples, original.samples.Length);
      this.rows = original.rows;
      this.columns = original.columns;
      this.Samples = cloneSamples;
      this.Name = original.Name;
      this.VariableNames = new string[original.VariableNames.Length];
      Array.Copy(original.VariableNames, this.VariableNames, original.VariableNames.Length);
      Array.Copy(original.scalingFactor, this.scalingFactor, original.columns);
      Array.Copy(original.scalingOffset, this.scalingOffset, original.columns);
    }

    private void CreateDictionaries() {
      // keep a means and ranges dictionary for each column (possible target variable) of the dataset.
      cachedMeans = new Dictionary<int, Dictionary<int, double>>[columns];
      cachedRanges = new Dictionary<int, Dictionary<int, double>>[columns];
      for(int i = 0; i < columns; i++) {
        cachedMeans[i] = new Dictionary<int, Dictionary<int, double>>();
        cachedRanges[i] = new Dictionary<int, Dictionary<int, double>>();
      }
    }

    public override IView CreateView() {
      return new DatasetView(this);
    }

    /// <summary>
    /// Creates a deep clone with the copy constructor reusing already cloned
    /// object references.
    /// </summary>
    /// <param name="clonedObjects">Already cloned objects (for referential integrity).</param>
    /// <returns>The cloned instance.</returns>
    public override object Clone(IDictionary<Guid, object> clonedObjects) {
      return new Dataset(this, clonedObjects);
    }

    public override XmlNode GetXmlNode(string name, XmlDocument document, IDictionary<Guid, IStorable> persistedObjects) {
      XmlNode node = base.GetXmlNode(name, document, persistedObjects);
      XmlAttribute problemName = document.CreateAttribute("Name");
      problemName.Value = Name;
      node.Attributes.Append(problemName);
      XmlAttribute dim1 = document.CreateAttribute("Dimension1");
      dim1.Value = rows.ToString(CultureInfo.InvariantCulture.NumberFormat);
      node.Attributes.Append(dim1);
      XmlAttribute dim2 = document.CreateAttribute("Dimension2");
      dim2.Value = columns.ToString(CultureInfo.InvariantCulture.NumberFormat);
      node.Attributes.Append(dim2);
      XmlAttribute variableNames = document.CreateAttribute("VariableNames");
      variableNames.Value = GetVariableNamesString();
      node.Attributes.Append(variableNames);
      XmlAttribute scalingFactorsAttribute = document.CreateAttribute("ScalingFactors");
      scalingFactorsAttribute.Value = GetString(scalingFactor);
      node.Attributes.Append(scalingFactorsAttribute);
      XmlAttribute scalingOffsetsAttribute = document.CreateAttribute("ScalingOffsets");
      scalingOffsetsAttribute.Value = GetString(scalingOffset);
      node.Attributes.Append(scalingOffsetsAttribute);
      node.InnerText = ToString(CultureInfo.InvariantCulture.NumberFormat);
      return node;
    }

    public override void Populate(XmlNode node, IDictionary<Guid, IStorable> restoredObjects) {
      base.Populate(node, restoredObjects);
      Name = node.Attributes["Name"].Value;
      rows = int.Parse(node.Attributes["Dimension1"].Value, CultureInfo.InvariantCulture.NumberFormat);
      columns = int.Parse(node.Attributes["Dimension2"].Value, CultureInfo.InvariantCulture.NumberFormat);

      VariableNames = ParseVariableNamesString(node.Attributes["VariableNames"].Value);
      if(node.Attributes["ScalingFactors"] != null)
        scalingFactor = ParseDoubleString(node.Attributes["ScalingFactors"].Value);
      else {
        scalingFactor = new double[columns]; // compatibility with old serialization format
        for(int i = 0; i < scalingFactor.Length; i++) scalingFactor[i] = 1.0;
      }
      if(node.Attributes["ScalingOffsets"] != null)
        scalingOffset = ParseDoubleString(node.Attributes["ScalingOffsets"].Value);
      else {
        scalingOffset = new double[columns]; // compatibility with old serialization format
        for(int i = 0; i < scalingOffset.Length; i++) scalingOffset[i] = 0.0;
      }

      string[] tokens = node.InnerText.Split(';');
      if(tokens.Length != rows * columns) throw new FormatException();
      samples = new double[rows * columns];
      for(int row = 0; row < rows; row++) {
        for(int column = 0; column < columns; column++) {
          if(double.TryParse(tokens[row * columns + column], NumberStyles.Float, CultureInfo.InvariantCulture.NumberFormat, out samples[row * columns + column]) == false) {
            throw new FormatException("Can't parse " + tokens[row * columns + column] + " as double value.");
          }
        }
      }
      CreateDictionaries();
    }

    public override string ToString() {
      return ToString(CultureInfo.CurrentCulture.NumberFormat);
    }

    private string ToString(NumberFormatInfo format) {
      StringBuilder builder = new StringBuilder();
      for(int row = 0; row < rows; row++) {
        for(int column = 0; column < columns; column++) {
          builder.Append(";");
          builder.Append(samples[row * columns + column].ToString("r", format));
        }
      }
      if(builder.Length > 0) builder.Remove(0, 1);
      return builder.ToString();
    }

    private string GetVariableNamesString() {
      string s = "";
      for(int i = 0; i < variableNames.Length; i++) {
        s += variableNames[i] + "; ";
      }

      if(variableNames.Length > 0) {
        s = s.TrimEnd(';', ' ');
      }
      return s;
    }
    private string GetString(double[] xs) {
      string s = "";
      for(int i = 0; i < xs.Length; i++) {
        s += xs[i].ToString("r", CultureInfo.InvariantCulture) + "; ";
      }

      if(xs.Length > 0) {
        s = s.TrimEnd(';', ' ');
      }
      return s;
    }

    private string[] ParseVariableNamesString(string p) {
      p = p.Trim();
      string[] tokens = p.Split(new char[] { ';' }, StringSplitOptions.RemoveEmptyEntries);
      for(int i = 0; i < tokens.Length; i++) tokens[i] = tokens[i].Trim();
      return tokens;
    }
    private double[] ParseDoubleString(string s) {
      s = s.Trim();
      string[] ss = s.Split(new char[] { ';' }, StringSplitOptions.RemoveEmptyEntries);
      double[] xs = new double[ss.Length];
      for(int i = 0; i < xs.Length; i++) {
        xs[i] = double.Parse(ss[i], CultureInfo.InvariantCulture);
      }
      return xs;
    }

    public double GetMean(int column) {
      return GetMean(column, 0, Rows - 1);
    }

    public double GetMean(int column, int from, int to) {
      if(!cachedMeans[column].ContainsKey(from) || !cachedMeans[column][from].ContainsKey(to)) {
        double[] values = new double[to - from + 1];
        for(int sample = from; sample <= to; sample++) {
          values[sample - from] = GetValue(sample, column);
        }
        double mean = Statistics.Mean(values);
        if(!cachedMeans[column].ContainsKey(from)) cachedMeans[column][from] = new Dictionary<int, double>();
        cachedMeans[column][from][to] = mean;
        return mean;
      } else {
        return cachedMeans[column][from][to];
      }
    }

    public double GetRange(int column) {
      return GetRange(column, 0, Rows - 1);
    }

    public double GetRange(int column, int from, int to) {
      if(!cachedRanges[column].ContainsKey(from) || !cachedRanges[column][from].ContainsKey(to)) {
        double[] values = new double[to - from + 1];
        for(int sample = from; sample <= to; sample++) {
          values[sample - from] = GetValue(sample, column);
        }
        double range = Statistics.Range(values);
        if(!cachedRanges[column].ContainsKey(from)) cachedRanges[column][from] = new Dictionary<int, double>();
        cachedRanges[column][from][to] = range;
        return range;
      } else {
        return cachedRanges[column][from][to];
      }
    }

    public double GetMaximum(int column) {
      double max = Double.NegativeInfinity;
      for(int i = 0; i < Rows; i++) {
        double val = GetValue(i, column);
        if(val > max) max = val;
      }
      return max;
    }

    public double GetMinimum(int column) {
      double min = Double.PositiveInfinity;
      for(int i = 0; i < Rows; i++) {
        double val = GetValue(i, column);
        if(val < min) min = val;
      }
      return min;
    }

    // Scales the column linearly to the unit interval, i.e. x' = (x - min) / (max - min);
    // the values of a column that has already been scaled are left unchanged.
    internal void ScaleVariable(int column) {
      if(scalingFactor[column] == 1.0 && scalingOffset[column] == 0.0) {
        double min = GetMinimum(column);
        double max = GetMaximum(column);
        double range = max - min;
        if(range == 0) ScaleVariable(column, 1.0, -min);
        else ScaleVariable(column, 1.0 / range, -min);
      }
      CreateDictionaries();
      FireChanged();
    }

    // Applies the linear transformation x' = (x + offset) * factor to the column and
    // remembers factor and offset so the transformation can be undone later.
    internal void ScaleVariable(int column, double factor, double offset) {
      scalingFactor[column] = factor;
      scalingOffset[column] = offset;
      for(int i = 0; i < Rows; i++) {
        double origValue = samples[i * columns + column];
        samples[i * columns + column] = (origValue + offset) * factor;
      }
      CreateDictionaries();
      FireChanged();
    }

    // Reverts a previous ScaleVariable call (x = x' / factor - offset) and resets the
    // column's scaling parameters to the identity transformation.
    internal void UnscaleVariable(int column) {
      if(scalingFactor[column] != 1.0 || scalingOffset[column] != 0.0) {
        for(int i = 0; i < rows; i++) {
          double scaledValue = samples[i * columns + column];
          samples[i * columns + column] = scaledValue / scalingFactor[column] - scalingOffset[column];
        }
        scalingFactor[column] = 1.0;
        scalingOffset[column] = 0.0;
      }
    }
  }
}
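
The snippet below is a minimal usage sketch, not part of the repository file; it assumes a project that references this HeuristicLab.DataAnalysis branch and its dependencies, and the DatasetUsageSketch wrapper is hypothetical. Note that Columns and Rows must be assigned before Samples, because the Samples setter sizes the scaling arrays from Columns.

using System;
using System.Collections.Generic;
using HeuristicLab.DataAnalysis;

public static class DatasetUsageSketch {
  public static void Run() {
    // Two variables, three rows; samples are stored row-wise in one flat array.
    Dataset ds = new Dataset();
    ds.Name = "demo";
    ds.VariableNames = new string[] { "x", "y" };
    ds.Columns = 2;
    ds.Rows = 3;
    ds.Samples = new double[] { 1.0, 10.0,
                                2.0, 20.0,
                                3.0, 30.0 };   // setter also resets the scaling arrays

    Console.WriteLine(ds.GetValue(1, 1));      // 20 (row 1, column 1)
    Console.WriteLine(ds.GetMean(0));          // mean of column 0 over all rows: 2 (cached internally)
    Console.WriteLine(ds.GetRange(1, 0, 2));   // range of column 1 over rows 0..2

    // Deep clone through the copy constructor chain shown in the listing.
    Dataset copy = (Dataset)ds.Clone(new Dictionary<Guid, object>());
    copy.SetValue(0, 0, 99.0);                 // modifies only the clone, not ds
  }
}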