#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2008 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections.Generic;
using System.Xml;
using HeuristicLab.Core;
using HeuristicLab.Data;
using System.Globalization;
using System.Text;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;

namespace HeuristicLab.DataAnalysis {
  /// <summary>
  /// A table of double values stored in a single flat array in row-major order
  /// (element (row, column) lives at index <c>row * columns + column</c>), together with
  /// per-column variable names, per-column scaling parameters and caches for
  /// per-column mean and range values.
  /// </summary>
  public sealed class Dataset : ItemBase {
    [Storable]
    private string name;
    [Storable]
    private int rows;
    [Storable]
    private int columns;
    [Storable]
    private string[] variableNames;
    [Storable]
    private double[] scalingFactor;
    [Storable]
    private double[] scalingOffset;
    [Storable]
    private double[] samples;

    // One cache per column, keyed as cache[column][from][to] -> statistic of rows [from, to).
    // The caches are not marked [Storable]; they are rebuilt by CreateDictionaries().
    private Dictionary<int, Dictionary<int, double>>[] cachedMeans;
    private Dictionary<int, Dictionary<int, double>>[] cachedRanges;

    /// <summary>
    /// Storable pseudo-property that carries no data: the getter always returns null and the
    /// setter ignores its value and only rebuilds the (non-stored) statistic caches.
    /// </summary>
    [Storable]
    private object CreateDictionaries_Persistence {
      get { return null; }
      set { CreateDictionaries(); }
    }

    /// <summary>Gets or sets the name of the dataset.</summary>
    public string Name {
      get { return name; }
      set { name = value; }
    }

    /// <summary>
    /// Gets or sets the number of rows. The setter only stores the value; it neither resizes
    /// <see cref="Samples"/> nor resets the statistic caches.
    /// </summary>
    public int Rows {
      get { return rows; }
      set { rows = value; }
    }

    /// <summary>
    /// Gets or sets the number of columns. The setter replaces the variable-name array with a
    /// new (empty) one when its length does not match the new column count. It does not resize
    /// <see cref="Samples"/>, the scaling arrays or the statistic caches; those are re-created
    /// when <see cref="Samples"/> is assigned.
    /// </summary>
    public int Columns {
      get { return columns; }
      set {
        columns = value;
        if (variableNames == null || variableNames.Length != columns) {
          variableNames = new string[columns];
        }
      }
    }

    /// <summary>Gets the per-column scaling factors (the internal array, not a copy).</summary>
    public double[] ScalingFactor {
      get { return scalingFactor; }
    }

    /// <summary>Gets the per-column scaling offsets (the internal array, not a copy).</summary>
    public double[] ScalingOffset {
      get { return scalingOffset; }
    }

    /// <summary>
    /// Gets or sets the flat row-major sample array. The getter returns the internal array
    /// itself, so writing through it bypasses cache invalidation and change notification.
    /// The setter resets the scaling of every column to factor 1.0 / offset 0.0 (sized by the
    /// current column count), stores the array, rebuilds the statistic caches and raises the
    /// change notification.
    /// </summary>
    public double[] Samples {
      get { return samples; }
      set {
        scalingFactor = new double[columns];
        scalingOffset = new double[columns];
        for (int i = 0; i < scalingFactor.Length; i++) {
          scalingFactor[i] = 1.0;
          scalingOffset[i] = 0.0;
        }
        samples = value;
        CreateDictionaries();
        FireChanged();
      }
    }

    /// <summary>
    /// Creates a 1x1 dataset named "-" with the single variable "Var0", a single sample of 0.0
    /// and unscaled columns.
    /// </summary>
    public Dataset() {
      Name = "-";
      variableNames = new string[] { "Var0" };
      Columns = 1;
      Rows = 1;
      // The Samples setter also initializes scalingFactor to { 1.0 } and scalingOffset to { 0.0 }.
      Samples = new double[1];
    }

    /// <summary>Returns the value stored at row <paramref name="i"/>, column <paramref name="j"/>.</summary>
    public double GetValue(int i, int j) {
      return samples[columns * i + j];
    }

    /// <summary>
    /// Stores <paramref name="v"/> at row <paramref name="i"/>, column <paramref name="j"/>.
    /// When the value differs from the stored one, the statistic caches are reset and the
    /// change notification is raised; otherwise nothing happens.
    /// </summary>
    public void SetValue(int i, int j, double v) {
      int index = columns * i + j;
      if (v != samples[index]) {
        samples[index] = v;
        CreateDictionaries();
        FireChanged();
      }
    }

    /// <summary>
    /// Replaces both statistic caches with fresh, empty dictionaries, one per column.
    /// </summary>
    private void CreateDictionaries() {
      // keep a means and ranges dictionary for each column (possible target variable) of the dataset.
      cachedMeans = new Dictionary<int, Dictionary<int, double>>[columns];
      cachedRanges = new Dictionary<int, Dictionary<int, double>>[columns];
      for (int i = 0; i < columns; i++) {
        cachedMeans[i] = new Dictionary<int, Dictionary<int, double>>();
        cachedRanges[i] = new Dictionary<int, Dictionary<int, double>>();
      }
    }

    /// <summary>Returns the name of the variable (column) at <paramref name="variableIndex"/>.</summary>
    public string GetVariableName(int variableIndex) {
      return variableNames[variableIndex];
    }

    /// <summary>Sets the name of the variable (column) at <paramref name="variableIndex"/>.</summary>
    public void SetVariableName(int variableIndex, string name) {
      variableNames[variableIndex] = name;
    }

    /// <summary>Creates a <c>DatasetView</c> for this dataset.</summary>
    public override IView CreateView() {
      return new DatasetView(this);
    }

    /// <summary>
    /// Creates a copy of this dataset with its own sample, variable-name and scaling arrays,
    /// and registers the copy in <paramref name="clonedObjects"/> under this object's Guid.
    /// </summary>
    /// <param name="clonedObjects">Dictionary of already cloned objects, keyed by Guid.</param>
    /// <returns>The cloned dataset.</returns>
    public override object Clone(IDictionary<Guid, object> clonedObjects) {
      // NOTE(review): the generic arguments of IDictionary were lost in the reviewed text and
      // are reconstructed from the Add(Guid, clone) call below - confirm against ItemBase.Clone.
      Dataset clone = new Dataset();
      clonedObjects.Add(Guid, clone);
      double[] cloneSamples = new double[rows * columns];
      Array.Copy(samples, cloneSamples, samples.Length);
      clone.rows = rows;
      // columns must be set before Samples, because the Samples setter sizes the scaling
      // arrays and the statistic caches by the column count.
      clone.columns = columns;
      clone.Samples = cloneSamples;
      clone.Name = Name;
      clone.variableNames = new string[variableNames.Length];
      Array.Copy(variableNames, clone.variableNames, variableNames.Length);
      Array.Copy(scalingFactor, clone.scalingFactor, columns);
      Array.Copy(scalingOffset, clone.scalingOffset, columns);
      return clone;
    }

    /// <summary>
    /// Returns all samples in row-major order, separated by ';' and formatted with the
    /// number format of the current culture.
    /// </summary>
    public override string ToString() {
      return ToString(CultureInfo.CurrentCulture.NumberFormat);
    }

    /// <summary>
    /// Returns all samples in row-major order, separated by ';', each written with the
    /// round-trip ("r") format specifier and the given number format.
    /// </summary>
    private string ToString(NumberFormatInfo format) {
      StringBuilder builder = new StringBuilder();
      for (int row = 0; row < rows; row++) {
        for (int column = 0; column < columns; column++) {
          // Separator goes between values only, so no leading ';' has to be removed afterwards.
          if (builder.Length > 0) builder.Append(";");
          builder.Append(samples[row * columns + column].ToString("r", format));
        }
      }
      return builder.ToString();
    }

    /// <summary>
    /// Copies the values of <paramref name="column"/> for the rows
    /// [<paramref name="from"/>, <paramref name="to"/>) into a new array.
    /// </summary>
    private double[] GetColumnValues(int column, int from, int to) {
      double[] values = new double[to - from];
      for (int sample = from; sample < to; sample++) {
        values[sample - from] = GetValue(sample, column);
      }
      return values;
    }

    /// <summary>Returns the mean of <paramref name="column"/> over all rows.</summary>
    public double GetMean(int column) {
      return GetMean(column, 0, Rows);
    }

    /// <summary>
    /// Returns the mean of <paramref name="column"/> over the rows
    /// [<paramref name="from"/>, <paramref name="to"/>), as computed by <c>Statistics.Mean</c>.
    /// The result is cached per (column, from, to) until the caches are reset.
    /// </summary>
    public double GetMean(int column, int from, int to) {
      Dictionary<int, double> meansByEnd;
      double mean;
      bool hasFrom = cachedMeans[column].TryGetValue(from, out meansByEnd);
      if (hasFrom && meansByEnd.TryGetValue(to, out mean)) return mean;

      // Compute before touching the cache so a failing computation leaves the cache unchanged.
      mean = Statistics.Mean(GetColumnValues(column, from, to));
      if (!hasFrom) {
        meansByEnd = new Dictionary<int, double>();
        cachedMeans[column][from] = meansByEnd;
      }
      meansByEnd[to] = mean;
      return mean;
    }

    /// <summary>Returns the range of <paramref name="column"/> over all rows.</summary>
    public double GetRange(int column) {
      return GetRange(column, 0, Rows);
    }

    /// <summary>
    /// Returns the range of <paramref name="column"/> over the rows
    /// [<paramref name="from"/>, <paramref name="to"/>), as computed by <c>Statistics.Range</c>.
    /// The result is cached per (column, from, to) until the caches are reset.
    /// </summary>
    public double GetRange(int column, int from, int to) {
      Dictionary<int, double> rangesByEnd;
      double range;
      bool hasFrom = cachedRanges[column].TryGetValue(from, out rangesByEnd);
      if (hasFrom && rangesByEnd.TryGetValue(to, out range)) return range;

      // Compute before touching the cache so a failing computation leaves the cache unchanged.
      range = Statistics.Range(GetColumnValues(column, from, to));
      if (!hasFrom) {
        rangesByEnd = new Dictionary<int, double>();
        cachedRanges[column][from] = rangesByEnd;
      }
      rangesByEnd[to] = range;
      return range;
    }

    /// <summary>
    /// Returns the largest non-NaN value of <paramref name="column"/>, or
    /// <see cref="Double.NegativeInfinity"/> when the column has no non-NaN value.
    /// </summary>
    public double GetMaximum(int column) {
      double max = Double.NegativeInfinity;
      for (int i = 0; i < Rows; i++) {
        double val = GetValue(i, column);
        if (!double.IsNaN(val) && val > max) max = val;
      }
      return max;
    }

    /// <summary>
    /// Returns the smallest non-NaN value of <paramref name="column"/>, or
    /// <see cref="Double.PositiveInfinity"/> when the column has no non-NaN value.
    /// </summary>
    public double GetMinimum(int column) {
      double min = Double.PositiveInfinity;
      for (int i = 0; i < Rows; i++) {
        double val = GetValue(i, column);
        if (!double.IsNaN(val) && val < min) min = val;
      }
      return min;
    }

    /// <summary>
    /// Scales <paramref name="column"/> with offset <c>-min</c> and factor <c>1 / (max - min)</c>
    /// (factor 1.0 when max equals min). Does nothing when the column already has a scaling
    /// other than factor 1.0 / offset 0.0.
    /// </summary>
    internal void ScaleVariable(int column) {
      if (scalingFactor[column] != 1.0 || scalingOffset[column] != 0.0) return;

      double min = GetMinimum(column);
      double max = GetMaximum(column);
      double range = max - min;
      double factor = 1.0;
      if (range != 0) factor = 1.0 / range;
      // The three-argument overload resets the caches and raises the change notification,
      // so neither is repeated here.
      ScaleVariable(column, factor, -min);
    }

    /// <summary>
    /// Replaces every value v of <paramref name="column"/> by
    /// <c>(v + offset) * factor</c>, records factor and offset for the column, resets the
    /// statistic caches and raises the change notification.
    /// </summary>
    internal void ScaleVariable(int column, double factor, double offset) {
      scalingFactor[column] = factor;
      scalingOffset[column] = offset;
      for (int i = 0; i < Rows; i++) {
        int index = i * columns + column;
        samples[index] = (samples[index] + offset) * factor;
      }
      CreateDictionaries();
      FireChanged();
    }

    /// <summary>
    /// Reverts the recorded scaling of <paramref name="column"/> by replacing every value s
    /// with <c>s / factor - offset</c> and resetting the column to factor 1.0 / offset 0.0.
    /// Does nothing when the column is not scaled.
    /// </summary>
    internal void UnscaleVariable(int column) {
      double factor = scalingFactor[column];
      double offset = scalingOffset[column];
      if (factor == 1.0 && offset == 0.0) return;

      for (int i = 0; i < rows; i++) {
        int index = i * columns + column;
        samples[index] = samples[index] / factor - offset;
      }
      scalingFactor[column] = 1.0;
      scalingOffset[column] = 0.0;
      // The sample values changed, so cached means/ranges of the scaled data are stale and
      // observers must be told, exactly as ScaleVariable does.
      CreateDictionaries();
      FireChanged();
    }
  }
}