#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.Linq;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Persistence;

namespace HeuristicLab.Problems.DataAnalysis {
  /// <summary>
  /// Column-oriented dataset: every variable name maps to one list of values, and all
  /// lists are expected to have <see cref="Rows"/> entries. The class exposes no mutating
  /// members; the matrix interface setters throw <see cref="NotSupportedException"/>.
  /// </summary>
  [Item("Dataset", "Represents a dataset containing data that should be analyzed.")]
  [StorableType("b762712e-454c-4fe6-8e1d-0d24dcc2eaea")]
  public class Dataset : NamedItem, IDataset {
    [StorableConstructor]
    protected Dataset(StorableConstructorFlag deserializing) : base(deserializing) { }

    /// <summary>
    /// Cloning constructor. The name list and the name-to-column dictionary are copied,
    /// but the column lists themselves are shared with <paramref name="original"/>.
    /// </summary>
    protected Dataset(Dataset original, Cloner cloner)
      : base(original, cloner) {
      variableValues = new Dictionary<string, IList>(original.variableValues);
      variableNames = new List<string>(original.variableNames);
      rows = original.rows;
    }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new Dataset(this, cloner);
    }

    /// <summary>
    /// Creates an empty dataset (no variables, zero rows) named "-".
    /// </summary>
    public Dataset()
      : base() {
      Name = "-";
      VariableNames = Enumerable.Empty<string>();
      variableValues = new Dictionary<string, IList>();
      rows = 0;
    }

    /// <summary>
    /// Creates a new dataset. The variableValues are not cloned.
    /// </summary>
    /// <param name="variableNames">
    /// The names of the variables in the dataset. If empty, the names
    /// "Column 0", "Column 1", ... are generated, one per column.
    /// </param>
    /// <param name="variableValues">The values for the variables (column-oriented storage). Values are not cloned!</param>
    /// <exception cref="ArgumentNullException">An argument is null.</exception>
    /// <exception cref="ArgumentException">
    /// The number of names does not match the number of columns, the columns differ in
    /// length, or a variable name occurs more than once.
    /// </exception>
    public Dataset(IEnumerable<string> variableNames, IEnumerable<IList> variableValues)
      : base() {
      Name = "-";
      if (variableNames == null) throw new ArgumentNullException("variableNames");
      if (variableValues == null) throw new ArgumentNullException("variableValues");

      // Materialize both sequences once: they are inspected several times below and may be
      // lazily evaluated. Only the outer sequence is copied, the column lists are kept as-is.
      var names = variableNames.ToList();
      var columns = variableValues.ToList();

      if (names.Count == 0) {
        names = Enumerable.Range(0, columns.Count).Select(x => "Column " + x).ToList();
      } else if (names.Count != columns.Count) {
        throw new ArgumentException("Number of variable names doesn't match the number of columns of variableValues");
      }

      // The length check applies regardless of whether the names were supplied or generated.
      if (columns.Count > 0) {
        int expectedCount = columns[0].Count;
        if (columns.Any(column => column.Count != expectedCount)) {
          throw new ArgumentException("The number of values must be equal for every variable");
        }
      }

      ThrowIfDuplicateNames(names);

      // the validation call checks if every values IList is actually a list of the supported type
      DatasetUtil.ValidateInputData(columns);

      rows = columns.Count > 0 ? columns[0].Count : 0;
      this.variableNames = names;
      this.variableValues = new Dictionary<string, IList>(names.Count);
      for (int i = 0; i < names.Count; i++) {
        this.variableValues.Add(names[i], columns[i]);
      }
    }

    /// <summary>
    /// Creates a new dataset from a row-major matrix. Every matrix column is copied into
    /// its own list of doubles.
    /// </summary>
    /// <param name="variableNames">One name per matrix column.</param>
    /// <param name="variableValues">The values, indexed as [row, column].</param>
    /// <exception cref="ArgumentNullException">An argument is null.</exception>
    /// <exception cref="ArgumentException">
    /// The number of names does not match the number of matrix columns, or a variable
    /// name occurs more than once.
    /// </exception>
    public Dataset(IEnumerable<string> variableNames, double[,] variableValues) {
      Name = "-";
      if (variableNames == null) throw new ArgumentNullException("variableNames");
      if (variableValues == null) throw new ArgumentNullException("variableValues");

      var names = variableNames.ToList();
      int rowCount = variableValues.GetLength(0);
      int columnCount = variableValues.GetLength(1);

      if (names.Count != columnCount) {
        throw new ArgumentException("Number of variable names doesn't match the number of columns of variableValues");
      }
      ThrowIfDuplicateNames(names);

      rows = rowCount;
      this.variableNames = names;
      this.variableValues = new Dictionary<string, IList>(columnCount);
      for (int col = 0; col < columnCount; col++) {
        var values = new List<double>(rowCount);
        for (int row = 0; row < rowCount; row++) {
          values.Add(variableValues[row, col]);
        }
        this.variableValues.Add(names[col], values);
      }
    }

    // Throws an ArgumentException that lists every name occurring more than once in names.
    private static void ThrowIfDuplicateNames(IList<string> names) {
      if (names.Distinct().Count() == names.Count) return;

      var duplicateVariableNames = names.GroupBy(v => v).Where(g => g.Count() > 1).Select(g => g.Key).ToList();
      string message = "The dataset cannot contain duplicate variables names: " + Environment.NewLine;
      foreach (var duplicateVariableName in duplicateVariableNames)
        message += duplicateVariableName + Environment.NewLine;
      throw new ArgumentException(message);
    }

    /// <summary>
    /// Builds a <see cref="ModifiableDataset"/> from copies of all double, string and
    /// DateTime columns.
    /// </summary>
    /// <exception cref="ArgumentException">A column has none of the three supported element types.</exception>
    public ModifiableDataset ToModifiable() {
      var values = new List<IList>();
      foreach (var v in variableNames) {
        // VariableHasType only guarantees IList<T>, so copy through IEnumerable<T> instead of
        // casting to the concrete List<T>.
        if (VariableHasType<double>(v)) {
          values.Add(new List<double>((IEnumerable<double>)variableValues[v]));
        } else if (VariableHasType<string>(v)) {
          values.Add(new List<string>((IEnumerable<string>)variableValues[v]));
        } else if (VariableHasType<DateTime>(v)) {
          values.Add(new List<DateTime>((IEnumerable<DateTime>)variableValues[v]));
        } else {
          throw new ArgumentException("Unknown variable type.");
        }
      }
      return new ModifiableDataset(variableNames, values);
    }

    /// <summary>
    /// Shuffle a dataset's rows
    /// </summary>
    /// <param name="random">Random number generator used for shuffling.</param>
    /// <returns>A shuffled copy of the current dataset.</returns>
    public Dataset Shuffle(IRandom random) {
      var values = variableNames.Select(x => variableValues[x]).ToList();
      return new Dataset(variableNames, values.ShuffleLists(random));
    }

    /// <summary>
    /// Creates a dataset with the same variable names and column lists as
    /// <paramref name="dataset"/>; the column lists are not copied.
    /// </summary>
    // The columns are looked up by name because the enumeration order of Dictionary.Values
    // is not guaranteed to match the order of variableNames.
    protected Dataset(Dataset dataset)
      : this(dataset.variableNames, dataset.variableNames.Select(name => dataset.variableValues[name]).ToList()) {
    }

    #region Backwards compatible code, remove with 3.5
    private double[,] storableData;
    // name alias used to support backwards compatibility
    [Storable(Name = "data", AllowOneWay = true)]
    private double[,] StorableData { set { storableData = value; } }

    // Converts the legacy matrix storage into the column dictionary when no
    // "VariableValues" entry was restored.
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      if (variableValues == null) {
        rows = storableData.GetLength(0);
        variableValues = new Dictionary<string, IList>();
        for (int col = 0; col < storableData.GetLength(1); col++) {
          string columName = variableNames[col];
          var values = new List<double>(rows);
          for (int row = 0; row < rows; row++) {
            values.Add(storableData[row, col]);
          }
          variableValues.Add(columName, values);
        }
        storableData = null;
      }
    }
    #endregion

    [Storable(Name = "VariableValues")]
    protected Dictionary<string, IList> variableValues;

    protected List<string> variableNames;

    /// <summary>
    /// The variable names in column order. The setter copies the given names and may only
    /// be used while no names are set; otherwise it throws
    /// <see cref="InvalidOperationException"/>.
    /// </summary>
    [Storable]
    public IEnumerable<string> VariableNames {
      get { return variableNames; }
      protected set {
        if (variableNames != null) throw new InvalidOperationException();
        variableNames = new List<string>(value);
      }
    }

    /// <summary>Names of all variables whose column is a list of doubles.</summary>
    public IEnumerable<string> DoubleVariables {
      get { return variableValues.Where(p => p.Value is List<double>).Select(p => p.Key); }
    }

    /// <summary>Names of all variables whose column is a list of strings.</summary>
    public IEnumerable<string> StringVariables {
      get { return variableValues.Where(p => p.Value is List<string>).Select(p => p.Key); }
    }

    /// <summary>Returns the column of a double variable.</summary>
    /// <exception cref="ArgumentException">The variable does not exist or is not a double variable.</exception>
    public IEnumerable<double> GetDoubleValues(string variableName) {
      return GetValues<double>(variableName);
    }

    /// <summary>Returns the column of a string variable.</summary>
    /// <exception cref="ArgumentException">The variable does not exist or is not a string variable.</exception>
    public IEnumerable<string> GetStringValues(string variableName) {
      return GetValues<string>(variableName);
    }

    /// <summary>Returns the column of a DateTime variable.</summary>
    /// <exception cref="ArgumentException">The variable does not exist or is not a DateTime variable.</exception>
    public IEnumerable<DateTime> GetDateTimeValues(string variableName) {
      return GetValues<DateTime>(variableName);
    }

    /// <summary>Returns a read-only wrapper around the column of a double variable.</summary>
    /// <exception cref="ArgumentException">The variable does not exist or is not a double variable.</exception>
    public ReadOnlyCollection<double> GetReadOnlyDoubleValues(string variableName) {
      var values = GetValues<double>(variableName);
      return values.AsReadOnly();
    }

    /// <summary>Returns the value of a double variable in the given row.</summary>
    /// <exception cref="ArgumentException">The variable does not exist or is not a double variable.</exception>
    public double GetDoubleValue(string variableName, int row) {
      var values = GetValues<double>(variableName);
      return values[row];
    }

    /// <summary>
    /// Returns the values of a double variable for the given row indices, in the order of
    /// <paramref name="rows"/>. The rows are evaluated lazily.
    /// </summary>
    /// <exception cref="ArgumentException">The variable does not exist or is not a double variable.</exception>
    public IEnumerable<double> GetDoubleValues(string variableName, IEnumerable<int> rows) {
      return GetValues<double>(variableName, rows);
    }

    /// <summary>Returns the value of a string variable in the given row.</summary>
    /// <exception cref="ArgumentException">The variable does not exist or is not a string variable.</exception>
    public string GetStringValue(string variableName, int row) {
      var values = GetValues<string>(variableName);
      return values[row];
    }

    /// <summary>
    /// Returns the values of a string variable for the given row indices, in the order of
    /// <paramref name="rows"/>. The rows are evaluated lazily.
    /// </summary>
    /// <exception cref="ArgumentException">The variable does not exist or is not a string variable.</exception>
    public IEnumerable<string> GetStringValues(string variableName, IEnumerable<int> rows) {
      return GetValues<string>(variableName, rows);
    }

    /// <summary>Returns a read-only wrapper around the column of a string variable.</summary>
    /// <exception cref="ArgumentException">The variable does not exist or is not a string variable.</exception>
    public ReadOnlyCollection<string> GetReadOnlyStringValues(string variableName) {
      var values = GetValues<string>(variableName);
      return values.AsReadOnly();
    }

    // Resolves the column eagerly (so an unknown name or wrong type fails at call time) and
    // projects the requested row indices lazily.
    private IEnumerable<T> GetValues<T>(string variableName, IEnumerable<int> rows) {
      var values = GetValues<T>(variableName);
      return rows.Select(x => values[x]);
    }

    // Returns the column of variableName as List<T>; throws ArgumentException if the
    // variable is unknown or its column is not a List<T>.
    private List<T> GetValues<T>(string variableName) {
      IList list;
      if (!variableValues.TryGetValue(variableName, out list))
        throw new ArgumentException("The variable " + variableName + " does not exist in the dataset.");
      List<T> values = list as List<T>;
      if (values == null) throw new ArgumentException("The variable " + variableName + " is not a " + typeof(T) + " variable.");
      return values;
    }

    /// <summary>
    /// Checks whether the column of <paramref name="variableName"/> holds elements of type
    /// <typeparamref name="T"/>. The name is looked up with the dictionary indexer, so an
    /// unknown name throws <see cref="KeyNotFoundException"/>.
    /// </summary>
    public bool VariableHasType<T>(string variableName) {
      return variableValues[variableName] is IList<T>;
    }

    #region IStringConvertibleMatrix Members
    [Storable]
    protected int rows;

    /// <summary>Number of rows.</summary>
    public int Rows {
      get { return rows; }
    }
    int IStringConvertibleMatrix.Rows {
      get { return Rows; }
      set { throw new NotSupportedException(); }
    }

    /// <summary>Number of variables.</summary>
    public int Columns {
      get { return variableNames.Count; }
    }
    int IStringConvertibleMatrix.Columns {
      get { return Columns; }
      set { throw new NotSupportedException(); }
    }

    bool IStringConvertibleMatrix.SortableView {
      get { return false; }
      set { throw new NotSupportedException(); }
    }
    bool IStringConvertibleMatrix.ReadOnly {
      get { return true; }
    }

    IEnumerable<string> IStringConvertibleMatrix.ColumnNames {
      get { return this.VariableNames; }
      set { throw new NotSupportedException(); }
    }
    IEnumerable<string> IStringConvertibleMatrix.RowNames {
      get { return Enumerable.Empty<string>(); }
      set { throw new NotSupportedException(); }
    }

    string IStringConvertibleMatrix.GetValue(int rowIndex, int columnIndex) {
      // A cell may hold a null reference (e.g. in a string column); render it as an empty
      // string instead of failing with a NullReferenceException.
      object value = variableValues[variableNames[columnIndex]][rowIndex];
      return value == null ? string.Empty : value.ToString();
    }
    bool IStringConvertibleMatrix.SetValue(string value, int rowIndex, int columnIndex) {
      throw new NotSupportedException();
    }
    bool IStringConvertibleMatrix.Validate(string value, out string errorMessage) {
      throw new NotSupportedException();
    }

    // The accessors below are intentionally empty: this class never raises these events,
    // so subscriptions are simply discarded. They are virtual so that subclasses can
    // provide real implementations.
    public virtual event EventHandler ColumnsChanged { add { } remove { } }
    public virtual event EventHandler RowsChanged { add { } remove { } }
    public virtual event EventHandler ColumnNamesChanged { add { } remove { } }
    public virtual event EventHandler RowNamesChanged { add { } remove { } }
    public virtual event EventHandler SortableViewChanged { add { } remove { } }
    public virtual event EventHandler<EventArgs<int, int>> ItemChanged { add { } remove { } }
    public virtual event EventHandler Reset { add { } remove { } }
    #endregion
  }
}