Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
09/06/18 12:19:23 (6 years ago)
Author:
mkommend
Message:

#2897: Merged r15769 and r15829 into stable.

Location:
stable
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • stable

  • stable/HeuristicLab.Problems.DataAnalysis

  • stable/HeuristicLab.Problems.DataAnalysis/3.4/Dataset.cs

    r15584 r16124  
    3838    protected Dataset(Dataset original, Cloner cloner)
    3939      : base(original, cloner) {
     40      // no need to clone the variable values because these can't be modified
    4041      variableValues = new Dictionary<string, IList>(original.variableValues);
    4142      variableNames = new List<string>(original.variableNames);
    4243      rows = original.rows;
    4344    }
     45
    4446    public override IDeepCloneable Clone(Cloner cloner) { return new Dataset(this, cloner); }
    4547
     
    5860    /// <param name="variableValues">The values for the variables (column-oriented storage). The lists are copied; the dataset does not keep references to the passed lists.</param>
    5961    public Dataset(IEnumerable<string> variableNames, IEnumerable<IList> variableValues)
    60       : base() {
     62      : this(variableNames, variableValues, cloneValues: true) {
     63    }
     64
     65    protected Dataset(IEnumerable<string> variableNames, IEnumerable<IList> variableValues, bool cloneValues = false) {
    6166      Name = "-";
    62       if (!variableNames.Any()) {
     67
     68      if (variableNames.Any()) {
     69        this.variableNames = new List<string>(variableNames);
     70      } else {
    6371        this.variableNames = Enumerable.Range(0, variableValues.Count()).Select(x => "Column " + x).ToList();
    64       } else if (variableNames.Count() != variableValues.Count()) {
    65         throw new ArgumentException("Number of variable names doesn't match the number of columns of variableValues");
    66       } else if (!variableValues.All(list => list.Count == variableValues.First().Count)) {
    67         throw new ArgumentException("The number of values must be equal for every variable");
    68       } else if (variableNames.Distinct().Count() != variableNames.Count()) {
    69         var duplicateVariableNames =
    70           variableNames.GroupBy(v => v).Where(g => g.Count() > 1).Select(g => g.Key).ToList();
    71         string message = "The dataset cannot contain duplicate variables names: " + Environment.NewLine;
    72         foreach (var duplicateVariableName in duplicateVariableNames)
    73           message += duplicateVariableName + Environment.NewLine;
    74         throw new ArgumentException(message);
    75       }
     72      }
     73      // check if the arguments are consistent (no duplicate variables, same number of rows, correct data types, ...)
     74      CheckArguments(this.variableNames, variableValues);
     75
    7676      rows = variableValues.First().Count;
    77       this.variableNames = new List<string>(variableNames);
    78       this.variableValues = new Dictionary<string, IList>(this.variableNames.Count);
    79       for (int i = 0; i < this.variableNames.Count; i++) {
    80         var values = variableValues.ElementAt(i);
    81         this.variableValues.Add(this.variableNames[i], values);
     77
     78      if (cloneValues) {
     79        this.variableValues = CloneValues(this.variableNames, variableValues);
     80      } else {
     81        this.variableValues = new Dictionary<string, IList>(this.variableNames.Count);
     82        for (int i = 0; i < this.variableNames.Count; i++) {
     83          var variableName = this.variableNames[i];
     84          var values = variableValues.ElementAt(i);
     85          this.variableValues.Add(variableName, values);
     86        }
    8287      }
    8388    }
     
    125130      return new ModifiableDataset(variableNames, values);
    126131    }
     132
    127133    /// <summary>
    128134    /// Shuffle a dataset's rows
     
    231237      return new ReadOnlyCollection<DateTime>(values);
    232238    }
    233 
    234 
    235239    private IEnumerable<T> GetValues<T>(string variableName, IEnumerable<int> rows) {
    236240      var values = GetValues<T>(variableName);
     
    247251    public bool VariableHasType<T>(string variableName) {
    248252      return variableValues[variableName] is IList<T>;
     253    }
     254    protected Type GetVariableType(string variableName) {
     255      IList list;
     256      variableValues.TryGetValue(variableName, out list);
     257      if (list == null)
     258        throw new ArgumentException("The variable " + variableName + " does not exist in the dataset.");
     259      return GetElementType(list);
     260    }
     261    protected static Type GetElementType(IList list) {
     262      var type = list.GetType();
     263      return type.IsGenericType ? type.GetGenericArguments()[0] : type.GetElementType();
     264    }
     265    protected static bool IsAllowedType(IList list) {
     266      var type = GetElementType(list);
     267      return IsAllowedType(type);
     268    }
     269    protected static bool IsAllowedType(Type type) {
     270      return type == typeof(double) || type == typeof(string) || type == typeof(DateTime);
     271    }
     272
     273    protected static void CheckArguments(IEnumerable<string> variableNames, IEnumerable<IList> variableValues) {
     274      if (variableNames.Count() != variableValues.Count()) {
     275        throw new ArgumentException("Number of variable names doesn't match the number of columns of variableValues");
     276      } else if (!variableValues.All(list => list.Count == variableValues.First().Count)) {
     277        throw new ArgumentException("The number of values must be equal for every variable");
     278      } else if (variableNames.Distinct().Count() != variableNames.Count()) {
     279        var duplicateVariableNames =
     280          variableNames.GroupBy(v => v).Where(g => g.Count() > 1).Select(g => g.Key).ToList();
     281        string message = "The dataset cannot contain duplicate variables names: " + Environment.NewLine;
     282        foreach (var duplicateVariableName in duplicateVariableNames)
     283          message += duplicateVariableName + Environment.NewLine;
     284        throw new ArgumentException(message);
     285      }
     286      // check if all the variables are supported
     287      foreach (var t in variableNames.Zip(variableValues, Tuple.Create)) {
     288        var variableName = t.Item1;
     289        var values = t.Item2;
     290
     291        if (!IsAllowedType(values)) {
     292          throw new ArgumentException(string.Format("Unsupported type {0} for variable {1}.", GetElementType(values), variableName));
     293        }
     294      }
     295    }
     296
     297    protected static Dictionary<string, IList> CloneValues(Dictionary<string, IList> variableValues) {
     298      return variableValues.ToDictionary(x => x.Key, x => CloneValues(x.Value));
     299    }
     300
     301    protected static Dictionary<string, IList> CloneValues(IEnumerable<string> variableNames, IEnumerable<IList> variableValues) {
     302      return variableNames.Zip(variableValues, Tuple.Create).ToDictionary(x => x.Item1, x => CloneValues(x.Item2));
     303    }
     304
     305    protected static IList CloneValues(IList values) {
     306      var doubleValues = values as IList<double>;
     307      if (doubleValues != null) return new List<double>(doubleValues);
     308
     309      var stringValues = values as IList<string>;
     310      if (stringValues != null) return new List<string>(stringValues);
     311
     312      var dateTimeValues = values as IList<DateTime>;
     313      if (dateTimeValues != null) return new List<DateTime>(dateTimeValues);
     314
     315      throw new ArgumentException(string.Format("Unsupported variable type {0}.", GetElementType(values)));
    249316    }
    250317
Note: See TracChangeset for help on using the changeset viewer.