Changeset 15829


Ignore:
Timestamp:
03/07/18 16:34:09 (20 months ago)
Author:
bburlacu
Message:

#2897: Ensure cloning only happens in the Dataset constructor and ModifiableDataset cloning constructor.

Location:
trunk/HeuristicLab.Problems.DataAnalysis/3.4
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/HeuristicLab.Problems.DataAnalysis/3.4/Dataset.cs

    r15769 r15829  
    3838    protected Dataset(Dataset original, Cloner cloner)
    3939      : base(original, cloner) {
     40      // no need to clone the variable values because these can't be modified
    4041      variableValues = new Dictionary<string, IList>(original.variableValues);
    4142      variableNames = new List<string>(original.variableNames);
    4243      rows = original.rows;
    4344    }
     45
    4446    public override IDeepCloneable Clone(Cloner cloner) { return new Dataset(this, cloner); }
    4547
     
    5860    /// <param name="variableValues">The values for the variables (column-oriented storage). Values are not cloned!</param>
    5961    public Dataset(IEnumerable<string> variableNames, IEnumerable<IList> variableValues)
    60       : base() {
     62      : this(variableNames, variableValues, cloneValues: true) {
     63    }
     64
     65    protected Dataset(IEnumerable<string> variableNames, IEnumerable<IList> variableValues, bool cloneValues = false) {
    6166      Name = "-";
    62       if (!variableNames.Any()) {
     67
     68      if (variableNames.Any()) {
     69        this.variableNames = new List<string>(variableNames);
     70      } else {
    6371        this.variableNames = Enumerable.Range(0, variableValues.Count()).Select(x => "Column " + x).ToList();
    64       } else if (variableNames.Count() != variableValues.Count()) {
    65         throw new ArgumentException("Number of variable names doesn't match the number of columns of variableValues");
    66       } else if (!variableValues.All(list => list.Count == variableValues.First().Count)) {
    67         throw new ArgumentException("The number of values must be equal for every variable");
    68       } else if (variableNames.Distinct().Count() != variableNames.Count()) {
    69         var duplicateVariableNames =
    70           variableNames.GroupBy(v => v).Where(g => g.Count() > 1).Select(g => g.Key).ToList();
    71         string message = "The dataset cannot contain duplicate variables names: " + Environment.NewLine;
    72         foreach (var duplicateVariableName in duplicateVariableNames)
    73           message += duplicateVariableName + Environment.NewLine;
    74         throw new ArgumentException(message);
    75       }
     72      }
     73      // check if the arguments are consistent (no duplicate variables, same number of rows, correct data types, ...)
     74      CheckArguments(this.variableNames, variableValues);
     75
    7676      rows = variableValues.First().Count;
    77       this.variableNames = new List<string>(variableNames);
    78       this.variableValues = new Dictionary<string, IList>(this.variableNames.Count);
    79       for (int i = 0; i < this.variableNames.Count; i++) {
    80         var variableName = this.variableNames[i];
    81         var values = variableValues.ElementAt(i);
    82 
    83         if (!IsAllowedType(values)) {
    84           throw new ArgumentException(string.Format("Unsupported type {0} for variable {1}.", GetElementType(values), variableName));
    85         }
    86 
    87         this.variableValues.Add(variableName, values);
     77
     78      if (cloneValues) {
     79        this.variableValues = CloneValues(this.variableNames, variableValues);
     80      } else {
     81        this.variableValues = new Dictionary<string, IList>(this.variableNames.Count);
     82        for (int i = 0; i < this.variableNames.Count; i++) {
     83          var variableName = this.variableNames[i];
     84          var values = variableValues.ElementAt(i);
     85          this.variableValues.Add(variableName, values);
     86        }
    8887      }
    8988    }
     
    238237      return new ReadOnlyCollection<DateTime>(values);
    239238    }
    240 
    241 
    242239    private IEnumerable<T> GetValues<T>(string variableName, IEnumerable<int> rows) {
    243240      var values = GetValues<T>(variableName);
     
    255252      return variableValues[variableName] is IList<T>;
    256253    }
    257 
    258254    protected Type GetVariableType(string variableName) {
    259255      IList list;
     
    263259      return GetElementType(list);
    264260    }
    265 
    266     protected Type GetElementType(IList list) {
     261    protected static Type GetElementType(IList list) {
    267262      var type = list.GetType();
    268263      return type.IsGenericType ? type.GetGenericArguments()[0] : type.GetElementType();
    269264    }
    270 
    271     protected bool IsAllowedType(IList list) {
     265    protected static bool IsAllowedType(IList list) {
    272266      var type = GetElementType(list);
    273267      return IsAllowedType(type);
    274268    }
    275 
    276     protected bool IsAllowedType(Type type) {
     269    protected static bool IsAllowedType(Type type) {
    277270      return type == typeof(double) || type == typeof(string) || type == typeof(DateTime);
     271    }
     272
     273    protected static void CheckArguments(IEnumerable<string> variableNames, IEnumerable<IList> variableValues) {
     274      if (variableNames.Count() != variableValues.Count()) {
     275        throw new ArgumentException("Number of variable names doesn't match the number of columns of variableValues");
     276      } else if (!variableValues.All(list => list.Count == variableValues.First().Count)) {
     277        throw new ArgumentException("The number of values must be equal for every variable");
     278      } else if (variableNames.Distinct().Count() != variableNames.Count()) {
     279        var duplicateVariableNames =
     280          variableNames.GroupBy(v => v).Where(g => g.Count() > 1).Select(g => g.Key).ToList();
     281        string message = "The dataset cannot contain duplicate variables names: " + Environment.NewLine;
     282        foreach (var duplicateVariableName in duplicateVariableNames)
     283          message += duplicateVariableName + Environment.NewLine;
     284        throw new ArgumentException(message);
     285      }
     286      // check if all the variables are supported
     287      foreach (var t in variableNames.Zip(variableValues, Tuple.Create)) {
     288        var variableName = t.Item1;
     289        var values = t.Item2;
     290
     291        if (!IsAllowedType(values)) {
     292          throw new ArgumentException(string.Format("Unsupported type {0} for variable {1}.", GetElementType(values), variableName));
     293        }
     294      }
     295    }
     296
     297    protected static Dictionary<string, IList> CloneValues(Dictionary<string, IList> variableValues) {
     298      return variableValues.ToDictionary(x => x.Key, x => CloneValues(x.Value));
     299    }
     300
     301    protected static Dictionary<string, IList> CloneValues(IEnumerable<string> variableNames, IEnumerable<IList> variableValues) {
     302      return variableNames.Zip(variableValues, Tuple.Create).ToDictionary(x => x.Item1, x => CloneValues(x.Item2));
     303    }
     304
     305    protected static IList CloneValues(IList values) {
     306      var doubleValues = values as IList<double>;
     307      if (doubleValues != null) return new List<double>(doubleValues);
     308
     309      var stringValues = values as IList<string>;
     310      if (stringValues != null) return new List<string>(stringValues);
     311
     312      var dateTimeValues = values as IList<DateTime>;
     313      if (dateTimeValues != null) return new List<DateTime>(dateTimeValues);
     314
     315      throw new ArgumentException(string.Format("Unsupported variable type {0}.", GetElementType(values)));
    278316    }
    279317
  • trunk/HeuristicLab.Problems.DataAnalysis/3.4/ModifiableDataset.cs

    r15769 r15829  
    3939
    4040    private ModifiableDataset(ModifiableDataset original, Cloner cloner) : base(original, cloner) {
    41       var variables = variableValues.Keys.ToList();
    42       foreach (var v in variables) {
    43         var type = GetVariableType(v);
    44         if (type == typeof(DateTime)) {
    45           variableValues[v] = GetDateTimeValues(v).ToList();
    46         } else if (type == typeof(double)) {
    47           variableValues[v] = GetDoubleValues(v).ToList();
    48         } else if (type == typeof(string)) {
    49           variableValues[v] = GetStringValues(v).ToList();
    50         } else {
    51           throw new ArgumentException("Unsupported type " + type + " for variable " + v);
    52         }
    53       }
    54     }
     41      variableNames = new List<string>(original.variableNames);
     42      variableValues = CloneValues(original.variableValues);
     43    }
     44
    5545    public override IDeepCloneable Clone(Cloner cloner) { return new ModifiableDataset(this, cloner); }
    56     public ModifiableDataset() : base() { }
    57 
    58     public ModifiableDataset(IEnumerable<string> variableNames, IEnumerable<IList> variableValues) : base(variableNames, variableValues) { }
     46
     47    public ModifiableDataset() { }
     48
     49    public ModifiableDataset(IEnumerable<string> variableNames, IEnumerable<IList> variableValues) :
     50      base(variableNames, variableValues, cloneValues: false) { }
    5951
    6052    public void ReplaceRow(int row, IEnumerable<object> values) {
Note: See TracChangeset for help on using the changeset viewer.