Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp: 09/14/18 11:47:37 (6 years ago)
Author: abeham
Message: #2817: updated to trunk r16140

Location: branches/2817-BinPackingSpeedup
Files: 3 edited

Legend: Unmodified, Added, Removed
  • branches/2817-BinPackingSpeedup

  • branches/2817-BinPackingSpeedup/HeuristicLab.Problems.DataAnalysis

  • branches/2817-BinPackingSpeedup/HeuristicLab.Problems.DataAnalysis/3.4/Dataset.cs

    r16140 r16141  
    3838    protected Dataset(Dataset original, Cloner cloner)
    3939      : base(original, cloner) {
     40      // no need to clone the variable values because these can't be modified
    4041      variableValues = new Dictionary<string, IList>(original.variableValues);
    4142      variableNames = new List<string>(original.variableNames);
    4243      rows = original.rows;
    4344    }
     45
    4446    public override IDeepCloneable Clone(Cloner cloner) { return new Dataset(this, cloner); }
    4547
     
    5860    /// <param name="variableValues">The values for the variables (column-oriented storage). Values are not cloned!</param>
    5961    public Dataset(IEnumerable<string> variableNames, IEnumerable<IList> variableValues)
    60       : base() {
     62      : this(variableNames, variableValues, cloneValues: true) {
     63    }
     64
     65    protected Dataset(IEnumerable<string> variableNames, IEnumerable<IList> variableValues, bool cloneValues = false) {
    6166      Name = "-";
    62       if (!variableNames.Any()) {
     67
     68      if (variableNames.Any()) {
     69        this.variableNames = new List<string>(variableNames);
     70      } else {
    6371        this.variableNames = Enumerable.Range(0, variableValues.Count()).Select(x => "Column " + x).ToList();
    64       } else if (variableNames.Count() != variableValues.Count()) {
    65         throw new ArgumentException("Number of variable names doesn't match the number of columns of variableValues");
    66       } else if (!variableValues.All(list => list.Count == variableValues.First().Count)) {
    67         throw new ArgumentException("The number of values must be equal for every variable");
    68       } else if (variableNames.Distinct().Count() != variableNames.Count()) {
    69         var duplicateVariableNames =
    70           variableNames.GroupBy(v => v).Where(g => g.Count() > 1).Select(g => g.Key).ToList();
    71         string message = "The dataset cannot contain duplicate variables names: " + Environment.NewLine;
    72         foreach (var duplicateVariableName in duplicateVariableNames)
    73           message += duplicateVariableName + Environment.NewLine;
    74         throw new ArgumentException(message);
    75       }
     72      }
     73      // check if the arguments are consistent (no duplicate variables, same number of rows, correct data types, ...)
     74      CheckArguments(this.variableNames, variableValues);
     75
    7676      rows = variableValues.First().Count;
    77       this.variableNames = new List<string>(variableNames);
    78       this.variableValues = new Dictionary<string, IList>(this.variableNames.Count);
    79       for (int i = 0; i < this.variableNames.Count; i++) {
    80         var values = variableValues.ElementAt(i);
    81         this.variableValues.Add(this.variableNames[i], values);
     77
     78      if (cloneValues) {
     79        this.variableValues = CloneValues(this.variableNames, variableValues);
     80      } else {
     81        this.variableValues = new Dictionary<string, IList>(this.variableNames.Count);
     82        for (int i = 0; i < this.variableNames.Count; i++) {
     83          var variableName = this.variableNames[i];
     84          var values = variableValues.ElementAt(i);
     85          this.variableValues.Add(variableName, values);
     86        }
    8287      }
    8388    }
     
    111116
    112117    public ModifiableDataset ToModifiable() {
    113       var values = new List<IList>();
    114       foreach (var v in variableNames) {
    115         if (VariableHasType<double>(v)) {
    116           values.Add(new List<double>((IList<double>)variableValues[v]));
    117         } else if (VariableHasType<string>(v)) {
    118           values.Add(new List<string>((IList<string>)variableValues[v]));
    119         } else if (VariableHasType<DateTime>(v)) {
    120           values.Add(new List<DateTime>((IList<DateTime>)variableValues[v]));
    121         } else {
    122           throw new ArgumentException("Unknown variable type.");
    123         }
    124       }
    125       return new ModifiableDataset(variableNames, values);
    126     }
     118      return new ModifiableDataset(variableNames, variableNames.Select(v => variableValues[v]), true);
     119    }
     120
    127121    /// <summary>
    128122    /// Shuffle a dataset's rows
     
    135129    }
    136130
    137     protected Dataset(Dataset dataset) : this(dataset.variableNames, dataset.variableValues.Values) { }
     131
    138132
    139133    #region Backwards compatible code, remove with 3.5
     
    231225      return new ReadOnlyCollection<DateTime>(values);
    232226    }
    233 
    234 
    235227    private IEnumerable<T> GetValues<T>(string variableName, IEnumerable<int> rows) {
    236228      var values = GetValues<T>(variableName);
     
    248240      return variableValues[variableName] is IList<T>;
    249241    }
     242    protected Type GetVariableType(string variableName) {
     243      IList list;
     244      variableValues.TryGetValue(variableName, out list);
     245      if (list == null)
     246        throw new ArgumentException("The variable " + variableName + " does not exist in the dataset.");
     247      return GetElementType(list);
     248    }
     249    protected static Type GetElementType(IList list) {
     250      var type = list.GetType();
     251      return type.IsGenericType ? type.GetGenericArguments()[0] : type.GetElementType();
     252    }
     253    protected static bool IsAllowedType(IList list) {
     254      var type = GetElementType(list);
     255      return IsAllowedType(type);
     256    }
     257    protected static bool IsAllowedType(Type type) {
     258      return type == typeof(double) || type == typeof(string) || type == typeof(DateTime);
     259    }
     260
     261    protected static void CheckArguments(IEnumerable<string> variableNames, IEnumerable<IList> variableValues) {
     262      if (variableNames.Count() != variableValues.Count()) {
     263        throw new ArgumentException("Number of variable names doesn't match the number of columns of variableValues");
     264      } else if (!variableValues.All(list => list.Count == variableValues.First().Count)) {
     265        throw new ArgumentException("The number of values must be equal for every variable");
     266      } else if (variableNames.Distinct().Count() != variableNames.Count()) {
     267        var duplicateVariableNames =
     268          variableNames.GroupBy(v => v).Where(g => g.Count() > 1).Select(g => g.Key).ToList();
     269        string message = "The dataset cannot contain duplicate variables names: " + Environment.NewLine;
     270        foreach (var duplicateVariableName in duplicateVariableNames)
     271          message += duplicateVariableName + Environment.NewLine;
     272        throw new ArgumentException(message);
     273      }
     274      // check if all the variables are supported
     275      foreach (var t in variableNames.Zip(variableValues, Tuple.Create)) {
     276        var variableName = t.Item1;
     277        var values = t.Item2;
     278
     279        if (!IsAllowedType(values)) {
     280          throw new ArgumentException(string.Format("Unsupported type {0} for variable {1}.", GetElementType(values), variableName));
     281        }
     282      }
     283    }
     284
     285    protected static Dictionary<string, IList> CloneValues(Dictionary<string, IList> variableValues) {
     286      return variableValues.ToDictionary(x => x.Key, x => CloneValues(x.Value));
     287    }
     288
     289    protected static Dictionary<string, IList> CloneValues(IEnumerable<string> variableNames, IEnumerable<IList> variableValues) {
     290      return variableNames.Zip(variableValues, Tuple.Create).ToDictionary(x => x.Item1, x => CloneValues(x.Item2));
     291    }
     292
     293    protected static IList CloneValues(IList values) {
     294      var doubleValues = values as IList<double>;
     295      if (doubleValues != null) return new List<double>(doubleValues);
     296
     297      var stringValues = values as IList<string>;
     298      if (stringValues != null) return new List<string>(stringValues);
     299
     300      var dateTimeValues = values as IList<DateTime>;
     301      if (dateTimeValues != null) return new List<DateTime>(dateTimeValues);
     302
     303      throw new ArgumentException(string.Format("Unsupported variable type {0}.", GetElementType(values)));
     304    }
    250305
    251306    #region IStringConvertibleMatrix Members
    252307    [Storable]
    253     protected int rows;
     308    private int rows;
    254309    public int Rows {
    255310      get { return rows; }
     311      protected set { rows = value; }
    256312    }
    257313    int IStringConvertibleMatrix.Rows {
Note: See TracChangeset for help on using the changeset viewer.