Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HivePerformance/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/Scaling.cs @ 9426

Last change on this file since 9426 was 8829, checked in by gkronber, 12 years ago

#1902: fixed an issue that occurs in scaling if a variable is effectively constant in the training partition

File size: 2.9 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2012 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
28using HeuristicLab.Problems.DataAnalysis;
29
namespace HeuristicLab.Algorithms.DataAnalysis {
  /// <summary>
  /// Stores the minimum and maximum (ignoring NaN) of each variable over a set of rows
  /// and uses these bounds to linearly scale variable values.
  /// </summary>
  [StorableClass]
  [Item(Name = "Scaling", Description = "Contains information about scaling of variables for data-analysis algorithms.")]
  public class Scaling : Item {
    // Maps a variable name to its (min, max) pair as determined in the public constructor.
    [Storable]
    private Dictionary<string, Tuple<double, double>> scalingParameters = new Dictionary<string, Tuple<double, double>>();

    [StorableConstructor]
    protected Scaling(bool deserializing) : base(deserializing) { }

    /// <summary>
    /// Cloning constructor; copies all (min, max) pairs of <paramref name="original"/> into this instance.
    /// </summary>
    protected Scaling(Scaling original, Cloner cloner)
      : base(original, cloner) {
      foreach (var pair in original.scalingParameters)
        scalingParameters.Add(pair.Key, Tuple.Create(pair.Value.Item1, pair.Value.Item2));
    }

    /// <summary>
    /// Determines the scaling parameters (minimum and maximum, NaN values are ignored)
    /// for each of the given variables over the given rows of the dataset.
    /// </summary>
    /// <param name="ds">The dataset to read the values from.</param>
    /// <param name="variables">The names of the variables for which scaling parameters are calculated.</param>
    /// <param name="rows">The rows of the dataset that are used to determine minimum and maximum.</param>
    /// <exception cref="InvalidOperationException">
    /// Thrown when a variable has no value other than NaN in the given rows.
    /// </exception>
    public Scaling(Dataset ds, IEnumerable<string> variables, IEnumerable<int> rows) {
      foreach (var variable in variables) {
        // Determine min and max in a single pass so that the values are enumerated only once
        // per variable and both bounds are guaranteed to stem from the same enumeration.
        bool foundValue = false;
        double min = 0.0;
        double max = 0.0;
        foreach (var x in ds.GetDoubleValues(variable, rows)) {
          if (double.IsNaN(x)) continue;
          if (!foundValue) {
            min = x;
            max = x;
            foundValue = true;
          } else {
            if (x < min) min = x;
            if (x > max) max = x;
          }
        }
        // Same exception type that Enumerable.Min() raises for an empty sequence,
        // but with a message that identifies the offending variable.
        if (!foundValue)
          throw new InvalidOperationException("Cannot determine scaling parameters for variable \"" + variable + "\" because it contains no valid (non-NaN) values in the given rows.");
        scalingParameters[variable] = Tuple.Create(min, max);
      }
    }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new Scaling(this, cloner);
    }

    /// <summary>
    /// Returns the values of the given variable scaled with the stored minimum and maximum
    /// as (x - min) / (max - min). Values equal to the stored bounds map to 0 and 1 respectively;
    /// values outside of the stored bounds map outside of [0..1].
    /// </summary>
    /// <param name="ds">The dataset to read the values from.</param>
    /// <param name="variable">The name of the variable to scale.</param>
    /// <param name="rows">The rows for which scaled values are returned.</param>
    /// <returns>
    /// One scaled value per row. If the stored minimum and maximum are almost equal
    /// (effectively constant variable), a zero is returned for each row instead.
    /// </returns>
    /// <exception cref="KeyNotFoundException">Thrown when no scaling parameters are stored for the variable.</exception>
    public IEnumerable<double> GetScaledValues(Dataset ds, string variable, IEnumerable<int> rows) {
      var parameters = GetParameters(variable);
      double min = parameters.Item1;
      double max = parameters.Item2;
      // Avoids a division by (almost) zero for variables that are effectively constant.
      if (min.IsAlmost(max)) return rows.Select(i => 0.0); // return enumerable of zeros
      return ds.GetDoubleValues(variable, rows).Select(x => (x - min) / (max - min));  // scale to range [0..1]
    }

    /// <summary>
    /// Retrieves the stored minimum and maximum for the given variable.
    /// </summary>
    /// <param name="variable">The name of the variable.</param>
    /// <param name="min">Receives the stored minimum.</param>
    /// <param name="max">Receives the stored maximum.</param>
    /// <exception cref="KeyNotFoundException">Thrown when no scaling parameters are stored for the variable.</exception>
    public void GetScalingParameters(string variable, out double min, out double max) {
      var parameters = GetParameters(variable);
      min = parameters.Item1;
      max = parameters.Item2;
    }

    // Looks up the (min, max) pair of a variable once and reports the variable name if it is unknown.
    private Tuple<double, double> GetParameters(string variable) {
      Tuple<double, double> parameters;
      if (!scalingParameters.TryGetValue(variable, out parameters))
        throw new KeyNotFoundException("No scaling parameters available for variable \"" + variable + "\".");
      return parameters;
    }
  }
}
Note: See TracBrowser for help on using the repository browser.