Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
09/01/09 11:09:50 (15 years ago)
Author:
gkronber
Message:

Applied patch from mkommend for variable impact calculators and adapted the data-modeling algorithms to use the new operators for variable impact calculation (ticket #728).

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.DataAnalysis/3.2/Dataset.cs

    r2311 r2319  
    3131namespace HeuristicLab.DataAnalysis {
    3232  public sealed class Dataset : ItemBase {
    33 
    34     private string name;
    35     private double[] samples;
    36     private int rows;
    37     private int columns;
    3833    private Dictionary<int, Dictionary<int, double>>[] cachedMeans;
    3934    private Dictionary<int, Dictionary<int, double>>[] cachedRanges;
    40     private double[] scalingFactor;
    41     private double[] scalingOffset;
    4235    private bool cachedValuesInvalidated = true;
    43 
    44     private bool fireChangeEvents = true;
    45     public bool FireChangeEvents {
    46       get { return fireChangeEvents; }
    47       set { fireChangeEvents = value; }
    48     }
    49 
     36   
     37    public Dataset()
     38      : this(new double[,] { { 0.0 } }) {
     39    }
     40
     41    public Dataset(double[,] samples) {
     42      Name = "-";
     43      Rows = samples.GetLength(0);
     44      Columns = samples.GetLength(1);
     45      double[] values = new double[Rows * Columns];
     46      int i = 0;
     47      for (int row = 0; row < Rows; row++) {
     48        for (int column = 0; column < columns; column++) {
     49          values[i++] = samples[row, column];
     50        }
     51      }
     52      Samples = values;
     53      fireChangeEvents = true;
     54    }
     55
     56    #region Properties
     57    private string name;
    5058    public string Name {
    5159      get { return name; }
     
    5361    }
    5462
     63    private int rows;
    5564    public int Rows {
    5665      get { return rows; }
     
    5867    }
    5968
     69    private int columns;
    6070    public int Columns {
    6171      get { return columns; }
     
    6878    }
    6979
    70     public double[] ScalingFactor {
    71       get { return scalingFactor; }
    72       set {
    73         if (value.Length != scalingFactor.Length)
    74           throw new ArgumentException("Length of scaling factor array doesn't match number of variables");
    75         scalingFactor = value;
    76       }
    77     }
    78     public double[] ScalingOffset {
    79       get { return scalingOffset; }
    80       set {
    81         if (value.Length != scalingOffset.Length)
    82           throw new ArgumentException("Length of scaling offset array doesn't match number of variables");
    83         scalingOffset = value;
    84       }
    85     }
    86 
    87     public double GetValue(int i, int j) {
    88       return samples[columns * i + j];
    89     }
    90 
    91     public void SetValue(int i, int j, double v) {
    92       if (v != samples[columns * i + j]) {
    93         samples[columns * i + j] = v;
    94         cachedValuesInvalidated = true;
    95         if (fireChangeEvents) FireChanged();
    96       }
    97     }
    98 
     80    private string[] variableNames;
     81    public IEnumerable<string> VariableNames {
     82      get { return variableNames; }
     83    }
     84
     85    private double[] samples;
    9986    public double[] Samples {
    10087      get { return samples; }
     
    113100    }
    114101
    115     private string[] variableNames;
    116     public IEnumerable<string> VariableNames {
    117       get { return variableNames; }
    118     }
    119 
    120     public Dataset()
    121       : this(new double[,] { { 0.0 } }) {
    122     }
    123 
    124     public Dataset(double[,] samples) {
    125       Name = "-";
    126       Rows = samples.GetLength(0);
    127       Columns = samples.GetLength(1);
    128       double[] values = new double[Rows * Columns];
    129       int i = 0;
    130       for (int row = 0; row < Rows; row++) {
    131         for (int column = 0; column < columns; column++) {
    132           values[i++] = samples[row, column];
    133         }
    134       }
    135       Samples = values;
    136       fireChangeEvents = true;
    137     }
    138 
    139 
    140     public string GetVariableName(int variableIndex) {
    141       return variableNames[variableIndex];
    142     }
    143 
    144     public int GetVariableIndex(string variableName) {
    145       for (int i = 0; i < variableNames.Length; i++) {
    146         if (variableNames[i].Equals(variableName)) return i;
    147       }
    148       throw new ArgumentException("The variable name " + variableName + " was not found.");
     102    private bool fireChangeEvents = true;
     103    public bool FireChangeEvents {
     104      get { return fireChangeEvents; }
     105      set { fireChangeEvents = value; }
     106    }
     107
     108    private double[] scalingFactor;
     109    public double[] ScalingFactor {
     110      get { return scalingFactor; }
     111      set {
     112        if (value.Length != scalingFactor.Length)
     113          throw new ArgumentException("Length of scaling factor array doesn't match number of variables");
     114        scalingFactor = value;
     115      }
     116    }
     117
     118    private double[] scalingOffset;
     119    public double[] ScalingOffset {
     120      get { return scalingOffset; }
     121      set {
     122        if (value.Length != scalingOffset.Length)
     123          throw new ArgumentException("Length of scaling offset array doesn't match number of variables");
     124        scalingOffset = value;
     125      }
     126    }
     127    #endregion
     128
     129    #region Modify and get values
     130    public double GetValue(int i, int j) {
     131      return samples[columns * i + j];
    149132    }
    150133
     
    173156    }
    174157
     158    public void SetValue(int i, int j, double v) {
     159      if (v != samples[columns * i + j]) {
     160        samples[columns * i + j] = v;
     161        cachedValuesInvalidated = true;
     162        if (fireChangeEvents) FireChanged();
     163      }
     164    }
     165
     166    public IEnumerable<double> ReplaceVariableValues(int variableIndex, IEnumerable<double> newValues, int start, int end) {
     167      double[] oldValues = new double[end - start];
     168      for (int i = 0; i < end - start; i++) oldValues[i] = this.GetValue(i + start, variableIndex);
     169      if (newValues.Count() != end - start) throw new ArgumentException("The length of the new values sequence doesn't match the required length (number of replaced values)");
     170
     171      int index = start;
     172      this.FireChangeEvents = false;
     173      foreach (double v in newValues) {
     174        this.SetValue(index++, variableIndex, v);
     175      }
     176      this.FireChangeEvents = true;
     177      this.FireChanged();
     178      return oldValues;
     179    }
     180
     181    public IEnumerable<double> ReplaceVariableValues(string variableName, IEnumerable<double> newValues, int start, int end) {
     182      return ReplaceVariableValues(this.GetVariableIndex(variableName), newValues, start, end);
     183    }
     184    #endregion
     185
     186    #region Variable name methods
     187    public string GetVariableName(int variableIndex) {
     188      return variableNames[variableIndex];
     189    }
     190
     191    public int GetVariableIndex(string variableName) {
     192      for (int i = 0; i < variableNames.Length; i++) {
     193        if (variableNames[i].Equals(variableName)) return i;
     194      }
     195      throw new ArgumentException("The variable name " + variableName + " was not found.");
     196    }
     197
    175198    public void SetVariableName(int variableIndex, string name) {
    176199      variableNames[variableIndex] = name;
     
    180203      return this.variableNames.Contains(variableName);
    181204    }
     205    #endregion
    182206
    183207    public override IView CreateView() {
    184208      return new DatasetView(this);
     209    }
     210
     211
     212    #region Variable statistics
     213    public double GetMean(string variableName) {
     214      return GetMean(GetVariableIndex(variableName));
     215    }
     216
     217    public double GetMean(string variableName, int start, int end) {
     218      return GetMean(GetVariableIndex(variableName), start, end);
     219    }
     220
     221    public double GetMean(int column) {
     222      return GetMean(column, 0, Rows);
     223    }
     224
     225    public double GetMean(int column, int start, int end) {
     226      if (cachedValuesInvalidated) CreateDictionaries();
     227      if (!cachedMeans[column].ContainsKey(start) || !cachedMeans[column][start].ContainsKey(end)) {
     228        double[] values = new double[end - start];
     229        for (int sample = start; sample < end; sample++) {
     230          values[sample - start] = GetValue(sample, column);
     231        }
     232        double mean = Statistics.Mean(values);
     233        if (!cachedMeans[column].ContainsKey(start)) cachedMeans[column][start] = new Dictionary<int, double>();
     234        cachedMeans[column][start][end] = mean;
     235        return mean;
     236      } else {
     237        return cachedMeans[column][start][end];
     238      }
     239    }
     240
     241    public double GetRange(string variableName) {
     242      return GetRange(this.GetVariableIndex(variableName));
     243    }
     244
     245    public double GetRange(int column) {
     246      return GetRange(column, 0, Rows);
     247    }
     248
     249    public double GetRange(string variableName, int start, int end) {
     250      return GetRange(this.GetVariableIndex(variableName), start, end);
     251    }
     252
     253    public double GetRange(int column, int start, int end) {
     254      if (cachedValuesInvalidated) CreateDictionaries();
     255      if (!cachedRanges[column].ContainsKey(start) || !cachedRanges[column][start].ContainsKey(end)) {
     256        double[] values = new double[end - start];
     257        for (int sample = start; sample < end; sample++) {
     258          values[sample - start] = GetValue(sample, column);
     259        }
     260        double range = Statistics.Range(values);
     261        if (!cachedRanges[column].ContainsKey(start)) cachedRanges[column][start]= new Dictionary<int, double>();
     262        cachedRanges[column][start][end] = range;
     263        return range;
     264      } else {
     265        return cachedRanges[column][start][end];
     266      }
     267    }
     268
     269    public double GetMaximum(string variableName) {
     270      return GetMaximum(this.GetVariableIndex(variableName));
     271    }
     272
     273    public double GetMaximum(int column) {
     274      return GetMaximum(column, 0, Rows);
     275    }
     276
     277    public double GetMaximum(string variableName, int start, int end) {
     278      return GetMaximum(this.GetVariableIndex(variableName), start, end);
     279    }
     280
     281    public double GetMaximum(int column, int start, int end) {
     282      double max = Double.NegativeInfinity;
     283      for (int i = start; i < end; i++) {
     284        double val = GetValue(i, column);
     285        if (!double.IsNaN(val) && val > max) max = val;
     286      }
     287      return max;
     288    }
     289
     290    public double GetMinimum(string variableName) {
     291      return GetMinimum(GetVariableIndex(variableName));
     292    }
     293
     294    public double GetMinimum(int column) {
     295      return GetMinimum(column, 0, Rows);
     296    }
     297
     298    public double GetMinimum(string variableName, int start, int end) {
     299      return GetMinimum(this.GetVariableIndex(variableName), start, end);
     300    }
     301
     302    public double GetMinimum(int column, int start, int end) {
     303      double min = Double.PositiveInfinity;
     304      for (int i = start; i < end; i++) {
     305        double val = GetValue(i, column);
     306        if (!double.IsNaN(val) && val < min) min = val;
     307      }
     308      return min;
     309    }
     310    #endregion
     311
     312    internal void ScaleVariable(int column) {
     313      if (scalingFactor[column] == 1.0 && scalingOffset[column] == 0.0) {
     314        double min = GetMinimum(column);
     315        double max = GetMaximum(column);
     316        double range = max - min;
     317        if (range == 0) ScaleVariable(column, 1.0, -min);
     318        else ScaleVariable(column, 1.0 / range, -min);
     319      }
     320      cachedValuesInvalidated = true;
     321      if (fireChangeEvents) FireChanged();
     322    }
     323
     324    internal void ScaleVariable(int column, double factor, double offset) {
     325      scalingFactor[column] = factor;
     326      scalingOffset[column] = offset;
     327      for (int i = 0; i < Rows; i++) {
     328        double origValue = samples[i * columns + column];
     329        samples[i * columns + column] = (origValue + offset) * factor;
     330      }
     331      cachedValuesInvalidated = true;
     332      if (fireChangeEvents) FireChanged();
     333    }
     334
     335    internal void UnscaleVariable(int column) {
     336      if (scalingFactor[column] != 1.0 || scalingOffset[column] != 0.0) {
     337        for (int i = 0; i < rows; i++) {
     338          double scaledValue = samples[i * columns + column];
     339          samples[i * columns + column] = scaledValue / scalingFactor[column] - scalingOffset[column];
     340        }
     341        scalingFactor[column] = 1.0;
     342        scalingOffset[column] = 0.0;
     343      }
     344      cachedValuesInvalidated = true;
     345      if (fireChangeEvents) FireChanged();
     346    }
     347
     348    private void CreateDictionaries() {
     349      // keep a means and ranges dictionary for each column (possible target variable) of the dataset.
     350      cachedMeans = new Dictionary<int, Dictionary<int, double>>[columns];
     351      cachedRanges = new Dictionary<int, Dictionary<int, double>>[columns];
     352      for (int i = 0; i < columns; i++) {
     353        cachedMeans[i] = new Dictionary<int, Dictionary<int, double>>();
     354        cachedRanges[i] = new Dictionary<int, Dictionary<int, double>>();
     355      }
     356      cachedValuesInvalidated = false;
    185357    }
    186358
     
    313485    }
    314486    #endregion
    315 
    316     public double GetMean(int column) {
    317       return GetMean(column, 0, Rows);
    318     }
    319 
    320     public double GetMean(int column, int from, int to) {
    321       if (cachedValuesInvalidated) CreateDictionaries();
    322       if (!cachedMeans[column].ContainsKey(from) || !cachedMeans[column][from].ContainsKey(to)) {
    323         double[] values = new double[to - from];
    324         for (int sample = from; sample < to; sample++) {
    325           values[sample - from] = GetValue(sample, column);
    326         }
    327         double mean = Statistics.Mean(values);
    328         if (!cachedMeans[column].ContainsKey(from)) cachedMeans[column][from] = new Dictionary<int, double>();
    329         cachedMeans[column][from][to] = mean;
    330         return mean;
    331       } else {
    332         return cachedMeans[column][from][to];
    333       }
    334     }
    335 
    336     public double GetRange(int column) {
    337       return GetRange(column, 0, Rows);
    338     }
    339 
    340     public double GetRange(int column, int from, int to) {
    341       if (cachedValuesInvalidated) CreateDictionaries();
    342       if (!cachedRanges[column].ContainsKey(from) || !cachedRanges[column][from].ContainsKey(to)) {
    343         double[] values = new double[to - from];
    344         for (int sample = from; sample < to; sample++) {
    345           values[sample - from] = GetValue(sample, column);
    346         }
    347         double range = Statistics.Range(values);
    348         if (!cachedRanges[column].ContainsKey(from)) cachedRanges[column][from] = new Dictionary<int, double>();
    349         cachedRanges[column][from][to] = range;
    350         return range;
    351       } else {
    352         return cachedRanges[column][from][to];
    353       }
    354     }
    355 
    356     public double GetMaximum(int column) {
    357       return GetMaximum(column, 0, Rows);
    358     }
    359 
    360     public double GetMaximum(int column, int start, int end) {
    361       double max = Double.NegativeInfinity;
    362       for (int i = start; i < end; i++) {
    363         double val = GetValue(i, column);
    364         if (!double.IsNaN(val) && val > max) max = val;
    365       }
    366       return max;
    367     }
    368 
    369     public double GetMinimum(int column) {
    370       return GetMinimum(column, 0, Rows);
    371     }
    372 
    373     public double GetMinimum(int column, int start, int end) {
    374       double min = Double.PositiveInfinity;
    375       for (int i = start; i < end; i++) {
    376         double val = GetValue(i, column);
    377         if (!double.IsNaN(val) && val < min) min = val;
    378       }
    379       return min;
    380     }
    381 
    382     internal void ScaleVariable(int column) {
    383       if (scalingFactor[column] == 1.0 && scalingOffset[column] == 0.0) {
    384         double min = GetMinimum(column);
    385         double max = GetMaximum(column);
    386         double range = max - min;
    387         if (range == 0) ScaleVariable(column, 1.0, -min);
    388         else ScaleVariable(column, 1.0 / range, -min);
    389       }
    390       cachedValuesInvalidated = true;
    391       if (fireChangeEvents) FireChanged();
    392     }
    393 
    394     internal void ScaleVariable(int column, double factor, double offset) {
    395       scalingFactor[column] = factor;
    396       scalingOffset[column] = offset;
    397       for (int i = 0; i < Rows; i++) {
    398         double origValue = samples[i * columns + column];
    399         samples[i * columns + column] = (origValue + offset) * factor;
    400       }
    401       cachedValuesInvalidated = true;
    402       if (fireChangeEvents) FireChanged();
    403     }
    404 
    405     internal void UnscaleVariable(int column) {
    406       if (scalingFactor[column] != 1.0 || scalingOffset[column] != 0.0) {
    407         for (int i = 0; i < rows; i++) {
    408           double scaledValue = samples[i * columns + column];
    409           samples[i * columns + column] = scaledValue / scalingFactor[column] - scalingOffset[column];
    410         }
    411         scalingFactor[column] = 1.0;
    412         scalingOffset[column] = 0.0;
    413       }
    414       cachedValuesInvalidated = true;
    415       if (fireChangeEvents) FireChanged();
    416     }
    417 
    418     private void CreateDictionaries() {
    419       // keep a means and ranges dictionary for each column (possible target variable) of the dataset.
    420       cachedMeans = new Dictionary<int, Dictionary<int, double>>[columns];
    421       cachedRanges = new Dictionary<int, Dictionary<int, double>>[columns];
    422       for (int i = 0; i < columns; i++) {
    423         cachedMeans[i] = new Dictionary<int, Dictionary<int, double>>();
    424         cachedRanges[i] = new Dictionary<int, Dictionary<int, double>>();
    425       }
    426       cachedValuesInvalidated = false;
    427     }
    428487  }
    429488}
Note: See TracChangeset for help on using the changeset viewer.