Changeset 3934


Ignore:
Timestamp:
06/21/10 15:51:54 (9 years ago)
Author:
gkronber
Message:

Fixed problem with shuffling and cross-validation in SVM cross-validation evaluator. #1009

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/SupportVectorMachine/SupportVectorMachineCrossValidationEvaluator.cs

    r3933 r3934  
    165 165
    166 166    private Dataset CreateReducedDataset(IRandom random, Dataset dataset, double reductionRatio, int start, int end) {
    167       int reducedRows = (int)((end - start) * reductionRatio);
     167      int n = (int)((end - start) * reductionRatio);
     168      // must not make a fink:
     169      // => select n rows randomly from start..end
     170      // => sort the selected rows by index
     171      // => move rows to beginning of partition (start)
     172
     173      // all possible rowIndexes from start..end
     174      int[] rowIndexes = Enumerable.Range(start, end - start).ToArray();
     175
     176      // knuth shuffle
     177      for (int i = rowIndexes.Length - 1; i > 0; i--) {
     178        int j = random.Next(0, i);
     179        // swap
     180        int tmp = rowIndexes[i];
     181        rowIndexes[i] = rowIndexes[j];
     182        rowIndexes[j] = tmp;
     183      }
     184
     185      // take the first n indexes (selected n rowIndexes from start..end)
     186      // now order by index
     187      var orderedRandomIndexes = rowIndexes.Take(n).OrderBy(x => x).ToArray();
     188
     189      // now build a dataset collecting the rows from orderedRandomIndexes into the dataset starting at index start
    168 190      double[,] reducedData = dataset.GetClonedData();
    169       HashSet<int> leftRows = new HashSet<int>(Enumerable.Range(0, end - start));
    170       for (int row = 0; row < reducedRows; row++) {
    171         int rowIndex = random.Next(0, leftRows.Count);
    172         leftRows.Remove(rowIndex);
    173         for (int column = 0; column < dataset.Columns; column++)
    174           reducedData[row, column] = dataset[rowIndex, column];
     191      for (int i = 0; i < n; i++) {
     192        for (int column = 0; column < dataset.Columns; column++) {
     193          reducedData[start + i, column] = dataset[orderedRandomIndexes[i], column];
     194        }
    175 195      }
    176 196      return new Dataset(dataset.VariableNames, reducedData);
Note: See TracChangeset for help on using the changeset viewer.