- Timestamp: 06/21/10 15:51:54 (14 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/SupportVectorMachine/SupportVectorMachineCrossValidationEvaluator.cs
r3933 r3934
  165   165
  166   166        private Dataset CreateReducedDataset(IRandom random, Dataset dataset, double reductionRatio, int start, int end) {
  167         -      int reducedRows = (int)((end - start) * reductionRatio);
        167   +      int n = (int)((end - start) * reductionRatio);
        168   +      // must not make a fink:
        169   +      // => select n rows randomly from start..end
        170   +      // => sort the selected rows by index
        171   +      // => move rows to beginning of partition (start)
        172   +
        173   +      // all possible rowIndexes from start..end
        174   +      int[] rowIndexes = Enumerable.Range(start, end - start).ToArray();
        175   +
        176   +      // knuth shuffle
        177   +      for (int i = rowIndexes.Length - 1; i > 0; i--) {
        178   +        int j = random.Next(0, i);
        179   +        // swap
        180   +        int tmp = rowIndexes[i];
        181   +        rowIndexes[i] = rowIndexes[j];
        182   +        rowIndexes[j] = tmp;
        183   +      }
        184   +
        185   +      // take the first n indexes (selected n rowIndexes from start..end)
        186   +      // now order by index
        187   +      var orderedRandomIndexes = rowIndexes.Take(n).OrderBy(x => x).ToArray();
        188   +
        189   +      // now build a dataset collecting the rows from orderedRandomIndexes into the dataset starting at index start
  168   190          double[,] reducedData = dataset.GetClonedData();
  169         -      HashSet<int> leftRows = new HashSet<int>(Enumerable.Range(0, end - start));
  170         -      for (int row = 0; row < reducedRows; row++) {
  171         -        int rowIndex = random.Next(0, leftRows.Count);
  172         -        leftRows.Remove(rowIndex);
  173         -        for (int column = 0; column < dataset.Columns; column++)
  174         -          reducedData[row, column] = dataset[rowIndex, column];
        191   +      for (int i = 0; i < n; i++) {
        192   +        for (int column = 0; column < dataset.Columns; column++) {
        193   +          reducedData[start + i, column] = dataset[orderedRandomIndexes[i], column];
        194   +        }
  175   195          }
  176   196          return new Dataset(dataset.VariableNames, reducedData);
Note: See TracChangeset for help on using the changeset viewer.