Changeset 5275 for branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis/3.3/SupportVectorMachine/SupportVectorMachineCrossValidationEvaluator.cs
- Timestamp: 01/11/11 15:03:46 (14 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis/3.3/SupportVectorMachine/SupportVectorMachineCrossValidationEvaluator.cs
r4068 r5275 21 21 22 22 using System; 23 using System.Collections.Generic; 23 24 using System.Linq; 25 using HeuristicLab.Common; 24 26 using HeuristicLab.Core; 25 27 using HeuristicLab.Data; … … 125 127 #endregion 126 128 129 [StorableConstructor] 130 protected SupportVectorMachineCrossValidationEvaluator(bool deserializing) : base(deserializing) { } 131 132 protected SupportVectorMachineCrossValidationEvaluator(SupportVectorMachineCrossValidationEvaluator original, 133 Cloner cloner) 134 : base(original, cloner) { } 127 135 public SupportVectorMachineCrossValidationEvaluator() 128 136 : base() { … … 142 150 } 143 151 152 public override IDeepCloneable Clone(Cloner cloner) { 153 return new SupportVectorMachineCrossValidationEvaluator(this, cloner); 154 } 155 144 156 public override IOperation Apply() { 145 double reductionRatio = 1.0; 157 double reductionRatio = 1.0; // TODO: make parameter 146 158 if (ActualSamplesParameter.ActualValue != null) 147 159 reductionRatio = ActualSamplesParameter.ActualValue.Value; 148 149 int reducedRows = (int)((SamplesEnd.Value - SamplesStart.Value) * reductionRatio); 160 IEnumerable<int> rows = 161 Enumerable.Range(SamplesStart.Value, SamplesEnd.Value - SamplesStart.Value) 162 .Where(i => i < DataAnalysisProblemData.TestSamplesStart.Value || DataAnalysisProblemData.TestSamplesEnd.Value <= i); 163 164 // create a new DataAnalysisProblemData instance 150 165 DataAnalysisProblemData reducedProblemData = (DataAnalysisProblemData)DataAnalysisProblemData.Clone(); 151 reducedProblemData.Dataset = CreateReducedDataset(RandomParameter.ActualValue, reducedProblemData.Dataset, reductionRatio, SamplesStart.Value, SamplesEnd.Value); 166 reducedProblemData.Dataset = 167 CreateReducedDataset(RandomParameter.ActualValue, reducedProblemData.Dataset, rows, reductionRatio); 168 reducedProblemData.TrainingSamplesStart.Value = 0; 169 reducedProblemData.TrainingSamplesEnd.Value = reducedProblemData.Dataset.Rows; 170 
reducedProblemData.TestSamplesStart.Value = reducedProblemData.Dataset.Rows; 171 reducedProblemData.TestSamplesEnd.Value = reducedProblemData.Dataset.Rows; 172 reducedProblemData.ValidationPercentage.Value = 0; 152 173 153 174 double quality = PerformCrossValidation(reducedProblemData, 154 SamplesStart.Value, SamplesStart.Value + reducedRows,155 175 SvmType.Value, KernelType.Value, 156 176 Cost.Value, Nu.Value, Gamma.Value, Epsilon.Value, NumberOfFolds.Value); … … 160 180 } 161 181 162 private Dataset CreateReducedDataset(IRandom random, Dataset dataset, double reductionRatio, int start, int end) {163 int n = (int)((end - start) * reductionRatio); 182 private Dataset CreateReducedDataset(IRandom random, Dataset dataset, IEnumerable<int> rowIndices, double reductionRatio) { 183 164 184 // must not make a fink: 165 185 // => select n rows randomly from start..end … … 168 188 169 189 // all possible rowIndexes from start..end 170 int[] rowIndexes = Enumerable.Range(start, end - start).ToArray(); 190 int[] rowIndexArr = rowIndices.ToArray(); 191 int n = (int)Math.Max(1.0, rowIndexArr.Length * reductionRatio); 171 192 172 193 // knuth shuffle 173 for (int i = rowIndex es.Length - 1; i > 0; i--) {194 for (int i = rowIndexArr.Length - 1; i > 0; i--) { 174 195 int j = random.Next(0, i); 175 196 // swap 176 int tmp = rowIndex es[i];177 rowIndex es[i] = rowIndexes[j];178 rowIndex es[j] = tmp;197 int tmp = rowIndexArr[i]; 198 rowIndexArr[i] = rowIndexArr[j]; 199 rowIndexArr[j] = tmp; 179 200 } 180 201 181 202 // take the first n indexes (selected n rowIndexes from start..end) 182 203 // now order by index 183 var orderedRandomIndexes = rowIndexes.Take(n).OrderBy(x => x).ToArray(); 184 185 // now build a dataset collecting the rows from orderedRandomIndexes into the dataset starting at index start 186 double[,] reducedData = dataset.GetClonedData(); 204 int[] orderedRandomIndexes = 205 rowIndexArr.Take(n) 206 .OrderBy(x => x) 207 .ToArray(); 208 209 // now build a dataset 
containing only rows from orderedRandomIndexes 210 double[,] reducedData = new double[n, dataset.Columns]; 187 211 for (int i = 0; i < n; i++) { 188 212 for (int column = 0; column < dataset.Columns; column++) { 189 reducedData[ start +i, column] = dataset[orderedRandomIndexes[i], column];213 reducedData[i, column] = dataset[orderedRandomIndexes[i], column]; 190 214 } 191 215 } … … 198 222 double cost, double nu, double gamma, double epsilon, 199 223 int nFolds) { 200 return PerformCrossValidation(problemData, problemData.Training SamplesStart.Value, problemData.TrainingSamplesEnd.Value, svmType, kernelType, cost, nu, gamma, epsilon, nFolds);224 return PerformCrossValidation(problemData, problemData.TrainingIndizes, svmType, kernelType, cost, nu, gamma, epsilon, nFolds); 201 225 } 202 226 203 227 public static double PerformCrossValidation( 204 228 DataAnalysisProblemData problemData, 205 int start, int end,229 IEnumerable<int> rowIndices, 206 230 string svmType, string kernelType, 207 231 double cost, double nu, double gamma, double epsilon, … … 221 245 222 246 223 SVM.Problem problem = SupportVectorMachineUtil.CreateSvmProblem(problemData, start, end);247 SVM.Problem problem = SupportVectorMachineUtil.CreateSvmProblem(problemData, rowIndices); 224 248 SVM.RangeTransform rangeTransform = SVM.RangeTransform.Compute(problem); 225 249 SVM.Problem scaledProblem = Scaling.Scale(rangeTransform, problem);
Note: See TracChangeset for help on using the changeset viewer.