Changeset 4543 for trunk/sources/HeuristicLab.Problems.DataAnalysis
- Timestamp: 10/01/10 12:31:04 (14 years ago)
- Location: trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3
- Files: 6 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/DataAnalysisProblemData.cs
r4473 r4543 180 180 get { 181 181 return Enumerable.Range(TrainingSamplesStart.Value, TrainingSamplesEnd.Value - TrainingSamplesStart.Value) 182 .Where(i => i > 0 && i < Dataset.Rows && (i < TestSamplesStart.Value || TestSamplesEnd.Value <= i));182 .Where(i => i >= 0 && i < Dataset.Rows && (i < TestSamplesStart.Value || TestSamplesEnd.Value <= i)); 183 183 } 184 184 } … … 186 186 get { 187 187 return Enumerable.Range(TestSamplesStart.Value, TestSamplesEnd.Value - TestSamplesStart.Value) 188 .Where(i => i > 0 && i < Dataset.Rows);188 .Where(i => i >= 0 && i < Dataset.Rows); 189 189 } 190 190 } -
trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/SupportVectorMachine/SupportVectorMachineCrossValidationEvaluator.cs
r4068 r4543 29 29 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; 30 30 using SVM; 31 using System.Collections.Generic; 31 32 32 33 namespace HeuristicLab.Problems.DataAnalysis.SupportVectorMachine { … … 143 144 144 145 public override IOperation Apply() { 145 double reductionRatio = 1.0; 146 double reductionRatio = 1.0; // TODO: make parameter 146 147 if (ActualSamplesParameter.ActualValue != null) 147 148 reductionRatio = ActualSamplesParameter.ActualValue.Value; 148 149 int reducedRows = (int)((SamplesEnd.Value - SamplesStart.Value) * reductionRatio); 149 IEnumerable<int> rows = 150 Enumerable.Range(SamplesStart.Value, SamplesEnd.Value - SamplesStart.Value) 151 .Where(i => i < DataAnalysisProblemData.TestSamplesStart.Value || DataAnalysisProblemData.TestSamplesEnd.Value <= i); 152 153 // create a new DataAnalysisProblemData instance 150 154 DataAnalysisProblemData reducedProblemData = (DataAnalysisProblemData)DataAnalysisProblemData.Clone(); 151 reducedProblemData.Dataset = CreateReducedDataset(RandomParameter.ActualValue, reducedProblemData.Dataset, reductionRatio, SamplesStart.Value, SamplesEnd.Value); 155 reducedProblemData.Dataset = 156 CreateReducedDataset(RandomParameter.ActualValue, reducedProblemData.Dataset, rows, reductionRatio); 157 reducedProblemData.TrainingSamplesStart.Value = 0; 158 reducedProblemData.TrainingSamplesEnd.Value = reducedProblemData.Dataset.Rows; 159 reducedProblemData.TestSamplesStart.Value = reducedProblemData.Dataset.Rows; 160 reducedProblemData.TestSamplesEnd.Value = reducedProblemData.Dataset.Rows; 161 reducedProblemData.ValidationPercentage.Value = 0; 152 162 153 163 double quality = PerformCrossValidation(reducedProblemData, 154 SamplesStart.Value, SamplesStart.Value + reducedRows,155 164 SvmType.Value, KernelType.Value, 156 165 Cost.Value, Nu.Value, Gamma.Value, Epsilon.Value, NumberOfFolds.Value); … … 160 169 } 161 170 162 private Dataset CreateReducedDataset(IRandom random, Dataset dataset, double 
reductionRatio, int start, int end) {163 int n = (int)((end - start) * reductionRatio);171 private Dataset CreateReducedDataset(IRandom random, Dataset dataset, IEnumerable<int> rowIndices, double reductionRatio) { 172 164 173 // must not make a fink: 165 174 // => select n rows randomly from start..end … … 168 177 169 178 // all possible rowIndexes from start..end 170 int[] rowIndexes = Enumerable.Range(start, end - start).ToArray(); 179 int[] rowIndexArr = rowIndices.ToArray(); 180 int n = (int)Math.Max(1.0, rowIndexArr.Length * reductionRatio); 171 181 172 182 // knuth shuffle 173 for (int i = rowIndex es.Length - 1; i > 0; i--) {183 for (int i = rowIndexArr.Length - 1; i > 0; i--) { 174 184 int j = random.Next(0, i); 175 185 // swap 176 int tmp = rowIndex es[i];177 rowIndex es[i] = rowIndexes[j];178 rowIndex es[j] = tmp;186 int tmp = rowIndexArr[i]; 187 rowIndexArr[i] = rowIndexArr[j]; 188 rowIndexArr[j] = tmp; 179 189 } 180 190 181 191 // take the first n indexes (selected n rowIndexes from start..end) 182 192 // now order by index 183 var orderedRandomIndexes = rowIndexes.Take(n).OrderBy(x => x).ToArray(); 184 185 // now build a dataset collecting the rows from orderedRandomIndexes into the dataset starting at index start 186 double[,] reducedData = dataset.GetClonedData(); 193 int[] orderedRandomIndexes = 194 rowIndexArr.Take(n) 195 .OrderBy(x => x) 196 .ToArray(); 197 198 // now build a dataset containing only rows from orderedRandomIndexes 199 double[,] reducedData = new double[n, dataset.Columns]; 187 200 for (int i = 0; i < n; i++) { 188 201 for (int column = 0; column < dataset.Columns; column++) { 189 reducedData[ start +i, column] = dataset[orderedRandomIndexes[i], column];202 reducedData[i, column] = dataset[orderedRandomIndexes[i], column]; 190 203 } 191 204 } … … 198 211 double cost, double nu, double gamma, double epsilon, 199 212 int nFolds) { 200 return PerformCrossValidation(problemData, problemData.Training SamplesStart.Value, 
problemData.TrainingSamplesEnd.Value, svmType, kernelType, cost, nu, gamma, epsilon, nFolds);213 return PerformCrossValidation(problemData, problemData.TrainingIndizes, svmType, kernelType, cost, nu, gamma, epsilon, nFolds); 201 214 } 202 215 203 216 public static double PerformCrossValidation( 204 217 DataAnalysisProblemData problemData, 205 int start, int end,218 IEnumerable<int> rowIndices, 206 219 string svmType, string kernelType, 207 220 double cost, double nu, double gamma, double epsilon, … … 221 234 222 235 223 SVM.Problem problem = SupportVectorMachineUtil.CreateSvmProblem(problemData, start, end);236 SVM.Problem problem = SupportVectorMachineUtil.CreateSvmProblem(problemData, rowIndices); 224 237 SVM.RangeTransform rangeTransform = SVM.RangeTransform.Compute(problem); 225 238 SVM.Problem scaledProblem = Scaling.Scale(rangeTransform, problem); -
trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/SupportVectorMachine/SupportVectorMachineModel.cs
r4068 r4543 72 72 73 73 public IEnumerable<double> GetEstimatedValues(DataAnalysisProblemData problemData, int start, int end) { 74 SVM.Problem problem = SupportVectorMachineUtil.CreateSvmProblem(problemData, start, end);74 SVM.Problem problem = SupportVectorMachineUtil.CreateSvmProblem(problemData, Enumerable.Range(start, end - start)); 75 75 SVM.Problem scaledProblem = Scaling.Scale(RangeTransform, problem); 76 76 77 77 return (from row in Enumerable.Range(0, scaledProblem.Count) 78 select SVM.Prediction.Predict(Model, scaledProblem.X[row])).ToList(); 78 select SVM.Prediction.Predict(Model, scaledProblem.X[row])) 79 .ToList(); 79 80 } 80 81 -
trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/SupportVectorMachine/SupportVectorMachineModelCreator.cs
r4068 r4543 27 27 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; 28 28 using SVM; 29 using System.Collections.Generic; 30 using System.Linq; 29 31 30 32 namespace HeuristicLab.Problems.DataAnalysis.SupportVectorMachine { … … 125 127 126 128 public override IOperation Apply() { 129 int start = SamplesStart.Value; 130 int end = SamplesEnd.Value; 131 IEnumerable<int> rows = 132 Enumerable.Range(start, end-start) 133 .Where(i => i < DataAnalysisProblemData.TestSamplesStart.Value || DataAnalysisProblemData.TestSamplesEnd.Value <= i); 134 127 135 SupportVectorMachineModel model = TrainModel(DataAnalysisProblemData, 128 SamplesStart.Value, SamplesEnd.Value,136 rows, 129 137 SvmType.Value, KernelType.Value, 130 138 Cost.Value, Nu.Value, Gamma.Value, Epsilon.Value); … … 138 146 string svmType, string kernelType, 139 147 double cost, double nu, double gamma, double epsilon) { 140 return TrainModel(problemData, problemData.Training SamplesStart.Value, problemData.TrainingSamplesEnd.Value, svmType, kernelType, cost, nu, gamma, epsilon);148 return TrainModel(problemData, problemData.TrainingIndizes, svmType, kernelType, cost, nu, gamma, epsilon); 141 149 } 142 150 143 151 public static SupportVectorMachineModel TrainModel( 144 152 DataAnalysisProblemData problemData, 145 int start, int end,153 IEnumerable<int> trainingIndizes, 146 154 string svmType, string kernelType, 147 155 double cost, double nu, double gamma, double epsilon) { … … 160 168 161 169 162 SVM.Problem problem = SupportVectorMachineUtil.CreateSvmProblem(problemData, start, end);170 SVM.Problem problem = SupportVectorMachineUtil.CreateSvmProblem(problemData, trainingIndizes); 163 171 SVM.RangeTransform rangeTransform = SVM.RangeTransform.Compute(problem); 164 172 SVM.Problem scaledProblem = Scaling.Scale(rangeTransform, problem); -
trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/SupportVectorMachine/SupportVectorMachineModelEvaluator.cs
r4068 r4543 26 26 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; 27 27 using SVM; 28 using System.Collections.Generic; 29 using System.Linq; 28 30 29 31 namespace HeuristicLab.Problems.DataAnalysis.SupportVectorMachine { … … 80 82 int start = SamplesStart.Value; 81 83 int end = SamplesEnd.Value; 84 IEnumerable<int> rows = 85 Enumerable.Range(start, end - start) 86 .Where(i => i < DataAnalysisProblemData.TestSamplesStart.Value || DataAnalysisProblemData.TestSamplesEnd.Value <= i); 82 87 83 ValuesParameter.ActualValue = new DoubleMatrix(Evaluate(SupportVectorMachineModel, DataAnalysisProblemData, start, end));88 ValuesParameter.ActualValue = new DoubleMatrix(Evaluate(SupportVectorMachineModel, DataAnalysisProblemData, rows)); 84 89 return base.Apply(); 85 90 } 86 91 87 public static double[,] Evaluate(SupportVectorMachineModel model, DataAnalysisProblemData problemData, int start, int end) {88 SVM.Problem problem = SupportVectorMachineUtil.CreateSvmProblem(problemData, start, end);92 public static double[,] Evaluate(SupportVectorMachineModel model, DataAnalysisProblemData problemData, IEnumerable<int> rowIndices) { 93 SVM.Problem problem = SupportVectorMachineUtil.CreateSvmProblem(problemData, rowIndices); 89 94 SVM.Problem scaledProblem = model.RangeTransform.Scale(problem); 90 95 … … 92 97 93 98 double[,] values = new double[scaledProblem.Count, 2]; 99 var rowEnumerator = rowIndices.GetEnumerator(); 94 100 for (int i = 0; i < scaledProblem.Count; i++) { 95 values[i, 0] = problemData.Dataset[start + i, targetVariableIndex]; 101 rowEnumerator.MoveNext(); 102 values[i, 0] = problemData.Dataset[rowEnumerator.Current, targetVariableIndex]; 96 103 values[i, 1] = SVM.Prediction.Predict(model.Model, scaledProblem.X[i]); 97 104 } -
trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/SupportVectorMachine/SupportVectorMachineUtil.cs
r4068 r4543 29 29 /// </summary> 30 30 /// <param name="problemData">The problem data to transform</param> 31 /// <param name="start">The index of the first row of <paramref name="problemData"/> to copy to the output.</param> 32 /// <param name="end">The last of the first row of <paramref name="problemData"/> to copy to the output.</param> 31 /// <param name="rowIndices">The rows of the dataset that should be contained in the resulting SVM-problem</param> 33 32 /// <returns>A problem data type that can be used to train a support vector machine.</returns> 34 public static SVM.Problem CreateSvmProblem(DataAnalysisProblemData problemData, int start, int end) { 35 int rowCount = end - start; 36 var targetVector = problemData.Dataset.GetVariableValues(problemData.TargetVariable.Value, start, end); 33 public static SVM.Problem CreateSvmProblem(DataAnalysisProblemData problemData, IEnumerable<int> rowIndices) { 34 double[] targetVector = 35 problemData.Dataset.GetEnumeratedVariableValues(problemData.TargetVariable.Value, rowIndices) 36 .ToArray(); 37 37 38 38 SVM.Node[][] nodes = new SVM.Node[targetVector.Length][]; 39 39 List<SVM.Node> tempRow; 40 40 int maxNodeIndex = 0; 41 for (int row = 0; row < rowCount; row++) { 41 int svmProblemRowIndex = 0; 42 foreach (int row in rowIndices) { 42 43 tempRow = new List<SVM.Node>(); 43 44 foreach (var inputVariable in problemData.InputVariables.CheckedItems) { 44 45 int col = problemData.Dataset.GetVariableIndex(inputVariable.Value.Value); 45 double value = problemData.Dataset[ start +row, col];46 double value = problemData.Dataset[row, col]; 46 47 if (!double.IsNaN(value)) { 47 int nodeIndex = col + 1; // make sure the smallest nodeIndex = 148 int nodeIndex = col + 1; // make sure the smallest nodeIndex is 1 (libSVM convention) 48 49 tempRow.Add(new SVM.Node(nodeIndex, value)); 49 50 if (nodeIndex > maxNodeIndex) maxNodeIndex = nodeIndex; 50 51 } 51 52 } 52 nodes[ row] = tempRow.OrderBy(x => x.Index).ToArray(); // make sure the 
values are sorted by node index53 nodes[svmProblemRowIndex++] = tempRow.OrderBy(x => x.Index).ToArray(); // make sure the values are sorted by node index 53 54 } 54 55
Note: See TracChangeset for help on using the changeset viewer.