/*
 * SVM.NET Library
 * Copyright (C) 2008 Matthew Johnson
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

using System;
using System.Collections.Generic;
using System.IO;

namespace SVM
{
    /// <summary>
    /// This class contains routines which perform parameter selection for a model which uses C-SVC and
    /// an RBF kernel.
    /// </summary>
    public static class ParameterSelection
    {
        /// <summary>
        /// Default number of times to divide the data.
        /// </summary>
        public const int NFOLD = 5;
        /// <summary>
        /// Default minimum power of 2 for the C value (-5)
        /// </summary>
        public const int MIN_C = -5;
        /// <summary>
        /// Default maximum power of 2 for the C value (15)
        /// </summary>
        public const int MAX_C = 15;
        /// <summary>
        /// Default power iteration step for the C value (2)
        /// </summary>
        public const int C_STEP = 2;
        /// <summary>
        /// Default minimum power of 2 for the Gamma value (-15)
        /// </summary>
        public const int MIN_G = -15;
        /// <summary>
        /// Default maximum power of 2 for the Gamma value (3)
        /// </summary>
        public const int MAX_G = 3;
        /// <summary>
        /// Default power iteration step for the Gamma value (2)
        /// </summary>
        public const int G_STEP = 2;

        /// <summary>
        /// Scores the candidate currently stored in <paramref name="candidate"/>; a larger
        /// return value is treated as a better result by the grid search.
        /// </summary>
        /// <param name="candidate">The parameter object with the C and Gamma under test already assigned</param>
        /// <returns>The score for that candidate</returns>
        private delegate double CandidateScorer(Parameter candidate);

        /// <summary>
        /// Returns a logarithmic list of values from minimum power of 2 to the maximum power of 2 using the
        /// provided iteration size. The list is empty when <paramref name="minPower"/> is greater than
        /// <paramref name="maxPower"/>.
        /// </summary>
        /// <param name="minPower">The minimum power of 2</param>
        /// <param name="maxPower">The maximum power of 2</param>
        /// <param name="iteration">The iteration size to use in powers</param>
        /// <returns>The values 2^minPower, 2^(minPower + iteration), ... not exceeding 2^maxPower</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="iteration"/> is not positive while the range is non-empty, so the
        /// enumeration could never reach <paramref name="maxPower"/>.
        /// </exception>
        public static List<double> GetList(double minPower, double maxPower, double iteration)
        {
            // A non-positive step over a non-empty range would loop forever and grow the
            // list without bound; fail fast instead.
            if (iteration <= 0 && minPower <= maxPower)
            {
                throw new ArgumentOutOfRangeException(
                    "iteration", "The iteration size must be positive when minPower <= maxPower.");
            }

            List<double> list = new List<double>();
            for (double d = minPower; d <= maxPower; d += iteration)
            {
                list.Add(Math.Pow(2, d));
            }
            return list;
        }

        /// <summary>
        /// Performs a Grid parameter selection, trying all possible combinations of the two lists and returning the
        /// combination which performed best.  The default ranges of C and Gamma values are used.  Use this method
        /// if there is no validation data available, and it will divide it 5 times to allow 5-fold validation
        /// (training on 4/5 and validating on 1/5, 5 times).
        /// </summary>
        /// <param name="problem">The training data</param>
        /// <param name="parameters">The parameters to use when optimizing</param>
        /// <param name="outputFile">Output file for the parameter results, or null to write no file</param>
        /// <param name="C">The optimal C value will be put into this variable</param>
        /// <param name="Gamma">The optimal Gamma value will be put into this variable</param>
        public static void Grid(
            Problem problem,
            Parameter parameters,
            string outputFile,
            out double C,
            out double Gamma)
        {
            Grid(
                problem,
                parameters,
                GetList(MIN_C, MAX_C, C_STEP),
                GetList(MIN_G, MAX_G, G_STEP),
                outputFile,
                NFOLD,
                out C,
                out Gamma);
        }

        /// <summary>
        /// Performs a Grid parameter selection, trying all possible combinations of the two lists and returning the
        /// combination which performed best.  Use this method if there is no validation data available, and it will
        /// divide it 5 times to allow 5-fold validation (training on 4/5 and validating on 1/5, 5 times).
        /// </summary>
        /// <param name="problem">The training data</param>
        /// <param name="parameters">The parameters to use when optimizing</param>
        /// <param name="CValues">The set of C values to use</param>
        /// <param name="GammaValues">The set of Gamma values to use</param>
        /// <param name="outputFile">Output file for the parameter results, or null to write no file</param>
        /// <param name="C">The optimal C value will be put into this variable</param>
        /// <param name="Gamma">The optimal Gamma value will be put into this variable</param>
        public static void Grid(
            Problem problem,
            Parameter parameters,
            List<double> CValues,
            List<double> GammaValues,
            string outputFile,
            out double C,
            out double Gamma)
        {
            Grid(problem, parameters, CValues, GammaValues, outputFile, NFOLD, out C, out Gamma);
        }

        /// <summary>
        /// Performs a Grid parameter selection, trying all possible combinations of the two lists and returning the
        /// combination which performed best.  Use this method if validation data isn't available, as it will
        /// divide the training data and train on a portion of it and test on the rest.
        /// </summary>
        /// <param name="problem">The training data</param>
        /// <param name="parameters">The parameters to use when optimizing; its C and Gamma are overwritten during the search</param>
        /// <param name="CValues">The set of C values to use</param>
        /// <param name="GammaValues">The set of Gamma values to use</param>
        /// <param name="outputFile">Output file for the parameter results, or null to write no file</param>
        /// <param name="nrfold">The number of times the data should be divided for validation</param>
        /// <param name="C">The optimal C value will be placed in this variable (0 if no combination was tried)</param>
        /// <param name="Gamma">The optimal Gamma value will be placed in this variable (0 if no combination was tried)</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="parameters"/>, <paramref name="CValues"/> or <paramref name="GammaValues"/> is null.
        /// </exception>
        public static void Grid(
            Problem problem,
            Parameter parameters,
            List<double> CValues,
            List<double> GammaValues,
            string outputFile,
            int nrfold,
            out double C,
            out double Gamma)
        {
            // NOTE(review): the body of this loop was truncated in the reviewed copy of the file.
            // It is rebuilt to mirror the validation-set overload; the scoring call is presumed to be
            // Training.PerformCrossValidation(problem, parameters, nrfold) -- confirm against the
            // Training class before merging.
            Search(
                parameters,
                CValues,
                GammaValues,
                outputFile,
                delegate(Parameter candidate)
                {
                    return Training.PerformCrossValidation(problem, candidate, nrfold);
                },
                out C,
                out Gamma);
        }

        /// <summary>
        /// Performs a Grid parameter selection, trying all possible combinations of the two lists and returning the
        /// combination which performed best.  Uses the default values of C and Gamma.
        /// </summary>
        /// <param name="problem">The training data</param>
        /// <param name="validation">The validation data</param>
        /// <param name="parameters">The parameters to use when optimizing</param>
        /// <param name="outputFile">The output file for the parameter results, or null to write no file</param>
        /// <param name="C">The optimal C value will be placed in this variable</param>
        /// <param name="Gamma">The optimal Gamma value will be placed in this variable</param>
        public static void Grid(
            Problem problem,
            Problem validation,
            Parameter parameters,
            string outputFile,
            out double C,
            out double Gamma)
        {
            Grid(
                problem,
                validation,
                parameters,
                GetList(MIN_C, MAX_C, C_STEP),
                GetList(MIN_G, MAX_G, G_STEP),
                outputFile,
                out C,
                out Gamma);
        }

        /// <summary>
        /// Performs a Grid parameter selection, trying all possible combinations of the two lists and returning the
        /// combination which performed best.  Each candidate is trained on <paramref name="problem"/> and scored by
        /// predicting <paramref name="validation"/>; the prediction call is given the output path "tmp.txt".
        /// </summary>
        /// <param name="problem">The training data</param>
        /// <param name="validation">The validation data</param>
        /// <param name="parameters">The parameters to use when optimizing; its C and Gamma are overwritten during the search</param>
        /// <param name="CValues">The C values to use</param>
        /// <param name="GammaValues">The Gamma values to use</param>
        /// <param name="outputFile">The output file for the parameter results, or null to write no file</param>
        /// <param name="C">The optimal C value will be placed in this variable (0 if no combination was tried)</param>
        /// <param name="Gamma">The optimal Gamma value will be placed in this variable (0 if no combination was tried)</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="parameters"/>, <paramref name="CValues"/> or <paramref name="GammaValues"/> is null.
        /// </exception>
        public static void Grid(
            Problem problem,
            Problem validation,
            Parameter parameters,
            List<double> CValues,
            List<double> GammaValues,
            string outputFile,
            out double C,
            out double Gamma)
        {
            Search(
                parameters,
                CValues,
                GammaValues,
                outputFile,
                delegate(Parameter candidate)
                {
                    Model model = Training.Train(problem, candidate);
                    return Prediction.Predict(validation, "tmp.txt", model, false);
                },
                out C,
                out Gamma);
        }

        /// <summary>
        /// Shared grid-search loop: assigns every (C, Gamma) pair to <paramref name="parameters"/>, scores it with
        /// <paramref name="score"/>, reports each result to the console (and to <paramref name="outputFile"/> when
        /// one is given) and keeps the pair with the strictly highest score.
        /// </summary>
        /// <param name="parameters">The parameter object mutated for each candidate</param>
        /// <param name="CValues">The C values to try (outer loop)</param>
        /// <param name="GammaValues">The Gamma values to try (inner loop)</param>
        /// <param name="outputFile">Path of the results file, or null to write no file</param>
        /// <param name="score">Callback which evaluates the current candidate</param>
        /// <param name="C">Receives the best C value, or 0 if no combination was tried</param>
        /// <param name="Gamma">Receives the best Gamma value, or 0 if no combination was tried</param>
        private static void Search(
            Parameter parameters,
            List<double> CValues,
            List<double> GammaValues,
            string outputFile,
            CandidateScorer score,
            out double C,
            out double Gamma)
        {
            // Validate before touching the file system so a bad call does not create or
            // truncate the output file.
            if (parameters == null)
            {
                throw new ArgumentNullException("parameters");
            }
            if (CValues == null)
            {
                throw new ArgumentNullException("CValues");
            }
            if (GammaValues == null)
            {
                throw new ArgumentNullException("GammaValues");
            }

            C = 0;
            Gamma = 0;
            double bestScore = double.MinValue;

            // A using statement accepts a null resource, so the writer is optional here and is
            // still closed when training, prediction or writing throws part-way through the grid.
            using (StreamWriter output = outputFile != null ? new StreamWriter(outputFile) : null)
            {
                for (int i = 0; i < CValues.Count; i++)
                {
                    for (int j = 0; j < GammaValues.Count; j++)
                    {
                        parameters.C = CValues[i];
                        parameters.Gamma = GammaValues[j];

                        double test = score(parameters);

                        Console.Write("{0} {1} {2}", parameters.C, parameters.Gamma, test);
                        if (output != null)
                        {
                            output.WriteLine("{0} {1} {2}", parameters.C, parameters.Gamma, test);
                        }

                        // Strict comparison: on ties the earliest combination wins.
                        if (test > bestScore)
                        {
                            C = parameters.C;
                            Gamma = parameters.Gamma;
                            bestScore = test;
                            Console.WriteLine(" New Maximum!");
                        }
                        else
                        {
                            Console.WriteLine();
                        }
                    }
                }
            }
        }
    }
}