1  /*


2  * SVM.NET Library


3  * Copyright (C) 2008 Matthew Johnson


4  *


5  * This program is free software: you can redistribute it and/or modify


6  * it under the terms of the GNU General Public License as published by


7  * the Free Software Foundation, either version 3 of the License, or


8  * (at your option) any later version.


9  *


10  * This program is distributed in the hope that it will be useful,


11  * but WITHOUT ANY WARRANTY; without even the implied warranty of


12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the


13  * GNU General Public License for more details.


14  *


15  * You should have received a copy of the GNU General Public License


16  * along with this program. If not, see <http://www.gnu.org/licenses/>.


17  */


18 


19 


20  using System;


21  using System.IO;


22 


23  namespace SVM {


24 


25  // An SMO algorithm in Fan et al., JMLR 6(2005), p. 18891918


26  // Solves:


27  //


28  // Min 0.5(\alpha^T Q \alpha) + p^T \alpha


29  //


30  // y^T \alpha = \delta


31  // y_i = +1 or 1


32  // 0 <= alpha_i <= Cp for y_i = 1


33  // 0 <= alpha_i <= Cn for y_i = 1


34  //


35  // Given:


36  //


37  // Q, p, y, Cp, Cn, and an initial feasible point \alpha


38  // l is the size of vectors and matrices


39  // eps is the stopping tolerance


40  //


41  // solution will be put in \alpha, objective value will be put in obj


42  //


43  internal class Solver {


44  protected int active_size;


45  protected sbyte[] y;


46  protected double[] G; // gradient of objective function


47  private const byte LOWER_BOUND = 0;


48  private const byte UPPER_BOUND = 1;


49  private const byte FREE = 2;


50  private byte[] alpha_status; // LOWER_BOUND, UPPER_BOUND, FREE


51  private double[] alpha;


52  protected IQMatrix Q;


53  protected float[] QD;


54  protected double EPS;


55  private double Cp, Cn;


56  private double[] p;


57  private int[] active_set;


58  private double[] G_bar; // gradient, if we treat free variables as 0


59  protected int l;


60  protected bool unshrink; // XXX


61 


62  protected const double INF = double.PositiveInfinity;


63 


64  private double get_C(int i) {


65  return (y[i] > 0) ? Cp : Cn;


66  }


67 


68  private void update_alpha_status(int i) {


69  if (alpha[i] >= get_C(i))


70  alpha_status[i] = UPPER_BOUND;


71  else if (alpha[i] <= 0)


72  alpha_status[i] = LOWER_BOUND;


73  else alpha_status[i] = FREE;


74  }


75 


76  protected bool is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; }


77  protected bool is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; }


78 


79  private bool is_free(int i) { return alpha_status[i] == FREE; }


80 


81  public class SolutionInfo {


82  public double obj;


83  public double rho;


84  public double upper_bound_p;


85  public double upper_bound_n;


86  public double r; // for Solver_NU


87  }


88 


89  protected void swap_index(int i, int j) {


90  Q.SwapIndex(i, j);


91  y.SwapIndex(i, j);


92  G.SwapIndex(i, j);


93  alpha_status.SwapIndex(i, j);


94  alpha.SwapIndex(i, j);


95  p.SwapIndex(i, j);


96  active_set.SwapIndex(i, j);


97  G_bar.SwapIndex(i, j);


98  }


99 


100  protected void reconstruct_gradient() {


101  // reconstruct inactive elements of G from G_bar and free variables


102 


103  if (active_size == l) return;


104 


105  int i, j;


106  int nr_free = 0;


107 


108  for (j = active_size; j < l; j++)


109  G[j] = G_bar[j] + p[j];


110 


111  for (j = 0; j < active_size; j++)


112  if (is_free(j))


113  nr_free++;


114 


115  if (2 * nr_free < active_size)


116  Procedures.info("\nWarning: using h 0 may be faster\n");


117 


118  if (nr_free * l > 2 * active_size * (l  active_size)) {


119  for (i = active_size; i < l; i++) {


120  float[] Q_i = Q.GetQ(i, active_size);


121  for (j = 0; j < active_size; j++)


122  if (is_free(j))


123  G[i] += alpha[j] * Q_i[j];


124  }


125  } else {


126  for (i = 0; i < active_size; i++)


127  if (is_free(i)) {


128  float[] Q_i = Q.GetQ(i, l);


129  double alpha_i = alpha[i];


130  for (j = active_size; j < l; j++)


131  G[j] += alpha_i * Q_i[j];


132  }


133  }


134  }


135 


136  public virtual void Solve(int l, IQMatrix Q, double[] p_, sbyte[] y_, double[] alpha_, double Cp, double Cn, double eps, SolutionInfo si, bool shrinking) {


137  this.l = l;


138  this.Q = Q;


139  QD = Q.GetQD();


140  p = (double[])p_.Clone();


141  y = (sbyte[])y_.Clone();


142  alpha = (double[])alpha_.Clone();


143  this.Cp = Cp;


144  this.Cn = Cn;


145  this.EPS = eps;


146  this.unshrink = false;


147 


148  // initialize alpha_status


149  {


150  alpha_status = new byte[l];


151  for (int i = 0; i < l; i++)


152  update_alpha_status(i);


153  }


154 


155  // initialize active set (for shrinking)


156  {


157  active_set = new int[l];


158  for (int i = 0; i < l; i++)


159  active_set[i] = i;


160  active_size = l;


161  }


162 


163  // initialize gradient


164  {


165  G = new double[l];


166  G_bar = new double[l];


167  int i;


168  for (i = 0; i < l; i++) {


169  G[i] = p[i];


170  G_bar[i] = 0;


171  }


172  for (i = 0; i < l; i++)


173  if (!is_lower_bound(i)) {


174  float[] Q_i = Q.GetQ(i, l);


175  double alpha_i = alpha[i];


176  int j;


177  for (j = 0; j < l; j++)


178  G[j] += alpha_i * Q_i[j];


179  if (is_upper_bound(i))


180  for (j = 0; j < l; j++)


181  G_bar[j] += get_C(i) * Q_i[j];


182  }


183  }


184 


185  // optimization step


186 


187  int iter = 0;


188  int counter = Math.Min(l, 1000) + 1;


189  int[] working_set = new int[2];


190 


191  while (true) {


192  // show progress and do shrinking


193 


194  if (counter == 0) {


195  counter = Math.Min(l, 1000);


196  if (shrinking) do_shrinking();


197  Procedures.info(".");


198  }


199 


200  if (select_working_set(working_set) != 0) {


201  // reconstruct the whole gradient


202  reconstruct_gradient();


203  // reset active set size and check


204  active_size = l;


205  Procedures.info("*");


206  if (select_working_set(working_set) != 0)


207  break;


208  else


209  counter = 1; // do shrinking next iteration


210  }


211 


212  int i = working_set[0];


213  int j = working_set[1];


214 


215  ++iter;


216 


217  // update alpha[i] and alpha[j], handle bounds carefully


218 


219  float[] Q_i = Q.GetQ(i, active_size);


220  float[] Q_j = Q.GetQ(j, active_size);


221 


222  double C_i = get_C(i);


223  double C_j = get_C(j);


224 


225  double old_alpha_i = alpha[i];


226  double old_alpha_j = alpha[j];


227 


228  if (y[i] != y[j]) {


229  double quad_coef = Q_i[i] + Q_j[j] + 2 * Q_i[j];


230  if (quad_coef <= 0)


231  quad_coef = 1e12;


232  double delta = (G[i]  G[j]) / quad_coef;


233  double diff = alpha[i]  alpha[j];


234  alpha[i] += delta;


235  alpha[j] += delta;


236 


237  if (diff > 0) {


238  if (alpha[j] < 0) {


239  alpha[j] = 0;


240  alpha[i] = diff;


241  }


242  } else {


243  if (alpha[i] < 0) {


244  alpha[i] = 0;


245  alpha[j] = diff;


246  }


247  }


248  if (diff > C_i  C_j) {


249  if (alpha[i] > C_i) {


250  alpha[i] = C_i;


251  alpha[j] = C_i  diff;


252  }


253  } else {


254  if (alpha[j] > C_j) {


255  alpha[j] = C_j;


256  alpha[i] = C_j + diff;


257  }


258  }


259  } else {


260  double quad_coef = Q_i[i] + Q_j[j]  2 * Q_i[j];


261  if (quad_coef <= 0)


262  quad_coef = 1e12;


263  double delta = (G[i]  G[j]) / quad_coef;


264  double sum = alpha[i] + alpha[j];


265  alpha[i] = delta;


266  alpha[j] += delta;


267 


268  if (sum > C_i) {


269  if (alpha[i] > C_i) {


270  alpha[i] = C_i;


271  alpha[j] = sum  C_i;


272  }


273  } else {


274  if (alpha[j] < 0) {


275  alpha[j] = 0;


276  alpha[i] = sum;


277  }


278  }


279  if (sum > C_j) {


280  if (alpha[j] > C_j) {


281  alpha[j] = C_j;


282  alpha[i] = sum  C_j;


283  }


284  } else {


285  if (alpha[i] < 0) {


286  alpha[i] = 0;


287  alpha[j] = sum;


288  }


289  }


290  }


291 


292  // update G


293 


294  double delta_alpha_i = alpha[i]  old_alpha_i;


295  double delta_alpha_j = alpha[j]  old_alpha_j;


296 


297  for (int k = 0; k < active_size; k++) {


298  G[k] += Q_i[k] * delta_alpha_i + Q_j[k] * delta_alpha_j;


299  }


300 


301  // update alpha_status and G_bar


302 


303  {


304  bool ui = is_upper_bound(i);


305  bool uj = is_upper_bound(j);


306  update_alpha_status(i);


307  update_alpha_status(j);


308  int k;


309  if (ui != is_upper_bound(i)) {


310  Q_i = Q.GetQ(i, l);


311  if (ui)


312  for (k = 0; k < l; k++)


313  G_bar[k] = C_i * Q_i[k];


314  else


315  for (k = 0; k < l; k++)


316  G_bar[k] += C_i * Q_i[k];


317  }


318 


319  if (uj != is_upper_bound(j)) {


320  Q_j = Q.GetQ(j, l);


321  if (uj)


322  for (k = 0; k < l; k++)


323  G_bar[k] = C_j * Q_j[k];


324  else


325  for (k = 0; k < l; k++)


326  G_bar[k] += C_j * Q_j[k];


327  }


328  }


329 


330  }


331 


332  // calculate rho


333 


334  si.rho = calculate_rho();


335 


336  // calculate objective value


337  {


338  double v = 0;


339  int i;


340  for (i = 0; i < l; i++)


341  v += alpha[i] * (G[i] + p[i]);


342 


343  si.obj = v / 2;


344  }


345 


346  // put back the solution


347  {


348  for (int i = 0; i < l; i++)


349  alpha_[active_set[i]] = alpha[i];


350  }


351 


352  si.upper_bound_p = Cp;


353  si.upper_bound_n = Cn;


354 


355  Procedures.info("\noptimization finished, #iter = " + iter + "\n");


356  }


357 


358  // return 1 if already optimal, return 0 otherwise


359  protected virtual int select_working_set(int[] working_set) {


360  // return i,j such that


361  // i: Maximizes y_i * grad(f)_i, i in I_up(\alpha)


362  // j: mimimizes the decrease of obj value


363  // (if quadratic coefficeint <= 0, replace it with tau)


364  // y_j*grad(f)_j < y_i*grad(f)_i, j in I_low(\alpha)


365 


366  double GMax = INF;


367  double GMax2 = INF;


368  int GMax_idx = 1;


369  int GMin_idx = 1;


370  double obj_diff_Min = INF;


371 


372  for (int t = 0; t < active_size; t++)


373  if (y[t] == +1) {


374  if (!is_upper_bound(t))


375  if (G[t] >= GMax) {


376  GMax = G[t];


377  GMax_idx = t;


378  }


379  } else {


380  if (!is_lower_bound(t))


381  if (G[t] >= GMax) {


382  GMax = G[t];


383  GMax_idx = t;


384  }


385  }


386 


387  int i = GMax_idx;


388  float[] Q_i = null;


389  if (i != 1) // null Q_i not accessed: GMax=INF if i=1


390  Q_i = Q.GetQ(i, active_size);


391 


392  for (int j = 0; j < active_size; j++) {


393  if (y[j] == +1) {


394  if (!is_lower_bound(j)) {


395  double grad_diff = GMax + G[j];


396  if (G[j] >= GMax2)


397  GMax2 = G[j];


398  if (grad_diff > 0) {


399  double obj_diff;


400  double quad_coef = Q_i[i] + QD[j]  2.0 * y[i] * Q_i[j];


401  if (quad_coef > 0)


402  obj_diff = (grad_diff * grad_diff) / quad_coef;


403  else


404  obj_diff = (grad_diff * grad_diff) / 1e12;


405 


406  if (obj_diff <= obj_diff_Min) {


407  GMin_idx = j;


408  obj_diff_Min = obj_diff;


409  }


410  }


411  }


412  } else {


413  if (!is_upper_bound(j)) {


414  double grad_diff = GMax  G[j];


415  if (G[j] >= GMax2)


416  GMax2 = G[j];


417  if (grad_diff > 0) {


418  double obj_diff;


419  double quad_coef = Q_i[i] + QD[j] + 2.0 * y[i] * Q_i[j];


420  if (quad_coef > 0)


421  obj_diff = (grad_diff * grad_diff) / quad_coef;


422  else


423  obj_diff = (grad_diff * grad_diff) / 1e12;


424 


425  if (obj_diff <= obj_diff_Min) {


426  GMin_idx = j;


427  obj_diff_Min = obj_diff;


428  }


429  }


430  }


431  }


432  }


433 


434  if (GMax + GMax2 < EPS)


435  return 1;


436 


437  working_set[0] = GMax_idx;


438  working_set[1] = GMin_idx;


439  return 0;


440  }


441 


442  private bool be_shrunk(int i, double GMax1, double GMax2) {


443  if (is_upper_bound(i)) {


444  if (y[i] == +1)


445  return (G[i] > GMax1);


446  else


447  return (G[i] > GMax2);


448  } else if (is_lower_bound(i)) {


449  if (y[i] == +1)


450  return (G[i] > GMax2);


451  else


452  return (G[i] > GMax1);


453  } else


454  return (false);


455  }


456 


457  protected virtual void do_shrinking() {


458  int i;


459  double GMax1 = INF; // Max { y_i * grad(f)_i  i in I_up(\alpha) }


460  double GMax2 = INF; // Max { y_i * grad(f)_i  i in I_low(\alpha) }


461 


462  // find Maximal violating pair first


463  for (i = 0; i < active_size; i++) {


464  if (y[i] == +1) {


465  if (!is_upper_bound(i)) {


466  if (G[i] >= GMax1)


467  GMax1 = G[i];


468  }


469  if (!is_lower_bound(i)) {


470  if (G[i] >= GMax2)


471  GMax2 = G[i];


472  }


473  } else {


474  if (!is_upper_bound(i)) {


475  if (G[i] >= GMax2)


476  GMax2 = G[i];


477  }


478  if (!is_lower_bound(i)) {


479  if (G[i] >= GMax1)


480  GMax1 = G[i];


481  }


482  }


483  }


484 


485  if (unshrink == false && GMax1 + GMax2 <= EPS * 10) {


486  unshrink = true;


487  reconstruct_gradient();


488  active_size = l;


489  }


490 


491  for (i = 0; i < active_size; i++)


492  if (be_shrunk(i, GMax1, GMax2)) {


493  active_size;


494  while (active_size > i) {


495  if (!be_shrunk(active_size, GMax1, GMax2)) {


496  swap_index(i, active_size);


497  break;


498  }


499  active_size;


500  }


501  }


502  }


503 


504  protected virtual double calculate_rho() {


505  double r;


506  int nr_free = 0;


507  double ub = INF, lb = INF, sum_free = 0;


508  for (int i = 0; i < active_size; i++) {


509  double yG = y[i] * G[i];


510 


511  if (is_lower_bound(i)) {


512  if (y[i] > 0)


513  ub = Math.Min(ub, yG);


514  else


515  lb = Math.Max(lb, yG);


516  } else if (is_upper_bound(i)) {


517  if (y[i] < 0)


518  ub = Math.Min(ub, yG);


519  else


520  lb = Math.Max(lb, yG);


521  } else {


522  ++nr_free;


523  sum_free += yG;


524  }


525  }


526 


527  if (nr_free > 0)


528  r = sum_free / nr_free;


529  else


530  r = (ub + lb) / 2;


531 


532  return r;


533  }


534 


535  }


536 


537  //


538  // Solver for nusvm classification and regression


539  //


540  // additional constraint: e^T \alpha = constant


541  //


542  class Solver_NU : Solver {


543  private SolutionInfo si;


544 


545  public sealed override void Solve(int l, IQMatrix Q, double[] p, sbyte[] y,


546  double[] alpha, double Cp, double Cn, double eps,


547  SolutionInfo si, bool shrinking) {


548  this.si = si;


549  base.Solve(l, Q, p, y, alpha, Cp, Cn, eps, si, shrinking);


550  }


551 


552  // return 1 if already optimal, return 0 otherwise


553  protected override sealed int select_working_set(int[] working_set) {


554  // return i,j such that y_i = y_j and


555  // i: Maximizes y_i * grad(f)_i, i in I_up(\alpha)


556  // j: Minimizes the decrease of obj value


557  // (if quadratic coefficeint <= 0, replace it with tau)


558  // y_j*grad(f)_j < y_i*grad(f)_i, j in I_low(\alpha)


559 


560  double GMaxp = INF;


561  double GMaxp2 = INF;


562  int GMaxp_idx = 1;


563 


564  double GMaxn = INF;


565  double GMaxn2 = INF;


566  int GMaxn_idx = 1;


567 


568  int GMin_idx = 1;


569  double obj_diff_Min = INF;


570 


571  for (int t = 0; t < active_size; t++)


572  if (y[t] == +1) {


573  if (!is_upper_bound(t))


574  if (G[t] >= GMaxp) {


575  GMaxp = G[t];


576  GMaxp_idx = t;


577  }


578  } else {


579  if (!is_lower_bound(t))


580  if (G[t] >= GMaxn) {


581  GMaxn = G[t];


582  GMaxn_idx = t;


583  }


584  }


585 


586  int ip = GMaxp_idx;


587  int iN = GMaxn_idx;


588  float[] Q_ip = null;


589  float[] Q_in = null;


590  if (ip != 1) // null Q_ip not accessed: GMaxp=INF if ip=1


591  Q_ip = Q.GetQ(ip, active_size);


592  if (iN != 1)


593  Q_in = Q.GetQ(iN, active_size);


594 


595  for (int j = 0; j < active_size; j++) {


596  if (y[j] == +1) {


597  if (!is_lower_bound(j)) {


598  double grad_diff = GMaxp + G[j];


599  if (G[j] >= GMaxp2)


600  GMaxp2 = G[j];


601  if (grad_diff > 0) {


602  double obj_diff;


603  double quad_coef = Q_ip[ip] + QD[j]  2 * Q_ip[j];


604  if (quad_coef > 0)


605  obj_diff = (grad_diff * grad_diff) / quad_coef;


606  else


607  obj_diff = (grad_diff * grad_diff) / 1e12;


608 


609  if (obj_diff <= obj_diff_Min) {


610  GMin_idx = j;


611  obj_diff_Min = obj_diff;


612  }


613  }


614  }


615  } else {


616  if (!is_upper_bound(j)) {


617  double grad_diff = GMaxn  G[j];


618  if (G[j] >= GMaxn2)


619  GMaxn2 = G[j];


620  if (grad_diff > 0) {


621  double obj_diff;


622  double quad_coef = Q_in[iN] + QD[j]  2 * Q_in[j];


623  if (quad_coef > 0)


624  obj_diff = (grad_diff * grad_diff) / quad_coef;


625  else


626  obj_diff = (grad_diff * grad_diff) / 1e12;


627 


628  if (obj_diff <= obj_diff_Min) {


629  GMin_idx = j;


630  obj_diff_Min = obj_diff;


631  }


632  }


633  }


634  }


635  }


636 


637  if (Math.Max(GMaxp + GMaxp2, GMaxn + GMaxn2) < EPS)


638  return 1;


639 


640  if (y[GMin_idx] == +1)


641  working_set[0] = GMaxp_idx;


642  else


643  working_set[0] = GMaxn_idx;


644  working_set[1] = GMin_idx;


645 


646  return 0;


647  }


648 


649  private bool be_shrunk(int i, double GMax1, double GMax2, double GMax3, double GMax4) {


650  if (is_upper_bound(i)) {


651  if (y[i] == +1)


652  return (G[i] > GMax1);


653  else


654  return (G[i] > GMax4);


655  } else if (is_lower_bound(i)) {


656  if (y[i] == +1)


657  return (G[i] > GMax2);


658  else


659  return (G[i] > GMax3);


660  } else


661  return (false);


662  }


663 


664  protected override sealed void do_shrinking() {


665  double GMax1 = INF; // Max { y_i * grad(f)_i  y_i = +1, i in I_up(\alpha) }


666  double GMax2 = INF; // Max { y_i * grad(f)_i  y_i = +1, i in I_low(\alpha) }


667  double GMax3 = INF; // Max { y_i * grad(f)_i  y_i = 1, i in I_up(\alpha) }


668  double GMax4 = INF; // Max { y_i * grad(f)_i  y_i = 1, i in I_low(\alpha) }


669 


670  // find Maximal violating pair first


671  int i;


672  for (i = 0; i < active_size; i++) {


673  if (!is_upper_bound(i)) {


674  if (y[i] == +1) {


675  if (G[i] > GMax1) GMax1 = G[i];


676  } else if (G[i] > GMax4) GMax4 = G[i];


677  }


678  if (!is_lower_bound(i)) {


679  if (y[i] == +1) {


680  if (G[i] > GMax2) GMax2 = G[i];


681  } else if (G[i] > GMax3) GMax3 = G[i];


682  }


683  }


684 


685  if (unshrink == false && Math.Max(GMax1 + GMax2, GMax3 + GMax4) <= EPS * 10) {


686  unshrink = true;


687  reconstruct_gradient();


688  active_size = l;


689  }


690 


691  for (i = 0; i < active_size; i++)


692  if (be_shrunk(i, GMax1, GMax2, GMax3, GMax4)) {


693  active_size;


694  while (active_size > i) {


695  if (!be_shrunk(active_size, GMax1, GMax2, GMax3, GMax4)) {


696  swap_index(i, active_size);


697  break;


698  }


699  active_size;


700  }


701  }


702  }


703 


704  protected override sealed double calculate_rho() {


705  int nr_free1 = 0, nr_free2 = 0;


706  double ub1 = INF, ub2 = INF;


707  double lb1 = INF, lb2 = INF;


708  double sum_free1 = 0, sum_free2 = 0;


709 


710  for (int i = 0; i < active_size; i++) {


711  if (y[i] == +1) {


712  if (is_lower_bound(i))


713  ub1 = Math.Min(ub1, G[i]);


714  else if (is_upper_bound(i))


715  lb1 = Math.Max(lb1, G[i]);


716  else {


717  ++nr_free1;


718  sum_free1 += G[i];


719  }


720  } else {


721  if (is_lower_bound(i))


722  ub2 = Math.Min(ub2, G[i]);


723  else if (is_upper_bound(i))


724  lb2 = Math.Max(lb2, G[i]);


725  else {


726  ++nr_free2;


727  sum_free2 += G[i];


728  }


729  }


730  }


731 


732  double r1, r2;


733  if (nr_free1 > 0)


734  r1 = sum_free1 / nr_free1;


735  else


736  r1 = (ub1 + lb1) / 2;


737 


738  if (nr_free2 > 0)


739  r2 = sum_free2 / nr_free2;


740  else


741  r2 = (ub2 + lb2) / 2;


742 


743  si.r = (r1 + r2) / 2;


744  return (r1  r2) / 2;


745  }


746  }


747 


748  //


749  // Q matrices for various formulations


750  //


751  class SVC_Q : Kernel {


752  private sbyte[] y;


753  private Cache cache;


754  private float[] QD;


755 


756  public SVC_Q(Problem prob, Parameter param, sbyte[] y_)


757  : base(prob.Count, prob.X, param) {


758  y = (sbyte[])y_.Clone();


759  cache = new Cache(prob.Count, (long)(param.CacheSize * (1 << 20)));


760  QD = new float[prob.Count];


761  for (int i = 0; i < prob.Count; i++)


762  QD[i] = (float)KernelFunction(i, i);


763  }


764 


765  public override sealed float[] GetQ(int i, int len) {


766  float[] data = null;


767  int start, j;


768  if ((start = cache.GetData(i, ref data, len)) < len) {


769  for (j = start; j < len; j++)


770  data[j] = (float)(y[i] * y[j] * KernelFunction(i, j));


771  }


772  return data;


773  }


774 


775  public override sealed float[] GetQD() {


776  return QD;


777  }


778 


779  public override sealed void SwapIndex(int i, int j) {


780  cache.SwapIndex(i, j);


781  base.SwapIndex(i, j);


782  y.SwapIndex(i, j);


783  QD.SwapIndex(i, j);


784  }


785  }


786 


787  class ONE_CLASS_Q : Kernel {


788  private Cache cache;


789  private float[] QD;


790 


791  public ONE_CLASS_Q(Problem prob, Parameter param)


792  : base(prob.Count, prob.X, param) {


793  cache = new Cache(prob.Count, (long)(param.CacheSize * (1 << 20)));


794  QD = new float[prob.Count];


795  for (int i = 0; i < prob.Count; i++)


796  QD[i] = (float)KernelFunction(i, i);


797  }


798 


799  public override sealed float[] GetQ(int i, int len) {


800  float[] data = null;


801  int start, j;


802  if ((start = cache.GetData(i, ref data, len)) < len) {


803  for (j = start; j < len; j++)


804  data[j] = (float)KernelFunction(i, j);


805  }


806  return data;


807  }


808 


809  public override sealed float[] GetQD() {


810  return QD;


811  }


812 


813  public override sealed void SwapIndex(int i, int j) {


814  cache.SwapIndex(i, j);


815  base.SwapIndex(i, j);


816  QD.SwapIndex(i, j);


817  }


818  }


819 


820  class SVR_Q : Kernel {


821  private int l;


822  private Cache cache;


823  private sbyte[] sign;


824  private int[] index;


825  private int next_buffer;


826  private float[][] buffer;


827  private float[] QD;


828 


829  public SVR_Q(Problem prob, Parameter param)


830  : base(prob.Count, prob.X, param) {


831  l = prob.Count;


832  cache = new Cache(l, (long)(param.CacheSize * (1 << 20)));


833  QD = new float[2 * l];


834  sign = new sbyte[2 * l];


835  index = new int[2 * l];


836  for (int k = 0; k < l; k++) {


837  sign[k] = 1;


838  sign[k + l] = 1;


839  index[k] = k;


840  index[k + l] = k;


841  QD[k] = (float)KernelFunction(k, k);


842  QD[k + l] = QD[k];


843  }


844  buffer = new float[2][];


845  buffer[0] = new float[2 * l];


846  buffer[1] = new float[2 * l];


847  next_buffer = 0;


848  }


849 


850  public override sealed void SwapIndex(int i, int j) {


851  sign.SwapIndex(i, j);


852  index.SwapIndex(i, j);


853  QD.SwapIndex(i, j);


854  }


855 


856  public override sealed float[] GetQ(int i, int len) {


857  float[] data = null;


858  int j, real_i = index[i];


859  if (cache.GetData(real_i, ref data, l) < l) {


860  for (j = 0; j < l; j++)


861  data[j] = (float)KernelFunction(real_i, j);


862  }


863 


864  // reorder and copy


865  float[] buf = buffer[next_buffer];


866  next_buffer = 1  next_buffer;


867  sbyte si = sign[i];


868  for (j = 0; j < len; j++)


869  buf[j] = (float)si * sign[j] * data[index[j]];


870  return buf;


871  }


872 


873  public override sealed float[] GetQD() {


874  return QD;


875  }


876  }


877 


878  internal class Procedures {


879  private static bool _verbose;


880  public static bool IsVerbose {


881  get {


882  return _verbose;


883  }


884  set {


885  _verbose = value;


886  }


887  }


888  //


889  // construct and solve various formulations


890  //


891  public const int LIBSVM_VERSION = 289;


892 


893  public static TextWriter svm_print_string = Console.Out;


894 


895  public static void info(string s) {


896  if (_verbose)


897  svm_print_string.Write(s);


898  }


899 


900  private static void solve_c_svc(Problem prob, Parameter param,


901  double[] alpha, Solver.SolutionInfo si,


902  double Cp, double Cn) {


903  int l = prob.Count;


904  double[] Minus_ones = new double[l];


905  sbyte[] y = new sbyte[l];


906 


907  int i;


908 


909  for (i = 0; i < l; i++) {


910  alpha[i] = 0;


911  Minus_ones[i] = 1;


912  if (prob.Y[i] > 0) y[i] = +1; else y[i] = 1;


913  }


914 


915  Solver s = new Solver();


916  s.Solve(l, new SVC_Q(prob, param, y), Minus_ones, y,


917  alpha, Cp, Cn, param.EPS, si, param.Shrinking);


918 


919  double sum_alpha = 0;


920  for (i = 0; i < l; i++)


921  sum_alpha += alpha[i];


922 


923  if (Cp == Cn)


924  Procedures.info("nu = " + sum_alpha / (Cp * prob.Count) + "\n");


925 


926  for (i = 0; i < l; i++)


927  alpha[i] *= y[i];


928  }


929 


930  private static void solve_nu_svc(Problem prob, Parameter param,


931  double[] alpha, Solver.SolutionInfo si) {


932  int i;


933  int l = prob.Count;


934  double nu = param.Nu;


935 


936  sbyte[] y = new sbyte[l];


937 


938  for (i = 0; i < l; i++)


939  if (prob.Y[i] > 0)


940  y[i] = +1;


941  else


942  y[i] = 1;


943 


944  double sum_pos = nu * l / 2;


945  double sum_neg = nu * l / 2;


946 


947  for (i = 0; i < l; i++)


948  if (y[i] == +1) {


949  alpha[i] = Math.Min(1.0, sum_pos);


950  sum_pos = alpha[i];


951  } else {


952  alpha[i] = Math.Min(1.0, sum_neg);


953  sum_neg = alpha[i];


954  }


955 


956  double[] zeros = new double[l];


957 


958  for (i = 0; i < l; i++)


959  zeros[i] = 0;


960 


961  Solver_NU s = new Solver_NU();


962  s.Solve(l, new SVC_Q(prob, param, y), zeros, y, alpha, 1.0, 1.0, param.EPS, si, param.Shrinking);


963  double r = si.r;


964 


965  Procedures.info("C = " + 1 / r + "\n");


966 


967  for (i = 0; i < l; i++)


968  alpha[i] *= y[i] / r;


969 


970  si.rho /= r;


971  si.obj /= (r * r);


972  si.upper_bound_p = 1 / r;


973  si.upper_bound_n = 1 / r;


974  }


975 


976  private static void solve_one_class(Problem prob, Parameter param,


977  double[] alpha, Solver.SolutionInfo si) {


978  int l = prob.Count;


979  double[] zeros = new double[l];


980  sbyte[] ones = new sbyte[l];


981  int i;


982 


983  int n = (int)(param.Nu * prob.Count); // # of alpha's at upper bound


984 


985  for (i = 0; i < n; i++)


986  alpha[i] = 1;


987  if (n < prob.Count)


988  alpha[n] = param.Nu * prob.Count  n;


989  for (i = n + 1; i < l; i++)


990  alpha[i] = 0;


991 


992  for (i = 0; i < l; i++) {


993  zeros[i] = 0;


994  ones[i] = 1;


995  }


996 


997  Solver s = new Solver();


998  s.Solve(l, new ONE_CLASS_Q(prob, param), zeros, ones, alpha, 1.0, 1.0, param.EPS, si, param.Shrinking);


999  }


1000 


1001  private static void solve_epsilon_svr(Problem prob, Parameter param, double[] alpha, Solver.SolutionInfo si) {


1002  int l = prob.Count;


1003  double[] alpha2 = new double[2 * l];


1004  double[] linear_term = new double[2 * l];


1005  sbyte[] y = new sbyte[2 * l];


1006  int i;


1007 


1008  for (i = 0; i < l; i++) {


1009  alpha2[i] = 0;


1010  linear_term[i] = param.P  prob.Y[i];


1011  y[i] = 1;


1012 


1013  alpha2[i + l] = 0;


1014  linear_term[i + l] = param.P + prob.Y[i];


1015  y[i + l] = 1;


1016  }


1017 


1018  Solver s = new Solver();


1019  s.Solve(2 * l, new SVR_Q(prob, param), linear_term, y, alpha2, param.C, param.C, param.EPS, si, param.Shrinking);


1020 


1021  double sum_alpha = 0;


1022  for (i = 0; i < l; i++) {


1023  alpha[i] = alpha2[i]  alpha2[i + l];


1024  sum_alpha += Math.Abs(alpha[i]);


1025  }


1026  Procedures.info("nu = " + sum_alpha / (param.C * l) + "\n");


1027  }


1028 


1029  private static void solve_nu_svr(Problem prob, Parameter param,


1030  double[] alpha, Solver.SolutionInfo si) {


1031  int l = prob.Count;


1032  double C = param.C;


1033  double[] alpha2 = new double[2 * l];


1034  double[] linear_term = new double[2 * l];


1035  sbyte[] y = new sbyte[2 * l];


1036  int i;


1037 


1038  double sum = C * param.Nu * l / 2;


1039  for (i = 0; i < l; i++) {


1040  alpha2[i] = alpha2[i + l] = Math.Min(sum, C);


1041  sum = alpha2[i];


1042 


1043  linear_term[i] = prob.Y[i];


1044  y[i] = 1;


1045 


1046  linear_term[i + l] = prob.Y[i];


1047  y[i + l] = 1;


1048  }


1049 


1050  Solver_NU s = new Solver_NU();


1051  s.Solve(2 * l, new SVR_Q(prob, param), linear_term, y, alpha2, C, C, param.EPS, si, param.Shrinking);


1052 


1053  Procedures.info("epsilon = " + (si.r) + "\n");


1054 


1055  for (i = 0; i < l; i++)


1056  alpha[i] = alpha2[i]  alpha2[i + l];


1057  }


1058 


1059  //


1060  // decision_function


1061  //


1062  internal class decision_function {


1063  public double[] alpha;


1064  public double rho;


1065  };


1066 


1067  static decision_function svm_train_one(Problem prob, Parameter param, double Cp, double Cn) {


1068  double[] alpha = new double[prob.Count];


1069  Solver.SolutionInfo si = new Solver.SolutionInfo();


1070  switch (param.SvmType) {


1071  case SvmType.C_SVC:


1072  solve_c_svc(prob, param, alpha, si, Cp, Cn);


1073  break;


1074  case SvmType.NU_SVC:


1075  solve_nu_svc(prob, param, alpha, si);


1076  break;


1077  case SvmType.ONE_CLASS:


1078  solve_one_class(prob, param, alpha, si);


1079  break;


1080  case SvmType.EPSILON_SVR:


1081  solve_epsilon_svr(prob, param, alpha, si);


1082  break;


1083  case SvmType.NU_SVR:


1084  solve_nu_svr(prob, param, alpha, si);


1085  break;


1086  }


1087 


1088  Procedures.info("obj = " + si.obj + ", rho = " + si.rho + "\n");


1089 


1090  // output SVs


1091 


1092  int nSV = 0;


1093  int nBSV = 0;


1094  for (int i = 0; i < prob.Count; i++) {


1095  if (Math.Abs(alpha[i]) > 0) {


1096  ++nSV;


1097  if (prob.Y[i] > 0) {


1098  if (Math.Abs(alpha[i]) >= si.upper_bound_p)


1099  ++nBSV;


1100  } else {


1101  if (Math.Abs(alpha[i]) >= si.upper_bound_n)


1102  ++nBSV;


1103  }


1104  }


1105  }


1106 


1107  Procedures.info("nSV = " + nSV + ", nBSV = " + nBSV + "\n");


1108 


1109  decision_function f = new decision_function();


1110  f.alpha = alpha;


1111  f.rho = si.rho;


1112  return f;


1113  }


1114 


1115  // Platt's binary SVM Probablistic Output: an improvement from Lin et al.


1116  private static void sigmoid_train(int l, double[] dec_values, double[] labels,


1117  double[] probAB) {


1118  double A, B;


1119  double prior1 = 0, prior0 = 0;


1120  int i;


1121 


1122  for (i = 0; i < l; i++)


1123  if (labels[i] > 0) prior1 += 1;


1124  else prior0 += 1;


1125 


1126  int Max_iter = 100; // Maximal number of iterations


1127  double Min_step = 1e10; // Minimal step taken in line search


1128  double sigma = 1e12; // For numerically strict PD of Hessian


1129  double eps = 1e5;


1130  double hiTarget = (prior1 + 1.0) / (prior1 + 2.0);


1131  double loTarget = 1 / (prior0 + 2.0);


1132  double[] t = new double[l];


1133  double fApB, p, q, h11, h22, h21, g1, g2, det, dA, dB, gd, stepsize;


1134  double newA, newB, newf, d1, d2;


1135  int iter;


1136 


1137  // Initial Point and Initial Fun Value


1138  A = 0.0; B = Math.Log((prior0 + 1.0) / (prior1 + 1.0));


1139  double fval = 0.0;


1140 


1141  for (i = 0; i < l; i++) {


1142  if (labels[i] > 0) t[i] = hiTarget;


1143  else t[i] = loTarget;


1144  fApB = dec_values[i] * A + B;


1145  if (fApB >= 0)


1146  fval += t[i] * fApB + Math.Log(1 + Math.Exp(fApB));


1147  else


1148  fval += (t[i]  1) * fApB + Math.Log(1 + Math.Exp(fApB));


1149  }


1150  for (iter = 0; iter < Max_iter; iter++) {


1151  // Update Gradient and Hessian (use H' = H + sigma I)


1152  h11 = sigma; // numerically ensures strict PD


1153  h22 = sigma;


1154  h21 = 0.0; g1 = 0.0; g2 = 0.0;


1155  for (i = 0; i < l; i++) {


1156  fApB = dec_values[i] * A + B;


1157  if (fApB >= 0) {


1158  p = Math.Exp(fApB) / (1.0 + Math.Exp(fApB));


1159  q = 1.0 / (1.0 + Math.Exp(fApB));


1160  } else {


1161  p = 1.0 / (1.0 + Math.Exp(fApB));


1162  q = Math.Exp(fApB) / (1.0 + Math.Exp(fApB));


1163  }


1164  d2 = p * q;


1165  h11 += dec_values[i] * dec_values[i] * d2;


1166  h22 += d2;


1167  h21 += dec_values[i] * d2;


1168  d1 = t[i]  p;


1169  g1 += dec_values[i] * d1;


1170  g2 += d1;


1171  }


1172 


1173  // Stopping Criteria


1174  if (Math.Abs(g1) < eps && Math.Abs(g2) < eps)


1175  break;


1176 


1177  // Finding Newton direction: inv(H') * g


1178  det = h11 * h22  h21 * h21;


1179  dA = (h22 * g1  h21 * g2) / det;


1180  dB = (h21 * g1 + h11 * g2) / det;


1181  gd = g1 * dA + g2 * dB;


1182 


1183 


1184  stepsize = 1; // Line Search


1185  while (stepsize >= Min_step) {


1186  newA = A + stepsize * dA;


1187  newB = B + stepsize * dB;


1188 


1189  // New function value


1190  newf = 0.0;


1191  for (i = 0; i < l; i++) {


1192  fApB = dec_values[i] * newA + newB;


1193  if (fApB >= 0)


1194  newf += t[i] * fApB + Math.Log(1 + Math.Exp(fApB));


1195  else


1196  newf += (t[i]  1) * fApB + Math.Log(1 + Math.Exp(fApB));


1197  }


1198  // Check sufficient decrease


1199  if (newf < fval + 0.0001 * stepsize * gd) {


1200  A = newA; B = newB; fval = newf;


1201  break;


1202  } else


1203  stepsize = stepsize / 2.0;


1204  }


1205 


1206  if (stepsize < Min_step) {


1207  Procedures.info("Line search fails in twoclass probability estimates\n");


1208  break;


1209  }


1210  }


1211 


1212  if (iter >= Max_iter)


1213  Procedures.info("Reaching Maximal iterations in twoclass probability estimates\n");


1214  probAB[0] = A; probAB[1] = B;


1215  }


1216 


1217  private static double sigmoid_predict(double decision_value, double A, double B) {


1218  double fApB = decision_value * A + B;


1219  if (fApB >= 0)


1220  return Math.Exp(fApB) / (1.0 + Math.Exp(fApB));


1221  else


1222  return 1.0 / (1 + Math.Exp(fApB));


1223  }


1224 


1225  // Method 2 from the multiclass_prob paper by Wu, Lin, and Weng


1226  private static void multiclass_probability(int k, double[,] r, double[] p) {


1227  int t, j;


1228  int iter = 0, Max_iter = Math.Max(100, k);


1229  double[,] Q = new double[k, k];


1230  double[] Qp = new double[k];


1231  double pQp, eps = 0.005 / k;


1232 


1233  for (t = 0; t < k; t++) {


1234  p[t] = 1.0 / k; // Valid if k = 1


1235  Q[t, t] = 0;


1236  for (j = 0; j < t; j++) {


1237  Q[t, t] += r[j, t] * r[j, t];


1238  Q[t, j] = Q[j, t];


1239  }


1240  for (j = t + 1; j < k; j++) {


1241  Q[t, t] += r[j, t] * r[j, t];


1242  Q[t, j] = r[j, t] * r[t, j];


1243  }


1244  }


1245  for (iter = 0; iter < Max_iter; iter++) {


1246  // stopping condition, recalculate QP,pQP for numerical accuracy


1247  pQp = 0;


1248  for (t = 0; t < k; t++) {


1249  Qp[t] = 0;


1250  for (j = 0; j < k; j++)


1251  Qp[t] += Q[t, j] * p[j];


1252  pQp += p[t] * Qp[t];


1253  }


1254  double Max_error = 0;


1255  for (t = 0; t < k; t++) {


1256  double error = Math.Abs(Qp[t]  pQp);


1257  if (error > Max_error)


1258  Max_error = error;


1259  }


1260  if (Max_error < eps) break;


1261 


1262  for (t = 0; t < k; t++) {


1263  double diff = (Qp[t] + pQp) / Q[t, t];


1264  p[t] += diff;


1265  pQp = (pQp + diff * (diff * Q[t, t] + 2 * Qp[t])) / (1 + diff) / (1 + diff);


1266  for (j = 0; j < k; j++) {


1267  Qp[j] = (Qp[j] + diff * Q[t, j]) / (1 + diff);


1268  p[j] /= (1 + diff);


1269  }


1270  }


1271  }


1272  if (iter >= Max_iter)


1273  Procedures.info("Exceeds Max_iter in multiclass_prob\n");


1274  }


1275 


1276  // Crossvalidation decision values for probability estimates


1277  private static void svm_binary_svc_probability(Problem prob, Parameter param, double Cp, double Cn, double[] probAB) {


1278  int i;


1279  int nr_fold = 5;


1280  int[] perm = new int[prob.Count];


1281  double[] dec_values = new double[prob.Count];


1282 


1283  // random shuffle


1284  Random rand = new Random();


1285  for (i = 0; i < prob.Count; i++) perm[i] = i;


1286  for (i = 0; i < prob.Count; i++) {


1287  int j = i + (int)(rand.NextDouble() * (prob.Count  i));


1288  do { int _ = perm[i]; perm[i] = perm[j]; perm[j] = _; } while (false);


1289  }


1290  for (i = 0; i < nr_fold; i++) {


1291  int begin = i * prob.Count / nr_fold;


1292  int end = (i + 1) * prob.Count / nr_fold;


1293  int j, k;


1294  Problem subprob = new Problem();


1295 


1296  subprob.Count = prob.Count  (end  begin);


1297  subprob.X = new Node[subprob.Count][];


1298  subprob.Y = new double[subprob.Count];


1299 


1300  k = 0;


1301  for (j = 0; j < begin; j++) {


1302  subprob.X[k] = prob.X[perm[j]];


1303  subprob.Y[k] = prob.Y[perm[j]];


1304  ++k;


1305  }


1306  for (j = end; j < prob.Count; j++) {


1307  subprob.X[k] = prob.X[perm[j]];


1308  subprob.Y[k] = prob.Y[perm[j]];


1309  ++k;


1310  }


1311  int p_count = 0, n_count = 0;


1312  for (j = 0; j < k; j++)


1313  if (subprob.Y[j] > 0)


1314  p_count++;


1315  else


1316  n_count++;


1317 


1318  if (p_count == 0 && n_count == 0)


1319  for (j = begin; j < end; j++)


1320  dec_values[perm[j]] = 0;


1321  else if (p_count > 0 && n_count == 0)


1322  for (j = begin; j < end; j++)


1323  dec_values[perm[j]] = 1;


1324  else if (p_count == 0 && n_count > 0)


1325  for (j = begin; j < end; j++)


1326  dec_values[perm[j]] = 1;


1327  else {


1328  Parameter subparam = (Parameter)param.Clone();


1329  subparam.Probability = false;


1330  subparam.C = 1.0;


1331  subparam.Weights[1] = Cp;


1332  subparam.Weights[1] = Cn;


1333  Model submodel = svm_train(subprob, subparam);


1334  for (j = begin; j < end; j++) {


1335  double[] dec_value = new double[1];


1336  svm_predict_values(submodel, prob.X[perm[j]], dec_value);


1337  dec_values[perm[j]] = dec_value[0];


1338  // ensure +1 1 order; reason not using CV subroutine


1339  dec_values[perm[j]] *= submodel.ClassLabels[0];


1340  }


1341  }


1342  }


1343  sigmoid_train(prob.Count, dec_values, prob.Y, probAB);


1344  }


1345 


1346  // Return parameter of a Laplace distribution


1347  private static double svm_svr_probability(Problem prob, Parameter param) {


1348  int i;


1349  int nr_fold = 5;


1350  double[] ymv = new double[prob.Count];


1351  double mae = 0;


1352 


1353  Parameter newparam = (Parameter)param.Clone();


1354  newparam.Probability = false;


1355  svm_cross_validation(prob, newparam, nr_fold, ymv, true);


1356  for (i = 0; i < prob.Count; i++) {


1357  ymv[i] = prob.Y[i]  ymv[i];


1358  mae += Math.Abs(ymv[i]);


1359  }


1360  mae /= prob.Count;


1361  double std = Math.Sqrt(2 * mae * mae);


1362  int count = 0;


1363  mae = 0;


1364  for (i = 0; i < prob.Count; i++)


1365  if (Math.Abs(ymv[i]) > 5 * std)


1366  count = count + 1;


1367  else


1368  mae += Math.Abs(ymv[i]);


1369  mae /= (prob.Count  count);


1370  Procedures.info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(z/sigma)/(2sigma),sigma=" + mae + "\n");


1371  return mae;


1372  }


1373 


1374  // label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data


1375  // perm, length l, must be allocated before calling this subroutine


1376  private static void svm_group_classes(Problem prob, int[] nr_class_ret, int[][] label_ret, int[][] start_ret, int[][] count_ret, int[] perm) {


1377  int l = prob.Count;


1378  int Max_nr_class = 16;


1379  int nr_class = 0;


1380  int[] label = new int[Max_nr_class];


1381  int[] count = new int[Max_nr_class];


1382  int[] data_label = new int[l];


1383  int i;


1384 


1385  for (i = 0; i < l; i++) {


1386  int this_label = (int)(prob.Y[i]);


1387  int j;


1388  for (j = 0; j < nr_class; j++) {


1389  if (this_label == label[j]) {


1390  ++count[j];


1391  break;


1392  }


1393  }


1394  data_label[i] = j;


1395  if (j == nr_class) {


1396  if (nr_class == Max_nr_class) {


1397  Max_nr_class *= 2;


1398  int[] new_data = new int[Max_nr_class];


1399  Array.Copy(label, 0, new_data, 0, label.Length);


1400  label = new_data;


1401  new_data = new int[Max_nr_class];


1402  Array.Copy(count, 0, new_data, 0, count.Length);


1403  count = new_data;


1404  }


1405  label[nr_class] = this_label;


1406  count[nr_class] = 1;


1407  ++nr_class;


1408  }


1409  }


1410 


1411  int[] start = new int[nr_class];


1412  start[0] = 0;


1413  for (i = 1; i < nr_class; i++)


1414  start[i] = start[i  1] + count[i  1];


1415  for (i = 0; i < l; i++) {


1416  perm[start[data_label[i]]] = i;


1417  ++start[data_label[i]];


1418  }


1419  start[0] = 0;


1420  for (i = 1; i < nr_class; i++)


1421  start[i] = start[i  1] + count[i  1];


1422 


1423  nr_class_ret[0] = nr_class;


1424  label_ret[0] = label;


1425  start_ret[0] = start;


1426  count_ret[0] = count;


1427  }


1428 


1429  //


1430  // Interface functions


1431  //


1432  public static Model svm_train(Problem prob, Parameter param) {


1433  Model model = new Model();


1434  model.Parameter = param;


1435 


1436  if (param.SvmType == SvmType.ONE_CLASS 


1437  param.SvmType == SvmType.EPSILON_SVR 


1438  param.SvmType == SvmType.NU_SVR) {


1439  // regression or oneclasssvm


1440  model.NumberOfClasses = 2;


1441  model.ClassLabels = null;


1442  model.NumberOfSVPerClass = null;


1443  model.PairwiseProbabilityA = null; model.PairwiseProbabilityB = null;


1444  model.SupportVectorCoefficients = new double[1][];


1445 


1446  if (param.Probability &&


1447  (param.SvmType == SvmType.EPSILON_SVR 


1448  param.SvmType == SvmType.NU_SVR)) {


1449  model.PairwiseProbabilityA = new double[1];


1450  model.PairwiseProbabilityA[0] = svm_svr_probability(prob, param);


1451  }


1452 


1453  decision_function f = svm_train_one(prob, param, 0, 0);


1454  model.Rho = new double[1];


1455  model.Rho[0] = f.rho;


1456 


1457  int nSV = 0;


1458  int i;


1459  for (i = 0; i < prob.Count; i++)


1460  if (Math.Abs(f.alpha[i]) > 0) ++nSV;


1461  model.SupportVectorCount = nSV;


1462  model.SupportVectors = new Node[nSV][];


1463  model.SupportVectorCoefficients[0] = new double[nSV];


1464 


1465  int j = 0;


1466  for (i = 0; i < prob.Count; i++)


1467  if (Math.Abs(f.alpha[i]) > 0) {


1468  model.SupportVectors[j] = prob.X[i];


1469  model.SupportVectorCoefficients[0][j] = f.alpha[i];


1470 


1471  ++j;


1472  }


1473  } else {


1474  // classification


1475  int l = prob.Count;


1476  int[] tmp_nr_class = new int[1];


1477  int[][] tmp_label = new int[1][];


1478  int[][] tmp_start = new int[1][];


1479  int[][] tmp_count = new int[1][];


1480  int[] perm = new int[l];


1481 


1482  // group training data of the same class


1483  svm_group_classes(prob, tmp_nr_class, tmp_label, tmp_start, tmp_count, perm);


1484  int nr_class = tmp_nr_class[0];


1485  int[] label = tmp_label[0];


1486  int[] start = tmp_start[0];


1487  int[] count = tmp_count[0];


1488  Node[][] x = new Node[l][];


1489  int i;


1490  for (i = 0; i < l; i++)


1491  x[i] = prob.X[perm[i]];


1492 


1493  // calculate weighted C


1494 


1495  double[] weighted_C = new double[nr_class];


1496  for (i = 0; i < nr_class; i++)


1497  weighted_C[i] = param.C;


1498  foreach (int weightedLabel in param.Weights.Keys) {


1499  int index = Array.IndexOf<int>(label, weightedLabel);


1500  if (index < 0)


1501  Console.Error.WriteLine("warning: class label " + weightedLabel + " specified in weight is not found");


1502  else weighted_C[index] *= param.Weights[weightedLabel];


1503  }


1504 


1505  // train k*(k1)/2 models


1506 


1507  bool[] nonzero = new bool[l];


1508  for (i = 0; i < l; i++)


1509  nonzero[i] = false;


1510  decision_function[] f = new decision_function[nr_class * (nr_class  1) / 2];


1511 


1512  double[] probA = null, probB = null;


1513  if (param.Probability) {


1514  probA = new double[nr_class * (nr_class  1) / 2];


1515  probB = new double[nr_class * (nr_class  1) / 2];


1516  }


1517 


1518  int p = 0;


1519  for (i = 0; i < nr_class; i++)


1520  for (int j = i + 1; j < nr_class; j++) {


1521  Problem sub_prob = new Problem();


1522  int si = start[i], sj = start[j];


1523  int ci = count[i], cj = count[j];


1524  sub_prob.Count = ci + cj;


1525  sub_prob.X = new Node[sub_prob.Count][];


1526  sub_prob.Y = new double[sub_prob.Count];


1527  int k;


1528  for (k = 0; k < ci; k++) {


1529  sub_prob.X[k] = x[si + k];


1530  sub_prob.Y[k] = +1;


1531  }


1532  for (k = 0; k < cj; k++) {


1533  sub_prob.X[ci + k] = x[sj + k];


1534  sub_prob.Y[ci + k] = 1;


1535  }


1536 


1537  if (param.Probability) {


1538  double[] probAB = new double[2];


1539  svm_binary_svc_probability(sub_prob, param, weighted_C[i], weighted_C[j], probAB);


1540  probA[p] = probAB[0];


1541  probB[p] = probAB[1];


1542  }


1543 


1544  f[p] = svm_train_one(sub_prob, param, weighted_C[i], weighted_C[j]);


1545  for (k = 0; k < ci; k++)


1546  if (!nonzero[si + k] && Math.Abs(f[p].alpha[k]) > 0)


1547  nonzero[si + k] = true;


1548  for (k = 0; k < cj; k++)


1549  if (!nonzero[sj + k] && Math.Abs(f[p].alpha[ci + k]) > 0)


1550  nonzero[sj + k] = true;


1551  ++p;


1552  }


1553 


1554  // build output


1555 


1556  model.NumberOfClasses = nr_class;


1557 


1558  model.ClassLabels = new int[nr_class];


1559  for (i = 0; i < nr_class; i++)


1560  model.ClassLabels[i] = label[i];


1561 


1562  model.Rho = new double[nr_class * (nr_class  1) / 2];


1563  for (i = 0; i < nr_class * (nr_class  1) / 2; i++)


1564  model.Rho[i] = f[i].rho;


1565 


1566  if (param.Probability) {


1567  model.PairwiseProbabilityA = new double[nr_class * (nr_class  1) / 2];


1568  model.PairwiseProbabilityB = new double[nr_class * (nr_class  1) / 2];


1569  for (i = 0; i < nr_class * (nr_class  1) / 2; i++) {


1570  model.PairwiseProbabilityA[i] = probA[i];


1571  model.PairwiseProbabilityB[i] = probB[i];


1572  }


1573  } else {


1574  model.PairwiseProbabilityA = null;


1575  model.PairwiseProbabilityB = null;


1576  }


1577 


1578  int nnz = 0;


1579  int[] nz_count = new int[nr_class];


1580  model.NumberOfSVPerClass = new int[nr_class];


1581  for (i = 0; i < nr_class; i++) {


1582  int nSV = 0;


1583  for (int j = 0; j < count[i]; j++)


1584  if (nonzero[start[i] + j]) {


1585  ++nSV;


1586  ++nnz;


1587  }


1588  model.NumberOfSVPerClass[i] = nSV;


1589  nz_count[i] = nSV;


1590  }


1591 


1592  Procedures.info("Total nSV = " + nnz + "\n");


1593 


1594  model.SupportVectorCount = nnz;


1595  model.SupportVectors = new Node[nnz][];


1596  p = 0;


1597  for (i = 0; i < l; i++) {


1598  if (nonzero[i]) {


1599  model.SupportVectors[p] = x[i];


1600  p++;


1601  }


1602  }


1603 


1604  int[] nz_start = new int[nr_class];


1605  nz_start[0] = 0;


1606  for (i = 1; i < nr_class; i++)


1607  nz_start[i] = nz_start[i  1] + nz_count[i  1];


1608 


1609  model.SupportVectorCoefficients = new double[nr_class  1][];


1610  for (i = 0; i < nr_class  1; i++)


1611  model.SupportVectorCoefficients[i] = new double[nnz];


1612 


1613  p = 0;


1614  for (i = 0; i < nr_class; i++)


1615  for (int j = i + 1; j < nr_class; j++) {


1616  // classifier (i,j): coefficients with


1617  // i are in sv_coef[j1][nz_start[i]...],


1618  // j are in sv_coef[i][nz_start[j]...]


1619 


1620  int si = start[i];


1621  int sj = start[j];


1622  int ci = count[i];


1623  int cj = count[j];


1624 


1625  int q = nz_start[i];


1626  int k;


1627  for (k = 0; k < ci; k++)


1628  if (nonzero[si + k])


1629  model.SupportVectorCoefficients[j  1][q++] = f[p].alpha[k];


1630  q = nz_start[j];


1631  for (k = 0; k < cj; k++)


1632  if (nonzero[sj + k])


1633  model.SupportVectorCoefficients[i][q++] = f[p].alpha[ci + k];


1634  ++p;


1635  }


1636  }


1637  return model;


1638  }


1639 


1640  // Stratified cross validation


1641  public static void svm_cross_validation(Problem prob, Parameter param, int nr_fold, double[] target, bool shuffleTraining) {


1642  Random rand = new Random();


1643  int i;


1644  int[] fold_start = new int[nr_fold + 1];


1645  int l = prob.Count;


1646  int[] perm = new int[l];


1647 


1648  // stratified cv may not give leaveoneout rate


1649  // Each class to l folds > some folds may have zero elements


1650  if ((param.SvmType == SvmType.C_SVC 


1651  param.SvmType == SvmType.NU_SVC) && nr_fold < l) {


1652  int[] tmp_nr_class = new int[1];


1653  int[][] tmp_label = new int[1][];


1654  int[][] tmp_start = new int[1][];


1655  int[][] tmp_count = new int[1][];


1656 


1657  svm_group_classes(prob, tmp_nr_class, tmp_label, tmp_start, tmp_count, perm);


1658 


1659  int nr_class = tmp_nr_class[0];


1660  int[] label = tmp_label[0];


1661  int[] start = tmp_start[0];


1662  int[] count = tmp_count[0];


1663 


1664  // random shuffle and then data grouped by fold using the array perm


1665  int[] fold_count = new int[nr_fold];


1666  int c;


1667  int[] index = new int[l];


1668  for (i = 0; i < l; i++)


1669  index[i] = perm[i];


1670  for (c = 0; c < nr_class; c++)


1671  for (i = 0; i < count[c]; i++) {


1672  int j = i + (int)(rand.NextDouble() * (count[c]  i));


1673  do { int _ = index[start[c] + j]; index[start[c] + j] = index[start[c] + i]; index[start[c] + i] = _; } while (false);


1674  }


1675  for (i = 0; i < nr_fold; i++) {


1676  fold_count[i] = 0;


1677  for (c = 0; c < nr_class; c++)


1678  fold_count[i] += (i + 1) * count[c] / nr_fold  i * count[c] / nr_fold;


1679  }


1680  fold_start[0] = 0;


1681  for (i = 1; i <= nr_fold; i++)


1682  fold_start[i] = fold_start[i  1] + fold_count[i  1];


1683  for (c = 0; c < nr_class; c++)


1684  for (i = 0; i < nr_fold; i++) {


1685  int begin = start[c] + i * count[c] / nr_fold;


1686  int end = start[c] + (i + 1) * count[c] / nr_fold;


1687  for (int j = begin; j < end; j++) {


1688  perm[fold_start[i]] = index[j];


1689  fold_start[i]++;


1690  }


1691  }


1692  fold_start[0] = 0;


1693  for (i = 1; i <= nr_fold; i++)


1694  fold_start[i] = fold_start[i  1] + fold_count[i  1];


1695  } else {


1696  for (i = 0; i < l; i++) perm[i] = i;


1697  if (shuffleTraining) {


1698  for (i = 0; i < l; i++) {


1699  int j = i + (int)(rand.NextDouble() * (l  i));


1700  do { int _ = perm[i]; perm[i] = perm[j]; perm[j] = _; } while (false);


1701  }


1702  }


1703  for (i = 0; i <= nr_fold; i++)


1704  fold_start[i] = i * l / nr_fold;


1705  }


1706 


1707  for (i = 0; i < nr_fold; i++) {


1708  int begin = fold_start[i];


1709  int end = fold_start[i + 1];


1710  int j, k;


1711  Problem subprob = new Problem();


1712 


1713  subprob.Count = l  (end  begin);


1714  subprob.X = new Node[subprob.Count][];


1715  subprob.Y = new double[subprob.Count];


1716 


1717  k = 0;


1718  for (j = 0; j < begin; j++) {


1719  subprob.X[k] = prob.X[perm[j]];


1720  subprob.Y[k] = prob.Y[perm[j]];


1721  ++k;


1722  }


1723  for (j = end; j < l; j++) {


1724  subprob.X[k] = prob.X[perm[j]];


1725  subprob.Y[k] = prob.Y[perm[j]];


1726  ++k;


1727  }


1728  Model submodel = svm_train(subprob, param);


1729  if (param.Probability &&


1730  (param.SvmType == SvmType.C_SVC 


1731  param.SvmType == SvmType.NU_SVC)) {


1732  double[] prob_estimates = new double[svm_get_nr_class(submodel)];


1733  for (j = begin; j < end; j++)


1734  target[perm[j]] = svm_predict_probability(submodel, prob.X[perm[j]], prob_estimates);


1735  } else


1736  for (j = begin; j < end; j++)


1737  target[perm[j]] = svm_predict(submodel, prob.X[perm[j]]);


1738  }


1739  }


1740 


1741  public static SvmType svm_get_svm_type(Model model) {


1742  return model.Parameter.SvmType;


1743  }


1744 


1745  public static int svm_get_nr_class(Model model) {


1746  return model.NumberOfClasses;


1747  }


1748 


1749  public static void svm_get_labels(Model model, int[] label) {


1750  if (model.ClassLabels != null)


1751  for (int i = 0; i < model.NumberOfClasses; i++)


1752  label[i] = model.ClassLabels[i];


1753  }


1754 


1755  public static double svm_get_svr_probability(Model model) {


1756  if ((model.Parameter.SvmType == SvmType.EPSILON_SVR  model.Parameter.SvmType == SvmType.NU_SVR) &&


1757  model.PairwiseProbabilityA != null)


1758  return model.PairwiseProbabilityA[0];


1759  else {


1760  Console.Error.WriteLine("Model doesn't contain information for SVR probability inference");


1761  return 0;


1762  }


1763  }


1764 


1765  public static void svm_predict_values(Model model, Node[] x, double[] dec_values) {


1766  if (model.Parameter.SvmType == SvmType.ONE_CLASS 


1767  model.Parameter.SvmType == SvmType.EPSILON_SVR 


1768  model.Parameter.SvmType == SvmType.NU_SVR) {


1769  double[] sv_coef = model.SupportVectorCoefficients[0];


1770  double sum = 0;


1771  for (int i = 0; i < model.SupportVectorCount; i++)


1772  sum += sv_coef[i] * Kernel.KernelFunction(x, model.SupportVectors[i], model.Parameter);


1773  sum = model.Rho[0];


1774  dec_values[0] = sum;


1775  } else {


1776  int i;


1777  int nr_class = model.NumberOfClasses;


1778  int l = model.SupportVectorCount;


1779 


1780  double[] kvalue = new double[l];


1781  for (i = 0; i < l; i++)


1782  kvalue[i] = Kernel.KernelFunction(x, model.SupportVectors[i], model.Parameter);


1783 


1784  int[] start = new int[nr_class];


1785  start[0] = 0;


1786  for (i = 1; i < nr_class; i++)


1787  start[i] = start[i  1] + model.NumberOfSVPerClass[i  1];


1788 


1789  int p = 0;


1790  for (i = 0; i < nr_class; i++)


1791  for (int j = i + 1; j < nr_class; j++) {


1792  double sum = 0;


1793  int si = start[i];


1794  int sj = start[j];


1795  int ci = model.NumberOfSVPerClass[i];


1796  int cj = model.NumberOfSVPerClass[j];


1797 


1798  int k;


1799  double[] coef1 = model.SupportVectorCoefficients[j  1];


1800  double[] coef2 = model.SupportVectorCoefficients[i];


1801  for (k = 0; k < ci; k++)


1802  sum += coef1[si + k] * kvalue[si + k];


1803  for (k = 0; k < cj; k++)


1804  sum += coef2[sj + k] * kvalue[sj + k];


1805  sum = model.Rho[p];


1806  dec_values[p] = sum;


1807  p++;


1808  }


1809  }


1810  }


1811 


1812  public static double svm_predict(Model model, Node[] x) {


1813  if (model.Parameter.SvmType == SvmType.ONE_CLASS 


1814  model.Parameter.SvmType == SvmType.EPSILON_SVR 


1815  model.Parameter.SvmType == SvmType.NU_SVR) {


1816  double[] res = new double[1];


1817  svm_predict_values(model, x, res);


1818 


1819  if (model.Parameter.SvmType == SvmType.ONE_CLASS)


1820  return (res[0] > 0) ? 1 : 1;


1821  else


1822  return res[0];


1823  } else {


1824  int i;


1825  int nr_class = model.NumberOfClasses;


1826  double[] dec_values = new double[nr_class * (nr_class  1) / 2];


1827  svm_predict_values(model, x, dec_values);


1828 


1829  int[] vote = new int[nr_class];


1830  for (i = 0; i < nr_class; i++)


1831  vote[i] = 0;


1832  int pos = 0;


1833  for (i = 0; i < nr_class; i++)


1834  for (int j = i + 1; j < nr_class; j++) {


1835  if (dec_values[pos++] > 0)


1836  ++vote[i];


1837  else


1838  ++vote[j];


1839  }


1840 


1841  int vote_Max_idx = 0;


1842  for (i = 1; i < nr_class; i++)


1843  if (vote[i] > vote[vote_Max_idx])


1844  vote_Max_idx = i;


1845  return model.ClassLabels[vote_Max_idx];


1846  }


1847  }


1848 


1849  public static double svm_predict_probability(Model model, Node[] x, double[] prob_estimates) {


1850  if ((model.Parameter.SvmType == SvmType.C_SVC  model.Parameter.SvmType == SvmType.NU_SVC) &&


1851  model.PairwiseProbabilityA != null && model.PairwiseProbabilityB != null) {


1852  int i;


1853  int nr_class = model.NumberOfClasses;


1854  double[] dec_values = new double[nr_class * (nr_class  1) / 2];


1855  svm_predict_values(model, x, dec_values);


1856 


1857  double Min_prob = 1e7;


1858  double[,] pairwise_prob = new double[nr_class, nr_class];


1859 


1860  int k = 0;


1861  for (i = 0; i < nr_class; i++) {


1862  for (int j = i + 1; j < nr_class; j++) {


1863  pairwise_prob[i, j] = Math.Min(Math.Max(sigmoid_predict(dec_values[k], model.PairwiseProbabilityA[k], model.PairwiseProbabilityB[k]), Min_prob), 1  Min_prob);


1864  pairwise_prob[j, i] = 1  pairwise_prob[i, j];


1865  k++;


1866  }


1867  }


1868  multiclass_probability(nr_class, pairwise_prob, prob_estimates);


1869 


1870  int prob_Max_idx = 0;


1871  for (i = 1; i < nr_class; i++)


1872  if (prob_estimates[i] > prob_estimates[prob_Max_idx])


1873  prob_Max_idx = i;


1874  return model.ClassLabels[prob_Max_idx];


1875  } else


1876  return svm_predict(model, x);


1877  }


1878 


1879  public static string svm_check_parameter(Problem prob, Parameter param) {


1880  // svm_type


1881 


1882  SvmType svm_type = param.SvmType;


1883 


1884  // kernel_type, degree


1885 


1886  KernelType kernel_type = param.KernelType;


1887 


1888  if (param.Degree < 0)


1889  return "degree of polynomial kernel < 0";


1890 


1891  // cache_size,eps,C,nu,p,shrinking


1892 


1893  if (param.CacheSize <= 0)


1894  return "cache_size <= 0";


1895 


1896  if (param.EPS <= 0)


1897  return "eps <= 0";


1898 


1899  if (param.Gamma == 0)


1900  param.Gamma = 1.0 / prob.MaxIndex;


1901 


1902  if (svm_type == SvmType.C_SVC 


1903  svm_type == SvmType.EPSILON_SVR 


1904  svm_type == SvmType.NU_SVR)


1905  if (param.C <= 0)


1906  return "C <= 0";


1907 


1908  if (svm_type == SvmType.NU_SVC 


1909  svm_type == SvmType.ONE_CLASS 


1910  svm_type == SvmType.NU_SVR)


1911  if (param.Nu <= 0  param.Nu > 1)


1912  return "nu <= 0 or nu > 1";


1913 


1914  if (svm_type == SvmType.EPSILON_SVR)


1915  if (param.P < 0)


1916  return "p < 0";


1917 


1918  if (param.Probability &&


1919  svm_type == SvmType.ONE_CLASS)


1920  return "oneclass SVM probability output not supported yet";


1921 


1922  // check whether nusvc is feasible


1923 


1924  if (svm_type == SvmType.NU_SVC) {


1925  int l = prob.Count;


1926  int Max_nr_class = 16;


1927  int nr_class = 0;


1928  int[] label = new int[Max_nr_class];


1929  int[] count = new int[Max_nr_class];


1930 


1931  int i;


1932  for (i = 0; i < l; i++) {


1933  int this_label = (int)prob.Y[i];


1934  int j;


1935  for (j = 0; j < nr_class; j++)


1936  if (this_label == label[j]) {


1937  ++count[j];


1938  break;


1939  }


1940 


1941  if (j == nr_class) {


1942  if (nr_class == Max_nr_class) {


1943  Max_nr_class *= 2;


1944  int[] new_data = new int[Max_nr_class];


1945  Array.Copy(label, 0, new_data, 0, label.Length);


1946  label = new_data;


1947 


1948  new_data = new int[Max_nr_class];


1949  Array.Copy(count, 0, new_data, 0, count.Length);


1950  count = new_data;


1951  }


1952  label[nr_class] = this_label;


1953  count[nr_class] = 1;


1954  ++nr_class;


1955  }


1956  }


1957 


1958  for (i = 0; i < nr_class; i++) {


1959  int n1 = count[i];


1960  for (int j = i + 1; j < nr_class; j++) {


1961  int n2 = count[j];


1962  if (param.Nu * (n1 + n2) / 2 > Math.Min(n1, n2))


1963  return "specified nu is infeasible";


1964  }


1965  }


1966  }


1967 


1968  return null;


1969  }


1970 


1971  public static int svm_check_probability_model(Model model) {


1972  if (((model.Parameter.SvmType == SvmType.C_SVC  model.Parameter.SvmType == SvmType.NU_SVC) &&


1973  model.PairwiseProbabilityA != null && model.PairwiseProbabilityB != null) 


1974  ((model.Parameter.SvmType == SvmType.EPSILON_SVR  model.Parameter.SvmType == SvmType.NU_SVR) &&


1975  model.PairwiseProbabilityA != null))


1976  return 1;


1977  else


1978  return 0;


1979  }


1980  }


1981  }

