Context Navigation

source: trunk/sources/LibSVM/Solver.cs @ 1913

Visit:

Last change on this file since 1913 was 1819, checked in by mkommend, 16 years ago
created new project for LibSVM source files (ticket #619)
File size: 84.9 KB

Line
1	/*
2	* SVM.NET Library
3	* Copyright (C) 2008 Matthew Johnson
4	*
5	* This program is free software: you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by
7	* the Free Software Foundation, either version 3 of the License, or
8	* (at your option) any later version.
9	*
10	* This program is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	* GNU General Public License for more details.
14	*
15	* You should have received a copy of the GNU General Public License
16	* along with this program. If not, see <http://www.gnu.org/licenses/>.
17	*/
18
19
20	using System;
21	using System.Collections.Generic;
22	using System.Diagnostics;
23
24	namespace SVM
25	{
26	//
27	// Kernel evaluation
28	//
29	// the static method k_function is for doing single kernel evaluation
30	// the constructor of Kernel prepares to calculate the l*l kernel matrix
31	// the member function get_Q is for getting one column from the Q Matrix
32	//
/// <summary>
/// Abstract access to the Q matrix consumed by the solvers in this file.
/// </summary>
internal abstract class QMatrix
{
    /// <summary>
    /// Exchanges the data associated with positions <paramref name="i"/> and
    /// <paramref name="j"/>.
    /// </summary>
    public abstract void swap_index(int i, int j);

    /// <summary>
    /// Returns the array that implementations in this file fill with kernel
    /// self-evaluations (the diagonal of Q).
    /// </summary>
    public abstract float[] get_QD();

    /// <summary>
    /// Returns one column of the Q matrix; callers read entries below
    /// <paramref name="len"/>.
    /// </summary>
    public abstract float[] get_Q(int column, int len);
}
39
40	internal abstract class Kernel : QMatrix
41	{
42	private Node[][] _x;
43	private double[] _x_square;
44
45	// Parameter
46	private KernelType kernel_type;
47	private int degree;
48	private double gamma;
49	private double coef0;
50
/// <summary>
/// Exchanges training vectors <paramref name="i"/> and <paramref name="j"/>,
/// together with their stored self dot products when those are kept.
/// </summary>
public override void swap_index(int i, int j)
{
    Node[] heldRow = _x[i];
    _x[i] = _x[j];
    _x[j] = heldRow;

    // _x_square is null unless the constructor allocated it (RBF kernel only).
    if (_x_square == null)
    {
        return;
    }

    double heldSquare = _x_square[i];
    _x_square[i] = _x_square[j];
    _x_square[j] = heldSquare;
}
56
/// <summary>
/// Raises <paramref name="baseValue"/> to the integer power
/// <paramref name="times"/> by repeated squaring.
/// </summary>
/// <returns>
/// The power for positive exponents; 1.0 when <paramref name="times"/> is
/// zero or negative (the loop body never runs).
/// </returns>
private static double powi(double baseValue, int times)
{
    double result = 1.0;
    double factor = baseValue;
    int remaining = times;

    while (remaining > 0)
    {
        // Multiply in the current square whenever the lowest exponent bit is set.
        if ((remaining & 1) != 0)
        {
            result *= factor;
        }
        factor *= factor;
        remaining >>= 1;
    }

    return result;
}
68
/// <summary>
/// Hyperbolic tangent of <paramref name="x"/>, used by the sigmoid kernel.
/// </summary>
/// <remarks>
/// Delegates to <see cref="Math.Tanh(double)"/>. The earlier hand-rolled form
/// 1 - 2 / (exp(x)^2 + 1) subtracts two nearly equal numbers when |x| is small
/// and loses most significant digits there; the library routine does not.
/// </remarks>
/// <returns>A value in [-1, 1].</returns>
private static double tanh(double x)
{
    return Math.Tanh(x);
}
74
/// <summary>
/// Evaluates the configured kernel between stored training vectors
/// <paramref name="i"/> and <paramref name="j"/>.
/// </summary>
/// <returns>The kernel value, or 0 when the kernel type is not recognised.</returns>
public double kernel_function(int i, int j)
{
    if (kernel_type == KernelType.LINEAR)
    {
        return dot(_x[i], _x[j]);
    }

    if (kernel_type == KernelType.POLY)
    {
        double inner = dot(_x[i], _x[j]);
        return powi(gamma * inner + coef0, degree);
    }

    if (kernel_type == KernelType.RBF)
    {
        // ||xi - xj||^2 expanded through the precomputed self dot products.
        double squaredDistance = _x_square[i] + _x_square[j] - 2 * dot(_x[i], _x[j]);
        return Math.Exp(-gamma * squaredDistance);
    }

    if (kernel_type == KernelType.SIGMOID)
    {
        double inner = dot(_x[i], _x[j]);
        return tanh(gamma * inner + coef0);
    }

    if (kernel_type == KernelType.PRECOMPUTED)
    {
        // The first node of row j holds the column to read from row i.
        Node[] row = _x[i];
        int column = (int)(_x[j][0].Value);
        return row[column].Value;
    }

    return 0;
}
93
/// <summary>
/// Captures the kernel parameters and a shallow copy of the training vectors.
/// </summary>
/// <param name="l">Number of training vectors to precompute self dot products for.</param>
/// <param name="x_">Training vectors; the outer array is cloned, the rows are shared.</param>
/// <param name="param">Source of kernel type, degree, gamma and coef0.</param>
public Kernel(int l, Node[][] x_, Parameter param)
{
    kernel_type = param.KernelType;
    degree = param.Degree;
    gamma = param.Gamma;
    coef0 = param.Coefficient0;

    _x = (Node[][])x_.Clone();
    _x_square = null;

    // Only the RBF kernel needs the self dot products x_i . x_i.
    if (kernel_type != KernelType.RBF)
    {
        return;
    }

    double[] selfProducts = new double[l];
    for (int row = 0; row < l; ++row)
    {
        selfProducts[row] = dot(_x[row], _x[row]);
    }
    _x_square = selfProducts;
}
111
/// <summary>
/// Sparse dot product: sums Value products over entries whose Index matches.
/// </summary>
/// <remarks>
/// Walks both arrays with a two-pointer merge, advancing the side with the
/// smaller Index. NOTE(review): this only finds every match if both arrays
/// are ordered by ascending Index - confirm against the data loader.
/// </remarks>
public static double dot(Node[] x, Node[] y)
{
    double total = 0;
    int xCount = x.Length;
    int yCount = y.Length;
    int xi = 0;
    int yi = 0;

    while (xi < xCount && yi < yCount)
    {
        if (x[xi].Index > y[yi].Index)
        {
            ++yi;
        }
        else if (x[xi].Index == y[yi].Index)
        {
            total += x[xi].Value * y[yi].Value;
            ++xi;
            ++yi;
        }
        else
        {
            ++xi;
        }
    }

    return total;
}
133
/// <summary>
/// Single kernel evaluation between two arbitrary vectors, using the kernel
/// type and coefficients found in <paramref name="param"/>.
/// </summary>
/// <returns>The kernel value, or 0 when the kernel type is not recognised.</returns>
public static double k_function(Node[] x, Node[] y, Parameter param)
{
    if (param.KernelType == KernelType.LINEAR)
    {
        return dot(x, y);
    }

    if (param.KernelType == KernelType.POLY)
    {
        return powi(param.Gamma * dot(x, y) + param.Coefficient0, param.Degree);
    }

    if (param.KernelType == KernelType.RBF)
    {
        // Accumulate ||x - y||^2 directly over the two sparse vectors.
        double squaredDistance = 0;
        int xCount = x.Length;
        int yCount = y.Length;
        int xi = 0;
        int yi = 0;

        while (xi < xCount && yi < yCount)
        {
            if (x[xi].Index == y[yi].Index)
            {
                double gap = x[xi].Value - y[yi].Value;
                squaredDistance += gap * gap;
                ++xi;
                ++yi;
            }
            else if (x[xi].Index > y[yi].Index)
            {
                // Entry present only in y.
                squaredDistance += y[yi].Value * y[yi].Value;
                ++yi;
            }
            else
            {
                // Entry present only in x.
                squaredDistance += x[xi].Value * x[xi].Value;
                ++xi;
            }
        }

        // Whatever remains on either side has no counterpart in the other.
        for (; xi < xCount; ++xi)
        {
            squaredDistance += x[xi].Value * x[xi].Value;
        }
        for (; yi < yCount; ++yi)
        {
            squaredDistance += y[yi].Value * y[yi].Value;
        }

        return Math.Exp(-param.Gamma * squaredDistance);
    }

    if (param.KernelType == KernelType.SIGMOID)
    {
        return tanh(param.Gamma * dot(x, y) + param.Coefficient0);
    }

    if (param.KernelType == KernelType.PRECOMPUTED)
    {
        // The first node of y holds the position to read from x.
        return x[(int)(y[0].Value)].Value;
    }

    return 0;
}
190	}
191
192	// An SMO algorithm in Fan et al., JMLR 6(2005), p. 1889--1918
193	// Solves:
194	//
195	// min 0.5(\alpha^T Q \alpha) + p^T \alpha
196	//
197	// y^T \alpha = \delta
198	// y_i = +1 or -1
199	// 0 <= alpha_i <= Cp for y_i = 1
200	// 0 <= alpha_i <= Cn for y_i = -1
201	//
202	// Given:
203	//
204	// Q, p, y, Cp, Cn, and an initial feasible point \alpha
205	// l is the size of vectors and matrices
206	// eps is the stopping tolerance
207	//
208	// solution will be put in \alpha, objective value will be put in obj
209	//
210	internal class Solver
211	{
212	protected int active_size;
213	protected short[] y;
214	protected double[] G; // gradient of objective function
215	protected const byte LOWER_BOUND = 0;
216	protected const byte UPPER_BOUND = 1;
217	protected const byte FREE = 2;
218	protected byte[] alpha_status; // LOWER_BOUND, UPPER_BOUND, FREE
219	protected double[] alpha;
220	protected QMatrix Q;
221	protected float[] QD;
222	protected double eps;
223	protected double Cp, Cn;
224	protected double[] p;
225	protected int[] active_set;
226	protected double[] G_bar; // gradient, if we treat free variables as 0
227	protected int l;
228	protected bool unshrinked; // XXX
229
230	protected const double INF = double.PositiveInfinity;
231
/// <summary>
/// Upper bound for alpha[i]: Cp when y[i] is positive, Cn otherwise.
/// </summary>
protected double get_C(int i)
{
    if (y[i] > 0)
    {
        return Cp;
    }
    return Cn;
}
/// <summary>
/// Re-classifies alpha[i] as UPPER_BOUND, LOWER_BOUND or FREE.
/// The upper-bound test is made first, so it wins if both would apply.
/// </summary>
protected void update_alpha_status(int i)
{
    byte status = FREE;
    if (alpha[i] >= get_C(i))
    {
        status = UPPER_BOUND;
    }
    else if (alpha[i] <= 0)
    {
        status = LOWER_BOUND;
    }
    alpha_status[i] = status;
}
/// <summary>True when alpha[i] was last classified as UPPER_BOUND.</summary>
protected bool is_upper_bound(int i)
{
    return UPPER_BOUND == alpha_status[i];
}

/// <summary>True when alpha[i] was last classified as LOWER_BOUND.</summary>
protected bool is_lower_bound(int i)
{
    return LOWER_BOUND == alpha_status[i];
}

/// <summary>True when alpha[i] was last classified as FREE.</summary>
protected bool is_free(int i)
{
    return FREE == alpha_status[i];
}
247
/// <summary>
/// Everything a solve reports besides alpha itself, bundled into one object
/// because a method cannot return several values directly.
/// </summary>
internal class SolutionInfo
{
    /// <summary>Objective value at the returned solution.</summary>
    public double obj;

    /// <summary>Value produced by calculate_rho.</summary>
    public double rho;

    /// <summary>Upper bound reported for positive-label variables.</summary>
    public double upper_bound_p;

    /// <summary>Upper bound reported for negative-label variables.</summary>
    public double upper_bound_n;

    /// <summary>Extra value filled in by Solver_NU only.</summary>
    public double r;
}
258
/// <summary>
/// Exchanges positions <paramref name="i"/> and <paramref name="j"/> in the
/// Q matrix and in every per-variable array the solver keeps.
/// </summary>
protected void swap_index(int i, int j)
{
    Q.swap_index(i, j);
    exchange(y, i, j);
    exchange(G, i, j);
    exchange(alpha_status, i, j);
    exchange(alpha, i, j);
    exchange(p, i, j);
    exchange(active_set, i, j);
    exchange(G_bar, i, j);
}

// Swaps two elements of one array in place.
private static void exchange<T>(T[] values, int first, int second)
{
    T held = values[first];
    values[first] = values[second];
    values[second] = held;
}
270
/// <summary>
/// Rebuilds G for the inactive positions [active_size, l) from G_bar, p and
/// the contributions of the free variables in the active set.
/// </summary>
protected void reconstruct_gradient()
{
    // Nothing is inactive, so G is already complete.
    if (active_size == l)
    {
        return;
    }

    for (int k = active_size; k < l; k++)
    {
        G[k] = G_bar[k] + p[k];
    }

    for (int a = 0; a < active_size; a++)
    {
        if (!is_free(a))
        {
            continue;
        }

        float[] column = Q.get_Q(a, l);
        double weight = alpha[a];
        for (int k = active_size; k < l; k++)
        {
            G[k] += weight * column[k];
        }
    }
}
290
/// <summary>
/// Runs the SMO iteration on the problem described by the arguments and
/// writes the result into <paramref name="alpha_"/> and <paramref name="si"/>.
/// </summary>
/// <param name="l">Number of variables (length of every per-variable array).</param>
/// <param name="Q">Q matrix; its columns are fetched with get_Q and its diagonal with get_QD.</param>
/// <param name="p_">Linear term; cloned, the caller's array is not modified.</param>
/// <param name="y_">Labels; cloned, the caller's array is not modified.</param>
/// <param name="alpha_">Initial point on entry; overwritten with the solution on exit.</param>
/// <param name="Cp">Upper bound used for variables whose label is positive.</param>
/// <param name="Cn">Upper bound used for the remaining variables.</param>
/// <param name="eps">Stopping tolerance handed to select_working_set.</param>
/// <param name="si">Receives rho, obj, upper_bound_p and upper_bound_n.</param>
/// <param name="shrinking">When true, do_shrinking is called periodically.</param>
291	public virtual void Solve(int l, QMatrix Q, double[] p_, short[] y_,
292	double[] alpha_, double Cp, double Cn, double eps, SolutionInfo si, bool shrinking)
293	{
294	this.l = l;
295	this.Q = Q;
296	QD = Q.get_QD();
// Work on private copies: swap_index permutes these arrays during shrinking.
297	p = (double[])p_.Clone();
298	y = (short[])y_.Clone();
299	alpha = (double[])alpha_.Clone();
300	this.Cp = Cp;
301	this.Cn = Cn;
302	this.eps = eps;
303	this.unshrinked = false;
304
305	// initialize alpha_status
306	{
307	alpha_status = new byte[l];
308	for (int i = 0; i < l; i++)
309	update_alpha_status(i);
310	}
311
312	// initialize active set (for shrinking)
313	{
314	active_set = new int[l];
315	for (int i = 0; i < l; i++)
316	active_set[i] = i;
317	active_size = l;
318	}
319
320	// initialize gradient
// G = p + Q * alpha, accumulated column by column over the variables that
// are not at their lower bound; G_bar accumulates C * column for the
// variables that sit at their upper bound.
321	{
322	G = new double[l];
323	G_bar = new double[l];
324	int i;
325	for (i = 0; i < l; i++)
326	{
327	G[i] = p[i];
328	G_bar[i] = 0;
329	}
330	for (i = 0; i < l; i++)
331	if (!is_lower_bound(i))
332	{
333	float[] Q_i = Q.get_Q(i, l);
334	double alpha_i = alpha[i];
335	int j;
336	for (j = 0; j < l; j++)
337	G[j] += alpha_i * Q_i[j];
338	if (is_upper_bound(i))
339	for (j = 0; j < l; j++)
340	G_bar[j] += get_C(i) * Q_i[j];
341	}
342	}
343
344	// optimization step
345
346	int iter = 0;
// The counter is decremented before it is tested, so the "+ 1" makes the
// first shrinking check fire after min(l, 1000) completed iterations.
347	int counter = Math.Min(l, 1000) + 1;
348	int[] working_set = new int[2];
349
350	while (true)
351	{
352	// show progress and do shrinking
353
354	if (--counter == 0)
355	{
356	counter = Math.Min(l, 1000);
357	if (shrinking) do_shrinking();
358	Debug.Write(".");
359	}
360
// A non-zero result means no violating pair was found in the active set.
// Before stopping, restore the full problem and look once more.
361	if (select_working_set(working_set) != 0)
362	{
363	// reconstruct the whole gradient
364	reconstruct_gradient();
365	// reset active set size and check
366	active_size = l;
367	Debug.Write("*");
368	if (select_working_set(working_set) != 0)
369	break;
370	else
371	counter = 1; // do shrinking next iteration
372	}
373
374	int i = working_set[0];
375	int j = working_set[1];
376
377	++iter;
378
379	// update alpha[i] and alpha[j], handle bounds carefully
380
381	float[] Q_i = Q.get_Q(i, active_size);
382	float[] Q_j = Q.get_Q(j, active_size);
383
384	double C_i = get_C(i);
385	double C_j = get_C(j);
386
387	double old_alpha_i = alpha[i];
388	double old_alpha_j = alpha[j];
389
// Different labels: both alphas move by the same delta, so their difference
// is preserved when the step is clipped to the box.
390	if (y[i] != y[j])
391	{
392	double quad_coef = Q_i[i] + Q_j[j] + 2 * Q_i[j];
// Guard against a non-positive curvature before dividing by it.
393	if (quad_coef <= 0)
394	quad_coef = 1e-12;
395	double delta = (-G[i] - G[j]) / quad_coef;
396	double diff = alpha[i] - alpha[j];
397	alpha[i] += delta;
398	alpha[j] += delta;
399
400	if (diff > 0)
401	{
402	if (alpha[j] < 0)
403	{
404	alpha[j] = 0;
405	alpha[i] = diff;
406	}
407	}
408	else
409	{
410	if (alpha[i] < 0)
411	{
412	alpha[i] = 0;
413	alpha[j] = -diff;
414	}
415	}
416	if (diff > C_i - C_j)
417	{
418	if (alpha[i] > C_i)
419	{
420	alpha[i] = C_i;
421	alpha[j] = C_i - diff;
422	}
423	}
424	else
425	{
426	if (alpha[j] > C_j)
427	{
428	alpha[j] = C_j;
429	alpha[i] = C_j + diff;
430	}
431	}
432	}
// Same labels: the alphas move in opposite directions, so their sum is
// preserved when the step is clipped to the box.
433	else
434	{
435	double quad_coef = Q_i[i] + Q_j[j] - 2 * Q_i[j];
436	if (quad_coef <= 0)
437	quad_coef = 1e-12;
438	double delta = (G[i] - G[j]) / quad_coef;
439	double sum = alpha[i] + alpha[j];
440	alpha[i] -= delta;
441	alpha[j] += delta;
442
443	if (sum > C_i)
444	{
445	if (alpha[i] > C_i)
446	{
447	alpha[i] = C_i;
448	alpha[j] = sum - C_i;
449	}
450	}
451	else
452	{
453	if (alpha[j] < 0)
454	{
455	alpha[j] = 0;
456	alpha[i] = sum;
457	}
458	}
459	if (sum > C_j)
460	{
461	if (alpha[j] > C_j)
462	{
463	alpha[j] = C_j;
464	alpha[i] = sum - C_j;
465	}
466	}
467	else
468	{
469	if (alpha[i] < 0)
470	{
471	alpha[i] = 0;
472	alpha[j] = sum;
473	}
474	}
475	}
476
477	// update G
478
479	double delta_alpha_i = alpha[i] - old_alpha_i;
480	double delta_alpha_j = alpha[j] - old_alpha_j;
481
482	for (int k = 0; k < active_size; k++)
483	{
484	G[k] += Q_i[k] * delta_alpha_i + Q_j[k] * delta_alpha_j;
485	}
486
487	// update alpha_status and G_bar
488
// G_bar only changes when a variable enters or leaves its upper bound; the
// column is then re-requested with length l because G_bar covers all l
// positions, not just the active ones.
489	{
490	bool ui = is_upper_bound(i);
491	bool uj = is_upper_bound(j);
492	update_alpha_status(i);
493	update_alpha_status(j);
494	int k;
495	if (ui != is_upper_bound(i))
496	{
497	Q_i = Q.get_Q(i, l);
498	if (ui)
499	for (k = 0; k < l; k++)
500	G_bar[k] -= C_i * Q_i[k];
501	else
502	for (k = 0; k < l; k++)
503	G_bar[k] += C_i * Q_i[k];
504	}
505
506	if (uj != is_upper_bound(j))
507	{
508	Q_j = Q.get_Q(j, l);
509	if (uj)
510	for (k = 0; k < l; k++)
511	G_bar[k] -= C_j * Q_j[k];
512	else
513	for (k = 0; k < l; k++)
514	G_bar[k] += C_j * Q_j[k];
515	}
516	}
517
518	}
519
520	// calculate rho
521
522	si.rho = calculate_rho();
523
524	// calculate objective value
// obj = sum_i alpha_i * (G_i + p_i) / 2
525	{
526	double v = 0;
527	int i;
528	for (i = 0; i < l; i++)
529	v += alpha[i] * (G[i] + p[i]);
530
531	si.obj = v / 2;
532	}
533
534	// put back the solution
// active_set[i] records the original position of the variable now stored
// at i, which undoes any permutation applied by swap_index.
535	{
536	for (int i = 0; i < l; i++)
537	alpha_[active_set[i]] = alpha[i];
538	}
539
540	si.upper_bound_p = Cp;
541	si.upper_bound_n = Cn;
542
543	Debug.Write("\noptimization finished, #iter = " + iter + "\n");
544	}
545
/// <summary>
/// Chooses the pair of variables to optimise next and stores their positions
/// in <paramref name="working_set"/>.
/// </summary>
/// <remarks>
/// i maximizes -y_i * grad(f)_i over I_up(alpha). j minimizes the estimated
/// decrease of the objective among the candidates in I_low(alpha) with
/// -y_j * grad(f)_j &lt; -y_i * grad(f)_i; a quadratic coefficient that is
/// not positive is replaced by 1e-12.
/// </remarks>
/// <returns>1 if already optimal (working_set untouched), 0 otherwise.</returns>
protected virtual int select_working_set(int[] working_set)
{
    // First pass: pick i.
    double bestViolation = -INF;
    int bestIdx = -1;

    for (int t = 0; t < active_size; t++)
    {
        double candidate;
        if (y[t] == +1)
        {
            if (is_upper_bound(t))
            {
                continue;
            }
            candidate = -G[t];
        }
        else
        {
            if (is_lower_bound(t))
            {
                continue;
            }
            candidate = G[t];
        }

        if (candidate >= bestViolation)
        {
            bestViolation = candidate;
            bestIdx = t;
        }
    }

    int i = bestIdx;
    // When i is -1 the column stays null, but bestViolation is then -INF so
    // no candidate below passes the positive-difference test that reads it.
    float[] Q_i = (i != -1) ? Q.get_Q(i, active_size) : null;

    // Second pass: pick j.
    double secondViolation = -INF;
    int partnerIdx = -1;
    double smallestObjDiff = INF;

    for (int j = 0; j < active_size; j++)
    {
        double gradDiff;
        double quadCoef;

        if (y[j] == +1)
        {
            if (is_lower_bound(j))
            {
                continue;
            }
            if (G[j] >= secondViolation)
            {
                secondViolation = G[j];
            }
            gradDiff = bestViolation + G[j];
            if (!(gradDiff > 0))
            {
                continue;
            }
            quadCoef = Q_i[i] + QD[j] - 2 * y[i] * Q_i[j];
        }
        else
        {
            if (is_upper_bound(j))
            {
                continue;
            }
            if (-G[j] >= secondViolation)
            {
                secondViolation = -G[j];
            }
            gradDiff = bestViolation - G[j];
            if (!(gradDiff > 0))
            {
                continue;
            }
            quadCoef = Q_i[i] + QD[j] + 2 * y[i] * Q_i[j];
        }

        double objDiff;
        if (quadCoef > 0)
        {
            objDiff = -(gradDiff * gradDiff) / quadCoef;
        }
        else
        {
            objDiff = -(gradDiff * gradDiff) / 1e-12;
        }

        if (objDiff <= smallestObjDiff)
        {
            partnerIdx = j;
            smallestObjDiff = objDiff;
        }
    }

    if (bestViolation + secondViolation < eps)
    {
        return 1;
    }

    working_set[0] = bestIdx;
    working_set[1] = partnerIdx;
    return 0;
}
645
/// <summary>
/// Decides whether variable <paramref name="i"/> may be removed from the
/// active set, given the two maxima computed by do_shrinking.
/// Free variables are never shrunk.
/// </summary>
private bool be_shrunken(int i, double Gmax1, double Gmax2)
{
    bool positive = (y[i] == +1);

    if (is_upper_bound(i))
    {
        return positive ? (-G[i] > Gmax1) : (-G[i] > Gmax2);
    }

    if (is_lower_bound(i))
    {
        return positive ? (G[i] > Gmax2) : (G[i] > Gmax1);
    }

    return false;
}
665
/// <summary>
/// Moves variables that be_shrunken accepts to the tail of the arrays and
/// lowers active_size accordingly. Once per solve, when the violation drops
/// to eps * 10 or less, the gradient is reconstructed and variables that no
/// longer qualify are brought back into the active set.
/// </summary>
protected virtual void do_shrinking()
{
    double Gmax1 = -INF; // max { -y_i * grad(f)_i | i in I_up(\alpha) }
    double Gmax2 = -INF; // max { y_i * grad(f)_i | i in I_low(\alpha) }

    // find maximal violating pair first
    for (int k = 0; k < active_size; k++)
    {
        bool positive = (y[k] == +1);

        if (!is_upper_bound(k))
        {
            double negated = -G[k];
            if (positive)
            {
                if (negated >= Gmax1) Gmax1 = negated;
            }
            else
            {
                if (negated >= Gmax2) Gmax2 = negated;
            }
        }

        if (!is_lower_bound(k))
        {
            double plain = G[k];
            if (positive)
            {
                if (plain >= Gmax2) Gmax2 = plain;
            }
            else
            {
                if (plain >= Gmax1) Gmax1 = plain;
            }
        }
    }

    // shrink: replace each shrinkable position with the last position that
    // has to stay active, pulling active_size down as we go
    for (int k = 0; k < active_size; k++)
    {
        if (!be_shrunken(k, Gmax1, Gmax2))
        {
            continue;
        }

        active_size--;
        while (active_size > k)
        {
            if (!be_shrunken(active_size, Gmax1, Gmax2))
            {
                swap_index(k, active_size);
                break;
            }
            active_size--;
        }
    }

    // unshrink, check all variables again before sealed iterations
    if (unshrinked || Gmax1 + Gmax2 > eps * 10)
    {
        return;
    }

    unshrinked = true;
    reconstruct_gradient();

    for (int k = l - 1; k >= active_size; k--)
    {
        if (be_shrunken(k, Gmax1, Gmax2))
        {
            continue;
        }

        while (active_size < k)
        {
            if (be_shrunken(active_size, Gmax1, Gmax2))
            {
                swap_index(k, active_size);
                break;
            }
            active_size++;
        }
        active_size++;
    }
}
742
/// <summary>
/// Computes rho from the active variables: the mean of y_i * G_i over the
/// free variables when there are any, otherwise the midpoint of the bounds
/// collected from the variables sitting at a bound.
/// </summary>
protected virtual double calculate_rho()
{
    double upper = INF;
    double lower = -INF;
    double freeSum = 0;
    int freeCount = 0;

    for (int k = 0; k < active_size; k++)
    {
        double yG = y[k] * G[k];

        switch (alpha_status[k])
        {
            case LOWER_BOUND:
                if (y[k] > 0)
                    upper = Math.Min(upper, yG);
                else
                    lower = Math.Max(lower, yG);
                break;

            case UPPER_BOUND:
                if (y[k] < 0)
                    upper = Math.Min(upper, yG);
                else
                    lower = Math.Max(lower, yG);
                break;

            default:
                ++freeCount;
                freeSum += yG;
                break;
        }
    }

    if (freeCount > 0)
    {
        return freeSum / freeCount;
    }
    return (upper + lower) / 2;
}
780
781	}
782
783	//
784	// Solver for nu-svm classification and regression
785	//
786	// additional constraint: e^T \alpha = constant
787	//
788	sealed class Solver_NU : Solver
789	{
790	private SolutionInfo si;
791
/// <summary>
/// Same contract as the base solve. The solution-info object is remembered
/// first because calculate_rho, invoked by the base implementation, stores
/// its extra result in it.
/// </summary>
public override void Solve(
    int l, QMatrix Q, double[] p, short[] y,
    double[] alpha, double Cp, double Cn, double eps,
    SolutionInfo si, bool shrinking)
{
    this.si = si;

    base.Solve(l, Q, p, y, alpha, Cp, Cn, eps, si, shrinking);
}
799
/// <summary>
/// Chooses the next working pair with both members carrying the same label.
/// </summary>
/// <remarks>
/// For each label a best i is found (maximizing -y_i * grad(f)_i over
/// I_up(alpha)); j minimizes the estimated decrease of the objective among
/// candidates of either label, each measured against the best i of its own
/// label. A quadratic coefficient that is not positive is replaced by 1e-12.
/// </remarks>
/// <returns>1 if already optimal (working_set untouched), 0 otherwise.</returns>
protected override int select_working_set(int[] working_set)
{
    double posBest = -INF;
    int posBestIdx = -1;
    double negBest = -INF;
    int negBestIdx = -1;

    // First pass: best i for each label.
    for (int t = 0; t < active_size; t++)
    {
        if (y[t] == +1)
        {
            if (!is_upper_bound(t) && -G[t] >= posBest)
            {
                posBest = -G[t];
                posBestIdx = t;
            }
        }
        else
        {
            if (!is_lower_bound(t) && G[t] >= negBest)
            {
                negBest = G[t];
                negBestIdx = t;
            }
        }
    }

    int ip = posBestIdx;
    int iN = negBestIdx;

    // A column stays null when its index is -1; the matching maximum is then
    // -INF, so no candidate passes the positive-difference test that reads it.
    // The positive column is requested before the negative one.
    float[] Q_ip = (ip != -1) ? Q.get_Q(ip, active_size) : null;
    float[] Q_in = (iN != -1) ? Q.get_Q(iN, active_size) : null;

    double posSecond = -INF;
    double negSecond = -INF;
    int partnerIdx = -1;
    double smallestObjDiff = INF;

    // Second pass: pick j.
    for (int j = 0; j < active_size; j++)
    {
        double gradDiff;
        double quadCoef;

        if (y[j] == +1)
        {
            if (is_lower_bound(j))
            {
                continue;
            }
            if (G[j] >= posSecond)
            {
                posSecond = G[j];
            }
            gradDiff = posBest + G[j];
            if (!(gradDiff > 0))
            {
                continue;
            }
            quadCoef = Q_ip[ip] + QD[j] - 2 * Q_ip[j];
        }
        else
        {
            if (is_upper_bound(j))
            {
                continue;
            }
            if (-G[j] >= negSecond)
            {
                negSecond = -G[j];
            }
            gradDiff = negBest - G[j];
            if (!(gradDiff > 0))
            {
                continue;
            }
            quadCoef = Q_in[iN] + QD[j] - 2 * Q_in[j];
        }

        double objDiff;
        if (quadCoef > 0)
        {
            objDiff = -(gradDiff * gradDiff) / quadCoef;
        }
        else
        {
            objDiff = -(gradDiff * gradDiff) / 1e-12;
        }

        if (objDiff <= smallestObjDiff)
        {
            partnerIdx = j;
            smallestObjDiff = objDiff;
        }
    }

    if (Math.Max(posBest + posSecond, negBest + negSecond) < eps)
    {
        return 1;
    }

    // i is the best candidate that shares j's label.
    working_set[0] = (y[partnerIdx] == +1) ? posBestIdx : negBestIdx;
    working_set[1] = partnerIdx;

    return 0;
}
912
/// <summary>
/// Decides whether variable <paramref name="i"/> may be removed from the
/// active set, given the four per-label maxima computed by do_shrinking.
/// Free variables are never shrunk.
/// </summary>
private bool be_shrunken(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4)
{
    bool positive = (y[i] == +1);

    if (is_upper_bound(i))
    {
        return positive ? (-G[i] > Gmax1) : (-G[i] > Gmax4);
    }

    if (is_lower_bound(i))
    {
        return positive ? (G[i] > Gmax2) : (G[i] > Gmax3);
    }

    return false;
}
932
/// <summary>
/// Shrinking for the nu formulation: like the base version but with separate
/// maxima for each label. Once per solve, when the larger per-label violation
/// drops to eps * 10 or less, the gradient is reconstructed and variables
/// that no longer qualify are brought back into the active set.
/// </summary>
protected override void do_shrinking()
{
    double Gmax1 = -INF; // max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) }
    double Gmax2 = -INF; // max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) }
    double Gmax3 = -INF; // max { -y_i * grad(f)_i | y_i = -1, i in I_up(\alpha) }
    double Gmax4 = -INF; // max { y_i * grad(f)_i | y_i = -1, i in I_low(\alpha) }

    // find maximal violating pair first
    for (int k = 0; k < active_size; k++)
    {
        bool positive = (y[k] == +1);

        if (!is_upper_bound(k))
        {
            double negated = -G[k];
            if (positive)
            {
                if (negated > Gmax1) Gmax1 = negated;
            }
            else if (negated > Gmax4)
            {
                Gmax4 = negated;
            }
        }

        if (!is_lower_bound(k))
        {
            double plain = G[k];
            if (positive)
            {
                if (plain > Gmax2) Gmax2 = plain;
            }
            else if (plain > Gmax3)
            {
                Gmax3 = plain;
            }
        }
    }

    // shrinking: replace each shrinkable position with the last position
    // that has to stay active, pulling active_size down as we go
    for (int k = 0; k < active_size; k++)
    {
        if (!be_shrunken(k, Gmax1, Gmax2, Gmax3, Gmax4))
        {
            continue;
        }

        active_size--;
        while (active_size > k)
        {
            if (!be_shrunken(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
            {
                swap_index(k, active_size);
                break;
            }
            active_size--;
        }
    }

    if (unshrinked || Math.Max(Gmax1 + Gmax2, Gmax3 + Gmax4) > eps * 10)
    {
        return;
    }

    unshrinked = true;
    reconstruct_gradient();

    for (int k = l - 1; k >= active_size; k--)
    {
        if (be_shrunken(k, Gmax1, Gmax2, Gmax3, Gmax4))
        {
            continue;
        }

        while (active_size < k)
        {
            if (be_shrunken(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
            {
                swap_index(k, active_size);
                break;
            }
            active_size++;
        }
        active_size++;
    }
}
999
/// <summary>
/// Computes one estimate per label (r1 for y = +1, r2 otherwise): the mean
/// of G over free variables, or the midpoint of the bounds when none is
/// free. Stores (r1 + r2) / 2 in si.r and returns (r1 - r2) / 2.
/// </summary>
protected override double calculate_rho()
{
    // Slot 0 collects the y = +1 variables, slot 1 all the others.
    int[] freeCount = new int[2];
    double[] freeSum = new double[2];
    double[] upper = new double[] { INF, INF };
    double[] lower = new double[] { -INF, -INF };

    for (int k = 0; k < active_size; k++)
    {
        int slot = (y[k] == +1) ? 0 : 1;

        if (is_lower_bound(k))
        {
            upper[slot] = Math.Min(upper[slot], G[k]);
        }
        else if (is_upper_bound(k))
        {
            lower[slot] = Math.Max(lower[slot], G[k]);
        }
        else
        {
            ++freeCount[slot];
            freeSum[slot] += G[k];
        }
    }

    double r1 = (freeCount[0] > 0)
        ? freeSum[0] / freeCount[0]
        : (upper[0] + lower[0]) / 2;

    double r2 = (freeCount[1] > 0)
        ? freeSum[1] / freeCount[1]
        : (upper[1] + lower[1]) / 2;

    si.r = (r1 + r2) / 2;
    return (r1 - r2) / 2;
}
1049	}
1050
1051	//
1052	// Q matrices for various formulations
1053	//
/// <summary>
/// Q matrix for classification: Q[i][j] = y[i] * y[j] * K(x_i, x_j),
/// with columns served through a cache.
/// </summary>
class SVC_Q : Kernel
{
    private short[] y;
    private Cache cache;
    private float[] QD;

    /// <summary>
    /// Copies the labels, sizes the cache from param.CacheSize (scaled by
    /// 2^20) and precomputes the kernel diagonal.
    /// </summary>
    public SVC_Q(Problem prob, Parameter param, short[] y_) : base(prob.Count, prob.X, param)
    {
        y = (short[])y_.Clone();
        cache = new Cache(prob.Count, (long)(param.CacheSize * (1 << 20)));

        QD = new float[prob.Count];
        for (int k = 0; k < prob.Count; k++)
        {
            QD[k] = (float)kernel_function(k, k);
        }
    }

    /// <summary>
    /// Returns column <paramref name="i"/>, computing only the entries from
    /// the position reported by the cache up to <paramref name="len"/>.
    /// </summary>
    public override float[] get_Q(int i, int len)
    {
        float[][] slot = new float[1][];
        int firstMissing = cache.get_data(i, slot, len);

        for (int j = firstMissing; j < len; j++)
        {
            slot[0][j] = (float)(y[i] * y[j] * kernel_function(i, j));
        }

        return slot[0];
    }

    /// <summary>Returns the precomputed kernel diagonal.</summary>
    public override float[] get_QD()
    {
        return QD;
    }

    /// <summary>
    /// Swaps positions i and j in the cache, the kernel data, the labels and
    /// the diagonal.
    /// </summary>
    public override void swap_index(int i, int j)
    {
        cache.swap_index(i, j);
        base.swap_index(i, j);

        short heldLabel = y[i];
        y[i] = y[j];
        y[j] = heldLabel;

        float heldDiag = QD[i];
        QD[i] = QD[j];
        QD[j] = heldDiag;
    }
}
1094
/// <summary>
/// Q matrix for the one-class formulation: Q[i][j] = K(x_i, x_j), with
/// columns served through a cache.
/// </summary>
class ONE_CLASS_Q : Kernel
{
    private Cache cache;
    private float[] QD;

    /// <summary>
    /// Sizes the cache from param.CacheSize (scaled by 2^20) and precomputes
    /// the kernel diagonal.
    /// </summary>
    public ONE_CLASS_Q(Problem prob, Parameter param) : base(prob.Count, prob.X, param)
    {
        cache = new Cache(prob.Count, (long)(param.CacheSize * (1 << 20)));

        QD = new float[prob.Count];
        for (int k = 0; k < prob.Count; k++)
        {
            QD[k] = (float)kernel_function(k, k);
        }
    }

    /// <summary>
    /// Returns column <paramref name="i"/>, computing only the entries from
    /// the position reported by the cache up to <paramref name="len"/>.
    /// </summary>
    public override float[] get_Q(int i, int len)
    {
        float[][] slot = new float[1][];
        int firstMissing = cache.get_data(i, slot, len);

        for (int j = firstMissing; j < len; j++)
        {
            slot[0][j] = (float)kernel_function(i, j);
        }

        return slot[0];
    }

    /// <summary>Returns the precomputed kernel diagonal.</summary>
    public override float[] get_QD()
    {
        return QD;
    }

    /// <summary>
    /// Swaps positions i and j in the cache, the kernel data and the diagonal.
    /// </summary>
    public override void swap_index(int i, int j)
    {
        cache.swap_index(i, j);
        base.swap_index(i, j);

        float heldDiag = QD[i];
        QD[i] = QD[j];
        QD[j] = heldDiag;
    }
}
1132
/// <summary>
/// Q matrix for regression. The solver sees 2 * l variables; each of them
/// maps to one of the l training vectors through <c>index</c> and carries a
/// <c>sign</c> of +1 or -1.
/// </summary>
class SVR_Q : Kernel
{
    private int l;
    private Cache cache;
    private short[] sign;
    private int[] index;
    private int next_buffer;
    private float[][] buffer;
    private float[] QD;

    /// <summary>
    /// Builds the sign/index tables and the doubled diagonal, and allocates
    /// the two output buffers handed out alternately by get_Q.
    /// </summary>
    public SVR_Q(Problem prob, Parameter param)
        : base(prob.Count, prob.X, param)
    {
        l = prob.Count;
        cache = new Cache(l, (long)(param.CacheSize * (1 << 20)));

        int doubled = 2 * l;
        QD = new float[doubled];
        sign = new short[doubled];
        index = new int[doubled];

        for (int k = 0; k < l; k++)
        {
            // First half: positive copy of vector k; second half: negative copy.
            sign[k] = 1;
            index[k] = k;
            sign[k + l] = -1;
            index[k + l] = k;

            QD[k] = (float)kernel_function(k, k);
            QD[k + l] = QD[k];
        }

        buffer = new float[2][];
        buffer[0] = new float[doubled];
        buffer[1] = new float[doubled];
        next_buffer = 0;
    }

    /// <summary>
    /// Swaps the bookkeeping for positions i and j. The cache and the kernel
    /// data are left alone: both are addressed through <c>index</c>.
    /// </summary>
    public override void swap_index(int i, int j)
    {
        short heldSign = sign[i];
        sign[i] = sign[j];
        sign[j] = heldSign;

        int heldIndex = index[i];
        index[i] = index[j];
        index[j] = heldIndex;

        float heldDiag = QD[i];
        QD[i] = QD[j];
        QD[j] = heldDiag;
    }

    /// <summary>
    /// Returns column <paramref name="i"/> of the doubled problem. The kernel
    /// row of the underlying vector is taken from the cache (or computed in
    /// full), then reordered and signed into one of two alternating buffers,
    /// so the array returned two calls ago is overwritten.
    /// </summary>
    public override float[] get_Q(int i, int len)
    {
        float[][] slot = new float[1][];
        int sourceRow = index[i];

        if (cache.get_data(sourceRow, slot, l) < l)
        {
            for (int j = 0; j < l; j++)
            {
                slot[0][j] = (float)kernel_function(sourceRow, j);
            }
        }

        // reorder and copy
        float[] target = buffer[next_buffer];
        next_buffer = 1 - next_buffer;

        short rowSign = sign[i];
        for (int j = 0; j < len; j++)
        {
            target[j] = rowSign * sign[j] * slot[0][index[j]];
        }

        return target;
    }

    /// <summary>Returns the doubled kernel diagonal.</summary>
    public override float[] get_QD()
    {
        return QD;
    }
}
1197
1198	internal static class Procedures
1199	{
1200	//
1201	// construct and solve various formulations
1202	//
/// <summary>
/// Sets up and solves the C-SVC problem: all alphas start at 0, the linear
/// term is -1 everywhere and labels are +1 or -1 by the sign of prob.Y.
/// On return each alpha has been multiplied by its label.
/// </summary>
private static void solve_c_svc(Problem prob, Parameter param,
    double[] alpha, Solver.SolutionInfo si,
    double Cp, double Cn)
{
    int l = prob.Count;
    short[] y = new short[l];
    double[] minus_ones = new double[l];

    for (int k = 0; k < l; k++)
    {
        alpha[k] = 0;
        minus_ones[k] = -1;
        y[k] = (short)(prob.Y[k] > 0 ? +1 : -1);
    }

    Solver solver = new Solver();
    solver.Solve(l, new SVC_Q(prob, param, y), minus_ones, y,
        alpha, Cp, Cn, param.EPS, si, param.Shrinking);

    double sum_alpha = 0;
    for (int k = 0; k < l; k++)
    {
        sum_alpha += alpha[k];
    }

    if (Cp == Cn)
    {
        Debug.Write("nu = " + sum_alpha / (Cp * prob.Count) + "\n");
    }

    // Fold the label into the coefficient.
    for (int k = 0; k < l; k++)
    {
        alpha[k] *= y[k];
    }
}
1234
/// <summary>
/// Sets up and solves the nu-SVC problem with Solver_NU (zero linear term,
/// both bounds 1.0), then rescales alpha, rho, obj and the reported bounds
/// by the r value the solver stored in <paramref name="si"/>.
/// </summary>
private static void solve_nu_svc(Problem prob, Parameter param,
    double[] alpha, Solver.SolutionInfo si)
{
    int l = prob.Count;
    double nu = param.Nu;

    short[] y = new short[l];
    for (int k = 0; k < l; k++)
    {
        y[k] = (short)(prob.Y[k] > 0 ? +1 : -1);
    }

    // Initial point: hand out nu * l / 2 to each class, at most 1.0 per
    // variable, in index order.
    double remainingPos = nu * l / 2;
    double remainingNeg = nu * l / 2;
    for (int k = 0; k < l; k++)
    {
        if (y[k] == +1)
        {
            alpha[k] = Math.Min(1.0, remainingPos);
            remainingPos -= alpha[k];
        }
        else
        {
            alpha[k] = Math.Min(1.0, remainingNeg);
            remainingNeg -= alpha[k];
        }
    }

    // A freshly allocated double array is already all zeros.
    double[] zeros = new double[l];

    Solver_NU solver = new Solver_NU();
    solver.Solve(l, new SVC_Q(prob, param, y), zeros, y,
        alpha, 1.0, 1.0, param.EPS, si, param.Shrinking);

    double r = si.r;
    Debug.Write("C = " + 1 / r + "\n");

    for (int k = 0; k < l; k++)
    {
        alpha[k] *= y[k] / r;
    }

    si.rho /= r;
    si.obj /= (r * r);
    si.upper_bound_p = 1 / r;
    si.upper_bound_n = 1 / r;
}
1285
/// <summary>
/// Sets up and solves the one-class problem: zero linear term, all labels
/// +1, both bounds 1.0. The initial alphas sum to param.Nu * prob.Count.
/// </summary>
private static void solve_one_class(Problem prob, Parameter param,
    double[] alpha, Solver.SolutionInfo si)
{
    int l = prob.Count;

    int n = (int)(param.Nu * prob.Count); // # of alpha's at upper bound

    // The first n alphas sit at the bound, the next one takes the fractional
    // remainder, everything after that is zero.
    for (int k = 0; k < n; k++)
    {
        alpha[k] = 1;
    }
    if (n < prob.Count)
    {
        alpha[n] = param.Nu * prob.Count - n;
    }
    for (int k = n + 1; k < l; k++)
    {
        alpha[k] = 0;
    }

    double[] zeros = new double[l];
    short[] ones = new short[l];
    for (int k = 0; k < l; k++)
    {
        zeros[k] = 0;
        ones[k] = 1;
    }

    Solver solver = new Solver();
    solver.Solve(l, new ONE_CLASS_Q(prob, param), zeros, ones,
        alpha, 1.0, 1.0, param.EPS, si, param.Shrinking);
}
1313
/// <summary>
/// Sets up and solves epsilon-SVR as a problem with 2 * l variables, then
/// folds the two halves back into <paramref name="alpha"/> as
/// alpha2[i] - alpha2[i + l].
/// </summary>
private static void solve_epsilon_svr(Problem prob, Parameter param,
    double[] alpha, Solver.SolutionInfo si)
{
    int l = prob.Count;
    int doubled = 2 * l;

    double[] alpha2 = new double[doubled];
    double[] linear_term = new double[doubled];
    short[] y = new short[doubled];

    for (int k = 0; k < l; k++)
    {
        int mirror = k + l;

        alpha2[k] = 0;
        linear_term[k] = param.P - prob.Y[k];
        y[k] = 1;

        alpha2[mirror] = 0;
        linear_term[mirror] = param.P + prob.Y[k];
        y[mirror] = -1;
    }

    Solver solver = new Solver();
    solver.Solve(2 * l, new SVR_Q(prob, param), linear_term, y,
        alpha2, param.C, param.C, param.EPS, si, param.Shrinking);

    double sum_alpha = 0;
    for (int k = 0; k < l; k++)
    {
        alpha[k] = alpha2[k] - alpha2[k + l];
        sum_alpha += Math.Abs(alpha[k]);
    }

    Debug.Write("nu = " + sum_alpha / (param.C * l) + "\n");
}
1346
/// <summary>
/// Sets up and solves nu-SVR with Solver_NU as a problem with 2 * l
/// variables, then folds the two halves back into <paramref name="alpha"/>
/// as alpha2[i] - alpha2[i + l].
/// </summary>
private static void solve_nu_svr(Problem prob, Parameter param,
    double[] alpha, Solver.SolutionInfo si)
{
    int l = prob.Count;
    int doubled = 2 * l;
    double C = param.C;

    double[] alpha2 = new double[doubled];
    double[] linear_term = new double[doubled];
    short[] y = new short[doubled];

    // Initial point: hand out C * nu * l / 2 in index order, at most C per
    // variable, identically to both halves.
    double remaining = C * param.Nu * l / 2;
    for (int k = 0; k < l; k++)
    {
        int mirror = k + l;

        double share = Math.Min(remaining, C);
        alpha2[mirror] = share;
        alpha2[k] = share;
        remaining -= alpha2[k];

        linear_term[k] = -prob.Y[k];
        y[k] = 1;

        linear_term[mirror] = prob.Y[k];
        y[mirror] = -1;
    }

    Solver_NU solver = new Solver_NU();
    solver.Solve(2 * l, new SVR_Q(prob, param), linear_term, y, alpha2, C, C, param.EPS, si, param.Shrinking);

    Debug.Write("epsilon = " + (-si.r) + "\n");

    for (int k = 0; k < l; k++)
    {
        alpha[k] = alpha2[k] - alpha2[k + l];
    }
}
1378
1379	//
1380	// decision_function
1381	//
/// <summary>
/// Result of training one sub-problem: the coefficients and rho.
/// </summary>
private class decision_function
{
    /// <summary>Value of rho reported by the solver.</summary>
    public double rho;

    /// <summary>One coefficient per training vector of the sub-problem.</summary>
    public double[] alpha;
};
1387
/// <summary>
/// Trains a single sub-problem with the solver matching param.SvmType and
/// returns its coefficients and rho. Also writes the objective, rho and the
/// support-vector counts to the debug output.
/// </summary>
/// <param name="Cp">Upper bound for positive examples (used by C-SVC only).</param>
/// <param name="Cn">Upper bound for negative examples (used by C-SVC only).</param>
static decision_function svm_train_one(
    Problem prob, Parameter param,
    double Cp, double Cn)
{
    double[] alpha = new double[prob.Count];
    Solver.SolutionInfo si = new Solver.SolutionInfo();

    switch (param.SvmType)
    {
        case SvmType.C_SVC:
            solve_c_svc(prob, param, alpha, si, Cp, Cn);
            break;
        case SvmType.NU_SVC:
            solve_nu_svc(prob, param, alpha, si);
            break;
        case SvmType.ONE_CLASS:
            solve_one_class(prob, param, alpha, si);
            break;
        case SvmType.EPSILON_SVR:
            solve_epsilon_svr(prob, param, alpha, si);
            break;
        case SvmType.NU_SVR:
            solve_nu_svr(prob, param, alpha, si);
            break;
    }

    Debug.Write("obj = " + si.obj + ", rho = " + si.rho + "\n");

    // output SVs: a support vector has a non-zero coefficient; it is counted
    // as bounded when its magnitude reaches the bound reported for its class
    int nSV = 0;
    int nBSV = 0;
    for (int k = 0; k < prob.Count; k++)
    {
        double magnitude = Math.Abs(alpha[k]);
        if (magnitude > 0)
        {
            ++nSV;

            double bound = (prob.Y[k] > 0) ? si.upper_bound_p : si.upper_bound_n;
            if (magnitude >= bound)
            {
                ++nBSV;
            }
        }
    }

    Debug.Write("nSV = " + nSV + ", nBSV = " + nBSV + "\n");

    decision_function result = new decision_function();
    result.alpha = alpha;
    result.rho = si.rho;
    return result;
}
1444
// Platt's binary SVM probabilistic output: an improvement from Lin et al.
//
// Fits the two sigmoid parameters A and B to the decision values with a
// Newton iteration and a backtracking line search.
//   l          : number of samples
//   dec_values : decision value of each sample
//   labels     : sample labels; a value > 0 counts as the positive class
//   probAB     : output; probAB[0] receives A and probAB[1] receives B
private static void sigmoid_train(int l, double[] dec_values, double[] labels,
    double[] probAB)
{
    const int maxIterations = 100;   // Maximal number of iterations
    const double minStep = 1e-10;    // Minimal step taken in line search
    const double sigma = 1e-3;       // For numerically strict PD of Hessian
    const double gradientTolerance = 1e-5;

    double positives = 0, negatives = 0;
    for (int n = 0; n < l; n++)
    {
        if (labels[n] > 0) positives += 1;
        else negatives += 1;
    }

    double hiTarget = (positives + 1.0) / (positives + 2.0);
    double loTarget = 1 / (negatives + 2.0);
    double[] targets = new double[l];
    for (int n = 0; n < l; n++)
    {
        targets[n] = labels[n] > 0 ? hiTarget : loTarget;
    }

    // Initial point and initial function value
    double A = 0.0;
    double B = Math.Log((negatives + 1.0) / (positives + 1.0));
    double fval = sigmoid_objective(l, dec_values, targets, A, B);

    int iter;
    for (iter = 0; iter < maxIterations; iter++)
    {
        // Update gradient and Hessian (use H' = H + sigma I)
        double h11 = sigma; // numerically ensures strict PD
        double h22 = sigma;
        double h21 = 0.0, g1 = 0.0, g2 = 0.0;
        for (int n = 0; n < l; n++)
        {
            double z = dec_values[n] * A + B;
            double p, q;
            if (z >= 0)
            {
                p = Math.Exp(-z) / (1.0 + Math.Exp(-z));
                q = 1.0 / (1.0 + Math.Exp(-z));
            }
            else
            {
                p = 1.0 / (1.0 + Math.Exp(z));
                q = Math.Exp(z) / (1.0 + Math.Exp(z));
            }

            double d2 = p * q;
            h11 += dec_values[n] * dec_values[n] * d2;
            h22 += d2;
            h21 += dec_values[n] * d2;

            double d1 = targets[n] - p;
            g1 += dec_values[n] * d1;
            g2 += d1;
        }

        // Stopping criteria
        if (Math.Abs(g1) < gradientTolerance && Math.Abs(g2) < gradientTolerance)
        {
            break;
        }

        // Finding Newton direction: -inv(H') * g
        double det = h11 * h22 - h21 * h21;
        double dA = -(h22 * g1 - h21 * g2) / det;
        double dB = -(-h21 * g1 + h11 * g2) / det;
        double gd = g1 * dA + g2 * dB;

        // Line search: halve the step until the decrease is sufficient
        double stepsize = 1;
        while (stepsize >= minStep)
        {
            double newA = A + stepsize * dA;
            double newB = B + stepsize * dB;
            double newf = sigmoid_objective(l, dec_values, targets, newA, newB);

            if (newf < fval + 0.0001 * stepsize * gd)
            {
                A = newA;
                B = newB;
                fval = newf;
                break;
            }

            stepsize = stepsize / 2.0;
        }

        if (stepsize < minStep)
        {
            Debug.Write("Line search fails in two-class probability estimates\n");
            break;
        }
    }

    if (iter >= maxIterations)
    {
        Debug.Write("Reaching maximal iterations in two-class probability estimates\n");
    }

    probAB[0] = A;
    probAB[1] = B;
}

// Objective minimised by sigmoid_train, evaluated at (A, B). The two
// branches keep the argument of Math.Exp non-positive.
private static double sigmoid_objective(int l, double[] dec_values, double[] targets,
    double A, double B)
{
    double total = 0.0;
    for (int n = 0; n < l; n++)
    {
        double z = dec_values[n] * A + B;
        if (z >= 0)
            total += targets[n] * z + Math.Log(1 + Math.Exp(-z));
        else
            total += (targets[n] - 1) * z + Math.Log(1 + Math.Exp(z));
    }
    return total;
}
1558
// Evaluates the fitted sigmoid 1 / (1 + exp(decision_value * A + B)).
// Both branches compute the same quantity; the form is chosen so that the
// argument passed to Math.Exp is never positive.
private static double sigmoid_predict(double decision_value, double A, double B)
{
    double z = decision_value * A + B;
    return z >= 0
        ? Math.Exp(-z) / (1.0 + Math.Exp(-z))
        : 1.0 / (1 + Math.Exp(z));
}
1567
// Method 2 from the multiclass_prob paper by Wu, Lin, and Weng.
//
// Combines pairwise probabilities into a single distribution over k classes.
//   k : number of classes
//   r : k-by-k matrix of pairwise probabilities (the caller in this file
//       fills it so that r[i,j] + r[j,i] = 1 for i != j)
//   p : output of length at least k; receives the class probabilities
// The iteration stops when the largest deviation |Qp[t] - pQp| drops below
// 0.005 / k, or after max(100, k) sweeps (reported through Debug.Write).
private static void multiclass_probability(int k, double[,] r, double[] p)
{
    int t, j;
    int iter = 0, max_iter = Math.Max(100, k);
    double[,] Q = new double[k, k];
    double[] Qp = new double[k];
    double pQp, eps = 0.005 / k;

    for (t = 0; t < k; t++)
    {
        p[t] = 1.0 / k; // Valid if k = 1
        Q[t, t] = 0;
        for (j = 0; j < t; j++)
        {
            Q[t, t] += r[j, t] * r[j, t];
            Q[t, j] = Q[j, t];
        }
        for (j = t + 1; j < k; j++)
        {
            Q[t, t] += r[j, t] * r[j, t];
            Q[t, j] = -r[j, t] * r[t, j];
        }
    }

    for (iter = 0; iter < max_iter; iter++)
    {
        // stopping condition, recalculate Qp, pQp for numerical accuracy
        pQp = 0;
        for (t = 0; t < k; t++)
        {
            Qp[t] = 0;
            for (j = 0; j < k; j++)
                Qp[t] += Q[t, j] * p[j];
            pQp += p[t] * Qp[t];
        }

        double max_error = 0;
        for (t = 0; t < k; t++)
        {
            double error = Math.Abs(Qp[t] - pQp);
            if (error > max_error)
                max_error = error;
        }
        if (max_error < eps) break;

        for (t = 0; t < k; t++)
        {
            double diff = (-Qp[t] + pQp) / Q[t, t];
            p[t] += diff;
            // Update p'Qp for the shifted and renormalised p. The two '*'
            // operators were missing here, which left this line uncompilable.
            pQp = (pQp + diff * (diff * Q[t, t] + 2 * Qp[t])) / (1 + diff) / (1 + diff);
            for (j = 0; j < k; j++)
            {
                Qp[j] = (Qp[j] + diff * Q[t, j]) / (1 + diff);
                p[j] /= (1 + diff);
            }
        }
    }

    if (iter >= max_iter)
        Debug.Write("Exceeds max_iter in multiclass_prob\n");
}
1627
// Cross-validation decision values for probability estimates.
//
// Shuffles the samples, runs a 5-fold cross validation with the class costs
// Cp and Cn expressed as weights on labels +1 and -1, collects the decision
// value of every held-out sample, and fits the sigmoid with sigmoid_train.
//   probAB : output; receives the sigmoid parameters A and B.
private static void svm_binary_svc_probability(Problem prob, Parameter param, double Cp, double Cn, double[] probAB)
{
    const int foldCount = 5;
    Random rng = new Random();
    int total = prob.Count;
    int[] order = new int[total];
    double[] decisions = new double[total];

    // random shuffle
    for (int n = 0; n < total; n++)
    {
        order[n] = n;
    }
    for (int n = 0; n < total; n++)
    {
        int pick = n + (int)(rng.NextDouble() * (total - n));
        int held = order[n];
        order[n] = order[pick];
        order[pick] = held;
    }

    for (int fold = 0; fold < foldCount; fold++)
    {
        int begin = fold * total / foldCount;
        int end = (fold + 1) * total / foldCount;
        int trainSize = total - (end - begin);

        // Training part: everything outside [begin, end).
        Problem subprob = new Problem();
        subprob.Count = trainSize;
        subprob.X = new Node[trainSize][];
        subprob.Y = new double[trainSize];

        int filled = 0;
        int positives = 0, negatives = 0;
        for (int n = 0; n < total; n++)
        {
            if (n >= begin && n < end)
            {
                continue;
            }

            int source = order[n];
            subprob.X[filled] = prob.X[source];
            subprob.Y[filled] = prob.Y[source];
            if (subprob.Y[filled] > 0) positives++;
            else negatives++;
            ++filled;
        }

        if (positives == 0 || negatives == 0)
        {
            // Degenerate training part: at most one class is present, so the
            // held-out samples get a constant decision value.
            double constant = positives > 0 ? 1 : (negatives > 0 ? -1 : 0);
            for (int n = begin; n < end; n++)
            {
                decisions[order[n]] = constant;
            }
            continue;
        }

        Parameter subparam = (Parameter)param.Clone();
        subparam.Probability = false;
        subparam.C = 1.0;
        subparam.WeightCount = 2;
        subparam.WeightLabels = new int[2];
        subparam.Weights = new double[2];
        subparam.WeightLabels[0] = +1;
        subparam.WeightLabels[1] = -1;
        subparam.Weights[0] = Cp;
        subparam.Weights[1] = Cn;
        Model submodel = svm_train(subprob, subparam);

        double[] single = new double[1];
        for (int n = begin; n < end; n++)
        {
            int source = order[n];
            svm_predict_values(submodel, prob.X[source], single);
            // ensure +1 -1 order; reason not using CV subroutine
            decisions[source] = single[0] * submodel.ClassLabels[0];
        }
    }

    sigmoid_train(total, decisions, prob.Y, probAB);
}
1709
// Return parameter of a Laplace distribution.
//
// Runs a 5-fold cross validation (with probability estimation switched off on
// a cloned parameter set), takes the residuals y - prediction, and returns
// their mean absolute value after discarding residuals larger than
// 5 * sqrt(2) * (mean absolute residual).
private static double svm_svr_probability(Problem prob, Parameter param)
{
    int i;
    int nr_fold = 5;
    double[] ymv = new double[prob.Count];
    double mae = 0;

    Parameter newparam = (Parameter)param.Clone();
    newparam.Probability = false;
    svm_cross_validation(prob, newparam, nr_fold, ymv, null);
    for (i = 0; i < prob.Count; i++)
    {
        ymv[i] = prob.Y[i] - ymv[i];
        mae += Math.Abs(ymv[i]);
    }
    mae /= prob.Count;

    double std = Math.Sqrt(2 * mae * mae);
    int count = 0;
    mae = 0;
    for (i = 0; i < prob.Count; i++)
    {
        if (Math.Abs(ymv[i]) > 5 * std)
            count = count + 1;   // outlier: excluded from the average
        else
            mae += Math.Abs(ymv[i]);
    }
    mae /= (prob.Count - count);

    // The message previously contained "\|z\|", which is not a valid C# escape
    // sequence; the intended text is "|z|".
    Debug.Write("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma=" + mae + "\n");
    return mae;
}
1739
// Groups the samples of prob by class label.
//   nr_class_ret[0] : number of distinct labels found
//   label_ret[0]    : label of each class, in order of first appearance
//   start_ret[0]    : position in perm where each class begins
//   count_ret[0]    : number of samples of each class
//   perm            : indices to the original data, grouped class by class;
//                     length prob.Count, must be allocated before calling
// label_ret[0] and count_ret[0] may be longer than the number of classes; only
// their first nr_class entries are meaningful. Labels are compared after
// truncating prob.Y[i] to int.
private static void svm_group_classes(Problem prob, int[] nr_class_ret, int[][] label_ret, int[][] start_ret, int[][] count_ret, int[] perm)
{
    int l = prob.Count;
    int max_nr_class = 16;
    int nr_class = 0;
    int[] label = new int[max_nr_class];
    int[] count = new int[max_nr_class];
    int[] data_label = new int[l];
    int i;

    for (i = 0; i < l; i++)
    {
        int this_label = (int)(prob.Y[i]);
        int j;
        for (j = 0; j < nr_class; j++)
        {
            if (this_label == label[j])
            {
                ++count[j];
                break;
            }
        }
        data_label[i] = j;

        if (j == nr_class)
        {
            // First occurrence of this label: register a new class, growing
            // the bookkeeping arrays when they are full.
            if (nr_class == max_nr_class)
            {
                max_nr_class *= 2;
                Array.Resize(ref label, max_nr_class);
                Array.Resize(ref count, max_nr_class);
            }
            label[nr_class] = this_label;
            count[nr_class] = 1;
            ++nr_class;
        }
    }

    // start[0] is 0 from allocation. It is not written explicitly here so that
    // an empty problem (nr_class == 0) no longer throws.
    int[] start = new int[nr_class];
    for (i = 1; i < nr_class; i++)
        start[i] = start[i - 1] + count[i - 1];

    // Scatter the sample indices into perm; start[] is used as a write cursor.
    for (i = 0; i < l; i++)
    {
        perm[start[data_label[i]]] = i;
        ++start[data_label[i]];
    }

    // Rebuild start[] after it was consumed as a cursor.
    if (nr_class > 0)
        start[0] = 0;
    for (i = 1; i < nr_class; i++)
        start[i] = start[i - 1] + count[i - 1];

    nr_class_ret[0] = nr_class;
    label_ret[0] = label;
    start_ret[0] = start;
    count_ret[0] = count;
}
1801
1802	//
1803	// Interface functions
1804	//
// Trains a model for the given problem and parameters.
//
// For ONE_CLASS, EPSILON_SVR and NU_SVR a single decision function is trained.
// For classification the samples are grouped by class and one binary
// classifier is trained per class pair (k*(k-1)/2 in total); the model keeps
// every sample that is a support vector of at least one pair. When
// param.Probability is set, probability parameters are estimated as well.
public static Model svm_train(Problem prob, Parameter param)
{
    Model model = new Model();
    model.Parameter = param;

    if (param.SvmType == SvmType.ONE_CLASS ||
        param.SvmType == SvmType.EPSILON_SVR ||
        param.SvmType == SvmType.NU_SVR)
    {
        // regression or one-class-svm
        model.NumberOfClasses = 2;
        model.ClassLabels = null;
        model.NumberOfSVPerClass = null;
        model.PairwiseProbabilityA = null;
        model.PairwiseProbabilityB = null;
        model.SupportVectorCoefficients = new double[1][];

        if (param.Probability &&
            (param.SvmType == SvmType.EPSILON_SVR ||
             param.SvmType == SvmType.NU_SVR))
        {
            model.PairwiseProbabilityA = new double[1];
            model.PairwiseProbabilityA[0] = svm_svr_probability(prob, param);
        }

        decision_function f = svm_train_one(prob, param, 0, 0);
        model.Rho = new double[1];
        model.Rho[0] = f.rho;

        int nSV = 0;
        int i;
        for (i = 0; i < prob.Count; i++)
            if (Math.Abs(f.alpha[i]) > 0) ++nSV;
        model.SupportVectorCount = nSV;
        model.SupportVectors = new Node[nSV][];
        model.SupportVectorCoefficients[0] = new double[nSV];
        int j = 0;
        for (i = 0; i < prob.Count; i++)
            if (Math.Abs(f.alpha[i]) > 0)
            {
                model.SupportVectors[j] = prob.X[i];
                model.SupportVectorCoefficients[0][j] = f.alpha[i];
                ++j;
            }
    }
    else
    {
        // classification
        int l = prob.Count;
        int[] tmp_nr_class = new int[1];
        int[][] tmp_label = new int[1][];
        int[][] tmp_start = new int[1][];
        int[][] tmp_count = new int[1][];
        int[] perm = new int[l];

        // group training data of the same class
        svm_group_classes(prob, tmp_nr_class, tmp_label, tmp_start, tmp_count, perm);
        int nr_class = tmp_nr_class[0];
        int[] label = tmp_label[0];
        int[] start = tmp_start[0];
        int[] count = tmp_count[0];
        Node[][] x = new Node[l][];
        int i;
        for (i = 0; i < l; i++)
            x[i] = prob.X[perm[i]];

        // calculate weighted C
        double[] weighted_C = new double[nr_class];
        for (i = 0; i < nr_class; i++)
            weighted_C[i] = param.C;
        for (i = 0; i < param.WeightCount; i++)
        {
            int j;
            for (j = 0; j < nr_class; j++)
                if (param.WeightLabels[i] == label[j])
                    break;
            if (j == nr_class)
                Debug.Write("warning: class label " + param.WeightLabels[i] + " specified in weight is not found\n");
            else
                weighted_C[j] *= param.Weights[i];
        }

        // train k*(k-1)/2 models

        // nonzero[i] becomes true once sample i (in grouped order) is a support
        // vector of any pairwise classifier; a new bool[] starts all false.
        bool[] nonzero = new bool[l];
        decision_function[] f = new decision_function[nr_class * (nr_class - 1) / 2];

        double[] probA = null, probB = null;
        if (param.Probability)
        {
            probA = new double[nr_class * (nr_class - 1) / 2];
            probB = new double[nr_class * (nr_class - 1) / 2];
        }

        int p = 0;
        for (i = 0; i < nr_class; i++)
            for (int j = i + 1; j < nr_class; j++)
            {
                // Binary sub-problem: class i is labelled +1, class j is labelled -1.
                Problem sub_prob = new Problem();
                int si = start[i], sj = start[j];
                int ci = count[i], cj = count[j];
                sub_prob.Count = ci + cj;
                sub_prob.X = new Node[sub_prob.Count][];
                sub_prob.Y = new double[sub_prob.Count];
                int k;
                for (k = 0; k < ci; k++)
                {
                    sub_prob.X[k] = x[si + k];
                    sub_prob.Y[k] = +1;
                }
                for (k = 0; k < cj; k++)
                {
                    sub_prob.X[ci + k] = x[sj + k];
                    sub_prob.Y[ci + k] = -1;
                }

                if (param.Probability)
                {
                    double[] probAB = new double[2];
                    svm_binary_svc_probability(sub_prob, param, weighted_C[i], weighted_C[j], probAB);
                    probA[p] = probAB[0];
                    probB[p] = probAB[1];
                }

                f[p] = svm_train_one(sub_prob, param, weighted_C[i], weighted_C[j]);
                for (k = 0; k < ci; k++)
                    if (!nonzero[si + k] && Math.Abs(f[p].alpha[k]) > 0)
                        nonzero[si + k] = true;
                for (k = 0; k < cj; k++)
                    if (!nonzero[sj + k] && Math.Abs(f[p].alpha[ci + k]) > 0)
                        nonzero[sj + k] = true;
                ++p;
            }

        // build output

        model.NumberOfClasses = nr_class;

        model.ClassLabels = new int[nr_class];
        for (i = 0; i < nr_class; i++)
            model.ClassLabels[i] = label[i];

        model.Rho = new double[nr_class * (nr_class - 1) / 2];
        for (i = 0; i < nr_class * (nr_class - 1) / 2; i++)
            model.Rho[i] = f[i].rho;

        if (param.Probability)
        {
            model.PairwiseProbabilityA = new double[nr_class * (nr_class - 1) / 2];
            model.PairwiseProbabilityB = new double[nr_class * (nr_class - 1) / 2];
            for (i = 0; i < nr_class * (nr_class - 1) / 2; i++)
            {
                model.PairwiseProbabilityA[i] = probA[i];
                model.PairwiseProbabilityB[i] = probB[i];
            }
        }
        else
        {
            model.PairwiseProbabilityA = null;
            model.PairwiseProbabilityB = null;
        }

        int nnz = 0;
        int[] nz_count = new int[nr_class];
        model.NumberOfSVPerClass = new int[nr_class];
        for (i = 0; i < nr_class; i++)
        {
            int nSV = 0;
            for (int j = 0; j < count[i]; j++)
                if (nonzero[start[i] + j])
                {
                    ++nSV;
                    ++nnz;
                }
            model.NumberOfSVPerClass[i] = nSV;
            nz_count[i] = nSV;
        }

        Debug.Write("Total nSV = " + nnz + "\n");

        model.SupportVectorCount = nnz;
        model.SupportVectors = new Node[nnz][];
        p = 0;
        for (i = 0; i < l; i++)
            if (nonzero[i]) model.SupportVectors[p++] = x[i];

        int[] nz_start = new int[nr_class];
        nz_start[0] = 0;
        for (i = 1; i < nr_class; i++)
            nz_start[i] = nz_start[i - 1] + nz_count[i - 1];

        model.SupportVectorCoefficients = new double[nr_class - 1][];
        for (i = 0; i < nr_class - 1; i++)
            model.SupportVectorCoefficients[i] = new double[nnz];

        p = 0;
        for (i = 0; i < nr_class; i++)
            for (int j = i + 1; j < nr_class; j++)
            {
                // classifier (i,j): coefficients with
                // i are in sv_coef[j-1][nz_start[i]...],
                // j are in sv_coef[i][nz_start[j]...]

                int si = start[i];
                int sj = start[j];
                int ci = count[i];
                int cj = count[j];

                int q = nz_start[i];
                int k;
                for (k = 0; k < ci; k++)
                    if (nonzero[si + k])
                        model.SupportVectorCoefficients[j - 1][q++] = f[p].alpha[k];
                q = nz_start[j];
                for (k = 0; k < cj; k++)
                    if (nonzero[sj + k])
                        model.SupportVectorCoefficients[i][q++] = f[p].alpha[ci + k];
                ++p;
            }
    }
    return model;
}
2029
// Stratified cross validation.
//
// Splits the samples into nr_fold folds, trains on all folds but one and
// predicts the held-out fold. For C_SVC / NU_SVC (when nr_fold < sample count)
// the folds are stratified so each receives a share of every class.
//   target     : output; target[i] receives the prediction for sample i
//   confidence : optional output used only for C_SVC / NU_SVC with
//                param.Probability set; confidence[i] then maps each class
//                label to its estimated probability. May be null, in which
//                case the per-class estimates are not stored.
public static void svm_cross_validation(Problem prob, Parameter param, int nr_fold, double[] target, Dictionary<int,double>[] confidence)
{
    Random rand = new Random();
    int i;
    int[] fold_start = new int[nr_fold + 1];
    int l = prob.Count;
    int[] perm = new int[l];

    // stratified cv may not give leave-one-out rate
    // Each class to l folds -> some folds may have zero elements
    if ((param.SvmType == SvmType.C_SVC ||
         param.SvmType == SvmType.NU_SVC) && nr_fold < l)
    {
        int[] tmp_nr_class = new int[1];
        int[][] tmp_label = new int[1][];
        int[][] tmp_start = new int[1][];
        int[][] tmp_count = new int[1][];

        svm_group_classes(prob, tmp_nr_class, tmp_label, tmp_start, tmp_count, perm);

        int nr_class = tmp_nr_class[0];
        int[] start = tmp_start[0];
        int[] count = tmp_count[0];

        // random shuffle and then data grouped by fold using the array perm
        int[] fold_count = new int[nr_fold];
        int c;
        int[] index = new int[l];
        for (i = 0; i < l; i++)
            index[i] = perm[i];
        for (c = 0; c < nr_class; c++)
            for (i = 0; i < count[c]; i++)
            {
                int j = i + (int)(rand.NextDouble() * (count[c] - i));
                int swapped = index[start[c] + j];
                index[start[c] + j] = index[start[c] + i];
                index[start[c] + i] = swapped;
            }
        for (i = 0; i < nr_fold; i++)
        {
            fold_count[i] = 0;
            for (c = 0; c < nr_class; c++)
                fold_count[i] += (i + 1) * count[c] / nr_fold - i * count[c] / nr_fold;
        }
        fold_start[0] = 0;
        for (i = 1; i <= nr_fold; i++)
            fold_start[i] = fold_start[i - 1] + fold_count[i - 1];
        for (c = 0; c < nr_class; c++)
            for (i = 0; i < nr_fold; i++)
            {
                int begin = start[c] + i * count[c] / nr_fold;
                int end = start[c] + (i + 1) * count[c] / nr_fold;
                for (int j = begin; j < end; j++)
                {
                    // fold_start[i] is used as a write cursor here ...
                    perm[fold_start[i]] = index[j];
                    fold_start[i]++;
                }
            }
        // ... and rebuilt afterwards.
        fold_start[0] = 0;
        for (i = 1; i <= nr_fold; i++)
            fold_start[i] = fold_start[i - 1] + fold_count[i - 1];
    }
    else
    {
        for (i = 0; i < l; i++) perm[i] = i;
        for (i = 0; i < l; i++)
        {
            int j = i + (int)(rand.NextDouble() * (l - i));
            int swapped = perm[i];
            perm[i] = perm[j];
            perm[j] = swapped;
        }
        for (i = 0; i <= nr_fold; i++)
            fold_start[i] = i * l / nr_fold;
    }

    for (i = 0; i < nr_fold; i++)
    {
        int begin = fold_start[i];
        int end = fold_start[i + 1];
        int j, k;
        Problem subprob = new Problem();

        subprob.Count = l - (end - begin);
        subprob.X = new Node[subprob.Count][];
        subprob.Y = new double[subprob.Count];

        k = 0;
        for (j = 0; j < begin; j++)
        {
            subprob.X[k] = prob.X[perm[j]];
            subprob.Y[k] = prob.Y[perm[j]];
            ++k;
        }
        for (j = end; j < l; j++)
        {
            subprob.X[k] = prob.X[perm[j]];
            subprob.Y[k] = prob.Y[perm[j]];
            ++k;
        }

        Model submodel = svm_train(subprob, param);
        if (param.Probability &&
            (param.SvmType == SvmType.C_SVC ||
             param.SvmType == SvmType.NU_SVC))
        {
            for (j = begin; j < end; j++)
            {
                double[] prob_estimates = new double[svm_get_nr_class(submodel)];
                target[perm[j]] = svm_predict_probability(submodel, prob.X[perm[j]], prob_estimates);

                // The per-class estimates are optional output; skipping them
                // when no array was supplied avoids a NullReferenceException.
                if (confidence != null)
                {
                    confidence[perm[j]] = new Dictionary<int, double>();
                    for (int idx = 0; idx < prob_estimates.Length; idx++)
                        confidence[perm[j]][submodel.ClassLabels[idx]] = prob_estimates[idx];
                }
            }
        }
        else
        {
            for (j = begin; j < end; j++)
                target[perm[j]] = svm_predict(submodel, prob.X[perm[j]]);
        }
    }
}
2148
// Returns the SVM type stored in the model's parameters.
public static SvmType svm_get_svm_type(Model model)
{
    SvmType kind = model.Parameter.SvmType;
    return kind;
}
2153
// Returns the number of classes recorded in the model.
public static int svm_get_nr_class(Model model)
{
    int classCount = model.NumberOfClasses;
    return classCount;
}
2158
// Copies the model's class labels into the caller-supplied array, which must
// have room for model.NumberOfClasses entries. Does nothing when the model
// has no class labels.
public static void svm_get_labels(Model model, int[] label)
{
    if (model.ClassLabels == null)
    {
        return;
    }

    int idx = 0;
    while (idx < model.NumberOfClasses)
    {
        label[idx] = model.ClassLabels[idx];
        idx++;
    }
}
2165
// Returns the SVR probability parameter stored in PairwiseProbabilityA[0].
// If the model is not an EPSILON_SVR / NU_SVR model or has no such
// parameter, a message is written through Debug.Write and 0 is returned.
public static double svm_get_svr_probability(Model model)
{
    if ((model.Parameter.SvmType == SvmType.EPSILON_SVR || model.Parameter.SvmType == SvmType.NU_SVR) &&
        model.PairwiseProbabilityA != null)
    {
        return model.PairwiseProbabilityA[0];
    }

    Debug.Write("Model doesn't contain information for SVR probability inference\n");
    return 0;
}
2177
// Computes the raw decision value(s) of the model for sample x.
//   dec_values : output. One-class and regression models write a single value
//                to dec_values[0]; classification models write one value per
//                class pair (i, j) with i < j, in pair order, so the array
//                needs nr_class * (nr_class - 1) / 2 entries.
public static void svm_predict_values(Model model, Node[] x, double[] dec_values)
{
    if (model.Parameter.SvmType == SvmType.ONE_CLASS ||
        model.Parameter.SvmType == SvmType.EPSILON_SVR ||
        model.Parameter.SvmType == SvmType.NU_SVR)
    {
        double[] sv_coef = model.SupportVectorCoefficients[0];
        double sum = 0;
        for (int i = 0; i < model.SupportVectorCount; i++)
            sum += sv_coef[i] * Kernel.k_function(x, model.SupportVectors[i], model.Parameter);
        sum -= model.Rho[0];
        dec_values[0] = sum;
    }
    else
    {
        int i;
        int nr_class = model.NumberOfClasses;
        int l = model.SupportVectorCount;

        // Kernel value of x against every support vector, computed once and
        // shared by all pairwise classifiers.
        double[] kvalue = new double[l];
        for (i = 0; i < l; i++)
            kvalue[i] = Kernel.k_function(x, model.SupportVectors[i], model.Parameter);

        // start[c] = offset of the first support vector of class c.
        int[] start = new int[nr_class];
        start[0] = 0;
        for (i = 1; i < nr_class; i++)
            start[i] = start[i - 1] + model.NumberOfSVPerClass[i - 1];

        int p = 0;
        for (i = 0; i < nr_class; i++)
            for (int j = i + 1; j < nr_class; j++)
            {
                double sum = 0;
                int si = start[i];
                int sj = start[j];
                int ci = model.NumberOfSVPerClass[i];
                int cj = model.NumberOfSVPerClass[j];

                int k;
                double[] coef1 = model.SupportVectorCoefficients[j - 1];
                double[] coef2 = model.SupportVectorCoefficients[i];
                for (k = 0; k < ci; k++)
                    sum += coef1[si + k] * kvalue[si + k];
                for (k = 0; k < cj; k++)
                    sum += coef2[sj + k] * kvalue[sj + k];
                sum -= model.Rho[p];
                dec_values[p] = sum;
                p++;
            }
    }
}
2229
// Predicts the output of the model for sample x.
//   ONE_CLASS             : +1 when the decision value is > 0, otherwise -1
//   EPSILON_SVR / NU_SVR  : the decision value itself
//   classification        : the label of the class winning the most pairwise
//                           votes; ties go to the class with the lowest index
public static double svm_predict(Model model, Node[] x)
{
    if (model.Parameter.SvmType == SvmType.ONE_CLASS ||
        model.Parameter.SvmType == SvmType.EPSILON_SVR ||
        model.Parameter.SvmType == SvmType.NU_SVR)
    {
        double[] res = new double[1];
        svm_predict_values(model, x, res);

        if (model.Parameter.SvmType == SvmType.ONE_CLASS)
            return (res[0] > 0) ? 1 : -1;
        else
            return res[0];
    }
    else
    {
        int i;
        int nr_class = model.NumberOfClasses;
        double[] dec_values = new double[nr_class * (nr_class - 1) / 2];
        svm_predict_values(model, x, dec_values);

        // A new int[] is zero-filled, so every class starts with no votes.
        int[] vote = new int[nr_class];
        int pos = 0;
        for (i = 0; i < nr_class; i++)
            for (int j = i + 1; j < nr_class; j++)
            {
                if (dec_values[pos++] > 0)
                    ++vote[i];
                else
                    ++vote[j];
            }

        int vote_max_idx = 0;
        for (i = 1; i < nr_class; i++)
            if (vote[i] > vote[vote_max_idx])
                vote_max_idx = i;
        return model.ClassLabels[vote_max_idx];
    }
}
2271
// Predicts the class of x together with per-class probability estimates.
//
// For C_SVC / NU_SVC models that carry both pairwise probability parameter
// arrays, each pairwise decision value goes through the fitted sigmoid, the
// result is clamped to [1e-7, 1 - 1e-7], and multiclass_probability combines
// the pairwise values into prob_estimates (one entry per class). The label
// with the largest estimate is returned. For any other model this falls back
// to svm_predict and prob_estimates is left untouched.
public static double svm_predict_probability(Model model, Node[] x, double[] prob_estimates)
{
    if ((model.Parameter.SvmType == SvmType.C_SVC || model.Parameter.SvmType == SvmType.NU_SVC) &&
        model.PairwiseProbabilityA != null && model.PairwiseProbabilityB != null)
    {
        int i;
        int nr_class = model.NumberOfClasses;
        double[] dec_values = new double[nr_class * (nr_class - 1) / 2];
        svm_predict_values(model, x, dec_values);

        double min_prob = 1e-7;
        double[,] pairwise_prob = new double[nr_class, nr_class];

        int k = 0;
        for (i = 0; i < nr_class; i++)
            for (int j = i + 1; j < nr_class; j++)
            {
                double estimate = sigmoid_predict(dec_values[k], model.PairwiseProbabilityA[k], model.PairwiseProbabilityB[k]);
                pairwise_prob[i, j] = Math.Min(Math.Max(estimate, min_prob), 1 - min_prob);
                pairwise_prob[j, i] = 1 - pairwise_prob[i, j];
                k++;
            }
        multiclass_probability(nr_class, pairwise_prob, prob_estimates);

        int prob_max_idx = 0;
        for (i = 1; i < nr_class; i++)
            if (prob_estimates[i] > prob_estimates[prob_max_idx])
                prob_max_idx = i;
        return model.ClassLabels[prob_max_idx];
    }
    else
    {
        return svm_predict(model, x);
    }
}
2304
// Parses a floating-point number written with '.' as the decimal separator.
// The invariant culture is used so the result does not depend on the regional
// settings of the machine (under a comma-decimal culture, "1.5" would
// otherwise be read as 15).
private static double atof(string s)
{
    return double.Parse(s, System.Globalization.CultureInfo.InvariantCulture);
}
2309
// Parses an integer. The invariant culture is used so that sign handling does
// not depend on the regional settings of the machine.
private static int atoi(string s)
{
    return int.Parse(s, System.Globalization.CultureInfo.InvariantCulture);
}
2314
// Validates the training parameters against the problem.
// Returns null when everything is acceptable, otherwise a short message
// describing the first problem found.
public static string svm_check_parameter(Problem prob, Parameter param)
{
    // svm_type

    SvmType svm_type = param.SvmType;
    if (svm_type != SvmType.C_SVC &&
        svm_type != SvmType.NU_SVC &&
        svm_type != SvmType.ONE_CLASS &&
        svm_type != SvmType.EPSILON_SVR &&
        svm_type != SvmType.NU_SVR)
        return "unknown svm type";

    // kernel_type, degree

    KernelType kernel_type = param.KernelType;
    if (kernel_type != KernelType.LINEAR &&
        kernel_type != KernelType.POLY &&
        kernel_type != KernelType.RBF &&
        kernel_type != KernelType.SIGMOID &&
        kernel_type != KernelType.PRECOMPUTED)
        return "unknown kernel type";

    if (param.Degree < 0)
        return "degree of polynomial kernel < 0";

    // cache_size,eps,C,nu,p,shrinking

    if (param.CacheSize <= 0)
        return "cache_size <= 0";

    if (param.EPS <= 0)
        return "eps <= 0";

    if ((svm_type == SvmType.C_SVC ||
         svm_type == SvmType.EPSILON_SVR ||
         svm_type == SvmType.NU_SVR) &&
        param.C <= 0)
        return "C <= 0";

    if ((svm_type == SvmType.NU_SVC ||
         svm_type == SvmType.ONE_CLASS ||
         svm_type == SvmType.NU_SVR) &&
        (param.Nu <= 0 || param.Nu > 1))
        return "nu <= 0 or nu > 1";

    if (svm_type == SvmType.EPSILON_SVR && param.P < 0)
        return "p < 0";

    if (param.Probability && svm_type == SvmType.ONE_CLASS)
        return "one-class SVM probability output not supported yet";

    // check whether nu-svc is feasible

    if (svm_type == SvmType.NU_SVC)
    {
        int l = prob.Count;
        int max_nr_class = 16;
        int nr_class = 0;
        int[] label = new int[max_nr_class];
        int[] count = new int[max_nr_class];

        // Count the samples of each distinct label.
        int i;
        for (i = 0; i < l; i++)
        {
            int this_label = (int)prob.Y[i];
            int j;
            for (j = 0; j < nr_class; j++)
                if (this_label == label[j])
                {
                    ++count[j];
                    break;
                }

            if (j == nr_class)
            {
                if (nr_class == max_nr_class)
                {
                    max_nr_class *= 2;
                    Array.Resize(ref label, max_nr_class);
                    Array.Resize(ref count, max_nr_class);
                }
                label[nr_class] = this_label;
                count[nr_class] = 1;
                ++nr_class;
            }
        }

        // Every class pair must satisfy nu * (n1 + n2) / 2 <= min(n1, n2).
        for (i = 0; i < nr_class; i++)
        {
            int n1 = count[i];
            for (int j = i + 1; j < nr_class; j++)
            {
                int n2 = count[j];
                if (param.Nu * (n1 + n2) / 2 > Math.Min(n1, n2))
                    return "specified nu is infeasible";
            }
        }
    }

    return null;
}
2422
// Returns 1 when the model carries what probability prediction needs, else 0:
//   C_SVC / NU_SVC        : both pairwise probability arrays are present
//   EPSILON_SVR / NU_SVR  : PairwiseProbabilityA is present
public static int svm_check_probability_model(Model model)
{
    SvmType kind = model.Parameter.SvmType;

    bool classifierReady =
        (kind == SvmType.C_SVC || kind == SvmType.NU_SVC) &&
        model.PairwiseProbabilityA != null && model.PairwiseProbabilityB != null;

    bool regressionReady =
        (kind == SvmType.EPSILON_SVR || kind == SvmType.NU_SVR) &&
        model.PairwiseProbabilityA != null;

    return (classifierReady || regressionReady) ? 1 : 0;
}
2433	}
2434
2435	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences