Context Navigation

source: trunk/sources/LibSVM/Solver.cs @ 2160

Visit:

Last change on this file since 2160 was 1819, checked in by mkommend, 15 years ago
created new project for LibSVM source files (ticket #619)
File size: 84.9 KB

Rev	Line
[1806]	1	/*
	2	* SVM.NET Library
	3	* Copyright (C) 2008 Matthew Johnson
	4	*
	5	* This program is free software: you can redistribute it and/or modify
	6	* it under the terms of the GNU General Public License as published by
	7	* the Free Software Foundation, either version 3 of the License, or
	8	* (at your option) any later version.
	9	*
	10	* This program is distributed in the hope that it will be useful,
	11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	13	* GNU General Public License for more details.
	14	*
	15	* You should have received a copy of the GNU General Public License
	16	* along with this program. If not, see <http://www.gnu.org/licenses/>.
	17	*/
	18
	19
	20	using System;
	21	using System.Collections.Generic;
	22	using System.Diagnostics;
	23
	24	namespace SVM
	25	{
	26	//
	27	// Kernel evaluation
	28	//
	29	// the static method k_function is for doing single kernel evaluation
	30	// the constructor of Kernel prepares to calculate the l*l kernel matrix
	31	// the member function get_Q is for getting one column from the Q Matrix
	32	//
		// Abstract view of the Q matrix used by the solvers in this file.
		// Implementations below (SVC_Q, ONE_CLASS_Q, SVR_Q) compute entries from kernel values.
	33	internal abstract class QMatrix
	34	{
		// Returns column `column` of Q; the implementations in this file fill entries up to index len - 1.
	35	public abstract float[] get_Q(int column, int len);
		// Returns the array of diagonal entries; the implementations here fill it with kernel_function(i, i).
	36	public abstract float[] get_QD();
		// Exchanges the roles of indices i and j in the matrix (the solver calls this while shrinking).
	37	public abstract void swap_index(int i, int j);
	38	}
	39
	40	internal abstract class Kernel : QMatrix
	41	{
		// Sample vectors: a shallow clone of the array passed to the constructor.
	42	private Node[][] _x;
		// dot(_x[i], _x[i]) for every sample; allocated only for the RBF kernel, null otherwise.
	43	private double[] _x_square;
	44
	45	// Kernel parameters, copied from the Parameter object in the constructor
	46	private KernelType kernel_type;
	47	private int degree;
	48	private double gamma;
	49	private double coef0;
	50
	51	public override void swap_index(int i, int j)
	52	{
	53	do { Node[] _ = _x[i]; _x[i] = _x[j]; _x[j] = _; } while (false);
	54	if (_x_square != null) do { double _ = _x_square[i]; _x_square[i] = _x_square[j]; _x_square[j] = _; } while (false);
	55	}
	56
	57	private static double powi(double baseValue, int times)
	58	{
	59	double tmp = baseValue, ret = 1.0;
	60
	61	for (int t = times; t > 0; t /= 2)
	62	{
	63	if (t % 2 == 1) ret *= tmp;
	64	tmp = tmp * tmp;
	65	}
	66	return ret;
	67	}
	68
	69	private static double tanh(double x)
	70	{
	71	double e = Math.Exp(x);
	72	return 1.0 - 2.0 / (e * e + 1);
	73	}
	74
	75	public double kernel_function(int i, int j)
	76	{
	77	switch (kernel_type)
	78	{
	79	case KernelType.LINEAR:
	80	return dot(_x[i], _x[j]);
	81	case KernelType.POLY:
	82	return powi(gamma * dot(_x[i], _x[j]) + coef0, degree);
	83	case KernelType.RBF:
	84	return Math.Exp(-gamma * (_x_square[i] + _x_square[j] - 2 * dot(_x[i], _x[j])));
	85	case KernelType.SIGMOID:
	86	return tanh(gamma * dot(_x[i], _x[j]) + coef0);
	87	case KernelType.PRECOMPUTED:
	88	return _x[i][(int)(_x[j][0].Value)].Value;
	89	default:
	90	return 0;
	91	}
	92	}
	93
	94	public Kernel(int l, Node[][] x_, Parameter param)
	95	{
	96	this.kernel_type = param.KernelType;
	97	this.degree = param.Degree;
	98	this.gamma = param.Gamma;
	99	this.coef0 = param.Coefficient0;
	100
	101	_x = (Node[][])x_.Clone();
	102
	103	if (kernel_type == KernelType.RBF)
	104	{
	105	_x_square = new double[l];
	106	for (int i = 0; i < l; i++)
	107	_x_square[i] = dot(_x[i], _x[i]);
	108	}
	109	else _x_square = null;
	110	}
	111
	112	public static double dot(Node[] x, Node[] y)
	113	{
	114	double sum = 0;
	115	int xlen = x.Length;
	116	int ylen = y.Length;
	117	int i = 0;
	118	int j = 0;
	119	while (i < xlen && j < ylen)
	120	{
	121	if (x[i].Index == y[j].Index)
	122	sum += x[i++].Value * y[j++].Value;
	123	else
	124	{
	125	if (x[i].Index > y[j].Index)
	126	++j;
	127	else
	128	++i;
	129	}
	130	}
	131	return sum;
	132	}
	133
	134	public static double k_function(Node[] x, Node[] y, Parameter param)
	135	{
	136	switch (param.KernelType)
	137	{
	138	case KernelType.LINEAR:
	139	return dot(x, y);
	140	case KernelType.POLY:
	141	return powi(param.Gamma * dot(x, y) + param.Coefficient0, param.Degree);
	142	case KernelType.RBF:
	143	{
	144	double sum = 0;
	145	int xlen = x.Length;
	146	int ylen = y.Length;
	147	int i = 0;
	148	int j = 0;
	149	while (i < xlen && j < ylen)
	150	{
	151	if (x[i].Index == y[j].Index)
	152	{
	153	double d = x[i++].Value - y[j++].Value;
	154	sum += d * d;
	155	}
	156	else if (x[i].Index > y[j].Index)
	157	{
	158	sum += y[j].Value * y[j].Value;
	159	++j;
	160	}
	161	else
	162	{
	163	sum += x[i].Value * x[i].Value;
	164	++i;
	165	}
	166	}
	167
	168	while (i < xlen)
	169	{
	170	sum += x[i].Value * x[i].Value;
	171	++i;
	172	}
	173
	174	while (j < ylen)
	175	{
	176	sum += y[j].Value * y[j].Value;
	177	++j;
	178	}
	179
	180	return Math.Exp(-param.Gamma * sum);
	181	}
	182	case KernelType.SIGMOID:
	183	return tanh(param.Gamma * dot(x, y) + param.Coefficient0);
	184	case KernelType.PRECOMPUTED:
	185	return x[(int)(y[0].Value)].Value;
	186	default:
	187	return 0;
	188	}
	189	}
	190	}
	191
	192	// An SMO algorithm in Fan et al., JMLR 6(2005), p. 1889--1918
	193	// Solves:
	194	//
	195	// min 0.5(\alpha^T Q \alpha) + p^T \alpha
	196	//
	197	// y^T \alpha = \delta
	198	// y_i = +1 or -1
	199	// 0 <= alpha_i <= Cp for y_i = 1
	200	// 0 <= alpha_i <= Cn for y_i = -1
	201	//
	202	// Given:
	203	//
	204	// Q, p, y, Cp, Cn, and an initial feasible point \alpha
	205	// l is the size of vectors and matrices
	206	// eps is the stopping tolerance
	207	//
	208	// solution will be put in \alpha, objective value will be put in obj
	209	//
	210	internal class Solver
	211	{
		// Number of leading positions currently in the active set; the solver's loops stop here.
	212	protected int active_size;
		// Labels, cloned from the y_ argument of Solve; compared against +1 throughout.
	213	protected short[] y;
	214	protected double[] G; // gradient of objective function
		// Values stored in alpha_status.
	215	protected const byte LOWER_BOUND = 0;
	216	protected const byte UPPER_BOUND = 1;
	217	protected const byte FREE = 2;
	218	protected byte[] alpha_status; // LOWER_BOUND, UPPER_BOUND, FREE
		// Working copy of the variables; written back to the caller's array at the end of Solve.
	219	protected double[] alpha;
	220	protected QMatrix Q;
		// Array returned by Q.get_QD().
	221	protected float[] QD;
		// Stopping tolerance.
	222	protected double eps;
		// Upper bounds for alpha: Cp when y[i] > 0, Cn otherwise (see get_C).
	223	protected double Cp, Cn;
		// Linear term of the objective, cloned from the p_ argument of Solve.
	224	protected double[] p;
		// active_set[k] is the original index of the variable now at position k.
	225	protected int[] active_set;
	226	protected double[] G_bar; // gradient, if we treat free variables as 0
		// Problem size.
	227	protected int l;
		// Set once do_shrinking has run its unshrink step, so that step runs only once per Solve.
	228	protected bool unshrinked; // XXX
	229
	230	protected const double INF = double.PositiveInfinity;
	231
	232	protected double get_C(int i)
	233	{
	234	return (y[i] > 0) ? Cp : Cn;
	235	}
	236	protected void update_alpha_status(int i)
	237	{
	238	if (alpha[i] >= get_C(i))
	239	alpha_status[i] = UPPER_BOUND;
	240	else if (alpha[i] <= 0)
	241	alpha_status[i] = LOWER_BOUND;
	242	else alpha_status[i] = FREE;
	243	}
	244	protected bool is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; }
	245	protected bool is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; }
	246	protected bool is_free(int i) { return alpha_status[i] == FREE; }
	247
	248	// java: information about solution except alpha,
	249	// because we cannot return multiple values otherwise...
		// Output record filled in by Solve (and by its callers) alongside the alpha array.
	250	internal class SolutionInfo
	251	{
		// Objective value; Solve stores half of the sum of alpha[i] * (G[i] + p[i]).
	252	public double obj;
		// Value returned by calculate_rho().
	253	public double rho;
		// Solve stores Cp and Cn here; solve_nu_svc later overwrites both with 1 / r.
	254	public double upper_bound_p;
	255	public double upper_bound_n;
	256	public double r; // for Solver_NU; set in Solver_NU.calculate_rho
	257	}
	258
	259	protected void swap_index(int i, int j)
	260	{
	261	Q.swap_index(i, j);
	262	do { short _ = y[i]; y[i] = y[j]; y[j] = _; } while (false);
	263	do { double _ = G[i]; G[i] = G[j]; G[j] = _; } while (false);
	264	do { byte _ = alpha_status[i]; alpha_status[i] = alpha_status[j]; alpha_status[j] = _; } while (false);
	265	do { double _ = alpha[i]; alpha[i] = alpha[j]; alpha[j] = _; } while (false);
	266	do { double _ = p[i]; p[i] = p[j]; p[j] = _; } while (false);
	267	do { int _ = active_set[i]; active_set[i] = active_set[j]; active_set[j] = _; } while (false);
	268	do { double _ = G_bar[i]; G_bar[i] = G_bar[j]; G_bar[j] = _; } while (false);
	269	}
	270
	271	protected void reconstruct_gradient()
	272	{
	273	// reconstruct inactive elements of G from G_bar and free variables
	274
	275	if (active_size == l) return;
	276
	277	int i;
	278	for (i = active_size; i < l; i++)
	279	G[i] = G_bar[i] + p[i];
	280
	281	for (i = 0; i < active_size; i++)
	282	if (is_free(i))
	283	{
	284	float[] Q_i = Q.get_Q(i, l);
	285	double alpha_i = alpha[i];
	286	for (int j = active_size; j < l; j++)
	287	G[j] += alpha_i * Q_i[j];
	288	}
	289	}
	290
	291	public virtual void Solve(int l, QMatrix Q, double[] p_, short[] y_,
	292	double[] alpha_, double Cp, double Cn, double eps, SolutionInfo si, bool shrinking)
	293	{
	294	this.l = l;
	295	this.Q = Q;
	296	QD = Q.get_QD();
	297	p = (double[])p_.Clone();
	298	y = (short[])y_.Clone();
	299	alpha = (double[])alpha_.Clone();
	300	this.Cp = Cp;
	301	this.Cn = Cn;
	302	this.eps = eps;
	303	this.unshrinked = false;
	304
	305	// initialize alpha_status
	306	{
	307	alpha_status = new byte[l];
	308	for (int i = 0; i < l; i++)
	309	update_alpha_status(i);
	310	}
	311
	312	// initialize active set (for shrinking)
	313	{
	314	active_set = new int[l];
	315	for (int i = 0; i < l; i++)
	316	active_set[i] = i;
	317	active_size = l;
	318	}
	319
	320	// initialize gradient
	321	{
	322	G = new double[l];
	323	G_bar = new double[l];
	324	int i;
	325	for (i = 0; i < l; i++)
	326	{
	327	G[i] = p[i];
	328	G_bar[i] = 0;
	329	}
	330	for (i = 0; i < l; i++)
	331	if (!is_lower_bound(i))
	332	{
	333	float[] Q_i = Q.get_Q(i, l);
	334	double alpha_i = alpha[i];
	335	int j;
	336	for (j = 0; j < l; j++)
	337	G[j] += alpha_i * Q_i[j];
	338	if (is_upper_bound(i))
	339	for (j = 0; j < l; j++)
	340	G_bar[j] += get_C(i) * Q_i[j];
	341	}
	342	}
	343
	344	// optimization step
	345
	346	int iter = 0;
	347	int counter = Math.Min(l, 1000) + 1;
	348	int[] working_set = new int[2];
	349
	350	while (true)
	351	{
	352	// show progress and do shrinking
	353
	354	if (--counter == 0)
	355	{
	356	counter = Math.Min(l, 1000);
	357	if (shrinking) do_shrinking();
	358	Debug.Write(".");
	359	}
	360
	361	if (select_working_set(working_set) != 0)
	362	{
	363	// reconstruct the whole gradient
	364	reconstruct_gradient();
	365	// reset active set size and check
	366	active_size = l;
	367	Debug.Write("*");
	368	if (select_working_set(working_set) != 0)
	369	break;
	370	else
	371	counter = 1; // do shrinking next iteration
	372	}
	373
	374	int i = working_set[0];
	375	int j = working_set[1];
	376
	377	++iter;
	378
	379	// update alpha[i] and alpha[j], handle bounds carefully
	380
	381	float[] Q_i = Q.get_Q(i, active_size);
	382	float[] Q_j = Q.get_Q(j, active_size);
	383
	384	double C_i = get_C(i);
	385	double C_j = get_C(j);
	386
	387	double old_alpha_i = alpha[i];
	388	double old_alpha_j = alpha[j];
	389
	390	if (y[i] != y[j])
	391	{
	392	double quad_coef = Q_i[i] + Q_j[j] + 2 * Q_i[j];
	393	if (quad_coef <= 0)
	394	quad_coef = 1e-12;
	395	double delta = (-G[i] - G[j]) / quad_coef;
	396	double diff = alpha[i] - alpha[j];
	397	alpha[i] += delta;
	398	alpha[j] += delta;
	399
	400	if (diff > 0)
	401	{
	402	if (alpha[j] < 0)
	403	{
	404	alpha[j] = 0;
	405	alpha[i] = diff;
	406	}
	407	}
	408	else
	409	{
	410	if (alpha[i] < 0)
	411	{
	412	alpha[i] = 0;
	413	alpha[j] = -diff;
	414	}
	415	}
	416	if (diff > C_i - C_j)
	417	{
	418	if (alpha[i] > C_i)
	419	{
	420	alpha[i] = C_i;
	421	alpha[j] = C_i - diff;
	422	}
	423	}
	424	else
	425	{
	426	if (alpha[j] > C_j)
	427	{
	428	alpha[j] = C_j;
	429	alpha[i] = C_j + diff;
	430	}
	431	}
	432	}
	433	else
	434	{
	435	double quad_coef = Q_i[i] + Q_j[j] - 2 * Q_i[j];
	436	if (quad_coef <= 0)
	437	quad_coef = 1e-12;
	438	double delta = (G[i] - G[j]) / quad_coef;
	439	double sum = alpha[i] + alpha[j];
	440	alpha[i] -= delta;
	441	alpha[j] += delta;
	442
	443	if (sum > C_i)
	444	{
	445	if (alpha[i] > C_i)
	446	{
	447	alpha[i] = C_i;
	448	alpha[j] = sum - C_i;
	449	}
	450	}
	451	else
	452	{
	453	if (alpha[j] < 0)
	454	{
	455	alpha[j] = 0;
	456	alpha[i] = sum;
	457	}
	458	}
	459	if (sum > C_j)
	460	{
	461	if (alpha[j] > C_j)
	462	{
	463	alpha[j] = C_j;
	464	alpha[i] = sum - C_j;
	465	}
	466	}
	467	else
	468	{
	469	if (alpha[i] < 0)
	470	{
	471	alpha[i] = 0;
	472	alpha[j] = sum;
	473	}
	474	}
	475	}
	476
	477	// update G
	478
	479	double delta_alpha_i = alpha[i] - old_alpha_i;
	480	double delta_alpha_j = alpha[j] - old_alpha_j;
	481
	482	for (int k = 0; k < active_size; k++)
	483	{
	484	G[k] += Q_i[k] * delta_alpha_i + Q_j[k] * delta_alpha_j;
	485	}
	486
	487	// update alpha_status and G_bar
	488
	489	{
	490	bool ui = is_upper_bound(i);
	491	bool uj = is_upper_bound(j);
	492	update_alpha_status(i);
	493	update_alpha_status(j);
	494	int k;
	495	if (ui != is_upper_bound(i))
	496	{
	497	Q_i = Q.get_Q(i, l);
	498	if (ui)
	499	for (k = 0; k < l; k++)
	500	G_bar[k] -= C_i * Q_i[k];
	501	else
	502	for (k = 0; k < l; k++)
	503	G_bar[k] += C_i * Q_i[k];
	504	}
	505
	506	if (uj != is_upper_bound(j))
	507	{
	508	Q_j = Q.get_Q(j, l);
	509	if (uj)
	510	for (k = 0; k < l; k++)
	511	G_bar[k] -= C_j * Q_j[k];
	512	else
	513	for (k = 0; k < l; k++)
	514	G_bar[k] += C_j * Q_j[k];
	515	}
	516	}
	517
	518	}
	519
	520	// calculate rho
	521
	522	si.rho = calculate_rho();
	523
	524	// calculate objective value
	525	{
	526	double v = 0;
	527	int i;
	528	for (i = 0; i < l; i++)
	529	v += alpha[i] * (G[i] + p[i]);
	530
	531	si.obj = v / 2;
	532	}
	533
	534	// put back the solution
	535	{
	536	for (int i = 0; i < l; i++)
	537	alpha_[active_set[i]] = alpha[i];
	538	}
	539
	540	si.upper_bound_p = Cp;
	541	si.upper_bound_n = Cn;
	542
	543	Debug.Write("\noptimization finished, #iter = " + iter + "\n");
	544	}
	545
	546	// return 1 if already optimal, return 0 otherwise
	547	protected virtual int select_working_set(int[] working_set)
	548	{
	549	// return i,j such that
	550	// i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
	551	// j: mimimizes the decrease of obj value
	552	// (if quadratic coefficeint <= 0, replace it with tau)
	553	// -y_jgrad(f)_j < -y_igrad(f)_i, j in I_low(\alpha)
	554
	555	double Gmax = -INF;
	556	double Gmax2 = -INF;
	557	int Gmax_idx = -1;
	558	int Gmin_idx = -1;
	559	double obj_diff_min = INF;
	560
	561	for (int t = 0; t < active_size; t++)
	562	if (y[t] == +1)
	563	{
	564	if (!is_upper_bound(t))
	565	if (-G[t] >= Gmax)
	566	{
	567	Gmax = -G[t];
	568	Gmax_idx = t;
	569	}
	570	}
	571	else
	572	{
	573	if (!is_lower_bound(t))
	574	if (G[t] >= Gmax)
	575	{
	576	Gmax = G[t];
	577	Gmax_idx = t;
	578	}
	579	}
	580
	581	int i = Gmax_idx;
	582	float[] Q_i = null;
	583	if (i != -1) // null Q_i not accessed: Gmax=-INF if i=-1
	584	Q_i = Q.get_Q(i, active_size);
	585
	586	for (int j = 0; j < active_size; j++)
	587	{
	588	if (y[j] == +1)
	589	{
	590	if (!is_lower_bound(j))
	591	{
	592	double grad_diff = Gmax + G[j];
	593	if (G[j] >= Gmax2)
	594	Gmax2 = G[j];
	595	if (grad_diff > 0)
	596	{
	597	double obj_diff;
	598	double quad_coef = Q_i[i] + QD[j] - 2 * y[i] * Q_i[j];
	599	if (quad_coef > 0)
	600	obj_diff = -(grad_diff * grad_diff) / quad_coef;
	601	else
	602	obj_diff = -(grad_diff * grad_diff) / 1e-12;
	603
	604	if (obj_diff <= obj_diff_min)
	605	{
	606	Gmin_idx = j;
	607	obj_diff_min = obj_diff;
	608	}
	609	}
	610	}
	611	}
	612	else
	613	{
	614	if (!is_upper_bound(j))
	615	{
	616	double grad_diff = Gmax - G[j];
	617	if (-G[j] >= Gmax2)
	618	Gmax2 = -G[j];
	619	if (grad_diff > 0)
	620	{
	621	double obj_diff;
	622	double quad_coef = Q_i[i] + QD[j] + 2 * y[i] * Q_i[j];
	623	if (quad_coef > 0)
	624	obj_diff = -(grad_diff * grad_diff) / quad_coef;
	625	else
	626	obj_diff = -(grad_diff * grad_diff) / 1e-12;
	627
	628	if (obj_diff <= obj_diff_min)
	629	{
	630	Gmin_idx = j;
	631	obj_diff_min = obj_diff;
	632	}
	633	}
	634	}
	635	}
	636	}
	637
	638	if (Gmax + Gmax2 < eps)
	639	return 1;
	640
	641	working_set[0] = Gmax_idx;
	642	working_set[1] = Gmin_idx;
	643	return 0;
	644	}
	645
	646	private bool be_shrunken(int i, double Gmax1, double Gmax2)
	647	{
	648	if (is_upper_bound(i))
	649	{
	650	if (y[i] == +1)
	651	return (-G[i] > Gmax1);
	652	else
	653	return (-G[i] > Gmax2);
	654	}
	655	else if (is_lower_bound(i))
	656	{
	657	if (y[i] == +1)
	658	return (G[i] > Gmax2);
	659	else
	660	return (G[i] > Gmax1);
	661	}
	662	else
	663	return (false);
	664	}
	665
	666	protected virtual void do_shrinking()
	667	{
	668	int i;
	669	double Gmax1 = -INF; // max { -y_i * grad(f)_i \| i in I_up(\alpha) }
	670	double Gmax2 = -INF; // max { y_i * grad(f)_i \| i in I_low(\alpha) }
	671
	672	// find maximal violating pair first
	673	for (i = 0; i < active_size; i++)
	674	{
	675	if (y[i] == +1)
	676	{
	677	if (!is_upper_bound(i))
	678	{
	679	if (-G[i] >= Gmax1)
	680	Gmax1 = -G[i];
	681	}
	682	if (!is_lower_bound(i))
	683	{
	684	if (G[i] >= Gmax2)
	685	Gmax2 = G[i];
	686	}
	687	}
	688	else
	689	{
	690	if (!is_upper_bound(i))
	691	{
	692	if (-G[i] >= Gmax2)
	693	Gmax2 = -G[i];
	694	}
	695	if (!is_lower_bound(i))
	696	{
	697	if (G[i] >= Gmax1)
	698	Gmax1 = G[i];
	699	}
	700	}
	701	}
	702
	703	// shrink
	704
	705	for (i = 0; i < active_size; i++)
	706	if (be_shrunken(i, Gmax1, Gmax2))
	707	{
	708	active_size--;
	709	while (active_size > i)
	710	{
	711	if (!be_shrunken(active_size, Gmax1, Gmax2))
	712	{
	713	swap_index(i, active_size);
	714	break;
	715	}
	716	active_size--;
	717	}
	718	}
	719
	720	// unshrink, check all variables again before sealed iterations
	721
	722	if (unshrinked \|\| Gmax1 + Gmax2 > eps * 10) return;
	723
	724	unshrinked = true;
	725	reconstruct_gradient();
	726
	727	for (i = l - 1; i >= active_size; i--)
	728	if (!be_shrunken(i, Gmax1, Gmax2))
	729	{
	730	while (active_size < i)
	731	{
	732	if (be_shrunken(active_size, Gmax1, Gmax2))
	733	{
	734	swap_index(i, active_size);
	735	break;
	736	}
	737	active_size++;
	738	}
	739	active_size++;
	740	}
	741	}
	742
	743	protected virtual double calculate_rho()
	744	{
	745	double r;
	746	int nr_free = 0;
	747	double ub = INF, lb = -INF, sum_free = 0;
	748	for (int i = 0; i < active_size; i++)
	749	{
	750	double yG = y[i] * G[i];
	751
	752	if (is_lower_bound(i))
	753	{
	754	if (y[i] > 0)
	755	ub = Math.Min(ub, yG);
	756	else
	757	lb = Math.Max(lb, yG);
	758	}
	759	else if (is_upper_bound(i))
	760	{
	761	if (y[i] < 0)
	762	ub = Math.Min(ub, yG);
	763	else
	764	lb = Math.Max(lb, yG);
	765	}
	766	else
	767	{
	768	++nr_free;
	769	sum_free += yG;
	770	}
	771	}
	772
	773	if (nr_free > 0)
	774	r = sum_free / nr_free;
	775	else
	776	r = (ub + lb) / 2;
	777
	778	return r;
	779	}
	780
	781	}
	782
	783	//
	784	// Solver for nu-svm classification and regression
	785	//
	786	// additional constraint: e^T \alpha = constant
	787	//
	788	sealed class Solver_NU : Solver
	789	{
	790	private SolutionInfo si;
	791
	792	public override void Solve(int l, QMatrix Q, double[] p, short[] y,
	793	double[] alpha, double Cp, double Cn, double eps,
	794	SolutionInfo si, bool shrinking)
	795	{
	796	this.si = si;
	797	base.Solve(l, Q, p, y, alpha, Cp, Cn, eps, si, shrinking);
	798	}
	799
	800	// return 1 if already optimal, return 0 otherwise
	801	protected override int select_working_set(int[] working_set)
	802	{
	803	// return i,j such that y_i = y_j and
	804	// i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
	805	// j: minimizes the decrease of obj value
	806	// (if quadratic coefficeint <= 0, replace it with tau)
	807	// -y_jgrad(f)_j < -y_igrad(f)_i, j in I_low(\alpha)
	808
	809	double Gmaxp = -INF;
	810	double Gmaxp2 = -INF;
	811	int Gmaxp_idx = -1;
	812
	813	double Gmaxn = -INF;
	814	double Gmaxn2 = -INF;
	815	int Gmaxn_idx = -1;
	816
	817	int Gmin_idx = -1;
	818	double obj_diff_min = INF;
	819
	820	for (int t = 0; t < active_size; t++)
	821	if (y[t] == +1)
	822	{
	823	if (!is_upper_bound(t))
	824	if (-G[t] >= Gmaxp)
	825	{
	826	Gmaxp = -G[t];
	827	Gmaxp_idx = t;
	828	}
	829	}
	830	else
	831	{
	832	if (!is_lower_bound(t))
	833	if (G[t] >= Gmaxn)
	834	{
	835	Gmaxn = G[t];
	836	Gmaxn_idx = t;
	837	}
	838	}
	839
	840	int ip = Gmaxp_idx;
	841	int iN = Gmaxn_idx;
	842	float[] Q_ip = null;
	843	float[] Q_in = null;
	844	if (ip != -1) // null Q_ip not accessed: Gmaxp=-INF if ip=-1
	845	Q_ip = Q.get_Q(ip, active_size);
	846	if (iN != -1)
	847	Q_in = Q.get_Q(iN, active_size);
	848
	849	for (int j = 0; j < active_size; j++)
	850	{
	851	if (y[j] == +1)
	852	{
	853	if (!is_lower_bound(j))
	854	{
	855	double grad_diff = Gmaxp + G[j];
	856	if (G[j] >= Gmaxp2)
	857	Gmaxp2 = G[j];
	858	if (grad_diff > 0)
	859	{
	860	double obj_diff;
	861	double quad_coef = Q_ip[ip] + QD[j] - 2 * Q_ip[j];
	862	if (quad_coef > 0)
	863	obj_diff = -(grad_diff * grad_diff) / quad_coef;
	864	else
	865	obj_diff = -(grad_diff * grad_diff) / 1e-12;
	866
	867	if (obj_diff <= obj_diff_min)
	868	{
	869	Gmin_idx = j;
	870	obj_diff_min = obj_diff;
	871	}
	872	}
	873	}
	874	}
	875	else
	876	{
	877	if (!is_upper_bound(j))
	878	{
	879	double grad_diff = Gmaxn - G[j];
	880	if (-G[j] >= Gmaxn2)
	881	Gmaxn2 = -G[j];
	882	if (grad_diff > 0)
	883	{
	884	double obj_diff;
	885	double quad_coef = Q_in[iN] + QD[j] - 2 * Q_in[j];
	886	if (quad_coef > 0)
	887	obj_diff = -(grad_diff * grad_diff) / quad_coef;
	888	else
	889	obj_diff = -(grad_diff * grad_diff) / 1e-12;
	890
	891	if (obj_diff <= obj_diff_min)
	892	{
	893	Gmin_idx = j;
	894	obj_diff_min = obj_diff;
	895	}
	896	}
	897	}
	898	}
	899	}
	900
	901	if (Math.Max(Gmaxp + Gmaxp2, Gmaxn + Gmaxn2) < eps)
	902	return 1;
	903
	904	if (y[Gmin_idx] == +1)
	905	working_set[0] = Gmaxp_idx;
	906	else
	907	working_set[0] = Gmaxn_idx;
	908	working_set[1] = Gmin_idx;
	909
	910	return 0;
	911	}
	912
	913	private bool be_shrunken(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4)
	914	{
	915	if (is_upper_bound(i))
	916	{
	917	if (y[i] == +1)
	918	return (-G[i] > Gmax1);
	919	else
	920	return (-G[i] > Gmax4);
	921	}
	922	else if (is_lower_bound(i))
	923	{
	924	if (y[i] == +1)
	925	return (G[i] > Gmax2);
	926	else
	927	return (G[i] > Gmax3);
	928	}
	929	else
	930	return (false);
	931	}
	932
	933	protected override void do_shrinking()
	934	{
	935	double Gmax1 = -INF; // max { -y_i * grad(f)_i \| y_i = +1, i in I_up(\alpha) }
	936	double Gmax2 = -INF; // max { y_i * grad(f)_i \| y_i = +1, i in I_low(\alpha) }
	937	double Gmax3 = -INF; // max { -y_i * grad(f)_i \| y_i = -1, i in I_up(\alpha) }
	938	double Gmax4 = -INF; // max { y_i * grad(f)_i \| y_i = -1, i in I_low(\alpha) }
	939
	940	// find maximal violating pair first
	941	int i;
	942	for (i = 0; i < active_size; i++)
	943	{
	944	if (!is_upper_bound(i))
	945	{
	946	if (y[i] == +1)
	947	{
	948	if (-G[i] > Gmax1) Gmax1 = -G[i];
	949	}
	950	else if (-G[i] > Gmax4) Gmax4 = -G[i];
	951	}
	952	if (!is_lower_bound(i))
	953	{
	954	if (y[i] == +1)
	955	{
	956	if (G[i] > Gmax2) Gmax2 = G[i];
	957	}
	958	else if (G[i] > Gmax3) Gmax3 = G[i];
	959	}
	960	}
	961
	962	// shrinking
	963
	964	for (i = 0; i < active_size; i++)
	965	if (be_shrunken(i, Gmax1, Gmax2, Gmax3, Gmax4))
	966	{
	967	active_size--;
	968	while (active_size > i)
	969	{
	970	if (!be_shrunken(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
	971	{
	972	swap_index(i, active_size);
	973	break;
	974	}
	975	active_size--;
	976	}
	977	}
	978
	979	if (unshrinked \|\| Math.Max(Gmax1 + Gmax2, Gmax3 + Gmax4) > eps * 10) return;
	980
	981	unshrinked = true;
	982	reconstruct_gradient();
	983
	984	for (i = l - 1; i >= active_size; i--)
	985	if (!be_shrunken(i, Gmax1, Gmax2, Gmax3, Gmax4))
	986	{
	987	while (active_size < i)
	988	{
	989	if (be_shrunken(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
	990	{
	991	swap_index(i, active_size);
	992	break;
	993	}
	994	active_size++;
	995	}
	996	active_size++;
	997	}
	998	}
	999
	1000	protected override double calculate_rho()
	1001	{
	1002	int nr_free1 = 0, nr_free2 = 0;
	1003	double ub1 = INF, ub2 = INF;
	1004	double lb1 = -INF, lb2 = -INF;
	1005	double sum_free1 = 0, sum_free2 = 0;
	1006
	1007	for (int i = 0; i < active_size; i++)
	1008	{
	1009	if (y[i] == +1)
	1010	{
	1011	if (is_lower_bound(i))
	1012	ub1 = Math.Min(ub1, G[i]);
	1013	else if (is_upper_bound(i))
	1014	lb1 = Math.Max(lb1, G[i]);
	1015	else
	1016	{
	1017	++nr_free1;
	1018	sum_free1 += G[i];
	1019	}
	1020	}
	1021	else
	1022	{
	1023	if (is_lower_bound(i))
	1024	ub2 = Math.Min(ub2, G[i]);
	1025	else if (is_upper_bound(i))
	1026	lb2 = Math.Max(lb2, G[i]);
	1027	else
	1028	{
	1029	++nr_free2;
	1030	sum_free2 += G[i];
	1031	}
	1032	}
	1033	}
	1034
	1035	double r1, r2;
	1036	if (nr_free1 > 0)
	1037	r1 = sum_free1 / nr_free1;
	1038	else
	1039	r1 = (ub1 + lb1) / 2;
	1040
	1041	if (nr_free2 > 0)
	1042	r2 = sum_free2 / nr_free2;
	1043	else
	1044	r2 = (ub2 + lb2) / 2;
	1045
	1046	si.r = (r1 + r2) / 2;
	1047	return (r1 - r2) / 2;
	1048	}
	1049	}
	1050
	1051	//
	1052	// Q matrices for various formulations
	1053	//
	1054	class SVC_Q : Kernel
	1055	{
	1056	private short[] y;
	1057	private Cache cache;
	1058	private float[] QD;
	1059
	1060	public SVC_Q(Problem prob, Parameter param, short[] y_) : base(prob.Count, prob.X, param)
	1061	{
	1062	y = (short[])y_.Clone();
	1063	cache = new Cache(prob.Count, (long)(param.CacheSize * (1 << 20)));
	1064	QD = new float[prob.Count];
	1065	for (int i = 0; i < prob.Count; i++)
	1066	QD[i] = (float)kernel_function(i, i);
	1067	}
	1068
	1069	public override float[] get_Q(int i, int len)
	1070	{
	1071	float[][] data = new float[1][];
	1072	int start;
	1073	if ((start = cache.get_data(i, data, len)) < len)
	1074	{
	1075	for (int j = start; j < len; j++)
	1076	data[0][j] = (float)(y[i] * y[j] * kernel_function(i, j));
	1077	}
	1078	return data[0];
	1079	}
	1080
	1081	public override float[] get_QD()
	1082	{
	1083	return QD;
	1084	}
	1085
	1086	public override void swap_index(int i, int j)
	1087	{
	1088	cache.swap_index(i, j);
	1089	base.swap_index(i, j);
	1090	do { short _ = y[i]; y[i] = y[j]; y[j] = _; } while (false);
	1091	do { float _ = QD[i]; QD[i] = QD[j]; QD[j] = _; } while (false);
	1092	}
	1093	}
	1094
	1095	class ONE_CLASS_Q : Kernel
	1096	{
	1097	private Cache cache;
	1098	private float[] QD;
	1099
	1100	public ONE_CLASS_Q(Problem prob, Parameter param) : base(prob.Count, prob.X, param)
	1101	{
	1102	cache = new Cache(prob.Count, (long)(param.CacheSize * (1 << 20)));
	1103	QD = new float[prob.Count];
	1104	for (int i = 0; i < prob.Count; i++)
	1105	QD[i] = (float)kernel_function(i, i);
	1106	}
	1107
	1108	public override float[] get_Q(int i, int len)
	1109	{
	1110	float[][] data = new float[1][];
	1111	int start;
	1112	if ((start = cache.get_data(i, data, len)) < len)
	1113	{
	1114	for (int j = start; j < len; j++)
	1115	data[0][j] = (float)kernel_function(i, j);
	1116	}
	1117	return data[0];
	1118	}
	1119
	1120	public override float[] get_QD()
	1121	{
	1122	return QD;
	1123	}
	1124
	1125	public override void swap_index(int i, int j)
	1126	{
	1127	cache.swap_index(i, j);
	1128	base.swap_index(i, j);
	1129	do { float _ = QD[i]; QD[i] = QD[j]; QD[j] = _; } while (false);
	1130	}
	1131	}
	1132
	1133	class SVR_Q : Kernel
	1134	{
	1135	private int l;
	1136	private Cache cache;
	1137	private short[] sign;
	1138	private int[] index;
	1139	private int next_buffer;
	1140	private float[][] buffer;
	1141	private float[] QD;
	1142
	1143	public SVR_Q(Problem prob, Parameter param)
	1144	: base(prob.Count, prob.X, param)
	1145	{
	1146	l = prob.Count;
	1147	cache = new Cache(l, (long)(param.CacheSize * (1 << 20)));
	1148	QD = new float[2 * l];
	1149	sign = new short[2 * l];
	1150	index = new int[2 * l];
	1151	for (int k = 0; k < l; k++)
	1152	{
	1153	sign[k] = 1;
	1154	sign[k + l] = -1;
	1155	index[k] = k;
	1156	index[k + l] = k;
	1157	QD[k] = (float)kernel_function(k, k);
	1158	QD[k + l] = QD[k];
	1159	}
	1160	buffer = new float[2][];
	1161	buffer[0] = new float[2 * l];
	1162	buffer[1] = new float[2 * l];
	1163	next_buffer = 0;
	1164	}
	1165
	1166	public override void swap_index(int i, int j)
	1167	{
	1168	do { short _ = sign[i]; sign[i] = sign[j]; sign[j] = _; } while (false);
	1169	do { int _ = index[i]; index[i] = index[j]; index[j] = _; } while (false);
	1170	do { float _ = QD[i]; QD[i] = QD[j]; QD[j] = _; } while (false);
	1171	}
	1172
	1173	public override float[] get_Q(int i, int len)
	1174	{
	1175	float[][] data = new float[1][];
	1176	int real_i = index[i];
	1177	if (cache.get_data(real_i, data, l) < l)
	1178	{
	1179	for (int j = 0; j < l; j++)
	1180	data[0][j] = (float)kernel_function(real_i, j);
	1181	}
	1182
	1183	// reorder and copy
	1184	float[] buf = buffer[next_buffer];
	1185	next_buffer = 1 - next_buffer;
	1186	short si = sign[i];
	1187	for (int j = 0; j < len; j++)
	1188	buf[j] = si * sign[j] * data[0][index[j]];
	1189	return buf;
	1190	}
	1191
	1192	public override float[] get_QD()
	1193	{
	1194	return QD;
	1195	}
	1196	}
	1197
	1198	internal static class Procedures
	1199	{
	1200	//
	1201	// construct and solve various formulations
	1202	//
	1203	private static void solve_c_svc(Problem prob, Parameter param,
	1204	double[] alpha, Solver.SolutionInfo si,
	1205	double Cp, double Cn)
	1206	{
	1207	int l = prob.Count;
	1208	double[] minus_ones = new double[l];
	1209	short[] y = new short[l];
	1210
	1211	int i;
	1212
	1213	for (i = 0; i < l; i++)
	1214	{
	1215	alpha[i] = 0;
	1216	minus_ones[i] = -1;
	1217	if (prob.Y[i] > 0) y[i] = +1; else y[i] = -1;
	1218	}
	1219
	1220	Solver s = new Solver();
	1221	s.Solve(l, new SVC_Q(prob, param, y), minus_ones, y,
	1222	alpha, Cp, Cn, param.EPS, si, param.Shrinking);
	1223
	1224	double sum_alpha = 0;
	1225	for (i = 0; i < l; i++)
	1226	sum_alpha += alpha[i];
	1227
	1228	if (Cp == Cn)
	1229	Debug.Write("nu = " + sum_alpha / (Cp * prob.Count) + "\n");
	1230
	1231	for (i = 0; i < l; i++)
	1232	alpha[i] *= y[i];
	1233	}
	1234
	1235	private static void solve_nu_svc(Problem prob, Parameter param,
	1236	double[] alpha, Solver.SolutionInfo si)
	1237	{
	1238	int i;
	1239	int l = prob.Count;
	1240	double nu = param.Nu;
	1241
	1242	short[] y = new short[l];
	1243
	1244	for (i = 0; i < l; i++)
	1245	if (prob.Y[i] > 0)
	1246	y[i] = +1;
	1247	else
	1248	y[i] = -1;
	1249
	1250	double sum_pos = nu * l / 2;
	1251	double sum_neg = nu * l / 2;
	1252
	1253	for (i = 0; i < l; i++)
	1254	if (y[i] == +1)
	1255	{
	1256	alpha[i] = Math.Min(1.0, sum_pos);
	1257	sum_pos -= alpha[i];
	1258	}
	1259	else
	1260	{
	1261	alpha[i] = Math.Min(1.0, sum_neg);
	1262	sum_neg -= alpha[i];
	1263	}
	1264
	1265	double[] zeros = new double[l];
	1266
	1267	for (i = 0; i < l; i++)
	1268	zeros[i] = 0;
	1269
	1270	Solver_NU s = new Solver_NU();
	1271	s.Solve(l, new SVC_Q(prob, param, y), zeros, y,
	1272	alpha, 1.0, 1.0, param.EPS, si, param.Shrinking);
	1273	double r = si.r;
	1274
	1275	Debug.Write("C = " + 1 / r + "\n");
	1276
	1277	for (i = 0; i < l; i++)
	1278	alpha[i] *= y[i] / r;
	1279
	1280	si.rho /= r;
	1281	si.obj /= (r * r);
	1282	si.upper_bound_p = 1 / r;
	1283	si.upper_bound_n = 1 / r;
	1284	}
	1285
	1286	private static void solve_one_class(Problem prob, Parameter param,
	1287	double[] alpha, Solver.SolutionInfo si)
	1288	{
	1289	int l = prob.Count;
	1290	double[] zeros = new double[l];
	1291	short[] ones = new short[l];
	1292	int i;
	1293
	1294	int n = (int)(param.Nu * prob.Count); // # of alpha's at upper bound
	1295
	1296	for (i = 0; i < n; i++)
	1297	alpha[i] = 1;
	1298	if (n < prob.Count)
	1299	alpha[n] = param.Nu * prob.Count - n;
	1300	for (i = n + 1; i < l; i++)
	1301	alpha[i] = 0;
	1302
	1303	for (i = 0; i < l; i++)
	1304	{
	1305	zeros[i] = 0;
	1306	ones[i] = 1;
	1307	}
	1308
	1309	Solver s = new Solver();
	1310	s.Solve(l, new ONE_CLASS_Q(prob, param), zeros, ones,
	1311	alpha, 1.0, 1.0, param.EPS, si, param.Shrinking);
	1312	}
	1313
	// Sets up and solves the epsilon-SVR problem.
	// The problem is doubled to 2*l variables: entry i gets label +1 and linear
	// term (P - y_i), entry i+l gets label -1 and linear term (P + y_i). After
	// solving with box constraint C, alpha[i] is the difference between the two
	// halves. The implied nu is written to the debug output.
	private static void solve_epsilon_svr(Problem prob, Parameter param,
		double[] alpha, Solver.SolutionInfo si)
	{
		int count = prob.Count;
		int doubled = 2 * count;
		double[] dual = new double[doubled];
		double[] linear = new double[doubled];
		short[] signs = new short[doubled];

		for (int upper = 0, lower = count; upper < count; upper++, lower++)
		{
			dual[upper] = 0;
			linear[upper] = param.P - prob.Y[upper];
			signs[upper] = 1;

			dual[lower] = 0;
			linear[lower] = param.P + prob.Y[upper];
			signs[lower] = -1;
		}

		Solver solver = new Solver();
		solver.Solve(doubled, new SVR_Q(prob, param), linear, signs,
			dual, param.C, param.C, param.EPS, si, param.Shrinking);

		double absTotal = 0;
		for (int k = 0; k < count; k++)
		{
			double diff = dual[k] - dual[k + count];
			alpha[k] = diff;
			absTotal += Math.Abs(diff);
		}
		Debug.Write("nu = " + absTotal / (param.C * count) + "\n");
	}
	1346
	// Sets up and solves the nu-SVR problem.
	// Uses the same 2*l doubling as epsilon-SVR but with linear terms -y_i / +y_i.
	// The initial alphas distribute a budget of C * nu * l / 2 across the
	// variables, at most C per variable, identically in both halves. Solver_NU is
	// run, -si.r is written to the debug output as epsilon, and alpha[i] becomes
	// the difference between the two halves.
	private static void solve_nu_svr(Problem prob, Parameter param,
		double[] alpha, Solver.SolutionInfo si)
	{
		int count = prob.Count;
		int doubled = 2 * count;
		double cost = param.C;
		double[] dual = new double[doubled];
		double[] linear = new double[doubled];
		short[] signs = new short[doubled];

		double budget = cost * param.Nu * count / 2;
		for (int k = 0; k < count; k++)
		{
			double share = Math.Min(budget, cost);
			dual[k] = share;
			dual[k + count] = share;
			budget -= share;

			double targetValue = prob.Y[k];
			linear[k] = -targetValue;
			signs[k] = 1;

			linear[k + count] = targetValue;
			signs[k + count] = -1;
		}

		Solver_NU solver = new Solver_NU();
		solver.Solve(doubled, new SVR_Q(prob, param), linear, signs, dual, cost, cost, param.EPS, si, param.Shrinking);

		Debug.Write("epsilon = " + (-si.r) + "\n");

		for (int k = 0; k < count; k++)
			alpha[k] = dual[k] - dual[k + count];
	}
	1378
	1379	//
	1380	// decision_function
	1381	//
	// Result of training a single binary/regression sub-problem.
	private class decision_function
	{
		// One coefficient per training instance of the sub-problem.
		public double[] alpha;

		// Offset taken from the solver's SolutionInfo.
		public double rho;
	}
	1387
	// Trains one sub-problem by dispatching on param.SvmType to the matching
	// solve_* routine, reports the objective, rho and support-vector counts on
	// the debug output, and returns the resulting alpha vector and rho.
	// Cp and Cn are only forwarded to solve_c_svc.
	static decision_function svm_train_one(
		Problem prob, Parameter param,
		double Cp, double Cn)
	{
		double[] alpha = new double[prob.Count];
		Solver.SolutionInfo si = new Solver.SolutionInfo();

		SvmType kind = param.SvmType;
		if (kind == SvmType.C_SVC)
			solve_c_svc(prob, param, alpha, si, Cp, Cn);
		else if (kind == SvmType.NU_SVC)
			solve_nu_svc(prob, param, alpha, si);
		else if (kind == SvmType.ONE_CLASS)
			solve_one_class(prob, param, alpha, si);
		else if (kind == SvmType.EPSILON_SVR)
			solve_epsilon_svr(prob, param, alpha, si);
		else if (kind == SvmType.NU_SVR)
			solve_nu_svr(prob, param, alpha, si);

		Debug.Write("obj = " + si.obj + ", rho = " + si.rho + "\n");

		// Count support vectors (alpha != 0) and those sitting at their upper bound.
		int supportVectors = 0;
		int boundedVectors = 0;
		for (int idx = 0; idx < prob.Count; idx++)
		{
			double magnitude = Math.Abs(alpha[idx]);
			if (!(magnitude > 0))
				continue;

			++supportVectors;
			if (prob.Y[idx] > 0)
			{
				if (magnitude >= si.upper_bound_p)
					++boundedVectors;
			}
			else if (magnitude >= si.upper_bound_n)
			{
				++boundedVectors;
			}
		}

		Debug.Write("nSV = " + supportVectors + ", nBSV = " + boundedVectors + "\n");

		decision_function result = new decision_function();
		result.alpha = alpha;
		result.rho = si.rho;
		return result;
	}
	1444
	1445	// Platt's binary SVM Probabilistic Output: an improvement from Lin et al.
	// Fits the two sigmoid parameters A and B to the given decision values and
	// labels, and stores them in probAB[0] and probAB[1].
	//   l          - number of samples to read from dec_values / labels
	//   dec_values - decision value of each sample
	//   labels     - sample labels; only the sign test (> 0) is used
	//   probAB     - output array receiving { A, B }
	// The fit uses Newton steps on a regularised Hessian with a backtracking
	// line search, and stops on a small gradient, a failed line search or after
	// the iteration limit.
	private static void sigmoid_train(int l, double[] dec_values, double[] labels,
		double[] probAB)
	{
		const int maxIterations = 100;  // Maximal number of iterations
		const double minStep = 1e-10;   // Minimal step taken in line search
		const double ridge = 1e-3;      // For numerically strict PD of Hessian
		const double gradTol = 1e-5;

		double positives = 0, negatives = 0;
		for (int n = 0; n < l; n++)
		{
			if (labels[n] > 0) positives += 1;
			else negatives += 1;
		}

		double hiTarget = (positives + 1.0) / (positives + 2.0);
		double loTarget = 1 / (negatives + 2.0);
		double[] target = new double[l];
		for (int n = 0; n < l; n++)
			target[n] = labels[n] > 0 ? hiTarget : loTarget;

		// Initial Point and Initial Fun Value
		double A = 0.0;
		double B = Math.Log((negatives + 1.0) / (positives + 1.0));
		double fval = sigmoid_objective(l, dec_values, target, A, B);

		int iter;
		for (iter = 0; iter < maxIterations; iter++)
		{
			// Update Gradient and Hessian (use H' = H + sigma I)
			double h11 = ridge, h22 = ridge;
			double h21 = 0.0, g1 = 0.0, g2 = 0.0;
			for (int n = 0; n < l; n++)
			{
				double score = dec_values[n];
				double z = score * A + B;
				double p, q;
				if (z >= 0)
				{
					double e = Math.Exp(-z);
					p = e / (1.0 + e);
					q = 1.0 / (1.0 + e);
				}
				else
				{
					double e = Math.Exp(z);
					p = 1.0 / (1.0 + e);
					q = e / (1.0 + e);
				}
				double d2 = p * q;
				h11 += score * score * d2;
				h22 += d2;
				h21 += score * d2;
				double d1 = target[n] - p;
				g1 += score * d1;
				g2 += d1;
			}

			// Stopping Criteria
			if (Math.Abs(g1) < gradTol && Math.Abs(g2) < gradTol)
				break;

			// Finding Newton direction: -inv(H') * g
			double det = h11 * h22 - h21 * h21;
			double dA = -(h22 * g1 - h21 * g2) / det;
			double dB = -(-h21 * g1 + h11 * g2) / det;
			double gd = g1 * dA + g2 * dB;

			// Line Search: halve the step until sufficient decrease is reached.
			double stepsize = 1;
			while (stepsize >= minStep)
			{
				double newA = A + stepsize * dA;
				double newB = B + stepsize * dB;
				double newf = sigmoid_objective(l, dec_values, target, newA, newB);

				if (newf < fval + 0.0001 * stepsize * gd)
				{
					A = newA;
					B = newB;
					fval = newf;
					break;
				}
				stepsize = stepsize / 2.0;
			}

			if (stepsize < minStep)
			{
				Debug.Write("Line search fails in two-class probability estimates\n");
				break;
			}
		}

		if (iter >= maxIterations)
			Debug.Write("Reaching maximal iterations in two-class probability estimates\n");
		probAB[0] = A;
		probAB[1] = B;
	}

	// Objective minimised by sigmoid_train, evaluated at (A, B). The two branches
	// keep the argument of Math.Exp non-positive.
	private static double sigmoid_objective(int l, double[] dec_values, double[] t, double A, double B)
	{
		double total = 0.0;
		for (int n = 0; n < l; n++)
		{
			double z = dec_values[n] * A + B;
			if (z >= 0)
				total += t[n] * z + Math.Log(1 + Math.Exp(-z));
			else
				total += (t[n] - 1) * z + Math.Log(1 + Math.Exp(z));
		}
		return total;
	}
	1558
	// Evaluates 1 / (1 + exp(decision_value * A + B)), choosing the algebraic form
	// whose Math.Exp argument is non-positive.
	private static double sigmoid_predict(double decision_value, double A, double B)
	{
		double z = decision_value * A + B;
		bool nonNegative = z >= 0;
		double e = Math.Exp(nonNegative ? -z : z);
		return nonNegative ? e / (1.0 + e) : 1.0 / (1 + e);
	}
	1567
	1568	// Method 2 from the multiclass_prob paper by Wu, Lin, and Weng
	// Combines pairwise class probabilities into one probability per class.
	//   k - number of classes
	//   r - k x k matrix of pairwise probabilities; only off-diagonal entries are read
	//   p - output array of length >= k receiving the class probabilities
	// p starts uniform and is refined by a fixed-point iteration until
	// max_t |(Qp)_t - p'Qp| drops below 0.005 / k or the iteration limit is hit.
	private static void multiclass_probability(int k, double[,] r, double[] p)
	{
		int t, j;
		int iter = 0, max_iter = Math.Max(100, k);
		double[,] Q = new double[k, k];
		double[] Qp = new double[k];
		double pQp, eps = 0.005 / k;

		// Build the symmetric matrix Q from r.
		for (t = 0; t < k; t++)
		{
			p[t] = 1.0 / k; // Valid if k = 1
			Q[t, t] = 0;
			for (j = 0; j < t; j++)
			{
				Q[t, t] += r[j, t] * r[j, t];
				Q[t, j] = Q[j, t];
			}
			for (j = t + 1; j < k; j++)
			{
				Q[t, t] += r[j, t] * r[j, t];
				Q[t, j] = -r[j, t] * r[t, j];
			}
		}

		for (iter = 0; iter < max_iter; iter++)
		{
			// stopping condition, recalculate QP,pQP for numerical accuracy
			pQp = 0;
			for (t = 0; t < k; t++)
			{
				Qp[t] = 0;
				for (j = 0; j < k; j++)
					Qp[t] += Q[t, j] * p[j];
				pQp += p[t] * Qp[t];
			}

			double max_error = 0;
			for (t = 0; t < k; t++)
			{
				double error = Math.Abs(Qp[t] - pQp);
				if (error > max_error)
					max_error = error;
			}
			if (max_error < eps) break;

			for (t = 0; t < k; t++)
			{
				double diff = (-Qp[t] + pQp) / Q[t, t];
				p[t] += diff;
				// Incremental update of p'Qp after changing p[t] and renormalising by (1 + diff).
				pQp = (pQp + diff * (diff * Q[t, t] + 2 * Qp[t])) / (1 + diff) / (1 + diff);
				for (j = 0; j < k; j++)
				{
					Qp[j] = (Qp[j] + diff * Q[t, j]) / (1 + diff);
					p[j] /= (1 + diff);
				}
			}
		}

		if (iter >= max_iter)
			Debug.Write("Exceeds max_iter in multiclass_prob\n");
	}
	1627
	1628	// Cross-validation decision values for probability estimates
	// Produces cross-validated decision values for a binary problem and fits the
	// sigmoid parameters to them via sigmoid_train (result in probAB).
	// The data is randomly shuffled and split into 5 folds. For each fold a model
	// is trained on the remaining data with C = 1 and class weights Cp / Cn, and
	// the held-out samples are scored. If the training part of a fold contains
	// only one class (or is empty), the held-out decision values are set to the
	// constants +1, -1 or 0 instead.
	private static void svm_binary_svc_probability(Problem prob, Parameter param, double Cp, double Cn, double[] probAB)
	{
		const int folds = 5;
		Random rng = new Random();
		int[] order = new int[prob.Count];
		double[] decisions = new double[prob.Count];

		// random shuffle
		for (int n = 0; n < prob.Count; n++)
			order[n] = n;
		for (int n = 0; n < prob.Count; n++)
		{
			int pick = n + (int)(rng.NextDouble() * (prob.Count - n));
			int held = order[n];
			order[n] = order[pick];
			order[pick] = held;
		}

		for (int fold = 0; fold < folds; fold++)
		{
			int begin = fold * prob.Count / folds;
			int end = (fold + 1) * prob.Count / folds;

			Problem subprob = new Problem();
			subprob.Count = prob.Count - (end - begin);
			subprob.X = new Node[subprob.Count][];
			subprob.Y = new double[subprob.Count];

			// Copy everything outside [begin, end) and count the classes on the way.
			int filled = 0;
			int positives = 0, negatives = 0;
			for (int n = 0; n < prob.Count; n++)
			{
				if (n >= begin && n < end)
					continue;
				subprob.X[filled] = prob.X[order[n]];
				subprob.Y[filled] = prob.Y[order[n]];
				if (subprob.Y[filled] > 0)
					positives++;
				else
					negatives++;
				++filled;
			}

			if (positives == 0 || negatives == 0)
			{
				// Degenerate training part: use a constant decision value.
				double constant;
				if (positives > 0)
					constant = 1;
				else if (negatives > 0)
					constant = -1;
				else
					constant = 0;
				for (int n = begin; n < end; n++)
					decisions[order[n]] = constant;
				continue;
			}

			Parameter subparam = (Parameter)param.Clone();
			subparam.Probability = false;
			subparam.C = 1.0;
			subparam.WeightCount = 2;
			subparam.WeightLabels = new int[2];
			subparam.Weights = new double[2];
			subparam.WeightLabels[0] = +1;
			subparam.WeightLabels[1] = -1;
			subparam.Weights[0] = Cp;
			subparam.Weights[1] = Cn;
			Model submodel = svm_train(subprob, subparam);

			for (int n = begin; n < end; n++)
			{
				double[] holder = new double[1];
				svm_predict_values(submodel, prob.X[order[n]], holder);
				// ensure +1 -1 order; reason not using CV subroutine
				decisions[order[n]] = holder[0] * submodel.ClassLabels[0];
			}
		}

		sigmoid_train(prob.Count, decisions, prob.Y, probAB);
	}
	1709
	1710	// Return parameter of a Laplace distribution
	// Estimates the scale of a Laplace noise model for SVR predictions and
	// returns it. Runs 5-fold cross validation with probability output disabled,
	// takes the mean absolute residual, then recomputes that mean while excluding
	// residuals larger than 5 * sqrt(2 * mae^2).
	private static double svm_svr_probability(Problem prob, Parameter param)
	{
		int i;
		int nr_fold = 5;
		double[] ymv = new double[prob.Count];
		double mae = 0;

		Parameter newparam = (Parameter)param.Clone();
		newparam.Probability = false;
		svm_cross_validation(prob, newparam, nr_fold, ymv, null);

		// Turn the cross-validated predictions into residuals.
		for (i = 0; i < prob.Count; i++)
		{
			ymv[i] = prob.Y[i] - ymv[i];
			mae += Math.Abs(ymv[i]);
		}
		mae /= prob.Count;

		double std = Math.Sqrt(2 * mae * mae);
		int count = 0;
		mae = 0;
		for (i = 0; i < prob.Count; i++)
		{
			if (Math.Abs(ymv[i]) > 5 * std)
				count = count + 1; // treated as an outlier and left out of the mean
			else
				mae += Math.Abs(ymv[i]);
		}
		mae /= (prob.Count - count);

		Debug.Write("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma=" + mae + "\n");
		return mae;
	}
	1739
	1740	// label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
	1741	// perm, length l, must be allocated before calling this subroutine
	// Groups the training data by class label (prob.Y cast to int).
	// Outputs, each written to element 0 of its holder array:
	//   nr_class_ret - number of distinct labels
	//   label_ret    - labels in order of first appearance
	//   start_ret    - index in perm where each class begins
	//   count_ret    - number of samples per class
	// perm (length l, allocated by the caller) receives the sample indices
	// ordered class by class. The label and count arrays may be longer than
	// nr_class; only the first nr_class entries are meaningful.
	private static void svm_group_classes(Problem prob, int[] nr_class_ret, int[][] label_ret, int[][] start_ret, int[][] count_ret, int[] perm)
	{
		int total = prob.Count;
		int capacity = 16;
		int classes = 0;
		int[] label = new int[capacity];
		int[] count = new int[capacity];
		int[] classOf = new int[total];

		for (int n = 0; n < total; n++)
		{
			int current = (int)(prob.Y[n]);

			// Linear search for a class already seen with this label.
			int slot = 0;
			while (slot < classes && label[slot] != current)
				slot++;
			classOf[n] = slot;

			if (slot < classes)
			{
				++count[slot];
				continue;
			}

			// New label: grow the bookkeeping arrays when they are full.
			if (classes == capacity)
			{
				capacity *= 2;
				Array.Resize(ref label, capacity);
				Array.Resize(ref count, capacity);
			}
			label[classes] = current;
			count[classes] = 1;
			++classes;
		}

		int[] start = new int[classes];
		start[0] = 0;
		for (int c = 1; c < classes; c++)
			start[c] = start[c - 1] + count[c - 1];

		// Fill perm through a moving copy of the start offsets so start itself stays intact.
		int[] cursor = (int[])start.Clone();
		for (int n = 0; n < total; n++)
		{
			int cls = classOf[n];
			perm[cursor[cls]] = n;
			++cursor[cls];
		}

		nr_class_ret[0] = classes;
		label_ret[0] = label;
		start_ret[0] = start;
		count_ret[0] = count;
	}
	1801
	1802	//
	1803	// Interface functions
	1804	//
	// Trains a model for the given problem and parameters.
	// For ONE_CLASS, EPSILON_SVR and NU_SVR a single decision function is trained
	// (plus the SVR probability parameter when param.Probability is set). For the
	// classification types the data is grouped by class and one binary decision
	// function is trained for every pair of classes; the pairwise sigmoid
	// parameters are fitted as well when param.Probability is set.
	public static Model svm_train(Problem prob, Parameter param)
	{
		Model model = new Model();
		model.Parameter = param;

		if (param.SvmType == SvmType.ONE_CLASS ||
			param.SvmType == SvmType.EPSILON_SVR ||
			param.SvmType == SvmType.NU_SVR)
		{
			// regression or one-class-svm
			model.NumberOfClasses = 2;
			model.ClassLabels = null;
			model.NumberOfSVPerClass = null;
			model.PairwiseProbabilityA = null; model.PairwiseProbabilityB = null;
			model.SupportVectorCoefficients = new double[1][];

			if (param.Probability &&
				(param.SvmType == SvmType.EPSILON_SVR ||
				param.SvmType == SvmType.NU_SVR))
			{
				model.PairwiseProbabilityA = new double[1];
				model.PairwiseProbabilityA[0] = svm_svr_probability(prob, param);
			}

			decision_function f = svm_train_one(prob, param, 0, 0);
			model.Rho = new double[1];
			model.Rho[0] = f.rho;

			// Keep only the instances with a non-zero coefficient.
			int nSV = 0;
			int i;
			for (i = 0; i < prob.Count; i++)
				if (Math.Abs(f.alpha[i]) > 0) ++nSV;
			model.SupportVectorCount = nSV;
			model.SupportVectors = new Node[nSV][];
			model.SupportVectorCoefficients[0] = new double[nSV];
			int j = 0;
			for (i = 0; i < prob.Count; i++)
				if (Math.Abs(f.alpha[i]) > 0)
				{
					model.SupportVectors[j] = prob.X[i];
					model.SupportVectorCoefficients[0][j] = f.alpha[i];
					++j;
				}
		}
		else
		{
			// classification
			int l = prob.Count;
			int[] tmp_nr_class = new int[1];
			int[][] tmp_label = new int[1][];
			int[][] tmp_start = new int[1][];
			int[][] tmp_count = new int[1][];
			int[] perm = new int[l];

			// group training data of the same class
			svm_group_classes(prob, tmp_nr_class, tmp_label, tmp_start, tmp_count, perm);
			int nr_class = tmp_nr_class[0];
			int[] label = tmp_label[0];
			int[] start = tmp_start[0];
			int[] count = tmp_count[0];
			Node[][] x = new Node[l][];
			int i;
			for (i = 0; i < l; i++)
				x[i] = prob.X[perm[i]];

			// calculate weighted C

			double[] weighted_C = new double[nr_class];
			for (i = 0; i < nr_class; i++)
				weighted_C[i] = param.C;
			for (i = 0; i < param.WeightCount; i++)
			{
				int j;
				for (j = 0; j < nr_class; j++)
					if (param.WeightLabels[i] == label[j])
						break;
				if (j == nr_class)
					Debug.Write("warning: class label " + param.WeightLabels[i] + " specified in weight is not found\n");
				else
					weighted_C[j] *= param.Weights[i];
			}

			// train k*(k-1)/2 models

			bool[] nonzero = new bool[l];
			for (i = 0; i < l; i++)
				nonzero[i] = false;
			decision_function[] f = new decision_function[nr_class * (nr_class - 1) / 2];

			double[] probA = null, probB = null;
			if (param.Probability)
			{
				probA = new double[nr_class * (nr_class - 1) / 2];
				probB = new double[nr_class * (nr_class - 1) / 2];
			}

			int p = 0;
			for (i = 0; i < nr_class; i++)
				for (int j = i + 1; j < nr_class; j++)
				{
					// Binary sub-problem: class i is labelled +1, class j is labelled -1.
					Problem sub_prob = new Problem();
					int si = start[i], sj = start[j];
					int ci = count[i], cj = count[j];
					sub_prob.Count = ci + cj;
					sub_prob.X = new Node[sub_prob.Count][];
					sub_prob.Y = new double[sub_prob.Count];
					int k;
					for (k = 0; k < ci; k++)
					{
						sub_prob.X[k] = x[si + k];
						sub_prob.Y[k] = +1;
					}
					for (k = 0; k < cj; k++)
					{
						sub_prob.X[ci + k] = x[sj + k];
						sub_prob.Y[ci + k] = -1;
					}

					if (param.Probability)
					{
						double[] probAB = new double[2];
						svm_binary_svc_probability(sub_prob, param, weighted_C[i], weighted_C[j], probAB);
						probA[p] = probAB[0];
						probB[p] = probAB[1];
					}

					f[p] = svm_train_one(sub_prob, param, weighted_C[i], weighted_C[j]);
					// Remember every instance that is a support vector in at least one pair.
					for (k = 0; k < ci; k++)
						if (!nonzero[si + k] && Math.Abs(f[p].alpha[k]) > 0)
							nonzero[si + k] = true;
					for (k = 0; k < cj; k++)
						if (!nonzero[sj + k] && Math.Abs(f[p].alpha[ci + k]) > 0)
							nonzero[sj + k] = true;
					++p;
				}

			// build output

			model.NumberOfClasses = nr_class;

			model.ClassLabels = new int[nr_class];
			for (i = 0; i < nr_class; i++)
				model.ClassLabels[i] = label[i];

			model.Rho = new double[nr_class * (nr_class - 1) / 2];
			for (i = 0; i < nr_class * (nr_class - 1) / 2; i++)
				model.Rho[i] = f[i].rho;

			if (param.Probability)
			{
				model.PairwiseProbabilityA = new double[nr_class * (nr_class - 1) / 2];
				model.PairwiseProbabilityB = new double[nr_class * (nr_class - 1) / 2];
				for (i = 0; i < nr_class * (nr_class - 1) / 2; i++)
				{
					model.PairwiseProbabilityA[i] = probA[i];
					model.PairwiseProbabilityB[i] = probB[i];
				}
			}
			else
			{
				model.PairwiseProbabilityA = null;
				model.PairwiseProbabilityB = null;
			}

			int nnz = 0;
			int[] nz_count = new int[nr_class];
			model.NumberOfSVPerClass = new int[nr_class];
			for (i = 0; i < nr_class; i++)
			{
				int nSV = 0;
				for (int j = 0; j < count[i]; j++)
					if (nonzero[start[i] + j])
					{
						++nSV;
						++nnz;
					}
				model.NumberOfSVPerClass[i] = nSV;
				nz_count[i] = nSV;
			}

			Debug.Write("Total nSV = " + nnz + "\n");

			model.SupportVectorCount = nnz;
			model.SupportVectors = new Node[nnz][];
			p = 0;
			for (i = 0; i < l; i++)
				if (nonzero[i]) model.SupportVectors[p++] = x[i];

			int[] nz_start = new int[nr_class];
			nz_start[0] = 0;
			for (i = 1; i < nr_class; i++)
				nz_start[i] = nz_start[i - 1] + nz_count[i - 1];

			model.SupportVectorCoefficients = new double[nr_class - 1][];
			for (i = 0; i < nr_class - 1; i++)
				model.SupportVectorCoefficients[i] = new double[nnz];

			p = 0;
			for (i = 0; i < nr_class; i++)
				for (int j = i + 1; j < nr_class; j++)
				{
					// classifier (i,j): coefficients with
					// i are in sv_coef[j-1][nz_start[i]...],
					// j are in sv_coef[i][nz_start[j]...]

					int si = start[i];
					int sj = start[j];
					int ci = count[i];
					int cj = count[j];

					int q = nz_start[i];
					int k;
					for (k = 0; k < ci; k++)
						if (nonzero[si + k])
							model.SupportVectorCoefficients[j - 1][q++] = f[p].alpha[k];
					q = nz_start[j];
					for (k = 0; k < cj; k++)
						if (nonzero[sj + k])
							model.SupportVectorCoefficients[i][q++] = f[p].alpha[ci + k];
					++p;
				}
		}
		return model;
	}
	2029
	2030	// Stratified cross validation
	// Runs nr_fold-fold cross validation and writes the prediction for every
	// sample into target (indexed like prob).
	// For C_SVC / NU_SVC with nr_fold < l the folds are stratified by class;
	// otherwise the data is shuffled and cut into equal slices. When
	// param.Probability is set for a classification type, predictions come from
	// svm_predict_probability and, if confidence is not null, confidence[i]
	// receives a label -> probability map for sample i.
	public static void svm_cross_validation(Problem prob, Parameter param, int nr_fold, double[] target, Dictionary<int,double>[] confidence)
	{
		Random rand = new Random();
		int i;
		int[] fold_start = new int[nr_fold + 1];
		int l = prob.Count;
		int[] perm = new int[l];

		// stratified cv may not give leave-one-out rate
		// Each class to l folds -> some folds may have zero elements
		if ((param.SvmType == SvmType.C_SVC ||
			param.SvmType == SvmType.NU_SVC) && nr_fold < l)
		{
			int[] tmp_nr_class = new int[1];
			int[][] tmp_label = new int[1][];
			int[][] tmp_start = new int[1][];
			int[][] tmp_count = new int[1][];

			svm_group_classes(prob, tmp_nr_class, tmp_label, tmp_start, tmp_count, perm);

			int nr_class = tmp_nr_class[0];
			int[] label = tmp_label[0];
			int[] start = tmp_start[0];
			int[] count = tmp_count[0];

			// random shuffle and then data grouped by fold using the array perm
			int[] fold_count = new int[nr_fold];
			int c;
			int[] index = new int[l];
			for (i = 0; i < l; i++)
				index[i] = perm[i];
			for (c = 0; c < nr_class; c++)
				for (i = 0; i < count[c]; i++)
				{
					int j = i + (int)(rand.NextDouble() * (count[c] - i));
					int swapped = index[start[c] + j];
					index[start[c] + j] = index[start[c] + i];
					index[start[c] + i] = swapped;
				}
			for (i = 0; i < nr_fold; i++)
			{
				fold_count[i] = 0;
				for (c = 0; c < nr_class; c++)
					fold_count[i] += (i + 1) * count[c] / nr_fold - i * count[c] / nr_fold;
			}
			fold_start[0] = 0;
			for (i = 1; i <= nr_fold; i++)
				fold_start[i] = fold_start[i - 1] + fold_count[i - 1];
			for (c = 0; c < nr_class; c++)
				for (i = 0; i < nr_fold; i++)
				{
					int begin = start[c] + i * count[c] / nr_fold;
					int end = start[c] + (i + 1) * count[c] / nr_fold;
					for (int j = begin; j < end; j++)
					{
						perm[fold_start[i]] = index[j];
						fold_start[i]++;
					}
				}
			// fold_start was used as a write cursor above; rebuild it.
			fold_start[0] = 0;
			for (i = 1; i <= nr_fold; i++)
				fold_start[i] = fold_start[i - 1] + fold_count[i - 1];
		}
		else
		{
			for (i = 0; i < l; i++) perm[i] = i;
			for (i = 0; i < l; i++)
			{
				int j = i + (int)(rand.NextDouble() * (l - i));
				int swapped = perm[i];
				perm[i] = perm[j];
				perm[j] = swapped;
			}
			for (i = 0; i <= nr_fold; i++)
				fold_start[i] = i * l / nr_fold;
		}

		for (i = 0; i < nr_fold; i++)
		{
			int begin = fold_start[i];
			int end = fold_start[i + 1];
			int j, k;
			Problem subprob = new Problem();

			// Training part: everything outside [begin, end).
			subprob.Count = l - (end - begin);
			subprob.X = new Node[subprob.Count][];
			subprob.Y = new double[subprob.Count];

			k = 0;
			for (j = 0; j < begin; j++)
			{
				subprob.X[k] = prob.X[perm[j]];
				subprob.Y[k] = prob.Y[perm[j]];
				++k;
			}
			for (j = end; j < l; j++)
			{
				subprob.X[k] = prob.X[perm[j]];
				subprob.Y[k] = prob.Y[perm[j]];
				++k;
			}
			Model submodel = svm_train(subprob, param);
			if (param.Probability &&
				(param.SvmType == SvmType.C_SVC ||
				param.SvmType == SvmType.NU_SVC))
			{
				for (j = begin; j < end; j++)
				{
					double[] prob_estimates = new double[svm_get_nr_class(submodel)];
					target[perm[j]] = svm_predict_probability(submodel, prob.X[perm[j]], prob_estimates);

					// Callers that do not need per-class confidences may pass null.
					if (confidence == null)
						continue;
					confidence[perm[j]] = new Dictionary<int, double>();
					for (int label = 0; label < prob_estimates.Length; label++)
						confidence[perm[j]][submodel.ClassLabels[label]] = prob_estimates[label];
				}
			}
			else
			{
				for (j = begin; j < end; j++)
					target[perm[j]] = svm_predict(submodel, prob.X[perm[j]]);
			}
		}
	}
	2148
	// Returns the SVM type stored in the model's parameters.
	public static SvmType svm_get_svm_type(Model model)
	{
		SvmType kind = model.Parameter.SvmType;
		return kind;
	}
	2153
	// Returns the number of classes recorded in the model.
	public static int svm_get_nr_class(Model model)
	{
		int classes = model.NumberOfClasses;
		return classes;
	}
	2158
	// Copies the model's class labels into the caller-supplied array.
	// Does nothing when the model has no class labels.
	public static void svm_get_labels(Model model, int[] label)
	{
		if (model.ClassLabels == null)
			return;

		int idx = 0;
		while (idx < model.NumberOfClasses)
		{
			label[idx] = model.ClassLabels[idx];
			idx++;
		}
	}
	2165
	// Returns the SVR probability parameter stored in PairwiseProbabilityA[0].
	// If the model is not an SVR model or carries no such parameter, a message is
	// written to the debug output and 0 is returned.
	public static double svm_get_svr_probability(Model model)
	{
		if ((model.Parameter.SvmType == SvmType.EPSILON_SVR || model.Parameter.SvmType == SvmType.NU_SVR) &&
			model.PairwiseProbabilityA != null)
		{
			return model.PairwiseProbabilityA[0];
		}
		else
		{
			Debug.Write("Model doesn't contain information for SVR probability inference\n");
			return 0;
		}
	}
	2177
	// Computes the raw decision value(s) of the model for input x.
	// For ONE_CLASS / EPSILON_SVR / NU_SVR a single value is written to
	// dec_values[0]. For classification one value per class pair (i, j), i < j,
	// is written to dec_values in pair order.
	public static void svm_predict_values(Model model, Node[] x, double[] dec_values)
	{
		if (model.Parameter.SvmType == SvmType.ONE_CLASS ||
			model.Parameter.SvmType == SvmType.EPSILON_SVR ||
			model.Parameter.SvmType == SvmType.NU_SVR)
		{
			double[] sv_coef = model.SupportVectorCoefficients[0];
			double sum = 0;
			for (int i = 0; i < model.SupportVectorCount; i++)
				sum += sv_coef[i] * Kernel.k_function(x, model.SupportVectors[i], model.Parameter);
			sum -= model.Rho[0];
			dec_values[0] = sum;
		}
		else
		{
			int i;
			int nr_class = model.NumberOfClasses;
			int l = model.SupportVectorCount;

			// Evaluate the kernel against every support vector once.
			double[] kvalue = new double[l];
			for (i = 0; i < l; i++)
				kvalue[i] = Kernel.k_function(x, model.SupportVectors[i], model.Parameter);

			// Offset of each class's support vectors within the model arrays.
			int[] start = new int[nr_class];
			start[0] = 0;
			for (i = 1; i < nr_class; i++)
				start[i] = start[i - 1] + model.NumberOfSVPerClass[i - 1];

			int p = 0;
			for (i = 0; i < nr_class; i++)
				for (int j = i + 1; j < nr_class; j++)
				{
					double sum = 0;
					int si = start[i];
					int sj = start[j];
					int ci = model.NumberOfSVPerClass[i];
					int cj = model.NumberOfSVPerClass[j];

					int k;
					double[] coef1 = model.SupportVectorCoefficients[j - 1];
					double[] coef2 = model.SupportVectorCoefficients[i];
					for (k = 0; k < ci; k++)
						sum += coef1[si + k] * kvalue[si + k];
					for (k = 0; k < cj; k++)
						sum += coef2[sj + k] * kvalue[sj + k];
					sum -= model.Rho[p];
					dec_values[p] = sum;
					p++;
				}
		}
	}
	2229
	// Predicts the output of the model for input x.
	// ONE_CLASS returns +1 or -1 from the sign of the decision value; the SVR
	// types return the decision value itself. Classification models vote over
	// all pairwise decision values and return the label of the class with the
	// most votes (the lowest class index wins ties).
	public static double svm_predict(Model model, Node[] x)
	{
		if (model.Parameter.SvmType == SvmType.ONE_CLASS ||
			model.Parameter.SvmType == SvmType.EPSILON_SVR ||
			model.Parameter.SvmType == SvmType.NU_SVR)
		{
			double[] res = new double[1];
			svm_predict_values(model, x, res);

			if (model.Parameter.SvmType == SvmType.ONE_CLASS)
				return (res[0] > 0) ? 1 : -1;
			else
				return res[0];
		}
		else
		{
			int i;
			int nr_class = model.NumberOfClasses;
			double[] dec_values = new double[nr_class * (nr_class - 1) / 2];
			svm_predict_values(model, x, dec_values);

			int[] vote = new int[nr_class];
			for (i = 0; i < nr_class; i++)
				vote[i] = 0;
			int pos = 0;
			for (i = 0; i < nr_class; i++)
				for (int j = i + 1; j < nr_class; j++)
				{
					// A positive value for pair (i, j) is a vote for class i.
					if (dec_values[pos++] > 0)
						++vote[i];
					else
						++vote[j];
				}

			int vote_max_idx = 0;
			for (i = 1; i < nr_class; i++)
				if (vote[i] > vote[vote_max_idx])
					vote_max_idx = i;
			return model.ClassLabels[vote_max_idx];
		}
	}
	2271
	// Predicts the class of x and fills prob_estimates with one probability per
	// class (in model class order). Only done for C_SVC / NU_SVC models that
	// carry both pairwise probability arrays; otherwise falls back to
	// svm_predict and leaves prob_estimates untouched.
	public static double svm_predict_probability(Model model, Node[] x, double[] prob_estimates)
	{
		if ((model.Parameter.SvmType == SvmType.C_SVC || model.Parameter.SvmType == SvmType.NU_SVC) &&
			model.PairwiseProbabilityA != null && model.PairwiseProbabilityB != null)
		{
			int i;
			int nr_class = model.NumberOfClasses;
			double[] dec_values = new double[nr_class * (nr_class - 1) / 2];
			svm_predict_values(model, x, dec_values);

			double min_prob = 1e-7;
			double[,] pairwise_prob = new double[nr_class, nr_class];

			int k = 0;
			for (i = 0; i < nr_class; i++)
				for (int j = i + 1; j < nr_class; j++)
				{
					// Clamp each pairwise probability into [min_prob, 1 - min_prob].
					pairwise_prob[i, j] = Math.Min(Math.Max(sigmoid_predict(dec_values[k], model.PairwiseProbabilityA[k], model.PairwiseProbabilityB[k]), min_prob), 1 - min_prob);
					pairwise_prob[j, i] = 1 - pairwise_prob[i, j];
					k++;
				}
			multiclass_probability(nr_class, pairwise_prob, prob_estimates);

			int prob_max_idx = 0;
			for (i = 1; i < nr_class; i++)
				if (prob_estimates[i] > prob_estimates[prob_max_idx])
					prob_max_idx = i;
			return model.ClassLabels[prob_max_idx];
		}
		else
		{
			return svm_predict(model, x);
		}
	}
	2304
	// Parses s as a double using double.Parse.
	private static double atof(string s)
	{
		double parsed = double.Parse(s);
		return parsed;
	}
	2309
	// Parses s as an int using int.Parse.
	private static int atoi(string s)
	{
		int parsed = int.Parse(s);
		return parsed;
	}
	2314
	// Validates param (and, for NU_SVC, its feasibility against prob).
	// Returns null when everything is acceptable, otherwise a short message
	// describing the first problem found.
	public static string svm_check_parameter(Problem prob, Parameter param)
	{
		// svm_type

		SvmType svm_type = param.SvmType;
		if (svm_type != SvmType.C_SVC &&
			svm_type != SvmType.NU_SVC &&
			svm_type != SvmType.ONE_CLASS &&
			svm_type != SvmType.EPSILON_SVR &&
			svm_type != SvmType.NU_SVR)
			return "unknown svm type";

		// kernel_type, degree

		KernelType kernel_type = param.KernelType;
		if (kernel_type != KernelType.LINEAR &&
			kernel_type != KernelType.POLY &&
			kernel_type != KernelType.RBF &&
			kernel_type != KernelType.SIGMOID &&
			kernel_type != KernelType.PRECOMPUTED)
			return "unknown kernel type";

		if (param.Degree < 0)
			return "degree of polynomial kernel < 0";

		// cache_size,eps,C,nu,p,shrinking

		if (param.CacheSize <= 0)
			return "cache_size <= 0";

		if (param.EPS <= 0)
			return "eps <= 0";

		if (svm_type == SvmType.C_SVC ||
			svm_type == SvmType.EPSILON_SVR ||
			svm_type == SvmType.NU_SVR)
		{
			if (param.C <= 0)
				return "C <= 0";
		}

		if (svm_type == SvmType.NU_SVC ||
			svm_type == SvmType.ONE_CLASS ||
			svm_type == SvmType.NU_SVR)
		{
			if (param.Nu <= 0 || param.Nu > 1)
				return "nu <= 0 or nu > 1";
		}

		if (svm_type == SvmType.EPSILON_SVR)
		{
			if (param.P < 0)
				return "p < 0";
		}

		if (param.Probability && svm_type == SvmType.ONE_CLASS)
			return "one-class SVM probability output not supported yet";

		// check whether nu-svc is feasible

		if (svm_type == SvmType.NU_SVC)
		{
			int l = prob.Count;
			int max_nr_class = 16;
			int nr_class = 0;
			int[] label = new int[max_nr_class];
			int[] count = new int[max_nr_class];

			// Count the samples of each distinct label.
			int i;
			for (i = 0; i < l; i++)
			{
				int this_label = (int)prob.Y[i];
				int j;
				for (j = 0; j < nr_class; j++)
					if (this_label == label[j])
					{
						++count[j];
						break;
					}

				if (j == nr_class)
				{
					if (nr_class == max_nr_class)
					{
						max_nr_class *= 2;
						int[] new_data = new int[max_nr_class];
						Array.Copy(label, 0, new_data, 0, label.Length);
						label = new_data;

						new_data = new int[max_nr_class];
						Array.Copy(count, 0, new_data, 0, count.Length);
						count = new_data;
					}
					label[nr_class] = this_label;
					count[nr_class] = 1;
					++nr_class;
				}
			}

			// Every pair of classes must satisfy nu * (n1 + n2) / 2 <= min(n1, n2).
			for (i = 0; i < nr_class; i++)
			{
				int n1 = count[i];
				for (int j = i + 1; j < nr_class; j++)
				{
					int n2 = count[j];
					if (param.Nu * (n1 + n2) / 2 > Math.Min(n1, n2))
						return "specified nu is infeasible";
				}
			}
		}

		return null;
	}
	2422
	// Returns 1 when the model carries the data needed for probability output:
	// both pairwise arrays for C_SVC / NU_SVC, or PairwiseProbabilityA for
	// EPSILON_SVR / NU_SVR. Returns 0 otherwise.
	public static int svm_check_probability_model(Model model)
	{
		if (((model.Parameter.SvmType == SvmType.C_SVC || model.Parameter.SvmType == SvmType.NU_SVC) &&
			model.PairwiseProbabilityA != null && model.PairwiseProbabilityB != null) ||
			((model.Parameter.SvmType == SvmType.EPSILON_SVR || model.Parameter.SvmType == SvmType.NU_SVR) &&
			model.PairwiseProbabilityA != null))
			return 1;
		else
			return 0;
	}
	2433	}
	2434
	2435	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences