Context Navigation

source: branches/NCA/HeuristicLab.Algorithms.NCA/3.3/NeighborhoodComponentsAnalysis.cs @ 8425

Visit:

Last change on this file since 8425 was 8425, checked in by abeham, 12 years ago
#1913: Added several initialization methods (LDA, PCA, and Random)
File size: 6.2 KB

Rev	Line
[8412]	1	#region License Information
	2	/* HeuristicLab
	3	* Copyright (C) 2002-2012 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
	4	*
	5	* This file is part of HeuristicLab.
	6	*
	7	* HeuristicLab is free software: you can redistribute it and/or modify
	8	* it under the terms of the GNU General Public License as published by
	9	* the Free Software Foundation, either version 3 of the License, or
	10	* (at your option) any later version.
	11	*
	12	* HeuristicLab is distributed in the hope that it will be useful,
	13	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	14	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	15	* GNU General Public License for more details.
	16	*
	17	* You should have received a copy of the GNU General Public License
	18	* along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
	19	*/
	20	#endregion
	21
	22	using System;
	23	using System.Collections.Generic;
	24	using System.Linq;
	25	using HeuristicLab.Common;
	26	using HeuristicLab.Problems.DataAnalysis;
	27
	28	namespace HeuristicLab.Algorithms.NCA {
	29	public class NeighborhoodComponentsAnalysis {
	30
/// <summary>
/// Trains an NCA model. A start matrix is obtained from <paramref name="initializer"/>, optimized with
/// ALGLIB's conjugate gradient solver using <c>Gradient</c> as the objective callback, and the training
/// rows are then projected with the resulting transformation matrix.
/// </summary>
/// <param name="data">Problem data supplying the dataset, training rows, input variables and target variable.</param>
/// <param name="k">Passed through unchanged to the <c>NCAModel</c> constructor.</param>
/// <param name="reduceDimensions">Number of columns of the transformation matrix; must be at least 1.</param>
/// <param name="initializer">Produces the flattened (row-major, attributes x reduceDimensions) start matrix.</param>
/// <returns>The model built from the projected training set and the optimized transformation matrix.</returns>
/// <exception cref="ArgumentNullException"><paramref name="data"/> or <paramref name="initializer"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="reduceDimensions"/> is smaller than 1.</exception>
public static INCAModel Train(IClassificationProblemData data, int k, int reduceDimensions, INCAInitializer initializer) {
  if (data == null) throw new ArgumentNullException("data");
  if (initializer == null) throw new ArgumentNullException("initializer");
  // Gradient divides by this value, so reject it here with a meaningful message.
  if (reduceDimensions < 1) throw new ArgumentOutOfRangeException("reduceDimensions", "The number of dimensions must be at least 1.");

  // Both sequences are enumerated several times below; materialize them once so the
  // row/column order is guaranteed to be the same everywhere in this method.
  var trainingRows = data.TrainingIndices.ToArray();
  var inputVariables = data.AllowedInputVariables.ToArray();
  var instances = trainingRows.Length;
  var attributes = inputVariables.Length;

  double[] matrix = initializer.Initialize(data, reduceDimensions);

  alglib.mincgstate state;
  alglib.mincgreport rep;

  // first run
  alglib.mincgcreate(matrix, out state);
  // NOTE(review): 1e-10 is the first stopping-condition argument, the remaining ones are left at 0;
  // see the ALGLIB mincgsetcond documentation for their exact meaning.
  alglib.mincgsetcond(state, 0.0000000001, 0, 0, 0);
  alglib.mincgoptimize(state, Gradient, null, new OptimizationInfo(data, reduceDimensions));
  alglib.mincgresults(state, out matrix, out rep);

  // Unflatten the optimized vector (row-major) into an attributes x reduceDimensions matrix.
  var transformationMatrix = new double[attributes, reduceDimensions];
  var counter = 0;
  for (var i = 0; i < attributes; i++) {
    for (var j = 0; j < reduceDimensions; j++) {
      transformationMatrix[i, j] = matrix[counter++];
    }
  }

  // Project every training row: transformed = row * transformationMatrix.
  var transformedTrainingset = new double[instances, reduceDimensions];
  for (var row = 0; row < instances; row++) {
    for (var i = 0; i < attributes; i++) {
      var val = data.Dataset.GetDoubleValue(inputVariables[i], trainingRows[row]);
      for (var j = 0; j < reduceDimensions; j++) {
        transformedTrainingset[row, j] += val * transformationMatrix[i, j];
      }
    }
  }

  // Read the class values for exactly the rows that were projected above (same lookup that
  // Gradient uses), instead of re-deriving the training rows from the partition bounds.
  var classValues = data.Dataset.GetDoubleValues(data.TargetVariable, trainingRows).ToArray();

  return new NCAModel(transformedTrainingset, transformationMatrix, k, data.TargetVariable, data.AllowedInputVariables, classValues);
}
	72
	73	private static void Gradient(double[] A, ref double func, double[] grad, object obj) {
	74	var info = (OptimizationInfo)obj;
	75	var instances = info.ProblemData.TrainingIndices.ToArray();
[8422]	76	var attributes = info.ProblemData.AllowedInputVariables.Count();
[8420]	77	var AMatrix = new Matrix(A, A.Length / info.ReduceDimensions, info.ReduceDimensions);
	78
[8422]	79	alglib.sparsematrix probabilities;
	80	alglib.sparsecreate(instances.Length, instances.Length, out probabilities);
	81	var distances = new double[instances.Length];
[8412]	82	for (int i = 0; i < instances.Length - 1; i++) {
[8420]	83	var iVector = new Matrix(GetRow(info.ProblemData, instances[i]));
[8412]	84	var denom = 0.0;
	85	for (int k = 0; k < instances.Length; k++) {
	86	if (k == i) continue;
[8422]	87	var kVector = new Matrix(GetRow(info.ProblemData, instances[k]));
	88	distances[k] = iVector.Multiply(AMatrix).Subtract(kVector.Multiply(AMatrix)).Length();
	89	denom += Math.Exp(-(distances[k] * distances[k]));
[8412]	90	}
[8420]	91	if (denom > 0) {
[8422]	92	for (int j = i + 1; j < instances.Length; j++) {
[8420]	93	if (i == j) continue;
[8422]	94	var v = Math.Exp(-(distances[j] * distances[j])) / denom;
[8420]	95	alglib.sparseset(probabilities, i, j, v);
[8422]	96	alglib.sparseset(probabilities, j, i, v);
[8420]	97	}
[8412]	98	}
	99	}
[8422]	100	alglib.sparseconverttocrs(probabilities); // needed to enumerate in order (top-down and left-right)
[8412]	101
[8420]	102	int t0 = 0, t1 = 0, r, c;
	103	double val;
	104	var classes = info.ProblemData.Dataset.GetDoubleValues(info.ProblemData.TargetVariable, instances).ToArray();
[8412]	105	var pi = new double[instances.Length];
[8420]	106	while (alglib.sparseenumerate(probabilities, ref t0, ref t1, out r, out c, out val)) {
	107	if (classes[r].IsAlmost(classes[c]))
	108	pi[r] += val;
[8412]	109	}
	110
[8422]	111	var innerSum = new double[attributes, attributes];
	112	while (alglib.sparseenumerate(probabilities, ref t0, ref t1, out r, out c, out val)) {
	113	var vector = new Matrix(GetRow(info.ProblemData, instances[r])).Subtract(new Matrix(GetRow(info.ProblemData, instances[c]))).Apply();
	114	vector.OuterProduct(vector).Multiply(val * pi[r]).AddTo(innerSum);
[8412]	115
[8420]	116	if (classes[r].IsAlmost(classes[c])) {
[8422]	117	vector.OuterProduct(vector).Multiply(-val).AddTo(innerSum);
[8412]	118	}
	119	}
[8422]	120
	121	func = -pi.Sum();
	122
	123	grad = AMatrix.Multiply(-2.0).Transpose().Multiply(new Matrix(innerSum)).Transpose().ToArray();
[8412]	124	}
	125
	126	private static IEnumerable<double> GetRow(IClassificationProblemData data, int row) {
	127	return data.AllowedInputVariables.Select(v => data.Dataset.GetDoubleValue(v, row));
	128	}
	129
/// <summary>
/// Trains a model via <c>Train</c> with the given arguments and wraps it, together with
/// <paramref name="problemData"/>, in an <c>NCAClassificationSolution</c>.
/// </summary>
public static NCAClassificationSolution CreateNCASolution(IClassificationProblemData problemData, int k, int reduceDimensions, INCAInitializer initializer) {
  var model = Train(problemData, k, reduceDimensions, initializer);
  return new NCAClassificationSolution(problemData, model);
}
	133
	134	private class OptimizationInfo {
	135	public IClassificationProblemData ProblemData { get; set; }
	136	public int ReduceDimensions { get; set; }
	137	public OptimizationInfo(IClassificationProblemData problem, int reduceDimensions) {
	138	this.ProblemData = problem;
	139	this.ReduceDimensions = reduceDimensions;
	140	}
	141	}
	142	}
	143	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences