/*
 * SVM.NET Library
 * Copyright (C) 2008 Matthew Johnson
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

using System;
using System.Collections.Generic;
using System.IO;
using System.Globalization;
using System.Threading;

namespace SVM
{
    /// <summary>
    /// A transform which learns the mean and variance of a sample set and uses these to transform new data
    /// so that it has zero mean and unit variance.
    /// </summary>
    public class GaussianTransform : IRangeTransform
    {
        // Per-dimension statistics; slot i holds the values for feature index i + 1.
        private readonly double[] _means;
        private readonly double[] _stddevs;

        /// <summary>
        /// Determines the Gaussian transform for the provided problem.
        /// </summary>
        /// <remarks>
        /// For each feature index, the mean and the sample standard deviation (n - 1 denominator)
        /// are computed over the nodes that actually carry that index; samples that omit an
        /// index do not contribute to its statistics.
        /// </remarks>
        /// <param name="prob">The Problem to analyze</param>
        /// <returns>The Gaussian transform for the problem</returns>
        /// <exception cref="ArgumentNullException"><paramref name="prob"/> is null.</exception>
        public static GaussianTransform Compute(Problem prob)
        {
            if (prob == null)
            {
                throw new ArgumentNullException("prob");
            }

            int dimensions = prob.MaxIndex;
            int[] counts = new int[dimensions];
            double[] means = new double[dimensions];

            // First pass: accumulate the per-feature sums and occurrence counts.
            // Node indices are 1-based, hence the "- 1" when addressing the arrays.
            foreach (Node[] sample in prob.X)
            {
                foreach (Node node in sample)
                {
                    int slot = node.Index - 1;
                    means[slot] += node.Value;
                    counts[slot]++;
                }
            }

            for (int i = 0; i < dimensions; i++)
            {
                // A feature that never occurs has a zero sum; leave its mean at zero
                // instead of dividing by a zero count.
                if (counts[i] > 0)
                {
                    means[i] /= counts[i];
                }
            }

            // Second pass: accumulate the squared deviations from the mean.
            double[] stddevs = new double[dimensions];
            foreach (Node[] sample in prob.X)
            {
                foreach (Node node in sample)
                {
                    int slot = node.Index - 1;
                    double diff = node.Value - means[slot];
                    stddevs[slot] += diff * diff;
                }
            }

            for (int i = 0; i < dimensions; i++)
            {
                // A zero sum of squares (constant, single-occurrence or unseen feature) stays
                // at exactly zero; Transform(double, int) tests for that value.
                if (stddevs[i] == 0)
                {
                    continue;
                }

                stddevs[i] = Math.Sqrt(stddevs[i] / (counts[i] - 1));
            }

            return new GaussianTransform(means, stddevs);
        }

        /// <summary>
        /// Constructor.
        /// </summary>
        /// <param name="means">Means in each dimension</param>
        /// <param name="stddevs">Standard deviation in each dimension</param>
        public GaussianTransform(double[] means, double[] stddevs)
        {
            // The arrays are stored by reference, not copied.
            _means = means;
            _stddevs = stddevs;
        }

        /// <summary>
        /// Saves the transform to the provided stream.  The samples are not stored, only the
        /// statistics.
        /// </summary>
        /// <remarks>
        /// The output is one line holding the number of dimensions, followed by one
        /// "mean stddev" line per dimension.  The stream is flushed but left open.
        /// </remarks>
        /// <param name="stream">The destination stream</param>
        /// <param name="transform">The transform</param>
        /// <exception cref="ArgumentNullException"><paramref name="transform"/> is null.</exception>
        public static void Write(Stream stream, GaussianTransform transform)
        {
            if (transform == null)
            {
                throw new ArgumentNullException("transform");
            }

            TemporaryCulture.Start();
            try
            {
                // Deliberately not disposed: disposing the writer would also close the
                // caller's stream.
                StreamWriter output = new StreamWriter(stream);

                // Format explicitly with the invariant culture so the output always matches
                // what Read parses, independent of the ambient thread culture.
                output.WriteLine(transform._means.Length.ToString(CultureInfo.InvariantCulture));
                for (int i = 0; i < transform._means.Length; i++)
                {
                    output.WriteLine(string.Format(
                        CultureInfo.InvariantCulture,
                        "{0} {1}",
                        transform._means[i],
                        transform._stddevs[i]));
                }

                output.Flush();
            }
            finally
            {
                // Always pair Stop() with Start(), even when writing fails.
                TemporaryCulture.Stop();
            }
        }

        /// <summary>
        /// Reads a GaussianTransform from the provided stream.
        /// </summary>
        /// <param name="stream">The source stream</param>
        /// <returns>The transform</returns>
        /// <exception cref="EndOfStreamException">The stream ends before all statistics were read.</exception>
        /// <exception cref="FormatException">The dimension count or a statistics line is malformed.</exception>
        public static GaussianTransform Read(Stream stream)
        {
            TemporaryCulture.Start();
            try
            {
                // Deliberately not disposed: disposing the reader would also close the
                // caller's stream.
                StreamReader input = new StreamReader(stream);

                string header = input.ReadLine();
                if (header == null)
                {
                    throw new EndOfStreamException("The stream does not contain a GaussianTransform header.");
                }

                int length = int.Parse(header, CultureInfo.InvariantCulture);
                if (length < 0)
                {
                    throw new FormatException("The number of dimensions must not be negative.");
                }

                double[] means = new double[length];
                double[] stddevs = new double[length];
                for (int i = 0; i < length; i++)
                {
                    string line = input.ReadLine();
                    if (line == null)
                    {
                        throw new EndOfStreamException("The stream ended before all dimensions were read.");
                    }

                    // A null separator splits on any whitespace; dropping empty entries
                    // tolerates repeated, leading and trailing whitespace.
                    string[] parts = line.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
                    if (parts.Length < 2)
                    {
                        throw new FormatException("Each statistics line must contain a mean and a standard deviation.");
                    }

                    means[i] = double.Parse(parts[0], CultureInfo.InvariantCulture);
                    stddevs[i] = double.Parse(parts[1], CultureInfo.InvariantCulture);
                }

                return new GaussianTransform(means, stddevs);
            }
            finally
            {
                // Always pair Stop() with Start(), even when parsing fails.
                TemporaryCulture.Stop();
            }
        }

        /// <summary>
        /// Saves the transform to the disk.  The samples are not stored, only the
        /// statistics.
        /// </summary>
        /// <param name="filename">The destination filename</param>
        /// <param name="transform">The transform</param>
        public static void Write(string filename, GaussianTransform transform)
        {
            // FileMode.Create overwrites an existing file.
            using (FileStream output = File.Open(filename, FileMode.Create))
            {
                Write(output, transform);
            }
        }

        /// <summary>
        /// Reads a GaussianTransform from the specified file.
        /// </summary>
        /// <param name="filename">The source filename</param>
        /// <returns>The transform</returns>
        public static GaussianTransform Read(string filename)
        {
            using (FileStream input = File.Open(filename, FileMode.Open))
            {
                return Read(input);
            }
        }

        #region IRangeTransform Members

        /// <summary>
        /// Transform the input value using the transform stored for the provided index.
        /// </summary>
        /// <param name="input">Input value</param>
        /// <param name="index">Index of the transform to use (1-based)</param>
        /// <returns>
        /// The input shifted by the stored mean and divided by the stored standard deviation,
        /// or zero when the stored standard deviation is exactly zero.
        /// </returns>
        public double Transform(double input, int index)
        {
            // Convert the 1-based feature index into an array offset.
            index--;

            // Exact comparison is intended: Compute leaves degenerate dimensions at exactly 0.
            if (_stddevs[index] == 0)
            {
                return 0;
            }

            double diff = input - _means[index];
            diff /= _stddevs[index];
            return diff;
        }

        /// <summary>
        /// Transforms the input array.
        /// </summary>
        /// <param name="input">The array to transform</param>
        /// <returns>The transformed array; the input nodes are not modified</returns>
        public Node[] Transform(Node[] input)
        {
            Node[] output = new Node[input.Length];
            for (int i = 0; i < output.Length; i++)
            {
                int index = input[i].Index;
                double value = input[i].Value;
                output[i] = new Node(index, Transform(value, index));
            }

            return output;
        }

        #endregion
    }
}