/*
 * SVM.NET Library
 * Copyright (C) 2008 Matthew Johnson
 * 
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;

namespace SVM
{
    /// <summary>
    /// A transform which learns the mean and variance of a sample set and uses these to transform new data
    /// so that it has zero mean and unit variance.
    /// </summary>
    /// <remarks>
    /// Statistics are kept per node index. For a given index, only the samples that actually contain a node
    /// with that index contribute to its mean and standard deviation.
    /// </remarks>
    public class GaussianTransform : IRangeTransform
    {
        // Null for transforms created by Read: only the statistics are persisted, never the samples.
        private readonly List<Node[]> _samples;
        private readonly int _maxIndex;
        private double[] _means;
        private double[] _stddevs;

        /// <summary>
        /// Constructor.
        /// </summary>
        /// <param name="maxIndex">
        /// The maximum index of the vectors to be transformed. Every node index passed to
        /// <see cref="Add"/> must be in the range [0, maxIndex).
        /// </param>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="maxIndex"/> is negative.</exception>
        public GaussianTransform(int maxIndex)
        {
            if (maxIndex < 0)
            {
                throw new ArgumentOutOfRangeException("maxIndex", "The maximum index must not be negative.");
            }

            _samples = new List<Node[]>();
            // Previously this assignment was missing, which left the statistics arrays zero-length.
            _maxIndex = maxIndex;
        }

        // Used by Read to rebuild a transform directly from stored statistics.
        private GaussianTransform(double[] means, double[] stddevs, int maxIndex)
        {
            _means = means;
            _stddevs = stddevs;
            _maxIndex = maxIndex;
        }

        /// <summary>
        /// Adds a sample to the data.  No computation is performed.  The maximum index of the
        /// sample must be less than MaxIndex.
        /// </summary>
        /// <param name="sample">The sample to add</param>
        /// <exception cref="ArgumentNullException"><paramref name="sample"/> is null.</exception>
        /// <exception cref="InvalidOperationException">The transform was read from disk and holds no samples.</exception>
        public void Add(Node[] sample)
        {
            if (sample == null)
            {
                throw new ArgumentNullException("sample");
            }

            EnsureSampleStore();
            _samples.Add(sample);
        }

        /// <summary>
        /// Computes the statistics for the samples which have been obtained so far.
        /// </summary>
        /// <remarks>
        /// The standard deviation uses the (n - 1) denominator. An index that appears in fewer than two
        /// samples, or whose values are all identical, keeps a standard deviation of zero, which
        /// <see cref="Transform(double, int)"/> maps to an output of zero.
        /// </remarks>
        /// <exception cref="InvalidOperationException">The transform was read from disk and holds no samples.</exception>
        public void ComputeStatistics()
        {
            EnsureSampleStore();

            // Work on locals so the fields are only replaced once the whole computation has succeeded.
            int[] counts = new int[_maxIndex];
            double[] means = new double[_maxIndex];
            foreach (Node[] sample in _samples)
            {
                foreach (Node node in sample)
                {
                    means[node.Index] += node.Value;
                    counts[node.Index]++;
                }
            }

            for (int i = 0; i < _maxIndex; i++)
            {
                // An index that never occurred keeps a mean of zero.
                if (counts[i] > 0)
                {
                    means[i] /= counts[i];
                }
            }

            double[] stddevs = new double[_maxIndex];
            foreach (Node[] sample in _samples)
            {
                foreach (Node node in sample)
                {
                    double diff = node.Value - means[node.Index];
                    stddevs[node.Index] += diff * diff;
                }
            }

            for (int i = 0; i < _maxIndex; i++)
            {
                // Exact zero is a deliberate sentinel for "no spread"; skipping it (and counts below two)
                // also avoids dividing by zero.
                if (counts[i] > 1 && stddevs[i] != 0)
                {
                    stddevs[i] = Math.Sqrt(stddevs[i] / (counts[i] - 1));
                }
            }

            _means = means;
            _stddevs = stddevs;
        }

        /// <summary>
        /// Saves the transform to the provided stream.  The samples are not stored, only the
        /// statistics.
        /// </summary>
        /// <remarks>
        /// Numbers are written with the invariant culture in a round-trippable format, so a file can be
        /// read back on a machine with different regional settings. The stream is flushed but left open.
        /// </remarks>
        /// <param name="stream">The destination stream</param>
        /// <param name="transform">The transform</param>
        /// <exception cref="ArgumentNullException">An argument is null.</exception>
        /// <exception cref="InvalidOperationException">The transform has no statistics yet.</exception>
        public static void Write(Stream stream, GaussianTransform transform)
        {
            if (stream == null)
            {
                throw new ArgumentNullException("stream");
            }
            if (transform == null)
            {
                throw new ArgumentNullException("transform");
            }
            transform.EnsureStatistics();

            // The writer is intentionally not disposed: disposing it would close the caller's stream.
            StreamWriter output = new StreamWriter(stream);
            output.WriteLine(transform._maxIndex.ToString(CultureInfo.InvariantCulture));
            for (int i = 0; i < transform._maxIndex; i++)
            {
                output.WriteLine(
                    "{0} {1}",
                    transform._means[i].ToString("G17", CultureInfo.InvariantCulture),
                    transform._stddevs[i].ToString("G17", CultureInfo.InvariantCulture));
            }
            output.Flush();
        }

        /// <summary>
        /// Reads a GaussianTransform from the provided stream.
        /// </summary>
        /// <remarks>
        /// Numbers are parsed with the invariant culture. The returned transform carries statistics only;
        /// it holds no samples.
        /// </remarks>
        /// <param name="stream">The source stream</param>
        /// <returns>The transform</returns>
        /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
        /// <exception cref="InvalidDataException">The data is truncated or structurally malformed.</exception>
        /// <exception cref="FormatException">A number in the data cannot be parsed.</exception>
        public static GaussianTransform Read(Stream stream)
        {
            if (stream == null)
            {
                throw new ArgumentNullException("stream");
            }

            // The reader is intentionally not disposed: disposing it would close the caller's stream.
            StreamReader input = new StreamReader(stream);
            int length = int.Parse(ReadRequiredLine(input), CultureInfo.InvariantCulture);
            if (length < 0)
            {
                throw new InvalidDataException("The stored entry count must not be negative.");
            }

            double[] means = new double[length];
            double[] stddevs = new double[length];
            for (int i = 0; i < length; i++)
            {
                // A null separator list splits on any whitespace; empty entries are dropped so that
                // repeated or trailing spaces are tolerated.
                string[] parts = ReadRequiredLine(input).Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
                if (parts.Length < 2)
                {
                    throw new InvalidDataException(string.Format(
                        CultureInfo.InvariantCulture,
                        "Entry {0} does not contain both a mean and a standard deviation.",
                        i));
                }

                means[i] = double.Parse(parts[0], CultureInfo.InvariantCulture);
                stddevs[i] = double.Parse(parts[1], CultureInfo.InvariantCulture);
            }

            return new GaussianTransform(means, stddevs, length);
        }

        /// <summary>
        /// Saves the transform to the disk.  The samples are not stored, only the
        /// statistics.
        /// </summary>
        /// <param name="filename">The destination filename</param>
        /// <param name="transform">The transform</param>
        /// <exception cref="ArgumentNullException"><paramref name="transform"/> is null.</exception>
        /// <exception cref="InvalidOperationException">The transform has no statistics yet.</exception>
        public static void Write(string filename, GaussianTransform transform)
        {
            // Validate before opening: FileMode.Create truncates an existing file immediately.
            if (transform == null)
            {
                throw new ArgumentNullException("transform");
            }
            transform.EnsureStatistics();

            using (FileStream output = File.Open(filename, FileMode.Create))
            {
                Write(output, transform);
            }
        }

        /// <summary>
        /// Reads a GaussianTransform from the provided file.
        /// </summary>
        /// <param name="filename">The source filename</param>
        /// <returns>The transform</returns>
        public static GaussianTransform Read(string filename)
        {
            using (FileStream input = File.Open(filename, FileMode.Open))
            {
                return Read(input);
            }
        }

        #region IRangeTransform Members

        /// <summary>
        /// Transform the input value using the transform stored for the provided index.
        /// <see cref="ComputeStatistics"/> must be called first, or the transform must
        /// have been read from the disk.
        /// </summary>
        /// <param name="input">Input value</param>
        /// <param name="index">Index of the transform to use</param>
        /// <returns>
        /// The transformed value, or zero when the standard deviation stored for the index is zero.
        /// </returns>
        /// <exception cref="InvalidOperationException">The transform has no statistics yet.</exception>
        public double Transform(double input, int index)
        {
            EnsureStatistics();

            // Exact zero marks an index with no measurable spread; dividing by it would be meaningless.
            if (_stddevs[index] == 0)
            {
                return 0;
            }

            return (input - _means[index]) / _stddevs[index];
        }

        /// <summary>
        /// Transforms the input array.  <see cref="ComputeStatistics"/> must be called
        /// first, or the transform must have been read from the disk.
        /// </summary>
        /// <param name="input">The array to transform</param>
        /// <returns>The transformed array; the input array is not modified</returns>
        /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
        /// <exception cref="InvalidOperationException">The transform has no statistics yet.</exception>
        public Node[] Transform(Node[] input)
        {
            if (input == null)
            {
                throw new ArgumentNullException("input");
            }

            Node[] output = new Node[input.Length];
            for (int i = 0; i < output.Length; i++)
            {
                int index = input[i].Index;
                double value = input[i].Value;
                output[i] = new Node(index, Transform(value, index));
            }
            return output;
        }

        #endregion

        // Throws when the transform has no sample list, i.e. it was rebuilt from stored statistics.
        private void EnsureSampleStore()
        {
            if (_samples == null)
            {
                throw new InvalidOperationException(
                    "This transform was read from disk and holds no samples; create a new GaussianTransform to learn from data.");
            }
        }

        // Throws when neither ComputeStatistics nor Read has supplied the means and standard deviations.
        private void EnsureStatistics()
        {
            if (_means == null || _stddevs == null)
            {
                throw new InvalidOperationException(
                    "No statistics are available; call ComputeStatistics first or read the transform from disk.");
            }
        }

        // Reads one line, turning an unexpected end of data into a descriptive exception.
        private static string ReadRequiredLine(TextReader input)
        {
            string line = input.ReadLine();
            if (line == null)
            {
                throw new InvalidDataException("Unexpected end of data while reading a GaussianTransform.");
            }
            return line;
        }
    }
}