/*
* SVM.NET Library
* Copyright (C) 2008 Matthew Johnson
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
using System;
using System.Collections.Generic;
using System.IO;
using System.Globalization;
using System.Threading;
namespace SVM
{
/// <summary>
/// A transform which learns the mean and variance of a sample set and uses these to transform new data
/// so that it has zero mean and unit variance.
/// </summary>
public class GaussianTransform : IRangeTransform
{
    private readonly double[] _means;
    private readonly double[] _stddevs;

    /// <summary>
    /// Determines the Gaussian transform for the provided problem.
    /// </summary>
    /// <remarks>
    /// Statistics are accumulated per feature index over the nodes that are actually present in
    /// each sample; a feature that is absent from a sample contributes nothing (it is not counted
    /// as a zero). The standard deviation is the sample standard deviation (divisor n - 1).
    /// Features seen fewer than two times keep a standard deviation of 0, which
    /// <see cref="Transform(double, int)"/> maps to an output of 0.
    /// </remarks>
    /// <param name="prob">The Problem to analyze</param>
    /// <returns>The Gaussian transform for the problem</returns>
    public static GaussianTransform Compute(Problem prob)
    {
        int dimensions = prob.MaxIndex;
        int[] counts = new int[dimensions];
        double[] means = new double[dimensions];

        // First pass: per-feature sum and occurrence count. Node indices are 1-based.
        foreach (Node[] sample in prob.X)
        {
            for (int i = 0; i < sample.Length; i++)
            {
                int feature = sample[i].Index - 1;
                means[feature] += sample[i].Value;
                counts[feature]++;
            }
        }

        for (int i = 0; i < dimensions; i++)
        {
            // A feature that never occurs keeps a mean of 0; guarding here avoids 0/0 = NaN.
            if (counts[i] > 0)
            {
                means[i] /= counts[i];
            }
        }

        // Second pass: per-feature sum of squared deviations from the mean.
        double[] stddevs = new double[dimensions];
        foreach (Node[] sample in prob.X)
        {
            for (int i = 0; i < sample.Length; i++)
            {
                int feature = sample[i].Index - 1;
                double diff = sample[i].Value - means[feature];
                stddevs[feature] += diff * diff;
            }
        }

        for (int i = 0; i < dimensions; i++)
        {
            // With fewer than two observations the divisor (n - 1) would be zero or negative;
            // such features are left untouched.
            if (counts[i] > 1)
            {
                stddevs[i] = Math.Sqrt(stddevs[i] / (counts[i] - 1));
            }
        }

        return new GaussianTransform(means, stddevs);
    }

    /// <summary>
    /// Constructor.
    /// </summary>
    /// <param name="means">Means in each dimension</param>
    /// <param name="stddevs">Standard deviation in each dimension</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="means"/> or <paramref name="stddevs"/> is null.
    /// </exception>
    public GaussianTransform(double[] means, double[] stddevs)
    {
        if (means == null)
        {
            throw new ArgumentNullException("means");
        }
        if (stddevs == null)
        {
            throw new ArgumentNullException("stddevs");
        }

        _means = means;
        _stddevs = stddevs;
    }

    /// <summary>
    /// Saves the transform to the provided stream. The samples are not stored, only the
    /// statistics.
    /// </summary>
    /// <remarks>
    /// The output is one line holding the number of dimensions, followed by one
    /// "mean stddev" line per dimension. The stream is flushed but left open.
    /// </remarks>
    /// <param name="stream">The destination stream</param>
    /// <param name="transform">The transform</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="stream"/> or <paramref name="transform"/> is null.
    /// </exception>
    public static void Write(Stream stream, GaussianTransform transform)
    {
        if (stream == null)
        {
            throw new ArgumentNullException("stream");
        }
        if (transform == null)
        {
            throw new ArgumentNullException("transform");
        }

        // TemporaryCulture is defined elsewhere in the project; presumably it switches the
        // thread culture so numbers are formatted consistently. Stop() must run even when
        // writing fails, hence the try/finally.
        TemporaryCulture.Start();
        try
        {
            // The writer is intentionally not disposed: disposing it would close the
            // caller's stream. Flushing is enough to push the buffered text through.
            StreamWriter output = new StreamWriter(stream);
            output.WriteLine(transform._means.Length);
            for (int i = 0; i < transform._means.Length; i++)
            {
                output.WriteLine("{0} {1}", transform._means[i], transform._stddevs[i]);
            }
            output.Flush();
        }
        finally
        {
            TemporaryCulture.Stop();
        }
    }

    /// <summary>
    /// Reads a GaussianTransform from the provided stream.
    /// </summary>
    /// <param name="stream">The source stream</param>
    /// <returns>The transform</returns>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    /// <exception cref="EndOfStreamException">The stream ends before all lines were read.</exception>
    /// <exception cref="FormatException">A line does not hold the expected numeric values.</exception>
    public static GaussianTransform Read(Stream stream)
    {
        if (stream == null)
        {
            throw new ArgumentNullException("stream");
        }

        // Stop() must run even when the data is malformed, hence the try/finally.
        TemporaryCulture.Start();
        try
        {
            // The reader is intentionally not disposed so the caller's stream stays open.
            StreamReader input = new StreamReader(stream);
            int length = int.Parse(ReadRequiredLine(input), CultureInfo.InvariantCulture);
            double[] means = new double[length];
            double[] stddevs = new double[length];
            for (int i = 0; i < length; i++)
            {
                string[] parts = ReadRequiredLine(input).Split();
                if (parts.Length < 2)
                {
                    throw new FormatException("Expected a mean and a standard deviation on each line of a GaussianTransform.");
                }
                means[i] = double.Parse(parts[0], CultureInfo.InvariantCulture);
                stddevs[i] = double.Parse(parts[1], CultureInfo.InvariantCulture);
            }
            return new GaussianTransform(means, stddevs);
        }
        finally
        {
            TemporaryCulture.Stop();
        }
    }

    /// <summary>
    /// Reads the next line, failing with a descriptive exception when the input is exhausted.
    /// </summary>
    private static string ReadRequiredLine(TextReader reader)
    {
        string line = reader.ReadLine();
        if (line == null)
        {
            throw new EndOfStreamException("Unexpected end of stream while reading a GaussianTransform.");
        }
        return line;
    }

    /// <summary>
    /// Saves the transform to the disk. The samples are not stored, only the
    /// statistics.
    /// </summary>
    /// <param name="filename">The destination filename</param>
    /// <param name="transform">The transform</param>
    public static void Write(string filename, GaussianTransform transform)
    {
        // FileMode.Create overwrites an existing file or creates a new one.
        using (FileStream output = File.Open(filename, FileMode.Create))
        {
            Write(output, transform);
        }
    }

    /// <summary>
    /// Reads a GaussianTransform from the provided file.
    /// </summary>
    /// <param name="filename">The source filename</param>
    /// <returns>The transform</returns>
    public static GaussianTransform Read(string filename)
    {
        // Open with read-only access so read-only files can be loaded and other readers
        // are not locked out.
        using (FileStream input = File.OpenRead(filename))
        {
            return Read(input);
        }
    }

    #region IRangeTransform Members

    /// <summary>
    /// Transform the input value using the transform stored for the provided index.
    /// </summary>
    /// <param name="input">Input value</param>
    /// <param name="index">Index of the transform to use (1-based)</param>
    /// <returns>
    /// The transformed value, or 0 when the stored standard deviation for the index is 0
    /// </returns>
    public double Transform(double input, int index)
    {
        // Indices are 1-based; the statistics arrays are 0-based.
        int slot = index - 1;

        // An exact 0 marks a dimension with no measurable spread; dividing by it would be
        // meaningless, so every value maps to 0.
        if (_stddevs[slot] == 0)
        {
            return 0;
        }

        return (input - _means[slot]) / _stddevs[slot];
    }

    /// <summary>
    /// Transforms the input array.
    /// </summary>
    /// <param name="input">The array to transform</param>
    /// <returns>The transformed array; the input array and its nodes are not modified</returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
    public Node[] Transform(Node[] input)
    {
        if (input == null)
        {
            throw new ArgumentNullException("input");
        }

        Node[] output = new Node[input.Length];
        for (int i = 0; i < output.Length; i++)
        {
            int index = input[i].Index;
            double value = input[i].Value;
            output[i] = new Node(index, Transform(value, index));
        }
        return output;
    }

    #endregion
}
}