Free cookie consent management tool by TermsFeed Policy Generator

source: branches/Persistence Test/LibSVM/GaussianTransform.cs @ 4021

Last change on this file since 4021 was 2415, checked in by gkronber, 15 years ago

Updated LibSVM project to latest version. #774

File size: 7.2 KB
Line 
1/*
2 * SVM.NET Library
3 * Copyright (C) 2008 Matthew Johnson
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
17 */
18
19
20using System;
21using System.Collections.Generic;
22using System.IO;
23using System.Globalization;
24using System.Threading;
25
namespace SVM
{
    /// <summary>
    /// A transform which learns the per-feature mean and standard deviation of a sample set and
    /// uses them to standardize new data: <c>(value - mean) / stddev</c>.
    /// </summary>
    /// <remarks>
    /// Feature indices are 1-based everywhere in the public API; the statistics arrays are
    /// 0-based, so index <c>k</c> maps to array slot <c>k - 1</c>.
    /// </remarks>
    public class GaussianTransform : IRangeTransform
    {
        private readonly double[] _means;
        private readonly double[] _stddevs;

        /// <summary>
        /// Determines the Gaussian transform for the provided problem.
        /// </summary>
        /// <remarks>
        /// Statistics for a feature are computed only over the nodes that actually carry that
        /// feature index; samples that omit the feature do not contribute to its mean or spread.
        /// The spread is the sample standard deviation (divisor <c>n - 1</c>).
        /// </remarks>
        /// <param name="prob">The Problem to analyze</param>
        /// <returns>The Gaussian transform for the problem</returns>
        public static GaussianTransform Compute(Problem prob)
        {
            int dimensions = prob.MaxIndex;

            // First pass: accumulate the sum and the number of occurrences of every feature.
            int[] counts = new int[dimensions];
            double[] means = new double[dimensions];
            foreach (Node[] sample in prob.X)
            {
                foreach (Node node in sample)
                {
                    int feature = node.Index - 1;
                    means[feature] += node.Value;
                    counts[feature]++;
                }
            }
            for (int i = 0; i < dimensions; i++)
            {
                // A feature that never occurs keeps a mean of 0 instead of dividing by zero.
                if (counts[i] > 0)
                {
                    means[i] /= counts[i];
                }
            }

            // Second pass: accumulate the squared deviations from the mean.
            double[] stddevs = new double[dimensions];
            foreach (Node[] sample in prob.X)
            {
                foreach (Node node in sample)
                {
                    int feature = node.Index - 1;
                    double diff = node.Value - means[feature];
                    stddevs[feature] += diff * diff;
                }
            }
            for (int i = 0; i < dimensions; i++)
            {
                // A zero sum of squares means the feature is constant (this includes features
                // seen zero times or once), so the n - 1 divisor below is never applied to them.
                if (stddevs[i] == 0)
                {
                    continue;
                }
                stddevs[i] = Math.Sqrt(stddevs[i] / (counts[i] - 1));
            }

            return new GaussianTransform(means, stddevs);
        }

        /// <summary>
        /// Constructor.  The arrays are stored by reference, not copied, and are expected to
        /// have the same length (one entry per feature).
        /// </summary>
        /// <param name="means">Means in each dimension</param>
        /// <param name="stddevs">Standard deviation in each dimension</param>
        /// <exception cref="ArgumentNullException">Either array is null.</exception>
        public GaussianTransform(double[] means, double[] stddevs)
        {
            if (means == null)
            {
                throw new ArgumentNullException("means");
            }
            if (stddevs == null)
            {
                throw new ArgumentNullException("stddevs");
            }

            _means = means;
            _stddevs = stddevs;
        }

        /// <summary>
        /// Saves the transform to a stream.  The samples are not stored, only the
        /// statistics: a first line with the number of features, then one
        /// "mean stddev" line per feature.
        /// </summary>
        /// <param name="stream">The destination stream; it is flushed but left open</param>
        /// <param name="transform">The transform</param>
        /// <exception cref="ArgumentNullException">An argument is null.</exception>
        public static void Write(Stream stream, GaussianTransform transform)
        {
            if (stream == null)
            {
                throw new ArgumentNullException("stream");
            }
            if (transform == null)
            {
                throw new ArgumentNullException("transform");
            }

            // NOTE(review): TemporaryCulture is declared elsewhere; presumably it switches the
            // thread culture for the duration of the call - confirm. The finally block makes
            // sure Stop() runs even when writing fails.
            TemporaryCulture.Start();
            try
            {
                // The writer is deliberately not disposed: disposing it would close the
                // caller's stream.
                StreamWriter output = new StreamWriter(stream);
                output.WriteLine(transform._means.Length.ToString(CultureInfo.InvariantCulture));
                for (int i = 0; i < transform._means.Length; i++)
                {
                    // Format explicitly with the invariant culture so the file always matches
                    // what Read parses, independent of the ambient culture.
                    output.WriteLine(string.Format(
                        CultureInfo.InvariantCulture,
                        "{0} {1}",
                        transform._means[i],
                        transform._stddevs[i]));
                }
                output.Flush();
            }
            finally
            {
                TemporaryCulture.Stop();
            }
        }

        /// <summary>
        /// Reads a GaussianTransform from the provided stream.
        /// </summary>
        /// <param name="stream">The source stream; it is left open</param>
        /// <returns>The transform</returns>
        /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
        /// <exception cref="EndOfStreamException">The data ends before all entries were read.</exception>
        /// <exception cref="FormatException">A line is not in the expected format.</exception>
        public static GaussianTransform Read(Stream stream)
        {
            if (stream == null)
            {
                throw new ArgumentNullException("stream");
            }

            TemporaryCulture.Start();
            try
            {
                // Not disposed on purpose: disposing the reader would close the caller's stream.
                StreamReader input = new StreamReader(stream);

                int length = int.Parse(ReadRequiredLine(input), CultureInfo.InvariantCulture);
                if (length < 0)
                {
                    throw new FormatException("The feature count must not be negative.");
                }

                double[] means = new double[length];
                double[] stddevs = new double[length];
                for (int i = 0; i < length; i++)
                {
                    // Split on any whitespace and ignore empty entries so that repeated
                    // separators do not produce unparsable empty tokens.
                    string[] parts = ReadRequiredLine(input).Split(
                        (char[])null, StringSplitOptions.RemoveEmptyEntries);
                    if (parts.Length < 2)
                    {
                        throw new FormatException(
                            "Expected a mean and a standard deviation on each line.");
                    }
                    means[i] = double.Parse(parts[0], CultureInfo.InvariantCulture);
                    stddevs[i] = double.Parse(parts[1], CultureInfo.InvariantCulture);
                }

                return new GaussianTransform(means, stddevs);
            }
            finally
            {
                TemporaryCulture.Stop();
            }
        }

        /// <summary>
        /// Reads the next line and fails with a descriptive exception when the input has ended.
        /// </summary>
        private static string ReadRequiredLine(TextReader reader)
        {
            string line = reader.ReadLine();
            if (line == null)
            {
                throw new EndOfStreamException(
                    "Unexpected end of data while reading a GaussianTransform.");
            }
            return line;
        }

        /// <summary>
        /// Saves the transform to the disk.  The samples are not stored, only the
        /// statistics.  An existing file is overwritten.
        /// </summary>
        /// <param name="filename">The destination filename</param>
        /// <param name="transform">The transform</param>
        public static void Write(string filename, GaussianTransform transform)
        {
            using (FileStream output = File.Open(filename, FileMode.Create))
            {
                Write(output, transform);
            }
        }

        /// <summary>
        /// Reads a GaussianTransform from the provided file.
        /// </summary>
        /// <param name="filename">The source filename</param>
        /// <returns>The transform</returns>
        public static GaussianTransform Read(string filename)
        {
            // OpenRead requests read-only access, so read-only files can be loaded as well.
            using (FileStream input = File.OpenRead(filename))
            {
                return Read(input);
            }
        }

        #region IRangeTransform Members

        /// <summary>
        /// Transform the input value using the transform stored for the provided index.
        /// </summary>
        /// <param name="input">Input value</param>
        /// <param name="index">1-based index of the transform to use</param>
        /// <returns>
        /// The standardized value, or 0 when the stored standard deviation for the index is 0
        /// </returns>
        public double Transform(double input, int index)
        {
            int slot = index - 1;
            double stddev = _stddevs[slot];

            // A constant feature carries no information; map it to 0 rather than divide by zero.
            if (stddev == 0)
            {
                return 0;
            }
            return (input - _means[slot]) / stddev;
        }

        /// <summary>
        /// Transforms the input array.  The input nodes are not modified; a new array of new
        /// nodes is returned.
        /// </summary>
        /// <param name="input">The array to transform</param>
        /// <returns>The transformed array</returns>
        /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
        public Node[] Transform(Node[] input)
        {
            if (input == null)
            {
                throw new ArgumentNullException("input");
            }

            Node[] output = new Node[input.Length];
            for (int i = 0; i < output.Length; i++)
            {
                int index = input[i].Index;
                double value = input[i].Value;
                output[i] = new Node(index, Transform(value, index));
            }
            return output;
        }

        #endregion
    }
}
Note: See TracBrowser for help on using the repository browser.