Context Navigation

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Models/GaussianMixtureModel.cs @ 11742

Visit:

Last change on this file since 11742 was 11730, checked in by gkronber, 9 years ago
#2283: several major extensions for grammatical optimization
File size: 2.7 KB

Rev	Line
[11730]	1	using System;
	2	using System.Collections.Generic;
	3	using System.Diagnostics;
	4	using System.Linq;
	5	using System.Text;
	6	using System.Threading.Tasks;
	7	using HeuristicLab.Common;
	8
	9	namespace HeuristicLab.Algorithms.Bandits.Models {
	10	public class GaussianMixtureModel : IModel {
	11	private readonly int numActions;
	12	private readonly double[][] meanMean; // mean of mean for each arm and component
	13	private readonly double[][] meanVariance; // variance of mean for each arm and component
	14	private readonly double[][] componentProb;
	15
	16	// parameters of beta prior distribution
	17	private int numComponents;
	18	private double priorMean;
	19
	20	public GaussianMixtureModel(int numActions, double priorMean = 0.5, int nComponents = 5) {
	21	this.numActions = numActions;
	22	this.numComponents = nComponents;
	23	this.priorMean = priorMean;
	24	this.meanMean = new double[numActions][];
	25	this.meanVariance = new double[numActions][];
	26	this.componentProb = new double[numActions][];
	27	for (int a = 0; a < numActions; a++) {
	28	// TODO: probably need to initizalize this randomly to allow learning
	29	meanMean[a] = Enumerable.Repeat(priorMean, nComponents).ToArray();
	30	meanVariance[a] = Enumerable.Repeat(1.0, nComponents).ToArray(); // prior variance of mean variance = 1
	31	componentProb[a] = Enumerable.Repeat(1.0 / nComponents, nComponents).ToArray(); // uniform prior for component probabilities
	32	}
	33	}
	34
	35
	36	public double[] SampleExpectedRewards(Random random) {
	37	// sample mean foreach action and component from the prior
	38	var exp = new double[numActions];
	39	for (int a = 0; a < numActions; a++) {
	40	var sumReward = 0.0;
	41	var numSamples = 10000;
	42	var sampledComponents = Enumerable.Range(0, numComponents).SampleProportional(random, componentProb[a]).Take(numSamples);
	43	foreach (var k in sampledComponents) {
	44	sumReward += Rand.RandNormal(random) * Math.Sqrt(meanVariance[a][k]) + meanMean[a][k];
	45	}
	46	exp[a] = sumReward / (double)numSamples;
	47	}
	48
	49	return exp;
	50	}
	51
	52	public void Update(int action, double reward) {
	53	// see http://www.cs.toronto.edu/~mackay/itprnn/ps/302.320.pdf Algorithm 22.2 soft k-means
	54	throw new NotImplementedException();
	55	}
	56
	57	public void Disable(int action) {
	58	Array.Clear(meanMean[action], 0, meanMean[action].Length);
	59	Array.Clear(meanVariance[action], 0, meanVariance[action].Length);
	60	}
	61
	62	public void Reset() {
	63	Array.Clear(meanMean, 0, meanMean.Length);
	64	Array.Clear(meanVariance, 0, meanVariance.Length);
	65	}
	66
	67	public void PrintStats() {
	68	throw new NotImplementedException();
	69	}
	70	}
	71	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences