
Changeset 11744


Timestamp: 01/09/15 16:54:05 (9 years ago)
Author: gkronber
Message: #2283 worked on TD, and models for MCTS

Location: branches/HeuristicLab.Problems.GrammaticalOptimization
Files: 3 added, 7 edited

Legend:

      unmodified (no marker)
  +   added
  -   removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/ModelPolicyActionInfo.cs

    r11742 → r11744
        public void UpdateReward(double reward) {
          Debug.Assert(!Disabled);
  +       Tries++;
          model.Update(reward);
        }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/ThresholdAscentPolicy.cs

    r11742 → r11744
        }

  -     private double U(double mu, int totalTries, int n, int k) {
  +     private double U(double mu, double totalTries, int n, int k) {
          //var alpha = Math.Log(2.0 * totalTries * k / delta);
          double alpha = Math.Log(2) + Math.Log(totalTries) + Math.Log(k) - Math.Log(delta);
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/HeuristicLab.Algorithms.Bandits.csproj

    r11742 → r11744
        <Compile Include="IBanditPolicy.cs" />
        <Compile Include="IBanditPolicyActionInfo.cs" />
  +     <Compile Include="Models\GaussianMixtureModel.cs" />
  +     <Compile Include="Models\LogitNormalModel.cs" />
        <Compile Include="OnlineMeanAndVarianceEstimator.cs" />
        <Compile Include="Models\BernoulliModel.cs" />
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Models/GaussianMixtureModel.cs

    r11730 → r11744
    namespace HeuristicLab.Algorithms.Bandits.Models {
      public class GaussianMixtureModel : IModel {
  -     private readonly int numActions;
  -     private readonly double[][] meanMean; // mean of mean for each arm and component
  -     private readonly double[][] meanVariance; // variance of mean for each arm and component
  -     private readonly double[][] componentProb;
  +     private readonly double[] componentMeans;
  +     private readonly double[] componentVars;
  +     private readonly double[] componentProbs;

  -     // parameters of beta prior distribution
        private int numComponents;
  -     private double priorMean;

  -     public GaussianMixtureModel(int numActions, double priorMean = 0.5, int nComponents = 5) {
  -       this.numActions = numActions;
  +     public GaussianMixtureModel(int nComponents = 5) {
          this.numComponents = nComponents;
  -       this.priorMean = priorMean;
  -       this.meanMean = new double[numActions][];
  -       this.meanVariance = new double[numActions][];
  -       this.componentProb = new double[numActions][];
  -       for (int a = 0; a < numActions; a++) {
  -         // TODO: probably need to initizalize this randomly to allow learning
  -         meanMean[a] = Enumerable.Repeat(priorMean, nComponents).ToArray();
  -         meanVariance[a] = Enumerable.Repeat(1.0, nComponents).ToArray(); // prior variance of mean variance = 1
  -         componentProb[a] = Enumerable.Repeat(1.0 / nComponents, nComponents).ToArray(); // uniform prior for component probabilities
  -       }
  +       this.componentProbs = new double[nComponents];
  +       this.componentMeans = new double[nComponents];
  +       this.componentVars = new double[nComponents];
        }


  -     public double[] SampleExpectedRewards(Random random) {
  -       // sample mean foreach action and component from the prior
  -       var exp = new double[numActions];
  -       for (int a = 0; a < numActions; a++) {
  -         var sumReward = 0.0;
  -         var numSamples = 10000;
  -         var sampledComponents = Enumerable.Range(0, numComponents).SampleProportional(random, componentProb[a]).Take(numSamples);
  -         foreach (var k in sampledComponents) {
  -           sumReward += Rand.RandNormal(random) * Math.Sqrt(meanVariance[a][k]) + meanMean[a][k];
  -         }
  -         exp[a] = sumReward / (double)numSamples;
  -       }
  -
  -       return exp;
  +     public double SampleExpectedReward(Random random) {
  +       var k = Enumerable.Range(0, numComponents).SampleProportional(random, componentProbs).First();
  +       return alglib.invnormaldistribution(random.NextDouble()) * Math.Sqrt(componentVars[k]) + componentMeans[k];
        }

  -     public void Update(int action, double reward) {
  +     public void Update(double reward) {
          // see http://www.cs.toronto.edu/~mackay/itprnn/ps/302.320.pdf Algorithm 22.2 soft k-means
          throw new NotImplementedException();
        }

  -     public void Disable(int action) {
  -       Array.Clear(meanMean[action], 0, meanMean[action].Length);
  -       Array.Clear(meanVariance[action], 0, meanVariance[action].Length);
  +     public void Disable() {
  +       Array.Clear(componentMeans, 0, numComponents);
  +       for (int i = 0; i < numComponents; i++)
  +         componentVars[i] = 0.0;
  +     }
  +
  +     public object Clone() {
  +       return new GaussianMixtureModel(numComponents);
        }

        public void Reset() {
  -       Array.Clear(meanMean, 0, meanMean.Length);
  -       Array.Clear(meanVariance, 0, meanVariance.Length);
  +       Array.Clear(componentMeans, 0, numComponents);
  +       Array.Clear(componentVars, 0, numComponents);
  +       Array.Clear(componentProbs, 0, numComponents);
        }

  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization.csproj

    r11742 → r11744
        <Compile Include="AlternativesSampler.cs" />
        <Compile Include="AlternativesContextSampler.cs" />
  +     <Compile Include="TemporalDifferenceTreeSearchSampler.cs" />
        <Compile Include="ExhaustiveRandomFirstSearch.cs" />
        <Compile Include="MctsContextualSampler.cs">
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/MctsSampler.cs

    r11742 → r11744
        public int treeDepth;
        public int treeSize;
  +     private double bestQuality;

        // public MctsSampler(IProblem problem, int maxLen, Random random) :
    …

        public void Run(int maxIterations) {
  -       double bestQuality = double.MinValue;
  +       bestQuality = double.MinValue;
          InitPolicies(problem.Grammar);
          for (int i = 0; !rootNode.done && i < maxIterations; i++) {
    …
        public void PrintStats() {
          var n = rootNode;
  -       Console.WriteLine("depth: {0,5} size: {1,10} root tries {2,10}", treeDepth, treeSize, n.actionInfo.Tries);
  +       Console.WriteLine("depth: {0,5} size: {1,10} root tries {2,10}, rootQ {3:F3}, bestQ {4:F3}", treeDepth, treeSize, n.actionInfo.Tries, n.actionInfo.Value, bestQuality);
          while (n.children != null) {
            Console.WriteLine();
    …
            n = n.children.Where(ch => !ch.done).OrderByDescending(c => c.actionInfo.Value).First();
          }
  -       Console.ReadLine();
        }

  • branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs

    r11742 → r11744
          //var problem = new RoyalPairProblem();
          //var problem = new EvenParityProblem();
  -       var alg = new MctsSampler(problem, 25, random, 0, new GaussianThompsonSamplingPolicy(true));
  +       var alg = new MctsSampler(problem, 25, random, 0, new GenericThompsonSamplingPolicy(new LogitNormalModel()));
  +       //var alg = new TemporalDifferenceTreeSearchSampler(problem, 23, random, 0, new RandomPolicy());
          //var alg = new ExhaustiveBreadthFirstSearch(problem, 17);
          //var alg = new AlternativesContextSampler(problem, random, 17, 4, (rand, numActions) => new RandomPolicy(rand, numActions));
    …
            iterations++;
            globalStatistics.AddSentence(sentence, quality);
  -         if (iterations % 1000 == 0) {
  +         if (iterations % 100 == 0) {
  +           Console.Clear();
              alg.PrintStats();
            }
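
Note on the ThresholdAscentPolicy change: the revision replaces log(2.0 * totalTries * k / delta) by the algebraically identical sum Math.Log(2) + Math.Log(totalTries) + Math.Log(k) - Math.Log(delta) and widens totalTries to double, so alpha stays finite even when the intermediate product 2 * totalTries * k would overflow a double. The standalone check below only illustrates that identity; the class name AlphaEquivalenceCheck and the chosen values are illustrative and not part of the changeset.

    using System;

    // Illustrative only: shows that the direct form and the sum-of-logs form of
    // alpha agree, and that the sum-of-logs form survives values where the
    // direct product overflows.
    public static class AlphaEquivalenceCheck {
      public static void Main() {
        double delta = 0.01;
        double t = 1e6, k = 100;

        double direct = Math.Log(2.0 * t * k / delta);
        double logSum = Math.Log(2) + Math.Log(t) + Math.Log(k) - Math.Log(delta);
        Console.WriteLine("{0:F12}  {1:F12}", direct, logSum); // identical up to rounding

        // with very large counts the product 2 * t * k overflows to infinity,
        // while the sum of logarithms stays finite (about 719.1 here)
        t = 1e300; k = 1e10;
        Console.WriteLine(Math.Log(2.0 * t * k / delta));                             // Infinity
        Console.WriteLine(Math.Log(2) + Math.Log(t) + Math.Log(k) - Math.Log(delta)); // ~719.1
      }
    }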
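
Note on GaussianMixtureModel: the reworked SampleExpectedReward draws one sample in two steps, first picking a component in proportion to componentProbs and then returning a Gaussian draw with that component's mean and variance (alglib's inverse normal CDF applied to a uniform variate). The sketch below reproduces the same two-step scheme without the HeuristicLab and alglib dependencies, assuming Box-Muller in place of alglib.invnormaldistribution and an inlined roulette-wheel in place of the SampleProportional extension; the class and parameter names are illustrative only.

    using System;

    // Dependency-free sketch of the two-step mixture sampling used by
    // GaussianMixtureModel.SampleExpectedReward.
    public static class MixtureSamplingSketch {
      public static double SampleExpectedReward(Random random,
          double[] componentProbs, double[] componentMeans, double[] componentVars) {
        // 1) roulette-wheel selection of a component index, proportional to componentProbs
        double total = 0.0;
        foreach (var p in componentProbs) total += p;
        double r = random.NextDouble() * total, acc = 0.0;
        int k = componentProbs.Length - 1;
        for (int i = 0; i < componentProbs.Length; i++) {
          acc += componentProbs[i];
          if (r <= acc) { k = i; break; }
        }

        // 2) standard normal via Box-Muller, scaled by the component's std. dev. and shifted by its mean
        double u1 = 1.0 - random.NextDouble(); // avoid Log(0)
        double u2 = random.NextDouble();
        double z = Math.Sqrt(-2.0 * Math.Log(u1)) * Math.Cos(2.0 * Math.PI * u2);
        return z * Math.Sqrt(componentVars[k]) + componentMeans[k];
      }
    }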
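
Note on LogitNormalModel: Program.cs now instantiates GenericThompsonSamplingPolicy with the newly added LogitNormalModel, but that file's contents are not shown in this changeset. As a hedged guess at the underlying distribution only: a logit-normal variate is, by definition, a Gaussian draw pushed through the logistic function, which keeps sampled rewards inside (0, 1). Everything below, including the class name LogitNormalSketch, is an assumption and not the actual model code.

    using System;

    // Hypothetical sketch: sampling from a logit-normal distribution by squashing
    // a Gaussian draw through the logistic function.
    public static class LogitNormalSketch {
      public static double Sample(Random random, double mu, double sigma) {
        // standard normal via Box-Muller
        double u1 = 1.0 - random.NextDouble();
        double u2 = random.NextDouble();
        double z = Math.Sqrt(-2.0 * Math.Log(u1)) * Math.Cos(2.0 * Math.PI * u2);
        // logistic squashing keeps the result in (0, 1)
        return 1.0 / (1.0 + Math.Exp(-(mu + sigma * z)));
      }
    }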