Context Navigation

← Previous Change
Next Change →

HeuristicLab.Algorithms.Bandits

Timestamp:

01/19/15 20:09:12 (10 years ago)

Author:

gkronber

Message:

#2283: performance tuning and reactivated random-roll-out policy in sequential search

Location:

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits

Files:

6 edited

BanditPolicies/BoltzmannExplorationPolicy.cs (modified) (1 diff)
BanditPolicies/GenericThompsonSamplingPolicy.cs (modified) (1 diff)
Bandits/GaussianMixtureBandit.cs (modified) (1 diff)
GrammarPolicies/GenericGrammarPolicy.cs (modified) (6 diffs)
GrammarPolicies/TDPolicy.cs (modified) (1 diff)
Models/GaussianMixtureModel.cs (modified) (1 diff)

Legend:

: Unmodified
: Added
: Removed

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/BoltzmannExplorationPolicy.cs

-                      r11747
+                      r11799
                 : Math.Exp(beta * valueFunction(aInfo));
-      var bestAction = myActionInfos
-        .Select((aInfo, idx) => new { aInfo, idx })
-        .SampleProportional(random, w)
-        .Select(p => p.idx)
-        .First();
+      var bestAction = Enumerable.Range(0, myActionInfos.Count()).SampleProportional(random, w);
       Debug.Assert(bestAction >= 0);
       return bestAction;

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/GenericThompsonSamplingPolicy.cs

r11742	r11799
39	39
40	40	public override string ToString() {
41		return string.Format("GenericThompsonSamplingPolicy(~~\"{0}\"~~)", model);
	41	return string.Format("GenericThompsonSamplingPolicy({0})", model);
42	42	}
43	43	}

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Bandits/GaussianMixtureBandit.cs

r11731	r11799
44	44	double x = 0;
45	45	do {
46		var k = Enumerable.Range(0, componentProb[arm].Length).SampleProportional(random, componentProb[arm])~~.First()~~;
	46	var k = Enumerable.Range(0, componentProb[arm].Length).SampleProportional(random, componentProb[arm]);
47	47
48	48	var z = Rand.RandNormal(random);

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/GrammarPolicies/GenericGrammarPolicy.cs

-                      r11793
+                      r11799
     private readonly IProblem problem;
     private readonly IBanditPolicy banditPolicy;
-    //private readonly HashSet<string> done;
     public GenericGrammarPolicy(IProblem problem, IBanditPolicy banditPolicy, bool useCanonicalState = false) {
 …
       this.banditPolicy = banditPolicy;
       this.stateInfo = new Dictionary<string, IBanditPolicyActionInfo>();
-      //this.done = new HashSet<string>();
+    }
 …
         // fail because all follow states have already been visited => also disable the current state (if we can be sure that it has been fully explored)
-        GetStateInfo(curState).Disable(0.0); // should the value be max of afterstate values instead of 0.0?
+        GetStateInfo(curState).Disable(afterStates.Select(afterState => GetStateInfo(afterState).Value).Max());
         selectedStateIdx = -1;
         return false;
 …
     public virtual void UpdateReward(IEnumerable<string> stateTrajectory, double reward) {
+      // the last state could be terminal
+      var lastState = stateTrajectory.Last();
+      if (problem.Grammar.IsTerminal(lastState)) {
+        GetStateInfo(lastState).Disable(reward);
+      }
+      foreach (var state in stateTrajectory) {
+        GetStateInfo(state).UpdateReward(reward);
+      // update remaining states
+      foreach (var state in stateTrajectory.Reverse().Skip(1)) {
+        GetStateInfo(state).UpdateReward(reward);
+        // only the last state can be terminal
+        if (problem.Grammar.IsTerminal(state)) {
+          GetStateInfo(state).Disable(reward);
+        }
+      }
+    }
 …
     public virtual void Reset() {
       stateInfo.Clear();
-      //done.Clear();
+    }
 …
     protected string CanonicalState(string state) {
       if (useCanonicalState) {
+        if (problem.Grammar.IsTerminal(state))
+          return problem.CanonicalRepresentation(state);
+        else {
+          // for non-terminal phrases make sure we don't disable canonical states that have not yet been fully explored
+          // e.g. if for the ant problem we have the phrase lllS (and we are limited to 4 symbols) and lllr as well as llll are explored
+          // then we are not allowed to disable rS (canonical of lllS) because rS might not have been fully explored
+          // solution: we disable the state rS4
+          return problem.CanonicalRepresentation(state) + state.Length;
+        }
+        return problem.CanonicalRepresentation(state);
       } else
         return state;

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/GrammarPolicies/TDPolicy.cs

r11793	r11799
35	35	return false;
36	36	}
37		throw new NotImplementedException(); // TODO: remap indices of reduced action enumerable to indices of original enumerable
	37	throw new NotImplementedException(); // TODO: remap indices of reduced action enumerable to indices of original enumerable (see genericgrammarpolicy)
38	38
39	39	//return epsGreedy.TrySelect(random, curState, afterStates, out selectedState);

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Models/GaussianMixtureModel.cs

r11747	r11799
24	24
25	25	public double SampleExpectedReward(Random random) {
26		var k = Enumerable.Range(0, numComponents).SampleProportional(random, componentProbs)~~.First()~~;
	26	var k = Enumerable.Range(0, numComponents).SampleProportional(random, componentProbs);
27	27	return alglib.invnormaldistribution(random.NextDouble()) * Math.Sqrt(componentVars[k]) + componentMeans[k];
28	28	}

Note: See TracChangeset for help on using the changeset viewer.