Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
01/19/15 20:09:12 (10 years ago)
Author:
gkronber
Message:

#2283: performance tuning and reactivated random-roll-out policy in sequential search

Location:
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits
Files:
6 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/BoltzmannExplorationPolicy.cs

    r11747 r11799  
    41 41                : Math.Exp(beta * valueFunction(aInfo));
    42 42
    43       var bestAction = myActionInfos
    44         .Select((aInfo, idx) => new { aInfo, idx })
    45         .SampleProportional(random, w)
    46         .Select(p => p.idx)
    47         .First();
     43      var bestAction = Enumerable.Range(0, myActionInfos.Count()).SampleProportional(random, w);
    48 44      Debug.Assert(bestAction >= 0);
    49 45      return bestAction;
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/GenericThompsonSamplingPolicy.cs

    r11742 r11799  
    39 39
    40 40    public override string ToString() {
    41       return string.Format("GenericThompsonSamplingPolicy(\"{0}\")", model);
     41      return string.Format("GenericThompsonSamplingPolicy({0})", model);
    42 42    }
    43 43  }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Bandits/GaussianMixtureBandit.cs

    r11731 r11799  
    44 44      double x = 0;
    45 45      do {
    46         var k = Enumerable.Range(0, componentProb[arm].Length).SampleProportional(random, componentProb[arm]).First();
     46        var k = Enumerable.Range(0, componentProb[arm].Length).SampleProportional(random, componentProb[arm]);
    47 47
    48 48        var z = Rand.RandNormal(random);
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/GrammarPolicies/GenericGrammarPolicy.cs

    r11793 r11799  
    14 14    private readonly IProblem problem;
    15 15    private readonly IBanditPolicy banditPolicy;
    16     //private readonly HashSet<string> done;
    17 16
    18 17    public GenericGrammarPolicy(IProblem problem, IBanditPolicy banditPolicy, bool useCanonicalState = false) {
     
    21 20      this.banditPolicy = banditPolicy;
    22 21      this.stateInfo = new Dictionary<string, IBanditPolicyActionInfo>();
    23       //this.done = new HashSet<string>();
    24 22    }
    25 23
     
    29 27        // fail because all follow states have already been visited => also disable the current state (if we can be sure that it has been fully explored)
    30 28
    31         GetStateInfo(curState).Disable(0.0); // should the value be max of afterstate values instead of 0.0?
     29        GetStateInfo(curState).Disable(afterStates.Select(afterState => GetStateInfo(afterState).Value).Max());
    32 30        selectedStateIdx = -1;
    33 31        return false;
     
    50 48
    51 49    public virtual void UpdateReward(IEnumerable<string> stateTrajectory, double reward) {
    52       // the last state could be terminal
    53       var lastState = stateTrajectory.Last();
    54       if (problem.Grammar.IsTerminal(lastState)) {
    55         GetStateInfo(lastState).Disable(reward);
    56       }
     50      foreach (var state in stateTrajectory) {
     51        GetStateInfo(state).UpdateReward(reward);
    57 52
    58       // update remaining states
    59       foreach (var state in stateTrajectory.Reverse().Skip(1)) {
    60         GetStateInfo(state).UpdateReward(reward);
     53        // only the last state can be terminal
     54        if (problem.Grammar.IsTerminal(state)) {
     55          GetStateInfo(state).Disable(reward);
     56        }
    61 57      }
    62 58    }
     
    64 60    public virtual void Reset() {
    65 61      stateInfo.Clear();
    66       //done.Clear();
    67 62    }
    68 63
     
    81 76    protected string CanonicalState(string state) {
    82 77      if (useCanonicalState) {
    83         if (problem.Grammar.IsTerminal(state))
    84           return problem.CanonicalRepresentation(state);
    85         else {
    86           // for non-terminal phrases make sure we don't disable canonical states that have not yet been fully explored
    87           // e.g. if for the ant problem we have the phrase lllS (and we are limited to 4 symbols) and lllr as well as llll are explored
    88           // then we are not allowed to disable rS (canonical of lllS) because rS might not have been fully explored
    89           // solution: we disable the state rS4
    90           return problem.CanonicalRepresentation(state) + state.Length;
    91         }
     78        return problem.CanonicalRepresentation(state);
    92 79      } else
    93 80        return state;
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/GrammarPolicies/TDPolicy.cs

    r11793 r11799  
    35 35        return false;
    36 36      }
    37       throw new NotImplementedException(); // TODO: remap indices of reduced action enumerable to indices of original enumerable
     37      throw new NotImplementedException(); // TODO: remap indices of reduced action enumerable to indices of original enumerable (see genericgrammarpolicy)
    38 38
    39 39      //return epsGreedy.TrySelect(random, curState, afterStates, out selectedState);
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Models/GaussianMixtureModel.cs

    r11747 r11799  
    24 24
    25 25    public double SampleExpectedReward(Random random) {
    26       var k = Enumerable.Range(0, numComponents).SampleProportional(random, componentProbs).First();
     26      var k = Enumerable.Range(0, numComponents).SampleProportional(random, componentProbs);
    27 27      return alglib.invnormaldistribution(random.NextDouble()) * Math.Sqrt(componentVars[k]) + componentMeans[k];
    28 28    }
Note: See TracChangeset for help on using the changeset viewer.