Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
01/15/15 18:59:07 (9 years ago)
Author:
gkronber
Message:

#2283: worked on generic sequential search algorithm with bandit policy as parameter

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/GrammarPolicies/RandomNoResamplingPolicy.cs

    r11742 r11770  
    11using System;
    22using System.Collections.Generic;
     3using System.Configuration;
    34using System.Linq;
     5using System.Security.Policy;
    46using System.Text;
    5 using System.Threading.Tasks;
    67using HeuristicLab.Common;
    78using HeuristicLab.Problems.GrammaticalOptimization;
    89
    910namespace HeuristicLab.Algorithms.Bandits.GrammarPolicies {
    10   public class RandomNoResamplingPolicy : IGrammarPolicy {
     11  public class RandomNoResamplingPolicy : GrammarPolicy {
    1112
    12     private readonly Dictionary<ReadonlySequence, bool> done;
    13     private readonly Dictionary<Tuple<ReadonlySequence, ReadonlySequence>, ReadonlySequence> nextState;
     13    private readonly HashSet<string> done;
    1414
    15 
    16     public RandomNoResamplingPolicy() {
    17       this.done = new Dictionary<ReadonlySequence, bool>();
     15    public RandomNoResamplingPolicy(IProblem problem, bool useCanonicalRepresentation)
     16      : base(problem, useCanonicalRepresentation) {
     17      this.done = new HashSet<string>();
    1818    }
    1919
    20     public ReadonlySequence SelectAction(Random random, ReadonlySequence state, IEnumerable<ReadonlySequence> actions) {
    21       var allDone = true;
    22       foreach (var a in actions) {
    23         var p = Tuple.Create(state, a);
    24         allDone &= nextState.ContainsKey(p) && Done(nextState[p]);
    25         if (!allDone) break;
     20    public override bool TrySelect(Random random, ReadonlySequence curState, IEnumerable<ReadonlySequence> afterStates, out ReadonlySequence selectedState) {
     21      // only select states that are not yet done
     22      afterStates = afterStates.Where(a => !done.Contains(a.ToString())).ToArray();
     23      if (!afterStates.Any()) {
     24        // fail because all follow states have already been visited => also disable the current state
     25        done.Add(CanonicalState(curState.ToString()));
     26        selectedState = null;
     27        return false;
    2628      }
    27       if(allDone)
    28       return actions
    29         .Where(a => !nextState.ContainsKey(Tuple.Create(state, a)) || Done(nextState[Tuple.Create(state, a)]))
    30         .SelectRandom(random);
     29
     30      selectedState = afterStates.SelectRandom(random);
     31      return true;
    3132    }
    3233
    33     public void UpdateReward(ReadonlySequence state, ReadonlySequence action, double reward, ReadonlySequence newState) {
    34       var key = Tuple.Create(state, action);
    35       nextState[key] = newState;
    36       if (newState.IsTerminal) done[newState] = true;
    37       if
     34    public override void UpdateReward(IEnumerable<ReadonlySequence> stateTrajectory, double reward) {
     35      base.UpdateReward(stateTrajectory, reward);
     36      // ignore rewards but update the set of visited terminal states
     37
     38      // the last state could be terminal
     39      var lastState = stateTrajectory.Last();
     40      if (lastState.IsTerminal) done.Add(CanonicalState(lastState.ToString()));
    3841    }
    3942
    40     public bool Done(ReadonlySequence state) {
    41       return done.ContainsKey(state);
     43    public override void Reset() {
     44      base.Reset();
     45      done.Clear();
    4246    }
    4347  }
Note: See TracChangeset for help on using the changeset viewer.