Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/IPolicy.cs @ 11744

Last change on this file since 11744 was 11744, checked in by gkronber, 9 years ago

#2283 worked on TD, and models for MCTS

File size: 805 bytes
1using System;
2using System.Collections.Generic;
3using System.Linq;
4using System.Text;
5using System.Threading.Tasks;
6using HeuristicLab.Problems.GrammaticalOptimization;
8namespace HeuristicLab.Algorithms.Bandits {
9  // Represents a policy for reinforcement learning.
    // TState is declared contravariant ('in'); TAction is invariant.
10  public interface IPolicy<in TState, TAction> {
      // Selects an action for the given state.
      // NOTE(review): presumably the result is one of 'actions' and 'random' drives any stochastic choice - confirm against the implementations.
11    TAction SelectAction(Random random, TState state, IEnumerable<TAction> actions);
      // Reports an observed reward to the policy.
12    void UpdateReward(TState state, TAction action, double reward, TState newState); // reward received after taking action in state; newState is the new state
      // NOTE(review): presumably returns true once the policy considers the search from 'state' finished - confirm against the implementations.
13    bool Done(TState state); // for deterministic MDPs with deterministic rewards, where the goal is to find a state with maximal reward
14  }
16  public interface IGrammarPolicy : IPolicy<ReadonlySequence, ReadonlySequence> {
      // IPolicy specialization in which both the state type and the action type are ReadonlySequence.
      // Declares no additional members of its own.
      // NOTE(review): ReadonlySequence is not defined in this file - presumably it comes from HeuristicLab.Problems.GrammaticalOptimization; confirm.
18  }
Note: See TracBrowser for help on using the repository browser.