Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.GrammaticalOptimization/ISequentialDecisionPolicy.cs @ 13834

Last change on this file since 13834 was 12893, checked in by gkronber, 9 years ago

#2283: experiments on grammatical optimization algorithms (maxreward instead of avg reward, ...)

File size: 1.1 KB
RevLine 
[11744]1using System;
2using System.Collections.Generic;
[11770]3using System.Dynamic;
[11744]4using System.Linq;
5using System.Text;
6using System.Threading.Tasks;
7using HeuristicLab.Problems.GrammaticalOptimization;
8
9namespace HeuristicLab.Algorithms.Bandits {
[11770]10  // This interface represents a policy for episodic reinforcement learning (with afterstates).
11  // Here we assume that a reward is only received at the end of the episode and that the update is done only after an episode is complete.
12  // We also assume that the policy can fail to select one of the follow states (the afterStates passed to TrySelect), which is why selection is exposed as a bool-returning Try-method. TState is declared contravariant ("in").
[11850]13  public interface ISequentialDecisionPolicy<in TState> {
[12893]14    bool TrySelect(System.Random random, TState curState, IEnumerable<TState> afterStates, out int selectedStateIdx); // selectedState \in afterStates; NOTE(review): selectedStateIdx is presumably the position of the selected state within afterStates, and false presumably means no state could be selected -- confirm against implementations
[11744]15
[11770]16    // stateTrajectory holds the states of the episode; the reward is received at the end of the episode (only for the terminal state)
17    void UpdateReward(IEnumerable<TState> stateTrajectory, double reward);
[11744]18
[11770]19    void Reset(); // clears all internal state
20
21    // For introspection. NOTE(review): GetValue presumably returns the policy's current value estimate for the state and GetTries the number of times the state has been tried -- the exact semantics are defined by the implementations, confirm there.
22    double GetValue(TState state);
23    int GetTries(TState state);
[11744]24  }
25}
Note: See TracBrowser for help on using the repository browser.