Context Navigation

ISequentialDecisionPolicy.cs @ 13780

Visit:

Last change on this file since 13780 was 12893, checked in by gkronber, 9 years ago
#2283: experiments on grammatical optimization algorithms (maxreward instead of avg reward, ...)
File size: 1.1 KB

Line
1	using System;
2	using System.Collections.Generic;
3	using System.Dynamic;
4	using System.Linq;
5	using System.Text;
6	using System.Threading.Tasks;
7	using HeuristicLab.Problems.GrammaticalOptimization;
8
9	namespace HeuristicLab.Algorithms.Bandits {
10	// this interface represents a policy for episodic reinforcement learning (with afterstates)
11	// here we assume that a reward is only received at the end of the episode and the update is done only after an episode is complete
12	// we also assume that the policy can fail to select one of the after-states (see the bool result of TrySelect)
13	public interface ISequentialDecisionPolicy<in TState> { // TState is declared contravariant ('in'); it is used only in parameter positions below
14	bool TrySelect(System.Random random, TState curState, IEnumerable<TState> afterStates, out int selectedStateIdx); // selectedState \in afterStates; selectedStateIdx is presumably the index of the chosen element of afterStates and the result presumably false when no selection was possible -- confirm against implementations
15
16	// stateTrajectory contains the states of the episode; the reward is received at the end of the episode (only for the terminal state)
17	void UpdateReward(IEnumerable<TState> stateTrajectory, double reward);
18
19	void Reset(); // clears all internal state
20
21	// for introspection (presumably the policy's current value estimate and number of tries recorded for the given state -- exact semantics are up to the implementation)
22	double GetValue(TState state);
23	int GetTries(TState state);
24	}
25	}

Note: See TracBrowser for help on using the repository browser.