Context Navigation

IPolicy.cs @ 11770

Visit:

Last change on this file since 11770 was 11770, checked in by gkronber, 9 years ago
#2283: worked on generic sequential search alg with bandit policy as parameter
File size: 1.1 KB

Rev	Line
[11744]	1	using System;
	2	using System.Collections.Generic;
[11770]	3	using System.Dynamic;
[11744]	4	using System.Linq;
	5	using System.Text;
	6	using System.Threading.Tasks;
	7	using HeuristicLab.Problems.GrammaticalOptimization;
	8
	9	namespace HeuristicLab.Algorithms.Bandits {
[11770]	10	// this interface represents a policy for episodic reinforcement learning (with afterstates)
	11	// here we assume that a reward is only received at the end of the episode and the update is done only after an episode is complete
	12	// we also assume that the policy can fail to select one of the afterStates
	13	public interface IPolicy<TState> {
	14	bool TrySelect(Random random, TState curState, IEnumerable<TState> afterStates, out TState selectedState); // on success selectedState is an element of afterStates (selectedState \in afterStates); NOTE(review): presumably returns false when no state could be selected - confirm against implementations
[11744]	15
[11770]	16	// stateTrajectory contains the states of the episode; the reward is received only at the end of the episode (for the terminal state)
	17	void UpdateReward(IEnumerable<TState> stateTrajectory, double reward);
[11744]	18
[11770]	19	void Reset(); // clears all internal state
	20
	21	// for introspection
	22	double GetValue(TState state); // NOTE(review): presumably the policy's current value estimate for the given state - confirm against implementations
	23	int GetTries(TState state); // NOTE(review): presumably how often the given state has been tried so far - confirm against implementations
[11744]	24	}
	25	}

Note: See TracBrowser for help on using the repository browser.