using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using HeuristicLab.Common;

namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
  /// <summary>
  /// Boltzmann exploration (softmax) policy: actions are sampled with probability
  /// proportional to exp(beta * normalizedQuality), after each untried action has
  /// been tried once.
  /// </summary>
  public class BoltzmannExplorationPolicy : IBanditPolicy {
    // Selection pressure: 0 => uniform random selection, large => greedy.
    private readonly double beta;

    /// <summary>Creates a softmax policy with the given selection pressure.</summary>
    /// <param name="beta">Non-negative selection-pressure parameter.</param>
    /// <exception cref="ArgumentException">Thrown when <paramref name="beta"/> is negative.</exception>
    public BoltzmannExplorationPolicy(double beta) {
      if (beta < 0) throw new ArgumentException("beta must be non-negative", nameof(beta));
      this.beta = beta;
    }

    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
      Debug.Assert(actionInfos.Any());
      // Materialize once: the sequence is enumerated several times below
      // (untried check, min/max, weights, index range) and must stay consistent.
      var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>().ToArray();

      // Try any of the untried actions first, chosen uniformly at random.
      if (myActionInfos.Any(aInfo => aInfo.Tries == 0)) {
        return myActionInfos
          .Select((aInfo, idx) => new { aInfo, idx })
          .Where(p => p.aInfo.Tries == 0)
          .SelectRandom(random).idx;
      }

      // Windowing: normalize rewards to [0, 1] so beta has a scale-free meaning.
      // (A rank-based weighting would be an alternative normalization here.)
      var max = myActionInfos.Select(i => i.MaxReward).Max();
      var min = myActionInfos.Select(i => i.MaxReward).Min();
      double range = max - min;
      // When all rewards are identical the normalization would be 0/0 (NaN);
      // fall back to uniform weights, which is the correct limit (exp(0) = 1).
      var w = range > 0.0
        ? myActionInfos.Select(aInfo => Math.Exp(beta * (aInfo.MaxReward - min) / range))
        : myActionInfos.Select(aInfo => 1.0);

      var bestAction = Enumerable.Range(0, myActionInfos.Length).SampleProportional(random, w);
      Debug.Assert(bestAction >= 0);
      return bestAction;
    }

    public IBanditPolicyActionInfo CreateActionInfo() {
      return new DefaultPolicyActionInfo();
    }

    public override string ToString() {
      return string.Format("BoltzmannExplorationPolicy({0:F2})", beta);
    }
  }
}