Changeset 11732 for branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/AlternativesSampler.cs
- Timestamp: 01/07/15 09:21:46 (9 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/AlternativesSampler.cs
--- r11730
+++ r11732
@@ -16,9 +16,11 @@
     private readonly Random random;
     private readonly IProblem problem;
+    private readonly IPolicy policy;
 
-    public AlternativesSampler(IProblem problem, int maxLen) {
+    public AlternativesSampler(IProblem problem, IPolicy policy, int maxLen) {
       this.problem = problem;
       this.maxLen = maxLen;
-      this.random = new Random(31415);
+      this.random = new Random();
+      this.policy = policy;
     }
 
@@ -28,5 +30,5 @@
       for (int i = 0; i < maxIterations; i++) {
         var sentence = SampleSentence(problem.Grammar).ToString();
-        var quality = problem.Evaluate(sentence) / problem.GetBestKnownQuality(maxLen);
+        var quality = problem.Evaluate(sentence) / problem.BestKnownQuality(maxLen);
         DistributeReward(quality);
 
@@ -41,12 +43,12 @@
 
 
-    private Dictionary<char, IPolicy> ntPolicy;
+    private Dictionary<char, IPolicyActionInfo[]> ntActionInfos;
     private List<Tuple<char, int>> updateChain;
 
     private void InitPolicies(IGrammar grammar) {
-      this.ntPolicy = new Dictionary<char, IPolicy>();
+      this.ntActionInfos = new Dictionary<char, IPolicyActionInfo[]>();
       this.updateChain = new List<Tuple<char, int>>();
       foreach (var nt in grammar.NonTerminalSymbols) {
-        ntPolicy.Add(nt, new EpsGreedyPolicy(random, grammar.GetAlternatives(nt).Count(), 0.1));
+        ntActionInfos.Add(nt, grammar.GetAlternatives(nt).Select(_ => policy.CreateActionInfo()).ToArray());
       }
     }
@@ -77,5 +79,5 @@
         } else {
           // all alts are allowed => select using bandit policy
-          var selectedAltIdx = ntPolicy[nt].SelectAction();
+          var selectedAltIdx = policy.SelectAction(random, ntActionInfos[nt]);
           selectedAlt = alts.ElementAt(selectedAltIdx);
           updateChain.Add(Tuple.Create(nt, selectedAltIdx));
@@ -95,5 +97,5 @@
         var nt = e.Item1;
         var action = e.Item2;
-        ntPolicy[nt].UpdateReward(action,reward);
+        ntActionInfos[nt][action].UpdateReward(reward);
       }
     }
Note: See TracChangeset for help on using the changeset viewer.