Changeset 11742 for branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization
- Timestamp: 01/09/15 14:57:28 (9 years ago)
- Location: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization
- Files: 1 added, 4 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/AlternativesContextSampler.cs
r11732 r11742
    17     17    private readonly Random random;
    18     18    private readonly int contextLen;
    19         - private readonly IPolicy policy;
           19  + private readonly IBanditPolicy policy;
    20     20
    21         - public AlternativesContextSampler(IProblem problem, Random random, int maxLen, int contextLen, IPolicy policy) {
           21  + public AlternativesContextSampler(IProblem problem, Random random, int maxLen, int contextLen, IBanditPolicy policy) {
    22     22    this.maxLen = maxLen;
    23     23    this.problem = problem;
     …      …
    45     45
    46     46
    47         - private Dictionary<string, IPolicyActionInfo[]> contextActionInfos;
           47  + private Dictionary<string, IBanditPolicyActionInfo[]> contextActionInfos;
    48     48    private List<Tuple<string, int>> updateChain;
    49     49
    50     50    private void InitPolicies(IGrammar grammar) {
    51         - this.contextActionInfos = new Dictionary<string, IPolicyActionInfo[]>();
           51  + this.contextActionInfos = new Dictionary<string, IBanditPolicyActionInfo[]>();
    52     52    this.updateChain = new List<Tuple<string, int>>();
    53     53    }
     …      …
    82     82    var endIdx = Math.Min(startIdx + contextLen, ntIdx);
    83     83    var lft = phrase.Subsequence(startIdx, endIdx - startIdx + 1).ToString();
    84         - lft = problem.Hash(lft);
           84  + lft = problem.CanonicalRepresentation(lft);
    85     85    if (!contextActionInfos.ContainsKey(lft)) {
    86     86    contextActionInfos.Add(lft, g.GetAlternatives(nt).Select(_ => policy.CreateActionInfo()).ToArray());
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/AlternativesSampler.cs
r11732 r11742
    16     16    private readonly Random random;
    17     17    private readonly IProblem problem;
    18         - private readonly IPolicy policy;
           18  + private readonly IBanditPolicy policy;
    19     19
    20         - public AlternativesSampler(IProblem problem, IPolicy policy, int maxLen) {
           20  + public AlternativesSampler(IProblem problem, IBanditPolicy policy, int maxLen) {
    21     21    this.problem = problem;
    22     22    this.maxLen = maxLen;
     …      …
    43     43
    44     44
    45         - private Dictionary<char, IPolicyActionInfo[]> ntActionInfos;
           45  + private Dictionary<char, IBanditPolicyActionInfo[]> ntActionInfos;
    46     46    private List<Tuple<char, int>> updateChain;
    47     47
    48     48    private void InitPolicies(IGrammar grammar) {
    49         - this.ntActionInfos = new Dictionary<char, IPolicyActionInfo[]>();
           49  + this.ntActionInfos = new Dictionary<char, IBanditPolicyActionInfo[]>();
    50     50    this.updateChain = new List<Tuple<char, int>>();
    51     51    foreach (var nt in grammar.NonTerminalSymbols) {
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization.csproj
r11732 r11742
    46     46    <Compile Include="AlternativesContextSampler.cs" />
    47     47    <Compile Include="ExhaustiveRandomFirstSearch.cs" />
           48  + <Compile Include="MctsContextualSampler.cs">
           49  + <SubType>Code</SubType>
           50  + </Compile>
    48     51    <Compile Include="MctsSampler.cs" />
    49     52    <Compile Include="ExhaustiveDepthFirstSearch.cs" />
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/MctsSampler.cs
r11732 r11742
    12     12    public string ident;
    13     13    public int randomTries;
    14         - public int policyTries;
    15         - public IPolicyActionInfo actionInfo;
           14  + public IBanditPolicyActionInfo actionInfo;
    16     15    public TreeNode[] children;
    17     16    public bool done = false;
     …      …
    22     21
    23     22    public override string ToString() {
    24         - return string.Format("Node({0} tries: {1}, done: {2}, policy: {3})", ident, randomTries + policyTries, done, actionInfo);
           23  + return string.Format("Node({0} tries: {1}, done: {2}, policy: {3})", ident, actionInfo.Tries, done, actionInfo);
    25     24    }
    26     25    }
     …      …
    34     33    private readonly Random random;
    35     34    private readonly int randomTries;
    36         - private readonly IPolicy policy;
           35  + private readonly IBanditPolicy policy;
    37     36
    38     37    private List<TreeNode> updateChain;
     …      …
    47     46    // }
    48     47
    49         - public MctsSampler(IProblem problem, int maxLen, Random random, int randomTries, IPolicy policy) {
           48  + public MctsSampler(IProblem problem, int maxLen, Random random, int randomTries, IBanditPolicy policy) {
    50     49    this.maxLen = maxLen;
    51     50    this.problem = problem;
     …      …
    78     77    public void PrintStats() {
    79     78    var n = rootNode;
    80         - Console.WriteLine("depth: {0,5} size: {1,10} root tries {2,10}", treeDepth, treeSize, rootNode.policyTries + rootNode.randomTries);
           79  + Console.WriteLine("depth: {0,5} size: {1,10} root tries {2,10}", treeDepth, treeSize, n.actionInfo.Tries);
    81     80    while (n.children != null) {
    82     81    Console.WriteLine();
    83     82    Console.WriteLine("{0,5}->{1,-50}", n.ident, string.Join(" ", n.children.Select(ch => string.Format("{0,4}", ch.ident))));
    84         - Console.WriteLine("{0,5} {1,-50}", string.Empty, string.Join(" ", n.children.Select(ch => string.Format("{0,4}", ch.randomTries + ch.policyTries))));
           83  + Console.WriteLine("{0,5} {1,-50}", string.Empty, string.Join(" ", n.children.Select(ch => string.Format("{0,4:F2}", ch.actionInfo.Value * 10))));
           84  + Console.WriteLine("{0,5} {1,-50}", string.Empty, string.Join(" ", n.children.Select(ch => string.Format("{0,4}", ch.done ? "X" : ch.actionInfo.Tries.ToString()))));
    85     85    //n.policy.PrintStats();
    86         - n = n.children.OrderByDescending(c => c.policyTries).First();
           86  + n = n.children.Where(ch => !ch.done).OrderByDescending(c => c.actionInfo.Value).First();
    87     87    }
    88     88    Console.ReadLine();
     …      …
   108    108    if (g.MinPhraseLength(phrase) > maxLen) throw new ArgumentException();
   109    109    TreeNode n = rootNode;
   110         - bool done = phrase.IsTerminal;
   111    110    var curDepth = 0;
   112         - while (!done) {
          111  + while (!phrase.IsTerminal) {
   113    112    updateChain.Add(n);
   114    113
     …      …
   127    126    if (n.randomTries == randomTries && n.children == null) {
   128    127    n.children = alts.Select(alt => new TreeNode(alt.ToString())).ToArray(); // create a new node for each alternative
   129         - //n.children = alts.Select(alt => new TreeNode(string.Empty)).ToArray(); // create a new node for each alternative
   130    128    foreach (var ch in n.children) ch.actionInfo = policy.CreateActionInfo();
   131    129    treeSize += n.children.Length;
   132    130    }
   133         - n.policyTries++;
   134    131    // => select using bandit policy
   135    132    int selectedAltIdx = policy.SelectAction(random, n.children.Select(c => c.actionInfo));
     …      …
   140    137
   141    138    curDepth++;
   142         -
   143         - done = phrase.IsTerminal;
   144    139
   145    140    // prepare for next iteration
     …      …
   153    148    // the last node is a leaf node (sentence is done), so we never need to visit this node again
   154    149    n.done = true;
   155         - n.actionInfo.Disable();
   156    150
   157    151    treeDepth = Math.Max(treeDepth, curDepth);
     …      …
   165    159    foreach (var e in updateChain) {
   166    160    var node = e;
          161  + if (node.done) node.actionInfo.Disable();
   167    162    if (node.children != null && node.children.All(c => c.done)) {
   168    163    node.done = true;
     …      …
   171    166    if (!node.done) {
   172    167    node.actionInfo.UpdateReward(reward);
   173         - //policy.UpdateReward(action, reward / updateChain.Count);
   174    168    }
   175    169    }
Note: See TracChangeset for help on using the changeset viewer.