Changeset 12098 for branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.MonteCarloTreeSearch
- Timestamp:
- 02/27/15 21:52:10 (10 years ago)
- Location:
- branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.MonteCarloTreeSearch
- Files:
-
- 2 added
- 3 deleted
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.MonteCarloTreeSearch/Base/TreeNode.cs
r12050 r12098 5 5 using System.Threading.Tasks; 6 6 using HeuristicLab.Algorithms.Bandits; 7 using HeuristicLab.Algorithms.Bandits.BanditPolicies; 7 8 using HeuristicLab.Problems.GrammaticalOptimization; 8 9 9 namespace HeuristicLab.Algorithms.MonteCarloTreeSearch 10 namespace HeuristicLab.Algorithms.MonteCarloTreeSearch.Base 10 11 { 11 12 public class TreeNode … … 15 16 public List<TreeNode> children; 16 17 public IBanditPolicyActionInfo actionInfo; 17 public bool expandable;18 public List<int> unvisitedNonTerminals;19 18 20 public TreeNode(TreeNode parent, string phrase , bool expandable, List<int> unvisitedNonTerminals)19 public TreeNode(TreeNode parent, string phrase) 21 20 { 21 this.parent = parent; 22 22 this.phrase = phrase; 23 this.expandable = expandable; 24 this.unvisitedNonTerminals = unvisitedNonTerminals; 23 actionInfo = new DefaultPolicyActionInfo(); 24 } 25 public bool IsLeaf() 26 { 27 return children == null || !children.Any(); 28 } 29 30 internal IEnumerable<IBanditPolicyActionInfo> GetChildActionInfos() 31 { 32 return children.Select(n => n.actionInfo); 25 33 } 26 34 } -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.MonteCarloTreeSearch/HeuristicLab.Algorithms.MonteCarloTreeSearch.csproj
r12050 r12098 40 40 </ItemGroup> 41 41 <ItemGroup> 42 <Compile Include="Simulation\ISimulation.cs" /> 42 43 <Compile Include="MonteCarloTreeSearch.cs" /> 43 44 <Compile Include="Properties\AssemblyInfo.cs" /> 44 <Compile Include="TreeNode.cs" /> 45 <Compile Include="Base\TreeNode.cs" /> 46 <Compile Include="Simulation\RandomSimulation.cs" /> 45 47 </ItemGroup> 46 48 <ItemGroup> … … 52 54 <Project>{eea07488-1a51-412a-a52c-53b754a628b3}</Project> 53 55 <Name>HeuristicLab.Algorithms.GrammaticalOptimization</Name> 56 </ProjectReference> 57 <ProjectReference Include="..\HeuristicLab.Common\HeuristicLab.Common.csproj"> 58 <Project>{3a2fbbcb-f9df-4970-87f3-f13337d941ad}</Project> 59 <Name>HeuristicLab.Common</Name> 54 60 </ProjectReference> 55 61 <ProjectReference Include="..\HeuristicLab.Problems.GrammaticalOptimization\HeuristicLab.Problems.GrammaticalOptimization.csproj"> -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.MonteCarloTreeSearch/MonteCarloTreeSearch.cs
r12050 r12098 2 2 using System.Collections.Generic; 3 3 using System.Linq; 4 using System.Resources;5 using System.Text;6 using System.Threading.Tasks;7 4 using HeuristicLab.Algorithms.Bandits; 8 using HeuristicLab.Algorithms.Bandits.GrammarPolicies; 9 using HeuristicLab.Algorithms.MonteCarloTreeSearch; 10 using HeuristicLab.Algorithms.MonteCarloTreeSearch.Expansion; 5 using HeuristicLab.Algorithms.GrammaticalOptimization; 6 using HeuristicLab.Algorithms.MonteCarloTreeSearch.Base; 7 using HeuristicLab.Algorithms.MonteCarloTreeSearch.Simulation; 8 using HeuristicLab.Common; 11 9 using HeuristicLab.Problems.GrammaticalOptimization; 12 10 13 namespace HeuristicLab.Algorithms. GrammaticalOptimization.Solvers11 namespace HeuristicLab.Algorithms.MonteCarloTreeSearch 14 12 { 15 13 public class MonteCarloTreeSearch : SolverBase … … 17 15 private readonly int maxLen; 18 16 private readonly IProblem problem; 17 private readonly IGrammar grammar; 19 18 private readonly Random random; 20 19 private readonly IBanditPolicy behaviourPolicy; 21 private readonly IExpansionPolicy expansionPolicy; 22 private readonly ISimulationPolicy simulationPolicy; 20 private readonly ISimulation simulation; 23 21 private TreeNode rootNode; 24 private List<IBanditPolicyActionInfo> actions;25 private List<TreeNode> nodes;26 22 27 public MonteCarloTreeSearch(IProblem problem, int maxLen, Random random, IBanditPolicy behaviourPolicy, 28 IExpansionPolicy expansionPolicy, ISimulationPolicy simulationPolicy) 23 public MonteCarloTreeSearch(IProblem problem, int maxLen, Random random, IBanditPolicy behaviourPolicy, ISimulation simulationPolicy) 29 24 { 30 25 this.problem = problem; 26 this.grammar = problem.Grammar; 31 27 this.maxLen = maxLen; 32 28 this.random = random; 33 29 this.behaviourPolicy = behaviourPolicy; 34 this.expansionPolicy = expansionPolicy; 35 this.simulationPolicy = simulationPolicy; 30 this.simulation = simulationPolicy; 36 31 } 37 32 … … 45 40 { 46 41 Reset(); 47 for (int i = 0; !StopRequested && !Done() &&i < maxIterations; i++)42 for (int i = 0; !StopRequested && i < maxIterations; i++) 48 43 { 49 // select by behaviour policy50 TreeNode currentNode; 51 do44 TreeNode currentNode = rootNode; 45 46 while (!currentNode.IsLeaf()) 52 47 { 53 int currentActionIndex = behaviourPolicy.SelectAction(random, actions); 54 currentNode = nodes[currentActionIndex]; 55 } while (!Expandable(currentNode)); 48 int currentActionIndex = behaviourPolicy.SelectAction(random, 49 currentNode.GetChildActionInfos()); 50 currentNode = currentNode.children[currentActionIndex]; 51 } 56 52 57 // expand tree 58 currentNode = expansionPolicy.ExpandTreeNode(currentNode); 59 // simulate 60 double reward = simulationPolicy.Simulate(currentNode); 61 // propagate/reward 62 Propagate(currentNode, reward); 53 string phrase = currentNode.phrase; 54 55 if (!grammar.IsTerminal(phrase)) 56 { 57 ExpandTreeNode(currentNode); 58 59 currentNode = 60 currentNode.children[behaviourPolicy.SelectAction(random, currentNode.GetChildActionInfos())]; 61 } 62 double quality = simulation.Simulate(currentNode); 63 OnSolutionEvaluated(phrase, quality); 64 65 Propagate(currentNode, quality); 66 } 67 } 68 69 private void ExpandTreeNode(TreeNode treeNode) 70 { 71 // create children on the first visit 72 if (treeNode.children == null) 73 { 74 treeNode.children = new List<TreeNode>(); 75 76 var phrase = new Sequence(treeNode.phrase); 77 // create subnodes for each nt-symbol in phrase 78 for (int i = 0; i < phrase.Length; i++) 79 { 80 char symbol = phrase[i]; 81 if (grammar.IsNonTerminal(symbol)) 82 { 83 // create subnode for each alternative of symbol 84 foreach (Sequence alternative in grammar.GetAlternatives(symbol)) 85 { 86 Sequence newSequence = new Sequence(phrase); 87 newSequence.ReplaceAt(i, 1, alternative); 88 if (newSequence.Length <= maxLen) 89 { 90 TreeNode childNode = new TreeNode(treeNode, newSequence.ToString()); 91 treeNode.children.Add(childNode); 92 } 93 } 94 } 95 } 63 96 } 64 97 } … … 68 101 StopRequested = false; 69 102 bestQuality = 0.0; 70 rootNode = new TreeNode(null, problem.Grammar.SentenceSymbol.ToString(), true, new List<int>() { 0 });103 rootNode = new TreeNode(null, grammar.SentenceSymbol.ToString()); 71 104 } 72 105 73 private bool Done() 74 { 75 return !rootNode.expandable; 76 } 77 78 private bool Expandable(TreeNode node) 79 { 80 return !problem.Grammar.IsTerminal(node.phrase); 81 } 82 83 private void Propagate(TreeNode node, double reward) 106 private void Propagate(TreeNode node, double quality) 84 107 { 85 108 var currentNode = node; 86 109 do 87 110 { 88 currentNode.actionInfo.UpdateReward( reward);89 currentNode = node.parent;111 currentNode.actionInfo.UpdateReward(quality); 112 currentNode = currentNode.parent; 90 113 } while (currentNode != null); 114 } 115 116 public void PrintStats() 117 { 118 //Console.WriteLine("depth: {0,5} tries: {1,5} best phrase {2,50} bestQ {3:F3}", maxSearchDepth, tries, bestPhrase, bestQuality); 119 120 //// use behaviour strategy to generate the currently prefered sentence 121 //var policy = behaviourPolicy; 122 123 //var n = rootNode; 124 125 //while (n != null) 126 //{ 127 // var phrase = n.phrase; 128 // Console.ForegroundColor = ConsoleColor.White; 129 // Console.WriteLine("{0,-30}", phrase); 130 // var children = n.children; 131 // if (children == null || !children.Any()) break; 132 // var values = children.Select(ch => policy.GetValue(ch.phrase)); 133 // var maxValue = values.Max(); 134 // if (maxValue == 0) maxValue = 1.0; 135 136 // // write phrases 137 // foreach (var ch in children) 138 // { 139 // SetColorForValue(policy.GetValue(ch.phrase) / maxValue); 140 // Console.Write(" {0,-4}", ch.phrase.Substring(Math.Max(0, ch.phrase.Length - 3), Math.Min(3, ch.phrase.Length))); 141 // } 142 // Console.WriteLine(); 143 144 // // write values 145 // foreach (var ch in children) 146 // { 147 // SetColorForValue(policy.GetValue(ch.phrase) / maxValue); 148 // Console.Write(" {0:F2}", policy.GetValue(ch.phrase) * 10.0); 149 // } 150 // Console.WriteLine(); 151 152 // // write tries 153 // foreach (var ch in children) 154 // { 155 // SetColorForValue(policy.GetValue(ch.phrase) / maxValue); 156 // Console.Write(" {0,4}", policy.GetTries(ch.phrase)); 157 // } 158 // Console.WriteLine(); 159 // int selectedChildIdx; 160 // if (!policy.TrySelect(random, phrase, children.Select(ch => ch.phrase), out selectedChildIdx)) 161 // { 162 // break; 163 // } 164 // n = n.children[selectedChildIdx]; 165 //} 166 167 //Console.ForegroundColor = ConsoleColor.White; 168 //Console.WriteLine("-------------------"); 169 } 170 171 private void SetColorForValue(double v) 172 { 173 Console.ForegroundColor = ConsoleEx.ColorForValue(v); 91 174 } 92 175 }
Note: See TracChangeset
for help on using the changeset viewer.