Context Navigation

← Previous Change
Next Change →

HeuristicLab.Algorithms.MonteCarloTreeSearch

Timestamp:

02/27/15 21:52:10 (10 years ago)

Author:

aballeit

Message:

#2283: implemented MCTS

Location:

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.MonteCarloTreeSearch

Files:

2 added
3 deleted
3 edited

Base/TreeNode.cs (modified) (2 diffs)
Expansion (deleted)
HeuristicLab.Algorithms.MonteCarloTreeSearch.csproj (modified) (2 diffs)
MonteCarloTreeSearch.cs (modified) (4 diffs)
Simulation/ISimulation.cs (added)
Simulation/ISimulationPolicy.cs (deleted)
Simulation/RandomSimulation.cs (added)
Simulation/RandomSimulationPolicy.cs (deleted)

Legend:

: Unmodified
: Added
: Removed

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.MonteCarloTreeSearch/Base/TreeNode.cs

-                      r12050
+                      r12098
 using System.Threading.Tasks;
 using HeuristicLab.Algorithms.Bandits;
+using HeuristicLab.Algorithms.Bandits.BanditPolicies;
 using HeuristicLab.Problems.GrammaticalOptimization;
 namespace HeuristicLab.Algorithms.MonteCarloTreeSearch
+namespace HeuristicLab.Algorithms.MonteCarloTreeSearch.Base
+{
     public class TreeNode
 …
         public List<TreeNode> children;
         public IBanditPolicyActionInfo actionInfo;
-        public bool expandable;
-        public List<int> unvisitedNonTerminals;
         public TreeNode(TreeNode parent, string phrase, bool expandable, List<int> unvisitedNonTerminals)
+        public TreeNode(TreeNode parent, string phrase)
+        {
+            this.parent = parent;
             this.phrase = phrase;
+            this.expandable = expandable;
+            this.unvisitedNonTerminals = unvisitedNonTerminals;
+            actionInfo = new DefaultPolicyActionInfo();
+        }
+        public bool IsLeaf()
+        {
             // A node counts as a leaf while its child list has not been created yet
             // (children == null) or has been created but holds no entries.
+            return children == null || !children.Any();
+        }
+        internal IEnumerable<IBanditPolicyActionInfo> GetChildActionInfos()
+        {
             // Projects every child node onto its actionInfo, in child-list order.
             // Select is deferred, so the projection runs each time the result is enumerated.
             // NOTE(review): children is dereferenced without a null check. The visible call sites
             // only reach this after IsLeaf() returned false or after ExpandTreeNode created the
             // list; any other caller would fail on a node whose children is still null.
+            return children.Select(n => n.actionInfo);
+        }
+    }

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.MonteCarloTreeSearch/HeuristicLab.Algorithms.MonteCarloTreeSearch.csproj

-                      r12050
+                      r12098
   </ItemGroup>
   <ItemGroup>
+    <Compile Include="Simulation\ISimulation.cs" />
     <Compile Include="MonteCarloTreeSearch.cs" />
     <Compile Include="Properties\AssemblyInfo.cs" />
+    <Compile Include="TreeNode.cs" />
+    <Compile Include="Base\TreeNode.cs" />
+    <Compile Include="Simulation\RandomSimulation.cs" />
   </ItemGroup>
   <ItemGroup>
 …
       <Project>{eea07488-1a51-412a-a52c-53b754a628b3}</Project>
       <Name>HeuristicLab.Algorithms.GrammaticalOptimization</Name>
+    </ProjectReference>
+    <ProjectReference Include="..\HeuristicLab.Common\HeuristicLab.Common.csproj">
+      <Project>{3a2fbbcb-f9df-4970-87f3-f13337d941ad}</Project>
+      <Name>HeuristicLab.Common</Name>
     </ProjectReference>
     <ProjectReference Include="..\HeuristicLab.Problems.GrammaticalOptimization\HeuristicLab.Problems.GrammaticalOptimization.csproj">

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.MonteCarloTreeSearch/MonteCarloTreeSearch.cs

-                      r12050
+                      r12098
 using System.Collections.Generic;
 using System.Linq;
-using System.Resources;
-using System.Text;
-using System.Threading.Tasks;
 using HeuristicLab.Algorithms.Bandits;
+using HeuristicLab.Algorithms.Bandits.GrammarPolicies;
+using HeuristicLab.Algorithms.MonteCarloTreeSearch;
+using HeuristicLab.Algorithms.MonteCarloTreeSearch.Expansion;
+using HeuristicLab.Algorithms.GrammaticalOptimization;
+using HeuristicLab.Algorithms.MonteCarloTreeSearch.Base;
+using HeuristicLab.Algorithms.MonteCarloTreeSearch.Simulation;
+using HeuristicLab.Common;
 using HeuristicLab.Problems.GrammaticalOptimization;
 namespace HeuristicLab.Algorithms.GrammaticalOptimization.Solvers
+namespace HeuristicLab.Algorithms.MonteCarloTreeSearch
+{
     public class MonteCarloTreeSearch : SolverBase
 …
         private readonly int maxLen;
         private readonly IProblem problem;
+        private readonly IGrammar grammar;
         private readonly Random random;
         private readonly IBanditPolicy behaviourPolicy;
+        private readonly IExpansionPolicy expansionPolicy;
+        private readonly ISimulationPolicy simulationPolicy;
+        private readonly ISimulation simulation;
         private TreeNode rootNode;
-        private List<IBanditPolicyActionInfo> actions;
-        private List<TreeNode> nodes;
+        public MonteCarloTreeSearch(IProblem problem, int maxLen, Random random, IBanditPolicy behaviourPolicy,
+            IExpansionPolicy expansionPolicy, ISimulationPolicy simulationPolicy)
+        public MonteCarloTreeSearch(IProblem problem, int maxLen, Random random, IBanditPolicy behaviourPolicy, ISimulation simulationPolicy)
+        {
             this.problem = problem;
+            this.grammar = problem.Grammar;
             this.maxLen = maxLen;
             this.random = random;
             this.behaviourPolicy = behaviourPolicy;
+            this.expansionPolicy = expansionPolicy;
+            this.simulationPolicy = simulationPolicy;
+            this.simulation = simulationPolicy;
+        }
 …
+        {
             Reset();
             for (int i = 0; !StopRequested && !Done() && i < maxIterations; i++)
+            for (int i = 0; !StopRequested && i < maxIterations; i++)
+            {
                 // select by behaviour policy
+                TreeNode currentNode;
                 do
+                TreeNode currentNode = rootNode;
+                while (!currentNode.IsLeaf())
+                {
+                    int currentActionIndex = behaviourPolicy.SelectAction(random, actions);
+                    currentNode = nodes[currentActionIndex];
+                } while (!Expandable(currentNode));
+                    int currentActionIndex = behaviourPolicy.SelectAction(random,
+                        currentNode.GetChildActionInfos());
+                    currentNode = currentNode.children[currentActionIndex];
+                }
+                // expand tree
+                currentNode = expansionPolicy.ExpandTreeNode(currentNode);
+                // simulate
+                double reward = simulationPolicy.Simulate(currentNode);
+                // propagate/reward
+                Propagate(currentNode, reward);
+                string phrase = currentNode.phrase;
+                if (!grammar.IsTerminal(phrase))
+                {
+                    ExpandTreeNode(currentNode);
+                    currentNode =
+                        currentNode.children[behaviourPolicy.SelectAction(random, currentNode.GetChildActionInfos())];
+                }
+                double quality = simulation.Simulate(currentNode);
+                OnSolutionEvaluated(phrase, quality);
+                Propagate(currentNode, quality);
+            }
+        }
+        private void ExpandTreeNode(TreeNode treeNode)
+        {
             // Builds treeNode.children: one child per (non-terminal position, alternative) pair
             // of the node's phrase. A node that already has a child list is left untouched.
             // NOTE(review): if no candidate fits within maxLen the list stays empty, yet the
             // search loop indexes into children right after calling this — confirm that case
             // cannot occur or is handled by the behaviour policy.
+            // create children on the first visit
+            if (treeNode.children == null)
+            {
+                treeNode.children = new List<TreeNode>();
+                var phrase = new Sequence(treeNode.phrase);
+                // create subnodes for each nt-symbol in phrase
+                for (int i = 0; i < phrase.Length; i++)
+                {
+                    char symbol = phrase[i];
+                    if (grammar.IsNonTerminal(symbol))
+                    {
+                        // create subnode for each alternative of symbol
+                        foreach (Sequence alternative in grammar.GetAlternatives(symbol))
+                        {
                                 // presumably copies the parent phrase and swaps the single symbol at
                                 // position i for the alternative — confirm against Sequence.ReplaceAt
+                            Sequence newSequence = new Sequence(phrase);
+                            newSequence.ReplaceAt(i, 1, alternative);
                                 // candidates longer than maxLen are dropped, not added as children
+                            if (newSequence.Length <= maxLen)
+                            {
+                                TreeNode childNode = new TreeNode(treeNode, newSequence.ToString());
+                                treeNode.children.Add(childNode);
+                            }
+                        }
+                    }
+                }
+            }
+        }
 …
             StopRequested = false;
             bestQuality = 0.0;
             rootNode = new TreeNode(null, problem.Grammar.SentenceSymbol.ToString(), true, new List<int>() { 0 });
+            rootNode = new TreeNode(null, grammar.SentenceSymbol.ToString());
+        }
+        private bool Done()
+        {
             // Reports completion once the root node is no longer flagged as expandable.
             // NOTE(review): TreeNode.expandable carries a removal marker in the TreeNode.cs hunk
             // of this changeset, and the only visible call to Done() sits in the loop header
             // without an addition marker — confirm whether this helper still exists in r12098.
+            return !rootNode.expandable;
+        }
+        private bool Expandable(TreeNode node)
+        {
             // True when the grammar does not classify the node's phrase as terminal.
             // NOTE(review): this goes through problem.Grammar while ExpandTreeNode uses the
             // grammar field; the constructor assigns grammar from problem.Grammar, so both
             // should refer to the same grammar — confirm.
+            return !problem.Grammar.IsTerminal(node.phrase);
+        }
+        private void Propagate(TreeNode node, double reward)
+        private void Propagate(TreeNode node, double quality)
+        {
             var currentNode = node;
             do
+            {
                 currentNode.actionInfo.UpdateReward(reward);
                 currentNode = node.parent;
+                currentNode.actionInfo.UpdateReward(quality);
+                currentNode = currentNode.parent;
             } while (currentNode != null);
+        }
+        public void PrintStats()
+        {
             // NOTE(review): every statement below is commented out, so calling this method
             // currently does nothing. The disabled code started at rootNode, printed each child's
             // value and tries as reported by the policy, and then followed the child the policy
             // selected until no children remained or TrySelect failed.
+            //Console.WriteLine("depth: {0,5} tries: {1,5} best phrase {2,50} bestQ {3:F3}", maxSearchDepth, tries, bestPhrase, bestQuality);
+            //// use behaviour strategy to generate the currently prefered sentence
+            //var policy = behaviourPolicy;
+            //var n = rootNode;
+            //while (n != null)
+            //{
+            //    var phrase = n.phrase;
+            //    Console.ForegroundColor = ConsoleColor.White;
+            //    Console.WriteLine("{0,-30}", phrase);
+            //    var children = n.children;
+            //    if (children == null || !children.Any()) break;
+            //    var values = children.Select(ch => policy.GetValue(ch.phrase));
+            //    var maxValue = values.Max();
+            //    if (maxValue == 0) maxValue = 1.0;
+            //    // write phrases
+            //    foreach (var ch in children)
+            //    {
+            //        SetColorForValue(policy.GetValue(ch.phrase) / maxValue);
+            //        Console.Write(" {0,-4}", ch.phrase.Substring(Math.Max(0, ch.phrase.Length - 3), Math.Min(3, ch.phrase.Length)));
+            //    }
+            //    Console.WriteLine();
+            //    // write values
+            //    foreach (var ch in children)
+            //    {
+            //        SetColorForValue(policy.GetValue(ch.phrase) / maxValue);
+            //        Console.Write(" {0:F2}", policy.GetValue(ch.phrase) * 10.0);
+            //    }
+            //    Console.WriteLine();
+            //    // write tries
+            //    foreach (var ch in children)
+            //    {
+            //        SetColorForValue(policy.GetValue(ch.phrase) / maxValue);
+            //        Console.Write(" {0,4}", policy.GetTries(ch.phrase));
+            //    }
+            //    Console.WriteLine();
+            //    int selectedChildIdx;
+            //    if (!policy.TrySelect(random, phrase, children.Select(ch => ch.phrase), out selectedChildIdx))
+            //    {
+            //        break;
+            //    }
+            //    n = n.children[selectedChildIdx];
+            //}
+            //Console.ForegroundColor = ConsoleColor.White;
+            //Console.WriteLine("-------------------");
+        }
+        private void SetColorForValue(double v)
+        {
             // Sets the console foreground color to whatever ConsoleEx.ColorForValue returns for v.
             // Within this view it is referenced only from the commented-out code in PrintStats.
+            Console.ForegroundColor = ConsoleEx.ColorForValue(v);
+        }
+    }

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 12098 for branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.MonteCarloTreeSearch

Legend:

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.MonteCarloTreeSearch/Base/TreeNode.cs

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.MonteCarloTreeSearch/HeuristicLab.Algorithms.MonteCarloTreeSearch.csproj

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.MonteCarloTreeSearch/MonteCarloTreeSearch.cs

Download in other formats: