Context Navigation

← Previous Changeset
Next Changeset →

Changeset 12098

Timestamp:

02/27/15 21:52:10 (10 years ago)

Author:

aballeit

Message:

#2283: implemented MCTS

Location:

branches/HeuristicLab.Problems.GrammaticalOptimization

Files:

- 2 added
- 3 deleted
- 5 edited

HeuristicLab.Algorithms.MonteCarloTreeSearch/Base/TreeNode.cs (modified) (2 diffs)
HeuristicLab.Algorithms.MonteCarloTreeSearch/Expansion (deleted)
HeuristicLab.Algorithms.MonteCarloTreeSearch/HeuristicLab.Algorithms.MonteCarloTreeSearch.csproj (modified) (2 diffs)
HeuristicLab.Algorithms.MonteCarloTreeSearch/MonteCarloTreeSearch.cs (modified) (4 diffs)
HeuristicLab.Algorithms.MonteCarloTreeSearch/Simulation/ISimulation.cs (added)
HeuristicLab.Algorithms.MonteCarloTreeSearch/Simulation/ISimulationPolicy.cs (deleted)
HeuristicLab.Algorithms.MonteCarloTreeSearch/Simulation/RandomSimulation.cs (added)
HeuristicLab.Algorithms.MonteCarloTreeSearch/Simulation/RandomSimulationPolicy.cs (deleted)
Main/Main.csproj (modified) (1 diff)
Main/Program.cs (modified) (2 diffs)

Legend:

- Unmodified
- Added
- Removed

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.MonteCarloTreeSearch/Base/TreeNode.cs

-                      r12050
+                      r12098
 using System.Threading.Tasks;
 using HeuristicLab.Algorithms.Bandits;
+using HeuristicLab.Algorithms.Bandits.BanditPolicies;
 using HeuristicLab.Problems.GrammaticalOptimization;
 namespace HeuristicLab.Algorithms.MonteCarloTreeSearch
+namespace HeuristicLab.Algorithms.MonteCarloTreeSearch.Base
+{
     public class TreeNode
 …
         public List<TreeNode> children;
         public IBanditPolicyActionInfo actionInfo;
-        public bool expandable;
-        public List<int> unvisitedNonTerminals;
         public TreeNode(TreeNode parent, string phrase, bool expandable, List<int> unvisitedNonTerminals)
+        public TreeNode(TreeNode parent, string phrase)
+        {
+            this.parent = parent;
             this.phrase = phrase;
+            this.expandable = expandable;
+            this.unvisitedNonTerminals = unvisitedNonTerminals;
+            actionInfo = new DefaultPolicyActionInfo();
+        }
+        public bool IsLeaf()
+        {
+            return children == null || !children.Any();
+        }
+        internal IEnumerable<IBanditPolicyActionInfo> GetChildActionInfos()
+        {
+            return children.Select(n => n.actionInfo);
+        }
+    }

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.MonteCarloTreeSearch/HeuristicLab.Algorithms.MonteCarloTreeSearch.csproj

-                      r12050
+                      r12098
   </ItemGroup>
   <ItemGroup>
+    <Compile Include="Simulation\ISimulation.cs" />
     <Compile Include="MonteCarloTreeSearch.cs" />
     <Compile Include="Properties\AssemblyInfo.cs" />
+    <Compile Include="TreeNode.cs" />
+    <Compile Include="Base\TreeNode.cs" />
+    <Compile Include="Simulation\RandomSimulation.cs" />
   </ItemGroup>
   <ItemGroup>
 …
       <Project>{eea07488-1a51-412a-a52c-53b754a628b3}</Project>
       <Name>HeuristicLab.Algorithms.GrammaticalOptimization</Name>
+    </ProjectReference>
+    <ProjectReference Include="..\HeuristicLab.Common\HeuristicLab.Common.csproj">
+      <Project>{3a2fbbcb-f9df-4970-87f3-f13337d941ad}</Project>
+      <Name>HeuristicLab.Common</Name>
     </ProjectReference>
     <ProjectReference Include="..\HeuristicLab.Problems.GrammaticalOptimization\HeuristicLab.Problems.GrammaticalOptimization.csproj">

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.MonteCarloTreeSearch/MonteCarloTreeSearch.cs

-                      r12050
+                      r12098
 using System.Collections.Generic;
 using System.Linq;
-using System.Resources;
-using System.Text;
-using System.Threading.Tasks;
 using HeuristicLab.Algorithms.Bandits;
+using HeuristicLab.Algorithms.Bandits.GrammarPolicies;
+using HeuristicLab.Algorithms.MonteCarloTreeSearch;
+using HeuristicLab.Algorithms.MonteCarloTreeSearch.Expansion;
+using HeuristicLab.Algorithms.GrammaticalOptimization;
+using HeuristicLab.Algorithms.MonteCarloTreeSearch.Base;
+using HeuristicLab.Algorithms.MonteCarloTreeSearch.Simulation;
+using HeuristicLab.Common;
 using HeuristicLab.Problems.GrammaticalOptimization;
 namespace HeuristicLab.Algorithms.GrammaticalOptimization.Solvers
+namespace HeuristicLab.Algorithms.MonteCarloTreeSearch
+{
     public class MonteCarloTreeSearch : SolverBase
 …
         private readonly int maxLen;
         private readonly IProblem problem;
+        private readonly IGrammar grammar;
         private readonly Random random;
         private readonly IBanditPolicy behaviourPolicy;
+        private readonly IExpansionPolicy expansionPolicy;
+        private readonly ISimulationPolicy simulationPolicy;
+        private readonly ISimulation simulation;
         private TreeNode rootNode;
-        private List<IBanditPolicyActionInfo> actions;
-        private List<TreeNode> nodes;
+        public MonteCarloTreeSearch(IProblem problem, int maxLen, Random random, IBanditPolicy behaviourPolicy,
+            IExpansionPolicy expansionPolicy, ISimulationPolicy simulationPolicy)
+        public MonteCarloTreeSearch(IProblem problem, int maxLen, Random random, IBanditPolicy behaviourPolicy, ISimulation simulationPolicy)
+        {
             this.problem = problem;
+            this.grammar = problem.Grammar;
             this.maxLen = maxLen;
             this.random = random;
             this.behaviourPolicy = behaviourPolicy;
+            this.expansionPolicy = expansionPolicy;
+            this.simulationPolicy = simulationPolicy;
+            this.simulation = simulationPolicy;
+        }
 …
+        {
             Reset();
             for (int i = 0; !StopRequested && !Done() && i < maxIterations; i++)
+            for (int i = 0; !StopRequested && i < maxIterations; i++)
+            {
                 // select by behaviour policy
+                TreeNode currentNode;
                 do
+                TreeNode currentNode = rootNode;
+                while (!currentNode.IsLeaf())
+                {
+                    int currentActionIndex = behaviourPolicy.SelectAction(random, actions);
+                    currentNode = nodes[currentActionIndex];
+                } while (!Expandable(currentNode));
+                    int currentActionIndex = behaviourPolicy.SelectAction(random,
+                        currentNode.GetChildActionInfos());
+                    currentNode = currentNode.children[currentActionIndex];
+                }
+                // expand tree
+                currentNode = expansionPolicy.ExpandTreeNode(currentNode);
+                // simulate
+                double reward = simulationPolicy.Simulate(currentNode);
+                // propagate/reward
+                Propagate(currentNode, reward);
+                string phrase = currentNode.phrase;
+                if (!grammar.IsTerminal(phrase))
+                {
+                    ExpandTreeNode(currentNode);
+                    currentNode =
+                        currentNode.children[behaviourPolicy.SelectAction(random, currentNode.GetChildActionInfos())];
+                }
+                double quality = simulation.Simulate(currentNode);
+                OnSolutionEvaluated(phrase, quality);
+                Propagate(currentNode, quality);
+            }
+        }
+        private void ExpandTreeNode(TreeNode treeNode)
+        {
+            // create children on the first visit
+            if (treeNode.children == null)
+            {
+                treeNode.children = new List<TreeNode>();
+                var phrase = new Sequence(treeNode.phrase);
+                // create subnodes for each nt-symbol in phrase
+                for (int i = 0; i < phrase.Length; i++)
+                {
+                    char symbol = phrase[i];
+                    if (grammar.IsNonTerminal(symbol))
+                    {
+                        // create subnode for each alternative of symbol
+                        foreach (Sequence alternative in grammar.GetAlternatives(symbol))
+                        {
+                            Sequence newSequence = new Sequence(phrase);
+                            newSequence.ReplaceAt(i, 1, alternative);
+                            if (newSequence.Length <= maxLen)
+                            {
+                                TreeNode childNode = new TreeNode(treeNode, newSequence.ToString());
+                                treeNode.children.Add(childNode);
+                            }
+                        }
+                    }
+                }
+            }
+        }
 …
             StopRequested = false;
             bestQuality = 0.0;
             rootNode = new TreeNode(null, problem.Grammar.SentenceSymbol.ToString(), true, new List<int>() { 0 });
+            rootNode = new TreeNode(null, grammar.SentenceSymbol.ToString());
+        }
+        private bool Done()
+        {
+            return !rootNode.expandable;
+        }
+        private bool Expandable(TreeNode node)
+        {
+            return !problem.Grammar.IsTerminal(node.phrase);
+        }
+        private void Propagate(TreeNode node, double reward)
+        private void Propagate(TreeNode node, double quality)
+        {
             var currentNode = node;
             do
+            {
                 currentNode.actionInfo.UpdateReward(reward);
                 currentNode = node.parent;
+                currentNode.actionInfo.UpdateReward(quality);
+                currentNode = currentNode.parent;
             } while (currentNode != null);
+        }
+        public void PrintStats()
+        {
+            //Console.WriteLine("depth: {0,5} tries: {1,5} best phrase {2,50} bestQ {3:F3}", maxSearchDepth, tries, bestPhrase, bestQuality);
+            //// use behaviour strategy to generate the currently preferred sentence
+            //var policy = behaviourPolicy;
+            //var n = rootNode;
+            //while (n != null)
+            //{
+            //    var phrase = n.phrase;
+            //    Console.ForegroundColor = ConsoleColor.White;
+            //    Console.WriteLine("{0,-30}", phrase);
+            //    var children = n.children;
+            //    if (children == null || !children.Any()) break;
+            //    var values = children.Select(ch => policy.GetValue(ch.phrase));
+            //    var maxValue = values.Max();
+            //    if (maxValue == 0) maxValue = 1.0;
+            //    // write phrases
+            //    foreach (var ch in children)
+            //    {
+            //        SetColorForValue(policy.GetValue(ch.phrase) / maxValue);
+            //        Console.Write(" {0,-4}", ch.phrase.Substring(Math.Max(0, ch.phrase.Length - 3), Math.Min(3, ch.phrase.Length)));
+            //    }
+            //    Console.WriteLine();
+            //    // write values
+            //    foreach (var ch in children)
+            //    {
+            //        SetColorForValue(policy.GetValue(ch.phrase) / maxValue);
+            //        Console.Write(" {0:F2}", policy.GetValue(ch.phrase) * 10.0);
+            //    }
+            //    Console.WriteLine();
+            //    // write tries
+            //    foreach (var ch in children)
+            //    {
+            //        SetColorForValue(policy.GetValue(ch.phrase) / maxValue);
+            //        Console.Write(" {0,4}", policy.GetTries(ch.phrase));
+            //    }
+            //    Console.WriteLine();
+            //    int selectedChildIdx;
+            //    if (!policy.TrySelect(random, phrase, children.Select(ch => ch.phrase), out selectedChildIdx))
+            //    {
+            //        break;
+            //    }
+            //    n = n.children[selectedChildIdx];
+            //}
+            //Console.ForegroundColor = ConsoleColor.White;
+            //Console.WriteLine("-------------------");
+        }
+        private void SetColorForValue(double v)
+        {
+            Console.ForegroundColor = ConsoleEx.ColorForValue(v);
+        }
+    }

branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Main.csproj

-                      r11981
+                      r12098
       <Name>HeuristicLab.Algorithms.GrammaticalOptimization</Name>
     </ProjectReference>
+    <ProjectReference Include="..\HeuristicLab.Algorithms.MonteCarloTreeSearch\HeuristicLab.Algorithms.MonteCarloTreeSearch.csproj">
+      <Project>{2c115235-8fa9-4f7f-b3a0-a0144f8a35ca}</Project>
+      <Name>HeuristicLab.Algorithms.MonteCarloTreeSearch</Name>
+    </ProjectReference>
     <ProjectReference Include="..\HeuristicLab.Problems.GrammaticalOptimization\HeuristicLab.Problems.GrammaticalOptimization.csproj">
       <Project>{cb9dccf6-667e-4a13-b82d-dbd6b45a045e}</Project>

branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs

-                      r12050
+                      r12098
 using HeuristicLab.Algorithms.Bandits.BanditPolicies;
 using HeuristicLab.Algorithms.GrammaticalOptimization;
+using HeuristicLab.Algorithms.MonteCarloTreeSearch;
+using HeuristicLab.Algorithms.MonteCarloTreeSearch.Simulation;
 using HeuristicLab.Problems.GrammaticalOptimization;
 …
+namespace Main {
+  class Program {
+    static void Main(string[] args) {
+      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
+namespace Main
+{
+    class Program
+    {
+        static void Main(string[] args)
+        {
+            CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
       RunDemo();
+    }
+            RunDemo();
+        }
+    private static void RunDemo() {
+        private static void RunDemo()
+        {
       int maxIterations = 100000;
       int iterations = 0;
+            int maxIterations = 100000;
+            int iterations = 0;
       var globalStatistics = new SentenceSetStatistics();
       var random = new Random();
+            var globalStatistics = new SentenceSetStatistics();
+            var random = new Random();
+      //var problem = new SymbolicRegressionPoly10Problem();
+      //var problem = new SantaFeAntProblem();
+      var problem = new RoyalPairProblem();
+      //var problem = new EvenParityProblem();
+      var alg = new SequentialSearch(problem, 23, random, 0,
+       new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericGrammarPolicy(problem, new UCB1TunedPolicy()));
+            //var problem = new SymbolicRegressionPoly10Problem();
+            //var problem = new SantaFeAntProblem();
+            var problem = new RoyalPairProblem();
+            //var problem = new EvenParityProblem();
+            //var alg = new SequentialSearch(problem, 23, random, 0,
+            // new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericGrammarPolicy(problem, new UCB1TunedPolicy()));
+            var alg = new MonteCarloTreeSearch(problem, 23, random, new UCB1Policy(), new RandomSimulation(problem, random, 23));
+      alg.FoundNewBestSolution += (sentence, quality) => {
+        //Console.WriteLine("{0}", globalStatistics);
+      };
+            alg.FoundNewBestSolution += (sentence, quality) =>
+            {
+                //Console.WriteLine("{0}", globalStatistics);
+            };
+      alg.SolutionEvaluated += (sentence, quality) => {
+        iterations++;
+        globalStatistics.AddSentence(sentence, quality);
+            alg.SolutionEvaluated += (sentence, quality) =>
+            {
+                iterations++;
+                globalStatistics.AddSentence(sentence, quality);
+        // comment this if you don't want to see solver statistics
+        if (iterations % 100 == 0) {
+          if (iterations % 10000 == 0) Console.Clear();
+          Console.SetCursorPosition(0, 0);
+          alg.PrintStats();
+                // comment this if you don't want to see solver statistics
+                if (iterations % 100 == 0)
+                {
+                    if (iterations % 10000 == 0) Console.Clear();
+                    Console.SetCursorPosition(0, 0);
+                    alg.PrintStats();
+                }
+                // uncomment this if you want to collect statistics of the generated sentences
+                // if (iterations % 1000 == 0) {
+                //   Console.WriteLine("{0}", globalStatistics);
+                // }
+            };
+            var sw = new Stopwatch();
+            sw.Start();
+            alg.Run(maxIterations);
+            sw.Stop();
+            Console.Clear();
+            alg.PrintStats();
+            Console.WriteLine(globalStatistics);
+            Console.WriteLine("{0:F2} sec {1,10:F1} sols/sec {2,10:F1} ns/sol",
+              sw.Elapsed.TotalSeconds,
+              maxIterations / (double)sw.Elapsed.TotalSeconds,
+              (double)sw.ElapsedMilliseconds * 1000 / maxIterations);
+        }
-        // uncomment this if you want to collect statistics of the generated sentences
-        // if (iterations % 1000 == 0) {
-        //   Console.WriteLine("{0}", globalStatistics);
-        // }
-      };
-      var sw = new Stopwatch();
-      sw.Start();
-      alg.Run(maxIterations);
-      sw.Stop();
-      Console.Clear();
-      alg.PrintStats();
-      Console.WriteLine(globalStatistics);
-      Console.WriteLine("{0:F2} sec {1,10:F1} sols/sec {2,10:F1} ns/sol",
-        sw.Elapsed.TotalSeconds,
-        maxIterations / (double)sw.Elapsed.TotalSeconds,
-        (double)sw.ElapsedMilliseconds * 1000 / maxIterations);
+    }
+  }
+}

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 12098

Legend:

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.MonteCarloTreeSearch/Base/TreeNode.cs

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.MonteCarloTreeSearch/HeuristicLab.Algorithms.MonteCarloTreeSearch.csproj

branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.MonteCarloTreeSearch/MonteCarloTreeSearch.cs

branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Main.csproj

branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs

Download in other formats: