Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
01/12/15 21:23:01 (9 years ago)
Author:
gkronber
Message:

#2283: implemented test problems for MCTS

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/MctsSampler.cs

    r11745 r11747  
    55using System.Text;
    66using HeuristicLab.Algorithms.Bandits;
     7using HeuristicLab.Common;
    78using HeuristicLab.Problems.GrammaticalOptimization;
    89
     
    1314      public int randomTries;
    1415      public IBanditPolicyActionInfo actionInfo;
     16      public TreeNode parent;
    1517      public TreeNode[] children;
    1618      public bool done = false;
    1719
    18       public TreeNode(string id) {
     20      public TreeNode(string id, TreeNode parent) {
    1921        this.ident = id;
     22        this.parent = parent;
    2023      }
    2124
     
    3538    private readonly IBanditPolicy policy;
    3639
    37     private List<TreeNode> updateChain;
     40    private TreeNode lastNode; // the bottom node in one episode
    3841    private TreeNode rootNode;
    3942
     
    7578      Console.WriteLine("depth: {0,5} size: {1,10} root tries {2,10}, rootQ {3:F3}, bestQ {4:F3}", treeDepth, treeSize, n.actionInfo.Tries, n.actionInfo.Value, bestQuality);
    7679      while (n.children != null) {
     80        Console.WriteLine("{0,-30}", n.ident);
     81        double maxVForRow = n.children.Select(ch => ch.actionInfo.Value).Max();
     82        if (maxVForRow == 0) maxVForRow = 1.0;
     83
     84        for (int i = 0; i < n.children.Length; i++) {
     85          var ch = n.children[i];
     86          SetColorForChild(ch, maxVForRow);
     87          Console.Write("{0,5}", ch.ident);
     88        }
    7789        Console.WriteLine();
    78         Console.WriteLine("{0,5}->{1,-50}", n.ident, string.Join(" ", n.children.Select(ch => string.Format("{0,4}", ch.ident))));
    79         Console.WriteLine("{0,5}  {1,-50}", string.Empty, string.Join(" ", n.children.Select(ch => string.Format("{0,4:F2}", ch.actionInfo.Value * 10))));
    80         Console.WriteLine("{0,5}  {1,-50}", string.Empty, string.Join(" ", n.children.Select(ch => string.Format("{0,4}", ch.done ? "X" : ch.actionInfo.Tries.ToString()))));
     90        for (int i = 0; i < n.children.Length; i++) {
     91          var ch = n.children[i];
     92          SetColorForChild(ch, maxVForRow);
     93          Console.Write("{0,5:F2}", ch.actionInfo.Value * 10);
     94        }
     95        Console.WriteLine();
     96        for (int i = 0; i < n.children.Length; i++) {
     97          var ch = n.children[i];
     98          SetColorForChild(ch, maxVForRow);
     99          Console.Write("{0,5}", ch.done ? "X" : ch.actionInfo.Tries.ToString());
     100        }
     101        Console.ForegroundColor = ConsoleColor.White;
     102        Console.WriteLine();
    81103        //n.policy.PrintStats();
    82         n = n.children.Where(ch => !ch.done).OrderByDescending(c => c.actionInfo.Value).First();
    83       }
     104        //n = n.children.Where(ch => !ch.done).OrderByDescending(c => c.actionInfo.Value).First();
     105        n = n.children.Where(ch=>!ch.done).OrderByDescending(c => c.actionInfo.Value).First();
     106      }
     107      Console.WriteLine("-----------------------");
     108    }
     109
     110    private void SetColorForChild(TreeNode ch, double maxVForRow) {
     111      //if (ch.done) Console.ForegroundColor = ConsoleColor.White;
     112      //else
     113      Console.ForegroundColor = ConsoleEx.ColorForValue(ch.actionInfo.Value / maxVForRow);
    84114    }
    85115
    86116    private void InitPolicies(IGrammar grammar) {
    87       this.updateChain = new List<TreeNode>();
    88 
    89       rootNode = new TreeNode(grammar.SentenceSymbol.ToString());
     117
     118
     119      rootNode = new TreeNode(grammar.SentenceSymbol.ToString(), null);
    90120      rootNode.actionInfo = policy.CreateActionInfo();
    91121      treeDepth = 0;
     
    94124
    95125    private Sequence SampleSentence(IGrammar grammar) {
    96       updateChain.Clear();
     126      lastNode = null;
    97127      var startPhrase = new Sequence(grammar.SentenceSymbol);
     128      //var startPhrase = new Sequence("a*b+c*d+e*f+E");
     129
    98130      return CompleteSentence(grammar, startPhrase);
    99131    }
     
    105137      var curDepth = 0;
    106138      while (!phrase.IsTerminal) {
    107         updateChain.Add(n);
    108139
    109140        if (n.randomTries < randomTries) {
    110141          n.randomTries++;
    111142          treeDepth = Math.Max(treeDepth, curDepth);
     143          lastNode = n;
    112144          return g.CompleteSentenceRandomly(random, phrase, maxLen);
    113145        } else {
     
    120152
    121153          if (n.randomTries == randomTries && n.children == null) {
    122             n.children = alts.Select(alt => new TreeNode(alt.ToString())).ToArray(); // create a new node for each alternative
     154            n.children = alts.Select(alt => new TreeNode(alt.ToString(), n)).ToArray(); // create a new node for each alternative
    123155            foreach (var ch in n.children) ch.actionInfo = policy.CreateActionInfo();
    124156            treeSize += n.children.Length;
     
    138170      } // while
    139171
    140       updateChain.Add(n);
     172      lastNode = n;
    141173
    142174
     
    150182    private void DistributeReward(double reward) {
    151183      // iterate in reverse order (bottom up)
    152       updateChain.Reverse();
    153 
    154       foreach (var e in updateChain) {
    155         var node = e;
    156         if (node.done) node.actionInfo.Disable();
     184
     185      var node = lastNode;
     186      while (node != null) {
     187        if (node.done) node.actionInfo.Disable(reward);
    157188        if (node.children != null && node.children.All(c => c.done)) {
    158189          node.done = true;
    159           node.actionInfo.Disable();
     190          var bestActionValue = node.children.Select(c => c.actionInfo.Value).Max();
     191          node.actionInfo.Disable(bestActionValue);
    160192        }
    161193        if (!node.done) {
    162194          node.actionInfo.UpdateReward(reward);
    163195        }
     196        node = node.parent;
    164197      }
    165198    }
Note: See TracChangeset for help on using the changeset viewer.