Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
01/09/15 14:57:28 (10 years ago)
Author:
gkronber
Message:

#2283 refactoring

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/MctsSampler.cs

    r11732 r11742  
    12 12       public string ident;
    13 13       public int randomTries;
    14          public int policyTries;
    15          public IPolicyActionInfo actionInfo;
       14       public IBanditPolicyActionInfo actionInfo;
    16 15       public TreeNode[] children;
    17 16       public bool done = false;
     
    22 21
    23 22       public override string ToString() {
    24            return string.Format("Node({0} tries: {1}, done: {2}, policy: {3})", ident, randomTries + policyTries, done, actionInfo);
       23         return string.Format("Node({0} tries: {1}, done: {2}, policy: {3})", ident, actionInfo.Tries, done, actionInfo);
    25 24       }
    26 25     }
     
    34 33     private readonly Random random;
    35 34     private readonly int randomTries;
    36        private readonly IPolicy policy;
       35     private readonly IBanditPolicy policy;
    37 36
    38 37     private List<TreeNode> updateChain;
     
    47 46     // }
    48 47
    49        public MctsSampler(IProblem problem, int maxLen, Random random, int randomTries, IPolicy policy) {
       48     public MctsSampler(IProblem problem, int maxLen, Random random, int randomTries, IBanditPolicy policy) {
    50 49       this.maxLen = maxLen;
    51 50       this.problem = problem;
     
    78 77     public void PrintStats() {
    79 78       var n = rootNode;
    80          Console.WriteLine("depth: {0,5} size: {1,10} root tries {2,10}", treeDepth, treeSize, rootNode.policyTries + rootNode.randomTries);
       79       Console.WriteLine("depth: {0,5} size: {1,10} root tries {2,10}", treeDepth, treeSize, n.actionInfo.Tries);
    81 80       while (n.children != null) {
    82 81         Console.WriteLine();
    83 82         Console.WriteLine("{0,5}->{1,-50}", n.ident, string.Join(" ", n.children.Select(ch => string.Format("{0,4}", ch.ident))));
    84            Console.WriteLine("{0,5}  {1,-50}", string.Empty, string.Join(" ", n.children.Select(ch => string.Format("{0,4}", ch.randomTries + ch.policyTries))));
       83         Console.WriteLine("{0,5}  {1,-50}", string.Empty, string.Join(" ", n.children.Select(ch => string.Format("{0,4:F2}", ch.actionInfo.Value * 10))));
       84         Console.WriteLine("{0,5}  {1,-50}", string.Empty, string.Join(" ", n.children.Select(ch => string.Format("{0,4}", ch.done ? "X" : ch.actionInfo.Tries.ToString()))));
    85 85         //n.policy.PrintStats();
    86            n = n.children.OrderByDescending(c => c.policyTries).First();
       86         n = n.children.Where(ch => !ch.done).OrderByDescending(c => c.actionInfo.Value).First();
    87 87       }
    88 88       Console.ReadLine();
     
    108 108       if (g.MinPhraseLength(phrase) > maxLen) throw new ArgumentException();
    109 109       TreeNode n = rootNode;
    110           bool done = phrase.IsTerminal;
    111 110       var curDepth = 0;
    112           while (!done) {
        111       while (!phrase.IsTerminal) {
    113 112         updateChain.Add(n);
    114 113
     
    127 126           if (n.randomTries == randomTries && n.children == null) {
    128 127             n.children = alts.Select(alt => new TreeNode(alt.ToString())).ToArray(); // create a new node for each alternative
    129                 //n.children = alts.Select(alt => new TreeNode(string.Empty)).ToArray(); // create a new node for each alternative
    130 128             foreach (var ch in n.children) ch.actionInfo = policy.CreateActionInfo();
    131 129             treeSize += n.children.Length;
    132 130           }
    133               n.policyTries++;
    134 131           // => select using bandit policy
    135 132           int selectedAltIdx = policy.SelectAction(random, n.children.Select(c => c.actionInfo));
     
    140 137
    141 138           curDepth++;
    142
    143               done = phrase.IsTerminal;
    144 139
    145 140           // prepare for next iteration
     
    153 148       // the last node is a leaf node (sentence is done), so we never need to visit this node again
    154 149       n.done = true;
    155           n.actionInfo.Disable();
    156 150
    157 151       treeDepth = Math.Max(treeDepth, curDepth);
     
    165 159       foreach (var e in updateChain) {
    166 160         var node = e;
        161         if (node.done) node.actionInfo.Disable();
    167 162         if (node.children != null && node.children.All(c => c.done)) {
    168 163           node.done = true;
     
    171 166         if (!node.done) {
    172 167           node.actionInfo.UpdateReward(reward);
    173               //policy.UpdateReward(action, reward / updateChain.Count);
    174 168         }
    175 169       }
Note: See TracChangeset for help on using the changeset viewer.