Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
01/12/15 21:23:01 (9 years ago)
Author:
gkronber
Message:

#2283: implemented test problems for MCTS

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/MctsContextualSampler.cs

    r11745 r11747  
    1515      public int randomTries;
    1616      public int tries;
     17      public List<TreeNode> parents;
    1718      public TreeNode[] children;
    1819      public bool done = false;
     
    2122        this.ident = id;
    2223        this.alt = alt;
     24        this.parents = new List<TreeNode>();
    2325      }
    2426
     
    2830    }
    2931
     32    private Dictionary<string, TreeNode> treeNodes;
     33    private TreeNode GetTreeNode(string id, ReadonlySequence alt) {
     34      TreeNode n;
     35      var canonicalId = problem.CanonicalRepresentation(id);
     36      if (!treeNodes.TryGetValue(canonicalId, out n)) {
     37        n = new TreeNode(canonicalId, alt);
     38        tries.TryGetValue(canonicalId, out n.tries);
     39        treeNodes[canonicalId] = n;
     40      }
     41      return n;
     42    }
    3043
    3144    public event Action<string, double> FoundNewBestSolution;
     
    5164      this.v = new Dictionary<string, double>(1000000);
    5265      this.tries = new Dictionary<string, int>(1000000);
     66      treeNodes = new Dictionary<string, TreeNode>();
    5367    }
    5468
     
    5771      InitPolicies(problem.Grammar);
    5872      for (int i = 0; !rootNode.done && i < maxIterations; i++) {
    59         var sentence = SampleSentence(problem.Grammar).ToString();
    60         var quality = problem.Evaluate(sentence) / problem.BestKnownQuality(maxLen);
    61         Debug.Assert(quality >= 0 && quality <= 1.0);
    62         DistributeReward(quality);
    63 
    64         RaiseSolutionEvaluated(sentence, quality);
    65 
    66         if (quality > bestQuality) {
    67           bestQuality = quality;
    68           RaiseFoundNewBestSolution(sentence, quality);
     73        bool success;
     74        var sentence = SampleSentence(problem.Grammar, out success).ToString();
     75        if (success) {
     76          var quality = problem.Evaluate(sentence) / problem.BestKnownQuality(maxLen);
     77          Debug.Assert(quality >= 0 && quality <= 1.0);
     78          DistributeReward(quality);
     79
     80          RaiseSolutionEvaluated(sentence, quality);
     81
     82          if (quality > bestQuality) {
     83            bestQuality = quality;
     84            RaiseFoundNewBestSolution(sentence, quality);
     85          }
    6986        }
    7087      }
     
    7895      Console.WriteLine("depth: {0,5} size: {1,10} root tries {2,10}, rootQ {3:F3}, bestQ {4:F3}", treeDepth, treeSize, n.tries, V(n), bestQuality);
    7996      while (n.children != null) {
    80         Console.WriteLine("{0}", n.ident);
    81         double maxVForRow = n.children.Select(ch => V(ch)).Max();
     97        Console.WriteLine("{0,-30}", n.ident);
     98        double maxVForRow = n.children.Select(ch => Math.Min(1.0, Math.Max(0.0, V(ch)))).Max();
    8299        if (maxVForRow == 0) maxVForRow = 1.0;
    83100
    84101        for (int i = 0; i < n.children.Length; i++) {
    85102          var ch = n.children[i];
    86           Console.ForegroundColor = ConsoleEx.ColorForValue(V(ch) / maxVForRow);
     103          Console.ForegroundColor = ConsoleEx.ColorForValue(Math.Min(1.0, V(ch)) / maxVForRow);
    87104          Console.Write("{0,5}", ch.alt);
    88105        }
     
    90107        for (int i = 0; i < n.children.Length; i++) {
    91108          var ch = n.children[i];
    92           Console.ForegroundColor = ConsoleEx.ColorForValue(V(ch) / maxVForRow);
    93           Console.Write("{0,5:F2}", V(ch) * 10);
     109          Console.ForegroundColor = ConsoleEx.ColorForValue(Math.Min(1.0, V(ch)) / maxVForRow);
     110          Console.Write("{0,5:F2}", Math.Min(1.0, V(ch)) * 10);
    94111        }
    95112        Console.WriteLine();
    96113        for (int i = 0; i < n.children.Length; i++) {
    97114          var ch = n.children[i];
    98           Console.ForegroundColor = ConsoleEx.ColorForValue(V(ch) / maxVForRow);
     115          Console.ForegroundColor = ConsoleEx.ColorForValue(Math.Min(1.0, V(ch)) / maxVForRow);
    99116          Console.Write("{0,5}", ch.done ? "X" : ch.tries.ToString());
    100117        }
     
    102119        Console.WriteLine();
    103120        //n.policy.PrintStats();
    104         n = n.children.Where(ch => !ch.done).OrderByDescending(c => V(c)).First();
     121        n = n.children.Where(ch => !ch.done).OrderByDescending(c => c.tries).First();
    105122      }
    106123    }
     
    112129      this.tries.Clear();
    113130
    114       rootNode = new TreeNode(grammar.SentenceSymbol.ToString(), new ReadonlySequence("$"));
     131      rootNode = GetTreeNode(grammar.SentenceSymbol.ToString(), new ReadonlySequence("$"));
    115132      treeDepth = 0;
    116133      treeSize = 0;
    117134    }
    118135
    119     private Sequence SampleSentence(IGrammar grammar) {
     136    private Sequence SampleSentence(IGrammar grammar, out bool success) {
    120137      updateChain.Clear();
    121138      //var startPhrase = new Sequence("a*b+c*d+e*f+E");
    122139      var startPhrase = new Sequence(grammar.SentenceSymbol);
    123       return CompleteSentence(grammar, startPhrase);
    124     }
    125 
    126     private Sequence CompleteSentence(IGrammar g, Sequence phrase) {
     140      return CompleteSentence(grammar, startPhrase, out success);
     141    }
     142
     143    private Sequence CompleteSentence(IGrammar g, Sequence phrase, out bool success) {
    127144      if (phrase.Length > maxLen) throw new ArgumentException();
    128145      if (g.MinPhraseLength(phrase) > maxLen) throw new ArgumentException();
     
    136153          n.randomTries++;
    137154          treeDepth = Math.Max(treeDepth, curDepth);
     155          success = true;
    138156          return g.CompleteSentenceRandomly(random, phrase, maxLen);
    139157        } else {
     
    153171              newPhrase.ReplaceAt(newPhrase.FirstNonTerminalIndex, 1, alt);
    154172              if (!newPhrase.IsTerminal) newPhrase = newPhrase.Subsequence(0, newPhrase.FirstNonTerminalIndex + 1);
    155               n.children[i++] = new TreeNode(newPhrase.ToString(), new ReadonlySequence(alt));
     173              var treeNode = GetTreeNode(newPhrase.ToString(), new ReadonlySequence(alt));
     174              treeNode.parents.Add(n);
     175              n.children[i++] = treeNode;
    156176            }
    157177            treeSize += n.children.Length;
     178            UpdateDone(n);
     179
     180            // it could happend that we already finished all variations starting from the branch
     181            // stop
     182            if (n.done) {
     183              success = false;
     184              return phrase;
     185            }
    158186          }
     187          //int selectedAltIdx = SelectRandom(random, n.children);
     188
    159189          // => select using eps-greedy
    160190          int selectedAltIdx = SelectEpsGreedy(random, n.children);
     
    167197
    168198          curDepth++;
     199
    169200
    170201          // prepare for next iteration
    171202          parent = n;
    172203          n = n.children[selectedAltIdx];
     204          //UpdateTD(parent, n, 0.0);
    173205        }
    174206      } // while
     
    181213
    182214      treeDepth = Math.Max(treeDepth, curDepth);
     215      success = true;
    183216      return phrase;
    184217    }
    185218
     219
     220    //private void UpdateTD(TreeNode parent, TreeNode child, double reward) {
     221    //  double alpha = 1.0;
     222    //  var vParent = V(parent);
     223    //  var vChild = V(child);
     224    //  if (double.IsInfinity(vParent)) vParent = 0.0;
     225    //  if (double.IsInfinity(vChild)) vChild = 0.0;
     226    //  UpdateV(parent, (alpha * (reward + vChild - vParent)));
     227    //}
     228
    186229    private void DistributeReward(double reward) {
     230
    187231      // iterate in reverse order (bottom up)
    188       updateChain.Reverse();
    189 
     232      //updateChain.Reverse();
     233      UpdateDone(updateChain.Last().Item1);
     234      //UpdateTD(updateChain.Last().Item2, updateChain.Last().Item1, reward);
     235      //return;
     236
     237      BackPropReward(updateChain.Last().Item1, reward);
     238      /*
    190239      foreach (var e in updateChain) {
    191240        var node = e.Item1;
    192         var parent = e.Item2;
     241        //var parent = e.Item2;
    193242        node.tries++;
    194         if (node.children != null && node.children.All(c => c.done)) {
    195           node.done = true;
    196         }
     243        //if (node.children != null && node.children.All(c => c.done)) {
     244        //  node.done = true;
     245        //}
    197246        UpdateV(node, reward);
    198247
    199248        // the reward for the parent is either the just recieved reward or the value of the best action so far
    200         double value = 0.0;
    201         if (parent != null) {
    202           var doneChilds = parent.children.Where(ch => ch.done);
    203           if (doneChilds.Any()) value = doneChilds.Select(ch => V(ch)).Max();
    204         }
     249        //double value = 0.0;
     250        //if (parent != null) {
     251        //  var doneChilds = parent.children.Where(ch => ch.done);
     252        //  if (doneChilds.Any()) value = doneChilds.Select(ch => V(ch)).Max();
     253        //}
    205254        //if (value > reward) reward = value;
    206       }
    207     }
     255      }*/
     256    }
     257
     258    private void BackPropReward(TreeNode n, double reward) {
     259      n.tries++;
     260      UpdateV(n, reward);
     261      foreach (var p in n.parents) BackPropReward(p, reward);
     262    }
     263
     264    private void UpdateDone(TreeNode n) {
     265      if (!n.done && n.children != null && n.children.All(c => c.done)) n.done = true;
     266      if (n.done) foreach (var p in n.parents) UpdateDone(p);
     267    }
     268
    208269
    209270    private Dictionary<string, double> v;
     
    219280        tries.Add(canonicalStr, 1);
    220281      } else {
    221         v[canonicalStr] = stateV + 0.005 * (reward - stateV);
    222         //v[canonicalStr] = stateV + (1.0 / tries[canonicalStr]) * (reward - stateV);
     282        //v[canonicalStr] = stateV + 0.005 * (reward - stateV);
     283        v[canonicalStr] = stateV + (1.0 / tries[canonicalStr]) * (reward - stateV);
    223284        tries[canonicalStr]++;
    224285      }
     
    229290      //var canonicalStr = n.ident;
    230291      double stateV;
    231 
     292      if (!tries.ContainsKey(canonicalStr)) return double.PositiveInfinity;
    232293      if (!v.TryGetValue(canonicalStr, out  stateV)) {
    233294        return 0.0;
     
    237298    }
    238299
     300    private int SelectRandom(Random random, TreeNode[] children) {
     301      return children.Select((ch, i) => Tuple.Create(ch, i)).Where(p => !p.Item1.done).SelectRandom(random).Item2;
     302    }
     303
    239304    private int SelectEpsGreedy(Random random, TreeNode[] children) {
    240305      if (random.NextDouble() < 0.2) {
    241 
    242         return children.Select((ch, i) => Tuple.Create(ch, i)).Where(p => !p.Item1.done).SelectRandom(random).Item2;
     306        return SelectRandom(random, children);
    243307      } else {
    244308        var bestQ = double.NegativeInfinity;
Note: See TracChangeset for help on using the changeset viewer.