Free cookie consent management tool by TermsFeed Policy Generator

Changeset 10436


Ignore:
Timestamp:
02/04/14 19:12:37 (10 years ago)
Author:
gkronber
Message:

#2026 implemented softmax strategy for tree exploration

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Problems.GPDL/CodeGenerator/MonteCarloTreeSearchCodeGen.cs

    r10430 r10436  
    138138        //}
    139139        // select the alternative with the largest average
    140         altIdx = -1;
    141         double best = double.NegativeInfinity;
    142         for(int idx = 0; idx < searchTree.children.Length; idx++) {
    143           if(searchTree.children[idx] == null) continue;
    144           if (!searchTree.children[idx].done && Grammar.minDepth[Grammar.transition[state][idx]] <= maxDepth  && UCB(searchTree, searchTree.children[idx]) > best) {
    145             altIdx = idx;
    146             best = UCB(searchTree, searchTree.children[idx]);
    147           }
    148         }
     140        // altIdx = -1;
     141        // double best = double.NegativeInfinity;
     142        // for(int idx = 0; idx < searchTree.children.Length; idx++) {
     143        //   if(searchTree.children[idx] == null) continue;
     144        //   if (!searchTree.children[idx].done && Grammar.minDepth[Grammar.transition[state][idx]] <= maxDepth  && UCB(searchTree, searchTree.children[idx]) > best) {
     145        //     altIdx = idx;
     146        //     best = UCB(searchTree, searchTree.children[idx]);
     147        //   }
     148        // }
     149        // Softmax selection
     150        double temperature = 1;
     151        var ms = searchTree.children.Select((c,i) => c == null || c.done || Grammar.minDepth[Grammar.transition[state][i]] > maxDepth ? 0.0 : Math.Exp((c.sumQuality / c.tries) / temperature)).ToArray();
     152        var msSum = ms.Sum();
     153        if(msSum == 0.0) {
     154          // uniform distribution
     155          ms = searchTree.children.Select((c,i) => c == null || c.done || Grammar.minDepth[Grammar.transition[state][i]] > maxDepth ? 0.0 : 1.0).ToArray(); 
     156          msSum = ms.Sum();
     157        }
     158        Debug.Assert(msSum > 0.0);
     159        var r = random.NextDouble() * msSum;
     160         
     161        altIdx = 0;
     162        var aggSum = 0.0;
     163        while(altIdx < searchTree.children.Length && aggSum <= r) {
     164          var c = searchTree.children[altIdx];
     165          aggSum += ms[altIdx++];
     166        }
     167        altIdx--;
    149168        Debug.Assert(altIdx > -1);
    150169        return altIdx;
    151170      }
    152171    }
    153 
     172 
    154173    public static double UCB(SearchTreeNode parent, SearchTreeNode n) {
    155174      Debug.Assert(parent.tries >= n.tries);
Note: See TracChangeset for help on using the changeset viewer.