Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
08/24/15 13:56:27 (9 years ago)
Author:
gkronber
Message:

#2283: experiments on grammatical optimization algorithms (maxreward instead of avg reward, ...)

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization-gkr/HeuristicLab.Algorithms.GrammaticalOptimization/Solvers/SequentialSearch.cs

    r12876 r12893  
      6   6 using System.Runtime.InteropServices;
      7   7 using System.Text;
          8 using System.Windows.Markup;
      8   9 using HeuristicLab.Algorithms.Bandits;
      9  10 using HeuristicLab.Algorithms.Bandits.BanditPolicies;
     
     40  41     private readonly int maxLen;
     41  42     private readonly IProblem problem;
     42         private readonly Random random;
         43     private readonly System.Random random;
     43  44     private readonly int randomTries;
     44  45     private readonly IGrammarPolicy behaviourPolicy;
     
     51  52     private readonly List<string> stateChain;
     52  53
     53         public SequentialSearch(IProblem problem, int maxLen, Random random, int randomTries, IGrammarPolicy behaviourPolicy) {
         54     public SequentialSearch(IProblem problem, int maxLen, System.Random random, int randomTries, IGrammarPolicy behaviourPolicy) {
     54  55       this.maxLen = maxLen;
     55  56       this.problem = problem;
     
    117 118           GenerateFollowStates(n); // creates child nodes for node n
    118 119
        120
    119 121           int selectedChildIdx;
    120 122           if (!behaviourPolicy.TrySelect(random, n.phrase, n.children.Select(ch => ch.phrase), out selectedChildIdx)) {
    121 123             return false;
    122 124           }
        125
    123 126           phrase.ReplaceAt(phrase.FirstNonTerminalIndex, 1, n.children[selectedChildIdx].alternative);
    124 127
     
    167 170     }
    168 171
        172
        173
    169 174     private void DistributeReward(double reward) {
    170 175       behaviourPolicy.UpdateReward(stateChain, reward);
    171 176     }
        177
    172 178
    173 179
     
    178 184       bestQuality = 0.0;
    179 185       tries = 0;
        186       //rootNode = new TreeNode("a*b+c*d+e*f+E", new ReadonlySequence("$"));
    180 187       rootNode = new TreeNode(problem.Grammar.SentenceSymbol.ToString(), new ReadonlySequence("$"));
    181 188     }
     
    194 201
    195 202       var n = rootNode;
    196
        203       int lvl = 0;
    197 204       while (n != null) {
    198 205         var phrase = n.phrase;
    199 206         Console.ForegroundColor = ConsoleColor.White;
        207
        208         if (lvl++ > 10) return;
        209
    200 210         Console.WriteLine("{0,-30}", phrase);
    201 211         var children = n.children;
    202 212         if (children == null || !children.Any()) break;
    203             var triesEnumerable = children.Select(ch => policy.GetTries(ch.phrase));
    204             double maxTries = triesEnumerable.Where(v => !double.IsInfinity(v)).DefaultIfEmpty(1).Max();
    205             maxTries = Math.Max(maxTries, 1.0);
        213         var valuesEnumerable = children.Select(ch => policy.GetValue(ch.phrase));
        214         double maxValue = valuesEnumerable.Where(v => !double.IsInfinity(v)).DefaultIfEmpty(0).Max();
        215         maxValue = Math.Max(maxValue, 1.0);
    206 216         // write phrases
    207 217         foreach (var ch in children) {
    208               SetColorForValue(policy.GetTries(ch.phrase) / maxTries);
        218           //SetColorForValue(policy.GetValue(ch.phrase) / maxValue);
    209 219           Console.Write(" {0,-4}", ch.phrase.Substring(Math.Max(0, ch.phrase.Length - 3), Math.Min(3, ch.phrase.Length)));
    210 220         }
     
    213 223         // write values
    214 224         foreach (var ch in children) {
    215               SetColorForValue(policy.GetTries(ch.phrase) / maxTries);
        225           //SetColorForValue(policy.GetValue(ch.phrase) / maxValue);
    216 226           if (!double.IsInfinity(policy.GetValue(ch.phrase)))
    217 227             Console.Write(" {0:F2}", policy.GetValue(ch.phrase) * 10.0);
     
    223 233         // write tries
    224 234         foreach (var ch in children) {
    225               SetColorForValue(policy.GetTries(ch.phrase) / maxTries);
        235           //SetColorForValue(policy.GetValue(ch.phrase) / maxValue);
    226 236           Console.Write(" {0,4}", policy.GetTries(ch.phrase));
    227 237         }
    228 238         Console.WriteLine();
    229             int selectedChildIdx;
    230             if (!policy.TrySelect(random, phrase, children.Select(ch => ch.phrase), out selectedChildIdx)) {
    231               break;
    232             }
        239         var triesArr = valuesEnumerable.ToArray();
        240         //var selectedChildIdx = Array.IndexOf(triesArr, triesArr.Max());
        241         var valuesArr = children.Select(ch => policy.GetValue(ch.phrase)).ToArray();
        242         int selectedChildIdx = Enumerable.Range(0, children.Length).OrderByDescending(i => valuesArr[i]).ThenByDescending(i => triesArr[i]).First();
        243
        244         //int selectedChildIdx;
        245         //if (!policy.TrySelect(random, phrase, children.Select(ch => ch.phrase), out selectedChildIdx)) {
        246         //  break;
        247         //}
    233 248         n = n.children[selectedChildIdx];
    234 249       }
Note: See TracChangeset for help on using the changeset viewer.