Free cookie consent management tool by TermsFeed Policy Generator

Changeset 11727 for branches


Ignore:
Timestamp:
12/29/14 11:02:36 (9 years ago)
Author:
gkronber
Message:

#2283: worked on grammatical optimization problem solvers (simple MCTS done)

Location:
branches/HeuristicLab.Problems.GrammaticalOptimization
Files:
12 added
29 edited
1 moved

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization/GrammaticalOptimization.sln

    r11708 r11727  
    1111EndProject
    1212Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "HeuristicLab.Algorithms.Bandits", "HeuristicLab.Algorithms.Bandits\HeuristicLab.Algorithms.Bandits.csproj", "{24408F7D-EE0F-4886-A08B-EC324D662E47}"
     13EndProject
     14Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "HeuristicLab.Common", "HeuristicLab.Common\HeuristicLab.Common.csproj", "{3A2FBBCB-F9DF-4970-87F3-F13337D941AD}"
    1315EndProject
    1416Global
     
    3840    {24408F7D-EE0F-4886-A08B-EC324D662E47}.Release|Any CPU.ActiveCfg = Release|Any CPU
    3941    {24408F7D-EE0F-4886-A08B-EC324D662E47}.Release|Any CPU.Build.0 = Release|Any CPU
     42    {3A2FBBCB-F9DF-4970-87F3-F13337D941AD}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
     43    {3A2FBBCB-F9DF-4970-87F3-F13337D941AD}.Debug|Any CPU.Build.0 = Debug|Any CPU
     44    {3A2FBBCB-F9DF-4970-87F3-F13337D941AD}.Release|Any CPU.ActiveCfg = Release|Any CPU
     45    {3A2FBBCB-F9DF-4970-87F3-F13337D941AD}.Release|Any CPU.Build.0 = Release|Any CPU
    4046  EndGlobalSection
    4147  GlobalSection(SolutionProperties) = preSolution
    4248    HideSolutionNode = FALSE
    4349  EndGlobalSection
     50  GlobalSection(Performance) = preSolution
     51    HasPerformanceSessions = true
     52  EndGlobalSection
    4453EndGlobal
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/HeuristicLab.Algorithms.Bandits.csproj

    r11711 r11727  
    4343    <Compile Include="Bandits\TruncatedNormalBandit.cs" />
    4444    <Compile Include="Policies\BanditPolicy.cs" />
     45    <Compile Include="Policies\BernoulliThompsonSamplingPolicy.cs" />
     46    <Compile Include="Policies\GaussianThompsonSamplingPolicy.cs" />
     47    <Compile Include="Policies\Exp3Policy.cs" />
    4548    <Compile Include="Policies\EpsGreedyPolicy.cs" />
    4649    <Compile Include="Policies\RandomPolicy.cs" />
     
    5154    <Compile Include="Properties\AssemblyInfo.cs" />
    5255  </ItemGroup>
    53   <ItemGroup />
     56  <ItemGroup>
     57    <ProjectReference Include="..\HeuristicLab.Common\HeuristicLab.Common.csproj">
     58      <Project>{3A2FBBCB-F9DF-4970-87F3-F13337D941AD}</Project>
     59      <Name>HeuristicLab.Common</Name>
     60    </ProjectReference>
     61  </ItemGroup>
    5462  <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
    5563  <!-- To modify your build process, add your task inside one of the targets below and uncomment it.
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/IPolicy.cs

    r11708 r11727  
    66
    77namespace HeuristicLab.Algorithms.Bandits {
     8  // this interface represents a policy for reinforcement learning
    89  public interface IPolicy {
    9     int SelectAction();
    10     void UpdateReward(int action, double reward);
     10    IEnumerable<int> Actions { get; }
     11    int SelectAction(); // action selection ...
     12    void UpdateReward(int action, double reward); // ... and reward update are defined as usual
     13
     14    // policies must also support disabling of potential actions
     15    // for instance if we know that an action in a state has a deterministic
     16    // reward we need to sample it only once
     17    // it is necessary to sample an action only once
     18    void DisableAction(int action);
     19
     20    // reset causes the policy to be reinitialized to it's initial state (as after constructor-call)
    1121    void Reset();
    1222  }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/BanditPolicy.cs

    r11711 r11727  
    11using System;
    22using System.Collections.Generic;
     3using System.Diagnostics;
    34using System.Linq;
    45using System.Text;
     
    78namespace HeuristicLab.Algorithms.Bandits {
    89  public abstract class BanditPolicy : IPolicy {
    9     public int NumActions { get; private set; }
    10     public BanditPolicy(int numActions) {
    11       this.NumActions = numActions;
     10    public IEnumerable<int> Actions { get; private set; }
     11    private readonly int numInitialActions;
     12
     13    protected BanditPolicy(int numActions) {
     14      this.numInitialActions = numActions;
     15      Actions = Enumerable.Range(0, numActions).ToArray();
    1216    }
    1317
    1418    public abstract int SelectAction();
    1519    public abstract void UpdateReward(int action, double reward);
    16     public abstract void Reset();
     20
     21    public virtual void DisableAction(int action) {
     22      Debug.Assert(Actions.Contains(action));
     23
     24      Actions = Actions.Where(a => a != action).ToArray();
     25    }
     26
     27    public virtual void Reset() {
     28      Actions = Enumerable.Range(0, numInitialActions).ToArray();
     29    }
    1730  }
    1831}
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/EpsGreedyPolicy.cs

    r11711 r11727  
    11using System;
    22using System.Collections.Generic;
     3using System.Diagnostics;
    34using System.Linq;
    45using System.Text;
     
    1112    private readonly int[] tries;
    1213    private readonly double[] sumReward;
     14    private readonly RandomPolicy randomPolicy;
     15
    1316    public EpsGreedyPolicy(Random random, int numActions, double eps)
    1417      : base(numActions) {
    1518      this.random = random;
    1619      this.eps = eps;
    17       this.tries = new int[NumActions];
    18       this.sumReward = new double[NumActions];
     20      this.randomPolicy = new RandomPolicy(random, numActions);
     21      this.tries = new int[numActions];
     22      this.sumReward = new double[numActions];
    1923    }
    2024
    2125    public override int SelectAction() {
     26      Debug.Assert(Actions.Any());
    2227      if (random.NextDouble() > eps) {
    2328        // select best
    2429        var maxReward = double.NegativeInfinity;
    2530        int bestAction = -1;
    26         for (int i = 0; i < NumActions; i++) {
    27           if (tries[i] == 0) return i;
    28           var avgReward = sumReward[i] / tries[i];
     31        foreach (var a in Actions) {
     32          if (tries[a] == 0) return a;
     33          var avgReward = sumReward[a] / tries[a];
    2934          if (maxReward < avgReward) {
    3035            maxReward = avgReward;
    31             bestAction = i;
     36            bestAction = a;
    3237          }
    3338        }
     39        Debug.Assert(bestAction >= 0);
    3440        return bestAction;
    3541      } else {
    3642        // select random
    37         return random.Next(NumActions);
     43        return randomPolicy.SelectAction();
    3844      }
    3945    }
    4046    public override void UpdateReward(int action, double reward) {
     47      Debug.Assert(Actions.Contains(action));
     48
     49      randomPolicy.UpdateReward(action, reward); // does nothing
    4150      tries[action]++;
    4251      sumReward[action] += reward;
    4352    }
     53
     54    public override void DisableAction(int action) {
     55      base.DisableAction(action);
     56      randomPolicy.DisableAction(action);
     57      sumReward[action] = 0;
     58      tries[action] = -1;
     59    }
     60
    4461    public override void Reset() {
     62      base.Reset();
     63      randomPolicy.Reset();
    4564      Array.Clear(tries, 0, tries.Length);
    4665      Array.Clear(sumReward, 0, sumReward.Length);
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/RandomPolicy.cs

    r11711 r11727  
    11using System;
    22using System.Collections.Generic;
     3using System.Diagnostics;
    34using System.Linq;
    45using System.Text;
    56using System.Threading.Tasks;
     7using HeuristicLab.Common;
    68
    79namespace HeuristicLab.Algorithms.Bandits {
    810  public class RandomPolicy : BanditPolicy {
    911    private readonly Random random;
     12
    1013    public RandomPolicy(Random random, int numActions)
    1114      : base(numActions) {
     
    1417
    1518    public override int SelectAction() {
    16       return random.Next(NumActions);
     19      Debug.Assert(Actions.Any());
     20      return Actions.SelectRandom(random);
    1721    }
    1822    public override void UpdateReward(int action, double reward) {
    1923      // do nothing
    2024    }
    21     public override void Reset() {
    22       // do nothing
    23     }
     25
    2426  }
    2527}
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/UCB1Policy.cs

    r11711 r11727  
    11using System;
    22using System.Collections.Generic;
     3using System.Diagnostics;
    34using System.Linq;
    45using System.Text;
     
    1213    public UCB1Policy(int numActions)
    1314      : base(numActions) {
    14       this.tries = new int[NumActions];
    15       this.sumReward = new double[NumActions];
     15      this.tries = new int[numActions];
     16      this.sumReward = new double[numActions];
    1617    }
    1718
     
    1920      int bestAction = -1;
    2021      double bestQ = double.NegativeInfinity;
    21       for (int i = 0; i < NumActions; i++) {
    22         if (tries[i] == 0) return i;
    23         var q = sumReward[i] / tries[i] + Math.Sqrt((2 * Math.Log(totalTries)) / tries[i]);
     22      foreach (var a in Actions) {
     23        if (tries[a] == 0) return a;
     24        var q = sumReward[a] / tries[a] + Math.Sqrt((2 * Math.Log(totalTries)) / tries[a]);
    2425        if (q > bestQ) {
    2526          bestQ = q;
    26           bestAction = i;
     27          bestAction = a;
    2728        }
    2829      }
     
    3031    }
    3132    public override void UpdateReward(int action, double reward) {
     33      Debug.Assert(Actions.Contains(action));
    3234      totalTries++;
    3335      tries[action]++;
    3436      sumReward[action] += reward;
    3537    }
     38
     39    public override void DisableAction(int action) {
     40      base.DisableAction(action);
     41      totalTries -= tries[action];
     42      tries[action] = -1;
     43      sumReward[action] = 0;
     44    }
     45
    3646    public override void Reset() {
     47      base.Reset();
    3748      totalTries = 0;
    3849      Array.Clear(tries, 0, tries.Length);
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/UCB1TunedPolicy.cs

    r11711 r11727  
    11using System;
    22using System.Collections.Generic;
     3using System.Diagnostics;
    34using System.Linq;
    45using System.Text;
     
    1314    public UCB1TunedPolicy(int numActions)
    1415      : base(numActions) {
    15       this.tries = new int[NumActions];
    16       this.sumReward = new double[NumActions];
    17       this.sumSqrReward = new double[NumActions];
     16      this.tries = new int[numActions];
     17      this.sumReward = new double[numActions];
     18      this.sumSqrReward = new double[numActions];
    1819    }
    1920
     
    2526
    2627    public override int SelectAction() {
     28      Debug.Assert(Actions.Any());
    2729      int bestAction = -1;
    2830      double bestQ = double.NegativeInfinity;
    29       for (int i = 0; i < NumActions; i++) {
    30         if (tries[i] == 0) return i;
    31         var q = sumReward[i] / tries[i] + Math.Sqrt((Math.Log(totalTries) / tries[i]) * Math.Min(1.0 / 4, V(i))); // 1/4 is upper bound of bernoulli distributed variable
     31      foreach (var a in Actions) {
     32        if (tries[a] == 0) return a;
     33        var q = sumReward[a] / tries[a] + Math.Sqrt((Math.Log(totalTries) / tries[a]) * Math.Min(1.0 / 4, V(a))); // 1/4 is upper bound of bernoulli distributed variable
    3234        if (q > bestQ) {
    3335          bestQ = q;
    34           bestAction = i;
     36          bestAction = a;
    3537        }
    3638      }
     
    3840    }
    3941    public override void UpdateReward(int action, double reward) {
     42      Debug.Assert(Actions.Contains(action));
    4043      totalTries++;
    4144      tries[action]++;
     
    4346      sumSqrReward[action] += reward * reward;
    4447    }
     48
     49    public override void DisableAction(int action) {
     50      base.DisableAction(action);
     51      totalTries -= tries[action];
     52      tries[action] = -1;
     53      sumReward[action] = 0;
     54      sumSqrReward[action] = 0;
     55    }
     56
    4557    public override void Reset() {
     58      base.Reset();
    4659      totalTries = 0;
    4760      Array.Clear(tries, 0, tries.Length);
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/UCBNormalPolicy.cs

    r11711 r11727  
    11using System;
    22using System.Collections.Generic;
     3using System.Diagnostics;
    34using System.Linq;
    45using System.Text;
     
    1314    public UCBNormalPolicy(int numActions)
    1415      : base(numActions) {
    15       this.tries = new int[NumActions];
    16       this.sumReward = new double[NumActions];
    17       this.sumSqrReward = new double[NumActions];
     16      this.tries = new int[numActions];
     17      this.sumReward = new double[numActions];
     18      this.sumSqrReward = new double[numActions];
    1819    }
    1920
    20     private double V(int arm) {
    21       var s = tries[arm];
    22       return sumSqrReward[arm] / s - Math.Pow(sumReward[arm] / s, 2) + Math.Sqrt(2 * Math.Log(totalTries) / s);
    23     }
    24 
    25 
    2621    public override int SelectAction() {
     22      Debug.Assert(Actions.Any());
    2723      int bestAction = -1;
    2824      double bestQ = double.NegativeInfinity;
    29       for (int i = 0; i < NumActions; i++) {
    30         if (totalTries == 0 || tries[i] == 0 || tries[i] < Math.Ceiling(8 * Math.Log(totalTries))) return i;
    31         var avgReward = sumReward[i] / tries[i];
     25      foreach (var a in Actions) {
     26        if (totalTries == 0 || tries[a] == 0 || tries[a] < Math.Ceiling(8 * Math.Log(totalTries))) return a;
     27        var avgReward = sumReward[a] / tries[a];
    3228        var q = avgReward
    33           + Math.Sqrt(16 * ((sumSqrReward[i] - tries[i] * Math.Pow(avgReward, 2)) / (tries[i] - 1)) * (Math.Log(totalTries - 1) / tries[i]));
     29          + Math.Sqrt(16 * ((sumSqrReward[a] - tries[a] * Math.Pow(avgReward, 2)) / (tries[a] - 1)) * (Math.Log(totalTries - 1) / tries[a]));
    3430        if (q > bestQ) {
    3531          bestQ = q;
    36           bestAction = i;
     32          bestAction = a;
    3733        }
    3834      }
     
    4036    }
    4137    public override void UpdateReward(int action, double reward) {
     38      Debug.Assert(Actions.Contains(action));
    4239      totalTries++;
    4340      tries[action]++;
     
    4542      sumSqrReward[action] += reward * reward;
    4643    }
     44
     45    public override void DisableAction(int action) {
     46      base.DisableAction(action);
     47      totalTries -= tries[action];
     48      tries[action] = -1;
     49      sumReward[action] = 0;
     50      sumSqrReward[action] = 0;
     51    }
     52
    4753    public override void Reset() {
     54      base.Reset();
    4855      totalTries = 0;
    4956      Array.Clear(tries, 0, tries.Length);
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/ExhaustiveBreadthFirstSearch.cs

    r11708 r11727  
    1212    private readonly int maxLen;
    1313    private readonly Queue<string> bfsQueue = new Queue<string>();
     14    private readonly IProblem problem;
    1415
    15     public ExhaustiveBreadthFirstSearch(int maxLen) {
     16    public ExhaustiveBreadthFirstSearch(IProblem problem, int maxLen) {
     17      this.problem = problem;
    1618      this.maxLen = maxLen;
    1719    }
    1820
    19     public void Run(IProblem problem, int maxIterations) {
     21    public void Run(int maxIterations) {
    2022      double bestQuality = double.MinValue;
    2123      bfsQueue.Enqueue(problem.Grammar.SentenceSymbol.ToString());
     
    2426      for (int i = 0; sentenceEnumerator.MoveNext() && i < maxIterations; i++) {
    2527        var sentence = sentenceEnumerator.Current;
    26         var quality = problem.Evaluate(sentence);
     28        var quality = problem.Evaluate(sentence) / problem.GetBestKnownQuality(maxLen);
    2729        RaiseSolutionEvaluated(sentence, quality);
    2830
     
    3941        var phrase = bfsQueue.Dequeue();
    4042
    41         var nt = phrase.First(grammar.IsNonTerminal);
    42         var ntIdx = phrase.IndexOf(nt); // TODO perf
     43        char nt;
     44        int ntIdx;
     45        Grammar.FindFirstNonTerminal(grammar, phrase, out nt, out ntIdx);
    4346        var alts = grammar.GetAlternatives(nt);
    4447        foreach (var alt in alts) {
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/ExhaustiveDepthFirstSearch.cs

    r11708 r11727  
    2424      for (int i = 0; sentenceEnumerator.MoveNext() && i < maxIterations; i++) {
    2525        var sentence = sentenceEnumerator.Current;
    26         var quality = problem.Evaluate(sentence);
     26        var quality = problem.Evaluate(sentence) / problem.GetBestKnownQuality(maxLen);
    2727        RaiseSolutionEvaluated(sentence, quality);
    2828
     
    3939        var phrase = stack.Pop();
    4040
    41         var nt = phrase.First(grammar.IsNonTerminal);
    42         var ntIdx = phrase.IndexOf(nt); // TODO perf
     41        char nt;
     42        int ntIdx;
     43        Grammar.FindFirstNonTerminal(grammar, phrase, out nt, out ntIdx);
    4344        var alts = grammar.GetAlternatives(nt);
    4445        foreach (var alt in alts) {
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization.csproj

    r11690 r11727  
    4343  </ItemGroup>
    4444  <ItemGroup>
     45    <Compile Include="AlternativesSampler.cs" />
     46    <Compile Include="AlternativesContextSampler.cs" />
     47    <Compile Include="MctsSampler.cs" />
    4548    <Compile Include="ExhaustiveDepthFirstSearch.cs" />
    4649    <Compile Include="ExhaustiveBreadthFirstSearch.cs" />
     
    4952  </ItemGroup>
    5053  <ItemGroup>
     54    <ProjectReference Include="..\HeuristicLab.Algorithms.Bandits\HeuristicLab.Algorithms.Bandits.csproj">
     55      <Project>{24408F7D-EE0F-4886-A08B-EC324D662E47}</Project>
     56      <Name>HeuristicLab.Algorithms.Bandits</Name>
     57    </ProjectReference>
     58    <ProjectReference Include="..\HeuristicLab.Common\HeuristicLab.Common.csproj">
     59      <Project>{3A2FBBCB-F9DF-4970-87F3-F13337D941AD}</Project>
     60      <Name>HeuristicLab.Common</Name>
     61    </ProjectReference>
    5162    <ProjectReference Include="..\HeuristicLab.Problems.GrammaticalOptimization\HeuristicLab.Problems.GrammaticalOptimization.csproj">
    5263      <Project>{cb9dccf6-667e-4a13-b82d-dbd6b45a045e}</Project>
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Common/Extensions.cs

    r11689 r11727  
    55using System.Threading.Tasks;
    66
    7 namespace HeuristicLab.Problems.GrammaticalOptimization {
     7namespace HeuristicLab.Common {
    88  public static class Extensions {
    99    public static bool IsAlmost(this double x, double y) {
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.Test/TestBanditPolicies.cs

    r11710 r11727  
    1515      var seedForPolicy = globalRand.Next();
    1616      var nArms = 10;
     17      //Console.WriteLine("Exp3 (gamma=0.01)");
     18      //TestPolicyBernoulli(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 1));
     19      //Console.WriteLine("Exp3 (gamma=0.05)");
     20      //estPolicyBernoulli(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 1));
     21      Console.WriteLine("Thompson (Bernoulli)");
     22      TestPolicyBernoulli(globalRand, nArms, new BernoulliThompsonSamplingPolicy(new Random(seedForPolicy), nArms));
    1723      Console.WriteLine("Random");
    18       TestPolicyBernoulli(globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), 10));
     24      TestPolicyBernoulli(globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), nArms));
    1925      Console.WriteLine("UCB1");
    20       TestPolicyBernoulli(globalRand, nArms, new UCB1Policy(10));
     26      TestPolicyBernoulli(globalRand, nArms, new UCB1Policy(nArms));
    2127      Console.WriteLine("UCB1Tuned");
    22       TestPolicyBernoulli(globalRand, nArms, new UCB1TunedPolicy(10));
     28      TestPolicyBernoulli(globalRand, nArms, new UCB1TunedPolicy(nArms));
    2329      Console.WriteLine("UCB1Normal");
    24       TestPolicyBernoulli(globalRand, nArms, new UCBNormalPolicy(10));
     30      TestPolicyBernoulli(globalRand, nArms, new UCBNormalPolicy(nArms));
    2531      Console.WriteLine("Eps(0.01)");
    26       TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.01));
     32      TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.01));
    2733      Console.WriteLine("Eps(0.05)");
    28       TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.05));
    29       Console.WriteLine("Eps(0.1)");
    30       TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.1));
    31       Console.WriteLine("Eps(0.2)");
    32       TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.2));
    33       Console.WriteLine("Eps(0.5)");
    34       TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.5));
     34      TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.05));
     35      //Console.WriteLine("Eps(0.1)");
     36      //TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.1));
     37      //Console.WriteLine("Eps(0.2)");
     38      //TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.2));
     39      //Console.WriteLine("Eps(0.5)");
     40      //TestPolicyBernoulli(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.5));
    3541    }
    3642    [TestMethod]
     
    4046      var seedForPolicy = globalRand.Next();
    4147      var nArms = 10;
     48      Console.WriteLine("Thompson (Gaussian)");
     49      TestPolicyNormal(globalRand, nArms, new GaussianThompsonSamplingPolicy(new Random(seedForPolicy), nArms));
    4250      Console.WriteLine("Random");
    43       TestPolicyNormal(globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), 10));
     51      TestPolicyNormal(globalRand, nArms, new RandomPolicy(new Random(seedForPolicy), nArms));
    4452      Console.WriteLine("UCB1");
    45       TestPolicyNormal(globalRand, nArms, new UCB1Policy(10));
     53      TestPolicyNormal(globalRand, nArms, new UCB1Policy(nArms));
    4654      Console.WriteLine("UCB1Tuned");
    47       TestPolicyNormal(globalRand, nArms, new UCB1TunedPolicy(10));
     55      TestPolicyNormal(globalRand, nArms, new UCB1TunedPolicy(nArms));
    4856      Console.WriteLine("UCB1Normal");
    49       TestPolicyNormal(globalRand, nArms, new UCBNormalPolicy(10));
     57      TestPolicyNormal(globalRand, nArms, new UCBNormalPolicy(nArms));
     58      //Console.WriteLine("Exp3 (gamma=0.01)");
     59      //TestPolicyNormal(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 0.01));
     60      //Console.WriteLine("Exp3 (gamma=0.05)");
     61      //TestPolicyNormal(globalRand, nArms, new Exp3Policy(new Random(seedForPolicy), nArms, 0.05));
    5062      Console.WriteLine("Eps(0.01)");
    51       TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.01));
     63      TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.01));
    5264      Console.WriteLine("Eps(0.05)");
    53       TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.05));
    54       Console.WriteLine("Eps(0.1)");
    55       TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.1));
    56       Console.WriteLine("Eps(0.2)");
    57       TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.2));
    58       Console.WriteLine("Eps(0.5)");
    59       TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), 10, 0.5));
     65      TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.05));
     66      //Console.WriteLine("Eps(0.1)");
     67      //TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.1));
     68      //Console.WriteLine("Eps(0.2)");
     69      //TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.2));
     70      //Console.WriteLine("Eps(0.5)");
     71      //TestPolicyNormal(globalRand, nArms, new EpsGreedyPolicy(new Random(seedForPolicy), nArms, 0.5));
    6072    }
    6173
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.Test/TestSolvers.cs

    r11708 r11727  
    5757
    5858    private void TestBFS(IProblem prob, int len, int numExpectedSols) {
    59       var solver = new ExhaustiveBreadthFirstSearch(len);
     59      var solver = new ExhaustiveBreadthFirstSearch(prob, len);
    6060      int numSols = 0;
    6161
    6262      solver.SolutionEvaluated += (s, d) => { numSols++; };
    6363
    64       solver.Run(prob, int.MaxValue);
     64      solver.Run(int.MaxValue);
    6565      Assert.AreEqual(numExpectedSols, numSols);
    6666    }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/EvenParityProblem.cs

    r11659 r11727  
    5050      return nCorrect;
    5151    }
     52
     53    public string Hash(string terminalPhrase) {
     54      return terminalPhrase;
     55    }
    5256  }
    5357}
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/ExpressionInterpreter.cs

    r11659 r11727  
    66using System.Text;
    77using System.Threading.Tasks;
     8using HeuristicLab.Common;
    89
    910namespace HeuristicLab.Problems.GrammaticalOptimization {
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/Grammar.cs

    r11659 r11727  
    88using System.Text.RegularExpressions;
    99using System.Xml.Linq;
     10using HeuristicLab.Common;
    1011
    1112namespace HeuristicLab.Problems.GrammaticalOptimization {
     
    157158      while (!done) {
    158159        int ntIdx; char nt;
    159         FindFirstNonTerminal(phrase, out nt, out ntIdx);
     160        FindFirstNonTerminal(this, phrase, out nt, out ntIdx);
    160161
    161162        int maxLenOfReplacement = maxLen - (phrase.Length - 1); // replacing aAb with maxLen 4 means we can only use alternatives with a minPhraseLen <= 2
     
    175176    }
    176177
    177     private void FindFirstNonTerminal(string phrase, out char nt, out int ntIdx) {
     178    public static void FindFirstNonTerminal(IGrammar g, string phrase, out char nt, out int ntIdx) {
    178179      ntIdx = 0;
    179       while (ntIdx < phrase.Length && IsTerminal(phrase[ntIdx])) ntIdx++;
     180      while (ntIdx < phrase.Length && g.IsTerminal(phrase[ntIdx])) ntIdx++;
    180181      if (ntIdx >= phrase.Length) {
    181182        ntIdx = -1;
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/HardPalindromeProblem.cs

    r11659 r11727  
    3939    }
    4040
     41    public string Hash(string terminalPhrase) {
     42      return terminalPhrase;
     43    }
    4144  }
    4245}
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.csproj

    r11659 r11727  
    4444  <ItemGroup>
    4545    <Compile Include="ExpressionInterpreter.cs" />
    46     <Compile Include="Extensions.cs" />
    4746    <Compile Include="Grammar.cs" />
    4847    <Compile Include="EvenParityProblem.cs" />
     48    <Compile Include="SentenceSetStatistics.cs" />
    4949    <Compile Include="SymbolicRegressionPoly10Problem.cs" />
    5050    <Compile Include="SantaFeAntProblem.cs" />
     
    5959    <Compile Include="RoyalTreeProblem.cs" />
    6060  </ItemGroup>
     61  <ItemGroup>
     62    <ProjectReference Include="..\HeuristicLab.Common\HeuristicLab.Common.csproj">
     63      <Project>{3A2FBBCB-F9DF-4970-87F3-F13337D941AD}</Project>
     64      <Name>HeuristicLab.Common</Name>
     65    </ProjectReference>
     66  </ItemGroup>
    6167  <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
    6268  <!-- To modify your build process, add your task inside one of the targets below and uncomment it.
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/IProblem.cs

    r11659 r11727  
    99    IGrammar Grammar { get; }
    1010    double Evaluate(string sentence);
     11    string Hash(string terminalPhrase);
    1112  }
    1213}
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/PalindromeProblem.cs

    r11659 r11727  
    7979      return result.ToString();
    8080    }
     81
     82    public string Hash(string terminalPhrase) {
     83      return terminalPhrase;
     84    }
    8185  }
    8286}
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/RoyalPairProblem.cs

    r11659 r11727  
    3333      return regex.Matches(sentence).Count;
    3434    }
     35
     36    public string Hash(string terminalPhrase) {
     37      return terminalPhrase;
     38    }
    3539  }
    3640}
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/RoyalRoadProblem.cs

    r11659 r11727  
    2929      throw new NotImplementedException();
    3030    }
     31    public string Hash(string terminalPhrase) {
     32      return terminalPhrase;
     33    }
    3134
    3235  }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/RoyalSymbolProblem.cs

    r11659 r11727  
    3333      return regex.Matches(sentence).Count;
    3434    }
     35    public string Hash(string terminalPhrase) {
     36      return terminalPhrase;
     37    }
     38
    3539  }
    3640}
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/RoyalTreeProblem.cs

    r11659 r11727  
    2929      throw new NotImplementedException();
    3030    }
     31    public string Hash(string terminalPhrase) {
     32      return terminalPhrase;
     33    }
    3134
    3235  }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/SantaFeAntProblem.cs

    r11708 r11727  
    9696        p++;
    9797      }
     98    }
     99
     100    public string Hash(string terminalPhrase) {
     101      return terminalPhrase.Replace("rl", "").Replace("lr", "");
    98102    }
    99103  }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/SymbolicRegressionPoly10Problem.cs

    r11708 r11727  
    55using System.Security.AccessControl;
    66using System.Text;
     7using HeuristicLab.Common;
    78
    89namespace HeuristicLab.Problems.GrammaticalOptimization {
    910  public class SymbolicRegressionPoly10Problem : IProblem {
     11    //    private const string grammarString = @"
     12    //    G(E):
     13    //    E -> V | V+E | V-E | V*E | (E)
     14    //    V -> a .. j
     15    //    ";
    1016    private const string grammarString = @"
    11 G(E):
    12 E -> V | V+E | V-E | V*E | V/E | (E)
    13 V -> a .. j
    14 ";
     17    G(E):
     18    E -> a | b | c | d | e | f | g | h | j | a+E | b+E | c+E | d+E | e+E | f+E | g+E | h+E | j+E | a*E | b*E | c*E | d*E | e*E | f*E | g*E | h*E | j*E
     19    ";
    1520
    1621
     
    8287      return s * s / (ssX * ssY);
    8388    }
     89
     90
     91    // right now only + and * is supported
     92    public string Hash(string terminalPhrase) {
     93      var terms = terminalPhrase.Split('+');
     94      return string.Join("+", terms.Select(term => string.Join("", term.Replace("*", "").OrderBy(ch => ch)))
     95        .OrderBy(term => term));
     96    }
    8497  }
    8598}
  • branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Main.csproj

    r11659 r11727  
    4949  </ItemGroup>
    5050  <ItemGroup>
     51    <ProjectReference Include="..\HeuristicLab.Algorithms.Bandits\HeuristicLab.Algorithms.Bandits.csproj">
     52      <Project>{24408F7D-EE0F-4886-A08B-EC324D662E47}</Project>
     53      <Name>HeuristicLab.Algorithms.Bandits</Name>
     54    </ProjectReference>
    5155    <ProjectReference Include="..\HeuristicLab.Algorithms.GrammaticalOptimization\HeuristicLab.Algorithms.GrammaticalOptimization.csproj">
    5256      <Project>{eea07488-1a51-412a-a52c-53b754a628b3}</Project>
  • branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs

    r11690 r11727  
    11using System;
    22using System.Collections.Generic;
     3using System.Data;
    34using System.Diagnostics;
    45using System.Linq;
    56using System.Text;
     7using System.Threading.Tasks;
     8using HeuristicLab.Algorithms.Bandits;
    69using HeuristicLab.Algorithms.GrammaticalOptimization;
    710using HeuristicLab.Problems.GrammaticalOptimization;
     
    1013  class Program {
    1114    static void Main(string[] args) {
     15      // RunDemo();
     16      RunGridTest();
     17    }
     18
     19    private static void RunGridTest() {
     20      int maxIterations = 150000;
     21      var globalRandom = new Random(31415);
     22      var reps = 10;
     23      Parallel.ForEach(new int[] { 1, 5, 10, 100, 500, 1000 }, (randomTries) => {
     24        Random localRand;
     25        lock (globalRandom) {
     26          localRand = new Random(globalRandom.Next());
     27        }
     28        var policyFactories = new Func<int, IPolicy>[]
     29        {
     30          (numActions) => new RandomPolicy(localRand, numActions),
     31          (numActions) => new UCB1Policy(numActions),
     32          (numActions) => new UCB1TunedPolicy(numActions),
     33          (numActions) => new UCBNormalPolicy(numActions),
     34          (numActions) => new EpsGreedyPolicy(localRand, numActions, 0.01),
     35          (numActions) => new EpsGreedyPolicy(localRand, numActions, 0.05),
     36          (numActions) => new EpsGreedyPolicy(localRand, numActions, 0.1),
     37          (numActions) => new EpsGreedyPolicy(localRand, numActions, 0.2),
     38          (numActions) => new EpsGreedyPolicy(localRand, numActions, 0.5),
     39          (numActions) => new GaussianThompsonSamplingPolicy(localRand, numActions),
     40          (numActions) => new BernoulliThompsonSamplingPolicy(localRand, numActions)
     41        };
     42
     43        foreach (var policyFactory in policyFactories)
     44          for (int i = 0; i < reps; i++) {
     45            int iterations = 0;
     46            var sw = new Stopwatch();
     47            var globalStatistics = new SentenceSetStatistics();
     48
     49            // var problem = new SymbolicRegressionPoly10Problem();
     50            var problem = new SantaFeAntProblem();
     51            //var problem = new PalindromeProblem();
     52            //var problem = new HardPalindromeProblem();
     53            //var problem = new RoyalPairProblem();
     54            //var problem = new EvenParityProblem();
     55            var alg = new MctsSampler(problem, 17, localRand, randomTries, policyFactory);
     56            //var alg = new ExhaustiveBreadthFirstSearch(problem, 25);
     57            //var alg = new AlternativesContextSampler(problem, 25);
     58
     59            alg.SolutionEvaluated += (sentence, quality) => {
     60              iterations++;
     61              globalStatistics.AddSentence(sentence, quality);
     62              if (iterations % 10000 == 0) {
     63                Console.WriteLine("{0} {1} {2}", randomTries, policyFactory(1), globalStatistics);
     64              }
     65            };
     66
     67            sw.Start();
     68
     69            alg.Run(maxIterations);
     70
     71            sw.Stop();
     72          }
     73      });
     74    }
     75
     76    private static void RunDemo() {
     77      // TODO: implement threshold ascent
     78      // TODO: implement inspection for MCTS
     79
    1280      int maxIterations = 10000000;
    1381      int iterations = 0;
     
    1583      double bestQuality = 0;
    1684      string bestSentence = "";
     85      var globalStatistics = new SentenceSetStatistics();
     86      var random = new Random(31415);
    1787
    18       var rs = new ExhaustiveDepthFirstSearch(17);
     88      // var problem = new SymbolicRegressionPoly10Problem();
     89      var problem = new SantaFeAntProblem();
     90      //var problem = new PalindromeProblem();
     91      //var problem = new HardPalindromeProblem();
     92      //var problem = new RoyalPairProblem();
     93      //var problem = new EvenParityProblem();
     94      var alg = new MctsSampler(problem, 17, random);
     95      //var alg = new ExhaustiveBreadthFirstSearch(problem, 25);
     96      //var alg = new AlternativesContextSampler(problem, 25);
    1997
    20       rs.FoundNewBestSolution += (sentence, quality) => {
     98      alg.FoundNewBestSolution += (sentence, quality) => {
    2199        bestQuality = quality;
    22100        bestSentence = sentence;
    23101        Console.WriteLine("{0,10} {1,10:F5} {2,10:F5} {3}", iterations, bestQuality, quality, sentence);
    24102      };
    25       rs.SolutionEvaluated += (sentence, quality) => {
     103      alg.SolutionEvaluated += (sentence, quality) => {
    26104        iterations++;
     105        globalStatistics.AddSentence(sentence, quality);
    27106        if (iterations % 10000 == 0) {
    28           Console.WriteLine("{0,10} {1,10:F5} {2,10:F5} {3}", iterations, bestQuality, quality, sentence);
     107          //Console.WriteLine("{0,10} {1,10:F5} {2,10:F5} {3}", iterations, bestQuality, quality, sentence);
     108          Console.WriteLine(globalStatistics.ToString());
    29109        }
    30110      };
     
    33113      sw.Start();
    34114
    35       rs.Run(new SymbolicRegressionPoly10Problem(), maxIterations);
     115      alg.Run(maxIterations);
    36116
    37117      sw.Stop();
Note: See TracChangeset for help on using the changeset viewer.