Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
01/16/15 18:26:35 (10 years ago)
Author:
gkronber
Message:

#2283 work-in-progress commit (does not compile)

Location:
branches/HeuristicLab.Problems.GrammaticalOptimization
Files:
14 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/ActiveLearningPolicy.cs

    r11747 r11792  
    1111    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
    1212      var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
    13       double bestQ = double.NegativeInfinity;
    1413      int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
    1514      const double delta = 0.1;
     
    2625        double l;
    2726        if (aInfo.Tries == 0) {
    28           u = 1.0;
    29           l = 0.0;
     27          u = double.PositiveInfinity;
     28          l = double.NegativeInfinity;
    3029        } else {
    3130          q = aInfo.SumReward / aInfo.Tries;
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/ChernoffIntervalEstimationPolicy.cs

    r11742 r11792  
    55using System.Text;
    66using System.Threading.Tasks;
     7using HeuristicLab.Common;
    78
    89namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
     
    2223      int k = myActionInfos.Count(a => !a.Disabled);
    2324      int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
    24       int bestAction = -1;
    2525      double bestQ = double.NegativeInfinity;
     26      var bestActions = new List<int>();
    2627      var aIdx = -1;
    2728      foreach (var aInfo in myActionInfos) {
    2829        aIdx++;
    2930        if (aInfo.Disabled) continue;
    30         if (aInfo.Tries == 0) return aIdx;
     31        double q;
     32        if (aInfo.Tries == 0) {
     33          q = double.PositiveInfinity;
     34        } else {
    3135
    32         var avgReward = aInfo.SumReward / aInfo.Tries;
     36          var avgReward = aInfo.SumReward / aInfo.Tries;
    3337
    34         // page 5 of "A simple distribution-free approach to the max k-armed bandit problem"
    35         // var alpha = Math.Log(2 * totalTries * k / delta);
    36         double alpha = Math.Log(2.0) + Math.Log(totalTries) + Math.Log(k) - Math.Log(delta); // total tries is max tries in the original paper
    37         var q = avgReward + (alpha + Math.Sqrt(2 * aInfo.Tries * avgReward * alpha + alpha * alpha)) / aInfo.Tries;
     38          // page 5 of "A simple distribution-free approach to the max k-armed bandit problem"
     39          // var alpha = Math.Log(2 * totalTries * k / delta);
     40          double alpha = Math.Log(2.0) + Math.Log(totalTries) + Math.Log(k) - Math.Log(delta);
     41          // total tries is max tries in the original paper
     42          q = avgReward + (alpha + Math.Sqrt(2 * aInfo.Tries * avgReward * alpha + alpha * alpha)) / aInfo.Tries;
     43        }
    3844        if (q > bestQ) {
    3945          bestQ = q;
    40           bestAction = aIdx;
     46          bestActions.Clear();
     47          bestActions.Add(aIdx);
     48        } else if (q == bestQ) {
     49          bestActions.Add(aIdx);
    4150        }
    4251      }
    43       Debug.Assert(bestAction >= 0);
    44       return bestAction;
     52      Debug.Assert(bestActions.Any());
     53      return bestActions.SelectRandom(random);
    4554    }
    4655
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/ThresholdAscentPolicy.cs

    r11747 r11792  
    3333        get {
    3434          if (Disabled) return knownValue;
    35           if(Tries == 0.0) return 0.0;
     35          if (Tries == 0.0) return 0.0;
    3636          return rewardHistogram[thresholdBin] / (double)Tries;
    3737        }
     
    9999      UpdateThreshold(myActionInfos);
    100100
    101       int bestAction = -1;
     101      var bestActions = new List<int>();
    102102      double bestQ = double.NegativeInfinity;
    103103      int k = myActionInfos.Count(a => !a.Disabled);
     
    107107        aIdx++;
    108108        if (aInfo.Disabled) continue;
    109         if (aInfo.Tries == 0) return aIdx;
    110         double mu = aInfo.Value; // probability of rewards > T
    111         double q = U(mu, totalTries, aInfo.Tries, k);          // totalTries is max iterations in original paper
     109        double q;
     110        if (aInfo.Tries == 0) {
     111          q = double.PositiveInfinity;
     112        } else {
     113          double mu = aInfo.Value; // probability of rewards > T
     114          q = U(mu, totalTries, aInfo.Tries, k); // totalTries is max iterations in original paper
     115        }
    112116        if (q > bestQ) {
    113117          bestQ = q;
    114           bestAction = aIdx;
     118          bestActions.Clear();
     119          bestActions.Add(aIdx);
     120        } else if (q == bestQ) {
     121          bestActions.Add(aIdx);
    115122        }
    116123      }
    117       Debug.Assert(bestAction > -1);
    118       return bestAction;
     124      Debug.Assert(bestActions.Any());
     125      return bestActions.SelectRandom(random);
    119126    }
    120127
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/UCB1TunedPolicy.cs

    r11742 r11792  
    55using System.Text;
    66using System.Threading.Tasks;
     7using HeuristicLab.Common;
    78
    89namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
     
    1213    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
    1314      var myActionInfos = actionInfos.OfType<MeanAndVariancePolicyActionInfo>();
    14       int bestAction = -1;
    15       double bestQ = double.NegativeInfinity;
     15
    1616      int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
    1717
    1818      int aIdx = -1;
     19      double bestQ = double.NegativeInfinity;
     20      var bestActions = new List<int>();
    1921      foreach (var aInfo in myActionInfos) {
    2022        aIdx++;
    2123        if (aInfo.Disabled) continue;
    22         if (aInfo.Tries == 0) return aIdx;
     24        double q;
     25        if (aInfo.Tries == 0) {
     26          q = double.PositiveInfinity;
     27        } else {
     28          var sumReward = aInfo.SumReward;
     29          var tries = aInfo.Tries;
    2330
    24         var sumReward = aInfo.SumReward;
    25         var tries = aInfo.Tries;
    26 
    27         var avgReward = sumReward / tries;
    28         var q = avgReward + Math.Sqrt((Math.Log(totalTries) / tries) * Math.Min(1.0 / 4, V(aInfo, totalTries))); // 1/4 is upper bound of bernoulli distributed variable
     31          var avgReward = sumReward / tries;
     32          q = avgReward + Math.Sqrt((Math.Log(totalTries) / tries) * Math.Min(1.0 / 4, V(aInfo, totalTries)));
     33          // 1/4 is upper bound of bernoulli distributed variable
     34        }
    2935        if (q > bestQ) {
    3036          bestQ = q;
    31           bestAction = aIdx;
     37          bestActions.Clear();
     38          bestActions.Add(aIdx);
     39        } else if (q == bestQ) {
     40          bestActions.Add(aIdx);
    3241        }
    3342      }
    34       Debug.Assert(bestAction > -1);
    35       return bestAction;
     43      Debug.Assert(bestActions.Any());
     44
     45      return bestActions.SelectRandom(random);
    3646    }
    3747
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/UCBNormalPolicy.cs

    r11742 r11792  
    55using System.Text;
    66using System.Threading.Tasks;
     7using HeuristicLab.Common;
    78
    89namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
     
    1112    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
    1213      var myActionInfos = actionInfos.OfType<MeanAndVariancePolicyActionInfo>();
    13       int bestAction = -1;
     14      int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
    1415      double bestQ = double.NegativeInfinity;
    15       int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
    1616      int aIdx = -1;
     17      var bestActions = new List<int>();
    1718      foreach (var aInfo in myActionInfos) {
    1819        aIdx++;
    1920        if (aInfo.Disabled) continue;
    20         if (totalTries <= 1 || aInfo.Tries <= 1 || aInfo.Tries <= Math.Ceiling(8 * Math.Log(totalTries))) return aIdx;
    21 
    22         var tries = aInfo.Tries;
    23         var avgReward = aInfo.AvgReward;
    24         var rewardVariance = aInfo.RewardVariance;
    25         var estVariance = 16.0 * rewardVariance * (Math.Log(totalTries - 1) / tries);
    26         var q = avgReward + Math.Sqrt(estVariance);
     21        double q;
     22        if (totalTries <= 1 || aInfo.Tries <= 1 || aInfo.Tries <= Math.Ceiling(8 * Math.Log(totalTries))) {
     23          q = double.PositiveInfinity;
     24        } else {
     25          var tries = aInfo.Tries;
     26          var avgReward = aInfo.AvgReward;
     27          var rewardVariance = aInfo.RewardVariance;
     28          var estVariance = 16.0 * rewardVariance * (Math.Log(totalTries - 1) / tries);
     29          q = avgReward + Math.Sqrt(estVariance);
     30        }
    2731        if (q > bestQ) {
    2832          bestQ = q;
    29           bestAction = aIdx;
     33          bestActions.Clear();
     34          bestActions.Add(aIdx);
     35        } else if (q == bestQ) {
     36          bestActions.Add(aIdx);
    3037        }
    3138      }
    32       Debug.Assert(bestAction > -1);
    33       return bestAction;
     39      Debug.Assert(bestActions.Any());
     40      return bestActions.SelectRandom(random);
    3441    }
    3542
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/GrammarPolicies/GenericGrammarPolicy.cs

    r11770 r11792  
    2727      out ReadonlySequence selectedState) {
    2828      // only select states that are not yet done
    29       afterStates = afterStates.Where(a => !done.Contains(CanonicalState(a.ToString()))).ToArray();
     29      afterStates = afterStates.Where(a => !done.Contains(CanonicalState(a))).ToArray();
    3030      if (!afterStates.Any()) {
    3131        // fail because all follow states have already been visited => also disable the current state (if we can be sure that it has been fully explored)
    32         throw new NotImplementedException();
    33         //var curStateCanonical = CanonicalState(curState.ToString());
    34         //if (curState.ToString().Length == curStateCanonical.Length)
    35           done.Add(CanonicalState(curState.ToString()));
     32
     33        done.Add(CanonicalState(curState));
    3634        selectedState = null;
    3735        return false;
     
    4543
    4644    private IBanditPolicyActionInfo GetStateInfo(ReadonlySequence state) {
    47       var s = CanonicalState(state.ToString());
     45      var s = CanonicalState(state);
    4846      IBanditPolicyActionInfo info;
    4947      if (!stateInfo.TryGetValue(s, out info)) {
     
    5755      // the last state could be terminal
    5856      var lastState = stateTrajectory.Last();
    59       if (lastState.IsTerminal) done.Add(CanonicalState(lastState.ToString()));
     57      if (lastState.IsTerminal) done.Add(CanonicalState(lastState));
    6058
    6159      foreach (var state in stateTrajectory) {
     
    7068
    7169    public int GetTries(ReadonlySequence state) {
    72       var s = CanonicalState(state.ToString());
     70      var s = CanonicalState(state);
    7371      if (stateInfo.ContainsKey(s)) return stateInfo[s].Tries;
    7472      else return 0;
     
    7674
    7775    public double GetValue(ReadonlySequence state) {
    78       var s = CanonicalState(state.ToString());
     76      var s = CanonicalState(state);
    7977      if (stateInfo.ContainsKey(s)) return stateInfo[s].Value;
    8078      else return 0.0; // TODO: check alternatives
    8179    }
    8280
    83     protected string CanonicalState(string state) {
    84       if (useCanonicalState) return problem.CanonicalRepresentation(state);
    85       else return state;
     81    protected string CanonicalState(ReadonlySequence state) {
     82      if (useCanonicalState) {
     83        if (state.IsTerminal)
     84          return problem.CanonicalRepresentation(state.ToString());
     85        else {
     86          // for non-terminal phrases make sure we don't disable canonical states that have not yet been fully explored
     87          // e.g. if for the ant problem we have the phrase lllS (and we are limited to 4 symbols) and lllr as well as llll are explored
     88          // then we are not allowed to disable rS (canonical of lllS) because rS might not have been fully explored
     89          // solution: we disable the state rS4
     90          return problem.CanonicalRepresentation(state.ToString()) + state.Length;
     91        }
     92      } else
     93        return state.ToString();
    8694    }
    8795  }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Models/BernoulliModel.cs

    r11742 r11792  
    2828
    2929    public void Update(double reward) {
    30       Debug.Assert(reward.IsAlmost(1.0) || reward.IsAlmost(0.0));
    31       if (reward.IsAlmost(1.0)) {
     30      // Debug.Assert(reward.IsAlmost(1.0) || reward.IsAlmost(0.0));
     31      if (reward > 0) {
    3232        success++;
    3333      } else {
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/SequentialSearch.cs

    r11770 r11792  
    5757      Reset();
    5858
    59       for (int i = 0; !Done() && i < maxIterations; i++) {
     59      for (int i = 0; bestQuality < 1.0 && !Done() && i < maxIterations; i++) {
    6060        var phrase = SampleSentence(problem.Grammar);
    6161        // can fail on the last sentence
     
    7676        }
    7777      }
    78 
    79       // clean up
    80       // Reset(); GC.Collect();
    8178    }
    8279
     
    10097      while (!phrase.IsTerminal) {
    10198
     99        var newPhrases = GenerateFollowStates(g, phrase);
     100       
     101        throw new NotImplementedException(); // TODO: reintroduce random-trie checking once the tree of all states has been reintroduced
    102102        //if (n.randomTries < randomTries) {
    103103        //  n.randomTries++;
     
    107107        //} else {
    108108
    109         var newPhrases = GenerateFollowStates(g, phrase);
    110 
    111         // => select using bandit policy
    112         // failure means we simply restart
    113         if (!behaviourPolicy.TrySelect(random, phrase, newPhrases, out phrase)) {
    114           return false;
    115         }
     109
     110          // => select using bandit policy
     111          // failure means we simply restart
     112          if (!behaviourPolicy.TrySelect(random, phrase, newPhrases, out phrase)) {
     113            return false;
     114          }
     115        // }
    116116        stateChain.Add(phrase);
    117117        curDepth++;
     
    125125    private readonly Dictionary<ReadonlySequence, ReadonlySequence[]> cache;
    126126    private IEnumerable<ReadonlySequence> GenerateFollowStates(IGrammar g, ReadonlySequence phrase) {
     127      throw new NotImplementedException();
     128      // TODO: Replace caching by a tree of all states. tree is only used for easily retrieving the follow-states of a state
    127129      ReadonlySequence[] follow;
    128       if (!cache.TryGetValue(phrase, out follow)) {
     130      //if (!cache.TryGetValue(phrase, out follow)) {
    129131        char nt = phrase.FirstNonTerminal;
    130132
     
    142144          follow[idx++] = new ReadonlySequence(newPhrase);
    143145        }
    144         cache[phrase] = follow;
    145       }
     146      //  cache[phrase] = follow;
     147      //}
    146148      return follow;
    147149    }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/HardPalindromeProblem.cs

    r11742 r11792  
    4040
    4141    public string CanonicalRepresentation(string terminalPhrase) {
     42      throw new NotImplementedException();
    4243      return terminalPhrase;
    4344    }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/IProblem.cs

    r11742 r11792  
    88    double BestKnownQuality(int maxLen);
    99    IGrammar Grammar { get; }
    10     double Evaluate(string sentence);
    11     string CanonicalRepresentation(string terminalPhrase);
     10    double Evaluate(ReadonlySequence sentence);
     11    ReadonlySequence CanonicalRepresentation(ReadonlySequence terminalPhrase);
    1212  }
    1313}
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/PalindromeProblem.cs

    r11742 r11792  
    8181
    8282    public string CanonicalRepresentation(string terminalPhrase) {
     83      throw new NotImplementedException();
    8384      return terminalPhrase;
    8485    }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/RoyalSequenceProblem.cs

    r11747 r11792  
    8181
    8282    public string CanonicalRepresentation(string terminalPhrase) {
     83      throw new NotImplementedException();
    8384      return terminalPhrase;
    8485    }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/SymbolicRegressionPoly10Problem.cs

    r11770 r11792  
    7373
    7474    // right now only + and * is supported
    75     private Dictionary<string, string> cache = new Dictionary<string, string>();
     75    //private Dictionary<string, string> cache = new Dictionary<string, string>();
    7676    public string CanonicalRepresentation(string phrase) {
    7777      string res;
    78       if (!cache.TryGetValue(phrase, out res)) {
    79         var terms = phrase.Split('+').Select(t => t.Replace("*", ""));
    80         var terminalTerms = terms.Where(t => t.All(ch => grammar.IsTerminal(ch)));
    81         var nonTerminalTerms = terms.Where(t => t.Any(ch => grammar.IsNonTerminal(ch)));
     78      //if (!cache.TryGetValue(phrase, out res)) {
     79      var terms = phrase.Split('+').Select(t => t.Replace("*", ""));
     80      var terminalTerms = terms.Where(t => t.All(ch => grammar.IsTerminal(ch)));
     81      var nonTerminalTerms = terms.Where(t => t.Any(ch => grammar.IsNonTerminal(ch)));
    8282
    83         res = string.Join("+", terminalTerms.Select(term => CanonicalTerm(term)).OrderBy(term => term).Concat(nonTerminalTerms.Select(term => CanonicalTerm(term))));
    84         cache[phrase] = res;
    85       }
     83      res = string.Join("+", terminalTerms.Select(term => CanonicalTerm(term)).OrderBy(term => term).Concat(nonTerminalTerms.Select(term => CanonicalTerm(term))));
     84      //cache[phrase] = res;
     85      //}
    8686      return res;
    8787    }
  • branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs

    r11770 r11792  
    99using HeuristicLab.Algorithms.Bandits;
    1010using HeuristicLab.Algorithms.Bandits.BanditPolicies;
     11using HeuristicLab.Algorithms.Bandits.GrammarPolicies;
    1112using HeuristicLab.Algorithms.Bandits.Models;
    1213using HeuristicLab.Algorithms.GrammaticalOptimization;
    1314using HeuristicLab.Problems.GrammaticalOptimization;
    1415using HeuristicLab.Problems.GrammaticalOptimization.SymbReg;
     16using BoltzmannExplorationPolicy = HeuristicLab.Algorithms.Bandits.BanditPolicies.BoltzmannExplorationPolicy;
     17using EpsGreedyPolicy = HeuristicLab.Algorithms.Bandits.BanditPolicies.EpsGreedyPolicy;
     18using RandomPolicy = HeuristicLab.Algorithms.Bandits.BanditPolicies.RandomPolicy;
     19using UCTPolicy = HeuristicLab.Algorithms.Bandits.BanditPolicies.UCTPolicy;
    1520
    1621namespace Main {
     
    2025
    2126      RunDemo();
    22       //RunGridTest();
     27      RunGridTest();
    2328    }
    2429
    2530    private static void RunGridTest() {
    26       int maxIterations = 200000; // for poly-10 with 50000 evaluations no successful try with hl yet
     31      int maxIterations = 50000; // for poly-10 with 50000 evaluations no successful try with hl yet
    2732      //var globalRandom = new Random(31415);
    2833      var localRandSeed = 31415;
    29       var reps = 8;
     34      var reps = 5;
    3035
    3136      var policies = new Func<IBanditPolicy>[]
    3237        {
     38         () => new RandomPolicy(),
     39          () => new ActiveLearningPolicy(), 
    3340         () => new EpsGreedyPolicy(0.01, (aInfo)=> aInfo.MaxReward, "max"),
    3441         () => new EpsGreedyPolicy(0.05, (aInfo)=> aInfo.MaxReward, "max"),
     
    4148         //() => new BernoulliThompsonSamplingPolicy(),
    4249         () => new GenericThompsonSamplingPolicy(new BernoulliModel(1, 1)),
    43          () => new RandomPolicy(),
    4450         () => new EpsGreedyPolicy(0.01),
    4551         () => new EpsGreedyPolicy(0.05),
     
    5662         () => new UCB1TunedPolicy(),
    5763         () => new UCBNormalPolicy(),
    58          () => new BoltzmannExplorationPolicy(0.1),
    59          () => new BoltzmannExplorationPolicy(0.5),
    6064         () => new BoltzmannExplorationPolicy(1),
    61          () => new BoltzmannExplorationPolicy(5),
    6265         () => new BoltzmannExplorationPolicy(10),
    6366         () => new BoltzmannExplorationPolicy(20),
    6467         () => new BoltzmannExplorationPolicy(100),
     68         () => new BoltzmannExplorationPolicy(200),
     69         () => new BoltzmannExplorationPolicy(500),
    6570         () => new ChernoffIntervalEstimationPolicy( 0.01),
    6671         () => new ChernoffIntervalEstimationPolicy( 0.05),
     
    7580         () => new ThresholdAscentPolicy(100, 0.1),
    7681         () => new ThresholdAscentPolicy(100, 0.2),
    77          () => new ThresholdAscentPolicy(1000, 0.01),
    78          () => new ThresholdAscentPolicy(1000, 0.05),
    79          () => new ThresholdAscentPolicy(1000, 0.1),
    80          () => new ThresholdAscentPolicy(1000, 0.2),
    81          () => new ThresholdAscentPolicy(5000, 0.01),
    82          () => new ThresholdAscentPolicy(10000, 0.01),
     82         () => new ThresholdAscentPolicy(100, 0.01),
     83         () => new ThresholdAscentPolicy(100, 0.05),
     84         () => new ThresholdAscentPolicy(100, 0.1),
     85         () => new ThresholdAscentPolicy(100, 0.2),
     86         //() => new ThresholdAscentPolicy(1000, 0.01),
     87         //() => new ThresholdAscentPolicy(1000, 0.05),
     88         //() => new ThresholdAscentPolicy(1000, 0.1),
     89         //() => new ThresholdAscentPolicy(1000, 0.2),
     90         //() => new ThresholdAscentPolicy(5000, 0.01),
     91         //() => new ThresholdAscentPolicy(10000, 0.01),
    8392        };
    8493
    8594      foreach (var problem in new Tuple<IProblem, int>[]
    8695        {
    87           //Tuple.Create((IProblem)new SantaFeAntProblem(), 17),
     96          Tuple.Create((IProblem)new SantaFeAntProblem(), 17),
    8897          Tuple.Create((IProblem)new SymbolicRegressionPoly10Problem(), 23),
    8998        })
    90         foreach (var randomTries in new int[] { 0, 1, 10, /* 5, 100 /*, 500, 1000 */}) {
    91           foreach (var policy in policies) {
    92             var myRandomTries = randomTries;
    93             var localRand = new Random(localRandSeed);
    94             var options = new ParallelOptions();
    95             options.MaxDegreeOfParallelism = 4;
    96             Parallel.For(0, reps, options, (i) => {
    97               //var t = Task.Run(() => {
    98               Random myLocalRand;
    99               lock (localRand)
    100                 myLocalRand = new Random(localRand.Next());
    101 
    102               //for (int i = 0; i < reps; i++) {
    103 
    104               int iterations = 0;
    105               var globalStatistics = new SentenceSetStatistics();
    106 
    107               // var problem = new SymbolicRegressionPoly10Problem();
    108               // var problem = new SantaFeAntProblem();
    109               //var problem = new PalindromeProblem();
    110               //var problem = new HardPalindromeProblem();
    111               //var problem = new RoyalPairProblem();
    112               //var problem = new EvenParityProblem();
    113               var alg = new MctsSampler(problem.Item1, problem.Item2, myLocalRand, myRandomTries, policy()); // TODO: Make sure we generate the same random numbers for each experiment
    114               //var alg = new ExhaustiveBreadthFirstSearch(problem, 25);
    115               //var alg = new AlternativesContextSampler(problem, 25);
    116 
    117               alg.SolutionEvaluated += (sentence, quality) => {
    118                 iterations++;
    119                 globalStatistics.AddSentence(sentence, quality);
    120                 if (iterations % 10000 == 0) {
    121                   Console.WriteLine("{0,4} {1,7} {2,5} {3,25} {4}", alg.treeDepth, alg.treeSize, myRandomTries, policy(), globalStatistics);
    122                 }
    123               };
    124 
    125 
    126               alg.Run(maxIterations);
    127 
    128               //Console.WriteLine("{0,5} {1} {2}", randomTries, policyFactory(1), globalStatistics);
    129               //}
    130               //});
    131               //tasks.Add(t);
    132             });
     99        foreach (var useCanonical in new bool[] { true, false })
     100          foreach (var randomTries in new int[] { 0, /*1, 10, /* 5, 100 /*, 500, 1000 */}) {
     101            foreach (var policy in policies) {
     102              var myRandomTries = randomTries;
     103              var localRand = new Random(localRandSeed);
     104              var options = new ParallelOptions();
     105              options.MaxDegreeOfParallelism = 1;
     106              Parallel.For(0, reps, options, (i) => {
     107                //var t = Task.Run(() => {
     108                Random myLocalRand;
     109                lock (localRand)
     110                  myLocalRand = new Random(localRand.Next());
     111
     112                //for (int i = 0; i < reps; i++) {
     113
     114                int iterations = 0;
     115                var globalStatistics = new SentenceSetStatistics();
     116
     117                // var problem = new SymbolicRegressionPoly10Problem();
     118                // var problem = new SantaFeAntProblem();
     119                //var problem = new PalindromeProblem();
     120                //var problem = new HardPalindromeProblem();
     121                //var problem = new RoyalPairProblem();
     122                //var problem = new EvenParityProblem();
     123                // var alg = new MctsSampler(problem.Item1, problem.Item2, myLocalRand, myRandomTries, policy()); // TODO: Make sure we generate the same random numbers for each
     124                var alg = new SequentialSearch(problem.Item1, problem.Item2, myLocalRand, myRandomTries, new GenericGrammarPolicy(problem.Item1, policy(), useCanonical));
     125                //var alg = new ExhaustiveBreadthFirstSearch(problem, 25);
     126                //var alg = new AlternativesContextSampler(problem, 25);
     127
     128                alg.SolutionEvaluated += (sentence, quality) => {
     129                  iterations++;
     130                  globalStatistics.AddSentence(sentence, quality);
     131                  if (iterations % 1000 == 0) {
     132                    Console.WriteLine("{0,5} {1,25} {2} {3}", myRandomTries, policy(), useCanonical, globalStatistics);
     133                  }
     134                };
     135                alg.FoundNewBestSolution += (sentence, quality) => {
     136                  Console.WriteLine("{0,5} {1,25} {2} {3}", myRandomTries, policy(), useCanonical, globalStatistics);
     137                };
     138
     139
     140                alg.Run(maxIterations);
     141
     142                //Console.WriteLine("{0,5} {1} {2}", randomTries, policyFactory(1), globalStatistics);
     143                //}
     144                //});
     145                //tasks.Add(t);
     146              });
     147            }
    133148          }
    134         }
    135149      //Task.WaitAll(tasks.ToArray());
    136150    }
    137151
    138152    private static void RunDemo() {
     153      // TODO: clone problem for parallel grid test
    139154      // TODO: move problem instances into a separate folder
    140155      // TODO: improve performance of SequentialSearch (memory allocations related to sequences)
     
    176191      // var problem = new FindPhrasesProblem(random, 15, numPhrases, phraseLen, numOptimalPhrases: numPhrases, numDecoyPhrases: 0, correctReward: 1.0, decoyReward: 0.0, phrasesAsSets: true);
    177192
    178       //var problem = new SymbolicRegressionPoly10Problem();   // good results e.g. 10 randomtries and EpsGreedyPolicy(0.2, (aInfo)=>aInfo.MaxReward)
     193      var problem = new SymbolicRegressionPoly10Problem();   // good results e.g. 10 randomtries and EpsGreedyPolicy(0.2, (aInfo)=>aInfo.MaxReward)
    179194      // Ant
    180195      // good results e.g. with       var alg = new MctsSampler(problem, 17, random, 1, (rand, numActions) => new ThresholdAscentPolicy(numActions, 500, 0.01));
     
    183198      // new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericGrammarPolicy(problem, new UCB1TunedPolicy(), true));
    184199
    185       var problem = new SantaFeAntProblem();
     200      //var problem = new SantaFeAntProblem();
    186201      //var problem = new SymbolicRegressionProblem("Tower");
    187202      //var problem = new PalindromeProblem();
     
    192207      //var alg = new MctsSampler(problem, 23, random, 0, new BoltzmannExplorationPolicy(100));
    193208      //var alg = new MctsSampler(problem, 23, random, 0, new EpsGreedyPolicy(0.1));
    194       var alg = new SequentialSearch(problem, 10, random, 0,
    195         new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericGrammarPolicy(problem, new GaussianThompsonSamplingPolicy(true), true));
     209      var alg = new SequentialSearch(problem, 23, random, 0,
     210        new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericGrammarPolicy(problem, new EpsGreedyPolicy(0.2), true));
    196211      //var alg = new MctsQLearningSampler(problem, sentenceLen, random, 0, null);
    197212      //var alg = new MctsQLearningSampler(problem, 30, random, 0, new EpsGreedyPolicy(0.2));
Note: See TracChangeset for help on using the changeset viewer.