Changeset 11792 for branches/HeuristicLab.Problems.GrammaticalOptimization
Timestamp: 01/16/15 18:26:35
Location: branches/HeuristicLab.Problems.GrammaticalOptimization
Files: 14 edited
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/ActiveLearningPolicy.cs
r11747 → r11792

    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
      var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
-     double bestQ = double.NegativeInfinity;
      int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
      const double delta = 0.1;
…
      double l;
      if (aInfo.Tries == 0) {
-       u = 1.0;
-       l = 0.0;
+       u = double.PositiveInfinity;
+       l = double.NegativeInfinity;
      } else {
        q = aInfo.SumReward / aInfo.Tries;
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/ChernoffIntervalEstimationPolicy.cs
r11742 → r11792

    using System.Text;
    using System.Threading.Tasks;
+   using HeuristicLab.Common;

    namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
…
      int k = myActionInfos.Count(a => !a.Disabled);
      int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
-     int bestAction = -1;
      double bestQ = double.NegativeInfinity;
+     var bestActions = new List<int>();
      var aIdx = -1;
      foreach (var aInfo in myActionInfos) {
        aIdx++;
        if (aInfo.Disabled) continue;
-       if (aInfo.Tries == 0) return aIdx;
-
-       var avgReward = aInfo.SumReward / aInfo.Tries;
-
-       // page 5 of "A simple distribution-free appraoch to the max k-armed bandit problem"
-       // var alpha = Math.Log(2 * totalTries * k / delta);
-       double alpha = Math.Log(2.0) + Math.Log(totalTries) + Math.Log(k) - Math.Log(delta); // total tries is max tries in the original paper
-       var q = avgReward + (alpha + Math.Sqrt(2 * aInfo.Tries * avgReward * alpha + alpha * alpha)) / aInfo.Tries;
+       double q;
+       if (aInfo.Tries == 0) {
+         q = double.PositiveInfinity;
+       } else {
+         var avgReward = aInfo.SumReward / aInfo.Tries;
+
+         // page 5 of "A simple distribution-free appraoch to the max k-armed bandit problem"
+         // var alpha = Math.Log(2 * totalTries * k / delta);
+         double alpha = Math.Log(2.0) + Math.Log(totalTries) + Math.Log(k) - Math.Log(delta);
+         // total tries is max tries in the original paper
+         q = avgReward + (alpha + Math.Sqrt(2 * aInfo.Tries * avgReward * alpha + alpha * alpha)) / aInfo.Tries;
+       }
        if (q > bestQ) {
          bestQ = q;
-         bestAction = aIdx;
+         bestActions.Clear();
+         bestActions.Add(aIdx);
+       } else if (q == bestQ) {
+         bestActions.Add(aIdx);
        }
      }
-     Debug.Assert(bestAction >= 0);
-     return bestAction;
+     Debug.Assert(bestActions.Any());
+     return bestActions.SelectRandom(random);
    }
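The same selection pattern is applied across ActiveLearningPolicy, ChernoffIntervalEstimationPolicy, ThresholdAscentPolicy, UCB1TunedPolicy and UCBNormalPolicy in this changeset: an untried arm no longer triggers an eager `return aIdx` (which always favored the first untried arm in enumeration order) but receives an index of `double.PositiveInfinity`, and ties among the best indices are broken uniformly at random via `SelectRandom` from HeuristicLab.Common. A minimal sketch of that pattern, with a hypothetical stand-in for `SelectRandom` (the library's exact signature is not shown in this changeset):

    using System;
    using System.Collections.Generic;

    static class ArgMaxSketch {
      // Hypothetical stand-in for the HeuristicLab.Common SelectRandom extension:
      // returns a uniformly chosen element of the list.
      static int SelectRandom(this List<int> items, Random random) {
        return items[random.Next(items.Count)];
      }

      // Argmax with random tie-breaking; untried arms enter with q = +infinity,
      // so they all tie at the top and one of them is drawn uniformly.
      public static int SelectAction(Random random, IReadOnlyList<double> q) {
        double bestQ = double.NegativeInfinity;
        var bestActions = new List<int>();
        for (int aIdx = 0; aIdx < q.Count; aIdx++) {
          if (q[aIdx] > bestQ) {
            bestQ = q[aIdx];
            bestActions.Clear();
            bestActions.Add(aIdx);
          } else if (q[aIdx] == bestQ) {
            bestActions.Add(aIdx);
          }
        }
        return bestActions.SelectRandom(random);
      }
    }

The ActiveLearningPolicy hunk above fits the same motivation: an untried arm now gets the non-informative bounds u = +∞ and l = −∞ instead of [0, 1], so it is guaranteed to be sampled before its confidence interval is trusted.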
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/ThresholdAscentPolicy.cs
r11747 → r11792

      get {
        if (Disabled) return knownValue;
        if (Tries == 0.0) return 0.0;
        return rewardHistogram[thresholdBin] / (double)Tries;
      }
…
      UpdateThreshold(myActionInfos);

-     int bestAction = -1;
+     var bestActions = new List<int>();
      double bestQ = double.NegativeInfinity;
      int k = myActionInfos.Count(a => !a.Disabled);
…
        aIdx++;
        if (aInfo.Disabled) continue;
-       if (aInfo.Tries == 0) return aIdx;
-       double mu = aInfo.Value; // probability of rewards > T
-       double q = U(mu, totalTries, aInfo.Tries, k); // totalTries is max iterations in original paper
+       double q;
+       if (aInfo.Tries == 0) {
+         q = double.PositiveInfinity;
+       } else {
+         double mu = aInfo.Value; // probability of rewards > T
+         q = U(mu, totalTries, aInfo.Tries, k); // totalTries is max iterations in original paper
+       }
        if (q > bestQ) {
          bestQ = q;
-         bestAction = aIdx;
+         bestActions.Clear();
+         bestActions.Add(aIdx);
+       } else if (q == bestQ) {
+         bestActions.Add(aIdx);
        }
      }
-     Debug.Assert(bestAction > -1);
-     return bestAction;
+     Debug.Assert(bestActions.Any());
+     return bestActions.SelectRandom(random);
    }
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/UCB1TunedPolicy.cs
r11742 → r11792

    using System.Text;
    using System.Threading.Tasks;
+   using HeuristicLab.Common;

    namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
…
    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
      var myActionInfos = actionInfos.OfType<MeanAndVariancePolicyActionInfo>();
-     int bestAction = -1;
-     double bestQ = double.NegativeInfinity;
      int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);

      int aIdx = -1;
+     double bestQ = double.NegativeInfinity;
+     var bestActions = new List<int>();
      foreach (var aInfo in myActionInfos) {
        aIdx++;
        if (aInfo.Disabled) continue;
-       if (aInfo.Tries == 0) return aIdx;
-
-       var sumReward = aInfo.SumReward;
-       var tries = aInfo.Tries;
-
-       var avgReward = sumReward / tries;
-       var q = avgReward + Math.Sqrt((Math.Log(totalTries) / tries) * Math.Min(1.0 / 4, V(aInfo, totalTries))); // 1/4 is upper bound of bernoulli distributed variable
+       double q;
+       if (aInfo.Tries == 0) {
+         q = double.PositiveInfinity;
+       } else {
+         var sumReward = aInfo.SumReward;
+         var tries = aInfo.Tries;
+
+         var avgReward = sumReward / tries;
+         q = avgReward + Math.Sqrt((Math.Log(totalTries) / tries) * Math.Min(1.0 / 4, V(aInfo, totalTries)));
+         // 1/4 is upper bound of bernoulli distributed variable
+       }
        if (q > bestQ) {
          bestQ = q;
-         bestAction = aIdx;
+         bestActions.Clear();
+         bestActions.Add(aIdx);
+       } else if (q == bestQ) {
+         bestActions.Add(aIdx);
        }
      }
-     Debug.Assert(bestAction > -1);
-     return bestAction;
+     Debug.Assert(bestActions.Any());
+
+     return bestActions.SelectRandom(random);
    }
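The capped term `Math.Min(1.0 / 4, V(aInfo, totalTries))` is the UCB1-Tuned index of Auer, Cesa-Bianchi and Fischer (2002). The helper `V` itself is not part of this diff; the sketch below reconstructs it from the paper, so the parameter names and the exact form of the library's `V` are assumptions:

    using System;

    static class Ucb1TunedSketch {
      // V_j(n) = sample variance of arm j + sqrt(2 ln n / n_j)   (Auer et al. 2002)
      static double V(double rewardVariance, int tries, int totalTries) {
        return rewardVariance + Math.Sqrt(2.0 * Math.Log(totalTries) / tries);
      }

      // q_j = avg_j + sqrt((ln n / n_j) * min(1/4, V_j(n)));
      // 1/4 caps the term at the maximal variance of a Bernoulli variable.
      public static double Index(double avgReward, double rewardVariance, int tries, int totalTries) {
        return avgReward + Math.Sqrt((Math.Log(totalTries) / tries)
                                     * Math.Min(0.25, V(rewardVariance, tries, totalTries)));
      }
    }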
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/UCBNormalPolicy.cs
r11742 → r11792

    using System.Text;
    using System.Threading.Tasks;
+   using HeuristicLab.Common;

    namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
…
    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
      var myActionInfos = actionInfos.OfType<MeanAndVariancePolicyActionInfo>();
-     int bestAction = -1;
+     int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
      double bestQ = double.NegativeInfinity;
-     int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
      int aIdx = -1;
+     var bestActions = new List<int>();
      foreach (var aInfo in myActionInfos) {
        aIdx++;
        if (aInfo.Disabled) continue;
-       if (totalTries <= 1 || aInfo.Tries <= 1 || aInfo.Tries <= Math.Ceiling(8 * Math.Log(totalTries))) return aIdx;
-
-       var tries = aInfo.Tries;
-       var avgReward = aInfo.AvgReward;
-       var rewardVariance = aInfo.RewardVariance;
-       var estVariance = 16.0 * rewardVariance * (Math.Log(totalTries - 1) / tries);
-       var q = avgReward + Math.Sqrt(estVariance);
+       double q;
+       if (totalTries <= 1 || aInfo.Tries <= 1 || aInfo.Tries <= Math.Ceiling(8 * Math.Log(totalTries))) {
+         q = double.PositiveInfinity;
+       } else {
+         var tries = aInfo.Tries;
+         var avgReward = aInfo.AvgReward;
+         var rewardVariance = aInfo.RewardVariance;
+         var estVariance = 16.0 * rewardVariance * (Math.Log(totalTries - 1) / tries);
+         q = avgReward + Math.Sqrt(estVariance);
+       }
        if (q > bestQ) {
          bestQ = q;
-         bestAction = aIdx;
+         bestActions.Clear();
+         bestActions.Add(aIdx);
+       } else if (q == bestQ) {
+         bestActions.Add(aIdx);
        }
      }
-     Debug.Assert(bestAction > -1);
-     return bestAction;
+     Debug.Assert(bestActions.Any());
+     return bestActions.SelectRandom(random);
    }
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/GrammarPolicies/GenericGrammarPolicy.cs
r11770 → r11792

      out ReadonlySequence selectedState) {
      // only select states that are not yet done
-     afterStates = afterStates.Where(a => !done.Contains(CanonicalState(a.ToString()))).ToArray();
+     afterStates = afterStates.Where(a => !done.Contains(CanonicalState(a))).ToArray();
      if (!afterStates.Any()) {
        // fail because all follow states have already been visited => also disable the current state (if we can be sure that it has been fully explored)
-       throw new NotImplementedException();
-       //var curStateCanonical = CanonicalState(curState.ToString());
-       //if (curState.ToString().Length == curStateCanonical.Length)
-       done.Add(CanonicalState(curState.ToString()));
+       done.Add(CanonicalState(curState));
        selectedState = null;
        return false;
…
    private IBanditPolicyActionInfo GetStateInfo(ReadonlySequence state) {
-     var s = CanonicalState(state.ToString());
+     var s = CanonicalState(state);
      IBanditPolicyActionInfo info;
      if (!stateInfo.TryGetValue(s, out info)) {
…
      // the last state could be terminal
      var lastState = stateTrajectory.Last();
-     if (lastState.IsTerminal) done.Add(CanonicalState(lastState.ToString()));
+     if (lastState.IsTerminal) done.Add(CanonicalState(lastState));

      foreach (var state in stateTrajectory) {
…
    public int GetTries(ReadonlySequence state) {
-     var s = CanonicalState(state.ToString());
+     var s = CanonicalState(state);
      if (stateInfo.ContainsKey(s)) return stateInfo[s].Tries;
      else return 0;
…
    public double GetValue(ReadonlySequence state) {
-     var s = CanonicalState(state.ToString());
+     var s = CanonicalState(state);
      if (stateInfo.ContainsKey(s)) return stateInfo[s].Value;
      else return 0.0; // TODO: check alternatives
    }

-   protected string CanonicalState(string state) {
-     if (useCanonicalState) return problem.CanonicalRepresentation(state);
-     else return state;
+   protected string CanonicalState(ReadonlySequence state) {
+     if (useCanonicalState) {
+       if (state.IsTerminal)
+         return problem.CanonicalRepresentation(state.ToString());
+       else {
+         // for non-terminal phrases make sure we don't disable canonical states that have not yet been fully explored
+         // e.g. if for the ant problem we have the phrase lllS (and we are limited to 4 symbols) and lllr as well as llll are explored
+         // then we are not allowed to disable rS (canonical of lllS) because rS might not have been fully explored
+         // solution: we disable the state rS4
+         return problem.CanonicalRepresentation(state.ToString()) + state.Length;
+       }
+     } else
+       return state.ToString();
    }
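The length suffix in `CanonicalState` is what keeps the done-set sound: several non-terminal phrases can share a canonical form, and exhausting one of them does not exhaust the others, so their done-flags must stay distinct. A small illustration of the key scheme, where `canonical` stands in for the problem's `CanonicalRepresentation` (assumed here to map the ant phrase "lllS" to "rS", as in the comment in the diff):

    using System;

    static class CanonicalKeySketch {
      public static string Key(string phrase, bool isTerminal, int length,
                               Func<string, string> canonical) {
        if (isTerminal) return canonical(phrase);
        // Appending the length keeps "lllS" (key "rS4") distinct from any other
        // phrase that also canonicalizes to "rS", so marking it done cannot
        // accidentally disable a state that is not yet fully explored.
        return canonical(phrase) + length;
      }
    }

Under that assumption, Key("lllS", false, 4, canonical) yields "rS4", while a shorter phrase with the same canonical form gets a different key.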
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Models/BernoulliModel.cs
r11742 → r11792

    public void Update(double reward) {
-     Debug.Assert(reward.IsAlmost(1.0) || reward.IsAlmost(0.0));
-     if (reward.IsAlmost(1.0)) {
+     // Debug.Assert(reward.IsAlmost(1.0) || reward.IsAlmost(0.0));
+     if (reward > 0) {
        success++;
      } else {
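With the relaxed update, any strictly positive reward counts as a Bernoulli success, so the model can also be driven by non-binary qualities. Since the grid test below instantiates it as `new GenericThompsonSamplingPolicy(new BernoulliModel(1, 1))`, a plausible reading of the two arguments is a uniform Beta(1, 1) prior over the success probability; the following sketch shows that conjugate update (names are illustrative, not the library API):

    // Minimal Beta-Bernoulli sketch, assuming BernoulliModel(1, 1) encodes
    // a uniform Beta(1, 1) prior; this is an assumption, not the library code.
    class BetaBernoulliSketch {
      private readonly double priorAlpha, priorBeta; // prior pseudo-counts
      private int success, failure;

      public BetaBernoulliSketch(double priorAlpha, double priorBeta) {
        this.priorAlpha = priorAlpha;
        this.priorBeta = priorBeta;
      }

      // Matches the relaxed update above: any reward > 0 counts as a success.
      public void Update(double reward) {
        if (reward > 0) success++; else failure++;
      }

      // Posterior is Beta(priorAlpha + success, priorBeta + failure);
      // its mean is the model's point estimate of the success probability.
      public double PosteriorMean() {
        return (priorAlpha + success) / (priorAlpha + priorBeta + success + failure);
      }
    }

Thompson sampling then draws one success probability per arm from its posterior and plays the argmax; the posterior mean shown here is the point estimate around which those draws concentrate.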
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/SequentialSearch.cs
r11770 → r11792

      Reset();

-     for (int i = 0; !Done() && i < maxIterations; i++) {
+     for (int i = 0; bestQuality < 1.0 && !Done() && i < maxIterations; i++) {
        var phrase = SampleSentence(problem.Grammar);
        // can fail on the last sentence
…
        }
      }
-
-     // clean up
-     // Reset(); GC.Collect();
    }
…
      while (!phrase.IsTerminal) {

+       var newPhrases = GenerateFollowStates(g, phrase);
+
+       throw new NotImplementedException(); // TODO: reintroduce random-trie checking once the tree of all states has been reintroduced
        //if (n.randomTries < randomTries) {
        //  n.randomTries++;
…
        //} else {
-
-       var newPhrases = GenerateFollowStates(g, phrase);
-
        // => select using bandit policy
        // failure means we simply restart
        if (!behaviourPolicy.TrySelect(random, phrase, newPhrases, out phrase)) {
          return false;
        }
+       // }
        stateChain.Add(phrase);
        curDepth++;
…
    private readonly Dictionary<ReadonlySequence, ReadonlySequence[]> cache;
    private IEnumerable<ReadonlySequence> GenerateFollowStates(IGrammar g, ReadonlySequence phrase) {
+     throw new NotImplementedException();
+     // TODO: Replace caching by a tree of all states. tree is only used for easily retrieving the follow-states of a state
      ReadonlySequence[] follow;
-     if (!cache.TryGetValue(phrase, out follow)) {
+     //if (!cache.TryGetValue(phrase, out follow)) {
      char nt = phrase.FirstNonTerminal;
…
        follow[idx++] = new ReadonlySequence(newPhrase);
      }
-     cache[phrase] = follow;
-     }
+     // cache[phrase] = follow;
+     //}
      return follow;
    }
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/HardPalindromeProblem.cs
r11742 → r11792

    public string CanonicalRepresentation(string terminalPhrase) {
+     throw new NotImplementedException();
      return terminalPhrase;
    }
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/IProblem.cs
r11742 → r11792

    double BestKnownQuality(int maxLen);
    IGrammar Grammar { get; }
-   double Evaluate(string sentence);
-   string CanonicalRepresentation(string terminalPhrase);
+   double Evaluate(ReadonlySequence sentence);
+   ReadonlySequence CanonicalRepresentation(ReadonlySequence terminalPhrase);
  }
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/PalindromeProblem.cs
r11742 → r11792

    public string CanonicalRepresentation(string terminalPhrase) {
+     throw new NotImplementedException();
      return terminalPhrase;
    }
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/RoyalSequenceProblem.cs
r11747 → r11792

    public string CanonicalRepresentation(string terminalPhrase) {
+     throw new NotImplementedException();
      return terminalPhrase;
    }
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/SymbolicRegressionPoly10Problem.cs
r11770 → r11792

    // right now only + and * is supported
-   private Dictionary<string, string> cache = new Dictionary<string, string>();
+   //private Dictionary<string, string> cache = new Dictionary<string, string>();
    public string CanonicalRepresentation(string phrase) {
      string res;
-     if (!cache.TryGetValue(phrase, out res)) {
+     //if (!cache.TryGetValue(phrase, out res)) {
      var terms = phrase.Split('+').Select(t => t.Replace("*", ""));
      var terminalTerms = terms.Where(t => t.All(ch => grammar.IsTerminal(ch)));
      var nonTerminalTerms = terms.Where(t => t.Any(ch => grammar.IsNonTerminal(ch)));

      res = string.Join("+", terminalTerms.Select(term => CanonicalTerm(term)).OrderBy(term => term).Concat(nonTerminalTerms.Select(term => CanonicalTerm(term))));
-     cache[phrase] = res;
-     }
+     //cache[phrase] = res;
+     //}
      return res;
    }
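The canonicalization above exploits commutativity of `+` and `*`: multiplication signs are dropped, the symbols within each term are normalized (via `CanonicalTerm`, which is not shown in this diff), and fully terminal terms are sorted among themselves. A compact sketch for purely terminal phrases, assuming `CanonicalTerm` simply sorts a term's symbols:

    using System;
    using System.Linq;

    static class Poly10CanonicalSketch {
      // Assumption: CanonicalTerm sorts the variable symbols of a term.
      static string CanonicalTerm(string term) {
        return new string(term.OrderBy(ch => ch).ToArray());
      }

      // For phrases without non-terminals: drop '*', canonicalize each term,
      // then sort the terms so equivalent polynomials share a single key.
      public static string CanonicalRepresentation(string phrase) {
        var terms = phrase.Split('+').Select(t => t.Replace("*", ""));
        return string.Join("+", terms.Select(CanonicalTerm).OrderBy(t => t, StringComparer.Ordinal));
      }
    }

Under these assumptions, CanonicalRepresentation("c*a+b*a") yields "ab+ac", so both orderings of the same polynomial map to one bandit state.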
branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs
r11770 → r11792

    using HeuristicLab.Algorithms.Bandits;
    using HeuristicLab.Algorithms.Bandits.BanditPolicies;
+   using HeuristicLab.Algorithms.Bandits.GrammarPolicies;
    using HeuristicLab.Algorithms.Bandits.Models;
    using HeuristicLab.Algorithms.GrammaticalOptimization;
    using HeuristicLab.Problems.GrammaticalOptimization;
    using HeuristicLab.Problems.GrammaticalOptimization.SymbReg;
+   using BoltzmannExplorationPolicy = HeuristicLab.Algorithms.Bandits.BanditPolicies.BoltzmannExplorationPolicy;
+   using EpsGreedyPolicy = HeuristicLab.Algorithms.Bandits.BanditPolicies.EpsGreedyPolicy;
+   using RandomPolicy = HeuristicLab.Algorithms.Bandits.BanditPolicies.RandomPolicy;
+   using UCTPolicy = HeuristicLab.Algorithms.Bandits.BanditPolicies.UCTPolicy;

    namespace Main {
…
      RunDemo();
-     //RunGridTest();
+     RunGridTest();
    }

    private static void RunGridTest() {
-     int maxIterations = 200000; // for poly-10 with 50000 evaluations no successful try with hl yet
+     int maxIterations = 50000; // for poly-10 with 50000 evaluations no successful try with hl yet
      //var globalRandom = new Random(31415);
      var localRandSeed = 31415;
-     var reps = 8;
+     var reps = 5;

      var policies = new Func<IBanditPolicy>[]
      {
+       () => new RandomPolicy(),
+       () => new ActiveLearningPolicy(),
        () => new EpsGreedyPolicy(0.01, (aInfo)=> aInfo.MaxReward, "max"),
        () => new EpsGreedyPolicy(0.05, (aInfo)=> aInfo.MaxReward, "max"),
…
        //() => new BernoulliThompsonSamplingPolicy(),
        () => new GenericThompsonSamplingPolicy(new BernoulliModel(1, 1)),
-       () => new RandomPolicy(),
        () => new EpsGreedyPolicy(0.01),
        () => new EpsGreedyPolicy(0.05),
…
        () => new UCB1TunedPolicy(),
        () => new UCBNormalPolicy(),
-       () => new BoltzmannExplorationPolicy(0.1),
-       () => new BoltzmannExplorationPolicy(0.5),
        () => new BoltzmannExplorationPolicy(1),
-       () => new BoltzmannExplorationPolicy(5),
        () => new BoltzmannExplorationPolicy(10),
        () => new BoltzmannExplorationPolicy(20),
        () => new BoltzmannExplorationPolicy(100),
+       () => new BoltzmannExplorationPolicy(200),
+       () => new BoltzmannExplorationPolicy(500),
        () => new ChernoffIntervalEstimationPolicy( 0.01),
        () => new ChernoffIntervalEstimationPolicy( 0.05),
…
        () => new ThresholdAscentPolicy(100, 0.1),
        () => new ThresholdAscentPolicy(100, 0.2),
-       () => new ThresholdAscentPolicy(1000, 0.01),
-       () => new ThresholdAscentPolicy(1000, 0.05),
-       () => new ThresholdAscentPolicy(1000, 0.1),
-       () => new ThresholdAscentPolicy(1000, 0.2),
-       () => new ThresholdAscentPolicy(5000, 0.01),
-       () => new ThresholdAscentPolicy(10000, 0.01),
+       () => new ThresholdAscentPolicy(100, 0.01),
+       () => new ThresholdAscentPolicy(100, 0.05),
+       () => new ThresholdAscentPolicy(100, 0.1),
+       () => new ThresholdAscentPolicy(100, 0.2),
+       //() => new ThresholdAscentPolicy(1000, 0.01),
+       //() => new ThresholdAscentPolicy(1000, 0.05),
+       //() => new ThresholdAscentPolicy(1000, 0.1),
+       //() => new ThresholdAscentPolicy(1000, 0.2),
+       //() => new ThresholdAscentPolicy(5000, 0.01),
+       //() => new ThresholdAscentPolicy(10000, 0.01),
      };

      foreach (var problem in new Tuple<IProblem, int>[]
      {
-       //Tuple.Create((IProblem)new SantaFeAntProblem(), 17),
+       Tuple.Create((IProblem)new SantaFeAntProblem(), 17),
        Tuple.Create((IProblem)new SymbolicRegressionPoly10Problem(), 23),
      })
-     foreach (var randomTries in new int[] { 0, 1, 10, /* 5, 100 /*, 500, 1000 */}) {
+     foreach (var useCanonical in new bool[] { true, false })
+     foreach (var randomTries in new int[] { 0, /*1, 10, /* 5, 100 /*, 500, 1000 */}) {
        foreach (var policy in policies) {
          var myRandomTries = randomTries;
          var localRand = new Random(localRandSeed);
          var options = new ParallelOptions();
-         options.MaxDegreeOfParallelism = 4;
+         options.MaxDegreeOfParallelism = 1;
          Parallel.For(0, reps, options, (i) => {
            //var t = Task.Run(() => {
            Random myLocalRand;
            lock (localRand)
              myLocalRand = new Random(localRand.Next());

            //for (int i = 0; i < reps; i++) {

            int iterations = 0;
            var globalStatistics = new SentenceSetStatistics();
…
-           var alg = new MctsSampler(problem.Item1, problem.Item2, myLocalRand, myRandomTries, policy()); // TODO: Make sure we generate the same random numbers for each experiment
+           // var alg = new MctsSampler(problem.Item1, problem.Item2, myLocalRand, myRandomTries, policy()); // TODO: Make sure we generate the same random numbers for each
+           var alg = new SequentialSearch(problem.Item1, problem.Item2, myLocalRand, myRandomTries, new GenericGrammarPolicy(problem.Item1, policy(), useCanonical));
            //var alg = new ExhaustiveBreadthFirstSearch(problem, 25);
            //var alg = new AlternativesContextSampler(problem, 25);

            alg.SolutionEvaluated += (sentence, quality) => {
              iterations++;
              globalStatistics.AddSentence(sentence, quality);
-             if (iterations % 10000 == 0) {
-               Console.WriteLine("{0,4} {1,7} {2,5} {3,25} {4}", alg.treeDepth, alg.treeSize, myRandomTries, policy(), globalStatistics);
+             if (iterations % 1000 == 0) {
+               Console.WriteLine("{0,5} {1,25} {2} {3}", myRandomTries, policy(), useCanonical, globalStatistics);
              }
            };
+           alg.FoundNewBestSolution += (sentence, quality) => {
+             Console.WriteLine("{0,5} {1,25} {2} {3}", myRandomTries, policy(), useCanonical, globalStatistics);
+           };

            alg.Run(maxIterations);

            //Console.WriteLine("{0,5} {1} {2}", randomTries, policyFactory(1), globalStatistics);
            //}
            //});
            //tasks.Add(t);
          });
        }
      }
      //Task.WaitAll(tasks.ToArray());
    }

    private static void RunDemo() {
+     // TODO: clone problem for parallel grid test
      // TODO: move problem instances into a separate folder
      // TODO: improve performance of SequentialSearch (memory allocations related to sequences)
…
      // var problem = new FindPhrasesProblem(random, 15, numPhrases, phraseLen, numOptimalPhrases: numPhrases, numDecoyPhrases: 0, correctReward: 1.0, decoyReward: 0.0, phrasesAsSets: true);

-     //var problem = new SymbolicRegressionPoly10Problem(); // good results e.g. 10 randomtries and EpsGreedyPolicy(0.2, (aInfo)=>aInfo.MaxReward)
+     var problem = new SymbolicRegressionPoly10Problem(); // good results e.g. 10 randomtries and EpsGreedyPolicy(0.2, (aInfo)=>aInfo.MaxReward)
      // Ant
      // good results e.g. with var alg = new MctsSampler(problem, 17, random, 1, (rand, numActions) => new ThresholdAscentPolicy(numActions, 500, 0.01));
…
      // new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericGrammarPolicy(problem, new UCB1TunedPolicy(), true));

-     var problem = new SantaFeAntProblem();
+     //var problem = new SantaFeAntProblem();
      //var problem = new SymbolicRegressionProblem("Tower");
      //var problem = new PalindromeProblem();
…
      //var alg = new MctsSampler(problem, 23, random, 0, new BoltzmannExplorationPolicy(100));
      //var alg = new MctsSampler(problem, 23, random, 0, new EpsGreedyPolicy(0.1));
-     var alg = new SequentialSearch(problem, 10, random, 0,
-       new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericGrammarPolicy(problem, new GaussianThompsonSamplingPolicy(true), true));
+     var alg = new SequentialSearch(problem, 23, random, 0,
+       new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericGrammarPolicy(problem, new EpsGreedyPolicy(0.2), true));
      //var alg = new MctsQLearningSampler(problem, sentenceLen, random, 0, null);
      //var alg = new MctsQLearningSampler(problem, 30, random, 0, new EpsGreedyPolicy(0.2));