Changeset 11792 for branches/HeuristicLab.Problems.GrammaticalOptimization
Timestamp: 01/16/15 18:26:35
Location: branches/HeuristicLab.Problems.GrammaticalOptimization
Files: 14 edited
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/ActiveLearningPolicy.cs
r11747 → r11792

    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
      var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>();
-     double bestQ = double.NegativeInfinity;
      int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
      const double delta = 0.1;
…
      double l;
      if (aInfo.Tries == 0) {
-       u = 1.0;
-       l = 0.0;
+       u = double.PositiveInfinity;
+       l = double.NegativeInfinity;
      } else {
        q = aInfo.SumReward / aInfo.Tries;
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/ChernoffIntervalEstimationPolicy.cs
r11742 → r11792

    using System.Text;
    using System.Threading.Tasks;
+   using HeuristicLab.Common;

    namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
…
      int k = myActionInfos.Count(a => !a.Disabled);
      int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
-     int bestAction = -1;
      double bestQ = double.NegativeInfinity;
+     var bestActions = new List<int>();
      var aIdx = -1;
      foreach (var aInfo in myActionInfos) {
        aIdx++;
        if (aInfo.Disabled) continue;
-       if (aInfo.Tries == 0) return aIdx;
-
-       var avgReward = aInfo.SumReward / aInfo.Tries;
-
-       // page 5 of "A simple distribution-free appraoch to the max k-armed bandit problem"
-       // var alpha = Math.Log(2 * totalTries * k / delta);
-       double alpha = Math.Log(2.0) + Math.Log(totalTries) + Math.Log(k) - Math.Log(delta); // total tries is max tries in the original paper
-       var q = avgReward + (alpha + Math.Sqrt(2 * aInfo.Tries * avgReward * alpha + alpha * alpha)) / aInfo.Tries;
+       double q;
+       if (aInfo.Tries == 0) {
+         q = double.PositiveInfinity;
+       } else {
+         var avgReward = aInfo.SumReward / aInfo.Tries;
+
+         // page 5 of "A simple distribution-free appraoch to the max k-armed bandit problem"
+         // var alpha = Math.Log(2 * totalTries * k / delta);
+         double alpha = Math.Log(2.0) + Math.Log(totalTries) + Math.Log(k) - Math.Log(delta);
+         // total tries is max tries in the original paper
+         q = avgReward + (alpha + Math.Sqrt(2 * aInfo.Tries * avgReward * alpha + alpha * alpha)) / aInfo.Tries;
+       }
        if (q > bestQ) {
          bestQ = q;
-         bestAction = aIdx;
+         bestActions.Clear();
+         bestActions.Add(aIdx);
+       } else if (q == bestQ) {
+         bestActions.Add(aIdx);
        }
      }
-     Debug.Assert(bestAction >= 0);
-     return bestAction;
+     Debug.Assert(bestActions.Any());
+     return bestActions.SelectRandom(random);
    }
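The same selection pattern is applied across ActiveLearningPolicy, ChernoffIntervalEstimationPolicy, ThresholdAscentPolicy, UCB1TunedPolicy and UCBNormalPolicy in this changeset: an untried arm no longer triggers an eager `return aIdx` (which always favored the first untried arm in enumeration order) but receives an index of `double.PositiveInfinity`, and ties among the best indices are broken uniformly at random via `SelectRandom` from HeuristicLab.Common. A minimal sketch of that pattern, with a hypothetical stand-in for `SelectRandom` (the library's exact signature is not shown in this changeset):

    using System;
    using System.Collections.Generic;

    static class ArgMaxSketch {
      // Hypothetical stand-in for the HeuristicLab.Common SelectRandom extension:
      // returns a uniformly chosen element of the list.
      static int SelectRandom(this List<int> items, Random random) {
        return items[random.Next(items.Count)];
      }

      // Argmax with random tie-breaking; untried arms enter with q = +infinity,
      // so they all tie at the top and one of them is drawn uniformly.
      public static int SelectAction(Random random, IReadOnlyList<double> q) {
        double bestQ = double.NegativeInfinity;
        var bestActions = new List<int>();
        for (int aIdx = 0; aIdx < q.Count; aIdx++) {
          if (q[aIdx] > bestQ) {
            bestQ = q[aIdx];
            bestActions.Clear();
            bestActions.Add(aIdx);
          } else if (q[aIdx] == bestQ) {
            bestActions.Add(aIdx);
          }
        }
        return bestActions.SelectRandom(random);
      }
    }

The ActiveLearningPolicy hunk above fits the same motivation: an untried arm now gets the non-informative bounds u = +∞ and l = −∞ instead of [0, 1], so it is guaranteed to be sampled before its confidence interval is trusted.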
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/ThresholdAscentPolicy.cs
r11747 → r11792

      get {
        if (Disabled) return knownValue;
        if (Tries == 0.0) return 0.0;
        return rewardHistogram[thresholdBin] / (double)Tries;
      }
…
      UpdateThreshold(myActionInfos);

-     int bestAction = -1;
+     var bestActions = new List<int>();
      double bestQ = double.NegativeInfinity;
      int k = myActionInfos.Count(a => !a.Disabled);
…
        aIdx++;
        if (aInfo.Disabled) continue;
-       if (aInfo.Tries == 0) return aIdx;
-       double mu = aInfo.Value; // probability of rewards > T
-       double q = U(mu, totalTries, aInfo.Tries, k); // totalTries is max iterations in original paper
+       double q;
+       if (aInfo.Tries == 0) {
+         q = double.PositiveInfinity;
+       } else {
+         double mu = aInfo.Value; // probability of rewards > T
+         q = U(mu, totalTries, aInfo.Tries, k); // totalTries is max iterations in original paper
+       }
        if (q > bestQ) {
          bestQ = q;
-         bestAction = aIdx;
+         bestActions.Clear();
+         bestActions.Add(aIdx);
+       } else if (q == bestQ) {
+         bestActions.Add(aIdx);
        }
      }
-     Debug.Assert(bestAction > -1);
-     return bestAction;
+     Debug.Assert(bestActions.Any());
+     return bestActions.SelectRandom(random);
    }
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/UCB1TunedPolicy.cs
r11742 → r11792

    using System.Text;
    using System.Threading.Tasks;
+   using HeuristicLab.Common;

    namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
…
    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
      var myActionInfos = actionInfos.OfType<MeanAndVariancePolicyActionInfo>();
-     int bestAction = -1;
-     double bestQ = double.NegativeInfinity;
      int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);

      int aIdx = -1;
+     double bestQ = double.NegativeInfinity;
+     var bestActions = new List<int>();
      foreach (var aInfo in myActionInfos) {
        aIdx++;
        if (aInfo.Disabled) continue;
-       if (aInfo.Tries == 0) return aIdx;
-
-       var sumReward = aInfo.SumReward;
-       var tries = aInfo.Tries;
-
-       var avgReward = sumReward / tries;
-       var q = avgReward + Math.Sqrt((Math.Log(totalTries) / tries) * Math.Min(1.0 / 4, V(aInfo, totalTries))); // 1/4 is upper bound of bernoulli distributed variable
+       double q;
+       if (aInfo.Tries == 0) {
+         q = double.PositiveInfinity;
+       } else {
+         var sumReward = aInfo.SumReward;
+         var tries = aInfo.Tries;
+
+         var avgReward = sumReward / tries;
+         q = avgReward + Math.Sqrt((Math.Log(totalTries) / tries) * Math.Min(1.0 / 4, V(aInfo, totalTries)));
+         // 1/4 is upper bound of bernoulli distributed variable
+       }
        if (q > bestQ) {
          bestQ = q;
-         bestAction = aIdx;
+         bestActions.Clear();
+         bestActions.Add(aIdx);
+       } else if (q == bestQ) {
+         bestActions.Add(aIdx);
        }
      }
-     Debug.Assert(bestAction > -1);
-     return bestAction;
+     Debug.Assert(bestActions.Any());
+
+     return bestActions.SelectRandom(random);
    }
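The capped term `Math.Min(1.0 / 4, V(aInfo, totalTries))` is the UCB1-Tuned index of Auer, Cesa-Bianchi and Fischer (2002). The helper `V` itself is not part of this diff; the sketch below reconstructs it from the paper, so the parameter names and the exact form of the library's `V` are assumptions:

    using System;

    static class Ucb1TunedSketch {
      // V_j(n) = sample variance of arm j + sqrt(2 ln n / n_j)   (Auer et al. 2002)
      static double V(double rewardVariance, int tries, int totalTries) {
        return rewardVariance + Math.Sqrt(2.0 * Math.Log(totalTries) / tries);
      }

      // q_j = avg_j + sqrt((ln n / n_j) * min(1/4, V_j(n)));
      // 1/4 caps the term at the maximal variance of a Bernoulli variable.
      public static double Index(double avgReward, double rewardVariance, int tries, int totalTries) {
        return avgReward + Math.Sqrt((Math.Log(totalTries) / tries)
                                     * Math.Min(0.25, V(rewardVariance, tries, totalTries)));
      }
    }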
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/UCBNormalPolicy.cs
r11742 → r11792

    using System.Text;
    using System.Threading.Tasks;
+   using HeuristicLab.Common;

    namespace HeuristicLab.Algorithms.Bandits.BanditPolicies {
…
    public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) {
      var myActionInfos = actionInfos.OfType<MeanAndVariancePolicyActionInfo>();
-     int bestAction = -1;
+     int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
      double bestQ = double.NegativeInfinity;
-     int totalTries = myActionInfos.Where(a => !a.Disabled).Sum(a => a.Tries);
      int aIdx = -1;
+     var bestActions = new List<int>();
      foreach (var aInfo in myActionInfos) {
        aIdx++;
        if (aInfo.Disabled) continue;
-       if (totalTries <= 1 || aInfo.Tries <= 1 || aInfo.Tries <= Math.Ceiling(8 * Math.Log(totalTries))) return aIdx;
-
-       var tries = aInfo.Tries;
-       var avgReward = aInfo.AvgReward;
-       var rewardVariance = aInfo.RewardVariance;
-       var estVariance = 16.0 * rewardVariance * (Math.Log(totalTries - 1) / tries);
-       var q = avgReward + Math.Sqrt(estVariance);
+       double q;
+       if (totalTries <= 1 || aInfo.Tries <= 1 || aInfo.Tries <= Math.Ceiling(8 * Math.Log(totalTries))) {
+         q = double.PositiveInfinity;
+       } else {
+         var tries = aInfo.Tries;
+         var avgReward = aInfo.AvgReward;
+         var rewardVariance = aInfo.RewardVariance;
+         var estVariance = 16.0 * rewardVariance * (Math.Log(totalTries - 1) / tries);
+         q = avgReward + Math.Sqrt(estVariance);
+       }
        if (q > bestQ) {
          bestQ = q;
-         bestAction = aIdx;
+         bestActions.Clear();
+         bestActions.Add(aIdx);
+       } else if (q == bestQ) {
+         bestActions.Add(aIdx);
        }
      }
-     Debug.Assert(bestAction > -1);
-     return bestAction;
+     Debug.Assert(bestActions.Any());
+     return bestActions.SelectRandom(random);
    }
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/GrammarPolicies/GenericGrammarPolicy.cs
r11770 → r11792

      out ReadonlySequence selectedState) {
      // only select states that are not yet done
-     afterStates = afterStates.Where(a => !done.Contains(CanonicalState(a.ToString()))).ToArray();
+     afterStates = afterStates.Where(a => !done.Contains(CanonicalState(a))).ToArray();
      if (!afterStates.Any()) {
        // fail because all follow states have already been visited => also disable the current state (if we can be sure that it has been fully explored)
-       throw new NotImplementedException();
-       //var curStateCanonical = CanonicalState(curState.ToString());
-       //if (curState.ToString().Length == curStateCanonical.Length)
-       done.Add(CanonicalState(curState.ToString()));
+       done.Add(CanonicalState(curState));
        selectedState = null;
        return false;
…
    private IBanditPolicyActionInfo GetStateInfo(ReadonlySequence state) {
-     var s = CanonicalState(state.ToString());
+     var s = CanonicalState(state);
      IBanditPolicyActionInfo info;
      if (!stateInfo.TryGetValue(s, out info)) {
…
      // the last state could be terminal
      var lastState = stateTrajectory.Last();
-     if (lastState.IsTerminal) done.Add(CanonicalState(lastState.ToString()));
+     if (lastState.IsTerminal) done.Add(CanonicalState(lastState));

      foreach (var state in stateTrajectory) {
…
    public int GetTries(ReadonlySequence state) {
-     var s = CanonicalState(state.ToString());
+     var s = CanonicalState(state);
      if (stateInfo.ContainsKey(s)) return stateInfo[s].Tries;
      else return 0;
…
    public double GetValue(ReadonlySequence state) {
-     var s = CanonicalState(state.ToString());
+     var s = CanonicalState(state);
      if (stateInfo.ContainsKey(s)) return stateInfo[s].Value;
      else return 0.0; // TODO: check alternatives
    }

-   protected string CanonicalState(string state) {
-     if (useCanonicalState) return problem.CanonicalRepresentation(state);
-     else return state;
+   protected string CanonicalState(ReadonlySequence state) {
+     if (useCanonicalState) {
+       if (state.IsTerminal)
+         return problem.CanonicalRepresentation(state.ToString());
+       else {
+         // for non-terminal phrases make sure we don't disable canonical states that have not yet been fully explored
+         // e.g. if for the ant problem we have the phrase lllS (and we are limited to 4 symbols) and lllr as well as llll are explored
+         // then we are not allowed to disable rS (canonical of lllS) because rS might not have been fully explored
+         // solution: we disable the state rS4
+         return problem.CanonicalRepresentation(state.ToString()) + state.Length;
+       }
+     } else
+       return state.ToString();
    }
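The length suffix in `CanonicalState` is what keeps the done-set sound: several non-terminal phrases can share a canonical form, and exhausting one of them does not exhaust the others, so their done-flags must stay distinct. A small illustration of the key scheme, where `canonical` stands in for the problem's `CanonicalRepresentation` (assumed here to map the ant phrase "lllS" to "rS", as in the comment in the diff):

    using System;

    static class CanonicalKeySketch {
      public static string Key(string phrase, bool isTerminal, int length,
                               Func<string, string> canonical) {
        if (isTerminal) return canonical(phrase);
        // Appending the length keeps "lllS" (key "rS4") distinct from any other
        // phrase that also canonicalizes to "rS", so marking it done cannot
        // accidentally disable a state that is not yet fully explored.
        return canonical(phrase) + length;
      }
    }

Under that assumption, Key("lllS", false, 4, canonical) yields "rS4", while a shorter phrase with the same canonical form gets a different key.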
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Models/BernoulliModel.cs
r11742 → r11792

    public void Update(double reward) {
-     Debug.Assert(reward.IsAlmost(1.0) || reward.IsAlmost(0.0));
-     if (reward.IsAlmost(1.0)) {
+     // Debug.Assert(reward.IsAlmost(1.0) || reward.IsAlmost(0.0));
+     if (reward > 0) {
        success++;
      } else {
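With the relaxed update, any strictly positive reward counts as a Bernoulli success, so the model can also be driven by non-binary qualities. Since the grid test below instantiates it as `new GenericThompsonSamplingPolicy(new BernoulliModel(1, 1))`, a plausible reading of the two arguments is a uniform Beta(1, 1) prior over the success probability; the following sketch shows that conjugate update (names are illustrative, not the library API):

    // Minimal Beta-Bernoulli sketch, assuming BernoulliModel(1, 1) encodes
    // a uniform Beta(1, 1) prior; this is an assumption, not the library code.
    class BetaBernoulliSketch {
      private readonly double priorAlpha, priorBeta; // prior pseudo-counts
      private int success, failure;

      public BetaBernoulliSketch(double priorAlpha, double priorBeta) {
        this.priorAlpha = priorAlpha;
        this.priorBeta = priorBeta;
      }

      // Matches the relaxed update above: any reward > 0 counts as a success.
      public void Update(double reward) {
        if (reward > 0) success++; else failure++;
      }

      // Posterior is Beta(priorAlpha + success, priorBeta + failure);
      // its mean is the model's point estimate of the success probability.
      public double PosteriorMean() {
        return (priorAlpha + success) / (priorAlpha + priorBeta + success + failure);
      }
    }

Thompson sampling then draws one success probability per arm from its posterior and plays the argmax; the posterior mean shown here is the point estimate around which those draws concentrate.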
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/SequentialSearch.cs
r11770 → r11792

      Reset();

-     for (int i = 0; !Done() && i < maxIterations; i++) {
+     for (int i = 0; bestQuality < 1.0 && !Done() && i < maxIterations; i++) {
        var phrase = SampleSentence(problem.Grammar);
        // can fail on the last sentence
…
        }
      }
-
-     // clean up
-     // Reset(); GC.Collect();
    }
…
      while (!phrase.IsTerminal) {

+       var newPhrases = GenerateFollowStates(g, phrase);
+
+       throw new NotImplementedException(); // TODO: reintroduce random-trie checking once the tree of all states has been reintroduced
        //if (n.randomTries < randomTries) {
        //  n.randomTries++;
…
        //} else {
-
-       var newPhrases = GenerateFollowStates(g, phrase);
-
        // => select using bandit policy
        // failure means we simply restart
        if (!behaviourPolicy.TrySelect(random, phrase, newPhrases, out phrase)) {
          return false;
        }
+       // }
        stateChain.Add(phrase);
        curDepth++;
…
    private readonly Dictionary<ReadonlySequence, ReadonlySequence[]> cache;
    private IEnumerable<ReadonlySequence> GenerateFollowStates(IGrammar g, ReadonlySequence phrase) {
+     throw new NotImplementedException();
+     // TODO: Replace caching by a tree of all states. tree is only used for easily retrieving the follow-states of a state
      ReadonlySequence[] follow;
-     if (!cache.TryGetValue(phrase, out follow)) {
+     //if (!cache.TryGetValue(phrase, out follow)) {
      char nt = phrase.FirstNonTerminal;
…
        follow[idx++] = new ReadonlySequence(newPhrase);
      }
-     cache[phrase] = follow;
-     }
+     // cache[phrase] = follow;
+     //}
      return follow;
    }
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/HardPalindromeProblem.cs
r11742 → r11792

    public string CanonicalRepresentation(string terminalPhrase) {
+     throw new NotImplementedException();
      return terminalPhrase;
    }
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/IProblem.cs
r11742 → r11792

    double BestKnownQuality(int maxLen);
    IGrammar Grammar { get; }
-   double Evaluate(string sentence);
-   string CanonicalRepresentation(string terminalPhrase);
+   double Evaluate(ReadonlySequence sentence);
+   ReadonlySequence CanonicalRepresentation(ReadonlySequence terminalPhrase);
  }
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/PalindromeProblem.cs
r11742 → r11792

    public string CanonicalRepresentation(string terminalPhrase) {
+     throw new NotImplementedException();
      return terminalPhrase;
    }
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/RoyalSequenceProblem.cs
r11747 → r11792

    public string CanonicalRepresentation(string terminalPhrase) {
+     throw new NotImplementedException();
      return terminalPhrase;
    }
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/SymbolicRegressionPoly10Problem.cs
r11770 → r11792

    // right now only + and * is supported
-   private Dictionary<string, string> cache = new Dictionary<string, string>();
+   //private Dictionary<string, string> cache = new Dictionary<string, string>();
    public string CanonicalRepresentation(string phrase) {
      string res;
-     if (!cache.TryGetValue(phrase, out res)) {
+     //if (!cache.TryGetValue(phrase, out res)) {
      var terms = phrase.Split('+').Select(t => t.Replace("*", ""));
      var terminalTerms = terms.Where(t => t.All(ch => grammar.IsTerminal(ch)));
      var nonTerminalTerms = terms.Where(t => t.Any(ch => grammar.IsNonTerminal(ch)));

      res = string.Join("+", terminalTerms.Select(term => CanonicalTerm(term)).OrderBy(term => term).Concat(nonTerminalTerms.Select(term => CanonicalTerm(term))));
-     cache[phrase] = res;
-     }
+     //cache[phrase] = res;
+     //}
      return res;
    }
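The canonicalization above exploits commutativity of `+` and `*`: multiplication signs are dropped, the symbols within each term are normalized (via `CanonicalTerm`, which is not shown in this diff), and fully terminal terms are sorted among themselves. A compact sketch for purely terminal phrases, assuming `CanonicalTerm` simply sorts a term's symbols:

    using System;
    using System.Linq;

    static class Poly10CanonicalSketch {
      // Assumption: CanonicalTerm sorts the variable symbols of a term.
      static string CanonicalTerm(string term) {
        return new string(term.OrderBy(ch => ch).ToArray());
      }

      // For phrases without non-terminals: drop '*', canonicalize each term,
      // then sort the terms so equivalent polynomials share a single key.
      public static string CanonicalRepresentation(string phrase) {
        var terms = phrase.Split('+').Select(t => t.Replace("*", ""));
        return string.Join("+", terms.Select(CanonicalTerm).OrderBy(t => t, StringComparer.Ordinal));
      }
    }

Under these assumptions, CanonicalRepresentation("c*a+b*a") yields "ab+ac", so both orderings of the same polynomial map to one bandit state.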
branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs
r11770 → r11792

    using HeuristicLab.Algorithms.Bandits;
    using HeuristicLab.Algorithms.Bandits.BanditPolicies;
+   using HeuristicLab.Algorithms.Bandits.GrammarPolicies;
    using HeuristicLab.Algorithms.Bandits.Models;
    using HeuristicLab.Algorithms.GrammaticalOptimization;
    using HeuristicLab.Problems.GrammaticalOptimization;
    using HeuristicLab.Problems.GrammaticalOptimization.SymbReg;
+   using BoltzmannExplorationPolicy = HeuristicLab.Algorithms.Bandits.BanditPolicies.BoltzmannExplorationPolicy;
+   using EpsGreedyPolicy = HeuristicLab.Algorithms.Bandits.BanditPolicies.EpsGreedyPolicy;
+   using RandomPolicy = HeuristicLab.Algorithms.Bandits.BanditPolicies.RandomPolicy;
+   using UCTPolicy = HeuristicLab.Algorithms.Bandits.BanditPolicies.UCTPolicy;

    namespace Main {
…
      RunDemo();
-     //RunGridTest();
+     RunGridTest();
    }

    private static void RunGridTest() {
-     int maxIterations = 200000; // for poly-10 with 50000 evaluations no successful try with hl yet
+     int maxIterations = 50000; // for poly-10 with 50000 evaluations no successful try with hl yet
      //var globalRandom = new Random(31415);
      var localRandSeed = 31415;
-     var reps = 8;
+     var reps = 5;

      var policies = new Func<IBanditPolicy>[]
      {
+       () => new RandomPolicy(),
+       () => new ActiveLearningPolicy(),
        () => new EpsGreedyPolicy(0.01, (aInfo)=> aInfo.MaxReward, "max"),
        () => new EpsGreedyPolicy(0.05, (aInfo)=> aInfo.MaxReward, "max"),
…
        //() => new BernoulliThompsonSamplingPolicy(),
        () => new GenericThompsonSamplingPolicy(new BernoulliModel(1, 1)),
-       () => new RandomPolicy(),
        () => new EpsGreedyPolicy(0.01),
        () => new EpsGreedyPolicy(0.05),
…
        () => new UCB1TunedPolicy(),
        () => new UCBNormalPolicy(),
-       () => new BoltzmannExplorationPolicy(0.1),
-       () => new BoltzmannExplorationPolicy(0.5),
        () => new BoltzmannExplorationPolicy(1),
-       () => new BoltzmannExplorationPolicy(5),
        () => new BoltzmannExplorationPolicy(10),
        () => new BoltzmannExplorationPolicy(20),
        () => new BoltzmannExplorationPolicy(100),
+       () => new BoltzmannExplorationPolicy(200),
+       () => new BoltzmannExplorationPolicy(500),
        () => new ChernoffIntervalEstimationPolicy( 0.01),
        () => new ChernoffIntervalEstimationPolicy( 0.05),
…
        () => new ThresholdAscentPolicy(100, 0.1),
        () => new ThresholdAscentPolicy(100, 0.2),
-       () => new ThresholdAscentPolicy(1000, 0.01),
-       () => new ThresholdAscentPolicy(1000, 0.05),
-       () => new ThresholdAscentPolicy(1000, 0.1),
-       () => new ThresholdAscentPolicy(1000, 0.2),
-       () => new ThresholdAscentPolicy(5000, 0.01),
-       () => new ThresholdAscentPolicy(10000, 0.01),
+       () => new ThresholdAscentPolicy(100, 0.01),
+       () => new ThresholdAscentPolicy(100, 0.05),
+       () => new ThresholdAscentPolicy(100, 0.1),
+       () => new ThresholdAscentPolicy(100, 0.2),
+       //() => new ThresholdAscentPolicy(1000, 0.01),
+       //() => new ThresholdAscentPolicy(1000, 0.05),
+       //() => new ThresholdAscentPolicy(1000, 0.1),
+       //() => new ThresholdAscentPolicy(1000, 0.2),
+       //() => new ThresholdAscentPolicy(5000, 0.01),
+       //() => new ThresholdAscentPolicy(10000, 0.01),
      };

      foreach (var problem in new Tuple<IProblem, int>[]
      {
-       //Tuple.Create((IProblem)new SantaFeAntProblem(), 17),
+       Tuple.Create((IProblem)new SantaFeAntProblem(), 17),
        Tuple.Create((IProblem)new SymbolicRegressionPoly10Problem(), 23),
      })
-     foreach (var randomTries in new int[] { 0, 1, 10, /* 5, 100 /*, 500, 1000 */}) {
+     foreach (var useCanonical in new bool[] { true, false })
+     foreach (var randomTries in new int[] { 0, /*1, 10, /* 5, 100 /*, 500, 1000 */}) {
        foreach (var policy in policies) {
          var myRandomTries = randomTries;
          var localRand = new Random(localRandSeed);
          var options = new ParallelOptions();
-         options.MaxDegreeOfParallelism = 4;
+         options.MaxDegreeOfParallelism = 1;
          Parallel.For(0, reps, options, (i) => {
            //var t = Task.Run(() => {
            Random myLocalRand;
            lock (localRand)
              myLocalRand = new Random(localRand.Next());

            //for (int i = 0; i < reps; i++) {

            int iterations = 0;
            var globalStatistics = new SentenceSetStatistics();
…
-           var alg = new MctsSampler(problem.Item1, problem.Item2, myLocalRand, myRandomTries, policy()); // TODO: Make sure we generate the same random numbers for each experiment
+           // var alg = new MctsSampler(problem.Item1, problem.Item2, myLocalRand, myRandomTries, policy()); // TODO: Make sure we generate the same random numbers for each
+           var alg = new SequentialSearch(problem.Item1, problem.Item2, myLocalRand, myRandomTries, new GenericGrammarPolicy(problem.Item1, policy(), useCanonical));
            //var alg = new ExhaustiveBreadthFirstSearch(problem, 25);
            //var alg = new AlternativesContextSampler(problem, 25);

            alg.SolutionEvaluated += (sentence, quality) => {
              iterations++;
              globalStatistics.AddSentence(sentence, quality);
-             if (iterations % 10000 == 0) {
-               Console.WriteLine("{0,4} {1,7} {2,5} {3,25} {4}", alg.treeDepth, alg.treeSize, myRandomTries, policy(), globalStatistics);
+             if (iterations % 1000 == 0) {
+               Console.WriteLine("{0,5} {1,25} {2} {3}", myRandomTries, policy(), useCanonical, globalStatistics);
              }
            };
+           alg.FoundNewBestSolution += (sentence, quality) => {
+             Console.WriteLine("{0,5} {1,25} {2} {3}", myRandomTries, policy(), useCanonical, globalStatistics);
+           };

            alg.Run(maxIterations);

            //Console.WriteLine("{0,5} {1} {2}", randomTries, policyFactory(1), globalStatistics);
            //}
            //});
            //tasks.Add(t);
          });
        }
      }
      //Task.WaitAll(tasks.ToArray());
    }

    private static void RunDemo() {
+     // TODO: clone problem for parallel grid test
      // TODO: move problem instances into a separate folder
      // TODO: improve performance of SequentialSearch (memory allocations related to sequences)
…
      // var problem = new FindPhrasesProblem(random, 15, numPhrases, phraseLen, numOptimalPhrases: numPhrases, numDecoyPhrases: 0, correctReward: 1.0, decoyReward: 0.0, phrasesAsSets: true);

-     //var problem = new SymbolicRegressionPoly10Problem(); // good results e.g. 10 randomtries and EpsGreedyPolicy(0.2, (aInfo)=>aInfo.MaxReward)
+     var problem = new SymbolicRegressionPoly10Problem(); // good results e.g. 10 randomtries and EpsGreedyPolicy(0.2, (aInfo)=>aInfo.MaxReward)
      // Ant
      // good results e.g. with var alg = new MctsSampler(problem, 17, random, 1, (rand, numActions) => new ThresholdAscentPolicy(numActions, 500, 0.01));
…
      // new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericGrammarPolicy(problem, new UCB1TunedPolicy(), true));

-     var problem = new SantaFeAntProblem();
+     //var problem = new SantaFeAntProblem();
      //var problem = new SymbolicRegressionProblem("Tower");
      //var problem = new PalindromeProblem();
…
      //var alg = new MctsSampler(problem, 23, random, 0, new BoltzmannExplorationPolicy(100));
      //var alg = new MctsSampler(problem, 23, random, 0, new EpsGreedyPolicy(0.1));
-     var alg = new SequentialSearch(problem, 10, random, 0,
-       new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericGrammarPolicy(problem, new GaussianThompsonSamplingPolicy(true), true));
+     var alg = new SequentialSearch(problem, 23, random, 0,
+       new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericGrammarPolicy(problem, new EpsGreedyPolicy(0.2), true));
      //var alg = new MctsQLearningSampler(problem, sentenceLen, random, 0, null);
      //var alg = new MctsQLearningSampler(problem, 30, random, 0, new EpsGreedyPolicy(0.2));