Changeset 11974
- Timestamp:
- 02/10/15 02:05:31 (10 years ago)
- Location:
- branches/HeuristicLab.Problems.GrammaticalOptimization
- Files:
-
- 6 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/GaussianThompsonSamplingPolicy.cs
r11832 r11974 57 57 } 58 58 59 // very unlikely to be the same (and we don't care) 59 60 if (theta > bestQ) { 60 61 bestQ = theta; -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/Policies/GenericThompsonSamplingPolicy.cs
r11806 r11974 23 23 aIdx++; 24 24 var q = aInfo.SampleExpectedReward(random); 25 // very unlikely to be equal and we don't care 25 26 if (q > bestQ) { 26 27 bestQ = q; -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/SequentialDecisionPolicies/GenericFunctionApproximationGrammarPolicy.cs
r11832 r11974 57 57 originalIdx++; 58 58 } 59 60 const double beta = 20.0; 61 var w = from q in activeAfterStates 62 select Math.Exp(beta * q); 59 63 64 var bestAction = Enumerable.Range(0, maxIdx).SampleProportional(random, w); 65 selectedStateIdx = actionIndexMap[bestAction]; 66 Debug.Assert(selectedStateIdx >= 0); 67 68 /* 60 69 if (random.NextDouble() < 0.2) { 61 70 selectedStateIdx = actionIndexMap[random.Next(maxIdx)]; … … 63 72 // find max 64 73 var bestQ = double.NegativeInfinity; 65 var bestIdx = -1;74 var bestIdxs = new List<int>(); 66 75 for (int i = 0; i < maxIdx; i++) { 67 76 if (activeAfterStates[i] > bestQ) { 77 bestIdxs.Clear(); 78 bestIdxs.Add(i); 68 79 bestQ = activeAfterStates[i]; 69 bestIdx = i; 80 } else if (activeAfterStates[i].IsAlmost(bestQ)) { 81 bestIdxs.Add(i); 70 82 } 71 83 } 72 selectedStateIdx = actionIndexMap[bestIdx ];84 selectedStateIdx = actionIndexMap[bestIdxs[random.Next(bestIdxs.Count)]]; 73 85 } 86 */ 87 88 74 89 75 90 return true; … … 103 118 104 119 public double GetValue(string state) { 105 return problem.GetFeatures(state).Sum(feature => GetWeight(feature)) 120 return problem.GetFeatures(state).Sum(feature => GetWeight(feature)); 106 121 } 107 122 … … 109 124 double w; 110 125 if (featureWeigths.TryGetValue(feature.Id, out w)) return w * feature.Value; 111 else return 0.0; 126 else return 0.0; // TODO: alternatives? 112 127 } 113 128 private void UpdateWeights(string state, double reward) { -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Common/Extensions.cs
r11846 r11974 7 7 namespace HeuristicLab.Common { 8 8 public static class Extensions { 9 9 10 10 public static bool IsAlmost(this double x, double y) { 11 if (double.IsNaN(x) || double.IsNaN(y)) return false; 12 if (double.IsPositiveInfinity(x) && double.IsPositiveInfinity(y)) return true; 13 if (double.IsNegativeInfinity(x) && double.IsNegativeInfinity(y)) return true; 11 14 return Math.Abs(x - y) < 1.0e-12; 12 15 } -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/Problems/SantaFeAntProblem.cs
r11857 r11974 126 126 127 127 public IEnumerable<Feature> GetFeatures(string phrase) { 128 return Enumerable.Repeat(new Feature(CanonicalRepresentation(phrase), 1.0), 1); 128 yield return new Feature(CanonicalRepresentation(phrase), 1.0); 129 // yield return new Feature("Length", phrase.Length); // 130 // foreach (var pair in phrase.Zip(phrase.Skip(1), Tuple.Create)) { 131 // yield return new Feature(pair.Item1.ToString() + pair.Item2, 1.0); 132 // } 133 // number of occurrences for each symbol 129 134 } 130 135 -
branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs
r11973 r11974 30 30 //RunGridTest(); 31 31 //RunGpGridTest(); 32 32 RunFunApproxTest(); 33 33 } 34 34 … … 36 36 int maxIterations = 200000; // for poly-10 with 50000 evaluations no successful try with hl yet 37 37 //var globalRandom = new Random(31415); 38 var localRandSeed = 31415;38 var localRandSeed = new Random().Next(); 39 39 var reps = 20; 40 40 41 41 var policyFactories = new Func<IBanditPolicy>[] 42 42 { 43 () => new RandomPolicy(),44 () => new ActiveLearningPolicy(),45 () => new EpsGreedyPolicy(0.01, (aInfo)=> aInfo.MaxReward, "max"),46 () => new EpsGreedyPolicy(0.05, (aInfo)=> aInfo.MaxReward, "max"),47 () => new EpsGreedyPolicy(0.1, (aInfo)=> aInfo.MaxReward, "max"),48 () => new EpsGreedyPolicy(0.2, (aInfo)=> aInfo.MaxReward, "max"),49 // () => new GaussianThompsonSamplingPolicy(),50 () => new GaussianThompsonSamplingPolicy(true),51 () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1)),52 () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1, 1)),53 // () => new BernoulliThompsonSamplingPolicy(),54 () => new GenericThompsonSamplingPolicy(new BernoulliModel(1, 1)),55 () => new EpsGreedyPolicy(0.01),56 () => new EpsGreedyPolicy(0.05),57 () => new EpsGreedyPolicy(0.1),58 () => new EpsGreedyPolicy(0.2),59 () => new EpsGreedyPolicy(0.5),60 () => new UCTPolicy(0.01),61 () => new UCTPolicy(0.05),62 () => new UCTPolicy(0.1),63 () => new UCTPolicy(0.5),64 () => new UCTPolicy(1),65 () => new UCTPolicy(2),66 () => new UCTPolicy( 5),67 () => new UCTPolicy( 10),68 () => new ModifiedUCTPolicy(0.01),69 () => new ModifiedUCTPolicy(0.05),70 () => new ModifiedUCTPolicy(0.1),71 () => new ModifiedUCTPolicy(0.5),72 () => new ModifiedUCTPolicy(1),73 () => new ModifiedUCTPolicy(2),74 () => new ModifiedUCTPolicy( 5),75 () => new ModifiedUCTPolicy( 10),76 () => new UCB1Policy(),77 () => new UCB1TunedPolicy(),78 () => new UCBNormalPolicy(),79 () => new BoltzmannExplorationPolicy(1),80 () => new BoltzmannExplorationPolicy(10),81 () => new 
BoltzmannExplorationPolicy(20),82 () => new BoltzmannExplorationPolicy(100),83 () => new BoltzmannExplorationPolicy(200),84 () => new BoltzmannExplorationPolicy(500),43 //() => new RandomPolicy(), 44 // () => new ActiveLearningPolicy(), 45 //() => new EpsGreedyPolicy(0.01, (aInfo)=> aInfo.MaxReward, "max"), 46 //() => new EpsGreedyPolicy(0.05, (aInfo)=> aInfo.MaxReward, "max"), 47 //() => new EpsGreedyPolicy(0.1, (aInfo)=> aInfo.MaxReward, "max"), 48 //() => new EpsGreedyPolicy(0.2, (aInfo)=> aInfo.MaxReward, "max"), 49 ////() => new GaussianThompsonSamplingPolicy(), 50 //() => new GaussianThompsonSamplingPolicy(true), 51 //() => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1)), 52 //() => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1, 1)), 53 ////() => new BernoulliThompsonSamplingPolicy(), 54 //() => new GenericThompsonSamplingPolicy(new BernoulliModel(1, 1)), 55 //() => new EpsGreedyPolicy(0.01), 56 //() => new EpsGreedyPolicy(0.05), 57 //() => new EpsGreedyPolicy(0.1), 58 //() => new EpsGreedyPolicy(0.2), 59 //() => new EpsGreedyPolicy(0.5), 60 //() => new UCTPolicy(0.01), 61 //() => new UCTPolicy(0.05), 62 //() => new UCTPolicy(0.1), 63 //() => new UCTPolicy(0.5), 64 //() => new UCTPolicy(1), 65 //() => new UCTPolicy(2), 66 //() => new UCTPolicy( 5), 67 //() => new UCTPolicy( 10), 68 //() => new ModifiedUCTPolicy(0.01), 69 //() => new ModifiedUCTPolicy(0.05), 70 //() => new ModifiedUCTPolicy(0.1), 71 //() => new ModifiedUCTPolicy(0.5), 72 //() => new ModifiedUCTPolicy(1), 73 //() => new ModifiedUCTPolicy(2), 74 //() => new ModifiedUCTPolicy( 5), 75 //() => new ModifiedUCTPolicy( 10), 76 //() => new UCB1Policy(), 77 //() => new UCB1TunedPolicy(), 78 //() => new UCBNormalPolicy(), 79 //() => new BoltzmannExplorationPolicy(1), 80 //() => new BoltzmannExplorationPolicy(10), 81 //() => new BoltzmannExplorationPolicy(20), 82 //() => new BoltzmannExplorationPolicy(100), 83 //() => new BoltzmannExplorationPolicy(200), 84 //() => new 
BoltzmannExplorationPolicy(500), 85 85 () => new ChernoffIntervalEstimationPolicy( 0.01), 86 86 () => new ChernoffIntervalEstimationPolicy( 0.05), 87 87 () => new ChernoffIntervalEstimationPolicy( 0.1), 88 88 () => new ChernoffIntervalEstimationPolicy( 0.2), 89 () => new ThresholdAscentPolicy(5, 0.01),90 () => new ThresholdAscentPolicy(5, 0.05),91 () => new ThresholdAscentPolicy(5, 0.1),92 () => new ThresholdAscentPolicy(5, 0.2),93 () => new ThresholdAscentPolicy(10, 0.01),94 () => new ThresholdAscentPolicy(10, 0.05),95 () => new ThresholdAscentPolicy(10, 0.1),96 () => new ThresholdAscentPolicy(10, 0.2),97 () => new ThresholdAscentPolicy(50, 0.01),98 () => new ThresholdAscentPolicy(50, 0.05),99 () => new ThresholdAscentPolicy(50, 0.1),100 () => new ThresholdAscentPolicy(50, 0.2),101 () => new ThresholdAscentPolicy(100, 0.01),102 () => new ThresholdAscentPolicy(100, 0.05),103 () => new ThresholdAscentPolicy(100, 0.1),104 () => new ThresholdAscentPolicy(100, 0.2),105 () => new ThresholdAscentPolicy(500, 0.01),106 () => new ThresholdAscentPolicy(500, 0.05),107 () => new ThresholdAscentPolicy(500, 0.1),108 () => new ThresholdAscentPolicy(500, 0.2),89 //() => new ThresholdAscentPolicy(5, 0.01), 90 //() => new ThresholdAscentPolicy(5, 0.05), 91 //() => new ThresholdAscentPolicy(5, 0.1), 92 //() => new ThresholdAscentPolicy(5, 0.2), 93 //() => new ThresholdAscentPolicy(10, 0.01), 94 //() => new ThresholdAscentPolicy(10, 0.05), 95 //() => new ThresholdAscentPolicy(10, 0.1), 96 //() => new ThresholdAscentPolicy(10, 0.2), 97 //() => new ThresholdAscentPolicy(50, 0.01), 98 //() => new ThresholdAscentPolicy(50, 0.05), 99 //() => new ThresholdAscentPolicy(50, 0.1), 100 //() => new ThresholdAscentPolicy(50, 0.2), 101 //() => new ThresholdAscentPolicy(100, 0.01), 102 //() => new ThresholdAscentPolicy(100, 0.05), 103 //() => new ThresholdAscentPolicy(100, 0.1), 104 //() => new ThresholdAscentPolicy(100, 0.2), 105 //() => new ThresholdAscentPolicy(500, 0.01), 106 //() => new 
ThresholdAscentPolicy(500, 0.05), 107 //() => new ThresholdAscentPolicy(500, 0.1), 108 //() => new ThresholdAscentPolicy(500, 0.2), 109 109 //() => new ThresholdAscentPolicy(5000, 0.01), 110 110 //() => new ThresholdAscentPolicy(10000, 0.01), … … 128 128 var localRand = new Random(localRandSeed); 129 129 var options = new ParallelOptions(); 130 options.MaxDegreeOfParallelism = 4;130 options.MaxDegreeOfParallelism = 1; 131 131 Parallel.For(0, reps, options, (i) => { 132 132 Random myLocalRand; … … 314 314 var problemFactories = new Func<Tuple<int, int, ISymbolicExpressionTreeProblem>>[] 315 315 { 316 () => Tuple.Create(100000, 23, (ISymbolicExpressionTreeProblem)new SymbolicRegressionPoly10Problem()),317 //() => Tuple.Create(100000, 17, (ISymbolicExpressionTreeProblem)new SantaFeAntProblem()),316 //() => Tuple.Create(100000, 23, (ISymbolicExpressionTreeProblem)new SymbolicRegressionPoly10Problem()), 317 () => Tuple.Create(100000, 17, (ISymbolicExpressionTreeProblem)new SantaFeAntProblem()), 318 318 //() => Tuple.Create(50000, 32,(ISymbolicExpressionTreeProblem)new RoyalSymbolProblem()), 319 319 //() => Tuple.Create(50000, 64, (ISymbolicExpressionTreeProblem)new RoyalPairProblem()),
Note: See TracChangeset
for help on using the changeset viewer.