- Timestamp:
- 01/20/15 20:25:00 (10 years ago)
- Location:
- branches/HeuristicLab.Problems.GrammaticalOptimization
- Files:
-
- 2 added
- 5 deleted
- 27 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/ActiveLearningPolicy.cs
r11792 r11806 11 11 public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) { 12 12 var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>(); 13 int totalTries = myActionInfos. Where(a => !a.Disabled).Sum(a => a.Tries);13 int totalTries = myActionInfos.Sum(a => a.Tries); 14 14 const double delta = 0.1; 15 int k = myActionInfos. Where(a => !a.Disabled).Count();15 int k = myActionInfos.Count(); 16 16 var bestActions = new List<int>(); 17 17 var us = new List<double>(); … … 20 20 foreach (var aInfo in myActionInfos) { 21 21 aIdx++; 22 if (aInfo.Disabled) continue;23 22 double q; 24 23 double u; -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/BoltzmannExplorationPolicy.cs
r11799 r11806 37 37 38 38 var w = from aInfo in myActionInfos 39 select aInfo.Disabled 40 ? 0.0 41 : Math.Exp(beta * valueFunction(aInfo)); 39 select Math.Exp(beta * valueFunction(aInfo)); 42 40 43 41 var bestAction = Enumerable.Range(0, myActionInfos.Count()).SampleProportional(random, w); -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/ChernoffIntervalEstimationPolicy.cs
r11792 r11806 21 21 // select best 22 22 var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>(); 23 int k = myActionInfos.Count( a => !a.Disabled);24 int totalTries = myActionInfos. Where(a => !a.Disabled).Sum(a => a.Tries);23 int k = myActionInfos.Count(); 24 int totalTries = myActionInfos.Sum(a => a.Tries); 25 25 double bestQ = double.NegativeInfinity; 26 26 var bestActions = new List<int>(); … … 28 28 foreach (var aInfo in myActionInfos) { 29 29 aIdx++; 30 if (aInfo.Disabled) continue;31 30 double q; 32 31 if (aInfo.Tries == 0) { … … 46 45 bestActions.Clear(); 47 46 bestActions.Add(aIdx); 48 } else if (q == bestQ) {47 } else if (q.IsAlmost(bestQ)) { 49 48 bestActions.Add(aIdx); 50 49 } -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/DefaultPolicyActionInfo.cs
r11747 r11806 9 9 // stores information that is relevant for most of the policies 10 10 public class DefaultPolicyActionInfo : IBanditPolicyActionInfo { 11 private double knownValue;12 public bool Disabled { get { return Tries == -1; } }13 11 public double SumReward { get; private set; } 14 12 public int Tries { get; private set; } … … 16 14 public double Value { 17 15 get { 18 if (Disabled) return knownValue;19 else20 16 return Tries > 0 ? SumReward / Tries : 0.0; 21 17 } … … 26 22 27 23 public void UpdateReward(double reward) { 28 Debug.Assert(!Disabled);29 30 24 Tries++; 31 25 SumReward += reward; 32 26 MaxReward = Math.Max(MaxReward, reward); 33 27 } 34 public void Disable(double reward) { 35 this.Tries = -1; 36 this.SumReward = 0.0; 37 this.knownValue = reward; 38 } 28 39 29 public void Reset() { 40 30 SumReward = 0.0; 41 31 Tries = 0; 42 32 MaxReward = 0.0; 43 knownValue = 0.0;44 }45 public void PrintStats() {46 Console.WriteLine("avg reward {0,5:F2} disabled {1}", SumReward / Tries, Disabled);47 33 } 48 34 -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/EpsGreedyPolicy.cs
r11793 r11806 35 35 foreach (var aInfo in myActionInfos) { 36 36 aIdx++; 37 if (aInfo.Disabled) continue;38 37 39 38 var q = valueFunction(aInfo); -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/GenericThompsonSamplingPolicy.cs
r11799 r11806 22 22 foreach (var aInfo in myActionInfos) { 23 23 aIdx++; 24 if (aInfo.Disabled) continue;25 //if (aInfo.Tries == 0) return aIdx;26 24 var q = aInfo.SampleExpectedReward(random); 27 25 if (q > bestQ) { -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/MeanAndVariancePolicyActionInfo.cs
r11747 r11806 39 39 estimator.Reset(); 40 40 } 41 42 public void PrintStats() {43 Console.WriteLine("avg reward {0,5:F2} disabled {1}", AvgReward, Disabled);44 }45 41 } 46 42 } -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/ModelPolicyActionInfo.cs
r11747 r11806 10 10 public class ModelPolicyActionInfo : IBanditPolicyActionInfo { 11 11 private readonly IModel model; 12 private double knownValue;13 public bool Disabled { get { return Tries == -1; } }14 12 public double Value { 15 13 get { 16 if (Disabled) return knownValue; 17 else 18 return model.SampleExpectedReward(new Random()); 14 return model.SampleExpectedReward(new Random()); 19 15 } 20 16 } … … 26 22 27 23 public void UpdateReward(double reward) { 28 Debug.Assert(!Disabled);29 24 Tries++; 30 25 model.Update(reward); … … 35 30 } 36 31 37 public void Disable(double reward) {38 this.Tries = -1;39 this.knownValue = reward;40 }41 42 32 public void Reset() { 43 33 Tries = 0; 44 knownValue = 0.0;45 34 model.Reset(); 46 35 } 47 36 48 public void PrintStats() {49 model.PrintStats();50 }51 52 37 public override string ToString() { 53 return string.Format(" disabled {0} model {1}", Disabled, model);38 return string.Format("model {1}", model); 54 39 } 55 40 } -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/RandomPolicy.cs
r11742 r11806 17 17 return actionInfos 18 18 .Select((aInfo, idx) => Tuple.Create(aInfo, idx)) 19 .Where(p => !p.Item1.Disabled)20 19 .SelectRandom(random).Item2; 21 20 } -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/ThresholdAscentPolicy.cs
r11792 r11806 28 28 public int Tries { get; private set; } 29 29 public int thresholdBin = 1; 30 private double knownValue;31 30 32 31 public double Value { 33 32 get { 34 if (Disabled) return knownValue;35 33 if (Tries == 0.0) return 0.0; 36 34 return rewardHistogram[thresholdBin] / (double)Tries; 37 35 } 38 36 } 39 40 public bool Disabled { get { return Tries == -1; } }41 37 42 38 public void UpdateReward(double reward) { … … 46 42 } 47 43 48 public void Disable(double reward) {49 this.knownValue = reward;50 Tries = -1;51 }52 53 44 public void Reset() { 54 45 Tries = 0; 55 46 thresholdBin = 1; 56 this.knownValue = 0.0;57 47 Array.Clear(rewardHistogram, 0, rewardHistogram.Length); 58 }59 60 public void PrintStats() {61 if (Tries >= 0) {62 Console.Write("{0,6}", Tries);63 } else {64 Console.Write("{0,6}", "");65 }66 48 } 67 49 … … 101 83 var bestActions = new List<int>(); 102 84 double bestQ = double.NegativeInfinity; 103 int k = myActionInfos.Count( a => !a.Disabled);104 var totalTries = myActionInfos. Where(a => !a.Disabled).Sum(a => a.Tries);85 int k = myActionInfos.Count(); 86 var totalTries = myActionInfos.Sum(a => a.Tries); 105 87 int aIdx = -1; 106 88 foreach (var aInfo in myActionInfos) { 107 89 aIdx++; 108 if (aInfo.Disabled) continue;109 90 double q; 110 91 if (aInfo.Tries == 0) { … … 118 99 bestActions.Clear(); 119 100 bestActions.Add(aIdx); 120 } else if (q == bestQ) {101 } else if (q.IsAlmost(bestQ)) { 121 102 bestActions.Add(aIdx); 122 103 } -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/UCB1Policy.cs
r11747 r11806 13 13 var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>(); 14 14 double bestQ = double.NegativeInfinity; 15 int totalTries = myActionInfos. Where(a => !a.Disabled).Sum(a => a.Tries);15 int totalTries = myActionInfos.Sum(a => a.Tries); 16 16 17 17 var bestActions = new List<int>(); … … 19 19 foreach (var aInfo in myActionInfos) { 20 20 aIdx++; 21 if (aInfo.Disabled) continue;22 21 double q; 23 22 if (aInfo.Tries == 0) { … … 31 30 bestActions.Clear(); 32 31 bestActions.Add(aIdx); 33 } else if (q == bestQ) {32 } else if (q.IsAlmost(bestQ)) { 34 33 bestActions.Add(aIdx); 35 34 } -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/UCB1TunedPolicy.cs
r11792 r11806 37 37 bestActions.Clear(); 38 38 bestActions.Add(aIdx); 39 } else if (q == bestQ) {39 } else if (q.IsAlmost(bestQ)) { 40 40 bestActions.Add(aIdx); 41 41 } -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/UCBNormalPolicy.cs
r11792 r11806 33 33 bestActions.Clear(); 34 34 bestActions.Add(aIdx); 35 } else if (q == bestQ) {35 } else if (q.IsAlmost(bestQ)) { 36 36 bestActions.Add(aIdx); 37 37 } -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/UCTPolicy.cs
r11747 r11806 21 21 int bestAction = -1; 22 22 double bestQ = double.NegativeInfinity; 23 int totalTries = myActionInfos. Where(a => !a.Disabled).Sum(a => a.Tries);23 int totalTries = myActionInfos.Sum(a => a.Tries); 24 24 25 25 int aIdx = -1; … … 27 27 foreach (var aInfo in myActionInfos) { 28 28 aIdx++; 29 if (aInfo.Disabled) continue;30 29 double q; 31 30 if (aInfo.Tries == 0) { … … 38 37 bestQ = q; 39 38 bestActions.Add(aIdx); 40 } 41 if (q == bestQ) { 39 } else if (q.IsAlmost(bestQ)) { 42 40 bestActions.Add(aIdx); 43 41 } -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/GrammarPolicies/GenericGrammarPolicy.cs
r11799 r11806 1 1 using System; 2 2 using System.Collections.Generic; 3 using System.Diagnostics; 3 4 using System.Linq; 4 5 using System.Text; … … 9 10 namespace HeuristicLab.Algorithms.Bandits.GrammarPolicies { 10 11 // this represents grammar policies that use one of the available bandit policies for state selection 11 public class GenericGrammarPolicy : IGrammarPolicy { 12 protected Dictionary<string, IBanditPolicyActionInfo> stateInfo; // stores the necessary information for bandit policies for each state 13 private readonly bool useCanonicalState; 12 // any bandit policy can be used to select actions for states 13 // a separate datastructure is used to store visited states and to prevent revisiting of states 14 public sealed class GenericGrammarPolicy : IGrammarPolicy { 15 private Dictionary<string, IBanditPolicyActionInfo> stateInfo; // stores the necessary information for bandit policies for each state (=canonical phrase) 16 private HashSet<string> done; 17 private readonly bool useCanonicalPhrases; 14 18 private readonly IProblem problem; 15 19 private readonly IBanditPolicy banditPolicy; 16 20 17 public GenericGrammarPolicy(IProblem problem, IBanditPolicy banditPolicy, bool useCanonical State= false) {18 this.useCanonical State = useCanonicalState;21 public GenericGrammarPolicy(IProblem problem, IBanditPolicy banditPolicy, bool useCanonicalPhrases = false) { 22 this.useCanonicalPhrases = useCanonicalPhrases; 19 23 this.problem = problem; 20 24 this.banditPolicy = banditPolicy; 21 25 this.stateInfo = new Dictionary<string, IBanditPolicyActionInfo>(); 26 this.done = new HashSet<string>(); 22 27 } 28 29 private IBanditPolicyActionInfo[] activeAfterStates; // don't allocate each time 30 private int[] actionIndexMap; // don't allocate each time 23 31 24 32 public bool TrySelect(Random random, string curState, IEnumerable<string> afterStates, out int selectedStateIdx) { 25 33 // fail if all states are done (corresponding state infos are disabled) 26 if (afterStates.All(s => GetStateInfo(s).Disabled)) {34 if (afterStates.All(s => Done(s))) { 27 35 // fail because all follow states have already been visited => also disable the current state (if we can be sure that it has been fully explored) 36 MarkAsDone(curState); 28 37 29 GetStateInfo(curState).Disable(afterStates.Select(afterState => GetStateInfo(afterState).Value).Max());30 38 selectedStateIdx = -1; 31 39 return false; 32 40 } 33 41 34 selectedStateIdx = banditPolicy.SelectAction(random, afterStates.Select(s => GetStateInfo(s))); 42 // determine active actions (not done yet) and create an array to map the selected index back to original actions 43 if (activeAfterStates == null || activeAfterStates.Length < afterStates.Count()) { 44 activeAfterStates = new IBanditPolicyActionInfo[afterStates.Count()]; 45 actionIndexMap = new int[afterStates.Count()]; 46 } 47 var idx = 0; int originalIdx = 0; 48 foreach (var afterState in afterStates) { 49 if (!Done(afterState)) { 50 activeAfterStates[idx] = GetStateInfo(afterState); 51 actionIndexMap[idx] = originalIdx; 52 idx++; 53 } 54 originalIdx++; 55 } 56 57 selectedStateIdx = actionIndexMap[banditPolicy.SelectAction(random, activeAfterStates.Take(idx))]; 35 58 36 59 return true; 37 60 } 61 62 38 63 39 64 private IBanditPolicyActionInfo GetStateInfo(string state) { … … 47 72 } 48 73 49 public v irtual void UpdateReward(IEnumerable<string> stateTrajectory, double reward) {74 public void UpdateReward(IEnumerable<string> stateTrajectory, double reward) { 50 75 foreach (var state in stateTrajectory) { 51 76 GetStateInfo(state).UpdateReward(reward); … … 53 78 // only the last state can be terminal 54 79 if (problem.Grammar.IsTerminal(state)) { 55 GetStateInfo(state).Disable(reward);80 MarkAsDone(state); 56 81 } 57 82 } 58 83 } 59 84 60 public virtual void Reset() { 85 86 public void Reset() { 61 87 stateInfo.Clear(); 88 done.Clear(); 62 89 } 63 90 … … 74 101 } 75 102 76 protected string CanonicalState(string state) { 77 if (useCanonicalState) { 103 // the canonical states for the value function (banditInfos) and the done set must be distinguished 104 // sequences of different length could have the same canonical representation and can have the same value (banditInfo) 105 // however, if the canonical representation of a state is shorter than we must not mark the canonical state as done when all possible derivations from the initial state have been explored 106 // eg. in the ant problem the canonical representation for ...lllA is ...rA 107 // even though all possible derivations (of limited length) of lllA have been visited we must not mark the state rA as done 108 private void MarkAsDone(string state) { 109 var s = CanonicalState(state); 110 // when the lengths of the canonical string and the original string are the same we also disable the actions 111 // always disable terminals 112 Debug.Assert(s.Length <= state.Length); 113 if (s.Length == state.Length || problem.Grammar.IsTerminal(state)) { 114 Debug.Assert(!done.Contains(s)); 115 done.Add(s); 116 } else { 117 // for non-terminals where the canonical string is shorter than the original string we can only disable the canonical representation for all states in the same level 118 Debug.Assert(!done.Contains(s + state.Length)); 119 done.Add(s + state.Length); // encode the original length of the state, states in the same level of the tree are treated as equivalent 120 } 121 } 122 123 // symmetric to MarkDone 124 private bool Done(string state) { 125 var s = CanonicalState(state); 126 if (s.Length == state.Length || problem.Grammar.IsTerminal(state)) { 127 return done.Contains(s); 128 } else { 129 // it is not necessary to visit states if the canonical representation has already been fully explored 130 if (done.Contains(s)) return true; 131 if (done.Contains(s + state.Length)) return true; 132 for (int i = 1; i < state.Length; i++) { 133 if (done.Contains(s + i)) return true; 134 } 135 return false; 136 } 137 } 138 139 private string CanonicalState(string state) { 140 if (useCanonicalPhrases) { 78 141 return problem.CanonicalRepresentation(state); 79 142 } else -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/HeuristicLab.Algorithms.Bandits.csproj
r11793 r11806 49 49 <Compile Include="BanditPolicies\ChernoffIntervalEstimationPolicy.cs" /> 50 50 <Compile Include="BanditPolicies\ActiveLearningPolicy.cs" /> 51 <Compile Include="BanditPolicies\ModifiedUCTPolicy.cs" /> 51 52 <Compile Include="BanditPolicies\DefaultPolicyActionInfo.cs" /> 52 53 <Compile Include="BanditPolicies\EpsGreedyPolicy.cs" /> … … 66 67 <Compile Include="Bandits\IBandit.cs" /> 67 68 <Compile Include="Bandits\TruncatedNormalBandit.cs" /> 69 <Compile Include="GrammarPolicies\GenericTDPolicy.cs" /> 68 70 <Compile Include="GrammarPolicies\GenericGrammarPolicy.cs"> 69 71 <SubType>Code</SubType> … … 72 74 <SubType>Code</SubType> 73 75 </Compile> 74 <Compile Include="GrammarPolicies\TDPolicy.cs" />75 76 <Compile Include="GrammarPolicies\GrammarPolicy.cs" /> 76 77 <Compile Include="GrammarPolicies\IGrammarPolicy.cs" /> -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/IBanditPolicyActionInfo.cs
r11770 r11806 1 1 namespace HeuristicLab.Algorithms.Bandits { 2 2 public interface IBanditPolicyActionInfo { 3 bool Disabled { get; }3 //bool Disabled { get; } 4 4 double Value { get; } 5 5 int Tries { get; } 6 6 void UpdateReward(double reward); 7 void Disable(double reward);7 //void Disable(double reward); 8 8 // reset causes the state of the action to be reinitialized (as after constructor-call) 9 9 void Reset(); 10 void PrintStats();11 10 } 12 11 } -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization.csproj
r11770 r11806 46 46 <Compile Include="AlternativesContextSampler.cs" /> 47 47 <Compile Include="SequentialSearch.cs" /> 48 <Compile Include="TemporalDifferenceTreeSearchSampler.cs" />49 48 <Compile Include="ExhaustiveRandomFirstSearch.cs" /> 50 <Compile Include="MctsContextualSampler.cs">51 <SubType>Code</SubType>52 </Compile>53 <Compile Include="MctsSampler.cs" />54 49 <Compile Include="ExhaustiveDepthFirstSearch.cs" /> 55 50 <Compile Include="ExhaustiveBreadthFirstSearch.cs" /> -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/SequentialSearch.cs
r11799 r11806 77 77 tries++; 78 78 var quality = problem.Evaluate(sentence) / problem.BestKnownQuality(maxLen); 79 if (double.IsNaN(quality)) quality = 0.0; 79 80 Debug.Assert(quality >= 0 && quality <= 1.0); 80 81 DistributeReward(quality); -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Common/ConsoleEx.cs
r11770 r11806 49 49 double startGreen = -1.41; 50 50 double endGreen = 1.41; 51 51 52 52 double startBlue = -3; 53 53 double endBlue = 1; 54 54 55 55 double startAlpha = 0; 56 double endAlpha = 0;56 double endAlpha = 0; 57 57 58 58 // fire -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.SymbReg/SymbolicRegressionProblem.cs
r11747 r11806 15 15 private const string grammarString = @" 16 16 G(E): 17 E -> V | V+E | V-E | V*E | (E)17 E -> V | V+E | V-E | V*E | V/E | (E) 18 18 V -> <variables> 19 19 "; -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/IProblem.cs
r11793 r11806 9 9 IGrammar Grammar { get; } 10 10 double Evaluate(string sentence); 11 string CanonicalRepresentation(string terminalPhrase); 11 string CanonicalRepresentation(string terminalPhrase); // canonical state must use correct syntax (must be a valid input for evaluate) 12 12 } 13 13 } -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/Problems/RoyalPhraseSequenceProblem.cs
r11803 r11806 115 115 public string CanonicalRepresentation(string terminalPhrase) { 116 116 if (phrasesAsSets) { 117 var phrases = new List<string>();117 var sb = new StringBuilder(); 118 118 var numPhrases = terminalPhrase.Length / phraseLen; 119 119 for (int phraseIdx = 0; phraseIdx < numPhrases; phraseIdx++) { … … 121 121 var phrase = terminalPhrase.Substring(sentenceIdx, phraseLen); 122 122 phrase = CanonicalPhrase(phrase); 123 phrases.Add(phrase);123 sb.Append(phrase); 124 124 } 125 125 126 126 var remainder = terminalPhrase.Substring(numPhrases * phraseLen, terminalPhrase.Length - (numPhrases * phraseLen)); 127 127 remainder = CanonicalPhrase(remainder); 128 phrases.Add(remainder);128 sb.Append(remainder); 129 129 130 return s tring.Join("", phrases);130 return sb.ToString(); 131 131 } else 132 132 return terminalPhrase; -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/Problems/RoyalSymbolProblem.cs
r11803 r11806 32 32 // sentence must contain only terminal symbols, we are not checking if the sentence is syntactically valid here because it would be too slow! 33 33 Debug.Assert(sentence.Any(c => grammar.IsTerminal(c))); 34 return regex.Matches(sentence .ToString()).Count;34 return regex.Matches(sentence).Count; 35 35 } 36 36 public string CanonicalRepresentation(string terminalPhrase) { -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/Problems/SantaFeAntProblem.cs
r11803 r11806 111 111 canonicalPhrase = sb.ToString(); 112 112 } while (canonicalPhrase != oldPhrase); 113 sb.Append(terminalPhrase.Length - canonicalPhrase.Length);114 113 return sb.ToString(); 115 114 } -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/Problems/SymbolicRegressionPoly10Problem.cs
r11803 r11806 92 92 93 93 sb.Append(CanonicalTerm(terms[terms.Length - 1])); 94 sb.Append(phrase.Length - sb.Length);95 94 canonicalPhrase = sb.ToString(); 96 95 canonicalPhraseCache.Add(phrase, canonicalPhrase); … … 109 108 var sb = new StringBuilder(chars.Length); 110 109 // we want to have the up-case characters last 111 for (int i = chars.Length - 1; i >= 0; i--) { 112 if (chars[i] != '*') sb.Append(chars[i]); 110 for (int i = chars.Length - 1; i > 0; i--) { 111 if (chars[i] != '*') { 112 sb.Append(chars[i]); 113 if (chars[i - 1] != '*') sb.Append('*'); 114 } 113 115 } 116 if (chars[0] != '*') sb.Append(chars[0]); // last term 114 117 canonicalTerm = sb.ToString(); 115 118 canonicalTermDictionary.Add(term, canonicalTerm); -
branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs
r11801 r11806 24 24 CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture; 25 25 26 //RunDemo();27 RunGridTest();26 RunDemo(); 27 //RunGridTest(); 28 28 } 29 29 … … 61 61 () => new UCTPolicy( 5), 62 62 () => new UCTPolicy( 10), 63 () => new ModifiedUCTPolicy(0.01), 64 () => new ModifiedUCTPolicy(0.05), 65 () => new ModifiedUCTPolicy(0.1), 66 () => new ModifiedUCTPolicy(0.5), 67 () => new ModifiedUCTPolicy(1), 68 () => new ModifiedUCTPolicy(2), 69 () => new ModifiedUCTPolicy( 5), 70 () => new ModifiedUCTPolicy( 10), 63 71 () => new UCB1Policy(), 64 72 () => new UCB1TunedPolicy(), … … 161 169 162 170 private static void RunDemo() { 163 // TODO: move problem instances into a separate folder164 171 // TODO: implement bridge to HL-GP 165 172 // TODO: unify MCTS, TD and ContextMCTS Solvers (stateInfos) … … 170 177 // TODO: warum funktioniert die alte Implementierung von GaussianThompson besser fÃŒr SantaFe als neue? Siehe Vergleich: alte vs. neue implementierung GaussianThompsonSampling 171 178 // TODO: why does GaussianThompsonSampling work so well with MCTS for the artificial ant problem? 172 // TODO: wie kann ich sampler noch vergleichen bzw. was kann man messen um die qualitÀt des samplers abzuschÀtzen (bis auf qualitÀt und iterationen bis zur besten lösung) => ziel schnellere iterationen zu gutem ergebnis173 179 // TODO: research thompson sampling for max bandit? 174 180 // TODO: ausfÃŒhrlicher test von strategien fÃŒr numCorrectPhrases-armed max bandit … … 192 198 193 199 194 var problem = new RoyalSequenceProblem(random, 10, 30, 2, 1, 0);200 //var problem = new RoyalSequenceProblem(random, 10, 30, 2, 1, 0); 195 201 //var phraseLen = 3; 196 202 //var numPhrases = 5; … … 218 224 //var problem = new SymbolicRegressionPoly10Problem(); 219 225 220 //var problem = new SantaFeAntProblem();226 var problem = new SantaFeAntProblem(); 221 227 //var problem = new SymbolicRegressionProblem("Tower"); 222 228 //var problem = new PalindromeProblem(); … … 227 233 //var alg = new MctsSampler(problem, 23, random, 0, new BoltzmannExplorationPolicy(100)); 228 234 //var alg = new MctsSampler(problem, 23, random, 0, new EpsGreedyPolicy(0.1)); 229 var alg = new SequentialSearch(problem, 30, random, 0, 230 new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericGrammarPolicy(problem, new EpsGreedyPolicy(0.1), true)); 235 //var alg = new SequentialSearch(problem, 23, random, 0, 236 // new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericGrammarPolicy(problem, new ModifiedUCTPolicy(0.1), true)); 237 var alg = new SequentialSearch(problem, 17, random, 0, 238 new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericTDPolicy(problem, true)); 231 239 //var alg = new MctsQLearningSampler(problem, sentenceLen, random, 0, null); 232 240 //var alg = new MctsQLearningSampler(problem, 30, random, 0, new EpsGreedyPolicy(0.2)); … … 248 256 globalStatistics.AddSentence(sentence, quality); 249 257 if (iterations % 1000 == 0) { 250 if (iterations % 1000 == 0) Console.Clear();258 if (iterations % 10000 == 0) Console.Clear(); 251 259 Console.SetCursorPosition(0, 0); 252 260 alg.PrintStats();
Note: See TracChangeset
for help on using the changeset viewer.