Changeset 11770
- Timestamp:
- 01/15/15 18:59:07 (10 years ago)
- Location:
- branches/HeuristicLab.Problems.GrammaticalOptimization
- Files:
-
- 9 added
- 13 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/GrammarPolicies/RandomNoResamplingPolicy.cs
r11742 r11770 1 1 using System; 2 2 using System.Collections.Generic; 3 using System.Configuration; 3 4 using System.Linq; 5 using System.Security.Policy; 4 6 using System.Text; 5 using System.Threading.Tasks;6 7 using HeuristicLab.Common; 7 8 using HeuristicLab.Problems.GrammaticalOptimization; 8 9 9 10 namespace HeuristicLab.Algorithms.Bandits.GrammarPolicies { 10 public class RandomNoResamplingPolicy : IGrammarPolicy {11 public class RandomNoResamplingPolicy : GrammarPolicy { 11 12 12 private readonly Dictionary<ReadonlySequence, bool> done; 13 private readonly Dictionary<Tuple<ReadonlySequence, ReadonlySequence>, ReadonlySequence> nextState; 13 private readonly HashSet<string> done; 14 14 15 16 public RandomNoResamplingPolicy() {17 this.done = new Dictionary<ReadonlySequence, bool>();15 public RandomNoResamplingPolicy(IProblem problem, bool useCanonicalRepresentation) 16 : base(problem, useCanonicalRepresentation) { 17 this.done = new HashSet<string>(); 18 18 } 19 19 20 public ReadonlySequence SelectAction(Random random, ReadonlySequence state, IEnumerable<ReadonlySequence> actions) { 21 var allDone = true; 22 foreach (var a in actions) { 23 var p = Tuple.Create(state, a); 24 allDone &= nextState.ContainsKey(p) && Done(nextState[p]); 25 if (!allDone) break; 20 public override bool TrySelect(Random random, ReadonlySequence curState, IEnumerable<ReadonlySequence> afterStates, out ReadonlySequence selectedState) { 21 // only select states that are not yet done 22 afterStates = afterStates.Where(a => !done.Contains(a.ToString())).ToArray(); 23 if (!afterStates.Any()) { 24 // fail because all follow states have already been visited => also disable the current state 25 done.Add(CanonicalState(curState.ToString())); 26 selectedState = null; 27 return false; 26 28 } 27 if(allDone) 28 return actions 29 .Where(a => !nextState.ContainsKey(Tuple.Create(state, a)) || Done(nextState[Tuple.Create(state, a)])) 30 .SelectRandom(random); 29 30 selectedState = afterStates.SelectRandom(random); 31 return true; 31 32 } 32 33 33 public void UpdateReward(ReadonlySequence state, ReadonlySequence action, double reward, ReadonlySequence newState) { 34 var key = Tuple.Create(state, action); 35 nextState[key] = newState; 36 if (newState.IsTerminal) done[newState] = true; 37 if 34 public override void UpdateReward(IEnumerable<ReadonlySequence> stateTrajectory, double reward) { 35 base.UpdateReward(stateTrajectory, reward); 36 // ignore rewards but update the set of visited terminal states 37 38 // the last state could be terminal 39 var lastState = stateTrajectory.Last(); 40 if (lastState.IsTerminal) done.Add(CanonicalState(lastState.ToString())); 38 41 } 39 42 40 public bool Done(ReadonlySequence state) { 41 return done.ContainsKey(state); 43 public override void Reset() { 44 base.Reset(); 45 done.Clear(); 42 46 } 43 47 } -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/GrammarPolicies/RandomPolicy.cs
r11742 r11770 8 8 9 9 namespace HeuristicLab.Algorithms.Bandits.GrammarPolicies { 10 public class RandomPolicy : IGrammarPolicy {11 public R eadonlySequence SelectAction(Random random, ReadonlySequence state, IEnumerable<ReadonlySequence> actions) {12 return actions.SelectRandom(random);10 public class RandomPolicy : GrammarPolicy { 11 public RandomPolicy(IProblem problem, bool useCanonicalRepresentation) 12 : base(problem, useCanonicalRepresentation) { 13 13 } 14 14 15 public void UpdateReward(ReadonlySequence state, ReadonlySequence action, double reward, ReadonlySequence newState) { 16 // ignore 17 } 18 19 public bool Done(ReadonlySequence state) { 20 return false; 15 public override bool TrySelect(Random random, ReadonlySequence curState, IEnumerable<ReadonlySequence> afterStates, out ReadonlySequence selectedState) { 16 // never fail => allows re-visits of terminal states 17 selectedState = afterStates.SelectRandom(random); 18 return true; 21 19 } 22 20 } -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/HeuristicLab.Algorithms.Bandits.csproj
r11747 r11770 66 66 <Compile Include="Bandits\IBandit.cs" /> 67 67 <Compile Include="Bandits\TruncatedNormalBandit.cs" /> 68 <Compile Include="GrammarPolicies\BoltzmanExplorationPolicy.cs" /> 69 <Compile Include="GrammarPolicies\GenericGrammarPolicy.cs"> 70 <SubType>Code</SubType> 71 </Compile> 72 <Compile Include="GrammarPolicies\TDPolicy.cs" /> 73 <Compile Include="GrammarPolicies\UCTPolicy.cs" /> 74 <Compile Include="GrammarPolicies\GrammarPolicy.cs" /> 75 <Compile Include="GrammarPolicies\EpsGreedyPolicy.cs" /> 76 <Compile Include="GrammarPolicies\GreedyPolicy.cs" /> 77 <Compile Include="GrammarPolicies\IGrammarPolicy.cs" /> 78 <Compile Include="GrammarPolicies\RandomNoResamplingPolicy.cs" /> 68 79 <Compile Include="GrammarPolicies\RandomPolicy.cs" /> 69 80 <Compile Include="IPolicy.cs" /> -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/IBanditPolicyActionInfo.cs
r11747 r11770 1 using System; 2 using System.Collections.Generic; 3 using System.Linq; 4 using System.Text; 5 using System.Threading.Tasks; 6 7 namespace HeuristicLab.Algorithms.Bandits { 1 namespace HeuristicLab.Algorithms.Bandits { 8 2 public interface IBanditPolicyActionInfo { 9 3 bool Disabled { get; } -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/IPolicy.cs
r11744 r11770 1 1 using System; 2 2 using System.Collections.Generic; 3 using System.Dynamic; 3 4 using System.Linq; 4 5 using System.Text; … … 7 8 8 9 namespace HeuristicLab.Algorithms.Bandits { 9 // this interface represents a policy for reinforcement learning 10 public interface IPolicy<in TState, TAction> { 11 TAction SelectAction(Random random, TState state, IEnumerable<TAction> actions); 12 void UpdateReward(TState state, TAction action, double reward, TState newState); // reward received when after taking action in state and new state 13 bool Done(TState state); // for deterministic MDP with deterministic rewards and goal to find a state with max reward 14 } 10 // this interface represents a policy for episodic reinforcement learning (with afterstates) 11 // here we assume that a reward is only recieved at the end of the episode and the update is done only after an episode is complete 12 // we also assume that the policy can fail to select one of the followStates 13 public interface IPolicy<TState> { 14 bool TrySelect(Random random, TState curState, IEnumerable<TState> afterStates, out TState selectedState); // selectedState \in afterStates 15 15 16 public interface IGrammarPolicy : IPolicy<ReadonlySequence, ReadonlySequence> { 16 // state-trajectory are the states of the episode, at the end we recieved the reward (only for the terminal state) 17 void UpdateReward(IEnumerable<TState> stateTrajectory, double reward); 17 18 19 void Reset(); // clears all internal state 20 21 // for introspection 22 double GetValue(TState state); 23 int GetTries(TState state); 18 24 } 19 25 } -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization.csproj
r11755 r11770 45 45 <Compile Include="AlternativesSampler.cs" /> 46 46 <Compile Include="AlternativesContextSampler.cs" /> 47 <Compile Include="SequentialSearch.cs" /> 47 48 <Compile Include="TemporalDifferenceTreeSearchSampler.cs" /> 48 49 <Compile Include="ExhaustiveRandomFirstSearch.cs" /> -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Common/ConsoleEx.cs
r11745 r11770 108 108 109 109 public static ConsoleColor ColorForValue(double d) { 110 Debug.Assert(d >= 0 && d <= 1.0); 110 //Debug.Assert(d >= 0 && d <= 1.0); 111 d = Math.Min(1.0, Math.Max(0.0, d)); 111 112 var cIdx = Math.Max(0, (int)Math.Floor(d * 15) - 1); 112 113 return colors[cIdx]; -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/EvenParityProblem.cs
r11742 r11770 13 13 private const string grammarString = @" 14 14 G(S): 15 S -> N | N*S | N+S | !S | (S) 16 N -> a | b | c | d 15 S -> a | b | c | d | a*S | b*S | c*S | d*S | a+S | b+S | c+S | d+S | !S | (S) 17 16 "; 18 17 … … 20 19 private readonly ExpressionInterpreter interpreter = new ExpressionInterpreter(); 21 20 public EvenParityProblem() { 22 this.grammar = new Grammar 21 this.grammar = new Grammar(grammarString); 23 22 } 24 23 … … 52 51 53 52 public string CanonicalRepresentation(string terminalPhrase) { 53 throw new NotImplementedException(); 54 54 return terminalPhrase; 55 55 } -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/FindPhrasesProblem.cs
r11755 r11770 143 143 if (!phrases.Contains(phrase)) phrases.Add(phrase); 144 144 } 145 var remainder = terminalPhrase.Substring(numPhrases * phraseLen, terminalPhrase.Length - (numPhrases * phraseLen)); 146 remainder = CanonicalPhrase(remainder); 147 if (!phrases.Contains(remainder)) phrases.Add(remainder); 148 145 149 return string.Join("", phrases); 146 150 } -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/RoyalPhraseSequenceProblem.cs
r11755 r11770 26 26 private readonly double correctReward; 27 27 private readonly double incorrectReward; 28 private readonly int _numCorrectPhrases;29 28 private readonly int sequenceLen; 30 private readonly int alphabetSize;31 29 private readonly int phraseLen; 32 30 private readonly bool phrasesAsSets; … … 40 38 if (correctReward <= incorrectReward) throw new ArgumentException(); 41 39 42 this.alphabetSize = alphabetSize;43 40 this.sequenceLen = sequenceLen; 44 41 this.phraseLen = phraseLen; 45 this._numCorrectPhrases = numCorrectPhrases;46 42 this.correctReward = correctReward; 47 43 this.incorrectReward = incorrectReward; … … 127 123 } 128 124 125 var remainder = terminalPhrase.Substring(numPhrases * phraseLen, terminalPhrase.Length - (numPhrases * phraseLen)); 126 remainder = CanonicalPhrase(remainder); 127 phrases.Add(remainder); 128 129 129 return string.Join("", phrases); 130 130 } else -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/SantaFeAntProblem.cs
r11747 r11770 11 11 A -> l | r | m | ?(A)(A) | lA | rA | mA 12 12 "; 13 14 13 15 // original koza grammar 14 16 // Ant -> left | right | move | if-food-ahead Ant Ant | Ant Ant | Ant Ant Ant … … 103 105 do { 104 106 oldPhrase = terminalPhrase; 105 terminalPhrase .Replace("ll", "rr").Replace("rl", "lr");107 terminalPhrase = terminalPhrase.Replace("ll", "rr").Replace("rl", "lr").Replace("lr", "").Replace("lll", "r").Replace("rrr", "l"); 106 108 } while (terminalPhrase != oldPhrase); 107 109 return terminalPhrase; -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/SymbolicRegressionPoly10Problem.cs
r11747 r11770 2 2 using System.Collections.Generic; 3 3 using System.Linq; 4 using System.Net; 4 5 using System.Security; 5 6 using System.Security.AccessControl; … … 72 73 73 74 // right now only + and * is supported 75 private Dictionary<string, string> cache = new Dictionary<string, string>(); 74 76 public string CanonicalRepresentation(string phrase) { 75 var terms = phrase.Split('+').Select(t => t.Replace("*", "")); 76 var terminalTerms = terms.Where(t => t.All(ch => grammar.IsTerminal(ch))); 77 var nonTerminalTerms = terms.Where(t => t.Any(ch => grammar.IsNonTerminal(ch))); 77 string res; 78 if (!cache.TryGetValue(phrase, out res)) { 79 var terms = phrase.Split('+').Select(t => t.Replace("*", "")); 80 var terminalTerms = terms.Where(t => t.All(ch => grammar.IsTerminal(ch))); 81 var nonTerminalTerms = terms.Where(t => t.Any(ch => grammar.IsNonTerminal(ch))); 78 82 79 return string.Join("+", terminalTerms.Select(term => CanonicalTerm(term)).OrderBy(term => term).Concat(nonTerminalTerms.Select(term => CanonicalTerm(term)))); 83 res = string.Join("+", terminalTerms.Select(term => CanonicalTerm(term)).OrderBy(term => term).Concat(nonTerminalTerms.Select(term => CanonicalTerm(term)))); 84 cache[phrase] = res; 85 } 86 return res; 80 87 } 81 88 82 89 private string CanonicalTerm(string term) { 83 return string.Join("", term.OrderByDescending(ch => (byte)ch)); 90 return string.Join("", term.OrderByDescending(ch => (byte)ch)); // we want to have the up-case characters last 84 91 } 85 92 } -
branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs
r11755 r11770 137 137 138 138 private static void RunDemo() { 139 // TODO: move problem instances into a separate folder 140 // TODO: improve performance of SequentialSearch (memory allocations related to sequences) 141 // TODO: implement bridge to HL-GP 139 142 // TODO: unify MCTS, TD and ContextMCTS Solvers (stateInfos) 140 143 // TODO: test with eps-greedy using max instead of average as value (seems to work well for symb-reg! explore further!) … … 161 164 int iterations = 0; 162 165 var sw = new Stopwatch(); 163 double bestQuality = 0; 164 string bestSentence = ""; 166 165 167 var globalStatistics = new SentenceSetStatistics(); 166 168 var random = new Random(); … … 168 170 //var phraseLen = 3; 169 171 //var numPhrases = 5; 170 //var problem = new RoyalPhraseSequenceProblem(random, 1 0, numPhrases, phraseLen: phraseLen, numCorrectPhrases: 1, correctReward: 1, incorrectReward: 0.0, phrasesAsSets: true);171 172 // var phraseLen = 4;173 // var numPhrases = 5;174 // var problem = new FindPhrasesProblem(random, 15, numPhrases, phraseLen, numOptimalPhrases: numPhrases, numDecoyPhrases: 500, correctReward: 1.0, decoyReward: 0.2, phrasesAsSets: true);175 176 var problem = new SymbolicRegressionPoly10Problem(); // good results e.g. 10 randomtries and EpsGreedyPolicy(0.2, (aInfo)=>aInfo.MaxReward)172 //var problem = new RoyalPhraseSequenceProblem(random, 15, numPhrases, phraseLen: phraseLen, numCorrectPhrases: 1, correctReward: 1, incorrectReward: 0.0, phrasesAsSets: true); 173 174 // var phraseLen = 2; 175 // var numPhrases = 5; 176 // var problem = new FindPhrasesProblem(random, 15, numPhrases, phraseLen, numOptimalPhrases: numPhrases, numDecoyPhrases: 0, correctReward: 1.0, decoyReward: 0.0, phrasesAsSets: true); 177 178 //var problem = new SymbolicRegressionPoly10Problem(); // good results e.g. 10 randomtries and EpsGreedyPolicy(0.2, (aInfo)=>aInfo.MaxReward) 177 179 // Ant 178 180 // good results e.g. with var alg = new MctsSampler(problem, 17, random, 1, (rand, numActions) => new ThresholdAscentPolicy(numActions, 500, 0.01)); 179 181 // GaussianModelWithUnknownVariance (and Q= 0.99-quantil) also works well for Ant 180 //var problem = new SantaFeAntProblem(); 182 // very good results with: var alg = new SequentialSearch(problem, 17, random, 0, 183 // new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericGrammarPolicy(problem, new UCB1TunedPolicy(), true)); 184 185 var problem = new SantaFeAntProblem(); 181 186 //var problem = new SymbolicRegressionProblem("Tower"); 182 187 //var problem = new PalindromeProblem(); … … 186 191 // symbreg length = 11 q = 0.824522210419616 187 192 //var alg = new MctsSampler(problem, 23, random, 0, new BoltzmannExplorationPolicy(100)); 188 var alg = new MctsSampler(problem, 23, random, 0, new EpsGreedyPolicy(0.1)); 193 //var alg = new MctsSampler(problem, 23, random, 0, new EpsGreedyPolicy(0.1)); 194 var alg = new SequentialSearch(problem, 10, random, 0, 195 new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericGrammarPolicy(problem, new GaussianThompsonSamplingPolicy(true), true)); 189 196 //var alg = new MctsQLearningSampler(problem, sentenceLen, random, 0, null); 190 197 //var alg = new MctsQLearningSampler(problem, 30, random, 0, new EpsGreedyPolicy(0.2)); … … 199 206 200 207 alg.FoundNewBestSolution += (sentence, quality) => { 201 bestQuality = quality;202 bestSentence = sentence;203 208 //Console.WriteLine("{0,4} {1,7} {2}", alg.treeDepth, alg.treeSize, globalStatistics); 204 209 //Console.ReadLine(); … … 208 213 globalStatistics.AddSentence(sentence, quality); 209 214 if (iterations % 100 == 0) { 210 //if (iterations % 1000 == 0) Console.Clear();215 if (iterations % 1000 == 0) Console.Clear(); 211 216 Console.SetCursorPosition(0, 0); 212 217 alg.PrintStats(); … … 226 231 sw.Stop(); 227 232 228 Console.WriteLine("{0,10} Best soultion: {1,10:F5} {2}", iterations, bestQuality, bestSentence); 233 Console.Clear(); 234 alg.PrintStats(); 235 Console.WriteLine(globalStatistics); 229 236 Console.WriteLine("{0:F2} sec {1,10:F1} sols/sec {2,10:F1} ns/sol", 230 237 sw.Elapsed.TotalSeconds,
Note: See TracChangeset
for help on using the changeset viewer.