Changeset 11793
- Timestamp:
- 01/18/15 18:24:58 (10 years ago)
- Location:
- branches/HeuristicLab.Problems.GrammaticalOptimization
- Files:
-
- 2 added
- 6 deleted
- 25 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/BanditPolicies/EpsGreedyPolicy.cs
r11742 r11793 26 26 public int SelectAction(Random random, IEnumerable<IBanditPolicyActionInfo> actionInfos) { 27 27 Debug.Assert(actionInfos.Any()); 28 if (random.NextDouble() > eps) {28 if (random.NextDouble() >= eps) { // eps == 0 should be equivalent to pure exploitation, eps == 1 is pure exploration 29 29 // select best 30 30 var myActionInfos = actionInfos.OfType<DefaultPolicyActionInfo>(); -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/GrammarPolicies/GenericGrammarPolicy.cs
r11792 r11793 14 14 private readonly IProblem problem; 15 15 private readonly IBanditPolicy banditPolicy; 16 private readonly HashSet<string> done;16 //private readonly HashSet<string> done; 17 17 18 18 public GenericGrammarPolicy(IProblem problem, IBanditPolicy banditPolicy, bool useCanonicalState = false) { … … 21 21 this.banditPolicy = banditPolicy; 22 22 this.stateInfo = new Dictionary<string, IBanditPolicyActionInfo>(); 23 this.done = new HashSet<string>();23 //this.done = new HashSet<string>(); 24 24 } 25 25 26 public bool TrySelect(Random random, ReadonlySequence curState, IEnumerable<ReadonlySequence> afterStates, 27 out ReadonlySequence selectedState) { 28 // only select states that are not yet done 29 afterStates = afterStates.Where(a => !done.Contains(CanonicalState(a))).ToArray(); 30 if (!afterStates.Any()) { 26 public bool TrySelect(Random random, string curState, IEnumerable<string> afterStates, out int selectedStateIdx) { 27 // fail if all states are done (corresponding state infos are disabled) 28 if (afterStates.All(s => GetStateInfo(s).Disabled)) { 31 29 // fail because all follow states have already been visited => also disable the current state (if we can be sure that it has been fully explored) 32 30 33 done.Add(CanonicalState(curState));34 selectedState = null;31 GetStateInfo(curState).Disable(0.0); // should the value be max of afterstate values instead of 0.0? 32 selectedStateIdx = -1; 35 33 return false; 36 34 } 37 35 36 selectedStateIdx = banditPolicy.SelectAction(random, afterStates.Select(s => GetStateInfo(s))); 38 37 39 var selectedIdx = banditPolicy.SelectAction(random, afterStates.Select(s => GetStateInfo(s)));40 selectedState = afterStates.ElementAt(selectedIdx);41 38 return true; 42 39 } 43 40 44 private IBanditPolicyActionInfo GetStateInfo( ReadonlySequencestate) {41 private IBanditPolicyActionInfo GetStateInfo(string state) { 45 42 var s = CanonicalState(state); 46 43 IBanditPolicyActionInfo info; … … 52 49 } 53 50 54 public virtual void UpdateReward(IEnumerable< ReadonlySequence> stateTrajectory, double reward) {51 public virtual void UpdateReward(IEnumerable<string> stateTrajectory, double reward) { 55 52 // the last state could be terminal 56 53 var lastState = stateTrajectory.Last(); 57 if (lastState.IsTerminal) done.Add(CanonicalState(lastState)); 54 if (problem.Grammar.IsTerminal(lastState)) { 55 GetStateInfo(lastState).Disable(reward); 56 } 58 57 59 foreach (var state in stateTrajectory) { 58 // update remaining states 59 foreach (var state in stateTrajectory.Reverse().Skip(1)) { 60 60 GetStateInfo(state).UpdateReward(reward); 61 61 } … … 64 64 public virtual void Reset() { 65 65 stateInfo.Clear(); 66 done.Clear();66 //done.Clear(); 67 67 } 68 68 69 public int GetTries( ReadonlySequencestate) {69 public int GetTries(string state) { 70 70 var s = CanonicalState(state); 71 71 if (stateInfo.ContainsKey(s)) return stateInfo[s].Tries; … … 73 73 } 74 74 75 public double GetValue( ReadonlySequencestate) {75 public double GetValue(string state) { 76 76 var s = CanonicalState(state); 77 77 if (stateInfo.ContainsKey(s)) return stateInfo[s].Value; … … 79 79 } 80 80 81 protected string CanonicalState( ReadonlySequencestate) {81 protected string CanonicalState(string state) { 82 82 if (useCanonicalState) { 83 if ( state.IsTerminal)84 return problem.CanonicalRepresentation(state .ToString());83 if (problem.Grammar.IsTerminal(state)) 84 return problem.CanonicalRepresentation(state); 85 85 else { 86 86 // for non-terminal phrases make sure we don't disable canonical states that have not yet been fully explored … … 88 88 // then we are not allowed to disable rS (canonical of lllS) because rS might not have been fully explored 89 89 // solution: we disable the state rS4 90 return problem.CanonicalRepresentation(state .ToString()) + state.Length;90 return problem.CanonicalRepresentation(state) + state.Length; 91 91 } 92 92 } else 93 return state .ToString();93 return state; 94 94 } 95 95 } -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/GrammarPolicies/GrammarPolicy.cs
r11770 r11793 8 8 9 9 namespace HeuristicLab.Algorithms.Bandits.GrammarPolicies { 10 // stores: tries, avg reward and max reward for each state 10 // stores: tries, avg reward and max reward for each state (base class for RandomPolicy and TDPolicy 11 11 public abstract class GrammarPolicy : IGrammarPolicy { 12 12 protected Dictionary<string, double> avgReward; 13 13 protected Dictionary<string, int> tries; 14 14 protected Dictionary<string, double> maxReward; 15 pr ivatereadonly bool useCanonicalState;16 pr ivatereadonly IProblem problem;15 protected readonly bool useCanonicalState; 16 protected readonly IProblem problem; 17 17 18 p ublicGrammarPolicy(IProblem problem, bool useCanonicalState = false) {18 protected GrammarPolicy(IProblem problem, bool useCanonicalState = false) { 19 19 this.useCanonicalState = useCanonicalState; 20 20 this.problem = problem; … … 24 24 } 25 25 26 public abstract bool TrySelect(Random random, ReadonlySequence curState, IEnumerable<ReadonlySequence> afterStates, out ReadonlySequence selectedState);26 public abstract bool TrySelect(Random random, string curState, IEnumerable<string> afterStates, out int selectedStateIdx); 27 27 28 public virtual void UpdateReward(IEnumerable< ReadonlySequence> stateTrajectory, double reward) {28 public virtual void UpdateReward(IEnumerable<string> stateTrajectory, double reward) { 29 29 foreach (var state in stateTrajectory) { 30 var s = CanonicalState(state .ToString());30 var s = CanonicalState(state); 31 31 32 32 if (!tries.ContainsKey(s)) tries.Add(s, 0); … … 47 47 } 48 48 49 public double AvgReward( ReadonlySequencestate) {50 var s = CanonicalState(state .ToString());49 public double AvgReward(string state) { 50 var s = CanonicalState(state); 51 51 if (avgReward.ContainsKey(s)) return avgReward[s]; 52 52 else return 0.0; 53 53 } 54 54 55 public double MaxReward( ReadonlySequencestate) {56 var s = CanonicalState(state .ToString());55 public double MaxReward(string state) { 56 var s = CanonicalState(state); 57 57 if (maxReward.ContainsKey(s)) return maxReward[s]; 58 58 else return 0.0; 59 59 } 60 60 61 public virtual int GetTries( ReadonlySequencestate) {62 var s = CanonicalState(state .ToString());61 public virtual int GetTries(string state) { 62 var s = CanonicalState(state); 63 63 if (tries.ContainsKey(s)) return tries[s]; 64 64 else return 0; 65 65 } 66 66 67 public virtual double GetValue( ReadonlySequencestate) {67 public virtual double GetValue(string state) { 68 68 return AvgReward(state); 69 69 } -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/GrammarPolicies/IGrammarPolicy.cs
r11770 r11793 8 8 9 9 namespace HeuristicLab.Algorithms.Bandits.GrammarPolicies { 10 public interface IGrammarPolicy : IPolicy< ReadonlySequence> {10 public interface IGrammarPolicy : IPolicy<string> { 11 11 } 12 12 } -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/GrammarPolicies/RandomPolicy.cs
r11770 r11793 13 13 } 14 14 15 public override bool TrySelect(Random random, ReadonlySequence curState, IEnumerable<ReadonlySequence> afterStates, out ReadonlySequence selectedState) {15 public override bool TrySelect(Random random, string curState, IEnumerable<string> afterStates, out int selectedStateIdx) { 16 16 // never fail => allows re-visits of terminal states 17 selectedState = afterStates.SelectRandom(random);17 selectedStateIdx = random.Next(afterStates.Count()); 18 18 return true; 19 19 } -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/GrammarPolicies/TDPolicy.cs
r11770 r11793 2 2 using System.Collections.Generic; 3 3 using System.Configuration; 4 using System.Diagnostics; 4 5 using System.Linq; 5 6 using System.Security.Policy; … … 7 8 using System.Threading; 8 9 using System.Threading.Tasks; 10 using HeuristicLab.Algorithms.Bandits.BanditPolicies; 9 11 using HeuristicLab.Common; 10 12 using HeuristicLab.Problems.GrammaticalOptimization; … … 15 17 private readonly HashSet<string> done; 16 18 private readonly Dictionary<string, double> v; 17 private EpsGreedyPolicy epsGreedy;19 private IGrammarPolicy epsGreedy; 18 20 19 21 public TDPolicy(IProblem problem, bool useCanonicalRepresentation = false) … … 21 23 this.done = new HashSet<string>(); 22 24 this.v = new Dictionary<string, double>(); 23 this.epsGreedy = new EpsGreedyPolicy(problem, useCanonicalRepresentation, 0.1);25 this.epsGreedy = new GenericGrammarPolicy(problem, new EpsGreedyPolicy(0.1), useCanonicalRepresentation); 24 26 } 25 27 26 public override bool TrySelect(Random random, ReadonlySequence curState, IEnumerable<ReadonlySequence> afterStates, out ReadonlySequence selectedState) {28 public override bool TrySelect(Random random, string curState, IEnumerable<string> afterStates, out int selectedStateIdx) { 27 29 // only select states that are not yet done 28 30 afterStates = afterStates.Where(a => !done.Contains(CanonicalState(a.ToString()))).ToArray(); 29 31 if (!afterStates.Any()) { 30 32 // fail because all follow states have already been visited => also disable the current state 31 done.Add(CanonicalState(curState .ToString()));32 selectedState = null;33 done.Add(CanonicalState(curState)); 34 selectedStateIdx = -1; 33 35 return false; 34 36 } 37 throw new NotImplementedException(); // TODO: remap indices of reduced action enumerable to indices of original enumerable 35 38 36 39 //return epsGreedy.TrySelect(random, curState, afterStates, out selectedState); 37 40 38 41 var bestQ = double.NegativeInfinity; 39 selectedState = null; 42 int idx = -1; 43 selectedStateIdx = -1; 40 44 foreach (var state in afterStates) { 45 idx++; 41 46 // try each state at least once 42 47 if (GetTries(state) == 0) { 43 selectedState = state;48 selectedStateIdx = idx; 44 49 return true; 45 50 } … … 47 52 if (q > bestQ) { 48 53 bestQ = q; 49 selectedState = state;54 selectedStateIdx = idx; 50 55 } 51 56 } 52 57 58 Debug.Assert(selectedStateIdx > -1); 53 59 return true; 54 60 } 55 61 56 private double V( ReadonlySequencestate) {57 var s = CanonicalState(state .ToString());62 private double V(string state) { 63 var s = CanonicalState(state); 58 64 if (v.ContainsKey(s)) return v[s]; 59 65 else return 0.0; 60 66 } 61 67 62 public override void UpdateReward(IEnumerable< ReadonlySequence> stateTrajectory, double reward) {68 public override void UpdateReward(IEnumerable<string> stateTrajectory, double reward) { 63 69 base.UpdateReward(stateTrajectory, reward); 64 70 epsGreedy.UpdateReward(stateTrajectory, reward); 65 71 // the last state could be terminal 66 72 var lastState = stateTrajectory.Last(); 67 if ( lastState.IsTerminal) done.Add(CanonicalState(lastState.ToString()));73 if (problem.Grammar.IsTerminal(lastState)) done.Add(CanonicalState(lastState)); 68 74 69 v[CanonicalState(lastState .ToString())] = V(lastState) + 1.0 / GetTries(lastState) * (reward - V(lastState));75 v[CanonicalState(lastState)] = V(lastState) + 1.0 / GetTries(lastState) * (reward - V(lastState)); 70 76 71 77 foreach (var p in stateTrajectory.Zip(stateTrajectory.Skip(1), Tuple.Create).Reverse()) { … … 73 79 var next = p.Item2; 74 80 75 v[CanonicalState(cur .ToString())] = V(cur) + 1.0 / GetTries(cur) * (V(next) - V(cur));81 v[CanonicalState(cur)] = V(cur) + 1.0 / GetTries(cur) * (V(next) - V(cur)); 76 82 //v[CanonicalState(cur.ToString())] = V(cur) + 0.1 * (V(next) - V(cur)); 77 83 } … … 79 85 } 80 86 81 public override double GetValue( ReadonlySequencestate) {87 public override double GetValue(string state) { 82 88 return V(state); 83 89 } 84 90 85 public void Reset() {91 public override void Reset() { 86 92 base.Reset(); 87 93 epsGreedy.Reset(); -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/HeuristicLab.Algorithms.Bandits.csproj
r11770 r11793 66 66 <Compile Include="Bandits\IBandit.cs" /> 67 67 <Compile Include="Bandits\TruncatedNormalBandit.cs" /> 68 <Compile Include="GrammarPolicies\BoltzmanExplorationPolicy.cs" />69 68 <Compile Include="GrammarPolicies\GenericGrammarPolicy.cs"> 70 69 <SubType>Code</SubType> 71 70 </Compile> 71 <Compile Include="GrammarPolicies\RandomPolicy.cs"> 72 <SubType>Code</SubType> 73 </Compile> 72 74 <Compile Include="GrammarPolicies\TDPolicy.cs" /> 73 <Compile Include="GrammarPolicies\UCTPolicy.cs" />74 75 <Compile Include="GrammarPolicies\GrammarPolicy.cs" /> 75 <Compile Include="GrammarPolicies\EpsGreedyPolicy.cs" />76 <Compile Include="GrammarPolicies\GreedyPolicy.cs" />77 76 <Compile Include="GrammarPolicies\IGrammarPolicy.cs" /> 78 <Compile Include="GrammarPolicies\RandomNoResamplingPolicy.cs" />79 <Compile Include="GrammarPolicies\RandomPolicy.cs" />80 77 <Compile Include="IPolicy.cs" /> 81 78 <Compile Include="IBanditPolicy.cs" /> -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.Bandits/IPolicy.cs
r11770 r11793 11 11 // here we assume that a reward is only recieved at the end of the episode and the update is done only after an episode is complete 12 12 // we also assume that the policy can fail to select one of the followStates 13 public interface IPolicy< TState> {14 bool TrySelect(Random random, TState curState, IEnumerable<TState> afterStates, out TState selectedState); // selectedState \in afterStates13 public interface IPolicy<in TState> { 14 bool TrySelect(Random random, TState curState, IEnumerable<TState> afterStates, out int selectedStateIdx); // selectedState \in afterStates 15 15 16 16 // state-trajectory are the states of the episode, at the end we recieved the reward (only for the terminal state) -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/ExhaustiveBreadthFirstSearch.cs
r11732 r11793 42 42 43 43 char nt = phrase.FirstNonTerminal; 44 int ntIdx;45 44 46 45 var alts = grammar.GetAlternatives(nt); -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/RandomSearch.cs
r11732 r11793 1 1 using System; 2 using System;3 using System.Collections.Generic;4 using System.Linq;5 using System.Text;6 2 using HeuristicLab.Problems.GrammaticalOptimization; 7 3 -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Algorithms.GrammaticalOptimization/SequentialSearch.cs
r11792 r11793 24 24 // 3) Collect reward and update policy (feedback: state of visited rewards from step 2) 25 25 public class SequentialSearch { 26 // only for storing states so that it is not necessary to allocate new state strings whenever we select a follow state using the policy 27 private class TreeNode { 28 public int randomTries; 29 public string phrase; 30 public Sequence alternative; 31 public TreeNode[] children; 32 33 public TreeNode(string phrase, Sequence alternative) { 34 this.alternative = alternative; 35 this.phrase = phrase; 36 } 37 } 38 26 39 27 40 public event Action<string, double> FoundNewBestSolution; … … 34 47 private readonly IGrammarPolicy behaviourPolicy; 35 48 private readonly IGrammarPolicy greedyPolicy; 49 private TreeNode rootNode; 50 51 private int tries; 36 52 private int maxSearchDepth; 37 53 38 54 private double bestQuality; 39 55 private string bestPhrase; 40 private int tries; 41 private readonly List<ReadonlySequence> stateChain; 56 private readonly List<string> stateChain; 42 57 43 58 public SequentialSearch(IProblem problem, int maxLen, Random random, int randomTries, IGrammarPolicy behaviourPolicy) { … … 47 62 this.randomTries = randomTries; 48 63 this.behaviourPolicy = behaviourPolicy; 49 this.greedyPolicy = new GreedyPolicy(problem, false); 50 this.stateChain = new List<ReadonlySequence>(); 51 this.cache = new Dictionary<ReadonlySequence, ReadonlySequence[]>(); 64 this.greedyPolicy = new GenericGrammarPolicy(problem, new EpsGreedyPolicy(0.0), false); 65 this.stateChain = new List<string>(); 52 66 } 53 67 54 68 public void Run(int maxIterations) { 55 69 bestQuality = double.MinValue; 56 //InitPolicies(problem.Grammar);57 70 Reset(); 58 71 … … 79 92 80 93 81 private ReadonlySequence SampleSentence(IGrammar grammar) {82 ReadonlySequence phrase;94 private Sequence SampleSentence(IGrammar grammar) { 95 Sequence phrase; 83 96 do { 84 97 stateChain.Clear(); 85 phrase = new ReadonlySequence(grammar.SentenceSymbol);98 phrase = new Sequence(rootNode.phrase); 86 99 //var startPhrase = new Sequence("a*b+c*d+e*f+E"); 87 100 } while (!Done() && !TryCompleteSentence(grammar, ref phrase)); … … 89 102 } 90 103 91 private bool TryCompleteSentence(IGrammar g, ref ReadonlySequence phrase) {104 private bool TryCompleteSentence(IGrammar g, ref Sequence phrase) { 92 105 if (phrase.Length > maxLen) throw new ArgumentException(); 93 106 if (g.MinPhraseLength(phrase) > maxLen) throw new ArgumentException(); 94 107 var curDepth = 0; 95 96 stateChain.Add(phrase); 108 var n = rootNode; 109 stateChain.Add(n.phrase); 110 97 111 while (!phrase.IsTerminal) { 98 112 99 var newPhrases = GenerateFollowStates(g, phrase); 100 101 throw new NotImplementedException(); // TODO: reintroduce random-trie checking once the tree of all states has been reintroduced 113 102 114 //if (n.randomTries < randomTries) { 103 115 // n.randomTries++; 104 // treeDepth = Math.Max(treeDepth, curDepth);105 // lastNode = n;106 // return g.CompleteSentenceRandomly(random, phrase, maxLen);116 // curDepth = Math.Max(curDepth, curDepth); 117 // g.CompleteSentenceRandomly(random, phrase, maxLen); 118 // return true; 107 119 //} else { 108 109 110 // => select using bandit policy 111 // failure means we simply restart 112 if (!behaviourPolicy.TrySelect(random, phrase, newPhrases, out phrase)) { 113 return false; 114 } 115 // } 116 stateChain.Add(phrase); 120 // => select using bandit policy 121 // failure means we simply restart 122 GenerateFollowStates(n); // creates child nodes for node n 123 124 int selectedChildIdx; 125 if (!behaviourPolicy.TrySelect(random, n.phrase, n.children.Select(ch => ch.phrase), out selectedChildIdx)) { 126 return false; 127 } 128 phrase.ReplaceAt(phrase.FirstNonTerminalIndex, 1, n.children[selectedChildIdx].alternative); 129 130 // prepare for next iteration 131 n = n.children[selectedChildIdx]; 132 stateChain.Add(n.phrase); 117 133 curDepth++; 134 //} 118 135 } // while 119 136 … … 123 140 124 141 125 private readonly Dictionary<ReadonlySequence, ReadonlySequence[]> cache;126 private IEnumerable<ReadonlySequence> GenerateFollowStates(IGrammar g, ReadonlySequence phrase) {127 throw new NotImplementedException();128 // TODO: Replace caching by a tree of all states. tree is only used for easily retrieving the follow-states of a state129 ReadonlySequence[] follow;130 //if (!cache.TryGetValue(phrase, out follow)) {142 private IEnumerable<string> GenerateFollowStates(TreeNode n) { 143 // create children on the first visit 144 if (n.children == null) { 145 var g = problem.Grammar; 146 // tree is only used for easily retrieving the follow-states of a state 147 var phrase = new Sequence(n.phrase); 131 148 char nt = phrase.FirstNonTerminal; 132 149 … … 137 154 var alts = g.GetAlternatives(nt).Where(alt => g.MinPhraseLength(alt) <= maxLenOfReplacement); 138 155 139 follow = new ReadonlySequence[alts.Count()];156 var children = new TreeNode[alts.Count()]; 140 157 int idx = 0; 141 158 foreach (var alt in alts) { 142 159 var newPhrase = new Sequence(phrase); // clone 143 160 newPhrase.ReplaceAt(newPhrase.FirstNonTerminalIndex, 1, alt); 144 follow[idx++] = new ReadonlySequence(newPhrase);145 } 146 // cache[phrase] = follow;147 //}148 return follow;161 children[idx++] = new TreeNode(newPhrase.ToString(), alt); 162 } 163 n.children = children; 164 } 165 return n.children.Select(ch => ch.phrase); 149 166 } 150 167 … … 161 178 bestQuality = 0.0; 162 179 tries = 0; 163 cache.Clear();180 rootNode = new TreeNode(problem.Grammar.SentenceSymbol.ToString(), new ReadonlySequence("$")); 164 181 } 165 182 166 183 public bool Done() { 167 var g = problem.Grammar; 168 var startState = new ReadonlySequence(g.SentenceSymbol); 169 var follow = GenerateFollowStates(g, startState); 170 ReadonlySequence selectedState; 171 return !behaviourPolicy.TrySelect(random, startState, follow, out selectedState); 184 int selectedStateIdx; 185 return !behaviourPolicy.TrySelect(random, rootNode.phrase, GenerateFollowStates(rootNode), out selectedStateIdx); 172 186 } 173 187 … … 176 190 Console.WriteLine("depth: {0,5} tries: {1,5} best phrase {2,50} bestQ {3:F3}", maxSearchDepth, tries, bestPhrase, bestQuality); 177 191 178 // use greedy strategy to generate the currently prefered sentence 179 var phrase = new ReadonlySequence(problem.Grammar.SentenceSymbol); 192 // use behaviour strategy to generate the currently prefered sentence 180 193 var policy = behaviourPolicy; 181 while (!phrase.IsTerminal) { 194 195 var n = rootNode; 196 197 while (n != null) { 198 var phrase = n.phrase; 182 199 Console.ForegroundColor = ConsoleColor.White; 183 200 Console.WriteLine("{0,-30}", phrase); 184 var newPhrases = GenerateFollowStates(problem.Grammar, phrase);185 if ( !newPhrases.Any()) break;186 var values = newPhrases.Select(p => policy.GetValue(p));201 var children = n.children; 202 if (children == null || !children.Any()) break; 203 var values = children.Select(ch => policy.GetValue(ch.phrase)); 187 204 var maxValue = values.Max(); 188 205 if (maxValue == 0) maxValue = 1.0; 189 206 190 207 // write phrases 191 foreach (var p in newPhrases) {192 SetColorForValue(policy.GetValue( p) / maxValue);193 Console.Write(" {0,-4}", p.Subsequence(Math.Max(0, p.Length - 3), Math.Min(3, p.Length)));208 foreach (var ch in children) { 209 SetColorForValue(policy.GetValue(ch.phrase) / maxValue); 210 Console.Write(" {0,-4}", ch.phrase.Substring(Math.Max(0, ch.phrase.Length - 3), Math.Min(3, ch.phrase.Length))); 194 211 } 195 212 Console.WriteLine(); 196 213 197 214 // write values 198 foreach (var p in newPhrases) {199 SetColorForValue(policy.GetValue( p) / maxValue);200 Console.Write(" {0:F2}", policy.GetValue( p) * 10.0);215 foreach (var ch in children) { 216 SetColorForValue(policy.GetValue(ch.phrase) / maxValue); 217 Console.Write(" {0:F2}", policy.GetValue(ch.phrase) * 10.0); 201 218 } 202 219 Console.WriteLine(); 203 220 204 221 // write tries 205 foreach (var p in newPhrases) {206 SetColorForValue(policy.GetValue( p) / maxValue);207 Console.Write(" {0,4}", policy.GetTries( p));222 foreach (var ch in children) { 223 SetColorForValue(policy.GetValue(ch.phrase) / maxValue); 224 Console.Write(" {0,4}", policy.GetTries(ch.phrase)); 208 225 } 209 226 Console.WriteLine(); 210 if (!policy.TrySelect(random, phrase, newPhrases, out phrase)) { 227 int selectedChildIdx; 228 if (!policy.TrySelect(random, phrase, children.Select(ch => ch.phrase), out selectedChildIdx)) { 211 229 break; 212 230 } 231 n = n.children[selectedChildIdx]; 213 232 } 214 233 -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.Test/TestSequence.cs
r11730 r11793 49 49 var nt = s.FirstNonTerminal; 50 50 Assert.Fail(); 51 } 52 catch (IndexOutOfRangeException e) { 51 } catch (IndexOutOfRangeException e) { 53 52 } 54 53 } … … 201 200 s.ReplaceAt(0, 4, t); 202 201 Assert.Fail(); 203 } 204 catch (ArgumentException) { } 202 } catch (ArgumentException) { } 205 203 206 204 s.ReplaceAt(0, 2, t); // should work … … 209 207 s.ReplaceAt(0, 3, t); 210 208 Assert.Fail(); 211 } 212 catch (ArgumentException) { } 209 } catch (ArgumentException) { } 213 210 214 211 try { 215 212 s.ReplaceAt(1, 2, t); 216 213 Assert.Fail(); 217 } 218 catch (ArgumentException) { } 214 } catch (ArgumentException) { } 219 215 220 216 s.ReplaceAt(1, 1, new Sequence("A")); // should work … … 223 219 s.ReplaceAt(-1, 2, t); 224 220 Assert.Fail(); 225 } 226 catch (ArgumentException) { } 221 } catch (ArgumentException) { } 227 222 228 223 } … … 267 262 Assert.AreEqual("AA", sub.ToString()); 268 263 } 264 { 265 var s = new Sequence("aaaAA"); 266 var sub = s.Subsequence(2, 3); 267 268 Assert.AreEqual(1, sub.FirstNonTerminalIndex); 269 Assert.AreEqual("aAA", sub.ToString()); 270 } 271 } 272 273 [TestMethod] 274 public void TestReadonlySequence() { 275 CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture; 276 { 277 var s = new ReadonlySequence("AAaaaAA"); 278 var sub = s.Subsequence(0, 7); 279 280 Assert.AreEqual(0, sub.FirstNonTerminalIndex); 281 Assert.AreEqual("AAaaaAA", sub.ToString()); 282 } 283 { 284 var s = new ReadonlySequence("AAaaaAA"); 285 var sub = s.Subsequence(0, 3); 286 287 Assert.AreEqual(0, sub.FirstNonTerminalIndex); 288 Assert.AreEqual("AAa", sub.ToString()); 289 290 sub = sub.Subsequence(1, 2); 291 Assert.AreEqual(0, sub.FirstNonTerminalIndex); 292 Assert.AreEqual("Aa", sub.ToString()); 293 } 294 { 295 var s = new ReadonlySequence("AAaaaAA"); 296 var sub = s.Subsequence(2, 3); 297 298 Assert.AreEqual(-1, sub.FirstNonTerminalIndex); 299 Assert.AreEqual("aaa", sub.ToString()); 300 } 301 { 302 var s = new ReadonlySequence("AAaaaAA"); 303 var sub = s.Subsequence(2, 4); 304 305 Assert.AreEqual(3, sub.FirstNonTerminalIndex); 306 Assert.AreEqual("aaaA", sub.ToString()); 307 } 308 { 309 var s = new ReadonlySequence("AAaaaAA"); 310 var sub = s.Subsequence(5, 2); 311 312 Assert.AreEqual(0, sub.FirstNonTerminalIndex); 313 Assert.AreEqual("AA", sub.ToString()); 314 } 315 { 316 var s = new ReadonlySequence("aaaAA"); 317 var sub = s.Subsequence(2, 3); 318 319 Assert.AreEqual(1, sub.FirstNonTerminalIndex); 320 Assert.AreEqual("aAA", sub.ToString()); 321 } 322 269 323 } 270 324 } -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/FindPhrasesProblem.cs
r11770 r11793 27 27 private readonly int numPhrases; 28 28 private readonly int phraseLen; 29 private readonly int numOptimalPhrases;30 private readonly int numDecoyPhrases;31 29 private readonly double correctReward; 32 30 private readonly double decoyReward; … … 122 120 + phrases.Intersect(decoyPhrases).Count() * decoyReward; 123 121 124 125 126 122 return reward; 127 123 } -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/Grammar.cs
r11732 r11793 201 201 } 202 202 203 public bool IsTerminal(string phrase) { 204 // reverse because for our grammars and left-canonical derivation it is more likely that NTs occur near the end of the sequence 205 return phrase.Reverse().All(IsTerminal); 206 } 207 203 208 public bool IsNonTerminal(char symbol) { 204 209 return nonTerminalSymbols.Contains(symbol); -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/HardPalindromeProblem.cs
r11792 r11793 40 40 41 41 public string CanonicalRepresentation(string terminalPhrase) { 42 throw new NotImplementedException();43 42 return terminalPhrase; 44 43 } -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/IGrammar.cs
r11730 r11793 23 23 24 24 bool IsTerminal(char symbol); 25 bool IsTerminal(string phrase); 25 26 bool IsNonTerminal(char symbol); 26 27 } -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/IProblem.cs
r11792 r11793 8 8 double BestKnownQuality(int maxLen); 9 9 IGrammar Grammar { get; } 10 double Evaluate( ReadonlySequencesentence);11 ReadonlySequence CanonicalRepresentation(ReadonlySequenceterminalPhrase);10 double Evaluate(string sentence); 11 string CanonicalRepresentation(string terminalPhrase); 12 12 } 13 13 } -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/PalindromeProblem.cs
r11792 r11793 81 81 82 82 public string CanonicalRepresentation(string terminalPhrase) { 83 throw new NotImplementedException();84 83 return terminalPhrase; 85 84 } -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/ReadonlySequence.cs
r11742 r11793 1 1 using System; 2 using System.Collections.Generic; 3 using System.Diagnostics; 4 using System.Linq; 5 using System.Text; 2 6 3 7 namespace HeuristicLab.Problems.GrammaticalOptimization { 4 8 public class ReadonlySequence : Sequence { 9 private int symbolsOffset = 0; // the sequence does not have to start with the first symbol of the symbols array (when we reuse these arrays) 10 11 // cloning constructor for readonly sequences 12 // does not allocate the symbols array of the base class 13 // instead: reuse the symbols array (both sequences are readonly) 14 private ReadonlySequence(ReadonlySequence original) 15 : base() { 16 base.symbols = original.symbols; 17 this.symbolsOffset = original.symbolsOffset; 18 } 5 19 public ReadonlySequence(string s) 6 20 : base(s, s.Length) { … … 19 33 } 20 34 35 public override char this[int idx] { 36 get { 37 return base[idx + symbolsOffset]; 38 } 39 set { 40 throw new NotSupportedException(); 41 } 42 } 43 44 public override IEnumerator<char> GetEnumerator() { 45 return symbols.Skip(symbolsOffset).Take(Length).GetEnumerator(); 46 } 47 48 public override string ToString() { 49 var sb = new StringBuilder(Length); 50 sb.Append(symbols, symbolsOffset, Length); 51 return sb.ToString(); 52 } 53 54 public new ReadonlySequence Subsequence(int startIdx, int len) { 55 if (startIdx < 0 || len < 0) throw new ArgumentException(); 56 if (startIdx >= this.Length) throw new ArgumentException(); 57 if (startIdx + len > this.Length) throw new ArgumentException(); 58 var subsequence = new ReadonlySequence(this) { symbolsOffset = startIdx + this.symbolsOffset, Length = len }; 59 60 61 if (FirstNonTerminalIndex < 0) { 62 subsequence.FirstNonTerminalIndex = -1; 63 } else if (FirstNonTerminalIndex < startIdx) { 64 // need to find first nt in subsequence 65 subsequence.FirstNonTerminalIndex = -1; 66 for (int i = 0; subsequence.FirstNonTerminalIndex == -1 && i < len; i++) { 67 if (subsequence[i] >= 'A' && subsequence[i] <= 'Z') subsequence.FirstNonTerminalIndex = i; 68 } 69 } else if (FirstNonTerminalIndex >= startIdx && FirstNonTerminalIndex < startIdx + len) { 70 subsequence.FirstNonTerminalIndex = FirstNonTerminalIndex - startIdx; 71 } else { 72 Debug.Assert(FirstNonTerminalIndex >= startIdx + len); 73 subsequence.FirstNonTerminalIndex = -1; 74 } 75 return subsequence; 76 } 77 21 78 public override bool Equals(object obj) { 22 var other = obj as Sequence;79 var other = obj as ReadonlySequence; 23 80 if (other == null) return false; 24 81 if (other.Length != this.Length) return false; -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/RoyalPairProblem.cs
r11742 r11793 17 17 public RoyalPairProblem() { 18 18 this.grammar = new Grammar(grammarString); 19 // TODO: allow configuration of the number of symbols 19 20 } 20 21 … … 35 36 36 37 public string CanonicalRepresentation(string terminalPhrase) { 38 throw new NotImplementedException(); 37 39 return terminalPhrase; 38 40 } -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/RoyalSymbolProblem.cs
r11742 r11793 17 17 public RoyalSymbolProblem() { 18 18 this.grammar = new Grammar(grammarString); 19 //TODO: allow configuration of the number of symbols 19 20 } 20 21 … … 31 32 // sentence must contain only terminal symbols, we are not checking if the sentence is syntactically valid here because it would be too slow! 32 33 Debug.Assert(sentence.Any(c => grammar.IsTerminal(c))); 33 return regex.Matches(sentence ).Count;34 return regex.Matches(sentence.ToString()).Count; 34 35 } 35 36 public string CanonicalRepresentation(string terminalPhrase) { 37 throw new NotImplementedException(); 36 38 return terminalPhrase; 37 39 } -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/RoyalTreeProblem.cs
r11742 r11793 30 30 } 31 31 public string CanonicalRepresentation(string terminalPhrase) { 32 throw new NotImplementedException(); 32 33 return terminalPhrase; 33 34 } -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/SantaFeAntProblem.cs
r11770 r11793 101 101 102 102 public string CanonicalRepresentation(string terminalPhrase) { 103 //return terminalPhrase;104 103 string oldPhrase; 105 104 do { -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/Sequence.cs
r11742 r11793 22 22 private int len; 23 23 private int idxOfFirstNt; 24 pr ivate readonlychar[] symbols;24 protected char[] symbols; 25 25 26 public char this[int idx] {26 public virtual char this[int idx] { 27 27 get { return symbols[idx]; } 28 28 set { throw new NotSupportedException(); } … … 31 31 public int Length { 32 32 get { return len; } 33 pr ivateset { len = value; }33 protected set { len = value; } 34 34 } 35 35 … … 40 40 public int FirstNonTerminalIndex { 41 41 get { return idxOfFirstNt; } 42 protected set { idxOfFirstNt = value; } 42 43 } 43 44 44 45 public char FirstNonTerminal { 45 get { return symbols[idxOfFirstNt]; }46 get { return this[idxOfFirstNt]; } 46 47 } 47 48 … … 90 91 } 91 92 93 // empty constructor does not allocate the symbol array 94 protected Sequence() { } 95 92 96 public virtual void ReplaceAt(int position, int len, Sequence replacement) { 93 97 if (replacement == null) throw new ArgumentNullException(); … … 111 115 idxOfFirstNt = -1; 112 116 for (int i = startIdxOfRemainingPart; idxOfFirstNt == -1 && i < Length; i++) { 113 if ( symbols[i] >= 'A' && symbols[i] <= 'Z') idxOfFirstNt = i;117 if (this[i] >= 'A' && this[i] <= 'Z') idxOfFirstNt = i; 114 118 } 115 119 } 116 120 } 117 121 118 public IEnumerator<char> GetEnumerator() {122 public virtual IEnumerator<char> GetEnumerator() { 119 123 return symbols.AsEnumerable().Take(len).GetEnumerator(); 120 124 } … … 143 147 subsequence.idxOfFirstNt = -1; 144 148 for (int i = 0; subsequence.idxOfFirstNt == -1 && i < len; i++) { 145 if (subsequence .symbols[i] >= 'A' && subsequence.symbols[i] <= 'Z') subsequence.idxOfFirstNt = i;149 if (subsequence[i] >= 'A' && subsequence[i] <= 'Z') subsequence.idxOfFirstNt = i; 146 150 } 147 151 } else if (idxOfFirstNt >= startIdx && idxOfFirstNt < startIdx + len) { 148 subsequence.idxOfFirstNt = idxOfFirstNt ;152 subsequence.idxOfFirstNt = idxOfFirstNt - startIdx; 149 153 } else { 150 154 Debug.Assert(idxOfFirstNt >= startIdx + len); -
branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs
r11792 r11793 24 24 CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture; 25 25 26 RunDemo();26 //RunDemo(); 27 27 RunGridTest(); 28 28 } … … 191 191 // var problem = new FindPhrasesProblem(random, 15, numPhrases, phraseLen, numOptimalPhrases: numPhrases, numDecoyPhrases: 0, correctReward: 1.0, decoyReward: 0.0, phrasesAsSets: true); 192 192 193 var problem = new SymbolicRegressionPoly10Problem(); // good results e.g. 10 randomtries and EpsGreedyPolicy(0.2, (aInfo)=>aInfo.MaxReward)193 //var problem = new SymbolicRegressionPoly10Problem(); // good results e.g. 10 randomtries and EpsGreedyPolicy(0.2, (aInfo)=>aInfo.MaxReward) 194 194 // Ant 195 195 // good results e.g. with var alg = new MctsSampler(problem, 17, random, 1, (rand, numActions) => new ThresholdAscentPolicy(numActions, 500, 0.01)); 196 196 // GaussianModelWithUnknownVariance (and Q= 0.99-quantil) also works well for Ant 197 // very good results with: var alg = new SequentialSearch(problem, 17, random, 0, 198 // new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericGrammarPolicy(problem, new UCB1TunedPolicy(), true)); 199 200 //var problem = new SantaFeAntProblem(); 197 198 var problem = new SantaFeAntProblem(); 201 199 //var problem = new SymbolicRegressionProblem("Tower"); 202 200 //var problem = new PalindromeProblem(); … … 207 205 //var alg = new MctsSampler(problem, 23, random, 0, new BoltzmannExplorationPolicy(100)); 208 206 //var alg = new MctsSampler(problem, 23, random, 0, new EpsGreedyPolicy(0.1)); 209 var alg = new SequentialSearch(problem, 23, random, 0,210 new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericGrammarPolicy(problem, new EpsGreedyPolicy(0.2), true));207 var alg = new SequentialSearch(problem, 17, random, 0, 208 new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericGrammarPolicy(problem, new BoltzmannExplorationPolicy(10), true)); 211 209 //var alg = new MctsQLearningSampler(problem, sentenceLen, random, 0, null); 212 210 //var alg = new MctsQLearningSampler(problem, 30, random, 0, new EpsGreedyPolicy(0.2));
Note: See TracChangeset
for help on using the changeset viewer.