Context Navigation

source: branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs @ 11869

Visit:

Last change on this file since 11869 was 11865, checked in by gkronber, 10 years ago
#2283: implemented royal tree problem and grid test for tree-based gp variants
File size: 19.1 KB

Line
1	using System;
2	using System.Collections.Generic;
3	using System.Diagnostics;
4	using System.Globalization;
5	using System.Text;
6	using System.Threading;
7	using System.Threading.Tasks;
8	using HeuristicLab.Algorithms.Bandits;
9	using HeuristicLab.Algorithms.Bandits.BanditPolicies;
10	using HeuristicLab.Algorithms.Bandits.GrammarPolicies;
11	using HeuristicLab.Algorithms.Bandits.Models;
12	using HeuristicLab.Algorithms.GeneticProgramming;
13	using HeuristicLab.Algorithms.GrammaticalOptimization;
14	using HeuristicLab.Problems.GrammaticalOptimization;
15	using BoltzmannExplorationPolicy = HeuristicLab.Algorithms.Bandits.BanditPolicies.BoltzmannExplorationPolicy;
16	using EpsGreedyPolicy = HeuristicLab.Algorithms.Bandits.BanditPolicies.EpsGreedyPolicy;
17	using IProblem = HeuristicLab.Problems.GrammaticalOptimization.IProblem;
18	using RandomPolicy = HeuristicLab.Algorithms.Bandits.BanditPolicies.RandomPolicy;
19	using UCTPolicy = HeuristicLab.Algorithms.Bandits.BanditPolicies.UCTPolicy;
20
21	namespace Main {
22	class Program {
/// <summary>
/// Program entry point: switches the default thread culture to the invariant culture
/// and then starts the currently selected experiment.
/// </summary>
/// <param name="args">Command line arguments (not read).</param>
static void Main(string[] args) {
  CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;

  // Other available experiments; swap the active call below to run one of them:
  //   RunDemo();
  //   RunGpDemo();
  //   RunGridTest();
  RunGpGridTest();
}
31
/// <summary>
/// Grid test for the sequential-search solver. For every problem instance, state
/// representation (canonical or not), number of random tries and bandit policy factory,
/// <c>reps</c> repetitions are executed (at most four in parallel), each limited to
/// <c>maxIterations</c> evaluations. A progress line is printed every 1000 evaluated sentences.
/// </summary>
/// <remarks>
/// NOTE(review): the solver is built with GenericFunctionApproximationGrammarPolicy, so the
/// object returned by <c>policyFactory()</c> is only used as a label in the progress line and
/// is not passed to the search. Confirm that this is intended before interpreting the output
/// as a comparison of bandit policies.
/// </remarks>
private static void RunGridTest() {
  int maxIterations = 70000; // for poly-10 with 50000 evaluations no successful try with hl yet
  //var globalRandom = new Random(31415);
  var localRandSeed = 31415;
  var reps = 30;

  var policyFactories = new Func<IBanditPolicy>[]
  {
    () => new RandomPolicy(),
    () => new ActiveLearningPolicy(),
    () => new EpsGreedyPolicy(0.01, (aInfo) => aInfo.MaxReward, "max"),
    () => new EpsGreedyPolicy(0.05, (aInfo) => aInfo.MaxReward, "max"),
    () => new EpsGreedyPolicy(0.1, (aInfo) => aInfo.MaxReward, "max"),
    () => new EpsGreedyPolicy(0.2, (aInfo) => aInfo.MaxReward, "max"),
    //() => new GaussianThompsonSamplingPolicy(),
    () => new GaussianThompsonSamplingPolicy(true),
    () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1)),
    () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1, 1)),
    //() => new BernoulliThompsonSamplingPolicy(),
    () => new GenericThompsonSamplingPolicy(new BernoulliModel(1, 1)),
    () => new EpsGreedyPolicy(0.01),
    () => new EpsGreedyPolicy(0.05),
    () => new EpsGreedyPolicy(0.1),
    () => new EpsGreedyPolicy(0.2),
    () => new EpsGreedyPolicy(0.5),
    () => new UCTPolicy(0.01),
    () => new UCTPolicy(0.05),
    () => new UCTPolicy(0.1),
    () => new UCTPolicy(0.5),
    () => new UCTPolicy(1),
    () => new UCTPolicy(2),
    () => new UCTPolicy(5),
    () => new UCTPolicy(10),
    () => new ModifiedUCTPolicy(0.01),
    () => new ModifiedUCTPolicy(0.05),
    () => new ModifiedUCTPolicy(0.1),
    () => new ModifiedUCTPolicy(0.5),
    () => new ModifiedUCTPolicy(1),
    () => new ModifiedUCTPolicy(2),
    () => new ModifiedUCTPolicy(5),
    () => new ModifiedUCTPolicy(10),
    () => new UCB1Policy(),
    () => new UCB1TunedPolicy(),
    () => new UCBNormalPolicy(),
    () => new BoltzmannExplorationPolicy(1),
    () => new BoltzmannExplorationPolicy(10),
    () => new BoltzmannExplorationPolicy(20),
    () => new BoltzmannExplorationPolicy(100),
    () => new BoltzmannExplorationPolicy(200),
    () => new BoltzmannExplorationPolicy(500),
    () => new ChernoffIntervalEstimationPolicy(0.01),
    () => new ChernoffIntervalEstimationPolicy(0.05),
    () => new ChernoffIntervalEstimationPolicy(0.1),
    () => new ChernoffIntervalEstimationPolicy(0.2),
    () => new ThresholdAscentPolicy(5, 0.01),
    () => new ThresholdAscentPolicy(5, 0.05),
    () => new ThresholdAscentPolicy(5, 0.1),
    () => new ThresholdAscentPolicy(5, 0.2),
    () => new ThresholdAscentPolicy(10, 0.01),
    () => new ThresholdAscentPolicy(10, 0.05),
    () => new ThresholdAscentPolicy(10, 0.1),
    () => new ThresholdAscentPolicy(10, 0.2),
    () => new ThresholdAscentPolicy(50, 0.01),
    () => new ThresholdAscentPolicy(50, 0.05),
    () => new ThresholdAscentPolicy(50, 0.1),
    () => new ThresholdAscentPolicy(50, 0.2),
    () => new ThresholdAscentPolicy(100, 0.01),
    () => new ThresholdAscentPolicy(100, 0.05),
    () => new ThresholdAscentPolicy(100, 0.1),
    () => new ThresholdAscentPolicy(100, 0.2),
    () => new ThresholdAscentPolicy(500, 0.01),
    () => new ThresholdAscentPolicy(500, 0.05),
    () => new ThresholdAscentPolicy(500, 0.1),
    () => new ThresholdAscentPolicy(500, 0.2),
    //() => new ThresholdAscentPolicy(5000, 0.01),
    //() => new ThresholdAscentPolicy(10000, 0.01),
  };

  // Each factory yields the problem instance together with the maximum sentence length for it.
  var instanceFactories = new Func<Random, Tuple<IProblem, int>>[]
  {
    //(rand) => Tuple.Create((IProblem)new SantaFeAntProblem(), 17),
    //(rand) => Tuple.Create((IProblem)new FindPhrasesProblem(rand, 10, numPhrases:5, phraseLen:3, numOptimalPhrases:5, numDecoyPhrases:0, correctReward:1, decoyReward:0, phrasesAsSets:false ), 15),
    //(rand) => Tuple.Create((IProblem)new FindPhrasesProblem(rand, 10, numPhrases:5, phraseLen:3, numOptimalPhrases:5, numDecoyPhrases:0, correctReward:1, decoyReward:0, phrasesAsSets:true ), 15),
    //(rand) => Tuple.Create((IProblem)new FindPhrasesProblem(rand, 10, numPhrases:5, phraseLen:3, numOptimalPhrases:5, numDecoyPhrases:200, correctReward:1, decoyReward:0.5, phrasesAsSets:false), 15),
    //(rand) => Tuple.Create((IProblem)new FindPhrasesProblem(rand, 10, numPhrases:5, phraseLen:3, numOptimalPhrases:5, numDecoyPhrases:200, correctReward:1, decoyReward:0.5, phrasesAsSets:true), 15),
    (rand) => Tuple.Create((IProblem)new SymbolicRegressionPoly10Problem(), 23)
  };

  // The degree of parallelism does not depend on the grid point, so the options are created once.
  var options = new ParallelOptions { MaxDegreeOfParallelism = 4 };

  foreach (var instanceFactory in instanceFactories) {
    // Only the first value of each of the two arrays below is active; the remaining
    // candidates are kept inside the block comments so they can be re-enabled easily.
    foreach (var useCanonical in new bool[] { true /*, false */ }) {
      foreach (var randomTries in new int[] { 0 /*, 1, 10, 5, 100, 500, 1000 */ }) {
        foreach (var policyFactory in policyFactories) {
          var myRandomTries = randomTries;
          // Every configuration starts from the same seed, so all configurations draw the same
          // sequence of per-repetition seeds.
          var localRand = new Random(localRandSeed);
          Parallel.For(0, reps, options, (i) => {
            // Random is not thread-safe, therefore the per-repetition seed is drawn under a lock.
            Random myLocalRand;
            lock (localRand) {
              myLocalRand = new Random(localRand.Next());
            }

            int iterations = 0;
            var globalStatistics = new SentenceSetStatistics();

            // var problem = new SymbolicRegressionPoly10Problem();
            // var problem = new SantaFeAntProblem();
            //var problem = new PalindromeProblem();
            //var problem = new HardPalindromeProblem();
            //var problem = new RoyalPairProblem();
            //var problem = new EvenParityProblem();
            // var alg = new MctsSampler(problem.Item1, problem.Item2, myLocalRand, myRandomTries, policy());
            var instance = instanceFactory(myLocalRand);
            var problem = instance.Item1;
            var maxLen = instance.Item2;
            //var alg = new SequentialSearch(problem, maxLen, myLocalRand, myRandomTries,
            //  new GenericGrammarPolicy(problem, policyFactory(), useCanonical));
            var alg = new SequentialSearch(problem, maxLen, myLocalRand,
              myRandomTries,
              new GenericFunctionApproximationGrammarPolicy(problem,
                useCanonical));
            //var alg = new ExhaustiveBreadthFirstSearch(problem, 25);
            //var alg = new AlternativesContextSampler(problem, 25);

            alg.SolutionEvaluated += (sentence, quality) => {
              iterations++;
              globalStatistics.AddSentence(sentence, quality);
              if (iterations % 1000 == 0) {
                Console.WriteLine("{0,3} {1,5} \"{2,25}\" {3} {4} {5}", i, myRandomTries, policyFactory(), useCanonical, problem.ToString(), globalStatistics);
              }
            };
            alg.FoundNewBestSolution += (sentence, quality) => {
              //Console.WriteLine("{0,5} {1,25} {2} {3}",
              //  myRandomTries, policyFactory(), useCanonical,
              //  globalStatistics);
            };

            alg.Run(maxIterations);
          });
        }
      }
    }
  }
}
175
/// <summary>
/// Interactive demo: runs a single sequential search on the poly-10 symbolic regression
/// problem for up to <c>maxIterations</c> evaluations, refreshes the solver statistics on the
/// console every 1000 evaluations, and finally prints the collected sentence statistics
/// together with timing information.
/// </summary>
private static void RunDemo() {
  // TODO: unify MCTS, TD and ContextMCTS Solvers (stateInfos)
  // TODO: test with eps-greedy using max instead of average as value (seems to work well for symb-reg! explore further!)
  // TODO: separate value function from policy
  // TODO: why does the old implementation of GaussianThompson work better for SantaFe than the new one? See comparison: old vs. new implementation of GaussianThompsonSampling
  // TODO: why does GaussianThompsonSampling work so well with MCTS for the artificial ant problem?
  // TODO: research thompson sampling for max bandit?
  // TODO: verify TA implementation using example from the original paper
  // TODO: implement thompson sampling for gaussian mixture models
  // TODO: simultaneous modelling of the transformed target variable (y, 1/y, log(y), exp(y), sqrt(y), ...)
  // TODO: compare for complete-randomly: generating sentences that are as short as possible vs. simply choosing alternatives at random
  // TODO: reward discounting (for reward distributions that change over time); create a dedicated unit test for it
  // TODO: constant optimization


  int maxIterations = 1000000;
  int iterations = 0;
  var sw = new Stopwatch();

  var globalStatistics = new SentenceSetStatistics();
  var random = new Random();


  //var problem = new RoyalSequenceProblem(random, 10, 30, 2, 1, 0);
  // var phraseLen = 3;
  // var numPhrases = 5;
  // var problem = new RoyalPhraseSequenceProblem(random, 10, numPhrases, phraseLen: phraseLen, numCorrectPhrases: 1, correctReward: 1, incorrectReward: 0.0, phrasesAsSets: false);

  //var phraseLen = 3;
  //var numPhrases = 5;
  //var problem = new FindPhrasesProblem(random, 10, numPhrases, phraseLen, numOptimalPhrases: numPhrases, numDecoyPhrases: 0, correctReward: 1.0, decoyReward: 0, phrasesAsSets: false);

  // good results for symb-reg
  // prev results: e.g. 10 randomtries and EpsGreedyPolicy(0.2, (aInfo)=>aInfo.MaxReward)
  // 2015 01 19: grid test with canonical states:
  // - EpsGreedyPolicy(0.20,max)
  // - GenericThompsonSamplingPolicy("")
  // - UCTPolicy(0.10) (5 of 5 runs, 35000 iters avg.), 10 successful runs of 10 with rand-tries 0, with 40000 iters 9 / 10, with 30000 1 / 10
  // 2015 01 22: symb-reg: grid test on find-phrases problem showed good results for UCB1TunedPolicy and SequentialSearch with canonical states
  // - symb-reg: consistent results with UCB1Tuned. finds optimal solution in ~50k iters (new GenericGrammarPolicy(problem, new UCB1TunedPolicy(), true));
  // 2015 01 23: grid test with canonical states:
  // - UCTPolicy(0.10) and UCBNormalPolicy 10/10 optimal solutions with max. 50k iters, slightly worse: generic-thompson with variable sigma and boltzmann exploration (100)


  // good results for artificial ant:
  // prev results:
  // - var alg = new MctsSampler(problem, 17, random, 1, (rand, numActions) => new ThresholdAscentPolicy(numActions, 500, 0.01));
  // - GaussianModelWithUnknownVariance (and Q= 0.99-quantil) also works well for Ant
  // 2015 01 19: grid test with canonical states (non-canonical slightly worse)
  // - ant: Threshold Ascent (best 100, 0.01; all variants relatively good)
  // - ant: Policies where the variance has a large weight compared to the mean? (Gaussian(compatible), Gaussian with fixed variance, UCT with large c, all TA)
  // - ant: UCB1Tuned with canonical states also works very well for the artificial ant! constistent solutions in less than 10k iters

  var problem = new SymbolicRegressionPoly10Problem();
  //var problem = new SantaFeAntProblem();
  //var problem = new SymbolicRegressionProblem(random, "Tower");
  //var problem = new PalindromeProblem();
  //var problem = new HardPalindromeProblem();
  //var problem = new RoyalPairProblem();
  //var problem = new EvenParityProblem();
  // symbreg length = 11 q = 0.824522210419616
  //var alg = new MctsSampler(problem, 23, random, 0, new BoltzmannExplorationPolicy(100));
  //var alg = new MctsSampler(problem, 23, random, 0, new EpsGreedyPolicy(0.1));
  //var alg = new SequentialSearch(problem, 23, random, 0,
  //  new HeuristicLab.Algorithms.Bandits.GrammarPolicies.QLearningGrammarPolicy(problem, new BoltzmannExplorationPolicy(10),
  //    1, 1, true));
  //var alg = new SequentialSearch(problem, 23, random, 0,
  //  new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericContextualGrammarPolicy(problem, new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1, 1)), true));
  var alg = new SequentialSearch(problem, 23, random, 0,
    new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericFunctionApproximationGrammarPolicy(problem, true));
  //var alg = new MctsQLearningSampler(problem, sentenceLen, random, 0, null);
  //var alg = new MctsQLearningSampler(problem, 30, random, 0, new EpsGreedyPolicy(0.2));
  //var alg = new MctsContextualSampler(problem, 23, random, 0); // must visit each canonical solution only once
  //var alg = new TemporalDifferenceTreeSearchSampler(problem, 30, random, 1);
  //var alg = new ExhaustiveBreadthFirstSearch(problem, 7);
  //var alg = new AlternativesContextSampler(problem, random, 17, 4, (rand, numActions) => new RandomPolicy(rand, numActions));
  //var alg = new ExhaustiveDepthFirstSearch(problem, 17);
  // var alg = new AlternativesSampler(problem, 17);
  // var alg = new RandomSearch(problem, random, 17);
  //var alg = new ExhaustiveRandomFirstSearch(problem, random, 17);

  alg.FoundNewBestSolution += (sentence, quality) => {
    //Console.WriteLine("{0}", globalStatistics);
    //Console.ReadLine();
  };
  alg.SolutionEvaluated += (sentence, quality) => {
    iterations++;
    globalStatistics.AddSentence(sentence, quality);

    if (iterations % 1000 == 0) {
      // Redraw the statistics in place; a full clear only every 10000 evaluations.
      if (iterations % 10000 == 0) {
        Console.Clear();
      }
      Console.SetCursorPosition(0, 0);
      alg.PrintStats();
    }

    //Console.WriteLine(sentence);

    //if (iterations % 10000 == 0) {
    //  Console.WriteLine("{0}", globalStatistics);
    //}
  };


  sw.Start();

  alg.Run(maxIterations);

  sw.Stop();

  Console.Clear();
  alg.PrintStats();
  Console.WriteLine(globalStatistics);
  // The last column is labelled ns/sol: 1 ms = 1e6 ns. The previous factor of 1000 produced
  // microseconds per solution under a nanosecond label.
  Console.WriteLine("{0:F2} sec {1,10:F1} sols/sec {2,10:F1} ns/sol",
    sw.Elapsed.TotalSeconds,
    maxIterations / sw.Elapsed.TotalSeconds,
    sw.Elapsed.TotalMilliseconds * 1e6 / maxIterations);
}
293
/// <summary>
/// Placeholder for a single genetic-programming demo run. The method has no behavior yet.
/// </summary>
public static void RunGpDemo() {
  // The stub previously declared these values as locals without using them
  // (which only produced compiler warnings); they are kept here as a note:
  //   seed = 31415
  //   maxIterations = 100000
}
300
301
/// <summary>
/// Grid test for the tree-based GP variants. For every combination of population size,
/// mutation rate, maximum solution size and problem, <c>nReps</c> repetitions are executed;
/// in each repetition standard GP and offspring-selection GP are run on a fresh problem
/// instance with the same solver seed and the same evaluation budget.
/// </summary>
private static void RunGpGridTest() {
  const int nReps = 20;
  const int seed = 31415;
  const int maxIters = 100000;
  var rand = new Random(seed);
  var problemFactories = new Func<ISymbolicExpressionTreeProblem>[]
  {
    () => new SantaFeAntProblem(),
    () => new SymbolicRegressionPoly10Problem(),
  };
  foreach (var popSize in new int[] { 50, 100, 250, 500, 1000, 2500, 5000 }) {
    // Only 0.15 is active. The block comments must be closed around it: with a single
    // unterminated comment the array is empty and the whole grid test silently does nothing.
    foreach (var mutationRate in new double[] { /* 0.05, 0.10, */ 0.15 /*, 0.25, 0.3 */ }) {
      foreach (var maxSize in new int[] { 30, 50, 100 }) {
        foreach (var problemFactory in problemFactories) {
          for (int i = 0; i < nReps; i++) {
            // Both GP variants receive the same seed for this repetition.
            var solverSeed = rand.Next();
            {
              var prob = problemFactory();
              RunStandardGP(prob, solverSeed, maxIters, popSize, mutationRate, maxSize);
            }
            {
              var prob = problemFactory();
              RunOSGP(prob, solverSeed, maxIters, popSize, mutationRate, maxSize);
            }
          }
        }
      }
    }
  }
}
331
/// <summary>
/// Runs <c>StandardGP</c> on the given problem and prints the collected sentence statistics
/// every 10000 evaluations and once more after the run has finished.
/// </summary>
/// <param name="prob">Problem instance to solve.</param>
/// <param name="solverSeed">Seed for the solver's random number generator.</param>
/// <param name="maxIters">Budget passed to the solver's <c>Run</c> method.</param>
/// <param name="popSize">Population size.</param>
/// <param name="mutationRate">Mutation rate.</param>
/// <param name="maxSize">Size limit; the solver's size and depth limits are set to this value plus two.</param>
private static void RunStandardGP(ISymbolicExpressionTreeProblem prob, int solverSeed, int maxIters, int popSize, double mutationRate, int maxSize) {
  var stats = new SentenceSetStatistics(prob.BestKnownQuality(maxSize));
  var solver = new StandardGP(prob, new Random(solverSeed));
  int evaluated = 0;

  // One place for the progress line, used during the run and for the final summary.
  Action report = () => Console.WriteLine("\"{0,25}\" \"{1,25}\" {2}", solver, prob, stats);

  solver.SolutionEvaluated += (sentence, quality) => {
    evaluated++;
    stats.AddSentence(sentence, quality);

    bool reportDue = evaluated % 10000 == 0;
    if (reportDue) {
      report();
    }
  };

  int sizeLimit = maxSize + 2;
  solver.PopulationSize = popSize;
  solver.MutationRate = mutationRate;
  solver.MaxSolutionSize = sizeLimit;
  solver.MaxSolutionDepth = sizeLimit;

  var stopwatch = Stopwatch.StartNew();
  solver.Run(maxIters);
  stopwatch.Stop();

  report();

  // Timing output is currently disabled:
  // Console.WriteLine("{0:F2} sec {1,10:F1} sols/sec {2,10:F1} ns/sol",
  //   stopwatch.Elapsed.TotalSeconds,
  //   maxIters / (double)stopwatch.Elapsed.TotalSeconds,
  //   (double)stopwatch.ElapsedMilliseconds * 1000 / maxIters);
}
364
/// <summary>
/// Runs <c>OffspringSelectionGP</c> on the given problem and prints the collected sentence
/// statistics every 10000 evaluations and once more after the run has finished.
/// </summary>
/// <param name="prob">Problem instance to solve.</param>
/// <param name="solverSeed">Seed for the solver's random number generator.</param>
/// <param name="maxIters">Budget passed to the solver's <c>Run</c> method.</param>
/// <param name="popSize">Population size.</param>
/// <param name="mutationRate">Mutation rate.</param>
/// <param name="maxSize">Size limit; the solver's size and depth limits are set to this value plus two.</param>
private static void RunOSGP(ISymbolicExpressionTreeProblem prob, int solverSeed, int maxIters, int popSize, double mutationRate, int maxSize) {
  var stats = new SentenceSetStatistics(prob.BestKnownQuality(maxSize));
  var solver = new OffspringSelectionGP(prob, new Random(solverSeed));
  int evaluated = 0;

  // One place for the progress line, used during the run and for the final summary.
  Action report = () => Console.WriteLine("\"{0,25}\" \"{1,25}\" {2}", solver, prob, stats);

  solver.SolutionEvaluated += (sentence, quality) => {
    evaluated++;
    stats.AddSentence(sentence, quality);

    bool reportDue = evaluated % 10000 == 0;
    if (reportDue) {
      report();
    }
  };

  int sizeLimit = maxSize + 2;
  solver.PopulationSize = popSize;
  solver.MutationRate = mutationRate;
  solver.MaxSolutionSize = sizeLimit;
  solver.MaxSolutionDepth = sizeLimit;

  var stopwatch = Stopwatch.StartNew();
  solver.Run(maxIters);
  stopwatch.Stop();

  report();

  // Timing output is currently disabled:
  // Console.WriteLine("{0:F2} sec {1,10:F1} sols/sec {2,10:F1} ns/sol",
  //   stopwatch.Elapsed.TotalSeconds,
  //   maxIters / (double)stopwatch.Elapsed.TotalSeconds,
  //   (double)stopwatch.ElapsedMilliseconds * 1000 / maxIters);
}
397	}
398	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences