source: branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs @ 11895

Last change on this file since 11895 was 11895, checked in by gkronber, 7 years ago

#2283: constant opt, expressioncompiler, autodiff, fixes in GP solvers

File size: 18.3 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Globalization;
5using System.Runtime.Remoting.Messaging;
6using System.Text;
7using System.Threading;
8using System.Threading.Tasks;
9using HeuristicLab.Algorithms.Bandits;
10using HeuristicLab.Algorithms.Bandits.BanditPolicies;
11using HeuristicLab.Algorithms.Bandits.GrammarPolicies;
12using HeuristicLab.Algorithms.Bandits.Models;
13using HeuristicLab.Algorithms.GeneticProgramming;
14using HeuristicLab.Algorithms.GrammaticalOptimization;
15using HeuristicLab.Problems.GrammaticalOptimization;
16using HeuristicLab.Problems.GrammaticalOptimization.SymbReg;
17using BoltzmannExplorationPolicy = HeuristicLab.Algorithms.Bandits.BanditPolicies.BoltzmannExplorationPolicy;
18using EpsGreedyPolicy = HeuristicLab.Algorithms.Bandits.BanditPolicies.EpsGreedyPolicy;
19using IProblem = HeuristicLab.Problems.GrammaticalOptimization.IProblem;
20using RandomPolicy = HeuristicLab.Algorithms.Bandits.BanditPolicies.RandomPolicy;
21using UCTPolicy = HeuristicLab.Algorithms.Bandits.BanditPolicies.UCTPolicy;
22
23namespace Main {
24  class Program {
25    static void Main(string[] args) {
26      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
27
28      //RunDemo();
29      RunGpDemo();
30      // RunGridTest();
31      //RunGpGridTest();
32    }
33
    /// <summary>
    /// Grid experiment over bandit policies for grammatical optimization.
    /// For every combination of problem instance, canonical-state setting,
    /// number of initial random tries and bandit policy, <c>reps</c>
    /// independent runs of SequentialSearch are executed (at most four in
    /// parallel). Aggregated statistics are printed every 1000 evaluated
    /// sentences.
    /// NOTE(review): the active code constructs a
    /// GenericFunctionApproximationGrammarPolicy that does NOT take the
    /// bandit policy; <c>policyFactory()</c> is only used in the log output
    /// (the policy-based variant is commented out below) -- confirm this is
    /// intentional.
    /// </summary>
    private static void RunGridTest() {
      int maxIterations = 70000; // for poly-10 with 50000 evaluations no successful try with hl yet
      //var globalRandom = new Random(31415);
      var localRandSeed = 31415; // base seed; each repetition derives its own seed from this
      var reps = 30;             // independent repetitions per configuration

      // factories rather than shared instances so every configuration gets a
      // fresh, stateless policy object
      var policyFactories = new Func<IBanditPolicy>[]
        {
         () => new RandomPolicy(),
          () => new ActiveLearningPolicy(),
         () => new EpsGreedyPolicy(0.01, (aInfo)=> aInfo.MaxReward, "max"),
         () => new EpsGreedyPolicy(0.05, (aInfo)=> aInfo.MaxReward, "max"),
         () => new EpsGreedyPolicy(0.1, (aInfo)=> aInfo.MaxReward, "max"),
         () => new EpsGreedyPolicy(0.2, (aInfo)=> aInfo.MaxReward, "max"),
         //() => new GaussianThompsonSamplingPolicy(),
         () => new GaussianThompsonSamplingPolicy(true),
         () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1)),
         () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1, 1)),
         //() => new BernoulliThompsonSamplingPolicy(),
         () => new GenericThompsonSamplingPolicy(new BernoulliModel(1, 1)),
         () => new EpsGreedyPolicy(0.01),
         () => new EpsGreedyPolicy(0.05),
         () => new EpsGreedyPolicy(0.1),
         () => new EpsGreedyPolicy(0.2),
         () => new EpsGreedyPolicy(0.5),
         () => new UCTPolicy(0.01),
         () => new UCTPolicy(0.05),
         () => new UCTPolicy(0.1),
         () => new UCTPolicy(0.5),
         () => new UCTPolicy(1),
         () => new UCTPolicy(2),
         () => new UCTPolicy( 5),
         () => new UCTPolicy( 10),
         () => new ModifiedUCTPolicy(0.01),
         () => new ModifiedUCTPolicy(0.05),
         () => new ModifiedUCTPolicy(0.1),
         () => new ModifiedUCTPolicy(0.5),
         () => new ModifiedUCTPolicy(1),
         () => new ModifiedUCTPolicy(2),
         () => new ModifiedUCTPolicy( 5),
         () => new ModifiedUCTPolicy( 10),
         () => new UCB1Policy(),
         () => new UCB1TunedPolicy(),
         () => new UCBNormalPolicy(),
         () => new BoltzmannExplorationPolicy(1),
         () => new BoltzmannExplorationPolicy(10),
         () => new BoltzmannExplorationPolicy(20),
         () => new BoltzmannExplorationPolicy(100),
         () => new BoltzmannExplorationPolicy(200),
         () => new BoltzmannExplorationPolicy(500),
         () => new ChernoffIntervalEstimationPolicy( 0.01),
         () => new ChernoffIntervalEstimationPolicy( 0.05),
         () => new ChernoffIntervalEstimationPolicy( 0.1),
         () => new ChernoffIntervalEstimationPolicy( 0.2),
         () => new ThresholdAscentPolicy(5, 0.01),
         () => new ThresholdAscentPolicy(5, 0.05),
         () => new ThresholdAscentPolicy(5, 0.1),
         () => new ThresholdAscentPolicy(5, 0.2),
         () => new ThresholdAscentPolicy(10, 0.01),
         () => new ThresholdAscentPolicy(10, 0.05),
         () => new ThresholdAscentPolicy(10, 0.1),
         () => new ThresholdAscentPolicy(10, 0.2),
         () => new ThresholdAscentPolicy(50, 0.01),
         () => new ThresholdAscentPolicy(50, 0.05),
         () => new ThresholdAscentPolicy(50, 0.1),
         () => new ThresholdAscentPolicy(50, 0.2),
         () => new ThresholdAscentPolicy(100, 0.01),
         () => new ThresholdAscentPolicy(100, 0.05),
         () => new ThresholdAscentPolicy(100, 0.1),
         () => new ThresholdAscentPolicy(100, 0.2),
         () => new ThresholdAscentPolicy(500, 0.01),
         () => new ThresholdAscentPolicy(500, 0.05),
         () => new ThresholdAscentPolicy(500, 0.1),
         () => new ThresholdAscentPolicy(500, 0.2),
         //() => new ThresholdAscentPolicy(5000, 0.01),
         //() => new ThresholdAscentPolicy(10000, 0.01),
        };

      // each factory yields a problem instance together with the maximum
      // sentence length to use for that problem
      var instanceFactories = new Func<Random, Tuple<IProblem, int>>[]
      {
        //(rand) => Tuple.Create((IProblem)new SantaFeAntProblem(), 17),
        //(rand) => Tuple.Create((IProblem)new FindPhrasesProblem(rand, 10, numPhrases:5, phraseLen:3, numOptimalPhrases:5, numDecoyPhrases:0, correctReward:1, decoyReward:0, phrasesAsSets:false ), 15),
        //(rand) => Tuple.Create((IProblem)new FindPhrasesProblem(rand, 10, numPhrases:5, phraseLen:3, numOptimalPhrases:5, numDecoyPhrases:0, correctReward:1, decoyReward:0, phrasesAsSets:true ), 15),
        //(rand) => Tuple.Create((IProblem)new FindPhrasesProblem(rand, 10, numPhrases:5, phraseLen:3, numOptimalPhrases:5, numDecoyPhrases:200, correctReward:1, decoyReward:0.5, phrasesAsSets:false), 15),
        //(rand) => Tuple.Create((IProblem)new FindPhrasesProblem(rand, 10, numPhrases:5, phraseLen:3, numOptimalPhrases:5, numDecoyPhrases:200, correctReward:1, decoyReward:0.5, phrasesAsSets:true), 15),
        (rand) => Tuple.Create((IProblem)new SymbolicRegressionPoly10Problem(), 23)
      };

      foreach (var instanceFactory in instanceFactories) {
        foreach (var useCanonical in new bool[] { true /*, false */}) {
          // note: the nested comments collapse this array to { 0 }
          foreach (var randomTries in new int[] { 0 /*, 1, 10 /*, /* 5, 100 /*, 500, 1000 */}) {
            foreach (var policyFactory in policyFactories) {
              var myRandomTries = randomTries; // local copy captured by the lambda below
              // the same base seed per configuration makes the repetition
              // seeds reproducible across configurations
              var localRand = new Random(localRandSeed);
              var options = new ParallelOptions();
              options.MaxDegreeOfParallelism = 4;
              Parallel.For(0, reps, options, (i) => {
                Random myLocalRand;
                // derive a per-repetition RNG; the lock is required because
                // System.Random is not thread-safe
                lock (localRand)
                  myLocalRand = new Random(localRand.Next());

                int iterations = 0;
                var globalStatistics = new SentenceSetStatistics();

                // var problem = new SymbolicRegressionPoly10Problem();
                // var problem = new SantaFeAntProblem();
                //var problem = new PalindromeProblem();
                //var problem = new HardPalindromeProblem();
                //var problem = new RoyalPairProblem();
                //var problem = new EvenParityProblem();
                // var alg = new MctsSampler(problem.Item1, problem.Item2, myLocalRand, myRandomTries, policy());
                var instance = instanceFactory(myLocalRand);
                var problem = instance.Item1;
                var maxLen = instance.Item2;
                //var alg = new SequentialSearch(problem, maxLen, myLocalRand, myRandomTries,
                //  new GenericGrammarPolicy(problem, policyFactory(), useCanonical));
                var alg = new SequentialSearch(problem, maxLen, myLocalRand,
                  myRandomTries,
                  new GenericFunctionApproximationGrammarPolicy(problem,
                    useCanonical));
                //var alg = new ExhaustiveBreadthFirstSearch(problem, 25);
                //var alg = new AlternativesContextSampler(problem, 25);

                // print aggregated progress every 1000 evaluated sentences
                alg.SolutionEvaluated += (sentence, quality) => {
                  iterations++;
                  globalStatistics.AddSentence(sentence, quality);
                  if (iterations % 1000 == 0) {
                    Console.WriteLine("{0,3} {1,5} \"{2,25}\" {3} {4} {5}", i, myRandomTries, policyFactory(), useCanonical, problem.ToString(), globalStatistics);
                  }
                };
                alg.FoundNewBestSolution += (sentence, quality) => {
                  //Console.WriteLine("{0,5} {1,25} {2} {3}",
                  //  myRandomTries, policyFactory(), useCanonical,
                  //  globalStatistics);
                };

                alg.Run(maxIterations);
              });
            }
          }
        }
      }
    }
177
178    private static void RunDemo() {
179      // TODO: unify MCTS, TD and ContextMCTS Solvers (stateInfos)
180      // TODO: test with eps-greedy using max instead of average as value (seems to work well for symb-reg! explore further!)
181      // TODO: separate value function from policy
182      // TODO: warum funktioniert die alte Implementierung von GaussianThompson besser fÃŒr SantaFe als neue? Siehe Vergleich: alte vs. neue implementierung GaussianThompsonSampling
183      // TODO: why does GaussianThompsonSampling work so well with MCTS for the artificial ant problem?
184      // TODO: research thompson sampling for max bandit?
185      // TODO: verify TA implementation using example from the original paper     
186      // TODO: implement thompson sampling for gaussian mixture models
187      // TODO: gleichzeitige modellierung von transformierter zielvariable (y, 1/y, log(y), exp(y), sqrt(y), ...)
188      // TODO: vergleich bei complete-randomly möglichst kurze sÀtze generieren vs. einfach zufÀllig alternativen wÀhlen
189      // TODO: reward discounting (fÃŒr verÀnderliche reward distributions ÃŒber zeit). speziellen unit-test dafÃŒr erstellen
190      // TODO: constant optimization
191
192
193      int maxIterations = 1000000;
194      int iterations = 0;
195      var sw = new Stopwatch();
196
197      var globalStatistics = new SentenceSetStatistics();
198      var random = new Random();
199
200
201      //var problem = new RoyalSequenceProblem(random, 10, 30, 2, 1, 0);
202      // var phraseLen = 3;
203      // var numPhrases = 5;
204      // var problem = new RoyalPhraseSequenceProblem(random, 10, numPhrases, phraseLen: phraseLen, numCorrectPhrases: 1, correctReward: 1, incorrectReward: 0.0, phrasesAsSets: false);
205
206      //var phraseLen = 3;
207      //var numPhrases = 5;
208      //var problem = new FindPhrasesProblem(random, 10, numPhrases, phraseLen, numOptimalPhrases: numPhrases, numDecoyPhrases: 0, correctReward: 1.0, decoyReward: 0, phrasesAsSets: false);
209
210      // good results for symb-reg
211      // prev results: e.g. 10 randomtries and EpsGreedyPolicy(0.2, (aInfo)=>aInfo.MaxReward)
212      // 2015 01 19: grid test with canonical states:
213      // - EpsGreedyPolicy(0.20,max)
214      // - GenericThompsonSamplingPolicy("")
215      // - UCTPolicy(0.10) (5 of 5 runs, 35000 iters avg.), 10 successful runs of 10 with rand-tries 0, bei 40000 iters 9 / 10, bei 30000 1 / 10
216      // 2015 01 22: symb-reg: grid test on find-phrases problem showed good results for UCB1TunedPolicy and SequentialSearch with canonical states
217      // - symb-reg: consistent results with UCB1Tuned. finds optimal solution in ~50k iters (new GenericGrammarPolicy(problem, new UCB1TunedPolicy(), true));
218      // 2015 01 23: grid test with canonical states:
219      // - UCTPolicy(0.10) und UCBNormalPolicy 10/10 optimale Lösungen bei max. 50k iters, etwas schlechter: generic-thompson with variable sigma und bolzmannexploration (100)
220
221
222      // good results for artificial ant:
223      // prev results:
224      // - var alg = new MctsSampler(problem, 17, random, 1, (rand, numActions) => new ThresholdAscentPolicy(numActions, 500, 0.01));
225      // - GaussianModelWithUnknownVariance (and Q= 0.99-quantil) also works well for Ant
226      // 2015 01 19: grid test with canonical states (non-canonical slightly worse)
227      // - ant: Threshold Ascent (best 100, 0.01; all variants relatively good)
228      // - ant: Policies where the variance has a large weight compared to the mean? (Gaussian(compatible), Gaussian with fixed variance, UCT with large c, alle TA)
229      // - ant: UCB1Tuned with canonical states also works very well for the artificial ant! constistent solutions in less than 10k iters     
230
231      var problem = new SymbolicRegressionPoly10Problem();
232      //var problem = new SantaFeAntProblem();
233      //var problem = new SymbolicRegressionProblem(random, "Tower");
234      //var problem = new PalindromeProblem();
235      //var problem = new HardPalindromeProblem();
236      //var problem = new RoyalPairProblem();
237      //var problem = new EvenParityProblem();
238      // symbreg length = 11 q = 0.824522210419616
239      //var alg = new MctsSampler(problem, 23, random, 0, new BoltzmannExplorationPolicy(100));
240      //var alg = new MctsSampler(problem, 23, random, 0, new EpsGreedyPolicy(0.1));
241      //var alg = new SequentialSearch(problem, 23, random, 0,
242      //  new HeuristicLab.Algorithms.Bandits.GrammarPolicies.QLearningGrammarPolicy(problem, new BoltzmannExplorationPolicy(10),
243      //    1, 1, true));
244      //var alg = new SequentialSearch(problem, 23, random, 0,
245      //  new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericContextualGrammarPolicy(problem, new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1, 1)), true));
246      var alg = new SequentialSearch(problem, 23, random, 0,
247        new HeuristicLab.Algorithms.Bandits.GrammarPolicies.GenericFunctionApproximationGrammarPolicy(problem, true));
248      //var alg = new MctsQLearningSampler(problem, sentenceLen, random, 0, null);
249      //var alg = new MctsQLearningSampler(problem, 30, random, 0, new EpsGreedyPolicy(0.2));
250      //var alg = new MctsContextualSampler(problem, 23, random, 0); // must visit each canonical solution only once
251      //var alg = new TemporalDifferenceTreeSearchSampler(problem, 30, random, 1);
252      //var alg = new ExhaustiveBreadthFirstSearch(problem, 7);
253      //var alg = new AlternativesContextSampler(problem, random, 17, 4, (rand, numActions) => new RandomPolicy(rand, numActions));
254      //var alg = new ExhaustiveDepthFirstSearch(problem, 17);
255      // var alg = new AlternativesSampler(problem, 17);
256      // var alg = new RandomSearch(problem, random, 17);
257      //var alg = new ExhaustiveRandomFirstSearch(problem, random, 17);
258
259      alg.FoundNewBestSolution += (sentence, quality) => {
260        //Console.WriteLine("{0}", globalStatistics);
261        //Console.ReadLine();
262      };
263      alg.SolutionEvaluated += (sentence, quality) => {
264        iterations++;
265        globalStatistics.AddSentence(sentence, quality);
266
267        if (iterations % 1000 == 0) {
268          if (iterations % 10000 == 0) Console.Clear();
269          Console.SetCursorPosition(0, 0);
270          alg.PrintStats();
271        }
272
273        //Console.WriteLine(sentence);
274
275        //if (iterations % 10000 == 0) {
276        //  Console.WriteLine("{0}", globalStatistics);
277        //}
278      };
279
280
281      sw.Start();
282
283      alg.Run(maxIterations);
284
285      sw.Stop();
286
287      Console.Clear();
288      alg.PrintStats();
289      Console.WriteLine(globalStatistics);
290      Console.WriteLine("{0:F2} sec {1,10:F1} sols/sec {2,10:F1} ns/sol",
291        sw.Elapsed.TotalSeconds,
292        maxIterations / (double)sw.Elapsed.TotalSeconds,
293        (double)sw.ElapsedMilliseconds * 1000 / maxIterations);
294    }
295
296    public static void RunGpDemo() {
297      int iterations = 0;
298      const int seed = 31415;
299      const int maxIterations = 100000;
300
301      //var prob = new SymbolicRegressionProblem(new Random(31415), "Tower");
302      var prob = new SymbolicRegressionPoly10Problem();
303      var sgp = new OffspringSelectionGP(prob, new Random(seed), true);
304      RunGP(sgp, prob, 200000, 500, 0.15, 50);
305    }
306
307
308    private static void RunGpGridTest() {
309      const int nReps = 20;
310      const int seed = 31415;
311      const int maxIters = 200000;
312      var rand = new Random(seed);
313      var problemFactories = new Func<ISymbolicExpressionTreeProblem>[]
314      {
315        () => new SymbolicRegressionPoly10Problem(),
316        () => new SantaFeAntProblem(),
317      };
318      foreach (var popSize in new int[] { 50, 100, 250, 500, 1000, 2500, 5000, 10000 }) {
319        foreach (var mutationRate in new double[] { /* 0.05, /* 0.10, */ 0.15, /* 0.25, 0.3 */}) {
320          foreach (var maxSize in new int[] { 30, 50, 100, 150, 250 }) {
321            // skip experiments that are already done
322            if (popSize == 10000 || maxSize == 150 || maxSize == 250) {
323              foreach (var problemFactory in problemFactories)
324                for (int i = 0; i < nReps; i++) {
325                  var solverSeed = rand.Next();
326                  {
327                    var prob = problemFactory();
328                    var sgp = new StandardGP(prob, new Random(solverSeed));
329                    RunGP(sgp, prob, maxIters, popSize, mutationRate, maxSize);
330                  }
331                  // {
332                  //   var prob = problemFactory();
333                  //   var osgp = new OffspringSelectionGP(prob, new Random(solverSeed));
334                  //   RunGP(osgp, prob, maxIters, popSize, mutationRate, maxSize);
335                  // }
336                }
337            }
338          }
339        }
340      }
341    }
342
343    private static void RunGP(IGPSolver gp, ISymbolicExpressionTreeProblem prob, int maxIters, int popSize, double mutationRate, int maxSize) {
344      int iterations = 0;
345      var globalStatistics = new SentenceSetStatistics(prob.BestKnownQuality(maxSize));
346      var gpName = gp.GetType().Name;
347      var probName = prob.GetType().Name;
348      gp.SolutionEvaluated += (sentence, quality) => {
349        iterations++;
350        globalStatistics.AddSentence(sentence, quality);
351
352        if (iterations % 1000 == 0) {
353          Console.WriteLine("\"{0,25}\" {1} {2:N2} {3} \"{4,25}\" {5}", gpName, popSize, mutationRate, maxSize, probName, globalStatistics);
354        }
355      };
356
357      gp.PopulationSize = popSize;
358      gp.MutationRate = mutationRate;
359      gp.MaxSolutionSize = maxSize + 2;
360      gp.MaxSolutionDepth = maxSize + 2;
361
362      gp.Run(maxIters);
363    }
364  }
365}
Note: See TracBrowser for help on using the repository browser.