[11659] | 1 | using System;
|
---|
| 2 | using System.Collections.Generic;
|
---|
[11727] | 3 | using System.Data;
|
---|
[11659] | 4 | using System.Diagnostics;
|
---|
[11730] | 5 | using System.Globalization;
|
---|
[11659] | 6 | using System.Linq;
|
---|
| 7 | using System.Text;
|
---|
[11727] | 8 | using System.Threading.Tasks;
|
---|
| 9 | using HeuristicLab.Algorithms.Bandits;
|
---|
[11742] | 10 | using HeuristicLab.Algorithms.Bandits.BanditPolicies;
|
---|
[11730] | 11 | using HeuristicLab.Algorithms.Bandits.Models;
|
---|
[11659] | 12 | using HeuristicLab.Algorithms.GrammaticalOptimization;
|
---|
| 13 | using HeuristicLab.Problems.GrammaticalOptimization;
|
---|
[11732] | 14 | using HeuristicLab.Problems.GrammaticalOptimization.SymbReg;
|
---|
[11659] | 15 |
|
---|
namespace Main {
  /// <summary>
  /// Console driver for grammatical-optimization experiments.
  /// <see cref="RunDemo"/> runs a single interactive experiment; <see cref="RunGridTest"/>
  /// runs a parallel grid over bandit policies and problem configurations.
  /// </summary>
  class Program {
    /// <summary>
    /// Entry point. Forces the invariant culture on all threads so that numeric
    /// output (e.g. "0.5") is machine-readable regardless of the OS locale,
    /// then runs the demo experiment.
    /// </summary>
    static void Main(string[] args) {
      CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;

      RunDemo();
      //RunGridTest();
    }

    /// <summary>
    /// Runs a grid of experiments: for each (problem, sentence length) pair, each
    /// random-try count, and each bandit policy, performs <c>reps</c> independent
    /// repetitions of an MCTS run in parallel and prints progress statistics every
    /// 10000 evaluated sentences.
    /// </summary>
    private static void RunGridTest() {
      int maxIterations = 200000; // for poly-10 with 50000 evaluations no successful try with hl yet
      //var globalRandom = new Random(31415);
      var localRandSeed = 31415; // fixed seed so repetitions derive reproducible per-task seeds
      var reps = 8;

      // Factories (not instances) so each repetition gets a fresh, unshared policy object.
      var policies = new Func<IBanditPolicy>[]
        {
         () => new EpsGreedyPolicy(0.01, (aInfo)=> aInfo.MaxReward, "max"),
         () => new EpsGreedyPolicy(0.05, (aInfo)=> aInfo.MaxReward, "max"),
         () => new EpsGreedyPolicy(0.1, (aInfo)=> aInfo.MaxReward, "max"),
         () => new EpsGreedyPolicy(0.2, (aInfo)=> aInfo.MaxReward, "max"),
         //() => new GaussianThompsonSamplingPolicy(),
         () => new GaussianThompsonSamplingPolicy(true),
         () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1)),
         () => new GenericThompsonSamplingPolicy(new GaussianModel(0.5, 10, 1, 1)),
         //() => new BernoulliThompsonSamplingPolicy(),
         () => new GenericThompsonSamplingPolicy(new BernoulliModel(1, 1)),
         () => new RandomPolicy(),
         () => new EpsGreedyPolicy(0.01),
         () => new EpsGreedyPolicy(0.05),
         () => new EpsGreedyPolicy(0.1),
         () => new EpsGreedyPolicy(0.2),
         () => new EpsGreedyPolicy(0.5),
         () => new UCTPolicy(0.1),
         () => new UCTPolicy(0.5),
         () => new UCTPolicy(1),
         () => new UCTPolicy(2),
         () => new UCTPolicy( 5),
         () => new UCTPolicy( 10),
         () => new UCB1Policy(),
         () => new UCB1TunedPolicy(),
         () => new UCBNormalPolicy(),
         () => new BoltzmannExplorationPolicy(0.1),
         () => new BoltzmannExplorationPolicy(0.5),
         () => new BoltzmannExplorationPolicy(1),
         () => new BoltzmannExplorationPolicy(5),
         () => new BoltzmannExplorationPolicy(10),
         () => new BoltzmannExplorationPolicy(20),
         () => new BoltzmannExplorationPolicy(100),
         () => new ChernoffIntervalEstimationPolicy( 0.01),
         () => new ChernoffIntervalEstimationPolicy( 0.05),
         () => new ChernoffIntervalEstimationPolicy( 0.1),
         () => new ChernoffIntervalEstimationPolicy( 0.2),
         () => new ThresholdAscentPolicy(10, 0.01),
         () => new ThresholdAscentPolicy(10, 0.05),
         () => new ThresholdAscentPolicy(10, 0.1),
         () => new ThresholdAscentPolicy(10, 0.2),
         () => new ThresholdAscentPolicy(100, 0.01),
         () => new ThresholdAscentPolicy(100, 0.05),
         () => new ThresholdAscentPolicy(100, 0.1),
         () => new ThresholdAscentPolicy(100, 0.2),
         () => new ThresholdAscentPolicy(1000, 0.01),
         () => new ThresholdAscentPolicy(1000, 0.05),
         () => new ThresholdAscentPolicy(1000, 0.1),
         () => new ThresholdAscentPolicy(1000, 0.2),
         () => new ThresholdAscentPolicy(5000, 0.01),
         () => new ThresholdAscentPolicy(10000, 0.01),
        };

      // Each tuple pairs a problem with its maximum sentence length.
      foreach (var problem in new Tuple<IProblem, int>[]
        {
          //Tuple.Create((IProblem)new SantaFeAntProblem(), 17),
          Tuple.Create((IProblem)new SymbolicRegressionPoly10Problem(), 23),
        })
        foreach (var randomTries in new int[] { 1, 10, /* 5, 100 /*, 500, 1000 */}) {
          foreach (var policy in policies) {
            var myRandomTries = randomTries; // capture a per-iteration copy for the closure below
            var localRand = new Random(localRandSeed);
            var options = new ParallelOptions();
            options.MaxDegreeOfParallelism = 4;
            Parallel.For(0, reps, options, (i) => {
              //var t = Task.Run(() => {
              Random myLocalRand;
              // Random is not thread-safe; serialize seed generation so each parallel
              // repetition gets its own deterministic Random instance.
              lock (localRand)
                myLocalRand = new Random(localRand.Next());

              //for (int i = 0; i < reps; i++) {

              int iterations = 0;
              var globalStatistics = new SentenceSetStatistics();

              // var problem = new SymbolicRegressionPoly10Problem();
              // var problem = new SantaFeAntProblem();
              //var problem = new PalindromeProblem();
              //var problem = new HardPalindromeProblem();
              //var problem = new RoyalPairProblem();
              //var problem = new EvenParityProblem();
              var alg = new MctsSampler(problem.Item1, problem.Item2, myLocalRand, myRandomTries, policy()); // TODO: Make sure we generate the same random numbers for each experiment
              //var alg = new ExhaustiveBreadthFirstSearch(problem, 25);
              //var alg = new AlternativesContextSampler(problem, 25);

              alg.SolutionEvaluated += (sentence, quality) => {
                iterations++;
                globalStatistics.AddSentence(sentence, quality);
                // Progress report every 10000 evaluations; policy() here creates a fresh
                // instance only so its ToString identifies the configuration in the output.
                if (iterations % 10000 == 0) {
                  Console.WriteLine("{0,4} {1,7} {2,5} {3,25} {4}", alg.treeDepth, alg.treeSize, myRandomTries, policy(), globalStatistics);
                }
              };

              alg.Run(maxIterations);

              //Console.WriteLine("{0,5} {1} {2}", randomTries, policyFactory(1), globalStatistics);
              //}
              //});
              //tasks.Add(t);
            });
          }
        }
      //Task.WaitAll(tasks.ToArray());
    }

    /// <summary>
    /// Runs a single experiment (currently: MCTS-style contextual sampling on the
    /// poly-10 symbolic-regression problem), printing live statistics during the run
    /// and a summary (best solution, throughput) at the end.
    /// </summary>
    private static void RunDemo() {
      // TODO: test with eps-greedy using max instead of average as value (seems to work well for symb-reg! explore further!)
      // TODO: separate value function from policy
      // TODO: in contextual MCTS store a bandit info for each node in the _graph_ and also update all bandit infos of all parents
      // TODO: exhaustive search with priority list
      // TODO: why does the old implementation of GaussianThompson work better for SantaFe than the new one? See comparison: old vs. new implementation of GaussianThompsonSampling
      // TODO: why does GaussianThompsonSampling work so well with MCTS for the artificial ant problem?
      // TODO: how else can samplers be compared, i.e. what can be measured to estimate sampler quality (apart from quality and iterations until the best solution) => goal: faster iterations to a good result
      // TODO: research thompson sampling for max bandit?
      // TODO: extensive test of strategies for the k-armed max bandit
      // TODO: verify TA implementation using example from the original paper
      // TODO: separate policy from MCTS tree data structure to allow sharing of information over disconnected parts of the tree (semantic equivalence)
      // TODO: implement thompson sampling for gaussian mixture models
      // TODO: implement inspection for MCTS (possibly an interactive command line to show statistics from the tree)
      // TODO: implement ACO-style bandit policy
      // TODO: simultaneous modeling of transformed target variable (y, 1/y, log(y), exp(y), sqrt(y), ...)
      // TODO: compare generating the shortest possible sentences completely randomly vs. simply choosing alternatives at random
      // TODO: reward discounting (for reward distributions that change over time). create a dedicated unit test for this
      // TODO: constant optimization

      int maxIterations = 100000;
      int iterations = 0;
      var sw = new Stopwatch();
      double bestQuality = 0;
      string bestSentence = "";
      var globalStatistics = new SentenceSetStatistics();
      var random = new Random();

      var problem = new SymbolicRegressionPoly10Problem(); // good results e.g. 10 randomtries and EpsGreedyPolicy(0.2, (aInfo)=>aInfo.MaxReward)
      // Ant
      // good results e.g. with var alg = new MctsSampler(problem, 17, random, 1, (rand, numActions) => new ThresholdAscentPolicy(numActions, 500, 0.01));
      // GaussianModelWithUnknownVariance (and Q= 0.99-quantil) also works well for Ant
      //var problem = new SantaFeAntProblem();
      //var problem = new SymbolicRegressionProblem("Tower");
      //var problem = new PalindromeProblem();
      //var problem = new HardPalindromeProblem();
      //var problem = new RoyalPairProblem();
      //var problem = new EvenParityProblem();
      //var alg = new MctsSampler(problem, 23, random, 0, new GaussianThompsonSamplingPolicy(true));
      var alg = new MctsContextualSampler(problem, 23, random, 0); // TODO confirm: 23 is the max sentence length for poly-10 (matches RunGridTest)
      //var alg = new TemporalDifferenceTreeSearchSampler(problem, 17, random, 10, new EpsGreedyPolicy(0.1));
      //var alg = new ExhaustiveBreadthFirstSearch(problem, 17);
      //var alg = new AlternativesContextSampler(problem, random, 17, 4, (rand, numActions) => new RandomPolicy(rand, numActions));
      //var alg = new ExhaustiveDepthFirstSearch(problem, 17);
      // var alg = new AlternativesSampler(problem, 17);
      // var alg = new RandomSearch(problem, random, 17);
      // var alg = new ExhaustiveRandomFirstSearch(problem, random, 17);

      // Track the best solution found so far for the final summary line.
      alg.FoundNewBestSolution += (sentence, quality) => {
        bestQuality = quality;
        bestSentence = sentence;
        //Console.WriteLine("{0,4} {1,7} {2}", alg.treeDepth, alg.treeSize, globalStatistics);
        //Console.ReadLine();
      };
      alg.SolutionEvaluated += (sentence, quality) => {
        iterations++;
        globalStatistics.AddSentence(sentence, quality);
        // Redraw live statistics in place every 100 evaluations.
        if (iterations % 100 == 0) {
          //if (iterations % 1000 == 0) Console.Clear();
          Console.SetCursorPosition(0, 0);
          alg.PrintStats();
        }

        if (iterations % 10000 == 0) {
          //Console.WriteLine("{0,10} {1,10:F5} {2,10:F5} {3}", iterations, bestQuality, quality, sentence);
          //Console.WriteLine("{0,4} {1,7} {2}", alg.treeDepth, alg.treeSize, globalStatistics);
          //Console.WriteLine("{0,4} {1,7} {2}", alg.treeDepth, alg.treeSize, globalStatistics);
        }
      };

      sw.Start();

      alg.Run(maxIterations);

      sw.Stop();

      // Fixed typo ("soultion" -> "solution").
      Console.WriteLine("{0,10} Best solution: {1,10:F5} {2}", iterations, bestQuality, bestSentence);
      // Fixed unit label: ElapsedMilliseconds * 1000 / iterations yields microseconds
      // per solution (ms -> us), not nanoseconds, so the label now says us/sol.
      Console.WriteLine("{0:F2} sec {1,10:F1} sols/sec {2,10:F1} us/sol",
        sw.Elapsed.TotalSeconds,
        maxIterations / (double)sw.Elapsed.TotalSeconds,
        (double)sw.ElapsedMilliseconds * 1000 / maxIterations);
    }
  }
}
|
---|