Context Navigation

source: branches/HeuristicLab.Problems.GrammaticalOptimization/Main/Program.cs @ 11730

Visit:

Last change on this file since 11730 was 11730, checked in by gkronber, 9 years ago
#2283: several major extensions for grammatical optimization
File size: 10.5 KB

Rev	Line
[11659]	1	using System;
	2	using System.Collections.Generic;
[11727]	3	using System.Data;
[11659]	4	using System.Diagnostics;
[11730]	5	using System.Globalization;
[11659]	6	using System.Linq;
	7	using System.Text;
[11727]	8	using System.Threading.Tasks;
	9	using HeuristicLab.Algorithms.Bandits;
[11730]	10	using HeuristicLab.Algorithms.Bandits.Models;
[11659]	11	using HeuristicLab.Algorithms.GrammaticalOptimization;
	12	using HeuristicLab.Problems.GrammaticalOptimization;
	13
	14	namespace Main {
	15	class Program {
	16	static void Main(string[] args) {
[11730]	17	CultureInfo.DefaultThreadCurrentCulture = CultureInfo.InvariantCulture;
	18
	19	RunDemo();
	20	//RunGridTest();
[11727]	21	}
	22
	23	private static void RunGridTest() {
[11730]	24	int maxIterations = 100000; // for poly-10 with 50000 evaluations no successful try with hl yet
	25	// var globalRandom = new Random(31415);
	26	var localRandSeed = 31415;
	27	var reps = 20;
	28
	29	var policyFactories = new Func<Random, int, IPolicy>[]
[11727]	30	{
[11730]	31	(rand, numActions) => new GaussianThompsonSamplingPolicy(rand, numActions),
	32	(rand, numActions) => new BernoulliThompsonSamplingPolicy(rand, numActions),
	33	(rand, numActions) => new RandomPolicy(rand, numActions),
	34	(rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.01),
	35	(rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.05),
	36	(rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.1),
	37	(rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.2),
	38	(rand, numActions) => new EpsGreedyPolicy(rand, numActions, 0.5),
	39	(rand, numActions) => new UCTPolicy(numActions, 0.1),
	40	(rand, numActions) => new UCTPolicy(numActions, 0.5),
	41	(rand, numActions) => new UCTPolicy(numActions, 1),
	42	(rand, numActions) => new UCTPolicy(numActions, 2),
	43	(rand, numActions) => new UCTPolicy(numActions, 5),
	44	(rand, numActions) => new UCTPolicy(numActions, 10),
	45	(rand, numActions) => new UCB1Policy(numActions),
	46	(rand, numActions) => new UCB1TunedPolicy(numActions),
	47	(rand, numActions) => new UCBNormalPolicy(numActions),
	48	(rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 0.1),
	49	(rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 0.5),
	50	(rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 1),
	51	(rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 5),
	52	(rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 10),
	53	(rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 20),
	54	(rand, numActions) => new BoltzmannExplorationPolicy(rand, numActions, 100),
	55	(rand, numActions) => new ChernoffIntervalEstimationPolicy(numActions, 0.01),
	56	(rand, numActions) => new ChernoffIntervalEstimationPolicy(numActions, 0.05),
	57	(rand, numActions) => new ChernoffIntervalEstimationPolicy(numActions, 0.1),
	58	(rand, numActions) => new ChernoffIntervalEstimationPolicy(numActions, 0.2),
	59	(rand, numActions) => new ThresholdAscentPolicy(numActions, 10, 0.01),
	60	(rand, numActions) => new ThresholdAscentPolicy(numActions, 10, 0.05),
	61	(rand, numActions) => new ThresholdAscentPolicy(numActions, 10, 0.1),
	62	(rand, numActions) => new ThresholdAscentPolicy(numActions, 10, 0.2),
	63	(rand, numActions) => new ThresholdAscentPolicy(numActions, 100, 0.01),
	64	(rand, numActions) => new ThresholdAscentPolicy(numActions, 100, 0.05),
	65	(rand, numActions) => new ThresholdAscentPolicy(numActions, 100, 0.1),
	66	(rand, numActions) => new ThresholdAscentPolicy(numActions, 100, 0.2),
	67	(rand, numActions) => new ThresholdAscentPolicy(numActions, 1000, 0.01),
	68	(rand, numActions) => new ThresholdAscentPolicy(numActions, 1000, 0.05),
	69	(rand, numActions) => new ThresholdAscentPolicy(numActions, 1000, 0.1),
	70	(rand, numActions) => new ThresholdAscentPolicy(numActions, 1000, 0.2),
	71	(rand, numActions) => new ThresholdAscentPolicy(numActions, 5000, 0.01),
	72	(rand, numActions) => new ThresholdAscentPolicy(numActions, 10000, 0.01),
[11727]	73	};
	74
[11730]	75	var tasks = new List<Task>();
	76	foreach (var randomTries in new int[] { 1, 10, /* 5, 100 /, 500, 1000 /}) {
	77	foreach (var policyFactory in policyFactories) {
	78	var myPolicyFactory = policyFactory;
	79	var myRandomTries = randomTries;
	80	var localRand = new Random(localRandSeed);
	81	var options = new ParallelOptions();
	82	options.MaxDegreeOfParallelism = 1;
	83	Parallel.For(0, reps, options, (i) => {
	84	//var t = Task.Run(() => {
	85	Random myLocalRand;
	86	lock (localRand)
	87	myLocalRand = new Random(localRand.Next());
	88
	89	//for (int i = 0; i < reps; i++) {
	90
[11727]	91	int iterations = 0;
	92	var sw = new Stopwatch();
	93	var globalStatistics = new SentenceSetStatistics();
	94
[11730]	95	var problem = new SymbolicRegressionPoly10Problem();
	96	//var problem = new SantaFeAntProblem();
[11727]	97	//var problem = new PalindromeProblem();
	98	//var problem = new HardPalindromeProblem();
	99	//var problem = new RoyalPairProblem();
	100	//var problem = new EvenParityProblem();
[11730]	101	var alg = new MctsSampler(problem, 25, myLocalRand, myRandomTries, myPolicyFactory);
[11727]	102	//var alg = new ExhaustiveBreadthFirstSearch(problem, 25);
	103	//var alg = new AlternativesContextSampler(problem, 25);
	104
	105	alg.SolutionEvaluated += (sentence, quality) => {
	106	iterations++;
	107	globalStatistics.AddSentence(sentence, quality);
	108	if (iterations % 10000 == 0) {
[11730]	109	Console.WriteLine("{0,4} {1,7} {2,5} {3,25} {4}", alg.treeDepth, alg.treeSize, myRandomTries, myPolicyFactory(myLocalRand, 1), globalStatistics);
[11727]	110	}
	111	};
	112
	113	sw.Start();
	114
	115	alg.Run(maxIterations);
	116
	117	sw.Stop();
[11730]	118	//Console.WriteLine("{0,5} {1} {2}", randomTries, policyFactory(1), globalStatistics);
	119	//}
	120	//});
	121	//tasks.Add(t);
	122	});
	123	}
	124	}
	125	//Task.WaitAll(tasks.ToArray());
[11727]	126	}
	127
	128	private static void RunDemo() {
[11730]	129	// TODO: warum funktioniert die alte Implementierung von GaussianThompson besser für SantaFe als alte? Siehe Vergleich: alte vs. neue implementierung GaussianThompsonSampling
	130	// TODO: why does GaussianThompsonSampling work so well with MCTS for the artificial ant problem?
	131	// TODO: wie kann ich sampler noch vergleichen bzw. was kann man messen um die qualität des samplers abzuschätzen (bis auf qualität und iterationen bis zur besten lösung) => ziel schnellere iterationen zu gutem ergebnis
	132	// TODO: likelihood für R=1 bei Gaussian oder GaussianMixture einfach berechenbar?
	133	// TODO: research thompson sampling for max bandit?
	134	// TODO: ausführlicher test von strategien für k-armed max bandit
	135	// TODO: verify TA implementation using example from the original paper
	136	// TODO: reference HL.ProblemInstances and try on tower dataset
	137	// TODO: compare results for different policies also for the symb-reg problem
	138	// TODO: separate policy from MCTS tree data structure to allow sharing of information over disconnected parts of the tree (semantic equivalence)
	139	// TODO: implement thompson sampling for gaussian mixture models
	140	// TODO: implement inspection for MCTS (eventuell interactive command line für statistiken aus dem baum anzeigen)
	141	// TODO: implement ACO-style bandit policy
	142	// TODO: implement sequences that can be manipulated in-place (instead of strings), alternatives are also stored as sequences, for a sequence the index of the first NT-symb can be stored
	143	// TODO: gleichzeitige modellierung von transformierter zielvariable (y, 1/y, log(y), exp(y), sqrt(y), ...)
	144	// TODO: vergleich bei complete-randomly möglichst kurze sätze generieren vs. einfach zufällig alternativen wählen
	145	// TODO: reward discounting (für veränderliche reward distributions über zeit). speziellen unit-test dafür erstellen
[11727]	146
[11730]	147
[11690]	148	int maxIterations = 10000000;
[11659]	149	int iterations = 0;
	150	var sw = new Stopwatch();
	151	double bestQuality = 0;
	152	string bestSentence = "";
[11727]	153	var globalStatistics = new SentenceSetStatistics();
[11730]	154	var random = new Random();
[11659]	155
[11730]	156	//var problem = new SymbolicRegressionPoly10Problem();
[11727]	157	var problem = new SantaFeAntProblem();
	158	//var problem = new PalindromeProblem();
	159	//var problem = new HardPalindromeProblem();
	160	//var problem = new RoyalPairProblem();
	161	//var problem = new EvenParityProblem();
[11730]	162	//var alg = new MctsSampler(problem, 17, random, 1, (rand, numActions) => new GenericThompsonSamplingPolicy(rand, numActions, new GaussianModel(numActions, 0.5, 10)));
	163	//var alg = new ExhaustiveBreadthFirstSearch(problem, 17);
	164	//var alg = new AlternativesContextSampler(problem, random, 17, 4, (rand, numActions) => new RandomPolicy(rand, numActions));
	165	//var alg = new ExhaustiveDepthFirstSearch(problem, 17);
	166	// var alg = new AlternativesSampler(problem, 17);
	167	var alg = new RandomSearch(problem, random, 17);
[11659]	168
[11727]	169	alg.FoundNewBestSolution += (sentence, quality) => {
[11659]	170	bestQuality = quality;
	171	bestSentence = sentence;
	172	Console.WriteLine("{0,10} {1,10:F5} {2,10:F5} {3}", iterations, bestQuality, quality, sentence);
	173	};
[11727]	174	alg.SolutionEvaluated += (sentence, quality) => {
[11659]	175	iterations++;
[11727]	176	globalStatistics.AddSentence(sentence, quality);
[11730]	177	if (iterations % 1000 == 0) {
	178	//alg.PrintStats();
	179	}
[11690]	180	if (iterations % 10000 == 0) {
[11727]	181	//Console.WriteLine("{0,10} {1,10:F5} {2,10:F5} {3}", iterations, bestQuality, quality, sentence);
[11730]	182	//Console.WriteLine("{0,4} {1,7} {2}", alg.treeDepth, alg.treeSize, globalStatistics);
	183	Console.WriteLine(globalStatistics);
[11659]	184	}
	185	};
	186
	187
	188	sw.Start();
	189
[11727]	190	alg.Run(maxIterations);
[11659]	191
	192	sw.Stop();
	193
	194	Console.WriteLine("{0,10} Best soultion: {1,10:F5} {2}", iterations, bestQuality, bestSentence);
	195	Console.WriteLine("{0:F2} sec {1,10:F1} sols/sec {2,10:F1} ns/sol",
	196	sw.Elapsed.TotalSeconds,
	197	maxIterations / (double)sw.Elapsed.TotalSeconds,
	198	(double)sw.ElapsedMilliseconds * 1000 / maxIterations);
	199	}
	200	}
	201	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Update cookies preferences