Changeset 15723
- Timestamp: 02/06/18 13:18:31 (7 years ago)
- Location: branches/2886_SymRegGrammarEnumeration
- Files: 5 edited
Legend:
- Unmodified
- Added
- Removed
branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/Grammar.cs
r15722 r15723 59 59 60 60 61 #region Production ruless 61 #region Production rules 62 // order of production is important, since they are accessed via index 63 // in memoization. 62 64 StartSymbol = Expr; 63 65 … … 89 91 } 90 92 93 /* 94 #region Memoize subtrees 95 96 public void MemoizeSubtrees(SymbolString sentence) { 97 Stack<TerminalSymbol> parseStack = new Stack<TerminalSymbol>(sentence.OfType<TerminalSymbol>()); 98 99 // Parse root symbol "+" 100 MemoizeSubtreeExpression(parseStack); 101 } 102 103 private SymbolString MemoizeSubtreeExpression(Stack<TerminalSymbol> parseStack) { 104 SymbolString subtree = new SymbolString(); 105 106 if (ReferenceEquals(parseStack.Peek(), Addition)) { 107 subtree.Add(parseStack.Pop()); 108 subtree.InsertRange(0, MemoizeSubtreeExpression(parseStack)); 109 subtree.InsertRange(0, MemoizeSubtreeTerm(parseStack)); 110 111 Expr.Alternatives[0].GeneratedSentences.Add(subtree); 112 } else { 113 subtree.InsertRange(0, MemoizeSubtreeTerm(parseStack)); 114 115 Expr.Alternatives[1].GeneratedSentences.Add(subtree); 116 } 117 118 return subtree; 119 } 120 121 private SymbolString MemoizeSubtreeTerm(Stack<TerminalSymbol> parseStack) { 122 SymbolString subtree = new SymbolString(); 123 124 if (ReferenceEquals(parseStack.Peek(), Multiplication)) { 125 subtree.Add(parseStack.Pop()); 126 subtree.InsertRange(0, MemoizeSubtreeTerm(parseStack)); 127 subtree.InsertRange(0, MemoizeSubtreeFactor(parseStack)); 128 129 Term.Alternatives[0].GeneratedSentences.Add(subtree); 130 } else { 131 subtree.InsertRange(0, MemoizeSubtreeFactor(parseStack)); 132 133 Term.Alternatives[1].GeneratedSentences.Add(subtree); 134 } 135 136 return subtree; 137 } 138 139 private SymbolString MemoizeSubtreeFactor(Stack<TerminalSymbol> parseStack) { 140 SymbolString subtree = new SymbolString(MemoizeSubtreeVar(parseStack)); 141 142 Factor.Alternatives[0].GeneratedSentences.Add(subtree); 143 return subtree; 144 } 145 146 private SymbolString 
MemoizeSubtreeVar(Stack<TerminalSymbol> parseStack) { 147 SymbolString subtree = new SymbolString(parseStack.Pop().ToEnumerable()); 148 149 // ... not really 150 //Var.Alternatives[0].GeneratedSentences.Add(subtree); 151 return subtree; 152 } 153 154 155 #endregion 156 */ 157 158 #region Hashing 91 159 public int CalcHashCode(SymbolString sentence) { 92 160 Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!"); … … 99 167 } 100 168 101 #region Hashing102 169 private int[] GetSubtreeHashes(TerminalSymbol currentSymbol, Stack<TerminalSymbol> parseStack) { 103 170 List<int> childHashes = null; … … 166 233 return childHashes.ToArray(); 167 234 } 168 169 235 170 236 private int AggregateHashes(IEnumerable<int> hashes) { -
branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/GrammarEnumerationAlgorithm.cs
r15722 r15723 1 using System; 2 using System.Collections.Generic; 3 using System.Collections.ObjectModel; 4 using System.Diagnostics; 1 using System.Collections.Generic; 5 2 using System.Linq; 6 3 using System.Threading; … … 23 20 public class GrammarEnumerationAlgorithm : FixedDataAnalysisAlgorithm<IRegressionProblem> { 24 21 private readonly string BestTrainingSolution = "Best solution (training)"; 22 private readonly string BestTrainingSolutionString = "Best solution string (training)"; 25 23 private readonly string BestTrainingSolutionQuality = "Best solution quality (training)"; 26 24 private readonly string BestTestSolution = "Best solution (test)"; 25 private readonly string BestTestSolutionString = "Best solution string (test)"; 27 26 private readonly string BestTestSolutionQuality = "Best solution quality (test)"; 28 27 29 28 private readonly string MaxTreeSizeParameterName = "Max. Tree Nodes"; 30 29 private readonly string GuiUpdateIntervalParameterName = "GUI Update Interval"; 30 private readonly string UseMemoizationParameterName = "Use Memoization?"; 31 31 32 32 33 33 #region properties 34 p ublicIValueParameter<IntValue> MaxTreeSizeParameter {34 protected IValueParameter<IntValue> MaxTreeSizeParameter { 35 35 get { return (IValueParameter<IntValue>)Parameters[MaxTreeSizeParameterName]; } 36 36 } 37 38 37 public int MaxTreeSize { 39 38 get { return MaxTreeSizeParameter.Value.Value; } 39 set { MaxTreeSizeParameter.Value.Value = value; } 40 40 } 41 41 42 public IValueParameter<IntValue> GuiUpdateIntervalParameter { 43 get { return (IValueParameter<IntValue>)Parameters[MaxTreeSizeParameterName]; } 42 protected IValueParameter<IntValue> GuiUpdateIntervalParameter { 43 get { return (IValueParameter<IntValue>)Parameters[GuiUpdateIntervalParameterName]; } 44 } 45 public int GuiUpdateInterval { 46 get { return GuiUpdateIntervalParameter.Value.Value; } 47 set { GuiUpdateIntervalParameter.Value.Value = value; } 44 48 } 45 49 46 public int GuiUpdateInterval { 47 
get { return GuiUpdateIntervalParameter.Value.Value; } 50 protected IValueParameter<BoolValue> UseMemoizationParameter { 51 get { return (IValueParameter<BoolValue>)Parameters[UseMemoizationParameterName]; } 52 } 53 public bool UseMemoization { 54 get { return UseMemoizationParameter.Value.Value; } 55 set { UseMemoizationParameter.Value.Value = value; } 48 56 } 49 57 … … 59 67 60 68 public GrammarEnumerationAlgorithm() { 61 Problem = new RegressionProblem();62 69 63 Parameters.Add(new ValueParameter<IntValue>(MaxTreeSizeParameterName, "The number of clusters.", new IntValue(4))); 70 var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(seed: 1234); 71 var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly-10"))); 72 73 Problem = new RegressionProblem() { 74 ProblemData = regProblem 75 }; 76 77 Parameters.Add(new ValueParameter<IntValue>(MaxTreeSizeParameterName, "The number of clusters.", new IntValue(6))); 64 78 Parameters.Add(new ValueParameter<IntValue>(GuiUpdateIntervalParameterName, "Number of generated sentences, until GUI is refreshed.", new IntValue(4000))); 79 Parameters.Add(new ValueParameter<BoolValue>(UseMemoizationParameterName, "Should already subtrees be reused within a run.", new BoolValue(true))); 65 80 } 66 81 … … 72 87 List<SymbolString> allGenerated = new List<SymbolString>(); 73 88 List<SymbolString> distinctGenerated = new List<SymbolString>(); 89 90 int expansions = 0; 91 74 92 HashSet<int> evaluatedHashes = new HashSet<int>(); 75 93 … … 87 105 allGenerated.Add(currSymbolString); 88 106 89 if (evaluatedHashes.Add(grammar.CalcHashCode(currSymbolString))) {90 91 92 }107 //if (evaluatedHashes.Add(grammar.CalcHashCode(currSymbolString))) { 108 EvaluateSentence(currSymbolString); 109 //distinctGenerated.Add(currSymbolString); 110 //} 93 111 94 UpdateView(allGenerated, distinctGenerated );112 UpdateView(allGenerated, distinctGenerated, expansions); 95 113 96 114 } else { 
… … 111 129 } 112 130 131 UpdateView(allGenerated, distinctGenerated, expansions, force: true); 132 113 133 StringArray sentences = new StringArray(allGenerated.Select(r => r.ToString()).ToArray()); 114 134 Results.Add(new Result("All generated sentences", sentences)); … … 118 138 119 139 120 private void UpdateView(List<SymbolString> allGenerated, List<SymbolString> distinctGenerated ) {140 private void UpdateView(List<SymbolString> allGenerated, List<SymbolString> distinctGenerated, int expansions, bool force = false) { 121 141 int generatedSolutions = allGenerated.Count; 122 142 int distinctSolutions = distinctGenerated.Count; 123 143 124 if ( generatedSolutions % GuiUpdateInterval == 0) {144 if (force || generatedSolutions % GuiUpdateInterval == 0) { 125 145 Results.AddOrUpdateResult("Generated Solutions", new IntValue(generatedSolutions)); 126 Results.Add (new Result("Distinct Solutions", new IntValue(distinctSolutions)));146 Results.AddOrUpdateResult("Distinct Solutions", new IntValue(distinctSolutions)); 127 147 128 148 DoubleValue averageTreeLength = new DoubleValue(allGenerated.Select(r => r.Count).Average()); 129 Results.Add(new Result("Average Tree Length of Solutions", averageTreeLength)); 149 Results.AddOrUpdateResult("Average Tree Length of Solutions", averageTreeLength); 150 151 IntValue expansionsValue = new IntValue(expansions); 152 Results.AddOrUpdateResult("Expansions", expansionsValue); 130 153 } 131 154 } … … 146 169 147 170 Results.Add(new Result(BestTrainingSolution, newSolution)); 171 Results.Add(new Result(BestTrainingSolutionString, new StringValue(symbolString.ToString()).AsReadOnly())); 148 172 Results.Add(new Result(BestTrainingSolutionQuality, new DoubleValue(newSolution.TrainingRSquared).AsReadOnly())); 149 173 Results.Add(new Result(BestTestSolution, newSolution)); 174 Results.Add(new Result(BestTestSolutionString, new StringValue(symbolString.ToString()).AsReadOnly())); 150 175 Results.Add(new Result(BestTestSolutionQuality, new 
DoubleValue(newSolution.TestRSquared).AsReadOnly())); 151 176 … … 154 179 if (currBestTrainingSolution.TrainingRSquared < newSolution.TrainingRSquared) { 155 180 currBestTrainingSolutionResult.Value = newSolution; 181 Results.AddOrUpdateResult(BestTrainingSolutionString, new StringValue(symbolString.ToString())); 156 182 Results.AddOrUpdateResult(BestTrainingSolutionQuality, new DoubleValue(newSolution.TrainingRSquared).AsReadOnly()); 157 183 } … … 160 186 if (currBestTestSolution.TestRSquared < newSolution.TestRSquared) { 161 187 currBestTestSolutionResult.Value = newSolution; 188 Results.AddOrUpdateResult(BestTestSolutionString, new StringValue(symbolString.ToString())); 162 189 Results.AddOrUpdateResult(BestTestSolutionQuality, new DoubleValue(newSolution.TestRSquared).AsReadOnly()); 163 190 } -
branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/Sentence.cs
r15712 r15723 7 7 namespace HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration { 8 8 public class SymbolString : List<Symbol> { 9 10 public SymbolString() { } 9 11 10 12 public SymbolString(IEnumerable<Symbol> symbols) : base(symbols) { } … … 20 22 } 21 23 22 public override string ToString() 23 { 24 public override string ToString() { 24 25 return string.Join(" ", this); 25 26 } -
branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/Symbol.cs
r15714 r15723 1 1 using System.Collections.Generic; 2 2 using System.Linq; 3 using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration; 3 4 4 5 namespace HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration { -
branches/2886_SymRegGrammarEnumeration/Test/GrammarEnumerationTest.cs
r15714 r15723 1 1 using System; 2 using System.Collections.Generic;3 2 using System.Linq; 4 3 using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration; … … 10 9 11 10 namespace HeuristicLab.Algorithms.DataAnalysis.MctsSymbolicRegression { 12 //[TestClass()]11 [TestClass()] 13 12 public class MctsSymbolicRegressionTest { 14 13 private const int Seed = 1234; 15 14 private IRandom rand; 16 15 17 private const double SuccessThreshold = 0.99999; 16 private const double SuccessThreshold = 0.9999999; 17 18 private GrammarEnumerationAlgorithm alg; 19 private RegressionProblem problem; 18 20 19 21 [TestInitialize] … … 21 23 Console.Write("init called... "); 22 24 rand = new FastRandom(Seed); 23 } 25 26 alg = new GrammarEnumerationAlgorithm(); 27 problem = new RegressionProblem(); 28 alg.Problem = problem; 29 alg.GuiUpdateInterval = int.MaxValue; 30 } 31 32 33 private void TestGrammarEnumeration(IRegressionProblemData problemData) { 34 alg.Problem.ProblemData = problemData; 35 36 // Run 37 alg.Start(); 38 39 // Evaluate results 40 var eps = 1.0 - SuccessThreshold; 41 42 // Check if algorithm terminated correctly 43 Assert.IsTrue(alg.Results.ContainsKey("Best solution quality (training)"), "No training solution returned!"); 44 Assert.IsTrue(alg.Results.ContainsKey("Best solution quality (test)"), "No test solution returned!"); 45 46 // Check resultss 47 Assert.AreEqual(1.0, ((DoubleValue)alg.Results["Best solution quality (training)"].Value).Value, eps, "Training quality too low!"); 48 Assert.AreEqual(1.0, ((DoubleValue)alg.Results["Best solution quality (test)"].Value).Value, eps, "Test quality too low!"); 49 50 // Check overfitting 51 Assert.AreEqual(alg.Results["Best solution string (training)"].Value.ToString(), 52 alg.Results["Best solution string (test)"].Value.ToString()); 53 } 54 24 55 25 56 #region test structure search (no constants) 26 57 [TestMethod] 27 [TestCategory("Algorithms.DataAnalysis")] 28 [TestProperty("Time", "short")] 58 [TestProperty("Goal", 
"structure search")] 29 59 public void MctsSymbReg_NoConstants_Nguyen1() { 30 60 // x³ + x² + x 61 alg.MaxTreeSize = 15; 31 62 var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(Seed); 32 63 var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F1 "))); 33 64 TestGrammarEnumeration(regProblem); 34 } 35 [TestMethod] 36 [TestCategory("Algorithms.DataAnalysis")] 37 [TestProperty("Time", "short")] 65 66 Console.WriteLine("Nguyen1: " + alg.Results["Best solution string (training)"].Value); 67 } 68 69 [TestMethod] 70 [TestProperty("Goal", "structure search")] 38 71 public void MctsSymbReg_NoConstants_Nguyen2() { 39 72 // x^4 + x³ + x² + x 73 alg.MaxTreeSize = 20; 40 74 var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(Seed); 41 75 var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F2 "))); 42 76 TestGrammarEnumeration(regProblem); 43 } 44 [TestMethod] 45 [TestCategory("Algorithms.DataAnalysis")] 46 [TestProperty("Time", "short")] 77 78 Console.WriteLine("Nguyen2: " + alg.Results["Best solution string (training)"].Value); 79 } 80 81 /* NEXT UP 82 [TestMethod] 83 [TestProperty("Goal", "structure search")] 47 84 public void MctsSymbReg_NoConstants_Nguyen3() { 48 85 // x^5 + x^4 + x³ + x² + x 86 alg.MaxTreeSize = 25; 49 87 var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(Seed); 50 88 var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F3 "))); … … 52 90 } 53 91 [TestMethod] 54 [TestCategory("Algorithms.DataAnalysis")] 55 [TestProperty("Time", "short")] 92 [TestProperty("Goal", "structure search")] 56 93 public void MctsSymbReg_NoConstants_Nguyen4() { 94 57 95 // x^6 + x^5 + x^4 + x³ + x² + x 96 alg.MaxTreeSize = 30; 58 97 var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(Seed); 59 98 var regProblem = 
provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F4 "))); 60 99 TestGrammarEnumeration(regProblem); 61 } 62 100 } */ 101 102 103 #endregion 104 105 #region TODO 106 107 #if false 63 108 [TestMethod] 64 109 [TestCategory("Algorithms.DataAnalysis")] … … 357 402 TestGrammarEnumeration(problemData); 358 403 } 359 #endregion360 404 361 405 #region restricted structure but including numeric constants … … 557 601 #endregion 558 602 559 private void TestGrammarEnumeration(IRegressionProblemData problemData) 560 { 561 // Configure algorithm and problem 562 var alg = new GrammarEnumerationAlgorithm(); 563 var problem = new RegressionProblem(); 564 alg.Problem = problem; 565 problem.ProblemData = problemData; 566 567 // Run 568 alg.Start(); 569 570 // Evaluate results 571 var eps = 1.0 - SuccessThreshold; 572 573 // Check if algorithm terminated correctly 574 Assert.IsTrue(alg.Results.ContainsKey("Best solution quality (training)"), "No training solution returned!"); 575 Assert.IsTrue(alg.Results.ContainsKey("Best solution quality (test)"), "No test solution returned!"); 576 577 // Check resultss 578 Assert.AreEqual(1.0, ((DoubleValue)alg.Results["Best solution quality (training)"].Value).Value, eps, "Training quality too low!"); 579 Assert.AreEqual(1.0, ((DoubleValue)alg.Results["Best solution quality (test)"].Value).Value, eps, "Test quality too low!"); 580 } 603 #endif 604 #endregion 581 605 } 582 606 }
Note: See TracChangeset for help on using the changeset viewer.