Changeset 15860
- Timestamp:
- 03/23/18 18:36:23 (7 years ago)
- Location:
- branches/2886_SymRegGrammarEnumeration
- Files:
-
- 4 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/Analysis/RSquaredEvaluator.cs
r15859 r15860
 77  77         problemData.TrainingIndices,
 78  78         applyLinearScaling: false,
 79     -       maxIterations: 200,
     79 +       maxIterations: 50,
 80  80         updateVariableWeights: true,
 81  81         updateConstantsInTree: true);
  …   …
 96  96     //var estVals = model.GetEstimatedValues(problemData.Dataset, problemData.TrainingIndices);
 97  97     //OnlineCalculatorError error;
 98     -   // r2 = OnlinePearsonsRCalculator.Calculate(target, estVals, out error);
     98 +   //double r2 = OnlinePearsonsRCalculator.Calculate(target, estVals, out error);
 99  99     //if (error != OnlineCalculatorError.None) r2 = 0.0;
100 100
  …   …
102 102     bool better = r2 > bestR2Result.Value;
103 103     bool equallyGood = r2.IsAlmost(bestR2Result.Value);
104     -   bool shorter = algorithm.BestTrainingSentence != null && symbolString.Count() < algorithm.BestTrainingSentence.Count();
    104 +   bool shorter = false;
    105 +
    106 +   if (!better && equallyGood) {
    107 +     shorter = algorithm.BestTrainingSentence != null &&
    108 +       algorithm.Grammar.GetComplexity(algorithm.BestTrainingSentence) > algorithm.Grammar.GetComplexity(symbolString);
    109 +   }
105 110     if (better || (equallyGood && shorter)) {
106 111       bestR2Result.Value = r2;
-
branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/Analysis/SentenceLogger.cs
r15843 r15860
 45  45
 46  46     private void GrammarEnumerationAlgorithmOnStarted(object sender, EventArgs eventArgs) {
 47     -     string datePostfix = $"_{DateTime.Now:yyyy-MM-dd_HH-mm}_TreeSize-{((GrammarEnumerationAlgorithm) sender).MaxTreeSize}.csv";
     47 +     string datePostfix = $"_{DateTime.Now:yyyy-MM-dd_HH-mm}_TreeSize-{((GrammarEnumerationAlgorithm) sender).MaxComplexity}.csv";
 48  48       distinctSentencesFileName = workingDir + @"\distinctSentences" + datePostfix;
 49  49       allSentencesFileName = workingDir + @"\allSentences" + datePostfix;
-
branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/GrammarEnumerationAlgorithm.cs
r15843 r15860
 24  24     private readonly string DistinctSentencesName = "Distinct Sentences";
 25  25     private readonly string PhraseExpansionsName = "Phrase Expansions";
 26     -   private readonly string AverageSentenceLengthName = "Avg. Sentence Length among Distinct";
     26 +   private readonly string AverageSentenceComplexityName = "Avg. Sentence Complexity among Distinct";
 27  27     private readonly string OverwrittenSentencesName = "Sentences overwritten";
 28  28     private readonly string AnalyzersParameterName = "Analyzers";
  …   …
 31  31
 32  32     private readonly string SearchDataStructureParameterName = "Search Data Structure";
 33     -   private readonly string MaxTreeSizeParameterName = "Max. Tree Nodes";
     33 +   private readonly string MaxComplexityParameterName = "Max. Complexity";
 34  34     private readonly string GuiUpdateIntervalParameterName = "GUI Update Interval";
 35  35
 36  36     public override bool SupportsPause { get { return false; } }
 37  37
 38     -   protected IValueParameter<IntValue> MaxTreeSizeParameter {
 39     -     get { return (IValueParameter<IntValue>)Parameters[MaxTreeSizeParameterName]; }
 40     -   }
 41     -   public int MaxTreeSize {
 42     -     get { return MaxTreeSizeParameter.Value.Value; }
 43     -     set { MaxTreeSizeParameter.Value.Value = value; }
     38 +   protected IValueParameter<IntValue> MaxComplexityParameter {
     39 +     get { return (IValueParameter<IntValue>)Parameters[MaxComplexityParameterName]; }
     40 +   }
     41 +   public int MaxComplexity {
     42 +     get { return MaxComplexityParameter.Value.Value; }
     43 +     set { MaxComplexityParameter.Value.Value = value; }
 44  44     }
 45  45
  …   …
 71  71     #endregion
 72  72
 73     -   public Dictionary<int, int> DistinctSentencesLength { get; private set; } // Semantically distinct sentences and their length in a run.
     73 +   public Dictionary<int, int> DistinctSentencesComplexity { get; private set; } // Semantically distinct sentences and their length in a run.
 74  74     public HashSet<int> ArchivedPhrases { get; private set; }
 75  75     internal SearchDataStore OpenPhrases { get; private set; } // Stack/Queue/etc. for fetching the next node in the search tree.
  …   …
 97  97       };
 98  98
 99     -     Parameters.Add(new ValueParameter<IntValue>(MaxTreeSizeParameterName, "The number of clusters.", new IntValue(6)));
     99 +     Parameters.Add(new ValueParameter<IntValue>(MaxComplexityParameterName, "The maximum number of variable symbols in a sentence.", new IntValue(5)));
100 100       Parameters.Add(new ValueParameter<IntValue>(GuiUpdateIntervalParameterName, "Number of generated sentences, until GUI is refreshed.", new IntValue(100000)));
101 101       Parameters.Add(new ValueParameter<EnumValue<StorageType>>(SearchDataStructureParameterName, new EnumValue<StorageType>(StorageType.Stack)));
  …   …
127 127       ArchivedPhrases = new HashSet<int>();
128 128
129     -     DistinctSentencesLength = new Dictionary<int, int>();
    129 +     DistinctSentencesComplexity = new Dictionary<int, int>();
130 130       AllGeneratedSentencesCount = 0;
131 131       OverwrittenSentencesCount = 0;
  …   …
159 159
160 160             SymbolString newPhrase = currPhrase.DerivePhrase(nonterminalSymbolIndex, appliedProductions[i]);
161     -
162     -           if (newPhrase.Count() <= MaxTreeSize) {
    161 +           int newPhraseComplexity = Grammar.GetComplexity(newPhrase);
    162 +
    163 +           if (newPhraseComplexity <= MaxComplexity) {
163 164               var phraseHash = Grammar.Hasher.CalcHashCode(newPhrase);
164 165
  …   …
170 171                 OnSentenceGenerated(fetchedPhrase.Hash, fetchedPhrase.SymbolString, phraseHash, newPhrase, expandedSymbol, appliedProductions[i]);
171 172
172     -               if (!DistinctSentencesLength.ContainsKey(phraseHash) || DistinctSentencesLength[phraseHash] > newPhrase.Count()) {
173     -                 if (DistinctSentencesLength.ContainsKey(phraseHash)) OverwrittenSentencesCount++; // for analysis only
174     -
175     -                 DistinctSentencesLength[phraseHash] = newPhrase.Count();
    173 +               if (!DistinctSentencesComplexity.ContainsKey(phraseHash) || DistinctSentencesComplexity[phraseHash] > newPhraseComplexity) {
    174 +                 if (DistinctSentencesComplexity.ContainsKey(phraseHash)) OverwrittenSentencesCount++; // for analysis only
    175 +
    176 +                 DistinctSentencesComplexity[phraseHash] = newPhraseComplexity;
176 177                   OnDistinctSentenceGenerated(fetchedPhrase.Hash, fetchedPhrase.SymbolString, phraseHash, newPhrase, expandedSymbol, appliedProductions[i]);
177 178                 }
  …   …
196 197       Results.Add(new Result(PhraseExpansionsName, new IntValue(0)));
197 198       Results.Add(new Result(OverwrittenSentencesName, new IntValue(0)));
198     -     Results.Add(new Result(AverageSentenceLengthName, new DoubleValue(1.0)));
    199 +     Results.Add(new Result(AverageSentenceComplexityName, new DoubleValue(1.0)));
199 200       Results.Add(new Result(ExpansionsPerSecondName, "In Thousand expansions per second", new IntValue(0)));
200 201     }
  …   …
209 210       ((IntValue)Results[SearchStructureSizeName].Value).Value = OpenPhrases.Count;
210 211       ((IntValue)Results[GeneratedSentencesName].Value).Value = AllGeneratedSentencesCount;
211     -     ((IntValue)Results[DistinctSentencesName].Value).Value = DistinctSentencesLength.Count;
    212 +     ((IntValue)Results[DistinctSentencesName].Value).Value = DistinctSentencesComplexity.Count;
212 213       ((IntValue)Results[PhraseExpansionsName].Value).Value = PhraseExpansionCount;
213     -     ((DoubleValue)Results[AverageSentenceLengthName].Value).Value = DistinctSentencesLength.Select(pair => pair.Value).Average();
    214 +     ((DoubleValue)Results[AverageSentenceComplexityName].Value).Value = DistinctSentencesComplexity.Select(pair => pair.Value).Average();
214 215       ((IntValue)Results[OverwrittenSentencesName].Value).Value = OverwrittenSentencesCount;
215 216       ((IntValue)Results[ExpansionsPerSecondName].Value).Value = (int)((PhraseExpansionCount /
-
branches/2886_SymRegGrammarEnumeration/Test/GrammarEnumerationTest.cs
r15859 r15860
 60  60     public void NoConstants_Nguyen1() {
 61  61       // x³ + x² + x
 62     -     alg.MaxTreeSize = 20;
     62 +     alg.MaxComplexity = 6;
 63  63       alg.Problem.ProblemData = new NguyenFunctionOne(Seed).GenerateRegressionData();
 64  64
  …   …
 80  80       int actualSolutionHash = alg.Grammar.Hasher.CalcHashCode(alg.BestTrainingSentence);
 81  81
 82     -     Assert.IsTrue(alg.DistinctSentencesLength.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
     82 +     Assert.IsTrue(alg.DistinctSentencesComplexity.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
 83  83
 84  84       Assert.AreEqual(targetSolutionHash, actualSolutionHash, "Actual solution was not recognized as best one.");
  …   …
 93  93     public void NoConstants_Nguyen2() {
 94  94       // x^4 + x³ + x² + x
 95     -     alg.MaxTreeSize = 30;
     95 +     alg.MaxComplexity = 11;
 96  96       alg.Problem.ProblemData = new NguyenFunctionTwo(Seed).GenerateRegressionData();
 97  97
  …   …
105 105     public void NoConstants_Nguyen3() {
106 106       // x^5 + x^4 + x^3 + x^2 + x
107     -     alg.MaxTreeSize = 32;
    107 +     alg.MaxComplexity = 32;
108 108       alg.Problem.ProblemData = new NguyenFunctionThree(Seed).GenerateRegressionData();
109 109
  …   …
117 117     public void NoConstants_Nguyen6() {
118 118       // sin(x) + sin(x + x²)
119     -     alg.MaxTreeSize = 25;
    119 +     alg.MaxComplexity = 4;
120 120       alg.Problem.ProblemData = new NguyenFunctionSix(Seed).GenerateRegressionData();
121 121
  …   …
138 138       int actualSolutionHash = alg.Grammar.Hasher.CalcHashCode(alg.BestTrainingSentence);
139 139
140     -     Assert.IsTrue(alg.DistinctSentencesLength.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
    140 +     Assert.IsTrue(alg.DistinctSentencesComplexity.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
141 141       Assert.AreEqual(targetSolutionHash, actualSolutionHash, "Actual solution was not recognized as best one.");
142 142
  …   …
148 148     public void NoConstants_Nguyen9() {
149 149       // sin(x) + sin(y²)
150     -     alg.MaxTreeSize = 22;
    150 +     alg.MaxComplexity = 3;
151 151       alg.Problem.ProblemData = new NguyenFunctionNine(Seed).GenerateRegressionData();
152 152
  …   …
170 170       int actualSolutionHash = alg.Grammar.Hasher.CalcHashCode(alg.BestTrainingSentence);
171 171
172     -     Assert.IsTrue(alg.DistinctSentencesLength.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
    172 +     Assert.IsTrue(alg.DistinctSentencesComplexity.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
173 173       Assert.AreEqual(targetSolutionHash, actualSolutionHash, "Actual solution was not recognized as best one.");
174 174
  …   …
180 180     [TestProperty("Goal", "structure search")]
181 181     public void MctsSymbReg_NoConstants_Poly10() {
182     -     alg.MaxTreeSize = 10;
    182 +     alg.MaxComplexity = 10;
183 183       alg.Problem.ProblemData = new PolyTen(Seed).GenerateRegressionData();
184 184
  …   …
191 191     public void NoConstants_Inverse() {
192 192       // x / (log(x)*x + x)
193     -     alg.MaxTreeSize = 23;
    193 +     alg.MaxComplexity = 4;
194 194
195 195       var x = Enumerable.Range(0, 100).Select(_ => rand.NextDouble() + 1.1).ToList();
  …   …
205 205     public void Constants_Nguyen7() {
206 206       // log(x+1) + log(x*x + 1)
207     -     alg.MaxTreeSize = 22;
    207 +     alg.MaxComplexity = 3;
208 208       alg.Problem.ProblemData = new NguyenFunctionSeven().GenerateRegressionData();
209 209
  …   …
225 225       int actualSolutionHash = alg.Grammar.Hasher.CalcHashCode(alg.BestTrainingSentence);
226 226
227     -     Assert.IsTrue(alg.DistinctSentencesLength.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
    227 +     Assert.IsTrue(alg.DistinctSentencesComplexity.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
228 228       Assert.AreEqual(targetSolutionHash, actualSolutionHash, "Actual solution was not recognized as best one.");
229     -
230 229
231 230       // Evaluate
  …   …
237 236     public void Constants_Nguyen12() {
238 237       // x*x*x*x - x*x*x + y*y/2 -y
239     -     alg.MaxTreeSize = 28;
    238 +     alg.MaxComplexity = 10;
240 239       alg.Problem.ProblemData = new NguyenFunctionTwelve().GenerateRegressionData();
241 240
  …   …
250 249     public void Constants_Keijzer3() {
251 250       // 0.3*x*sin(2*pi*x)
252     -     alg.MaxTreeSize = 20;
    251 +     alg.MaxComplexity = 2;
253 252       alg.Problem.ProblemData = new KeijzerFunctionThree().GenerateRegressionData();
254 253
  …   …
270 269       int actualSolutionHash = alg.Grammar.Hasher.CalcHashCode(alg.BestTrainingSentence);
271 270
272     -     Assert.IsTrue(alg.DistinctSentencesLength.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
273     -
    271 +     Assert.IsTrue(alg.DistinctSentencesComplexity.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
274 272       Assert.AreEqual(targetSolutionHash, actualSolutionHash, "Actual solution was not recognized as best one.");
275 273
  …   …
282 280     public void Constants_Keijzer5() {
283 281       // (30*x*z) / ((x - 10)*y*y)
284     -     alg.MaxTreeSize = 24;
    282 +     alg.MaxComplexity = 5;
285 283       alg.Problem.ProblemData = new KeijzerFunctionFive().GenerateRegressionData();
286 284
  …   …
307 305       int actualSolutionHash = alg.Grammar.Hasher.CalcHashCode(alg.BestTrainingSentence);
308 306
309     -     Assert.IsTrue(alg.DistinctSentencesLength.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
    307 +     Assert.IsTrue(alg.DistinctSentencesComplexity.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
310 308
311 309       Assert.AreEqual(targetSolutionHash, actualSolutionHash, "Actual solution was not recognized as best one.");
  …   …
320 318     public void Constants_Keijzer12() {
321 319       // x*x*x*x - x*x*x + y*y/2 - y
322     -     alg.MaxTreeSize = 29;
    320 +     alg.MaxComplexity = 10;
323 321       alg.Problem.ProblemData = new KeijzerFunctionTwelve().GenerateRegressionData();
324 322
  …   …
344 342       int actualSolutionHash = alg.Grammar.Hasher.CalcHashCode(alg.BestTrainingSentence);
345 343
346     -     Assert.IsTrue(alg.DistinctSentencesLength.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
    344 +     Assert.IsTrue(alg.DistinctSentencesComplexity.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
347 345       Assert.AreEqual(targetSolutionHash, actualSolutionHash, "Actual solution was not recognized as best one.");
348 346
  …   …
355 353     public void Constants_Keijzer14() {
356 354       // 8 / (2 + x*x + y*y
357     -     alg.MaxTreeSize = 19;
    355 +     alg.MaxComplexity = 4;
358 356       alg.Problem.ProblemData = new KeijzerFunctionFourteen().GenerateRegressionData();
359 357
  …   …
380 378       int actualSolutionHash = alg.Grammar.Hasher.CalcHashCode(alg.BestTrainingSentence);
381 379
382     -     Assert.IsTrue(alg.DistinctSentencesLength.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
    380 +     Assert.IsTrue(alg.DistinctSentencesComplexity.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
383 381       Assert.AreEqual(targetSolutionHash, actualSolutionHash, "Actual solution was not recognized as best one.");
384 382
  …   …
392 390     public void Constants_Keijzer15() {
393 391       // x*x*x / 5 + y*y*y / 2 - y - x
394     -     alg.MaxTreeSize = 25;
    392 +     alg.MaxComplexity = 8;
395 393       alg.Problem.ProblemData = new KeijzerFunctionFifteen().GenerateRegressionData();
396 394
  …   …
415 413       int actualSolutionHash = alg.Grammar.Hasher.CalcHashCode(alg.BestTrainingSentence);
416 414
417     -     Assert.IsTrue(alg.DistinctSentencesLength.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
    415 +     Assert.IsTrue(alg.DistinctSentencesComplexity.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
418 416       Assert.AreEqual(targetSolutionHash, actualSolutionHash, "Actual solution was not recognized as best one.");
419 417
Note: See TracChangeset
for help on using the changeset viewer.