Changeset 16157 for branches/2886_SymRegGrammarEnumeration
- Timestamp: 09/20/18 11:12:57 (6 years ago)
- Location: branches/2886_SymRegGrammarEnumeration
- Files: 1 added, 5 edited
Legend:
- Unmodified
- Added
- Removed
branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/Analysis/IGrammarEnumerationEvaluator.cs
r16053 r16157 21 21 22 22 using HeuristicLab.Core; 23 using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding; 23 24 using HeuristicLab.Problems.DataAnalysis; 24 25 … … 26 27 public interface IGrammarEnumerationEvaluator : IItem { 27 28 double Evaluate(IRegressionProblemData problemData, Grammar grammar, SymbolList sentence); 29 double Evaluate(IRegressionProblemData problemData, ISymbolicExpressionTree tree); 30 bool OptimizeConstants { get; set; } 28 31 } 29 32 } -
branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/GrammarEnumerationAlgorithm.cs
r16053 r16157 52 52 53 53 private readonly string EvaluatorParameterName = "Evaluator"; 54 55 private readonly string ErrorWeightParameterName = "Error Weight";56 54 private readonly string SearchDataStructureParameterName = "Search Data Structure"; 57 55 private readonly string MaxComplexityParameterName = "Max. Complexity"; 56 private readonly string MaxLengthParameterName = "Max. Length"; 58 57 private readonly string GuiUpdateIntervalParameterName = "GUI Update Interval"; 59 58 private readonly string GrammarSymbolsParameterName = "Grammar Symbols"; … … 67 66 public override bool SupportsPause { get { return true; } } 68 67 69 public I FixedValueParameter<RSquaredEvaluator> EvaluatorParameter {70 get { return (I FixedValueParameter<RSquaredEvaluator>)Parameters[EvaluatorParameterName]; }71 } 72 73 public RSquaredEvaluator Evaluator {68 public IValueParameter<IGrammarEnumerationEvaluator> EvaluatorParameter { 69 get { return (IValueParameter<IGrammarEnumerationEvaluator>)Parameters[EvaluatorParameterName]; } 70 } 71 72 public IGrammarEnumerationEvaluator Evaluator { 74 73 get { return EvaluatorParameter.Value; } 75 74 } … … 77 76 protected IFixedValueParameter<IntValue> MaxComplexityParameter { 78 77 get { return (IFixedValueParameter<IntValue>)Parameters[MaxComplexityParameterName]; } 78 } 79 80 protected IFixedValueParameter<IntValue> MaxLengthParameter { 81 get { return (IFixedValueParameter<IntValue>)Parameters[MaxLengthParameterName]; } 79 82 } 80 83 … … 84 87 } 85 88 86 protected IFixedValueParameter<DoubleValue> ErrorWeightParameter { 87 get { return (IFixedValueParameter<DoubleValue>)Parameters[ErrorWeightParameterName]; } 88 } 89 90 public double ErrorWeight { 91 get { return ErrorWeightParameter.Value.Value; } 92 set { ErrorWeightParameter.Value.Value = value; } 89 public int MaxLength { 90 get { return MaxLengthParameter.Value.Value; } 91 set { MaxLengthParameter.Value.Value = value; } 93 92 } 94 93 … … 200 199 201 200 public 
GrammarEnumerationAlgorithm() { 202 Parameters.Add(new FixedValueParameter<DoubleValue>(ErrorWeightParameterName, "Defines, how much weight is put on a phrase's r² value when priorizing phrases during search.", new DoubleValue(0.8)));203 201 Parameters.Add(new FixedValueParameter<IntValue>(MaxComplexityParameterName, "The maximum number of variable symbols in a sentence.", new IntValue(12))); 202 Parameters.Add(new FixedValueParameter<IntValue>(MaxLengthParameterName, "The maximum number of variable symbols in a sentence.", new IntValue(20))); 204 203 Parameters.Add(new FixedValueParameter<IntValue>(GuiUpdateIntervalParameterName, "Number of generated sentences, until GUI is refreshed.", new IntValue(5000))); 205 204 Parameters.Add(new FixedValueParameter<IntValue>(SearchDataStructureSizeParameterName, "The size of the search data structure.", new IntValue((int)1e5))); 206 205 Parameters.Add(new FixedValueParameter<EnumValue<StorageType>>(SearchDataStructureParameterName, new EnumValue<StorageType>(StorageType.SortedSet))); 207 Parameters.Add(new FixedValueParameter<RSquaredEvaluator>(EvaluatorParameterName, new RSquaredEvaluator()));206 Parameters.Add(new ValueParameter<IGrammarEnumerationEvaluator>(EvaluatorParameterName, new RSquaredEvaluator())); 208 207 209 208 SearchDataStructureParameter.Value.ValueChanged += (o, e) => Prepare(); … … 283 282 } 284 283 285 MaxSentenceLength = Grammar.GetMaxSentenceLength(MaxComplexity);286 var errorWeight = ErrorWeight;287 284 var evaluator = EvaluatorParameter.Value; 288 285 var problemData = Problem.ProblemData; 286 287 int maxLength = MaxLength; 288 int maxComplexity = MaxComplexity; 289 289 290 290 // main search loop … … 313 313 314 314 SymbolList newPhrase = currPhrase.DerivePhrase(nonterminalSymbolIndex, appliedProductions[i]); 315 316 if (newPhrase.Count > maxLength) 317 continue; 318 315 319 int newPhraseComplexity = newPhrase.Complexity; 316 317 if (newPhraseComplexity > MaxComplexity) 320 if (newPhraseComplexity > 
maxComplexity) 318 321 continue; 319 322 … … 327 330 OnSentenceGenerated(fetchedSearchNode.Hash, fetchedSearchNode.SymbolList, phraseHash, newPhrase, expandedSymbol, appliedProductions[i]); 328 331 329 // Is the best solution found? (only if RSquaredEvaluator is activated)330 //if (Results.ContainsKey(RSquaredEvaluator.BestTrainingQualityResultName)) {331 // double r2 = ((DoubleValue)Results[RSquaredEvaluator.BestTrainingQualityResultName].Value).Value;332 // if (r2.IsAlmost(1.0)) {333 // UpdateView(force: true);334 // return;335 // }336 //}337 338 332 if (!DistinctSentencesComplexity.ContainsKey(phraseHash) || DistinctSentencesComplexity[phraseHash] > newPhraseComplexity) { 339 333 if (DistinctSentencesComplexity.ContainsKey(phraseHash)) OverwrittenSentencesCount++; // for analysis only 340 341 334 DistinctSentencesComplexity[phraseHash] = newPhraseComplexity; 342 335 OnDistinctSentenceGenerated(fetchedSearchNode.Hash, fetchedSearchNode.SymbolList, phraseHash, newPhrase, expandedSymbol, appliedProductions[i]); 343 336 } 344 337 UpdateView(); 345 346 338 } else if (!OpenPhrases.Contains(phraseHash) && !ArchivedPhrases.Contains(phraseHash)) { 347 339 double r2 = IsCompleteSentence(newPhrase) ? 
evaluator.Evaluate(problemData, Grammar, newPhrase) : fetchedSearchNode.R2; … … 397 389 var tree = Grammar.ParseSymbolicExpressionTree(BestTrainingSentence); 398 390 var model = new SymbolicRegressionModel(Problem.ProblemData.TargetVariable, tree, interpreter); 399 400 var iterations = EvaluatorParameter.Value.ConstantOptimizationIterations; 401 var applyLinearScaling = EvaluatorParameter.Value.ApplyLinearScaling; 402 403 SymbolicRegressionConstantOptimizationEvaluator.OptimizeConstants( 404 interpreter, 405 model.SymbolicExpressionTree, 406 Problem.ProblemData, 407 Problem.ProblemData.TrainingIndices, 408 applyLinearScaling: applyLinearScaling, 409 maxIterations: iterations, 410 updateVariableWeights: false, 411 updateConstantsInTree: true); 391 Evaluator.Evaluate(Problem.ProblemData, tree); // this call will optimize the constants in the tree (if enabled) 412 392 413 393 model.Scale(Problem.ProblemData); -
branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/RSquaredEvaluator.cs
r16073 r16157 61 61 } 62 62 63 private IFixedValueParameter<IntValue> SeedParameter { 64 get { return (IFixedValueParameter<IntValue>)Parameters[SeedParameterName]; } 65 } 66 63 67 private int Restarts { 64 68 get { return RestartsParameter.Value.Value; } 65 69 set { RestartsParameter.Value.Value = value; } 70 } 71 72 private int Seed { 73 get { return SeedParameter.Value.Value; } 74 set { SeedParameter.Value.Value = value; } 66 75 } 67 76 … … 113 122 114 123 public double Evaluate(IRegressionProblemData problemData, ISymbolicExpressionTree tree) { 124 random.Seed((uint)Seed); // not the ideal solution for ensuring result consistency 115 125 return Evaluate(problemData, tree, random, OptimizeConstants, ConstantOptimizationIterations, ApplyLinearScaling, Restarts); 116 126 } -
branches/2886_SymRegGrammarEnumeration/Test/Test.csproj
r15974 r16157 119 119 <Compile Include="Properties\AssemblyInfo.cs" /> 120 120 <Compile Include="TreeHashingTest.cs" /> 121 <Compile Include="AlgorithmPerformanceTest.cs" /> 121 122 </ItemGroup> 122 123 <ItemGroup> -
branches/2886_SymRegGrammarEnumeration/Test/TreeHashingTest.cs
r16056 r16157 1 using System.Linq; 1 using System; 2 using System.Collections.Generic; 3 using System.Linq; 2 4 using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration; 3 5 using Microsoft.VisualStudio.TestTools.UnitTesting; … … 146 148 } 147 149 148 /* DEPRECATED; SINCE WE DO NOT ALLOW COMPOUND DIVISIONS 149 [TestMethod] 150 [TestCategory("TreeHashing")] 151 public void CompoundInverseCancellationToSingleInverse() { 152 SymbolList s1 = new SymbolList(new Symbol[] { varA, varB, grammar.Addition, grammar.Inv, grammar.Inv, grammar.Inv }); 153 SymbolList s2 = new SymbolList(new Symbol[] { varA, varB, grammar.Addition, grammar.Inv }); 154 155 int hash1 = grammar.CalcHashCode(s1); 156 int hash2 = grammar.CalcHashCode(s2); 157 158 Assert.AreEqual(hash1, hash2); 159 } 160 161 [TestMethod] 162 [TestCategory("TreeHashing")] 163 public void CompoundInverseCancellationToDivisor() { 164 SymbolList s1 = new SymbolList(new Symbol[] { varA, varB, grammar.Addition, grammar.Inv, grammar.Inv }); 165 SymbolList s2 = new SymbolList(new Symbol[] { varA, varB, grammar.Addition }); 166 167 int hash1 = grammar.CalcHashCode(s1); 168 int hash2 = grammar.CalcHashCode(s2); 169 170 Assert.AreEqual(hash1, hash2); 171 } 172 173 [TestMethod] 174 [TestCategory("TreeHashing")] 175 public void UncancelableCompoundInverse() { 176 // 1 / ( 1/b + sin(a*c) ) 177 SymbolList s1 = new SymbolList(new Symbol[] { varB, grammar.Inv, varA, varC, grammar.Multiplication, grammar.Sin, grammar.Addition, grammar.Inv }); 178 // b + sin(a*c) 179 SymbolList s2 = new SymbolList(new Symbol[] { varB, varA, varC, grammar.Multiplication, grammar.Sin, grammar.Addition }); 180 181 int hash1 = grammar.CalcHashCode(s1); 182 int hash2 = grammar.CalcHashCode(s2); 183 184 Assert.AreNotEqual(hash1, hash2); 185 }*/ 150 [TestMethod] 151 [TestCategory("TreeHashing")] 152 public void EnumerateGrammarTest() { 153 //const int nvars = 1; 154 //var variables = Enumerable.Range(1, nvars).Select(x => $"x{x}").ToArray(); 155 var 
variables = new[] { "b", "a" }; 156 var grammar = new Grammar(variables, Enum.GetValues(typeof(GrammarRule)).Cast<GrammarRule>()); 157 158 int hash(SymbolList s) => grammar.Hasher.CalcHashCode(s); 159 160 List<SymbolList> sentences = EnumerateGrammarBreadth(grammar, length: 20, hashPhrases: false).ToList(); 161 Console.WriteLine($"Breadth: {sentences.Count};{sentences.Select(hash).Distinct().Count() }"); 162 163 sentences = EnumerateGrammarBreadth(grammar, length: 20, hashPhrases: true).ToList(); 164 Console.WriteLine($"Breadth (hashed): {sentences.Count};{sentences.Select(hash).Distinct().Count() }"); 165 166 sentences = EnumerateGrammarDepth(grammar, length: 20, hashPhrases: false).ToList(); 167 Console.WriteLine($"Depth: {sentences.Count};{sentences.Select(hash).Distinct().Count() }"); 168 169 sentences = EnumerateGrammarDepth(grammar, length: 20, hashPhrases: true).ToList(); 170 Console.WriteLine($"Depth (hashed): {sentences.Count};{sentences.Select(hash).Distinct().Count() }"); 171 } 172 173 private static IEnumerable<SymbolList> EnumerateGrammarBreadth(Grammar grammar, int length, bool hashPhrases = true) { 174 var phrases = new Queue<SymbolList>(); 175 phrases.Enqueue(new SymbolList(grammar.StartSymbol)); 176 var sentences = new List<SymbolList>(); 177 var archive = new HashSet<int>(); 178 179 while (phrases.Any()) { 180 var phrase = phrases.Dequeue(); 181 182 if (phrase.Count > length) 183 continue; 184 185 if (phrase.IsSentence()) { 186 sentences.Add(phrase); 187 continue; 188 } 189 190 if (hashPhrases && !archive.Add(grammar.Hasher.CalcHashCode(phrase))) { 191 continue; 192 } 193 194 var idx = phrase.NextNonterminalIndex(); 195 var productions = grammar.Productions[phrase[idx]]; 196 var derived = productions.Select(p => phrase.DerivePhrase(idx, p)).Where(p => p.Count <= length); 197 foreach (var d in derived) 198 phrases.Enqueue(d); 199 } 200 return sentences; 201 } 202 203 private static IEnumerable<SymbolList> EnumerateGrammarDepth(Grammar grammar, int 
length, bool hashPhrases = true) { 204 return Expand(new SymbolList(grammar.StartSymbol), grammar, length, hashPhrases ? new HashSet<int>() : null); 205 } 206 207 private static IEnumerable<SymbolList> Expand(SymbolList phrase, Grammar grammar, int maxLength, HashSet<int> visited) { 208 if (phrase.Count > maxLength) { 209 yield break; 210 } 211 212 if (phrase.IsSentence()) { 213 yield return phrase; 214 yield break; 215 } 216 217 if (visited != null && !visited.Add(grammar.Hasher.CalcHashCode(phrase))) { 218 yield break; 219 } 220 221 var i = phrase.NextNonterminalIndex(); 222 var productions = grammar.Productions[phrase[i]]; 223 224 foreach (var s in productions.SelectMany(p => Expand(phrase.DerivePhrase(i, p), grammar, maxLength, visited))) 225 yield return s; 226 } 186 227 } 187 228 }
Note: See TracChangeset for help on using the changeset viewer.