Changeset 15910 for branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/GrammarEnumerationAlgorithm.cs
- Timestamp:
- 04/17/18 16:59:57 (6 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/GrammarEnumerationAlgorithm.cs
r15907 r15910 2 2 using System.Collections.Generic; 3 3 using System.Linq; 4 using System.Security.Cryptography;5 4 using System.Threading; 6 5 using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration; … … 32 31 33 32 private readonly string OptimizeConstantsParameterName = "Optimize Constants"; 33 private readonly string ErrorWeightParameterName = "Error Weight"; 34 34 private readonly string SearchDataStructureParameterName = "Search Data Structure"; 35 35 private readonly string MaxComplexityParameterName = "Max. Complexity"; … … 53 53 get { return MaxComplexityParameter.Value.Value; } 54 54 set { MaxComplexityParameter.Value.Value = value; } 55 } 56 57 protected IValueParameter<DoubleValue> ErrorWeightParameter { 58 get { return (IValueParameter<DoubleValue>)Parameters[ErrorWeightParameterName]; } 59 } 60 public double ErrorWeight { 61 get { return ErrorWeightParameter.Value.Value; } 62 set { ErrorWeightParameter.Value.Value = value; } 55 63 } 56 64 … … 105 113 public GrammarEnumerationAlgorithm() { 106 114 Problem = new RegressionProblem() { 107 ProblemData = new HeuristicLab.Problems.Instances.DataAnalysis. NguyenFunctionNine(seed: 1234).GenerateRegressionData()115 ProblemData = new HeuristicLab.Problems.Instances.DataAnalysis.PolyTen(seed: 1234).GenerateRegressionData() 108 116 }; 109 117 110 Parameters.Add(new ValueParameter<BoolValue>(OptimizeConstantsParameterName, "Run constant optimization in sentence evaluation.", new BoolValue(true))); 111 Parameters.Add(new ValueParameter<IntValue>(MaxComplexityParameterName, "The maximum number of variable symbols in a sentence.", new IntValue(5))); 112 Parameters.Add(new ValueParameter<IntValue>(GuiUpdateIntervalParameterName, "Number of generated sentences, until GUI is refreshed.", new IntValue(100000))); 113 Parameters.Add(new ValueParameter<EnumValue<StorageType>>(SearchDataStructureParameterName, new EnumValue<StorageType>(StorageType.Stack))); 118 Parameters.Add(new ValueParameter<BoolValue>(OptimizeConstantsParameterName, "Run constant optimization in sentence evaluation.", new BoolValue(false))); 119 Parameters.Add(new ValueParameter<DoubleValue>(ErrorWeightParameterName, "Defines, how much weight is put on a phrase's r² value when priorizing phrases during search.", new DoubleValue(0.8))); 120 Parameters.Add(new ValueParameter<IntValue>(MaxComplexityParameterName, "The maximum number of variable symbols in a sentence.", new IntValue(12))); 121 Parameters.Add(new ValueParameter<IntValue>(GuiUpdateIntervalParameterName, "Number of generated sentences, until GUI is refreshed.", new IntValue(5000))); 122 Parameters.Add(new ValueParameter<EnumValue<StorageType>>(SearchDataStructureParameterName, new EnumValue<StorageType>(StorageType.PriorityQueue))); 114 123 115 124 var availableAnalyzers = new IGrammarEnumerationAnalyzer[] { … … 125 134 Analyzers.SetItemCheckedState(analyzer, false); 126 135 } 127 Analyzers.CheckedItemsChanged += AnalyzersOnCheckedItemsChanged;128 136 Analyzers.SetItemCheckedState(Analyzers.First(analyzer => analyzer is RSquaredEvaluator), true); 129 Analyzers.SetItemCheckedState(Analyzers.First(analyzer => analyzer is SentenceLogger), true); 130 } 131 132 public GrammarEnumerationAlgorithm(GrammarEnumerationAlgorithm original, Cloner cloner) : base(original, cloner) { } 137 //Analyzers.SetItemCheckedState(Analyzers.First(analyzer => analyzer is SentenceLogger), true); 138 } 139 140 public GrammarEnumerationAlgorithm(GrammarEnumerationAlgorithm original, Cloner cloner) : base(original, cloner) { 141 142 143 } 133 144 #endregion 134 145 … … 136 147 #region init 137 148 InitResults(); 149 150 foreach (IGrammarEnumerationAnalyzer grammarEnumerationAnalyzer in Analyzers) { 151 if (Analyzers.ItemChecked(grammarEnumerationAnalyzer)) { 152 grammarEnumerationAnalyzer.Register(this); 153 } else { 154 grammarEnumerationAnalyzer.Deregister(this); 155 } 156 } 138 157 139 158 Analyzers.OfType<RSquaredEvaluator>().First().OptimizeConstants = OptimizeConstants; … … 152 171 var phrase0Hash = Grammar.Hasher.CalcHashCode(phrase0); 153 172 #endregion 173 174 int maxSentenceLength = GetMaxSentenceLength(); 154 175 155 176 OpenPhrases.Store(phrase0Hash, 0.0, phrase0); … … 203 224 204 225 } else if (!OpenPhrases.Contains(phraseHash) && !ArchivedPhrases.Contains(phraseHash)) { 205 double phrasePriority = GetPriority(newPhrase );226 double phrasePriority = GetPriority(newPhrase, maxSentenceLength); 206 227 OpenPhrases.Store(phraseHash, phrasePriority, newPhrase); 207 228 } … … 212 233 } 213 234 214 protected double GetPriority(SymbolString phrase) { 215 double complexity = (double)Grammar.GetComplexity(phrase); 216 217 double length = phrase.Count(); 218 double relLength = (length - 2) / (MaxComplexity * 7); 235 protected double GetPriority(SymbolString phrase, int maxSentenceLength) { 236 double relLength = (double)phrase.Count() / maxSentenceLength; 237 219 238 double r2 = Grammar.EvaluatePhrase(phrase, Problem.ProblemData, OptimizeConstants); 220 239 double error = 1.0 - r2; 221 240 222 double variables = 0; 223 for (int i = 0; i < phrase.Count(); i++) { 224 if (phrase[i] is VariableTerminalSymbol) variables++; 225 } 226 227 double variableRatio = 1.0 - variables / complexity; 228 229 return 1.5*relLength + error; 241 return relLength + ErrorWeight * error; 242 } 243 244 private int GetMaxSentenceLength() { 245 SymbolString s = new SymbolString(Grammar.StartSymbol); 246 247 while (Grammar.GetComplexity(s) <= MaxComplexity) { 248 int expandedSymbolIndex = s.NextNonterminalIndex(); 249 NonterminalSymbol expandedSymbol = (NonterminalSymbol)s[expandedSymbolIndex]; 250 251 var productions = Grammar.Productions[expandedSymbol]; 252 var longestProduction = productions // Find production with most terminal symbols to expand as much as possible... 253 .OrderBy(CountTerminals) // but with lowest complexity/nonterminal count to keep complexity low. 254 .ThenByDescending(CountNonTerminals) 255 .First(); 256 257 s = s.DerivePhrase(expandedSymbolIndex, longestProduction); 258 } 259 260 return s.Count(); 261 } 262 263 private int CountTerminals(Production p) { 264 return p.Count(s => s is TerminalSymbol); 265 } 266 267 private int CountNonTerminals(Production p) { 268 return p.Count(s => s is NonterminalSymbol); 230 269 } 231 270 … … 263 302 264 303 #region events 265 private void AnalyzersOnCheckedItemsChanged(object sender, CollectionItemsChangedEventArgs<IGrammarEnumerationAnalyzer> collectionItemsChangedEventArgs) {266 foreach (IGrammarEnumerationAnalyzer grammarEnumerationAnalyzer in collectionItemsChangedEventArgs.Items) {267 if (Analyzers.ItemChecked(grammarEnumerationAnalyzer)) {268 grammarEnumerationAnalyzer.Register(this);269 } else {270 grammarEnumerationAnalyzer.Deregister(this);271 }272 }273 }274 275 304 public event EventHandler<PhraseEventArgs> PhraseFetched; 276 305 private void OnPhraseFetched(int hash, SymbolString symbolString) {
Note: See TracChangeset
for help on using the changeset viewer.