- Timestamp:
- 07/11/18 10:34:21 (6 years ago)
- Location:
- branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration
- Files:
-
- 4 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/Analysis/RSquaredEvaluator.cs
r15985 r15993 2 2 using System.Diagnostics; 3 3 using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration; 4 using HeuristicLab.Analysis; 4 5 using HeuristicLab.Common; 5 6 using HeuristicLab.Core; … … 20 21 public static readonly string BestTrainingSolutionResultName = "Best solution (Training)"; 21 22 public static readonly string BestComplexityResultName = "Best solution complexity"; 23 public static readonly string BestSolutions = "Best solutions"; 22 24 23 25 private static readonly ISymbolicDataAnalysisExpressionTreeInterpreter expressionTreeLinearInterpreter = new SymbolicDataAnalysisExpressionTreeLinearInterpreter(); 24 25 public bool OptimizeConstants { get; set; }26 26 27 27 public RSquaredEvaluator() { } … … 31 31 32 32 protected RSquaredEvaluator(RSquaredEvaluator original, Cloner cloner) : base(original, cloner) { 33 this.OptimizeConstants = original.OptimizeConstants;34 33 } 35 34 … … 52 51 private void AlgorithmOnDistinctSentenceGenerated(object sender, PhraseAddedEventArgs phraseAddedEventArgs) { 53 52 GrammarEnumerationAlgorithm algorithm = (GrammarEnumerationAlgorithm)sender; 54 EvaluateSentence(algorithm, phraseAddedEventArgs.NewPhrase );53 EvaluateSentence(algorithm, phraseAddedEventArgs.NewPhrase, algorithm.OptimizeConstants); 55 54 } 56 55 … … 70 69 } 71 70 72 private void EvaluateSentence(GrammarEnumerationAlgorithm algorithm, SymbolString symbolString ) {71 private void EvaluateSentence(GrammarEnumerationAlgorithm algorithm, SymbolString symbolString, bool optimizeConstants) { 73 72 var results = algorithm.Results; 74 73 var grammar = algorithm.Grammar; … … 78 77 Debug.Assert(SymbolicRegressionConstantOptimizationEvaluator.CanOptimizeConstants(tree)); 79 78 80 double r2 = Evaluate(problemData, tree, OptimizeConstants);79 double r2 = Evaluate(problemData, tree, optimizeConstants); 81 80 double bestR2 = results.ContainsKey(BestTrainingQualityResultName) ? GetValue<double>(results[BestTrainingQualityResultName].Value) : 0.0; 82 81 if (r2 < bestR2) … … 90 89 results.AddOrUpdateResult(BestComplexityResultName, new IntValue(complexity)); 91 90 algorithm.BestTrainingSentence = symbolString; 91 92 // record best sentence quality & length 93 DataTable dt; 94 if (!results.ContainsKey(BestSolutions)) { 95 var names = new[] { "Quality", "Relative Length", "Complexity", "Timestamp" }; 96 dt = new DataTable(); 97 foreach (var name in names) { 98 dt.Rows.Add(new DataRow(name) { VisualProperties = { StartIndexZero = true } }); 99 } 100 results.AddOrUpdateResult(BestSolutions, dt); 101 } 102 dt = (DataTable)results[BestSolutions].Value; 103 dt.Rows["Quality"].Values.Add(r2); 104 dt.Rows["Relative Length"].Values.Add((double)symbolString.Count() / algorithm.MaxSentenceLength); 105 dt.Rows["Complexity"].Values.Add(complexity); 106 dt.Rows["Timestamp"].Values.Add(algorithm.ExecutionTime.TotalMilliseconds / 1000d); 92 107 } 93 108 } -
branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/Grammar.cs
r15975 r15993 243 243 } 244 244 245 // returns the maximum achievable sentence length below the maximum complexity 246 public int GetMaxSentenceLength(int maxComplexity) { 247 SymbolString s = new SymbolString(StartSymbol); 248 249 while (!s.IsSentence() && GetComplexity(s) <= maxComplexity) { 250 int expandedSymbolIndex = s.NextNonterminalIndex(); 251 NonterminalSymbol expandedSymbol = (NonterminalSymbol)s[expandedSymbolIndex]; 252 253 var productions = Productions[expandedSymbol]; 254 var longestProduction = productions // Find production with most terminal symbols to expand as much as possible... 255 .OrderBy(CountTerminals) // but with lowest complexity/nonterminal count to keep complexity low. 256 .ThenByDescending(CountNonTerminals) 257 .First(); 258 259 s = s.DerivePhrase(expandedSymbolIndex, longestProduction); 260 } 261 262 return s.Count(); 263 } 264 265 private int CountTerminals(Production p) { 266 return p.Count(s => s is TerminalSymbol); 267 } 268 269 private int CountNonTerminals(Production p) { 270 return p.Count(s => s is NonterminalSymbol); 271 } 272 245 273 public double EvaluatePhrase(SymbolString s, IRegressionProblemData problemData, bool optimizeConstants) { 246 274 SymbolicExpressionTree tree = ParseSymbolicExpressionTree(s); -
branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/GrammarEnumerationAlgorithm.cs
r15987 r15993 21 21 public class GrammarEnumerationAlgorithm : FixedDataAnalysisAlgorithm<IRegressionProblem> { 22 22 #region properties and result names 23 private readonly string VariableImportanceWeightName = "Variable Importance Weight";24 23 private readonly string SearchStructureSizeName = "Search Structure Size"; 25 24 private readonly string GeneratedPhrasesName = "Generated/Archived Phrases"; … … 38 37 private readonly string GuiUpdateIntervalParameterName = "GUI Update Interval"; 39 38 private readonly string GrammarSymbolsParameterName = "Grammar Symbols"; 40 private readonly string SearchCacheSizeParameterName = "Search Cache Size";41 39 private readonly string SearchDataStructureSizeParameterName = "Search Data Structure Size"; 42 40 … … 48 46 public override bool SupportsPause { get { return true; } } 49 47 50 protected IFixedValueParameter<DoubleValue> VariableImportanceWeightParameter {51 get { return (IFixedValueParameter<DoubleValue>)Parameters[VariableImportanceWeightName]; }52 }53 54 public double VariableImportanceWeight {55 get { return VariableImportanceWeightParameter.Value.Value; }56 set { VariableImportanceWeightParameter.Value.Value = value; }57 }58 59 48 protected IFixedValueParameter<BoolValue> OptimizeConstantsParameter { 60 49 get { return (IFixedValueParameter<BoolValue>)Parameters[OptimizeConstantsParameterName]; } … … 105 94 } 106 95 107 public IFixedValueParameter<IntValue> SearchCacheSizeParameter {108 get { return (IFixedValueParameter<IntValue>)Parameters[SearchCacheSizeParameterName]; }109 }110 111 public int SearchCacheSize {112 get { return SearchCacheSizeParameter.Value.Value; }113 }114 115 96 public StorageType SearchDataStructure { 116 97 get { return SearchDataStructureParameter.Value.Value; } … … 146 127 [Storable] 147 128 internal SearchDataStore OpenPhrases { get; private set; } // Stack/Queue/etc. for fetching the next node in the search tree. 129 130 [Storable] 131 public int MaxSentenceLength { get; private set; } 148 132 149 133 #region execution stats … … 178 162 SearchDataStructureParameter.Value.ValueChanged += (o, e) => Prepare(); 179 163 SearchDataStructureSizeParameter.Value.ValueChanged += (o, e) => Prepare(); 180 SearchCacheSizeParameter.Value.ValueChanged += (o, e) => Prepare();181 164 } 182 165 … … 188 171 SearchDataStructureParameter.Value.ValueChanged -= (o, e) => Prepare(); 189 172 SearchDataStructureSizeParameter.Value.ValueChanged -= (o, e) => Prepare(); 190 SearchCacheSizeParameter.Value.ValueChanged -= (o, e) => Prepare();191 173 } 192 174 … … 197 179 198 180 public GrammarEnumerationAlgorithm() { 199 Parameters.Add(new FixedValueParameter<DoubleValue>(VariableImportanceWeightName, "Variable Weight.", new DoubleValue(1.0)));200 181 Parameters.Add(new FixedValueParameter<BoolValue>(OptimizeConstantsParameterName, "Run constant optimization in sentence evaluation.", new BoolValue(false))); 201 182 Parameters.Add(new FixedValueParameter<DoubleValue>(ErrorWeightParameterName, "Defines, how much weight is put on a phrase's r² value when priorizing phrases during search.", new DoubleValue(0.8))); 202 183 Parameters.Add(new FixedValueParameter<IntValue>(MaxComplexityParameterName, "The maximum number of variable symbols in a sentence.", new IntValue(12))); 203 184 Parameters.Add(new FixedValueParameter<IntValue>(GuiUpdateIntervalParameterName, "Number of generated sentences, until GUI is refreshed.", new IntValue(5000))); 204 Parameters.Add(new FixedValueParameter<IntValue>(SearchCacheSizeParameterName, "The size of the search node cache.", new IntValue((int)1e5)));205 185 Parameters.Add(new FixedValueParameter<IntValue>(SearchDataStructureSizeParameterName, "The size of the search data structure.", new IntValue((int)1e5))); 206 Parameters.Add(new FixedValueParameter<EnumValue<StorageType>>(SearchDataStructureParameterName, new EnumValue<StorageType>(StorageType. PriorityQueue)));186 Parameters.Add(new FixedValueParameter<EnumValue<StorageType>>(SearchDataStructureParameterName, new EnumValue<StorageType>(StorageType.SortedSet))); 207 187 208 188 SearchDataStructureParameter.Value.ValueChanged += (o, e) => Prepare(); 209 189 SearchDataStructureSizeParameter.Value.ValueChanged += (o, e) => Prepare(); 210 SearchCacheSizeParameter.Value.ValueChanged += (o, e) => Prepare();211 190 212 191 var availableAnalyzers = new IGrammarEnumerationAnalyzer[] { … … 258 237 OverwrittenSentencesCount = original.OverwrittenSentencesCount; 259 238 PhraseExpansionCount = original.PhraseExpansionCount; 260 261 if (original.variableImportance != null)262 variableImportance = new Dictionary<VariableTerminalSymbol, double>(original.variableImportance);263 239 } 264 240 #endregion … … 274 250 PhraseExpansionCount = 0; 275 251 276 Analyzers.OfType<RSquaredEvaluator>().First().OptimizeConstants = OptimizeConstants;277 252 Grammar = new Grammar(Problem.ProblemData.AllowedInputVariables.ToArray(), GrammarSymbols.CheckedItems.Select(v => v.Value)); 278 OpenPhrases = new SearchDataStore(SearchDataStructure, SearchDataStructureSize, SearchCacheSize); // Select search strategy 279 280 CalculateVariableImportances(); 281 253 OpenPhrases = new SearchDataStore(SearchDataStructure, SearchDataStructureSize); // Select search strategy 282 254 base.Prepare(); // this actually clears the results which will get reinitialized on Run() 283 }284 285 private void CalculateVariableImportances() {286 variableImportance = new Dictionary<VariableTerminalSymbol, double>();287 288 RandomForestRegression rf = new RandomForestRegression();289 rf.Problem = Problem;290 rf.Start();291 IRegressionSolution rfSolution = (RandomForestRegressionSolution)rf.Results["Random forest regression solution"].Value;292 var rfImpacts = RegressionSolutionVariableImpactsCalculator.CalculateImpacts(293 rfSolution,294 RegressionSolutionVariableImpactsCalculator.DataPartitionEnum.Training,295 RegressionSolutionVariableImpactsCalculator.ReplacementMethodEnum.Shuffle);296 297 // save the normalized importances298 var sum = rfImpacts.Sum(x => x.Item2);299 foreach (Tuple<string, double> rfImpact in rfImpacts) {300 VariableTerminalSymbol varSym = Grammar.VarTerminals.First(v => v.StringRepresentation == rfImpact.Item1);301 variableImportance[varSym] = rfImpact.Item2 / sum;302 }303 255 } 304 256 … … 313 265 } 314 266 315 int maxSentenceLength = GetMaxSentenceLength();267 MaxSentenceLength = Grammar.GetMaxSentenceLength(MaxComplexity); 316 268 var errorWeight = ErrorWeight; 317 var variableImportanceWeight = VariableImportanceWeight;269 var optimizeConstants = OptimizeConstants; // cache value to avoid parameter lookup 318 270 // main search loop 319 271 while (OpenPhrases.Count > 0) { … … 375 327 376 328 bool isCompleteSentence = IsCompleteSentence(newPhrase); 377 double r2 = isCompleteSentence ? Grammar.EvaluatePhrase(newPhrase, Problem.ProblemData, OptimizeConstants) : fetchedSearchNode.R2;378 double phrasePriority = GetPriority(newPhrase, r2, maxSentenceLength, errorWeight, variableImportanceWeight);329 double r2 = isCompleteSentence ? Grammar.EvaluatePhrase(newPhrase, Problem.ProblemData, optimizeConstants) : fetchedSearchNode.R2; 330 double phrasePriority = GetPriority(newPhrase, r2, MaxSentenceLength); 379 331 380 332 SearchNode newSearchNode = new SearchNode(phraseHash, phrasePriority, r2, newPhrase); … … 386 338 } 387 339 388 protected double GetPriority(SymbolString phrase, double r2, int maxSentenceLength, double errorWeight, double variableImportanceWeight) { 389 var distinctVars = phrase.OfType<VariableTerminalSymbol>().Distinct(); 390 391 var sum = 0d; 392 foreach (var v in distinctVars) { 393 sum += variableImportance[v]; 394 } 395 var phraseVariableImportance = 1 - sum; 396 397 double relLength = (double)phrase.Count() / maxSentenceLength; 398 double error = 1.0 - r2; 399 return error * relLength; 340 protected static double GetPriority(SymbolString phrase, double r2, int maxSentenceLength) { 341 return (1 - r2) * phrase.Count() / maxSentenceLength; 400 342 } 401 343 402 344 private bool IsCompleteSentence(SymbolString phrase) { 403 345 return !phrase.Any(x => x is NonterminalSymbol && x != Grammar.Expr); 404 }405 406 private int GetMaxSentenceLength() {407 SymbolString s = new SymbolString(Grammar.StartSymbol);408 409 while (!s.IsSentence() && Grammar.GetComplexity(s) <= MaxComplexity) {410 int expandedSymbolIndex = s.NextNonterminalIndex();411 NonterminalSymbol expandedSymbol = (NonterminalSymbol)s[expandedSymbolIndex];412 413 var productions = Grammar.Productions[expandedSymbol];414 var longestProduction = productions // Find production with most terminal symbols to expand as much as possible...415 .OrderBy(CountTerminals) // but with lowest complexity/nonterminal count to keep complexity low.416 .ThenByDescending(CountNonTerminals)417 .First();418 419 s = s.DerivePhrase(expandedSymbolIndex, longestProduction);420 }421 422 return s.Count();423 }424 425 private int CountTerminals(Production p) {426 return p.Count(s => s is TerminalSymbol);427 }428 429 private int CountNonTerminals(Production p) {430 return p.Count(s => s is NonterminalSymbol);431 346 } 432 347 … … 461 376 } 462 377 378 var interpreter = new SymbolicDataAnalysisExpressionTreeLinearInterpreter(); 463 379 var tree = Grammar.ParseSymbolicExpressionTree(BestTrainingSentence); 464 var model = new SymbolicRegressionModel(Problem.ProblemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeLinearInterpreter()); 465 model.Scale(Problem.ProblemData); 380 var model = new SymbolicRegressionModel(Problem.ProblemData.TargetVariable, tree, interpreter); 381 382 SymbolicRegressionConstantOptimizationEvaluator.OptimizeConstants( 383 interpreter, 384 model.SymbolicExpressionTree, 385 Problem.ProblemData, 386 Problem.ProblemData.TrainingIndices, 387 applyLinearScaling: true, 388 maxIterations: 10, 389 updateVariableWeights: false, 390 updateConstantsInTree: true); 391 466 392 var bestTrainingSolution = new SymbolicRegressionSolution(model, Problem.ProblemData); 467 393 Results.AddOrUpdateResult(BestTrainingModelResultName, model); -
branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/SearchDataStructure.cs
r15977 r15993 52 52 class SearchDataStore : DeepCloneable, IEnumerable<SearchNode> { 53 53 [Storable] 54 private LruCache<int, SearchNode> storedValues; 55 //private Dictionary<int, SearchNode> storedValues; // Store hash-references and associated, actual values 54 private Dictionary<int, SearchNode> storedValues; // Store hash-references and associated, actual values 56 55 57 56 [Storable] … … 72 71 [Storable] 73 72 private int searchDataStructureSize; // storage size for search nodes 74 75 [Storable]76 private int cacheSize; // cache for already explored search nodes77 73 78 74 [ExcludeFromObjectGraphTraversal] … … 87 83 protected SearchDataStore(bool deserializing) : this() { } 88 84 89 public SearchDataStore(StorageType storageType, int searchDataStructureSize = (int)1e5 , int cacheSize = (int)1e5) {85 public SearchDataStore(StorageType storageType, int searchDataStructureSize = (int)1e5) { 90 86 this.storageType = storageType; 91 87 92 88 this.searchDataStructureSize = searchDataStructureSize; 93 this.cacheSize = cacheSize; 94 95 storedValues = new LruCache<int, SearchNode>(this.cacheSize); 89 90 storedValues = new Dictionary<int, SearchNode>(); 96 91 InitSearchDataStructure(); 97 92 } … … 142 137 143 138 protected SearchDataStore(SearchDataStore original, Cloner cloner) : base(original, cloner) { 144 storedValues = cloner.Clone(original.storedValues);139 storedValues = original.storedValues.ToDictionary(x => x.Key, x => cloner.Clone(x.Value)); 145 140 storageType = original.storageType; 146 cacheSize = original.cacheSize;147 141 searchDataStructureSize = original.searchDataStructureSize; 148 142 … … 204 198 var max = sortedSet.Max; 205 199 sortedSet.Remove(max); 206 storedValues.Remove(max.Item2); 200 storedValues.Remove(max.Item2); // should always be in sync with the sorted set 207 201 } 208 202 sortedSet.Add(Tuple.Create(prio, hash)); … … 211 205 var elem = sortedSet.FirstOrDefault(); 212 206 if (elem == null) 213 return 0;207 return default(int); 214 208 sortedSet.Remove(elem); 215 209 return elem.Item2; … … 226 220 // size is the 0-based index of the last used element 227 221 if (priorityQueue.Size == capacity - 1) { 228 // if the queue is at capacity we have to replace 229 return; 222 return; // if the queue is at capacity we have to return 230 223 } 231 224 priorityQueue.Insert(prio, hash);
Note: See TracChangeset
for help on using the changeset viewer.