Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/GrammarEnumerationAlgorithm.cs @ 15824

Last change on this file since 15824 was 15824, checked in by lkammere, 6 years ago

#2886: Move R² calculation of sentences to separate class and allow its deactivation.

File size: 14.1 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Linq;
4using System.Threading;
5using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration;
6using HeuristicLab.Collections;
7using HeuristicLab.Common;
8using HeuristicLab.Core;
9using HeuristicLab.Data;
10using HeuristicLab.Optimization;
11using HeuristicLab.Parameters;
12using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
13using HeuristicLab.Problems.DataAnalysis;
14
15namespace HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration {
16  [Item("Grammar Enumeration Symbolic Regression", "Iterates all possible model structures for a fixed grammar.")]
17  [StorableClass]
18  [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 250)]
19  public class GrammarEnumerationAlgorithm : FixedDataAnalysisAlgorithm<IRegressionProblem> {
20    #region properties and result names
21    private readonly string SearchStructureSizeName = "Search Structure Size";
22    private readonly string GeneratedPhrasesName = "Generated/Archived Phrases";
23    private readonly string GeneratedSentencesName = "Generated Sentences";
24    private readonly string DistinctSentencesName = "Distinct Sentences";
25    private readonly string PhraseExpansionsName = "Phrase Expansions";
26    private readonly string AverageSentenceLengthName = "Avg. Sentence Length among Distinct";
27    private readonly string OverwrittenSentencesName = "Sentences overwritten";
28    private readonly string AnalyzersParameterName = "Analyzers";
29    private readonly string ExpansionsPerSecondName = "Expansions per second";
30
31
32    private readonly string SearchDataStructureParameterName = "Search Data Structure";
33    private readonly string MaxTreeSizeParameterName = "Max. Tree Nodes";
34    private readonly string GuiUpdateIntervalParameterName = "GUI Update Interval";
35
36    public override bool SupportsPause { get { return false; } }
37
38    protected IValueParameter<IntValue> MaxTreeSizeParameter {
39      get { return (IValueParameter<IntValue>)Parameters[MaxTreeSizeParameterName]; }
40    }
41    public int MaxTreeSize {
42      get { return MaxTreeSizeParameter.Value.Value; }
43      set { MaxTreeSizeParameter.Value.Value = value; }
44    }
45
46    protected IValueParameter<IntValue> GuiUpdateIntervalParameter {
47      get { return (IValueParameter<IntValue>)Parameters[GuiUpdateIntervalParameterName]; }
48    }
49    public int GuiUpdateInterval {
50      get { return GuiUpdateIntervalParameter.Value.Value; }
51      set { GuiUpdateIntervalParameter.Value.Value = value; }
52    }
53
54    protected IValueParameter<EnumValue<StorageType>> SearchDataStructureParameter {
55      get { return (IValueParameter<EnumValue<StorageType>>)Parameters[SearchDataStructureParameterName]; }
56    }
57    public StorageType SearchDataStructure {
58      get { return SearchDataStructureParameter.Value.Value; }
59      set { SearchDataStructureParameter.Value.Value = value; }
60    }
61
62    public IFixedValueParameter<ReadOnlyCheckedItemCollection<IGrammarEnumerationAnalyzer>> AnalyzersParameter {
63      get { return (IFixedValueParameter<ReadOnlyCheckedItemCollection<IGrammarEnumerationAnalyzer>>)Parameters[AnalyzersParameterName]; }
64    }
65
66    public ICheckedItemCollection<IGrammarEnumerationAnalyzer> Analyzers {
67      get { return AnalyzersParameter.Value; }
68    }
69
70    public SymbolString BestTrainingSentence { get; set; }     // Currently set in RSquaredEvaluator: quite hacky, but makes testing much easier for now...
71    #endregion
72
73    public Dictionary<int, int> DistinctSentencesLength { get; private set; }  // Semantically distinct sentences and their length in a run.
74    public HashSet<int> ArchivedPhrases { get; private set; }
75    internal SearchDataStore OpenPhrases { get; private set; }           // Stack/Queue/etc. for fetching the next node in the search tree. 
76
77    #region execution stats
78    public int AllGeneratedSentencesCount { get; private set; }
79
80    public int OverwrittenSentencesCount { get; private set; } // It is not guaranteed that shorter solutions are found first.
81                                                               // When longer solutions are overwritten with shorter ones,
82                                                               // this counter is increased.
83    public int PhraseExpansionCount { get; private set; }      // Number, how many times a nonterminal symbol is replaced with a production rule.
84    #endregion
85
86    public Grammar Grammar { get; private set; }
87
88
89    #region ctors
90    public override IDeepCloneable Clone(Cloner cloner) {
91      return new GrammarEnumerationAlgorithm(this, cloner);
92    }
93
94    public GrammarEnumerationAlgorithm() {
95      Problem = new RegressionProblem() {
96        ProblemData = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenFunctionNine(seed: 1234).GenerateRegressionData()
97      };
98
99      Parameters.Add(new ValueParameter<IntValue>(MaxTreeSizeParameterName, "The number of clusters.", new IntValue(6)));
100      Parameters.Add(new ValueParameter<IntValue>(GuiUpdateIntervalParameterName, "Number of generated sentences, until GUI is refreshed.", new IntValue(1000)));
101      Parameters.Add(new ValueParameter<EnumValue<StorageType>>(SearchDataStructureParameterName, new EnumValue<StorageType>(StorageType.Stack)));
102
103      var availableAnalyzers = new IGrammarEnumerationAnalyzer[] {
104        new SearchGraphVisualizer(),
105        new SentenceLogger(),
106        new RSquaredEvaluator()
107      };
108      Parameters.Add(new FixedValueParameter<ReadOnlyCheckedItemCollection<IGrammarEnumerationAnalyzer>>(
109        AnalyzersParameterName,
110        new CheckedItemCollection<IGrammarEnumerationAnalyzer>(availableAnalyzers).AsReadOnly()));
111
112      foreach (var analyzer in Analyzers) {
113        Analyzers.SetItemCheckedState(analyzer, false);
114      }
115      Analyzers.CheckedItemsChanged += AnalyzersOnCheckedItemsChanged;
116      Analyzers.SetItemCheckedState(Analyzers.First(analyzer => analyzer is RSquaredEvaluator), true);
117    }
118
119    public GrammarEnumerationAlgorithm(GrammarEnumerationAlgorithm original, Cloner cloner) : base(original, cloner) { }
120    #endregion
121
122    protected override void Run(CancellationToken cancellationToken) {
123      #region init
124      InitResults();
125
126      ArchivedPhrases = new HashSet<int>();
127
128      DistinctSentencesLength = new Dictionary<int, int>();
129      AllGeneratedSentencesCount = 0;
130      OverwrittenSentencesCount = 0;
131      PhraseExpansionCount = 0;
132
133      Grammar = new Grammar(Problem.ProblemData.AllowedInputVariables.ToArray());
134
135      OpenPhrases = new SearchDataStore(SearchDataStructure); // Select search strategy
136      var phrase0 = new SymbolString(new[] { Grammar.StartSymbol });
137      var phrase0Hash = Grammar.CalcHashCode(phrase0);
138      #endregion
139
140      OpenPhrases.Store(phrase0Hash, phrase0);
141      while (OpenPhrases.Count > 0) {
142        if (cancellationToken.IsCancellationRequested) break;
143
144        StoredSymbolString fetchedPhrase = OpenPhrases.GetNext();
145        SymbolString currPhrase = fetchedPhrase.SymbolString;
146
147        OnPhraseFetched(fetchedPhrase.Hash, currPhrase);
148
149        ArchivedPhrases.Add(fetchedPhrase.Hash);
150
151        // expand next nonterminal symbols
152        int nonterminalSymbolIndex = currPhrase.FindIndex(s => s is NonterminalSymbol);
153        NonterminalSymbol expandedSymbol = currPhrase[nonterminalSymbolIndex] as NonterminalSymbol;
154
155        foreach (Production productionAlternative in expandedSymbol.Alternatives) {
156          PhraseExpansionCount++;
157
158          SymbolString newPhrase = new SymbolString(currPhrase.Count + productionAlternative.Count);
159          newPhrase.AddRange(currPhrase);
160          newPhrase.RemoveAt(nonterminalSymbolIndex);     // TODO: removeat and insertRange are both O(n)
161          newPhrase.InsertRange(nonterminalSymbolIndex, productionAlternative);
162
163          if (newPhrase.Count <= MaxTreeSize) {
164            var phraseHash = Grammar.CalcHashCode(newPhrase);
165
166            OnPhraseDerived(fetchedPhrase.Hash, fetchedPhrase.SymbolString, phraseHash, newPhrase, expandedSymbol, productionAlternative);
167
168            if (newPhrase.IsSentence()) {
169              AllGeneratedSentencesCount++;
170
171              OnSentenceGenerated(fetchedPhrase.Hash, fetchedPhrase.SymbolString, phraseHash, newPhrase, expandedSymbol, productionAlternative);
172
173              if (!DistinctSentencesLength.ContainsKey(phraseHash) || DistinctSentencesLength[phraseHash] > newPhrase.Count) {
174                if (DistinctSentencesLength.ContainsKey(phraseHash)) OverwrittenSentencesCount++; // for analysis only
175
176                DistinctSentencesLength[phraseHash] = newPhrase.Count;
177                OnDistinctSentenceGenerated(fetchedPhrase.Hash, fetchedPhrase.SymbolString, phraseHash, newPhrase, expandedSymbol, productionAlternative);
178              }
179              UpdateView();
180
181            } else if (!OpenPhrases.Contains(phraseHash) && !ArchivedPhrases.Contains(phraseHash)) {
182              OpenPhrases.Store(phraseHash, newPhrase);
183            }
184          }
185        }
186      }
187      UpdateView(force: true);
188    }
189
190    #region Visualization in HL
191    // Initialize entries in result set.
192    private void InitResults() {
193      Results.Add(new Result(GeneratedPhrasesName, new IntValue(0)));
194      Results.Add(new Result(SearchStructureSizeName, new IntValue(0)));
195      Results.Add(new Result(GeneratedSentencesName, new IntValue(0)));
196      Results.Add(new Result(DistinctSentencesName, new IntValue(0)));
197      Results.Add(new Result(PhraseExpansionsName, new IntValue(0)));
198      Results.Add(new Result(OverwrittenSentencesName, new IntValue(0)));
199      Results.Add(new Result(AverageSentenceLengthName, new DoubleValue(1.0)));
200      Results.Add(new Result(ExpansionsPerSecondName, "In Thousand expansions per second", new IntValue(0)));
201    }
202
203    // Update the view for intermediate results in an algorithm run.
204    private int updates;
205    private void UpdateView(bool force = false) {
206      updates++;
207
208      if (force || updates % GuiUpdateInterval == 1) {
209        ((IntValue)Results[GeneratedPhrasesName].Value).Value = ArchivedPhrases.Count;
210        ((IntValue)Results[SearchStructureSizeName].Value).Value = OpenPhrases.Count;
211        ((IntValue)Results[GeneratedSentencesName].Value).Value = AllGeneratedSentencesCount;
212        ((IntValue)Results[DistinctSentencesName].Value).Value = DistinctSentencesLength.Count;
213        ((IntValue)Results[PhraseExpansionsName].Value).Value = PhraseExpansionCount;
214        ((DoubleValue)Results[AverageSentenceLengthName].Value).Value = DistinctSentencesLength.Select(pair => pair.Value).Average();
215        ((IntValue)Results[OverwrittenSentencesName].Value).Value = OverwrittenSentencesCount;
216        ((IntValue)Results[ExpansionsPerSecondName].Value).Value = (int)((PhraseExpansionCount /
217                                                                          ExecutionTime.TotalSeconds) / 1000.0);
218      }
219    }
220    #endregion
221
222    #region events
223    private void AnalyzersOnCheckedItemsChanged(object sender, CollectionItemsChangedEventArgs<IGrammarEnumerationAnalyzer> collectionItemsChangedEventArgs) {
224      foreach (IGrammarEnumerationAnalyzer grammarEnumerationAnalyzer in collectionItemsChangedEventArgs.Items) {
225        if (Analyzers.ItemChecked(grammarEnumerationAnalyzer)) {
226          grammarEnumerationAnalyzer.Register(this);
227        } else {
228          grammarEnumerationAnalyzer.Deregister(this);
229        }
230      }
231    }
232
233    public event EventHandler<PhraseEventArgs> PhraseFetched;
234    private void OnPhraseFetched(int hash, SymbolString symbolString) {
235      if (PhraseFetched != null) {
236        PhraseFetched(this, new PhraseEventArgs(hash, symbolString));
237      }
238    }
239
240    public event EventHandler<PhraseAddedEventArgs> PhraseDerived;
241    private void OnPhraseDerived(int parentHash, SymbolString parentSymbolString, int addedHash, SymbolString addedSymbolString, Symbol expandedSymbol, Production expandedProduction) {
242      if (PhraseDerived != null) {
243        PhraseDerived(this, new PhraseAddedEventArgs(parentHash, parentSymbolString, addedHash, addedSymbolString, expandedSymbol, expandedProduction));
244      }
245    }
246
247    public event EventHandler<PhraseAddedEventArgs> SentenceGenerated;
248    private void OnSentenceGenerated(int parentHash, SymbolString parentSymbolString, int addedHash, SymbolString addedSymbolString, Symbol expandedSymbol, Production expandedProduction) {
249      if (SentenceGenerated != null) {
250        SentenceGenerated(this, new PhraseAddedEventArgs(parentHash, parentSymbolString, addedHash, addedSymbolString, expandedSymbol, expandedProduction));
251      }
252    }
253
254    public event EventHandler<PhraseAddedEventArgs> DistinctSentenceGenerated;
255    private void OnDistinctSentenceGenerated(int parentHash, SymbolString parentSymbolString, int addedHash, SymbolString addedSymbolString, Symbol expandedSymbol, Production expandedProduction) {
256      if (DistinctSentenceGenerated != null) {
257        DistinctSentenceGenerated(this, new PhraseAddedEventArgs(parentHash, parentSymbolString, addedHash, addedSymbolString, expandedSymbol, expandedProduction));
258      }
259    }
260
261    #endregion
262
263  }
264
265  #region events for analysis
266
267  public class PhraseEventArgs : EventArgs {
268    public int Hash { get; }
269
270    public SymbolString Phrase { get; }
271
272    public PhraseEventArgs(int hash, SymbolString phrase) {
273      Hash = hash;
274      Phrase = phrase;
275    }
276  }
277
278  public class PhraseAddedEventArgs : EventArgs {
279    public int ParentHash { get; }
280    public int NewHash { get; }
281
282    public SymbolString ParentPhrase { get; }
283    public SymbolString NewPhrase { get; }
284
285    public Symbol ExpandedSymbol { get; }
286
287    public Production ExpandedProduction { get; }
288
289    public PhraseAddedEventArgs(int parentHash, SymbolString parentPhrase, int newHash, SymbolString newPhrase, Symbol expandedSymbol, Production expandedProduction) {
290      ParentHash = parentHash;
291      ParentPhrase = parentPhrase;
292      NewHash = newHash;
293      NewPhrase = newPhrase;
294      ExpandedSymbol = expandedSymbol;
295      ExpandedProduction = expandedProduction;
296    }
297  }
298
299  #endregion
300}
Note: See TracBrowser for help on using the repository browser.