Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/GrammarEnumerationAlgorithm.cs @ 15850

Last change on this file since 15850 was 15843, checked in by lkammere, 6 years ago

#2886: Remove duplicates in logged sentences using bash commands.

File size: 14.0 KB
RevLine 
[15765]1using System;
2using System.Collections.Generic;
[15712]3using System.Linq;
4using System.Threading;
5using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration;
[15821]6using HeuristicLab.Collections;
[15712]7using HeuristicLab.Common;
8using HeuristicLab.Core;
9using HeuristicLab.Data;
10using HeuristicLab.Optimization;
[15722]11using HeuristicLab.Parameters;
[15712]12using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
13using HeuristicLab.Problems.DataAnalysis;
14
15namespace HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration {
/// <summary>
/// Enumerates the phrases that can be derived from a fixed grammar, starting at the grammar's
/// start symbol. Derived phrases containing more than <see cref="MaxTreeSize"/> symbols are
/// discarded. Phrases and sentences are identified by the hash computed by the grammar's hasher;
/// analyzers can observe the search through the events raised by this class.
/// </summary>
[Item("Grammar Enumeration Symbolic Regression", "Iterates all possible model structures for a fixed grammar.")]
[StorableClass]
[Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 250)]
public class GrammarEnumerationAlgorithm : FixedDataAnalysisAlgorithm<IRegressionProblem> {
  #region properties and result names
  // Names of the entries written to the result collection.
  private const string SearchStructureSizeName = "Search Structure Size";
  private const string GeneratedPhrasesName = "Generated/Archived Phrases";
  private const string GeneratedSentencesName = "Generated Sentences";
  private const string DistinctSentencesName = "Distinct Sentences";
  private const string PhraseExpansionsName = "Phrase Expansions";
  private const string AverageSentenceLengthName = "Avg. Sentence Length among Distinct";
  private const string OverwrittenSentencesName = "Sentences overwritten";
  private const string ExpansionsPerSecondName = "Expansions per second";

  // Names of the algorithm parameters.
  private const string AnalyzersParameterName = "Analyzers";
  private const string SearchDataStructureParameterName = "Search Data Structure";
  private const string MaxTreeSizeParameterName = "Max. Tree Nodes";
  private const string GuiUpdateIntervalParameterName = "GUI Update Interval";

  public override bool SupportsPause { get { return false; } }

  protected IValueParameter<IntValue> MaxTreeSizeParameter {
    get { return (IValueParameter<IntValue>)Parameters[MaxTreeSizeParameterName]; }
  }

  /// <summary>Upper bound (inclusive) on the number of symbols of a derived phrase.</summary>
  public int MaxTreeSize {
    get { return MaxTreeSizeParameter.Value.Value; }
    set { MaxTreeSizeParameter.Value.Value = value; }
  }

  protected IValueParameter<IntValue> GuiUpdateIntervalParameter {
    get { return (IValueParameter<IntValue>)Parameters[GuiUpdateIntervalParameterName]; }
  }

  /// <summary>Number of generated sentences between two refreshes of the result values.</summary>
  public int GuiUpdateInterval {
    get { return GuiUpdateIntervalParameter.Value.Value; }
    set { GuiUpdateIntervalParameter.Value.Value = value; }
  }

  protected IValueParameter<EnumValue<StorageType>> SearchDataStructureParameter {
    get { return (IValueParameter<EnumValue<StorageType>>)Parameters[SearchDataStructureParameterName]; }
  }

  /// <summary>Kind of data structure used to hold the phrases that still have to be expanded.</summary>
  public StorageType SearchDataStructure {
    get { return SearchDataStructureParameter.Value.Value; }
    set { SearchDataStructureParameter.Value.Value = value; }
  }

  public IFixedValueParameter<ReadOnlyCheckedItemCollection<IGrammarEnumerationAnalyzer>> AnalyzersParameter {
    get { return (IFixedValueParameter<ReadOnlyCheckedItemCollection<IGrammarEnumerationAnalyzer>>)Parameters[AnalyzersParameterName]; }
  }

  public ICheckedItemCollection<IGrammarEnumerationAnalyzer> Analyzers {
    get { return AnalyzersParameter.Value; }
  }

  public SymbolString BestTrainingSentence { get; set; }     // Currently set in RSquaredEvaluator: quite hacky, but makes testing much easier for now...
  #endregion

  /// <summary>Maps the hash of every distinct sentence found in a run to the shortest length seen for it.</summary>
  public Dictionary<int, int> DistinctSentencesLength { get; private set; }

  /// <summary>Hashes of all phrases that were already fetched for expansion.</summary>
  public HashSet<int> ArchivedPhrases { get; private set; }

  // Stack/Queue/etc. for fetching the next node in the search tree.
  internal SearchDataStore OpenPhrases { get; private set; }

  #region execution stats
  /// <summary>Number of sentences generated in the current run, duplicates included.</summary>
  public int AllGeneratedSentencesCount { get; private set; }

  /// <summary>
  /// It is not guaranteed that shorter solutions are found first. This counter is increased
  /// whenever the stored length of a known sentence is replaced by a shorter one.
  /// </summary>
  public int OverwrittenSentencesCount { get; private set; }

  /// <summary>Number of times a nonterminal symbol was replaced using a production rule.</summary>
  public int PhraseExpansionCount { get; private set; }
  #endregion

  public Grammar Grammar { get; private set; }

  #region ctors
  public override IDeepCloneable Clone(Cloner cloner) {
    return new GrammarEnumerationAlgorithm(this, cloner);
  }

  /// <summary>
  /// Creates the algorithm with a default regression problem, its parameters and the available
  /// analyzers. Only the R² evaluator and the sentence logger are checked initially.
  /// </summary>
  public GrammarEnumerationAlgorithm() {
    Problem = new RegressionProblem() {
      ProblemData = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenFunctionNine(seed: 1234).GenerateRegressionData()
    };

    Parameters.Add(new ValueParameter<IntValue>(MaxTreeSizeParameterName, "The maximum number of symbols a derived phrase may contain.", new IntValue(6)));
    Parameters.Add(new ValueParameter<IntValue>(GuiUpdateIntervalParameterName, "Number of generated sentences, until GUI is refreshed.", new IntValue(100000)));
    Parameters.Add(new ValueParameter<EnumValue<StorageType>>(SearchDataStructureParameterName, new EnumValue<StorageType>(StorageType.Stack)));

    var availableAnalyzers = new IGrammarEnumerationAnalyzer[] {
      new SearchGraphVisualizer(),
      new SentenceLogger(),
      new RSquaredEvaluator()
    };
    Parameters.Add(new FixedValueParameter<ReadOnlyCheckedItemCollection<IGrammarEnumerationAnalyzer>>(
      AnalyzersParameterName,
      new CheckedItemCollection<IGrammarEnumerationAnalyzer>(availableAnalyzers).AsReadOnly()));

    // Uncheck everything before the handler is attached, so that no analyzer is deregistered
    // without ever having been registered.
    foreach (var analyzer in Analyzers) {
      Analyzers.SetItemCheckedState(analyzer, false);
    }
    Analyzers.CheckedItemsChanged += AnalyzersOnCheckedItemsChanged;
    Analyzers.SetItemCheckedState(Analyzers.First(analyzer => analyzer is RSquaredEvaluator), true);
    Analyzers.SetItemCheckedState(Analyzers.First(analyzer => analyzer is SentenceLogger), true);
  }

  /// <summary>
  /// Copy constructor used by <see cref="Clone"/>. Besides delegating to the base class it wires
  /// the clone to its own analyzers, mirroring what the default constructor does.
  /// </summary>
  public GrammarEnumerationAlgorithm(GrammarEnumerationAlgorithm original, Cloner cloner) : base(original, cloner) {
    // NOTE(review): assumes that event subscriptions are not carried over by the cloner, so the
    // clone has to subscribe to its own analyzer collection - confirm against the Cloner semantics.
    Analyzers.CheckedItemsChanged += AnalyzersOnCheckedItemsChanged;
    foreach (var analyzer in Analyzers) {
      if (Analyzers.ItemChecked(analyzer)) {
        analyzer.Register(this);
      }
    }
  }
  #endregion

  /// <summary>
  /// Runs the enumeration: repeatedly fetches an open phrase, expands its next nonterminal symbol
  /// with every production of that symbol, records the resulting sentences and stores the
  /// resulting unseen phrases for later expansion. Stops when no open phrase is left or when
  /// cancellation is requested.
  /// </summary>
  /// <param name="cancellationToken">Checked once per fetched phrase.</param>
  protected override void Run(CancellationToken cancellationToken) {
    #region init
    InitResults();
    updates = 0; // restart the refresh cadence, the counter would otherwise survive from a previous run

    ArchivedPhrases = new HashSet<int>();

    DistinctSentencesLength = new Dictionary<int, int>();
    AllGeneratedSentencesCount = 0;
    OverwrittenSentencesCount = 0;
    PhraseExpansionCount = 0;

    Grammar = new Grammar(Problem.ProblemData.AllowedInputVariables.ToArray());

    OpenPhrases = new SearchDataStore(SearchDataStructure); // Select search strategy
    var phrase0 = new SymbolString(new[] { Grammar.StartSymbol });
    var phrase0Hash = Grammar.Hasher.CalcHashCode(phrase0);
    #endregion

    OpenPhrases.Store(phrase0Hash, phrase0);
    while (OpenPhrases.Count > 0) {
      if (cancellationToken.IsCancellationRequested) break;

      StoredSymbolString fetchedPhrase = OpenPhrases.GetNext();
      SymbolString currPhrase = fetchedPhrase.SymbolString;

      OnPhraseFetched(fetchedPhrase.Hash, currPhrase);

      ArchivedPhrases.Add(fetchedPhrase.Hash);

      // expand next nonterminal symbols
      int nonterminalSymbolIndex = currPhrase.NextNonterminalIndex();
      NonterminalSymbol expandedSymbol = (NonterminalSymbol)currPhrase[nonterminalSymbolIndex];
      var appliedProductions = Grammar.Productions[expandedSymbol];

      for (int i = 0; i < appliedProductions.Count; i++) {
        PhraseExpansionCount++;

        var production = appliedProductions[i];
        SymbolString newPhrase = currPhrase.DerivePhrase(nonterminalSymbolIndex, production);

        // Determine the length once; it is needed for the size limit and for the bookkeeping below.
        int newPhraseLength = newPhrase.Count();
        if (newPhraseLength > MaxTreeSize) continue;

        var phraseHash = Grammar.Hasher.CalcHashCode(newPhrase);

        OnPhraseDerived(fetchedPhrase.Hash, currPhrase, phraseHash, newPhrase, expandedSymbol, production);

        if (newPhrase.IsSentence()) {
          AllGeneratedSentencesCount++;

          OnSentenceGenerated(fetchedPhrase.Hash, currPhrase, phraseHash, newPhrase, expandedSymbol, production);

          int knownLength;
          bool alreadyKnown = DistinctSentencesLength.TryGetValue(phraseHash, out knownLength);
          if (!alreadyKnown || knownLength > newPhraseLength) {
            if (alreadyKnown) OverwrittenSentencesCount++; // for analysis only

            DistinctSentencesLength[phraseHash] = newPhraseLength;
            OnDistinctSentenceGenerated(fetchedPhrase.Hash, currPhrase, phraseHash, newPhrase, expandedSymbol, production);
          }
          UpdateView();

        } else if (!OpenPhrases.Contains(phraseHash) && !ArchivedPhrases.Contains(phraseHash)) {
          // Only phrases whose hash is neither waiting for expansion nor already expanded are stored.
          OpenPhrases.Store(phraseHash, newPhrase);
        }
      }
    }
    UpdateView(force: true);
  }

  #region Visualization in HL
  // Initialize entries in result set.
  private void InitResults() {
    Results.Add(new Result(GeneratedPhrasesName, new IntValue(0)));
    Results.Add(new Result(SearchStructureSizeName, new IntValue(0)));
    Results.Add(new Result(GeneratedSentencesName, new IntValue(0)));
    Results.Add(new Result(DistinctSentencesName, new IntValue(0)));
    Results.Add(new Result(PhraseExpansionsName, new IntValue(0)));
    Results.Add(new Result(OverwrittenSentencesName, new IntValue(0)));
    Results.Add(new Result(AverageSentenceLengthName, new DoubleValue(1.0)));
    Results.Add(new Result(ExpansionsPerSecondName, "In Thousand expansions per second", new IntValue(0)));
  }

  // Number of UpdateView calls in the current run.
  private int updates;

  /// <summary>
  /// Writes the current execution statistics to the result collection. Without
  /// <paramref name="force"/> the results are only refreshed on the first call and then once per
  /// <see cref="GuiUpdateInterval"/> calls.
  /// </summary>
  /// <param name="force">If true, the results are refreshed regardless of the interval.</param>
  private void UpdateView(bool force = false) {
    updates++;

    // An interval of 1 (or less) means "refresh on every call". The plain modulo test would never
    // fire for 1 and would divide by zero for 0.
    int interval = GuiUpdateInterval;
    bool refreshDue = interval <= 1 || updates % interval == 1;
    if (!force && !refreshDue) return;

    ((IntValue)Results[GeneratedPhrasesName].Value).Value = ArchivedPhrases.Count;
    ((IntValue)Results[SearchStructureSizeName].Value).Value = OpenPhrases.Count;
    ((IntValue)Results[GeneratedSentencesName].Value).Value = AllGeneratedSentencesCount;
    ((IntValue)Results[DistinctSentencesName].Value).Value = DistinctSentencesLength.Count;
    ((IntValue)Results[PhraseExpansionsName].Value).Value = PhraseExpansionCount;
    ((IntValue)Results[OverwrittenSentencesName].Value).Value = OverwrittenSentencesCount;

    // Average() throws on an empty sequence, which happens when a run ends before any sentence
    // was generated. The initial result value is kept in that case.
    if (DistinctSentencesLength.Count > 0) {
      ((DoubleValue)Results[AverageSentenceLengthName].Value).Value = DistinctSentencesLength.Values.Average();
    }

    // Skip the rate while no time has elapsed and clamp it, so that the conversion to int is
    // always well defined.
    double elapsedSeconds = ExecutionTime.TotalSeconds;
    if (elapsedSeconds > 0.0) {
      double thousandExpansionsPerSecond = (PhraseExpansionCount / elapsedSeconds) / 1000.0;
      ((IntValue)Results[ExpansionsPerSecondName].Value).Value = (int)Math.Min(int.MaxValue, thousandExpansionsPerSecond);
    }
  }
  #endregion

  #region events
  // Registers newly checked analyzers at this algorithm and deregisters unchecked ones.
  private void AnalyzersOnCheckedItemsChanged(object sender, CollectionItemsChangedEventArgs<IGrammarEnumerationAnalyzer> collectionItemsChangedEventArgs) {
    foreach (IGrammarEnumerationAnalyzer changedAnalyzer in collectionItemsChangedEventArgs.Items) {
      if (Analyzers.ItemChecked(changedAnalyzer)) {
        changedAnalyzer.Register(this);
      } else {
        changedAnalyzer.Deregister(this);
      }
    }
  }

  /// <summary>Raised after a phrase was fetched from the open phrases, before it is expanded.</summary>
  public event EventHandler<PhraseEventArgs> PhraseFetched;
  private void OnPhraseFetched(int hash, SymbolString symbolString) {
    PhraseFetched?.Invoke(this, new PhraseEventArgs(hash, symbolString));
  }

  /// <summary>Raised for every derived phrase that does not exceed <see cref="MaxTreeSize"/>.</summary>
  public event EventHandler<PhraseAddedEventArgs> PhraseDerived;
  private void OnPhraseDerived(int parentHash, SymbolString parentSymbolString, int addedHash, SymbolString addedSymbolString, Symbol expandedSymbol, Production expandedProduction) {
    PhraseDerived?.Invoke(this, new PhraseAddedEventArgs(parentHash, parentSymbolString, addedHash, addedSymbolString, expandedSymbol, expandedProduction));
  }

  /// <summary>Raised for every derived phrase that is a sentence, duplicates included.</summary>
  public event EventHandler<PhraseAddedEventArgs> SentenceGenerated;
  private void OnSentenceGenerated(int parentHash, SymbolString parentSymbolString, int addedHash, SymbolString addedSymbolString, Symbol expandedSymbol, Production expandedProduction) {
    SentenceGenerated?.Invoke(this, new PhraseAddedEventArgs(parentHash, parentSymbolString, addedHash, addedSymbolString, expandedSymbol, expandedProduction));
  }

  /// <summary>
  /// Raised when a sentence hash is seen for the first time or when a shorter sentence with an
  /// already known hash is found.
  /// </summary>
  public event EventHandler<PhraseAddedEventArgs> DistinctSentenceGenerated;
  private void OnDistinctSentenceGenerated(int parentHash, SymbolString parentSymbolString, int addedHash, SymbolString addedSymbolString, Symbol expandedSymbol, Production expandedProduction) {
    DistinctSentenceGenerated?.Invoke(this, new PhraseAddedEventArgs(parentHash, parentSymbolString, addedHash, addedSymbolString, expandedSymbol, expandedProduction));
  }

  #endregion

}
[15821]263
264  #region events for analysis
265
/// <summary>Event data carrying a single phrase together with its hash.</summary>
public class PhraseEventArgs : EventArgs {
  private readonly int hash;
  private readonly SymbolString phrase;

  /// <summary>Stores the given hash and phrase unchanged.</summary>
  public PhraseEventArgs(int hash, SymbolString phrase) {
    this.hash = hash;
    this.phrase = phrase;
  }

  /// <summary>Hash passed to the constructor.</summary>
  public int Hash => hash;

  /// <summary>Phrase passed to the constructor.</summary>
  public SymbolString Phrase => phrase;
}
276
/// <summary>
/// Event data describing a derivation step: the parent phrase, the phrase that was derived from
/// it, and the symbol and production involved. All values are stored as passed to the constructor.
/// </summary>
public class PhraseAddedEventArgs : EventArgs {
  private readonly int parentHash;
  private readonly int newHash;
  private readonly SymbolString parentPhrase;
  private readonly SymbolString newPhrase;
  private readonly Symbol expandedSymbol;
  private readonly Production expandedProduction;

  public PhraseAddedEventArgs(int parentHash, SymbolString parentPhrase, int newHash, SymbolString newPhrase, Symbol expandedSymbol, Production expandedProduction) {
    this.parentHash = parentHash;
    this.newHash = newHash;
    this.parentPhrase = parentPhrase;
    this.newPhrase = newPhrase;
    this.expandedSymbol = expandedSymbol;
    this.expandedProduction = expandedProduction;
  }

  /// <summary>Hash of the parent phrase.</summary>
  public int ParentHash => parentHash;

  /// <summary>Hash of the new phrase.</summary>
  public int NewHash => newHash;

  /// <summary>Phrase the new phrase was derived from.</summary>
  public SymbolString ParentPhrase => parentPhrase;

  /// <summary>The newly derived phrase.</summary>
  public SymbolString NewPhrase => newPhrase;

  /// <summary>Symbol of the parent phrase that was expanded.</summary>
  public Symbol ExpandedSymbol => expandedSymbol;

  /// <summary>Production that was applied to the expanded symbol.</summary>
  public Production ExpandedProduction => expandedProduction;
}
297
298  #endregion
[15712]299}
Note: See TracBrowser for help on using the repository browser.