Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
02/28/18 14:44:25 (7 years ago)
Author:
lkammere
Message:

#2886 Move code for visualization and logging of sentences to separate classes.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/GrammarEnumerationAlgorithm.cs

    r15814 r15821  
    11using System;
    22using System.Collections.Generic;
    3 using System.Diagnostics;
    4 using System.IO;
    53using System.Linq;
    64using System.Threading;
    75using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration;
     6using HeuristicLab.Collections;
    87using HeuristicLab.Common;
    98using HeuristicLab.Core;
     
    3029    private readonly string DistinctSentencesName = "Distinct Sentences";
    3130    private readonly string PhraseExpansionsName = "Phrase Expansions";
    32     private readonly string AverageTreeLengthName = "Avg. Sentence Length among Distinct";
    33     private readonly string GeneratedEqualSentencesName = "Generated equal sentences";
     31    private readonly string AverageSentenceLengthName = "Avg. Sentence Length among Distinct";
     32    private readonly string OverwrittenSentencesName = "Sentences overwritten";
     33    private readonly string AnalyzersParameterName = "Analyzers";
    3434
    3535
     
    6464    }
    6565
     66    public IFixedValueParameter<ReadOnlyCheckedItemCollection<IGrammarEnumerationAnalyzer>> AnalyzersParameter {
     67      get { return (IFixedValueParameter<ReadOnlyCheckedItemCollection<IGrammarEnumerationAnalyzer>>)Parameters[AnalyzersParameterName]; }
     68    }
     69
     70    public ICheckedItemCollection<IGrammarEnumerationAnalyzer> Analyzers {
     71      get { return AnalyzersParameter.Value; }
     72    }
     73
    6674    public SymbolString BestTrainingSentence;
    6775
    6876    #endregion
    6977
    70     public Dictionary<int, SymbolString> DistinctSentences { get; private set; }  // Semantically distinct sentences in a run.
    71     public Dictionary<int, List<SymbolString>> AllSentences { get; private set; } // All sentences ever generated in a run.
     78    public Dictionary<int, int> DistinctSentencesLength { get; private set; }  // Semantically distinct sentences and their length in a run.
    7279    public HashSet<int> ArchivedPhrases { get; private set; }
    73 
    74     internal SearchDataStore OpenPhrases { get; private set; }                    // Stack/Queue/etc. for fetching the next node in the search tree. 
    75 
    76     public int EqualGeneratedSentences { get; private set; } // It is not guaranteed that shorter solutions are found first.
    77                                                              // When longer solutions are overwritten with shorter ones,
    78                                                              // this counter is increased.
    79     public int Expansions { get; private set; }              // Number, how many times a nonterminal symbol is replaced with a production rule.
     80    internal SearchDataStore OpenPhrases { get; private set; }           // Stack/Queue/etc. for fetching the next node in the search tree. 
     81
     82    #region execution stats
     83    public int AllGeneratedSentencesCount { get; private set; }
     84
     85    public int OverwrittenSentencesCount { get; private set; } // It is not guaranteed that shorter solutions are found first.
     86                                                               // When longer solutions are overwritten with shorter ones,
     87                                                               // this counter is increased.
     88    public int PhraseExpansionCount { get; private set; }      // Number, how many times a nonterminal symbol is replaced with a production rule.
     89    #endregion
     90
    8091    public Grammar Grammar { get; private set; }
    8192
    82     private readonly string dotFileName = Environment.GetFolderPath(System.Environment.SpecialFolder.DesktopDirectory) + @"\searchgraph.dot";
     93    private readonly ISymbolicDataAnalysisExpressionTreeInterpreter expressionTreeLinearInterpreter = new SymbolicDataAnalysisExpressionTreeLinearInterpreter();
    8394
    8495    #region ctors
     
    95106      Parameters.Add(new ValueParameter<IntValue>(GuiUpdateIntervalParameterName, "Number of generated sentences, until GUI is refreshed.", new IntValue(1000)));
    96107      Parameters.Add(new ValueParameter<EnumValue<StorageType>>(SearchDataStructureParameterName, new EnumValue<StorageType>(StorageType.Stack)));
     108
     109      var availableAnalyzers = new IGrammarEnumerationAnalyzer[] {
     110        new SearchGraphVisualizer(),
     111        new SentenceLogger()
     112      };
     113      Parameters.Add(new FixedValueParameter<ReadOnlyCheckedItemCollection<IGrammarEnumerationAnalyzer>>(
     114        AnalyzersParameterName,
     115        new CheckedItemCollection<IGrammarEnumerationAnalyzer>(availableAnalyzers).AsReadOnly()));
     116
     117      foreach (var analyzer in Analyzers) {
     118        Analyzers.SetItemCheckedState(analyzer, false);
     119      }
     120      Analyzers.CheckedItemsChanged += AnalyzersOnCheckedItemsChanged;
    97121    }
    98122
     
    104128      InitResults();
    105129
    106       AllSentences = new Dictionary<int, List<SymbolString>>();
    107130      ArchivedPhrases = new HashSet<int>();
    108131
    109       DistinctSentences = new Dictionary<int, SymbolString>();
    110       Expansions = 0;
    111       EqualGeneratedSentences = 0;
     132      DistinctSentencesLength = new Dictionary<int, int>();
     133      AllGeneratedSentencesCount = 0;
     134      OverwrittenSentencesCount = 0;
     135      PhraseExpansionCount = 0;
    112136
    113137      Grammar = new Grammar(Problem.ProblemData.AllowedInputVariables.ToArray());
     
    118142      #endregion
    119143
    120       using (TextWriterTraceListener dotFileTrace = new TextWriterTraceListener(new FileStream(dotFileName, FileMode.Create))) {
    121         LogSearchGraph(dotFileTrace, "digraph searchgraph { pad=0.02; nodesep=0.3; ranksep=0.02;ratio=0.5625;");
    122 
    123         OpenPhrases.Store(phrase0Hash, phrase0);
    124         while (OpenPhrases.Count > 0) {
    125           if (cancellationToken.IsCancellationRequested) break;
    126 
    127           StoredSymbolString fetchedPhrase = OpenPhrases.GetNext();
    128           SymbolString currPhrase = fetchedPhrase.SymbolString;
    129 #if DEBUG
    130           LogSearchGraph(dotFileTrace, $"{fetchedPhrase.Hash} [label=\"{Grammar.PostfixToInfixParser(fetchedPhrase.SymbolString)}\"];");
    131 #endif
    132           ArchivedPhrases.Add(fetchedPhrase.Hash);
    133 
    134           // expand next nonterminal symbols
    135           int nonterminalSymbolIndex = currPhrase.FindIndex(s => s is NonterminalSymbol);
    136           NonterminalSymbol expandedSymbol = currPhrase[nonterminalSymbolIndex] as NonterminalSymbol;
    137 
    138           foreach (Production productionAlternative in expandedSymbol.Alternatives) {
    139             SymbolString newPhrase = new SymbolString(currPhrase.Count + productionAlternative.Count);
    140             newPhrase.AddRange(currPhrase);
    141             newPhrase.RemoveAt(nonterminalSymbolIndex);     // TODO: removeat and insertRange are both O(n)
    142             newPhrase.InsertRange(nonterminalSymbolIndex, productionAlternative);
    143 
    144             Expansions++;
    145             if (newPhrase.Count <= MaxTreeSize) {
    146               var phraseHash = Grammar.CalcHashCode(newPhrase);
    147 #if DEBUG
    148               LogSearchGraph(dotFileTrace, $"{fetchedPhrase.Hash} -> {phraseHash} [label=\"{expandedSymbol.StringRepresentation} + &rarr; {productionAlternative}\"];");
    149 #endif
    150               if (newPhrase.IsSentence()) {
    151                 // Sentence was generated.
    152                 SaveToAllSentences(phraseHash, newPhrase);
    153 
    154                 if (!DistinctSentences.ContainsKey(phraseHash) || DistinctSentences[phraseHash].Count > newPhrase.Count) {
    155                   if (DistinctSentences.ContainsKey(phraseHash)) EqualGeneratedSentences++; // for analysis only
    156 
    157                   DistinctSentences[phraseHash] = newPhrase;
    158                   EvaluateSentence(newPhrase);
    159 
    160 #if DEBUG
    161                   LogSearchGraph(dotFileTrace, $"{phraseHash} [label=\"{Grammar.PostfixToInfixParser(newPhrase)}\", style=\"filled\", fillcolor=\"#f79423\", color=\"#f79423\"];");
    162 #endif
    163                 }
    164                 UpdateView();
    165 
    166               } else if (!OpenPhrases.Contains(phraseHash) && !ArchivedPhrases.Contains(phraseHash)) {
    167                 OpenPhrases.Store(phraseHash, newPhrase);
     144      OpenPhrases.Store(phrase0Hash, phrase0);
     145      while (OpenPhrases.Count > 0) {
     146        if (cancellationToken.IsCancellationRequested) break;
     147
     148        StoredSymbolString fetchedPhrase = OpenPhrases.GetNext();
     149        SymbolString currPhrase = fetchedPhrase.SymbolString;
     150
     151        OnPhraseFetched(fetchedPhrase.Hash, currPhrase);
     152
     153        ArchivedPhrases.Add(fetchedPhrase.Hash);
     154
     155        // expand next nonterminal symbols
     156        int nonterminalSymbolIndex = currPhrase.FindIndex(s => s is NonterminalSymbol);
     157        NonterminalSymbol expandedSymbol = currPhrase[nonterminalSymbolIndex] as NonterminalSymbol;
     158
     159        foreach (Production productionAlternative in expandedSymbol.Alternatives) {
     160          PhraseExpansionCount++;
     161
     162          SymbolString newPhrase = new SymbolString(currPhrase.Count + productionAlternative.Count);
     163          newPhrase.AddRange(currPhrase);
     164          newPhrase.RemoveAt(nonterminalSymbolIndex);     // TODO: removeat and insertRange are both O(n)
     165          newPhrase.InsertRange(nonterminalSymbolIndex, productionAlternative);
     166
     167          if (newPhrase.Count <= MaxTreeSize) {
     168            var phraseHash = Grammar.CalcHashCode(newPhrase);
     169
     170            OnPhraseDerived(fetchedPhrase.Hash, fetchedPhrase.SymbolString, phraseHash, newPhrase, expandedSymbol, productionAlternative);
     171
     172            if (newPhrase.IsSentence()) {
     173              AllGeneratedSentencesCount++;
     174
     175              OnSentenceGenerated(fetchedPhrase.Hash, fetchedPhrase.SymbolString, phraseHash, newPhrase, expandedSymbol, productionAlternative);
     176
     177              if (!DistinctSentencesLength.ContainsKey(phraseHash) || DistinctSentencesLength[phraseHash] > newPhrase.Count) {
     178                if (DistinctSentencesLength.ContainsKey(phraseHash)) OverwrittenSentencesCount++; // for analysis only
     179
     180                DistinctSentencesLength[phraseHash] = newPhrase.Count;
     181                EvaluateSentence(newPhrase);
     182
     183                OnDistinctSentenceGenerated(fetchedPhrase.Hash, fetchedPhrase.SymbolString, phraseHash, newPhrase, expandedSymbol, productionAlternative);
    168184              }
     185              UpdateView();
     186
     187            } else if (!OpenPhrases.Contains(phraseHash) && !ArchivedPhrases.Contains(phraseHash)) {
     188              OpenPhrases.Store(phraseHash, newPhrase);
    169189            }
    170190          }
    171191        }
    172 #if DEBUG
    173         // Overwrite formatting of start search node and best found solution.
    174         //LogSearchGraph(dotFileTrace, $"{Grammar.CalcHashCode(BestTrainingSentence)} [label=\"{Grammar.PostfixToInfixParser(BestTrainingSentence)}\", shape=Mcircle, style=\"filled,bold\"];");
    175         LogSearchGraph(dotFileTrace, $"{phrase0Hash} [label=\"{Grammar.PostfixToInfixParser(phrase0)}\", shape=doublecircle];}}");
    176         dotFileTrace.Flush();
    177 #endif
    178192      }
    179193
     
    182196    }
    183197
    184     // Store sentence to "MultiDictionary"
    185     private void SaveToAllSentences(int hash, SymbolString sentence) {
    186       if (AllSentences.ContainsKey(hash))
    187         AllSentences[hash].Add(sentence);
    188       else
    189         AllSentences[hash] = new List<SymbolString> { sentence }; //TODO: here we store all sentences even if they have the same hash value, this is not strictly necessary
    190     }
    191 
    192 #region Evaluation of generated models.
     198    #region Evaluation of generated models.
    193199
    194200    // Evaluate sentence within an algorithm run.
     
    198204        Problem.ProblemData.TargetVariable,
    199205        tree,
    200         new SymbolicDataAnalysisExpressionTreeLinearInterpreter());
     206        expressionTreeLinearInterpreter);
    201207
    202208      var probData = Problem.ProblemData;
     
    214220    }
    215221
    216 #endregion
    217 
    218 #region Visualization in HL
     222    #endregion
     223
     224    #region Visualization in HL
    219225    // Initialize entries in result set.
    220226    private void InitResults() {
     
    228234      Results.Add(new Result(DistinctSentencesName, new IntValue(0)));
    229235      Results.Add(new Result(PhraseExpansionsName, new IntValue(0)));
    230       Results.Add(new Result(GeneratedEqualSentencesName, new IntValue(0)));
    231       Results.Add(new Result(AverageTreeLengthName, new DoubleValue(1.0)));
     236      Results.Add(new Result(OverwrittenSentencesName, new IntValue(0)));
     237      Results.Add(new Result(AverageSentenceLengthName, new DoubleValue(1.0)));
    232238    }
    233239
     
    238244
    239245      if (force || updates % GuiUpdateInterval == 1) {
    240         var allGeneratedEnum = AllSentences.Values.SelectMany(x => x).ToArray();
    241246        ((IntValue)Results[GeneratedPhrasesName].Value).Value = ArchivedPhrases.Count;
    242247        ((IntValue)Results[SearchStructureSizeName].Value).Value = OpenPhrases.Count;
    243         ((IntValue)Results[GeneratedSentencesName].Value).Value = allGeneratedEnum.Length;
    244         ((IntValue)Results[DistinctSentencesName].Value).Value = DistinctSentences.Count;
    245         ((IntValue)Results[PhraseExpansionsName].Value).Value = Expansions;
    246         ((IntValue)Results[GeneratedEqualSentencesName].Value).Value = EqualGeneratedSentences;
    247         ((DoubleValue)Results[AverageTreeLengthName].Value).Value = allGeneratedEnum.Select(sentence => sentence.Count).Average();
     248        ((IntValue)Results[GeneratedSentencesName].Value).Value = AllGeneratedSentencesCount;
     249        ((IntValue)Results[DistinctSentencesName].Value).Value = DistinctSentencesLength.Count;
     250        ((IntValue)Results[PhraseExpansionsName].Value).Value = PhraseExpansionCount;
     251        ((DoubleValue)Results[AverageSentenceLengthName].Value).Value = DistinctSentencesLength.Select(pair => pair.Value).Average();
     252        ((IntValue)Results[OverwrittenSentencesName].Value).Value = OverwrittenSentencesCount;
    248253      }
    249254    }
     
    259264      IRegressionSolution bestTrainingSolution = new RegressionSolution(model, Problem.ProblemData);
    260265      Results.AddOrUpdateResult(BestTrainingSolutionName, bestTrainingSolution);
    261 
    262       // Print generated sentences.
    263       string[,] sentencesMatrix = new string[AllSentences.Values.SelectMany(x => x).Count(), 3];
    264 
    265       int i = 0;
    266       foreach (var sentenceSet in AllSentences) {
    267         foreach (var sentence in sentenceSet.Value) {
    268           sentencesMatrix[i, 0] = sentence.ToString();
    269           sentencesMatrix[i, 1] = Grammar.PostfixToInfixParser(sentence).ToString();
    270           sentencesMatrix[i, 2] = sentenceSet.Key.ToString();
    271           i++;
     266    }
     267    #endregion
     268
     269    #region events
     270    private void AnalyzersOnCheckedItemsChanged(object sender, CollectionItemsChangedEventArgs<IGrammarEnumerationAnalyzer> collectionItemsChangedEventArgs) {
     271      foreach (IGrammarEnumerationAnalyzer grammarEnumerationAnalyzer in collectionItemsChangedEventArgs.Items) {
     272        if (Analyzers.ItemChecked(grammarEnumerationAnalyzer)) {
     273          grammarEnumerationAnalyzer.Register(this);
     274        } else {
     275          grammarEnumerationAnalyzer.Deregister(this);
    272276        }
    273277      }
    274       Results.Add(new Result("All generated sentences", new StringMatrix(sentencesMatrix)));
    275 
    276       string[,] distinctSentencesMatrix = new string[DistinctSentences.Count, 3];
    277       i = 0;
    278       foreach (KeyValuePair<int, SymbolString> distinctSentence in DistinctSentences) {
    279         distinctSentencesMatrix[i, 0] = distinctSentence.Key.ToString();
    280         distinctSentencesMatrix[i, 1] = Grammar.PostfixToInfixParser(distinctSentence.Value).ToString();
    281         distinctSentencesMatrix[i, 2] = distinctSentence.Key.ToString();
    282         i++;
    283       }
    284       Results.Add(new Result("Distinct generated sentences", new StringMatrix(distinctSentencesMatrix)));
    285     }
    286 
    287     private void LogSearchGraph(TraceListener listener, string msg) {
    288       listener.Write(msg);
    289     }
    290 #endregion
     278    }
     279
     280    public event EventHandler<PhraseEventArgs> PhraseFetched;
     281    private void OnPhraseFetched(int hash, SymbolString symbolString) {
     282      if (PhraseFetched != null) {
     283        PhraseFetched(this, new PhraseEventArgs(hash, symbolString));
     284      }
     285    }
     286
     287    public event EventHandler<PhraseAddedEventArgs> PhraseDerived;
     288    private void OnPhraseDerived(int parentHash, SymbolString parentSymbolString, int addedHash, SymbolString addedSymbolString, Symbol expandedSymbol, Production expandedProduction) {
     289      if (PhraseDerived != null) {
     290        PhraseDerived(this, new PhraseAddedEventArgs(parentHash, parentSymbolString, addedHash, addedSymbolString, expandedSymbol, expandedProduction));
     291      }
     292    }
     293
     294    public event EventHandler<PhraseAddedEventArgs> SentenceGenerated;
     295    private void OnSentenceGenerated(int parentHash, SymbolString parentSymbolString, int addedHash, SymbolString addedSymbolString, Symbol expandedSymbol, Production expandedProduction) {
     296      if (SentenceGenerated != null) {
     297        SentenceGenerated(this, new PhraseAddedEventArgs(parentHash, parentSymbolString, addedHash, addedSymbolString, expandedSymbol, expandedProduction));
     298      }
     299    }
     300
     301    public event EventHandler<PhraseAddedEventArgs> DistinctSentenceGenerated;
     302    private void OnDistinctSentenceGenerated(int parentHash, SymbolString parentSymbolString, int addedHash, SymbolString addedSymbolString, Symbol expandedSymbol, Production expandedProduction) {
     303      if (DistinctSentenceGenerated != null) {
     304        DistinctSentenceGenerated(this, new PhraseAddedEventArgs(parentHash, parentSymbolString, addedHash, addedSymbolString, expandedSymbol, expandedProduction));
     305      }
     306    }
     307
     308    #endregion
    291309
    292310  }
     311
     312  #region events for analysis
     313
     314  public class PhraseEventArgs : EventArgs {
     315    public int Hash { get; }
     316
     317    public SymbolString Phrase { get; }
     318
     319    public PhraseEventArgs(int hash, SymbolString phrase) {
     320      Hash = hash;
     321      Phrase = phrase;
     322    }
     323  }
     324
     325  public class PhraseAddedEventArgs : EventArgs {
     326    public int ParentHash { get; }
     327    public int NewHash { get; }
     328
     329    public SymbolString ParentPhrase { get; }
     330    public SymbolString NewPhrase { get; }
     331
     332    public Symbol ExpandedSymbol { get; }
     333
     334    public Production ExpandedProduction { get; }
     335
     336    public PhraseAddedEventArgs(int parentHash, SymbolString parentPhrase, int newHash, SymbolString newPhrase, Symbol expandedSymbol, Production expandedProduction) {
     337      ParentHash = parentHash;
     338      ParentPhrase = parentPhrase;
     339      NewHash = newHash;
     340      NewPhrase = newPhrase;
     341      ExpandedSymbol = expandedSymbol;
     342      ExpandedProduction = expandedProduction;
     343    }
     344  }
     345
     346  #endregion
    293347}
Note: See TracChangeset for help on using the changeset viewer.