Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/Analysis/SentenceLogger.cs @ 15843

Last change on this file since 15843 was 15843, checked in by lkammere, 6 years ago

#2886: Remove duplicates in logged sentences using bash commands.

File size: 4.7 KB
Line 
1using System;
2using System.Diagnostics;
3using System.IO;
4using HeuristicLab.Common;
5using HeuristicLab.Core;
6
namespace HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration {
  /// <summary>
  /// Analyzer that logs the sentences produced by a <see cref="GrammarEnumerationAlgorithm"/> to CSV files
  /// in the current user's desktop directory: one file with every generated sentence and one with the
  /// distinct sentences. When a run ends, a Git Bash command sorts the distinct sentences and keeps the
  /// first line per hash in a third file.
  /// </summary>
  class SentenceLogger : Item, IGrammarEnumerationAnalyzer {
    // All log files are written to the desktop directory of the current user.
    private readonly string workingDir = Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory);
    private readonly string columnDelimiter = ";";
    private readonly string header = "hash;length;postfix;infix";

    // File names are (re)computed on every Started event.
    private string distinctSentencesFileName;
    private string allSentencesFileName;
    private string shortestDistinctSentencesFileName;

    // Open log writers; null whenever no run is being logged.
    private TextWriterTraceListener distinctSentencesFileTrace;
    private TextWriterTraceListener allSentencesFileTrace;

    public SentenceLogger() { }

    /// <summary>
    /// Cloning constructor. Chains to the base cloning constructor so that the base classes can do their
    /// part of the cloning; this class has no per-instance state that needs to be copied.
    /// </summary>
    protected SentenceLogger(SentenceLogger original, Cloner cloner) : base(original, cloner) { }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new SentenceLogger(this, cloner);
    }

    /// <summary>Subscribes this logger to the life-cycle and sentence events of <paramref name="algorithm"/>.</summary>
    public void Register(GrammarEnumerationAlgorithm algorithm) {
      algorithm.Started += GrammarEnumerationAlgorithmOnStarted;
      algorithm.Stopped += GrammarEnumerationAlgorithmOnStopped;
      algorithm.ExceptionOccurred += GrammarEnumerationAlgorithmOnStopped;

      algorithm.SentenceGenerated += SentenceGenerated;
      algorithm.DistinctSentenceGenerated += DistinctSentenceGenerated;
    }

    /// <summary>Removes the subscriptions added by <see cref="Register"/>.</summary>
    public void Deregister(GrammarEnumerationAlgorithm algorithm) {
      algorithm.Started -= GrammarEnumerationAlgorithmOnStarted;
      algorithm.Stopped -= GrammarEnumerationAlgorithmOnStopped;
      algorithm.ExceptionOccurred -= GrammarEnumerationAlgorithmOnStopped;

      algorithm.SentenceGenerated -= SentenceGenerated;
      algorithm.DistinctSentenceGenerated -= DistinctSentenceGenerated;
    }

    /// <summary>
    /// Opens fresh log files for a run. The file names carry a minute-resolution time stamp and the
    /// algorithm's maximum tree size.
    /// </summary>
    private void GrammarEnumerationAlgorithmOnStarted(object sender, EventArgs eventArgs) {
      // Release writers left over from a run that never reported Stopped/ExceptionOccurred. Otherwise
      // their file handles leak, and re-creating a file with the same name fails while it is still open.
      CloseTraces();

      string datePostfix = $"_{DateTime.Now:yyyy-MM-dd_HH-mm}_TreeSize-{((GrammarEnumerationAlgorithm)sender).MaxTreeSize}.csv";
      distinctSentencesFileName = Path.Combine(workingDir, "distinctSentences" + datePostfix);
      allSentencesFileName = Path.Combine(workingDir, "allSentences" + datePostfix);
      shortestDistinctSentencesFileName = Path.Combine(workingDir, "shortestDistinctSentences" + datePostfix);

      distinctSentencesFileTrace = new TextWriterTraceListener(new FileStream(distinctSentencesFileName, FileMode.Create));
      allSentencesFileTrace = new TextWriterTraceListener(new FileStream(allSentencesFileName, FileMode.Create));
      ((StreamWriter)distinctSentencesFileTrace.Writer).AutoFlush = true;
      ((StreamWriter)allSentencesFileTrace.Writer).AutoFlush = true;

      // Only the all-sentences file gets a header here. The distinct-sentences file is piped through
      // sort/uniq afterwards, and the header of the de-duplicated file is written by the bash command.
      allSentencesFileTrace.WriteLine(header);
    }

    /// <summary>
    /// Closes the log files and starts the de-duplication of the distinct sentences.
    /// Handles both the Stopped and the ExceptionOccurred event of the algorithm.
    /// </summary>
    private void GrammarEnumerationAlgorithmOnStopped(object sender, EventArgs eventArgs) {
      // This handler may run before any Started event, or twice for one run (exception followed by
      // stop). Without open writers there is nothing to close or to post-process.
      if (distinctSentencesFileTrace == null && allSentencesFileTrace == null) {
        return;
      }

      CloseTraces();
      RemoveDuplicateSentences();
    }

    /// <summary>Closes both log writers, if open, and resets the fields so that closing is idempotent.</summary>
    private void CloseTraces() {
      if (distinctSentencesFileTrace != null) {
        distinctSentencesFileTrace.Close();
        distinctSentencesFileTrace = null;
      }
      if (allSentencesFileTrace != null) {
        allSentencesFileTrace.Close();
        allSentencesFileTrace = null;
      }
    }

    /// <summary>
    /// Best-effort post-processing: uses the sort and uniq commands of a per-user Git Bash installation to
    /// write the header plus the first line per hash into the shortest-distinct-sentences file.
    /// The process is started and not waited for.
    /// </summary>
    private void RemoveDuplicateSentences() {
      // Remove duplicates afterwards using bash commands from the git bash
      string bashExecutable = Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData) + @"\Programs\Git\git-bash.exe";
      if (!File.Exists(bashExecutable)) {
        Trace.TraceWarning("SentenceLogger: '" + bashExecutable + "' not found, duplicate sentences were not removed.");
        return;
      }

      // Bash expects forward slashes as directory separators.
      string distinctPath = distinctSentencesFileName.Replace("\\", "/");
      string shortestPath = shortestDistinctSentencesFileName.Replace("\\", "/");

      string commandCreate = $"echo \"{header}\" > {shortestPath}";
      // Every line starts with the 10-digit hash followed by the 3-digit length, so after sorting
      // "uniq -w 10" (compare only the first 10 characters) keeps the first line of each hash.
      string commandFill = $"sort -s {distinctPath} | uniq -w 10 >> {shortestPath}";

      // Standard output/error are deliberately not redirected: nothing reads them here, and redirected
      // streams that are never drained can block the child process.
      ProcessStartInfo startInfo = new ProcessStartInfo {
        WindowStyle = ProcessWindowStyle.Hidden,
        UseShellExecute = false,
        CreateNoWindow = true,
        FileName = bashExecutable,
        Arguments = $"-c '{commandCreate};{commandFill}'"
      };

      try {
        // Disposing the Process object only releases the local handle; the started process keeps running.
        using (Process.Start(startInfo)) { }
      } catch (System.ComponentModel.Win32Exception e) {
        // Failing to launch the post-processing must not break the stop handling of the algorithm.
        Trace.TraceWarning("SentenceLogger: could not start '" + bashExecutable + "': " + e.Message);
      }
    }

    /// <summary>Appends the newly generated distinct sentence to the distinct-sentences file.</summary>
    private void DistinctSentenceGenerated(object sender, PhraseAddedEventArgs phraseAddedEventArgs) {
      // No writer is open outside of a run; such events are ignored.
      distinctSentencesFileTrace?.WriteLine(FormatSentence(sender, phraseAddedEventArgs));
    }

    /// <summary>Appends the newly generated sentence to the all-sentences file.</summary>
    private void SentenceGenerated(object sender, PhraseAddedEventArgs phraseAddedEventArgs) {
      // No writer is open outside of a run; such events are ignored.
      allSentencesFileTrace?.WriteLine(FormatSentence(sender, phraseAddedEventArgs));
    }

    /// <summary>
    /// Builds the CSV line for a sentence in the column order of the header: hash, length, postfix, infix.
    /// </summary>
    private string FormatSentence(object sender, PhraseAddedEventArgs phraseAddedEventArgs) {
      return ToCsvLine(
        // Zero-padded to ten digits: the de-duplication compares exactly the first ten characters of a line.
        ((uint)phraseAddedEventArgs.NewHash).ToString("D10"),
        phraseAddedEventArgs.NewPhrase.Count().ToString("D3"),
        phraseAddedEventArgs.NewPhrase.ToString(),
        ((GrammarEnumerationAlgorithm)sender).Grammar.ToInfixString(phraseAddedEventArgs.NewPhrase));
    }

    /// <summary>Joins the given column values with the column delimiter.</summary>
    private string ToCsvLine(params string[] cols) {
      return string.Join(columnDelimiter, cols);
    }
  }
}
Note: See TracBrowser for help on using the repository browser.