using System;
using System.Diagnostics;
using System.IO;
using System.IO.Compression;
using HeuristicLab.Common;
using HeuristicLab.Core;

namespace HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration {
  /// <summary>
  /// Analyzer that logs the sentences reported by a <see cref="GrammarEnumerationAlgorithm"/> to
  /// gzip-compressed, semicolon-separated files in the current user's desktop directory.
  /// </summary>
  /// <remarks>
  /// Two files are written: one receives a line per <c>SentenceGenerated</c> event (with a header line),
  /// the other a line per <c>DistinctSentenceGenerated</c> event (without a header line).
  /// The files are opened lazily on the first reported sentence and closed when the algorithm raises
  /// <c>Stopped</c> or <c>ExceptionOccurred</c>; a later run with the same logger opens fresh files.
  /// </remarks>
  public class SentenceLogger : Item, IGrammarEnumerationAnalyzer {
    private readonly string workingDir = Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory);
    private readonly string columnDelimiter = ";";
    private readonly string header = "hash;length;infix";

    // Paths of the most recently opened output files. They are kept after the files are closed.
    private string distinctSentencesFileName;
    private string allSentencesFileName;
    // Only referenced by the disabled post-processing step in GrammarEnumerationAlgorithmOnStopped.
    private string shortestDistinctSentencesFileName;

    // Open output writers; both are null while no log files are open.
    private TextWriterTraceListener distinctSentencesFileTrace;
    private TextWriterTraceListener allSentencesFileTrace;

    /// <summary>Creates a logger that has no open files yet.</summary>
    public SentenceLogger() { }

    /// <summary>
    /// Cloning constructor. No logging state is copied, so the clone opens its own files
    /// on the first sentence it receives.
    /// </summary>
    protected SentenceLogger(SentenceLogger original, Cloner cloner) : base(original, cloner) { }

    /// <summary>Returns a new <see cref="SentenceLogger"/> created through the cloning constructor.</summary>
    public override IDeepCloneable Clone(Cloner cloner) {
      return new SentenceLogger(this, cloner);
    }

    /// <summary>Subscribes this logger to the events of <paramref name="algorithm"/>.</summary>
    /// <param name="algorithm">The algorithm whose sentences should be logged.</param>
    public void Register(GrammarEnumerationAlgorithm algorithm) {
      algorithm.Stopped += GrammarEnumerationAlgorithmOnStopped;
      algorithm.ExceptionOccurred += GrammarEnumerationAlgorithmOnStopped;

      algorithm.SentenceGenerated += SentenceGenerated;
      algorithm.DistinctSentenceGenerated += DistinctSentenceGenerated;
    }

    /// <summary>Removes the subscriptions added by <see cref="Register"/>.</summary>
    /// <param name="algorithm">The algorithm to detach from.</param>
    public void Deregister(GrammarEnumerationAlgorithm algorithm) {
      algorithm.Stopped -= GrammarEnumerationAlgorithmOnStopped;
      algorithm.ExceptionOccurred -= GrammarEnumerationAlgorithmOnStopped;

      algorithm.SentenceGenerated -= SentenceGenerated;
      algorithm.DistinctSentenceGenerated -= DistinctSentenceGenerated;
    }

    /// <summary>
    /// Closes the log files when the algorithm stops or reports an exception.
    /// Safe to call when no files were opened, and when called more than once.
    /// </summary>
    private void GrammarEnumerationAlgorithmOnStopped(object sender, EventArgs eventArgs) {
      CloseTraces();

      // Disabled post-processing step, kept for reference:
      // Remove duplicates afterwards using bash commands from the git bash
      // string bashExecutable = Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData) + @"\Programs\Git\git-bash.exe";
      //
      // string commandCreate = $"echo \"{header}\" > {shortestDistinctSentencesFileName.Replace("\\", "/")}";
      // string commandFill = $"sort -s {distinctSentencesFileName.Replace("\\", "/")} | uniq -w 10 >> {shortestDistinctSentencesFileName.Replace("\\", "/")}";
      //
      // ProcessStartInfo startInfo = new ProcessStartInfo {
      //   WindowStyle = ProcessWindowStyle.Hidden,
      //   UseShellExecute = false,
      //   CreateNoWindow = true,
      //   FileName = bashExecutable,
      //   RedirectStandardError = true,
      //   RedirectStandardOutput = true,
      //   Arguments = $"-c '{commandCreate};{commandFill}'"
      // };
      // Process.Start(startInfo);
    }

    /// <summary>
    /// Closes both writers (if any) and clears the references so that the next reported
    /// sentence triggers <see cref="Init"/> again instead of writing to a closed listener.
    /// </summary>
    private void CloseTraces() {
      distinctSentencesFileTrace?.Close();
      allSentencesFileTrace?.Close();

      distinctSentencesFileTrace = null;
      allSentencesFileTrace = null;
    }

    /// <summary>
    /// Builds the output file names from the current local time and the algorithm's
    /// <c>MaxComplexity</c>, opens both compressed files and writes the header line to the
    /// all-sentences file.
    /// </summary>
    /// <param name="sender">The event sender; must be a <see cref="GrammarEnumerationAlgorithm"/>.</param>
    /// <remarks>
    /// Files are opened with <see cref="FileMode.Create"/>, so an existing file with the same name is
    /// overwritten. The timestamp has minute resolution.
    /// </remarks>
    private void Init(object sender) {
      var algorithm = (GrammarEnumerationAlgorithm)sender;

      // Invariant culture keeps the file name independent of the user's calendar and digit settings.
      string timestamp = DateTime.Now.ToString("yyyy-MM-dd_HH-mm", System.Globalization.CultureInfo.InvariantCulture);
      string datePostfix = $"_{timestamp}_TreeSize-{algorithm.MaxComplexity}.csv.gz";

      distinctSentencesFileName = Path.Combine(workingDir, "distinctSentences" + datePostfix);
      allSentencesFileName = Path.Combine(workingDir, "allSentences" + datePostfix);
      shortestDistinctSentencesFileName = Path.Combine(workingDir, "shortestDistinctSentences" + datePostfix);

      // Open into locals first so the fields are never left half-initialized if the second open fails.
      TextWriterTraceListener distinctTrace = OpenCompressedTrace(distinctSentencesFileName);
      TextWriterTraceListener allTrace;
      try {
        allTrace = OpenCompressedTrace(allSentencesFileName);
      } catch {
        distinctTrace.Close();
        throw;
      }

      distinctSentencesFileTrace = distinctTrace;
      allSentencesFileTrace = allTrace;

      // Only the all-sentences file gets a header line; the distinct-sentences file holds data rows only.
      allSentencesFileTrace.WriteLine(header);
    }

    /// <summary>
    /// Creates (or overwrites) <paramref name="fileName"/> and returns a listener that writes
    /// gzip-compressed text to it, flushing after every write.
    /// </summary>
    private static TextWriterTraceListener OpenCompressedTrace(string fileName) {
      var fileStream = new FileStream(fileName, FileMode.Create);
      TextWriterTraceListener trace;
      try {
        trace = new TextWriterTraceListener(new GZipStream(fileStream, CompressionMode.Compress));
      } catch {
        fileStream.Dispose();
        throw;
      }

      // The Stream-based constructor wraps the stream in a StreamWriter.
      ((StreamWriter)trace.Writer).AutoFlush = true;
      return trace;
    }

    /// <summary>Opens the log files if they are not currently open.</summary>
    private void EnsureInitialized(object sender) {
      if (distinctSentencesFileTrace == null || allSentencesFileTrace == null) {
        // Defensive: never leak a listener should only one of the two be set.
        CloseTraces();
        Init(sender);
      }
    }

    /// <summary>Appends the newly generated distinct sentence to the distinct-sentences file.</summary>
    private void DistinctSentenceGenerated(object sender, PhraseAddedEventArgs phraseAddedEventArgs) {
      EnsureInitialized(sender);
      distinctSentencesFileTrace.WriteLine(FormatSentence(sender, phraseAddedEventArgs));
    }

    /// <summary>Appends the newly generated sentence to the all-sentences file.</summary>
    private void SentenceGenerated(object sender, PhraseAddedEventArgs phraseAddedEventArgs) {
      EnsureInitialized(sender);
      allSentencesFileTrace.WriteLine(FormatSentence(sender, phraseAddedEventArgs));
    }

    /// <summary>
    /// Formats one CSV row: the hash cast to <c>uint</c> and zero-padded to 10 digits, the phrase
    /// length zero-padded to 3 digits, and the infix string produced by the algorithm's grammar.
    /// </summary>
    /// <param name="sender">The event sender; must be a <see cref="GrammarEnumerationAlgorithm"/>.</param>
    /// <param name="phraseAddedEventArgs">Event data carrying the new phrase and its hash.</param>
    private string FormatSentence(object sender, PhraseAddedEventArgs phraseAddedEventArgs) {
      var algorithm = (GrammarEnumerationAlgorithm)sender;

      return ToCsvLine(
        ((uint)phraseAddedEventArgs.NewHash).ToString("D10"),
        phraseAddedEventArgs.NewPhrase.Count().ToString("D3"),
        //phraseAddedEventArgs.NewPhrase.ToString(),
        algorithm.Grammar.ToInfixString(phraseAddedEventArgs.NewPhrase));
    }

    /// <summary>Joins the given column values with the column delimiter.</summary>
    private string ToCsvLine(params string[] cols) {
      return string.Join(columnDelimiter, cols);
    }
  }
}