Changeset 15843


Ignore:
Timestamp:
03/16/18 10:41:38 (3 years ago)
Author:
lkammere
Message:

#2886: Remove duplicates in logged sentences using bash commands.

Location:
branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/Analysis/SentenceLogger.cs

    r15821 r15843  
    77namespace HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration {
    88  class SentenceLogger : Item, IGrammarEnumerationAnalyzer {
    9     private readonly string distinctSentencesFileName = Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory) + @"\distinctSentences.csv";
    10     private readonly string allSentencesFileName = Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory) + @"\allSentences.csv";
     9    private readonly string workingDir = Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory);
    1110    private readonly string columnDelimiter = ";";
    12     private readonly string header = "infix;postfix;hash";
     11    private readonly string header = "hash;length;postfix;infix";
     12
     13    private string distinctSentencesFileName;
     14    private string allSentencesFileName;
     15    private string shortestDistinctSentencesFileName;
    1316
    1417    private TextWriterTraceListener distinctSentencesFileTrace;
     
    4245
    4346    private void GrammarEnumerationAlgorithmOnStarted(object sender, EventArgs eventArgs) {
     47      string datePostfix = $"_{DateTime.Now:yyyy-MM-dd_HH-mm}_TreeSize-{((GrammarEnumerationAlgorithm) sender).MaxTreeSize}.csv";
     48      distinctSentencesFileName = workingDir + @"\distinctSentences" + datePostfix;
     49      allSentencesFileName = workingDir + @"\allSentences" + datePostfix;
     50      shortestDistinctSentencesFileName = workingDir + @"\shortestDistinctSentences" + datePostfix;
     51
    4452      distinctSentencesFileTrace = new TextWriterTraceListener(new FileStream(distinctSentencesFileName, FileMode.Create));
    4553      allSentencesFileTrace = new TextWriterTraceListener(new FileStream(allSentencesFileName, FileMode.Create));
     
    4755      ((StreamWriter)allSentencesFileTrace.Writer).AutoFlush = true;
    4856
    49       distinctSentencesFileTrace.WriteLine(header);
    5057      allSentencesFileTrace.WriteLine(header);
    5158    }
     
    5461      distinctSentencesFileTrace.Close();
    5562      allSentencesFileTrace.Close();
     63
     64      // Remove duplicates afterwards using bash commands from the git bash
     65      string bashExecutable = Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData) + @"\Programs\Git\git-bash.exe";
     66
     67      string commandCreate = $"echo \"{header}\" > {shortestDistinctSentencesFileName.Replace("\\", "/")}";
     68      string commandFill = $"sort -s {distinctSentencesFileName.Replace("\\", "/")} | uniq -w 10 >> {shortestDistinctSentencesFileName.Replace("\\", "/")}";
     69     
     70      ProcessStartInfo startInfo = new ProcessStartInfo {
     71        WindowStyle = ProcessWindowStyle.Hidden,
     72        UseShellExecute = false,
     73        CreateNoWindow = true,
     74        FileName = bashExecutable,
     75        RedirectStandardError = true,
     76        RedirectStandardOutput = true,
     77        Arguments = $"-c '{commandCreate};{commandFill}'"
     78      };
     79      Process.Start(startInfo);
    5680    }
    5781
    5882    private void DistinctSentenceGenerated(object sender, PhraseAddedEventArgs phraseAddedEventArgs) {
    5983      distinctSentencesFileTrace.WriteLine(ToCsvLine(
    60         ((GrammarEnumerationAlgorithm)sender).Grammar.ToInfixString(phraseAddedEventArgs.NewPhrase),
     84        ((uint)phraseAddedEventArgs.NewHash).ToString("D10"),
     85        phraseAddedEventArgs.NewPhrase.Count().ToString("D3"),
    6186        phraseAddedEventArgs.NewPhrase.ToString(),
    62         phraseAddedEventArgs.NewHash.ToString()));
     87        ((GrammarEnumerationAlgorithm)sender).Grammar.ToInfixString(phraseAddedEventArgs.NewPhrase)));
    6388    }
    6489
    6590    private void SentenceGenerated(object sender, PhraseAddedEventArgs phraseAddedEventArgs) {
    6691      allSentencesFileTrace.WriteLine(ToCsvLine(
    67         ((GrammarEnumerationAlgorithm)sender).Grammar.ToInfixString(phraseAddedEventArgs.NewPhrase),
     92        ((uint)phraseAddedEventArgs.NewHash).ToString("D10"),
     93        phraseAddedEventArgs.NewPhrase.Count().ToString("D3"),
    6894        phraseAddedEventArgs.NewPhrase.ToString(),
    69         phraseAddedEventArgs.NewHash.ToString()));
     95        ((GrammarEnumerationAlgorithm)sender).Grammar.ToInfixString(phraseAddedEventArgs.NewPhrase)));
    7096    }
    7197
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/GrammarEnumerationAlgorithm.cs

    r15842 r15843  
    9898
    9999      Parameters.Add(new ValueParameter<IntValue>(MaxTreeSizeParameterName, "The number of clusters.", new IntValue(6)));
    100       Parameters.Add(new ValueParameter<IntValue>(GuiUpdateIntervalParameterName, "Number of generated sentences, until GUI is refreshed.", new IntValue(1000)));
     100      Parameters.Add(new ValueParameter<IntValue>(GuiUpdateIntervalParameterName, "Number of generated sentences, until GUI is refreshed.", new IntValue(100000)));
    101101      Parameters.Add(new ValueParameter<EnumValue<StorageType>>(SearchDataStructureParameterName, new EnumValue<StorageType>(StorageType.Stack)));
    102102
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.csproj

    r15832 r15843  
    1717    <DebugType>full</DebugType>
    1818    <Optimize>false</Optimize>
    19     <OutputPath>..\..\trunk\bin\</OutputPath>
     19    <OutputPath>..\..\..\trunk\bin\</OutputPath>
    2020    <DefineConstants>DEBUG;TRACE</DefineConstants>
    2121    <ErrorReport>prompt</ErrorReport>
Note: See TracChangeset for help on using the changeset viewer.