Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
03/23/18 18:36:23 (7 years ago)
Author:
lkammere
Message:

#2886: Change complexity measure from number of nodes in tree to number of variable references.

Location:
branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/Analysis/RSquaredEvaluator.cs

    r15859 r15860  
    77 77        problemData.TrainingIndices,
    78 78        applyLinearScaling: false,
    79         maxIterations: 200,
     79        maxIterations: 50,
    80 80        updateVariableWeights: true,
    81 81        updateConstantsInTree: true);
     
    96 96      //var estVals = model.GetEstimatedValues(problemData.Dataset, problemData.TrainingIndices);
    97 97      //OnlineCalculatorError error;
    98       //r2 = OnlinePearsonsRCalculator.Calculate(target, estVals, out error);
     98      //double r2 = OnlinePearsonsRCalculator.Calculate(target, estVals, out error);
    99 99      //if (error != OnlineCalculatorError.None) r2 = 0.0;
    100 100
     
    102 102      bool better = r2 > bestR2Result.Value;
    103 103      bool equallyGood = r2.IsAlmost(bestR2Result.Value);
    104       bool shorter = algorithm.BestTrainingSentence != null && symbolString.Count() < algorithm.BestTrainingSentence.Count();
     104      bool shorter = false;
     105
     106      if (!better && equallyGood) {
     107        shorter = algorithm.BestTrainingSentence != null &&
     108          algorithm.Grammar.GetComplexity(algorithm.BestTrainingSentence) > algorithm.Grammar.GetComplexity(symbolString);
     109      }
    105 110      if (better || (equallyGood && shorter)) {
    106 111        bestR2Result.Value = r2;
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/Analysis/SentenceLogger.cs

    r15843 r15860  
    45 45
    46 46    private void GrammarEnumerationAlgorithmOnStarted(object sender, EventArgs eventArgs) {
    47       string datePostfix = $"_{DateTime.Now:yyyy-MM-dd_HH-mm}_TreeSize-{((GrammarEnumerationAlgorithm) sender).MaxTreeSize}.csv";
     47      string datePostfix = $"_{DateTime.Now:yyyy-MM-dd_HH-mm}_TreeSize-{((GrammarEnumerationAlgorithm) sender).MaxComplexity}.csv";
    48 48      distinctSentencesFileName = workingDir + @"\distinctSentences" + datePostfix;
    49 49      allSentencesFileName = workingDir + @"\allSentences" + datePostfix;
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/GrammarEnumerationAlgorithm.cs

    r15843 r15860  
    24 24    private readonly string DistinctSentencesName = "Distinct Sentences";
    25 25    private readonly string PhraseExpansionsName = "Phrase Expansions";
    26     private readonly string AverageSentenceLengthName = "Avg. Sentence Length among Distinct";
     26    private readonly string AverageSentenceComplexityName = "Avg. Sentence Complexity among Distinct";
    27 27    private readonly string OverwrittenSentencesName = "Sentences overwritten";
    28 28    private readonly string AnalyzersParameterName = "Analyzers";
     
    31 31
    32 32    private readonly string SearchDataStructureParameterName = "Search Data Structure";
    33     private readonly string MaxTreeSizeParameterName = "Max. Tree Nodes";
     33    private readonly string MaxComplexityParameterName = "Max. Complexity";
    34 34    private readonly string GuiUpdateIntervalParameterName = "GUI Update Interval";
    35 35
    36 36    public override bool SupportsPause { get { return false; } }
    37 37
    38     protected IValueParameter<IntValue> MaxTreeSizeParameter {
    39       get { return (IValueParameter<IntValue>)Parameters[MaxTreeSizeParameterName]; }
    40     }
    41     public int MaxTreeSize {
    42       get { return MaxTreeSizeParameter.Value.Value; }
    43       set { MaxTreeSizeParameter.Value.Value = value; }
     38    protected IValueParameter<IntValue> MaxComplexityParameter {
     39      get { return (IValueParameter<IntValue>)Parameters[MaxComplexityParameterName]; }
     40    }
     41    public int MaxComplexity {
     42      get { return MaxComplexityParameter.Value.Value; }
     43      set { MaxComplexityParameter.Value.Value = value; }
    44 44    }
    45 45
     
    71 71    #endregion
    72 72
    73     public Dictionary<int, int> DistinctSentencesLength { get; private set; }  // Semantically distinct sentences and their length in a run.
     73    public Dictionary<int, int> DistinctSentencesComplexity { get; private set; }  // Semantically distinct sentences and their length in a run.
    74 74    public HashSet<int> ArchivedPhrases { get; private set; }
    75 75    internal SearchDataStore OpenPhrases { get; private set; }           // Stack/Queue/etc. for fetching the next node in the search tree. 
     
    97 97      };
    98 98
    99       Parameters.Add(new ValueParameter<IntValue>(MaxTreeSizeParameterName, "The number of clusters.", new IntValue(6)));
     99      Parameters.Add(new ValueParameter<IntValue>(MaxComplexityParameterName, "The maximum number of variable symbols in a sentence.", new IntValue(5)));
    100 100      Parameters.Add(new ValueParameter<IntValue>(GuiUpdateIntervalParameterName, "Number of generated sentences, until GUI is refreshed.", new IntValue(100000)));
    101 101      Parameters.Add(new ValueParameter<EnumValue<StorageType>>(SearchDataStructureParameterName, new EnumValue<StorageType>(StorageType.Stack)));
     
    127 127      ArchivedPhrases = new HashSet<int>();
    128 128
    129       DistinctSentencesLength = new Dictionary<int, int>();
     129      DistinctSentencesComplexity = new Dictionary<int, int>();
    130 130      AllGeneratedSentencesCount = 0;
    131 131      OverwrittenSentencesCount = 0;
     
    159 159
    160 160          SymbolString newPhrase = currPhrase.DerivePhrase(nonterminalSymbolIndex, appliedProductions[i]);
    161 
    162           if (newPhrase.Count() <= MaxTreeSize) {
     161          int newPhraseComplexity = Grammar.GetComplexity(newPhrase);
     162
     163          if (newPhraseComplexity <= MaxComplexity) {
    163 164            var phraseHash = Grammar.Hasher.CalcHashCode(newPhrase);
    164 165
     
    170 171              OnSentenceGenerated(fetchedPhrase.Hash, fetchedPhrase.SymbolString, phraseHash, newPhrase, expandedSymbol, appliedProductions[i]);
    171 172
    172               if (!DistinctSentencesLength.ContainsKey(phraseHash) || DistinctSentencesLength[phraseHash] > newPhrase.Count()) {
    173                 if (DistinctSentencesLength.ContainsKey(phraseHash)) OverwrittenSentencesCount++; // for analysis only
    174 
    175                 DistinctSentencesLength[phraseHash] = newPhrase.Count();
     173              if (!DistinctSentencesComplexity.ContainsKey(phraseHash) || DistinctSentencesComplexity[phraseHash] > newPhraseComplexity) {
     174                if (DistinctSentencesComplexity.ContainsKey(phraseHash)) OverwrittenSentencesCount++; // for analysis only
     175
     176                DistinctSentencesComplexity[phraseHash] = newPhraseComplexity;
    176 177                OnDistinctSentenceGenerated(fetchedPhrase.Hash, fetchedPhrase.SymbolString, phraseHash, newPhrase, expandedSymbol, appliedProductions[i]);
    177 178              }
     
    196 197      Results.Add(new Result(PhraseExpansionsName, new IntValue(0)));
    197 198      Results.Add(new Result(OverwrittenSentencesName, new IntValue(0)));
    198       Results.Add(new Result(AverageSentenceLengthName, new DoubleValue(1.0)));
     199      Results.Add(new Result(AverageSentenceComplexityName, new DoubleValue(1.0)));
    199 200      Results.Add(new Result(ExpansionsPerSecondName, "In Thousand expansions per second", new IntValue(0)));
    200 201    }
     
    209 210        ((IntValue)Results[SearchStructureSizeName].Value).Value = OpenPhrases.Count;
    210 211        ((IntValue)Results[GeneratedSentencesName].Value).Value = AllGeneratedSentencesCount;
    211         ((IntValue)Results[DistinctSentencesName].Value).Value = DistinctSentencesLength.Count;
     212        ((IntValue)Results[DistinctSentencesName].Value).Value = DistinctSentencesComplexity.Count;
    212 213        ((IntValue)Results[PhraseExpansionsName].Value).Value = PhraseExpansionCount;
    213         ((DoubleValue)Results[AverageSentenceLengthName].Value).Value = DistinctSentencesLength.Select(pair => pair.Value).Average();
     214        ((DoubleValue)Results[AverageSentenceComplexityName].Value).Value = DistinctSentencesComplexity.Select(pair => pair.Value).Average();
    214 215        ((IntValue)Results[OverwrittenSentencesName].Value).Value = OverwrittenSentencesCount;
    215 216        ((IntValue)Results[ExpansionsPerSecondName].Value).Value = (int)((PhraseExpansionCount /
Note: See TracChangeset for help on using the changeset viewer.