Changeset 15860


Ignore:
Timestamp:
03/23/18 18:36:23 (2 years ago)
Author:
lkammere
Message:

#2886: Change complexity measure from number of nodes in tree to number of variable references.

Location:
branches/2886_SymRegGrammarEnumeration
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/Analysis/RSquaredEvaluator.cs

    r15859 r15860  
    7777        problemData.TrainingIndices,
    7878        applyLinearScaling: false,
    79         maxIterations: 200,
     79        maxIterations: 50,
    8080        updateVariableWeights: true,
    8181        updateConstantsInTree: true);
     
    9696      //var estVals = model.GetEstimatedValues(problemData.Dataset, problemData.TrainingIndices);
    9797      //OnlineCalculatorError error;
    98       //r2 = OnlinePearsonsRCalculator.Calculate(target, estVals, out error);
     98      //double r2 = OnlinePearsonsRCalculator.Calculate(target, estVals, out error);
    9999      //if (error != OnlineCalculatorError.None) r2 = 0.0;
    100100
     
    102102      bool better = r2 > bestR2Result.Value;
    103103      bool equallyGood = r2.IsAlmost(bestR2Result.Value);
    104       bool shorter = algorithm.BestTrainingSentence != null && symbolString.Count() < algorithm.BestTrainingSentence.Count();
     104      bool shorter = false;
     105
     106      if (!better && equallyGood) {
     107        shorter = algorithm.BestTrainingSentence != null &&
     108          algorithm.Grammar.GetComplexity(algorithm.BestTrainingSentence) > algorithm.Grammar.GetComplexity(symbolString);
     109      }
    105110      if (better || (equallyGood && shorter)) {
    106111        bestR2Result.Value = r2;
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/Analysis/SentenceLogger.cs

    r15843 r15860  
    4545
    4646    private void GrammarEnumerationAlgorithmOnStarted(object sender, EventArgs eventArgs) {
    47       string datePostfix = $"_{DateTime.Now:yyyy-MM-dd_HH-mm}_TreeSize-{((GrammarEnumerationAlgorithm) sender).MaxTreeSize}.csv";
     47      string datePostfix = $"_{DateTime.Now:yyyy-MM-dd_HH-mm}_TreeSize-{((GrammarEnumerationAlgorithm) sender).MaxComplexity}.csv";
    4848      distinctSentencesFileName = workingDir + @"\distinctSentences" + datePostfix;
    4949      allSentencesFileName = workingDir + @"\allSentences" + datePostfix;
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/GrammarEnumerationAlgorithm.cs

    r15843 r15860  
    2424    private readonly string DistinctSentencesName = "Distinct Sentences";
    2525    private readonly string PhraseExpansionsName = "Phrase Expansions";
    26     private readonly string AverageSentenceLengthName = "Avg. Sentence Length among Distinct";
     26    private readonly string AverageSentenceComplexityName = "Avg. Sentence Complexity among Distinct";
    2727    private readonly string OverwrittenSentencesName = "Sentences overwritten";
    2828    private readonly string AnalyzersParameterName = "Analyzers";
     
    3131
    3232    private readonly string SearchDataStructureParameterName = "Search Data Structure";
    33     private readonly string MaxTreeSizeParameterName = "Max. Tree Nodes";
     33    private readonly string MaxComplexityParameterName = "Max. Complexity";
    3434    private readonly string GuiUpdateIntervalParameterName = "GUI Update Interval";
    3535
    3636    public override bool SupportsPause { get { return false; } }
    3737
    38     protected IValueParameter<IntValue> MaxTreeSizeParameter {
    39       get { return (IValueParameter<IntValue>)Parameters[MaxTreeSizeParameterName]; }
    40     }
    41     public int MaxTreeSize {
    42       get { return MaxTreeSizeParameter.Value.Value; }
    43       set { MaxTreeSizeParameter.Value.Value = value; }
     38    protected IValueParameter<IntValue> MaxComplexityParameter {
     39      get { return (IValueParameter<IntValue>)Parameters[MaxComplexityParameterName]; }
     40    }
     41    public int MaxComplexity {
     42      get { return MaxComplexityParameter.Value.Value; }
     43      set { MaxComplexityParameter.Value.Value = value; }
    4444    }
    4545
     
    7171    #endregion
    7272
    73     public Dictionary<int, int> DistinctSentencesLength { get; private set; }  // Semantically distinct sentences and their length in a run.
     73    public Dictionary<int, int> DistinctSentencesComplexity { get; private set; }  // Semantically distinct sentences and their complexity in a run.
    7474    public HashSet<int> ArchivedPhrases { get; private set; }
    7575    internal SearchDataStore OpenPhrases { get; private set; }           // Stack/Queue/etc. for fetching the next node in the search tree. 
     
    9797      };
    9898
    99       Parameters.Add(new ValueParameter<IntValue>(MaxTreeSizeParameterName, "The number of clusters.", new IntValue(6)));
     99      Parameters.Add(new ValueParameter<IntValue>(MaxComplexityParameterName, "The maximum number of variable symbols in a sentence.", new IntValue(5)));
    100100      Parameters.Add(new ValueParameter<IntValue>(GuiUpdateIntervalParameterName, "Number of generated sentences, until GUI is refreshed.", new IntValue(100000)));
    101101      Parameters.Add(new ValueParameter<EnumValue<StorageType>>(SearchDataStructureParameterName, new EnumValue<StorageType>(StorageType.Stack)));
     
    127127      ArchivedPhrases = new HashSet<int>();
    128128
    129       DistinctSentencesLength = new Dictionary<int, int>();
     129      DistinctSentencesComplexity = new Dictionary<int, int>();
    130130      AllGeneratedSentencesCount = 0;
    131131      OverwrittenSentencesCount = 0;
     
    159159
    160160          SymbolString newPhrase = currPhrase.DerivePhrase(nonterminalSymbolIndex, appliedProductions[i]);
    161 
    162           if (newPhrase.Count() <= MaxTreeSize) {
     161          int newPhraseComplexity = Grammar.GetComplexity(newPhrase);
     162
     163          if (newPhraseComplexity <= MaxComplexity) {
    163164            var phraseHash = Grammar.Hasher.CalcHashCode(newPhrase);
    164165
     
    170171              OnSentenceGenerated(fetchedPhrase.Hash, fetchedPhrase.SymbolString, phraseHash, newPhrase, expandedSymbol, appliedProductions[i]);
    171172
    172               if (!DistinctSentencesLength.ContainsKey(phraseHash) || DistinctSentencesLength[phraseHash] > newPhrase.Count()) {
    173                 if (DistinctSentencesLength.ContainsKey(phraseHash)) OverwrittenSentencesCount++; // for analysis only
    174 
    175                 DistinctSentencesLength[phraseHash] = newPhrase.Count();
     173              if (!DistinctSentencesComplexity.ContainsKey(phraseHash) || DistinctSentencesComplexity[phraseHash] > newPhraseComplexity) {
     174                if (DistinctSentencesComplexity.ContainsKey(phraseHash)) OverwrittenSentencesCount++; // for analysis only
     175
     176                DistinctSentencesComplexity[phraseHash] = newPhraseComplexity;
    176177                OnDistinctSentenceGenerated(fetchedPhrase.Hash, fetchedPhrase.SymbolString, phraseHash, newPhrase, expandedSymbol, appliedProductions[i]);
    177178              }
     
    196197      Results.Add(new Result(PhraseExpansionsName, new IntValue(0)));
    197198      Results.Add(new Result(OverwrittenSentencesName, new IntValue(0)));
    198       Results.Add(new Result(AverageSentenceLengthName, new DoubleValue(1.0)));
     199      Results.Add(new Result(AverageSentenceComplexityName, new DoubleValue(1.0)));
    199200      Results.Add(new Result(ExpansionsPerSecondName, "In Thousand expansions per second", new IntValue(0)));
    200201    }
     
    209210        ((IntValue)Results[SearchStructureSizeName].Value).Value = OpenPhrases.Count;
    210211        ((IntValue)Results[GeneratedSentencesName].Value).Value = AllGeneratedSentencesCount;
    211         ((IntValue)Results[DistinctSentencesName].Value).Value = DistinctSentencesLength.Count;
     212        ((IntValue)Results[DistinctSentencesName].Value).Value = DistinctSentencesComplexity.Count;
    212213        ((IntValue)Results[PhraseExpansionsName].Value).Value = PhraseExpansionCount;
    213         ((DoubleValue)Results[AverageSentenceLengthName].Value).Value = DistinctSentencesLength.Select(pair => pair.Value).Average();
     214        ((DoubleValue)Results[AverageSentenceComplexityName].Value).Value = DistinctSentencesComplexity.Select(pair => pair.Value).Average();
    214215        ((IntValue)Results[OverwrittenSentencesName].Value).Value = OverwrittenSentencesCount;
    215216        ((IntValue)Results[ExpansionsPerSecondName].Value).Value = (int)((PhraseExpansionCount /
  • branches/2886_SymRegGrammarEnumeration/Test/GrammarEnumerationTest.cs

    r15859 r15860  
    6060    public void NoConstants_Nguyen1() {
    6161      // x³ + x² + x
    62       alg.MaxTreeSize = 20;
     62      alg.MaxComplexity = 6;
    6363      alg.Problem.ProblemData = new NguyenFunctionOne(Seed).GenerateRegressionData();
    6464
     
    8080      int actualSolutionHash = alg.Grammar.Hasher.CalcHashCode(alg.BestTrainingSentence);
    8181
    82       Assert.IsTrue(alg.DistinctSentencesLength.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
     82      Assert.IsTrue(alg.DistinctSentencesComplexity.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
    8383
    8484      Assert.AreEqual(targetSolutionHash, actualSolutionHash, "Actual solution was not recognized as best one.");
     
    9393    public void NoConstants_Nguyen2() {
    9494      // x^4 + x³ + x² + x
    95       alg.MaxTreeSize = 30;
     95      alg.MaxComplexity = 11;
    9696      alg.Problem.ProblemData = new NguyenFunctionTwo(Seed).GenerateRegressionData();
    9797
     
    105105    public void NoConstants_Nguyen3() {
    106106      // x^5 + x^4 + x^3 + x^2 + x
    107       alg.MaxTreeSize = 32;
     107      alg.MaxComplexity = 32;
    108108      alg.Problem.ProblemData = new NguyenFunctionThree(Seed).GenerateRegressionData();
    109109
     
    117117    public void NoConstants_Nguyen6() {
    118118      // sin(x) + sin(x + x²)
    119       alg.MaxTreeSize = 25;
     119      alg.MaxComplexity = 4;
    120120      alg.Problem.ProblemData = new NguyenFunctionSix(Seed).GenerateRegressionData();
    121121
     
    138138      int actualSolutionHash = alg.Grammar.Hasher.CalcHashCode(alg.BestTrainingSentence);
    139139
    140       Assert.IsTrue(alg.DistinctSentencesLength.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
     140      Assert.IsTrue(alg.DistinctSentencesComplexity.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
    141141      Assert.AreEqual(targetSolutionHash, actualSolutionHash, "Actual solution was not recognized as best one.");
    142142
     
    148148    public void NoConstants_Nguyen9() {
    149149      // sin(x) + sin(y²)
    150       alg.MaxTreeSize = 22;
     150      alg.MaxComplexity = 3;
    151151      alg.Problem.ProblemData = new NguyenFunctionNine(Seed).GenerateRegressionData();
    152152
     
    170170      int actualSolutionHash = alg.Grammar.Hasher.CalcHashCode(alg.BestTrainingSentence);
    171171
    172       Assert.IsTrue(alg.DistinctSentencesLength.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
     172      Assert.IsTrue(alg.DistinctSentencesComplexity.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
    173173      Assert.AreEqual(targetSolutionHash, actualSolutionHash, "Actual solution was not recognized as best one.");
    174174
     
    180180    [TestProperty("Goal", "structure search")]
    181181    public void MctsSymbReg_NoConstants_Poly10() {
    182       alg.MaxTreeSize = 10;
     182      alg.MaxComplexity = 10;
    183183      alg.Problem.ProblemData = new PolyTen(Seed).GenerateRegressionData();
    184184
     
    191191    public void NoConstants_Inverse() {
    192192      // x / (log(x)*x + x)
    193       alg.MaxTreeSize = 23;
     193      alg.MaxComplexity = 4;
    194194
    195195      var x = Enumerable.Range(0, 100).Select(_ => rand.NextDouble() + 1.1).ToList();
     
    205205    public void Constants_Nguyen7() {
    206206      // log(x+1) + log(x*x + 1)
    207       alg.MaxTreeSize = 22;
     207      alg.MaxComplexity = 3;
    208208      alg.Problem.ProblemData = new NguyenFunctionSeven().GenerateRegressionData();
    209209
     
    225225      int actualSolutionHash = alg.Grammar.Hasher.CalcHashCode(alg.BestTrainingSentence);
    226226
    227       Assert.IsTrue(alg.DistinctSentencesLength.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
     227      Assert.IsTrue(alg.DistinctSentencesComplexity.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
    228228      Assert.AreEqual(targetSolutionHash, actualSolutionHash, "Actual solution was not recognized as best one.");
    229 
    230229
    231230      // Evaluate
     
    237236    public void Constants_Nguyen12() {
    238237      // x*x*x*x - x*x*x + y*y/2 -y
    239       alg.MaxTreeSize = 28;
     238      alg.MaxComplexity = 10;
    240239      alg.Problem.ProblemData = new NguyenFunctionTwelve().GenerateRegressionData();
    241240
     
    250249    public void Constants_Keijzer3() {
    251250      // 0.3*x*sin(2*pi*x)
    252       alg.MaxTreeSize = 20;
     251      alg.MaxComplexity = 2;
    253252      alg.Problem.ProblemData = new KeijzerFunctionThree().GenerateRegressionData();
    254253
     
    270269      int actualSolutionHash = alg.Grammar.Hasher.CalcHashCode(alg.BestTrainingSentence);
    271270
    272       Assert.IsTrue(alg.DistinctSentencesLength.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
    273 
     271      Assert.IsTrue(alg.DistinctSentencesComplexity.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
    274272      Assert.AreEqual(targetSolutionHash, actualSolutionHash, "Actual solution was not recognized as best one.");
    275273
     
    282280    public void Constants_Keijzer5() {
    283281      // (30*x*z) / ((x - 10)*y*y)
    284       alg.MaxTreeSize = 24;
     282      alg.MaxComplexity = 5;
    285283      alg.Problem.ProblemData = new KeijzerFunctionFive().GenerateRegressionData();
    286284
     
    307305      int actualSolutionHash = alg.Grammar.Hasher.CalcHashCode(alg.BestTrainingSentence);
    308306
    309       Assert.IsTrue(alg.DistinctSentencesLength.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
     307      Assert.IsTrue(alg.DistinctSentencesComplexity.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
    310308
    311309      Assert.AreEqual(targetSolutionHash, actualSolutionHash, "Actual solution was not recognized as best one.");
     
    320318    public void Constants_Keijzer12() {
    321319      // x*x*x*x - x*x*x + y*y/2 - y
    322       alg.MaxTreeSize = 29;
     320      alg.MaxComplexity = 10;
    323321      alg.Problem.ProblemData = new KeijzerFunctionTwelve().GenerateRegressionData();
    324322
     
    344342      int actualSolutionHash = alg.Grammar.Hasher.CalcHashCode(alg.BestTrainingSentence);
    345343
    346       Assert.IsTrue(alg.DistinctSentencesLength.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
     344      Assert.IsTrue(alg.DistinctSentencesComplexity.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
    347345      Assert.AreEqual(targetSolutionHash, actualSolutionHash, "Actual solution was not recognized as best one.");
    348346
     
    355353    public void Constants_Keijzer14() {
    356354      // 8 / (2 + x*x + y*y)
    357       alg.MaxTreeSize = 19;
     355      alg.MaxComplexity = 4;
    358356      alg.Problem.ProblemData = new KeijzerFunctionFourteen().GenerateRegressionData();
    359357
     
    380378      int actualSolutionHash = alg.Grammar.Hasher.CalcHashCode(alg.BestTrainingSentence);
    381379
    382       Assert.IsTrue(alg.DistinctSentencesLength.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
     380      Assert.IsTrue(alg.DistinctSentencesComplexity.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
    383381      Assert.AreEqual(targetSolutionHash, actualSolutionHash, "Actual solution was not recognized as best one.");
    384382
     
    392390    public void Constants_Keijzer15() {
    393391      // x*x*x / 5 + y*y*y / 2 - y - x
    394       alg.MaxTreeSize = 25;
     392      alg.MaxComplexity = 8;
    395393      alg.Problem.ProblemData = new KeijzerFunctionFifteen().GenerateRegressionData();
    396394
     
    415413      int actualSolutionHash = alg.Grammar.Hasher.CalcHashCode(alg.BestTrainingSentence);
    416414
    417       Assert.IsTrue(alg.DistinctSentencesLength.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
     415      Assert.IsTrue(alg.DistinctSentencesComplexity.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
    418416      Assert.AreEqual(targetSolutionHash, actualSolutionHash, "Actual solution was not recognized as best one.");
    419417
Note: See TracChangeset for help on using the changeset viewer.