Changeset 15974


Ignore:
Timestamp:
06/28/18 11:35:13 (3 years ago)
Author:
bburlacu
Message:

#2886: implement LRU cache for storing search nodes, introduce SortedSet for handling priorities, fix serialization and cloning

Location:
branches/2886_SymRegGrammarEnumeration
Files:
2 added
9 edited

Legend:

Unmodified
Added
Removed
  • branches/2886_SymRegGrammarEnumeration/ExpressionClustering/ExpressionClustering.csproj

    r15929 r15974  
    1515  </PropertyGroup>
    1616  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
    17     <PlatformTarget>x64</PlatformTarget>
     17    <PlatformTarget>AnyCPU</PlatformTarget>
    1818    <DebugSymbols>true</DebugSymbols>
    1919    <DebugType>full</DebugType>
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/Analysis/RSquaredEvaluator.cs

    r15949 r15974  
    1616  public class RSquaredEvaluator : Item, IGrammarEnumerationAnalyzer {
    1717    public static readonly string BestTrainingQualityResultName = "Best R² (Training)";
     18    public static readonly string BestTestQualityResultName = "Best R² (Test)";
    1819    public static readonly string BestTrainingModelResultName = "Best model (Training)";
    1920    public static readonly string BestTrainingSolutionResultName = "Best solution (Training)";
     21    public static readonly string BestComplexityResultName = "Best solution complexity";
    2022
    2123    private static readonly ISymbolicDataAnalysisExpressionTreeInterpreter expressionTreeLinearInterpreter = new SymbolicDataAnalysisExpressionTreeLinearInterpreter();
     
    4244      algorithm.Started -= OnStarted;
    4345      algorithm.Stopped -= OnStopped;
    44 
    4546      algorithm.DistinctSentenceGenerated -= AlgorithmOnDistinctSentenceGenerated;
    4647    }
     
    5758    }
    5859
    59     private void OnStopped(object sender, EventArgs eventArgs) {
    60       GrammarEnumerationAlgorithm algorithm = (GrammarEnumerationAlgorithm)sender;
    61       if (algorithm.Results.ContainsKey(BestTrainingModelResultName)) {
    62         SymbolicRegressionModel model = (SymbolicRegressionModel)algorithm.Results[BestTrainingModelResultName].Value;
    63         IRegressionSolution bestTrainingSolution = new RegressionSolution(model, algorithm.Problem.ProblemData);
     60    private void OnStopped(object sender, EventArgs eventArgs) { }
    6461
    65         algorithm.Results.AddOrUpdateResult(BestTrainingSolutionResultName, bestTrainingSolution);
    66       }
     62    private T GetValue<T>(IItem value) where T : struct {
     63      var v = value as ValueTypeValue<T>;
     64      if (v == null)
     65        throw new ArgumentException(string.Format("Item is not of type {0}", typeof(ValueTypeValue<T>)));
     66      return v.Value;
    6767    }
    6868
    6969    private void EvaluateSentence(GrammarEnumerationAlgorithm algorithm, SymbolString symbolString) {
     70      var results = algorithm.Results;
     71      var grammar = algorithm.Grammar;
    7072      var problemData = algorithm.Problem.ProblemData;
    7173
     
    7476
    7577      double r2 = Evaluate(problemData, tree, OptimizeConstants);
     78      double bestR2 = results.ContainsKey(BestTrainingQualityResultName) ? GetValue<double>(results[BestTrainingQualityResultName].Value) : 0.0;
     79      if (r2 < bestR2)
     80        return;
    7681
    77       double bestR2 = 0.0;
    78       if (algorithm.Results.ContainsKey(BestTrainingQualityResultName))
    79         bestR2 = ((DoubleValue)algorithm.Results[BestTrainingQualityResultName].Value).Value;
    80       bool better = r2 > bestR2;
    81       bool equallyGood = r2.IsAlmost(bestR2);
    82       bool shorter = false;
     82      var bestComplexity = int.MaxValue;
     83      if (results.ContainsKey(BestComplexityResultName)) {
     84        bestComplexity = GetValue<int>(results[BestComplexityResultName].Value);
     85      } else if (algorithm.BestTrainingSentence != null) {
     86        bestComplexity = grammar.GetComplexity(algorithm.BestTrainingSentence);
     87        results.AddOrUpdateResult(BestComplexityResultName, new IntValue(bestComplexity));
     88      }
     89      var complexity = grammar.GetComplexity(symbolString);
    8390
    84       if (!better && equallyGood) {
    85         shorter = algorithm.BestTrainingSentence != null &&
    86           algorithm.Grammar.GetComplexity(algorithm.BestTrainingSentence) > algorithm.Grammar.GetComplexity(symbolString);
    87       }
    88       if (better || (equallyGood && shorter)) {
    89         algorithm.Results.AddOrUpdateResult(BestTrainingQualityResultName, new DoubleValue(r2));
    90 
    91         SymbolicRegressionModel model = new SymbolicRegressionModel(
    92           problemData.TargetVariable,
    93           tree,
    94           expressionTreeLinearInterpreter);
    95 
    96         algorithm.Results.AddOrUpdateResult(BestTrainingModelResultName, model);
    97 
     91      if (r2 > bestR2 || (r2.IsAlmost(bestR2) && complexity < bestComplexity)) {
     92        results.AddOrUpdateResult(BestTrainingQualityResultName, new DoubleValue(r2));
     93        results.AddOrUpdateResult(BestComplexityResultName, new IntValue(complexity));
    9894        algorithm.BestTrainingSentence = symbolString;
    9995      }
     
    121117          }
    122118        }
    123 
    124         r2 = double.IsNaN(r2) ? 0.0 : r2;
    125 
    126119      } else {
    127         var target = problemData.TargetVariableTrainingValues;
    128 
    129         SymbolicRegressionModel model = new SymbolicRegressionModel(
    130           problemData.TargetVariable,
     120        r2 = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(expressionTreeLinearInterpreter,
    131121          tree,
    132           expressionTreeLinearInterpreter);
    133 
    134         var estVals = model.GetEstimatedValues(problemData.Dataset, problemData.TrainingIndices);
    135         OnlineCalculatorError error;
    136         r2 = OnlinePearsonsRCalculator.Calculate(target, estVals, out error);
    137         if (error != OnlineCalculatorError.None) r2 = 0.0;
     122          double.MinValue,
     123          double.MaxValue,
     124          problemData,
     125          problemData.TrainingIndices,
     126          applyLinearScaling: true);
    138127      }
    139 
    140       return r2;
     128      return double.IsNaN(r2) ? 0.0 : r2;
    141129    }
    142130  }
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/Grammar.cs

    r15960 r15974  
    2222  [StorableClass(StorableClassType.AllFieldsAndAllProperties)]
    2323  public class Grammar : DeepCloneable {
    24 
    25     public Symbol StartSymbol { get; }
     24    public Symbol StartSymbol { get; private set; }
    2625
    2726    public Hasher<int> Hasher { get; }
     
    2928    #region Symbols
    3029
    31     public IReadOnlyDictionary<Symbol, IReadOnlyList<Production>> Productions { get; }
     30    public IReadOnlyDictionary<Symbol, IReadOnlyList<Production>> Productions { get; private set; }
    3231
    3332    public NonterminalSymbol Var;
     
    7978
    8079    protected Grammar(Grammar original, Cloner cloner) : base(original, cloner) {
    81       VarTerminals = original.VarTerminals.Select(cloner.Clone).ToArray();
    82       StartSymbol = cloner.Clone(original.StartSymbol);
    83       Hasher = cloner.Clone(original.Hasher); // how to handle circular reference grammar <-> hasher? (the cloner *should* handle it)
     80      infixExpressionFormatter = cloner.Clone(original.infixExpressionFormatter);
     81
    8482      Productions = original.Productions.ToDictionary(x => cloner.Clone(x.Key), x => (IReadOnlyList<Production>)x.Value.Select(cloner.Clone).ToList());
     83      VarTerminals = original.VarTerminals.Select(cloner.Clone).ToList();
     84
    8585      Var = cloner.Clone(original.Var);
    86       VarTerminals = original.VarTerminals.Select(cloner.Clone).ToList();
    8786      Expr = cloner.Clone(original.Expr);
    8887      Term = cloner.Clone(original.Term);
     
    9594      InvExpr = cloner.Clone(original.InvExpr);
    9695      InvTerm = cloner.Clone(original.InvTerm);
     96
    9797      Addition = cloner.Clone(original.Addition);
    9898      Multiplication = cloner.Clone(original.Multiplication);
     
    100100      Exp = cloner.Clone(original.Exp);
    101101      Sin = cloner.Clone(original.Sin);
    102       Sin = cloner.Clone(original.Sin);
    103102      Inv = cloner.Clone(original.Inv);
    104103      Const = cloner.Clone(original.Const);
    105104
    106 
    107       constSy = cloner.Clone(original.constSy);
    108       varSy = cloner.Clone(original.varSy);
    109       addSy = cloner.Clone(original.addSy);
    110       mulSy = cloner.Clone(original.mulSy);
    111       logSy = cloner.Clone(original.logSy);
    112       expSy = cloner.Clone(original.expSy);
    113       divSy = cloner.Clone(original.divSy);
    114       sinSy = cloner.Clone(original.sinSy);
    115 
    116       rootSy = cloner.Clone(original.rootSy);
    117       startSy = cloner.Clone(original.startSy);
    118 
    119       infixExpressionFormatter = cloner.Clone(original.infixExpressionFormatter);
    120     }
    121 
    122     public Grammar(string[] variables, IEnumerable<GrammarRule> includedRules) {
     105      StartSymbol = Expr;
     106      Hasher = cloner.Clone(original.Hasher);
     107
     108      InitTreeParser(); // easier this way (and less typing)
     109    }
     110
     111    private void InitProductions(string[] variables, IEnumerable<GrammarRule> includedRules) {
    123112      #region Define Symbols
    124113      Var = new NonterminalSymbol("Var");
     
    210199      Productions = productions;
    211200      #endregion
    212 
     201    }
     202
     203    private void InitTreeParser() {
    213204      #region Parsing to SymbolicExpressionTree
    214205      var symbolicExpressionGrammar = new TypeCoherentExpressionGrammar();
     
    229220      infixExpressionFormatter = new InfixExpressionFormatter();
    230221      #endregion
     222    }
     223
     224    public Grammar(string[] variables, IEnumerable<GrammarRule> includedRules) {
     225      InitProductions(variables, includedRules);
     226      InitTreeParser();
    231227
    232228      Hasher = new IntHasher(this);
     
    244240    public double EvaluatePhrase(SymbolString s, IRegressionProblemData problemData, bool optimizeConstants) {
    245241      SymbolicExpressionTree tree = ParseSymbolicExpressionTree(s);
    246 
    247       double r2 = RSquaredEvaluator.Evaluate(problemData, tree, optimizeConstants);
    248 
    249       return r2;
     242      return RSquaredEvaluator.Evaluate(problemData, tree, optimizeConstants);
    250243    }
    251244
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/GrammarEnumerationAlgorithm.cs

    r15960 r15974  
    1212using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    1313using HeuristicLab.Problems.DataAnalysis;
     14using HeuristicLab.Problems.DataAnalysis.Symbolic;
     15using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;
    1416
    1517namespace HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration {
     
    3638    private readonly string GuiUpdateIntervalParameterName = "GUI Update Interval";
    3739    private readonly string GrammarSymbolsParameterName = "Grammar Symbols";
     40    private readonly string SearchCacheSizeParameterName = "Search Cache Size";
     41    private readonly string SearchDataStructureSizeParameterName = "Search Data Structure Size";
     42
     43    // result names
     44    public static readonly string BestTrainingModelResultName = "Best model (Training)";
     45    public static readonly string BestTrainingSolutionResultName = "Best solution (Training)";
     46    public static readonly string BestComplexityResultName = "Best solution complexity";
    3847
    3948    public override bool SupportsPause { get { return true; } }
    4049
    41     protected IValueParameter<DoubleValue> VariableImportanceWeightParameter {
    42       get { return (IValueParameter<DoubleValue>)Parameters[VariableImportanceWeightName]; }
     50    protected IFixedValueParameter<DoubleValue> VariableImportanceWeightParameter {
     51      get { return (IFixedValueParameter<DoubleValue>)Parameters[VariableImportanceWeightName]; }
    4352    }
    4453
     
    4756    }
    4857
    49     protected IValueParameter<BoolValue> OptimizeConstantsParameter {
    50       get { return (IValueParameter<BoolValue>)Parameters[OptimizeConstantsParameterName]; }
     58    protected IFixedValueParameter<BoolValue> OptimizeConstantsParameter {
     59      get { return (IFixedValueParameter<BoolValue>)Parameters[OptimizeConstantsParameterName]; }
    5160    }
    5261
     
    5665    }
    5766
    58     protected IValueParameter<IntValue> MaxComplexityParameter {
    59       get { return (IValueParameter<IntValue>)Parameters[MaxComplexityParameterName]; }
    60     }
     67    protected IFixedValueParameter<IntValue> MaxComplexityParameter {
     68      get { return (IFixedValueParameter<IntValue>)Parameters[MaxComplexityParameterName]; }
     69    }
     70
    6171    public int MaxComplexity {
    6272      get { return MaxComplexityParameter.Value.Value; }
     
    6474    }
    6575
    66     protected IValueParameter<DoubleValue> ErrorWeightParameter {
    67       get { return (IValueParameter<DoubleValue>)Parameters[ErrorWeightParameterName]; }
    68     }
     76    protected IFixedValueParameter<DoubleValue> ErrorWeightParameter {
     77      get { return (IFixedValueParameter<DoubleValue>)Parameters[ErrorWeightParameterName]; }
     78    }
     79
    6980    public double ErrorWeight {
    7081      get { return ErrorWeightParameter.Value.Value; }
     
    7283    }
    7384
    74     protected IValueParameter<IntValue> GuiUpdateIntervalParameter {
    75       get { return (IValueParameter<IntValue>)Parameters[GuiUpdateIntervalParameterName]; }
    76     }
     85    protected IFixedValueParameter<IntValue> GuiUpdateIntervalParameter {
     86      get { return (IFixedValueParameter<IntValue>)Parameters[GuiUpdateIntervalParameterName]; }
     87    }
     88
    7789    public int GuiUpdateInterval {
    7890      get { return GuiUpdateIntervalParameter.Value.Value; }
     
    8092    }
    8193
    82     protected IValueParameter<EnumValue<StorageType>> SearchDataStructureParameter {
    83       get { return (IValueParameter<EnumValue<StorageType>>)Parameters[SearchDataStructureParameterName]; }
    84     }
     94    protected IFixedValueParameter<EnumValue<StorageType>> SearchDataStructureParameter {
     95      get { return (IFixedValueParameter<EnumValue<StorageType>>)Parameters[SearchDataStructureParameterName]; }
     96    }
     97
     98    public IFixedValueParameter<IntValue> SearchDataStructureSizeParameter {
     99      get { return (IFixedValueParameter<IntValue>)Parameters[SearchDataStructureSizeParameterName]; }
     100    }
     101
     102    public int SearchDataStructureSize {
     103      get { return SearchDataStructureSizeParameter.Value.Value; }
     104    }
     105
     106    public IFixedValueParameter<IntValue> SearchCacheSizeParameter {
     107      get { return (IFixedValueParameter<IntValue>)Parameters[SearchCacheSizeParameterName]; }
     108    }
     109
     110    public int SearchCacheSize {
     111      get { return SearchCacheSizeParameter.Value.Value; }
     112    }
     113
    85114    public StorageType SearchDataStructure {
    86115      get { return SearchDataStructureParameter.Value.Value; }
     
    116145    internal SearchDataStore OpenPhrases { get; private set; }           // Stack/Queue/etc. for fetching the next node in the search tree. 
    117146
     147    [StorableHook(HookType.AfterDeserialization)]
     148    private void AfterDeserialization() {
     149      variableImportance = CalculateVariableImportances();
     150    }
     151
    118152    #region execution stats
     153    [Storable]
    119154    public int AllGeneratedSentencesCount { get; private set; }
    120155
     156    [Storable]
    121157    public int OverwrittenSentencesCount { get; private set; } // It is not guaranteed that shorter solutions are found first.
    122158                                                               // When longer solutions are overwritten with shorter ones,
    123159                                                               // this counter is increased.
     160    [Storable]
    124161    public int PhraseExpansionCount { get; private set; }      // Number, how many times a nonterminal symbol is replaced with a production rule.
    125162    #endregion
     
    133170
    134171    public GrammarEnumerationAlgorithm() {
    135       Parameters.Add(new ValueParameter<DoubleValue>(VariableImportanceWeightName, "Variable Weight.", new DoubleValue(1.0)));
    136       Parameters.Add(new ValueParameter<BoolValue>(OptimizeConstantsParameterName, "Run constant optimization in sentence evaluation.", new BoolValue(false)));
    137       Parameters.Add(new ValueParameter<DoubleValue>(ErrorWeightParameterName, "Defines, how much weight is put on a phrase's r² value when priorizing phrases during search.", new DoubleValue(0.8)));
    138       Parameters.Add(new ValueParameter<IntValue>(MaxComplexityParameterName, "The maximum number of variable symbols in a sentence.", new IntValue(12)));
    139       Parameters.Add(new ValueParameter<IntValue>(GuiUpdateIntervalParameterName, "Number of generated sentences, until GUI is refreshed.", new IntValue(5000)));
    140       Parameters.Add(new ValueParameter<EnumValue<StorageType>>(SearchDataStructureParameterName, new EnumValue<StorageType>(StorageType.PriorityQueue)));
     172      Parameters.Add(new FixedValueParameter<DoubleValue>(VariableImportanceWeightName, "Variable Weight.", new DoubleValue(1.0)));
     173      Parameters.Add(new FixedValueParameter<BoolValue>(OptimizeConstantsParameterName, "Run constant optimization in sentence evaluation.", new BoolValue(false)));
     174      Parameters.Add(new FixedValueParameter<DoubleValue>(ErrorWeightParameterName, "Defines, how much weight is put on a phrase's r² value when priorizing phrases during search.", new DoubleValue(0.8)));
     175      Parameters.Add(new FixedValueParameter<IntValue>(MaxComplexityParameterName, "The maximum number of variable symbols in a sentence.", new IntValue(12)));
     176      Parameters.Add(new FixedValueParameter<IntValue>(GuiUpdateIntervalParameterName, "Number of generated sentences, until GUI is refreshed.", new IntValue(5000)));
     177      Parameters.Add(new FixedValueParameter<IntValue>(SearchCacheSizeParameterName, "The size of the search node cache.", new IntValue((int)1e5)));
     178      Parameters.Add(new FixedValueParameter<IntValue>(SearchDataStructureSizeParameterName, "The size of the search data structure.", new IntValue((int)1e5)));
     179      Parameters.Add(new FixedValueParameter<EnumValue<StorageType>>(SearchDataStructureParameterName, new EnumValue<StorageType>(StorageType.PriorityQueue)));
     180
     181      SearchDataStructureParameter.Value.ValueChanged += (o, e) => Prepare();
     182      SearchDataStructureSizeParameter.Value.ValueChanged += (o, e) => Prepare();
     183      SearchCacheSizeParameter.Value.ValueChanged += (o, e) => Prepare();
    141184
    142185      var availableAnalyzers = new IGrammarEnumerationAnalyzer[] {
     
    145188        new RSquaredEvaluator()
    146189      };
     190
    147191      Parameters.Add(new FixedValueParameter<ReadOnlyCheckedItemCollection<IGrammarEnumerationAnalyzer>>(
    148192        AnalyzersParameterName,
     
    170214      // set a default problem
    171215      Problem = new RegressionProblem() {
    172         ProblemData = new HeuristicLab.Problems.Instances.DataAnalysis.PolyTen(seed: 1234).GenerateRegressionData()
     216        ProblemData = new Problems.Instances.DataAnalysis.PolyTen(seed: 1234).GenerateRegressionData()
    173217      };
    174218    }
     
    182226      ArchivedPhrases = new HashSet<int>(original.ArchivedPhrases);
    183227      OpenPhrases = cloner.Clone(original.OpenPhrases);
     228      Grammar = cloner.Clone(original.Grammar);
    184229
    185230      AllGeneratedSentencesCount = original.AllGeneratedSentencesCount;
    186231      OverwrittenSentencesCount = original.OverwrittenSentencesCount;
    187232      PhraseExpansionCount = original.PhraseExpansionCount;
    188       Grammar = cloner.Clone(original.Grammar);
    189233
    190234      if (original.variableImportance != null)
     
    204248      Analyzers.OfType<RSquaredEvaluator>().First().OptimizeConstants = OptimizeConstants;
    205249      Grammar = new Grammar(Problem.ProblemData.AllowedInputVariables.ToArray(), GrammarSymbols.CheckedItems.Select(v => v.Value));
    206       OpenPhrases = new SearchDataStore(SearchDataStructure); // Select search strategy
     250      OpenPhrases = new SearchDataStore(SearchDataStructure, SearchDataStructureSize, SearchCacheSize); // Select search strategy
    207251
    208252      base.Prepare(); // this actually clears the results which will get reinitialized on Run()
     
    237281        var phrase0 = new SymbolString(new[] { Grammar.StartSymbol });
    238282        var phrase0Hash = Grammar.Hasher.CalcHashCode(phrase0);
     283
    239284        OpenPhrases.Store(new SearchNode(phrase0Hash, 0.0, 0.0, phrase0));
    240285      }
     
    249294
    250295        SearchNode fetchedSearchNode = OpenPhrases.GetNext();
     296
     297        if (fetchedSearchNode == null)
     298          continue;
     299
    251300        SymbolString currPhrase = fetchedSearchNode.SymbolString;
    252301
     
    380429    protected override void OnStopped() {
    381430      previousExecutionState = this.ExecutionState;
     431
     432      if (BestTrainingSentence == null) {
     433        base.OnStopped();
     434        return;
     435      }
     436
     437      var tree = Grammar.ParseSymbolicExpressionTree(BestTrainingSentence);
     438      var model = new SymbolicRegressionModel(Problem.ProblemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeLinearInterpreter());
     439      model.Scale(Problem.ProblemData);
     440      var bestTrainingSolution = new SymbolicRegressionSolution(model, Problem.ProblemData);
     441      Results.AddOrUpdateResult(BestTrainingModelResultName, model);
     442      Results.AddOrUpdateResult(BestTrainingSolutionResultName, bestTrainingSolution);
     443      Results.AddOrUpdateResult(BestComplexityResultName, new IntValue(Grammar.GetComplexity(BestTrainingSentence)));
    382444      base.OnStopped();
    383445    }
     
    409471        ((IntValue)Results[DistinctSentencesName].Value).Value = DistinctSentencesComplexity.Count;
    410472        ((IntValue)Results[PhraseExpansionsName].Value).Value = PhraseExpansionCount;
    411         ((DoubleValue)Results[AverageSentenceComplexityName].Value).Value = DistinctSentencesComplexity.Select(pair => pair.Value).Average();
     473        ((DoubleValue)Results[AverageSentenceComplexityName].Value).Value = DistinctSentencesComplexity.Count > 0
     474          ? DistinctSentencesComplexity.Select(pair => pair.Value).Average()
     475          : 0;
    412476        ((IntValue)Results[OverwrittenSentencesName].Value).Value = OverwrittenSentencesCount;
    413477        ((IntValue)Results[ExpansionsPerSecondName].Value).Value = (int)((PhraseExpansionCount /
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/SearchDataStructure.cs

    r15960 r15974  
    1111    [Storable]
    1212    public readonly int Hash;
     13
    1314    [Storable]
    1415    public readonly SymbolString SymbolString;
     16
    1517    [Storable]
    1618    public readonly double Priority;
     19
    1720    [Storable]
    1821    public readonly double R2;
     
    4346
    4447  public enum StorageType {
    45     PriorityQueue, Stack, Queue, RandomList
     48    PriorityQueue, Stack, Queue, RandomList, SortedSet
    4649  }
    4750
     
    4952  class SearchDataStore : DeepCloneable, IEnumerable<SearchNode> {
    5053    [Storable]
    51     private Dictionary<int, SearchNode> storedValues; // Store hash-references and associated, actual values
     54    private LruCache<int, SearchNode> storedValues;
     55    //private Dictionary<int, SearchNode> storedValues; // Store hash-references and associated, actual values
     56
    5257    [Storable]
    5358    private StorageType storageType;
    5459
    55     // private storage
    5660    [Storable]
    5761    private Queue<int> queue;
     
    6266    [Storable]
    6367    private List<int> list;
     68
     69    [Storable]
     70    private SortedSet<Tuple<double, int>> sortedSet;
     71
     72    [Storable]
     73    private int searchDataStructureSize; // storage size for search nodes
     74
     75    [Storable]
     76    private int cacheSize; // cache for already explored search nodes
    6477
    6578    [ExcludeFromObjectGraphTraversal]
     
    7487    protected SearchDataStore(bool deserializing) : this() { }
    7588
    76     public SearchDataStore(StorageType storageType) {
     89    public SearchDataStore(StorageType storageType, int searchDataStructureSize = (int)1e5, int cacheSize = (int)1e5) {
    7790      this.storageType = storageType;
    78       storedValues = new Dictionary<int, SearchNode>();
     91
     92      this.searchDataStructureSize = searchDataStructureSize;
     93      this.cacheSize = cacheSize;
     94
     95      storedValues = new LruCache<int, SearchNode>(this.cacheSize);
    7996      InitStorage();
    8097    }
     
    94111          InitRandomList();
    95112          break;
     113        case StorageType.SortedSet:
     114          InitSortedSet();
     115          break;
    96116      }
    97117    }
     
    102122
    103123    protected SearchDataStore(SearchDataStore original, Cloner cloner) : base(original, cloner) {
    104       storedValues = original.storedValues.ToDictionary(x => x.Key, x => cloner.Clone(x.Value));
     124      storedValues = cloner.Clone(original.storedValues);
    105125      storageType = original.storageType;
     126      cacheSize = original.cacheSize;
     127      searchDataStructureSize = original.searchDataStructureSize;
     128
    106129      InitStorage();
    107130      switch (storageType) {
     
    122145        case StorageType.RandomList:
    123146          list.AddRange(original.list);
     147          break;
     148        case StorageType.SortedSet:
     149          sortedSet = new SortedSet<Tuple<double, int>>(original.sortedSet);
    124150          break;
    125151      }
     
    153179          };
    154180          break;
     181        case StorageType.SortedSet:
     182          storeInternal = (hash, prio) => {
     183            if (sortedSet.Count >= searchDataStructureSize) {
     184              var max = sortedSet.Max;
     185              sortedSet.Remove(max);
     186              storedValues.Remove(max.Item2);
     187            }
     188            sortedSet.Add(Tuple.Create(prio, hash));
     189          };
     190          fetchInternal = () => {
     191            var elem = sortedSet.FirstOrDefault();
     192            if (elem == null)
     193              return 0;
     194            sortedSet.Remove(elem);
     195            return elem.Item2;
     196          };
     197          break;
    155198      }
    156199    }
     
    158201
    159202    private void InitPriorityQueue() {
    160       int capacity = 10000000;
     203      int capacity = searchDataStructureSize;
    161204      priorityQueue = new PriorityQueue<double, int>(double.MaxValue, double.MinValue, capacity);
    162       storeInternal = (hash, prio) => priorityQueue.Insert(prio, hash);
     205      storeInternal = (hash, prio) => {
     206        // size is the 0-based index of the last used element
     207        if (priorityQueue.Size == capacity - 1) {
     208          // if the queue is at capacity we have to replace
     209          return;
     210        }
     211        priorityQueue.Insert(prio, hash);
     212      };
    163213      fetchInternal = () => {
    164214        int ret = priorityQueue.PeekMinValue();
    165         priorityQueue.RemoveMin();
     215        if (priorityQueue.Size > 0) {
     216          priorityQueue.RemoveMin();
     217        }
    166218        return ret;
    167219      };
     
    195247    }
    196248
     249    private void InitSortedSet() {
     250      sortedSet = new SortedSet<Tuple<double, int>>();
     251      storeInternal = (hash, prio) => {
     252        if (sortedSet.Count >= searchDataStructureSize) {
     253          var max = sortedSet.Max;
     254          sortedSet.Remove(max);
     255          storedValues.Remove(max.Item2);
     256        }
     257        sortedSet.Add(Tuple.Create(prio, hash));
     258      };
     259      fetchInternal = () => {
     260        var elem = sortedSet.FirstOrDefault();
     261        if (elem == null)
     262          return default(int);
     263        sortedSet.Remove(elem);
     264        return elem.Item2;
     265      };
     266    }
     267
    197268    #endregion
    198269
     
    201272    public SearchNode GetNext() {
    202273      int hash = fetchInternal.Invoke();
    203       SearchNode result = storedValues[hash];
    204       storedValues.Remove(hash);
     274      SearchNode result = null;
     275      if (storedValues.TryGetValue(hash, out result)) {
     276        storedValues.Remove(hash);
     277      }
    205278      return result;
    206279    }
     
    212285
    213286    public bool Contains(int hash) {
    214       return storedValues.ContainsKey(hash);
     287      SearchNode result;
     288      return storedValues.TryGetValue(hash, out result);
    215289    }
    216290
     
    220294
    221295    public int Count {
    222       get { return storedValues.Count; }
     296      get {
     297        switch (storageType) {
     298          case StorageType.PriorityQueue:
     299            return priorityQueue.Size;
     300          case StorageType.Queue:
     301            return queue.Count;
     302          case StorageType.RandomList:
     303            return list.Count;
     304          case StorageType.Stack:
     305            return stack.Count;
     306          case StorageType.SortedSet:
     307            return sortedSet.Count;
     308          default:
     309            throw new InvalidOperationException("");
     310        }
     311      }
    223312    }
    224313
    225314    public IEnumerator<SearchNode> GetEnumerator() {
    226       return storedValues.Values.GetEnumerator();
     315      return storedValues.Select(x => x.Value).GetEnumerator();
    227316    }
    228317
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/Sentence.cs

    r15963 r15974  
    2929
    3030    protected SymbolString(SymbolString original, Cloner cloner) : base(original, cloner) {
    31       symbols = original.symbols.ToArray();
     31      symbols = original.symbols.Select(cloner.Clone).ToArray();
    3232    }
    3333
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/Symbol.cs

    r15963 r15974  
    1010    private readonly int stringRepresentationHash;
    1111
    12     [Storable(AllowOneWay = true)]
    13     public string StringRepresentation { get; }
     12    [Storable]
     13    public string StringRepresentation { get; private set; }
    1414
    1515    protected Symbol(string representation) {
  • branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.csproj

    r15960 r15974  
    122122    <Compile Include="GrammarEnumeration\Grammar.cs" />
    123123    <Compile Include="GrammarEnumeration\GrammarEnumerationAlgorithm.cs" />
     124    <Compile Include="GrammarEnumeration\LruCache.cs" />
    124125    <Compile Include="Hashing\Hasher.cs" />
    125126    <Compile Include="GrammarEnumeration\SearchDataStructure.cs" />
  • branches/2886_SymRegGrammarEnumeration/Test/Test.csproj

    r15714 r15974  
    3434    <ErrorReport>prompt</ErrorReport>
    3535    <WarningLevel>4</WarningLevel>
     36  </PropertyGroup>
     37  <PropertyGroup>
     38    <SignAssembly>true</SignAssembly>
     39  </PropertyGroup>
     40  <PropertyGroup>
     41    <AssemblyOriginatorKeyFile>HeuristicLab.snk</AssemblyOriginatorKeyFile>
    3642  </PropertyGroup>
    3743  <ItemGroup>
     
    120126    </ProjectReference>
    121127  </ItemGroup>
     128  <ItemGroup>
     129    <None Include="HeuristicLab.snk" />
     130  </ItemGroup>
    122131  <Choose>
    123132    <When Condition="'$(VisualStudioVersion)' == '10.0' And '$(IsCodedUITest)' == 'True'">
Note: See TracChangeset for help on using the changeset viewer.