Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
01/02/15 16:08:21 (9 years ago)
Author:
gkronber
Message:

#2283: several major extensions for grammatical optimization

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization/Grammar.cs

    r11727 r11730  
    1919  public class Grammar : IGrammar {
    2020
    21     private readonly Dictionary<char, List<string>> rules;
     21    private readonly Dictionary<char, List<Sequence>> rules;
    2222    private readonly HashSet<char> terminalSymbols;
    2323    private readonly char sentenceSymbol;
    2424    private readonly HashSet<char> nonTerminalSymbols;
    25     private readonly Dictionary<string, int> maxPhraseLength = new Dictionary<string, int>();
    26     private readonly Dictionary<string, int> minPhraseLength = new Dictionary<string, int>();
     25    private readonly Dictionary<Sequence, int> maxPhraseLength = new Dictionary<Sequence, int>();
     26    private readonly Dictionary<Sequence, int> minPhraseLength = new Dictionary<Sequence, int>();
    2727
    2828    public char SentenceSymbol { get { return sentenceSymbol; } }
     
    3333    // cloning ctor
    3434    public Grammar(Grammar orig) {
    35       this.rules = new Dictionary<char, List<string>>(orig.rules);
     35      this.rules = new Dictionary<char, List<Sequence>>();
     36      foreach (var r in orig.rules)
     37        this.rules.Add(r.Key, new List<Sequence>(r.Value.Select(v => new Sequence(v)))); // clone sequences
    3638      this.terminalSymbols = new HashSet<char>(orig.terminalSymbols);
    3739      this.sentenceSymbol = orig.sentenceSymbol;
    3840      this.nonTerminalSymbols = new HashSet<char>(orig.nonTerminalSymbols);
    39       this.maxPhraseLength = new Dictionary<string, int>(orig.maxPhraseLength);
    40       this.minPhraseLength = new Dictionary<string, int>(orig.minPhraseLength);
     41      this.maxPhraseLength = new Dictionary<Sequence, int>();
     42      foreach (var p in orig.maxPhraseLength) this.maxPhraseLength.Add(new Sequence(p.Key), p.Value);
     43      this.minPhraseLength = new Dictionary<Sequence, int>();
     44      foreach (var p in orig.minPhraseLength) this.minPhraseLength.Add(new Sequence(p.Key), p.Value);
    4145    }
    4246
     
    4751      this.terminalSymbols = new HashSet<char>(terminalSymbols);
    4852      this.nonTerminalSymbols = new HashSet<char>(nonTerminalSymbols);
    49       this.rules = new Dictionary<char, List<string>>();
     53      this.rules = new Dictionary<char, List<Sequence>>();
    5054      foreach (var r in rules) {
    51         if (!this.rules.ContainsKey(r.Item1)) this.rules.Add(r.Item1, new List<string>());
    52         this.rules[r.Item1].Add(r.Item2); // here we store an array of symbols for a phase
     55        if (!this.rules.ContainsKey(r.Item1)) this.rules.Add(r.Item1, new List<Sequence>());
     56        this.rules[r.Item1].Add(new Sequence(r.Item2)); // here we store an array of symbols for a phase
    5357      }
    5458
     
    8690          max = Math.Max(max, maxPhraseLength[alt]);
    8791        }
    88         minPhraseLength[nt.ToString()] = min;
    89         maxPhraseLength[nt.ToString()] = max;
    90       }
    91     }
    92 
    93 
    94     public IEnumerable<string> GetAlternatives(char nt) {
     92        minPhraseLength[new Sequence(nt)] = min;
     93        maxPhraseLength[new Sequence(nt)] = max;
     94      }
     95    }
     96
     97
     98    public IEnumerable<Sequence> GetAlternatives(char nt) {
    9599      return rules[nt];
    96100    }
    97101
    98     public IEnumerable<string> GetTerminalAlternatives(char nt) {
     102    public IEnumerable<Sequence> GetTerminalAlternatives(char nt) {
    99103      return GetAlternatives(nt).Where(alt => alt.All(IsTerminal));
    100104    }
    101105
    102     public IEnumerable<string> GetNonTerminalAlternatives(char nt) {
     106    public IEnumerable<Sequence> GetNonTerminalAlternatives(char nt) {
    103107      return GetAlternatives(nt).Where(alt => alt.Any(IsNonTerminal));
    104108    }
    105109
    106110    // caches for this are build in construction of object
    107     public int MinPhraseLength(string phrase) {
     111    public int MinPhraseLength(Sequence phrase) {
    108112      int l;
    109113      if (minPhraseLength.TryGetValue(phrase, out l)) return l;
     
    125129
    126130    // caches for this are build in construction of object
    127     public int MaxPhraseLength(string phrase) {
     131    public int MaxPhraseLength(Sequence phrase) {
    128132      int l;
    129133      if (maxPhraseLength.TryGetValue(phrase, out l)) return l;
     
    152156    }
    153157
    154     public string CompleteSentenceRandomly(Random random, string phrase, int maxLen) {
     158    public Sequence CompleteSentenceRandomly(Random random, Sequence phrase, int maxLen) {
    155159      if (phrase.Length > maxLen) throw new ArgumentException();
    156160      if (MinPhraseLength(phrase) > maxLen) throw new ArgumentException();
    157       bool done = phrase.All(IsTerminal); // terminal phrase means we are done
     161      bool done = phrase.IsTerminal; // terminal phrase means we are done
    158162      while (!done) {
    159         int ntIdx; char nt;
    160         FindFirstNonTerminal(this, phrase, out nt, out ntIdx);
     163        char nt = phrase.FirstNonTerminal;
    161164
    162165        int maxLenOfReplacement = maxLen - (phrase.Length - 1); // replacing aAb with maxLen 4 means we can only use alternatives with a minPhraseLen <= 2
     
    168171        // replace nt with random alternative
    169172        var selectedAlt = alts.SelectRandom(random);
    170         phrase = phrase.Remove(ntIdx, 1);
    171         phrase = phrase.Insert(ntIdx, selectedAlt);
     173        phrase.ReplaceAt(phrase.FirstNonTerminalIndex, 1, selectedAlt);
    172174
    173175        done = phrase.All(IsTerminal); // terminal phrase means we are done
    174176      }
    175177      return phrase;
    176     }
    177 
    178     public static void FindFirstNonTerminal(IGrammar g, string phrase, out char nt, out int ntIdx) {
    179       ntIdx = 0;
    180       while (ntIdx < phrase.Length && g.IsTerminal(phrase[ntIdx])) ntIdx++;
    181       if (ntIdx >= phrase.Length) {
    182         ntIdx = -1;
    183         nt = '\0';
    184       } else {
    185         nt = phrase[ntIdx];
    186       }
    187178    }
    188179
     
    262253      foreach (var r in rules) {
    263254        foreach (var alt in r.Value) {
    264           var phrase = string.Join(" ", alt);
    265           sb.AppendFormat("  {0} -> {1} (min: {2}, max {3})", r.Key, phrase, MinPhraseLength(phrase), MaxPhraseLength(phrase))
     255          sb.AppendFormat("  {0} -> {1} (min: {2}, max {3})", r.Key, alt, MinPhraseLength(alt), MaxPhraseLength(alt))
    266256            .AppendLine();
    267257        }
Note: See TracChangeset for help on using the changeset viewer.