#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2013 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text.RegularExpressions;

namespace HeuristicLab.Grammars {
  /// <summary>
  /// A grammar made of a start symbol, a set of symbols and, for every non-terminal symbol,
  /// a list of alternative productions (each a <see cref="Sequence"/> of symbols).
  /// </summary>
  public class Grammar : IGrammar {
    /// <summary>
    /// Reserved symbol named "EPS". It must not be used as start symbol or as the left-hand
    /// side of a production rule (both are checked with debug assertions).
    /// NOTE(review): presumably denotes the empty word (epsilon) - confirm against callers.
    /// </summary>
    public static readonly ISymbol EmptySymbol = new Symbol("EPS");

    // alternatives of each non-terminal; the key set doubles as the set of non-terminals
    private readonly Dictionary<ISymbol, List<Sequence>> rules;
    // terminals and non-terminals together
    private readonly HashSet<ISymbol> allSymbols;

    /// <summary>Gets or sets the start symbol of the grammar.</summary>
    public ISymbol StartSymbol { get; set; }

    /// <summary>Gets all symbols of the grammar that have no rule entry.</summary>
    public IEnumerable<ISymbol> TerminalSymbols { get { return allSymbols.Except(NonTerminalSymbols); } }

    /// <summary>Gets all symbols that have a rule entry (even if it holds no alternatives yet).</summary>
    public IEnumerable<ISymbol> NonTerminalSymbols { get { return rules.Keys; } }

    /// <summary>Gets all terminal and non-terminal symbols of the grammar.</summary>
    public IEnumerable<ISymbol> Symbols { get { return allSymbols; } }

    /// <summary>
    /// Creates a grammar without production rules. Rules are added afterwards with
    /// <see cref="AddProductionRule"/>.
    /// </summary>
    /// <param name="startSymbol">The start symbol; must not be <see cref="EmptySymbol"/>.</param>
    /// <param name="nonTerminals">The non-terminal symbols; each gets an empty list of alternatives.</param>
    /// <param name="terminals">The terminal symbols.</param>
    public Grammar(ISymbol startSymbol, IEnumerable<ISymbol> nonTerminals, IEnumerable<ISymbol> terminals) {
      Debug.Assert(startSymbol != EmptySymbol);
      this.StartSymbol = startSymbol;
      this.allSymbols = new HashSet<ISymbol>(nonTerminals.Concat(terminals));
      this.rules = nonTerminals.ToDictionary(nt => nt, nt => new List<Sequence>());
    }

    /// <summary>Returns the number of alternatives registered for the given non-terminal.</summary>
    /// <param name="ntSymbol">A non-terminal symbol of this grammar.</param>
    public int NumberOfAlternatives(ISymbol ntSymbol) {
      return rules[ntSymbol].Count;
    }

    /// <summary>Returns a read-only view of the alternatives registered for the given non-terminal.</summary>
    /// <param name="ntSymbol">A non-terminal symbol of this grammar.</param>
    public IEnumerable<Sequence> GetAlternatives(ISymbol ntSymbol) {
      return rules[ntSymbol].AsReadOnly();
    }

    /// <summary>Returns the alternative at the given position for the given non-terminal.</summary>
    /// <param name="ntSymbol">A non-terminal symbol of this grammar.</param>
    /// <param name="index">Zero-based position of the alternative, in the order the rules were added.</param>
    public Sequence GetAlternative(ISymbol ntSymbol, int index) {
      return rules[ntSymbol][index];
    }

    /// <summary>
    /// Appends <paramref name="production"/> to the alternatives of <paramref name="ntSymbol"/>.
    /// Debug assertions check that the non-terminal is known and is not <see cref="EmptySymbol"/>,
    /// that all symbols of the production belong to the grammar and that the same alternative
    /// has not been added before.
    /// </summary>
    /// <param name="ntSymbol">The non-terminal on the left-hand side of the rule.</param>
    /// <param name="production">The sequence of symbols on the right-hand side of the rule.</param>
    public virtual void AddProductionRule(ISymbol ntSymbol, Sequence production) {
      Debug.Assert(ntSymbol != EmptySymbol);
      Debug.Assert(rules.ContainsKey(ntSymbol));
      Debug.Assert(production.All(s => allSymbols.Contains(s)));

      var alternatives = rules[ntSymbol];
      Debug.Assert(!alternatives.Any(s => s.SequenceEqual(production)));

      alternatives.Add(production);
    }

    /// <summary>Returns true if the symbol belongs to the grammar and has no rule entry.</summary>
    public bool IsTerminal(ISymbol symbol) {
      // terminals must not have rules but must occur in the set of all symbols
      return !rules.ContainsKey(symbol) && allSymbols.Contains(symbol);
    }

    /// <summary>Returns true if the symbol has a rule entry.</summary>
    public bool IsNonTerminal(ISymbol symbol) {
      return rules.ContainsKey(symbol);
    }

    // One rule per line: "NT -> a b | c d". Every '|'-separated alternative is captured by the
    // (repeated) group "alternative". The pattern is not anchored, so any text before or after
    // the part that matches is ignored.
    private static readonly Regex ruleExpr = new Regex(@"\s*(?<ntSymbol>\w+)\s*->\s*(?<alternative>\w+(?:\s+\w+)*)(?:\s*\|\s*(?<alternative>\w+(?:\s+\w+)*))*");
    private static readonly Regex empty = new Regex(@"^\s*$");

    /// <summary>
    /// Parses a grammar from text containing one rule per line in the form
    /// <c>NT -&gt; a b | c d</c>. The left-hand side of the first rule becomes the start symbol.
    /// Every symbol that occurs on a left-hand side is a non-terminal, all other symbols are
    /// terminals. Blank lines are skipped. Symbols with the same name are mapped to the same
    /// symbol instance, and several lines with the same left-hand side contribute alternatives
    /// to the same non-terminal.
    /// </summary>
    /// <param name="gStr">The textual grammar definition; lines may be separated by CR, LF or CRLF.</param>
    /// <returns>The grammar containing all parsed symbols and production rules.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="gStr"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="gStr"/> contains no rule, or a line cannot be parsed as a rule.
    /// </exception>
    public static Grammar FromString(string gStr) {
      if (gStr == null) {
        throw new ArgumentNullException("gStr");
      }

      // split on any line-ending convention, not only on the platform's Environment.NewLine
      var lines = gStr.Split(new char[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries)
        .Where(l => !empty.IsMatch(l)) // remove empty lines
        .ToArray();
      if (lines.Length == 0) {
        throw new ArgumentException("The grammar definition does not contain any production rule.", "gStr");
      }

      // exactly one symbol instance per name; orderedSymbols keeps the order of first occurrence
      var symbolsByName = new Dictionary<string, ISymbol>();
      var orderedSymbols = new List<ISymbol>();
      var nonTerminals = new List<ISymbol>();
      var nonTerminalNames = new HashSet<string>();
      var productions = new List<KeyValuePair<ISymbol, List<ISymbol>>>();

      // step 1: parse every line once, collecting the symbols and the productions
      foreach (var line in lines) {
        var m = ruleExpr.Match(line);
        if (!m.Success) {
          throw new ArgumentException(string.Format("The line \"{0}\" is not a valid production rule.", line), "gStr");
        }

        var ntSymbol = GetOrCreateSymbol(m.Groups["ntSymbol"].Value, symbolsByName, orderedSymbols);
        if (nonTerminalNames.Add(ntSymbol.Name)) {
          // the first line is processed first, so nonTerminals[0] is the start symbol
          nonTerminals.Add(ntSymbol);
        }

        foreach (Capture alt in m.Groups["alternative"].Captures) {
          // a null separator splits on any white space, which is what the pattern (\s+) allows
          var production = alt.Value
            .Split((char[])null, StringSplitOptions.RemoveEmptyEntries)
            .Select(n => GetOrCreateSymbol(n, symbolsByName, orderedSymbols))
            .ToList();
          productions.Add(new KeyValuePair<ISymbol, List<ISymbol>>(ntSymbol, production));
        }
      }

      // step 2: the symbol sets are complete now, create the grammar and add the rules in input order
      var terminals = orderedSymbols.Where(s => !nonTerminalNames.Contains(s.Name)).ToList();
      var g = new Grammar(nonTerminals[0], nonTerminals, terminals);
      foreach (var p in productions) {
        g.AddProductionRule(p.Key, new Sequence(p.Value));
      }
      return g;
    }

    // Returns the symbol registered under the given name, creating and registering it on first use.
    private static ISymbol GetOrCreateSymbol(string name, Dictionary<string, ISymbol> symbolsByName, List<ISymbol> orderedSymbols) {
      ISymbol symbol;
      if (!symbolsByName.TryGetValue(name, out symbol)) {
        symbol = new Symbol(name);
        symbolsByName.Add(name, symbol);
        orderedSymbols.Add(symbol);
      }
      return symbol;
    }
  }
}