#region License Information
/* HeuristicLab
* Copyright (C) 2002-2013 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
*/
#endregion
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text.RegularExpressions;
namespace HeuristicLab.Grammars {
/// <summary>
/// A grammar made of a start symbol and, for each non-terminal symbol, a list of
/// alternative productions (symbol sequences).
/// </summary>
/// <remarks>
/// A symbol is a non-terminal as soon as it appears on the left-hand side of a
/// production rule; every other registered symbol is a terminal.
/// </remarks>
public class Grammar : IGrammar {
  /// <summary>Symbol instance used to denote the empty production.</summary>
  public static readonly ISymbol EmptySymbol = new Symbol("EPS");

  // non-terminal symbol -> its alternatives, in the order they were added
  private readonly Dictionary<ISymbol, List<Sequence>> rules = new Dictionary<ISymbol, List<Sequence>>();
  // every symbol seen so far (left-hand sides and symbols inside productions)
  private readonly HashSet<ISymbol> allSymbols = new HashSet<ISymbol>();

  /// <summary>The symbol from which derivations start.</summary>
  public ISymbol StartSymbol { get; set; }

  /// <summary>All registered symbols that have no production rule.</summary>
  public IEnumerable<ISymbol> TerminalSymbols { get { return allSymbols.Except(NonTerminalSymbols); } }

  /// <summary>All symbols that have at least one production rule.</summary>
  public IEnumerable<ISymbol> NonTerminalSymbols { get { return rules.Keys; } }

  /// <summary>All registered symbols (terminals and non-terminals).</summary>
  public IEnumerable<ISymbol> Symbols { get { return allSymbols; } }

  /// <summary>Creates an empty grammar with the given start symbol.</summary>
  /// <param name="startSymbol">The start symbol; must not be <see cref="EmptySymbol"/>.</param>
  public Grammar(ISymbol startSymbol) {
    // Both operands are interface-typed, so this is a reference comparison:
    // only the shared EmptySymbol instance itself is rejected (debug builds only).
    Debug.Assert(startSymbol != EmptySymbol);
    this.StartSymbol = startSymbol;
  }

  /// <summary>Returns the number of alternatives registered for a non-terminal symbol.</summary>
  /// <exception cref="KeyNotFoundException">No rule exists for <paramref name="ntSymbol"/>.</exception>
  public int NumberOfAlternatives(ISymbol ntSymbol) {
    return rules[ntSymbol].Count;
  }

  /// <summary>Returns a read-only view of the alternatives of a non-terminal symbol.</summary>
  /// <exception cref="KeyNotFoundException">No rule exists for <paramref name="ntSymbol"/>.</exception>
  public IEnumerable<Sequence> GetAlternatives(ISymbol ntSymbol) {
    return rules[ntSymbol].AsReadOnly();
  }

  /// <summary>Returns the alternative at position <paramref name="index"/> for a non-terminal symbol.</summary>
  /// <exception cref="KeyNotFoundException">No rule exists for <paramref name="ntSymbol"/>.</exception>
  /// <exception cref="ArgumentOutOfRangeException"><paramref name="index"/> is not a valid alternative index.</exception>
  public Sequence GetAlternative(ISymbol ntSymbol, int index) {
    return rules[ntSymbol][index];
  }

  /// <summary>
  /// Adds <paramref name="production"/> as a further alternative of <paramref name="ntSymbol"/>
  /// and registers all symbols involved.
  /// </summary>
  /// <param name="ntSymbol">Left-hand side of the rule; must not be <see cref="EmptySymbol"/>.</param>
  /// <param name="production">Right-hand side of the rule.</param>
  /// <exception cref="ArgumentNullException"><paramref name="production"/> is null.</exception>
  public virtual void AddProductionRule(ISymbol ntSymbol, Sequence production) {
    // Reject null up front; otherwise a release build would store null in the
    // rule list before failing while enumerating the production.
    if (production == null) throw new ArgumentNullException("production");
    Debug.Assert(ntSymbol != EmptySymbol);

    List<Sequence> alternatives;
    if (!rules.TryGetValue(ntSymbol, out alternatives)) {
      alternatives = new List<Sequence>();
      rules.Add(ntSymbol, alternatives);
      allSymbols.Add(ntSymbol); // register new nt-symbol
    }

    // the same production must not be added twice (checked in debug builds only)
    Debug.Assert(!alternatives.Any(s => s.SequenceEqual(production)));
    alternatives.Add(production);

    // register all symbols in the production
    foreach (var s in production) allSymbols.Add(s);
  }

  /// <summary>True if the symbol is registered and has no production rule.</summary>
  public bool IsTerminal(ISymbol symbol) {
    return !rules.ContainsKey(symbol) && allSymbols.Contains(symbol);
  }

  /// <summary>True if the symbol has at least one production rule.</summary>
  public bool IsNonTerminal(ISymbol symbol) {
    return rules.ContainsKey(symbol);
  }

  /// <summary>Checks whether a rule exists for the start symbol.</summary>
  /// <remarks>
  /// Non-terminals are by definition exactly the keys of the rule table, so "every
  /// non-terminal has a rule" always holds; only the start symbol needs checking.
  /// </remarks>
  public bool IsComplete() {
    return rules.ContainsKey(StartSymbol);
  }

  // One rule per line: "NT -> a b c | d e | f". Each alternative is one capture of
  // the "alternative" group. The pattern is not anchored, so text before or after
  // the matching part of a line is ignored.
  private static readonly Regex ruleExpr = new Regex(@"\s*(?<ntSymbol>\w+)\s*->\s*(?<alternative>\w+(?:\s+\w+)*)(?:\s*\|\s*(?<alternative>\w+(?:\s+\w+)*))*");
  private static readonly Regex empty = new Regex(@"^\s*$");

  /// <summary>
  /// Parses a grammar from text containing one rule per line in the form
  /// <c>NT -> a b | c d</c>. The left-hand side of the first rule becomes the start symbol.
  /// </summary>
  /// <param name="gStr">The grammar text; blank lines are ignored.</param>
  /// <returns>The parsed grammar.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="gStr"/> is null.</exception>
  /// <exception cref="ArgumentException">
  /// <paramref name="gStr"/> contains no rules, or a non-blank line is not a production rule.
  /// </exception>
  public static Grammar FromString(string gStr) {
    if (gStr == null) throw new ArgumentNullException("gStr");

    // Split on any line-ending convention. Splitting on Environment.NewLine alone
    // leaves text with a different line ending as one "line", and the \s+ inside
    // the alternative pattern would then match across the newline and merge rules.
    var lines = gStr
      .Split(new char[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries)
      .Where(l => !empty.IsMatch(l)) // remove whitespace-only lines
      .ToArray();
    if (lines.Length == 0)
      throw new ArgumentException("The grammar string does not contain any production rule.", "gStr");

    Grammar g = null;
    foreach (var line in lines) {
      var m = ruleExpr.Match(line);
      if (!m.Success)
        throw new ArgumentException("Not a valid production rule: '" + line + "'.", "gStr");

      var ntSymbol = new Symbol(m.Groups["ntSymbol"].Value);
      // first line is the rule for the start-symbol
      if (g == null) g = new Grammar(ntSymbol);

      foreach (Capture alt in m.Groups["alternative"].Captures) {
        g.AddProductionRule(ntSymbol, ParseSequence(alt.Value));
      }
    }
    return g;
  }

  // Turns the text of one alternative ("a b c") into a sequence of symbols.
  private static Sequence ParseSequence(string alternative) {
    // A null separator array splits on any white-space character, matching the \s+
    // the rule pattern accepts between symbols (splitting on ' ' alone would leave
    // tab-separated symbols glued together).
    // NOTE(review): assumes Sequence can be constructed from a list of ISymbol - confirm.
    var symbols = alternative
      .Split((char[])null, StringSplitOptions.RemoveEmptyEntries)
      .Select(n => (ISymbol)new Symbol(n))
      .ToList();
    return new Sequence(symbols);
  }
}
}