#region License Information
/* HeuristicLab
* Copyright (C) 2002-2013 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
*/
#endregion
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text.RegularExpressions;
namespace HeuristicLab.Grammars {
/// <summary>
/// A grammar consisting of a start symbol, a set of symbols and, for every
/// non-terminal symbol, a list of alternatives (production rules).
/// </summary>
public class Grammar : IGrammar {
  /// <summary>
  /// Symbol named "EPS". Debug assertions reject it as start symbol and as
  /// left-hand side of a production rule.
  /// </summary>
  public static readonly ISymbol EmptySymbol = new Symbol("EPS");

  // maps every non-terminal symbol to the list of its alternatives
  private readonly Dictionary<ISymbol, List<Sequence>> rules;
  // all symbols of the grammar (non-terminals and terminals)
  private readonly HashSet<ISymbol> allSymbols;

  /// <summary>Gets or sets the start symbol of the grammar.</summary>
  public ISymbol StartSymbol { get; set; }

  /// <summary>Gets all symbols of the grammar that have no entry in the rule table.</summary>
  public IEnumerable<ISymbol> TerminalSymbols { get { return allSymbols.Except(NonTerminalSymbols); } }

  /// <summary>Gets all symbols that have an entry in the rule table.</summary>
  public IEnumerable<ISymbol> NonTerminalSymbols { get { return rules.Keys; } }

  /// <summary>Gets all symbols of the grammar.</summary>
  public IEnumerable<ISymbol> Symbols { get { return allSymbols; } }

  /// <summary>
  /// Creates a grammar without production rules; rules are added afterwards
  /// with <see cref="AddProductionRule"/>.
  /// </summary>
  /// <param name="startSymbol">The start symbol; must not be <see cref="EmptySymbol"/> (debug assertion).</param>
  /// <param name="nonTerminals">The non-terminal symbols; each one gets an empty list of alternatives.</param>
  /// <param name="terminals">The terminal symbols.</param>
  public Grammar(ISymbol startSymbol, IEnumerable<ISymbol> nonTerminals, IEnumerable<ISymbol> terminals) {
    Debug.Assert(startSymbol != EmptySymbol);
    // materialize once because the sequence is needed twice and might be lazily evaluated
    var nonTerminalList = nonTerminals.ToList();
    this.StartSymbol = startSymbol;
    this.allSymbols = new HashSet<ISymbol>(nonTerminalList.Concat(terminals));
    this.rules = nonTerminalList.ToDictionary(nt => nt, nt => new List<Sequence>());
  }

  /// <summary>Returns the number of alternatives registered for <paramref name="ntSymbol"/>.</summary>
  /// <exception cref="KeyNotFoundException"><paramref name="ntSymbol"/> is not a non-terminal of this grammar.</exception>
  public int NumberOfAlternatives(ISymbol ntSymbol) {
    return rules[ntSymbol].Count;
  }

  /// <summary>Returns a read-only view of the alternatives registered for <paramref name="ntSymbol"/>.</summary>
  /// <exception cref="KeyNotFoundException"><paramref name="ntSymbol"/> is not a non-terminal of this grammar.</exception>
  public IEnumerable<Sequence> GetAlternatives(ISymbol ntSymbol) {
    return rules[ntSymbol].AsReadOnly();
  }

  /// <summary>Returns the alternative at position <paramref name="index"/> (in insertion order) for <paramref name="ntSymbol"/>.</summary>
  /// <exception cref="KeyNotFoundException"><paramref name="ntSymbol"/> is not a non-terminal of this grammar.</exception>
  /// <exception cref="ArgumentOutOfRangeException"><paramref name="index"/> is not a valid position.</exception>
  public Sequence GetAlternative(ISymbol ntSymbol, int index) {
    return rules[ntSymbol][index];
  }

  /// <summary>
  /// Appends <paramref name="production"/> to the alternatives of <paramref name="ntSymbol"/>.
  /// </summary>
  /// <remarks>
  /// Debug assertions check that the left-hand side is a known non-terminal other than
  /// <see cref="EmptySymbol"/>, that the production only uses symbols of this grammar and
  /// that the same alternative has not been added before.
  /// </remarks>
  public virtual void AddProductionRule(ISymbol ntSymbol, Sequence production) {
    Debug.Assert(ntSymbol != EmptySymbol);
    Debug.Assert(rules.ContainsKey(ntSymbol));
    Debug.Assert(production.All(s => allSymbols.Contains(s)));
    var alternatives = rules[ntSymbol];
    Debug.Assert(!alternatives.Any(s => s.SequenceEqual(production)));
    alternatives.Add(production);
  }

  /// <summary>Returns true if <paramref name="symbol"/> belongs to the grammar and has no rules.</summary>
  public bool IsTerminal(ISymbol symbol) {
    // terminals must not have rules but must occur in the set of all symbols
    return !rules.ContainsKey(symbol) && allSymbols.Contains(symbol);
  }

  /// <summary>Returns true if <paramref name="symbol"/> has an entry in the rule table.</summary>
  public bool IsNonTerminal(ISymbol symbol) {
    return rules.ContainsKey(symbol);
  }

  // one rule per line: <ntSymbol> -> <alternative> | <alternative> | ...
  // where every alternative is a whitespace-separated list of symbol names
  private static readonly Regex ruleExpr = new Regex(@"\s*(?<ntSymbol>\w+)\s*->\s*(?<alternative>\w+(?:\s+\w+)*)(?:\s*\|\s*(?<alternative>\w+(?:\s+\w+)*))*");
  private static readonly Regex empty = new Regex(@"^\s*$");
  // accept all common line endings independent of the platform the code runs on
  private static readonly string[] lineSeparators = new string[] { "\r\n", "\n", "\r" };

  /// <summary>
  /// Creates a grammar from its textual representation. Every non-blank line holds one
  /// rule of the form <c>NT -> a b | c</c>; the left-hand side of the first rule
  /// becomes the start symbol. Symbols that never occur on a left-hand side are terminals.
  /// </summary>
  /// <remarks>
  /// Exactly one symbol instance is created per distinct name, so a name may occur any
  /// number of times. If a non-terminal occurs on the left-hand side of several lines
  /// the alternatives of all these lines are collected for it.
  /// </remarks>
  /// <param name="gStr">The grammar definition.</param>
  /// <returns>The grammar including all production rules.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="gStr"/> is null.</exception>
  /// <exception cref="ArgumentException"><paramref name="gStr"/> contains no rule or a line that is not a rule.</exception>
  public static Grammar FromString(string gStr) {
    if (gStr == null) throw new ArgumentNullException("gStr");
    var lines = gStr.Split(lineSeparators, StringSplitOptions.RemoveEmptyEntries)
      .Where(l => !empty.IsMatch(l)) // remove lines that contain only whitespace
      .ToArray();
    if (lines.Length == 0)
      throw new ArgumentException("The grammar definition does not contain any production rule.", "gStr");

    // pass 1: parse every line once and collect symbol names and productions
    var symbolNames = new List<string>(); // distinct names in order of first occurrence
    var knownNames = new HashSet<string>(StringComparer.Ordinal);
    var nonTerminalNames = new List<string>(); // distinct left-hand sides in order of first occurrence
    var knownNonTerminalNames = new HashSet<string>(StringComparer.Ordinal);
    var productions = new List<KeyValuePair<string, string[]>>(); // (left-hand side, right-hand side)
    foreach (var line in lines) {
      var m = ruleExpr.Match(line);
      if (!m.Success)
        throw new ArgumentException(string.Format("Cannot parse production rule \"{0}\".", line), "gStr");
      var ntName = m.Groups["ntSymbol"].Value;
      if (knownNonTerminalNames.Add(ntName)) nonTerminalNames.Add(ntName);
      if (knownNames.Add(ntName)) symbolNames.Add(ntName);
      foreach (Capture alt in m.Groups["alternative"].Captures) {
        // a null separator splits on any whitespace, which is what the rule expression accepts
        var rhsNames = alt.Value.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
        foreach (var name in rhsNames) {
          if (knownNames.Add(name)) symbolNames.Add(name);
        }
        productions.Add(new KeyValuePair<string, string[]>(ntName, rhsNames));
      }
    }

    // exactly one symbol instance per name
    var symbols = symbolNames.ToDictionary(n => n, n => (ISymbol)new Symbol(n), StringComparer.Ordinal);
    var nonTerminals = nonTerminalNames.Select(n => symbols[n]).ToList();
    var terminals = symbolNames
      .Where(n => !knownNonTerminalNames.Contains(n))
      .Select(n => symbols[n])
      .ToList();

    // first line is the rule for the start-symbol
    var g = new Grammar(symbols[nonTerminalNames[0]], nonTerminals, terminals);

    // pass 2: add production rules
    foreach (var p in productions) {
      g.AddProductionRule(symbols[p.Key], new Sequence(p.Value.Select(n => symbols[n]).ToList()));
    }
    return g;
  }
}
}