using System; using System.Collections; using System.Collections.Generic; using System.Diagnostics; using System.Linq; using System.Text; using System.Threading.Tasks; namespace HeuristicLab.Problems.GrammaticalOptimization { // represents a sequence of symbols (non-terminal and terminal symbols) // a sequence consisting only of terminal symbols can be a sentence of a language // the class supports in-place manipulation of the sequence symbols (replace NT with another sequence) // sequences provide efficient support left-canonical derivation by storing the index of the first non-terminal symbol // maximal length of sequences is limited to 1000 symbols // for symbols the same assumptions for the implementation of grammars apply // - non-terminal symbols must be characters in the range [A..Z] // - terminal symbols can be almost all other characters public class Sequence : IEnumerable { private const int maxIdx = 200; private int len; private int idxOfFirstNt; private readonly char[] symbols; public char this[int idx] { get { return symbols[idx]; } set { throw new NotSupportedException(); } } public int Length { get { return len; } private set { len = value; } } public bool IsTerminal { get { return idxOfFirstNt == -1; } } public int FirstNonTerminalIndex { get { return idxOfFirstNt; } } public char FirstNonTerminal { get { return symbols[idxOfFirstNt]; } } private Sequence(int maxLength) { this.symbols = new char[maxLength]; } // create a sequence from a character public Sequence(char ch) : this(ch, maxIdx + 1) { } protected Sequence(char ch, int maxLength) : this(maxLength) { this.len = 1; symbols[0] = ch; if (ch >= 'A' && ch <= 'Z') idxOfFirstNt = 0; else idxOfFirstNt = -1; } // create a sequence from a string public Sequence(string s) : this(s, maxIdx + 1) { } protected Sequence(string s, int maxLength) : this(maxLength) { if (string.IsNullOrEmpty(s)) throw new ArgumentException(); if (s.Length > (maxIdx + 1)) throw new ArgumentException(); this.len = s.Length; this.idxOfFirstNt = -1; for (int i = 0; i < len; i++) { symbols[i] = s[i]; if (idxOfFirstNt == -1 && symbols[i] >= 'A' && symbols[i] <= 'Z') { idxOfFirstNt = i; } } } // cloning ctor public Sequence(Sequence original) : this(original, maxIdx + 1) { } protected Sequence(Sequence original, int maxLength) : this(maxLength) { this.len = original.len; Array.Copy(original.symbols, this.symbols, len); this.idxOfFirstNt = original.idxOfFirstNt; } public virtual void ReplaceAt(int position, int len, Sequence replacement) { if (replacement == null) throw new ArgumentNullException(); if (len <= 0) throw new ArgumentException(); if (position + len >= maxIdx) throw new ArgumentException(); if (Length - len + replacement.Length > (maxIdx + 1)) throw new ArgumentException(); var lenDelta = replacement.Length - len; var remainingPartLen = Length - position - len; var startIdxOfRemainingPart = position + len + lenDelta; Array.Copy(symbols, position + len, symbols, startIdxOfRemainingPart, remainingPartLen); Array.Copy(replacement.symbols, 0, symbols, position, replacement.Length); this.len = Length + lenDelta; // when the first part contains an NT then it's not necessary to update the index if (idxOfFirstNt >= 0 && idxOfFirstNt < position) return; // if the replacement contains an NT then we can directly calculate the idx of that NT in the new sequence if (replacement.idxOfFirstNt >= 0) { this.idxOfFirstNt = position + replacement.idxOfFirstNt; } else { // otherwise we must find the first NT in the remaining part idxOfFirstNt = -1; for (int i = startIdxOfRemainingPart; idxOfFirstNt == -1 && i < Length; i++) { if (symbols[i] >= 'A' && symbols[i] <= 'Z') idxOfFirstNt = i; } } } public IEnumerator GetEnumerator() { return symbols.AsEnumerable().Take(len).GetEnumerator(); } public override string ToString() { var sb = new StringBuilder(len); sb.Append(symbols, 0, len); return sb.ToString(); } IEnumerator IEnumerable.GetEnumerator() { return GetEnumerator(); } public Sequence Subsequence(int startIdx, int len) { if (startIdx < 0 || len < 0) throw new ArgumentException(); if (startIdx >= this.len) throw new ArgumentException(); if (startIdx + len > this.len) throw new ArgumentException(); var subsequence = new Sequence(maxIdx + 1) { len = len }; Array.Copy(this.symbols, startIdx, subsequence.symbols, 0, len); if (idxOfFirstNt < 0) { subsequence.idxOfFirstNt = -1; } else if (idxOfFirstNt < startIdx) { // need to find first nt in subsequence subsequence.idxOfFirstNt = -1; for (int i = 0; subsequence.idxOfFirstNt == -1 && i < len; i++) { if (subsequence.symbols[i] >= 'A' && subsequence.symbols[i] <= 'Z') subsequence.idxOfFirstNt = i; } } else if (idxOfFirstNt >= startIdx && idxOfFirstNt < startIdx + len) { subsequence.idxOfFirstNt = idxOfFirstNt; } else { Debug.Assert(idxOfFirstNt >= startIdx + len); subsequence.idxOfFirstNt = -1; } return subsequence; } } }