1 | using System;
2 | using System.Collections;
3 | using System.Collections.Generic;
4 | using System.Diagnostics;
5 | using System.Linq;
6 | using System.Text;
7 | using System.Threading.Tasks;
8 |
9 | namespace HeuristicLab.Problems.GrammaticalOptimization {
10 | // represents a sequence of symbols (non-terminal and terminal symbols)
11 | // a sequence consisting only of terminal symbols can be a sentence of a language
12 | // the class supports in-place manipulation of the sequence symbols (replace NT with another sequence)
// sequences provide efficient support for left-canonical derivation by storing the index of the first non-terminal symbol
// the maximal length of sequences is limited to 201 symbols (maxIdx + 1)
15 |
16 | // for symbols the same assumptions for the implementation of grammars apply
17 | // - non-terminal symbols must be characters in the range [A..Z]
18 | // - terminal symbols can be almost all other characters
/// <summary>
/// A mutable sequence of grammar symbols stored in a fixed-size character buffer.
/// Characters in the range 'A'..'Z' are treated as non-terminal symbols, all other characters as terminals.
/// The index of the first non-terminal symbol is tracked so that it is available without re-scanning.
/// </summary>
public class Sequence : IEnumerable<char> {
  // largest symbol index; the public constructors allocate a buffer for maxIdx + 1 symbols
  private const int maxIdx = 200;

  // number of symbols currently in use (the buffer may be larger)
  private int len;
  // index of the first non-terminal symbol, or -1 when the sequence contains only terminals
  private int idxOfFirstNt;
  // symbol buffer; stays null when the parameterless constructor is used
  protected char[] symbols;

  /// <summary>
  /// Gets the symbol at position <paramref name="idx"/>. Setting a symbol through the indexer is not supported.
  /// </summary>
  /// <exception cref="NotSupportedException">Always thrown by the setter.</exception>
  public virtual char this[int idx] {
    get { return symbols[idx]; }
    set { throw new NotSupportedException(); }
  }

  /// <summary>Number of symbols in the sequence.</summary>
  public int Length {
    get { return len; }
    protected set { len = value; }
  }

  /// <summary>True when the sequence contains no non-terminal symbol.</summary>
  public bool IsTerminal {
    get { return idxOfFirstNt == -1; }
  }

  /// <summary>Index of the first non-terminal symbol, or -1 when there is none.</summary>
  public int FirstNonTerminalIndex {
    get { return idxOfFirstNt; }
    protected set { idxOfFirstNt = value; }
  }

  /// <summary>
  /// The first non-terminal symbol. The indexer is called with <see cref="FirstNonTerminalIndex"/>,
  /// so this must only be used when <see cref="IsTerminal"/> is false.
  /// </summary>
  public char FirstNonTerminal {
    get { return this[idxOfFirstNt]; }
  }

  // single place that decides whether a symbol is a non-terminal ('A'..'Z')
  private static bool IsNonTerminal(char ch) {
    return ch >= 'A' && ch <= 'Z';
  }

  // allocates the symbol buffer only; len and idxOfFirstNt are set by the calling constructor
  private Sequence(int maxLength) {
    this.symbols = new char[maxLength];
  }

  /// <summary>Creates a sequence consisting of the single symbol <paramref name="ch"/>.</summary>
  public Sequence(char ch)
    : this(ch, maxIdx + 1) {
  }

  /// <summary>Creates a single-symbol sequence with a buffer of <paramref name="maxLength"/> symbols.</summary>
  protected Sequence(char ch, int maxLength)
    : this(maxLength) {
    this.len = 1;
    symbols[0] = ch;
    idxOfFirstNt = IsNonTerminal(ch) ? 0 : -1;
  }

  /// <summary>Creates a sequence from the characters of <paramref name="s"/>.</summary>
  /// <exception cref="ArgumentException">
  /// <paramref name="s"/> is null, empty, or longer than the maximal sequence length.
  /// </exception>
  public Sequence(string s) : this(s, maxIdx + 1) { }

  /// <summary>
  /// Creates a sequence from the characters of <paramref name="s"/> with a buffer of
  /// <paramref name="maxLength"/> symbols.
  /// </summary>
  /// <exception cref="ArgumentException">
  /// <paramref name="s"/> is null, empty, longer than the maximal sequence length, or longer than
  /// <paramref name="maxLength"/>.
  /// </exception>
  protected Sequence(string s, int maxLength)
    : this(maxLength) {
    if (string.IsNullOrEmpty(s)) throw new ArgumentException("The string must not be null or empty.", "s");
    // also check against the actual buffer size so that a too-small buffer is reported as an argument error
    if (s.Length > (maxIdx + 1) || s.Length > maxLength)
      throw new ArgumentException("The string is too long for a sequence.", "s");
    this.len = s.Length;
    this.idxOfFirstNt = -1;

    for (int i = 0; i < len; i++) {
      char ch = s[i];
      symbols[i] = ch;
      if (idxOfFirstNt == -1 && IsNonTerminal(ch)) {
        idxOfFirstNt = i;
      }
    }
  }

  /// <summary>Cloning constructor: creates an independent copy of <paramref name="original"/>.</summary>
  /// <exception cref="ArgumentNullException"><paramref name="original"/> is null.</exception>
  public Sequence(Sequence original) : this(original, maxIdx + 1) { }

  /// <summary>
  /// Cloning constructor that copies <paramref name="original"/> into a buffer of
  /// <paramref name="maxLength"/> symbols.
  /// </summary>
  /// <exception cref="ArgumentNullException"><paramref name="original"/> is null.</exception>
  protected Sequence(Sequence original, int maxLength)
    : this(maxLength) {
    if (original == null) throw new ArgumentNullException("original");
    this.len = original.len;
    Array.Copy(original.symbols, this.symbols, len);
    this.idxOfFirstNt = original.idxOfFirstNt;
  }

  // empty constructor does not allocate the symbol array
  protected Sequence() { }

  /// <summary>
  /// Replaces, in place, the <paramref name="len"/> symbols starting at <paramref name="position"/>
  /// with the symbols of <paramref name="replacement"/> and updates the first non-terminal index.
  /// </summary>
  /// <param name="position">Index of the first symbol to replace.</param>
  /// <param name="len">Number of symbols to replace; must be positive.</param>
  /// <param name="replacement">Sequence whose symbols are inserted.</param>
  /// <exception cref="ArgumentNullException"><paramref name="replacement"/> is null.</exception>
  /// <exception cref="ArgumentException">
  /// <paramref name="len"/> is not positive, the replaced range is not inside the sequence,
  /// or the resulting sequence would exceed the maximal length.
  /// </exception>
  public virtual void ReplaceAt(int position, int len, Sequence replacement) {
    if (replacement == null) throw new ArgumentNullException("replacement");
    if (len <= 0) throw new ArgumentException("The number of replaced symbols must be positive.", "len");
    // validate the range against the current length *before* touching the buffer;
    // written as a subtraction so that large positions cannot overflow
    if (position < 0 || position > Length - len)
      throw new ArgumentException("The replaced range must lie within the sequence.", "position");
    if (Length - len + replacement.Length > (maxIdx + 1))
      throw new ArgumentException("The resulting sequence would be too long.", "replacement");

    var lenDelta = replacement.Length - len;
    var remainingPartLen = Length - position - len;
    var startIdxOfRemainingPart = position + len + lenDelta;
    // move the tail first (Array.Copy handles the overlapping ranges), then write the replacement into the gap
    if (remainingPartLen > 0) {
      Array.Copy(symbols, position + len, symbols, startIdxOfRemainingPart, remainingPartLen);
    }
    Array.Copy(replacement.symbols, 0, symbols, position, replacement.Length);

    this.len = Length + lenDelta;
    // when the first part contains an NT then it's not necessary to update the index
    if (idxOfFirstNt >= 0 && idxOfFirstNt < position) return;
    // if the replacement contains an NT then we can directly calculate the idx of that NT in the new sequence
    if (replacement.idxOfFirstNt >= 0) {
      this.idxOfFirstNt = position + replacement.idxOfFirstNt;
    } else {
      // otherwise we must find the first NT in the remaining part
      idxOfFirstNt = -1;
      for (int i = startIdxOfRemainingPart; idxOfFirstNt == -1 && i < Length; i++) {
        if (IsNonTerminal(this[i])) idxOfFirstNt = i;
      }
    }
  }

  /// <summary>Enumerates the symbols of the sequence (only the first <see cref="Length"/> buffer entries).</summary>
  public virtual IEnumerator<char> GetEnumerator() {
    return symbols.AsEnumerable().Take(len).GetEnumerator();
  }

  /// <summary>Returns the symbols of the sequence as a string.</summary>
  public override string ToString() {
    // an empty sequence yields "" without reading the buffer
    if (len == 0) return string.Empty;
    return new string(symbols, 0, len);
  }

  IEnumerator IEnumerable.GetEnumerator() {
    return GetEnumerator();
  }

  /// <summary>
  /// Creates a new sequence containing the <paramref name="len"/> symbols starting at
  /// <paramref name="startIdx"/>.
  /// </summary>
  /// <exception cref="ArgumentException">
  /// <paramref name="startIdx"/> or <paramref name="len"/> is negative, <paramref name="startIdx"/> is not
  /// a valid index, or the requested range extends beyond the end of the sequence.
  /// </exception>
  public Sequence Subsequence(int startIdx, int len) {
    if (startIdx < 0 || len < 0) throw new ArgumentException("Start index and length must not be negative.");
    if (startIdx >= this.len) throw new ArgumentException("The start index must lie within the sequence.", "startIdx");
    // subtraction form avoids integer overflow of startIdx + len
    if (len > this.len - startIdx) throw new ArgumentException("The requested range exceeds the sequence.", "len");
    var subsequence = new Sequence(maxIdx + 1) { len = len };

    Array.Copy(this.symbols, startIdx, subsequence.symbols, 0, len);
    subsequence.idxOfFirstNt = -1;
    if (idxOfFirstNt >= 0) {
      if (idxOfFirstNt < startIdx) {
        // the known first NT lies before the range, so the copied symbols must be scanned
        for (int i = 0; subsequence.idxOfFirstNt == -1 && i < len; i++) {
          if (IsNonTerminal(subsequence.symbols[i])) subsequence.idxOfFirstNt = i;
        }
      } else if (idxOfFirstNt < startIdx + len) {
        // the first NT lies inside the range: translate its index
        subsequence.idxOfFirstNt = idxOfFirstNt - startIdx;
      }
      // otherwise the first NT lies behind the range and the subsequence is terminal
    }
    return subsequence;
  }
}
160 | }