[11730] | 1 | using System;
|
---|
| 2 | using System.Collections;
|
---|
| 3 | using System.Collections.Generic;
|
---|
| 4 | using System.Diagnostics;
|
---|
| 5 | using System.Linq;
|
---|
| 6 | using System.Text;
|
---|
| 7 | using System.Threading.Tasks;
|
---|
| 8 |
|
---|
| 9 | namespace HeuristicLab.Problems.GrammaticalOptimization {
|
---|
| 10 | // represents a sequence of symbols (non-terminal and terminal symbols)
|
---|
| 11 | // a sequence consisting only of terminal symbols can be a sentence of a language
|
---|
| 12 | // the class supports in-place manipulation of the sequence symbols (replace NT with another sequence)
|
---|
| 13 | // sequences provide efficient support for left-canonical derivation by storing the index of the first non-terminal symbol
|
---|
| 14 | // maximal length of sequences is limited to 10001 symbols (maxIdx + 1)
|
---|
| 15 |
|
---|
| 16 | // for symbols the same assumptions for the implementation of grammars apply
|
---|
| 17 | // - non-terminal symbols must be characters in the range [A..Z]
|
---|
| 18 | // - terminal symbols can be almost all other characters
|
---|
public class Sequence : IEnumerable<char> {
  // largest valid symbol index; sequences created via the public constructors
  // allocate maxIdx + 1 symbols (changed maxIdx to 10000...)
  private const int maxIdx = 10000;

  // number of valid symbols stored at the beginning of the symbols array
  private int len;
  // index of the first non-terminal symbol ([A..Z]) or -1 when there is none
  private int idxOfFirstNt;
  // backing storage; stays null when the parameterless constructor is used
  protected char[] symbols;

  // read-only access to the symbol at position idx (setting a symbol is not supported)
  public virtual char this[int idx] {
    get { return symbols[idx]; }
    set { throw new NotSupportedException(); }
  }

  // number of symbols in the sequence
  public int Length {
    get { return len; }
    protected set { len = value; }
  }

  // true when the sequence does not contain a non-terminal symbol
  public bool IsTerminal {
    get { return idxOfFirstNt == -1; }
  }

  // index of the first non-terminal symbol or -1 for terminal sequences
  public int FirstNonTerminalIndex {
    get { return idxOfFirstNt; }
    protected set { idxOfFirstNt = value; }
  }

  // the first non-terminal symbol; must only be used when IsTerminal is false
  // because the indexer is called with -1 otherwise
  public char FirstNonTerminal {
    get { return this[idxOfFirstNt]; }
  }

  // symbols in the range [A..Z] are non-terminal symbols
  private static bool IsNonTerminal(char ch) {
    return ch >= 'A' && ch <= 'Z';
  }

  // allocates the symbol array only; len and idxOfFirstNt must be set by the caller
  private Sequence(int maxLength) {
    this.symbols = new char[maxLength];
  }

  // create a sequence from a character
  public Sequence(char ch)
    : this(ch, maxIdx + 1) {
  }

  // create a sequence from a character with a symbol array of size maxLength
  protected Sequence(char ch, int maxLength)
    : this(maxLength) {
    this.len = 1;
    symbols[0] = ch;
    idxOfFirstNt = IsNonTerminal(ch) ? 0 : -1;
  }

  // create a sequence from a string
  public Sequence(string s) : this(s, maxIdx + 1) { }

  // create a sequence from a string with a symbol array of size maxLength
  // throws ArgumentException when s is null, empty, or does not fit into the sequence
  protected Sequence(string s, int maxLength)
    : this(maxLength) {
    if (string.IsNullOrEmpty(s)) throw new ArgumentException();
    // the string must respect the global limit as well as the capacity that was actually allocated
    if (s.Length > (maxIdx + 1) || s.Length > maxLength) throw new ArgumentException();
    this.len = s.Length;
    this.idxOfFirstNt = -1;

    for (int i = 0; i < len; i++) {
      symbols[i] = s[i];
      if (idxOfFirstNt == -1 && IsNonTerminal(symbols[i])) {
        idxOfFirstNt = i;
      }
    }
  }

  // cloning ctor
  public Sequence(Sequence original) : this(original, maxIdx + 1) { }

  // cloning ctor with a symbol array of size maxLength
  protected Sequence(Sequence original, int maxLength)
    : this(maxLength) {
    if (original == null) throw new ArgumentNullException("original");
    this.len = original.len;
    Array.Copy(original.symbols, this.symbols, len);
    this.idxOfFirstNt = original.idxOfFirstNt;
  }

  // empty constructor does not allocate the symbol array
  protected Sequence() { }

  // replaces len symbols starting at position with the symbols of replacement (in-place)
  // and updates the index of the first non-terminal symbol
  // throws ArgumentNullException when replacement is null and ArgumentException when
  // len is not positive, the replaced range is not within the sequence,
  // or the result would be longer than maxIdx + 1 symbols
  public virtual void ReplaceAt(int position, int len, Sequence replacement) {
    if (replacement == null) throw new ArgumentNullException("replacement");
    if (len <= 0) throw new ArgumentException();
    // the replaced range [position, position + len) must lie within the current sequence
    // (written as a subtraction so that large positions cannot overflow)
    if (position < 0 || position > Length - len) throw new ArgumentException();
    if (Length - len + replacement.Length > (maxIdx + 1)) throw new ArgumentException();
    var lenDelta = replacement.Length - len;
    var remainingPartLen = Length - position - len;
    var startIdxOfRemainingPart = position + len + lenDelta;
    // first move the part after the replaced range to its new position, then insert the replacement
    Array.Copy(symbols, position + len, symbols, startIdxOfRemainingPart, remainingPartLen);
    Array.Copy(replacement.symbols, 0, symbols, position, replacement.Length);

    this.len = Length + lenDelta;
    // when the first part contains an NT then it's not necessary to update the index
    if (idxOfFirstNt >= 0 && idxOfFirstNt < position) return;
    // if the replacement contains an NT then we can directly calculate the idx of that NT in the new sequence
    if (replacement.idxOfFirstNt >= 0) {
      this.idxOfFirstNt = position + replacement.idxOfFirstNt;
    } else {
      // otherwise we must find the first NT in the remaining part
      idxOfFirstNt = -1;
      for (int i = startIdxOfRemainingPart; idxOfFirstNt == -1 && i < Length; i++) {
        if (IsNonTerminal(this[i])) idxOfFirstNt = i;
      }
    }
  }

  // enumerates the first len symbols of the symbol array
  public virtual IEnumerator<char> GetEnumerator() {
    return symbols.AsEnumerable().Take(len).GetEnumerator();
  }

  // returns the symbols of the sequence as a string
  public override string ToString() {
    var sb = new StringBuilder(len);
    sb.Append(symbols, 0, len);
    return sb.ToString();
  }

  IEnumerator IEnumerable.GetEnumerator() {
    return GetEnumerator();
  }

  // returns a new sequence containing a copy of len symbols starting at startIdx
  // throws ArgumentException when the requested range is not within the sequence
  public Sequence Subsequence(int startIdx, int len) {
    if (startIdx < 0 || len < 0) throw new ArgumentException();
    if (startIdx >= this.len) throw new ArgumentException();
    if (startIdx + len > this.len) throw new ArgumentException();
    var subsequence = new Sequence(maxIdx + 1) { len = len };

    Array.Copy(this.symbols, startIdx, subsequence.symbols, 0, len);
    if (idxOfFirstNt < 0) {
      // no NT in the whole sequence => no NT in the subsequence
      subsequence.idxOfFirstNt = -1;
    } else if (idxOfFirstNt < startIdx) {
      // need to find first nt in subsequence
      subsequence.idxOfFirstNt = -1;
      for (int i = 0; subsequence.idxOfFirstNt == -1 && i < len; i++) {
        if (IsNonTerminal(subsequence[i])) subsequence.idxOfFirstNt = i;
      }
    } else if (idxOfFirstNt >= startIdx && idxOfFirstNt < startIdx + len) {
      // the first NT of the sequence lies within the copied range
      subsequence.idxOfFirstNt = idxOfFirstNt - startIdx;
    } else {
      // the first NT of the sequence lies behind the copied range
      Debug.Assert(idxOfFirstNt >= startIdx + len);
      subsequence.idxOfFirstNt = -1;
    }
    return subsequence;
  }
}
|
---|
| 161 | }
|
---|