Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.Problems.GPDL/CocoR/Scanner.frame @ 16946

Last change on this file since 16946 was 9724, checked in by gkronber, 11 years ago

#2026 changed ATG to Coco/R syntax and use Coco/R (C#) to generate scanner and parser for GPDL

File size: 11.4 KB
Line 
/*----------------------------------------------------------------------
Compiler Generator Coco/R,
Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz
extended by M. Loeberbauer & A. Woess, Univ. of Linz
with improvements by Pat Terry, Rhodes University

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

As an exception, it is allowed to write an extension of Coco/R that is
used as a plugin in non-free software.

If not otherwise stated, any source code generated by Coco/R (other than
Coco/R itself) does not fall under the GNU General Public License.
-----------------------------------------------------------------------*/
-->begin
using System;
using System.Collections;
using System.IO;

-->namespace

/// <summary>
/// One token handed out by the scanner. Tokens carry their kind, their
/// location in the source text and their text, and are chained through
/// <c>next</c> so that already scanned tokens can be revisited.
/// </summary>
public class Token {
  public int kind;    // token kind
  public int pos;     // token position in bytes in the source text (starting at 0)
  public int charPos; // token position in characters in the source text (starting at 0)
  public int col;     // token column (starting at 1)
  public int line;    // token line (starting at 1)
  public string val;  // token value
  public Token next;  // ML 2005-03-11 Tokens are kept in linked list
}

//-----------------------------------------------------------------------------------
// Buffer
//-----------------------------------------------------------------------------------
/// <summary>
/// Byte-oriented input buffer over a <see cref="Stream"/>. A seekable stream is
/// read through a window of at most MAX_BUFFER_LENGTH bytes that is swapped in
/// on demand; a non-seekable stream is accumulated in a buffer that doubles
/// whenever it is full.
/// </summary>
public class Buffer {
  // This Buffer supports the following cases:
  // 1) seekable stream (file)
  //    a) whole stream in buffer
  //    b) part of stream in buffer
  // 2) non seekable stream (network, console)

  public const int EOF = char.MaxValue + 1;
  const int MIN_BUFFER_LENGTH = 1024; // 1KB
  const int MAX_BUFFER_LENGTH = MIN_BUFFER_LENGTH * 64; // 64KB
  byte[] buf;         // input buffer
  int bufStart;       // position of first byte in buffer relative to input stream
  int bufLen;         // length of buffer
  int fileLen;        // length of input stream (may change if the stream is no file)
  int bufPos;         // current position in buffer
  Stream stream;      // input stream (seekable)
  bool isUserStream;  // was the stream opened by the user?

  /// <summary>
  /// Creates a buffer over <paramref name="s"/> and positions it at the start.
  /// </summary>
  /// <param name="s">Stream to read from.</param>
  /// <param name="isUserStream">
  /// True if the caller owns the stream; such a stream is never closed here.
  /// </param>
  /// <exception cref="FatalError">
  /// The stream is seekable and longer than Int32.MaxValue bytes.
  /// </exception>
  public Buffer (Stream s, bool isUserStream) {
    stream = s; this.isUserStream = isUserStream;

    if (stream.CanSeek) {
      // Every position in this class is an int. Without this check the cast
      // below would wrap silently and the scanner would see a truncated or
      // empty input instead of an error.
      long length = stream.Length;
      if (length > Int32.MaxValue) {
        Close(); // release a stream we own before giving up
        throw new FatalError("input stream too large, length: " + length);
      }
      fileLen = (int) length;
      bufLen = Math.Min(fileLen, MAX_BUFFER_LENGTH);
      bufStart = Int32.MaxValue; // nothing in the buffer so far
    } else {
      fileLen = bufLen = bufStart = 0;
    }

    buf = new byte[(bufLen>0) ? bufLen : MIN_BUFFER_LENGTH];
    if (fileLen > 0) Pos = 0; // setup buffer to position 0 (start)
    else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid
    // The whole stream fits into the buffer, so the stream is not needed any more.
    if (bufLen == fileLen && stream.CanSeek) Close();
  }

  /// <summary>
  /// Takes over the complete state of <paramref name="b"/>, including its stream.
  /// </summary>
  protected Buffer(Buffer b) { // called in UTF8Buffer constructor
    buf = b.buf;
    bufStart = b.bufStart;
    bufLen = b.bufLen;
    fileLen = b.fileLen;
    bufPos = b.bufPos;
    stream = b.stream;
    // keep destructor from closing the stream
    b.stream = null;
    isUserStream = b.isUserStream;
  }

  ~Buffer() { Close(); }

  /// <summary>
  /// Closes the stream unless it belongs to the user or was already released.
  /// </summary>
  protected void Close() {
    if (!isUserStream && stream != null) {
      stream.Close();
      stream = null;
    }
  }

  /// <summary>
  /// Returns the next byte and advances the position, or <see cref="EOF"/>
  /// when no more input is available.
  /// </summary>
  public virtual int Read () {
    if (bufPos < bufLen) {
      return buf[bufPos++];
    } else if (Pos < fileLen) {
      Pos = Pos; // shift buffer start to Pos
      return buf[bufPos++];
    } else if (stream != null && !stream.CanSeek && ReadNextStreamChunk() > 0) {
      return buf[bufPos++];
    } else {
      return EOF;
    }
  }

  /// <summary>
  /// Returns what <see cref="Read"/> would return, then restores the position.
  /// </summary>
  public int Peek () {
    int curPos = Pos;
    int ch = Read();
    Pos = curPos;
    return ch;
  }

  // beg .. begin, zero-based, inclusive, in byte
  // end .. end, zero-based, exclusive, in byte
  /// <summary>
  /// Reads the characters between the two byte positions and returns them as
  /// a string; the current position is restored afterwards.
  /// </summary>
  public string GetString (int beg, int end) {
    int len = 0;
    // One slot per byte is enough: every Read consumes at least one byte.
    char[] chars = new char[end - beg];
    int oldPos = Pos;
    Pos = beg;
    while (Pos < end) chars[len++] = (char) Read();
    Pos = oldPos;
    return new String(chars, 0, len);
  }

  /// <summary>
  /// Absolute byte position in the input. Setting it moves within the buffer
  /// when possible and otherwise re-fills the buffer from the stream.
  /// </summary>
  /// <exception cref="FatalError">
  /// The requested position is negative or beyond the end of the input.
  /// </exception>
  public int Pos {
    get { return bufPos + bufStart; }
    set {
      if (value >= fileLen && stream != null && !stream.CanSeek) {
        // Wanted position is after buffer and the stream
        // is not seek-able e.g. network or console,
        // thus we have to read the stream manually till
        // the wanted position is in sight.
        while (value >= fileLen && ReadNextStreamChunk() > 0);
      }

      if (value < 0 || value > fileLen) {
        throw new FatalError("buffer out of bounds access, position: " + value);
      }

      if (value >= bufStart && value < bufStart + bufLen) { // already in buffer
        bufPos = value - bufStart;
      } else if (stream != null) { // must be swapped in
        stream.Seek(value, SeekOrigin.Begin);
        bufLen = stream.Read(buf, 0, buf.Length);
        bufStart = value; bufPos = 0;
      } else {
        // set the position to the end of the file, Pos will return fileLen.
        bufPos = fileLen - bufStart;
      }
    }
  }

  // Read the next chunk of bytes from the stream, increases the buffer
  // if needed and updates the fields fileLen and bufLen.
  // Returns the number of bytes read.
  private int ReadNextStreamChunk() {
    int free = buf.Length - bufLen;
    if (free == 0) {
      // in the case of a growing input stream
      // we can neither seek in the stream, nor can we
      // foresee the maximum length, thus we must adapt
      // the buffer size on demand.
      byte[] newBuf = new byte[bufLen * 2];
      Array.Copy(buf, newBuf, bufLen);
      buf = newBuf;
      free = bufLen;
    }
    int read = stream.Read(buf, bufLen, free);
    if (read > 0) {
      fileLen = bufLen = (bufLen + read);
      return read;
    }
    // end of stream reached
    return 0;
  }
}

//-----------------------------------------------------------------------------------
// UTF8Buffer
//-----------------------------------------------------------------------------------
/// <summary>
/// Buffer whose <see cref="Read"/> decodes UTF-8 and returns one code point
/// per call instead of one byte.
/// </summary>
public class UTF8Buffer: Buffer {
  public UTF8Buffer(Buffer b): base(b) {}

  /// <summary>
  /// Returns the next decoded code point, or EOF at the end of the input.
  /// Bytes that cannot start a sequence are skipped. A sequence that is cut
  /// off by the end of the input yields EOF.
  /// </summary>
  public override int Read() {
    int ch;
    do {
      ch = base.Read();
      // until we find a utf8 start (0xxxxxxx or 11xxxxxx)
    } while ((ch >= 128) && ((ch & 0xC0) != 0xC0) && (ch != EOF));

    if (ch < 128 || ch == EOF) {
      // nothing to do, first 127 chars are the same in ascii and utf8
      // 0xxxxxxx or end of file character
      return ch;
    }

    // The loop above only lets lead bytes (11xxxxxx) through at this point.
    int trailing;  // number of 10xxxxxx bytes that must follow
    int code;      // payload bits collected so far
    if ((ch & 0xF0) == 0xF0) {
      // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      trailing = 3; code = ch & 0x07;
    } else if ((ch & 0xE0) == 0xE0) {
      // 1110xxxx 10xxxxxx 10xxxxxx
      trailing = 2; code = ch & 0x0F;
    } else {
      // 110xxxxx 10xxxxxx
      trailing = 1; code = ch & 0x1F;
    }

    for (int i = 0; i < trailing; i++) {
      int next = base.Read();
      // Masking the EOF sentinel would turn it into payload bits of value 0
      // and fabricate a character that is not in the input.
      if (next == EOF) return EOF;
      code = (code << 6) | (next & 0x3F);
    }

    // NOTE(review): the four-byte form can decode to 0x10000, which has the
    // same numeric value as EOF (char.MaxValue + 1); a caller comparing the
    // result with EOF cannot tell the two apart.
    return code;
  }
}

//-----------------------------------------------------------------------------------
// Scanner
//-----------------------------------------------------------------------------------
// Frame of the scanner class. The marker lines inside this class are
// placeholders of the frame file and must stay verbatim, in this order and
// each on its own line (presumably the generator substitutes code at each of
// them; verify against the generator before moving one).
public class Scanner {
  const char EOL = '\n';
  const int eofSym = 0; /* pdt */
-->declarations

  public Buffer buffer; // scanner buffer

  Token t;          // current token
  int ch;           // current input character
  int pos;          // byte position of current character
  int charPos;      // position by unicode characters starting with 0
  int col;          // column number of current character
  int line;         // line number of current character
  int oldEols;      // EOLs that appeared in a comment;
  static readonly Hashtable start; // maps first token character to start state

  Token tokens;     // list of tokens already peeked (first token is a dummy)
  Token pt;         // current peek token

  char[] tval = new char[128]; // text of current token
  int tlen;         // length of current token

  static Scanner() {
    start = new Hashtable(128);
-->initialization
  }

  /// <summary>Opens <paramref name="fileName"/> and scans it.</summary>
  /// <exception cref="FatalError">
  /// Opening or initially reading the file failed with an IOException.
  /// </exception>
  public Scanner (string fileName) {
    Stream stream = null;
    try {
      stream = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.Read);
      buffer = new Buffer(stream, false);
      Init();
    } catch (IOException) {
      // Do not leave the file handle open until some later finalization;
      // closing a stream that is already closed is harmless.
      if (stream != null) stream.Close();
      throw new FatalError("Cannot open file " + fileName);
    }
  }

  /// <summary>
  /// Scans a stream supplied by the caller; the stream is not closed here.
  /// </summary>
  public Scanner (Stream s) {
    buffer = new Buffer(s, true);
    Init();
  }

  // Reads the first character, switches to UTF-8 decoding if the input
  // starts with a UTF-8 byte order mark, and sets up the token list.
  void Init() {
    pos = -1; line = 1; col = 0; charPos = -1;
    oldEols = 0;
    NextCh();
    if (ch == 0xEF) { // check optional byte order mark for UTF-8
      NextCh(); int ch1 = ch;
      NextCh(); int ch2 = ch;
      if (ch1 != 0xBB || ch2 != 0xBF) {
        throw new FatalError(String.Format("illegal byte order mark: EF {0,2:X} {1,2:X}", ch1, ch2));
      }
      // The mark is not part of the text: restart column and character counts.
      buffer = new UTF8Buffer(buffer); col = 0; charPos = -1;
      NextCh();
    }
    pt = tokens = new Token();  // first token is a dummy
  }

  // Advances to the next input character and keeps pos, col, charPos and
  // line up to date.
  void NextCh() {
    if (oldEols > 0) { ch = EOL; oldEols--; }
    else {
      pos = buffer.Pos;
      // buffer reads unicode chars, if UTF8 has been detected
      ch = buffer.Read(); col++; charPos++;
      // replace isolated '\r' by '\n' in order to make
      // eol handling uniform across Windows, Unix and Mac
      if (ch == '\r' && buffer.Peek() != '\n') ch = EOL;
      if (ch == EOL) { line++; col = 0; }
    }
-->casing1
  }

  // Makes room in tval for one more character and, unless the input is
  // exhausted, moves on to the next character.
  void AddCh() {
    if (tlen >= tval.Length) {
      char[] newBuf = new char[2 * tval.Length];
      Array.Copy(tval, 0, newBuf, 0, tval.Length);
      tval = newBuf;
    }
    if (ch != Buffer.EOF) {
-->casing2
      NextCh();
    }
  }


-->comments

  void CheckLiteral() {
-->literals
  }

  // Scans one token starting at the current character.
  Token NextToken() {
    while (ch == ' ' ||
-->scan1
    ) NextCh();
-->scan2
    int recKind = noSym;
    int recEnd = pos;
    t = new Token();
    t.pos = pos; t.col = col; t.line = line; t.charPos = charPos;
    // One lookup instead of ContainsKey plus indexer: the Hashtable indexer
    // returns null for a character that has no start state.
    object startState = start[ch];
    int state = (startState == null) ? 0 : (int) startState;
    tlen = 0; AddCh();

    switch (state) {
      case -1: { t.kind = eofSym; break; } // NextCh already done
      case 0: {
        if (recKind != noSym) {
          tlen = recEnd - t.pos;
          SetScannerBehindT();
        }
        t.kind = recKind; break;
      } // NextCh already done
-->scan3
    }
    t.val = new String(tval, 0, tlen);
    return t;
  }

  // Moves the scanner back to the start of token t and re-reads tlen
  // characters, so that scanning continues right behind the token.
  private void SetScannerBehindT() {
    buffer.Pos = t.pos;
    NextCh();
    line = t.line; col = t.col; charPos = t.charPos;
    for (int i = 0; i < tlen; i++) NextCh();
  }

  // get the next token (possibly a token already seen during peeking)
  public Token Scan () {
    if (tokens.next == null) {
      return NextToken();
    } else {
      pt = tokens = tokens.next;
      return tokens;
    }
  }

  // peek for the next token, ignore pragmas
  public Token Peek () {
    do {
      if (pt.next == null) {
        pt.next = NextToken();
      }
      pt = pt.next;
    } while (pt.kind > maxT); // skip pragmas

    return pt;
  }

  // make sure that peeking starts at the current scan position
  public void ResetPeek () { pt = tokens; }

} // end Scanner
// Note: See TracBrowser for help on using the repository browser.