// GPDefSyntaxAnalyzerLex.cs                                   HDO, 2006-08-28
// -------------
// Lexical analyzer (finite-state machine interpreter).
// Generated by Coco-2 (SG).
// NOTE: this file is machine-generated; edit the grammar (ATG) and regenerate
// rather than modifying the tables or the automaton interpreter by hand.
//=====================================|========================================

#undef TEST_Lex

using System;
using System.Collections;
using System.Collections.Specialized;
using System.IO;
using System.Text;

// Table-driven scanner for the GPDef grammar: reads characters from `src`,
// skips ignorable characters and nested /* ... */ comments, and delivers one
// token at a time via GetToken(). All state is static (one scanner instance).
public class GPDefSyntaxAnalyzerLex {

  public const String MODULENAME = "GPDefSyntaxAnalyzerLex";
  public const int EOFSPIX = 1; // string index of the "!EOF!" entry in the name list

  public static TextReader src; // input source; must be assigned by the caller before scanning

  // --- token information ---
  public static int token;                 // current token
  public static int tokenLine, tokenCol;   // position of current token
  public static String tokenStr;           // token string recognized

  // --- current char. info, for "power users" only ---
  public static char curCh;                // current input character
  public static int curLine, curCol;       // position of curCh

  // Module lifecycle entry point (get name / init / reset / cleanup),
  // dispatched on `action`; always reports this module's name via `moduleName`.
  public static void GPDefSyntaxAnalyzerLexMethod(Utils.ModuleAction action,
                                                  out String moduleName) {
  //-----------------------------------|----------------------------------------
    moduleName = MODULENAME;
    switch (action) {
      case Utils.ModuleAction.getModuleName:
        return;
      case Utils.ModuleAction.initModule:
        // NOTE: caseSensitive must be set before CreateHashtable() is called,
        // since it selects between case-sensitive and case-insensitive tables.
        caseSensitive = true;
        lt = new LexicalTable();
        tokenStrArr = new char[256]; // upper bound on token length
        kwHt = CreateHashtable();
        nHt = CreateHashtable();
        nl = new ArrayList();
        break;
      case Utils.ModuleAction.resetModule:
        kwHt.Clear();
        nHt.Clear();
        nl.Clear();
        break;
      case Utils.ModuleAction.cleanupModule:
        lt = null;
        tokenStrArr = null;
        kwHt = null;
        nHt = null;
        nl = null;
        break;
    } // switch
  } // GPDefSyntaxAnalyzerLexMethod

  // Creates a hash table honoring the current caseSensitive setting.
  private static Hashtable CreateHashtable() {
    if (caseSensitive)
      return new Hashtable();
    else
      return CollectionsUtil.CreateCaseInsensitiveHashtable();
  } // CreateHashtable

  // (Re)initializes the scanner: fills the keyword table, resets the name
  // data structures, and resets all per-run scanning state. Must be called
  // after `src` has been assigned and before the first GetToken().
  public static void InitLex() {
  //-----------------------------------|----------------------------------------

    // --- initialize keyword hash table ---
    kwHt.Clear();
    EnterKeyword( 1, "PROBLEM");
    EnterKeyword( 2, "END");
    EnterKeyword( 3, "EPS");
    EnterKeyword( 4, "LOCAL");
    EnterKeyword( 5, "NONTERMINALS");
    EnterKeyword( 6, "RULES");
    EnterKeyword( 7, "SEM");
    EnterKeyword( 8, "MAXIMIZE");
    EnterKeyword( 9, "MINIMIZE");
    EnterKeyword( 10, "TERMINALS");
    EnterKeyword( 11, "CONSTRAINTS");
    EnterKeyword( 12, "INIT");
    EnterKeyword( 13, "CODE");
    EnterKeyword( 14, "IN");
    EnterKeyword( 15, "SET");
    EnterKeyword( 16, "RANGE");

    // --- initialize name data structures ---
    nHt.Clear();
    nl.Clear();
    nl.Add("");      // so spix = 0 is the empty string
    nl.Add("!EOF!"); // so EOFSPIX = 1

    // --- (re)set global data ---
    curLineStr = "";
    curCh = ' ';
    curLine = 0;
    curCol = 2; // > curLineStr.Length, so the first NextCh() reads a fresh line
    tokenStr = null;
    token = 0;
    tokenCol = 0;
    tokenLine = 0;
    tokenStrLen = 0;
    pendingEOLs = 0;
    apxLen = pendingEOLs; // to prevent warning
  } // InitLex

  // Registers one keyword string with its token number in the keyword table.
  private static void EnterKeyword(int token, String keyword) {
    kwHt.Add(keyword, token);
  } // EnterKeyword

  // *** start of global LEX declarations from ATG ***

  // *** end of global LEX declarations from ATG ***

  public static bool caseSensitive; // whether keywords/names are matched case-sensitively

  // Generated DFA tables: start states per input character, ignorable-character
  // and comment-start bit sets, and character classes used by GetToken().
  private class LexicalTable {
    public int header = 4;
    // startTab[c] = DFA start state for input character c (0 = invalid start).
    public int[] startTab = {
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
       4,  5,  0,  0,  0,  0, 17,  0,  0,  0,
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      14,  2, 10,  0,  0, 16, 16, 16, 16, 16,
      13, 16, 13, 16, 13, 13, 16, 16, 16, 16,
      16, 13, 16, 16, 16, 16, 13, 13, 16, 13,
      16,  6,  0,  7,  0,  0,  0, 13, 13, 13,
      13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
      13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
      13, 13, 13,  8,  3,  9,  0,  0,  0,  0,
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
       0,  0,  0,  0,  0,  0 };
    // Characters skipped between tokens (presumably TAB, LF, and space,
    // given the bit positions — word layout is defined by Sets.Set256).
    public Sets.Set256 ignoredChars =
      new Sets.Set256( 0x0600, 0x0000, 0x0001 );
    // Characters that may start a comment (presumably '/').
    public Sets.Set256 commentStart =
      new Sets.Set256( 0x0000, 0x0000, 0x8000 );
    // Character classes referenced by the automaton in GetToken().
    public Sets.Set256[] cls = {
      new Sets.Set256( 0x0000, 0x0000, 0x0000, 0x0000,
                       0xfffe, 0x07ff, 0xfffe, 0x07ff),
      new Sets.Set256( 0x0000, 0x0000, 0x0000, 0x03ff),
      new Sets.Set256( 0x0000, 0x0000, 0x0000, 0x0000,
                       0xf2be, 0x053d),
      new Sets.Set256( 0x0000, 0x0000, 0x0000, 0x0000,
                       0x0d40, 0x02c2, 0xfffe, 0x07ff) };
  } // LexicalTable

  private static LexicalTable lt;

  private static Hashtable kwHt;        // hash table for keywords: string -> token
  private static Hashtable nHt;         // hash table for names: string -> null
  private static ArrayList nl;          // name list for names, index is spix

  private static String curLineStr;     // current source line
  private static char[] tokenStrArr;    // token string in an array
  private static int tokenStrLen;       // length of token string in tokenStrArr
  private static int state;             // current automaton state
  private static int apxLen;            // length of appendix in token string
  private static int pendingEOLs;       // nr of EOLs found in comment

  // --- to save and restore scanner state ---
  private static char savedCh;
  private static int savedCol, savedLine;

  // Snapshots the current character position; paired with RestoreScannerState()
  // so a failed comment probe in Comment() can back out.
  private static void SaveScannerState() {
    savedCh = curCh;
    savedCol = curCol;
    savedLine = curLine;
  } // SaveScannerState

  // Restores the snapshot taken by SaveScannerState() and trims the token
  // string by the speculatively consumed appendix.
  private static void RestoreScannerState() {
    tokenStrLen -= apxLen;
    apxLen = 0;
    curCh = savedCh;
    curCol = savedCol;
    curLine = savedLine;
  } // RestoreScannerState

  // Advances to the next input character, updating curCh/curLine/curCol.
  // Lines are separated by a synthetic Utils.LF; end of input yields Utils.EF.
  public static void NextCh() {
  //-----------------------------------|----------------------------------------
    for (;;) {
      if (curCol < curLineStr.Length) {            // within line
        curCol++;
        curCh = curLineStr[curCol - 1];
        return;
      } else if (curCol == curLineStr.Length) {    // end of line
        curCol++;
        curCh = Utils.LF; // to separate lines
        return;
      } else {                                     // curCol > curLineStr.Length
        curLineStr = src.ReadLine();
        curLine++;
        curCol = 0;
        if (curLineStr == null) {                  // end of file
          curLineStr = "";
          curCh = Utils.EF;
          return;
        } // if
      } // else
    } // for
  } // NextCh

  // Reports an unterminated comment at the current position.
  private static void CommentErr() {
    Errors.LexError(curLine, curCol, "end of file in comment");
  } // CommentErr

  // Tries to scan a nested comment ("/" "*" ... "*" "/"). Called with curCh on
  // the potential comment-start character. Returns true if a comment was
  // consumed (or EOF was hit inside one); returns false — with the scanner
  // state restored — if the next character does not actually open a comment.
  private static bool Comment() {
    int level = 1; // current comment nesting depth
    SaveScannerState();
    // --- from '/' '*'  to  '*' '/' nested ---
    NextCh();
    if (curCh == '*') {
      NextCh();
      for (;;)
        switch (curCh) {
          case '*':
            NextCh();
            if (curCh == '/') {
              NextCh();
              level--;
              if (level == 0) {
                return true;
              } // if
            } // if
            break;
          case '/':
            NextCh();
            if (curCh == '*') {
              level++;
              NextCh();
            }
            break;
          case Utils.EF:
            CommentErr();
            return true;
          default:
            NextCh();
            break;
        } // switch
    } else
      RestoreScannerState(); // not a comment after all: undo the probe
    return false;
  } // Comment

  // Reports an invalid character and, if a token appendix was being scanned
  // speculatively, backs the scanner out of it.
  private static void HandleLexErr() {
    Errors.LexError(curLine, curCol, "invalid character '{0}' (hex {1:X})",
                    curCh, (int)curCh);
    if (apxLen > 0)
      RestoreScannerState();
  } // HandleLexErr

  // Scans the next token: skips ignorable characters and comments, then runs
  // the generated finite automaton. On return, `token`, `tokenStr`,
  // `tokenLine`, and `tokenCol` describe the recognized token (token 0 = EOF).
  public static void GetToken() {
  //-----------------------------------|----------------------------------------
    token = -1; // -1 = nothing recognized yet; loop until a token is produced
    tokenStr = null;
    do {
      // --- skip ignored chars and comments ---
      for (;;) {
        while (Sets.member(curCh, lt.ignoredChars))
          NextCh();
        if (!Sets.member(curCh, lt.commentStart))
          break;
        if (!Comment())
          break; // comment-start char but no comment: scan it as a token
      } // for
      // --- scan for next token ---
      tokenLine = curLine;
      tokenCol = curCol;
      tokenStrLen = 0;
      apxLen = 0;
      state = (curCh == Utils.EF) ? 1 : lt.startTab[curCh];
      for (;;) {
        tokenStrArr[tokenStrLen++]= curCh;
        NextCh();
        switch (state) {
          case 0: // invalid start character: report, then retry via outer loop
            Errors.LexError(tokenLine, tokenCol, "invalid token start");
            break;
          case 1:
            token = 0; tokenStrLen = 0;
            break; // EOF recognized
          case 2:
            token = 17;
            break; // '=' recognized
          case 3:
            token = 18;
            break; // '|' recognized
          case 4:
            token = 20;
            break; // '(' recognized
          case 5:
            token = 21;
            break; // ')' recognized
          case 6:
            token = 22;
            break; // '[' recognized
          case 7:
            token = 23;
            break; // ']' recognized
          case 8:
            token = 24;
            break; // '{' recognized
          case 9:
            token = 25;
            break; // '}' recognized
          case 10:
            if (curCh == '>') {
              state = 11;
              continue;
            } // if
            else {
              HandleLexErr();
              break;
            } // else
          case 11:
            token = 26;
            break; // '>>' recognized
          case 12:
            token = 27;
            break; // '..' recognized
          case 13: // inside an identifier
            if (Sets.member(curCh, lt.cls[0])) {
              state = 13;
              continue;
            } // if
            else if (Sets.member(curCh, lt.cls[1])) {
              state = 13;
              continue;
            } // if
            else {
              token = KeywordCheck();
              if (token >= 0)
                break; // keyword recognized
              token = 28;
              break; // ident recognized
            } // else
          case 14:
            if (curCh == '<') {
              state = 15;
              continue;
            } // if
            else {
              HandleLexErr();
              break;
            } // else
          case 15:
            token = 29;
            break; // source recognized
          case 16: // identifier start that may still become a keyword
            if (Sets.member(curCh, lt.cls[1])) {
              state = 13;
              continue;
            } // if
            else if (Sets.member(curCh, lt.cls[2])) {
              state = 16;
              continue;
            } // if
            else if (Sets.member(curCh, lt.cls[3])) {
              state = 13;
              continue;
            } // if
            else {
              token = KeywordCheck();
              if (token >= 0)
                break; // keyword recognized
              token = 28;
              break; // ident recognized
            } // else
          case 17:
            if (curCh == '.') {
              state = 12;
              continue;
            } // if
            else {
              token = 19;
              break; // '.' recognized
            } // else
        } // switch
        break;
      } // for
    } while (token < 0);
    if (tokenStr == null)
      tokenStr = new String(tokenStrArr, 0, tokenStrLen);
  } // GetToken

  // Looks up the scanned text (minus any speculative appendix) in the keyword
  // table; sets tokenStr as a side effect. Returns the keyword's token number,
  // or -1 if the text is not a keyword.
  private static int KeywordCheck() {
    tokenStr = new String(tokenStrArr, 0, tokenStrLen - apxLen);
    // NOTE: this local deliberately shadows the static `token` field;
    // it holds the boxed token number from the keyword table (or null).
    Object token = kwHt[tokenStr];
    return (token == null) ? -1 : (int)token;
  } // KeywordCheck

  // Interns string s into the name list and returns its string index (spix);
  // repeated calls with the same string return the same index. Respects the
  // caseSensitive flag when storing the canonical form.
  public static int Hash(String s) {
  //-----------------------------------|----------------------------------------
    Object spix = nHt[s];
    if (spix == null) {
      if (caseSensitive)
        nl.Add(s);
      else
        nl.Add(s.ToUpper());
      spix = nl.Count - 1;
      nHt[s] = spix;
    } // if
    return (int)spix;
  } // Hash

  // Returns the string previously interned under string index spix.
  public static String HashedStr(int spix) {
  //-----------------------------------|----------------------------------------
    return (String)nl[spix];
  } // HashedStr

  // Attribute hooks generated from the ATG; empty for this grammar.
  public static void GETidentAttr() {
  } // GETidentAttr

  public static void GETsourceAttr() {
  } // GETsourceAttr

} // GPDefSyntaxAnalyzerLex

// End of GPDefSyntaxAnalyzerLex.cs
//=====================================|========================================