Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

lexer.go @ 17328

Visit:

Last change on this file since 17328 was 16620, checked in by hmaislin, 6 years ago
#2929: Reorganized folder structure for make script, removed explicit marshalling, erased go-side logging
File size: 9.1 KB

Line
1	package symexpr
2
3	import (
4	"fmt"
5	"strings"
6	"unicode"
7	"unicode/utf8"
8	)
9
10	func (l *lexer) run() {
11	for state := lexExpr; state != nil; {
12	state = state(l)
13	}
14	l.items <- item{itemEOF, "eof"}
15	close(l.items) // No more tokens will be delivered.
16	}
17
18	type item struct {
19	typ itemType
20	val string
21	}
22
23	func (i item) String() string {
24	switch i.typ {
25	case itemEOF:
26	return "EOF"
27	case itemError:
28	return i.val
29	}
30	if len(i.val) > 10 {
31	return fmt.Sprintf("%.10q...", i.val)
32	}
33	return fmt.Sprintf("%d-'%s'", i.typ, i.val)
34	}
35
36	type itemType int
37
38	const (
39	itemNil itemType = iota
40	itemError
41	itemEOF // EOF
42
43	itemAdd // +
44	itemMul // *
45	itemDiv // /
46	itemDiv2 // {...}//{...}
47	itemNeg // -
48	itemLParen // (
49	itemRParen // )
50	itemLBrack // {
51	itemRBrack // }
52	itemCarrot // ^
53
54	itemNumber
55	itemIndex
56	itemIdentifier // vars,coeffs,funcs
57
58	itemKeyword // placeholder
59	itemFrac // \frac
60	itemCDot // \cdot (latex multiplication)
61	itemSin
62	itemCos
63	itemTan
64	itemAbs
65	itemSqrt
66	itemExp
67	itemLog
68	)
69
70	func isLeaf(it itemType) bool {
71	switch it {
72	case itemNumber, itemIdentifier:
73	return true
74	}
75	return false
76	}
77	func isUnary(it itemType) bool {
78	// switch it {
79	// case itemNeg:
80	// return true
81	// }
82	return false
83	}
84	func isBinary(it itemType) bool {
85	switch it {
86	case itemAdd, itemNeg, itemMul, itemDiv, itemDiv2, itemCarrot:
87	return true
88	}
89	return false
90	}
91
92	var itemName = map[itemType]string{
93	itemNil: "nil",
94	itemError: "error",
95	itemEOF: "EOF",
96
97	itemAdd: "add",
98	itemMul: "mul",
99	itemDiv: "div",
100	itemDiv2: "div2",
101	itemNeg: "neg",
102	itemLParen: "lParen",
103	itemRParen: "rParen",
104	itemLBrack: "lBrack",
105	itemRBrack: "rBrack",
106	itemCarrot: "carrot",
107
108	itemNumber: "number",
109	itemIndex: "index",
110	itemIdentifier: "identifier",
111
112	// keywords
113	itemCDot: "cdot",
114	itemFrac: "frac",
115	itemSin: "sin",
116	itemCos: "cos",
117	itemTan: "tan",
118	itemAbs: "abs",
119	itemSqrt: "sqrt",
120	itemExp: "exp",
121	itemLog: "ln",
122	}
123
124	func (i itemType) String() string {
125	s := itemName[i]
126	if s == "" {
127	return fmt.Sprintf("item%d", int(i))
128	}
129	return s
130	}
131
132	var key = map[string]itemType{
133	"\\cdot": itemCDot,
134	"\\frac": itemFrac,
135	"sin": itemSin,
136	"Sin": itemSin,
137	"cos": itemCos,
138	"Cos": itemCos,
139	"tan": itemTan,
140	"Tan": itemTan,
141	"abs": itemAbs,
142	"Abs": itemAbs,
143	"sqrt": itemSqrt,
144	"Sqrt": itemSqrt,
145	"e": itemExp,
146	"exp": itemExp,
147	"Exp": itemExp,
148	"ln": itemLog,
149	}
150
151	const eof = -1
152
153	// stateFn represents the state of the scanner as a function that returns the next state.
154	type stateFn func(*lexer) stateFn
155
156	// lexer holds the state of the scanner.
157	type lexer struct {
158	name string // used only for error reports.
159	input string // the string being scanned.
160	vars []string // allowable variables
161	state stateFn // the next lexing function to enter.
162	start int // start position of this item.
163	pos int // current position in the input.
164	width int // width of last rune read from input.
165	items chan item // channel of scanned items.
166	nextToken item
167	}
168
169	// next returns the next rune in the input.
170	func (l *lexer) next() (r rune) {
171	if l.pos >= len(l.input) {
172	l.width = 0
173	return eof
174	}
175	r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
176	l.pos += l.width
177	return r
178	}
179
180	// peek returns but does not consume the next rune in the input.
181	func (l *lexer) peek() rune {
182	r := l.next()
183	l.backup()
184	return r
185	}
186
187	// backup steps back one rune. Can only be called once per call of next.
188	func (l *lexer) backup() {
189	l.pos -= l.width
190	}
191
192	// emit passes an item back to the client.
193	func (l *lexer) emit(t itemType) {
194	l.items <- item{t, l.input[l.start:l.pos]}
195	l.start = l.pos
196	}
197
198	// ignore skips over the pending input before this point.
199	func (l *lexer) ignore() {
200	l.start = l.pos
201	}
202
203	// accept consumes the next rune if it's from the valid set.
204	func (l *lexer) accept(valid string) bool {
205	if strings.IndexRune(valid, l.next()) >= 0 {
206	return true
207	}
208	l.backup()
209	return false
210	}
211
212	// acceptRun consumes a run of runes from the valid set.
213	func (l *lexer) acceptRun(valid string) {
214	for strings.IndexRune(valid, l.next()) >= 0 {
215	}
216	l.backup()
217	}
218
219	// lineNumber reports which line we're on. Doing it this way
220	// means we don't have to worry about peek double counting.
221	func (l *lexer) lineNumber() int {
222	return 1 + strings.Count(l.input[:l.pos], "\n")
223	}
224
225	// error returns an error token and terminates the scan by passing
226	// back a nil pointer that will be the next state, terminating l.nextItem.
227	func (l *lexer) errorf(format string, args ...interface{}) stateFn {
228	l.items <- item{itemError, fmt.Sprintf(format, args...)}
229	return nil
230	}
231
232	// nextItem returns the next item from the input.
233	func (l *lexer) nextItem() item {
234	for {
235	select {
236	case item := <-l.items:
237	return item
238	default:
239	l.state = l.state(l)
240	}
241	}
242	panic("not reached")
243	}
244
245	// lex creates a new scanner for the input string.
246	func lex(name, input string, vars []string) *lexer {
247	l := &lexer{
248	name: name,
249	input: input,
250	vars: vars,
251	state: lexExpr,
252	items: make(chan item, 2), // Two items of buffering is sufficient for all state functions
253	}
254	return l
255	}
256
257	/*** State Functions ***/
258
259	// lexExpr is the top level scanner
260	func lexExpr(l *lexer) stateFn {
261	switch r := l.next(); {
262	case r == eof \|\| r == '\n':
263	return nil
264	case isSpace(r):
265	l.ignore()
266	case r == '+':
267	l.emit(itemAdd)
268	return lexExpr
269	case r == '*':
270	l.emit(itemMul)
271	return lexExpr
272	case r == '^':
273	l.emit(itemCarrot)
274	return lexExpr
275	case r == '/':
276	if l.peek() == '/' {
277	l.emit(itemDiv2)
278	} else {
279	l.emit(itemDiv)
280	}
281	return lexExpr
282	case r == '-':
283	l.emit(itemNeg)
284	return lexExpr
285	case r == '(':
286	l.emit(itemLParen)
287	return lexExpr
288	case r == ')':
289	l.emit(itemRParen)
290	return lexExpr
291	case r == '{':
292	l.emit(itemLBrack)
293	return lexExpr
294	case r == '}':
295	l.emit(itemRBrack)
296	return lexExpr
297	case r == '\\':
298	// l.backup()
299	return lexIdentifier
300	case r == '_':
301	l.ignore()
302	return lexIndex
303	case '0' <= r && r <= '9':
304	l.backup()
305	return lexNumber
306	case isAlphaNumeric(r):
307	l.backup()
308	return lexIdentifier
309	default:
310	return l.errorf("unrecognized character in action: %#U", r)
311	}
312	return lexExpr
313	}
314
315	// lexIdentifier scans an alphanumeric or field.
316	func lexIdentifier(l *lexer) stateFn {
317	Loop:
318	for {
319	switch r := l.next(); {
320	case isAlphaNumeric(r):
321	// absorb.
322	case r == '_':
323	// absorb and scanIndex
324	digits := "0123456789"
325	l.acceptRun(digits)
326	l.emit(itemIdentifier)
327	return lexExpr
328	case r == '.' && (l.input[l.start] == '.' \|\| l.input[l.start] == '$'):
329	// field chaining; absorb into one token.
330	default:
331	l.backup()
332	word := l.input[l.start:l.pos]
333	if !l.atTerminator() {
334	return l.errorf("unexpected character %c", r)
335	}
336	switch {
337	case key[word] > itemKeyword:
338	l.emit(key[word])
339	default:
340	l.emit(itemIdentifier)
341	}
342	break Loop
343	}
344	}
345	return lexExpr
346	}
347
348	// atTerminator reports whether the input is at valid termination character to
349	// appear after an identifier. Mostly to catch cases like "$x+2" not being
350	// acceptable without a space, in case we decide one day to implement
351	// arithmetic.
352	func (l *lexer) atTerminator() bool {
353	r := l.peek()
354	if isSpace(r) {
355	return true
356	}
357	switch r {
358	case eof, '+', '-', '(', ')', '{', '}', '/', '*', '^':
359	return true
360	}
361	return false
362	}
363
364	// lexNumber scans a number: decimal, octal, hex, float, or imaginary. This
365	// isn't a perfect number scanner - for instance it accepts "." and "0x0.2"
366	// and "089" - but when it's wrong the input is invalid and the parser (via
367	// strconv) will notice.
368	func lexNumber(l *lexer) stateFn {
369	if !l.scanNumber() {
370	return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
371	} else {
372	l.emit(itemNumber)
373	}
374	return lexExpr
375	}
376
377	func lexIndex(l *lexer) stateFn {
378	if !l.scanIndex() {
379	return l.errorf("bad index syntax: %q", l.input[l.start:l.pos])
380	} else {
381	l.emit(itemIndex)
382	}
383	return lexExpr
384	}
385
386	func (l *lexer) scanIndex() bool {
387	// Optional leading sign.
388	digits := "0123456789"
389	l.acceptRun(digits)
390
391	// Next thing mustn't be alphanumeric.
392	// if isAlphaNumeric(l.peek()) {
393	// l.next()
394	// return false
395	// }
396	return true
397	}
398
399	func (l *lexer) scanNumber() bool {
400	// Optional leading sign.
401	l.accept("+-")
402	digits := "0123456789"
403	l.acceptRun(digits)
404	if l.accept(".") {
405	l.acceptRun(digits)
406	}
407	if l.accept("eE") {
408	l.accept("+-")
409	l.acceptRun("0123456789")
410	}
411	// Is it imaginary?
412	l.accept("i")
413	// Next thing mustn't be alphanumeric.
414	// if isAlphaNumeric(l.peek()) {
415	// l.next()
416	// return false
417	// }
418	return true
419	}
420
421	// isSpace reports whether r is a space character.
422	func isSpace(r rune) bool {
423	switch r {
424	case ' ', '\t', '\n', '\r':
425	return true
426	}
427	return false
428	}
429
430	// isAlphaNumeric reports whether r is an alphabetic or digit
431	func isAlphaNumeric(r rune) bool {
432	return unicode.IsLetter(r) \|\| unicode.IsDigit(r)
433	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Original Format