Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Normal
Revision Log

lexer.go @ 16620

Visit:

Last change on this file since 16620 was 16620, checked in by hmaislin, 5 years ago
#2929: Reorganized folder structure for make script, removed explicit marshalling, erased go-side logging
File size: 9.1 KB

Rev	Line
[16620]	1	package symexpr
	2
	3	import (
	4	"fmt"
	5	"strings"
	6	"unicode"
	7	"unicode/utf8"
	8	)
	9
	10	func (l *lexer) run() {
	11	for state := lexExpr; state != nil; {
	12	state = state(l)
	13	}
	14	l.items <- item{itemEOF, "eof"}
	15	close(l.items) // No more tokens will be delivered.
	16	}
	17
	18	type item struct {
	19	typ itemType
	20	val string
	21	}
	22
	23	func (i item) String() string {
	24	switch i.typ {
	25	case itemEOF:
	26	return "EOF"
	27	case itemError:
	28	return i.val
	29	}
	30	if len(i.val) > 10 {
	31	return fmt.Sprintf("%.10q...", i.val)
	32	}
	33	return fmt.Sprintf("%d-'%s'", i.typ, i.val)
	34	}
	35
	36	type itemType int
	37
	38	const (
	39	itemNil itemType = iota
	40	itemError
	41	itemEOF // EOF
	42
	43	itemAdd // +
	44	itemMul // *
	45	itemDiv // /
	46	itemDiv2 // {...}//{...}
	47	itemNeg // -
	48	itemLParen // (
	49	itemRParen // )
	50	itemLBrack // {
	51	itemRBrack // }
	52	itemCarrot // ^
	53
	54	itemNumber
	55	itemIndex
	56	itemIdentifier // vars,coeffs,funcs
	57
	58	itemKeyword // placeholder
	59	itemFrac // \frac
	60	itemCDot // \cdot (latex multiplication)
	61	itemSin
	62	itemCos
	63	itemTan
	64	itemAbs
	65	itemSqrt
	66	itemExp
	67	itemLog
	68	)
	69
	70	func isLeaf(it itemType) bool {
	71	switch it {
	72	case itemNumber, itemIdentifier:
	73	return true
	74	}
	75	return false
	76	}
	77	func isUnary(it itemType) bool {
	78	// switch it {
	79	// case itemNeg:
	80	// return true
	81	// }
	82	return false
	83	}
	84	func isBinary(it itemType) bool {
	85	switch it {
	86	case itemAdd, itemNeg, itemMul, itemDiv, itemDiv2, itemCarrot:
	87	return true
	88	}
	89	return false
	90	}
	91
	92	var itemName = map[itemType]string{
	93	itemNil: "nil",
	94	itemError: "error",
	95	itemEOF: "EOF",
	96
	97	itemAdd: "add",
	98	itemMul: "mul",
	99	itemDiv: "div",
	100	itemDiv2: "div2",
	101	itemNeg: "neg",
	102	itemLParen: "lParen",
	103	itemRParen: "rParen",
	104	itemLBrack: "lBrack",
	105	itemRBrack: "rBrack",
	106	itemCarrot: "carrot",
	107
	108	itemNumber: "number",
	109	itemIndex: "index",
	110	itemIdentifier: "identifier",
	111
	112	// keywords
	113	itemCDot: "cdot",
	114	itemFrac: "frac",
	115	itemSin: "sin",
	116	itemCos: "cos",
	117	itemTan: "tan",
	118	itemAbs: "abs",
	119	itemSqrt: "sqrt",
	120	itemExp: "exp",
	121	itemLog: "ln",
	122	}
	123
	124	func (i itemType) String() string {
	125	s := itemName[i]
	126	if s == "" {
	127	return fmt.Sprintf("item%d", int(i))
	128	}
	129	return s
	130	}
	131
	132	var key = map[string]itemType{
	133	"\\cdot": itemCDot,
	134	"\\frac": itemFrac,
	135	"sin": itemSin,
	136	"Sin": itemSin,
	137	"cos": itemCos,
	138	"Cos": itemCos,
	139	"tan": itemTan,
	140	"Tan": itemTan,
	141	"abs": itemAbs,
	142	"Abs": itemAbs,
	143	"sqrt": itemSqrt,
	144	"Sqrt": itemSqrt,
	145	"e": itemExp,
	146	"exp": itemExp,
	147	"Exp": itemExp,
	148	"ln": itemLog,
	149	}
	150
	151	const eof = -1
	152
	153	// stateFn represents the state of the scanner as a function that returns the next state.
	154	type stateFn func(*lexer) stateFn
	155
	156	// lexer holds the state of the scanner.
	157	type lexer struct {
	158	name string // used only for error reports.
	159	input string // the string being scanned.
	160	vars []string // allowable variables
	161	state stateFn // the next lexing function to enter.
	162	start int // start position of this item.
	163	pos int // current position in the input.
	164	width int // width of last rune read from input.
	165	items chan item // channel of scanned items.
	166	nextToken item
	167	}
	168
	169	// next returns the next rune in the input.
	170	func (l *lexer) next() (r rune) {
	171	if l.pos >= len(l.input) {
	172	l.width = 0
	173	return eof
	174	}
	175	r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
	176	l.pos += l.width
	177	return r
	178	}
	179
	180	// peek returns but does not consume the next rune in the input.
	181	func (l *lexer) peek() rune {
	182	r := l.next()
	183	l.backup()
	184	return r
	185	}
	186
	187	// backup steps back one rune. Can only be called once per call of next.
	188	func (l *lexer) backup() {
	189	l.pos -= l.width
	190	}
	191
	192	// emit passes an item back to the client.
	193	func (l *lexer) emit(t itemType) {
	194	l.items <- item{t, l.input[l.start:l.pos]}
	195	l.start = l.pos
	196	}
	197
	198	// ignore skips over the pending input before this point.
	199	func (l *lexer) ignore() {
	200	l.start = l.pos
	201	}
	202
	203	// accept consumes the next rune if it's from the valid set.
	204	func (l *lexer) accept(valid string) bool {
	205	if strings.IndexRune(valid, l.next()) >= 0 {
	206	return true
	207	}
	208	l.backup()
	209	return false
	210	}
	211
	212	// acceptRun consumes a run of runes from the valid set.
	213	func (l *lexer) acceptRun(valid string) {
	214	for strings.IndexRune(valid, l.next()) >= 0 {
	215	}
	216	l.backup()
	217	}
	218
	219	// lineNumber reports which line we're on. Doing it this way
	220	// means we don't have to worry about peek double counting.
	221	func (l *lexer) lineNumber() int {
	222	return 1 + strings.Count(l.input[:l.pos], "\n")
	223	}
	224
	225	// error returns an error token and terminates the scan by passing
	226	// back a nil pointer that will be the next state, terminating l.nextItem.
	227	func (l *lexer) errorf(format string, args ...interface{}) stateFn {
	228	l.items <- item{itemError, fmt.Sprintf(format, args...)}
	229	return nil
	230	}
	231
	232	// nextItem returns the next item from the input.
	233	func (l *lexer) nextItem() item {
	234	for {
	235	select {
	236	case item := <-l.items:
	237	return item
	238	default:
	239	l.state = l.state(l)
	240	}
	241	}
	242	panic("not reached")
	243	}
	244
	245	// lex creates a new scanner for the input string.
	246	func lex(name, input string, vars []string) *lexer {
	247	l := &lexer{
	248	name: name,
	249	input: input,
	250	vars: vars,
	251	state: lexExpr,
	252	items: make(chan item, 2), // Two items of buffering is sufficient for all state functions
	253	}
	254	return l
	255	}
	256
	257	/*** State Functions ***/
	258
	259	// lexExpr is the top level scanner
	260	func lexExpr(l *lexer) stateFn {
	261	switch r := l.next(); {
	262	case r == eof \|\| r == '\n':
	263	return nil
	264	case isSpace(r):
	265	l.ignore()
	266	case r == '+':
	267	l.emit(itemAdd)
	268	return lexExpr
	269	case r == '*':
	270	l.emit(itemMul)
	271	return lexExpr
	272	case r == '^':
	273	l.emit(itemCarrot)
	274	return lexExpr
	275	case r == '/':
	276	if l.peek() == '/' {
	277	l.emit(itemDiv2)
	278	} else {
	279	l.emit(itemDiv)
	280	}
	281	return lexExpr
	282	case r == '-':
	283	l.emit(itemNeg)
	284	return lexExpr
	285	case r == '(':
	286	l.emit(itemLParen)
	287	return lexExpr
	288	case r == ')':
	289	l.emit(itemRParen)
	290	return lexExpr
	291	case r == '{':
	292	l.emit(itemLBrack)
	293	return lexExpr
	294	case r == '}':
	295	l.emit(itemRBrack)
	296	return lexExpr
	297	case r == '\\':
	298	// l.backup()
	299	return lexIdentifier
	300	case r == '_':
	301	l.ignore()
	302	return lexIndex
	303	case '0' <= r && r <= '9':
	304	l.backup()
	305	return lexNumber
	306	case isAlphaNumeric(r):
	307	l.backup()
	308	return lexIdentifier
	309	default:
	310	return l.errorf("unrecognized character in action: %#U", r)
	311	}
	312	return lexExpr
	313	}
	314
	315	// lexIdentifier scans an alphanumeric or field.
	316	func lexIdentifier(l *lexer) stateFn {
	317	Loop:
	318	for {
	319	switch r := l.next(); {
	320	case isAlphaNumeric(r):
	321	// absorb.
	322	case r == '_':
	323	// absorb and scanIndex
	324	digits := "0123456789"
	325	l.acceptRun(digits)
	326	l.emit(itemIdentifier)
	327	return lexExpr
	328	case r == '.' && (l.input[l.start] == '.' \|\| l.input[l.start] == '$'):
	329	// field chaining; absorb into one token.
	330	default:
	331	l.backup()
	332	word := l.input[l.start:l.pos]
	333	if !l.atTerminator() {
	334	return l.errorf("unexpected character %c", r)
	335	}
	336	switch {
	337	case key[word] > itemKeyword:
	338	l.emit(key[word])
	339	default:
	340	l.emit(itemIdentifier)
	341	}
	342	break Loop
	343	}
	344	}
	345	return lexExpr
	346	}
	347
	348	// atTerminator reports whether the input is at valid termination character to
	349	// appear after an identifier. Mostly to catch cases like "$x+2" not being
	350	// acceptable without a space, in case we decide one day to implement
	351	// arithmetic.
	352	func (l *lexer) atTerminator() bool {
	353	r := l.peek()
	354	if isSpace(r) {
	355	return true
	356	}
	357	switch r {
	358	case eof, '+', '-', '(', ')', '{', '}', '/', '*', '^':
	359	return true
	360	}
	361	return false
	362	}
	363
	364	// lexNumber scans a number: decimal, octal, hex, float, or imaginary. This
	365	// isn't a perfect number scanner - for instance it accepts "." and "0x0.2"
	366	// and "089" - but when it's wrong the input is invalid and the parser (via
	367	// strconv) will notice.
	368	func lexNumber(l *lexer) stateFn {
	369	if !l.scanNumber() {
	370	return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
	371	} else {
	372	l.emit(itemNumber)
	373	}
	374	return lexExpr
	375	}
	376
	377	func lexIndex(l *lexer) stateFn {
	378	if !l.scanIndex() {
	379	return l.errorf("bad index syntax: %q", l.input[l.start:l.pos])
	380	} else {
	381	l.emit(itemIndex)
	382	}
	383	return lexExpr
	384	}
	385
	386	func (l *lexer) scanIndex() bool {
	387	// Optional leading sign.
	388	digits := "0123456789"
	389	l.acceptRun(digits)
	390
	391	// Next thing mustn't be alphanumeric.
	392	// if isAlphaNumeric(l.peek()) {
	393	// l.next()
	394	// return false
	395	// }
	396	return true
	397	}
	398
	399	func (l *lexer) scanNumber() bool {
	400	// Optional leading sign.
	401	l.accept("+-")
	402	digits := "0123456789"
	403	l.acceptRun(digits)
	404	if l.accept(".") {
	405	l.acceptRun(digits)
	406	}
	407	if l.accept("eE") {
	408	l.accept("+-")
	409	l.acceptRun("0123456789")
	410	}
	411	// Is it imaginary?
	412	l.accept("i")
	413	// Next thing mustn't be alphanumeric.
	414	// if isAlphaNumeric(l.peek()) {
	415	// l.next()
	416	// return false
	417	// }
	418	return true
	419	}
	420
	421	// isSpace reports whether r is a space character.
	422	func isSpace(r rune) bool {
	423	switch r {
	424	case ' ', '\t', '\n', '\r':
	425	return true
	426	}
	427	return false
	428	}
	429
	430	// isAlphaNumeric reports whether r is an alphabetic or digit
	431	func isAlphaNumeric(r rune) bool {
	432	return unicode.IsLetter(r) \|\| unicode.IsDigit(r)
	433	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Original Format