1 | package symexpr |
---|
2 | |
---|
3 | import ( |
---|
4 | "fmt" |
---|
5 | "strings" |
---|
6 | "unicode" |
---|
7 | "unicode/utf8" |
---|
8 | ) |
---|
9 | |
---|
10 | func (l *lexer) run() { |
---|
11 | for state := lexExpr; state != nil; { |
---|
12 | state = state(l) |
---|
13 | } |
---|
14 | l.items <- item{itemEOF, "eof"} |
---|
15 | close(l.items) // No more tokens will be delivered. |
---|
16 | } |
---|
17 | |
---|
18 | type item struct { |
---|
19 | typ itemType |
---|
20 | val string |
---|
21 | } |
---|
22 | |
---|
23 | func (i item) String() string { |
---|
24 | switch i.typ { |
---|
25 | case itemEOF: |
---|
26 | return "EOF" |
---|
27 | case itemError: |
---|
28 | return i.val |
---|
29 | } |
---|
30 | if len(i.val) > 10 { |
---|
31 | return fmt.Sprintf("%.10q...", i.val) |
---|
32 | } |
---|
33 | return fmt.Sprintf("%d-'%s'", i.typ, i.val) |
---|
34 | } |
---|
35 | |
---|
// itemType identifies the kind of a lexed item.
type itemType int

// The item kinds. Their numeric order is significant: every keyword kind is
// declared after itemKeyword, so "greater than itemKeyword" means "is a keyword".
const (
	itemNil   itemType = iota // zero value; no token
	itemError                 // scanning error; the item's val holds the message
	itemEOF                   // end of input

	// Operators and delimiters.
	itemAdd    // +
	itemMul    // *
	itemDiv    // /
	itemDiv2   // {...}//{...}
	itemNeg    // -
	itemLParen // (
	itemRParen // )
	itemLBrack // {
	itemRBrack // }
	itemCarrot // ^

	// Operands.
	itemNumber
	itemIndex
	itemIdentifier // vars, coeffs, funcs

	// Keywords.
	itemKeyword // placeholder marking the start of the keyword kinds
	itemFrac    // \frac
	itemCDot    // \cdot (LaTeX multiplication)
	itemSin
	itemCos
	itemTan
	itemAbs
	itemSqrt
	itemExp
	itemLog
)
---|
69 | |
---|
70 | func isLeaf(it itemType) bool { |
---|
71 | switch it { |
---|
72 | case itemNumber, itemIdentifier: |
---|
73 | return true |
---|
74 | } |
---|
75 | return false |
---|
76 | } |
---|
77 | func isUnary(it itemType) bool { |
---|
78 | // switch it { |
---|
79 | // case itemNeg: |
---|
80 | // return true |
---|
81 | // } |
---|
82 | return false |
---|
83 | } |
---|
84 | func isBinary(it itemType) bool { |
---|
85 | switch it { |
---|
86 | case itemAdd, itemNeg, itemMul, itemDiv, itemDiv2, itemCarrot: |
---|
87 | return true |
---|
88 | } |
---|
89 | return false |
---|
90 | } |
---|
91 | |
---|
92 | var itemName = map[itemType]string{ |
---|
93 | itemNil: "nil", |
---|
94 | itemError: "error", |
---|
95 | itemEOF: "EOF", |
---|
96 | |
---|
97 | itemAdd: "add", |
---|
98 | itemMul: "mul", |
---|
99 | itemDiv: "div", |
---|
100 | itemDiv2: "div2", |
---|
101 | itemNeg: "neg", |
---|
102 | itemLParen: "lParen", |
---|
103 | itemRParen: "rParen", |
---|
104 | itemLBrack: "lBrack", |
---|
105 | itemRBrack: "rBrack", |
---|
106 | itemCarrot: "carrot", |
---|
107 | |
---|
108 | itemNumber: "number", |
---|
109 | itemIndex: "index", |
---|
110 | itemIdentifier: "identifier", |
---|
111 | |
---|
112 | // keywords |
---|
113 | itemCDot: "cdot", |
---|
114 | itemFrac: "frac", |
---|
115 | itemSin: "sin", |
---|
116 | itemCos: "cos", |
---|
117 | itemTan: "tan", |
---|
118 | itemAbs: "abs", |
---|
119 | itemSqrt: "sqrt", |
---|
120 | itemExp: "exp", |
---|
121 | itemLog: "ln", |
---|
122 | } |
---|
123 | |
---|
124 | func (i itemType) String() string { |
---|
125 | s := itemName[i] |
---|
126 | if s == "" { |
---|
127 | return fmt.Sprintf("item%d", int(i)) |
---|
128 | } |
---|
129 | return s |
---|
130 | } |
---|
131 | |
---|
132 | var key = map[string]itemType{ |
---|
133 | "\\cdot": itemCDot, |
---|
134 | "\\frac": itemFrac, |
---|
135 | "sin": itemSin, |
---|
136 | "Sin": itemSin, |
---|
137 | "cos": itemCos, |
---|
138 | "Cos": itemCos, |
---|
139 | "tan": itemTan, |
---|
140 | "Tan": itemTan, |
---|
141 | "abs": itemAbs, |
---|
142 | "Abs": itemAbs, |
---|
143 | "sqrt": itemSqrt, |
---|
144 | "Sqrt": itemSqrt, |
---|
145 | "e": itemExp, |
---|
146 | "exp": itemExp, |
---|
147 | "Exp": itemExp, |
---|
148 | "ln": itemLog, |
---|
149 | } |
---|
150 | |
---|
// eof is the sentinel returned by the lexer's next method once the input is
// exhausted; it is not a valid rune value.
const eof = -1
---|
152 | |
---|
153 | // stateFn represents the state of the scanner as a function that returns the next state. |
---|
154 | type stateFn func(*lexer) stateFn |
---|
155 | |
---|
156 | // lexer holds the state of the scanner. |
---|
157 | type lexer struct { |
---|
158 | name string // used only for error reports. |
---|
159 | input string // the string being scanned. |
---|
160 | vars []string // allowable variables |
---|
161 | state stateFn // the next lexing function to enter. |
---|
162 | start int // start position of this item. |
---|
163 | pos int // current position in the input. |
---|
164 | width int // width of last rune read from input. |
---|
165 | items chan item // channel of scanned items. |
---|
166 | nextToken item |
---|
167 | } |
---|
168 | |
---|
169 | // next returns the next rune in the input. |
---|
170 | func (l *lexer) next() (r rune) { |
---|
171 | if l.pos >= len(l.input) { |
---|
172 | l.width = 0 |
---|
173 | return eof |
---|
174 | } |
---|
175 | r, l.width = utf8.DecodeRuneInString(l.input[l.pos:]) |
---|
176 | l.pos += l.width |
---|
177 | return r |
---|
178 | } |
---|
179 | |
---|
180 | // peek returns but does not consume the next rune in the input. |
---|
181 | func (l *lexer) peek() rune { |
---|
182 | r := l.next() |
---|
183 | l.backup() |
---|
184 | return r |
---|
185 | } |
---|
186 | |
---|
187 | // backup steps back one rune. Can only be called once per call of next. |
---|
188 | func (l *lexer) backup() { |
---|
189 | l.pos -= l.width |
---|
190 | } |
---|
191 | |
---|
192 | // emit passes an item back to the client. |
---|
193 | func (l *lexer) emit(t itemType) { |
---|
194 | l.items <- item{t, l.input[l.start:l.pos]} |
---|
195 | l.start = l.pos |
---|
196 | } |
---|
197 | |
---|
198 | // ignore skips over the pending input before this point. |
---|
199 | func (l *lexer) ignore() { |
---|
200 | l.start = l.pos |
---|
201 | } |
---|
202 | |
---|
203 | // accept consumes the next rune if it's from the valid set. |
---|
204 | func (l *lexer) accept(valid string) bool { |
---|
205 | if strings.IndexRune(valid, l.next()) >= 0 { |
---|
206 | return true |
---|
207 | } |
---|
208 | l.backup() |
---|
209 | return false |
---|
210 | } |
---|
211 | |
---|
212 | // acceptRun consumes a run of runes from the valid set. |
---|
213 | func (l *lexer) acceptRun(valid string) { |
---|
214 | for strings.IndexRune(valid, l.next()) >= 0 { |
---|
215 | } |
---|
216 | l.backup() |
---|
217 | } |
---|
218 | |
---|
219 | // lineNumber reports which line we're on. Doing it this way |
---|
220 | // means we don't have to worry about peek double counting. |
---|
221 | func (l *lexer) lineNumber() int { |
---|
222 | return 1 + strings.Count(l.input[:l.pos], "\n") |
---|
223 | } |
---|
224 | |
---|
225 | // error returns an error token and terminates the scan by passing |
---|
226 | // back a nil pointer that will be the next state, terminating l.nextItem. |
---|
227 | func (l *lexer) errorf(format string, args ...interface{}) stateFn { |
---|
228 | l.items <- item{itemError, fmt.Sprintf(format, args...)} |
---|
229 | return nil |
---|
230 | } |
---|
231 | |
---|
232 | // nextItem returns the next item from the input. |
---|
233 | func (l *lexer) nextItem() item { |
---|
234 | for { |
---|
235 | select { |
---|
236 | case item := <-l.items: |
---|
237 | return item |
---|
238 | default: |
---|
239 | l.state = l.state(l) |
---|
240 | } |
---|
241 | } |
---|
242 | panic("not reached") |
---|
243 | } |
---|
244 | |
---|
245 | // lex creates a new scanner for the input string. |
---|
246 | func lex(name, input string, vars []string) *lexer { |
---|
247 | l := &lexer{ |
---|
248 | name: name, |
---|
249 | input: input, |
---|
250 | vars: vars, |
---|
251 | state: lexExpr, |
---|
252 | items: make(chan item, 2), // Two items of buffering is sufficient for all state functions |
---|
253 | } |
---|
254 | return l |
---|
255 | } |
---|
256 | |
---|
257 | /***** State Functions *****/ |
---|
258 | |
---|
259 | // lexExpr is the top level scanner |
---|
260 | func lexExpr(l *lexer) stateFn { |
---|
261 | switch r := l.next(); { |
---|
262 | case r == eof || r == '\n': |
---|
263 | return nil |
---|
264 | case isSpace(r): |
---|
265 | l.ignore() |
---|
266 | case r == '+': |
---|
267 | l.emit(itemAdd) |
---|
268 | return lexExpr |
---|
269 | case r == '*': |
---|
270 | l.emit(itemMul) |
---|
271 | return lexExpr |
---|
272 | case r == '^': |
---|
273 | l.emit(itemCarrot) |
---|
274 | return lexExpr |
---|
275 | case r == '/': |
---|
276 | if l.peek() == '/' { |
---|
277 | l.emit(itemDiv2) |
---|
278 | } else { |
---|
279 | l.emit(itemDiv) |
---|
280 | } |
---|
281 | return lexExpr |
---|
282 | case r == '-': |
---|
283 | l.emit(itemNeg) |
---|
284 | return lexExpr |
---|
285 | case r == '(': |
---|
286 | l.emit(itemLParen) |
---|
287 | return lexExpr |
---|
288 | case r == ')': |
---|
289 | l.emit(itemRParen) |
---|
290 | return lexExpr |
---|
291 | case r == '{': |
---|
292 | l.emit(itemLBrack) |
---|
293 | return lexExpr |
---|
294 | case r == '}': |
---|
295 | l.emit(itemRBrack) |
---|
296 | return lexExpr |
---|
297 | case r == '\\': |
---|
298 | // l.backup() |
---|
299 | return lexIdentifier |
---|
300 | case r == '_': |
---|
301 | l.ignore() |
---|
302 | return lexIndex |
---|
303 | case '0' <= r && r <= '9': |
---|
304 | l.backup() |
---|
305 | return lexNumber |
---|
306 | case isAlphaNumeric(r): |
---|
307 | l.backup() |
---|
308 | return lexIdentifier |
---|
309 | default: |
---|
310 | return l.errorf("unrecognized character in action: %#U", r) |
---|
311 | } |
---|
312 | return lexExpr |
---|
313 | } |
---|
314 | |
---|
315 | // lexIdentifier scans an alphanumeric or field. |
---|
316 | func lexIdentifier(l *lexer) stateFn { |
---|
317 | Loop: |
---|
318 | for { |
---|
319 | switch r := l.next(); { |
---|
320 | case isAlphaNumeric(r): |
---|
321 | // absorb. |
---|
322 | case r == '_': |
---|
323 | // absorb and scanIndex |
---|
324 | digits := "0123456789" |
---|
325 | l.acceptRun(digits) |
---|
326 | l.emit(itemIdentifier) |
---|
327 | return lexExpr |
---|
328 | case r == '.' && (l.input[l.start] == '.' || l.input[l.start] == '$'): |
---|
329 | // field chaining; absorb into one token. |
---|
330 | default: |
---|
331 | l.backup() |
---|
332 | word := l.input[l.start:l.pos] |
---|
333 | if !l.atTerminator() { |
---|
334 | return l.errorf("unexpected character %c", r) |
---|
335 | } |
---|
336 | switch { |
---|
337 | case key[word] > itemKeyword: |
---|
338 | l.emit(key[word]) |
---|
339 | default: |
---|
340 | l.emit(itemIdentifier) |
---|
341 | } |
---|
342 | break Loop |
---|
343 | } |
---|
344 | } |
---|
345 | return lexExpr |
---|
346 | } |
---|
347 | |
---|
348 | // atTerminator reports whether the input is at valid termination character to |
---|
349 | // appear after an identifier. Mostly to catch cases like "$x+2" not being |
---|
350 | // acceptable without a space, in case we decide one day to implement |
---|
351 | // arithmetic. |
---|
352 | func (l *lexer) atTerminator() bool { |
---|
353 | r := l.peek() |
---|
354 | if isSpace(r) { |
---|
355 | return true |
---|
356 | } |
---|
357 | switch r { |
---|
358 | case eof, '+', '-', '(', ')', '{', '}', '/', '*', '^': |
---|
359 | return true |
---|
360 | } |
---|
361 | return false |
---|
362 | } |
---|
363 | |
---|
364 | // lexNumber scans a number: decimal, octal, hex, float, or imaginary. This |
---|
365 | // isn't a perfect number scanner - for instance it accepts "." and "0x0.2" |
---|
366 | // and "089" - but when it's wrong the input is invalid and the parser (via |
---|
367 | // strconv) will notice. |
---|
368 | func lexNumber(l *lexer) stateFn { |
---|
369 | if !l.scanNumber() { |
---|
370 | return l.errorf("bad number syntax: %q", l.input[l.start:l.pos]) |
---|
371 | } else { |
---|
372 | l.emit(itemNumber) |
---|
373 | } |
---|
374 | return lexExpr |
---|
375 | } |
---|
376 | |
---|
377 | func lexIndex(l *lexer) stateFn { |
---|
378 | if !l.scanIndex() { |
---|
379 | return l.errorf("bad index syntax: %q", l.input[l.start:l.pos]) |
---|
380 | } else { |
---|
381 | l.emit(itemIndex) |
---|
382 | } |
---|
383 | return lexExpr |
---|
384 | } |
---|
385 | |
---|
386 | func (l *lexer) scanIndex() bool { |
---|
387 | // Optional leading sign. |
---|
388 | digits := "0123456789" |
---|
389 | l.acceptRun(digits) |
---|
390 | |
---|
391 | // Next thing mustn't be alphanumeric. |
---|
392 | // if isAlphaNumeric(l.peek()) { |
---|
393 | // l.next() |
---|
394 | // return false |
---|
395 | // } |
---|
396 | return true |
---|
397 | } |
---|
398 | |
---|
399 | func (l *lexer) scanNumber() bool { |
---|
400 | // Optional leading sign. |
---|
401 | l.accept("+-") |
---|
402 | digits := "0123456789" |
---|
403 | l.acceptRun(digits) |
---|
404 | if l.accept(".") { |
---|
405 | l.acceptRun(digits) |
---|
406 | } |
---|
407 | if l.accept("eE") { |
---|
408 | l.accept("+-") |
---|
409 | l.acceptRun("0123456789") |
---|
410 | } |
---|
411 | // Is it imaginary? |
---|
412 | l.accept("i") |
---|
413 | // Next thing mustn't be alphanumeric. |
---|
414 | // if isAlphaNumeric(l.peek()) { |
---|
415 | // l.next() |
---|
416 | // return false |
---|
417 | // } |
---|
418 | return true |
---|
419 | } |
---|
420 | |
---|
// isSpace reports whether r is a space, tab, newline or carriage return.
func isSpace(r rune) bool {
	return r == ' ' || r == '\t' || r == '\n' || r == '\r'
}
---|
429 | |
---|
// isAlphaNumeric reports whether r is a Unicode letter or a Unicode decimal
// digit. The underscore does not count.
func isAlphaNumeric(r rune) bool {
	if unicode.IsLetter(r) {
		return true
	}
	return unicode.IsDigit(r)
}
---|