1 | package symexpr
|
---|
2 |
|
---|
3 | import (
|
---|
4 | "fmt"
|
---|
5 | "strings"
|
---|
6 | "unicode"
|
---|
7 | "unicode/utf8"
|
---|
8 | )
|
---|
9 |
|
---|
10 | func (l *lexer) run() {
|
---|
11 | for state := lexExpr; state != nil; {
|
---|
12 | state = state(l)
|
---|
13 | }
|
---|
14 | l.items <- item{itemEOF, "eof"}
|
---|
15 | close(l.items) // No more tokens will be delivered.
|
---|
16 | }
|
---|
17 |
|
---|
// item is a single token delivered by the lexer on its items channel.
type item struct {
	typ itemType // kind of token (one of the item* constants)
	val string   // token text: the scanned input slice, the message for itemError, or "eof"
}
|
---|
22 |
|
---|
23 | func (i item) String() string {
|
---|
24 | switch i.typ {
|
---|
25 | case itemEOF:
|
---|
26 | return "EOF"
|
---|
27 | case itemError:
|
---|
28 | return i.val
|
---|
29 | }
|
---|
30 | if len(i.val) > 10 {
|
---|
31 | return fmt.Sprintf("%.10q...", i.val)
|
---|
32 | }
|
---|
33 | return fmt.Sprintf("%d-'%s'", i.typ, i.val)
|
---|
34 | }
|
---|
35 |
|
---|
// itemType identifies the kind of a lexed item.
type itemType int

// The item types, numbered by iota. The order is significant: lexIdentifier
// treats a word as a keyword only when its type compares greater than
// itemKeyword, so every keyword constant must be declared after itemKeyword.
const (
	itemNil itemType = iota // zero value; no token
	itemError               // lexing error; the item's val holds the message
	itemEOF                 // EOF

	itemAdd    // +
	itemMul    // *
	itemDiv    // /
	itemDiv2   // {...}//{...}
	itemNeg    // -
	itemLParen // (
	itemRParen // )
	itemLBrack // {
	itemRBrack // }
	itemCarrot // ^

	itemNumber     // numeric literal, emitted by lexNumber
	itemIndex      // digits following a leading '_', emitted by lexIndex
	itemIdentifier // vars,coeffs,funcs

	itemKeyword // placeholder: marks the start of the keyword types below
	itemFrac    // \frac
	itemCDot    // \cdot (latex multiplication)
	itemSin
	itemCos
	itemTan
	itemAbs
	itemSqrt
	itemExp
	itemLog
)
|
---|
69 |
|
---|
70 | func isLeaf(it itemType) bool {
|
---|
71 | switch it {
|
---|
72 | case itemNumber, itemIdentifier:
|
---|
73 | return true
|
---|
74 | }
|
---|
75 | return false
|
---|
76 | }
|
---|
// isUnary reports whether it is a unary operator token. It currently returns
// false for every input: the itemNeg check below is commented out, and
// isBinary classifies itemNeg as a binary operator instead.
func isUnary(it itemType) bool {
	// switch it {
	// case itemNeg:
	// return true
	// }
	return false
}
|
---|
84 | func isBinary(it itemType) bool {
|
---|
85 | switch it {
|
---|
86 | case itemAdd, itemNeg, itemMul, itemDiv, itemDiv2, itemCarrot:
|
---|
87 | return true
|
---|
88 | }
|
---|
89 | return false
|
---|
90 | }
|
---|
91 |
|
---|
// itemName maps item types to the short names returned by itemType.String.
// Types without an entry here (itemKeyword has none) fall back to the
// "item<N>" form produced by String.
var itemName = map[itemType]string{
	itemNil:   "nil",
	itemError: "error",
	itemEOF:   "EOF",

	// operators and delimiters
	itemAdd:    "add",
	itemMul:    "mul",
	itemDiv:    "div",
	itemDiv2:   "div2",
	itemNeg:    "neg",
	itemLParen: "lParen",
	itemRParen: "rParen",
	itemLBrack: "lBrack",
	itemRBrack: "rBrack",
	itemCarrot: "carrot",

	// literals and names
	itemNumber:     "number",
	itemIndex:      "index",
	itemIdentifier: "identifier",

	// keywords
	itemCDot: "cdot",
	itemFrac: "frac",
	itemSin:  "sin",
	itemCos:  "cos",
	itemTan:  "tan",
	itemAbs:  "abs",
	itemSqrt: "sqrt",
	itemExp:  "exp",
	itemLog:  "ln",
}
|
---|
123 |
|
---|
124 | func (i itemType) String() string {
|
---|
125 | s := itemName[i]
|
---|
126 | if s == "" {
|
---|
127 | return fmt.Sprintf("item%d", int(i))
|
---|
128 | }
|
---|
129 | return s
|
---|
130 | }
|
---|
131 |
|
---|
// key maps the source spelling of each keyword to its item type.
// lexIdentifier looks up every scanned word here; words that are absent map
// to the zero value itemNil and are emitted as plain identifiers.
var key = map[string]itemType{
	// LaTeX commands (the leading backslash is part of the word)
	"\\cdot": itemCDot,
	"\\frac": itemFrac,
	// functions, in lower-case and capitalized spellings
	"sin":  itemSin,
	"Sin":  itemSin,
	"cos":  itemCos,
	"Cos":  itemCos,
	"tan":  itemTan,
	"Tan":  itemTan,
	"abs":  itemAbs,
	"Abs":  itemAbs,
	"sqrt": itemSqrt,
	"Sqrt": itemSqrt,
	"e":    itemExp, // a bare "e" is lexed as the exponential keyword
	"exp":  itemExp,
	"Exp":  itemExp,
	"ln":   itemLog,
}
|
---|
150 |
|
---|
// eof is the sentinel value returned by (*lexer).next once the input is exhausted.
const eof = -1
|
---|
152 |
|
---|
// stateFn represents the state of the scanner as a function that returns the
// next state. A nil stateFn means scanning has finished.
type stateFn func(*lexer) stateFn
|
---|
155 |
|
---|
// lexer holds the state of the scanner.
type lexer struct {
	name      string    // used only for error reports.
	input     string    // the string being scanned.
	vars      []string  // allowable variables
	state     stateFn   // the next lexing function to enter.
	start     int       // start position (byte offset) of this item.
	pos       int       // current position (byte offset) in the input.
	width     int       // width in bytes of last rune read from input; 0 after eof.
	items     chan item // channel of scanned items.
	nextToken item      // not used by the lexer code in this file; presumably a lookahead slot for the parser — TODO confirm
}
|
---|
168 |
|
---|
169 | // next returns the next rune in the input.
|
---|
170 | func (l *lexer) next() (r rune) {
|
---|
171 | if l.pos >= len(l.input) {
|
---|
172 | l.width = 0
|
---|
173 | return eof
|
---|
174 | }
|
---|
175 | r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
|
---|
176 | l.pos += l.width
|
---|
177 | return r
|
---|
178 | }
|
---|
179 |
|
---|
180 | // peek returns but does not consume the next rune in the input.
|
---|
181 | func (l *lexer) peek() rune {
|
---|
182 | r := l.next()
|
---|
183 | l.backup()
|
---|
184 | return r
|
---|
185 | }
|
---|
186 |
|
---|
// backup steps back one rune by rewinding pos by the width recorded in the
// most recent call to next. Can only be called once per call of next: width
// is not cleared, so a second call would rewind by the same amount again.
// After next has returned eof the width is zero and backup does nothing.
func (l *lexer) backup() {
	l.pos -= l.width
}
|
---|
191 |
|
---|
192 | // emit passes an item back to the client.
|
---|
193 | func (l *lexer) emit(t itemType) {
|
---|
194 | l.items <- item{t, l.input[l.start:l.pos]}
|
---|
195 | l.start = l.pos
|
---|
196 | }
|
---|
197 |
|
---|
// ignore skips over the pending input before this point by moving start up
// to pos, so the skipped text is not part of the next emitted item.
func (l *lexer) ignore() {
	l.start = l.pos
}
|
---|
202 |
|
---|
203 | // accept consumes the next rune if it's from the valid set.
|
---|
204 | func (l *lexer) accept(valid string) bool {
|
---|
205 | if strings.IndexRune(valid, l.next()) >= 0 {
|
---|
206 | return true
|
---|
207 | }
|
---|
208 | l.backup()
|
---|
209 | return false
|
---|
210 | }
|
---|
211 |
|
---|
212 | // acceptRun consumes a run of runes from the valid set.
|
---|
213 | func (l *lexer) acceptRun(valid string) {
|
---|
214 | for strings.IndexRune(valid, l.next()) >= 0 {
|
---|
215 | }
|
---|
216 | l.backup()
|
---|
217 | }
|
---|
218 |
|
---|
219 | // lineNumber reports which line we're on. Doing it this way
|
---|
220 | // means we don't have to worry about peek double counting.
|
---|
221 | func (l *lexer) lineNumber() int {
|
---|
222 | return 1 + strings.Count(l.input[:l.pos], "\n")
|
---|
223 | }
|
---|
224 |
|
---|
225 | // error returns an error token and terminates the scan by passing
|
---|
226 | // back a nil pointer that will be the next state, terminating l.nextItem.
|
---|
227 | func (l *lexer) errorf(format string, args ...interface{}) stateFn {
|
---|
228 | l.items <- item{itemError, fmt.Sprintf(format, args...)}
|
---|
229 | return nil
|
---|
230 | }
|
---|
231 |
|
---|
232 | // nextItem returns the next item from the input.
|
---|
233 | func (l *lexer) nextItem() item {
|
---|
234 | for {
|
---|
235 | select {
|
---|
236 | case item := <-l.items:
|
---|
237 | return item
|
---|
238 | default:
|
---|
239 | l.state = l.state(l)
|
---|
240 | }
|
---|
241 | }
|
---|
242 | panic("not reached")
|
---|
243 | }
|
---|
244 |
|
---|
245 | // lex creates a new scanner for the input string.
|
---|
246 | func lex(name, input string, vars []string) *lexer {
|
---|
247 | l := &lexer{
|
---|
248 | name: name,
|
---|
249 | input: input,
|
---|
250 | vars: vars,
|
---|
251 | state: lexExpr,
|
---|
252 | items: make(chan item, 2), // Two items of buffering is sufficient for all state functions
|
---|
253 | }
|
---|
254 | return l
|
---|
255 | }
|
---|
256 |
|
---|
257 | /***** State Functions *****/
|
---|
258 |
|
---|
259 | // lexExpr is the top level scanner
|
---|
260 | func lexExpr(l *lexer) stateFn {
|
---|
261 | switch r := l.next(); {
|
---|
262 | case r == eof || r == '\n':
|
---|
263 | return nil
|
---|
264 | case isSpace(r):
|
---|
265 | l.ignore()
|
---|
266 | case r == '+':
|
---|
267 | l.emit(itemAdd)
|
---|
268 | return lexExpr
|
---|
269 | case r == '*':
|
---|
270 | l.emit(itemMul)
|
---|
271 | return lexExpr
|
---|
272 | case r == '^':
|
---|
273 | l.emit(itemCarrot)
|
---|
274 | return lexExpr
|
---|
275 | case r == '/':
|
---|
276 | if l.peek() == '/' {
|
---|
277 | l.emit(itemDiv2)
|
---|
278 | } else {
|
---|
279 | l.emit(itemDiv)
|
---|
280 | }
|
---|
281 | return lexExpr
|
---|
282 | case r == '-':
|
---|
283 | l.emit(itemNeg)
|
---|
284 | return lexExpr
|
---|
285 | case r == '(':
|
---|
286 | l.emit(itemLParen)
|
---|
287 | return lexExpr
|
---|
288 | case r == ')':
|
---|
289 | l.emit(itemRParen)
|
---|
290 | return lexExpr
|
---|
291 | case r == '{':
|
---|
292 | l.emit(itemLBrack)
|
---|
293 | return lexExpr
|
---|
294 | case r == '}':
|
---|
295 | l.emit(itemRBrack)
|
---|
296 | return lexExpr
|
---|
297 | case r == '\\':
|
---|
298 | // l.backup()
|
---|
299 | return lexIdentifier
|
---|
300 | case r == '_':
|
---|
301 | l.ignore()
|
---|
302 | return lexIndex
|
---|
303 | case '0' <= r && r <= '9':
|
---|
304 | l.backup()
|
---|
305 | return lexNumber
|
---|
306 | case isAlphaNumeric(r):
|
---|
307 | l.backup()
|
---|
308 | return lexIdentifier
|
---|
309 | default:
|
---|
310 | return l.errorf("unrecognized character in action: %#U", r)
|
---|
311 | }
|
---|
312 | return lexExpr
|
---|
313 | }
|
---|
314 |
|
---|
315 | // lexIdentifier scans an alphanumeric or field.
|
---|
316 | func lexIdentifier(l *lexer) stateFn {
|
---|
317 | Loop:
|
---|
318 | for {
|
---|
319 | switch r := l.next(); {
|
---|
320 | case isAlphaNumeric(r):
|
---|
321 | // absorb.
|
---|
322 | case r == '_':
|
---|
323 | // absorb and scanIndex
|
---|
324 | digits := "0123456789"
|
---|
325 | l.acceptRun(digits)
|
---|
326 | l.emit(itemIdentifier)
|
---|
327 | return lexExpr
|
---|
328 | case r == '.' && (l.input[l.start] == '.' || l.input[l.start] == '$'):
|
---|
329 | // field chaining; absorb into one token.
|
---|
330 | default:
|
---|
331 | l.backup()
|
---|
332 | word := l.input[l.start:l.pos]
|
---|
333 | if !l.atTerminator() {
|
---|
334 | return l.errorf("unexpected character %c", r)
|
---|
335 | }
|
---|
336 | switch {
|
---|
337 | case key[word] > itemKeyword:
|
---|
338 | l.emit(key[word])
|
---|
339 | default:
|
---|
340 | l.emit(itemIdentifier)
|
---|
341 | }
|
---|
342 | break Loop
|
---|
343 | }
|
---|
344 | }
|
---|
345 | return lexExpr
|
---|
346 | }
|
---|
347 |
|
---|
348 | // atTerminator reports whether the input is at valid termination character to
|
---|
349 | // appear after an identifier. Mostly to catch cases like "$x+2" not being
|
---|
350 | // acceptable without a space, in case we decide one day to implement
|
---|
351 | // arithmetic.
|
---|
352 | func (l *lexer) atTerminator() bool {
|
---|
353 | r := l.peek()
|
---|
354 | if isSpace(r) {
|
---|
355 | return true
|
---|
356 | }
|
---|
357 | switch r {
|
---|
358 | case eof, '+', '-', '(', ')', '{', '}', '/', '*', '^':
|
---|
359 | return true
|
---|
360 | }
|
---|
361 | return false
|
---|
362 | }
|
---|
363 |
|
---|
364 | // lexNumber scans a number: decimal, octal, hex, float, or imaginary. This
|
---|
365 | // isn't a perfect number scanner - for instance it accepts "." and "0x0.2"
|
---|
366 | // and "089" - but when it's wrong the input is invalid and the parser (via
|
---|
367 | // strconv) will notice.
|
---|
368 | func lexNumber(l *lexer) stateFn {
|
---|
369 | if !l.scanNumber() {
|
---|
370 | return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
|
---|
371 | } else {
|
---|
372 | l.emit(itemNumber)
|
---|
373 | }
|
---|
374 | return lexExpr
|
---|
375 | }
|
---|
376 |
|
---|
377 | func lexIndex(l *lexer) stateFn {
|
---|
378 | if !l.scanIndex() {
|
---|
379 | return l.errorf("bad index syntax: %q", l.input[l.start:l.pos])
|
---|
380 | } else {
|
---|
381 | l.emit(itemIndex)
|
---|
382 | }
|
---|
383 | return lexExpr
|
---|
384 | }
|
---|
385 |
|
---|
386 | func (l *lexer) scanIndex() bool {
|
---|
387 | // Optional leading sign.
|
---|
388 | digits := "0123456789"
|
---|
389 | l.acceptRun(digits)
|
---|
390 |
|
---|
391 | // Next thing mustn't be alphanumeric.
|
---|
392 | // if isAlphaNumeric(l.peek()) {
|
---|
393 | // l.next()
|
---|
394 | // return false
|
---|
395 | // }
|
---|
396 | return true
|
---|
397 | }
|
---|
398 |
|
---|
399 | func (l *lexer) scanNumber() bool {
|
---|
400 | // Optional leading sign.
|
---|
401 | l.accept("+-")
|
---|
402 | digits := "0123456789"
|
---|
403 | l.acceptRun(digits)
|
---|
404 | if l.accept(".") {
|
---|
405 | l.acceptRun(digits)
|
---|
406 | }
|
---|
407 | if l.accept("eE") {
|
---|
408 | l.accept("+-")
|
---|
409 | l.acceptRun("0123456789")
|
---|
410 | }
|
---|
411 | // Is it imaginary?
|
---|
412 | l.accept("i")
|
---|
413 | // Next thing mustn't be alphanumeric.
|
---|
414 | // if isAlphaNumeric(l.peek()) {
|
---|
415 | // l.next()
|
---|
416 | // return false
|
---|
417 | // }
|
---|
418 | return true
|
---|
419 | }
|
---|
420 |
|
---|
// isSpace reports whether r is a space, tab, newline or carriage return.
func isSpace(r rune) bool {
	return r == ' ' || r == '\t' || r == '\n' || r == '\r'
}
|
---|
429 |
|
---|
// isAlphaNumeric reports whether r is a Unicode letter or a Unicode digit.
// The underscore is not included.
func isAlphaNumeric(r rune) bool {
	if unicode.IsLetter(r) {
		return true
	}
	return unicode.IsDigit(r)
}
|
---|