/*******************************************************************************
 * You may amend and distribute as you like, but don't remove this header!
 *
 * EPPlus provides server-side generation of Excel 2007/2010 spreadsheets.
 * See http://www.codeplex.com/EPPlus for details.
 *
 * Copyright (C) 2011 Jan Källman
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.

 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Lesser General Public License for more details.
 *
 * The GNU Lesser General Public License can be viewed at http://www.opensource.org/licenses/lgpl-license.php
 * If you are unfamiliar with this license or have questions about it, here is an FAQ: http://www.gnu.org/licenses/gpl-faq.html
 *
 * All code and executables are provided "as is" with no warranty either express or implied.
 * The author accepts no liability for any damage or loss of business that this product may cause.
 *
 * Code change notes:
 *
 * Author                          Change                                          Date
 * ******************************************************************************
 * Mats Alm                        Added                                           2013-03-01 (Prior file history on https://github.com/swmal/ExcelFormulaParser)
 *******************************************************************************/
31 | using System;
|
---|
32 | using System.Collections.Generic;
|
---|
33 | using System.Globalization;
|
---|
34 | using System.Linq;
|
---|
35 | using System.Text;
|
---|
36 | using System.Text.RegularExpressions;
|
---|
37 | using OfficeOpenXml.FormulaParsing.Excel.Functions;
|
---|
38 | namespace OfficeOpenXml.FormulaParsing.LexicalAnalysis
|
---|
39 | {
|
---|
40 | public class SourceCodeTokenizer : ISourceCodeTokenizer
|
---|
41 | {
|
---|
/// <summary>
/// Gets a tokenizer built from <c>FunctionNameProvider.Empty</c> and
/// <c>NameValueProvider.Empty</c>. A new <see cref="SourceCodeTokenizer"/>
/// is constructed on every access.
/// </summary>
public static ISourceCodeTokenizer Default
{
    get
    {
        var functionNames = FunctionNameProvider.Empty;
        var nameValues = NameValueProvider.Empty;
        return new SourceCodeTokenizer(functionNames, nameValues);
    }
}
46 |
|
---|
// Supplies the separator/operator tokens, looked up by their text.
private readonly ITokenSeparatorProvider _tokenProvider;

// Creates tokens from the text collected between separators.
private readonly ITokenFactory _tokenFactory;

/// <summary>
/// Creates a tokenizer that wraps the supplied providers in a new <c>TokenFactory</c>
/// and pairs it with a new <c>TokenSeparatorProvider</c>.
/// </summary>
/// <param name="functionRepository">Passed to the <c>TokenFactory</c> constructor.</param>
/// <param name="nameValueProvider">Passed to the <c>TokenFactory</c> constructor.</param>
public SourceCodeTokenizer(IFunctionNameProvider functionRepository, INameValueProvider nameValueProvider)
    : this(
        new TokenFactory(functionRepository, nameValueProvider),
        new TokenSeparatorProvider())
{
}

/// <summary>
/// Creates a tokenizer that uses the given factory and separator provider as-is.
/// </summary>
/// <param name="tokenFactory">Factory used to create tokens from collected text.</param>
/// <param name="tokenProvider">Provider of the separator tokens.</param>
public SourceCodeTokenizer(ITokenFactory tokenFactory, ITokenSeparatorProvider tokenProvider)
{
    _tokenProvider = tokenProvider;
    _tokenFactory = tokenFactory;
}
60 |
|
---|
/// <summary>
/// Tokenizes a formula without supplying a worksheet name.
/// </summary>
/// <param name="input">The formula text.</param>
/// <returns>The result of the two-argument overload called with a null worksheet.</returns>
public IEnumerable<Token> Tokenize(string input)
{
    const string noWorksheet = null;
    return Tokenize(input, noWorksheet);
}
/// <summary>
/// Splits a formula into tokens by walking it character by character.
/// </summary>
/// <param name="input">The formula text. Leading '+' characters are ignored.</param>
/// <param name="worksheet">Worksheet name forwarded to the token factory when tokens are created.</param>
/// <returns>
/// An empty sequence when <paramref name="input"/> is null or empty; otherwise the
/// token list accumulated in the tokenizer context after cleanup.
/// </returns>
public IEnumerable<Token> Tokenize(string input, string worksheet)
{
    if (string.IsNullOrEmpty(input))
    {
        return Enumerable.Empty<Token>();
    }

    // MA 1401: Ignore leading plus in formula.
    var context = new TokenizerContext(input.TrimStart('+'));

    for (var index = 0; index < context.FormulaChars.Length; index++)
    {
        var current = context.FormulaChars[index];
        Token separator;

        // Ordinary characters simply extend the token being collected.
        if (!CharIsTokenSeparator(current, out separator))
        {
            context.AppendToCurrentToken(current);
            continue;
        }

        if (context.IsInString)
        {
            // A doubled quote inside a string: keep one quote and skip the second character.
            if (IsDoubleQuote(separator, index, context))
            {
                index++;
                context.AppendToCurrentToken(current);
                continue;
            }

            // Any separator other than the quote itself is plain string content.
            if (separator.TokenType != TokenType.String)
            {
                context.AppendToCurrentToken(current);
                continue;
            }
        }

        // Bracket characters are kept in the current token and tracked by a counter.
        var opensBracket = separator.TokenType == TokenType.OpeningBracket;
        if (opensBracket || separator.TokenType == TokenType.ClosingBracket)
        {
            context.AppendToCurrentToken(current);
            if (opensBracket)
            {
                context.BracketCount++;
            }
            else
            {
                context.BracketCount--;
            }
            continue;
        }

        // While inside brackets, separators are part of the current token.
        if (context.BracketCount > 0)
        {
            context.AppendToCurrentToken(current);
            continue;
        }

        // Two operators in sequence could be "<=" or ">=": merge them into one token.
        if (IsPartOfMultipleCharSeparator(context, current))
        {
            var combined = context.LastToken.Value + current.ToString(CultureInfo.InvariantCulture);
            context.ReplaceLastToken(_tokenProvider.Tokens[combined]);
            context.NewToken();
            continue;
        }

        if (separator.TokenType == TokenType.String)
        {
            var previous = context.LastToken;

            // Directly after an opening enumerable the quote is kept in the current token.
            if (previous != null && previous.TokenType == TokenType.OpeningEnumerable)
            {
                context.AppendToCurrentToken(current);
                context.ToggleIsInString();
                continue;
            }

            // The previous token was a quote, so this quote ends the string:
            // emit the collected content (possibly empty) first.
            if (previous != null && previous.TokenType == TokenType.String)
            {
                var content = context.CurrentTokenHasValue ? context.CurrentToken : string.Empty;
                context.AddToken(new Token(content, TokenType.StringContent));
            }

            context.AddToken(new Token("\"", TokenType.String));
            context.ToggleIsInString();
            context.NewToken();
            continue;
        }

        // Flush the text collected before this separator.
        if (context.CurrentTokenHasValue)
        {
            var pending = Regex.IsMatch(context.CurrentToken, "^\"*$")
                ? _tokenFactory.Create(context.CurrentToken, TokenType.StringContent)
                : CreateToken(context, worksheet);
            context.AddToken(pending);

            // If the next token is an opening parenthesis and the token just added was
            // interpreted as an address or name, then that token is a function.
            if (separator.TokenType == TokenType.OpeningParenthesis
                && (context.LastToken.TokenType == TokenType.ExcelAddress
                    || context.LastToken.TokenType == TokenType.NameValue))
            {
                context.LastToken.TokenType = TokenType.Function;
            }
        }

        // A "-" in negator position is added as a Negator token instead of the separator token.
        if (separator.Value == "-" && TokenIsNegator(context))
        {
            context.AddToken(new Token("-", TokenType.Negator));
            continue;
        }

        context.AddToken(separator);
        context.NewToken();
    }

    // Flush whatever text remains after the last separator.
    if (context.CurrentTokenHasValue)
    {
        context.AddToken(CreateToken(context, worksheet));
    }

    CleanupTokens(context, _tokenProvider.Tokens);

    return context.Result;
}
180 |
|
---|
/// <summary>
/// Tells whether the quote separator at <paramref name="formulaCharIndex"/> is
/// immediately followed by another quote character in the formula.
/// </summary>
/// <param name="tokenSeparator">The separator token found at the current position.</param>
/// <param name="formulaCharIndex">Index of the current character in <c>context.FormulaChars</c>.</param>
/// <param name="context">The tokenizer context holding the formula characters.</param>
/// <returns>True only for a String separator that has a quote as the next character.</returns>
private static bool IsDoubleQuote(Token tokenSeparator, int formulaCharIndex, TokenizerContext context)
{
    if (tokenSeparator.TokenType != TokenType.String)
    {
        return false;
    }

    var nextIndex = formulaCharIndex + 1;
    if (nextIndex >= context.FormulaChars.Length)
    {
        // The quote is the last character, so nothing can follow it.
        return false;
    }

    return context.FormulaChars[nextIndex] == '\"';
}
185 |
|
---|
186 |
|
---|
/// <summary>
/// Post-processes <c>context.Result</c> in place: gives every
/// <c>TokenType.Unrecognized</c> token a final type and collapses runs of
/// adjacent "+" / "-" sign tokens.
/// </summary>
/// <param name="context">The tokenizer context whose <c>Result</c> list is rewritten.</param>
/// <param name="tokens">Lookup of the shared operator tokens; read with the keys "+" and "-".</param>
private static void CleanupTokens(TokenizerContext context, IDictionary<string, Token> tokens)
{
    for (int i = 0; i < context.Result.Count; i++)
    {
        var token = context.Result[i];
        if (token.TokenType == TokenType.Unrecognized)
        {
            // An unrecognized token directly followed by "(" becomes a function;
            // in every other position it becomes a name value.
            var followedByParenthesis = i < context.Result.Count - 1
                && context.Result[i + 1].TokenType == TokenType.OpeningParenthesis;
            token.TokenType = followedByParenthesis ? TokenType.Function : TokenType.NameValue;
        }
        else if ((token.TokenType == TokenType.Operator || token.TokenType == TokenType.Negator)
                 && i < context.Result.Count - 1
                 && (token.Value == "+" || token.Value == "-"))
        {
            // Remove any + with an opening parenthesis before.
            if (i > 0 && token.Value == "+"
                && context.Result[i - 1].TokenType == TokenType.OpeningParenthesis)
            {
                context.Result.RemoveAt(i);
                SetNegatorOperator(context, i, tokens);
                i--; // re-examine the token that moved into this slot
                continue;
            }

            var nextToken = context.Result[i + 1];
            if (nextToken.TokenType == TokenType.Operator || nextToken.TokenType == TokenType.Negator)
            {
                if (token.Value == "+" && (nextToken.Value == "+" || nextToken.Value == "-"))
                {
                    // "+" followed by a sign: remove the first.
                    context.Result.RemoveAt(i);
                    SetNegatorOperator(context, i, tokens);
                    i--;
                }
                else if (token.Value == "-" && nextToken.Value == "+")
                {
                    // "-" followed by "+": remove the second.
                    context.Result.RemoveAt(i + 1);
                    SetNegatorOperator(context, i, tokens);
                    i--;
                }
                else if (token.Value == "-" && nextToken.Value == "-")
                {
                    // "-" followed by "-": remove the first ...
                    context.Result.RemoveAt(i);
                    if (i == 0)
                    {
                        // ... and at the very start of the formula drop the second one too.
                        // After the removal above the second "-" has shifted down to index i,
                        // so it must be removed at i; removing at i + 1 would delete the
                        // operand (or throw when nothing follows).
                        context.Result.RemoveAt(i);
                        i--; // continue with whatever now sits at index 0
                    }
                    else
                    {
                        // ... and turn the second into a "+" operator.
                        context.Result[i] = tokens["+"];
                        SetNegatorOperator(context, i, tokens);
                        i--;
                    }
                }
            }
        }
    }
}
263 |
|
---|
/// <summary>
/// Re-classifies the "-" token at index <paramref name="i"/> of <c>context.Result</c>
/// as either a Negator or the shared "-" operator token, depending on the token before it.
/// Does nothing for index 0, for tokens whose value is not "-", or for tokens that are
/// neither Operator nor Negator.
/// </summary>
/// <param name="context">The tokenizer context whose <c>Result</c> list is updated.</param>
/// <param name="i">Index of the token to inspect.</param>
/// <param name="tokens">Lookup of the shared operator tokens; read with the key "-".</param>
private static void SetNegatorOperator(TokenizerContext context, int i, IDictionary<string, Token> tokens)
{
    var candidate = context.Result[i];
    if (candidate.Value != "-" || i <= 0)
    {
        return;
    }

    if (candidate.TokenType != TokenType.Operator && candidate.TokenType != TokenType.Negator)
    {
        return;
    }

    // The preceding token decides whether this minus negates or subtracts.
    context.Result[i] = TokenIsNegator(context.Result[i - 1])
        ? new Token("-", TokenType.Negator)
        : tokens["-"];
}
278 |
|
---|
/// <summary>
/// Applies the negator-position check to the last token recorded in the context.
/// </summary>
/// <param name="context">The tokenizer context whose <c>LastToken</c> is inspected.</param>
/// <returns>The result of the token-based overload for <c>context.LastToken</c>.</returns>
private static bool TokenIsNegator(TokenizerContext context)
{
    var previous = context.LastToken;
    return TokenIsNegator(previous);
}
/// <summary>
/// Decides whether a "-" that follows <paramref name="t"/> should be treated as a negator.
/// </summary>
/// <param name="t">The preceding token, or null when there is none.</param>
/// <returns>
/// True when there is no preceding token, or when it is an Operator, OpeningParenthesis,
/// Comma, SemiColon or OpeningEnumerable token; otherwise false.
/// </returns>
private static bool TokenIsNegator(Token t)
{
    if (t == null)
    {
        return true;
    }

    var type = t.TokenType;
    return type == TokenType.Operator
        || type == TokenType.OpeningParenthesis
        || type == TokenType.Comma
        || type == TokenType.SemiColon
        || type == TokenType.OpeningEnumerable;
}
297 |
|
---|
/// <summary>
/// Checks whether <paramref name="c"/> completes a multi-character operator that
/// started with the last token (for example the second character of "&lt;=").
/// </summary>
/// <param name="context">The tokenizer context supplying the last token and current-token state.</param>
/// <param name="c">The separator character being examined.</param>
/// <returns>
/// True when the last token's value is an operator, <paramref name="c"/> is a possible
/// last part of a multi-character operator, and no text has been collected in the current token.
/// </returns>
private bool IsPartOfMultipleCharSeparator(TokenizerContext context, char c)
{
    var previous = context.LastToken;
    var previousValue = previous == null ? string.Empty : previous.Value;

    if (!_tokenProvider.IsOperator(previousValue))
    {
        return false;
    }

    if (!_tokenProvider.IsPossibleLastPartOfMultipleCharOperator(c.ToString(CultureInfo.InvariantCulture)))
    {
        return false;
    }

    return !context.CurrentTokenHasValue;
}
305 |
|
---|
/// <summary>
/// Creates the token for the text currently collected in the context.
/// </summary>
/// <param name="context">The tokenizer context supplying the current text and previous tokens.</param>
/// <param name="worksheet">Worksheet name forwarded to the token factory.</param>
/// <returns>
/// A Negator token when the collected text is "-" and there is either no previous token
/// or the previous token is an Operator; otherwise the token produced by the token factory.
/// </returns>
private Token CreateToken(TokenizerContext context, string worksheet)
{
    if (context.CurrentToken == "-")
    {
        var previous = context.LastToken;

        // The original condition joined these two checks with &&, which dereferenced
        // a null LastToken and could never be true for a non-null one.
        if (previous == null || previous.TokenType == TokenType.Operator)
        {
            return new Token("-", TokenType.Negator);
        }
    }

    return _tokenFactory.Create(context.Result, context.CurrentToken, worksheet);
}
317 |
|
---|
/// <summary>
/// Looks up <paramref name="c"/> among the separator tokens of the token provider.
/// </summary>
/// <param name="c">The character to look up.</param>
/// <param name="token">
/// Receives the separator token registered for the character, or null when there is none.
/// </param>
/// <returns>True when the character is a registered token separator.</returns>
private bool CharIsTokenSeparator(char c, out Token token)
{
    // A single TryGetValue replaces the ContainsKey + indexer pair (two lookups);
    // on a miss it leaves token at its default value, null.
    return _tokenProvider.Tokens.TryGetValue(c.ToString(), out token);
}
324 | }
|
---|
325 | }
|
---|