// Copyright (c) 2009-2013 AlphaSierraPapa for the SharpDevelop Team
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE
// FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.
---|
18 | |
---|
19 | using System; |
---|
20 | using System.Collections.Generic; |
---|
21 | using System.Diagnostics; |
---|
22 | using System.Globalization; |
---|
23 | using System.Linq; |
---|
24 | using System.Text; |
---|
25 | using System.Threading; |
---|
26 | using ICSharpCode.NRefactory.Editor; |
---|
27 | |
---|
28 | namespace ICSharpCode.NRefactory.Xml |
---|
29 | { |
---|
30 | class TagReader : TokenReader |
---|
31 | { |
---|
// Parser that created this reader; Dereference reads its UnknownEntityReferenceIsError setting.
readonly AXmlParser tagSoupParser;
// Names of the start tags that are still open. Null when properly nested
// elements are not being collapsed (StoreObject then leaves tags untouched).
readonly Stack<string> elementNameStack;

/// <summary>
/// Creates a reader over <paramref name="input"/>.
/// </summary>
/// <param name="tagSoupParser">The parser this reader works for.</param>
/// <param name="input">The text to read; handed to the base reader.</param>
/// <param name="collapseProperlyNestedElements">
/// When true, matching start/end tag pairs are combined into elements while reading.
/// </param>
public TagReader(AXmlParser tagSoupParser, ITextSource input, bool collapseProperlyNestedElements)
	: base(input)
{
	this.tagSoupParser = tagSoupParser;
	this.elementNameStack = collapseProperlyNestedElements ? new Stack<string>() : null;
}
---|
41 | |
---|
/// <summary>
/// Reads objects until no input is left and returns the list of stored objects.
/// </summary>
/// <param name="cancellationToken">Checked before every object that is read.</param>
public List<InternalObject> ReadAllObjects(CancellationToken cancellationToken)
{
	while (true) {
		if (!HasMoreData()) {
			return objects;
		}
		cancellationToken.ThrowIfCancellationRequested();
		ReadObject();
	}
}
---|
50 | |
---|
/// <summary>
/// Reads all objects, taking objects from a previous parse wherever
/// <paramref name="reuseMap"/> says the text is unchanged and the old
/// objects line up with the current read position.
/// </summary>
/// <param name="oldObjects">Objects produced by the previous parse.</param>
/// <param name="reuseMap">Segments that map new offsets to old offsets.</param>
/// <param name="cancellationToken">
/// Checked on every pass of the outer loop and before every object read in the inner loops.
/// </param>
/// <returns>The list of stored objects.</returns>
public List<InternalObject> ReadAllObjectsIncremental(InternalObject[] oldObjects, List<UnchangedSegment> reuseMap, CancellationToken cancellationToken)
{
	ObjectIterator oldObjectIterator = new ObjectIterator(oldObjects);
	int reuseMapIndex = 0;
	while (reuseMapIndex < reuseMap.Count) {
		// The reuse loop and the re-align branch below do not check the token
		// themselves, so honour cancellation once per pass here.
		cancellationToken.ThrowIfCancellationRequested();

		var reuseEntry = reuseMap[reuseMapIndex];
		// Parse normally until we reach the unchanged segment.
		while (this.CurrentLocation < reuseEntry.NewOffset) {
			cancellationToken.ThrowIfCancellationRequested();
			ReadObject();
		}
		if (this.CurrentLocation >= reuseEntry.NewOffset + reuseEntry.Length) {
			// We already read past this segment; nothing left in it to reuse.
			reuseMapIndex++;
			continue;
		}
		Debug.Assert(reuseEntry.NewOffset <= this.CurrentLocation && this.CurrentLocation < reuseEntry.NewOffset + reuseEntry.Length);
		// reuse the nodes within this reuseEntry starting at oldOffset:
		int oldOffset = this.CurrentLocation - reuseEntry.NewOffset + reuseEntry.OldOffset;
		// seek to oldOffset in the oldObjects array:
		oldObjectIterator.SkipTo(oldOffset);
		if (oldObjectIterator.CurrentPosition == oldOffset) {
			// reuse old objects within this reuse entry:
			int reuseEnd = reuseEntry.OldOffset + reuseEntry.Length;
			// Only take objects whose touched range ends strictly inside the segment.
			while (oldObjectIterator.CurrentObject != null && oldObjectIterator.CurrentPosition + oldObjectIterator.CurrentObject.LengthTouched < reuseEnd) {
				StoreObject(oldObjectIterator.CurrentObject);
				Skip(oldObjectIterator.CurrentObject.Length);
				oldObjectIterator.MoveNext();
			}
			reuseMapIndex++; // go to next re-use map
		} else {
			// We are in a region where old objects are available, but aren't aligned correctly.
			// Don't skip this reuse entry, and read a single object so that we can re-align
			ReadObject();
		}
	}
	// Parse whatever follows the last reuse entry.
	while (HasMoreData()) {
		cancellationToken.ThrowIfCancellationRequested();
		ReadObject();
	}
	return objects;
}
---|
91 | |
---|
/// <summary>
/// Appends <paramref name="obj"/> to the object list and, when element collapsing
/// is enabled, combines properly nested tags into InternalElement objects:
/// an empty tag becomes an element on its own, and an end tag whose name matches
/// the innermost open start tag is merged with everything back to that start tag.
/// </summary>
void StoreObject(InternalObject obj)
{
	objects.Add(obj);

	// Now combine properly-nested elements:
	if (elementNameStack == null)
		return; // parsing tag soup
	InternalTag tag = obj as InternalTag;
	if (tag == null)
		return;
	if (tag.IsEmptyTag) {
		// the tag is its own element
		objects[objects.Count - 1] = new InternalElement(tag) {
			Length = tag.Length,
			LengthTouched = tag.LengthTouched,
			IsPropertyNested = true,
			StartRelativeToParent = tag.StartRelativeToParent,
			NestedObjects = new [] { tag.SetStartRelativeToParent(0) }
		};
	} else if (tag.IsStartTag) {
		elementNameStack.Push(tag.Name);
	} else if (tag.IsEndTag && elementNameStack.Count > 0) {
		// Now look for the start element:
		int startIndex = objects.Count - 2;
		bool ok = false;
		string expectedName = elementNameStack.Pop();
		if (tag.Name == expectedName) {
			// Walk backwards to the nearest start or end tag. Index 0 must be
			// included, otherwise an element that begins with the very first
			// object in the list is never recognized as properly nested.
			while (startIndex >= 0) {
				var startTag = objects[startIndex] as InternalTag;
				if (startTag != null) {
					if (startTag.IsStartTag) {
						ok = (startTag.Name == expectedName);
						break;
					} else if (startTag.IsEndTag) {
						// An unmatched end tag in between: not properly nested.
						break;
					}
				}
				startIndex--;
			}
		}
		if (ok) {
			// We found a correct nesting, let's create an element:
			InternalObject[] nestedObjects = new InternalObject[objects.Count - startIndex];
			int oldStartRelativeToParent = objects[startIndex].StartRelativeToParent;
			int pos = 0;
			int maxLengthTouched = 0;
			// Re-base every child on the new element and accumulate its extent.
			for (int i = 0; i < nestedObjects.Length; i++) {
				nestedObjects[i] = objects[startIndex + i].SetStartRelativeToParent(pos);
				maxLengthTouched = Math.Max(maxLengthTouched, pos + nestedObjects[i].LengthTouched);
				pos += nestedObjects[i].Length;
			}
			objects.RemoveRange(startIndex, nestedObjects.Length);
			objects.Add(
				new InternalElement((InternalTag)nestedObjects[0]) {
					HasEndTag = true,
					IsPropertyNested = true,
					Length = pos,
					LengthTouched = maxLengthTouched,
					StartRelativeToParent = oldStartRelativeToParent,
					NestedObjects = nestedObjects
				});
		} else {
			// Mismatched name - the elements are not properly nested;
			// clear the whole stack so that none of the currently open elements are closed as properly-nested.
			elementNameStack.Clear();
		}
	}
}
---|
160 | |
---|
/// <summary>
/// Reads one or more objects: a tag when the next character is '&lt;',
/// character data otherwise.
/// </summary>
void ReadObject()
{
	if (!TryPeek('<')) {
		ReadText(TextType.CharacterData);
		return;
	}
	ReadTag();
}
---|
172 | |
---|
173 | #region BeginInternalObject / EndInternalObject |
---|
// Objects stored so far. Nested readers temporarily append children here
// and move them out again (see ReadTag).
List<InternalObject> objects = new List<InternalObject>();
// Absolute offset the object currently being read is measured from.
int internalObjectStartPosition;

/// <summary>
/// Current read position relative to <c>internalObjectStartPosition</c>.
/// </summary>
int CurrentRelativeLocation {
	get { return this.CurrentLocation - this.internalObjectStartPosition; }
}
---|
180 | |
---|
/// <summary>
/// Pairs an object that is being read with the start position that was
/// in effect before it was begun, so that EndInternalObject can restore it.
/// </summary>
struct InternalObjectFrame
{
	/// <summary>The object being read.</summary>
	public readonly InternalObject InternalObject;
	/// <summary>Value of internalObjectStartPosition to restore afterwards.</summary>
	public readonly int ParentStartPosition;

	public InternalObjectFrame(InternalObject internalObject, int parentStartPosition)
	{
		this.ParentStartPosition = parentStartPosition;
		this.InternalObject = internalObject;
	}
}
---|
192 | |
---|
/// <summary>
/// Begins <paramref name="internalObject"/> at the current read position.
/// </summary>
/// <returns>The frame to pass to EndInternalObject.</returns>
InternalObjectFrame BeginInternalObject(InternalObject internalObject)
{
	int beginLocation = this.CurrentLocation;
	return BeginInternalObject(internalObject, beginLocation);
}
---|
197 | |
---|
/// <summary>
/// Begins <paramref name="internalObject"/> at <paramref name="beginLocation"/>,
/// which may lie before the current read position (MakeText creates text for a
/// range that has already been consumed).
/// </summary>
/// <param name="internalObject">The object to begin; its StartRelativeToParent is set here.</param>
/// <param name="beginLocation">Absolute offset at which the object starts.</param>
/// <returns>The frame to pass to EndInternalObject; it remembers the parent's start position.</returns>
InternalObjectFrame BeginInternalObject(InternalObject internalObject, int beginLocation)
{
	internalObject.StartRelativeToParent = beginLocation - internalObjectStartPosition;

	var frame = new InternalObjectFrame(internalObject, internalObjectStartPosition);

	// The object is measured from where it begins, not from where the reader
	// happens to be. Using the current location here would give an object
	// begun retroactively a length of zero in EndInternalObject and would
	// base its syntax-error offsets on the wrong position.
	internalObjectStartPosition = beginLocation;
	return frame;
}
---|
207 | |
---|
/// <summary>
/// Finishes the object in <paramref name="frame"/>: records its length, touched
/// length and pending syntax errors, optionally stores it, and restores the
/// parent's start position.
/// </summary>
/// <param name="frame">Frame returned by the matching BeginInternalObject call.</param>
/// <param name="storeNewObject">When false the finished object is not passed to StoreObject.</param>
void EndInternalObject(InternalObjectFrame frame, bool storeNewObject = true)
{
	InternalObject finished = frame.InternalObject;
	int objectStart = internalObjectStartPosition;

	finished.Length = this.CurrentLocation - objectStart;
	finished.LengthTouched = this.MaxTouchedLocation - objectStart;
	finished.SyntaxErrors = GetSyntaxErrors();
	if (storeNewObject) {
		StoreObject(finished);
	}
	internalObjectStartPosition = frame.ParentStartPosition;
}
---|
217 | #endregion |
---|
218 | |
---|
219 | #region Read Tag |
---|
/// <summary>
/// Reads one tag (start, end, empty, comment, CDATA, processing instruction,
/// DOCTYPE or unknown "&lt;!" tag) including its attributes or content, and
/// stores it. Syntax errors are recorded, not thrown.
/// Context: "&lt;"
/// </summary>
void ReadTag()
{
	AssertHasMoreData();

	int tagStart = this.CurrentLocation;
	InternalTag tag = new InternalTag();
	var frame = BeginInternalObject(tag);

	// Read the opening bracket
	// It identifies the type of tag and parsing behavior for the rest of it
	tag.OpeningBracket = ReadOpeningBracket();

	if (tag.IsUnknownBang && !TryPeekWhiteSpace())
		OnSyntaxError(tagStart, this.CurrentLocation, "Unknown tag");

	if (tag.IsStartOrEmptyTag || tag.IsEndTag || tag.IsProcessingInstruction) {
		// Read the name
		TryMoveToNonWhiteSpace();
		tag.RelativeNameStart = this.CurrentRelativeLocation;
		string name;
		if (TryReadName(out name)) {
			if (!IsValidName(name)) {
				OnSyntaxError(this.CurrentLocation - name.Length, this.CurrentLocation, "The name '{0}' is invalid", name);
			}
		} else {
			OnSyntaxError("Element name expected");
		}
		tag.Name = name;
	} else {
		tag.Name = string.Empty;
	}

	bool isXmlDeclr = tag.Name == "xml" && tag.IsProcessingInstruction;
	// Everything stored beyond this index while reading the tag body belongs to the tag.
	int oldObjectCount = objects.Count;

	if (tag.IsStartOrEmptyTag || tag.IsEndTag || isXmlDeclr) {
		// Read attributes for the tag
		while (HasMoreData()) {
			// Check for all forbidden 'name' characters first - see ReadName
			TryMoveToNonWhiteSpace();
			if (TryPeek('<')) break;
			string endBr;
			int endBrStart = this.CurrentLocation; // Just peek
			if (TryReadClosingBracket(out endBr)) { // End tag
				GoBack(endBrStart);
				break;
			}

			// We have "=\'\"" or name - read attribute
			int attrStartOffset = this.CurrentLocation;
			ReadAttribute();
			if (tag.IsEndTag)
				OnSyntaxError(attrStartOffset, this.CurrentLocation, "Attribute not allowed in end tag.");
		}
	} else if (tag.IsDocumentType) {
		ReadContentOfDTD();
	} else {
		int start = this.CurrentLocation;
		if (tag.IsComment) {
			ReadText(TextType.Comment);
		} else if (tag.IsCData) {
			ReadText(TextType.CData);
		} else if (tag.IsProcessingInstruction) {
			ReadText(TextType.ProcessingInstruction);
		} else if (tag.IsUnknownBang) {
			ReadText(TextType.UnknownBang);
		} else {
			throw new InternalException(string.Format(CultureInfo.InvariantCulture, "Unknown opening bracket '{0}'", tag.OpeningBracket));
		}
		// Backtrack at complete start
		if (IsEndOfFile() || (tag.IsUnknownBang && TryPeek('<'))) {
			GoBack(start);
			objects.RemoveRange(oldObjectCount, objects.Count - oldObjectCount);
		}
	}

	// Read closing bracket
	string bracket;
	TryReadClosingBracket(out bracket);
	tag.ClosingBracket = bracket;

	// Error check
	int brStart = this.CurrentLocation - (tag.ClosingBracket ?? string.Empty).Length;
	int brEnd = this.CurrentLocation;
	if (tag.Name == null) {
		// One error was reported already
	} else if (tag.IsStartOrEmptyTag) {
		if (tag.ClosingBracket != ">" && tag.ClosingBracket != "/>") OnSyntaxError(brStart, brEnd, "'>' or '/>' expected");
	} else if (tag.IsEndTag) {
		if (tag.ClosingBracket != ">") OnSyntaxError(brStart, brEnd, "'>' expected");
	} else if (tag.IsComment) {
		if (tag.ClosingBracket != "-->") OnSyntaxError(brStart, brEnd, "'-->' expected");
	} else if (tag.IsCData) {
		if (tag.ClosingBracket != "]]>") OnSyntaxError(brStart, brEnd, "']]>' expected");
	} else if (tag.IsProcessingInstruction) {
		if (tag.ClosingBracket != "?>") OnSyntaxError(brStart, brEnd, "'?>' expected");
	} else if (tag.IsUnknownBang) {
		if (tag.ClosingBracket != ">") OnSyntaxError(brStart, brEnd, "'>' expected");
	} else if (tag.IsDocumentType) {
		if (tag.ClosingBracket != ">") OnSyntaxError(brStart, brEnd, "'>' expected");
	} else {
		throw new InternalException(string.Format(CultureInfo.InvariantCulture, "Unknown opening bracket '{0}'", tag.OpeningBracket));
	}

	// An attribute name may not appear multiple times
	if (objects.Count > oldObjectCount) {
		// Move nested objects into tag.NestedObjects:
		tag.NestedObjects = new InternalObject[objects.Count - oldObjectCount];
		objects.CopyTo(oldObjectCount, tag.NestedObjects, 0, tag.NestedObjects.Length);
		objects.RemoveRange(oldObjectCount, objects.Count - oldObjectCount);

		// Look for duplicate attributes:
		HashSet<string> attributeNames = new HashSet<string>();
		foreach (var obj in tag.NestedObjects) {
			InternalAttribute attr = obj as InternalAttribute;
			// An attribute whose name could not be read was already reported by
			// ReadAttribute; skip it so that a missing (null) name is never
			// dereferenced below.
			if (attr == null || attr.Name == null)
				continue;
			if (!attributeNames.Add(attr.Name)) {
				int attrStart = tagStart + attr.StartRelativeToParent;
				OnSyntaxError(attrStart, attrStart + attr.Name.Length, "Attribute with name '{0}' already exists", attr.Name);
			}
		}
	}

	EndInternalObject(frame);
}
---|
347 | #endregion |
---|
348 | |
---|
349 | #region Read DTD |
---|
/// <summary>
/// Reads the content of a DOCTYPE declaration up to (not including) its closing
/// '&gt;' or up to the next '&lt;' outside the internal subset. Tags inside a
/// "[...]" subset are read with ReadTag; everything else is stored as text.
/// </summary>
void ReadContentOfDTD()
{
	// Start of the text that has not been turned into an object yet.
	int pendingTextStart = this.CurrentLocation;
	while (HasMoreData()) {
		TryMoveToNonWhiteSpace();           // Skip whitespace
		if (TryRead('\'')) {
			TryMoveTo('\'');                // Skip single quoted string TODO: Bug
		}
		if (TryRead('\"')) {
			TryMoveTo('\"');                // Skip double quoted string
		}
		if (TryRead('[')) {                 // Start of nested infoset
			// Reading infoset
			while (HasMoreData()) {
				TryMoveToAnyOf('<', ']');
				if (TryPeek('<')) {
					// Flush the text in front of the tag, unless two tags follow each other.
					if (pendingTextStart != this.CurrentLocation) {
						MakeText(pendingTextStart, this.CurrentLocation);
					}
					ReadTag();
					pendingTextStart = this.CurrentLocation;
				}
				if (TryPeek(']')) {
					break;
				}
			}
		}
		TryRead(']');                       // End of nested infoset
		// '>' is the proper closing; '<' means malformed XML. Stop at either.
		if (TryPeek('>') || TryPeek('<')) {
			break;
		}
		TryMoveNext();                      // Skip anything else
	}
	if (pendingTextStart != this.CurrentLocation) {
		MakeText(pendingTextStart, this.CurrentLocation);
	}
}
---|
380 | |
---|
/// <summary>
/// Stores a text object of type <c>TextType.Other</c> for the already consumed
/// range [<paramref name="start"/>, <paramref name="end"/>).
/// </summary>
/// <param name="start">Absolute start offset of the text.</param>
/// <param name="end">Absolute end offset; asserted to equal the current location.</param>
void MakeText(int start, int end)
{
	Log.DebugAssert(end > start, "Empty text");
	Log.DebugAssert(end == this.CurrentLocation, "end == current location");

	var textObject = new InternalText();
	InternalObjectFrame textFrame = BeginInternalObject(textObject, start);
	textObject.Type = TextType.Other;
	textObject.Value = GetText(start, end);
	EndInternalObject(textFrame);
}
---|
392 | #endregion |
---|
393 | |
---|
394 | #region Read Brackets |
---|
/// <summary>
/// Reads any of the known opening brackets (only a full bracket) and returns it.
/// Context: "&lt;"
/// </summary>
/// <returns>
/// One of "&lt;/", "&lt;?", "&lt;!--", "&lt;![CDATA[", an entry of
/// <c>AXmlTag.DtdNames</c>, "&lt;!" or "&lt;".
/// </returns>
/// <exception cref="InternalException">The next character is not '&lt;'.</exception>
string ReadOpeningBracket()
{
	// Only string literals are returned so that the memory instances are shared.
	if (!TryRead('<')) {
		throw new InternalException("'<' expected");
	}
	if (TryRead('/')) {
		return "</";
	}
	if (TryRead('?')) {
		return "<?";
	}
	if (!TryRead('!')) {
		return "<";
	}

	// We are past "<!" - find out which kind of declaration it is.
	if (TryRead("--")) {
		return "<!--";
	}
	if (TryRead("[CDATA[")) {
		return "<![CDATA[";
	}
	foreach (string dtdName in AXmlTag.DtdNames) {
		// the dtdName includes "<!", which has been consumed already
		string remainder = dtdName.Substring(2);
		if (TryRead(remainder)) {
			return dtdName;
		}
	}
	return "<!";
}
---|
427 | |
---|
/// <summary>
/// Reads any of the known closing brackets (only a full bracket).
/// Context: any
/// </summary>
/// <param name="bracket">The bracket that was read, or the empty string when none was.</param>
/// <returns>True when a closing bracket was consumed.</returns>
bool TryReadClosingBracket(out string bracket)
{
	// Only string literals are assigned so that the memory instances are shared.
	if (TryRead('>')) {
		bracket = ">";
		return true;
	}
	if (TryRead("/>")) {
		bracket = "/>";
		return true;
	}
	if (TryRead("?>")) {
		bracket = "?>";
		return true;
	}
	if (TryRead("-->")) {
		bracket = "-->";
		return true;
	}
	if (TryRead("]]>")) {
		bracket = "]]>";
		return true;
	}
	bracket = string.Empty;
	return false;
}
---|
451 | #endregion |
---|
452 | |
---|
453 | #region Attributes |
---|
/// <summary>
/// Reads one attribute (name, equals sign, value) and stores it. Malformed
/// input is accepted and reported through OnSyntaxError.
/// Context: name or "=\'\""
/// </summary>
void ReadAttribute()
{
	AssertHasMoreData();

	InternalAttribute attr = new InternalAttribute();
	var frame = BeginInternalObject(attr);

	// Read name
	string name;
	if (!TryReadName(out name)) {
		OnSyntaxError("Attribute name expected");
	} else if (!IsValidName(name)) {
		OnSyntaxError(this.CurrentLocation - name.Length, this.CurrentLocation, "The name '{0}' is invalid", name);
	}
	attr.Name = name;

	// Read equals sign and surrounding whitespace
	int beforeEquals = this.CurrentLocation;
	TryMoveToNonWhiteSpace();
	if (TryRead('=')) {
		int afterEquals = this.CurrentLocation;
		TryMoveToNonWhiteSpace();
		bool quoteFollows = TryPeek('"') || TryPeek('\'');
		if (!quoteFollows) {
			// Do not read whitespace if quote does not follow
			GoBack(afterEquals);
		}
		attr.EqualsSignLength = this.CurrentLocation - beforeEquals;
	} else {
		GoBack(beforeEquals);
		OnSyntaxError("'=' expected");
		attr.EqualsSignLength = 0;
	}

	// Read attribute value
	int start = this.CurrentLocation;
	char quoteChar = TryPeek('"') ? '"' : '\'';
	bool startsWithQuote = TryRead(quoteChar);
	if (startsWithQuote) {
		int valueStart = this.CurrentLocation;
		TryMoveToAnyOf(quoteChar, '<');
		if (!TryRead(quoteChar)) {
			// '<' or end of file
			GoBack(valueStart);
			ReadAttributeValue(quoteChar);
			OnSyntaxError("Quote {0} expected", quoteChar);
		} else if (!TryPeekAnyOf(' ', '\t', '\n', '\r', '/', '>', '?')) {
			// The closing quote is directly followed by something unexpected.
			if (TryPeekPrevious('=', 2) || (TryPeekPrevious('=', 3) && TryPeekPrevious(' ', 2))) {
				// This actually most likely means that we are in the next attribute value
				GoBack(valueStart);
				ReadAttributeValue(quoteChar);
				if (TryRead(quoteChar)) {
					OnSyntaxError("White space or end of tag expected");
				} else {
					OnSyntaxError("Quote {0} expected (or add whitespace after the following one)", quoteChar);
				}
			} else {
				OnSyntaxError("White space or end of tag expected");
			}
		}
	} else {
		int valueStart = this.CurrentLocation;
		ReadAttributeValue(null);
		// Swallow a stray closing quote, if there is one.
		TryRead('\"');
		TryRead('\'');
		if (valueStart == this.CurrentLocation) {
			OnSyntaxError("Attribute value expected");
		} else {
			OnSyntaxError(valueStart, this.CurrentLocation, "Attribute value must be quoted");
		}
	}
	string rawValue = GetText(start, this.CurrentLocation);
	string unquotedValue = Unquote(rawValue);
	// The text passed to Dereference begins after the opening quote, if any.
	int valueLocation = startsWithQuote ? start + 1 : start;
	attr.Value = Dereference(unquotedValue, valueLocation);

	EndInternalObject(frame);
}
---|
539 | |
---|
/// <summary>
/// Reads everything up to (excluding) the quote, an opening or closing bracket,
/// or the signature of a following attribute (name, '=', quote).
/// </summary>
/// <param name="quote">
/// The quote character that ends the value, or null to stop at either quote character.
/// </param>
void ReadAttributeValue(char? quote)
{
	while (HasMoreData()) {
		// What is next?
		int start = this.CurrentLocation;
		TryMoveToNonWhiteSpace();  // Read white space (if any)

		bool atQuote = quote.HasValue
			? TryPeek(quote.Value)
			: (TryPeek('"') || TryPeek('\''));
		if (atQuote) {
			return;
		}

		// Opening/closing tag
		string endBr;
		if (TryPeek('<') || TryReadClosingBracket(out endBr)) {
			GoBack(start);
			return;
		}

		if (!TryReadName()) {
			TryMoveNext(); // Accept everything else
			continue;
		}

		// A name was read - is it the signature of the next attribute?
		int nameEnd = this.CurrentLocation;
		bool startsNextAttribute =
			TryMoveToNonWhiteSpace() && TryRead("=") &&
			TryMoveToNonWhiteSpace() && TryPeekAnyOf('"', '\'');
		if (startsNextAttribute) {
			// Start of attribute. Great
			GoBack(start);
			return; // Done
		}
		// Just some garbage - make it part of the value and read more
		GoBack(nameEnd);
	}
}
---|
578 | |
---|
/// <summary>
/// Removes quoting from the given string: a leading quote character is dropped,
/// together with a trailing one when it is the same character; without a leading
/// quote, a trailing quote character is dropped.
/// </summary>
/// <returns>The unquoted text; the empty string for null or empty input.</returns>
static string Unquote(string quoted)
{
	if (string.IsNullOrEmpty(quoted)) {
		return string.Empty;
	}

	int begin = 0;
	int end = quoted.Length;
	char first = quoted[0];
	char last = quoted[end - 1];

	if (first == '"' || first == '\'') {
		begin = 1;
		// Only a matching closing quote is removed; a lone quote has no partner.
		if (end > 1 && last == first) {
			end--;
		}
	} else if (end > 1 && (last == '"' || last == '\'')) {
		end--;
	}

	if (begin == 0 && end == quoted.Length) {
		return quoted; // Keep whole string
	}
	return quoted.Substring(begin, end - begin);
}
---|
604 | #endregion |
---|
605 | |
---|
606 | #region Text |
---|
/// <summary>
/// Reads text of the given kind up to the terminator implied by
/// <paramref name="type"/> (the terminator itself is not consumed) and stores it
/// unless it is empty. Entity references are resolved for character data only.
/// </summary>
/// <exception cref="InternalException"><paramref name="type"/> is not handled here.</exception>
void ReadText(TextType type)
{
	var text = new InternalText();
	var frame = BeginInternalObject(text);
	text.Type = type;

	int start = this.CurrentLocation;
	int fragmentEnd = inputLength;

	// Whitespace would be skipped anyway by any operation
	TryMoveToNonWhiteSpace(fragmentEnd);
	int wsEnd = this.CurrentLocation;

	// Try move to the terminator given by the context
	if (type == TextType.WhiteSpace) {
		TryMoveToNonWhiteSpace(fragmentEnd);
	} else if (type == TextType.CharacterData) {
		// Stops at '<' or at the end of the fragment; a ']' is only inspected.
		while (TryMoveToAnyOf(new char[] {'<', ']'}, fragmentEnd) && !TryPeek('<')) {
			if (!TryPeek(']')) {
				throw new InternalException("Infinite loop");
			}
			if (TryPeek("]]>")) {
				OnSyntaxError(this.CurrentLocation, this.CurrentLocation + 3, "']]>' is not allowed in text");
			}
			TryMoveNext();
		}
	} else if (type == TextType.Comment) {
		// Do not report too many errors
		bool errorReported = false;
		while (TryMoveTo('-', fragmentEnd) && !TryPeek("-->")) {
			if (TryPeek("--") && !errorReported) {
				OnSyntaxError(this.CurrentLocation, this.CurrentLocation + 2, "'--' is not allowed in comment");
				errorReported = true;
			}
			TryMoveNext();
		}
	} else if (type == TextType.CData) {
		// We can not use TryMoveTo("]]>", fragmentEnd) because it may incorrectly accept "]" at the end of fragment
		while (TryMoveTo(']', fragmentEnd) && !TryPeek("]]>")) {
			TryMoveNext();
		}
	} else if (type == TextType.ProcessingInstruction) {
		while (TryMoveTo('?', fragmentEnd) && !TryPeek("?>")) {
			TryMoveNext();
		}
	} else if (type == TextType.UnknownBang) {
		TryMoveToAnyOf(new char[] {'<', '>'}, fragmentEnd);
	} else {
		throw new InternalException("Unknown type " + type);
	}

	// Nothing but the leading whitespace was consumed.
	text.ContainsOnlyWhitespace = (wsEnd == this.CurrentLocation);

	string escapedValue = GetText(start, this.CurrentLocation);
	string value = (type == TextType.CharacterData)
		? Dereference(escapedValue, start)
		: escapedValue;
	text.Value = value;
	text.Value = GetCachedString(text.Value);

	bool nonEmpty = this.CurrentLocation > start;
	EndInternalObject(frame, storeNewObject: nonEmpty);
}
---|
682 | #endregion |
---|
683 | |
---|
684 | #region Dereference |
---|
const int maxEntityLength = 16; // The longest built-in one is 10 ("&#1114111;")

/// <summary>
/// Replaces the predefined entity references (amp, lt, gt, apos, quot) and
/// numeric character references in <paramref name="text"/> by the characters
/// they stand for. References that cannot be resolved are kept verbatim and,
/// where applicable, reported through OnSyntaxError.
/// </summary>
/// <param name="text">The raw text as it appears in the document.</param>
/// <param name="textLocation">Absolute document offset of the first character of <paramref name="text"/>.</param>
/// <returns>The dereferenced text; the same instance when it contains no '&amp;'.</returns>
string Dereference(string text, int textLocation)
{
	StringBuilder sb = null;  // The dereferenced text so far (all up to 'curr')
	int curr = 0;
	while (true) {
		// Reached end of input
		if (curr == text.Length) {
			return sb != null ? sb.ToString() : text;
		}

		// Try to find reference
		int start = text.IndexOf('&', curr);

		// No more references found
		if (start == -1) {
			if (sb == null) {
				return text;
			}
			sb.Append(text, curr, text.Length - curr); // Add rest
			return sb.ToString();
		}

		// Append text before the entity reference
		if (sb == null) sb = new StringBuilder(text.Length);
		sb.Append(text, curr, start - curr);
		curr = start;

		// Location of the '&' in the document. This must be derived from the
		// offset in the source text: the length of the output built so far
		// differs from it as soon as one reference has been replaced.
		int errorLoc = textLocation + start;

		// Find entity name; only look a limited distance ahead for the terminator
		int end = text.IndexOfAny(new char[] {'&', ';'}, start + 1, Math.Min(maxEntityLength, text.Length - (start + 1)));
		if (end == -1 || text[end] == '&') {
			// Not found
			OnSyntaxError(errorLoc, errorLoc + 1, "Entity reference must be terminated with ';'");
			// Keep '&'
			sb.Append('&');
			curr++;
			continue; // Restart at the next character location
		}
		string name = text.Substring(start + 1, end - (start + 1));

		// Resolve the name
		string replacement = null;
		if (name.Length == 0) {
			OnSyntaxError(errorLoc + 1, errorLoc + 1, "Entity name expected");
		} else if (name == "amp") {
			replacement = "&";
		} else if (name == "lt") {
			replacement = "<";
		} else if (name == "gt") {
			replacement = ">";
		} else if (name == "apos") {
			replacement = "'";
		} else if (name == "quot") {
			replacement = "\"";
		} else if (name[0] == '#') {
			// Numeric character reference. Success is tracked separately from the
			// value: a hexadecimal reference such as "#xFFFFFFFF" parses to -1,
			// which must be reported as an invalid character, not silently dropped.
			int num;
			bool parsed;
			if (name.Length > 1 && name[1] == 'x') {
				parsed = int.TryParse(name.Substring(2), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture.NumberFormat, out num);
				if (!parsed) {
					OnSyntaxError(errorLoc + 3, errorLoc + 1 + name.Length, "Hexadecimal code of unicode character expected");
				}
			} else {
				parsed = int.TryParse(name.Substring(1), NumberStyles.None, CultureInfo.InvariantCulture.NumberFormat, out num);
				if (!parsed) {
					OnSyntaxError(errorLoc + 2, errorLoc + 1 + name.Length, "Numeric code of unicode character expected");
				}
			}
			if (parsed) {
				try {
					replacement = char.ConvertFromUtf32(num);
				} catch (ArgumentOutOfRangeException) {
					// Not a valid code point (negative, surrogate, or too large)
					OnSyntaxError(errorLoc + 2, errorLoc + 1 + name.Length, "Invalid unicode character U+{0:X} ({0})", num);
				}
			}
		} else if (!IsValidName(name)) {
			OnSyntaxError(errorLoc + 1, errorLoc + 1, "Invalid entity name");
		} else {
			// A well-formed name that is not predefined
			if (tagSoupParser.UnknownEntityReferenceIsError) {
				OnSyntaxError(errorLoc, errorLoc + 1 + name.Length + 1, "Unknown entity reference '{0}'", name);
			}
		}

		// Append the replacement to output, or the original reference if unresolved
		if (replacement != null) {
			sb.Append(replacement);
		} else {
			sb.Append('&');
			sb.Append(name);
			sb.Append(';');
		}
		curr = end + 1;
	}
}
---|
794 | #endregion |
---|
795 | |
---|
796 | #region Syntax Errors |
---|
// Errors reported since the last call to GetSyntaxErrors.
List<InternalSyntaxError> syntaxErrors = new List<InternalSyntaxError>();

/// <summary>
/// Returns the pending syntax errors and clears the pending list.
/// </summary>
/// <returns>An array of the pending errors, or null when there are none.</returns>
InternalSyntaxError[] GetSyntaxErrors()
{
	if (syntaxErrors.Count == 0) {
		return null;
	}
	InternalSyntaxError[] pending = syntaxErrors.ToArray();
	syntaxErrors.Clear();
	return pending;
}
---|
809 | |
---|
/// <summary>
/// Reports a syntax error covering the single character at the current location.
/// </summary>
/// <param name="message">Composite format string of the error message.</param>
/// <param name="args">Arguments for <paramref name="message"/>.</param>
void OnSyntaxError(string message, params object[] args)
{
	int errorStart = this.CurrentLocation;
	int errorEnd = this.CurrentLocation + 1;
	OnSyntaxError(errorStart, errorEnd, message, args);
}
---|
814 | |
---|
/// <summary>
/// Logs a syntax error and adds it to the pending list, with offsets relative
/// to the start of the object currently being read.
/// </summary>
/// <param name="start">Absolute start offset of the error.</param>
/// <param name="end">Absolute end offset; widened to start + 1 when not greater than start.</param>
/// <param name="message">Composite format string, formatted with the invariant culture.</param>
/// <param name="args">Arguments for <paramref name="message"/>.</param>
void OnSyntaxError(int start, int end, string message, params object[] args)
{
	// Every error covers at least one character.
	int effectiveEnd = end > start ? end : start + 1;
	string formattedMessage = string.Format(CultureInfo.InvariantCulture, message, args);
	Log.WriteLine("Syntax error ({0}-{1}): {2}", start, effectiveEnd, formattedMessage);

	int relativeStart = start - internalObjectStartPosition;
	int relativeEnd = effectiveEnd - internalObjectStartPosition;
	syntaxErrors.Add(new InternalSyntaxError(relativeStart, relativeEnd, formattedMessage));
}
---|
822 | #endregion |
---|
823 | |
---|
824 | #region Helper functions |
---|
/// <summary>
/// Determines whether <paramref name="name"/> is a valid XML name.
/// </summary>
/// <param name="name">The name to check; may be null or empty.</param>
/// <returns>True for a valid XML name; false otherwise, including null and the empty string.</returns>
internal static bool IsValidName(string name)
{
	// XmlConvert.VerifyName signals null/empty input with ArgumentNullException,
	// which the catch below does not cover - answer that case directly.
	if (string.IsNullOrEmpty(name)) {
		return false;
	}
	try {
		System.Xml.XmlConvert.VerifyName(name);
		return true;
	} catch (System.Xml.XmlException) {
		return false;
	}
}
---|
834 | #endregion |
---|
835 | } |
---|
836 | } |
---|