Free cookie consent management tool by TermsFeed Policy Generator

source: stable/HeuristicLab.ExtLibs/HeuristicLab.NRefactory/5.5.0/NRefactory.Xml-5.5.0/TagReader.cs @ 17246

Last change on this file since 17246 was 11804, checked in by jkarder, 9 years ago

#2077:

  • added fancy xml documentation
  • fixed configurations and platforms
File size: 26.7 KB
Line 
1// Copyright (c) 2009-2013 AlphaSierraPapa for the SharpDevelop Team
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy of this
4// software and associated documentation files (the "Software"), to deal in the Software
5// without restriction, including without limitation the rights to use, copy, modify, merge,
6// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
7// to whom the Software is furnished to do so, subject to the following conditions:
8//
9// The above copyright notice and this permission notice shall be included in all copies or
10// substantial portions of the Software.
11//
12// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
13// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
14// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE
15// FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
16// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
17// DEALINGS IN THE SOFTWARE.
18
19using System;
20using System.Collections.Generic;
21using System.Diagnostics;
22using System.Globalization;
23using System.Linq;
24using System.Text;
25using System.Threading;
26using ICSharpCode.NRefactory.Editor;
27
28namespace ICSharpCode.NRefactory.Xml
29{
30  class TagReader : TokenReader
31  {
32    readonly AXmlParser tagSoupParser;
33    readonly Stack<string> elementNameStack;
34   
35    public TagReader(AXmlParser tagSoupParser, ITextSource input, bool collapseProperlyNestedElements) : base(input)
36    {
37      this.tagSoupParser = tagSoupParser;
38      if (collapseProperlyNestedElements)
39        elementNameStack = new Stack<string>();
40    }
41   
42    public List<InternalObject> ReadAllObjects(CancellationToken cancellationToken)
43    {
44      while (HasMoreData()) {
45        cancellationToken.ThrowIfCancellationRequested();
46        ReadObject();
47      }
48      return objects;
49    }
50   
51    public List<InternalObject> ReadAllObjectsIncremental(InternalObject[] oldObjects, List<UnchangedSegment> reuseMap, CancellationToken cancellationToken)
52    {
53      ObjectIterator oldObjectIterator = new ObjectIterator(oldObjects);
54      int reuseMapIndex = 0;
55      while (reuseMapIndex < reuseMap.Count) {
56        var reuseEntry = reuseMap[reuseMapIndex];
57        while (this.CurrentLocation < reuseEntry.NewOffset) {
58          cancellationToken.ThrowIfCancellationRequested();
59          ReadObject();
60        }
61        if (this.CurrentLocation >= reuseEntry.NewOffset + reuseEntry.Length) {
62          reuseMapIndex++;
63          continue;
64        }
65        Debug.Assert(reuseEntry.NewOffset <= this.CurrentLocation && this.CurrentLocation < reuseEntry.NewOffset + reuseEntry.Length);
66        // reuse the nodes within this reuseEntry starting at oldOffset:
67        int oldOffset = this.CurrentLocation - reuseEntry.NewOffset + reuseEntry.OldOffset;
68        // seek to oldOffset in the oldObjects array:
69        oldObjectIterator.SkipTo(oldOffset);
70        if (oldObjectIterator.CurrentPosition == oldOffset) {
71          // reuse old objects within this reuse entry:
72          int reuseEnd = reuseEntry.OldOffset + reuseEntry.Length;
73          while (oldObjectIterator.CurrentObject != null && oldObjectIterator.CurrentPosition + oldObjectIterator.CurrentObject.LengthTouched < reuseEnd) {
74            StoreObject(oldObjectIterator.CurrentObject);
75            Skip(oldObjectIterator.CurrentObject.Length);
76            oldObjectIterator.MoveNext();
77          }
78          reuseMapIndex++; // go to next re-use map
79        } else {
80          // We are in a region where old objects are available, but aren't aligned correctly.
81          // Don't skip this reuse entry, and read a single object so that we can re-align
82          ReadObject();
83        }
84      }
85      while (HasMoreData()) {
86        cancellationToken.ThrowIfCancellationRequested();
87        ReadObject();
88      }
89      return objects;
90    }
91   
92    void StoreObject(InternalObject obj)
93    {
94      objects.Add(obj);
95     
96      // Now combine properly-nested elements:
97      if (elementNameStack == null)
98        return; // parsing tag soup
99      InternalTag tag = obj as InternalTag;
100      if (tag == null)
101        return;
102      if (tag.IsEmptyTag) {
103        // the tag is its own element
104        objects[objects.Count - 1] = new InternalElement(tag) {
105          Length = tag.Length,
106          LengthTouched = tag.LengthTouched,
107          IsPropertyNested = true,
108          StartRelativeToParent = tag.StartRelativeToParent,
109          NestedObjects = new [] { tag.SetStartRelativeToParent(0) }
110        };
111      } else if (tag.IsStartTag) {
112        elementNameStack.Push(tag.Name);
113      } else if (tag.IsEndTag && elementNameStack.Count > 0) {
114        // Now look for the start element:
115        int startIndex = objects.Count - 2;
116        bool ok = false;
117        string expectedName = elementNameStack.Pop();
118        if (tag.Name == expectedName) {
119          while (startIndex > 0) {
120            var startTag = objects[startIndex] as InternalTag;
121            if (startTag != null) {
122              if (startTag.IsStartTag) {
123                ok = (startTag.Name == expectedName);
124                break;
125              } else if (startTag.IsEndTag) {
126                break;
127              }
128            }
129            startIndex--;
130          }
131        }
132        if (ok) {
133          // We found a correct nesting, let's create an element:
134          InternalObject[] nestedObjects = new InternalObject[objects.Count - startIndex];
135          int oldStartRelativeToParent = objects[startIndex].StartRelativeToParent;
136          int pos = 0;
137          int maxLengthTouched = 0;
138          for (int i = 0; i < nestedObjects.Length; i++) {
139            nestedObjects[i] = objects[startIndex + i].SetStartRelativeToParent(pos);
140            maxLengthTouched = Math.Max(maxLengthTouched, pos + nestedObjects[i].LengthTouched);
141            pos += nestedObjects[i].Length;
142          }
143          objects.RemoveRange(startIndex, nestedObjects.Length);
144          objects.Add(
145            new InternalElement((InternalTag)nestedObjects[0]) {
146              HasEndTag = true,
147              IsPropertyNested = true,
148              Length = pos,
149              LengthTouched = maxLengthTouched,
150              StartRelativeToParent = oldStartRelativeToParent,
151              NestedObjects = nestedObjects
152            });
153        } else {
154          // Mismatched name - the nesting isn't properly;
155          // clear the whole stack so that none of the currently open elements are closed as properly-nested.
156          elementNameStack.Clear();
157        }
158      }
159    }
160   
161    /// <summary>
162    /// Reads one or more objects.
163    /// </summary>
164    void ReadObject()
165    {
166      if (TryPeek('<')) {
167        ReadTag();
168      } else {
169        ReadText(TextType.CharacterData);
170      }
171    }
172   
173    #region BeginInternalObject / EndInternalObject
174    List<InternalObject> objects = new List<InternalObject>();
175    int internalObjectStartPosition;
176   
177    int CurrentRelativeLocation {
178      get { return CurrentLocation - internalObjectStartPosition; }
179    }
180   
181    struct InternalObjectFrame
182    {
183      public readonly InternalObject InternalObject;
184      public readonly int ParentStartPosition;
185     
186      public InternalObjectFrame(InternalObject internalObject, int parentStartPosition)
187      {
188        this.InternalObject = internalObject;
189        this.ParentStartPosition = parentStartPosition;
190      }
191    }
192   
193    InternalObjectFrame BeginInternalObject(InternalObject internalObject)
194    {
195      return BeginInternalObject(internalObject, this.CurrentLocation);
196    }
197   
198    InternalObjectFrame BeginInternalObject(InternalObject internalObject, int beginLocation)
199    {
200      internalObject.StartRelativeToParent = beginLocation - internalObjectStartPosition;
201     
202      var frame = new InternalObjectFrame(internalObject, internalObjectStartPosition);
203     
204      internalObjectStartPosition = CurrentLocation;
205      return frame;
206    }
207   
208    void EndInternalObject(InternalObjectFrame frame, bool storeNewObject = true)
209    {
210      frame.InternalObject.Length = this.CurrentLocation - internalObjectStartPosition;
211      frame.InternalObject.LengthTouched = this.MaxTouchedLocation - internalObjectStartPosition;
212      frame.InternalObject.SyntaxErrors = GetSyntaxErrors();
213      if (storeNewObject)
214        StoreObject(frame.InternalObject);
215      internalObjectStartPosition = frame.ParentStartPosition;
216    }
217    #endregion
218   
219    #region Read Tag
220    /// <summary>
221    /// Context: "&lt;"
222    /// </summary>
223    void ReadTag()
224    {
225      AssertHasMoreData();
226     
227      int tagStart = this.CurrentLocation;
228      InternalTag tag = new InternalTag();
229      var frame = BeginInternalObject(tag);
230     
231      // Read the opening bracket
232      // It identifies the type of tag and parsing behavior for the rest of it
233      tag.OpeningBracket = ReadOpeningBracket();
234     
235      if (tag.IsUnknownBang && !TryPeekWhiteSpace())
236        OnSyntaxError(tagStart, this.CurrentLocation, "Unknown tag");
237     
238      if (tag.IsStartOrEmptyTag || tag.IsEndTag || tag.IsProcessingInstruction) {
239        // Read the name
240        TryMoveToNonWhiteSpace();
241        tag.RelativeNameStart = this.CurrentRelativeLocation;
242        string name;
243        if (TryReadName(out name)) {
244          if (!IsValidName(name)) {
245            OnSyntaxError(this.CurrentLocation - name.Length, this.CurrentLocation, "The name '{0}' is invalid", name);
246          }
247        } else {
248          OnSyntaxError("Element name expected");
249        }
250        tag.Name = name;
251      } else {
252        tag.Name = string.Empty;
253      }
254     
255      bool isXmlDeclr = tag.Name == "xml" && tag.IsProcessingInstruction;
256      int oldObjectCount = objects.Count;
257     
258      if (tag.IsStartOrEmptyTag || tag.IsEndTag || isXmlDeclr) {
259        // Read attributes for the tag
260        while (HasMoreData()) {
261          // Chech for all forbiden 'name' characters first - see ReadName
262          TryMoveToNonWhiteSpace();
263          if (TryPeek('<')) break;
264          string endBr;
265          int endBrStart = this.CurrentLocation; // Just peek
266          if (TryReadClosingBracket(out endBr)) {  // End tag
267            GoBack(endBrStart);
268            break;
269          }
270         
271          // We have "=\'\"" or name - read attribute
272          int attrStartOffset = this.CurrentLocation;
273          ReadAttribute();
274          if (tag.IsEndTag)
275            OnSyntaxError(attrStartOffset, this.CurrentLocation, "Attribute not allowed in end tag.");
276        }
277      } else if (tag.IsDocumentType) {
278        ReadContentOfDTD();
279      } else {
280        int start = this.CurrentLocation;
281        if (tag.IsComment) {
282          ReadText(TextType.Comment);
283        } else if (tag.IsCData) {
284          ReadText(TextType.CData);
285        } else if (tag.IsProcessingInstruction) {
286          ReadText(TextType.ProcessingInstruction);
287        } else if (tag.IsUnknownBang) {
288          ReadText(TextType.UnknownBang);
289        } else {
290          throw new InternalException(string.Format(CultureInfo.InvariantCulture, "Unknown opening bracket '{0}'", tag.OpeningBracket));
291        }
292        // Backtrack at complete start
293        if (IsEndOfFile() || (tag.IsUnknownBang && TryPeek('<'))) {
294          GoBack(start);
295          objects.RemoveRange(oldObjectCount, objects.Count - oldObjectCount);
296        }
297      }
298     
299      // Read closing bracket
300      string bracket;
301      TryReadClosingBracket(out bracket);
302      tag.ClosingBracket = bracket;
303     
304      // Error check
305      int brStart = this.CurrentLocation - (tag.ClosingBracket ?? string.Empty).Length;
306      int brEnd = this.CurrentLocation;
307      if (tag.Name == null) {
308        // One error was reported already
309      } else if (tag.IsStartOrEmptyTag) {
310        if (tag.ClosingBracket != ">" && tag.ClosingBracket != "/>") OnSyntaxError(brStart, brEnd, "'>' or '/>' expected");
311      } else if (tag.IsEndTag) {
312        if (tag.ClosingBracket != ">") OnSyntaxError(brStart, brEnd, "'>' expected");
313      } else if (tag.IsComment) {
314        if (tag.ClosingBracket != "-->") OnSyntaxError(brStart, brEnd, "'-->' expected");
315      } else if (tag.IsCData) {
316        if (tag.ClosingBracket != "]]>") OnSyntaxError(brStart, brEnd, "']]>' expected");
317      } else if (tag.IsProcessingInstruction) {
318        if (tag.ClosingBracket != "?>") OnSyntaxError(brStart, brEnd, "'?>' expected");
319      } else if (tag.IsUnknownBang) {
320        if (tag.ClosingBracket != ">") OnSyntaxError(brStart, brEnd, "'>' expected");
321      } else if (tag.IsDocumentType) {
322        if (tag.ClosingBracket != ">") OnSyntaxError(brStart, brEnd, "'>' expected");
323      } else {
324        throw new InternalException(string.Format(CultureInfo.InvariantCulture, "Unknown opening bracket '{0}'", tag.OpeningBracket));
325      }
326     
327      // Attribute name may not apper multiple times
328      if (objects.Count > oldObjectCount) {
329        // Move nested objects into tag.NestedObjects:
330        tag.NestedObjects = new InternalObject[objects.Count - oldObjectCount];
331        objects.CopyTo(oldObjectCount, tag.NestedObjects, 0, tag.NestedObjects.Length);
332        objects.RemoveRange(oldObjectCount, objects.Count - oldObjectCount);
333       
334        // Look for duplicate attributes:
335        HashSet<string> attributeNames = new HashSet<string>();
336        foreach (var obj in tag.NestedObjects) {
337          InternalAttribute attr = obj as InternalAttribute;
338          if (attr != null && !attributeNames.Add(attr.Name)) {
339            int attrStart = tagStart + attr.StartRelativeToParent;
340            OnSyntaxError(attrStart, attrStart + attr.Name.Length, "Attribute with name '{0}' already exists", attr.Name);
341          }
342        }
343      }
344     
345      EndInternalObject(frame);
346    }
347    #endregion
348   
349    #region Read DTD
350    void ReadContentOfDTD()
351    {
352      int start = this.CurrentLocation;
353      while (HasMoreData()) {
354        TryMoveToNonWhiteSpace();            // Skip whitespace
355        if (TryRead('\'')) TryMoveTo('\'');  // Skip single quoted string TODO: Bug
356        if (TryRead('\"')) TryMoveTo('\"');  // Skip single quoted string
357        if (TryRead('[')) {                  // Start of nested infoset
358          // Reading infoset
359          while (HasMoreData()) {
360            TryMoveToAnyOf('<', ']');
361            if (TryPeek('<')) {
362              if (start != this.CurrentLocation) {  // Two following tags
363                MakeText(start, this.CurrentLocation);
364              }
365              ReadTag();
366              start = this.CurrentLocation;
367            }
368            if (TryPeek(']')) break;
369          }
370        }
371        TryRead(']');                        // End of nested infoset
372        if (TryPeek('>')) break;             // Proper closing
373        if (TryPeek('<')) break;             // Malformed XML
374        TryMoveNext();                       // Skip anything else
375      }
376      if (start != this.CurrentLocation) {
377        MakeText(start, this.CurrentLocation);
378      }
379    }
380   
381    void MakeText(int start, int end)
382    {
383      Log.DebugAssert(end > start, "Empty text");
384      Log.DebugAssert(end == this.CurrentLocation, "end == current location");
385     
386      InternalText text = new InternalText();
387      var frame = BeginInternalObject(text, start);
388      text.Type = TextType.Other;
389      text.Value = GetText(start, end);
390      EndInternalObject(frame);
391    }
392    #endregion
393   
394    #region Read Brackets
395    /// <summary>
396    /// Reads any of the know opening brackets.  (only full bracket)
397    /// Context: "&lt;"
398    /// </summary>
399    string ReadOpeningBracket()
400    {
401      // We are using a lot of string literals so that the memory instances are shared
402      //int start = this.CurrentLocation;
403      if (TryRead('<')) {
404        if (TryRead('/')) {
405          return "</";
406        } else if (TryRead('?')) {
407          return "<?";
408        } else if (TryRead('!')) {
409          if (TryRead("--")) {
410            return "<!--";
411          } else if (TryRead("[CDATA[")) {
412            return "<![CDATA[";
413          } else {
414            foreach (string dtdName in AXmlTag.DtdNames) {
415              // the dtdName includes "<!"
416              if (TryRead(dtdName.Remove(0, 2))) return dtdName;
417            }
418            return "<!";
419          }
420        } else {
421          return "<";
422        }
423      } else {
424        throw new InternalException("'<' expected");
425      }
426    }
427   
428    /// <summary>
429    /// Reads any of the know closing brackets.  (only full bracket)
430    /// Context: any
431    /// </summary>
432    bool TryReadClosingBracket(out string bracket)
433    {
434      // We are using a lot of string literals so that the memory instances are shared
435      if (TryRead('>')) {
436        bracket = ">";
437      } else  if (TryRead("/>")) {
438        bracket = "/>";
439      } else  if (TryRead("?>")) {
440        bracket = "?>";
441      } else if (TryRead("-->")) {
442        bracket = "-->";
443      } else if (TryRead("]]>")) {
444        bracket = "]]>";
445      } else {
446        bracket = string.Empty;
447        return false;
448      }
449      return true;
450    }
451    #endregion
452   
453    #region Attributes
454    /// <summary>
455    /// Context: name or "=\'\""
456    /// </summary>
457    void ReadAttribute()
458    {
459      AssertHasMoreData();
460     
461      InternalAttribute attr = new InternalAttribute();
462      var frame = BeginInternalObject(attr);
463     
464      // Read name
465      string name;
466      if (TryReadName(out name)) {
467        if (!IsValidName(name)) {
468          OnSyntaxError(this.CurrentLocation - name.Length, this.CurrentLocation, "The name '{0}' is invalid", name);
469        }
470      } else {
471        OnSyntaxError("Attribute name expected");
472      }
473      attr.Name = name;
474     
475      // Read equals sign and surrounding whitespace
476      int checkpoint = this.CurrentLocation;
477      TryMoveToNonWhiteSpace();
478      if (TryRead('=')) {
479        int chk2 = this.CurrentLocation;
480        TryMoveToNonWhiteSpace();
481        if (!TryPeek('"') && !TryPeek('\'')) {
482          // Do not read whitespace if quote does not follow
483          GoBack(chk2);
484        }
485        attr.EqualsSignLength = this.CurrentLocation - checkpoint;
486      } else {
487        GoBack(checkpoint);
488        OnSyntaxError("'=' expected");
489        attr.EqualsSignLength = 0;
490      }
491     
492      // Read attribute value
493      int start = this.CurrentLocation;
494      char quoteChar = TryPeek('"') ? '"' : '\'';
495      bool startsWithQuote;
496      if (TryRead(quoteChar)) {
497        startsWithQuote = true;
498        int valueStart = this.CurrentLocation;
499        TryMoveToAnyOf(quoteChar, '<');
500        if (TryRead(quoteChar)) {
501          if (!TryPeekAnyOf(' ', '\t', '\n', '\r', '/', '>', '?')) {
502            if (TryPeekPrevious('=', 2) || (TryPeekPrevious('=', 3) && TryPeekPrevious(' ', 2))) {
503              // This actually most likely means that we are in the next attribute value
504              GoBack(valueStart);
505              ReadAttributeValue(quoteChar);
506              if (TryRead(quoteChar)) {
507                OnSyntaxError("White space or end of tag expected");
508              } else {
509                OnSyntaxError("Quote {0} expected (or add whitespace after the following one)", quoteChar);
510              }
511            } else {
512              OnSyntaxError("White space or end of tag expected");
513            }
514          }
515        } else {
516          // '<' or end of file
517          GoBack(valueStart);
518          ReadAttributeValue(quoteChar);
519          OnSyntaxError("Quote {0} expected", quoteChar);
520        }
521      } else {
522        startsWithQuote = false;
523        int valueStart = this.CurrentLocation;
524        ReadAttributeValue(null);
525        TryRead('\"');
526        TryRead('\'');
527        if (valueStart == this.CurrentLocation) {
528          OnSyntaxError("Attribute value expected");
529        } else {
530          OnSyntaxError(valueStart, this.CurrentLocation, "Attribute value must be quoted");
531        }
532      }
533      string val = GetText(start, this.CurrentLocation);
534      val = Unquote(val);
535      attr.Value = Dereference(val, startsWithQuote ? start + 1 : start);
536     
537      EndInternalObject(frame);
538    }
539   
540    /// <summary>
541    /// Read everything up to quote (excluding), opening/closing tag or attribute signature
542    /// </summary>
543    void ReadAttributeValue(char? quote)
544    {
545      while (HasMoreData()) {
546        // What is next?
547        int start = this.CurrentLocation;
548        TryMoveToNonWhiteSpace();  // Read white space (if any)
549        if (quote.HasValue) {
550          if (TryPeek(quote.Value)) return;
551        } else {
552          if (TryPeek('"') || TryPeek('\'')) return;
553        }
554        // Opening/closing tag
555        string endBr;
556        if (TryPeek('<') || TryReadClosingBracket(out endBr)) {
557          GoBack(start);
558          return;
559        }
560        // Try reading attribute signature
561        if (TryReadName()) {
562          int nameEnd = this.CurrentLocation;
563          if (TryMoveToNonWhiteSpace() && TryRead("=") &&
564              TryMoveToNonWhiteSpace() && TryPeekAnyOf('"', '\''))
565          {
566            // Start of attribute.  Great
567            GoBack(start);
568            return;  // Done
569          } else {
570            // Just some gargabe - make it part of the value
571            GoBack(nameEnd);
572            continue;  // Read more
573          }
574        }
575        TryMoveNext(); // Accept everyting else
576      }
577    }
578   
579    /// <summary> Remove quoting from the given string </summary>
580    static string Unquote(string quoted)
581    {
582      if (string.IsNullOrEmpty(quoted)) return string.Empty;
583      char first = quoted[0];
584      if (quoted.Length == 1) return (first == '"' || first == '\'') ? string.Empty : quoted;
585      char last  = quoted[quoted.Length - 1];
586      if (first == '"' || first == '\'') {
587        if (first == last) {
588          // Remove both quotes
589          return quoted.Substring(1, quoted.Length - 2);
590        } else {
591          // Remove first quote
592          return quoted.Remove(0, 1);
593        }
594      } else {
595        if (last == '"' || last == '\'') {
596          // Remove last quote
597          return quoted.Substring(0, quoted.Length - 1);
598        } else {
599          // Keep whole string
600          return quoted;
601        }
602      }
603    }
604    #endregion
605   
606    #region Text
607    /// <summary>
608    /// Reads text.
609    /// </summary>
610    void ReadText(TextType type)
611    {
612      var text = new InternalText();
613      var frame = BeginInternalObject(text);
614      text.Type = type;
615     
616      int start = this.CurrentLocation;
617      int fragmentEnd = inputLength;
618     
619      // Whitespace would be skipped anyway by any operation
620      TryMoveToNonWhiteSpace(fragmentEnd);
621      int wsEnd = this.CurrentLocation;
622     
623      // Try move to the terminator given by the context
624      if (type == TextType.WhiteSpace) {
625        TryMoveToNonWhiteSpace(fragmentEnd);
626      } else if (type == TextType.CharacterData) {
627        while(true) {
628          if (!TryMoveToAnyOf(new char[] {'<', ']'}, fragmentEnd)) break; // End of fragment
629          if (TryPeek('<')) break;
630          if (TryPeek(']')) {
631            if (TryPeek("]]>")) {
632              OnSyntaxError(this.CurrentLocation, this.CurrentLocation + 3, "']]>' is not allowed in text");
633            }
634            TryMoveNext();
635            continue;
636          }
637          throw new InternalException("Infinite loop");
638        }
639      } else if (type == TextType.Comment) {
640        // Do not report too many errors
641        bool errorReported = false;
642        while(true) {
643          if (!TryMoveTo('-', fragmentEnd)) break; // End of fragment
644          if (TryPeek("-->")) break;
645          if (TryPeek("--") && !errorReported) {
646            OnSyntaxError(this.CurrentLocation, this.CurrentLocation + 2, "'--' is not allowed in comment");
647            errorReported = true;
648          }
649          TryMoveNext();
650        }
651      } else if (type == TextType.CData) {
652        while(true) {
653          // We can not use use TryMoveTo("]]>", fragmentEnd) because it may incorectly accept "]" at the end of fragment
654          if (!TryMoveTo(']', fragmentEnd)) break; // End of fragment
655          if (TryPeek("]]>")) break;
656          TryMoveNext();
657        }
658      } else if (type == TextType.ProcessingInstruction) {
659        while(true) {
660          if (!TryMoveTo('?', fragmentEnd)) break; // End of fragment
661          if (TryPeek("?>")) break;
662          TryMoveNext();
663        }
664      } else if (type == TextType.UnknownBang) {
665        TryMoveToAnyOf(new char[] {'<', '>'}, fragmentEnd);
666      } else {
667        throw new InternalException("Unknown type " + type);
668      }
669     
670      text.ContainsOnlyWhitespace = (wsEnd == this.CurrentLocation);
671     
672      string escapedValue = GetText(start, this.CurrentLocation);
673      if (type == TextType.CharacterData) {
674        text.Value = Dereference(escapedValue, start);
675      } else {
676        text.Value = escapedValue;
677      }
678      text.Value = GetCachedString(text.Value);
679     
680      EndInternalObject(frame, storeNewObject: this.CurrentLocation > start);
681    }
682    #endregion
683   
684    #region Dereference
685    const int maxEntityLength = 16; // The longest built-in one is 10 ("&#1114111;")
686   
687    string Dereference(string text, int textLocation)
688    {
689      StringBuilder sb = null;  // The dereferenced text so far (all up to 'curr')
690      int curr = 0;
691      while(true) {
692        // Reached end of input
693        if (curr == text.Length) {
694          if (sb != null) {
695            return sb.ToString();
696          } else {
697            return text;
698          }
699        }
700       
701        // Try to find reference
702        int start = text.IndexOf('&', curr);
703       
704        // No more references found
705        if (start == -1) {
706          if (sb != null) {
707            sb.Append(text, curr, text.Length - curr); // Add rest
708            return sb.ToString();
709          } else {
710            return text;
711          }
712        }
713       
714        // Append text before the enitiy reference
715        if (sb == null) sb = new StringBuilder(text.Length);
716        sb.Append(text, curr, start - curr);
717        curr = start;
718       
719        // Process the entity
720        int errorLoc = textLocation + sb.Length;
721       
722        // Find entity name
723        int end = text.IndexOfAny(new char[] {'&', ';'}, start + 1, Math.Min(maxEntityLength, text.Length - (start + 1)));
724        if (end == -1 || text[end] == '&') {
725          // Not found
726          OnSyntaxError(errorLoc, errorLoc + 1, "Entity reference must be terminated with ';'");
727          // Keep '&'
728          sb.Append('&');
729          curr++;
730          continue;  // Restart and next character location
731        }
732        string name = text.Substring(start + 1, end - (start + 1));
733       
734        // Resolve the name
735        string replacement;
736        if (name.Length == 0) {
737          replacement = null;
738          OnSyntaxError(errorLoc + 1, errorLoc + 1, "Entity name expected");
739        } else if (name == "amp") {
740          replacement = "&";
741        } else if (name == "lt") {
742          replacement = "<";
743        } else if (name == "gt") {
744          replacement = ">";
745        } else if (name == "apos") {
746          replacement = "'";
747        } else if (name == "quot") {
748          replacement = "\"";
749        } else if (name.Length > 0 && name[0] == '#') {
750          int num;
751          if (name.Length > 1 && name[1] == 'x') {
752            if (!int.TryParse(name.Substring(2), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture.NumberFormat, out num)) {
753              num = -1;
754              OnSyntaxError(errorLoc + 3, errorLoc + 1 + name.Length, "Hexadecimal code of unicode character expected");
755            }
756          } else {
757            if (!int.TryParse(name.Substring(1), NumberStyles.None, CultureInfo.InvariantCulture.NumberFormat, out num)) {
758              num = -1;
759              OnSyntaxError(errorLoc + 2, errorLoc + 1 + name.Length, "Numeric code of unicode character expected");
760            }
761          }
762          if (num != -1) {
763            try {
764              replacement = char.ConvertFromUtf32(num);
765            } catch (ArgumentOutOfRangeException) {
766              replacement = null;
767              OnSyntaxError(errorLoc + 2, errorLoc + 1 + name.Length, "Invalid unicode character U+{0:X} ({0})", num);
768            }
769          } else {
770            replacement = null;
771          }
772        } else if (!IsValidName(name)) {
773          replacement = null;
774          OnSyntaxError(errorLoc + 1, errorLoc + 1, "Invalid entity name");
775        } else {
776          replacement = null;
777          if (tagSoupParser.UnknownEntityReferenceIsError) {
778            OnSyntaxError(errorLoc, errorLoc + 1 + name.Length + 1, "Unknown entity reference '{0}'", name);
779          }
780        }
781       
782        // Append the replacement to output
783        if (replacement != null) {
784          sb.Append(replacement);
785        } else {
786          sb.Append('&');
787          sb.Append(name);
788          sb.Append(';');
789        }
790        curr = end + 1;
791        continue;
792      }
793    }
794    #endregion
795   
796    #region Syntax Errors
797    List<InternalSyntaxError> syntaxErrors = new List<InternalSyntaxError>();
798   
799    InternalSyntaxError[] GetSyntaxErrors()
800    {
801      if (syntaxErrors.Count > 0) {
802        var arr = syntaxErrors.ToArray();
803        syntaxErrors.Clear();
804        return arr;
805      } else {
806        return null;
807      }
808    }
809   
810    void OnSyntaxError(string message, params object[] args)
811    {
812      OnSyntaxError(this.CurrentLocation, this.CurrentLocation + 1, message, args);
813    }
814   
815    void OnSyntaxError(int start, int end, string message, params object[] args)
816    {
817      if (end <= start) end = start + 1;
818      string formattedMessage = string.Format(CultureInfo.InvariantCulture, message, args);
819      Log.WriteLine("Syntax error ({0}-{1}): {2}", start, end, formattedMessage);
820      syntaxErrors.Add(new InternalSyntaxError(start - internalObjectStartPosition, end - internalObjectStartPosition, formattedMessage));
821    }
822    #endregion
823   
824    #region Helper functions
825    internal static bool IsValidName(string name)
826    {
827      try {
828        System.Xml.XmlConvert.VerifyName(name);
829        return true;
830      } catch (System.Xml.XmlException) {
831        return false;
832      }
833    }
834    #endregion
835  }
836}
Note: See TracBrowser for help on using the repository browser.