[11804] | 1 | // Copyright (c) 2009-2013 AlphaSierraPapa for the SharpDevelop Team |
---|
| 2 | // |
---|
| 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy of this |
---|
| 4 | // software and associated documentation files (the "Software"), to deal in the Software |
---|
| 5 | // without restriction, including without limitation the rights to use, copy, modify, merge, |
---|
| 6 | // publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons |
---|
| 7 | // to whom the Software is furnished to do so, subject to the following conditions: |
---|
| 8 | // |
---|
| 9 | // The above copyright notice and this permission notice shall be included in all copies or |
---|
| 10 | // substantial portions of the Software. |
---|
| 11 | // |
---|
| 12 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, |
---|
| 13 | // INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR |
---|
| 14 | // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE |
---|
| 15 | // FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
---|
| 16 | // OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
---|
| 17 | // DEALINGS IN THE SOFTWARE. |
---|
| 18 | |
---|
| 19 | using System; |
---|
| 20 | using System.Collections.Generic; |
---|
| 21 | using System.Diagnostics; |
---|
| 22 | using System.Globalization; |
---|
| 23 | using System.Linq; |
---|
| 24 | using System.Text; |
---|
| 25 | using System.Threading; |
---|
| 26 | using ICSharpCode.NRefactory.Editor; |
---|
| 27 | |
---|
| 28 | namespace ICSharpCode.NRefactory.Xml |
---|
| 29 | { |
---|
| 30 | class TagReader : TokenReader |
---|
| 31 | { |
---|
// The parser instance handed to the constructor.
readonly AXmlParser tagSoupParser;
// Names of the start tags pushed by StoreObject that have not been closed yet.
// Only created when the constructor is called with collapseProperlyNestedElements;
// otherwise it stays null and StoreObject does not combine tags into elements.
readonly Stack<string> elementNameStack;
---|
| 34 | |
---|
/// <summary>
/// Creates a tag reader over the given input.
/// </summary>
/// <param name="tagSoupParser">Parser instance that is kept in a field of this reader.</param>
/// <param name="input">Text source that is passed on to the base reader.</param>
/// <param name="collapseProperlyNestedElements">
/// When true, a name stack is allocated so that StoreObject can combine properly nested tags;
/// when false, the stack stays null and tags are stored as they are read.
/// </param>
public TagReader(AXmlParser tagSoupParser, ITextSource input, bool collapseProperlyNestedElements) : base(input)
{
    this.tagSoupParser = tagSoupParser;
    this.elementNameStack = collapseProperlyNestedElements ? new Stack<string>() : null;
}
---|
| 41 | |
---|
/// <summary>
/// Reads objects until the input is exhausted and returns the list of stored objects.
/// </summary>
/// <param name="cancellationToken">Checked before each object is read.</param>
/// <returns>The reader's object list.</returns>
public List<InternalObject> ReadAllObjects(CancellationToken cancellationToken)
{
    while (true) {
        if (!HasMoreData()) {
            return objects;
        }
        cancellationToken.ThrowIfCancellationRequested();
        ReadObject();
    }
}
---|
| 50 | |
---|
/// <summary>
/// Reads all objects, re-using objects from a previous run wherever the reuse map
/// says the text is unchanged and the old objects line up with the current position.
/// </summary>
/// <param name="oldObjects">Objects produced by the previous run.</param>
/// <param name="reuseMap">Segments that map unchanged ranges of the new text onto the old text.</param>
/// <param name="cancellationToken">Checked before objects are read in the catch-up and trailing loops.</param>
/// <returns>The reader's object list.</returns>
public List<InternalObject> ReadAllObjectsIncremental(InternalObject[] oldObjects, List<UnchangedSegment> reuseMap, CancellationToken cancellationToken)
{
    ObjectIterator oldCursor = new ObjectIterator(oldObjects);
    int segmentIndex = 0;
    while (segmentIndex < reuseMap.Count) {
        var segment = reuseMap[segmentIndex];

        // Parse normally until we reach the start of the unchanged segment.
        while (this.CurrentLocation < segment.NewOffset) {
            cancellationToken.ThrowIfCancellationRequested();
            ReadObject();
        }

        // Already past this segment: try the next one.
        if (this.CurrentLocation >= segment.NewOffset + segment.Length) {
            segmentIndex++;
            continue;
        }
        Debug.Assert(segment.NewOffset <= this.CurrentLocation && this.CurrentLocation < segment.NewOffset + segment.Length);

        // Translate the current position into the old text and seek the old objects there.
        int oldOffset = this.CurrentLocation - segment.NewOffset + segment.OldOffset;
        oldCursor.SkipTo(oldOffset);

        if (oldCursor.CurrentPosition != oldOffset) {
            // Old objects exist in this region but are not aligned with the current position.
            // Stay on this segment and read a single object so that we can re-align.
            ReadObject();
            continue;
        }

        // Re-use every old object whose touched range ends before the end of the segment.
        int oldSegmentEnd = segment.OldOffset + segment.Length;
        while (oldCursor.CurrentObject != null && oldCursor.CurrentPosition + oldCursor.CurrentObject.LengthTouched < oldSegmentEnd) {
            StoreObject(oldCursor.CurrentObject);
            Skip(oldCursor.CurrentObject.Length);
            oldCursor.MoveNext();
        }
        segmentIndex++; // continue with the next reuse entry
    }

    // Whatever follows the last reuse entry is parsed normally.
    while (true) {
        if (!HasMoreData()) {
            return objects;
        }
        cancellationToken.ThrowIfCancellationRequested();
        ReadObject();
    }
}
---|
| 91 | |
---|
/// <summary>
/// Appends <paramref name="obj"/> to the object list and, when element collapsing is enabled
/// (elementNameStack is not null), combines properly nested tags into InternalElement objects.
/// </summary>
/// <remarks>
/// An empty tag becomes an element on its own. A start tag pushes its name on the stack.
/// An end tag pops the expected name; if it matches and the nearest preceding tag in the
/// list is the matching start tag, everything from that start tag up to the end tag is
/// replaced by a single element. Otherwise the stack is cleared.
/// </remarks>
/// <param name="obj">The object to store.</param>
void StoreObject(InternalObject obj)
{
    objects.Add(obj);

    // Now combine properly-nested elements:
    if (elementNameStack == null)
        return; // parsing tag soup
    InternalTag tag = obj as InternalTag;
    if (tag == null)
        return;
    if (tag.IsEmptyTag) {
        // the tag is its own element
        objects[objects.Count - 1] = new InternalElement(tag) {
            Length = tag.Length,
            LengthTouched = tag.LengthTouched,
            IsPropertyNested = true,
            StartRelativeToParent = tag.StartRelativeToParent,
            NestedObjects = new [] { tag.SetStartRelativeToParent(0) }
        };
    } else if (tag.IsStartTag) {
        elementNameStack.Push(tag.Name);
    } else if (tag.IsEndTag && elementNameStack.Count > 0) {
        // Now look for the start element, beginning just before the end tag we stored:
        int startIndex = objects.Count - 2;
        bool ok = false;
        string expectedName = elementNameStack.Pop();
        if (tag.Name == expectedName) {
            // The search must include index 0: the matching start tag may be the very
            // first object in the list (e.g. a document that begins with its root element).
            while (startIndex >= 0) {
                var startTag = objects[startIndex] as InternalTag;
                if (startTag != null) {
                    if (startTag.IsStartTag) {
                        ok = (startTag.Name == expectedName);
                        break;
                    } else if (startTag.IsEndTag) {
                        // An uncollapsed end tag sits in between: not properly nested.
                        break;
                    }
                }
                startIndex--;
            }
        }
        if (ok) {
            // We found a correct nesting, let's create an element:
            InternalObject[] nestedObjects = new InternalObject[objects.Count - startIndex];
            int oldStartRelativeToParent = objects[startIndex].StartRelativeToParent;
            int pos = 0;
            int maxLengthTouched = 0;
            for (int i = 0; i < nestedObjects.Length; i++) {
                // Children are laid out back to back, relative to the new element.
                nestedObjects[i] = objects[startIndex + i].SetStartRelativeToParent(pos);
                maxLengthTouched = Math.Max(maxLengthTouched, pos + nestedObjects[i].LengthTouched);
                pos += nestedObjects[i].Length;
            }
            objects.RemoveRange(startIndex, nestedObjects.Length);
            objects.Add(
                new InternalElement((InternalTag)nestedObjects[0]) {
                    HasEndTag = true,
                    IsPropertyNested = true,
                    Length = pos,
                    LengthTouched = maxLengthTouched,
                    StartRelativeToParent = oldStartRelativeToParent,
                    NestedObjects = nestedObjects
                });
        } else {
            // Mismatched name - the nesting is not proper;
            // clear the whole stack so that none of the currently open elements are closed as properly-nested.
            elementNameStack.Clear();
        }
    }
}
---|
| 160 | |
---|
/// <summary>
/// Reads one or more objects: a tag when the next character is '&lt;',
/// character data otherwise.
/// </summary>
void ReadObject()
{
    if (!TryPeek('<')) {
        ReadText(TextType.CharacterData);
        return;
    }
    ReadTag();
}
---|
| 172 | |
---|
| 173 | #region BeginInternalObject / EndInternalObject |
---|
// Objects stored so far; returned by ReadAllObjects and ReadAllObjectsIncremental.
List<InternalObject> objects = new List<InternalObject>();
// Start position used as the origin for relative offsets of the object being read;
// saved and restored by BeginInternalObject / EndInternalObject.
int internalObjectStartPosition;

/// <summary>
/// Current location measured from <see cref="internalObjectStartPosition"/>.
/// </summary>
int CurrentRelativeLocation {
    get {
        return this.CurrentLocation - this.internalObjectStartPosition;
    }
}
---|
| 180 | |
---|
/// <summary>
/// Pairs an object that is being read with the start position that was in effect
/// before it was begun, so that EndInternalObject can restore it.
/// </summary>
struct InternalObjectFrame
{
    /// <summary>The object being read.</summary>
    public readonly InternalObject InternalObject;
    /// <summary>Value of internalObjectStartPosition before the object was begun.</summary>
    public readonly int ParentStartPosition;

    public InternalObjectFrame(InternalObject internalObject, int parentStartPosition)
    {
        this.ParentStartPosition = parentStartPosition;
        this.InternalObject = internalObject;
    }
}
---|
| 192 | |
---|
/// <summary>
/// Begins <paramref name="internalObject"/> at the current location.
/// </summary>
/// <returns>The frame to pass to EndInternalObject.</returns>
InternalObjectFrame BeginInternalObject(InternalObject internalObject)
{
    int beginLocation = this.CurrentLocation;
    return BeginInternalObject(internalObject, beginLocation);
}
---|
| 197 | |
---|
/// <summary>
/// Begins <paramref name="internalObject"/> at <paramref name="beginLocation"/>.
/// Sets the object's start relative to the enclosing object and makes
/// <paramref name="beginLocation"/> the origin for the new object.
/// </summary>
/// <param name="internalObject">The object that is about to be read.</param>
/// <param name="beginLocation">
/// Absolute position where the object starts. This may lie before the current location
/// when the text of the object has already been consumed (see MakeText).
/// </param>
/// <returns>The frame to pass to EndInternalObject, carrying the enclosing start position.</returns>
InternalObjectFrame BeginInternalObject(InternalObject internalObject, int beginLocation)
{
    internalObject.StartRelativeToParent = beginLocation - internalObjectStartPosition;

    var frame = new InternalObjectFrame(internalObject, internalObjectStartPosition);

    // The object starts at beginLocation, not at the current location: EndInternalObject
    // measures Length and LengthTouched from this position, so using CurrentLocation here
    // would give a zero Length to objects whose text was consumed before they were begun.
    internalObjectStartPosition = beginLocation;
    return frame;
}
---|
| 207 | |
---|
/// <summary>
/// Finishes the object of <paramref name="frame"/>: records its length, touched length and
/// syntax errors, optionally stores it, and restores the enclosing start position.
/// </summary>
/// <param name="frame">Frame returned by the matching BeginInternalObject call.</param>
/// <param name="storeNewObject">When false, the object is finished but not passed to StoreObject.</param>
void EndInternalObject(InternalObjectFrame frame, bool storeNewObject = true)
{
    InternalObject finished = frame.InternalObject;
    int objectStart = internalObjectStartPosition;

    finished.Length = this.CurrentLocation - objectStart;
    finished.LengthTouched = this.MaxTouchedLocation - objectStart;
    finished.SyntaxErrors = GetSyntaxErrors();

    if (storeNewObject) {
        StoreObject(finished);
    }
    internalObjectStartPosition = frame.ParentStartPosition;
}
---|
| 217 | #endregion |
---|
| 218 | |
---|
| 219 | #region Read Tag |
---|
/// <summary>
/// Reads one tag: opening bracket, name (where applicable), the tag content
/// (attributes, DTD content or text) and the closing bracket. Objects read while
/// inside the tag are moved into the tag's NestedObjects, and the tag is stored.
/// Context: "&lt;"
/// </summary>
void ReadTag()
{
    AssertHasMoreData();

    int tagStart = this.CurrentLocation;
    InternalTag tag = new InternalTag();
    var frame = BeginInternalObject(tag);

    // Read the opening bracket
    // It identifies the type of tag and parsing behavior for the rest of it
    tag.OpeningBracket = ReadOpeningBracket();

    if (tag.IsUnknownBang && !TryPeekWhiteSpace())
        OnSyntaxError(tagStart, this.CurrentLocation, "Unknown tag");

    if (tag.IsStartOrEmptyTag || tag.IsEndTag || tag.IsProcessingInstruction) {
        // Read the name
        TryMoveToNonWhiteSpace();
        tag.RelativeNameStart = this.CurrentRelativeLocation;
        string name;
        if (TryReadName(out name)) {
            if (!IsValidName(name)) {
                OnSyntaxError(this.CurrentLocation - name.Length, this.CurrentLocation, "The name '{0}' is invalid", name);
            }
        } else {
            OnSyntaxError("Element name expected");
        }
        // Whatever TryReadName produced is stored, also when it reported failure.
        tag.Name = name;
    } else {
        tag.Name = string.Empty;
    }

    bool isXmlDeclr = tag.Name == "xml" && tag.IsProcessingInstruction;
    // Everything added to 'objects' beyond this count belongs to this tag.
    int oldObjectCount = objects.Count;

    if (tag.IsStartOrEmptyTag || tag.IsEndTag || isXmlDeclr) {
        // Read attributes for the tag
        while (HasMoreData()) {
            // Check for all forbidden 'name' characters first - see ReadName
            TryMoveToNonWhiteSpace();
            if (TryPeek('<')) break;
            string endBr;
            int endBrStart = this.CurrentLocation; // Just peek
            if (TryReadClosingBracket(out endBr)) { // End tag
                // Rewind: the closing bracket itself is read after the content, below.
                GoBack(endBrStart);
                break;
            }

            // We have "=\'\"" or name - read attribute
            int attrStartOffset = this.CurrentLocation;
            ReadAttribute();
            if (tag.IsEndTag)
                OnSyntaxError(attrStartOffset, this.CurrentLocation, "Attribute not allowed in end tag.");
        }
    } else if (tag.IsDocumentType) {
        ReadContentOfDTD();
    } else {
        int start = this.CurrentLocation;
        if (tag.IsComment) {
            ReadText(TextType.Comment);
        } else if (tag.IsCData) {
            ReadText(TextType.CData);
        } else if (tag.IsProcessingInstruction) {
            ReadText(TextType.ProcessingInstruction);
        } else if (tag.IsUnknownBang) {
            ReadText(TextType.UnknownBang);
        } else {
            throw new InternalException(string.Format(CultureInfo.InvariantCulture, "Unknown opening bracket '{0}'", tag.OpeningBracket));
        }
        // Backtrack to the start of the content if no terminator was found:
        // rewind the reader and discard the objects read for the content.
        if (IsEndOfFile() || (tag.IsUnknownBang && TryPeek('<'))) {
            GoBack(start);
            objects.RemoveRange(oldObjectCount, objects.Count - oldObjectCount);
        }
    }

    // Read closing bracket
    string bracket;
    TryReadClosingBracket(out bracket);
    tag.ClosingBracket = bracket;

    // Error check: the closing bracket has to match the kind of tag
    int brStart = this.CurrentLocation - (tag.ClosingBracket ?? string.Empty).Length;
    int brEnd = this.CurrentLocation;
    if (tag.Name == null) {
        // One error was reported already
    } else if (tag.IsStartOrEmptyTag) {
        if (tag.ClosingBracket != ">" && tag.ClosingBracket != "/>") OnSyntaxError(brStart, brEnd, "'>' or '/>' expected");
    } else if (tag.IsEndTag) {
        if (tag.ClosingBracket != ">") OnSyntaxError(brStart, brEnd, "'>' expected");
    } else if (tag.IsComment) {
        if (tag.ClosingBracket != "-->") OnSyntaxError(brStart, brEnd, "'-->' expected");
    } else if (tag.IsCData) {
        if (tag.ClosingBracket != "]]>") OnSyntaxError(brStart, brEnd, "']]>' expected");
    } else if (tag.IsProcessingInstruction) {
        if (tag.ClosingBracket != "?>") OnSyntaxError(brStart, brEnd, "'?>' expected");
    } else if (tag.IsUnknownBang) {
        if (tag.ClosingBracket != ">") OnSyntaxError(brStart, brEnd, "'>' expected");
    } else if (tag.IsDocumentType) {
        if (tag.ClosingBracket != ">") OnSyntaxError(brStart, brEnd, "'>' expected");
    } else {
        throw new InternalException(string.Format(CultureInfo.InvariantCulture, "Unknown opening bracket '{0}'", tag.OpeningBracket));
    }

    // Attribute name may not appear multiple times
    if (objects.Count > oldObjectCount) {
        // Move nested objects into tag.NestedObjects:
        tag.NestedObjects = new InternalObject[objects.Count - oldObjectCount];
        objects.CopyTo(oldObjectCount, tag.NestedObjects, 0, tag.NestedObjects.Length);
        objects.RemoveRange(oldObjectCount, objects.Count - oldObjectCount);

        // Look for duplicate attributes:
        HashSet<string> attributeNames = new HashSet<string>();
        foreach (var obj in tag.NestedObjects) {
            InternalAttribute attr = obj as InternalAttribute;
            // HashSet.Add returns false when the name has been seen before.
            if (attr != null && !attributeNames.Add(attr.Name)) {
                int attrStart = tagStart + attr.StartRelativeToParent;
                OnSyntaxError(attrStart, attrStart + attr.Name.Length, "Attribute with name '{0}' already exists", attr.Name);
            }
        }
    }

    EndInternalObject(frame);
}
---|
| 347 | #endregion |
---|
| 348 | |
---|
| 349 | #region Read DTD |
---|
/// <summary>
/// Reads the content of a document type tag up to (not including) its closing '&gt;',
/// or up to a '&lt;' that indicates malformed input. Tags found inside a bracketed
/// section are read with ReadTag; the text in between is stored through MakeText.
/// </summary>
void ReadContentOfDTD()
{
    // Start of the text that has not been turned into an object yet.
    int start = this.CurrentLocation;
    while (HasMoreData()) {
        TryMoveToNonWhiteSpace();           // Skip whitespace
        // NOTE(review): the original TODO flags a bug here; presumably TryMoveTo stops in
        // front of the closing quote without consuming it - confirm against TokenReader.
        if (TryRead('\'')) TryMoveTo('\''); // Skip single quoted string TODO: Bug
        if (TryRead('\"')) TryMoveTo('\"'); // Skip double quoted string
        if (TryRead('[')) {                 // Start of nested infoset
            // Reading infoset
            while (HasMoreData()) {
                TryMoveToAnyOf('<', ']');
                if (TryPeek('<')) {
                    if (start != this.CurrentLocation) {  // Skipped when two tags follow each other directly
                        MakeText(start, this.CurrentLocation);
                    }
                    ReadTag();
                    // Pending text restarts after the nested tag.
                    start = this.CurrentLocation;
                }
                if (TryPeek(']')) break;
            }
        }
        TryRead(']');                       // End of nested infoset
        if (TryPeek('>')) break;            // Proper closing
        if (TryPeek('<')) break;            // Malformed XML
        TryMoveNext();                      // Skip anything else
    }
    // Store whatever text is left before the terminator.
    if (start != this.CurrentLocation) {
        MakeText(start, this.CurrentLocation);
    }
}
---|
| 380 | |
---|
/// <summary>
/// Creates a text object of type <c>TextType.Other</c> whose value is the input between
/// <paramref name="start"/> and <paramref name="end"/>, and hands it to EndInternalObject.
/// </summary>
/// <param name="start">Start of the text; must be smaller than <paramref name="end"/>.</param>
/// <param name="end">End of the text; must equal the current location.</param>
void MakeText(int start, int end)
{
    Log.DebugAssert(end > start, "Empty text");
    Log.DebugAssert(end == this.CurrentLocation, "end == current location");

    var textNode = new InternalText();
    var textFrame = BeginInternalObject(textNode, start);
    textNode.Type = TextType.Other;
    string content = GetText(start, end);
    textNode.Value = content;
    EndInternalObject(textFrame);
}
---|
| 392 | #endregion |
---|
| 393 | |
---|
| 394 | #region Read Brackets |
---|
/// <summary>
/// Reads any of the known opening brackets (only a full bracket) and returns it.
/// Context: "&lt;"
/// </summary>
/// <returns>
/// One of "&lt;/", "&lt;?", "&lt;!--", "&lt;![CDATA[", an entry of AXmlTag.DtdNames, "&lt;!" or "&lt;".
/// </returns>
string ReadOpeningBracket()
{
    // String literals are returned on purpose so that the memory instances are shared.
    if (!TryRead('<')) {
        throw new InternalException("'<' expected");
    }
    if (TryRead('/')) {
        return "</";
    }
    if (TryRead('?')) {
        return "<?";
    }
    if (!TryRead('!')) {
        return "<";
    }

    // After "<!": comment, CDATA section, one of the DTD names, or an unknown bang tag.
    if (TryRead("--")) {
        return "<!--";
    }
    if (TryRead("[CDATA[")) {
        return "<![CDATA[";
    }
    foreach (string dtdName in AXmlTag.DtdNames) {
        // Each DTD name includes the leading "<!", which has been consumed already.
        string remainder = dtdName.Remove(0, 2);
        if (TryRead(remainder)) {
            return dtdName;
        }
    }
    return "<!";
}
---|
| 427 | |
---|
/// <summary>
/// Tries to read any of the known closing brackets (only a full bracket).
/// Context: any
/// </summary>
/// <param name="bracket">
/// The bracket that was read ("&gt;", "/&gt;", "?&gt;", "--&gt;" or "]]&gt;"),
/// or the empty string when none was found.
/// </param>
/// <returns>True when a closing bracket was read.</returns>
bool TryReadClosingBracket(out string bracket)
{
    // String literals are assigned on purpose so that the memory instances are shared.
    if (TryRead('>')) {
        bracket = ">";
        return true;
    }
    if (TryRead("/>")) {
        bracket = "/>";
        return true;
    }
    if (TryRead("?>")) {
        bracket = "?>";
        return true;
    }
    if (TryRead("-->")) {
        bracket = "-->";
        return true;
    }
    if (TryRead("]]>")) {
        bracket = "]]>";
        return true;
    }
    bracket = string.Empty;
    return false;
}
---|
| 451 | #endregion |
---|
| 452 | |
---|
| 453 | #region Attributes |
---|
/// <summary>
/// Reads one attribute: name, equals sign (with surrounding whitespace) and value.
/// Reports syntax errors for a missing name, missing '=', missing or unbalanced quotes,
/// and stores the attribute with its unquoted, dereferenced value.
/// Context: name or "=\'\""
/// </summary>
void ReadAttribute()
{
    AssertHasMoreData();

    InternalAttribute attr = new InternalAttribute();
    var frame = BeginInternalObject(attr);

    // Read name
    string name;
    if (TryReadName(out name)) {
        if (!IsValidName(name)) {
            OnSyntaxError(this.CurrentLocation - name.Length, this.CurrentLocation, "The name '{0}' is invalid", name);
        }
    } else {
        OnSyntaxError("Attribute name expected");
    }
    attr.Name = name;

    // Read equals sign and surrounding whitespace
    int checkpoint = this.CurrentLocation;
    TryMoveToNonWhiteSpace();
    if (TryRead('=')) {
        int chk2 = this.CurrentLocation;
        TryMoveToNonWhiteSpace();
        if (!TryPeek('"') && !TryPeek('\'')) {
            // Do not read whitespace if quote does not follow
            GoBack(chk2);
        }
        // Covers the whitespace before '=', the '=' itself and any whitespace kept after it.
        attr.EqualsSignLength = this.CurrentLocation - checkpoint;
    } else {
        // No '=': give back the whitespace that was skipped while looking for it.
        GoBack(checkpoint);
        OnSyntaxError("'=' expected");
        attr.EqualsSignLength = 0;
    }

    // Read attribute value
    int start = this.CurrentLocation;
    // Falls back to the single quote when no double quote follows; TryRead below
    // then decides whether the value is quoted at all.
    char quoteChar = TryPeek('"') ? '"' : '\'';
    bool startsWithQuote;
    if (TryRead(quoteChar)) {
        startsWithQuote = true;
        int valueStart = this.CurrentLocation;
        TryMoveToAnyOf(quoteChar, '<');
        if (TryRead(quoteChar)) {
            // A closing quote should be followed by whitespace or the end of the tag.
            if (!TryPeekAnyOf(' ', '\t', '\n', '\r', '/', '>', '?')) {
                if (TryPeekPrevious('=', 2) || (TryPeekPrevious('=', 3) && TryPeekPrevious(' ', 2))) {
                    // This actually most likely means that we are in the next attribute value
                    GoBack(valueStart);
                    ReadAttributeValue(quoteChar);
                    if (TryRead(quoteChar)) {
                        OnSyntaxError("White space or end of tag expected");
                    } else {
                        OnSyntaxError("Quote {0} expected (or add whitespace after the following one)", quoteChar);
                    }
                } else {
                    OnSyntaxError("White space or end of tag expected");
                }
            }
        } else {
            // '<' or end of file
            GoBack(valueStart);
            ReadAttributeValue(quoteChar);
            OnSyntaxError("Quote {0} expected", quoteChar);
        }
    } else {
        startsWithQuote = false;
        int valueStart = this.CurrentLocation;
        ReadAttributeValue(null);
        // Consume a stray trailing quote, if any, so that it becomes part of this attribute.
        TryRead('\"');
        TryRead('\'');
        if (valueStart == this.CurrentLocation) {
            OnSyntaxError("Attribute value expected");
        } else {
            OnSyntaxError(valueStart, this.CurrentLocation, "Attribute value must be quoted");
        }
    }
    string val = GetText(start, this.CurrentLocation);
    val = Unquote(val);
    // The location passed to Dereference skips the opening quote when there is one.
    attr.Value = Dereference(val, startsWithQuote ? start + 1 : start);

    EndInternalObject(frame);
}
---|
| 539 | |
---|
/// <summary>
/// Reads everything up to a quote (excluding it), an opening/closing bracket
/// or the signature of the next attribute (name, '=', quote).
/// </summary>
/// <param name="quote">
/// The quote character that terminates the value, or null to stop at either kind of quote.
/// </param>
void ReadAttributeValue(char? quote)
{
    while (HasMoreData()) {
        // Remember where this step began so that we can rewind to it.
        int stepStart = this.CurrentLocation;
        TryMoveToNonWhiteSpace(); // Read white space (if any)

        bool atQuote = quote.HasValue
            ? TryPeek(quote.Value)
            : (TryPeek('"') || TryPeek('\''));
        if (atQuote) {
            return;
        }

        // Opening/closing bracket: rewind (including the white space) and stop.
        string closingBracket;
        if (TryPeek('<') || TryReadClosingBracket(out closingBracket)) {
            GoBack(stepStart);
            return;
        }

        if (!TryReadName()) {
            TryMoveNext(); // Accept everything else
            continue;
        }

        // A name was read: check whether it starts the next attribute.
        int nameEnd = this.CurrentLocation;
        bool nextAttributeFollows =
            TryMoveToNonWhiteSpace() && TryRead("=") &&
            TryMoveToNonWhiteSpace() && TryPeekAnyOf('"', '\'');
        if (nextAttributeFollows) {
            // Start of attribute: rewind in front of it and finish.
            GoBack(stepStart);
            return;
        }

        // Just some garbage - make the name part of the value and read more.
        GoBack(nameEnd);
    }
}
---|
| 578 | |
---|
/// <summary>
/// Removes quoting from the given string. A leading quote is always removed; the last
/// character is removed together with it only when it is the same quote character.
/// Without a leading quote, a single trailing quote is removed.
/// </summary>
/// <param name="quoted">The text to unquote; null or empty yields the empty string.</param>
/// <returns>The text without its quotes.</returns>
static string Unquote(string quoted)
{
    if (string.IsNullOrEmpty(quoted)) {
        return string.Empty;
    }

    char head = quoted[0];
    bool headIsQuote = head == '"' || head == '\'';
    if (quoted.Length == 1) {
        return headIsQuote ? string.Empty : quoted;
    }

    char tail = quoted[quoted.Length - 1];
    if (headIsQuote) {
        // Drop the opening quote, and the closing one too when it matches.
        int kept = (tail == head) ? quoted.Length - 2 : quoted.Length - 1;
        return quoted.Substring(1, kept);
    }

    bool tailIsQuote = tail == '"' || tail == '\'';
    return tailIsQuote ? quoted.Substring(0, quoted.Length - 1) : quoted;
}
---|
| 604 | #endregion |
---|
| 605 | |
---|
| 606 | #region Text |
---|
/// <summary>
/// Reads text of the given type up to the terminator that belongs to that type
/// (or to the end of the input) and stores it unless nothing was consumed.
/// </summary>
/// <param name="type">
/// Kind of text to read; it selects the terminator and whether the value is dereferenced.
/// </param>
void ReadText(TextType type)
{
    var node = new InternalText();
    var nodeFrame = BeginInternalObject(node);
    node.Type = type;

    int textStart = this.CurrentLocation;
    int limit = inputLength;

    // Whitespace would be skipped anyway by any operation
    TryMoveToNonWhiteSpace(limit);
    int whitespaceEnd = this.CurrentLocation;

    // Try move to the terminator given by the context
    if (type == TextType.WhiteSpace) {
        TryMoveToNonWhiteSpace(limit);
    } else if (type == TextType.CharacterData) {
        // Stop at '<' or at the end of the input; a ']' is only inspected for "]]>".
        while (TryMoveToAnyOf(new char[] {'<', ']'}, limit) && !TryPeek('<')) {
            if (!TryPeek(']')) {
                throw new InternalException("Infinite loop");
            }
            if (TryPeek("]]>")) {
                OnSyntaxError(this.CurrentLocation, this.CurrentLocation + 3, "']]>' is not allowed in text");
            }
            TryMoveNext();
        }
    } else if (type == TextType.Comment) {
        // Do not report too many errors
        bool doubleDashReported = false;
        while (TryMoveTo('-', limit) && !TryPeek("-->")) {
            if (TryPeek("--") && !doubleDashReported) {
                OnSyntaxError(this.CurrentLocation, this.CurrentLocation + 2, "'--' is not allowed in comment");
                doubleDashReported = true;
            }
            TryMoveNext();
        }
    } else if (type == TextType.CData) {
        // We can not use TryMoveTo("]]>", limit) because it may incorrectly accept "]" at the end of the input
        while (TryMoveTo(']', limit) && !TryPeek("]]>")) {
            TryMoveNext();
        }
    } else if (type == TextType.ProcessingInstruction) {
        while (TryMoveTo('?', limit) && !TryPeek("?>")) {
            TryMoveNext();
        }
    } else if (type == TextType.UnknownBang) {
        TryMoveToAnyOf(new char[] {'<', '>'}, limit);
    } else {
        throw new InternalException("Unknown type " + type);
    }

    // Nothing but the leading whitespace was consumed.
    node.ContainsOnlyWhitespace = (whitespaceEnd == this.CurrentLocation);

    // Only character data has its entity references resolved.
    string rawValue = GetText(textStart, this.CurrentLocation);
    if (type == TextType.CharacterData) {
        node.Value = Dereference(rawValue, textStart);
    } else {
        node.Value = rawValue;
    }
    node.Value = GetCachedString(node.Value);

    bool consumedAnything = this.CurrentLocation > textStart;
    EndInternalObject(nodeFrame, storeNewObject: consumedAnything);
}
---|
| 682 | #endregion |
---|
| 683 | |
---|
| 684 | #region Dereference |
---|
// Maximum number of characters searched after '&' for the terminating ';' of an entity reference.
const int maxEntityLength = 16; // The longest built-in one is 10 ("&#1114111;")
---|
| 686 | |
---|
| 687 | string Dereference(string text, int textLocation) |
---|
| 688 | { |
---|
| 689 | StringBuilder sb = null; // The dereferenced text so far (all up to 'curr') |
---|
| 690 | int curr = 0; |
---|
| 691 | while(true) { |
---|
| 692 | // Reached end of input |
---|
| 693 | if (curr == text.Length) { |
---|
| 694 | if (sb != null) { |
---|
| 695 | return sb.ToString(); |
---|
| 696 | } else { |
---|
| 697 | return text; |
---|
| 698 | } |
---|
| 699 | } |
---|
| 700 | |
---|
| 701 | // Try to find reference |
---|
| 702 | int start = text.IndexOf('&', curr); |
---|
| 703 | |
---|
| 704 | // No more references found |
---|
| 705 | if (start == -1) { |
---|
| 706 | if (sb != null) { |
---|
| 707 | sb.Append(text, curr, text.Length - curr); // Add rest |
---|
| 708 | return sb.ToString(); |
---|
| 709 | } else { |
---|
| 710 | return text; |
---|
| 711 | } |
---|
| 712 | } |
---|
| 713 | |
---|
| 714 | // Append text before the enitiy reference |
---|
| 715 | if (sb == null) sb = new StringBuilder(text.Length); |
---|
| 716 | sb.Append(text, curr, start - curr); |
---|
| 717 | curr = start; |
---|
| 718 | |
---|
| 719 | // Process the entity |
---|
| 720 | int errorLoc = textLocation + sb.Length; |
---|
| 721 | |
---|
| 722 | // Find entity name |
---|
| 723 | int end = text.IndexOfAny(new char[] {'&', ';'}, start + 1, Math.Min(maxEntityLength, text.Length - (start + 1))); |
---|
| 724 | if (end == -1 || text[end] == '&') { |
---|
| 725 | // Not found |
---|
| 726 | OnSyntaxError(errorLoc, errorLoc + 1, "Entity reference must be terminated with ';'"); |
---|
| 727 | // Keep '&' |
---|
| 728 | sb.Append('&'); |
---|
| 729 | curr++; |
---|
| 730 | continue; // Restart and next character location |
---|
| 731 | } |
---|
| 732 | string name = text.Substring(start + 1, end - (start + 1)); |
---|
| 733 | |
---|
| 734 | // Resolve the name |
---|
| 735 | string replacement; |
---|
| 736 | if (name.Length == 0) { |
---|
| 737 | replacement = null; |
---|
| 738 | OnSyntaxError(errorLoc + 1, errorLoc + 1, "Entity name expected"); |
---|
| 739 | } else if (name == "amp") { |
---|
| 740 | replacement = "&"; |
---|
| 741 | } else if (name == "lt") { |
---|
| 742 | replacement = "<"; |
---|
| 743 | } else if (name == "gt") { |
---|
| 744 | replacement = ">"; |
---|
| 745 | } else if (name == "apos") { |
---|
| 746 | replacement = "'"; |
---|
| 747 | } else if (name == "quot") { |
---|
| 748 | replacement = "\""; |
---|
| 749 | } else if (name.Length > 0 && name[0] == '#') { |
---|
| 750 | int num; |
---|
| 751 | if (name.Length > 1 && name[1] == 'x') { |
---|
| 752 | if (!int.TryParse(name.Substring(2), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture.NumberFormat, out num)) { |
---|
| 753 | num = -1; |
---|
| 754 | OnSyntaxError(errorLoc + 3, errorLoc + 1 + name.Length, "Hexadecimal code of unicode character expected"); |
---|
| 755 | } |
---|
| 756 | } else { |
---|
| 757 | if (!int.TryParse(name.Substring(1), NumberStyles.None, CultureInfo.InvariantCulture.NumberFormat, out num)) { |
---|
| 758 | num = -1; |
---|
| 759 | OnSyntaxError(errorLoc + 2, errorLoc + 1 + name.Length, "Numeric code of unicode character expected"); |
---|
| 760 | } |
---|
| 761 | } |
---|
| 762 | if (num != -1) { |
---|
| 763 | try { |
---|
| 764 | replacement = char.ConvertFromUtf32(num); |
---|
| 765 | } catch (ArgumentOutOfRangeException) { |
---|
| 766 | replacement = null; |
---|
| 767 | OnSyntaxError(errorLoc + 2, errorLoc + 1 + name.Length, "Invalid unicode character U+{0:X} ({0})", num); |
---|
| 768 | } |
---|
| 769 | } else { |
---|
| 770 | replacement = null; |
---|
| 771 | } |
---|
| 772 | } else if (!IsValidName(name)) { |
---|
| 773 | replacement = null; |
---|
| 774 | OnSyntaxError(errorLoc + 1, errorLoc + 1, "Invalid entity name"); |
---|
| 775 | } else { |
---|
| 776 | replacement = null; |
---|
| 777 | if (tagSoupParser.UnknownEntityReferenceIsError) { |
---|
| 778 | OnSyntaxError(errorLoc, errorLoc + 1 + name.Length + 1, "Unknown entity reference '{0}'", name); |
---|
| 779 | } |
---|
| 780 | } |
---|
| 781 | |
---|
| 782 | // Append the replacement to output |
---|
| 783 | if (replacement != null) { |
---|
| 784 | sb.Append(replacement); |
---|
| 785 | } else { |
---|
| 786 | sb.Append('&'); |
---|
| 787 | sb.Append(name); |
---|
| 788 | sb.Append(';'); |
---|
| 789 | } |
---|
| 790 | curr = end + 1; |
---|
| 791 | continue; |
---|
| 792 | } |
---|
| 793 | } |
---|
| 794 | #endregion |
---|
| 795 | |
---|
| 796 | #region Syntax Errors |
---|
// Syntax errors recorded by OnSyntaxError that were not yet handed out by GetSyntaxErrors.
List<InternalSyntaxError> syntaxErrors = new List<InternalSyntaxError>();

/// <summary>
/// Hands out the syntax errors recorded so far and empties the list.
/// </summary>
/// <returns>An array with the recorded errors, or null when there are none.</returns>
InternalSyntaxError[] GetSyntaxErrors()
{
	if (syntaxErrors.Count == 0) {
		return null;
	}

	InternalSyntaxError[] pending = syntaxErrors.ToArray();
	syntaxErrors.Clear();
	return pending;
}
---|
| 809 | |
---|
/// <summary>
/// Records a syntax error that covers the single character at the current location.
/// </summary>
/// <param name="message">Composite format string of the error message.</param>
/// <param name="args">Arguments for the format string.</param>
void OnSyntaxError(string message, params object[] args)
{
	int location = this.CurrentLocation;
	OnSyntaxError(location, location + 1, message, args);
}
---|
| 814 | |
---|
/// <summary>
/// Formats the message, writes it to the log and records the syntax error.
/// The stored offsets are relative to internalObjectStartPosition.
/// </summary>
/// <param name="start">Start of the erroneous range.</param>
/// <param name="end">End of the erroneous range; ranges that would be empty are widened to one character.</param>
/// <param name="message">Composite format string of the error message.</param>
/// <param name="args">Arguments for the format string.</param>
void OnSyntaxError(int start, int end, string message, params object[] args)
{
	// Never report an empty (or inverted) range
	int rangeEnd = end > start ? end : start + 1;

	string description = string.Format(CultureInfo.InvariantCulture, message, args);
	Log.WriteLine("Syntax error ({0}-{1}): {2}", start, rangeEnd, description);

	int relativeStart = start - internalObjectStartPosition;
	int relativeEnd = rangeEnd - internalObjectStartPosition;
	syntaxErrors.Add(new InternalSyntaxError(relativeStart, relativeEnd, description));
}
---|
| 822 | #endregion |
---|
| 823 | |
---|
| 824 | #region Helper functions |
---|
/// <summary>
/// Checks whether <paramref name="name"/> is accepted by
/// <see cref="System.Xml.XmlConvert.VerifyName"/> as an XML name.
/// </summary>
/// <param name="name">The name to check; may be null or empty.</param>
/// <returns>true if the name is valid; false otherwise (including null and empty names).</returns>
internal static bool IsValidName(string name)
{
	// VerifyName reports null/empty input with an ArgumentNullException instead of
	// an XmlException, so that case has to be handled before calling it.
	if (string.IsNullOrEmpty(name)) {
		return false;
	}
	try {
		System.Xml.XmlConvert.VerifyName(name);
		return true;
	} catch (System.Xml.XmlException) {
		return false;
	}
}
---|
| 834 | #endregion |
---|
| 835 | } |
---|
| 836 | } |
---|