1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
|
// Copyright (c) AlphaSierraPapa for the SharpDevelop Team (for details please see \doc\copyright.txt)
// This code is distributed under the GNU LGPL (for details please see \doc\license.txt)
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using System.Linq;
using System.Threading;
using Tango.Scripting.Editors.Document;
namespace Tango.Scripting.Editors.Xml
{
/// <summary>
/// Creates object tree from XML document.
/// </summary>
/// <remarks>
/// The created tree fully describes the document and thus the orginal XML file can be
/// exactly reproduced.
///
/// Any further parses will reparse only the changed parts and the existing tree will
/// be updated with the changes. The user can add event handlers to be notified of
/// the changes. The parser tries to minimize the number of changes to the tree.
/// (for example, it will add a single child at the start of collection rather than
/// clearing the collection and adding new children)
///
/// The object tree consists of following types:
/// RawObject - Abstact base class for all types
/// RawContainer - Abstact base class for all types that can contain child nodes
/// RawDocument - The root object of the XML document
/// RawElement - Logical grouping of other nodes together. The first child is always the start tag.
/// RawTag - Represents any markup starting with "<" and (hopefully) ending with ">"
/// RawAttribute - Name-value pair in a tag
/// RawText - Whitespace or character data
///
/// For example, see the following XML and the produced object tree:
/// <![CDATA[
/// <!-- My favourite quote -->
/// <quote author="Albert Einstein">
/// Make everything as simple as possible, but not simpler.
/// </quote>
///
/// RawDocument
/// RawTag "<!--" "-->"
/// RawText " My favourite quote "
/// RawElement
/// RawTag "<" "quote" ">"
/// RawText " "
/// RawAttribute 'author="Albert Einstein"'
/// RawText "\n Make everything as simple as possible, but not simpler.\n"
/// RawTag "</" "quote" ">"
/// ]]>
///
/// The precise content of RawTag depends on what it represents:
/// <![CDATA[
/// Start tag: "<" Name? (RawText+ RawAttribute)* RawText* (">" | "/>")
/// End tag: "</" Name? (RawText+ RawAttribute)* RawText* ">"
/// P.instr.: "<?" Name? (RawText)* "?>"
/// Comment: "<!--" (RawText)* "-->"
/// CData: "<![CDATA[" (RawText)* "]]" ">"
/// DTD: "<!DOCTYPE" (RawText+ RawTag)* RawText* ">" (DOCTYPE or other DTD names)
/// UknownBang: "<!" (RawText)* ">"
/// ]]>
///
/// The type of tag can be identified by the opening backet.
/// There are helpper properties in the RawTag class to identify the type, exactly
/// one of the properties will be true.
///
/// The closing bracket may be missing or may be different for mallformed XML.
///
/// Note that there can always be multiple consequtive RawText nodes.
/// This is to ensure that idividual texts are not too long.
///
/// XML Spec: http://www.w3.org/TR/xml/
/// XML EBNF: http://www.jelks.nu/XML/xmlebnf.html
///
/// Internals:
///
/// "Try" methods can silently fail by returning false.
/// MoveTo methods do not move if they are already at the given target
/// If methods return some object, it must be no-empty. It is up to the caller to ensure
/// the context is appropriate for reading.
///
/// </remarks>
public class AXmlParser
{
AXmlDocument userDocument;
internal TrackedSegmentCollection TrackedSegments { get; private set; }
/// <summary>
/// Generate syntax error when seeing enity reference other then the build-in ones
/// </summary>
public bool UnknownEntityReferenceIsError { get; set; }
/// <summary> Create new parser </summary>
public AXmlParser()
{
this.Lock = new ReaderWriterLockSlim(LockRecursionPolicy.SupportsRecursion);
ClearInternal();
}
/// <summary> Throws exception if condition is false </summary>
internal static void Assert(bool condition, string message)
{
if (!condition) {
throw new InternalException("Assertion failed: " + message);
}
}
/// <summary> Throws exception if condition is false </summary>
[Conditional("DEBUG")]
internal static void DebugAssert(bool condition, string message)
{
if (!condition) {
throw new InternalException("Assertion failed: " + message);
}
}
[Conditional("DEBUG")]
internal static void Log(string text, params object[] pars)
{
//System.Diagnostics.Debug.WriteLine(string.Format(CultureInfo.InvariantCulture, "XML: " + text, pars));
}
/// <summary>
/// Incrementaly parse the given text.
/// You have to hold the write lock.
/// </summary>
/// <param name="input">
/// The full XML text of the new document.
/// </param>
/// <param name="changesSinceLastParse">
/// Changes since last parse. Null will cause full reparse.
/// </param>
public AXmlDocument Parse(string input, IEnumerable<DocumentChangeEventArgs> changesSinceLastParse)
{
if (!Lock.IsWriteLockHeld)
throw new InvalidOperationException("Lock needed!");
// Use changes to invalidate cache
if (changesSinceLastParse != null) {
this.TrackedSegments.UpdateOffsetsAndInvalidate(changesSinceLastParse);
} else {
this.TrackedSegments.InvalidateAll();
}
TagReader tagReader = new TagReader(this, input);
List<AXmlObject> tags = tagReader.ReadAllTags();
AXmlDocument parsedDocument = new TagMatchingHeuristics(this, input, tags).ReadDocument();
tagReader.PrintStringCacheStats();
AXmlParser.Log("Updating main DOM tree...");
userDocument.UpdateTreeFrom(parsedDocument);
userDocument.DebugCheckConsistency(true);
Assert(userDocument.GetSelfAndAllChildren().Count() == parsedDocument.GetSelfAndAllChildren().Count(), "Parsed document and updated document have different number of children");
return userDocument;
}
/// <summary>
/// Makes calls to Parse() thread-safe. Use Lock everywhere Parse() is called.
/// </summary>
public ReaderWriterLockSlim Lock { get; private set; }
/// <summary>
/// Returns the last cached version of the document.
/// </summary>
/// <exception cref="InvalidOperationException">No read lock is held by the current thread.</exception>
public AXmlDocument LastDocument {
get {
if (!Lock.IsReadLockHeld)
throw new InvalidOperationException("Read lock needed!");
return userDocument;
}
}
/// <summary>
/// Clears the parser data.
/// </summary>
/// <exception cref="InvalidOperationException">No write lock is held by the current thread.</exception>
public void Clear()
{
if (!Lock.IsWriteLockHeld)
throw new InvalidOperationException("Write lock needed!");
ClearInternal();
}
void ClearInternal()
{
this.UnknownEntityReferenceIsError = true;
this.TrackedSegments = new TrackedSegmentCollection();
this.userDocument = new AXmlDocument() { Parser = this };
this.userDocument.Document = this.userDocument;
// Track the document
this.TrackedSegments.AddParsedObject(this.userDocument, null);
this.userDocument.IsCached = false;
}
}
}
|