dxml.parser
This implements a range-based
    StAX parser for XML 1.0 (which
    will work with XML 1.1 documents assuming that they don't use any
    1.1-specific features). For the sake of simplicity, sanity, and efficiency,
    the DTD
    section is not supported beyond what is required to parse past it.
Start tags, end tags, comments, cdata sections, and processing instructions
    are all supported and reported to the application. Anything in the DTD is
    skipped (though it's parsed enough to parse past it correctly, and that
    can result in an XMLParsingException if that XML isn't valid
    enough to be correctly skipped), and the
    XML declaration at the
    top is skipped if present (XML 1.1 requires that it be there, but XML 1.0
    does not).
    Regardless of what the XML declaration says (if present), any range of
    char will be treated as being encoded in UTF-8, any range of wchar
    will be treated as being encoded in UTF-16, and any range of dchar will
    be treated as having been encoded in UTF-32. Strings will be treated as
    ranges of their code units, not code points.
    Since the DTD is skipped, entity references other than the five which are
    predefined by the XML spec cannot be fully processed (since wherever they
    were used in the document would be replaced by what they referred to, which
    could be arbitrarily complex XML). As such, by default, if any entity
    references which are not predefined are encountered outside of the DTD, an
    XMLParsingException will be thrown (see
    Config.throwOnEntityRef for how that can be configured). The
    predefined entity references and any character references encountered will
    be checked to verify that they're valid, but they will not be replaced
    (since that does not work with returning slices of the original input).
    However, decodeXML or
    parseStdEntityRef from
    dxml.util can be used to convert the predefined entity references
    to what they refer to, and decodeXML or
    parseCharRef from
    dxml.util can be used to convert character references to what they
    refer to.
            
            
            
            
Primary Symbols
| Symbol | Description | 
|---|---|
| parseXML | The function used to initiate the parsing of an XML document. | 
| EntityRange | The range returned by parseXML. | 
| EntityRange.Entity | The element type of EntityRange. | 
Parser Configuration Helpers
| Symbol | Description | 
|---|---|
| Config | Used to configure how EntityRange parses the XML. | 
| simpleXML | A user-friendly configuration for when the application just wants the element tags and the data in between them. | 
| makeConfig | A convenience function for constructing a custom Config. | 
| SkipComments | A std.typecons.Flag used with Config to tell the parser to skip comments. | 
| SkipPI | A std.typecons.Flag used with Config to tell the parser to skip processing instructions. | 
| SplitEmpty | A std.typecons.Flag used with Config to configure how the parser deals with empty element tags. | 
Helper Types Used When Parsing
| Symbol | Description | 
|---|---|
| EntityType | The type of an entity in the XML (e.g. a start tag or a comment). | 
| TextPos | Gives the line and column number in the XML document. | 
| XMLParsingException | Thrown by EntityRange when it encounters invalid XML. | 
Helper Functions Used When Parsing
| Symbol | Description | 
|---|---|
| skipContents | Iterates an EntityRange from a start tag to its matching end tag. | 
| skipToPath | Used to navigate from one start tag to another as if the start tag names formed a file path. | 
| skipToEntityType | Skips to the next entity of the given type in the range. | 
| skipToParentEndTag | Iterates an EntityRange until it reaches the end tag that matches the start tag which is the parent of the current entity. | 
License:
 Boost License 1.0.
See Also:
 Official Specification for XML 1.0
Examples:
 
auto xml = "<!-- comment -->\n" ~ "<root>\n" ~ " <foo>some text<whatever/></foo>\n" ~ " <bar/>\n" ~ " <baz></baz>\n" ~ "</root>"; { auto range = parseXML(xml); assert(range.front.type == EntityType.comment); assert(range.front.text == " comment "); range.popFront(); assert(range.front.type == EntityType.elementStart); assert(range.front.name == "root"); range.popFront(); assert(range.front.type == EntityType.elementStart); assert(range.front.name == "foo"); range.popFront(); assert(range.front.type == EntityType.text); assert(range.front.text == "some text"); range.popFront(); assert(range.front.type == EntityType.elementEmpty); assert(range.front.name == "whatever"); range.popFront(); assert(range.front.type == EntityType.elementEnd); assert(range.front.name == "foo"); range.popFront(); assert(range.front.type == EntityType.elementEmpty); assert(range.front.name == "bar"); range.popFront(); assert(range.front.type == EntityType.elementStart); assert(range.front.name == "baz"); range.popFront(); assert(range.front.type == EntityType.elementEnd); assert(range.front.name == "baz"); range.popFront(); assert(range.front.type == EntityType.elementEnd); assert(range.front.name == "root"); range.popFront(); assert(range.empty); } { auto range = parseXML!simpleXML(xml); // simpleXML skips comments assert(range.front.type == EntityType.elementStart); assert(range.front.name == "root"); range.popFront(); assert(range.front.type == EntityType.elementStart); assert(range.front.name == "foo"); range.popFront(); assert(range.front.type == EntityType.text); assert(range.front.text == "some text"); range.popFront(); // simpleXML splits empty element tags into a start tag and end tag // so that the code doesn't have to care whether a start tag with no // content is an empty tag or a start tag and end tag with nothing but // whitespace in between. 
assert(range.front.type == EntityType.elementStart); assert(range.front.name == "whatever"); range.popFront(); assert(range.front.type == EntityType.elementEnd); assert(range.front.name == "whatever"); range.popFront(); assert(range.front.type == EntityType.elementEnd); assert(range.front.name == "foo"); range.popFront(); assert(range.front.type == EntityType.elementStart); assert(range.front.name == "bar"); range.popFront(); assert(range.front.type == EntityType.elementEnd); assert(range.front.name == "bar"); range.popFront(); assert(range.front.type == EntityType.elementStart); assert(range.front.name == "baz"); range.popFront(); assert(range.front.type == EntityType.elementEnd); assert(range.front.name == "baz"); range.popFront(); assert(range.front.type == EntityType.elementEnd); assert(range.front.name == "root"); range.popFront(); assert(range.empty); }
- classXMLParsingException: object.Exception;
- The exception type thrown when the XML parser encounters invalid XML.- TextPospos;
- The position in the XML input where the problem is.
 
- structTextPos;
- Where in the XML document an entity is. The line and column numbers are 1-based. The primary use case for TextPos is XMLParsingException, but an application may have other uses for it. The TextPos for an Entity can be obtained from Entity.pos.
- int line;
- A line number in the XML file.
- intcol;
- A column number in a line of the XML file.Each code unit is considered a column, so depending on what a program is looking to do with the column number, it may need to examine the actual text on that line and calculate the number that represents what the program wants to display (e.g. the number of graphemes).
 
- structConfig;
- Used to configure how the parser works.- FlagskipComments;
- Whether the comments should be skipped while parsing. If skipComments == SkipComments.yes, any entities of type EntityType.comment will be omitted from the parsing results, and they will not be validated beyond what is required to parse past them. Defaults to SkipComments.no.
- FlagskipPI;
- Whether processing instructions should be skipped. If skipPI == SkipPI.yes, any entities of type EntityType.pi will be skipped, and they will not be validated beyond what is required to parse past them. Defaults to SkipPI.no.
- FlagsplitEmpty;
- Whether the parser should report empty element tags as if they were a start tag followed by an end tag with nothing in between. If splitEmpty == SplitEmpty.yes, then whenever an EntityType.elementEmpty is encountered, the parser will claim that that entity is an EntityType.elementStart, and then it will provide an EntityType.elementEnd as the next entity before the entity that actually follows it. The purpose of this is to simplify the code using the parser, since most code does not care about the difference between an empty tag and a start and end tag with nothing in between. But since some code may care about the difference, the behavior is configurable. Defaults to SplitEmpty.no. Examples: enum configSplitYes = makeConfig(SplitEmpty.yes); { auto range = parseXML("<root></root>"); assert(range.front.type == EntityType.elementStart); assert(range.front.name == "root"); range.popFront(); assert(range.front.type == EntityType.elementEnd); assert(range.front.name == "root"); range.popFront(); assert(range.empty); } { // No difference if the tags are already split. auto range = parseXML!configSplitYes("<root></root>"); assert(range.front.type == EntityType.elementStart); assert(range.front.name == "root"); range.popFront(); assert(range.front.type == EntityType.elementEnd); assert(range.front.name == "root"); range.popFront(); assert(range.empty); } { // This treats <root></root> and <root/> as distinct. auto range = parseXML("<root/>"); assert(range.front.type == EntityType.elementEmpty); assert(range.front.name == "root"); range.popFront(); assert(range.empty); } { // This is parsed as if it were <root></root> instead of <root/>. auto range = parseXML!configSplitYes("<root/>"); assert(range.front.type == EntityType.elementStart); assert(range.front.name == "root"); range.popFront(); assert(range.front.type == EntityType.elementEnd); assert(range.front.name == "root"); range.popFront(); assert(range.empty); } 
- FlagthrowOnEntityRef;
- Whether the parser should throw when it encounters any entity references other than the five entity references defined in the XML standard. Any other entity references would have to be defined in the DTD in order to be valid. And in order to know what XML they represent (which could be arbitrarily complex, even effectively inserting entire XML documents into the middle of the XML), the DTD would have to be parsed. However, dxml does not support parsing the DTD beyond what is required to correctly parse past it, and replacing entity references with what they represent would not work with the slicing semantics that EntityRange provides. As such, it is not possible for dxml to correctly handle any entity references other than the five which are defined in the XML standard, and even those are only parsed by using dxml.util.decodeXML or dxml.util.parseStdEntityRef. EntityRange always validates that entity references are one of the five, predefined entity references, but otherwise, it lets them pass through as normal text. It does not replace them with what they represent. As such, the default behavior of EntityRange is to throw an XMLParsingException when it encounters an entity reference which is not one of the five defined by the XML standard. With that behavior, there is no risk of processing an XML document as if it had no entity references and ending up with what the program using the parser would probably consider incorrect results. However, there are cases where a program may find it acceptable to treat entity references as normal text and ignore them. As such, if a program wishes to take that approach, it can set throwOnEntityRef to ThrowOnEntityRef.no. If throwOnEntityRef == ThrowOnEntityRef.no, then any entity reference that it encounters will be validated to ensure that it is syntactically valid (i.e. 
that the characters it contains form what could be a valid entity reference assuming that the DTD declared it properly), but otherwise, EntityRange will treat it as normal text, just like it treats the five, predefined entity references as normal text. Note that any valid XML entity reference which contains start or end tags must contain matching start or end tags, and entity references cannot contain incomplete fragments of XML (e.g. the start or end of a comment). So, missing entity references should only affect the data in the XML document and not its overall structure (if that were not true, attempting to ignore entity references such as ThrowOnEntityRef.no does would be a disaster in the making). However, how reasonable it is to miss that data depends entirely on the application and what the XML documents it's parsing contain - hence, the behavior is configurable.See Also: dxml.util.StdEntityRef
 dxml.util.parseStdEntityRef
 dxml.util.parseCharRef
 dxml.util.encodeCharRef
 dxml.util.decodeXML
 dxml.util.asDecodedXML
Examples: import std.exception : assertThrown; import dxml.util : decodeXML; auto xml = "<root>\n" ~ " <std>&amp;&apos;&gt;&lt;&quot;</std>\n" ~ " <other>&foobar;</other>\n" ~ " <invalid>&--;</invalid>\n" ~ "</root>"; // ThrowOnEntityRef.yes { auto range = parseXML(xml); assert(range.front.type == EntityType.elementStart); assert(range.front.name == "root"); range.popFront(); assert(range.front.type == EntityType.elementStart); assert(range.front.name == "std"); range.popFront(); assert(range.front.type == EntityType.text); assert(range.front.text == "&amp;&apos;&gt;&lt;&quot;"); assert(range.front.text.decodeXML() == `&'><"`); range.popFront(); assert(range.front.type == EntityType.elementEnd); assert(range.front.name == "std"); range.popFront(); assert(range.front.type == EntityType.elementStart); assert(range.front.name == "other"); // Attempted to parse past "&foobar;", which is syntactically // valid, but it's not one of the five predefined entity references. assertThrown!XMLParsingException(range.popFront()); } // ThrowOnEntityRef.no { auto range = parseXML!(makeConfig(ThrowOnEntityRef.no))(xml); assert(range.front.type == EntityType.elementStart); assert(range.front.name == "root"); range.popFront(); assert(range.front.type == EntityType.elementStart); assert(range.front.name == "std"); range.popFront(); assert(range.front.type == EntityType.text); assert(range.front.text == "&amp;&apos;&gt;&lt;&quot;"); assert(range.front.text.decodeXML() == `&'><"`); range.popFront(); assert(range.front.type == EntityType.elementEnd); assert(range.front.name == "std"); range.popFront(); assert(range.front.type == EntityType.elementStart); assert(range.front.name == "other"); // Doesn't throw, because "&foobar;" is syntactically valid. range.popFront(); assert(range.front.type == EntityType.text); assert(range.front.text == "&foobar;"); // decodeXML has no effect on non-standard entity references. 
assert(range.front.text.decodeXML() == "&foobar;"); range.popFront(); assert(range.front.type == EntityType.elementEnd); assert(range.front.name == "other"); range.popFront(); assert(range.front.type == EntityType.elementStart); assert(range.front.name == "invalid"); // Attempted to parse past "&--;", which is not syntactically valid, // because -- is not a valid name for an entity reference. assertThrown!XMLParsingException(range.popFront()); } 
 
- aliasSkipComments= std.typecons.Flag!"SkipComments".Flag;
- See Also: skipComments
- aliasSkipPI= std.typecons.Flag!"SkipPI".Flag;
- See Also: skipPI
- aliasSplitEmpty= std.typecons.Flag!"SplitEmpty".Flag;
- See Also: splitEmpty
- aliasThrowOnEntityRef= std.typecons.Flag!"ThrowOnEntityRef".Flag;
- See Also: throwOnEntityRef
- ConfigmakeConfig(Args...)(Args args);
- Helper function for creating a custom config. It makes it easy to set one or more of the member variables to something other than the default without having to worry about explicitly setting them individually or setting them all at once via a constructor.The order of the arguments does not matter. The types of each of the members of Config are unique, so that information alone is sufficient to determine which argument should be assigned to which member.Examples:{ auto config = makeConfig(SkipComments.yes); assert(config.skipComments == SkipComments.yes); assert(config.skipPI == Config.init.skipPI); assert(config.splitEmpty == Config.init.splitEmpty); assert(config.throwOnEntityRef == Config.init.throwOnEntityRef); } { auto config = makeConfig(SkipComments.yes, SkipPI.yes); assert(config.skipComments == SkipComments.yes); assert(config.skipPI == SkipPI.yes); assert(config.splitEmpty == Config.init.splitEmpty); assert(config.throwOnEntityRef == Config.init.throwOnEntityRef); } { auto config = makeConfig(SplitEmpty.yes, SkipComments.yes, ThrowOnEntityRef.no); assert(config.skipComments == SkipComments.yes); assert(config.skipPI == Config.init.skipPI); assert(config.splitEmpty == SplitEmpty.yes); assert(config.throwOnEntityRef == ThrowOnEntityRef.no); }
- enum ConfigsimpleXML;
- This Config is intended for making it easy to parse XML by skipping everything that isn't the actual data as well as making it simpler to deal with empty element tags by treating them the same as a start tag and end tag with nothing but whitespace between them.Examples:static assert(simpleXML.skipComments == SkipComments.yes); static assert(simpleXML.skipPI == SkipPI.yes); static assert(simpleXML.splitEmpty == SplitEmpty.yes); static assert(simpleXML.throwOnEntityRef == ThrowOnEntityRef.yes); 
- enumEntityType: int;
- Represents the type of an XML entity. Used by EntityRange.Entity.- cdata
- A cdata section: <![CDATA[ ... ]]>.
- comment
- An XML comment: <!-- ... -->.
- elementStart
- The start tag for an element. e.g. <foo name="value">.
- elementEnd
- The end tag for an element. e.g. </foo>.
- elementEmpty
- The tag for an element with no contents or matching end tag. e.g. <foo name="value"/>.
- pi
- A processing instruction such as <?foo?>. Note that the <?xml ... ?> is skipped and not treated as an EntityType.pi.See Also: http://www.w3.org/TR/REC-xml/#sec-pi
- text
- The content of an element tag that is simple text. If there is an entity other than the end tag following the text, then the text includes up to that entity. Note however that character references (e.g. "&#42;") and the predefined entity references (e.g. "&apos;") are left unprocessed in the text. In order for them to be processed, the text should be passed to either decodeXML or asDecodedXML. Entity references which are not predefined are considered invalid XML, because the DTD section is skipped, and thus they cannot be processed properly.
 
- structEntityRange(Config cfg, R) if (isForwardRange!R && isSomeChar!(ElementType!R)); EntityRange!(config, R)parseXML(Config config = Config.init, R)(R xmlText)
 if(isForwardRange!R && isSomeChar!(ElementType!R));
- Lazily parses the given range of characters as an XML document. EntityRange is essentially a StAX parser, though it evolved into that rather than being based on what Java did, and it's range-based rather than iterator-based, so its API is likely to differ from other implementations. The basic concept should be the same though. One of the core design goals of this parser is to slice the original input rather than having to allocate strings for the output or wrap it in a lazy range that produces a mutated version of the data. So, all of the text that the parser provides is either a slice or std.range.takeExactly of the input. However, in some cases, for the parser to be fully compliant with the XML spec, dxml.util.decodeXML must be called on the text to mutate certain constructs (e.g. removing any '\r' in the text or converting "&lt;" to '<'). But that's left up to the application. The parser is not @nogc, but it allocates memory very minimally. It allocates some of its state on the heap so it can validate attributes and end tags. However, that state is shared among all the ranges that came from the same call to parseXML (only the range farthest along in parsing validates attributes or end tags), so save does not allocate memory unless save on the underlying range allocates memory. The shared state currently uses a couple of dynamic arrays to validate the tags and attributes, and if the document has a particularly deep tag depth or has a lot of attributes on a start tag, then some reallocations may occur until the maximum is reached, but enough is reserved that for most documents, no reallocations will occur. The only other times that the parser would allocate would be if an exception were thrown or if the range that was passed to parseXML allocates for any reason when calling any of the range primitives. If invalid XML is encountered at any point during the parsing process, an XMLParsingException will be thrown. 
If an exception has been thrown, then the parser is in an invalid state, and it is an error to call any functions on it. However, note that XML validation is reduced for any entities that are skipped (e.g. for anything in the DTD, validation is reduced to what is required to correctly parse past it, and when Config.skipPI == SkipPI.yes, processing instructions are only validated enough to correctly skip past them). As the module documentation says, this parser does not provide any DTD support. It is not possible to properly support the DTD while returning slices of the original input, and the DTD portion of the spec makes parsing XML far, far more complicated. A quick note about carriage returns: per the XML spec, they are all supposed to either be stripped out or replaced with newlines or spaces before the XML parser even processes the text. That doesn't work when the parser is slicing the original text and not mutating it at all. So, for the purposes of parsing, this parser treats all carriage returns as if they were newlines or spaces (though they won't count as newlines when counting the lines for TextPos). However, they will appear in any text fields or attribute values if they are in the document (since the text fields and attribute values are slices of the original text). dxml.util.decodeXML can be used to strip them along with converting any character references in the text. 
Alternatively, the application can remove them all before callingparseXML, but it's not necessary.Examples:auto xml = "<?xml version='1.0'?>\n" ~ "<?instruction start?>\n" ~ "<foo attr='42'>\n" ~ " <bar/>\n" ~ " <!-- no comment -->\n" ~ " <baz hello='world'>\n" ~ " nothing to say.\n" ~ " nothing at all...\n" ~ " </baz>\n" ~ "</foo>\n" ~ "<?some foo?>"; { auto range = parseXML(xml); assert(range.front.type == EntityType.pi); assert(range.front.name == "instruction"); assert(range.front.text == "start"); range.popFront(); assert(range.front.type == EntityType.elementStart); assert(range.front.name == "foo"); { auto attrs = range.front.attributes; assert(walkLength(attrs.save) == 1); assert(attrs.front.name == "attr"); assert(attrs.front.value == "42"); } range.popFront(); assert(range.front.type == EntityType.elementEmpty); assert(range.front.name == "bar"); range.popFront(); assert(range.front.type == EntityType.comment); assert(range.front.text == " no comment "); range.popFront(); assert(range.front.type == EntityType.elementStart); assert(range.front.name == "baz"); { auto attrs = range.front.attributes; assert(walkLength(attrs.save) == 1); assert(attrs.front.name == "hello"); assert(attrs.front.value == "world"); } range.popFront(); assert(range.front.type == EntityType.text); assert(range.front.text == "\n nothing to say.\n nothing at all...\n "); range.popFront(); assert(range.front.type == EntityType.elementEnd); // </baz> range.popFront(); assert(range.front.type == EntityType.elementEnd); // </foo> range.popFront(); assert(range.front.type == EntityType.pi); assert(range.front.name == "some"); assert(range.front.text == "foo"); range.popFront(); assert(range.empty); } { auto range = parseXML!simpleXML(xml); // simpleXML is set to skip processing instructions. 
assert(range.front.type == EntityType.elementStart); assert(range.front.name == "foo"); { auto attrs = range.front.attributes; assert(walkLength(attrs.save) == 1); assert(attrs.front.name == "attr"); assert(attrs.front.value == "42"); } // simpleXML is set to split empty tags so that <bar/> is treated // as the same as <bar></bar> so that code does not have to // explicitly handle empty tags. range.popFront(); assert(range.front.type == EntityType.elementStart); assert(range.front.name == "bar"); range.popFront(); assert(range.front.type == EntityType.elementEnd); assert(range.front.name == "bar"); // simpleXML is set to skip comments. range.popFront(); assert(range.front.type == EntityType.elementStart); assert(range.front.name == "baz"); { auto attrs = range.front.attributes; assert(walkLength(attrs.save) == 1); assert(attrs.front.name == "hello"); assert(attrs.front.value == "world"); } range.popFront(); assert(range.front.type == EntityType.text); assert(range.front.text == "\n nothing to say.\n nothing at all...\n "); range.popFront(); assert(range.front.type == EntityType.elementEnd); // </baz> range.popFront(); assert(range.front.type == EntityType.elementEnd); // </foo> range.popFront(); assert(range.empty); } - aliasconfig= cfg;
- The Config used when parsing the XML.
- aliasInput= R;
- The type of the range that EntityRange is parsing.
- aliasSliceOfR= R;
- The type used when any slice of the original input is used. If R is a string or supports slicing, then SliceOfR is the same as R; otherwise, it's the result of calling std.range.takeExactly on the input. import std.algorithm : filter; import std.range : takeExactly; static assert(is(EntityRange!(Config.init, string).SliceOfR == string)); auto range = filter!(a => true)("some xml"); static assert(is(EntityRange!(Config.init, typeof(range)).SliceOfR == typeof(takeExactly(range, 42)))); 
- structEntity;
- Represents an entity in the XML document.Note that the type determines which properties can be used, and it can determine whether functions which anEntityor EntityRange is passed to are allowed to be called. Each function lists which EntityTypes are allowed, and it is an error to call them with any other EntityType.- pure nothrow @nogc @property @safe EntityTypetype() const;
- The EntityType for this Entity.Examples:auto xml = "<root>\n" ~ " <!--no comment-->\n" ~ " <![CDATA[cdata run]]>\n" ~ " <text>I am text!</text>\n" ~ " <empty/>\n" ~ " <?pi?>\n" ~ "</root>"; auto range = parseXML(xml); assert(range.front.type == EntityType.elementStart); assert(range.front.name == "root"); range.popFront(); assert(range.front.type == EntityType.comment); assert(range.front.text == "no comment"); range.popFront(); assert(range.front.type == EntityType.cdata); assert(range.front.text == "cdata run"); range.popFront(); assert(range.front.type == EntityType.elementStart); assert(range.front.name == "text"); range.popFront(); assert(range.front.type == EntityType.text); assert(range.front.text == "I am text!"); range.popFront(); assert(range.front.type == EntityType.elementEnd); assert(range.front.name == "text"); range.popFront(); assert(range.front.type == EntityType.elementEmpty); assert(range.front.name == "empty"); range.popFront(); assert(range.front.type == EntityType.pi); assert(range.front.name == "pi"); range.popFront(); assert(range.front.type == EntityType.elementEnd); assert(range.front.name == "root"); range.popFront(); assert(range.empty); 
- pure nothrow @nogc @property @safe TextPospos() const;
- The position in the original text where the entity starts. Examples: auto xml = "<root>\n" ~ " <foo>\n" ~ " Foo and bar. Always foo and bar...\n" ~ " </foo>\n" ~ "</root>"; auto range = parseXML(xml); assert(range.front.type == EntityType.elementStart); assert(range.front.name == "root"); assert(range.front.pos == TextPos(1, 1)); range.popFront(); assert(range.front.type == EntityType.elementStart); assert(range.front.name == "foo"); assert(range.front.pos == TextPos(2, 5)); range.popFront(); assert(range.front.type == EntityType.text); assert(range.front.text == "\n" ~ " Foo and bar. Always foo and bar...\n" ~ " "); assert(range.front.pos == TextPos(2, 10)); range.popFront(); assert(range.front.type == EntityType.elementEnd); assert(range.front.name == "foo"); assert(range.front.pos == TextPos(4, 5)); range.popFront(); assert(range.front.type == EntityType.elementEnd); assert(range.front.name == "root"); assert(range.front.pos == TextPos(5, 1)); range.popFront(); assert(range.empty); 
- @property SliceOfRname();
- Gives thenameof this Entity.Note that this is the directnamein the XML for this entity and does not contain any of the names of any of the parent entities that this entity has. If an application wants the full "path" of the entity, then it will have to keep track of that itself. The parser does not do that as it would require allocating memory.Supported EntityTypes: elementStart elementEnd elementEmpty pi Examples:auto xml = "<root>\n" ~ " <empty/>\n" ~ " <?pi?>\n" ~ "</root>"; auto range = parseXML(xml); assert(range.front.type == EntityType.elementStart); assert(range.front.name == "root"); range.popFront(); assert(range.front.type == EntityType.elementEmpty); assert(range.front.name == "empty"); range.popFront(); assert(range.front.type == EntityType.pi); assert(range.front.name == "pi"); range.popFront(); assert(range.front.type == EntityType.elementEnd); assert(range.front.name == "root"); range.popFront(); assert(range.empty); 
- @property autoattributes();
- Returns a lazy range of attributes for a start tag where each attribute is represented as a
 Tuple!( SliceOfR, "name", SliceOfR, "value", TextPos, "pos").Supported EntityTypes: elementStart elementEmpty Examples:import std.algorithm.comparison : equal; import std.algorithm.iteration : filter; { auto xml = "<root/>"; auto range = parseXML(xml); assert(range.front.type == EntityType.elementEmpty); assert(range.front.attributes.empty); } { auto xml = "<root a='42' q='29' w='hello'/>"; auto range = parseXML(xml); assert(range.front.type == EntityType.elementEmpty); auto attrs = range.front.attributes; assert(attrs.front.name == "a"); assert(attrs.front.value == "42"); assert(attrs.front.pos == TextPos(1, 7)); attrs.popFront(); assert(attrs.front.name == "q"); assert(attrs.front.value == "29"); assert(attrs.front.pos == TextPos(1, 14)); attrs.popFront(); assert(attrs.front.name == "w"); assert(attrs.front.value == "hello"); assert(attrs.front.pos == TextPos(1, 21)); attrs.popFront(); assert(attrs.empty); } // Because the type of name and value is SliceOfR, == with a string // only works if the range passed to parseXML was string. { auto xml = filter!(a => true)("<root a='42' q='29' w='hello'/>"); auto range = parseXML(xml); assert(range.front.type == EntityType.elementEmpty); auto attrs = range.front.attributes; assert(equal(attrs.front.name, "a")); assert(equal(attrs.front.value, "42")); assert(attrs.front.pos == TextPos(1, 7)); attrs.popFront(); assert(equal(attrs.front.name, "q")); assert(equal(attrs.front.value, "29")); assert(attrs.front.pos == TextPos(1, 14)); attrs.popFront(); assert(equal(attrs.front.name, "w")); assert(equal(attrs.front.value, "hello")); assert(attrs.front.pos == TextPos(1, 21)); attrs.popFront(); assert(attrs.empty); } 
- @property SliceOfRtext();
- Returns the textual value of this Entity. In the case of EntityType.pi, this is the text that follows the name, whereas in the other cases, the text is the entire contents of the entity (save for the delimiters on the ends if that entity has them). Supported EntityTypes: cdata comment pi text Examples: auto xml = "<?xml version='1.0'?>\n" ~ "<?instructionName?>\n" ~ "<?foo here is something to say?>\n" ~ "<root>\n" ~ " <![CDATA[ Yay! random text >> << ]]>\n" ~ " <!-- some random comment -->\n" ~ " <p>something here</p>\n" ~ " <p>\n" ~ " something else\n" ~ " here</p>\n" ~ "</root>"; auto range = parseXML(xml); // "<?instructionName?>\n" ~ assert(range.front.type == EntityType.pi); assert(range.front.name == "instructionName"); assert(range.front.text.empty); // "<?foo here is something to say?>\n" ~ range.popFront(); assert(range.front.type == EntityType.pi); assert(range.front.name == "foo"); assert(range.front.text == "here is something to say"); // "<root>\n" ~ range.popFront(); assert(range.front.type == EntityType.elementStart); // " <![CDATA[ Yay! random text >> << ]]>\n" ~ range.popFront(); assert(range.front.type == EntityType.cdata); assert(range.front.text == " Yay! 
random text >> << "); // " <!-- some random comment -->\n" ~ range.popFront(); assert(range.front.type == EntityType.comment); assert(range.front.text == " some random comment "); // " <p>something here</p>\n" ~ range.popFront(); assert(range.front.type == EntityType.elementStart); assert(range.front.name == "p"); range.popFront(); assert(range.front.type == EntityType.text); assert(range.front.text == "something here"); range.popFront(); assert(range.front.type == EntityType.elementEnd); assert(range.front.name == "p"); // " <p>\n" ~ // " something else\n" ~ // " here</p>\n" ~ range.popFront(); assert(range.front.type == EntityType.elementStart); range.popFront(); assert(range.front.type == EntityType.text); assert(range.front.text == "\n something else\n here"); range.popFront(); assert(range.front.type == EntityType.elementEnd); // "</root>" range.popFront(); assert(range.front.type == EntityType.elementEnd); range.popFront(); assert(range.empty); 
 
- @property Entity front();
- Returns the Entity representing the entity in the XML document which was most recently parsed.
- void popFront();
- Move to the next entity. The next entity is the next one that is linearly in the XML document. So, if the current entity has child entities, the next entity will be the first child entity, whereas if it has no child entities, it will be the next entity at the same level. Throws: XMLParsingException on invalid XML.
- pure nothrow @nogc @property @safe bool empty() const;
- Whether the end of the XML document has been reached. Note that because an XMLParsingException will be thrown on invalid XML, it's actually possible to call front and popFront without checking empty if the only way that empty would be true is if the XML were invalid (e.g. if at a start tag, it's a given that there's at least one end tag left in the document unless it's invalid XML). However, of course, caution should be used to ensure that incorrect assumptions are not made that allow the document to reach its end earlier than predicted without throwing an XMLParsingException, since it's still an error to call front or popFront if empty would return true.
- @property auto save();
- Forward range function for obtaining a copy of the range which can then be iterated independently of the original.
- EntityRange takeNone();
- Returns an empty range. This corresponds to std.range.takeNone except that it doesn't create a wrapper type.
 
- R skipContents(R)(R entityRange)
 if(isInstanceOf!(EntityRange, R));
- Takes an EntityRange which is at a start tag and iterates it until it is at its corresponding end tag. It is an error to call skipContents when the current entity is not EntityType.elementStart. Supported EntityTypes: elementStart Returns: The range with its front now at the end tag corresponding to the start tag that was front when the function was called. Throws: XMLParsingException on invalid XML. Examples: auto xml = "<root>\n" ~ " <foo>\n" ~ " <bar>\n" ~ " Some text\n" ~ " </bar>\n" ~ " </foo>\n" ~ " <!-- no comment -->\n" ~ "</root>"; auto range = parseXML(xml); assert(range.front.type == EntityType.elementStart); assert(range.front.name == "root"); range.popFront(); assert(range.front.type == EntityType.elementStart); assert(range.front.name == "foo"); range = range.skipContents(); assert(range.front.type == EntityType.elementEnd); assert(range.front.name == "foo"); range.popFront(); assert(range.front.type == EntityType.comment); assert(range.front.text == " no comment "); range.popFront(); assert(range.front.type == EntityType.elementEnd); assert(range.front.name == "root"); range.popFront(); assert(range.empty); 
- R skipToEntityType(R)(R entityRange, EntityType[] entityTypes...)
 if(isInstanceOf!(EntityRange, R));
- Skips entities until the given EntityType is reached. If multiple EntityTypes are given, then any one of them counts as a match. The current entity is skipped regardless of whether it is the given EntityType. This is essentially a slightly optimized equivalent to: if(!range.empty()) { range.popFront(); range = range.find!((a, b) => a.type == b.type)(entityTypes); } Returns: The given range with its front now at the first entity which matched one of the given EntityTypes or an empty range if none were found. Throws: XMLParsingException on invalid XML. Examples: auto xml = "<root>\n" ~ " <!-- blah blah blah -->\n" ~ " <foo>nothing to say</foo>\n" ~ "</root>"; auto range = parseXML(xml); assert(range.front.type == EntityType.elementStart); assert(range.front.name == "root"); range = range.skipToEntityType(EntityType.elementStart, EntityType.elementEmpty); assert(range.front.type == EntityType.elementStart); assert(range.front.name == "foo"); assert(range.skipToEntityType(EntityType.comment).empty); // skipToEntityType will work on an empty range but will always // return an empty range. assert(range.takeNone().skipToEntityType(EntityType.comment).empty); 
- R skipToParentEndTag(R)(R entityRange)
 if(isInstanceOf!(EntityRange, R));
- Skips entities until the end tag is reached that corresponds to the start tag that is the parent of the current entity.Returns: The given range with its front now at the end tag which corresponds to the parent start tag of the entity that was front whenskipToParentEndTagwas called. If the current entity does not have a parent start tag (which means that it's either the root element or a comment or PI outside of the root element), then an empty range is returned.Throws: XMLParsingException on invalid XML.Examples:auto xml = "<root>\n" ~ " <foo>\n" ~ " <!-- comment -->\n" ~ " <bar>exam</bar>\n" ~ " </foo>\n" ~ " <!-- another comment -->\n" ~ "</root>"; { auto range = parseXML(xml); assert(range.front.type == EntityType.elementStart); assert(range.front.name == "root"); range.popFront(); assert(range.front.type == EntityType.elementStart); assert(range.front.name == "foo"); range.popFront(); assert(range.front.type == EntityType.comment); assert(range.front.text == " comment "); range = range.skipToParentEndTag(); assert(range.front.type == EntityType.elementEnd); assert(range.front.name == "foo"); range = range.skipToParentEndTag(); assert(range.front.type == EntityType.elementEnd); assert(range.front.name == "root"); range = range.skipToParentEndTag(); assert(range.empty); } { auto range = parseXML(xml); assert(range.front.type == EntityType.elementStart); assert(range.front.name == "root"); range.popFront(); assert(range.front.type == EntityType.elementStart); assert(range.front.name == "foo"); range.popFront(); assert(range.front.type == EntityType.comment); assert(range.front.text == " comment "); range.popFront(); assert(range.front.type == EntityType.elementStart); assert(range.front.name == "bar"); range.popFront(); assert(range.front.type == EntityType.text); assert(range.front.text == "exam"); range = range.skipToParentEndTag(); assert(range.front.type == EntityType.elementEnd); assert(range.front.name == "bar"); range = range.skipToParentEndTag(); 
assert(range.front.type == EntityType.elementEnd); assert(range.front.name == "foo"); range.popFront(); assert(range.front.type == EntityType.comment); assert(range.front.text == " another comment "); range = range.skipToParentEndTag(); assert(range.front.type == EntityType.elementEnd); assert(range.front.name == "root"); assert(range.skipToParentEndTag().empty); } { auto range = parseXML("<root><foo>bar</foo></root>"); assert(range.front.type == EntityType.elementStart); assert(range.front.name == "root"); assert(range.skipToParentEndTag().empty); } 
- R skipToPath(R)(R entityRange, string path)
 if(isInstanceOf!(EntityRange, R));
- Treats the given string like a file path except that each directory corresponds to the name of a start tag. Note that this does not try to implement XPath as that would be quite complicated, and it really doesn't fit with a StAX parser. A start tag should be thought of as a directory, with its child start tags as the directories it contains. All paths should be relative. EntityRange can only move forward through the document, so using an absolute path would only make sense at the beginning of the document. As such, absolute paths are treated as invalid paths. "./" and "../" are supported. Repeated slashes such as in "foo//bar" are not supported and are treated as an invalid path. If range.front.type == EntityType.elementStart, then range.skipToPath("foo") will search for the first child start tag (be it EntityType.elementStart or EntityType.elementEmpty) with the name "foo". That start tag must be a direct child of the current start tag. If range.front.type is any other EntityType, then range.skipToPath("foo") will return an empty range, because no other EntityTypes have child start tags. For any EntityType, range.skipToPath("../foo") will search for the first start tag with the name "foo" at the same level as the current entity. If the current entity is a start tag with the name "foo", it will not be considered a match. range.skipToPath("./") is a no-op. However, range.skipToPath("../") will result in the empty range (since it doesn't target a specific start tag). 
range.skipToPath("foo/bar") is equivalent to range.skipToPath("foo").skipToPath("bar"), and range.skipToPath("../foo/bar") is equivalent to range.skipToPath("../foo").skipToPath("bar").Returns: The given range with its front now at the requested entity if the path is valid; otherwise, an empty range is returned.Throws: XMLParsingException on invalid XML.Examples:{ auto xml = "<carrot>\n" ~ " <foo>\n" ~ " <bar>\n" ~ " <baz/>\n" ~ " <other/>\n" ~ " </bar>\n" ~ " </foo>\n" ~ "</carrot>"; auto range = parseXML(xml); // "<carrot>" assert(range.front.type == EntityType.elementStart); assert(range.front.name == "carrot"); range = range.skipToPath("foo/bar"); // " <bar> assert(!range.empty); assert(range.front.type == EntityType.elementStart); assert(range.front.name == "bar"); range = range.skipToPath("baz"); // " <baz/> assert(!range.empty); assert(range.front.type == EntityType.elementEmpty); // other is not a child element of baz assert(range.skipToPath("other").empty); range = range.skipToPath("../other"); // " <other/>" assert(!range.empty); assert(range.front.type == EntityType.elementEmpty); } { auto xml = "<potato>\n" ~ " <foo>\n" ~ " <bar>\n "~ " </bar>\n" ~ " <crazy>\n" ~ " </crazy>\n" ~ " <fou/>\n" ~ " </foo>\n" ~ " <buzz/>\n" ~ "</potato>"; auto range = parseXML(xml); // "<potato>" assert(range.front.type == EntityType.elementStart); range = range.skipToPath("./"); // "<potato>" assert(!range.empty); assert(range.front.type == EntityType.elementStart); assert(range.front.name == "potato"); range = range.skipToPath("./foo/bar"); // " <bar>" assert(!range.empty); assert(range.front.type == EntityType.elementStart); assert(range.front.name == "bar"); range = range.skipToPath("../crazy"); // " <crazy>" assert(!range.empty); assert(range.front.type == EntityType.elementStart); assert(range.front.name == "crazy"); // Whether popFront is called here before the call to // range.skipToPath("../fou") below, the result is the same, because // both <crazy> and </crazy> 
are at the same level. range.popFront(); // " </crazy>" assert(!range.empty); assert(range.front.type == EntityType.elementEnd); assert(range.front.name == "crazy"); range = range.skipToPath("../fou"); // " <fou/>" assert(!range.empty); assert(range.front.type == EntityType.elementEmpty); } // Searching stops at the first matching start tag. { auto xml = "<beet>\n" ~ " <foo a='42'>\n" ~ " </foo>\n" ~ " <foo b='451'>\n" ~ " </foo>\n" ~ "</beet>"; auto range = parseXML(xml); range = range.skipToPath("foo"); assert(!range.empty); assert(range.front.type == EntityType.elementStart); assert(range.front.name == "foo"); { auto attrs = range.front.attributes; assert(attrs.front.name == "a"); assert(attrs.front.value == "42"); } range = range.skipToPath("../foo"); assert(!range.empty); assert(range.front.type == EntityType.elementStart); assert(range.front.name == "foo"); { auto attrs = range.front.attributes; assert(attrs.front.name == "b"); assert(attrs.front.value == "451"); } } // skipToPath will work on an empty range but will always return an // empty range. { auto range = parseXML("<root/>"); assert(range.takeNone().skipToPath("nowhere").empty); } // Empty and absolute paths will also result in an empty range as will // "../" without any actual tag name on the end. { auto range = parseXML("<root/>"); assert(range.skipToPath("").empty); assert(range.skipToPath("/").empty); assert(range.skipToPath("../").empty); } // Only non-empty start tags have children; all other EntityTypes result // in an empty range unless "../" is used. { auto xml = "<!-- comment -->\n" ~ "<root>\n" ~ " <foo/>\n" ~ "</root>"; auto range = parseXML(xml); assert(range.skipToPath("root").empty); assert(range.skipToPath("foo").empty); range = range.skipToPath("../root"); assert(!range.empty); assert(range.front.type == EntityType.elementStart); assert(range.front.name == "root"); }