Basic XML support (#7947)

This PR includes * Reading XML from a file, stream, or string * Reading XML via Data.fetch * Accessing the root element, element children, and attributes * Accessing tag text contents * Get tags by name * Inner / Outer XML string
2024-12-23 02:21:54 +03:00 · 2023-10-06 13:52:19 -04:00 · 2023-10-06 13:52:19 -04:00 · 9ba7be20af
commit 9ba7be20af
parent 16c8d2e302
12 changed files with 835 additions and 1 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -581,6 +581,7 @@
 - [Added `Table.expand_column` and improved JSON deserialization.][7859]
 - [Implemented `Table.auto_value_types` for in-memory tables.][7908]
 - [Implemented Text.substring to easily select part of a Text field][7913]
+- [Implemented basic XML support][7947]

 [debug-shortcuts]:
  https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
@ -826,6 +827,7 @@
 [7859]: https://github.com/enso-org/enso/pull/7859
 [7908]: https://github.com/enso-org/enso/pull/7908
 [7913]: https://github.com/enso-org/enso/pull/7913
+[7947]: https://github.com/enso-org/enso/pull/7947

 #### Enso Compiler

--- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/XML.enso
+++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/XML.enso
@ -0,0 +1,399 @@
+import project.Any.Any
+import project.Data.Boolean.Boolean
+import project.Data.Json.Extensions
+import project.Data.Json.JS_Object
+import project.Data.Map.Map
+import project.Data.Numbers.Integer
+import project.Data.Text.Text
+import project.Data.Vector.Vector
+import project.Error.Error
+import project.Errors.Common.Index_Out_Of_Bounds
+import project.Errors.File_Error.File_Error
+import project.Errors.Illegal_State.Illegal_State
+import project.Errors.No_Such_Key.No_Such_Key
+import project.Nothing.Nothing
+import project.Panic.Panic
+import project.System.File.File
+import project.System.File.File_Access.File_Access
+import project.System.Input_Stream.Input_Stream
+from project.Data.Range.Extensions import all
+from project.Data.Text.Extensions import all
+
+polyglot java import java.io.StringReader
+polyglot java import java.lang.Exception as JException
+polyglot java import javax.xml.parsers.DocumentBuilderFactory
+polyglot java import javax.xml.parsers.DocumentBuilder
+polyglot java import javax.xml.xpath.XPathConstants
+polyglot java import javax.xml.xpath.XPathFactory
+polyglot java import org.w3c.dom.Document
+polyglot java import org.w3c.dom.Element
+polyglot java import org.w3c.dom.Node
+polyglot java import org.w3c.dom.NodeList
+polyglot java import org.w3c.dom.Text as Java_Text
+polyglot java import org.xml.sax.InputSource
+polyglot java import org.xml.sax.SAXException
+polyglot java import org.xml.sax.SAXParseException
+
+polyglot java import org.enso.base.XML_Utils
+
+type XML_Document
+    ## Read an XML document from a file.
+
+       Arguments:
+       - file: the `File` to read the XML document from.
+
+       If there is an error reading the file, `File_Error` is thrown.
+
+       If there is a parsing error, `XML_Error.Parse_Error` is thrown.
+
+       > Example
+         Read an XML document in 'doc.xml'.
+
+             file = enso_project.data / "doc.xml"
+             doc = XML_Document.from_file file
+    from_file : File -> XML_Document ! XML_Error | File_Error
+    from_file file:File =
+        File_Error.handle_java_exceptions file <|
+            file.with_input_stream [File_Access.Read] XML_Document.from_stream
+
+    ## Read an XML document from an input stream.
+
+       Arguments:
+       - input_stream: the input stream to read the XML document from.
+
+       If there is a parsing error, `XML_Error.Parse_Error` is thrown.
+
+       > Example
+         Read an XML document from an input_stream.
+
+             doc = XML_Document.from_stream input_stream
+    from_stream : Input_Stream -> XML_Document ! XML_Error
+    from_stream input_stream:Input_Stream =
+        XML_Error.handle_java_exceptions <|
+            input_stream.with_java_stream java_stream-> XML_Document.from_source java_stream
+
+    ## Read an XML document from a string.
+
+       Arguments:
+       - xml_string: The string to read the XML document from.
+
+       If there is a parsing error, `XML_Error.Parse_Error` is thrown.
+
+       > Example
+         Read an XML document from a string.
+
+             xml_string = "<foo></foo>"
+             doc = XML_Document.from_text xml_string
+    from_text : Text -> XML_Document ! XML_Error
+    from_text xml_string:Text =
+        XML_Error.handle_java_exceptions <|
+            string_reader = StringReader.new xml_string
+            XML_Document.from_source (InputSource.new string_reader)
+
+    ## PRIVATE
+       Read XML from an input source.
+
+       Arguments:
+       - input_source: the value handed to the Java `DocumentBuilder.parse`;
+         the callers in this type pass either a Java input stream or an
+         `InputSource`.
+
+       Java exceptions are not translated here; the public `from_*` methods
+       wrap this call in `XML_Error.handle_java_exceptions`.
+    from_source : Any -> XML_Document ! XML_Error
+    from_source input_source =
+        document_builder_factory = DocumentBuilderFactory.newInstance
+        # Documents may come from untrusted sources (for example the web), so
+        # do not resolve external entities or fetch external DTDs. This
+        # prevents XML External Entity (XXE) attacks that read local files or
+        # trigger network requests while parsing.
+        document_builder_factory.setFeature "http://xml.org/sax/features/external-general-entities" Boolean.False
+        document_builder_factory.setFeature "http://xml.org/sax/features/external-parameter-entities" Boolean.False
+        document_builder_factory.setFeature "http://apache.org/xml/features/nonvalidating/load-external-dtd" Boolean.False
+        document_builder = document_builder_factory.newDocumentBuilder
+        # Install a handler that rethrows parse errors instead of printing them.
+        XML_Utils.setCustomErrorHandler document_builder
+        XML_Document.Value (document_builder.parse input_source)
+
+    ## Get the root element of the document.
+
+       > Example
+         Get the root element of a document.
+
+             doc = XML_Document.from_file file
+             root = doc.root_element
+    root_element : XML_Element ! XML_Error
+    root_element self =
+        XML_Error.handle_java_exceptions <|
+            java_element = self.java_document.getDocumentElement
+            XML_Element.Value java_element
+
+    ## PRIVATE
+       Convert to a JavaScript Object representing this XML_Document.
+       The result is the representation of the root element.
+    to_js_object : JS_Object
+    to_js_object self = self.root_element.to_js_object
+
+    ## PRIVATE
+       Convert to a display representation of this XML_Document.
+    to_display_text : Text
+    to_display_text self = "XML_Document (" + self.root_element.to_display_text + ")"
+
+    ## PRIVATE
+    Value (java_document:Document)
+
+type XML_Element
+    ## Gets the tag of an XML element.
+
+       > Example
+         Get the tag of an XML element.
+
+             XML_Document.from_text '<foo>hello</foo>' . root_element . name
+             # => "foo"
+    name : Text ! XML_Error
+    name self =
+        XML_Error.handle_java_exceptions <|
+            self.java_element.getNodeName
+
+    ## Gets a child or attribute of an XML element.
+
+       Arguments:
+       - key: If an `Integer`, returns the child at position `key` in the
+         list returned by `children`. If a `Text` that starts with `"@"`,
+         evaluates `key` as an XPath expression and returns its first result
+         (the attribute value). Any other `Text` is evaluated as an XPath
+         expression and all of its results are returned as a `Vector`.
+       - if_missing: The value returned if the index is out of range or the
+         attribute key yields no result. It is not used for other XPath
+         keys; those return an empty `Vector` when nothing matches.
+
+       > Example
+         Get a nested tag:
+
+             XML_Document.from_text '<foo><baz>hello</baz></foo>' . root_element . get 0
+             # => XML_Document.from_text "<baz>hello</baz>" . root_element
+
+       > Example
+         Get a tag attribute.
+
+             root = XML_Document.from_text '<foo bar="one">hello</foo>' . root_element
+             root.get "@bar"
+             # => "one"
+    get : Text | Integer -> Any -> Any | Text | XML_Element | Vector (Text | XML_Element) ! No_Such_Key | Index_Out_Of_Bounds | XML_Error
+    get self key:(Text|Integer) ~if_missing=Nothing =
+        case key of
+            _ : Integer -> self.children.get key if_missing
+            _ : Text -> if is_attribute_key key then self.get_xpath key . get 0 if_missing else self.get_xpath key
+
+    ## Gets a child or attribute of an XML element.
+
+       Arguments:
+       - key: If an `Integer`, returns the child at position `key` in the
+         list returned by `children`. If a `Text` that starts with `"@"`,
+         returns the attribute with the given name. Any other `Text` is
+         treated as an XPath specifier, and the elements it points to are
+         returned as a `Vector`.
+
+       Unlike `get`, an index that is out of range results in an
+       `Index_Out_Of_Bounds` error and a missing attribute results in a
+       `No_Such_Key` error. Other XPath keys that match nothing return an
+       empty `Vector`.
+
+       > Example
+         Get a nested tag:
+
+             XML_Document.from_text '<foo><baz>hello</baz></foo>' . root_element . at 0
+             # => XML_Document.from_text "<baz>hello</baz>" . root_element
+
+       > Example
+         Get a tag attribute.
+
+             root = XML_Document.from_text '<foo bar="one">hello</foo>' . root_element
+             root.at "@bar"
+             # => "one"
+    at : Text | Integer -> Text | XML_Element | Vector (Text | XML_Element) ! No_Such_Key | Index_Out_Of_Bounds | XML_Error
+    at self key:(Text|Integer) =
+        if_missing = case key of
+            _ : Integer -> Error.throw (Index_Out_Of_Bounds.Error key self.child_count)
+            _ : Text -> Error.throw (No_Such_Key.Error self key)
+        self.get key if_missing
+
+    ## Get elements denoted by the given XPath key.
+
+       Arguments:
+       - key: The XPath string to use to search for elements. It is
+         evaluated against this element.
+
+       The matched nodes are filtered and converted in the same way as the
+       result of `children`; matched attributes are returned as their values.
+
+       > Example
+         Get an element by xpath.
+
+             root = XML_Document.from_file test_file . root_element
+             root.get_xpath "/class/teacher[1]/firstname"
+             # => [XML_Document.from_text "<firstname>Alice</firstname>" . root_element]
+    get_xpath : Text -> Vector (Text | XML_Element) ! XML_Error
+    get_xpath self key:Text =
+        XML_Error.handle_java_exceptions <|
+            xpath = XPathFactory.newInstance.newXPath
+            only_wanted_nodes (xpath.evaluate key self.java_element XPathConstants.NODESET)
+
+    ## Gets the child elements of an XML element.
+
+       `children` only returns child elements and child text nodes that are not
+       100% whitespace. Other node types, such as comments, are not included.
+
+       > Example
+         Get the children of an element.
+
+             XML_Document.from_text '<foo><baz>hello</baz></foo>' . root_element . children
+             # => [XML_Document.from_text "<baz>hello</baz>" . root_element]
+    children : Vector (XML_Element | Text) ! XML_Error
+    children self =
+        XML_Error.handle_java_exceptions <|
+            only_wanted_nodes self.java_element.getChildNodes
+
+    ## Gets the number of children of an XML element.
+
+       `child_count` only counts child elements and child text nodes that are
+       not 100% whitespace. Other node types, such as comments, are not included
+       in the count.
+
+       > Example
+         Get the number of children of an element.
+
+             XML_Document.from_text '<foo> <bar>hello</bar> <bar>hello2</bar> </foo>' . root_element . child_count
+             # => 2
+    child_count : Integer ! XML_Error
+    child_count self = self.children.length
+
+    ## Get an attribute of an XML element.
+
+       Arguments:
+       - name: The name of the attribute to get.
+       - if_missing: The value returned if the attribute does not exist. By
+         default, a `No_Such_Key` error is thrown.
+
+       > Example
+         Get an attribute of an element.
+
+             root = XML_Document.from_text '<foo bar="one">hello</foo>' . root_element
+             root.attribute "bar"
+             # => "one"
+    attribute : Text -> Any -> Text | Any ! XML_Error
+    attribute self name:Text ~if_missing=(Error.throw (No_Such_Key.Error self name)) =
+        XML_Error.handle_java_exceptions <|
+            attr = self.java_element.getAttributeNode name
+            if attr.is_nothing then if_missing else attr.getValue
+
+    ## Gets a map containing all of the attributes of an XML element, keyed by
+       attribute name.
+
+       > Example
+         Get the attributes of an element.
+
+             XML_Document.from_text '<foo bar="one">hello</foo>' . root_element . attributes
+             # => Map.from_vector [["bar", "one"]]
+    attributes : Map Text Text ! XML_Error
+    attributes self =
+        XML_Error.handle_java_exceptions <|
+            named_node_map = self.java_element.getAttributes
+            keys_and_values = 0.up_to named_node_map.getLength . map i->
+                node = named_node_map.item i
+                [node.getNodeName, node.getNodeValue]
+            Map.from_vector keys_and_values
+
+    ## Gets the text (non-markup) contents of the element and its descendants,
+       if any.
+
+       > Example
+         Get the text content of an element.
+
+             XML_Document.from_text '<foo>hello</foo>' . root_element . text
+             # => "hello"
+    text : Text ! XML_Error
+    text self =
+        XML_Error.handle_java_exceptions <|
+            self.java_element.getTextContent
+
+    ## Gets the raw XML of the element (including tag, attributes and contents).
+
+       > Example
+         Get the outer XML of an element.
+
+             XML_Document.from_text '<foo>hello</foo>' . root_element . outer_xml
+             # => '<foo>hello</foo>'
+    outer_xml : Text ! XML_Error
+    outer_xml self =
+        XML_Error.handle_java_exceptions <|
+            XML_Utils.outerXML self.java_element
+
+    ## Gets the raw XML of the contents of the element, not including the
+       outermost tag and attributes.
+
+       > Example
+         Get the inner XML of an element.
+
+             XML_Document.from_text '<foo><bar>hello</bar></foo>' . root_element . inner_xml
+             # => '<bar>hello</bar>'
+    inner_xml : Text ! XML_Error
+    inner_xml self =
+        XML_Error.handle_java_exceptions <|
+            XML_Utils.innerXML self.java_element
+
+    ## Gets elements matching a given tag name.
+
+       This searches through all descendants of the node, not just immediate children.
+
+       Arguments:
+       - tag_name: The tag name of the elements to return.
+
+       > Example
+         Get all `baz` elements below the root.
+
+             XML_Document.from_text '<foo> <baz>hello</baz> <bar>and</bar> <baz>goodbye</baz> </foo>' . root_element . get_elements_by_tag_name "baz"
+             # => [XML_Document.from_text "<baz>hello</baz>" . root_element, XML_Document.from_text "<baz>goodbye</baz>" . root_element]
+    get_elements_by_tag_name : Text -> Vector XML_Element ! XML_Error
+    get_elements_by_tag_name self tag_name:Text =
+        XML_Error.handle_java_exceptions <|
+            only_wanted_nodes (self.java_element.getElementsByTagName tag_name)
+
+    ## PRIVATE
+       Convert to a display representation of this XML_Element.
+    to_display_text : Text
+    to_display_text self = 'XML_Element "' + self.name + '"'
+
+    ## PRIVATE
+       Convert to a JavaScript Object representing this XML_Element.
+       The object has the fields `type`, `tag`, `attributes` and `children`.
+    to_js_object : JS_Object ! XML_Error
+    to_js_object self =
+        builder = Vector.new_builder 4
+        builder.append ["type", "XML_Element"]
+        builder.append ["tag", self.name]
+        builder.append ["attributes", self.attributes.to_js_object]
+        builder.append ["children", self.children.to_js_object]
+        JS_Object.from_pairs builder.to_vector
+
+    ## PRIVATE
+    Value (java_element:Element)
+
+type XML_Error
+    ## An error that indicates that the XML data could not be parsed.
+
+       Arguments:
+       - line_number: the line on which the parse failed.
+       - column_number: the column at which the parse failed.
+    Parse_Error (line_number : Integer) (column_number : Integer)
+
+    ## Any other XML-related Java exception.
+
+       Arguments:
+       - error: a text description of the underlying Java exception.
+    Other (error : Text)
+
+    ## PRIVATE
+
+       Utility method for running an action with Java exceptions mapping.
+       Any Java exception raised as a panic by `action` is caught and
+       converted with `wrap_java_exception`.
+    handle_java_exceptions : Any -> Any ! XML_Error
+    handle_java_exceptions ~action =
+        Panic.catch JException action caught_panic->
+            XML_Error.wrap_java_exception caught_panic.payload
+
+    ## PRIVATE
+
+       Converts a Java `Exception` into its Enso counterpart and throws it as
+       a dataflow error: a `SAXParseException` becomes a `Parse_Error`
+       carrying its line and column, anything else becomes `Other`.
+    wrap_java_exception : JException -> XML_Error
+    wrap_java_exception exception:JException = case exception of
+        _ : SAXParseException -> Error.throw (XML_Error.Parse_Error exception.getLineNumber exception.getColumnNumber)
+        _ -> Error.throw (XML_Error.Other "An Exception has occurred: "+exception.to_text)
+
+    ## PRIVATE
+       Convert the XML_Error to a human-readable format.
+    to_display_text : Text
+    to_display_text self = case self of
+        XML_Error.Parse_Error line_number column_number -> "The XML document could not be parsed at line " + line_number.to_text + ", column " + column_number.to_text
+        XML_Error.Other error -> error
+
+## PRIVATE
+   Converts a Java `NodeList` into a `Vector`, keeping only the wanted nodes
+   and translating each of them into an Enso value:
+   - Elements are wrapped in `XML_Element`.
+   - Text nodes that are not completely whitespace become their `Text` value.
+   - Attributes (which only arise in the case of XPath keys) become their
+     value.
+   Every other node type is dropped.
+only_wanted_nodes : NodeList -> Vector (Text | XML_Element)
+only_wanted_nodes node_list:NodeList =
+    # Decides whether a node is kept.
+    is_wanted : Node -> Boolean
+    is_wanted node:Node =
+        node_type = node.getNodeType
+        keep_as_markup = (node_type == Node.ELEMENT_NODE) || (node_type == Node.ATTRIBUTE_NODE)
+        # The node value is only inspected for text nodes.
+        keep_as_text = (node_type == Node.TEXT_NODE) && node.getNodeValue.is_whitespace.not
+        keep_as_markup || keep_as_text
+
+    # Translates a kept node; any other node type here is a programming error.
+    convert node =
+        node_type = node.getNodeType
+        if node_type == Node.ELEMENT_NODE then XML_Element.Value node else
+            if node_type == Node.TEXT_NODE then node.getNodeValue else
+                if node_type == Node.ATTRIBUTE_NODE then node.getValue else
+                    Panic.throw (Illegal_State.Error ("Unexpected child type " + node_type.to_text))
+
+    raw_nodes = 0.up_to node_list.getLength . map index-> node_list.item index
+    raw_nodes.filter is_wanted . map convert
+
+## PRIVATE
+   Returns true if `s` starts with "@", which `XML_Element.get` uses to
+   recognise a key that names an attribute.
+is_attribute_key : Text -> Boolean
+is_attribute_key s:Text = s.starts_with "@"
--- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/XML/XML_Format.enso
+++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/XML/XML_Format.enso
@ -0,0 +1,50 @@
+import project.Any.Any
+import project.Data.Text.Text
+import project.Data.XML.XML_Document
+import project.Errors.Problem_Behavior.Problem_Behavior
+import project.Network.URI.URI
+import project.Nothing.Nothing
+import project.System.File.File
+import project.System.Input_Stream.Input_Stream
+from project.Data.Text.Extensions import all
+
+## A `File_Format` for reading XML files.
+   Writing is not supported: `for_file_write` always returns `Nothing`.
+type XML_Format
+    ## PRIVATE
+       If the File_Format supports reading from the file, return a configured instance.
+       Only files whose extension is exactly `.xml` are accepted.
+    for_file_read : File -> XML_Format | Nothing
+    for_file_read file:File =
+        case file.extension of
+            ".xml" -> XML_Format
+            _ -> Nothing
+
+    ## PRIVATE
+       If this File_Format should be used for writing to that file, return a configured instance.
+       Always returns `Nothing`; the `file` argument is ignored.
+    for_file_write : File -> XML_Format | Nothing
+    for_file_write file:File =
+        _ = [file]
+        Nothing
+
+    ## PRIVATE
+       If the File_Format supports reading from the web response, return a configured instance.
+       The part of `content_type` before the first `;` is trimmed and must be
+       exactly `application/xml` or `text/xml`. The `uri` argument is ignored.
+    for_web : Text -> URI|Text -> XML_Format | Nothing
+    for_web content_type:Text uri:(URI|Text) =
+        _ = [uri]
+        first = content_type.split ';' . first . trim
+        case first of
+            "application/xml" -> XML_Format
+            "text/xml" -> XML_Format
+            _ -> Nothing
+
+    ## PRIVATE
+       Implements the `File.read` for this `File_Format`
+       The `on_problems` argument is ignored.
+    read : File -> Problem_Behavior -> Any
+    read self file:File on_problems:Problem_Behavior =
+        _ = [on_problems]
+        XML_Document.from_file file
+
+    ## PRIVATE
+       Implements the `Data.parse` for this `File_Format`
+    read_stream : Input_Stream -> Any
+    read_stream self stream:Input_Stream =
+        XML_Document.from_stream stream
--- a/distribution/lib/Standard/Base/0.0.0-dev/src/Main.enso
+++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Main.enso
@ -48,6 +48,9 @@ import project.Data.Time.Time_Of_Day.Time_Of_Day
 import project.Data.Time.Time_Period.Time_Period
 import project.Data.Time.Time_Zone.Time_Zone
 import project.Data.Vector.Vector
+import project.Data.XML.XML_Document
+import project.Data.XML.XML_Error
+import project.Data.XML.XML_Format.XML_Format
 import project.Error.Error
 import project.Errors
 import project.Errors.Problem_Behavior.Problem_Behavior
@ -136,6 +139,9 @@ export project.Data.Time.Time_Of_Day.Time_Of_Day
 export project.Data.Time.Time_Period.Time_Period
 export project.Data.Time.Time_Zone.Time_Zone
 export project.Data.Vector.Vector
+export project.Data.XML.XML_Document
+export project.Data.XML.XML_Error
+export project.Data.XML.XML_Format.XML_Format
 export project.Error.Error
 export project.Errors
 export project.Errors.Problem_Behavior.Problem_Behavior
--- a/std-bits/base/src/main/java/org/enso/base/XML_Utils.java
+++ b/std-bits/base/src/main/java/org/enso/base/XML_Utils.java
@ -0,0 +1,76 @@
+package org.enso.base;
+
+import java.io.ByteArrayOutputStream;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.transform.OutputKeys;
+import javax.xml.transform.Result;
+import javax.xml.transform.Source;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerException;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;
+import org.w3c.dom.Element;
+import org.w3c.dom.NodeList;
+import org.xml.sax.ErrorHandler;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+
+public class XML_Utils {
+  /**
+   * Return the string representation of an XML element, including its tag and all its contents.
+   *
+   * <p>The element is serialized as UTF-8 and decoded as UTF-8, so the result does not depend on
+   * the platform default charset.
+   *
+   * @param element the element to convert to a string
+   * @return the string representation of the element
+   * @throws TransformerException if the transformer cannot be created or the element cannot be
+   *     serialized
+   */
+  public static String outerXML(Element element) throws TransformerException {
+    ByteArrayOutputStream out = new ByteArrayOutputStream();
+    Transformer transformer = newFragmentTransformer();
+    Source source = new DOMSource(element);
+    Result target = new StreamResult(out);
+    transformer.transform(source, target);
+    return decodeUtf8(out);
+  }
+
+  /**
+   * Return the string representation of the contents of an XML element, not including its tag.
+   *
+   * <p>Every child node is serialized in document order into the same output. The output is
+   * written and decoded as UTF-8, so the result does not depend on the platform default charset.
+   *
+   * @param element the element to convert to a string
+   * @return the string representation of the element's contents
+   * @throws TransformerException if the transformer cannot be created or a child cannot be
+   *     serialized
+   */
+  public static String innerXML(Element element) throws TransformerException {
+    ByteArrayOutputStream out = new ByteArrayOutputStream();
+    Transformer transformer = newFragmentTransformer();
+    Result target = new StreamResult(out);
+    NodeList childNodes = element.getChildNodes();
+    for (int i = 0; i < childNodes.getLength(); ++i) {
+      Source source = new DOMSource(childNodes.item(i));
+      transformer.transform(source, target);
+    }
+    return decodeUtf8(out);
+  }
+
+  /** Creates a transformer that emits UTF-8 XML fragments without an XML declaration. */
+  private static Transformer newFragmentTransformer() throws TransformerException {
+    Transformer transformer = TransformerFactory.newInstance().newTransformer();
+    transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
+    // Pin the encoding so that it always matches the charset used in decodeUtf8.
+    transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
+    return transformer;
+  }
+
+  /** Decodes the serialized bytes using the encoding the transformer was configured with. */
+  private static String decodeUtf8(ByteArrayOutputStream out) {
+    return new String(out.toByteArray(), java.nio.charset.StandardCharsets.UTF_8);
+  }
+
+  /**
+   * Installs an error handler on the builder that ignores warnings and rethrows errors and fatal
+   * errors, so that parse failures reach the caller as {@link SAXParseException}s.
+   *
+   * @param documentBuilder the builder to configure
+   */
+  public static void setCustomErrorHandler(DocumentBuilder documentBuilder) {
+    documentBuilder.setErrorHandler(
+        new ErrorHandler() {
+          @Override
+          public void warning(SAXParseException e) throws SAXException {
+            // Warnings are deliberately ignored.
+          }
+
+          @Override
+          public void fatalError(SAXParseException e) throws SAXException {
+            throw e;
+          }
+
+          @Override
+          public void error(SAXParseException e) throws SAXException {
+            throw e;
+          }
+        });
+  }
+}
--- a/std-bits/base/src/main/java/org/enso/base/file_format/XMLFormatSPI.java
+++ b/std-bits/base/src/main/java/org/enso/base/file_format/XMLFormatSPI.java
@ -0,0 +1,14 @@
+package org.enso.base.file_format;
+
+@org.openide.util.lookup.ServiceProvider(service = FileFormatSPI.class)
+public class XMLFormatSPI extends FileFormatSPI {
+  /** Name of the Enso module that declares the XML file format type. */
+  private static final String MODULE_NAME = "Standard.Base.Data.XML.XML_Format";
+
+  /** Name of the Enso type, inside {@link #MODULE_NAME}, that implements the format. */
+  private static final String TYPE_NAME = "XML_Format";
+
+  /** Returns the name of the Enso module that contains the format type. */
+  @Override
+  protected String getModuleName() {
+    return MODULE_NAME;
+  }
+
+  /** Returns the name of the Enso type that implements the format. */
+  @Override
+  protected String getTypeName() {
+    return TYPE_NAME;
+  }
+}
--- a/test/Tests/data/xml/nested.xml
+++ b/test/Tests/data/xml/nested.xml
@ -0,0 +1,13 @@
+<?xml version = "1.0"?>
+<foo id="1">
+    <bar id="2">
+    </bar>
+    <baz id="3">
+        <bar id="4">
+            <bar id="5">
+            </bar>
+        </bar>
+    </baz>
+    <bar id="6">
+    </bar>
+</foo>
--- a/test/Tests/data/xml/sample.xml
+++ b/test/Tests/data/xml/sample.xml
@ -0,0 +1,35 @@
+<?xml version = "1.0"?>
+<class>
+    <teacher id="100">
+        <firstname>Mary</firstname>
+        <lastname>Smith</lastname>
+        <bio>
+            Blah blah
+        </bio>
+    </teacher>
+    <teacher id="101">
+        <firstname>Bob</firstname>
+        <lastname>Jones</lastname>
+        <bio>
+            This that
+        </bio>
+    </teacher>
+    <student studentId="1000" year="2">
+        <firstname>Alice</firstname>
+        <lastname>Wright</lastname>
+        <gpa>4.01</gpa>
+    </student>
+    <student studentId="1001" year="3">
+        <firstname>Jessi</firstname>
+        <lastname>Cooper</lastname>
+        <gpa>3.99</gpa>
+    </student>
+    <student studentId="1002" year="1">
+        Some
+        <firstname>Randy</firstname>
+        Extra
+        <lastname>Brown</lastname>
+        Text
+        <gpa>3.99</gpa>
+    </student>
+</class>
--- a/test/Tests/data/xml/small.xml
+++ b/test/Tests/data/xml/small.xml
@ -0,0 +1,9 @@
+<class>
+    <teacher id="100">
+        <firstname>Mary</firstname>
+        <lastname>Smith</lastname>
+        <bio>
+            Blah blah
+        </bio>
+    </teacher>
+</class>
--- a/test/Tests/src/Data/XML/XML_Spec.enso
+++ b/test/Tests/src/Data/XML/XML_Spec.enso
@ -0,0 +1,228 @@
+from Standard.Base import all
+import Standard.Base.Errors.Common.Syntax_Error
+import Standard.Base.Errors.File_Error.File_Error
+
+from Standard.Test import Test, Test_Suite
+import Standard.Test.Extensions
+
+spec =
+    test_file = enso_project.data / "xml" / "sample.xml"
+    document = XML_Document.from_file test_file
+    root = document . root_element
+
+    fix_windows_newlines s = s.replace '\r\n' '\n'
+
+    Test.group "Read XML" <|
+        Test.specify "Can read from a file" <|
+            root.name . should_equal "class"
+
+        Test.specify "Error if file does not exist" <|
+            test_file = enso_project.data / "xml" / "sample.xmlnotexists"
+            XML_Document.from_file test_file . should_fail_with File_Error
+
+        Test.specify "Can read from a stream" <|
+            test_file.with_input_stream [File_Access.Read] input_stream->
+                doc = XML_Document.from_stream input_stream
+                doc.root_element.name . should_equal "class"
+
+        Test.specify "Can read from a string" <|
+            xml_string = test_file.read_text
+            doc = XML_Document.from_text xml_string
+            doc.root_element.name . should_equal "class"
+
+        Test.specify "Can read from a short string" <|
+            xml_string = "<class></class>"
+            doc = XML_Document.from_text xml_string
+            doc.root_element.name . should_equal "class"
+
+        Test.specify "Parse error from file" <|
+            test_file = enso_project.data / "sample.txt"
+            XML_Document.from_file test_file . catch . should_be_a XML_Error.Parse_Error
+
+        Test.specify "Parse error from string" <|
+            xml_string = "<<<<</"
+            XML_Document.from_text xml_string . catch . should_be_a XML_Error.Parse_Error
+
+    Test.group "at/get" <|
+        Test.specify "Can get children by index" <|
+            root.at 0 . name . should_equal "teacher"
+
+            root.at 0 . at 0 . name . should_equal "firstname"
+            root.at 0 . at 1 . name . should_equal "lastname"
+            root.at 0 . at 2 . name . should_equal "bio"
+            root.at 0 . at 2 . at 0 . should_equal '\n            Blah blah\n        '
+
+            root.at 3 . at 0 . name . should_equal "firstname"
+            root.at 3 . at 1 . name . should_equal "lastname"
+            root.at 3 . at 2 . name . should_equal "gpa"
+            root.at 3 . at 2 . at 0 . should_equal "3.99"
+
+        Test.specify "Can get text children by index" <|
+            root.at 4 . at 0 . should_equal '\n        Some\n        '
+            root.at 4 . at 2 . should_equal '\n        Extra\n        '
+            root.at 4 . at 4 . should_equal '\n        Text\n        '
+
+        Test.specify "Can get element attributes" <|
+            root.at 0 . at "@id" . should_equal "100"
+            root.at 1 . at "@id" . should_equal "101"
+            root.at 2 . at "@studentId" . should_equal "1000"
+            root.at 3 . at "@studentId" . should_equal "1001"
+
+            root.at 0 . attribute "id" . should_equal "100"
+            root.at 1 . attribute "id" . should_equal "101"
+            root.at 2 . attribute "studentId" . should_equal "1000"
+            root.at 3 . attribute "studentId" . should_equal "1001"
+
+            root.at 3 . attribute "does_not_exist" if_missing="if_missing" . should_equal "if_missing"
+
+        Test.specify "Can get element an attribute map" <|
+            root.at 2 . attributes . should_equal (Map.from_vector [["studentId", "1000"], ["year", "2"]])
+            root.at 3 . attributes . should_equal (Map.from_vector [["studentId", "1001"], ["year", "3"]])
+
+        Test.specify "Can get nodes via xpath" <|
+            classes = root.at "/class"
+            classes.length . should_equal 1
+            classes.at 0 . name . should_equal "class"
+
+            teachers = root.at "/class/teacher"
+            teachers.length . should_equal 2
+            teachers.at 0 . at "@id" . should_equal "100"
+            teachers.at 1 . at "@id" . should_equal "101"
+
+            students = root.at "/class/student"
+            students.length . should_equal 3
+            students.at 0 . at "@studentId" . should_equal "1000"
+            students.at 1 . at "@studentId" . should_equal "1001"
+
+            root.at "/class/teacher[1]/firstname" . at 0 . text . should_equal "Mary"
+            root.at "/class/teacher[2]/firstname" . at 0 . text . should_equal "Bob"
+            root.at "/class/teacher[1]/firstname/text()" . should_equal ["Mary"]
+            root.at "/class/teacher[2]/firstname/text()" . should_equal ["Bob"]
+            root.at "/class/teacher/firstname/text()" . should_equal ["Mary", "Bob"]
+            root.at "/class/teacher[1]/bio" . at 0 . text . should_equal '\n            Blah blah\n        '
+            root.at "/class/teacher[2]/bio" . at 0 . text . should_equal '\n            This that\n        '
+            root.get "/class/teacher[23]" . should_equal []
+
+            root.at "teacher[1]/firstname" . at 0 . text . should_equal "Mary"
+            root.at "teacher[2]/firstname" . at 0 . text . should_equal "Bob"
+            root.at "teacher[1]/bio" . at 0 . text . should_equal '\n            Blah blah\n        '
+            root.at "teacher[2]/bio" . at 0 . text . should_equal '\n            This that\n        '
+
+        Test.specify "Can get children using .get" <|
+            root.get 0 . get 0 . name . should_equal "firstname"
+            root.get 0 . get "@id" . should_equal "100"
+            root.get "/class/teacher[1]/firstname" . get 0 . text . should_equal "Mary"
+
+            root.get 0 . get 32 "if_missing" . should_equal "if_missing"
+            root.get 0 . get "@not_there" "if_missing" . should_equal "if_missing"
+
+        Test.specify "Can handle a bad xpath" <|
+            root.at "/qqq[[[[1" . at 0 . text . should_fail_with XML_Error
+
+    Test.group "tag name" <|
+        Test.specify "Can get the tag name" <|
+            root.name . should_equal "class"
+            root.at 0 . name . should_equal "teacher"
+            root.at 1 . at 1 . name . should_equal "lastname"
+
+    Test.group "children" <|
+        Test.specify "Can get the list of children" <|
+            kids = root.children
+            kids.length . should_equal 5
+
+            # The first two children are identified by `@id` ...
+            id_of index = kids.at index . at "@id"
+            id_of 0 . should_equal "100"
+            id_of 1 . should_equal "101"
+
+            # ... and the remaining three by `@studentId`.
+            student_id_of index = kids.at index . at "@studentId"
+            student_id_of 2 . should_equal "1000"
+            student_id_of 3 . should_equal "1001"
+            student_id_of 4 . should_equal "1002"
+
+        Test.specify "Can get the number of children" <|
+            # `child_count` must agree with the length of `children` checked above.
+            count = root.child_count
+            count.should_equal 5
+
+    Test.group "text contents" <|
+        Test.specify "Can get child text contents" <|
+            fifth_child = root.at 4
+
+            # Text of a single nested element.
+            fifth_child.at 1 . text . should_equal "Randy"
+
+            # Text of the parent: the expected value contains the nested element's
+            # text ("Randy") together with the surrounding whitespace and loose text.
+            expected_text = '\n        Some\n        Randy\n        Extra\n        Brown\n        Text\n        3.99\n    '
+            fifth_child.text . should_equal expected_text
+
+    Test.group "inner / outer xml" <|
+        Test.specify "Can get the inner xml" <|
+            # Inner XML of the first XPath match, passed through `fix_windows_newlines`
+            # (defined elsewhere in this file; presumably normalises line endings).
+            inner_of path = fix_windows_newlines (root.at path . at 0 . inner_xml)
+
+            inner_of "/class/teacher[1]" . should_equal '\n        <firstname>Mary</firstname>\n        <lastname>Smith</lastname>\n        <bio>\n            Blah blah\n        </bio>\n    '
+            inner_of "/class/teacher[1]/bio" . should_equal '\n            Blah blah\n        '
+            inner_of "/class/teacher[2]/bio" . should_equal '\n            This that\n        '
+            inner_of "/class/teacher[2]" . should_equal '\n        <firstname>Bob</firstname>\n        <lastname>Jones</lastname>\n        <bio>\n            This that\n        </bio>\n    '
+
+        Test.specify "Can get the outer xml" <|
+            # Outer XML additionally includes the element's own opening and closing tags.
+            outer_of path = fix_windows_newlines (root.at path . at 0 . outer_xml)
+
+            outer_of "/class/teacher[1]/bio" . should_equal '<bio>\n            Blah blah\n        </bio>'
+            outer_of "/class/teacher[2]/bio" . should_equal '<bio>\n            This that\n        </bio>'
+
+    Test.group "get_elements_by_tag_name" <|
+        Test.specify "Can get elements by tag name" <|
+            # Shorthand for looking up elements under the shared `root` by tag.
+            by_tag tag = root.get_elements_by_tag_name tag
+            teacher_elems = by_tag "teacher"
+            student_elems = by_tag "student"
+            gpa_elems = by_tag "gpa"
+
+            # How many elements each tag yields.
+            teacher_elems.length . should_equal 2
+            student_elems.length . should_equal 3
+            gpa_elems.length . should_equal 3
+
+            # Results are checked position by position.
+            teacher_elems.at 0 . at "@id" . should_equal "100"
+            teacher_elems.at 1 . at "@id" . should_equal "101"
+            student_elems.at 0 . at "@studentId" . should_equal "1000"
+            student_elems.at 1 . at "@studentId" . should_equal "1001"
+            student_elems.at 2 . at "@studentId" . should_equal "1002"
+            gpa_elems.at 0 . text . should_equal "4.01"
+            gpa_elems.at 1 . text . should_equal "3.99"
+            gpa_elems.at 2 . text . should_equal "3.99"
+
+        Test.specify "Can get nested elements" <|
+            # Uses its own fixture rather than the shared `root`.
+            nested_file = enso_project.data / "xml" / "nested.xml"
+            nested_root = XML_Document.from_file nested_file . root_element
+            bar_elems = nested_root.get_elements_by_tag_name "bar"
+            bar_elems.length . should_equal 4
+            bar_ids = bar_elems.map (bar-> bar.at "@id")
+            bar_ids.should_equal ["2", "4", "5", "6"]
+
+        Test.specify "Can get elements by name with a wildcard" <|
+            # "*" is expected to yield 20 elements for the shared fixture.
+            everything = root.get_elements_by_tag_name "*"
+            everything.length . should_equal 20
+
+    Test.group "Data.read / File_Format" <|
+        Test.specify "Can read from a file" <|
+            # `Data.read` on the fixture file must yield something exposing `root_element`.
+            loaded = Data.read test_file
+            loaded_root = loaded.root_element
+            loaded_root.name . should_equal "class"
+
+        Test.specify "Can read from an endpoint" <|
+            # NOTE(review): this spec fetches a remote sample, so it needs network
+            # access and depends on that resource staying available and unchanged.
+            fetched = Data.fetch "https://enso-data-samples.s3.us-west-1.amazonaws.com/sample.xml"
+            fetched_root = fetched.root_element
+            fetched_root.name . should_equal "class"
+            fetched_root.at 1 . name . should_equal "teacher"
+
+    Test.group "display text" <|
+        Test.specify "Can generate display text" <|
+            # The document's display text wraps the display text of its root element.
+            document_text = document.to_display_text
+            document_text.should_equal 'XML_Document (XML_Element "class")'
+
+            root_text = root.to_display_text
+            root_text.should_equal 'XML_Element "class"'
+
+    Test.group "JSON" <|
+        Test.specify "Can convert to JS_Object" <|
+            # A smaller fixture keeps the expected JSON below readable.
+            small_file = enso_project.data / "xml" / "small.xml"
+            small_root = XML_Document.from_file small_file . root_element
+
+            # Expected shape: each element carries "type", "tag", "attributes" (a list
+            # of [name, value] pairs) and "children" (nested elements or text strings).
+            # NOTE(review): `\\n` presumably reaches the JSON parser as `\n` and is
+            # decoded there as a newline - confirm against the text-block escaping rules.
+            expected = Json.parse '''
+                {
+                "type": "XML_Element",
+                "tag": "class",
+                "attributes": [],
+                "children": [
+                    {
+                    "type": "XML_Element",
+                    "tag": "teacher",
+                    "attributes": [ [ "id", "100" ]
+                    ],
+                    "children": [
+                        { "type": "XML_Element", "tag": "firstname", "attributes": [], "children": [ "Mary" ] },
+                        { "type": "XML_Element", "tag": "lastname", "attributes": [], "children": [ "Smith" ] },
+                        { "type": "XML_Element", "tag": "bio", "attributes": [], "children": [ "\\n            Blah blah\\n        " ] }
+                    ]
+                    }
+                ]
+                }
+            small_root.to_js_object . should_equal expected
+
+# Entry point for this file: hands `spec` to `Test_Suite.run_main`.
+main = Test_Suite.run_main spec
--- a/test/Tests/src/Main.enso
+++ b/test/Tests/src/Main.enso
@ -56,6 +56,7 @@ import project.Data.Text.Parse_Spec
 import project.Data.Text.Regex_Spec
 import project.Data.Text.Span_Spec
 import project.Data.Text.Utils_Spec
+import project.Data.XML.XML_Spec

 import project.Data.Vector.Slicing_Helpers_Spec

@ -155,3 +156,4 @@ main = Test_Suite.run_main <|
    Warnings_Spec.spec
    System_Spec.spec
    Random_Spec.spec
+    XML_Spec.spec
--- a/test/Tests/src/System/File_Spec.enso
+++ b/test/Tests/src/System/File_Spec.enso
@ -745,7 +745,7 @@ spec =

        Test.specify "should list files in a directory" <|
            immediate = enso_project.data.list . map .to_text
-            immediate.sort.should_equal (resolve ["books.json", "helloworld.txt", "sample-json.weird-extension", "sample-malformed.json", "sample.json", "sample.png", "sample.txt", "sample.xxx", "transient", "tree", "windows.log", "windows.txt"])
+            immediate.sort.should_equal (resolve ["books.json", "helloworld.txt", "sample-json.weird-extension", "sample-malformed.json", "sample.json", "sample.png", "sample.txt", "sample.xxx", "transient", "tree", "windows.log", "windows.txt", 'xml'])

            filtered1 = enso_project.data.list name_filter="s[a-cw]mple.{t?t,md}" . map .to_text
            filtered1.should_equal (resolve ["sample.txt"])