Basic XML support (#7947)

This PR includes
* Reading XML from a file, stream, or string
* Reading XML via Data.fetch
* Accessing the root element, element children, and attributes
* Accessing tag text contents
* Get tags by name
* Inner / Outer XML string
This commit is contained in:
GregoryTravis 2023-10-06 13:52:19 -04:00 committed by GitHub
parent 16c8d2e302
commit 9ba7be20af
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 835 additions and 1 deletions

View File

@ -581,6 +581,7 @@
- [Added `Table.expand_column` and improved JSON deserialization.][7859]
- [Implemented `Table.auto_value_types` for in-memory tables.][7908]
- [Implemented Text.substring to easily select part of a Text field][7913]
- [Implemented basic XML support][7947]
[debug-shortcuts]:
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
@ -826,6 +827,7 @@
[7859]: https://github.com/enso-org/enso/pull/7859
[7908]: https://github.com/enso-org/enso/pull/7908
[7913]: https://github.com/enso-org/enso/pull/7913
[7947]: https://github.com/enso-org/enso/pull/7947
#### Enso Compiler

View File

@ -0,0 +1,399 @@
import project.Any.Any
import project.Data.Boolean.Boolean
import project.Data.Json.Extensions
import project.Data.Json.JS_Object
import project.Data.Map.Map
import project.Data.Numbers.Integer
import project.Data.Text.Text
import project.Data.Vector.Vector
import project.Error.Error
import project.Errors.Common.Index_Out_Of_Bounds
import project.Errors.File_Error.File_Error
import project.Errors.Illegal_State.Illegal_State
import project.Errors.No_Such_Key.No_Such_Key
import project.Nothing.Nothing
import project.Panic.Panic
import project.System.File.File
import project.System.File.File_Access.File_Access
import project.System.Input_Stream.Input_Stream
from project.Data.Range.Extensions import all
from project.Data.Text.Extensions import all
polyglot java import java.io.StringReader
polyglot java import java.lang.Exception as JException
polyglot java import javax.xml.parsers.DocumentBuilderFactory
polyglot java import javax.xml.parsers.DocumentBuilder
polyglot java import javax.xml.xpath.XPathConstants
polyglot java import javax.xml.xpath.XPathFactory
polyglot java import org.w3c.dom.Document
polyglot java import org.w3c.dom.Element
polyglot java import org.w3c.dom.Node
polyglot java import org.w3c.dom.NodeList
polyglot java import org.w3c.dom.Text as Java_Text
polyglot java import org.xml.sax.InputSource
polyglot java import org.xml.sax.SAXException
polyglot java import org.xml.sax.SAXParseException
polyglot java import org.enso.base.XML_Utils
type XML_Document
    ## Read an XML document from a file.

       Arguments:
       - file: the `File` to read the XML document from.

       If there is an error reading the file, `File_Error` is thrown.

       If there is a parsing error, `XML_Error.Parse_Error` is thrown.

       > Example
         Read an XML document in 'doc.xml'.

             file = enso_project.data / "doc.xml"
             doc = XML_Document.from_file file
    from_file : File -> XML_Document ! XML_Error | File_Error
    from_file file:File =
        File_Error.handle_java_exceptions file <|
            file.with_input_stream [File_Access.Read] XML_Document.from_stream

    ## Read an XML document from an input stream.

       Arguments:
       - input_stream: the input stream to read the XML document from.

       If there is a parsing error, `XML_Error.Parse_Error` is thrown.

       > Example
         Read an XML document from an input_stream.

             doc = XML_Document.from_stream input_stream
    from_stream : Input_Stream -> XML_Document ! XML_Error
    from_stream input_stream:Input_Stream =
        XML_Error.handle_java_exceptions <|
            input_stream.with_java_stream java_stream-> XML_Document.from_source java_stream

    ## Read an XML document from a string.

       Arguments:
       - xml_string: The string to read the XML document from.

       If there is a parsing error, `XML_Error.Parse_Error` is thrown.

       > Example
         Read an XML document from a string.

             xml_string = "<foo></foo>"
             doc = XML_Document.from_text xml_string
    from_text : Text -> XML_Document ! XML_Error
    from_text xml_string:Text =
        XML_Error.handle_java_exceptions <|
            string_reader = StringReader.new xml_string
            XML_Document.from_source (InputSource.new string_reader)

    ## PRIVATE
       Read XML from an input source.

       Arguments:
       - input_source: anything accepted by `DocumentBuilder.parse` (the
         callers in this type pass a Java input stream or an `InputSource`).

       Resolution of external general entities, external parameter entities
       and external DTDs is switched off on the parser factory. The document
       may come from an untrusted location (for example a web response), and
       with these features enabled a crafted document could make the parser
       read local files or issue network requests (XXE).

       Java exceptions are not handled here; callers are expected to wrap the
       call in `XML_Error.handle_java_exceptions`.
    from_source : Any -> XML_Document ! XML_Error
    from_source input_source =
        document_builder_factory = DocumentBuilderFactory.newInstance
        document_builder_factory.setFeature "http://xml.org/sax/features/external-general-entities" Boolean.False
        document_builder_factory.setFeature "http://xml.org/sax/features/external-parameter-entities" Boolean.False
        document_builder_factory.setFeature "http://apache.org/xml/features/nonvalidating/load-external-dtd" Boolean.False
        document_builder = document_builder_factory.newDocumentBuilder
        # Replace the default handler so that parse problems surface as exceptions.
        XML_Utils.setCustomErrorHandler document_builder
        XML_Document.Value (document_builder.parse input_source)

    ## Get the root element of the document.

       > Example
         Get the root element of a document.

             doc = XML_Document.from_file file
             root = doc.root_element
    root_element : XML_Element ! XML_Error
    root_element self =
        XML_Error.handle_java_exceptions <|
            java_element = self.java_document.getDocumentElement
            XML_Element.Value java_element

    ## PRIVATE
       Convert to a JavaScript Object representing this XML_Document.
       The result is the JavaScript Object of the root element.
    to_js_object : JS_Object
    to_js_object self = self.root_element.to_js_object

    ## PRIVATE
       Convert to a display representation of this XML_Document.
    to_display_text : Text
    to_display_text self = "XML_Document (" + self.root_element.to_display_text + ")"

    ## PRIVATE
       Wraps the underlying Java DOM `Document`.
    Value (java_document:Document)
type XML_Element
    ## Gets the tag of an XML element.

       > Example
         Get the tag of an XML element.

             XML_Document.from_text '<foo>hello</foo>' . root_element . name
             # => "foo"
    name : Text ! XML_Error
    name self =
        XML_Error.handle_java_exceptions <|
            self.java_element.getNodeName

    ## Gets a child or attribute of an XML element.

       Arguments:
       - key: If an `Integer`, returns the element at position `key` in its
         list of children. If a `Text`, treats `key` as an XPath specifier, and
         returns the elements it points to. If a `Text` that starts with `"@"`,
         returns the attribute with the given name.
       - if_missing: The value returned if there is no child at the given
         index, or no attribute with the given name. It is not used for other
         XPath keys: those return a (possibly empty) `Vector` of matches.

       > Example
         Get a nested tag:

             XML_Document.from_text '<foo><baz>hello</baz></foo>' . root_element . get 0
             # => XML_Document.from_text "<baz>hello</baz>" . root_element

       > Example
         Get a tag attribute.

             root = XML_Document.from_text '<foo bar="one">hello</foo>' . root_element
             root.get "@bar"
             # => "one"
    get : Text | Integer -> Any -> Any | Text | XML_Element | Vector (Text | XML_Element) ! No_Such_Key | Index_Out_Of_Bounds | XML_Error
    get self key:(Text|Integer) ~if_missing=Nothing =
        case key of
            _ : Integer -> self.children.get key if_missing
            _ : Text -> if is_attribute_key key then self.get_xpath key . get 0 if_missing else self.get_xpath key

    ## Gets a child or attribute of an XML element.

       Arguments:
       - key: If an `Integer`, returns the element at position `key` in its
         list of children. If a `Text`, treats `key` as an XPath specifier, and
         returns the elements it points to. If a `Text` that starts with `"@"`,
         returns the attribute with the given name.

       If `key` is an `Integer` with no child at that position, an
       `Index_Out_Of_Bounds` error is thrown. If `key` names an attribute that
       does not exist, a `No_Such_Key` error is thrown.

       > Example
         Get a nested tag:

             XML_Document.from_text '<foo><baz>hello</baz></foo>' . root_element . at 0
             # => XML_Document.from_text "<baz>hello</baz>" . root_element

       > Example
         Get a tag attribute.

             root = XML_Document.from_text '<foo bar="one">hello</foo>' . root_element
             root.at "@bar"
             # => "one"
    at : Text | Integer -> Text | XML_Element | Vector (Text | XML_Element) ! No_Such_Key | Index_Out_Of_Bounds | XML_Error
    at self key:(Text|Integer) =
        # The error is written directly in the suspended `if_missing` argument
        # so that it (and `child_count`, which walks all the children) is only
        # evaluated when the lookup actually misses.
        case key of
            _ : Integer -> self.get key (Error.throw (Index_Out_Of_Bounds.Error key self.child_count))
            _ : Text -> self.get key (Error.throw (No_Such_Key.Error self key))

    ## Get elements denoted by the given XPath key.

       Arguments:
       - key: The XPath string to use to search for elements. It is evaluated
         with this element as the context node.

       > Example
         Get an element by xpath.

             root = XML_Document.from_text '<class><teacher><firstname>Mary</firstname></teacher></class>' . root_element
             root.get_xpath "/class/teacher[1]/firstname"
             # => [XML_Document.from_text "<firstname>Mary</firstname>" . root_element]
    get_xpath : Text -> Vector (Text | XML_Element) ! XML_Error
    get_xpath self key:Text =
        XML_Error.handle_java_exceptions <|
            xpath = XPathFactory.newInstance.newXPath
            only_wanted_nodes (xpath.evaluate key self.java_element XPathConstants.NODESET)

    ## Gets the child elements of an XML element.

       `children` only returns child elements and child text nodes that are not
       100% whitespace. Other node types, such as comments, are not included.

       > Example
         Get the children of an element.

             XML_Document.from_text '<foo><baz>hello</baz></foo>' . root_element . children
             # => [XML_Document.from_text "<baz>hello</baz>" . root_element]
    children : Vector (XML_Element | Text) ! XML_Error
    children self =
        XML_Error.handle_java_exceptions <|
            only_wanted_nodes self.java_element.getChildNodes

    ## Gets the number of children of an XML element.

       `child_count` only counts child elements and child text nodes that are
       not 100% whitespace. Other node types, such as comments, are not included
       in the count.

       > Example
         Get the number of children of an element.

             XML_Document.from_text '<foo> <bar>hello</bar> <bar>hello2</bar> </foo>' . root_element . child_count
             # => 2
    child_count : Integer ! XML_Error
    child_count self = self.children.length

    ## Get an attribute of an XML element.

       Arguments:
       - name: The name of the attribute to get.
       - if_missing: The value returned if the attribute does not exist. By
         default a `No_Such_Key` error is thrown.

       > Example
         Get an attribute of an element.

             root = XML_Document.from_text '<foo bar="one">hello</foo>' . root_element
             root.attribute "bar"
             # => "one"
    attribute : Text -> Any -> Text | Any ! XML_Error
    attribute self name:Text ~if_missing=(Error.throw (No_Such_Key.Error self name)) =
        XML_Error.handle_java_exceptions <|
            attr = self.java_element.getAttributeNode name
            if attr.is_nothing then if_missing else attr.getValue

    ## Gets a map containing the attributes of an XML element.

       > Example
         Get the attributes of an element.

             XML_Document.from_text '<foo bar="one">hello</foo>' . root_element . attributes
             # => Map.from_vector [["bar", "one"]]
    attributes : Map Text Text ! XML_Error
    attributes self =
        XML_Error.handle_java_exceptions <|
            named_node_map = self.java_element.getAttributes
            keys_and_values = 0.up_to named_node_map.getLength . map i->
                node = named_node_map.item i
                [node.getNodeName, node.getNodeValue]
            Map.from_vector keys_and_values

    ## Gets the text (non-markup) contents of the element and its descendants,
       if any.

       > Example
         Get the text content of an element.

             XML_Document.from_text '<foo>hello</foo>' . root_element . text
             # => "hello"
    text : Text ! XML_Error
    text self =
        XML_Error.handle_java_exceptions <|
            self.java_element.getTextContent

    ## Gets the raw XML of the element (including tag, attributes and contents).

       > Example
         Get the outer XML of an element.

             XML_Document.from_text '<foo>hello</foo>' . root_element . outer_xml
             # => '<foo>hello</foo>'
    outer_xml : Text ! XML_Error
    outer_xml self =
        XML_Error.handle_java_exceptions <|
            XML_Utils.outerXML self.java_element

    ## Gets the raw XML of the contents of the element, not including the
       outermost tag and attributes.

       > Example
         Get the inner XML of an element.

             XML_Document.from_text '<foo><bar>hello</bar></foo>' . root_element . inner_xml
             # => '<bar>hello</bar>'
    inner_xml : Text ! XML_Error
    inner_xml self =
        XML_Error.handle_java_exceptions <|
            XML_Utils.innerXML self.java_element

    ## Gets elements matching a given tag name.

       This searches through all descendants of the node, not just immediate
       children.

       Arguments:
       - tag_name: The tag name to match. `"*"` matches every element.

       > Example
         Get all the `baz` elements below the root.

             XML_Document.from_text '<foo> <baz>hello</baz> <bar>and</bar> <baz>goodbye</baz> </foo>' . root_element . get_elements_by_tag_name "baz"
             # => [XML_Document.from_text "<baz>hello</baz>" . root_element, XML_Document.from_text "<baz>goodbye</baz>" . root_element]
    get_elements_by_tag_name : Text -> Vector XML_Element ! XML_Error
    get_elements_by_tag_name self tag_name:Text =
        XML_Error.handle_java_exceptions <|
            only_wanted_nodes (self.java_element.getElementsByTagName tag_name)

    ## PRIVATE
       Convert to a display representation of this XML_Element.
    to_display_text : Text
    to_display_text self = 'XML_Element "' + self.name + '"'

    ## PRIVATE
       Convert to a JavaScript Object representing this XML_Element.
       The object has the fields `type`, `tag`, `attributes` and `children`.
    to_js_object : JS_Object ! XML_Error
    to_js_object self =
        builder = Vector.new_builder 4
        builder.append ["type", "XML_Element"]
        builder.append ["tag", self.name]
        builder.append ["attributes", self.attributes.to_js_object]
        builder.append ["children", self.children.to_js_object]
        JS_Object.from_pairs builder.to_vector

    ## PRIVATE
       Wraps the underlying Java DOM `Element`.
    Value (java_element:Element)
type XML_Error
    ## An error that indicates that the XML data could not be parsed.

       Arguments:
       - line_number: the line on which the parse failed.
       - column_number: the column at which the parse failed.
    Parse_Error (line_number : Integer) (column_number : Integer)

    ## Any other XML-related Java exception.

       Arguments:
       - error: a textual description of the underlying exception.
    Other (error : Text)

    ## PRIVATE
       Utility method for running an action with Java exceptions mapping.

       Arguments:
       - action: the (suspended) computation to run. Any Java exception it
         panics with is converted to an `XML_Error` dataflow error.
    handle_java_exceptions : Any -> Any ! XML_Error
    handle_java_exceptions ~action =
        Panic.catch JException action caught_panic->
            XML_Error.wrap_java_exception caught_panic.payload

    ## PRIVATE
       Converts a Java `Exception` into its Enso counterpart and throws it as
       a dataflow error.

       A `SAXParseException` becomes `XML_Error.Parse_Error`, carrying the
       line and column reported by the parser; every other exception becomes
       `XML_Error.Other`.
    wrap_java_exception : JException -> XML_Error
    wrap_java_exception exception:JException = case exception of
        _ : SAXParseException -> Error.throw (XML_Error.Parse_Error exception.getLineNumber exception.getColumnNumber)
        # The message is parenthesised so that it is built before being
        # passed to the constructor, independently of operator spacing.
        _ -> Error.throw (XML_Error.Other ("An Exception has occurred: " + exception.to_text))

    ## PRIVATE
       Convert the XML_Error to a human-readable format.
    to_display_text : Text
    to_display_text self = case self of
        XML_Error.Parse_Error line_number column_number -> "The XML document could not be parsed at line " + line_number.to_text + ", column " + column_number.to_text
        XML_Error.Other error -> error
## PRIVATE
   Filter out unwanted nodes and convert the remaining ones to Enso values.

   Wanted nodes are:
   - Elements (returned wrapped in `XML_Element`)
   - Text and CDATA sections, if not completely whitespace (returned as `Text`)
   - Attribute values (which only arise in the case of XPath keys)

   Arguments:
   - node_list: the Java DOM `NodeList` to filter.
only_wanted_nodes : NodeList -> Vector (Text | XML_Element)
only_wanted_nodes node_list:NodeList =
    nodes = 0.up_to (node_list.getLength) . map i->
        node_list.item i

    # CDATA sections carry character data just like plain text nodes, but the
    # DOM reports them with their own node type; without this they would be
    # silently dropped.
    is_text_type node_type =
        node_type == Node.TEXT_NODE || node_type == Node.CDATA_SECTION_NODE

    is_wanted : Node -> Boolean
    is_wanted node:Node =
        node_type = node.getNodeType
        is_element = node_type == Node.ELEMENT_NODE
        is_attribute = node_type == Node.ATTRIBUTE_NODE
        is_non_whitespace_text = is_text_type node_type && node.getNodeValue.is_whitespace.not
        is_element || is_attribute || is_non_whitespace_text

    # If an Element, wrap in XML_Element. If text or CDATA, extract the string. If an attribute, extract the value.
    convert node =
        node_type = node.getNodeType
        if node_type == Node.ELEMENT_NODE then XML_Element.Value node else
            if is_text_type node_type then node.getNodeValue else
                if node_type == Node.ATTRIBUTE_NODE then node.getValue else
                    Panic.throw (Illegal_State.Error ("Unexpected child type " + node_type.to_text))

    nodes.filter is_wanted . map convert
## PRIVATE
   Checks whether `s` begins with "@", the prefix that marks a key as an
   attribute name rather than a general XPath expression.
is_attribute_key : Text -> Boolean
is_attribute_key s:Text =
    s.starts_with "@"

View File

@ -0,0 +1,50 @@
import project.Any.Any
import project.Data.Text.Text
import project.Data.XML.XML_Document
import project.Errors.Problem_Behavior.Problem_Behavior
import project.Network.URI.URI
import project.Nothing.Nothing
import project.System.File.File
import project.System.Input_Stream.Input_Stream
from project.Data.Text.Extensions import all
## A `File_Format` for reading XML files.
   Files and web responses recognised as XML are read as `XML_Document`.
type XML_Format
    ## PRIVATE
       If the File_Format supports reading from the file, return a configured instance.
       Only files whose extension is `.xml` are accepted.
    for_file_read : File -> XML_Format | Nothing
    for_file_read file:File =
        if file.extension == ".xml" then XML_Format else Nothing

    ## PRIVATE
       If this File_Format should be used for writing to that file, return a configured instance.
       This format never claims a file for writing, so the result is always `Nothing`.
    for_file_write : File -> XML_Format | Nothing
    for_file_write file:File =
        _ = [file]
        Nothing

    ## PRIVATE
       If the File_Format supports reading from the web response, return a configured instance.
       The decision is based on the part of the content type before the first
       `;`, which must be `application/xml` or `text/xml`. The URI is ignored.
    for_web : Text -> URI|Text -> XML_Format | Nothing
    for_web content_type:Text uri:(URI|Text) =
        _ = [uri]
        media_type = content_type.split ';' . first . trim
        is_xml = media_type == "application/xml" || media_type == "text/xml"
        if is_xml then XML_Format else Nothing

    ## PRIVATE
       Implements the `File.read` for this `File_Format`
       The `on_problems` setting is not used.
    read : File -> Problem_Behavior -> Any
    read self file:File on_problems:Problem_Behavior =
        _ = [on_problems]
        XML_Document.from_file file

    ## PRIVATE
       Implements the `Data.parse` for this `File_Format`
       Reads an `XML_Document` from the given stream.
    read_stream : Input_Stream -> Any
    read_stream self stream:Input_Stream =
        XML_Document.from_stream stream

View File

@ -48,6 +48,9 @@ import project.Data.Time.Time_Of_Day.Time_Of_Day
import project.Data.Time.Time_Period.Time_Period
import project.Data.Time.Time_Zone.Time_Zone
import project.Data.Vector.Vector
import project.Data.XML.XML_Document
import project.Data.XML.XML_Error
import project.Data.XML.XML_Format.XML_Format
import project.Error.Error
import project.Errors
import project.Errors.Problem_Behavior.Problem_Behavior
@ -136,6 +139,9 @@ export project.Data.Time.Time_Of_Day.Time_Of_Day
export project.Data.Time.Time_Period.Time_Period
export project.Data.Time.Time_Zone.Time_Zone
export project.Data.Vector.Vector
export project.Data.XML.XML_Document
export project.Data.XML.XML_Error
export project.Data.XML.XML_Format.XML_Format
export project.Error.Error
export project.Errors
export project.Errors.Problem_Behavior.Problem_Behavior

View File

@ -0,0 +1,76 @@
package org.enso.base;
import java.io.ByteArrayOutputStream;
import java.nio.charset.StandardCharsets;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.ErrorHandler;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
public class XML_Utils {
/**
* Return the string representation of an XML element, including its tag and all its contents.
*
* @param element the element to convert to a string
* @return the string representation of the element
* @throws TransformerException
*/
public static String outerXML(Element element) throws TransformerException {
ByteArrayOutputStream out = new ByteArrayOutputStream();
Transformer transformer = TransformerFactory.newInstance().newTransformer();
transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
Source source = new DOMSource(element);
Result target = new StreamResult(out);
transformer.transform(source, target);
return out.toString();
}
/**
* Return the string representation of the contents of an XML element, not including its tag.
*
* @param element the element to convert to a string
* @return the string representation of the element's contents
* @throws TransformerException
*/
public static String innerXML(Element element) throws TransformerException {
ByteArrayOutputStream out = new ByteArrayOutputStream();
Transformer transformer = TransformerFactory.newInstance().newTransformer();
transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
Result target = new StreamResult(out);
NodeList childNodes = element.getChildNodes();
for (int i = 0; i < childNodes.getLength(); ++i) {
Source source = new DOMSource(childNodes.item(i));
transformer.transform(source, target);
}
return out.toString();
}
public static void setCustomErrorHandler(DocumentBuilder documentBuilder) {
documentBuilder.setErrorHandler(
new ErrorHandler() {
@Override
public void warning(SAXParseException e) throws SAXException {
;
}
@Override
public void fatalError(SAXParseException e) throws SAXException {
throw e;
}
@Override
public void error(SAXParseException e) throws SAXException {
throw e;
}
});
}
}

View File

@ -0,0 +1,14 @@
package org.enso.base.file_format;
/** Registers the Enso {@code XML_Format} type as a {@link FileFormatSPI} service. */
@org.openide.util.lookup.ServiceProvider(service = FileFormatSPI.class)
public class XMLFormatSPI extends FileFormatSPI {
  /** Name of the Enso module reported by {@link #getModuleName()}. */
  private static final String MODULE_NAME = "Standard.Base.Data.XML.XML_Format";

  /** Name of the Enso type reported by {@link #getTypeName()}. */
  private static final String TYPE_NAME = "XML_Format";

  @Override
  protected String getModuleName() {
    return MODULE_NAME;
  }

  @Override
  protected String getTypeName() {
    return TYPE_NAME;
  }
}

View File

@ -0,0 +1,13 @@
<?xml version = "1.0"?>
<foo id="1">
<bar id="2">
</bar>
<baz id="3">
<bar id="4">
<bar id="5">
</bar>
</bar>
</baz>
<bar id="6">
</bar>
</foo>

View File

@ -0,0 +1,35 @@
<?xml version = "1.0"?>
<class>
<teacher id="100">
<firstname>Mary</firstname>
<lastname>Smith</lastname>
<bio>
Blah blah
</bio>
</teacher>
<teacher id="101">
<firstname>Bob</firstname>
<lastname>Jones</lastname>
<bio>
This that
</bio>
</teacher>
<student studentId="1000" year="2">
<firstname>Alice</firstname>
<lastname>Wright</lastname>
<gpa>4.01</gpa>
</student>
<student studentId="1001" year="3">
<firstname>Jessi</firstname>
<lastname>Cooper</lastname>
<gpa>3.99</gpa>
</student>
<student studentId="1002" year="1">
Some
<firstname>Randy</firstname>
Extra
<lastname>Brown</lastname>
Text
<gpa>3.99</gpa>
</student>
</class>

View File

@ -0,0 +1,9 @@
<class>
<teacher id="100">
<firstname>Mary</firstname>
<lastname>Smith</lastname>
<bio>
Blah blah
</bio>
</teacher>
</class>

View File

@ -0,0 +1,228 @@
from Standard.Base import all
import Standard.Base.Errors.Common.Syntax_Error
import Standard.Base.Errors.File_Error.File_Error
from Standard.Test import Test, Test_Suite
import Standard.Test.Extensions
# Test suite for `XML_Document` / `XML_Element` / `XML_Error` and the XML file format.
# Most tests share the document read once from `data/xml/sample.xml` and its root element.
spec =
test_file = enso_project.data / "xml" / "sample.xml"
document = XML_Document.from_file test_file
root = document . root_element
# Replaces '\r\n' with '\n' so that the serialized-XML expectations below can be written with '\n' only.
fix_windows_newlines s = s.replace '\r\n' '\n'
# Reading from a file, a stream and a string, including the error cases.
Test.group "Read XML" <|
Test.specify "Can read from a file" <|
root.name . should_equal "class"
Test.specify "Error if file does not exist" <|
test_file = enso_project.data / "xml" / "sample.xmlnotexists"
XML_Document.from_file test_file . should_fail_with File_Error
Test.specify "Can read from a stream" <|
test_file.with_input_stream [File_Access.Read] input_stream->
doc = XML_Document.from_stream input_stream
doc.root_element.name . should_equal "class"
Test.specify "Can read from a string" <|
xml_string = test_file.read_text
doc = XML_Document.from_text xml_string
doc.root_element.name . should_equal "class"
Test.specify "Can read from a short string" <|
xml_string = "<class></class>"
doc = XML_Document.from_text xml_string
doc.root_element.name . should_equal "class"
# `sample.txt` is an existing data file that is not XML, so reading succeeds but parsing fails.
Test.specify "Parse error from file" <|
test_file = enso_project.data / "sample.txt"
XML_Document.from_file test_file . catch . should_be_a XML_Error.Parse_Error
Test.specify "Parse error from string" <|
xml_string = "<<<<</"
XML_Document.from_text xml_string . catch . should_be_a XML_Error.Parse_Error
# Lookup by integer index, by "@attribute" key and by XPath, through both `at` and `get`.
Test.group "at/get" <|
Test.specify "Can get children by index" <|
root.at 0 . name . should_equal "teacher"
root.at 0 . at 0 . name . should_equal "firstname"
root.at 0 . at 1 . name . should_equal "lastname"
root.at 0 . at 2 . name . should_equal "bio"
root.at 0 . at 2 . at 0 . should_equal '\n Blah blah\n '
root.at 3 . at 0 . name . should_equal "firstname"
root.at 3 . at 1 . name . should_equal "lastname"
root.at 3 . at 2 . name . should_equal "gpa"
root.at 3 . at 2 . at 0 . should_equal "3.99"
# Child 4 mixes text and elements, so text nodes occupy the even positions.
Test.specify "Can get text children by index" <|
root.at 4 . at 0 . should_equal '\n Some\n '
root.at 4 . at 2 . should_equal '\n Extra\n '
root.at 4 . at 4 . should_equal '\n Text\n '
Test.specify "Can get element attributes" <|
root.at 0 . at "@id" . should_equal "100"
root.at 1 . at "@id" . should_equal "101"
root.at 2 . at "@studentId" . should_equal "1000"
root.at 3 . at "@studentId" . should_equal "1001"
root.at 0 . attribute "id" . should_equal "100"
root.at 1 . attribute "id" . should_equal "101"
root.at 2 . attribute "studentId" . should_equal "1000"
root.at 3 . attribute "studentId" . should_equal "1001"
root.at 3 . attribute "does_not_exist" if_missing="if_missing" . should_equal "if_missing"
Test.specify "Can get element an attribute map" <|
root.at 2 . attributes . should_equal (Map.from_vector [["studentId", "1000"], ["year", "2"]])
root.at 3 . attributes . should_equal (Map.from_vector [["studentId", "1001"], ["year", "3"]])
# Covers absolute paths, `text()` selectors, an out-of-range predicate (empty result) and relative paths.
Test.specify "Can get nodes via xpath" <|
classes = root.at "/class"
classes.length . should_equal 1
classes.at 0 . name . should_equal "class"
teachers = root.at "/class/teacher"
teachers.length . should_equal 2
teachers.at 0 . at "@id" . should_equal "100"
teachers.at 1 . at "@id" . should_equal "101"
students = root.at "/class/student"
students.length . should_equal 3
students.at 0 . at "@studentId" . should_equal "1000"
students.at 1 . at "@studentId" . should_equal "1001"
root.at "/class/teacher[1]/firstname" . at 0 . text . should_equal "Mary"
root.at "/class/teacher[2]/firstname" . at 0 . text . should_equal "Bob"
root.at "/class/teacher[1]/firstname/text()" . should_equal ["Mary"]
root.at "/class/teacher[2]/firstname/text()" . should_equal ["Bob"]
root.at "/class/teacher/firstname/text()" . should_equal ["Mary", "Bob"]
root.at "/class/teacher[1]/bio" . at 0 . text . should_equal '\n Blah blah\n '
root.at "/class/teacher[2]/bio" . at 0 . text . should_equal '\n This that\n '
root.get "/class/teacher[23]" . should_equal []
root.at "teacher[1]/firstname" . at 0 . text . should_equal "Mary"
root.at "teacher[2]/firstname" . at 0 . text . should_equal "Bob"
root.at "teacher[1]/bio" . at 0 . text . should_equal '\n Blah blah\n '
root.at "teacher[2]/bio" . at 0 . text . should_equal '\n This that\n '
# `get` returns the supplied `if_missing` value for a missing index or attribute.
Test.specify "Can get children using .get" <|
root.get 0 . get 0 . name . should_equal "firstname"
root.get 0 . get "@id" . should_equal "100"
root.get "/class/teacher[1]/firstname" . get 0 . text . should_equal "Mary"
root.get 0 . get 32 "if_missing" . should_equal "if_missing"
root.get 0 . get "@not_there" "if_missing" . should_equal "if_missing"
Test.specify "Can handle a bad xpath" <|
root.at "/qqq[[[[1" . at 0 . text . should_fail_with XML_Error
Test.group "tag name" <|
Test.specify "Can get the tag name" <|
root.name . should_equal "class"
root.at 0 . name . should_equal "teacher"
root.at 1 . at 1 . name . should_equal "lastname"
Test.group "children" <|
Test.specify "Can get the list of children" <|
children = root.children
children.length . should_equal 5
children.at 0 . at "@id" . should_equal "100"
children.at 1 . at "@id" . should_equal "101"
children.at 2 . at "@studentId" . should_equal "1000"
children.at 3 . at "@studentId" . should_equal "1001"
children.at 4 . at "@studentId" . should_equal "1002"
Test.specify "Can get the number of children" <|
root.child_count . should_equal 5
Test.group "text contents" <|
Test.specify "Can get child text contents" <|
root.at 4 . at 1 . text . should_equal "Randy"
root.at 4 . text . should_equal '\n Some\n Randy\n Extra\n Brown\n Text\n 3.99\n '
# Serialized output is passed through `fix_windows_newlines` before comparison.
Test.group "inner / outer xml" <|
Test.specify "Can get the inner xml" <|
fix_windows_newlines (root.at "/class/teacher[1]" . at 0 . inner_xml) . should_equal '\n <firstname>Mary</firstname>\n <lastname>Smith</lastname>\n <bio>\n Blah blah\n </bio>\n '
fix_windows_newlines (root.at "/class/teacher[1]/bio" . at 0 . inner_xml) . should_equal '\n Blah blah\n '
fix_windows_newlines (root.at "/class/teacher[2]/bio" . at 0 . inner_xml) . should_equal '\n This that\n '
fix_windows_newlines (root.at "/class/teacher[2]" . at 0 . inner_xml) . should_equal '\n <firstname>Bob</firstname>\n <lastname>Jones</lastname>\n <bio>\n This that\n </bio>\n '
Test.specify "Can get the outer xml" <|
fix_windows_newlines (root.at "/class/teacher[1]/bio" . at 0 . outer_xml) . should_equal '<bio>\n Blah blah\n </bio>'
fix_windows_newlines (root.at "/class/teacher[2]/bio" . at 0 . outer_xml) . should_equal '<bio>\n This that\n </bio>'
Test.group "get_elements_by_tag_name" <|
Test.specify "Can get elements by tag name" <|
teachers = root.get_elements_by_tag_name "teacher"
students = root.get_elements_by_tag_name "student"
gpas = root.get_elements_by_tag_name "gpa"
teachers.length . should_equal 2
students.length . should_equal 3
gpas.length . should_equal 3
teachers.at 0 . at "@id" . should_equal "100"
teachers.at 1 . at "@id" . should_equal "101"
students.at 0 . at "@studentId" . should_equal "1000"
students.at 1 . at "@studentId" . should_equal "1001"
students.at 2 . at "@studentId" . should_equal "1002"
gpas.at 0 . text . should_equal "4.01"
gpas.at 1 . text . should_equal "3.99"
gpas.at 2 . text . should_equal "3.99"
# Matches at any depth are expected, not only direct children of the root.
Test.specify "Can get nested elements" <|
test_file = enso_project.data / "xml" / "nested.xml"
root = XML_Document.from_file test_file . root_element
bars = root.get_elements_by_tag_name "bar"
bars.length . should_equal 4
bars.map (t-> t.at "@id") . should_equal ["2", "4", "5", "6"]
Test.specify "Can get elements by name with a wildcard" <|
root.get_elements_by_tag_name "*" . length . should_equal 20
# Reading through `Data.read` and `Data.fetch` rather than calling `XML_Document` directly.
Test.group "Data.read / File_Format" <|
Test.specify "Can read from a file" <|
doc = Data.read test_file
doc.root_element.name . should_equal "class"
# NOTE(review): this test fetches a remote URL, so it presumably needs network access - confirm it is acceptable in CI.
Test.specify "Can read from an endpoint" <|
doc = Data.fetch "https://enso-data-samples.s3.us-west-1.amazonaws.com/sample.xml"
doc.root_element.name . should_equal "class"
doc.root_element.at 1 . name . should_equal "teacher"
Test.group "display text" <|
Test.specify "Can generate display text" <|
document.to_display_text . should_equal 'XML_Document (XML_Element "class")'
root.to_display_text . should_equal 'XML_Element "class"'
# Uses the smaller `small.xml` document so the whole expected JSON can be spelled out.
Test.group "JSON" <|
Test.specify "Can convert to JS_Object" <|
test_file = enso_project.data / "xml" / "small.xml"
document = XML_Document.from_file test_file
root = document . root_element
expected = Json.parse '''
{
"type": "XML_Element",
"tag": "class",
"attributes": [],
"children": [
{
"type": "XML_Element",
"tag": "teacher",
"attributes": [ [ "id", "100" ]
],
"children": [
{ "type": "XML_Element", "tag": "firstname", "attributes": [], "children": [ "Mary" ] },
{ "type": "XML_Element", "tag": "lastname", "attributes": [], "children": [ "Smith" ] },
{ "type": "XML_Element", "tag": "bio", "attributes": [], "children": [ "\\n Blah blah\\n " ] }
]
}
]
}
js = root.to_js_object
js.should_equal expected
# Entry point for running this suite on its own.
main = Test_Suite.run_main spec

View File

@ -56,6 +56,7 @@ import project.Data.Text.Parse_Spec
import project.Data.Text.Regex_Spec
import project.Data.Text.Span_Spec
import project.Data.Text.Utils_Spec
import project.Data.XML.XML_Spec
import project.Data.Vector.Slicing_Helpers_Spec
@ -155,3 +156,4 @@ main = Test_Suite.run_main <|
Warnings_Spec.spec
System_Spec.spec
Random_Spec.spec
XML_Spec.spec

View File

@ -745,7 +745,7 @@ spec =
Test.specify "should list files in a directory" <|
immediate = enso_project.data.list . map .to_text
immediate.sort.should_equal (resolve ["books.json", "helloworld.txt", "sample-json.weird-extension", "sample-malformed.json", "sample.json", "sample.png", "sample.txt", "sample.xxx", "transient", "tree", "windows.log", "windows.txt"])
immediate.sort.should_equal (resolve ["books.json", "helloworld.txt", "sample-json.weird-extension", "sample-malformed.json", "sample.json", "sample.png", "sample.txt", "sample.xxx", "transient", "tree", "windows.log", "windows.txt", 'xml'])
filtered1 = enso_project.data.list name_filter="s[a-cw]mple.{t?t,md}" . map .to_text
filtered1.should_equal (resolve ["sample.txt"])