mirror of
https://github.com/enso-org/enso.git
synced 2024-12-23 02:21:54 +03:00
Basic XML support (#7947)
This PR includes * Reading XML from a file, stream, or string * Reading XML via Data.fetch * Accessing the root element, element children, and attributes * Accessing tag text contents * Get tags by name * Inner / Outer XML string
This commit is contained in:
parent
16c8d2e302
commit
9ba7be20af
@ -581,6 +581,7 @@
|
||||
- [Added `Table.expand_column` and improved JSON deserialization.][7859]
|
||||
- [Implemented `Table.auto_value_types` for in-memory tables.][7908]
|
||||
- [Implemented Text.substring to easily select part of a Text field][7913]
|
||||
- [Implemented basic XML support][7947]
|
||||
|
||||
[debug-shortcuts]:
|
||||
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
|
||||
@ -826,6 +827,7 @@
|
||||
[7859]: https://github.com/enso-org/enso/pull/7859
|
||||
[7908]: https://github.com/enso-org/enso/pull/7908
|
||||
[7913]: https://github.com/enso-org/enso/pull/7913
|
||||
[7947]: https://github.com/enso-org/enso/pull/7947
|
||||
|
||||
#### Enso Compiler
|
||||
|
||||
|
399
distribution/lib/Standard/Base/0.0.0-dev/src/Data/XML.enso
Normal file
399
distribution/lib/Standard/Base/0.0.0-dev/src/Data/XML.enso
Normal file
@ -0,0 +1,399 @@
|
||||
import project.Any.Any
|
||||
import project.Data.Boolean.Boolean
|
||||
import project.Data.Json.Extensions
|
||||
import project.Data.Json.JS_Object
|
||||
import project.Data.Map.Map
|
||||
import project.Data.Numbers.Integer
|
||||
import project.Data.Text.Text
|
||||
import project.Data.Vector.Vector
|
||||
import project.Error.Error
|
||||
import project.Errors.Common.Index_Out_Of_Bounds
|
||||
import project.Errors.File_Error.File_Error
|
||||
import project.Errors.Illegal_State.Illegal_State
|
||||
import project.Errors.No_Such_Key.No_Such_Key
|
||||
import project.Nothing.Nothing
|
||||
import project.Panic.Panic
|
||||
import project.System.File.File
|
||||
import project.System.File.File_Access.File_Access
|
||||
import project.System.Input_Stream.Input_Stream
|
||||
from project.Data.Range.Extensions import all
|
||||
from project.Data.Text.Extensions import all
|
||||
|
||||
polyglot java import java.io.StringReader
|
||||
polyglot java import java.lang.Exception as JException
|
||||
polyglot java import javax.xml.parsers.DocumentBuilderFactory
|
||||
polyglot java import javax.xml.parsers.DocumentBuilder
|
||||
polyglot java import javax.xml.xpath.XPathConstants
|
||||
polyglot java import javax.xml.xpath.XPathFactory
|
||||
polyglot java import org.w3c.dom.Document
|
||||
polyglot java import org.w3c.dom.Element
|
||||
polyglot java import org.w3c.dom.Node
|
||||
polyglot java import org.w3c.dom.NodeList
|
||||
polyglot java import org.w3c.dom.Text as Java_Text
|
||||
polyglot java import org.xml.sax.InputSource
|
||||
polyglot java import org.xml.sax.SAXException
|
||||
polyglot java import org.xml.sax.SAXParseException
|
||||
|
||||
polyglot java import org.enso.base.XML_Utils
|
||||
|
||||
type XML_Document
    ## Read an XML document from a file.

       Arguments:
       - file: the `File` to read the XML document from.

       If there is an error reading the file, `File_Error` is thrown.

       If there is a parsing error, `XML_Error.Parse_Error` is thrown.

       > Example
         Read an XML document in 'doc.xml'.

             file = enso_project.data / "doc.xml"
             doc = XML_Document.from_file file
    from_file : File -> XML_Document ! XML_Error | File_Error
    from_file file:File =
        File_Error.handle_java_exceptions file <|
            file.with_input_stream [File_Access.Read] XML_Document.from_stream

    ## Read an XML document from an input stream.

       Arguments:
       - input_stream: the input stream to read the XML document from.

       If there is a parsing error, `XML_Error.Parse_Error` is thrown.

       > Example
         Read an XML document from an input_stream.

             doc = XML_Document.from_stream input_stream
    from_stream : Input_Stream -> XML_Document ! XML_Error
    from_stream input_stream:Input_Stream =
        XML_Error.handle_java_exceptions <|
            input_stream.with_java_stream java_stream-> XML_Document.from_source java_stream

    ## Read an XML document from a string.

       Arguments:
       - xml_string: The string to read the XML document from.

       If there is a parsing error, `XML_Error.Parse_Error` is thrown.

       > Example
         Read an XML document from a string.

             xml_string = "<foo></foo>"
             doc = XML_Document.from_text xml_string
    from_text : Text -> XML_Document ! XML_Error
    from_text xml_string:Text =
        XML_Error.handle_java_exceptions <|
            string_reader = StringReader.new xml_string
            XML_Document.from_source (InputSource.new string_reader)

    ## PRIVATE
       Read XML from an input source.

       Arguments:
       - input_source: anything accepted by the Java `DocumentBuilder.parse`
         method; the callers in this type pass a Java input stream or an
         `InputSource`.

       This method does not catch Java exceptions itself; the public `from_*`
       methods wrap the call in `XML_Error.handle_java_exceptions`.
    from_source : Any -> XML_Document ! XML_Error
    from_source input_source =
        document_builder_factory = DocumentBuilderFactory.newInstance
        # The document may come from an untrusted source (e.g. a web response).
        # Do not resolve external entities or external DTDs, as that enables
        # XXE attacks (local file disclosure, server-side requests). DOCTYPE
        # declarations and internal entities are still accepted.
        document_builder_factory.setFeature "http://xml.org/sax/features/external-general-entities" Boolean.False
        document_builder_factory.setFeature "http://xml.org/sax/features/external-parameter-entities" Boolean.False
        document_builder_factory.setFeature "http://apache.org/xml/features/nonvalidating/load-external-dtd" Boolean.False
        document_builder = document_builder_factory.newDocumentBuilder
        # Make parse errors surface as exceptions instead of being printed.
        XML_Utils.setCustomErrorHandler document_builder
        XML_Document.Value (document_builder.parse input_source)

    ## Get the root element of the document.

       > Example
         Get the root element of a document.

             doc = XML_Document.from_file file
             root = doc.root_element
    root_element : XML_Element ! XML_Error
    root_element self =
        XML_Error.handle_java_exceptions <|
            java_element = self.java_document.getDocumentElement
            XML_Element.Value java_element

    ## PRIVATE
       Convert to a JavaScript Object representing this XML_Document.
       The result is the JavaScript Object of the root element.
    to_js_object : JS_Object
    to_js_object self = self.root_element.to_js_object

    ## PRIVATE
       Convert to a display representation of this XML_Document.
    to_display_text : Text
    to_display_text self = "XML_Document (" + self.root_element.to_display_text + ")"

    ## PRIVATE
       Wraps the underlying Java DOM `Document`.
    Value (java_document:Document)
|
||||
|
||||
type XML_Element
    ## Gets the tag of an XML element.

       > Example
         Get the tag of an XML element.

             XML_Document.from_text '<foo>hello</foo>' . root_element . name
             # => "foo"
    name : Text ! XML_Error
    name self =
        XML_Error.handle_java_exceptions <|
            self.java_element.getNodeName

    ## Gets a child or attribute of an XML element, with a fallback value.

       Arguments:
       - key: If an `Integer`, returns the child at position `key` in the list
         of children. If a `Text`, treats `key` as an XPath specifier, and
         returns the elements it points to. If a `Text` that starts with `"@"`,
         returns the attribute with the given name.
       - if_missing: The value returned if the index is out of bounds or the
         attribute does not exist. Defaults to `Nothing`. It is not used for
         non-attribute XPath keys, which return a (possibly empty) `Vector`.

       > Example
         Get a nested tag:

             XML_Document.from_text '<foo><baz>hello</baz></foo>' . root_element . get 0
             # => XML_Document.from_text "<baz>hello</baz>" . root_element

       > Example
         Get a tag attribute.

             root = XML_Document.from_text '<foo bar="one">hello</foo>' . root_element
             root.get "@bar"
             # => "one"
    get : Text | Integer -> Any -> Any | Text | XML_Element | Vector (Text | XML_Element) ! No_Such_Key | Index_Out_Of_Bounds | XML_Error
    get self key:(Text|Integer) ~if_missing=Nothing =
        case key of
            _ : Integer -> self.children.get key if_missing
            _ : Text -> if is_attribute_key key then self.get_xpath key . get 0 if_missing else self.get_xpath key

    ## Gets a child or attribute of an XML element.

       Arguments:
       - key: If an `Integer`, returns the child at position `key` in the list
         of children. If a `Text`, treats `key` as an XPath specifier, and
         returns the elements it points to. If a `Text` that starts with `"@"`,
         returns the attribute with the given name.

       If an `Integer` key is out of bounds, `Index_Out_Of_Bounds` is thrown.
       If an attribute key does not exist, `No_Such_Key` is thrown.

       > Example
         Get a nested tag:

             XML_Document.from_text '<foo><baz>hello</baz></foo>' . root_element . at 0
             # => XML_Document.from_text "<baz>hello</baz>" . root_element

       > Example
         Get a tag attribute.

             root = XML_Document.from_text '<foo bar="one">hello</foo>' . root_element
             root.at "@bar"
             # => "one"
    at : Text | Integer -> Text | XML_Element | Vector (Text | XML_Element) ! No_Such_Key | Index_Out_Of_Bounds | XML_Error
    at self key:(Text|Integer) =
        # The error is passed directly as the lazy `if_missing` argument, so it
        # (and the `child_count` traversal it needs) is only computed when the
        # lookup actually fails.
        case key of
            _ : Integer -> self.get key (Error.throw (Index_Out_Of_Bounds.Error key self.child_count))
            _ : Text -> self.get key (Error.throw (No_Such_Key.Error self key))

    ## Get elements denoted by the given XPath key.

       Arguments:
       - key: The XPath string to use to search for elements.

       > Example
         Get an element by xpath.

             root = XML_Document.from_text '<class><teacher><firstname>Alice</firstname></teacher></class>' . root_element
             root.get_xpath "/class/teacher[1]/firstname"
             # => [XML_Document.from_text "<firstname>Alice</firstname>" . root_element]
    get_xpath : Text -> Vector (Text | XML_Element) ! XML_Error
    get_xpath self key:Text =
        XML_Error.handle_java_exceptions <|
            xpath = XPathFactory.newInstance.newXPath
            only_wanted_nodes (xpath.evaluate key self.java_element XPathConstants.NODESET)

    ## Gets the child elements of an XML element.

       `children` only returns child elements and child text nodes that are not
       100% whitespace. Other node types, such as comments, are not included.

       > Example
         Get the children of an element.

             XML_Document.from_text '<foo><baz>hello</baz></foo>' . root_element . children
             # => [XML_Document.from_text "<baz>hello</baz>" . root_element]
    children : Vector (XML_Element | Text) ! XML_Error
    children self =
        XML_Error.handle_java_exceptions <|
            only_wanted_nodes self.java_element.getChildNodes

    ## Gets the number of children of an XML element.

       `child_count` only counts child elements and child text nodes that are
       not 100% whitespace. Other node types, such as comments, are not included
       in the count.

       > Example
         Get the number of children of an element.

             XML_Document.from_text '<foo> <bar>hello</bar> <bar>hello2</bar> </foo>' . root_element . child_count
             # => 2
    child_count : Integer ! XML_Error
    child_count self = self.children.length

    ## Get an attribute of an XML element.

       Arguments:
       - name: The name of the attribute to get.
       - if_missing: The value returned if the attribute does not exist. By
         default, a `No_Such_Key` error is thrown.

       > Example
         Get an attribute of an element.

             root = XML_Document.from_text '<foo bar="one">hello</foo>' . root_element
             root.attribute "bar"
             # => "one"
    attribute : Text -> Any -> Text | Any ! XML_Error
    attribute self name:Text ~if_missing=(Error.throw (No_Such_Key.Error self name)) =
        XML_Error.handle_java_exceptions <|
            attr = self.java_element.getAttributeNode name
            if attr.is_nothing then if_missing else attr.getValue

    ## Gets a map containing all the attributes of an XML element.

       > Example
         Get the attributes of an element.

             XML_Document.from_text '<foo bar="one">hello</foo>' . root_element . attributes
             # => Map.from_vector [["bar", "one"]]
    attributes : Map Text Text ! XML_Error
    attributes self =
        XML_Error.handle_java_exceptions <|
            named_node_map = self.java_element.getAttributes
            keys_and_values = 0.up_to named_node_map.getLength . map i->
                node = named_node_map.item i
                [node.getNodeName, node.getNodeValue]
            Map.from_vector keys_and_values

    ## Gets the text (non-markup) contents of the element and its descendants,
       if any.

       > Example
         Get the text content of an element.

             XML_Document.from_text '<foo>hello</foo>' . root_element . text
             # => "hello"
    text : Text ! XML_Error
    text self =
        XML_Error.handle_java_exceptions <|
            self.java_element.getTextContent

    ## Gets the raw XML of the element (including tag, attributes and contents).

       > Example
         Get the outer XML of an element.

             XML_Document.from_text '<foo>hello</foo>' . root_element . outer_xml
             # => '<foo>hello</foo>'
    outer_xml : Text ! XML_Error
    outer_xml self =
        XML_Error.handle_java_exceptions <|
            XML_Utils.outerXML self.java_element

    ## Gets the raw XML of the contents of the element, not including the
       outermost tag and attributes.

       > Example
         Get the inner XML of an element.

             XML_Document.from_text '<foo><bar>hello</bar></foo>' . root_element . inner_xml
             # => '<bar>hello</bar>'
    inner_xml : Text ! XML_Error
    inner_xml self =
        XML_Error.handle_java_exceptions <|
            XML_Utils.innerXML self.java_element

    ## Gets elements matching a given tag name.

       This searches through all descendants of the node, not just immediate children.

       Arguments:
       - tag_name: The tag name to search for.

       > Example
         Get all `baz` elements below the root.

             XML_Document.from_text '<foo> <baz>hello</baz> <bar>and</bar> <baz>goodbye</baz> </foo>' . root_element . get_elements_by_tag_name "baz"
             # => [XML_Document.from_text "<baz>hello</baz>" . root_element, XML_Document.from_text "<baz>goodbye</baz>" . root_element]
    get_elements_by_tag_name : Text -> Vector XML_Element ! XML_Error
    get_elements_by_tag_name self tag_name:Text =
        XML_Error.handle_java_exceptions <|
            only_wanted_nodes (self.java_element.getElementsByTagName tag_name)

    ## PRIVATE
       Convert to a display representation of this XML_Element.
    to_display_text : Text
    to_display_text self = 'XML_Element "' + self.name + '"'

    ## PRIVATE
       Convert to a JavaScript Object representing this XML_Element.
       The object has the fields `type`, `tag`, `attributes` and `children`.
    to_js_object : JS_Object ! XML_Error
    to_js_object self =
        builder = Vector.new_builder 4
        builder.append ["type", "XML_Element"]
        builder.append ["tag", self.name]
        builder.append ["attributes", self.attributes.to_js_object]
        builder.append ["children", self.children.to_js_object]
        JS_Object.from_pairs builder.to_vector

    ## PRIVATE
       Wraps the underlying Java DOM `Element`.
    Value (java_element:Element)
|
||||
|
||||
type XML_Error
    ## An error that indicates that the XML data could not be parsed.

       Arguments:
       - line_number: the line on which the parse failed.
       - column_number: the column at which the parse failed.
    Parse_Error (line_number : Integer) (column_number : Integer)

    ## Any other XML-related Java exception.

       Arguments:
       - error: a textual description of the underlying Java exception.
    Other (error : Text)

    ## PRIVATE

       Utility method for running an action with Java exceptions mapping.

       Arguments:
       - action: the (lazily evaluated) computation to run. Any Java exception
         panic raised by it is converted to an `XML_Error` dataflow error.
    handle_java_exceptions : Any -> Any ! XML_Error
    handle_java_exceptions ~action =
        Panic.catch JException action caught_panic->
            XML_Error.wrap_java_exception caught_panic.payload

    ## PRIVATE

       Converts a Java `Exception` into its Enso counterpart and throws it as a
       dataflow error. A `SAXParseException` becomes `XML_Error.Parse_Error`;
       any other exception becomes `XML_Error.Other`.
    wrap_java_exception : JException -> XML_Error
    wrap_java_exception exception:JException = case exception of
        _ : SAXParseException -> Error.throw (XML_Error.Parse_Error exception.getLineNumber exception.getColumnNumber)
        # The message is parenthesised explicitly rather than relying on the
        # tighter binding of an operator written without spaces.
        _ -> Error.throw (XML_Error.Other ("An Exception has occurred: " + exception.to_text))

    ## PRIVATE
       Convert the XML_Error to a human-readable format.
    to_display_text : Text
    to_display_text self = case self of
        XML_Error.Parse_Error line_number column_number -> "The XML document could not be parsed at line " + line_number.to_text + ", column " + column_number.to_text
        XML_Error.Other error -> error
|
||||
|
||||
## PRIVATE
   Converts a Java `NodeList` into a `Vector`, dropping unwanted nodes.

   The nodes that are kept, and what they are converted to:
   - Elements: wrapped in `XML_Element`.
   - Text nodes that are not completely whitespace: their text value.
   - Attributes (which only arise in the case of XPath keys): their value.
only_wanted_nodes : NodeList -> Vector (Text | XML_Element)
only_wanted_nodes node_list:NodeList =
    all_nodes = 0.up_to node_list.getLength . map index->
        node_list.item index

    # Decides whether a node is one of the kinds listed above.
    is_wanted : Node -> Boolean
    is_wanted node:Node =
        node_type = node.getNodeType
        keep_as_text = node_type == Node.TEXT_NODE && node.getNodeValue.is_whitespace.not
        node_type == Node.ELEMENT_NODE || node_type == Node.ATTRIBUTE_NODE || keep_as_text

    # Elements are wrapped, text and attribute nodes are reduced to their value.
    # Any other node type should have been filtered out before this is called.
    convert node =
        node_type = node.getNodeType
        if node_type == Node.ELEMENT_NODE then XML_Element.Value node else
            if node_type == Node.TEXT_NODE then node.getNodeValue else
                if node_type == Node.ATTRIBUTE_NODE then node.getValue else
                    Panic.throw (Illegal_State.Error ("Unexpected child type " + node_type.to_text))

    all_nodes.filter is_wanted . map convert
|
||||
|
||||
## PRIVATE
   Checks whether a key denotes an attribute, i.e. whether it starts with "@".
is_attribute_key : Text -> Boolean
is_attribute_key s:Text =
    attribute_prefix = "@"
    s.starts_with attribute_prefix
|
@ -0,0 +1,50 @@
|
||||
import project.Any.Any
|
||||
import project.Data.Text.Text
|
||||
import project.Data.XML.XML_Document
|
||||
import project.Errors.Problem_Behavior.Problem_Behavior
|
||||
import project.Network.URI.URI
|
||||
import project.Nothing.Nothing
|
||||
import project.System.File.File
|
||||
import project.System.Input_Stream.Input_Stream
|
||||
from project.Data.Text.Extensions import all
|
||||
|
||||
## A `File_Format` for reading XML files.
   Writing is not supported: `for_file_write` always returns `Nothing`.
type XML_Format
    ## PRIVATE
       If the File_Format supports reading from the file, return a configured instance.
       Only files with the `.xml` extension are recognised.
    for_file_read : File -> XML_Format | Nothing
    for_file_read file:File =
        if file.extension == ".xml" then XML_Format else Nothing

    ## PRIVATE
       If this File_Format should be used for writing to that file, return a configured instance.
       This format is never selected for writing.
    for_file_write : File -> XML_Format | Nothing
    for_file_write file:File =
        _ = [file]
        Nothing

    ## PRIVATE
       If the File_Format supports reading from the web response, return a configured instance.
       The part of the content type before the first `;` is compared against
       `application/xml` and `text/xml`.
    for_web : Text -> URI|Text -> XML_Format | Nothing
    for_web content_type:Text uri:(URI|Text) =
        _ = [uri]
        mime_type = content_type.split ';' . first . trim
        is_xml = mime_type == "application/xml" || mime_type == "text/xml"
        if is_xml then XML_Format else Nothing

    ## PRIVATE
       Implements the `File.read` for this `File_Format`
       The `on_problems` argument is ignored.
    read : File -> Problem_Behavior -> Any
    read self file:File on_problems:Problem_Behavior =
        _ = [on_problems]
        XML_Document.from_file file

    ## PRIVATE
       Implements the `Data.parse` for this `File_Format`
    read_stream : Input_Stream -> Any
    read_stream self stream:Input_Stream =
        XML_Document.from_stream stream
|
@ -48,6 +48,9 @@ import project.Data.Time.Time_Of_Day.Time_Of_Day
|
||||
import project.Data.Time.Time_Period.Time_Period
|
||||
import project.Data.Time.Time_Zone.Time_Zone
|
||||
import project.Data.Vector.Vector
|
||||
import project.Data.XML.XML_Document
|
||||
import project.Data.XML.XML_Error
|
||||
import project.Data.XML.XML_Format.XML_Format
|
||||
import project.Error.Error
|
||||
import project.Errors
|
||||
import project.Errors.Problem_Behavior.Problem_Behavior
|
||||
@ -136,6 +139,9 @@ export project.Data.Time.Time_Of_Day.Time_Of_Day
|
||||
export project.Data.Time.Time_Period.Time_Period
|
||||
export project.Data.Time.Time_Zone.Time_Zone
|
||||
export project.Data.Vector.Vector
|
||||
export project.Data.XML.XML_Document
|
||||
export project.Data.XML.XML_Error
|
||||
export project.Data.XML.XML_Format.XML_Format
|
||||
export project.Error.Error
|
||||
export project.Errors
|
||||
export project.Errors.Problem_Behavior.Problem_Behavior
|
||||
|
76
std-bits/base/src/main/java/org/enso/base/XML_Utils.java
Normal file
76
std-bits/base/src/main/java/org/enso/base/XML_Utils.java
Normal file
@ -0,0 +1,76 @@
|
||||
package org.enso.base;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import javax.xml.parsers.DocumentBuilder;
|
||||
import javax.xml.transform.OutputKeys;
|
||||
import javax.xml.transform.Result;
|
||||
import javax.xml.transform.Source;
|
||||
import javax.xml.transform.Transformer;
|
||||
import javax.xml.transform.TransformerException;
|
||||
import javax.xml.transform.TransformerFactory;
|
||||
import javax.xml.transform.dom.DOMSource;
|
||||
import javax.xml.transform.stream.StreamResult;
|
||||
import org.w3c.dom.Element;
|
||||
import org.w3c.dom.NodeList;
|
||||
import org.xml.sax.ErrorHandler;
|
||||
import org.xml.sax.SAXException;
|
||||
import org.xml.sax.SAXParseException;
|
||||
|
||||
public class XML_Utils {
  /**
   * Return the string representation of an XML element, including its tag and all its contents.
   *
   * <p>No XML declaration is emitted.
   *
   * @param element the element to convert to a string
   * @return the string representation of the element
   * @throws TransformerException if the transformer cannot be created or the serialization fails
   */
  public static String outerXML(Element element) throws TransformerException {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    Transformer transformer = newFragmentTransformer();
    Source source = new DOMSource(element);
    Result target = new StreamResult(out);
    transformer.transform(source, target);
    return decodeUtf8(out);
  }

  /**
   * Return the string representation of the contents of an XML element, not including its tag.
   *
   * <p>Every child node of the element is serialized in document order into the same output. No
   * XML declaration is emitted. An element without children yields an empty string.
   *
   * @param element the element to convert to a string
   * @return the string representation of the element's contents
   * @throws TransformerException if the transformer cannot be created or the serialization fails
   */
  public static String innerXML(Element element) throws TransformerException {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    Transformer transformer = newFragmentTransformer();
    Result target = new StreamResult(out);
    NodeList childNodes = element.getChildNodes();
    for (int i = 0; i < childNodes.getLength(); ++i) {
      Source source = new DOMSource(childNodes.item(i));
      transformer.transform(source, target);
    }
    return decodeUtf8(out);
  }

  /**
   * Install an error handler on the given builder that ignores warnings and rethrows both
   * recoverable and fatal parse errors, so that they reach the caller as exceptions.
   *
   * @param documentBuilder the builder to configure
   */
  public static void setCustomErrorHandler(DocumentBuilder documentBuilder) {
    documentBuilder.setErrorHandler(
        new ErrorHandler() {
          @Override
          public void warning(SAXParseException e) throws SAXException {
            // Warnings are deliberately ignored.
          }

          @Override
          public void fatalError(SAXParseException e) throws SAXException {
            throw e;
          }

          @Override
          public void error(SAXParseException e) throws SAXException {
            throw e;
          }
        });
  }

  /** Create a transformer that writes XML fragments (no declaration) encoded as UTF-8. */
  private static Transformer newFragmentTransformer() throws TransformerException {
    Transformer transformer = TransformerFactory.newInstance().newTransformer();
    transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
    // Pin the byte encoding explicitly so that decodeUtf8 always matches it.
    transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
    return transformer;
  }

  /**
   * Decode the serialized bytes as UTF-8. The no-argument {@code toString()} would use the
   * platform default charset and corrupt non-ASCII characters when that default is not UTF-8.
   */
  private static String decodeUtf8(ByteArrayOutputStream out) {
    return new String(out.toByteArray(), java.nio.charset.StandardCharsets.UTF_8);
  }
}
|
@ -0,0 +1,14 @@
|
||||
package org.enso.base.file_format;
|
||||
|
||||
@org.openide.util.lookup.ServiceProvider(service = FileFormatSPI.class)
|
||||
public class XMLFormatSPI extends FileFormatSPI {
|
||||
@Override
|
||||
protected String getModuleName() {
|
||||
return "Standard.Base.Data.XML.XML_Format";
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getTypeName() {
|
||||
return "XML_Format";
|
||||
}
|
||||
}
|
13
test/Tests/data/xml/nested.xml
Normal file
13
test/Tests/data/xml/nested.xml
Normal file
@ -0,0 +1,13 @@
|
||||
<?xml version = "1.0"?>
|
||||
<foo id="1">
|
||||
<bar id="2">
|
||||
</bar>
|
||||
<baz id="3">
|
||||
<bar id="4">
|
||||
<bar id="5">
|
||||
</bar>
|
||||
</bar>
|
||||
</baz>
|
||||
<bar id="6">
|
||||
</bar>
|
||||
</foo>
|
35
test/Tests/data/xml/sample.xml
Normal file
35
test/Tests/data/xml/sample.xml
Normal file
@ -0,0 +1,35 @@
|
||||
<?xml version = "1.0"?>
|
||||
<class>
|
||||
<teacher id="100">
|
||||
<firstname>Mary</firstname>
|
||||
<lastname>Smith</lastname>
|
||||
<bio>
|
||||
Blah blah
|
||||
</bio>
|
||||
</teacher>
|
||||
<teacher id="101">
|
||||
<firstname>Bob</firstname>
|
||||
<lastname>Jones</lastname>
|
||||
<bio>
|
||||
This that
|
||||
</bio>
|
||||
</teacher>
|
||||
<student studentId="1000" year="2">
|
||||
<firstname>Alice</firstname>
|
||||
<lastname>Wright</lastname>
|
||||
<gpa>4.01</gpa>
|
||||
</student>
|
||||
<student studentId="1001" year="3">
|
||||
<firstname>Jessi</firstname>
|
||||
<lastname>Cooper</lastname>
|
||||
<gpa>3.99</gpa>
|
||||
</student>
|
||||
<student studentId="1002" year="1">
|
||||
Some
|
||||
<firstname>Randy</firstname>
|
||||
Extra
|
||||
<lastname>Brown</lastname>
|
||||
Text
|
||||
<gpa>3.99</gpa>
|
||||
</student>
|
||||
</class>
|
9
test/Tests/data/xml/small.xml
Normal file
9
test/Tests/data/xml/small.xml
Normal file
@ -0,0 +1,9 @@
|
||||
<class>
|
||||
<teacher id="100">
|
||||
<firstname>Mary</firstname>
|
||||
<lastname>Smith</lastname>
|
||||
<bio>
|
||||
Blah blah
|
||||
</bio>
|
||||
</teacher>
|
||||
</class>
|
228
test/Tests/src/Data/XML/XML_Spec.enso
Normal file
228
test/Tests/src/Data/XML/XML_Spec.enso
Normal file
@ -0,0 +1,228 @@
|
||||
from Standard.Base import all
|
||||
import Standard.Base.Errors.Common.Syntax_Error
|
||||
import Standard.Base.Errors.File_Error.File_Error
|
||||
|
||||
from Standard.Test import Test, Test_Suite
|
||||
import Standard.Test.Extensions
|
||||
|
||||
spec =
|
||||
test_file = enso_project.data / "xml" / "sample.xml"
|
||||
document = XML_Document.from_file test_file
|
||||
root = document . root_element
|
||||
|
||||
fix_windows_newlines s = s.replace '\r\n' '\n'
|
||||
|
||||
Test.group "Read XML" <|
|
||||
Test.specify "Can read from a file" <|
|
||||
root.name . should_equal "class"
|
||||
|
||||
Test.specify "Error if file does not exist" <|
|
||||
test_file = enso_project.data / "xml" / "sample.xmlnotexists"
|
||||
XML_Document.from_file test_file . should_fail_with File_Error
|
||||
|
||||
Test.specify "Can read from a stream" <|
|
||||
test_file.with_input_stream [File_Access.Read] input_stream->
|
||||
doc = XML_Document.from_stream input_stream
|
||||
doc.root_element.name . should_equal "class"
|
||||
|
||||
Test.specify "Can read from a string" <|
|
||||
xml_string = test_file.read_text
|
||||
doc = XML_Document.from_text xml_string
|
||||
doc.root_element.name . should_equal "class"
|
||||
|
||||
Test.specify "Can read from a short string" <|
|
||||
xml_string = "<class></class>"
|
||||
doc = XML_Document.from_text xml_string
|
||||
doc.root_element.name . should_equal "class"
|
||||
|
||||
Test.specify "Parse error from file" <|
|
||||
test_file = enso_project.data / "sample.txt"
|
||||
XML_Document.from_file test_file . catch . should_be_a XML_Error.Parse_Error
|
||||
|
||||
Test.specify "Parse error from string" <|
|
||||
xml_string = "<<<<</"
|
||||
XML_Document.from_text xml_string . catch . should_be_a XML_Error.Parse_Error
|
||||
|
||||
Test.group "at/get" <|
|
||||
Test.specify "Can get children by index" <|
|
||||
root.at 0 . name . should_equal "teacher"
|
||||
|
||||
root.at 0 . at 0 . name . should_equal "firstname"
|
||||
root.at 0 . at 1 . name . should_equal "lastname"
|
||||
root.at 0 . at 2 . name . should_equal "bio"
|
||||
root.at 0 . at 2 . at 0 . should_equal '\n Blah blah\n '
|
||||
|
||||
root.at 3 . at 0 . name . should_equal "firstname"
|
||||
root.at 3 . at 1 . name . should_equal "lastname"
|
||||
root.at 3 . at 2 . name . should_equal "gpa"
|
||||
root.at 3 . at 2 . at 0 . should_equal "3.99"
|
||||
|
||||
Test.specify "Can get text children by index" <|
|
||||
root.at 4 . at 0 . should_equal '\n Some\n '
|
||||
root.at 4 . at 2 . should_equal '\n Extra\n '
|
||||
root.at 4 . at 4 . should_equal '\n Text\n '
|
||||
|
||||
Test.specify "Can get element attributes" <|
|
||||
root.at 0 . at "@id" . should_equal "100"
|
||||
root.at 1 . at "@id" . should_equal "101"
|
||||
root.at 2 . at "@studentId" . should_equal "1000"
|
||||
root.at 3 . at "@studentId" . should_equal "1001"
|
||||
|
||||
root.at 0 . attribute "id" . should_equal "100"
|
||||
root.at 1 . attribute "id" . should_equal "101"
|
||||
root.at 2 . attribute "studentId" . should_equal "1000"
|
||||
root.at 3 . attribute "studentId" . should_equal "1001"
|
||||
|
||||
root.at 3 . attribute "does_not_exist" if_missing="if_missing" . should_equal "if_missing"
|
||||
|
||||
Test.specify "Can get element an attribute map" <|
|
||||
root.at 2 . attributes . should_equal (Map.from_vector [["studentId", "1000"], ["year", "2"]])
|
||||
root.at 3 . attributes . should_equal (Map.from_vector [["studentId", "1001"], ["year", "3"]])
|
||||
|
||||
Test.specify "Can get nodes via xpath" <|
|
||||
classes = root.at "/class"
|
||||
classes.length . should_equal 1
|
||||
classes.at 0 . name . should_equal "class"
|
||||
|
||||
teachers = root.at "/class/teacher"
|
||||
teachers.length . should_equal 2
|
||||
teachers.at 0 . at "@id" . should_equal "100"
|
||||
teachers.at 1 . at "@id" . should_equal "101"
|
||||
|
||||
students = root.at "/class/student"
|
||||
students.length . should_equal 3
|
||||
students.at 0 . at "@studentId" . should_equal "1000"
|
||||
students.at 1 . at "@studentId" . should_equal "1001"
|
||||
|
||||
root.at "/class/teacher[1]/firstname" . at 0 . text . should_equal "Mary"
|
||||
root.at "/class/teacher[2]/firstname" . at 0 . text . should_equal "Bob"
|
||||
root.at "/class/teacher[1]/firstname/text()" . should_equal ["Mary"]
|
||||
root.at "/class/teacher[2]/firstname/text()" . should_equal ["Bob"]
|
||||
root.at "/class/teacher/firstname/text()" . should_equal ["Mary", "Bob"]
|
||||
root.at "/class/teacher[1]/bio" . at 0 . text . should_equal '\n Blah blah\n '
|
||||
root.at "/class/teacher[2]/bio" . at 0 . text . should_equal '\n This that\n '
|
||||
root.get "/class/teacher[23]" . should_equal []
|
||||
|
||||
root.at "teacher[1]/firstname" . at 0 . text . should_equal "Mary"
|
||||
root.at "teacher[2]/firstname" . at 0 . text . should_equal "Bob"
|
||||
root.at "teacher[1]/bio" . at 0 . text . should_equal '\n Blah blah\n '
|
||||
root.at "teacher[2]/bio" . at 0 . text . should_equal '\n This that\n '
|
||||
|
||||
Test.specify "Can get children using .get" <|
|
||||
root.get 0 . get 0 . name . should_equal "firstname"
|
||||
root.get 0 . get "@id" . should_equal "100"
|
||||
root.get "/class/teacher[1]/firstname" . get 0 . text . should_equal "Mary"
|
||||
|
||||
root.get 0 . get 32 "if_missing" . should_equal "if_missing"
|
||||
root.get 0 . get "@not_there" "if_missing" . should_equal "if_missing"
|
||||
|
||||
Test.specify "Can handle a bad xpath" <|
|
||||
root.at "/qqq[[[[1" . at 0 . text . should_fail_with XML_Error
|
||||
|
||||
Test.group "tag name" <|
|
||||
Test.specify "Can get the tag name" <|
|
||||
root.name . should_equal "class"
|
||||
root.at 0 . name . should_equal "teacher"
|
||||
root.at 1 . at 1 . name . should_equal "lastname"
|
||||
|
||||
Test.group "children" <|
|
||||
Test.specify "Can get the list of children" <|
|
||||
children = root.children
|
||||
children.length . should_equal 5
|
||||
children.at 0 . at "@id" . should_equal "100"
|
||||
children.at 1 . at "@id" . should_equal "101"
|
||||
children.at 2 . at "@studentId" . should_equal "1000"
|
||||
children.at 3 . at "@studentId" . should_equal "1001"
|
||||
children.at 4 . at "@studentId" . should_equal "1002"
|
||||
|
||||
Test.specify "Can get the number of children" <|
|
||||
root.child_count . should_equal 5
|
||||
|
||||
# Checks `text` on a nested element and on its parent.
# The expected parent text contains the nested element's "Randy" together with
# surrounding text and whitespace, so `text` is not limited to direct text only.
Test.group "text contents" <|
|
||||
Test.specify "Can get child text contents" <|
|
||||
root.at 4 . at 1 . text . should_equal "Randy"
|
||||
root.at 4 . text . should_equal '\n Some\n Randy\n Extra\n Brown\n Text\n 3.99\n '
|
||||
|
||||
# Checks `inner_xml` (element content without its own tag) and `outer_xml`
# (content including the enclosing tag) for elements selected by XPath.
# Every result goes through `fix_windows_newlines` before comparison; that
# helper is defined outside this view - presumably it normalizes line endings
# so the expectations hold on Windows (TODO confirm).
Test.group "inner / outer xml" <|
|
||||
Test.specify "Can get the inner xml" <|
|
||||
fix_windows_newlines (root.at "/class/teacher[1]" . at 0 . inner_xml) . should_equal '\n <firstname>Mary</firstname>\n <lastname>Smith</lastname>\n <bio>\n Blah blah\n </bio>\n '
|
||||
fix_windows_newlines (root.at "/class/teacher[1]/bio" . at 0 . inner_xml) . should_equal '\n Blah blah\n '
|
||||
fix_windows_newlines (root.at "/class/teacher[2]/bio" . at 0 . inner_xml) . should_equal '\n This that\n '
|
||||
fix_windows_newlines (root.at "/class/teacher[2]" . at 0 . inner_xml) . should_equal '\n <firstname>Bob</firstname>\n <lastname>Jones</lastname>\n <bio>\n This that\n </bio>\n '
|
||||
|
||||
Test.specify "Can get the outer xml" <|
|
||||
fix_windows_newlines (root.at "/class/teacher[1]/bio" . at 0 . outer_xml) . should_equal '<bio>\n Blah blah\n </bio>'
|
||||
fix_windows_newlines (root.at "/class/teacher[2]/bio" . at 0 . outer_xml) . should_equal '<bio>\n This that\n </bio>'
|
||||
|
||||
# Checks `get_elements_by_tag_name` for plain tag names, for a separate nested
# fixture, and for the "*" wildcard.
Test.group "get_elements_by_tag_name" <|
|
||||
Test.specify "Can get elements by tag name" <|
|
||||
teachers = root.get_elements_by_tag_name "teacher"
|
||||
students = root.get_elements_by_tag_name "student"
|
||||
# "gpa" elements are looked up from the root as well; the `root.at 0 . get 0`
# check elsewhere shows they are not direct children of the root, so the
# lookup is expected to descend below the first level.
gpas = root.get_elements_by_tag_name "gpa"
|
||||
|
||||
teachers.length . should_equal 2
|
||||
students.length . should_equal 3
|
||||
gpas.length . should_equal 3
|
||||
|
||||
teachers.at 0 . at "@id" . should_equal "100"
|
||||
teachers.at 1 . at "@id" . should_equal "101"
|
||||
students.at 0 . at "@studentId" . should_equal "1000"
|
||||
students.at 1 . at "@studentId" . should_equal "1001"
|
||||
students.at 2 . at "@studentId" . should_equal "1002"
|
||||
gpas.at 0 . text . should_equal "4.01"
|
||||
gpas.at 1 . text . should_equal "3.99"
|
||||
gpas.at 2 . text . should_equal "3.99"
|
||||
|
||||
Test.specify "Can get nested elements" <|
|
||||
# This spec binds its own `test_file` and `root` from `nested.xml`; the other
# specs use a `root` defined outside this view.
test_file = enso_project.data / "xml" / "nested.xml"
|
||||
root = XML_Document.from_file test_file . root_element
|
||||
bars = root.get_elements_by_tag_name "bar"
|
||||
bars.length . should_equal 4
|
||||
bars.map (t-> t.at "@id") . should_equal ["2", "4", "5", "6"]
|
||||
|
||||
Test.specify "Can get elements by name with a wildcard" <|
|
||||
# NOTE(review): 20 is presumably the total number of elements below the root
# of the shared fixture - confirm against the fixture file.
root.get_elements_by_tag_name "*" . length . should_equal 20
|
||||
|
||||
# Checks that XML is parsed when loaded through the generic `Data.read` and
# `Data.fetch` entry points rather than through `XML_Document` directly.
Test.group "Data.read / File_Format" <|
|
||||
Test.specify "Can read from a file" <|
|
||||
# `test_file` is defined outside this view.
doc = Data.read test_file
|
||||
doc.root_element.name . should_equal "class"
|
||||
|
||||
Test.specify "Can read from an endpoint" <|
|
||||
# NOTE(review): this spec fetches a remote URL, so it needs network access
# and depends on the hosted sample staying available.
doc = Data.fetch "https://enso-data-samples.s3.us-west-1.amazonaws.com/sample.xml"
|
||||
doc.root_element.name . should_equal "class"
|
||||
doc.root_element.at 1 . name . should_equal "teacher"
|
||||
|
||||
# Checks `to_display_text` for the document and for its root element.
# `document` and `root` are defined outside this view.
Test.group "display text" <|
|
||||
Test.specify "Can generate display text" <|
|
||||
document.to_display_text . should_equal 'XML_Document (XML_Element "class")'
|
||||
root.to_display_text . should_equal 'XML_Element "class"'
|
||||
|
||||
# Checks `to_js_object` on the root element of `small.xml` against a JSON
# literal parsed with `Json.parse`. In the expected value each element is an
# object with "type", "tag", "attributes" (a list of [name, value] pairs) and
# "children" (nested element objects or text strings).
Test.group "JSON" <|
|
||||
Test.specify "Can convert to JS_Object" <|
|
||||
test_file = enso_project.data / "xml" / "small.xml"
|
||||
document = XML_Document.from_file test_file
|
||||
root = document . root_element
|
||||
expected = Json.parse '''
|
||||
{
|
||||
"type": "XML_Element",
|
||||
"tag": "class",
|
||||
"attributes": [],
|
||||
"children": [
|
||||
{
|
||||
"type": "XML_Element",
|
||||
"tag": "teacher",
|
||||
"attributes": [ [ "id", "100" ]
|
||||
],
|
||||
"children": [
|
||||
{ "type": "XML_Element", "tag": "firstname", "attributes": [], "children": [ "Mary" ] },
|
||||
{ "type": "XML_Element", "tag": "lastname", "attributes": [], "children": [ "Smith" ] },
|
||||
{ "type": "XML_Element", "tag": "bio", "attributes": [], "children": [ "\\n Blah blah\\n " ] }
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
js = root.to_js_object
|
||||
js.should_equal expected
|
||||
|
||||
# Passes this file's `spec` to `Test_Suite.run_main`.
main = Test_Suite.run_main spec
|
@ -56,6 +56,7 @@ import project.Data.Text.Parse_Spec
|
||||
import project.Data.Text.Regex_Spec
|
||||
import project.Data.Text.Span_Spec
|
||||
import project.Data.Text.Utils_Spec
|
||||
import project.Data.XML.XML_Spec
|
||||
|
||||
import project.Data.Vector.Slicing_Helpers_Spec
|
||||
|
||||
@ -155,3 +156,4 @@ main = Test_Suite.run_main <|
|
||||
Warnings_Spec.spec
|
||||
System_Spec.spec
|
||||
Random_Spec.spec
|
||||
XML_Spec.spec
|
||||
|
@ -745,7 +745,7 @@ spec =
|
||||
|
||||
Test.specify "should list files in a directory" <|
|
||||
immediate = enso_project.data.list . map .to_text
|
||||
immediate.sort.should_equal (resolve ["books.json", "helloworld.txt", "sample-json.weird-extension", "sample-malformed.json", "sample.json", "sample.png", "sample.txt", "sample.xxx", "transient", "tree", "windows.log", "windows.txt"])
|
||||
immediate.sort.should_equal (resolve ["books.json", "helloworld.txt", "sample-json.weird-extension", "sample-malformed.json", "sample.json", "sample.png", "sample.txt", "sample.xxx", "transient", "tree", "windows.log", "windows.txt", 'xml'])
|
||||
|
||||
filtered1 = enso_project.data.list name_filter="s[a-cw]mple.{t?t,md}" . map .to_text
|
||||
filtered1.should_equal (resolve ["sample.txt"])
|
||||
|
Loading…
Reference in New Issue
Block a user