Reworking Excel support to allow for reading of big files (#8403)

- Closes #8111 by making sure that all Excel workbooks are read using a backing file (which should be more memory efficient).
- If the workbook is being opened from an input stream, that stream is materialized to a `Temporary_File`.
- Adds tests fetching Table formats from HTTP.
- Extends `simple-httpbin` with ability to serve files for our tests.
- Ensures that the `Infer` option on `Excel` format also works with streams, if content-type metadata is available (e.g. from HTTP headers).
- Implements a `Temporary_File` facility that can be used to create a temporary file that is deleted once all references to the `Temporary_File` instance are GCed.
This commit is contained in:
Radosław Waśko 2023-12-15 01:02:15 +01:00 committed by GitHub
parent 95f11abe2c
commit b5c995a7bf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
47 changed files with 1917 additions and 544 deletions

View File

@ -21,6 +21,7 @@ resources/python
# The files in the `data` directory of our tests may have specific structure or
# even be malformed on purpose, so we do not want to run prettier on them.
test/**/data
tools/simple-httpbin/www-files
# GUI
**/scala-parser.js

View File

@ -594,6 +594,7 @@
- [Implemented truncate `Date_Time` for database backend (Postgres only).][8235]
- [Initial Enso Cloud APIs.][8006]
- [Errors thrown inside `map` are wrapped in `Map_Error`.][8307]
- [Support for loading big Excel files.][8403]
[debug-shortcuts]:
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
@ -850,6 +851,7 @@
[8150]: https://github.com/enso-org/enso/pull/8150
[8235]: https://github.com/enso-org/enso/pull/8235
[8307]: https://github.com/enso-org/enso/pull/8307
[8403]: https://github.com/enso-org/enso/pull/8403
#### Enso Compiler

View File

@ -1330,13 +1330,16 @@ lazy val truffleDslSuppressWarnsSetting = Seq(
)
/** A setting to replace javac with Frgaal compiler, allowing to use latest Java features in the code
* and still compile down to JDK 11
* and still compile down to JDK 17
*/
lazy val frgaalJavaCompilerSetting = Seq(
lazy val frgaalJavaCompilerSetting =
customFrgaalJavaCompilerSettings(targetJavaVersion)
def customFrgaalJavaCompilerSettings(targetJdk: String) = Seq(
Compile / compile / compilers := FrgaalJavaCompiler.compilers(
(Compile / dependencyClasspath).value,
compilers.value,
targetJavaVersion
targetJdk
),
// This dependency is needed only so that developers don't download Frgaal manually.
// Sadly it cannot be placed under plugins either because meta dependencies are not easily
@ -2731,11 +2734,16 @@ val allStdBits: Parser[String] =
lazy val `simple-httpbin` = project
.in(file("tools") / "simple-httpbin")
.settings(
frgaalJavaCompilerSetting,
customFrgaalJavaCompilerSettings(targetJdk = "21"),
autoScalaLibrary := false,
Compile / javacOptions ++= Seq("-Xlint:all"),
Compile / run / mainClass := Some("org.enso.shttp.SimpleHTTPBin"),
assembly / mainClass := (Compile / run / mainClass).value,
libraryDependencies ++= Seq(
"org.apache.commons" % "commons-text" % commonsTextVersion
)
),
(Compile / run / fork) := true,
(Compile / run / connectInput) := true
)
.configs(Test)

View File

@ -4,8 +4,10 @@ import Standard.Base.Errors.File_Error.File_Error
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
import Standard.Base.Errors.Unimplemented.Unimplemented
import Standard.Base.System.File_Format.File_For_Read
import Standard.Base.System.File_Format.File_Format_Metadata
import Standard.Base.System.Input_Stream.Input_Stream
import Standard.Base.System.Output_Stream.Output_Stream
from Standard.Base.System.File import find_extension_from_name
import project.AWS_Credential.AWS_Credential
import project.Errors.S3_Error
@ -117,7 +119,9 @@ type S3_File
Auto_Detect -> if self.is_directory then format.read self on_problems else
response = S3.get_object self.bucket self.prefix self.credentials
response.decode Auto_Detect
_ -> self.with_input_stream [File_Access.Read] format.read_stream
_ ->
metadata = File_Format_Metadata.Value file_name=self.name
self.with_input_stream [File_Access.Read] (stream-> format.read_stream stream metadata)
## ALIAS load bytes, open bytes
ICON data_input
@ -187,11 +191,7 @@ type S3_File
Returns the extension of the file.
extension : Text
extension self = if self.is_directory then Error.throw (S3_Error.Error "Directories do not have extensions." self.uri) else
name = self.name
last_dot = name.locate "." mode=Matching_Mode.Last
if last_dot.is_nothing then "" else
extension = name.drop (Index_Sub_Range.First last_dot.start)
if extension == "." then "" else extension
find_extension_from_name self.name
## GROUP Standard.Base.Input
Lists files contained in the directory denoted by this file.

View File

@ -299,7 +299,7 @@ type Any
is_nothing self = False
## GROUP Logical
If `self` is Nothing then returns `function`.
If `self` is Nothing then returns `other`.
> Example
If the value "Hello" is nothing return "".
@ -309,6 +309,16 @@ type Any
if_nothing self ~other =
const self other
## If `self` is Nothing then returns Nothing, otherwise returns the result
of running the provided `action`.
> Example
Transform a value only if it is not nothing.
my_result.if_not_nothing <| my_result + 1
if_not_nothing : Any -> Any
if_not_nothing self ~action = action
## GROUP Errors
Executes the provided handler on an error, or returns the value unchanged.

View File

@ -17,6 +17,7 @@ import project.Network.HTTP.HTTP_Method.HTTP_Method
import project.Nothing.Nothing
import project.System.File.File_Access.File_Access
import project.System.File_Format.File_For_Read
import project.System.File_Format.File_Format_Metadata
import project.System.Input_Stream.Input_Stream
import project.System.Output_Stream.Output_Stream
from project.Data.Boolean import Boolean, False, True
@ -129,7 +130,9 @@ type Enso_File
real_format = Auto_Detect.get_reading_format self
if real_format == Nothing then Error.throw (File_Error.Unsupported_Type self) else
self.read real_format on_problems
_ -> self.with_input_stream [File_Access.Read] format.read_stream
_ ->
metadata = File_Format_Metadata.Value file_name=self.name
self.with_input_stream [File_Access.Read] (stream-> format.read_stream stream metadata)
## ALIAS load bytes, open bytes
ICON data_input

View File

@ -6,6 +6,7 @@ import project.Network.URI.URI
import project.Nothing.Nothing
import project.System.File.File
import project.System.File_Format.File_For_Read
import Standard.Base.System.File_Format.File_Format_Metadata
import project.System.Input_Stream.Input_Stream
from project.Data.Text.Extensions import all
@ -45,7 +46,8 @@ type XML_Format
XML_Document.from_file file
## PRIVATE
Implements the `Data.parse` for this `File_Format`
read_stream : Input_Stream -> Any
read_stream self stream:Input_Stream =
Implements decoding the format from a stream.
read_stream : Input_Stream -> File_Format_Metadata -> Any
read_stream self stream:Input_Stream (metadata : File_Format_Metadata = File_Format_Metadata.no_information) =
_ = metadata
XML_Document.from_stream stream

View File

@ -8,6 +8,7 @@ import project.System.File.File
import project.System.File_Format.File_For_Read
import project.System.File_Format.File_Format
polyglot java import java.io.FileNotFoundException
polyglot java import java.io.IOException
polyglot java import java.nio.file.AccessDeniedException
polyglot java import java.nio.file.FileAlreadyExistsException
@ -33,7 +34,7 @@ type File_Error
Arguments:
- file: The file that couldn't be read.
- message: The message for the error.
IO_Error (file : File) (message : Text)
IO_Error (file : File | Nothing) (message : Text)
## Indicates that the given file's type is not supported.
Unsupported_Type (file : File_For_Read)
@ -51,7 +52,9 @@ type File_Error
to_display_text : Text
to_display_text self = case self of
File_Error.Not_Found file -> "The file at " + file.path + " does not exist."
File_Error.IO_Error file msg -> msg + " (" + file.path + ")."
File_Error.IO_Error file msg ->
suffix = if file.is_nothing then "" else " (" + file.path + ")."
msg + suffix
File_Error.Already_Exists file -> "The file at "+file.path+" already exists."
File_Error.Access_Denied file -> "Insufficient permissions to perform the desired operation on the file at "+file.path+"."
File_Error.Unsupported_Type file -> "The "+file.path+" has a type that is not supported."
@ -65,7 +68,7 @@ type File_Error
## PRIVATE
Utility method for running an action with Java exceptions mapping.
handle_java_exceptions file ~action =
handle_java_exceptions (file : File | Nothing) ~action =
Panic.catch IOException action caught_panic->
File_Error.wrap_io_exception file caught_panic.payload
@ -78,8 +81,14 @@ type File_Error
## PRIVATE
Converts a Java `IOException` into its Enso counterpart.
wrap_io_exception file io_exception = case io_exception of
_ : NoSuchFileException -> Error.throw (File_Error.Not_Found file)
_ : FileAlreadyExistsException -> Error.throw (File_Error.Already_Exists file)
_ : AccessDeniedException -> File_Error.access_denied file
_ -> Error.throw (File_Error.IO_Error file "An IO error has occurred: "+io_exception.to_text)
wrap_io_exception (file : File | Nothing) io_exception =
## If the file is not known, all we can do is throw a generic IO error.
This will usually only matter for stream operations, where there is no relevant file -
and so the exceptions like `NoSuchFileException` should not occur in such context.
But instead of risking a Type_Error, we just throw the more generic IO_Error.
if file.is_nothing then Error.throw (File_Error.IO_Error Nothing "An IO error has occurred: "+io_exception.to_text) else case io_exception of
_ : NoSuchFileException -> Error.throw (File_Error.Not_Found file)
_ : FileNotFoundException -> Error.throw (File_Error.Not_Found file)
_ : FileAlreadyExistsException -> Error.throw (File_Error.Already_Exists file)
_ : AccessDeniedException -> File_Error.access_denied file
_ -> Error.throw (File_Error.IO_Error file "An IO error has occurred: "+io_exception.to_text)

View File

@ -1,5 +1,9 @@
import project.Data.Text.Text
import project.Error.Error
import project.Nothing.Nothing
import project.Panic.Panic
polyglot java import java.lang.IllegalStateException
type Illegal_State
## PRIVATE
@ -19,3 +23,8 @@ type Illegal_State
Provides a human-readable representation of the illegal state error.
to_display_text : Text
to_display_text self = "Illegal State: " + self.message
## PRIVATE
Capture a Java `IllegalStateException` and convert it to an Enso dataflow error - `Illegal_State.Error`.
handle_java_exception =
Panic.catch IllegalStateException handler=(cause-> Error.throw (Illegal_State.Error cause.payload.getMessage cause.payload))

View File

@ -98,7 +98,10 @@ type HTTP
if fetch_methods.contains req.method || Context.Output.is_enabled then action else
Error.throw (Forbidden_Operation.Error ("Method " + req.method.to_text + " requests are forbidden as the Output context is disabled."))
handle_request_error =
Panic.catch JException handler=(cause-> Error.throw (Request_Error.Error 'IllegalArgumentException' cause.payload.getMessage))
handler caught_panic =
exception = caught_panic.payload
Error.throw (Request_Error.Error (Meta.type_of exception . to_text) exception.getMessage)
Panic.catch JException handler=handler
Panic.recover Any <| handle_request_error <| check_output_context <|
headers = resolve_headers req

View File

@ -16,6 +16,7 @@ import project.Network.URI.URI
import project.Nothing.Nothing
import project.Runtime.Context
import project.Runtime.Managed_Resource.Managed_Resource
import project.System.File.Advanced.Temporary_File.Temporary_File
import project.System.File.Existing_File_Behavior.Existing_File_Behavior
import project.System.File.File
import project.System.File.File_Access.File_Access
@ -23,6 +24,7 @@ import project.System.File.Write_Extensions
import project.System.File_Format.Auto_Detect
import project.System.File_Format.Bytes
import project.System.File_Format.File_Format
import project.System.File_Format.File_Format_Metadata
import project.System.File_Format.Plain_Text_Format
import project.System.Input_Stream.Input_Stream
from project.Data.Boolean import Boolean, False, True
@ -58,23 +60,23 @@ type Response_Body
Raw_Stream (raw_stream:Input_Stream) (content_type:Text|Nothing) uri:URI
## PRIVATE
Byte_Array (bytes:Vector) (content_type:Text|Nothing) uri:URI
Materialized_Byte_Array (bytes:Vector) (content_type:Text|Nothing) uri:URI
## PRIVATE
Temporary_File (file_resource:Managed_Resource) (content_type:Text|Nothing) uri:URI
Materialized_Temporary_File (temporary_file:Temporary_File) (content_type:Text|Nothing) uri:URI
## PRIVATE
with_stream : (Input_Stream -> Any ! HTTP_Error) -> Any ! HTTP_Error
with_stream self action = case self of
Response_Body.Raw_Stream raw_stream _ _ ->
Managed_Resource.bracket raw_stream (_.close) action
Response_Body.Byte_Array bytes _ _ ->
Response_Body.Materialized_Byte_Array bytes _ _ ->
byte_stream = Input_Stream.new (ByteArrayInputStream.new bytes) (HTTP_Error.handle_java_exceptions self.uri)
Managed_Resource.bracket byte_stream (_.close) action
Response_Body.Temporary_File file_resource _ _ -> file_resource.with file->
Response_Body.Materialized_Temporary_File temporary_file _ _ -> temporary_file.with_file file->
opts = [File_Access.Read.to_java]
stream = HTTP_Error.handle_java_exceptions self.uri (file.input_stream_builtin opts)
file_stream = Input_Stream.new stream (HTTP_Error.handle_java_exceptions self.uri)
file_stream = Input_Stream.new stream (HTTP_Error.handle_java_exceptions self.uri) associated_file=temporary_file
Managed_Resource.bracket (file_stream) (_.close) action
## PRIVATE
@ -88,23 +90,19 @@ type Response_Body
body_stream.with_java_stream body_java_stream->
first_block = body_java_stream.readNBytes maximum_body_in_memory
case first_block.length < maximum_body_in_memory of
True -> Response_Body.Byte_Array (Vector.from_polyglot_array first_block) self.content_type self.uri
False ->
file = File.create_temporary_file self.uri.host
## Write contents to temporary file
Context.Output.with_enabled <|
True -> Response_Body.Materialized_Byte_Array (Vector.from_polyglot_array first_block) self.content_type self.uri
False -> Context.Output.with_enabled <|
## Write contents to a temporary file
temp_file = Temporary_File.new self.uri.host
r = temp_file.with_file file->
file.with_output_stream [File_Access.Write, File_Access.Create, File_Access.Truncate_Existing] output_stream->
output_stream.with_java_stream java_output_stream->
java_output_stream.write first_block
body_java_stream.transferTo java_output_stream
java_output_stream.flush
Nothing
output_stream.close
## Have a file with the correct set up
resource = Managed_Resource.register file delete_file
Response_Body.Temporary_File resource self.content_type self.uri
r.if_not_error <|
Response_Body.Materialized_Temporary_File temp_file self.content_type self.uri
_ -> self
## ALIAS parse
@ -128,7 +126,9 @@ type Response_Body
_ ->
type_obj = Meta.type_of format
if can_decode type_obj . not then Error.throw (Illegal_Argument.Error type_obj.to_text+" cannot be used to decode from a stream. It must be saved to a file first.") else
self.with_stream format.read_stream
metadata = File_Format_Metadata.Value content_type=self.content_type
self.with_stream stream->
format.read_stream stream metadata
## ALIAS bytes
GROUP Input

View File

@ -2,6 +2,7 @@ import project.Any.Any
import project.Data.Numbers.Integer
import project.Data.Text.Text
from project.Data.Boolean import Boolean, False, True
from project.Function import const
## The type that has only a singleton value. Nothing in Enso is used as an
universal value to indicate the lack of presence of a value.
@ -30,6 +31,16 @@ type Nothing
if_nothing : Any -> Any
if_nothing self ~function = function
## If `self` is Nothing then returns Nothing, otherwise returns the result
of running the provided `action`.
> Example
Transform a value only if it is not nothing.
my_result.if_not_nothing <| my_result + 1
if_not_nothing : Any -> Any
if_not_nothing self ~action = const Nothing action
## Get a value for the key of the object.
As `Nothing` has no keys, returns `if_missing`.

View File

@ -90,17 +90,28 @@ type File
## PRIVATE
Create a dry run temporary file which will be deleted when Enso exits.
For an absolute path the same temporary file is returned.
The same temporary file is returned for paths that point to the same
location (not accounting for symlinks).
If this file is a temporary file that was generated by
`create_dry_run_file` on another file, it is returned as-is.
Arguments:
- copy_original: If `True`, the created dry run file is 'synchronized'
with the original file - the file is copied to the temporary file, or
if the original file does not exist - it is ensured that the temporary
file also does not exist. If `False`, no actions are taken.
create_dry_run_file : Boolean -> File ! File_Error
create_dry_run_file self copy_original=False =
temp_path = DryRunFileManager.getTemporaryFile self.absolute.path
temp_path = DryRunFileManager.getTemporaryFile self.absolute.normalize.path
if temp_path.is_nothing then Error.throw (File_Error.IO_Error "Unable to create a temporary file.") else
temp = File.new temp_path
if self.exists && copy_original then
Context.Output.with_enabled <|
self.copy_to temp replace_existing=True
if copy_original then Context.Output.with_enabled <| Panic.rethrow <|
case self.exists of
True ->
self.copy_to temp replace_existing=True
False ->
temp.delete_if_exists
## Attach a warning to the file that it is a dry run
warning = Dry_Run_Operation.Warning "Only a dry run has occurred, with data written to a temporary file."
@ -803,3 +814,11 @@ get_child_widget file =
children = file.list
options = children.map c-> Option c.name c.name.pretty
Widget.Single_Choice values=options display=Display.Always
## PRIVATE
find_extension_from_name : Text -> Text
find_extension_from_name name =
last_dot = name.locate "." mode=Matching_Mode.Last
if last_dot.is_nothing then "" else
extension = name.drop (Index_Sub_Range.First last_dot.start)
if extension == "." then "" else extension

View File

@ -0,0 +1,106 @@
import project.Any.Any
import project.Data.Text.Text
import project.Errors.File_Error.File_Error
import project.Errors.Illegal_State.Illegal_State
import project.Nothing.Nothing
import project.Panic.Panic
import project.Runtime.Context
import project.Runtime.Managed_Resource.Managed_Resource
import project.Runtime.Ref.Ref
import project.System.File.File
import project.System.Input_Stream.Input_Stream
## PRIVATE
ADVANCED
A reference to a temporary file that will be deleted once all references to
it are gone. This is an advanced helper facility for library developers, not
aimed to be used by regular users. Most users should just use
`File.create_temporary_file` instead.
This is helpful when implementing internal temporary files needed for some
operation, whose lifetime may have to be bound with some object (e.g. Excel
Workbook instance).
The advantage of using `Temporary_File` over `File.create_temporary_file` is
that the file is likely to get cleaned up earlier than JVM exit. If helper
files are only cleaned at JVM exit, we run the risk of exhausting disk
storage if the operation is run numerous times. With this approach, the risk
should be much lower, because the files are cleaned up sooner - as soon as
the first GC runs after the file is no longer reachable.
It has to be used with extra care: wherever the internal File reference is
leaked without ensuring the lifetime of the `Temporary_File` instance, that
file reference may easily become invalid (i.e. its file may get deleted).
type Temporary_File
## PRIVATE
Instance (file_resource_reference : Ref (Nothing | Managed_Resource File))
## PRIVATE
with_file : (File -> Any) -> Any
with_file self action = self.access_resource.with action
## PRIVATE
Gets the raw file reference.
Note that the underlying file may be deleted at any time, once `self` is
unreachable - so this method has to be used with extra care.
unsafe_get : File
unsafe_get self = self.with_file (f->f)
## PRIVATE
Deletes the temporary file, invalidating the reference.
dispose : Nothing
dispose self =
self.access_resource.finalize
self.file_resource_reference.put Nothing
## PRIVATE
access_resource : Managed_Resource File
access_resource self = case self.file_resource_reference.get of
Nothing -> Panic.throw (Illegal_State.Error "Using the Temporary_File after it has been disposed is not allowed.")
resource -> resource
## PRIVATE
to_text : Text
to_text self = case self.file_resource_reference.get of
Nothing -> "Temporary_File (disposed)"
resource -> "Temporary_File (" + (resource.with .to_text) + ")"
## PRIVATE
Creates a new empty `Temporary_File`.
new : Text -> Text -> Temporary_File
new prefix="temp" suffix=".tmp" =
# The file will be deleted on JVM exit, but we will try to delete it sooner.
file = File.create_temporary_file prefix suffix
resource = Managed_Resource.register file cleanup_tmp_file
Temporary_File.Instance (Ref.new resource)
## PRIVATE
Materializes the provided `Input_Stream` into a `Temporary_File`.
It will work the same regardless of whether the output context is enabled.
from_stream : Input_Stream -> Temporary_File
from_stream stream = Context.Output.with_enabled <|
tmp_file = Temporary_File.new "enso-materialized-stream" ".tmp"
handler caught_panic =
tmp_file.dispose
Panic.throw caught_panic
Panic.catch Any handler=handler <|
tmp_file.with_file file->
Panic.rethrow <|
File_Error.handle_java_exceptions Nothing <|
stream.write_to_file file
tmp_file
## PRIVATE
Materializes the provided `Input_Stream` into a `Temporary_File`.
If the stream is already backed by a temporary or regular file, that file is returned.
from_stream_light : Input_Stream -> Temporary_File | File
from_stream_light stream =
case stream.associated_file of
tmp : Temporary_File -> tmp
file : File -> file
_ -> Temporary_File.from_stream stream
## PRIVATE
cleanup_tmp_file file =
file.delete_if_exists

View File

@ -77,6 +77,23 @@ type Existing_File_Behavior
action output_stream . catch Any dataflow_error->
Panic.throw (Internal_Write_Operation_Errored.Error dataflow_error)
## PRIVATE
A counterpart of `write` that will also handle the dry-run mode if the
Output context is disabled. It relies on a default logic for handling the
dry runs, which may not always be the right choice, depending on the file
format.
The `action` takes 2 arguments - the effective file that will be written to
(the original file or the dry run file) and the output stream to write to.
write_handling_dry_run : File -> (File -> Output_Stream -> Any) -> Any ! File_Error
write_handling_dry_run self file action =
if Context.Output.is_enabled then self.write file (action file) else
effective_behavior = self.get_effective_behavior file is_enabled=False
needs_original = effective_behavior == Existing_File_Behavior.Append
dry_run_file = file.create_dry_run_file copy_original=needs_original
Context.Output.with_enabled <|
effective_behavior.write dry_run_file (action dry_run_file)
## PRIVATE
write_file_backing_up_old_one : File -> (Output_Stream -> Nothing) -> Nothing ! File_Error
write_file_backing_up_old_one file action = recover_io_and_not_found <|
@ -100,7 +117,7 @@ write_file_backing_up_old_one file action = recover_io_and_not_found <|
handle_internal_dataflow = Panic.catch Internal_Write_Operation_Errored handler=handle_write_failure_dataflow
handle_internal_panic = Panic.catch Internal_Write_Operation_Panicked handler=handle_write_failure_panic
handle_file_already_exists <| handle_internal_dataflow <| handle_internal_panic <|
Panic.rethrow <|
result = Panic.rethrow <|
new_file.with_output_stream [File_Access.Write, File_Access.Create_New] output_stream->
result = Panic.catch Any (action output_stream) caught_panic->
Panic.throw (Internal_Write_Operation_Panicked.Panic caught_panic)
@ -118,6 +135,8 @@ write_file_backing_up_old_one file action = recover_io_and_not_found <|
Panic.catch File_Error handler=not_found_handler <|
Panic.rethrow <| file.move_to bak_file replace_existing=True
Panic.rethrow <| new_file.move_to file
result
## Here we manually check if the target file is writable. This is necessary,
because the `Backup` scenario would go around the original file not being
writable by moving it (which is permitted for read-only files too) and

View File

@ -1,5 +1,6 @@
import project.Any.Any
import project.Data.Json.Json
import project.Data.Numbers.Integer
import project.Data.Text.Encoding.Encoding
import project.Data.Text.Text
import project.Data.Vector.Vector
@ -155,9 +156,10 @@ type Plain_Text_Format
file.read_text self.encoding on_problems
## PRIVATE
Implements the `Data.parse` for this `File_Format`
read_stream : Input_Stream -> Any
read_stream self stream:Input_Stream =
Implements decoding the format from a stream.
read_stream : Input_Stream -> File_Format_Metadata -> Any
read_stream self stream:Input_Stream (metadata : File_Format_Metadata = File_Format_Metadata.no_information) =
_ = metadata
Text.from_bytes (stream.read_all_bytes) self.encoding
## A file format for reading or writing files as a sequence of bytes.
@ -191,9 +193,10 @@ type Bytes
file.read_bytes
## PRIVATE
Implements the `Data.parse` for this `File_Format`
read_stream : Input_Stream -> Any
read_stream self stream:Input_Stream =
Implements decoding the format from a stream.
read_stream : Input_Stream -> File_Format_Metadata -> Any
read_stream self stream:Input_Stream (metadata : File_Format_Metadata = File_Format_Metadata.no_information) =
_ = metadata
stream.read_all_bytes
## A file format for reading and writing files as JSON.
@ -232,9 +235,10 @@ type JSON_Format
Error.throw (File_Error.Corrupted_Format file error.to_display_text error)
## PRIVATE
Implements the `Data.parse` for this `File_Format`
read_stream : Input_Stream -> Any
read_stream self stream:Input_Stream =
Implements decoding the format from a stream.
read_stream : Input_Stream -> File_Format_Metadata -> Any
read_stream self stream:Input_Stream (metadata : File_Format_Metadata = File_Format_Metadata.no_information) =
_ = metadata
Text.from_bytes (stream.read_all_bytes) Encoding.utf_8 . parse_json
## A setting to infer the default behaviour of some option.
@ -245,11 +249,25 @@ type Infer
type File_For_Read
## PRIVATE
Arguments:
- `path` - the path or the URI of the file.
- `name` - the name of the file.
- `extension` - the extension of the file.
- `read_first_bytes` - a function that reads the first bytes of the file.
Value path:Text name:Text extension:Text (read_first_bytes:Function=(_->Nothing))
- path: the path or the URI of the file.
- name: the name of the file.
- extension: the extension of the file.
- read_first_bytes: a function that reads the first bytes of the file.
- content_type: the content type of the file.
Value path:Text|Nothing name:Text|Nothing extension:Text|Nothing (read_first_bytes:(Integer -> Nothing | Vector Integer)=(_->Nothing))
## PRIVATE
File_For_Read.from (that:File) = File_For_Read.Value that.path that.name that.extension that.read_first_bytes
## PRIVATE
Metadata that may aid `read_stream`.
type File_Format_Metadata
## PRIVATE
Arguments:
- file_name: the name of the file.
- content_type: the content type of the file.
Value (file_name : Text | Nothing = Nothing) (content_type : Text | Nothing = Nothing)
## PRIVATE
no_information : File_Format_Metadata
no_information = File_Format_Metadata.Value

View File

@ -6,6 +6,9 @@ import project.Errors.Encoding_Error.Encoding_Error
import project.Errors.Problem_Behavior.Problem_Behavior
import project.Nothing.Nothing
import project.Runtime.Managed_Resource.Managed_Resource
import project.System.File.Advanced.Temporary_File.Temporary_File
import project.System.File.File
import project.System.File.File_Access.File_Access
polyglot java import java.io.InputStream as Java_Input_Stream
polyglot java import org.enso.base.encoding.ReportingStreamDecoder
@ -19,10 +22,10 @@ type Input_Stream
Given a Java InputStream, wraps as a Managed_Resource and returns a new
Input_Stream.
new : Java_Input_Stream -> Any -> Input_Stream
new java_stream error_handler =
new : Java_Input_Stream -> Any -> (Nothing | File | Temporary_File) -> Input_Stream
new java_stream error_handler associated_file=Nothing =
resource = Managed_Resource.register java_stream close_stream
Input_Stream.Value resource error_handler
Input_Stream.Value resource error_handler associated_file
## PRIVATE
An input stream, allowing for interactive reading of contents.
@ -31,7 +34,8 @@ type Input_Stream
- stream_resource: The internal resource that represents the underlying
stream.
- error_handler: An error handler for IOExceptions thrown when reading.
Value stream_resource error_handler
- associated_file: The file associated with this stream, if any.
Value stream_resource error_handler (associated_file:Nothing|File|Temporary_File)
## PRIVATE
ADVANCED
@ -100,6 +104,16 @@ type Input_Stream
problems = Vector.from_polyglot_array results.problems . map Encoding_Error.Error
on_problems.attach_problems_after results.result problems
## PRIVATE
Reads the contents of this stream into a given file.
write_to_file : File -> File
write_to_file self file =
result = self.with_java_stream java_input_stream->
file.with_output_stream [File_Access.Create, File_Access.Truncate_Existing, File_Access.Write] output_stream->
output_stream.with_java_stream java_output_stream->
java_input_stream.transferTo java_output_stream
result.if_not_error file
## PRIVATE
Utility method for closing primitive Java streams. Provided to avoid
accidental scope capture with `Managed_Resource` finalizers.

View File

@ -7,6 +7,7 @@ import Standard.Base.Data.Vector.No_Wrap
import Standard.Base.Errors.Common.Additional_Warnings
import Standard.Base.Errors.Common.Incomparable_Values
import Standard.Base.Errors.Common.Index_Out_Of_Bounds
import Standard.Base.Errors.Common.No_Such_Method
import Standard.Base.Errors.Common.Out_Of_Memory
import Standard.Base.Errors.Common.Type_Error
import Standard.Base.Errors.File_Error.File_Error
@ -2492,16 +2493,13 @@ type Table
if base_format == Nothing then Error.throw (File_Error.Unsupported_Output_Type file Table) else
self.write file format=base_format on_existing_file match_columns on_problems
_ ->
methods = if format == JSON_Format then ["write_table"] else Meta.meta (Meta.type_of format) . methods
if methods.contains "write_table" . not then Error.throw (File_Error.Unsupported_Output_Type format Table) else
effective_existing_behaviour = on_existing_file.get_effective_behavior file
tgt_file = if Context.Output.is_enabled then file else
should_copy_file = on_existing_file==Existing_File_Behavior.Append
file.create_dry_run_file copy_original=should_copy_file
handle_no_write_method caught_panic =
is_write = caught_panic.payload.method_name == "write_table"
if is_write.not then Panic.throw caught_panic else
Error.throw (File_Error.Unsupported_Output_Type format Table)
Panic.catch No_Such_Method handler=handle_no_write_method <|
to_write = if Context.Output.is_enabled then self else self.take 1000
Context.Output.with_enabled <|
format.write_table tgt_file to_write effective_existing_behaviour match_columns on_problems
format.write_table file to_write on_existing_file match_columns on_problems
## Creates a text representation of the table using the CSV format.
to_csv : Text

View File

@ -1,6 +1,7 @@
from Standard.Base import all
import Standard.Base.Network.HTTP.Response.Response
import Standard.Base.System.File_Format.File_For_Read
import Standard.Base.System.File_Format.File_Format_Metadata
import Standard.Base.System.Input_Stream.Input_Stream
from Standard.Base.Widget_Helpers import make_delimiter_selector
@ -98,9 +99,10 @@ type Delimited_Format
Delimited_Reader.read_file self file on_problems
## PRIVATE
Implements the `Data.parse` for this `File_Format`
read_stream : Input_Stream -> Any
read_stream self stream:Input_Stream =
Implements decoding the format from a stream.
read_stream : Input_Stream -> File_Format_Metadata -> Any
read_stream self stream:Input_Stream (metadata : File_Format_Metadata = File_Format_Metadata.no_information) =
_ = metadata
Delimited_Reader.read_stream self stream on_problems=Report_Warning
## PRIVATE
@ -108,8 +110,7 @@ type Delimited_Format
Implements the `Table.write` for this `File_Format`.
write_table : File -> Table -> Existing_File_Behavior -> Match_Columns -> Problem_Behavior -> File
write_table self file table on_existing_file match_columns on_problems =
r = Delimited_Writer.write_file table self file on_existing_file match_columns on_problems
r.if_not_error file
Delimited_Writer.write_file table self file on_existing_file match_columns on_problems
## PRIVATE
Clone the instance with some properties overridden.

View File

@ -1,7 +1,9 @@
from Standard.Base import all
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
import Standard.Base.System.File_Format.File_For_Read
import Standard.Base.System.File_Format.File_Format_Metadata
import Standard.Base.System.Input_Stream.Input_Stream
from Standard.Base.System.File import find_extension_from_name
import project.Data.Match_Columns.Match_Columns
import project.Data.Table.Table
@ -15,12 +17,9 @@ import project.Internal.Excel_Writer
should_treat_as_xls_format : (Boolean|Infer) -> File -> Boolean ! Illegal_Argument
should_treat_as_xls_format xls_format file =
if xls_format != Infer then xls_format else
case file.extension of
".xlsx" -> False
".xlsm" -> False
".xls" -> True
".xlt" -> True
_ -> Error.throw (Illegal_Argument.Error ("Unknown file extension for Excel file (" + file.extension + ")"))
inferred_xls_format = xls_format_from_file_extension file.extension
inferred_xls_format.if_nothing <|
Error.throw (Illegal_Argument.Error ("Unknown file extension for Excel file (" + file.extension + ")"))
## Read the file to a `Table` from an Excel file
type Excel_Format
@ -67,11 +66,9 @@ type Excel_Format
for_web : Text -> URI -> Excel_Format | Nothing
for_web content_type uri =
_ = [uri]
parts = content_type.split ";" . map .trim
case parts.first of
"application/vnd.ms-excel" -> Excel_Format.Excel xls_format=True
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" -> Excel_Format.Excel xls_format=False
_ -> Nothing
inferred_xls_format = xls_format_from_content_type content_type
inferred_xls_format.if_not_nothing <|
Excel_Format.Excel xls_format=inferred_xls_format
## PRIVATE
ADVANCED
@ -84,10 +81,13 @@ type Excel_Format
_ -> Excel_Reader.read_file file self.section self.headers on_problems format
## PRIVATE
Implements the `Data.parse` for this `File_Format`
read_stream : Input_Stream -> Any
read_stream self stream:Input_Stream =
xls_format = if self.xls_format == Infer then False else self.xls_format
Implements decoding the format from a stream.
read_stream : Input_Stream -> File_Format_Metadata -> Any
read_stream self stream:Input_Stream (metadata : File_Format_Metadata = File_Format_Metadata.no_information) =
xls_format = if self.xls_format != Infer then self.xls_format else
inferred_xls_format = xls_format_from_metadata metadata
# If still unable to infer it, we default to False
inferred_xls_format.if_nothing False
bad_format _ =
message = case self.xls_format of
@ -124,10 +124,41 @@ type Excel_Format
write_table self file table on_existing_file match_columns on_problems =
format = should_treat_as_xls_format self.xls_format file
r = case self.section of
case self.section of
Excel_Section.Sheet_Names -> Error.throw (Illegal_Argument.Error "Sheet_Names cannot be used for `write`.")
Excel_Section.Range_Names -> Error.throw (Illegal_Argument.Error "Range_Names cannot be used for `write`.")
Excel_Section.Workbook ->
Excel_Writer.write_file file table on_existing_file (Excel_Section.Worksheet self.default_sheet) True match_columns on_problems format
_ -> Excel_Writer.write_file file table on_existing_file self.section self.headers match_columns on_problems format
r.if_not_error file
## PRIVATE
Infers the xls format from the Content-Type.
Returns Nothing if the content type is not a known Excel format.
xls_format_from_content_type : Text -> Boolean | Nothing
xls_format_from_content_type content_type =
# Split on ";" and trim every piece; only the first piece is inspected below, so anything after the first ";" is ignored.
parts = content_type.split ";" . map .trim
# True selects the legacy XLS format, False selects XLSX; any other first piece yields Nothing.
case parts.first of
"application/vnd.ms-excel" -> True
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" -> False
_ -> Nothing
## PRIVATE
Infers the xls format from the file extension.
Returns Nothing if the file extension is not a known Excel extension.
xls_format_from_file_extension : Text -> Boolean | Nothing
xls_format_from_file_extension extension =
# The extension is matched against these exact literals, including the leading dot.
# True selects the legacy XLS format, False selects XLSX; anything else yields Nothing.
case extension of
".xlsx" -> False
".xlsm" -> False
".xls" -> True
".xlt" -> True
_ -> Nothing
## PRIVATE
xls_format_from_metadata : File_Format_Metadata -> Boolean | Nothing
xls_format_from_metadata metadata =
# First attempt: infer from the content type, when the metadata carries one.
from_content_type = metadata.content_type.if_not_nothing (xls_format_from_content_type metadata.content_type)
# Fallback: when the content type gave Nothing, infer from the extension of the file name, when the metadata carries one.
# If neither source is conclusive the result is Nothing.
from_content_type.if_nothing <|
metadata.file_name.if_not_nothing <|
extension = find_extension_from_name metadata.file_name
xls_format_from_file_extension extension

View File

@ -1,8 +1,12 @@
from Standard.Base import all
import Standard.Base.Errors.File_Error.File_Error
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
import Standard.Base.Errors.Illegal_State.Illegal_State
import Standard.Base.Metadata.Display
import Standard.Base.Runtime.Managed_Resource.Managed_Resource
import Standard.Base.Runtime.Ref.Ref
import Standard.Base.System.Input_Stream.Input_Stream
import Standard.Base.System.File.Advanced.Temporary_File.Temporary_File
from Standard.Base.Data.Filter_Condition import sql_like_to_regex
from Standard.Base.Metadata.Choice import Option
from Standard.Base.Metadata.Widget import Single_Choice
@ -13,8 +17,12 @@ import project.Excel.Excel_Section.Excel_Section
import project.Internal.Excel_Reader
import project.Internal.Java_Problems
polyglot java import java.io.File as Java_File
polyglot java import org.apache.poi.ss.usermodel.Workbook
polyglot java import org.enso.table.read.ExcelReader
polyglot java import org.enso.table.excel.ExcelConnectionPool
polyglot java import org.enso.table.excel.ExcelFileFormat
polyglot java import org.enso.table.excel.ReadOnlyExcelConnection
type Excel_Workbook
## PRIVATE
@ -25,12 +33,24 @@ type Excel_Workbook
- file: The file to load.
- xls_format: Whether to use the old XLS format (default is XLSX).
- headers: Whether to use the first row as headers (default is to infer).
new : File | Text -> Boolean -> Boolean | Infer -> Excel_Workbook
new : File | Text | Temporary_File -> Boolean -> Boolean | Infer -> Excel_Workbook
new file xls_format=False headers=Infer =
file_obj = File.new file . normalize
File_Error.handle_java_exceptions file_obj <| Excel_Reader.handle_bad_format file_obj <|
file_obj.with_input_stream [File_Access.Read] stream->
Excel_Workbook.from_stream stream xls_format headers file_obj
file_obj = case file of
tmp : Temporary_File -> tmp
other -> File.new other
file_for_errors = if file_obj.is_a Temporary_File then Nothing else file_obj
continuation raw_file =
format = if xls_format then ExcelFileFormat.XLS else ExcelFileFormat.XLSX
File_Error.handle_java_exceptions raw_file <| Excel_Reader.handle_bad_format file_for_errors <| Illegal_State.handle_java_exception <|
# The `java_file` depends on the liveness of the possible `Temporary_File` but that is ensured by storing the `file_obj` in the resulting workbook instance.
java_file = Java_File.new raw_file.absolute.normalize.path
excel_connection_resource = Managed_Resource.register (ExcelConnectionPool.INSTANCE.openReadOnlyConnection java_file format) close_connection
Excel_Workbook.Value (Ref.new excel_connection_resource) file_obj xls_format headers
case file_obj of
tmp : Temporary_File -> tmp.with_file continuation
f : File -> continuation f
## PRIVATE
ADVANCED
@ -43,13 +63,12 @@ type Excel_Workbook
- file: Optional file reference.
from_stream : Input_Stream -> Boolean -> Boolean | Infer -> File | Nothing -> Excel_Workbook
from_stream stream xls_format=False headers=Infer file=Nothing = Excel_Reader.handle_bad_format file <|
stream.with_java_stream java_stream->
workbook = ExcelReader.readWorkbook java_stream xls_format
Excel_Workbook.Value workbook file xls_format headers
temp_file = Temporary_File.from_stream_light stream
Excel_Workbook.new temp_file xls_format headers
## PRIVATE
Creates an Excel_Workbook connection.
Value workbook:Workbook (file:(File|Nothing)) xls_format:Boolean headers:(Boolean|Infer)
Value (excel_connection_resource_ref : Ref (Managed_Resource ReadOnlyExcelConnection)) (file:(File|Temporary_File|Nothing)) xls_format:Boolean headers:(Boolean|Infer)
## Returns the list of databases (or catalogs) for the connection.
databases : Nothing
@ -57,7 +76,11 @@ type Excel_Workbook
## Returns the name of the current database (or catalog).
database : Text
database self = if self.file.is_nothing then "" else self.file.path
database self = case self.file of
regular_file : File -> regular_file.path
## A Temporary_File is still visualized as no path, because the fact that the workbook is stored as a file is
just an implementation detail - it is coming from a stream so there is no logical file it is associated with.
_ -> ""
## Returns a new Connection with the specified database set as default.
@ -92,22 +115,24 @@ type Excel_Workbook
## GROUP Standard.Base.Metadata
Gets the number of sheets.
sheet_count : Integer
sheet_count self = self.workbook.getNumberOfSheets
sheet_count self = self.with_java_workbook .getNumberOfSheets
## GROUP Standard.Base.Metadata
Gets the names of all the sheets.
sheet_names : Vector Text
sheet_names self = Vector.from_polyglot_array (ExcelReader.readSheetNames self.workbook)
sheet_names self = self.with_java_workbook java_workbook->
Vector.from_polyglot_array (ExcelReader.readSheetNames java_workbook)
## GROUP Standard.Base.Metadata
Gets the number of named ranges.
named_ranges_count : Integer
named_ranges_count self = self.workbook.getNumberOfNames
named_ranges_count self = self.with_java_workbook .getNumberOfNames
## GROUP Standard.Base.Metadata
Gets the names of all the named ranges.
named_ranges : Vector Text
named_ranges self = Vector.from_polyglot_array (ExcelReader.readRangeNames self.workbook)
named_ranges self = self.with_java_workbook java_workbook->
Vector.from_polyglot_array (ExcelReader.readRangeNames java_workbook)
## Gets a list of the table types.
table_types : Vector Text
@ -167,9 +192,9 @@ type Excel_Workbook
read self query (limit : Integer | Nothing = Nothing) =
java_headers = Excel_Reader.make_java_headers self.headers
java_table = Java_Problems.with_problem_aggregator Problem_Behavior.Report_Warning java_problem_aggregator->
case query of
_ : Excel_Range -> ExcelReader.readRange self.workbook query.java_range java_headers 0 limit java_problem_aggregator
_ : Text -> ExcelReader.readRangeByName self.workbook query java_headers 0 limit java_problem_aggregator
self.with_java_workbook java_workbook-> case query of
_ : Excel_Range -> ExcelReader.readRange java_workbook query.java_range java_headers 0 limit java_problem_aggregator
_ : Text -> ExcelReader.readRangeByName java_workbook query java_headers 0 limit java_problem_aggregator
Table.Value java_table
## GROUP Standard.Base.Input
@ -194,13 +219,14 @@ type Excel_Workbook
names.at (sheet - 1)
_ -> Error.throw (Illegal_Argument.Error "Worksheet must be either Text or an Integer.")
java_table = Java_Problems.with_problem_aggregator Problem_Behavior.Report_Warning java_problem_aggregator->
ExcelReader.readRangeByName self.workbook sheet_name java_headers skip_rows row_limit java_problem_aggregator
self.with_java_workbook java_workbook->
ExcelReader.readRangeByName java_workbook sheet_name java_headers skip_rows row_limit java_problem_aggregator
Table.Value java_table
Excel_Section.Cell_Range address skip_rows row_limit ->
java_table = Java_Problems.with_problem_aggregator Problem_Behavior.Report_Warning java_problem_aggregator->
case address of
_ : Excel_Range -> ExcelReader.readRange self.workbook address.java_range java_headers skip_rows row_limit java_problem_aggregator
_ : Text -> ExcelReader.readRangeByName self.workbook address java_headers skip_rows row_limit java_problem_aggregator
self.with_java_workbook java_workbook-> case address of
_ : Excel_Range -> ExcelReader.readRange java_workbook address.java_range java_headers skip_rows row_limit java_problem_aggregator
_ : Text -> ExcelReader.readRangeByName java_workbook address java_headers skip_rows row_limit java_problem_aggregator
Table.Value java_table
## ALIAS get, worksheet
@ -213,3 +239,51 @@ type Excel_Workbook
sheet : Text | Integer -> Table
sheet self name:(Text | Integer) =
self.read_section (Excel_Section.Worksheet name 0 Nothing)
## ADVANCED
Closes the workbook, releasing any resources it holds.
This method may be used to release the underlying workbook file,
e.g. to be able to delete it.
After this method is called, this instance is not usable any more - any
operation on it will throw an `Illegal_State` error.
close : Nothing
close self =
# The Ref holds Nothing once the workbook has been closed, so closing a second time does nothing.
case self.excel_connection_resource_ref.get of
Nothing -> Nothing
resource ->
# Finalize the managed connection resource, then clear the Ref so that `with_java_workbook` reports the closed state afterwards.
resource.finalize
self.excel_connection_resource_ref.put Nothing
Nothing
## Returns a simple text description of the workbook.
to_text : Text
to_text self =
# Only a regular File contributes its name in parentheses; for any other value of `self.file` the suffix is empty.
associated_regular_file = case self.file of
regular_file : File -> " (" + regular_file.name + ")"
_ -> ""
"Excel_Workbook"+associated_regular_file
## PRIVATE
Provides a JS object representation for use in visualizations.
to_js_object : JS_Object
to_js_object self =
# `Infer` is emitted as the text "Infer"; any other headers setting is emitted as-is.
headers = if self.headers == Infer then "Infer" else self.headers
# The "file" field is only included when the workbook is associated with a regular File.
additional_fields = case self.file of
regular_file : File -> [["file", regular_file.path]]
_ -> []
JS_Object.from_pairs <|
[["type", "Excel_Workbook"], ["headers", headers], ["xls_format", self.xls_format]] + additional_fields
## PRIVATE
with_java_workbook : (Workbook -> Any) -> Any ! Illegal_State
with_java_workbook self f = Illegal_State.handle_java_exception <|
# Runs `f` against the Java workbook of the underlying connection.
# `close` stores Nothing in the Ref, so finding Nothing here means the workbook has already been closed.
case self.excel_connection_resource_ref.get of
Nothing ->
Error.throw (Illegal_State.Error "The workbook is being used after it was closed.")
resource -> resource.with connection->
connection.withWorkbook f
# Finalizer passed to `Managed_Resource.register` when a workbook connection is opened; it closes the given connection.
## PRIVATE
close_connection c = c.close

View File

@ -40,8 +40,9 @@ write_file table format file on_existing_file match_columns on_problems =
Existing_File_Behavior.Append ->
append_to_file table format file match_columns on_problems
_ ->
on_existing_file.write file stream->
write_to_stream table format stream on_problems related_file=file
on_existing_file.write_handling_dry_run file effective_file-> stream->
r = write_to_stream table format stream on_problems related_file=effective_file
r.if_not_error effective_file
## PRIVATE
Handles appending to an existing file, ensuring that the columns are matched
@ -85,8 +86,9 @@ append_to_file table format file match_columns on_problems =
False -> format.without_headers
needs_leading_newline =
metadata.has_any_content && metadata.ends_with_newline.not
Existing_File_Behavior.Append.write file stream->
write_to_stream reordered_table amended_format stream on_problems related_file=file separator_override=effective_line_separator needs_leading_newline=needs_leading_newline
Existing_File_Behavior.Append.write_handling_dry_run file effective_file-> stream->
r = write_to_stream reordered_table amended_format stream on_problems related_file=effective_file separator_override=effective_line_separator needs_leading_newline=needs_leading_newline
r.if_not_error effective_file
## PRIVATE
Returns a Text value representing the table in the delimited format.

View File

@ -9,9 +9,11 @@ import project.Excel.Excel_Section.Excel_Section
import project.Internal.Java_Problems
from project.Errors import Duplicate_Output_Column_Names, Empty_Sheet_Error, Invalid_Column_Names, Invalid_Location
polyglot java import java.io.File as Java_File
polyglot java import org.apache.poi.poifs.filesystem.NotOLE2FileException
polyglot java import org.apache.poi.UnsupportedFileFormatException
polyglot java import org.enso.table.error.InvalidLocationException
polyglot java import org.enso.table.excel.ExcelFileFormat
polyglot java import org.enso.table.excel.ExcelHeaders
polyglot java import org.enso.table.read.ExcelReader
@ -25,15 +27,16 @@ make_java_headers headers = case headers of
## PRIVATE
Handle and map the Java errors when reading an Excel file
handle_reader : File -> (Input_Stream -> (Table | Vector)) -> (Table | Vector)
handle_reader : File -> (Java_File -> (Table | Vector)) -> (Table | Vector)
handle_reader file reader =
bad_argument caught_panic = Error.throw (Invalid_Location.Error caught_panic.payload.getCause)
handle_bad_argument = Panic.catch InvalidLocationException handler=bad_argument
# TODO [RW] handle InvalidFormatException
File_Error.handle_java_exceptions file <| handle_bad_argument <|
handle_bad_format file <| Empty_Sheet_Error.handle_java_exception <|
file.with_input_stream [File_Access.Read] stream->
stream.with_java_stream reader
java_file = Java_File.new file.path
reader java_file
## PRIVATE
Reads an input Excel file according to the provided section.
@ -49,21 +52,22 @@ handle_reader file reader =
otherwise reads in Excel 2007+ format.
read_file : File -> Excel_Section -> (Boolean|Infer) -> Problem_Behavior -> Boolean -> (Table | Vector)
read_file file section headers on_problems xls_format=False =
reader stream = case section of
file_format = if xls_format then ExcelFileFormat.XLS else ExcelFileFormat.XLSX
reader java_file = case section of
Excel_Section.Workbook -> Error.throw (Illegal_Argument.Error "Cannot read an entire workbook.")
Excel_Section.Sheet_Names -> Vector.from_polyglot_array (ExcelReader.readSheetNames stream xls_format)
Excel_Section.Range_Names -> Vector.from_polyglot_array (ExcelReader.readRangeNames stream xls_format)
Excel_Section.Sheet_Names -> Vector.from_polyglot_array (ExcelReader.readSheetNames java_file file_format)
Excel_Section.Range_Names -> Vector.from_polyglot_array (ExcelReader.readRangeNames java_file file_format)
Excel_Section.Worksheet sheet skip_rows row_limit ->
Java_Problems.with_problem_aggregator on_problems java_problem_aggregator->
java_table = case sheet of
_ : Integer -> ExcelReader.readSheetByIndex stream sheet (make_java_headers headers) skip_rows row_limit xls_format java_problem_aggregator
_ : Text -> ExcelReader.readSheetByName stream sheet (make_java_headers headers) skip_rows row_limit xls_format java_problem_aggregator
_ : Integer -> ExcelReader.readSheetByIndex java_file sheet (make_java_headers headers) skip_rows row_limit file_format java_problem_aggregator
_ : Text -> ExcelReader.readSheetByName java_file sheet (make_java_headers headers) skip_rows row_limit file_format java_problem_aggregator
Table.Value java_table
Excel_Section.Cell_Range address skip_rows row_limit ->
Java_Problems.with_problem_aggregator on_problems java_problem_aggregator->
java_table = case address of
_ : Excel_Range -> ExcelReader.readRange stream address.java_range (make_java_headers headers) skip_rows row_limit xls_format java_problem_aggregator
_ : Text -> ExcelReader.readRangeByName stream address (make_java_headers headers) skip_rows row_limit xls_format java_problem_aggregator
_ : Excel_Range -> ExcelReader.readRange java_file address.java_range (make_java_headers headers) skip_rows row_limit file_format java_problem_aggregator
_ : Text -> ExcelReader.readRangeByName java_file address (make_java_headers headers) skip_rows row_limit file_format java_problem_aggregator
Table.Value java_table
handle_reader file reader

View File

@ -1,6 +1,9 @@
from Standard.Base import all
import Standard.Base.Errors.File_Error.File_Error
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
import Standard.Base.Errors.Illegal_State.Illegal_State
import Standard.Base.Runtime.Context
import Standard.Base.Runtime.Managed_Resource.Managed_Resource
import project.Data.Match_Columns.Match_Columns
import project.Data.Table.Table
@ -9,23 +12,27 @@ import project.Excel.Excel_Section.Excel_Section
import project.Internal.Excel_Reader
from project.Errors import Column_Count_Mismatch, Column_Name_Mismatch, Existing_Data, Invalid_Location, Range_Exceeded
polyglot java import java.io.File as Java_File
polyglot java import java.lang.IllegalStateException
polyglot java import org.apache.poi.ss.usermodel.Workbook
polyglot java import org.enso.base.DryRunFileManager
polyglot java import org.enso.table.error.ExistingDataException
polyglot java import org.enso.table.error.InvalidLocationException
polyglot java import org.enso.table.error.RangeExceededException
polyglot java import org.enso.table.excel.ExcelConnectionPool
polyglot java import org.enso.table.excel.ExcelFileFormat
polyglot java import org.enso.table.read.ExcelReader
polyglot java import org.enso.table.write.ExcelWriter
polyglot java import org.enso.table.write.ExistingDataMode
polyglot java import org.enso.table.write.ExistingFileBehavior
## PRIVATE
make_java_existing_data_mode : Existing_File_Behavior -> Match_Columns -> ExistingDataMode
make_java_existing_data_mode on_existing_file match_columns = case on_existing_file of
Existing_File_Behavior.Error -> ExistingDataMode.ERROR
Existing_File_Behavior.Overwrite -> ExistingDataMode.REPLACE
Existing_File_Behavior.Backup -> ExistingDataMode.REPLACE
Existing_File_Behavior.Append -> case match_columns of
Match_Columns.By_Name -> ExistingDataMode.APPEND_BY_NAME
Match_Columns.By_Position -> ExistingDataMode.APPEND_BY_INDEX
make_java_existing_file_behavior : Existing_File_Behavior -> ExistingFileBehavior
make_java_existing_file_behavior on_existing_file = case on_existing_file of
Existing_File_Behavior.Error -> ExistingFileBehavior.ERROR
Existing_File_Behavior.Overwrite -> ExistingFileBehavior.OVERWRITE
Existing_File_Behavior.Backup -> ExistingFileBehavior.BACKUP
Existing_File_Behavior.Append -> ExistingFileBehavior.APPEND
## PRIVATE
Writes a Table to an Excel file.
@ -44,31 +51,81 @@ make_java_existing_data_mode on_existing_file match_columns = case on_existing_f
- xls_format: If `true`, the file is written in the legacy XLS format.
Otherwise, the file is written in the modern XLSX format.
write_file : File -> Table -> Existing_File_Behavior -> Excel_Section -> (Boolean|Infer) -> Match_Columns -> Problem_Behavior -> Boolean -> File
write_file file table on_existing_file section headers match_columns on_problems xls_format=False =
_ = [on_problems]
## If file does not exist or is empty then create a new workbook.
workbook = if file.exists.not || (file.size == 0) then ExcelWriter.createWorkbook xls_format else
Excel_Reader.handle_reader file stream->(ExcelReader.getWorkbook stream xls_format)
write_file (file : File) (table : Table) (on_existing_file : Existing_File_Behavior) (section : Excel_Section) (headers : Boolean|Infer) (match_columns:Match_Columns) (on_problems:Problem_Behavior) (xls_format:Boolean) =
# `on_problems` is deliberately unused here.
_ = on_problems
# With `Existing_File_Behavior.Error` an already existing target is rejected up front, before anything is written.
if (on_existing_file == Existing_File_Behavior.Error) && file.exists then Error.throw (File_Error.Already_Exists file) else
java_file = file_as_java file
file_format = if xls_format then ExcelFileFormat.XLS else ExcelFileFormat.XLSX
ExcelWriter.setEnsoToTextCallbackIfUnset (.to_text)
modification_strategy = prepare_file_modification_strategy table section headers on_existing_file match_columns
# A dry run is in effect whenever the Output context is disabled.
is_dry_run = Context.Output.is_enabled.not
modification_strategy.if_not_error <| Panic.recover File_Error <| File_Error.handle_java_exceptions file <| handle_writer <|
# Collect the other files this write may touch (the `.bak` file and a pre-existing dry-run file) so they are handed to `lockForWriting` together with the target.
# Nothing entries and entries equal to the target itself are filtered out.
possible_backup_file = if on_existing_file == Existing_File_Behavior.Backup then file_as_java (find_bak_file file) else Nothing
possible_dry_run_file = if is_dry_run.not then Nothing else
preexisting_dry_run_file = DryRunFileManager.preExistingTemporaryFile file.absolute.normalize.path
preexisting_dry_run_file.if_not_nothing <|
Java_File.new preexisting_dry_run_file
accompanying_files = [possible_backup_file, possible_dry_run_file].filter (!= Nothing) . filter (!= java_file)
ExcelConnectionPool.INSTANCE.lockForWriting java_file file_format accompanying_files write_helper-> Context.Output.with_enabled <|
# The workbook is first written to a separate file: the dry-run file in a dry run, otherwise a fresh `.temp.` sibling from `find_temp_file`.
temp_file = if is_dry_run then file.create_dry_run_file copy_original=False else
find_temp_file file
## We 'sync' the temp_file to reflect the original target file - if it exists we copy the contents, if the source
doesn't exist we also ensure that the temp file is not polluted with data from previous (dry-run) writes.
Panic.rethrow <|
if file.exists then (file.copy_to temp_file replace_existing=True) else (temp_file.delete_if_exists)
write_helper.writeWorkbook (file_as_java temp_file) modification_strategy
# In a dry run the dry-run file is returned and the target is left untouched.
# Otherwise the temp file is moved over the target (after moving the original to `.bak` when a backup is requested) and the target is returned.
result_file = if is_dry_run then temp_file else
needs_backup = on_existing_file == Existing_File_Behavior.Backup
if needs_backup && file.exists then
backup_file = find_bak_file file
## We can move instead of copying because right in next line we will overwrite the original file
anyway. And move should be more efficient.
Panic.rethrow <| file.move_to backup_file replace_existing=True
Panic.rethrow <| temp_file.move_to file replace_existing=True
file
result_file
## PRIVATE
find_temp_file : File -> File
find_temp_file base_file =
# Finds a not-yet-existing sibling of `base_file` named `<name>.temp.<nano_time>`.
parent = base_file.absolute.normalize.parent
name = base_file.name
# Retry until a candidate that does not exist is found; each attempt reads `System.nano_time` again.
# NOTE(review): `i` only counts attempts and is not part of the generated name.
go i =
temp_file = parent / (name + ".temp." + System.nano_time.to_text)
if temp_file.exists then go (i + 1) else temp_file
go 0
## PRIVATE
find_bak_file : File -> File
find_bak_file base_file =
# The backup file lives next to `base_file` and has the same name with ".bak" appended.
parent = base_file.absolute.normalize.parent
parent / (base_file.name + ".bak")
## PRIVATE
file_as_java : File -> Java_File
file_as_java file =
# Builds a `java.io.File` from the absolute, normalized path of the given file.
Java_File.new file.absolute.normalize.path
## PRIVATE
prepare_file_modification_strategy : Table -> Excel_Section -> Boolean|Infer -> Existing_File_Behavior -> Match_Columns -> (Workbook -> Nothing)
prepare_file_modification_strategy table section headers on_existing_file match_columns =
existing_data_mode = make_java_existing_data_mode on_existing_file match_columns
java_headers = Excel_Reader.make_java_headers headers
ExcelWriter.setEnsoToTextCallbackIfUnset (.to_text)
result = handle_writer <| case section of
case section of
Excel_Section.Worksheet sheet skip_rows row_limit ->
ExcelWriter.writeTableToSheet workbook sheet existing_data_mode skip_rows table.java_table row_limit java_headers
Excel_Section.Cell_Range address skip_rows row_limit -> case address of
Excel_Range.Value java_range -> ExcelWriter.writeTableToRange workbook java_range existing_data_mode skip_rows table.java_table row_limit java_headers
_ : Text -> ExcelWriter.writeTableToRange workbook address existing_data_mode skip_rows table.java_table row_limit java_headers
_ : Excel_Section -> Error.throw (Illegal_Argument.Error "Only a Worksheet or Cell_Range is allowed in write_file")
if result.is_error then result else
write_stream stream = stream.with_java_stream java_stream->
workbook.write java_stream
case on_existing_file of
Existing_File_Behavior.Append ->
## Special handling - have successfully added the extra sheet/range so now overwrite file with backup.
Existing_File_Behavior.Backup.write file write_stream
_ -> on_existing_file.write file write_stream
workbook_to_modify->
ExcelWriter.writeTableToSheet workbook_to_modify sheet existing_data_mode skip_rows table.java_table row_limit java_headers
Excel_Section.Cell_Range address skip_rows row_limit ->
java_range = case address of
Excel_Range.Value java_range -> java_range
text : Text -> text
workbook_to_modify->
ExcelWriter.writeTableToRange workbook_to_modify java_range existing_data_mode skip_rows table.java_table row_limit java_headers
## PRIVATE
Handle and map the Java errors when writing an Excel file
@ -90,3 +147,13 @@ handle_writer ~writer =
Column_Count_Mismatch.handle_java_exception <| handle_bad_location <|
Illegal_Argument.handle_java_exception <| handle_range_exceeded <| handle_existing_data <|
writer
## PRIVATE
make_java_existing_data_mode : Existing_File_Behavior -> Match_Columns -> ExistingDataMode
# Translates the file-level behaviour into the Java `ExistingDataMode`.
# Overwrite and Backup both map to REPLACE; for Append, `match_columns` picks between appending by name and by index.
make_java_existing_data_mode on_existing_file match_columns = case on_existing_file of
Existing_File_Behavior.Error -> ExistingDataMode.ERROR
Existing_File_Behavior.Overwrite -> ExistingDataMode.REPLACE
Existing_File_Behavior.Backup -> ExistingDataMode.REPLACE
Existing_File_Behavior.Append -> case match_columns of
Match_Columns.By_Name -> ExistingDataMode.APPEND_BY_NAME
Match_Columns.By_Position -> ExistingDataMode.APPEND_BY_INDEX

View File

@ -39,4 +39,12 @@ public class DryRunFileManager {
}
});
}
/**
 * Looks up a temporary file related to {@code path} in the {@code files} registry.
 *
 * <p>If {@code path} is itself present among the registry's values it is returned unchanged;
 * otherwise the result is whatever the registry maps {@code path} to.
 *
 * @param path the path to look up
 * @return {@code path} when it is a registered value, else {@code files.get(path)}
 *     (presumably {@code null} when there is no mapping - the declaration of {@code files}
 *     is not visible here, confirm)
 */
public static String preExistingTemporaryFile(String path) {
  return files.containsValue(path) ? path : files.get(path);
}
}

View File

@ -1,6 +1,6 @@
package org.enso.table.error;
public class InvalidLocationException extends Exception {
public class InvalidLocationException extends RuntimeException {
public InvalidLocationException(String errorMessage) {
super(errorMessage);
}

View File

@ -0,0 +1,295 @@
package org.enso.table.excel;
import org.apache.poi.UnsupportedFileFormatException;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JRuntimeException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.file.AccessMode;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.function.Function;
public class ExcelConnectionPool {
public static final ExcelConnectionPool INSTANCE = new ExcelConnectionPool();
private ExcelConnectionPool() {
}
/**
 * Opens a read-only connection to the given Excel file, sharing the underlying record with any
 * connection that is already registered under the same key.
 *
 * <p>The whole operation runs while holding the monitor of this pool instance.
 *
 * @param file the Excel file to open
 * @param format the format the file should be opened as
 * @return a connection backed by either the already registered record (whose reference count is
 *     incremented) or a newly created one (registered with a reference count of 1)
 * @throws IllegalStateException if a write is currently in progress
 * @throws FileNotFoundException if {@code file} does not exist
 * @throws ExcelFileFormatMismatchException if the file is already registered with a different format
 * @throws IOException declared by the signature; presumably raised while opening the workbook
 */
public ReadOnlyExcelConnection openReadOnlyConnection(File file, ExcelFileFormat format) throws IOException {
  synchronized (this) {
    if (isCurrentlyWriting) {
      throw new IllegalStateException("Cannot open a read-only Excel connection while an Excel file is being " +
          "written to. This is a bug in the Table library.");
    }

    if (!file.exists()) {
      throw new FileNotFoundException(file.toString());
    }

    final String key = getKeyForFile(file);
    final ConnectionRecord shared = records.get(key);

    if (shared == null) {
      // Nothing registered for this key yet: open the workbook and register a fresh record.
      ConnectionRecord fresh = new ConnectionRecord();
      fresh.refCount = 1;
      fresh.file = file;
      fresh.format = format;
      fresh.workbook = openWorkbook(file, format, false);
      records.put(key, fresh);
      return new ReadOnlyExcelConnection(this, key, fresh);
    }

    // A record already exists: it can only be shared when the requested format matches.
    if (shared.format != format) {
      throw new ExcelFileFormatMismatchException("Requesting to open " + file + " as " + format + ", but it was " +
          "already opened as " + shared.format + ".");
    }

    shared.refCount++;
    return new ReadOnlyExcelConnection(this, key, shared);
  }
}
/**
 * Helper that opens (or creates) a workbook of a fixed format, applies a modification to it and
 * saves the result.
 */
public static class WriteHelper {
  private final ExcelFileFormat format;

  public WriteHelper(ExcelFileFormat format) {
    this.format = format;
  }

  /**
   * Applies {@code writeAction} to the workbook stored in {@code file} and saves it.
   *
   * <p>A file counts as pre-existing only when it exists and has a non-zero size; in that case it
   * is opened and saved in place. Otherwise an empty workbook is created and written out to
   * {@code file} through a buffered stream. The workbook is closed before this method returns.
   *
   * @param file the workbook file to modify or create
   * @param writeAction the modification to apply to the workbook
   * @return whatever {@code writeAction} returned
   * @throws IOException if reading or writing the file fails
   * @throws IllegalStateException if a pre-existing workbook is of an unexpected implementation type
   */
  public <R> R writeWorkbook(File file, Function<Workbook, R> writeAction) throws IOException {
    final boolean preExistingFile = file.exists() && Files.size(file.toPath()) > 0;
    try (Workbook workbook = preExistingFile
        ? ExcelConnectionPool.openWorkbook(file, format, true)
        : createEmptyWorkbook(format)) {
      final R result = writeAction.apply(workbook);

      if (!preExistingFile) {
        // Brand-new workbook: stream it out to the target file.
        try (OutputStream fileOut = Files.newOutputStream(file.toPath());
             BufferedOutputStream workbookOut = new BufferedOutputStream(fileOut)) {
          workbook.write(workbookOut);
        }
        return result;
      }

      // Save the file in place.
      if (workbook instanceof HSSFWorkbook hssf) {
        hssf.write();
      } else if (workbook instanceof XSSFWorkbook xssf) {
        try {
          xssf.write(null);
        } catch (OpenXML4JRuntimeException e) {
          // Ignore: Workaround for bug https://bz.apache.org/bugzilla/show_bug.cgi?id=59252
        }
      } else {
        throw new IllegalStateException("Unknown workbook type: " + workbook.getClass());
      }
      return result;
    }
  }
}
/**
 * Executes a write action, ensuring that any other Excel connections are closed during the action, so that it can
 * modify the file. Any existing connections are re-opened after the operation finishes (regardless of its success or
 * error).
 * <p>
 * The action gets a {@link WriteHelper} object that can be used to open the workbook for reading or writing. The
 * action must take care to close that workbook before returning.
 * <p>
 * Additional files that should be closed during the write action can be specified in the {@code accompanyingFiles}
 * argument. These may be related temporary files that are written during the write operation and also need to get
 * 'unlocked' for the time of write.
 * <p>
 * A pooled connection is closed and scheduled for re-opening only once, even if the same file is passed both as
 * {@code file} and inside {@code accompanyingFiles}.
 *
 * @param file the file that is going to be written
 * @param format the format passed on to the {@link WriteHelper}
 * @param accompanyingFiles further files whose pooled connections must be closed for the duration of the write
 * @param action the write action to run
 * @return the value returned by {@code action}
 * @throws IllegalStateException if another write is already in progress
 * @throws IOException if a connection cannot be closed or a file is not accessible for reading and writing
 */
public <R> R lockForWriting(File file, ExcelFileFormat format, File[] accompanyingFiles,
    Function<WriteHelper, R> action) throws IOException {
  synchronized (this) {
    if (isCurrentlyWriting) {
      throw new IllegalStateException("Another Excel write is in progress on the same thread. This is a bug in the " +
          "Table library.");
    }

    isCurrentlyWriting = true;
    try {
      ArrayList<ConnectionRecord> recordsToReopen = new ArrayList<>(1 + accompanyingFiles.length);
      try {
        // Close the existing connections, if any - to avoid the write operation failing due to the file being locked.
        suspendConnection(file, recordsToReopen);
        for (File accompanyingFile : accompanyingFiles) {
          suspendConnection(accompanyingFile, recordsToReopen);
        }

        WriteHelper helper = new WriteHelper(format);
        return action.apply(helper);
      } finally {
        // Reopen the closed connections. Each record appears at most once in the list, so `reopen` never sees an
        // already re-opened workbook.
        for (ConnectionRecord record : recordsToReopen) {
          record.reopen(false);
        }
      }
    } finally {
      isCurrentlyWriting = false;
    }
  }
}

/**
 * Closes the pooled connection for {@code target} (if there is one and it was not suspended already), remembers it
 * in {@code suspended} and verifies that the file can be written.
 */
private void suspendConnection(File target, ArrayList<ConnectionRecord> suspended) throws IOException {
  ConnectionRecord record = records.get(getKeyForFile(target));
  // `contains` compares by identity here, as ConnectionRecord does not override `equals`.
  if (record != null && !suspended.contains(record)) {
    record.close();
    suspended.add(record);
  }
  verifyIsWritable(target);
}
/**
 * Checks that an existing file can be both read and written.
 *
 * @param file the file to check
 * @throws IOException if the file exists but the access check fails
 */
private void verifyIsWritable(File file) throws IOException {
  final Path target = file.toPath();
  // If the file does not exist, we assume that we can create it - so only existing files are checked.
  if (Files.exists(target)) {
    target.getFileSystem().provider().checkAccess(target, AccessMode.WRITE, AccessMode.READ);
  }
}
/**
 * Computes the key under which a file is stored in the pool: its canonical path.
 *
 * @param file the file to compute the key for
 * @return the canonical path of {@code file}
 * @throws IOException if the canonical path cannot be resolved
 */
private String getKeyForFile(File file) throws IOException {
  final String canonicalPath = file.getCanonicalPath();
  return canonicalPath;
}
/**
 * Gives up one reference to the pooled record held by {@code excelConnection}.
 * <p>
 * When the last reference is released, the underlying workbook is closed and the record is removed from the pool.
 * The record is removed even if closing the workbook fails, so that a record in an undefined state is never handed
 * out to a later {@code openReadOnlyConnection} call.
 *
 * @param excelConnection the connection that is being closed
 * @throws IOException if closing the workbook fails
 */
void release(ReadOnlyExcelConnection excelConnection) throws IOException {
  synchronized (this) {
    ConnectionRecord released = excelConnection.record;
    released.refCount--;
    if (released.refCount <= 0) {
      try {
        released.close();
      } finally {
        // Always forget the record, otherwise a failed close would leave it in the pool with no references.
        records.remove(excelConnection.key);
      }
    }
  }
}
// Open connections, keyed by the canonical path of the file (see getKeyForFile).
private final HashMap<String, ConnectionRecord> records = new HashMap<>();
// Set for the duration of lockForWriting; read-only connections cannot be opened while it is true.
private boolean isCurrentlyWriting = false;
/**
 * The pooled state for one open file: the workbook, the information needed to re-open it, and the number of
 * connections that currently share it.
 */
static class ConnectionRecord {
  private int refCount;
  private File file;
  private ExcelFileFormat format;
  // null while the record is closed (or when the last re-open attempt failed).
  private Workbook workbook;
  // The failure of the most recent re-open attempt, if it failed; reported to whoever accesses the workbook next.
  private IOException initializationException = null;

  /**
   * Runs {@code action} on the currently open workbook while holding this record's lock.
   *
   * @throws IOException if the last attempt to re-open the workbook failed
   * @throws IllegalStateException if the workbook is closed
   */
  <T> T withWorkbook(Function<Workbook, T> action) throws IOException {
    synchronized (this) {
      return action.apply(accessCurrentWorkbook());
    }
  }

  /** Closes the workbook if it is open; does nothing if it is already closed. */
  public void close() throws IOException {
    synchronized (this) {
      if (workbook != null) {
        workbook.close();
      }
      workbook = null;
    }
  }

  /**
   * Opens the workbook again (read-only) after it has been closed.
   *
   * @param throwOnFailure if true, an I/O failure is rethrown; otherwise it is only remembered and reported on the
   *     next access to the workbook
   * @throws IllegalStateException if the workbook is already open
   */
  void reopen(boolean throwOnFailure) throws IOException {
    synchronized (this) {
      if (workbook != null) {
        throw new IllegalStateException("The workbook is already open.");
      }

      try {
        workbook = openWorkbook(file, format, false);
        // The workbook is usable again, so a failure remembered from an earlier attempt no longer applies.
        initializationException = null;
      } catch (IOException e) {
        initializationException = e;
        if (throwOnFailure) {
          throw e;
        }
      }
    }
  }

  private Workbook accessCurrentWorkbook() throws IOException {
    synchronized (this) {
      if (workbook == null) {
        if (initializationException != null) {
          throw initializationException;
        } else {
          throw new IllegalStateException("The workbook is used after being closed.");
        }
      }

      return workbook;
    }
  }
}
/**
 * Opens a workbook backed directly by the given file.
 * <p>
 * If the workbook cannot be constructed, the underlying file system / package is released before the failure is
 * propagated, so the file handle is not leaked.
 *
 * @param file the file to open
 * @param format the format to open the file as
 * @param writeAccess whether the file should be opened for modification; otherwise it is opened read-only
 * @return the opened workbook
 * @throws IOException if the file cannot be read or has an invalid format
 */
private static Workbook openWorkbook(File file, ExcelFileFormat format, boolean writeAccess) throws IOException {
  return switch (format) {
    case XLS -> {
      boolean readOnly = !writeAccess;
      POIFSFileSystem fs = new POIFSFileSystem(file, readOnly);
      try {
        // If the initialization succeeds, the POIFSFileSystem will be closed by the HSSFWorkbook::close.
        yield new HSSFWorkbook(fs);
      } catch (Exception e) {
        fs.close();
        throw e;
      }
    }
    case XLSX -> {
      try {
        PackageAccess access = writeAccess ? PackageAccess.READ_WRITE : PackageAccess.READ;
        OPCPackage pkg = OPCPackage.open(file, access);
        try {
          yield new XSSFWorkbook(pkg);
        } catch (Exception e) {
          // Mirror the XLS branch: release the package on any failure, not only on IOException.
          // `revert` closes the package without saving, so a file that failed to load is never written back.
          pkg.revert();
          throw e;
        }
      } catch (InvalidFormatException e) {
        throw new IOException("Invalid format encountered when opening the file " + file + " as " + format + ".", e);
      }
    }
  };
}
/**
 * Creates a new, empty in-memory workbook of the requested format.
 *
 * @param format the format of the workbook to create
 * @return an {@code HSSFWorkbook} for XLS or an {@code XSSFWorkbook} for XLSX
 */
private static Workbook createEmptyWorkbook(ExcelFileFormat format) {
  final Workbook fresh = switch (format) {
    case XLS -> new HSSFWorkbook();
    case XLSX -> new XSSFWorkbook();
  };
  return fresh;
}
/** Thrown when a file that is already open in the pool is requested again with a different format. */
public static class ExcelFileFormatMismatchException extends UnsupportedFileFormatException {
  /**
   * @param message the description of the mismatch
   */
  public ExcelFileFormatMismatchException(final String message) {
    super(message);
  }
}
}

View File

@ -0,0 +1,6 @@
package org.enso.table.excel;
/** The Excel file formats that can be opened or created. */
public enum ExcelFileFormat {
  /** The format handled through {@code HSSFWorkbook} by the connection pool. */
  XLS,

  /** The format handled through {@code XSSFWorkbook} by the connection pool. */
  XLSX;
}

View File

@ -0,0 +1,38 @@
package org.enso.table.excel;
import java.io.IOException;
import java.util.function.Function;
import org.apache.poi.ss.usermodel.Workbook;
/**
 * A handle to a workbook shared through an {@link ExcelConnectionPool}.
 * <p>
 * Closing the handle releases its reference in the pool. Closing is idempotent, and the handle counts as closed
 * even if releasing it failed.
 */
public class ReadOnlyExcelConnection implements AutoCloseable {

  private final ExcelConnectionPool myPool;
  final String key;
  // Set to null once this connection has been closed.
  ExcelConnectionPool.ConnectionRecord record;

  ReadOnlyExcelConnection(
      ExcelConnectionPool myPool, String key, ExcelConnectionPool.ConnectionRecord record) {
    this.myPool = myPool;
    this.key = key;
    this.record = record;
  }

  /**
   * Releases this connection's reference in the pool.
   *
   * @throws IOException if the pool fails to close the underlying workbook
   */
  @Override
  public synchronized void close() throws IOException {
    if (record == null) {
      // already closed
      return;
    }

    try {
      myPool.release(this);
    } finally {
      // Mark as closed even when the release fails, so that a repeated close() cannot release the same
      // reference a second time.
      record = null;
    }
  }

  /**
   * Runs {@code f} on the shared workbook.
   *
   * @param f the action to run
   * @return the value returned by {@code f}
   * @throws IllegalStateException if this connection has already been closed
   * @throws IOException if the workbook cannot be accessed
   */
  public synchronized <T> T withWorkbook(Function<Workbook, T> f) throws IOException {
    if (record == null) {
      throw new IllegalStateException("ReadOnlyExcelConnection is being used after it was closed.");
    }

    return record.withWorkbook(f);
  }
}

View File

@ -1,16 +1,16 @@
package org.enso.table.read;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.ss.usermodel.Name;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.enso.table.data.column.builder.Builder;
import org.enso.table.data.column.builder.InferredBuilder;
import org.enso.table.data.column.storage.ObjectStorage;
@ -18,38 +18,29 @@ import org.enso.table.data.table.Column;
import org.enso.table.data.table.Table;
import org.enso.table.error.EmptySheetException;
import org.enso.table.error.InvalidLocationException;
import org.enso.table.excel.ExcelConnectionPool;
import org.enso.table.excel.ExcelFileFormat;
import org.enso.table.excel.ExcelHeaders;
import org.enso.table.excel.ExcelRange;
import org.enso.table.excel.ExcelRow;
import org.enso.table.excel.ExcelSheet;
import org.enso.table.excel.ReadOnlyExcelConnection;
import org.enso.table.problems.ProblemAggregator;
import org.graalvm.polyglot.Context;
/** A table reader for MS Excel files. */
public class ExcelReader {
/**
* Loads a workbook (either XLSX or XLS format from the specified input stream.
*
* @param stream an {@link InputStream} allowing to read the XLS(X) file contents.
* @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format).
* @return a {@link Workbook} containing the specified data.
* @throws IOException - when the input stream cannot be read.
*/
public static Workbook readWorkbook(InputStream stream, boolean xls_format) throws IOException {
return getWorkbook(stream, xls_format);
}
/**
* Reads a list of sheet names for the specified XLSX/XLS file into an array.
*
* @param stream an {@link InputStream} allowing to read the XLS(X) file contents.
* @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format).
* @param file the {@link File} to load
* @param format specifies the file format
* @return a String[] containing the sheet names.
* @throws IOException when the input stream cannot be read.
* @throws IOException when the action fails
*/
public static String[] readSheetNames(InputStream stream, boolean xls_format) throws IOException {
Workbook workbook = getWorkbook(stream, xls_format);
return readSheetNames(workbook);
public static String[] readSheetNames(File file, ExcelFileFormat format)
throws IOException, InvalidFormatException {
return withWorkbook(file, format, ExcelReader::readSheetNames);
}
/**
@ -72,14 +63,14 @@ public class ExcelReader {
/**
* Reads a list of range names for the specified XLSX/XLS file into an array.
*
* @param stream an {@link InputStream} allowing to read the XLSX file contents.
* @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format).
* @param file the {@link File} to load
* @param format specifies the file format
* @return a String[] containing the range names.
* @throws IOException when the input stream cannot be read.
* @throws IOException when the action fails
*/
public static String[] readRangeNames(InputStream stream, boolean xls_format) throws IOException {
Workbook workbook = getWorkbook(stream, xls_format);
return readRangeNames(workbook);
public static String[] readRangeNames(File file, ExcelFileFormat format)
throws IOException, InvalidFormatException {
return withWorkbook(file, format, ExcelReader::readRangeNames);
}
/**
@ -96,106 +87,115 @@ public class ExcelReader {
/**
* Reads a sheet by name for the specified XLSX/XLS file into a table.
*
* @param stream an {@link InputStream} allowing to read the XLSX file contents.
* @param file the {@link File} to load
* @param sheetName the name of the sheet to read.
* @param skip_rows skip rows from the top of the sheet.
* @param row_limit maximum number of rows to read.
* @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format).
* @param format specifies the file format
* @return a {@link Table} containing the specified data.
* @throws IOException when the input stream cannot be read.
* @throws InvalidLocationException when the sheet name is not found.
*/
public static Table readSheetByName(
InputStream stream,
File file,
String sheetName,
ExcelHeaders.HeaderBehavior headers,
int skip_rows,
Integer row_limit,
boolean xls_format,
ExcelFileFormat format,
ProblemAggregator problemAggregator)
throws IOException, InvalidLocationException {
Workbook workbook = getWorkbook(stream, xls_format);
return withWorkbook(
file,
format,
workbook -> {
int sheetIndex = workbook.getSheetIndex(sheetName);
if (sheetIndex == -1) {
throw new InvalidLocationException("Unknown sheet '" + sheetName + "'.");
}
int sheetIndex = workbook.getSheetIndex(sheetName);
if (sheetIndex == -1) {
throw new InvalidLocationException("Unknown sheet '" + sheetName + "'.");
}
return readTable(
workbook,
sheetIndex,
null,
headers,
skip_rows,
row_limit == null ? Integer.MAX_VALUE : row_limit,
problemAggregator);
return readTable(
workbook,
sheetIndex,
null,
headers,
skip_rows,
row_limit == null ? Integer.MAX_VALUE : row_limit,
problemAggregator);
});
}
/**
* Reads a sheet by index for the specified XLSX/XLS file into a table.
*
* @param stream an {@link InputStream} allowing to read the XLSX file contents.
* @param file the {@link File} to load
* @param index the 1-based index to the sheet.
* @param skip_rows skip rows from the top of the sheet.
* @param row_limit maximum number of rows to read.
* @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format).
* @param format specifies the file format
* @return a {@link Table} containing the specified data.
* @throws IOException when the input stream cannot be read.
* @throws InvalidLocationException when the sheet index is not valid.
*/
public static Table readSheetByIndex(
InputStream stream,
File file,
int index,
ExcelHeaders.HeaderBehavior headers,
int skip_rows,
Integer row_limit,
boolean xls_format,
ExcelFileFormat format,
ProblemAggregator problemAggregator)
throws IOException, InvalidLocationException {
Workbook workbook = getWorkbook(stream, xls_format);
return withWorkbook(
file,
format,
workbook -> {
int sheetCount = workbook.getNumberOfSheets();
if (index < 1 || index > sheetCount) {
throw new InvalidLocationException(
"Sheet index is not in valid range (1 to " + sheetCount + " inclusive).");
}
int sheetCount = workbook.getNumberOfSheets();
if (index < 1 || index > sheetCount) {
throw new InvalidLocationException(
"Sheet index is not in valid range (1 to " + sheetCount + " inclusive).");
}
return readTable(
workbook,
index - 1,
null,
headers,
skip_rows,
row_limit == null ? Integer.MAX_VALUE : row_limit,
problemAggregator);
return readTable(
workbook,
index - 1,
null,
headers,
skip_rows,
row_limit == null ? Integer.MAX_VALUE : row_limit,
problemAggregator);
});
}
/**
* Reads a range by sheet name, named range or address for the specified XLSX/XLS file into a
* table.
*
* @param stream an {@link InputStream} allowing to read the XLSX file contents.
* @param file the {@link File} to load
* @param rangeNameOrAddress sheet name, range name or address to read.
* @param headers specifies whether the first row should be used as headers.
* @param skip_rows skip rows from the top of the range.
* @param row_limit maximum number of rows to read.
* @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format).
* @param format specifies the file format
* @return a {@link Table} containing the specified data.
* @throws IOException when the input stream cannot be read.
* @throws InvalidLocationException when the range name or address is not found.
*/
public static Table readRangeByName(
InputStream stream,
File file,
String rangeNameOrAddress,
ExcelHeaders.HeaderBehavior headers,
int skip_rows,
Integer row_limit,
boolean xls_format,
ExcelFileFormat format,
ProblemAggregator problemAggregator)
throws IOException, InvalidLocationException {
Workbook workbook = getWorkbook(stream, xls_format);
return readRangeByName(
workbook, rangeNameOrAddress, headers, skip_rows, row_limit, problemAggregator);
return withWorkbook(
file,
format,
workbook ->
readRangeByName(
workbook, rangeNameOrAddress, headers, skip_rows, row_limit, problemAggregator));
}
/**
@ -245,42 +245,36 @@ public class ExcelReader {
/**
* Reads a range for the specified XLSX/XLS file into a table.
*
* @param stream an {@link InputStream} allowing to read the XLSX file contents.
* @param file the {@link File} to load
* @param excelRange the range to read.
* @param skip_rows skip rows from the top of the range.
* @param row_limit maximum number of rows to read.
* @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format).
* @param format specifies the file format
* @return a {@link Table} containing the specified data.
* @throws IOException when the input stream cannot be read.
*/
public static Table readRange(
InputStream stream,
File file,
ExcelRange excelRange,
ExcelHeaders.HeaderBehavior headers,
int skip_rows,
Integer row_limit,
boolean xls_format,
ExcelFileFormat format,
ProblemAggregator problemAggregator)
throws IOException, InvalidLocationException {
return readRange(
getWorkbook(stream, xls_format),
excelRange,
headers,
skip_rows,
row_limit,
problemAggregator);
return withWorkbook(
file,
format,
workbook ->
readRange(workbook, excelRange, headers, skip_rows, row_limit, problemAggregator));
}
/**
* Load a workbook into memory from an InputStream.
*
* @param stream an {@link InputStream} allowing to read the XLSX file contents.
* @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format).
* @return a {@link Workbook} containing the specified data.
* @throws IOException when the input stream cannot be read or an incorrect format occurs.
*/
public static Workbook getWorkbook(InputStream stream, boolean xls_format) throws IOException {
return xls_format ? new HSSFWorkbook(stream) : new XSSFWorkbook(stream);
private static <T> T withWorkbook(File file, ExcelFileFormat format, Function<Workbook, T> action)
throws IOException {
try (ReadOnlyExcelConnection connection =
ExcelConnectionPool.INSTANCE.openReadOnlyConnection(file, format)) {
return connection.withWorkbook(action);
}
}
private static Table readRange(

View File

@ -145,15 +145,6 @@ public class ExcelWriter {
}
}
/**
* Creates an empty workbook.
* @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format).
* @return a {@link Workbook} containing the specified data.
*/
public static Workbook createWorkbook(boolean xls_format) {
return xls_format ? new HSSFWorkbook() : new XSSFWorkbook();
}
private static void appendRangeWithTable(Workbook workbook, ExcelRange range, ExistingDataMode existingDataMode, Table table, Long rowLimit, ExcelHeaders.HeaderBehavior headers, ExcelSheet sheet, ExcelRange expanded)
throws RangeExceededException, ExistingDataException, ColumnNameMismatchException, ColumnCountMismatchException {
Table mappedTable = switch (existingDataMode) {

View File

@ -0,0 +1,8 @@
package org.enso.table.write;
/**
 * The ways of handling a target file that already exists when writing.
 * <p>
 * NOTE(review): the exact semantics of each option are defined by the code that consumes this enum, which is not
 * visible here - confirm before relying on the names alone.
 */
public enum ExistingFileBehavior {
  OVERWRITE,

  BACKUP,

  APPEND,

  ERROR;
}

View File

@ -954,113 +954,112 @@ spec setup =
op c 1 . should_fail_with Invalid_Value_Type
op a True . should_fail_with Invalid_Value_Type
Test.group prefix+"Column Operations - Text Replace" <|
do_replace column term new_text case_sensitivity=Case_Sensitivity.Default only_first=False expected =
case setup.is_database of
True ->
input_type = Meta.type_of term
params = Replace_Params.Value input_type case_sensitivity only_first
supported_replace_params = setup.test_selection.supported_replace_params
supported_replace_params . should_be_a Set
are_params_supported = supported_replace_params.contains params
case are_params_supported of
True -> column.replace term new_text case_sensitivity only_first . to_vector . should_equal expected
False -> column.replace term new_text case_sensitivity only_first . should_fail_with Unsupported_Database_Operation
False ->
result = column.replace term new_text case_sensitivity only_first
result.value_type . should_equal Value_Type.Char
result . to_vector . should_equal expected
do_replace column term new_text case_sensitivity=Case_Sensitivity.Default only_first=False expected =
case setup.is_database of
True ->
input_type = Meta.type_of term
params = Replace_Params.Value input_type case_sensitivity only_first
supported_replace_params = setup.test_selection.supported_replace_params
supported_replace_params . should_be_a Set
are_params_supported = supported_replace_params.contains params
case are_params_supported of
True -> column.replace term new_text case_sensitivity only_first . to_vector . should_equal expected
False -> column.replace term new_text case_sensitivity only_first . should_fail_with Unsupported_Database_Operation
False ->
result = column.replace term new_text case_sensitivity only_first
result.value_type . should_equal Value_Type.Char
result . to_vector . should_equal expected
Test.group prefix+"replace: literal text pattern and replacement" <|
col0 = table_builder [["x", ['hello Hello', 'hello hello', 'HELLO HELLO']]] . at "x"
col1 = table_builder [["x", ['a[bcd] A[bCd] a[bcd]', 'abac ad Ab aCAd']]] . at "x"
Test.group prefix+"replace: literal text pattern and replacement" <|
col0 = table_builder [["x", ['hello Hello', 'hello hello', 'HELLO HELLO']]] . at "x"
col1 = table_builder [["x", ['a[bcd] A[bCd] a[bcd]', 'abac ad Ab aCAd']]] . at "x"
Test.specify "case_sensitivity=sensitive/default use_regex=false only_first=false"
do_replace col0 'hello' 'bye' expected=['bye Hello', 'bye bye', 'HELLO HELLO']
do_replace col0 'hello' 'bye' case_sensitivity=Case_Sensitivity.Default expected=['bye Hello', 'bye bye', 'HELLO HELLO']
do_replace col0 'hello' 'bye' case_sensitivity=Case_Sensitivity.Sensitive expected=['bye Hello', 'bye bye', 'HELLO HELLO']
do_replace col1 'a[bcd]' 'hey' expected=['hey A[bCd] hey', 'abac ad Ab aCAd']
Test.specify "case_sensitivity=sensitive/default use_regex=false only_first=false"
do_replace col0 'hello' 'bye' expected=['bye Hello', 'bye bye', 'HELLO HELLO']
do_replace col0 'hello' 'bye' case_sensitivity=Case_Sensitivity.Default expected=['bye Hello', 'bye bye', 'HELLO HELLO']
do_replace col0 'hello' 'bye' case_sensitivity=Case_Sensitivity.Sensitive expected=['bye Hello', 'bye bye', 'HELLO HELLO']
do_replace col1 'a[bcd]' 'hey' expected=['hey A[bCd] hey', 'abac ad Ab aCAd']
Test.specify "case_sensitivity=sensitive/default use_regex=false only_first=true"
do_replace col0 'hello' 'bye' only_first=True expected=['bye Hello', 'bye hello', 'HELLO HELLO']
do_replace col1 'a[bcd]' 'hey' only_first=True expected=['hey A[bCd] a[bcd]', 'abac ad Ab aCAd']
Test.specify "case_sensitivity=sensitive/default use_regex=false only_first=true"
do_replace col0 'hello' 'bye' only_first=True expected=['bye Hello', 'bye hello', 'HELLO HELLO']
do_replace col1 'a[bcd]' 'hey' only_first=True expected=['hey A[bCd] a[bcd]', 'abac ad Ab aCAd']
Test.specify "case_sensitivity=insensitive use_regex=false only_first=false"
do_replace col0 'hello' 'bye' case_sensitivity=Case_Sensitivity.Insensitive expected=['bye bye', 'bye bye', 'bye bye']
do_replace col1 'a[bcd]' 'hey' case_sensitivity=Case_Sensitivity.Insensitive expected=['hey hey hey', 'abac ad Ab aCAd']
Test.specify "case_sensitivity=insensitive use_regex=false only_first=false"
do_replace col0 'hello' 'bye' case_sensitivity=Case_Sensitivity.Insensitive expected=['bye bye', 'bye bye', 'bye bye']
do_replace col1 'a[bcd]' 'hey' case_sensitivity=Case_Sensitivity.Insensitive expected=['hey hey hey', 'abac ad Ab aCAd']
Test.specify "case_sensitivity=insensitive use_regex=false only_first=true"
do_replace col0 'hello' 'bye' case_sensitivity=Case_Sensitivity.Insensitive only_first=True expected=['bye Hello', 'bye hello', 'bye HELLO']
do_replace col1 'a[bcd]' 'hey' case_sensitivity=Case_Sensitivity.Insensitive only_first=True expected=['hey A[bCd] a[bcd]', 'abac ad Ab aCAd']
Test.specify "case_sensitivity=insensitive use_regex=false only_first=true"
do_replace col0 'hello' 'bye' case_sensitivity=Case_Sensitivity.Insensitive only_first=True expected=['bye Hello', 'bye hello', 'bye HELLO']
do_replace col1 'a[bcd]' 'hey' case_sensitivity=Case_Sensitivity.Insensitive only_first=True expected=['hey A[bCd] a[bcd]', 'abac ad Ab aCAd']
Test.group prefix+"replace: literal regex pattern and replacement" <|
col1 = table_builder [["x", ['a[bcd] A[bCd] a[bcd]', 'abac ad Ab aCAd']]] . at "x"
Test.group prefix+"replace: literal regex pattern and replacement" <|
col1 = table_builder [["x", ['a[bcd] A[bCd] a[bcd]', 'abac ad Ab aCAd']]] . at "x"
Test.specify "case_sensitivity=sensitive/default use_regex=True only_first=false"
do_replace col1 'a[bcd]'.to_regex 'hey' expected=['a[bcd] A[bCd] a[bcd]', 'heyhey hey Ab aCAd']
Test.specify "case_sensitivity=sensitive/default use_regex=True only_first=false"
do_replace col1 'a[bcd]'.to_regex 'hey' expected=['a[bcd] A[bCd] a[bcd]', 'heyhey hey Ab aCAd']
Test.specify "case_sensitivity=sensitive/default use_regex=True only_first=true"
do_replace col1 'a[bcd]'.to_regex 'hey' only_first=True expected=['a[bcd] A[bCd] a[bcd]', 'heyac ad Ab aCAd']
Test.specify "case_sensitivity=sensitive/default use_regex=True only_first=true"
do_replace col1 'a[bcd]'.to_regex 'hey' only_first=True expected=['a[bcd] A[bCd] a[bcd]', 'heyac ad Ab aCAd']
Test.specify "case_sensitivity=insensitive use_regex=True only_first=false"
do_replace col1 'a[bcd]'.to_regex 'hey' case_sensitivity=Case_Sensitivity.Insensitive expected=['a[bcd] A[bCd] a[bcd]', 'heyhey hey hey heyhey']
Test.specify "case_sensitivity=insensitive use_regex=True only_first=false"
do_replace col1 'a[bcd]'.to_regex 'hey' case_sensitivity=Case_Sensitivity.Insensitive expected=['a[bcd] A[bCd] a[bcd]', 'heyhey hey hey heyhey']
Test.specify "case_sensitivity=insensitive use_regex=True only_first=true"
do_replace col1 'a[bcd]'.to_regex 'hey' case_sensitivity=Case_Sensitivity.Insensitive only_first=True expected=['a[bcd] A[bCd] a[bcd]', 'heyac ad Ab aCAd']
Test.specify "case_sensitivity=insensitive use_regex=True only_first=true"
do_replace col1 'a[bcd]'.to_regex 'hey' case_sensitivity=Case_Sensitivity.Insensitive only_first=True expected=['a[bcd] A[bCd] a[bcd]', 'heyac ad Ab aCAd']
Test.specify "can properly escape complex regexes" <|
regex = "^([^\(]+)|(?<foo>\w\d[a-z])+$"
col = table_builder [["x", [regex]]] . at "x"
do_replace col regex "asdf" ["asdf"]
Test.specify "can properly escape complex regexes" <|
regex = "^([^\(]+)|(?<foo>\w\d[a-z])+$"
col = table_builder [["x", [regex]]] . at "x"
do_replace col regex "asdf" ["asdf"]
Test.group prefix+"replace: pattern and replacement columns" <|
Test.group prefix+"replace: pattern and replacement columns" <|
table = table_builder [["x", ['hello Hello', 'hello hello', 'HELLO HELLO', 'a[bcd] A[bCd] a[bcd]', 'abac ad Ab aCAd']], ["patterns", ['hello', 'hello', 'hello', 'a[bcd]', 'a[bcd]']], ["replacements", ['bye', 'bye', 'bye', 'hey', 'hey']]]
col = table.at "x"
patterns = table.at "patterns"
replacements = table.at "replacements"
Test.specify "case_sensitivity=sensitive/default use_regex=false only_first=false"
do_replace col patterns replacements expected=['bye Hello', 'bye bye', 'HELLO HELLO', 'hey A[bCd] hey', 'abac ad Ab aCAd']
do_replace col patterns replacements case_sensitivity=Case_Sensitivity.Default expected=['bye Hello', 'bye bye', 'HELLO HELLO', 'hey A[bCd] hey', 'abac ad Ab aCAd']
do_replace col patterns replacements case_sensitivity=Case_Sensitivity.Sensitive expected=['bye Hello', 'bye bye', 'HELLO HELLO', 'hey A[bCd] hey', 'abac ad Ab aCAd']
Test.specify "case_sensitivity=sensitive/default use_regex=false only_first=true"
do_replace col patterns replacements only_first=True expected=['bye Hello', 'bye hello', 'HELLO HELLO', 'hey A[bCd] a[bcd]', 'abac ad Ab aCAd']
Test.specify "case_sensitivity=insensitive use_regex=false only_first=false"
do_replace col patterns replacements case_sensitivity=Case_Sensitivity.Insensitive expected=['bye bye', 'bye bye', 'bye bye', 'hey hey hey', 'abac ad Ab aCAd']
Test.specify "case_sensitivity=insensitive use_regex=false only_first=true"
do_replace col patterns replacements case_sensitivity=Case_Sensitivity.Insensitive only_first=True expected=['bye Hello', 'bye hello', 'bye HELLO', 'hey A[bCd] a[bcd]', 'abac ad Ab aCAd']
Test.group prefix+"replace: empty table and nothings" <|
Test.specify "should work on empty tables" <|
col = table_builder [["x", ['hello Hello']]] . filter "x" (Filter_Condition.Is_Nothing) . at "x"
do_replace col 'hello' 'bye' expected=[]
Test.specify "should work on Nothing text column" <|
col = table_builder [["x", ['hello Hello', Nothing]]] . filter "x" (Filter_Condition.Is_Nothing) . at "x"
do_replace col 'hello' 'bye' expected=[Nothing]
if setup.is_database then Test.group prefix+"replace: DB specific edge-cases" <|
col = table_builder [["A", ["Alpha", "Bravo", "Charlie", "Delta", "Echo", "Foxtrot"]]] . at 'A'
Test.specify "should not allow Case_Sensitivity.Insensitive with a non-default locale" <|
locale = Locale.new "en" "GB" "UTF-8"
col.replace 'asdf' 'zxcv' case_sensitivity=(Case_Sensitivity.Insensitive locale) . should_fail_with Illegal_Argument
Test.specify "column name" <|
table = table_builder [["x", ['hello Hello', 'hello hello', 'HELLO HELLO', 'a[bcd] A[bCd] a[bcd]', 'abac ad Ab aCAd']], ["patterns", ['hello', 'hello', 'hello', 'a[bcd]', 'a[bcd]']], ["replacements", ['bye', 'bye', 'bye', 'hey', 'hey']]]
col = table.at "x"
patterns = table.at "patterns"
replacements = table.at "replacements"
Test.specify "case_sensitivity=sensitive/default use_regex=false only_first=false"
do_replace col patterns replacements expected=['bye Hello', 'bye bye', 'HELLO HELLO', 'hey A[bCd] hey', 'abac ad Ab aCAd']
do_replace col patterns replacements case_sensitivity=Case_Sensitivity.Default expected=['bye Hello', 'bye bye', 'HELLO HELLO', 'hey A[bCd] hey', 'abac ad Ab aCAd']
do_replace col patterns replacements case_sensitivity=Case_Sensitivity.Sensitive expected=['bye Hello', 'bye bye', 'HELLO HELLO', 'hey A[bCd] hey', 'abac ad Ab aCAd']
Test.specify "case_sensitivity=sensitive/default use_regex=false only_first=true"
do_replace col patterns replacements only_first=True expected=['bye Hello', 'bye hello', 'HELLO HELLO', 'hey A[bCd] a[bcd]', 'abac ad Ab aCAd']
Test.specify "case_sensitivity=insensitive use_regex=false only_first=false"
do_replace col patterns replacements case_sensitivity=Case_Sensitivity.Insensitive expected=['bye bye', 'bye bye', 'bye bye', 'hey hey hey', 'abac ad Ab aCAd']
Test.specify "case_sensitivity=insensitive use_regex=false only_first=true"
do_replace col patterns replacements case_sensitivity=Case_Sensitivity.Insensitive only_first=True expected=['bye Hello', 'bye hello', 'bye HELLO', 'hey A[bCd] a[bcd]', 'abac ad Ab aCAd']
Test.group prefix+"replace: empty table and nothings" <|
Test.specify "should work on empty tables" <|
col = table_builder [["x", ['hello Hello']]] . filter "x" (Filter_Condition.Is_Nothing) . at "x"
do_replace col 'hello' 'bye' expected=[]
Test.specify "should work on Nothing text column" <|
col = table_builder [["x", ['hello Hello', Nothing]]] . filter "x" (Filter_Condition.Is_Nothing) . at "x"
do_replace col 'hello' 'bye' expected=[Nothing]
if setup.is_database then
col = table_builder [["A", ["Alpha", "Bravo", "Charlie", "Delta", "Echo", "Foxtrot"]]] . at 'A'
Test.specify "should not allow Case_Sensitivity.Insensitive with a non-default locale" <|
locale = Locale.new "en" "GB" "UTF-8"
col.replace 'asdf' 'zxcv' case_sensitivity=(Case_Sensitivity.Insensitive locale) . should_fail_with Illegal_Argument
Test.specify "column name" <|
table = table_builder [["x", ['hello Hello', 'hello hello', 'HELLO HELLO', 'a[bcd] A[bCd] a[bcd]', 'abac ad Ab aCAd']], ["patterns", ['hello', 'hello', 'hello', 'a[bcd]', 'a[bcd]']], ["replacements", ['bye', 'bye', 'bye', 'hey', 'hey']]]
col = table.at "x"
patterns = table.at "patterns"
replacements = table.at "replacements"
supported_replace_params = setup.test_selection.supported_replace_params
if supported_replace_params.contains (Replace_Params.Value Text Case_Sensitivity.Default False) then
col.replace 'hello' 'bye' . name . should_equal 'replace([x], \'hello\', \'bye\')'
if supported_replace_params.contains (Replace_Params.Value Regex Case_Sensitivity.Default False) then
col.replace 'a[bcd]'.to_regex 'hey' . name . should_equal 'replace([x], \'a[bcd]\', \'hey\')'
if supported_replace_params.contains (Replace_Params.Value Column Case_Sensitivity.Default False) then
col.replace patterns replacements . name . should_equal 'replace([x], [patterns], [replacements])'
supported_replace_params = setup.test_selection.supported_replace_params
if supported_replace_params.contains (Replace_Params.Value Text Case_Sensitivity.Default False) then
col.replace 'hello' 'bye' . name . should_equal 'replace([x], \'hello\', \'bye\')'
if supported_replace_params.contains (Replace_Params.Value Regex Case_Sensitivity.Default False) then
col.replace 'a[bcd]'.to_regex 'hey' . name . should_equal 'replace([x], \'a[bcd]\', \'hey\')'
if supported_replace_params.contains (Replace_Params.Value Column Case_Sensitivity.Default False) then
col.replace patterns replacements . name . should_equal 'replace([x], [patterns], [replacements])'
Test.group prefix+"Column Operations - Text Replace (in-memory only)" <|
if setup.is_database.not then

View File

@ -2,7 +2,10 @@ from Standard.Base import all
import Standard.Base.Errors.Common.Dry_Run_Operation
import Standard.Base.Errors.File_Error.File_Error
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
import Standard.Base.Errors.Illegal_State.Illegal_State
import Standard.Base.Runtime.Context
import Standard.Base.Runtime.Managed_Resource.Managed_Resource
import Standard.Base.Runtime.Ref.Ref
from Standard.Table import Table, Match_Columns, Excel, Excel_Range, Data_Formatter, Sheet_Names, Range_Names, Worksheet, Cell_Range, Delimited, Excel_Workbook
@ -15,6 +18,8 @@ import Standard.Examples
import project.Util
polyglot java import org.enso.table_test_helpers.RandomHelpers
spec_fmt header file read_method sheet_count=5 =
Test.group header <|
Test.specify "should read a workbook in" <|
@ -70,58 +75,67 @@ spec_fmt header file read_method sheet_count=5 =
spec_write suffix test_sheet_name =
Test.group ("Write " + suffix + " Files") <|
out = enso_project.data / ('out.' + suffix)
out_bak = enso_project.data / ('out.' + suffix + '.bak')
table = enso_project.data/'varied_column.csv' . read
clothes = enso_project.data/'clothes.csv' . read
sub_clothes = clothes.select_columns [0, 1]
# Mutable counter backing `create_out`; it is incremented on every call so
# that each call produces a different file name.
counter = Ref.new 0
# Returns a file location `data/transient/out<i>.<suffix>`, where `i` is the
# freshly incremented counter value. Any file already present at that
# location is deleted first; `Panic.rethrow` turns a failed deletion into a
# panic instead of letting it pass silently.
create_out =
i = counter.get + 1
counter.put i
f = enso_project.data / "transient" / ("out" + i.to_text + "." + suffix)
Panic.rethrow f.delete_if_exists
f
Test.specify 'should write a table to non-existent file as a new sheet with headers; and return the file object on success' <|
out.delete_if_exists
out = create_out
table.write out on_problems=Report_Error . should_succeed . should_equal out
written = out.read
written.sheet_count . should_equal 1
written.sheet_names . should_equal ['EnsoSheet']
written.read 'EnsoSheet' . should_equal table
out.delete_if_exists
written.close
out.delete_if_exists . should_succeed
Test.specify 'should write a table to non-existent file in append mode as a new sheet with headers' <|
out.delete_if_exists
out = create_out
table.write out on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed
written = out.read
written.sheet_count . should_equal 1
written.sheet_names . should_equal ['EnsoSheet']
written.read 'EnsoSheet' . should_equal table
out.delete_if_exists
written.close
out.delete_if_exists . should_succeed
Test.specify 'should write a table to existing file overriding EnsoSheet' <|
out.delete_if_exists
out = create_out
table.write out on_problems=Report_Error . should_succeed
table.write out on_problems=Report_Error . should_succeed
written = out.read
written.sheet_count . should_equal 1
written.sheet_names . should_equal ['EnsoSheet']
written.read 'EnsoSheet' . should_equal table
out.delete_if_exists
written_workbook = out.read
written_workbook.sheet_count . should_equal 1
written_workbook.sheet_names . should_equal ['EnsoSheet']
written_workbook.read 'EnsoSheet' . should_equal table
written_workbook.close
out.delete_if_exists . should_succeed
Test.specify 'should write a table to existing file in overwrite mode as a new sheet with headers' <|
out.delete_if_exists
out = create_out
(enso_project.data / test_sheet_name) . copy_to out
table.write out (Excel (Worksheet "Another")) on_existing_file=Existing_File_Behavior.Overwrite on_problems=Report_Error . should_succeed
written = out.read (Excel (Worksheet "Another"))
written.should_equal table
out.delete_if_exists
out.delete_if_exists . should_succeed
Test.specify 'should write a table to existing file in overwrite mode as a new sheet without headers' <|
out.delete_if_exists
out = create_out
(enso_project.data / test_sheet_name) . copy_to out
table.write out (Excel (Worksheet "NoHeaders")) on_existing_file=Existing_File_Behavior.Overwrite on_problems=Report_Error . should_succeed
written = out.read (Excel (Worksheet "NoHeaders"))
written.should_equal (table.rename_columns ['A', 'B', 'C', 'D', 'E', 'F'])
out.delete_if_exists
out.delete_if_exists . should_succeed
Test.specify 'should create new sheets at the start if index is 0' <|
out.delete_if_exists
out = create_out
table.write out (Excel (Worksheet 0)) on_problems=Report_Error . should_succeed
clothes.write out (Excel (Worksheet 0)) on_problems=Report_Error . should_succeed
read_1 = out.read (Excel (Worksheet "Sheet1"))
@ -130,214 +144,318 @@ spec_write suffix test_sheet_name =
read_2 . should_equal clothes
read_3 = out.read (Excel (Sheet_Names))
read_3 . should_equal ["Sheet2", "Sheet1"]
out.delete_if_exists
out.delete_if_exists . should_succeed
Test.specify 'should write a table to specific single cell location of an existing sheet' <|
out.delete_if_exists
out = create_out
(enso_project.data / test_sheet_name) . copy_to out
table.write out (Excel (Cell_Range "Another!G1")) on_problems=Report_Error . should_succeed
written = out.read (Excel (Cell_Range "Another!G1"))
written.should_equal table
out.delete_if_exists
out.delete_if_exists . should_succeed
Test.specify 'should clear out an existing fixed range and replace' <|
out.delete_if_exists
out = create_out
(enso_project.data / test_sheet_name) . copy_to out
sub_clothes.write out (Excel (Cell_Range "Another!A1:D20")) on_problems=Report_Error . should_succeed
written = out.read (Excel (Cell_Range "Another!A1"))
written.should_equal sub_clothes
out.delete_if_exists
out.delete_if_exists . should_succeed
Test.specify 'should clear out an existing range and replace' <|
out.delete_if_exists
out = create_out
(enso_project.data / test_sheet_name) . copy_to out
sub_clothes.write out (Excel (Cell_Range "Another!A1")) on_problems=Report_Error . should_succeed
written = out.read (Excel (Cell_Range "Another!A1"))
written.should_equal sub_clothes
out.delete_if_exists
out.delete_if_exists . should_succeed
Test.specify 'should result in Invalid_Location error if trying to write in a bad location' <|
out.delete_if_exists
out = create_out
(enso_project.data / test_sheet_name) . copy_to out
sub_clothes.write out (Excel (Cell_Range "DoesNotExist!A1")) . should_fail_with Invalid_Location
sub_clothes.write out (Excel (Cell_Range "DoesNotExist!A1:B2")) . should_fail_with Invalid_Location
sub_clothes.write out (Excel (Cell_Range "SillyRangeName")) . should_fail_with Invalid_Location
out.delete_if_exists
out.delete_if_exists . should_succeed
Test.specify 'should result in Range_Exceeded error if trying to write in too small a range' <|
out.delete_if_exists
out = create_out
(enso_project.data / test_sheet_name) . copy_to out
sub_clothes.write out (Excel (Cell_Range "Another!A1:B2")) . should_fail_with Range_Exceeded
out.delete_if_exists
out.delete_if_exists . should_succeed
Test.specify 'should result in Existing_Data error if in Error mode and trying to replace' <|
out.delete_if_exists
out = create_out
(enso_project.data / test_sheet_name) . copy_to out
lmd = out.last_modified_time
sub_clothes.write out (Excel (Worksheet 1)) on_existing_file=Existing_File_Behavior.Error . should_fail_with Existing_Data
sub_clothes.write out (Excel (Worksheet "Another")) on_existing_file=Existing_File_Behavior.Error . should_fail_with Existing_Data
sub_clothes.write out (Excel (Cell_Range "Another!A1")) on_existing_file=Existing_File_Behavior.Error . should_fail_with Existing_Data
sub_clothes.write out (Excel (Cell_Range "Sheet1!A9")) on_existing_file=Existing_File_Behavior.Error . should_fail_with Existing_Data
out.last_modified_time.should_equal lmd
out.delete_if_exists
r1 = sub_clothes.write out (Excel (Worksheet 1)) on_existing_file=Existing_File_Behavior.Error
r1.should_fail_with File_Error
r1.catch.should_be_a File_Error.Already_Exists
Test.specify 'should not allow adding a new sheet if in Error mode and not clashing' <|
out.delete_if_exists
sub_clothes.write out (Excel (Worksheet "Another")) on_existing_file=Existing_File_Behavior.Error . should_fail_with File_Error
sub_clothes.write out (Excel (Cell_Range "Another!A1")) on_existing_file=Existing_File_Behavior.Error . should_fail_with File_Error
sub_clothes.write out (Excel (Cell_Range "Sheet1!A9")) on_existing_file=Existing_File_Behavior.Error . should_fail_with File_Error
Test.with_clue "the original file should remain unmodified: " <|
out.last_modified_time.should_equal lmd
out.delete_if_exists . should_succeed
Test.specify 'should not allow adding a new sheet if in Error mode, even if sheet is not clashing' <|
out = create_out
(enso_project.data / test_sheet_name) . copy_to out
lmd = out.last_modified_time
result = sub_clothes.write out (Excel (Worksheet "Testing")) on_existing_file=Existing_File_Behavior.Error
result.should_fail_with File_Error
result.catch.should_be_a File_Error.Already_Exists
out.last_modified_time.should_equal lmd
out.delete_if_exists
Test.with_clue "the original file should remain unmodified: " <|
out.last_modified_time.should_equal lmd
out.delete_if_exists . should_succeed
Test.specify 'should write a table to non-existent file as a new sheet without headers' <|
out.delete_if_exists
out = create_out
table.write out (Excel (Worksheet "Sheet1") headers=False) on_problems=Report_Error . should_succeed
written = out.read
written.sheet_count . should_equal 1
written.sheet_names . should_equal ['Sheet1']
written.read 'Sheet1' . should_equal (table.rename_columns ['A', 'B', 'C', 'D', 'E', 'F'])
out.delete_if_exists
# We need to close the workbook to be able to delete it.
written.close
out.delete_if_exists . should_succeed
Test.specify 'should be able to append to a sheet by name' <|
out.delete_if_exists
out = create_out
(enso_project.data / test_sheet_name) . copy_to out
extra_another = Table.new [['AA', ['d', 'e']], ['BB',[4, 5]], ['CC',[True, False]], ['DD', ['2022-01-20', '2022-01-21']]]
expected = Table.new [['AA', ['a','b','c','d', 'e']], ['BB',[1,2,3,4,5]], ['CC',[True, False, False, True, False]]]
extra_another.write out (Excel (Worksheet "Another")) on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed
written = out.read (Excel (Worksheet "Another")) . select_columns [0, 1, 2]
written.should_equal expected
out.delete_if_exists
out.delete_if_exists . should_succeed
Test.specify 'should be able to append to a sheet by position' <|
out.delete_if_exists
out = create_out
(enso_project.data / test_sheet_name) . copy_to out
extra_another = Table.new [['A', ['d', 'e']], ['B',[4, 5]], ['C',[True, False]], ['D', ['2022-01-20', '2022-01-21']]]
expected = Table.new [['AA', ['a','b','c','d', 'e']], ['BB',[1,2,3,4,5]], ['CC',[True, False, False, True, False]]]
extra_another.write out (Excel (Worksheet "Another")) on_existing_file=Existing_File_Behavior.Append match_columns=Match_Columns.By_Position on_problems=Report_Error . should_succeed
written = out.read (Excel (Worksheet "Another")) . select_columns [0, 1, 2]
written.should_equal expected
out.delete_if_exists
out.delete_if_exists . should_succeed
Test.specify 'should be able to append to a sheet by name out of order' <|
out.delete_if_exists
out = create_out
(enso_project.data / test_sheet_name) . copy_to out
extra_another = Table.new [['CC',[True, False]], ['BB',[4, 5]], ['AA', ['d', 'e']], ['DD', ['2022-01-20', '2022-01-21']]]
expected = Table.new [['AA', ['a','b','c','d', 'e']], ['BB',[1,2,3,4,5]], ['CC',[True, False, False, True, False]]]
extra_another.write out (Excel (Worksheet "Another")) on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed
written = out.read (Excel (Worksheet "Another")) . select_columns [0, 1, 2]
written.should_equal expected
out.delete_if_exists
out.delete_if_exists . should_succeed
Test.specify 'should be able to append to a single cell by name' <|
out.delete_if_exists
out = create_out
(enso_project.data / test_sheet_name) . copy_to out
extra_another = Table.new [['AA', ['d', 'e']], ['BB',[4, 5]], ['CC',[True, False]], ['DD', ['2022-01-20', '2022-01-21']]]
expected = Table.new [['AA', ['a','b','c','d', 'e']], ['BB',[1,2,3,4,5]], ['CC',[True, False, False, True, False]]]
extra_another.write out (Excel (Cell_Range "Another!A1")) on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed
written = out.read (Excel (Worksheet "Another")) . select_columns [0, 1, 2]
written.should_equal expected
out.delete_if_exists
out.delete_if_exists . should_succeed
Test.specify 'should be able to append to a single cell by position' <|
out.delete_if_exists
out = create_out
(enso_project.data / test_sheet_name) . copy_to out
extra_another = Table.new [['A', ['d', 'e']], ['B',[4, 5]], ['C',[True, False]], ['D', ['2022-01-20', '2022-01-21']]]
expected = Table.new [['AA', ['a','b','c','d', 'e']], ['BB',[1,2,3,4,5]], ['CC',[True, False, False, True, False]]]
extra_another.write out (Excel (Cell_Range "Another!A1")) on_existing_file=Existing_File_Behavior.Append match_columns=Match_Columns.By_Position on_problems=Report_Error . should_succeed
written = out.read (Excel (Worksheet "Another")) . select_columns [0, 1, 2]
written.should_equal expected
out.delete_if_exists
out.delete_if_exists . should_succeed
Test.specify 'should be able to append to a single cell by name out of order' <|
out.delete_if_exists
out = create_out
(enso_project.data / test_sheet_name) . copy_to out
extra_another = Table.new [['CC',[True, False]], ['BB',[4, 5]], ['AA', ['d', 'e']], ['DD', ['2022-01-20', '2022-01-21']]]
expected = Table.new [['AA', ['a','b','c','d', 'e']], ['BB',[1,2,3,4,5]], ['CC',[True, False, False, True, False]]]
extra_another.write out (Excel (Cell_Range "Another!A1")) on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed
written = out.read (Excel (Worksheet "Another")) . select_columns [0, 1, 2]
written.should_equal expected
out.delete_if_exists
out.delete_if_exists . should_succeed
Test.specify 'should be able to append to a range by name' <|
out.delete_if_exists
out = create_out
(enso_project.data / test_sheet_name) . copy_to out
extra_another = Table.new [['AA', ['d', 'e']], ['BB', [4, 5]], ['CC', [True, False]], ['DD', ['2022-01-20', '2022-01-21']]]
expected = Table.new [['AA', ['a', 'b', 'c', 'd', 'e']], ['BB', [1, 2, 3, 4, 5]], ['CC', [True, False, False, True, False]]]
extra_another.write out (Excel (Cell_Range "Another!A1:D6")) on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed
written = out.read (Excel (Worksheet "Another")) . select_columns [0, 1, 2]
written.should_equal expected
out.delete_if_exists
out.delete_if_exists . should_succeed
Test.specify 'should be able to append to a range by position' <|
out.delete_if_exists
out = create_out
(enso_project.data / test_sheet_name) . copy_to out
extra_another = Table.new [['A', ['d', 'e']], ['B',[4, 5]], ['C',[True, False]], ['D', ['2022-01-20', '2022-01-21']]]
expected = Table.new [['AA', ['a','b','c','d', 'e']], ['BB',[1,2,3,4,5]], ['CC',[True, False, False, True, False]]]
extra_another.write out (Excel (Cell_Range "Another!A1:D6")) on_existing_file=Existing_File_Behavior.Append match_columns=Match_Columns.By_Position on_problems=Report_Error . should_succeed
written = out.read (Excel (Worksheet "Another")) . select_columns [0, 1, 2]
written.should_equal expected
out.delete_if_exists
out.delete_if_exists . should_succeed
Test.specify 'should be able to append to a range by name not in top left' <|
out.delete_if_exists
out = create_out
(enso_project.data / test_sheet_name) . copy_to out
extra_another = Table.new [['AA', ['d', 'e']], ['BB',[4, 5]], ['CC',[True, False]], ['DD', ['2022-01-20', '2022-01-21']]]
expected = Table.new [['AA', ['f', 'g', 'h', 'd', 'e']], ['BB',[1, 2, 3, 4, 5]], ['CC',[True, False, False, True, False]]]
extra_another.write out (Excel (Cell_Range "Random!K9")) on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed
written = out.read (Excel (Cell_Range "Random!K9")) . select_columns [0, 1, 2]
written.should_equal expected
out.delete_if_exists
out.delete_if_exists . should_succeed
Test.specify 'should be able to append to a range by name after deduplication of names' <|
out.delete_if_exists
out = create_out
(enso_project.data / test_sheet_name) . copy_to out
extra_another = Table.new [['AA', ['d', 'e']], ['BB',[4, 5]], ['AA 1',[True, False]], ['BB 1', ['2022-01-20', '2022-01-21']]]
expected = Table.new [['AA', ['f', 'g', 'h', 'd', 'e']], ['BB',[1, 2, 3, 4, 5]], ['AA 1',[True, False, False, True, False]]]
extra_another.write out (Excel (Cell_Range "Random!S3")) on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed
written = out.read (Excel (Cell_Range "Random!S3")) . select_columns [0, 1, 2]
written.should_equal expected
out.delete_if_exists
out.delete_if_exists . should_succeed
Test.specify 'should be able to append to a range by position not in top left' <|
out.delete_if_exists
out = create_out
(enso_project.data / test_sheet_name) . copy_to out
extra_another = Table.new [['A', ['d', 'e']], ['B',[4, 5]], ['C',[True, False]], ['D', ['2022-01-20', '2022-01-21']]]
expected = Table.new [['AA', ['f', 'g', 'h', 'd', 'e']], ['BB',[1, 2, 3, 4, 5]], ['CC',[True, False, False, True, False]]]
extra_another.write out (Excel (Cell_Range "Random!K9")) on_existing_file=Existing_File_Behavior.Append match_columns=Match_Columns.By_Position on_problems=Report_Error . should_succeed
written = out.read (Excel (Cell_Range "Random!K9")) . select_columns [0, 1, 2]
written.should_equal expected
out.delete_if_exists
out.delete_if_exists . should_succeed
Test.specify 'should be able to append to a range by name out of order' <|
out.delete_if_exists
out = create_out
(enso_project.data / test_sheet_name) . copy_to out
extra_another = Table.new [['CC',[True, False]], ['BB',[4, 5]], ['AA', ['d', 'e']], ['DD', ['2022-01-20', '2022-01-21']]]
expected = Table.new [['AA', ['a','b','c','d', 'e']], ['BB',[1,2,3,4,5]], ['CC',[True, False, False, True, False]]]
extra_another.write out (Excel (Cell_Range "Another!A1:D6")) on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed
written = out.read (Excel (Worksheet "Another")) . select_columns [0, 1, 2]
written.should_equal expected
out.delete_if_exists
out.delete_if_exists . should_succeed
Test.specify 'should be able to write to a new dry run file' <|
out.delete_if_exists
out = create_out
temp = Context.Output.with_disabled <|
result = table.write out on_problems=Report_Error . should_succeed
Problems.expect_only_warning Dry_Run_Operation result
result.exists.should_be_true
result.absolute.normalize.path . should_not_equal out.absolute.normalize.path
written = result.read
written.sheet_count . should_equal 1
written.sheet_names . should_equal ['EnsoSheet']
written.read 'EnsoSheet' . should_equal table
written.close
result
temp.delete_if_exists
Test.specify "should be able to write to a dry-run file, even if the dry-run workbook is open" <|
out = create_out
out.exists.should_be_false
temp = Context.Output.with_disabled <|
result = table.write out on_problems=Report_Error . should_succeed
Problems.expect_only_warning Dry_Run_Operation result
result.exists.should_be_true
result
temp.absolute.normalize.path . should_not_equal out.absolute.normalize.path
out.exists.should_be_false
opened_temp = temp.read
opened_temp.sheet_names . should_equal ['EnsoSheet']
temp2 = Context.Output.with_disabled <|
result = table.write out (Excel (Worksheet "Another")) on_problems=Report_Error . should_succeed
Problems.expect_only_warning Dry_Run_Operation result
result.exists.should_be_true
result
# The result should be written to the same dry-run file on second attempt.
temp2.absolute.normalize.path . should_equal temp.absolute.normalize.path
## The write operation replaces the dry run file, basing off of the _original_ out file
(which was empty in this example), so we still only get one sheet.
A different scenario is covered by the next test: if the subsequent write targets the returned
dry-run file object, then both updates are visible.
opened_temp.sheet_names . should_equal ['Another']
opened_temp.close
temp.delete_if_exists
# With the Output context disabled, performs two writes: the first targets
# `out`, the second targets the dry-run file returned by the first. Both must
# land in the same dry-run file, and a workbook opened between the two writes
# must list the sheets of both afterwards.
Test.specify "should be able to write to a dry-run file multiple times if the dry-run file object is threaded through" <|
out = create_out
# First write: expected to succeed with only a `Dry_Run_Operation` warning
# and to return an existing file.
temp1 = Context.Output.with_disabled <|
result = table.write out on_problems=Report_Error . should_succeed
Problems.expect_only_warning Dry_Run_Operation result
result.exists.should_be_true
result
# The returned file must not be the requested target itself.
temp1.absolute.normalize.path . should_not_equal out.absolute.normalize.path
# The workbook is opened before the second write and stays open during it.
opened_temp = temp1.read
opened_temp.sheet_names . should_equal ['EnsoSheet']
# Second write: targets the dry-run file `temp1`, not `out`, and adds the sheet "Another".
temp2 = Context.Output.with_disabled <|
result = table.write temp1 (Excel (Worksheet "Another")) on_problems=Report_Error . should_succeed
Problems.expect_only_warning Dry_Run_Operation result
result.exists.should_be_true
result
# The result should be written to the same file though.
temp2.absolute.normalize.path . should_equal temp1.absolute.normalize.path
# The write operation replaces the dry run file, basing off of the dry-run file itself - so both changes are visible.
opened_temp.sheet_names . should_equal ['EnsoSheet', 'Another']
# The workbook is closed before the dry-run file is removed.
opened_temp.close
temp1.delete_if_exists
# Writes three one-cell tables in a row using `Existing_File_Behavior.Backup`
# and checks the contents of the output file and of its `.bak` sibling. The
# third write happens while both files are open as workbooks.
Test.specify "should be able to create a backup, even if it is currently open" <|
out = create_out
# Location where the backup is expected: same directory, file name + ".bak".
bak = out.parent / (out.name+".bak")
t1 = Table.new [["X", [1]]]
t1.write out on_existing_file=Existing_File_Behavior.Backup on_problems=Report_Error . should_succeed
# `out` did not exist before the first write, so no backup is expected yet.
bak.exists.should_be_false
t2 = Table.new [["X", [2]]]
t2.write out on_existing_file=Existing_File_Behavior.Backup on_problems=Report_Error . should_succeed
# The second write replaces existing contents, so the backup now exists.
bak.exists.should_be_true
opened_out = out.read
# We need to specify explicit format for the backup, because the extension is changed:
opened_backup = bak.read (Excel xls_format=(suffix=="xls"))
opened_out.read 'EnsoSheet' . should_equal t2
opened_backup.read 'EnsoSheet' . should_equal t1
# Third write, performed while both workbooks above are still open.
t3 = Table.new [["X", [3]]]
t3.write out on_existing_file=Existing_File_Behavior.Backup on_problems=Report_Error . should_succeed
# The already-open output workbook must return the newly written data.
opened_out.read 'EnsoSheet' . should_equal t3
# The backup should actually have been updated
opened_backup.read 'EnsoSheet' . should_equal t2
# Both workbooks are closed before the files are deleted.
opened_out.close
opened_backup.close
out.delete_if_exists . should_succeed
bak.delete_if_exists . should_succeed
Test.specify 'should be able to write to an existing empty file' <|
out.delete_if_exists
out = create_out
[].write_bytes out
out_bak.delete_if_exists
out_bak = out.parent / (out.name+".bak")
table.write out on_problems=Report_Error . should_succeed . should_equal out
written = out.read
@ -345,73 +463,75 @@ spec_write suffix test_sheet_name =
written.sheet_names . should_equal ['EnsoSheet']
written.read 'EnsoSheet' . should_equal table
out_bak.exists.should_be_true
out_bak.size.should_equal 0
Test.with_clue "should have created a backup file: " <|
out_bak.exists.should_be_true
out_bak.size.should_equal 0
out.delete_if_exists
out_bak.delete_if_exists
written.close
out.delete_if_exists . should_succeed
out_bak.delete_if_exists . should_succeed
Test.specify 'should fail to append to a sheet by name if missing columns' <|
out.delete_if_exists
out = create_out
(enso_project.data / test_sheet_name) . copy_to out
extra_another = Table.new [['CC',[True, False]], ['BB',[4, 5]], ['AA', ['d', 'e']]]
extra_another.write out (Excel (Worksheet "Another")) on_existing_file=Existing_File_Behavior.Append . should_fail_with Column_Name_Mismatch
out.delete_if_exists
out.delete_if_exists . should_succeed
Test.specify 'should fail to append to a sheet by name if extra columns' <|
out.delete_if_exists
out = create_out
(enso_project.data / test_sheet_name) . copy_to out
lmd = out.last_modified_time
extra_another = Table.new [['CC',[True, False]], ['BB',[4, 5]], ['AA', ['d', 'e']], ['DD', ['2022-01-20', '2022-01-21']], ['EE', ['2022-01-20', '2022-01-21']]]
extra_another.write out (Excel (Worksheet "Another")) on_existing_file=Existing_File_Behavior.Append . should_fail_with Column_Name_Mismatch
out.last_modified_time.should_equal lmd
out.delete_if_exists
out.delete_if_exists . should_succeed
Test.specify 'should fail to append to a sheet by name if no headers' <|
out.delete_if_exists
out = create_out
(enso_project.data / test_sheet_name) . copy_to out
lmd = out.last_modified_time
extra_another = Table.new [['CC',[True, False]], ['BB',[4, 5]], ['AA', ['d', 'e']], ['DD', ['2022-01-20', '2022-01-21']], ['EE', ['2022-01-20', '2022-01-21']]]
extra_another.write out (Excel (Worksheet "NoHeaders")) on_existing_file=Existing_File_Behavior.Append . should_fail_with Illegal_Argument
extra_another.write out (Excel (Worksheet "Another") False) on_existing_file=Existing_File_Behavior.Append . should_fail_with Illegal_Argument
extra_another.write out (Excel (Worksheet "Another") headers=False) on_existing_file=Existing_File_Behavior.Append . should_fail_with Illegal_Argument
out.last_modified_time.should_equal lmd
out.delete_if_exists
out.delete_if_exists . should_succeed
Test.specify 'should fail to append to a sheet by position if too few columns' <|
out.delete_if_exists
out = create_out
(enso_project.data / test_sheet_name) . copy_to out
lmd = out.last_modified_time
extra_another = Table.new [['CC',[True, False]], ['BB',[4, 5]], ['AA', ['d', 'e']]]
extra_another.write out (Excel (Worksheet "Another")) on_existing_file=Existing_File_Behavior.Append match_columns=Match_Columns.By_Position . should_fail_with Column_Count_Mismatch
out.last_modified_time.should_equal lmd
out.delete_if_exists
out.delete_if_exists . should_succeed
Test.specify 'should fail to append to a sheet by position if too many columns' <|
out.delete_if_exists
out = create_out
(enso_project.data / test_sheet_name) . copy_to out
lmd = out.last_modified_time
extra_another = Table.new [['CC',[True, False]], ['BB',[4, 5]], ['AA', ['d', 'e']], ['DD', ['2022-01-20', '2022-01-21']], ['EE', ['2022-01-20', '2022-01-21']]]
extra_another.write out (Excel (Worksheet "Another")) on_existing_file=Existing_File_Behavior.Append match_columns=Match_Columns.By_Position . should_fail_with Column_Count_Mismatch
out.last_modified_time.should_equal lmd
out.delete_if_exists
out.delete_if_exists . should_succeed
Test.specify 'should fail to append to a range by name if not large enough' <|
out.delete_if_exists
out = create_out
(enso_project.data / test_sheet_name) . copy_to out
lmd = out.last_modified_time
extra_another = Table.new [['AA', ['d', 'e']], ['BB',[4, 5]], ['CC',[True, False]], ['DD', ['2022-01-20', '2022-01-21']]]
extra_another.write out (Excel (Cell_Range "Another!A1:D5")) on_existing_file=Existing_File_Behavior.Append . should_fail_with Range_Exceeded
out.last_modified_time.should_equal lmd
out.delete_if_exists
out.delete_if_exists . should_succeed
Test.specify 'should fail to append to a range by name if it hits another table' <|
out.delete_if_exists
out = create_out
(enso_project.data / test_sheet_name) . copy_to out
lmd = out.last_modified_time
extra_another = Table.new [['AA', ['d', 'e']], ['BB',[4, 5]], ['CC',[True, False]], ['DD', ['2022-01-20', '2022-01-21']]]
extra_another.write out (Excel (Cell_Range "Random!B3")) on_existing_file=Existing_File_Behavior.Append . should_fail_with Existing_Data
out.last_modified_time.should_equal lmd
out.delete_if_exists
out.delete_if_exists . should_succeed
Test.specify "should fail if the target file is read-only" <|
f = enso_project.data / "transient" / "permission."+suffix
@ -419,19 +539,56 @@ spec_write suffix test_sheet_name =
f.delete_if_exists
initial_data = Table.new [["Y", [10, 20, 30]]]
initial_data.write f
Util.set_writable f False
initial_data.write f . should_succeed
Util.set_writable f False . should_succeed
t1 = Table.new [["X", [1, 2, 3]]]
[Existing_File_Behavior.Backup, Existing_File_Behavior.Overwrite, Existing_File_Behavior.Append].each behavior->
[Existing_File_Behavior.Backup, Existing_File_Behavior.Overwrite, Existing_File_Behavior.Append].each behavior-> Test.with_clue behavior.to_text+": " <|
f.exists . should_be_true
r1 = t1.write f (Excel (Worksheet "Another")) on_existing_file=behavior
r1.should_fail_with File_Error
r1.catch.should_be_a File_Error.Access_Denied
f.read . read "EnsoSheet" . should_equal initial_data
Test.with_clue "("+r1.catch.to_display_text+") " <|
r1.should_fail_with File_Error
r1.catch.should_be_a File_Error.Access_Denied
read_table = Managed_Resource.bracket (f.read) (.close) workbook->
workbook.read "EnsoSheet"
read_table.should_equal initial_data
Util.set_writable f True
f.delete
# Opens the same file as two workbooks, then overwrites the file (once with
# `Backup`, once with `Overwrite`) and checks that both already-open workbooks
# return the new contents when the sheet is read again.
Test.specify "should allow to write to a workbook that is open, and reflect that changes when the sheet is read again" <|
out = create_out
table.write out on_problems=Report_Error . should_succeed
workbook = out.read (Excel headers=True)
workbook.sheet_names.should_equal ["EnsoSheet"]
# The text representation is expected to mention only the file name.
workbook.to_text . should_equal "Excel_Workbook ("+out.name+")"
# We can have the workbook open multiple times in parallel too.
w2 = out.read (Excel headers=True)
t1 = workbook.read "EnsoSheet"
t1.should_equal table
[Existing_File_Behavior.Backup, Existing_File_Behavior.Overwrite].each behavior-> Test.with_clue behavior.to_text+": " <|
# The behaviour name is embedded in the data, so each iteration writes a distinct table.
t2 = Table.new [["X", [behavior.to_text, "B", "C", behavior.to_text+"..."]]]
t2.write out on_existing_file=behavior . should_succeed
workbook.sheet_names.should_equal ["EnsoSheet"]
# If we read the table again, it has the new values in it:
t3 = workbook.read "EnsoSheet"
t3.should_equal t2
t4 = w2.read "EnsoSheet"
t4.should_equal t2
# Both workbooks are closed before the file is deleted.
workbook.close
w2.close
out.delete_if_exists . should_succeed
Test.specify "should fail if the parent directory does not exist" <|
parent = enso_project.data / "transient" / "nonexistent"
parent.exists.should_be_false
@ -439,19 +596,34 @@ spec_write suffix test_sheet_name =
f = parent / "foo."+suffix
t1 = Table.new [["X", [1, 2, 3]]]
r1 = t1.write f (Excel (Worksheet "Another"))
r1.should_fail_with File_Error
r1.catch.should_be_a File_Error.Not_Found
Test.with_clue "("+r1.catch.to_display_text+") " <|
r1.should_fail_with File_Error
r1.catch.should_be_a File_Error.Not_Found
Test.specify "should allow to write and read-back Unicode characters" <|
encodings = enso_project.data / "transient" / "encodings."+suffix
encodings.delete_if_exists . should_succeed
t1 = Table.new [["A", ["A", "B", "😊", "D"]], ["B", [1, 2, 3, 4]]]
t1.write encodings (Excel (Worksheet "Another"))
t1.write encodings (Excel (Worksheet "Another")) . should_succeed
t2 = encodings.read (Excel (Worksheet "Another"))
t2.at "A" . to_vector . should_equal ["A", "B", "😊", "D"]
encodings.delete
out.delete_if_exists
out_bak.delete_if_exists
# For each of `Backup`, `Overwrite` and `Append`: creates a zero-byte file,
# writes a table to it using that behaviour, and checks that the table can be
# read back from the sheet "EnsoSheet".
Test.specify "should be able to overwrite a pre-existing empty file" <|
empty = enso_project.data / "transient" / "empty."+suffix
[Existing_File_Behavior.Backup, Existing_File_Behavior.Overwrite, Existing_File_Behavior.Append].each behavior-> Test.with_clue behavior.to_text+": " <|
# Start every iteration from a clean state.
empty.delete_if_exists . should_succeed
# Writing an empty text creates the file; the two checks below confirm
# that it exists and has a size of 0.
"".write empty
empty.exists.should_be_true
empty.size.should_equal 0
# The behaviour name is embedded in the data, so each iteration writes a distinct table.
t1 = Table.new [["A", [behavior.to_text, "B", "C", "D"]], ["B", [1, 2, 3, 4]]]
t1.write empty on_existing_file=behavior . should_succeed
empty.exists.should_be_true
t2 = empty.read (Excel (Worksheet "EnsoSheet"))
t2.should_equal t1
spec =
Test.group 'Excel Range' <|
@ -703,10 +875,24 @@ spec =
result.catch.should_be_a File_Error.Not_Found
Test.specify "should handle wrong xls_format gracefully" <|
xlsx_sheet_copy = enso_project.data / "transient" / "TestSheetCopy.xlsx"
xlsx_sheet.copy_to xlsx_sheet_copy
# At first, it fails with File_Error
r1 = xlsx_sheet.read (Excel (Cell_Range "Sheet1!A:C") xls_format=True)
r1.should_fail_with File_Error
r1.catch.should_be_a File_Error.Corrupted_Format
# If we now open it correctly
r1_2 = xlsx_sheet.read
r1_2.should_succeed
# And then wrong again
r1_3 = xlsx_sheet.read (Excel (Cell_Range "Sheet1!A:C") xls_format=True)
# It should still fail the same:
r1_3.should_fail_with File_Error
r1_3.catch.should_be_a File_Error.Corrupted_Format
r2 = xls_sheet.read (Excel (Cell_Range "Sheet1!A:C") xls_format=False)
r2.should_fail_with File_Error
r2.catch.should_be_a File_Error.Corrupted_Format
@ -735,6 +921,71 @@ spec =
bad_file.delete
Test.specify "will fail if an operation is performed on a closed workbook" <|
workbook = xlsx_sheet.read
workbook.sheet_count . should_equal 4
workbook.close . should_equal Nothing
workbook.sheet_count . should_fail_with Illegal_State
workbook.close . should_equal Nothing
workbook.read "Sheet1" . should_fail_with Illegal_State
ci_pending = if Environment.get "CI" != Nothing then "This test takes a lot of time so it is disabled on CI."
Test.specify "should be able to write and read a big XLSX file (>110MB)" pending=ci_pending <|
n = 10^6
IO.println "Generating big XLSX file "+Time_Of_Day.now.to_text
rng = RandomHelpers.new 123
v = Vector.new n _->
rng.makeRandomString 190
table = Table.new [["X", v]]
big_file = enso_project.data / "transient" / "big.xlsx"
big_file.delete_if_exists
table.write big_file on_existing_file=Existing_File_Behavior.Overwrite on_problems=Report_Error . should_succeed
IO.println "Done "+Time_Of_Day.now.to_text
# Verify that the file is as big as we expected.
size = big_file.size / (1024*1024)
Test.with_clue "size="+size.to_text+"MB " <|
(size > 110).should_be_true
workbook = big_file.read
sheets = workbook.sheet_names
sheets.length . should_equal 1
read_table = workbook.read (sheets.at 0)
read_table.row_count . should_equal n+1
read_table.column_names.length . should_equal 1
read_table.at 0 . at 0 . should_equal "X"
read_table.at 0 . at 1 . should_equal (v.at 0)
Test.specify "should be able to write and read a big XLS file (>110MB)" pending=ci_pending <|
IO.println "Generating big XLS file "+Time_Of_Day.now.to_text
rng = RandomHelpers.new 123
# Here we instead create a 2D table, because XLS has a limit of 65536 rows and 256 columns.
rows = 65000
cols = 20
table = Table.new <| Vector.new cols i->
v = Vector.new rows _-> rng.makeRandomString 100
["col" + i.to_text, v]
big_file = enso_project.data / "transient" / "big.xls"
big_file.delete_if_exists
table.write big_file on_existing_file=Existing_File_Behavior.Overwrite on_problems=Report_Error . should_succeed
IO.println "Done "+Time_Of_Day.now.to_text
# Verify that the file is as big as we expected.
size = big_file.size / (1024*1024)
Test.with_clue "size="+size.to_text+"MB " <|
(size > 110).should_be_true
workbook = big_file.read
sheets = workbook.sheet_names
sheets.length . should_equal 1
read_table = workbook.read (sheets.at 0)
read_table.row_count . should_equal rows+1
read_table.column_names.length . should_equal cols
spec_fmt 'XLSX reading' Examples.xlsx .read
spec_fmt 'XLS reading' Examples.xls .read
@ -784,6 +1035,8 @@ spec =
problems = [Duplicate_Output_Column_Names.Error ["DD"]]
Problems.test_problem_handling action problems tester
# Cleanup any leftovers from previous runs
enso_project.data/"transient" . list "out*" . each .delete
spec_write "xlsx" 'TestSheet.xlsx'
spec_write "xls" 'TestSheetOld.xls'

View File

@ -0,0 +1,60 @@
from Standard.Base import all
import Standard.Base.Errors.File_Error.File_Error
import Standard.Base.Runtime.Context
from Standard.Table import all
import Standard.Table.Errors.Invalid_JSON_Format
from Standard.Test import Test, Test_Suite
import Standard.Test.Extensions
import project.Util
main = Test_Suite.run_main spec
spec =
    ## To run this test locally:
       $ sbt 'simple-httpbin/run localhost 8080'
       $ export ENSO_HTTP_TEST_HTTPBIN_URL=http://localhost:8080/
    base_url = Environment.get "ENSO_HTTP_TEST_HTTPBIN_URL"
    # Append a trailing slash if it is missing, so that relative paths can be
    # concatenated directly. Stays `Nothing` when the variable is not set.
    base_url_with_slash = base_url.if_not_nothing <|
        if base_url.ends_with "/" then base_url else base_url + "/"
    # Message passed as `pending` to the group when no server URL is configured.
    pending_has_url = if base_url == Nothing then "The HTTP tests only run when the `ENSO_HTTP_TEST_HTTPBIN_URL` environment variable is set to URL of the httpbin server" else Nothing

    Test.group "fetching files using HTTP" pending=pending_has_url <|
        # Formats whose fetched response is converted explicitly with `.to Table`.
        check_convertible extension =
            Test.specify "fetching "+extension <|
                response = Data.fetch base_url_with_slash+"testfiles/table."+extension
                expected = Table.from_rows ["A", "B"] [[1, "x"], [3, "y"]]
                response.to Table . should_equal expected

        # Excel formats: the fetched value is checked as an `Excel_Workbook`,
        # and decoding the raw response with an explicit worksheet is checked
        # to give a `Table`.
        check_excel extension =
            Test.specify "fetching "+extension <|
                url = base_url_with_slash+"testfiles/table."+extension
                workbook = Data.fetch url
                expected = Table.from_rows ["A", "B"] [[1, "x"], [3, "y"]]

                workbook.should_be_a Excel_Workbook
                workbook.sheet_names . should_equal ["MyTestSheet"]
                workbook.read "MyTestSheet" . should_equal expected

                decoded = Data.fetch url try_auto_parse_response=False . decode (Excel (Excel_Section.Worksheet "MyTestSheet"))
                decoded.should_be_a Table
                decoded . should_equal expected

        check_convertible "json"
        check_convertible "csv"
        check_excel "xls"
        check_excel "xlsx"

View File

@ -10,109 +10,155 @@ import Standard.Test.Extensions
import project.Util
spec = Test.group 'Various File Format support on Table' <|
t1 = Table.new [["X", [1, 2, 3]]]
spec =
transient = enso_project.data / "transient"
simple_empty = enso_project.data/'simple_empty.csv' . read
Test.specify "should be able to be written as CSV, Excel" <|
f1 = transient / "test2.csv"
f2 = transient / "test3.xlsx"
[f1, f2].each f->
f.delete_if_exists
t1.write f . should_succeed
f.exists.should_be_true
f.delete
Test.specify "should be able to be written as JSON using Table.write" <|
f1 = transient / "test1.json"
f1.delete_if_exists
t1.write f1 . should_succeed
f1.exists.should_be_true
f1.delete
Test.specify 'should write JSON tables' <|
Test.group 'Various File Format support on Table' <|
t1 = Table.new [["X", [1, 2, 3]]]
simple_empty = enso_project.data/'simple_empty.csv' . read
out = transient / 'out.json'
out.delete_if_exists
simple_empty.write out . should_equal out
Table.from_objects (Json.parse out.read_text) ['a', 'b', 'c'] . should_equal simple_empty
out.delete_if_exists
Test.specify 'should append to JSON tables' <|
out = transient / 'out.json'
out.delete_if_exists
simple_empty.write out . should_equal out
simple_empty.write out on_existing_file=Existing_File_Behavior.Append . should_equal out
Table.from_objects (Json.parse out.read_text) ['a', 'b', 'c'] . row_count . should_equal 2*simple_empty.row_count
out.delete_if_exists
Test.specify "should be able to be written as CSV, Excel" <|
f1 = transient / "test2.csv"
f2 = transient / "test3.xlsx"
[f1, f2].each f->
f.delete_if_exists
t1.write f . should_succeed
f.exists.should_be_true
f.delete
Test.specify 'should fail to append to JSON non-arrays' <|
out = transient / 'out.json'
out.delete_if_exists
'1'.write out
simple_empty.write out on_existing_file=Existing_File_Behavior.Append . should_fail_with Invalid_JSON_Format
out.delete_if_exists
Test.specify "should be able to be written as JSON using Table.write" <|
f1 = transient / "test1.json"
f1.delete_if_exists
t1.write f1 . should_succeed
f1.exists.should_be_true
f1.delete
'"Hello World"'.write out
simple_empty.write out on_existing_file=Existing_File_Behavior.Append . should_fail_with Invalid_JSON_Format
out.delete_if_exists
Test.specify 'should write JSON tables' <|
simple_empty = enso_project.data/'simple_empty.csv' . read
out = transient / 'out.json'
out.delete_if_exists
simple_empty.write out . should_equal out
Table.from_objects (Json.parse out.read_text) ['a', 'b', 'c'] . should_equal simple_empty
out.delete_if_exists
'{}'.write out
simple_empty.write out on_existing_file=Existing_File_Behavior.Append . should_fail_with Invalid_JSON_Format
out.delete_if_exists
Test.specify 'should append to JSON tables' <|
out = transient / 'out.json'
out.delete_if_exists
simple_empty.write out . should_equal out
simple_empty.write out on_existing_file=Existing_File_Behavior.Append . should_equal out
Table.from_objects (Json.parse out.read_text) ['a', 'b', 'c'] . row_count . should_equal 2*simple_empty.row_count
out.delete_if_exists
Test.specify "should fail gracefully when provided with an unsupported format" <|
f1 = (transient / "test4.unknown-format")
f1.delete_if_exists
r1 = t1.write f1
r1.should_fail_with File_Error
r1.catch.should_be_a File_Error.Unsupported_Output_Type
r1.catch.format . should_equal f1
r1.catch.to_display_text . should_equal "Values of type Table cannot be written as format test4.unknown-format."
Test.specify 'should fail to append to JSON non-arrays' <|
out = transient / 'out.json'
out.delete_if_exists
'1'.write out
simple_empty.write out on_existing_file=Existing_File_Behavior.Append . should_fail_with Invalid_JSON_Format
out.delete_if_exists
f2 = (transient / "test5.txt")
f2.delete_if_exists
my_format = Plain_Text Encoding.ascii
r2 = t1.write f2 my_format
r2.should_fail_with File_Error
r2.catch.should_be_a File_Error.Unsupported_Output_Type
r2.catch.format . should_equal my_format
'"Hello World"'.write out
simple_empty.write out on_existing_file=Existing_File_Behavior.Append . should_fail_with Invalid_JSON_Format
out.delete_if_exists
write_test extension =
f = transient / ("big." + extension)
f.delete_if_exists
f_bak = transient / ("big." + extension + ".bak")
f_bak.delete_if_exists
'{}'.write out
simple_empty.write out on_existing_file=Existing_File_Behavior.Append . should_fail_with Invalid_JSON_Format
out.delete_if_exists
big_table = Table.new [["a", 1.up_to 2000 . to_vector]]
big_table.write f
Test.specify "should fail gracefully when provided with an unsupported format" <|
f1 = (transient / "test4.unknown-format")
f1.delete_if_exists
r1 = t1.write f1
r1.should_fail_with File_Error
r1.catch.should_be_a File_Error.Unsupported_Output_Type
r1.catch.format . should_equal f1
r1.catch.to_display_text . should_equal "Values of type Table cannot be written as format test4.unknown-format."
new_table = Table.new [["a", 2000.up_to 4000 . to_vector]]
r = Context.Output.with_disabled <|
s = new_table.write f
s.exists.should_be_true
f2 = (transient / "test5.txt")
f2.delete_if_exists
my_format = Plain_Text Encoding.ascii
r2 = t1.write f2 my_format
r2.should_fail_with File_Error
r2.catch.should_be_a File_Error.Unsupported_Output_Type
r2.catch.format . should_equal my_format
r_data = s.read
row_count = if r_data . is_a Table then r_data.row_count else r_data.length
row_count . should_equal 1000
s
write_tests extension = Test.group 'Writing to '+extension+' files' <|
count result =
if result . is_a Table then result.row_count else result.length
f_bak.exists.should_be_false
Test.specify "should write to a temporary "+extension+" file part of the data if context is disabled" <|
f = transient / ("big." + extension)
f.delete_if_exists
f_bak = transient / ("big." + extension + ".bak")
f_bak.delete_if_exists
f.exists.should_be_true
f_data = f.read
f_row_count = if f_data . is_a Table then f_data.row_count else f_data.length
f_row_count . should_equal 1999
big_table = Table.new [["a", 1.up_to 2000 . to_vector]]
big_table.write f
f.delete_if_exists
f_bak.delete_if_exists
r.delete_if_exists
new_table = Table.new [["a", 2000.up_to 4000 . to_vector]]
r = Context.Output.with_disabled <|
s = new_table.write f
s.exists.should_be_true
Test.specify "should write to a temporary CSV file part of the data if context disabled" <|
write_test "csv"
r_data = s.read
count r_data . should_equal 1000
s
Test.specify "should write to a temporary JSON file part of the data if context disabled" <|
write_test "json"
f_bak.exists.should_be_false
f.exists.should_be_true
f_data = f.read
count f_data . should_equal 1999
f.delete_if_exists
f_bak.delete_if_exists
r.delete_if_exists
Test.specify "should create a backup file if overwriting" <|
f = transient / ("test." + extension)
f.delete_if_exists
f_bak = transient / ("test." + extension + ".bak")
f_bak.delete_if_exists
t1 = Table.new [["a", 0.up_to 10 . to_vector]]
t1.write f . should_succeed . should_equal f
f.exists.should_be_true
f_bak.exists.should_be_false
t2 = Table.new [["a", 0.up_to 30 . to_vector]]
t2.write f . should_succeed . should_equal f
f.exists.should_be_true
f_bak.exists.should_be_true
count f.read . should_equal 30
# ensure correct format is used for reading the .bak file
format = Auto_Detect.get_reading_format f
count (f_bak.read format) . should_equal 10
f.delete_if_exists
f_bak.delete_if_exists
Test.specify "should support appending" <|
f = transient / ("test." + extension)
f.delete_if_exists
f_bak = transient / ("test." + extension + ".bak")
f_bak.delete_if_exists
t1 = Table.new [["a", 0.up_to 10 . to_vector]]
t1.write f on_existing_file=Existing_File_Behavior.Append . should_succeed . should_equal f
f.exists.should_be_true
f_bak.exists.should_be_false
t2 = Table.new [["a", 0.up_to 30 . to_vector]]
t2.write f on_existing_file=Existing_File_Behavior.Append . should_succeed . should_equal f
f.exists.should_be_true
f_bak.exists.should_be_false
count f.read . should_equal 40
f.delete_if_exists
write_tests "csv"
write_tests "json"
main = Test_Suite.run_main spec

View File

@ -6,6 +6,7 @@ import project.IO.Csv_Spec
import project.IO.Delimited_Read_Spec
import project.IO.Delimited_Write_Spec
import project.IO.Excel_Spec
import project.IO.Fetch_Spec
import project.IO.Formats_Spec
import project.IO.Json_Spec
@ -15,6 +16,7 @@ spec =
Delimited_Write_Spec.spec
Excel_Spec.spec
Formats_Spec.spec
Fetch_Spec.spec
Json_Spec.spec
main = Test_Suite.run_main spec

View File

@ -0,0 +1,20 @@
package org.enso.table_test_helpers;
import java.util.Random;
public class RandomHelpers {
private final Random rng;
public RandomHelpers(int seed) {
this.rng = new Random(seed);
}
public String makeRandomString(int length) {
StringBuilder sb = new StringBuilder();
int n = 'z' - 'A';
for (int i = 0; i < length; i++) {
sb.append((char) (rng.nextInt(n) + 'A'));
}
return sb.toString();
}
}

View File

@ -81,6 +81,7 @@ import project.System.Process_Spec
import project.System.Reporting_Stream_Decoder_Spec
import project.System.Reporting_Stream_Encoder_Spec
import project.System.System_Spec
import project.System.Temporary_File_Spec
import project.Random_Spec
@ -97,6 +98,7 @@ main = Test_Suite.run_main <|
Error_Spec.spec
Environment_Spec.spec
File_Spec.spec
Temporary_File_Spec.spec
File_Read_Spec.spec
Reporting_Stream_Decoder_Spec.spec
Reporting_Stream_Encoder_Spec.spec

View File

@ -173,6 +173,19 @@ spec =
f.copy_to g . should_fail_with Forbidden_Operation
g.exists.should_be_false
"A".write f on_existing_file=Existing_File_Behavior.Overwrite
"B".write g on_existing_file=Existing_File_Behavior.Overwrite
r = f.copy_to g
r.should_fail_with File_Error
r.catch.should_be_a File_Error.Already_Exists
f.read . should_equal "A"
g.read . should_equal "B"
f.copy_to g replace_existing=True . should_succeed
f.read . should_equal "A"
g.read . should_equal "A"
f.delete_if_exists
g.delete_if_exists
@ -193,6 +206,20 @@ spec =
f.exists.should_be_false
g.exists.should_be_true
"A".write f on_existing_file=Existing_File_Behavior.Overwrite
"B".write g on_existing_file=Existing_File_Behavior.Overwrite
r = f.move_to g
r.should_fail_with File_Error
r.catch.should_be_a File_Error.Already_Exists
f.exists.should_be_true
g.exists.should_be_true
g.read . should_equal "B"
f.move_to g replace_existing=True . should_succeed
f.exists.should_be_false
g.exists.should_be_true
g.read . should_equal "A"
f.delete_if_exists
g.delete_if_exists
@ -448,7 +475,7 @@ spec =
Context.Output.with_enabled <| r.delete_if_exists
Test.specify "should perform a dry run creating and appending text to a file if Context.Output is disabled" <|
Test.specify "if Context.Output is disabled, will always start from the file given - so the effects of previous dry run are not visible" <|
f = transient / "dry_append.txt"
f.delete_if_exists
@ -461,12 +488,38 @@ spec =
Problems.expect_only_warning Dry_Run_Operation s
s.exists.should_be_true
s.read_text.should_equal 'line 1!\nline 2!'
# We only see the second line, because the base file `f` was not updated and the second append still starts from an empty file.
s.read_text.should_equal '\nline 2!'
# But the dry run file for the same `f` target should be kept the same:
s.should_equal r
f.exists.should_be_false
Context.Output.with_enabled <| r.delete_if_exists
Context.Output.with_enabled r.delete_if_exists
Test.specify "if Context.Output is disabled, will append to the dry run file if the dry run file descriptor is passed as the write target" <|
f = transient / "dry_append.txt"
f.delete_if_exists
Context.Output.with_disabled <|
dry_run_file = "line 1!".write f on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error
Problems.expect_only_warning Dry_Run_Operation dry_run_file
dry_run_file.exists.should_be_true
# Contrary to the example above, we write to the returned file, not just `f`.
s = '\nline 2!'.write dry_run_file on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error
Problems.expect_only_warning Dry_Run_Operation s
s.exists.should_be_true
# We see both updates, because we've threaded the resulting dry run file descriptor through:
s.read_text.should_equal 'line 1!\nline 2!'
# The returned file is also the same:
s.should_equal dry_run_file
f.exists.should_be_false
Context.Output.with_enabled dry_run_file.delete_if_exists
Test.specify "should allow to overwrite files" <|
f = transient / "work.txt"

View File

@ -0,0 +1,124 @@
from Standard.Base import all
import Standard.Base.Errors.File_Error.File_Error
import Standard.Base.Errors.Illegal_State.Illegal_State
import Standard.Base.System.File.Advanced.Temporary_File.Temporary_File
import Standard.Base.System.Input_Stream.Input_Stream
from Standard.Test import Test, Test_Suite
from Standard.Test.Execution_Context_Helpers import run_with_and_without_output
import Standard.Test.Extensions
polyglot java import java.io.File as Java_File
polyglot java import java.io.ByteArrayInputStream
polyglot java import java.io.FileInputStream
polyglot java import java.io.InputStream
main = Test_Suite.run_main spec
# Specification of the `Temporary_File` facility: manual disposal, cleanup once
# the reference is dropped, materializing input streams and text representation.
spec =
Test.group "Temporary_File facility" <|
Test.specify "should allow to create a new file and allow to dispose it manually" <|
tmp = Temporary_File.new
tmp.with_file f->
"test".write f
r = tmp.with_file f->
f.read Plain_Text
r.should_equal "test"
# Keep the underlying file so that its existence can be checked after disposal.
raw_file = tmp.unsafe_get
raw_file.exists . should_be_true
tmp.dispose
# Accessing the file after `dispose` is expected to panic with `Illegal_State`.
Test.expect_panic Illegal_State <| tmp.with_file (f->f.read Plain_Text)
raw_file.exists . should_be_false
Test.specify "should allow to create a new file and allow to dispose it once the reference is dropped" <|
f foo =
# The tmp file is limited to the scope of the function.
tmp = Temporary_File.new
tmp.with_file f->
"["+foo+"]" . write f
raw_file = tmp.unsafe_get
raw_file.exists . should_be_true
res = tmp.with_file f->
f.read Plain_Text
[res, raw_file]
result = f "foobar"
# At this point the `tmp` from within the function is ready for collection.
result.first.should_equal "[foobar]"
raw_file = result.second
# Calls `Runtime.gc` repeatedly until `raw_file` no longer exists. Fails the
# test once `count` reaches 0, and prints a progress message whenever `count`
# is a multiple of 100.
repeat_gc count =
if raw_file.exists.not then Nothing else
if count <= 0 then Test.fail "The temporary file was not cleaned up after numerous GC retries. Perhaps this is a bug?" else
if count % 100 == 0 then
IO.println "Trying to GC the temporary file (still "+count.to_text+" to go), but the file ("+raw_file.to_text+") still exists... "
Runtime.gc
@Tail_Call repeat_gc count-1
repeat_gc 999
raw_file.exists . should_be_false
Test.specify "should allow to materialize an input stream, regardless of Output Context settings" <|
# NOTE(review): `run_with_and_without_output` presumably runs the block under
# both Output Context settings - confirm against Execution_Context_Helpers.
run_with_and_without_output <|
stream = make_stream "test payload 1"
tmp = Temporary_File.from_stream stream
tmp.with_file f->
f.read Plain_Text . should_equal "test payload 1"
Test.specify "will fail if materializing an already closed input stream" <|
stream = Input_Stream.new (InputStream.nullInputStream) (File_Error.handle_java_exceptions Nothing)
stream.close
Test.expect_panic File_Error <| Temporary_File.from_stream stream
Test.specify "should be able to be converted to text, and indicate if it was disposed" <|
tmp = Temporary_File.new "pref" ".suf"
tmp.to_text.should_contain "Temporary_File"
tmp.to_text.should_contain "pref"
tmp.to_text.should_contain ".suf"
tmp.dispose
# After disposal the text form is marked as disposed and no longer contains
# the prefix or suffix.
tmp.to_text.should_contain "(disposed)"
tmp.to_text.should_contain "Temporary_File"
tmp.to_text . should_not_contain "pref"
tmp.to_text . should_not_contain "suf"
Test.specify "should allow to materialize an input stream that is already associated with a temporary file without copying it" <|
tmp = Temporary_File.new
tmp.with_file f->
"test payload 3" . write f
# All streams below are opened over the same underlying file; only the
# `associated_file` argument differs between the cases.
java_file = Java_File.new tmp.unsafe_get.absolute.path
stream = Input_Stream.new (FileInputStream.new java_file) (File_Error.handle_java_exceptions tmp.unsafe_get) associated_file=tmp
tmp2 = Temporary_File.from_stream_light stream
# The returned tmp file should be the same one as original.
tmp2.should_be_a Temporary_File
tmp2.unsafe_get.absolute.path . should_equal tmp.unsafe_get.absolute.path
# If the raw file is associated, the stream will return that File descriptor (not as temporary file, but regular one):
stream3 = Input_Stream.new (FileInputStream.new java_file) (File_Error.handle_java_exceptions tmp.unsafe_get) associated_file=tmp.unsafe_get
f3 = Temporary_File.from_stream_light stream3
f3.should_be_a File
f3.absolute.path . should_equal tmp.unsafe_get.absolute.path
# But if there's no association, a new temporary file gets created:
stream4 = Input_Stream.new (FileInputStream.new java_file) (File_Error.handle_java_exceptions tmp.unsafe_get) associated_file=Nothing
tmp4 = Temporary_File.from_stream_light stream4
tmp4.should_be_a Temporary_File
tmp4.unsafe_get.absolute.path . should_not_equal tmp.unsafe_get.absolute.path
# The base variant of from_stream also always copies:
stream5 = Input_Stream.new (FileInputStream.new java_file) (File_Error.handle_java_exceptions tmp.unsafe_get) associated_file=tmp
tmp5 = Temporary_File.from_stream stream5
tmp5.should_be_a Temporary_File
tmp5.unsafe_get.absolute.path . should_not_equal tmp.unsafe_get.absolute.path
# Builds an `Input_Stream` over the UTF-8 bytes of `text`, backed by an
# in-memory Java `ByteArrayInputStream`.
make_stream text =
    Input_Stream.new (ByteArrayInputStream.new text.utf_8) (File_Error.handle_java_exceptions Nothing)

View File

@ -2,9 +2,14 @@ package org.enso.shttp;
import com.sun.net.httpserver.HttpHandler;
import com.sun.net.httpserver.HttpServer;
import com.sun.net.httpserver.SimpleFileServer;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.URISyntaxException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.stream.Stream;
import sun.misc.Signal;
import sun.misc.SignalHandler;
@ -31,7 +36,9 @@ public class SimpleHTTPBin {
} catch (InterruptedException e) {
e.printStackTrace();
} finally {
server.stop(0);
System.out.println("Finalizing server...");
server.stop(3);
System.out.println("Server stopped.");
}
}
@ -59,17 +66,19 @@ public class SimpleHTTPBin {
server.addHandler(path, new TestHandler());
}
setupFileServer(server);
final SimpleHTTPBin server1 = server;
SignalHandler stopServerHandler =
(Signal sig) -> {
System.out.println("Stopping server...");
System.out.println("Stopping server... (interrupt)");
server1.stop();
};
for (String signalName : List.of("TERM", "INT")) {
Signal.handle(new Signal(signalName), stopServerHandler);
}
server.start();
} catch (IOException e) {
} catch (IOException | URISyntaxException e) {
e.printStackTrace();
} finally {
if (server != null) {
@ -93,4 +102,34 @@ public class SimpleHTTPBin {
return running;
}
}
/**
 * Registers a static file handler under {@code /testfiles}.
 *
 * <p>The served directory is {@code pathToWWW} resolved against the project root, which is
 * located by walking up from the location this class was loaded from.
 *
 * @param server the server to add the handler to
 * @throws URISyntaxException if the code source location cannot be converted to a URI
 */
private static void setupFileServer(SimpleHTTPBin server) throws URISyntaxException {
  var codeLocation = SimpleHTTPBin.class.getProtectionDomain().getCodeSource().getLocation();
  Path startingPoint = Path.of(codeLocation.toURI()).toAbsolutePath();
  Path wwwRoot = findProjectRoot(startingPoint).resolve(pathToWWW);
  System.out.println("Serving files from directory " + wwwRoot);
  HttpHandler fileHandler = SimpleFileServer.createFileHandler(wwwRoot);
  server.addHandler("/testfiles", fileHandler);
}
/**
 * Walks up from {@code startingPoint} until a directory that looks like the project root is
 * found.
 *
 * @param startingPoint the path to start from; it is checked first, then each parent in turn
 * @return the first path on the way up for which {@code looksLikeProjectRoot} holds
 * @throws RuntimeException if the top of the path is reached without a match
 */
private static Path findProjectRoot(Path startingPoint) {
  Path candidate = startingPoint;
  while (!looksLikeProjectRoot(candidate)) {
    candidate = candidate.getParent();
    if (candidate == null) {
      throw new RuntimeException("Could not find project root");
    }
  }
  return candidate;
}
/** Location of the served test files, relative to the project root. */
private static final String pathToWWW = "tools/simple-httpbin/www-files";

/**
 * Checks whether {@code path} contains every entry expected at the project root.
 *
 * @param path the directory to inspect
 * @return {@code true} only if {@code build.sbt}, {@code tools}, {@code project} and the
 *     {@code pathToWWW} directory all exist under {@code path}
 */
private static boolean looksLikeProjectRoot(Path path) {
  String[] markers = {"build.sbt", "tools", "project", pathToWWW};
  for (String marker : markers) {
    if (!Files.exists(path.resolve(marker))) {
      return false;
    }
  }
  return true;
}
}

View File

@ -19,9 +19,26 @@ public class TestHandler implements HttpHandler {
private static final Set<String> ignoredHeaders = Set.of("Host");
private static final Pattern textEncodingRegex = Pattern.compile(".*; charset=([^;]+).*");
private final boolean logRequests = false;
/**
 * Entry point for incoming requests: optionally logs the request line, then delegates to
 * {@code doHandle}.
 *
 * <p>Every exception is printed to stderr. An {@link IOException} is then rethrown to the
 * caller; any other exception is swallowed after being printed.
 *
 * @param exchange the HTTP exchange to process
 * @throws IOException if {@code doHandle} fails with an I/O error
 */
@Override
public void handle(HttpExchange exchange) throws IOException {
  try {
    if (logRequests) {
      String requestLine = exchange.getRequestMethod() + " " + exchange.getRequestURI();
      System.out.println("Handling request: " + requestLine);
    }
    doHandle(exchange);
  } catch (Exception e) {
    e.printStackTrace();
    // Only I/O failures are propagated; everything else stops here.
    if (e instanceof IOException) {
      throw (IOException) e;
    }
  }
}
public void doHandle(HttpExchange exchange) throws IOException {
boolean first = true;
String contentType = null;
String textEncoding = "UTF-8";

View File

@ -0,0 +1,3 @@
A,B
1,x
3,y
1 A B
2 1 x
3 3 y

View File

@ -0,0 +1,4 @@
[
{"A": 1, "B": "x"},
{"A": 3, "B": "y"}
]

Binary file not shown.

Binary file not shown.