Mirror of https://github.com/enso-org/enso.git, synced 2024-12-22 23:01:29 +03:00

Reworking Excel support to allow for reading of big files (#8403)

- Closes #8111 by making sure that all Excel workbooks are read using a backing file (which should be more memory efficient).
- If the workbook is being opened from an input stream, that stream is materialized to a `Temporary_File`.
- Adds tests fetching Table formats from HTTP.
- Extends `simple-httpbin` with the ability to serve files for our tests.
- Ensures that the `Infer` option on the `Excel` format also works with streams, if content-type metadata is available (e.g. from HTTP headers).
- Implements a `Temporary_File` facility that can be used to create a temporary file that is deleted once all references to the `Temporary_File` instance are GCed.

Commit b5c995a7bf (parent 95f11abe2c)
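
The flow described above can be summarised in a short sketch (illustration only, not part of this commit's diff), based on the `Temporary_File` and `Excel_Workbook` APIs introduced below; the `response_stream` value is a hypothetical placeholder for an HTTP response body stream:

    # Reuse the stream's backing file if it already has one, otherwise
    # materialize the stream into a fresh Temporary_File.
    backing_file = Temporary_File.from_stream_light response_stream
    # The workbook stores a reference to `backing_file`, so the temporary file
    # stays alive for as long as the workbook is reachable; once both become
    # unreachable, the file is deleted after a GC run.
    workbook = Excel_Workbook.new backing_file xls_format=False
    workbook.sheet_names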
@ -21,6 +21,7 @@ resources/python
# The files in the `data` directory of our tests may have specific structure or
# even be malformed on purpose, so we do not want to run prettier on them.
test/**/data
tools/simple-httpbin/www-files

# GUI
**/scala-parser.js
@ -594,6 +594,7 @@
- [Implemented truncate `Date_Time` for database backend (Postgres only).][8235]
- [Initial Enso Cloud APIs.][8006]
- [Errors thrown inside `map` are wrapped in `Map_Error`.][8307]
- [Support for loading big Excel files.][8403]

[debug-shortcuts]:
  https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
@ -850,6 +851,7 @@
[8150]: https://github.com/enso-org/enso/pull/8150
[8235]: https://github.com/enso-org/enso/pull/8235
[8307]: https://github.com/enso-org/enso/pull/8307
[8403]: https://github.com/enso-org/enso/pull/8403

#### Enso Compiler
build.sbt (18 changed lines)
@ -1330,13 +1330,16 @@ lazy val truffleDslSuppressWarnsSetting = Seq(
)

/** A setting to replace javac with Frgaal compiler, allowing to use latest Java features in the code
  * and still compile down to JDK 11
  * and still compile down to JDK 17
  */
lazy val frgaalJavaCompilerSetting = Seq(
lazy val frgaalJavaCompilerSetting =
  customFrgaalJavaCompilerSettings(targetJavaVersion)

def customFrgaalJavaCompilerSettings(targetJdk: String) = Seq(
  Compile / compile / compilers := FrgaalJavaCompiler.compilers(
    (Compile / dependencyClasspath).value,
    compilers.value,
    targetJavaVersion
    targetJdk
  ),
  // This dependency is needed only so that developers don't download Frgaal manually.
  // Sadly it cannot be placed under plugins either because meta dependencies are not easily
@ -2731,11 +2734,16 @@ val allStdBits: Parser[String] =
lazy val `simple-httpbin` = project
  .in(file("tools") / "simple-httpbin")
  .settings(
    frgaalJavaCompilerSetting,
    customFrgaalJavaCompilerSettings(targetJdk = "21"),
    autoScalaLibrary := false,
    Compile / javacOptions ++= Seq("-Xlint:all"),
    Compile / run / mainClass := Some("org.enso.shttp.SimpleHTTPBin"),
    assembly / mainClass := (Compile / run / mainClass).value,
    libraryDependencies ++= Seq(
      "org.apache.commons" % "commons-text" % commonsTextVersion
    )
    ),
    (Compile / run / fork) := true,
    (Compile / run / connectInput) := true
  )
  .configs(Test)

@ -4,8 +4,10 @@ import Standard.Base.Errors.File_Error.File_Error
|
||||
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
||||
import Standard.Base.Errors.Unimplemented.Unimplemented
|
||||
import Standard.Base.System.File_Format.File_For_Read
|
||||
import Standard.Base.System.File_Format.File_Format_Metadata
|
||||
import Standard.Base.System.Input_Stream.Input_Stream
|
||||
import Standard.Base.System.Output_Stream.Output_Stream
|
||||
from Standard.Base.System.File import find_extension_from_name
|
||||
|
||||
import project.AWS_Credential.AWS_Credential
|
||||
import project.Errors.S3_Error
|
||||
@ -117,7 +119,9 @@ type S3_File
|
||||
Auto_Detect -> if self.is_directory then format.read self on_problems else
|
||||
response = S3.get_object self.bucket self.prefix self.credentials
|
||||
response.decode Auto_Detect
|
||||
_ -> self.with_input_stream [File_Access.Read] format.read_stream
|
||||
_ ->
|
||||
metadata = File_Format_Metadata.Value file_name=self.name
|
||||
self.with_input_stream [File_Access.Read] (stream-> format.read_stream stream metadata)
|
||||
|
||||
## ALIAS load bytes, open bytes
|
||||
ICON data_input
|
||||
@ -187,11 +191,7 @@ type S3_File
|
||||
Returns the extension of the file.
|
||||
extension : Text
|
||||
extension self = if self.is_directory then Error.throw (S3_Error.Error "Directories do not have extensions." self.uri) else
|
||||
name = self.name
|
||||
last_dot = name.locate "." mode=Matching_Mode.Last
|
||||
if last_dot.is_nothing then "" else
|
||||
extension = name.drop (Index_Sub_Range.First last_dot.start)
|
||||
if extension == "." then "" else extension
|
||||
find_extension_from_name self.name
|
||||
|
||||
## GROUP Standard.Base.Input
|
||||
Lists files contained in the directory denoted by this file.
|
||||
|
@ -299,7 +299,7 @@ type Any
    is_nothing self = False

    ## GROUP Logical
       If `self` is Nothing then returns `function`.
       If `self` is Nothing then returns `other`.

       > Example
         If the value "Hello" is nothing return "".
@ -309,6 +309,16 @@ type Any
    if_nothing self ~other =
        const self other

    ## If `self` is Nothing then returns Nothing, otherwise returns the result
       of running the provided `action`.

       > Example
         Transform a value only if it is not nothing.

             my_result.if_not_nothing <| my_result + 1
    if_not_nothing : Any -> Any
    if_not_nothing self ~action = action

    ## GROUP Errors
       Executes the provided handler on an error, or returns the value unchanged.
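
The new `if_not_nothing` composes with the existing `if_nothing` into a "transform if present, otherwise fall back" chain. A minimal sketch of the pattern (illustration only, not part of the diff), mirroring how `xls_format_from_metadata` uses it later in this commit; `metadata` is assumed to be a `File_Format_Metadata` value:

    # Only inspect the content type if one is present.
    from_content_type = metadata.content_type.if_not_nothing <|
        xls_format_from_content_type metadata.content_type
    # If that did not yield an answer, fall back to the file name (if any).
    from_content_type.if_nothing <|
        metadata.file_name.if_not_nothing <|
            xls_format_from_file_extension (find_extension_from_name metadata.file_name)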
@ -17,6 +17,7 @@ import project.Network.HTTP.HTTP_Method.HTTP_Method
|
||||
import project.Nothing.Nothing
|
||||
import project.System.File.File_Access.File_Access
|
||||
import project.System.File_Format.File_For_Read
|
||||
import project.System.File_Format.File_Format_Metadata
|
||||
import project.System.Input_Stream.Input_Stream
|
||||
import project.System.Output_Stream.Output_Stream
|
||||
from project.Data.Boolean import Boolean, False, True
|
||||
@ -129,7 +130,9 @@ type Enso_File
|
||||
real_format = Auto_Detect.get_reading_format self
|
||||
if real_format == Nothing then Error.throw (File_Error.Unsupported_Type self) else
|
||||
self.read real_format on_problems
|
||||
_ -> self.with_input_stream [File_Access.Read] format.read_stream
|
||||
_ ->
|
||||
metadata = File_Format_Metadata.Value file_name=self.name
|
||||
self.with_input_stream [File_Access.Read] (stream-> format.read_stream stream metadata)
|
||||
|
||||
## ALIAS load bytes, open bytes
|
||||
ICON data_input
|
||||
|
@ -6,6 +6,7 @@ import project.Network.URI.URI
|
||||
import project.Nothing.Nothing
|
||||
import project.System.File.File
|
||||
import project.System.File_Format.File_For_Read
|
||||
import Standard.Base.System.File_Format.File_Format_Metadata
|
||||
import project.System.Input_Stream.Input_Stream
|
||||
from project.Data.Text.Extensions import all
|
||||
|
||||
@ -45,7 +46,8 @@ type XML_Format
|
||||
XML_Document.from_file file
|
||||
|
||||
## PRIVATE
|
||||
Implements the `Data.parse` for this `File_Format`
|
||||
read_stream : Input_Stream -> Any
|
||||
read_stream self stream:Input_Stream =
|
||||
Implements decoding the format from a stream.
|
||||
read_stream : Input_Stream -> File_Format_Metadata -> Any
|
||||
read_stream self stream:Input_Stream (metadata : File_Format_Metadata = File_Format_Metadata.no_information) =
|
||||
_ = metadata
|
||||
XML_Document.from_stream stream
|
||||
|
@ -8,6 +8,7 @@ import project.System.File.File
|
||||
import project.System.File_Format.File_For_Read
|
||||
import project.System.File_Format.File_Format
|
||||
|
||||
polyglot java import java.io.FileNotFoundException
|
||||
polyglot java import java.io.IOException
|
||||
polyglot java import java.nio.file.AccessDeniedException
|
||||
polyglot java import java.nio.file.FileAlreadyExistsException
|
||||
@ -33,7 +34,7 @@ type File_Error
|
||||
Arguments:
|
||||
- file: The file that couldn't be read.
|
||||
- message: The message for the error.
|
||||
IO_Error (file : File) (message : Text)
|
||||
IO_Error (file : File | Nothing) (message : Text)
|
||||
|
||||
## Indicates that the given file's type is not supported.
|
||||
Unsupported_Type (file : File_For_Read)
|
||||
@ -51,7 +52,9 @@ type File_Error
|
||||
to_display_text : Text
|
||||
to_display_text self = case self of
|
||||
File_Error.Not_Found file -> "The file at " + file.path + " does not exist."
|
||||
File_Error.IO_Error file msg -> msg + " (" + file.path + ")."
|
||||
File_Error.IO_Error file msg ->
|
||||
suffix = if file.is_nothing then "" else " (" + file.path + ")."
|
||||
msg + suffix
|
||||
File_Error.Already_Exists file -> "The file at "+file.path+" already exists."
|
||||
File_Error.Access_Denied file -> "Insufficient permissions to perform the desired operation on the file at "+file.path+"."
|
||||
File_Error.Unsupported_Type file -> "The "+file.path+" has a type that is not supported."
|
||||
@ -65,7 +68,7 @@ type File_Error
|
||||
## PRIVATE
|
||||
|
||||
Utility method for running an action with Java exceptions mapping.
|
||||
handle_java_exceptions file ~action =
|
||||
handle_java_exceptions (file : File | Nothing) ~action =
|
||||
Panic.catch IOException action caught_panic->
|
||||
File_Error.wrap_io_exception file caught_panic.payload
|
||||
|
||||
@ -78,8 +81,14 @@ type File_Error
|
||||
## PRIVATE
|
||||
|
||||
Converts a Java `IOException` into its Enso counterpart.
|
||||
wrap_io_exception file io_exception = case io_exception of
|
||||
wrap_io_exception (file : File | Nothing) io_exception =
|
||||
## If the file is not known, all we can do is throw a generic IO error.
|
||||
This will only usually matter on stream operations, where there is no relevant file -
|
||||
and so the exceptions like `NoSuchFileException` should not occur in such context.
|
||||
But instead of risking a Type_Error, we just throw the more generic IO_Error.
|
||||
if file.is_nothing then Error.throw (File_Error.IO_Error Nothing "An IO error has occurred: "+io_exception.to_text) else case io_exception of
|
||||
_ : NoSuchFileException -> Error.throw (File_Error.Not_Found file)
|
||||
_ : FileNotFoundException -> Error.throw (File_Error.Not_Found file)
|
||||
_ : FileAlreadyExistsException -> Error.throw (File_Error.Already_Exists file)
|
||||
_ : AccessDeniedException -> File_Error.access_denied file
|
||||
_ -> Error.throw (File_Error.IO_Error file "An IO error has occurred: "+io_exception.to_text)
|
||||
|
@ -1,5 +1,9 @@
import project.Data.Text.Text
import project.Error.Error
import project.Nothing.Nothing
import project.Panic.Panic

polyglot java import java.lang.IllegalStateException

type Illegal_State
    ## PRIVATE
@ -19,3 +23,8 @@ type Illegal_State
       Provides a human-readable representation of the encoding error.
    to_display_text : Text
    to_display_text self = "Illegal State: " + self.message

    ## PRIVATE
       Capture a Java `IllegalStateException` and convert it to an Enso dataflow error - `Illegal_State.Error`.
    handle_java_exception =
        Panic.catch IllegalStateException handler=(cause-> Error.throw (Illegal_State.Error cause.payload.getMessage cause.payload))
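
The new `Illegal_State.handle_java_exception` is a partially applied `Panic.catch`, so it is used as a wrapper around an expression that may throw `IllegalStateException` from Java. A condensed sketch of the call shape, as used by `Excel_Workbook.with_java_workbook` later in this diff (`resource` and `f` come from that surrounding context):

    # Any IllegalStateException raised by the Java call below surfaces as an
    # `Illegal_State.Error` dataflow error instead of an uncaught panic.
    Illegal_State.handle_java_exception <|
        resource.with connection->
            connection.withWorkbook f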
@ -98,7 +98,10 @@ type HTTP
|
||||
if fetch_methods.contains req.method || Context.Output.is_enabled then action else
|
||||
Error.throw (Forbidden_Operation.Error ("Method " + req.method.to_text + " requests are forbidden as the Output context is disabled."))
|
||||
handle_request_error =
|
||||
Panic.catch JException handler=(cause-> Error.throw (Request_Error.Error 'IllegalArgumentException' cause.payload.getMessage))
|
||||
handler caught_panic =
|
||||
exception = caught_panic.payload
|
||||
Error.throw (Request_Error.Error (Meta.type_of exception . to_text) exception.getMessage)
|
||||
Panic.catch JException handler=handler
|
||||
|
||||
Panic.recover Any <| handle_request_error <| check_output_context <|
|
||||
headers = resolve_headers req
|
||||
|
@ -16,6 +16,7 @@ import project.Network.URI.URI
|
||||
import project.Nothing.Nothing
|
||||
import project.Runtime.Context
|
||||
import project.Runtime.Managed_Resource.Managed_Resource
|
||||
import project.System.File.Advanced.Temporary_File.Temporary_File
|
||||
import project.System.File.Existing_File_Behavior.Existing_File_Behavior
|
||||
import project.System.File.File
|
||||
import project.System.File.File_Access.File_Access
|
||||
@ -23,6 +24,7 @@ import project.System.File.Write_Extensions
|
||||
import project.System.File_Format.Auto_Detect
|
||||
import project.System.File_Format.Bytes
|
||||
import project.System.File_Format.File_Format
|
||||
import project.System.File_Format.File_Format_Metadata
|
||||
import project.System.File_Format.Plain_Text_Format
|
||||
import project.System.Input_Stream.Input_Stream
|
||||
from project.Data.Boolean import Boolean, False, True
|
||||
@ -58,23 +60,23 @@ type Response_Body
|
||||
Raw_Stream (raw_stream:Input_Stream) (content_type:Text|Nothing) uri:URI
|
||||
|
||||
## PRIVATE
|
||||
Byte_Array (bytes:Vector) (content_type:Text|Nothing) uri:URI
|
||||
Materialized_Byte_Array (bytes:Vector) (content_type:Text|Nothing) uri:URI
|
||||
|
||||
## PRIVATE
|
||||
Temporary_File (file_resource:Managed_Resource) (content_type:Text|Nothing) uri:URI
|
||||
Materialized_Temporary_File (temporary_file:Temporary_File) (content_type:Text|Nothing) uri:URI
|
||||
|
||||
## PRIVATE
|
||||
with_stream : (Input_Stream -> Any ! HTTP_Error) -> Any ! HTTP_Error
|
||||
with_stream self action = case self of
|
||||
Response_Body.Raw_Stream raw_stream _ _ ->
|
||||
Managed_Resource.bracket raw_stream (_.close) action
|
||||
Response_Body.Byte_Array bytes _ _ ->
|
||||
Response_Body.Materialized_Byte_Array bytes _ _ ->
|
||||
byte_stream = Input_Stream.new (ByteArrayInputStream.new bytes) (HTTP_Error.handle_java_exceptions self.uri)
|
||||
Managed_Resource.bracket byte_stream (_.close) action
|
||||
Response_Body.Temporary_File file_resource _ _ -> file_resource.with file->
|
||||
Response_Body.Materialized_Temporary_File temporary_file _ _ -> temporary_file.with_file file->
|
||||
opts = [File_Access.Read.to_java]
|
||||
stream = HTTP_Error.handle_java_exceptions self.uri (file.input_stream_builtin opts)
|
||||
file_stream = Input_Stream.new stream (HTTP_Error.handle_java_exceptions self.uri)
|
||||
file_stream = Input_Stream.new stream (HTTP_Error.handle_java_exceptions self.uri) associated_file=temporary_file
|
||||
Managed_Resource.bracket (file_stream) (_.close) action
|
||||
|
||||
## PRIVATE
|
||||
@ -88,23 +90,19 @@ type Response_Body
|
||||
body_stream.with_java_stream body_java_stream->
|
||||
first_block = body_java_stream.readNBytes maximum_body_in_memory
|
||||
case first_block.length < maximum_body_in_memory of
|
||||
True -> Response_Body.Byte_Array (Vector.from_polyglot_array first_block) self.content_type self.uri
|
||||
False ->
|
||||
file = File.create_temporary_file self.uri.host
|
||||
|
||||
## Write contents to temporary file
|
||||
Context.Output.with_enabled <|
|
||||
True -> Response_Body.Materialized_Byte_Array (Vector.from_polyglot_array first_block) self.content_type self.uri
|
||||
False -> Context.Output.with_enabled <|
|
||||
## Write contents to a temporary file
|
||||
temp_file = Temporary_File.new self.uri.host
|
||||
r = temp_file.with_file file->
|
||||
file.with_output_stream [File_Access.Write, File_Access.Create, File_Access.Truncate_Existing] output_stream->
|
||||
output_stream.with_java_stream java_output_stream->
|
||||
java_output_stream.write first_block
|
||||
body_java_stream.transferTo java_output_stream
|
||||
java_output_stream.flush
|
||||
Nothing
|
||||
output_stream.close
|
||||
|
||||
## Have a file with the correct set up
|
||||
resource = Managed_Resource.register file delete_file
|
||||
Response_Body.Temporary_File resource self.content_type self.uri
|
||||
r.if_not_error <|
|
||||
Response_Body.Materialized_Temporary_File temp_file self.content_type self.uri
|
||||
_ -> self
|
||||
|
||||
## ALIAS parse
|
||||
@ -128,7 +126,9 @@ type Response_Body
|
||||
_ ->
|
||||
type_obj = Meta.type_of format
|
||||
if can_decode type_obj . not then Error.throw (Illegal_Argument.Error type_obj.to_text+" cannot be used to decode from a stream. It must be saved to a file first.") else
|
||||
self.with_stream format.read_stream
|
||||
metadata = File_Format_Metadata.Value content_type=self.content_type
|
||||
self.with_stream stream->
|
||||
format.read_stream stream metadata
|
||||
|
||||
## ALIAS bytes
|
||||
GROUP Input
|
||||
|
@ -2,6 +2,7 @@ import project.Any.Any
import project.Data.Numbers.Integer
import project.Data.Text.Text
from project.Data.Boolean import Boolean, False, True
from project.Function import const

## The type that has only a singleton value. Nothing in Enso is used as an
   universal value to indicate the lack of presence of a value.
@ -30,6 +31,16 @@ type Nothing
    if_nothing : Any -> Any
    if_nothing self ~function = function

    ## If `self` is Nothing then returns Nothing, otherwise returns the result
       of running the provided `action`.

       > Example
         Transform a value only if it is not nothing.

             my_result.if_not_nothing <| my_result + 1
    if_not_nothing : Any -> Any
    if_not_nothing self ~action = const Nothing action

    ## Get a value for the key of the object.
       As `Nothing` has no keys, returns `if_missing`.
@ -90,17 +90,28 @@ type File
|
||||
## PRIVATE
|
||||
Create a dry run temporary file which will be deleted when Enso exits.
|
||||
|
||||
For an absolute path the same temporary file is returned.
|
||||
The same temporary file is returned for paths that point to the same
|
||||
location (not accounting for symlinks).
|
||||
|
||||
If this file is a temporary file that was generated by
|
||||
`create_dry_run_file` on another file, it is returned as-is.
|
||||
|
||||
Arguments:
|
||||
- copy_original: If `True`, the created dry run file is 'synchronized'
|
||||
with the original file - the file is copied to the temporary file, or
|
||||
if the original file does not exist - it is ensured that the temporary
|
||||
file also does not exist. If `False`, no actions are taken.
|
||||
create_dry_run_file : Boolean -> File ! File_Error
|
||||
create_dry_run_file self copy_original=False =
|
||||
temp_path = DryRunFileManager.getTemporaryFile self.absolute.path
|
||||
temp_path = DryRunFileManager.getTemporaryFile self.absolute.normalize.path
|
||||
if temp_path.is_nothing then Error.throw (File_Error.IO_Error "Unable to create a temporary file.") else
|
||||
temp = File.new temp_path
|
||||
if self.exists && copy_original then
|
||||
Context.Output.with_enabled <|
|
||||
if copy_original then Context.Output.with_enabled <| Panic.rethrow <|
|
||||
case self.exists of
|
||||
True ->
|
||||
self.copy_to temp replace_existing=True
|
||||
False ->
|
||||
temp.delete_if_exists
|
||||
|
||||
## Attach a warning to the file that it is a dry run
|
||||
warning = Dry_Run_Operation.Warning "Only a dry run has occurred, with data written to a temporary file."
|
||||
@ -803,3 +814,11 @@ get_child_widget file =
|
||||
children = file.list
|
||||
options = children.map c-> Option c.name c.name.pretty
|
||||
Widget.Single_Choice values=options display=Display.Always
|
||||
|
||||
## PRIVATE
|
||||
find_extension_from_name : Text -> Text
|
||||
find_extension_from_name name =
|
||||
last_dot = name.locate "." mode=Matching_Mode.Last
|
||||
if last_dot.is_nothing then "" else
|
||||
extension = name.drop (Index_Sub_Range.First last_dot.start)
|
||||
if extension == "." then "" else extension
|
||||
|
@ -0,0 +1,106 @@
import project.Any.Any
import project.Data.Text.Text
import project.Errors.File_Error.File_Error
import project.Errors.Illegal_State.Illegal_State
import project.Nothing.Nothing
import project.Panic.Panic
import project.Runtime.Context
import project.Runtime.Managed_Resource.Managed_Resource
import project.Runtime.Ref.Ref
import project.System.File.File
import project.System.Input_Stream.Input_Stream

## PRIVATE
   ADVANCED
   A reference to a temporary file that will be deleted once all references to
   it are gone. This is an advanced helper facility for library developers, not
   aimed to be used by regular users. Most users should just use
   `File.create_temporary_file` instead.

   This is helpful when implementing internal temporary files needed for some
   operation, whose lifetime may have to be bound with some object (e.g. Excel
   Workbook instance).

   The advantage of using `Temporary_File` over `File.create_temporary_file` is
   that the file is likely to get cleaned up earlier than JVM exit. If helper
   files are only cleaned at JVM exit, we run the risk of exhausting disk
   storage if the operation is run numerous times. With this approach, the risk
   should be much lower, because the files are cleaned up sooner - as soon as
   the first GC run after the file is no longer reachable.

   It has to be used with extra care, as anywhere where the internal File
   reference is leaked without ensuring the lifetime of the `Temporary_File`
   instance, that file reference may easily become invalid (i.e. its file may
   get deleted).
type Temporary_File
    ## PRIVATE
    Instance (file_resource_reference : Ref (Nothing | Managed_Resource File))

    ## PRIVATE
    with_file : (File -> Any) -> Any
    with_file self action = self.access_resource.with action

    ## PRIVATE
       Gets the raw file reference.
       Note that the underlying file may be deleted at any time, once `self` is
       unreachable - so this method has to be used with extra care.
    unsafe_get : File
    unsafe_get self = self.with_file (f->f)

    ## PRIVATE
       Deletes the temporary file, invalidating the reference.
    dispose : Nothing
    dispose self =
        self.access_resource.finalize
        self.file_resource_reference.put Nothing

    ## PRIVATE
    access_resource : Managed_Resource File
    access_resource self = case self.file_resource_reference.get of
        Nothing -> Panic.throw (Illegal_State.Error "Using the Temporary_File after it has been disposed is not allowed.")
        resource -> resource

    ## PRIVATE
    to_text : Text
    to_text self = case self.file_resource_reference.get of
        Nothing -> "Temporary_File (disposed)"
        resource -> "Temporary_File (" + (resource.with .to_text) + ")"

    ## PRIVATE
       Creates a new empty `Temporary_File`.
    new : Text -> Text -> Temporary_File
    new prefix="temp" suffix=".tmp" =
        # The file will be deleted on JVM exit, but we will try to delete it sooner.
        file = File.create_temporary_file prefix suffix
        resource = Managed_Resource.register file cleanup_tmp_file
        Temporary_File.Instance (Ref.new resource)

    ## PRIVATE
       Materializes the provided `Input_Stream` into a `Temporary_File`.
       It will work the same regardless of whether the output context is enabled.
    from_stream : Input_Stream -> Temporary_File
    from_stream stream = Context.Output.with_enabled <|
        tmp_file = Temporary_File.new "enso-materialized-stream" ".tmp"
        handler caught_panic =
            tmp_file.dispose
            Panic.throw caught_panic
        Panic.catch Any handler=handler <|
            tmp_file.with_file file->
                Panic.rethrow <|
                    File_Error.handle_java_exceptions Nothing <|
                        stream.write_to_file file
            tmp_file

    ## PRIVATE
       Materializes the provided `Input_Stream` into a `Temporary_File`.
       If the stream is already backed by a temporary or regular file, that file is returned.
    from_stream_light : Input_Stream -> Temporary_File | File
    from_stream_light stream =
        case stream.associated_file of
            tmp : Temporary_File -> tmp
            file : File -> file
            _ -> Temporary_File.from_stream stream

## PRIVATE
cleanup_tmp_file file =
    file.delete_if_exists
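
As the doc comment above stresses, the raw `File` must not outlive the `Temporary_File` instance. A minimal usage sketch of the intended pattern (illustration only, not part of the diff; `input_stream` and `parse_contents` are hypothetical placeholders):

    # Materialize the stream into a fresh temporary file.
    tmp = Temporary_File.from_stream input_stream
    # Work with the underlying file only inside `with_file`, while `tmp` is still
    # referenced - this keeps the Managed_Resource (and thus the file) alive.
    result = tmp.with_file file->
        parse_contents file
    # Optionally delete the file eagerly instead of waiting for a GC run.
    tmp.dispose
    result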
@ -77,6 +77,23 @@ type Existing_File_Behavior
            action output_stream . catch Any dataflow_error->
                Panic.throw (Internal_Write_Operation_Errored.Error dataflow_error)

    ## PRIVATE
       A counterpart of `write` that will also handle the dry-run mode if the
       Output context is disabled. It relies on a default logic for handling the
       dry runs, which may not always be the right choice, depending on the file
       format.

       The `action` takes 2 arguments - the effective file that will be written to
       (the original file or the dry run file) and the output stream to write to.
    write_handling_dry_run : File -> (File -> Output_Stream -> Any) -> Any ! File_Error
    write_handling_dry_run self file action =
        if Context.Output.is_enabled then self.write file (action file) else
            effective_behavior = self.get_effective_behavior file is_enabled=False
            needs_original = effective_behavior == Existing_File_Behavior.Append
            dry_run_file = file.create_dry_run_file copy_original=needs_original
            Context.Output.with_enabled <|
                effective_behavior.write dry_run_file (action dry_run_file)

## PRIVATE
write_file_backing_up_old_one : File -> (Output_Stream -> Nothing) -> Nothing ! File_Error
write_file_backing_up_old_one file action = recover_io_and_not_found <|
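
The shape of a `write_handling_dry_run` caller is visible in the `Delimited_Writer` change later in this diff; a condensed sketch of that call pattern (illustration only, with the writer specifics otherwise unchanged):

    # The action receives the effective target (the original file or its dry-run
    # counterpart) plus an already-open output stream for it.
    on_existing_file.write_handling_dry_run file effective_file-> stream->
        r = write_to_stream table format stream on_problems related_file=effective_file
        r.if_not_error effective_file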
@ -100,7 +117,7 @@ write_file_backing_up_old_one file action = recover_io_and_not_found <|
|
||||
handle_internal_dataflow = Panic.catch Internal_Write_Operation_Errored handler=handle_write_failure_dataflow
|
||||
handle_internal_panic = Panic.catch Internal_Write_Operation_Panicked handler=handle_write_failure_panic
|
||||
handle_file_already_exists <| handle_internal_dataflow <| handle_internal_panic <|
|
||||
Panic.rethrow <|
|
||||
result = Panic.rethrow <|
|
||||
new_file.with_output_stream [File_Access.Write, File_Access.Create_New] output_stream->
|
||||
result = Panic.catch Any (action output_stream) caught_panic->
|
||||
Panic.throw (Internal_Write_Operation_Panicked.Panic caught_panic)
|
||||
@ -118,6 +135,8 @@ write_file_backing_up_old_one file action = recover_io_and_not_found <|
|
||||
Panic.catch File_Error handler=not_found_handler <|
|
||||
Panic.rethrow <| file.move_to bak_file replace_existing=True
|
||||
Panic.rethrow <| new_file.move_to file
|
||||
result
|
||||
|
||||
## Here we manually check if the target file is writable. This is necessary,
|
||||
because the `Backup` scenario would go around the original file not being
|
||||
writable by moving it (which is permitted for read-only files too) and
|
||||
|
@ -1,5 +1,6 @@
|
||||
import project.Any.Any
|
||||
import project.Data.Json.Json
|
||||
import project.Data.Numbers.Integer
|
||||
import project.Data.Text.Encoding.Encoding
|
||||
import project.Data.Text.Text
|
||||
import project.Data.Vector.Vector
|
||||
@ -155,9 +156,10 @@ type Plain_Text_Format
|
||||
file.read_text self.encoding on_problems
|
||||
|
||||
## PRIVATE
|
||||
Implements the `Data.parse` for this `File_Format`
|
||||
read_stream : Input_Stream -> Any
|
||||
read_stream self stream:Input_Stream =
|
||||
Implements decoding the format from a stream.
|
||||
read_stream : Input_Stream -> File_Format_Metadata -> Any
|
||||
read_stream self stream:Input_Stream (metadata : File_Format_Metadata = File_Format_Metadata.no_information) =
|
||||
_ = metadata
|
||||
Text.from_bytes (stream.read_all_bytes) self.encoding
|
||||
|
||||
## A file format for reading or writing files as a sequence of bytes.
|
||||
@ -191,9 +193,10 @@ type Bytes
|
||||
file.read_bytes
|
||||
|
||||
## PRIVATE
|
||||
Implements the `Data.parse` for this `File_Format`
|
||||
read_stream : Input_Stream -> Any
|
||||
read_stream self stream:Input_Stream =
|
||||
Implements decoding the format from a stream.
|
||||
read_stream : Input_Stream -> File_Format_Metadata -> Any
|
||||
read_stream self stream:Input_Stream (metadata : File_Format_Metadata = File_Format_Metadata.no_information) =
|
||||
_ = metadata
|
||||
stream.read_all_bytes
|
||||
|
||||
## A file format for reading and writing files as JSON.
|
||||
@ -232,9 +235,10 @@ type JSON_Format
|
||||
Error.throw (File_Error.Corrupted_Format file error.to_display_text error)
|
||||
|
||||
## PRIVATE
|
||||
Implements the `Data.parse` for this `File_Format`
|
||||
read_stream : Input_Stream -> Any
|
||||
read_stream self stream:Input_Stream =
|
||||
Implements decoding the format from a stream.
|
||||
read_stream : Input_Stream -> File_Format_Metadata -> Any
|
||||
read_stream self stream:Input_Stream (metadata : File_Format_Metadata = File_Format_Metadata.no_information) =
|
||||
_ = metadata
|
||||
Text.from_bytes (stream.read_all_bytes) Encoding.utf_8 . parse_json
|
||||
|
||||
## A setting to infer the default behaviour of some option.
|
||||
@ -245,11 +249,25 @@ type Infer
|
||||
type File_For_Read
|
||||
## PRIVATE
|
||||
Arguments:
|
||||
- `path` - the path or the URI of the file.
|
||||
- `name` - the name of the file.
|
||||
- `extension` - the extension of the file.
|
||||
- `read_first_bytes` - a function that reads the first bytes of the file.
|
||||
Value path:Text name:Text extension:Text (read_first_bytes:Function=(_->Nothing))
|
||||
- path: the path or the URI of the file.
|
||||
- name: the name of the file.
|
||||
- extension: the extension of the file.
|
||||
- read_first_bytes: a function that reads the first bytes of the file.
|
||||
- content_type: the content type of the file.
|
||||
Value path:Text|Nothing name:Text|Nothing extension:Text|Nothing (read_first_bytes:(Integer -> Nothing | Vector Integer)=(_->Nothing))
|
||||
|
||||
## PRIVATE
|
||||
File_For_Read.from (that:File) = File_For_Read.Value that.path that.name that.extension that.read_first_bytes
|
||||
|
||||
## PRIVATE
|
||||
Metadata that may aid `read_stream`.
|
||||
type File_Format_Metadata
|
||||
## PRIVATE
|
||||
Arguments:
|
||||
- file_name: the name of the file.
|
||||
- content_type: the content type of the file.
|
||||
Value (file_name : Text | Nothing = Nothing) (content_type : Text | Nothing = Nothing)
|
||||
|
||||
## PRIVATE
|
||||
no_information : File_Format_Metadata
|
||||
no_information = File_Format_Metadata.Value
|
||||
|
@ -6,6 +6,9 @@ import project.Errors.Encoding_Error.Encoding_Error
|
||||
import project.Errors.Problem_Behavior.Problem_Behavior
|
||||
import project.Nothing.Nothing
|
||||
import project.Runtime.Managed_Resource.Managed_Resource
|
||||
import project.System.File.Advanced.Temporary_File.Temporary_File
|
||||
import project.System.File.File
|
||||
import project.System.File.File_Access.File_Access
|
||||
|
||||
polyglot java import java.io.InputStream as Java_Input_Stream
|
||||
polyglot java import org.enso.base.encoding.ReportingStreamDecoder
|
||||
@ -19,10 +22,10 @@ type Input_Stream
|
||||
|
||||
Given a Java InputStream, wraps as a Managed_Resource and returns a new
|
||||
Input_Stream.
|
||||
new : Java_Input_Stream -> Any -> Input_Stream
|
||||
new java_stream error_handler =
|
||||
new : Java_Input_Stream -> Any -> (Nothing | File | Temporary_File) -> Input_Stream
|
||||
new java_stream error_handler associated_file=Nothing =
|
||||
resource = Managed_Resource.register java_stream close_stream
|
||||
Input_Stream.Value resource error_handler
|
||||
Input_Stream.Value resource error_handler associated_file
|
||||
|
||||
## PRIVATE
|
||||
An input stream, allowing for interactive reading of contents.
|
||||
@ -31,7 +34,8 @@ type Input_Stream
|
||||
- stream_resource: The internal resource that represents the underlying
|
||||
stream.
|
||||
- error_handler: An error handler for IOExceptions thrown when reading.
|
||||
Value stream_resource error_handler
|
||||
- associated_file: The file associated with this stream, if any.
|
||||
Value stream_resource error_handler (associated_file:Nothing|File|Temporary_File)
|
||||
|
||||
## PRIVATE
|
||||
ADVANCED
|
||||
@ -100,6 +104,16 @@ type Input_Stream
|
||||
problems = Vector.from_polyglot_array results.problems . map Encoding_Error.Error
|
||||
on_problems.attach_problems_after results.result problems
|
||||
|
||||
## PRIVATE
|
||||
Reads the contents of this stream into a given file.
|
||||
write_to_file : File -> File
|
||||
write_to_file self file =
|
||||
result = self.with_java_stream java_input_stream->
|
||||
file.with_output_stream [File_Access.Create, File_Access.Truncate_Existing, File_Access.Write] output_stream->
|
||||
output_stream.with_java_stream java_output_stream->
|
||||
java_input_stream.transferTo java_output_stream
|
||||
result.if_not_error file
|
||||
|
||||
## PRIVATE
|
||||
Utility method for closing primitive Java streams. Provided to avoid
|
||||
accidental scope capture with `Managed_Resource` finalizers.
|
||||
|
@ -7,6 +7,7 @@ import Standard.Base.Data.Vector.No_Wrap
|
||||
import Standard.Base.Errors.Common.Additional_Warnings
|
||||
import Standard.Base.Errors.Common.Incomparable_Values
|
||||
import Standard.Base.Errors.Common.Index_Out_Of_Bounds
|
||||
import Standard.Base.Errors.Common.No_Such_Method
|
||||
import Standard.Base.Errors.Common.Out_Of_Memory
|
||||
import Standard.Base.Errors.Common.Type_Error
|
||||
import Standard.Base.Errors.File_Error.File_Error
|
||||
@ -2492,16 +2493,13 @@ type Table
|
||||
if base_format == Nothing then Error.throw (File_Error.Unsupported_Output_Type file Table) else
|
||||
self.write file format=base_format on_existing_file match_columns on_problems
|
||||
_ ->
|
||||
methods = if format == JSON_Format then ["write_table"] else Meta.meta (Meta.type_of format) . methods
|
||||
if methods.contains "write_table" . not then Error.throw (File_Error.Unsupported_Output_Type format Table) else
|
||||
effective_existing_behaviour = on_existing_file.get_effective_behavior file
|
||||
tgt_file = if Context.Output.is_enabled then file else
|
||||
should_copy_file = on_existing_file==Existing_File_Behavior.Append
|
||||
file.create_dry_run_file copy_original=should_copy_file
|
||||
|
||||
handle_no_write_method caught_panic =
|
||||
is_write = caught_panic.payload.method_name == "write_table"
|
||||
if is_write.not then Panic.throw caught_panic else
|
||||
Error.throw (File_Error.Unsupported_Output_Type format Table)
|
||||
Panic.catch No_Such_Method handler=handle_no_write_method <|
|
||||
to_write = if Context.Output.is_enabled then self else self.take 1000
|
||||
Context.Output.with_enabled <|
|
||||
format.write_table tgt_file to_write effective_existing_behaviour match_columns on_problems
|
||||
format.write_table file to_write on_existing_file match_columns on_problems
|
||||
|
||||
## Creates a text representation of the table using the CSV format.
|
||||
to_csv : Text
|
||||
|
@ -1,6 +1,7 @@
|
||||
from Standard.Base import all
|
||||
import Standard.Base.Network.HTTP.Response.Response
|
||||
import Standard.Base.System.File_Format.File_For_Read
|
||||
import Standard.Base.System.File_Format.File_Format_Metadata
|
||||
import Standard.Base.System.Input_Stream.Input_Stream
|
||||
from Standard.Base.Widget_Helpers import make_delimiter_selector
|
||||
|
||||
@ -98,9 +99,10 @@ type Delimited_Format
|
||||
Delimited_Reader.read_file self file on_problems
|
||||
|
||||
## PRIVATE
|
||||
Implements the `Data.parse` for this `File_Format`
|
||||
read_stream : Input_Stream -> Any
|
||||
read_stream self stream:Input_Stream =
|
||||
Implements decoding the format from a stream.
|
||||
read_stream : Input_Stream -> File_Format_Metadata -> Any
|
||||
read_stream self stream:Input_Stream (metadata : File_Format_Metadata = File_Format_Metadata.no_information) =
|
||||
_ = metadata
|
||||
Delimited_Reader.read_stream self stream on_problems=Report_Warning
|
||||
|
||||
## PRIVATE
|
||||
@ -108,8 +110,7 @@ type Delimited_Format
|
||||
Implements the `Table.write` for this `File_Format`.
|
||||
write_table : File -> Table -> Existing_File_Behavior -> Match_Columns -> Problem_Behavior -> File
|
||||
write_table self file table on_existing_file match_columns on_problems =
|
||||
r = Delimited_Writer.write_file table self file on_existing_file match_columns on_problems
|
||||
r.if_not_error file
|
||||
Delimited_Writer.write_file table self file on_existing_file match_columns on_problems
|
||||
|
||||
## PRIVATE
|
||||
Clone the instance with some properties overridden.
|
||||
|
@ -1,7 +1,9 @@
|
||||
from Standard.Base import all
|
||||
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
||||
import Standard.Base.System.File_Format.File_For_Read
|
||||
import Standard.Base.System.File_Format.File_Format_Metadata
|
||||
import Standard.Base.System.Input_Stream.Input_Stream
|
||||
from Standard.Base.System.File import find_extension_from_name
|
||||
|
||||
import project.Data.Match_Columns.Match_Columns
|
||||
import project.Data.Table.Table
|
||||
@ -15,12 +17,9 @@ import project.Internal.Excel_Writer
|
||||
should_treat_as_xls_format : (Boolean|Infer) -> File -> Boolean ! Illegal_Argument
|
||||
should_treat_as_xls_format xls_format file =
|
||||
if xls_format != Infer then xls_format else
|
||||
case file.extension of
|
||||
".xlsx" -> False
|
||||
".xlsm" -> False
|
||||
".xls" -> True
|
||||
".xlt" -> True
|
||||
_ -> Error.throw (Illegal_Argument.Error ("Unknown file extension for Excel file (" + file.extension + ")"))
|
||||
inferred_xls_format = xls_format_from_file_extension file.extension
|
||||
inferred_xls_format.if_nothing <|
|
||||
Error.throw (Illegal_Argument.Error ("Unknown file extension for Excel file (" + file.extension + ")"))
|
||||
|
||||
## Read the file to a `Table` from an Excel file
|
||||
type Excel_Format
|
||||
@ -67,11 +66,9 @@ type Excel_Format
|
||||
for_web : Text -> URI -> Excel_Format | Nothing
|
||||
for_web content_type uri =
|
||||
_ = [uri]
|
||||
parts = content_type.split ";" . map .trim
|
||||
case parts.first of
|
||||
"application/vnd.ms-excel" -> Excel_Format.Excel xls_format=True
|
||||
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" -> Excel_Format.Excel xls_format=False
|
||||
_ -> Nothing
|
||||
inferred_xls_format = xls_format_from_content_type content_type
|
||||
inferred_xls_format.if_not_nothing <|
|
||||
Excel_Format.Excel xls_format=inferred_xls_format
|
||||
|
||||
## PRIVATE
|
||||
ADVANCED
|
||||
@ -84,10 +81,13 @@ type Excel_Format
|
||||
_ -> Excel_Reader.read_file file self.section self.headers on_problems format
|
||||
|
||||
## PRIVATE
|
||||
Implements the `Data.parse` for this `File_Format`
|
||||
read_stream : Input_Stream -> Any
|
||||
read_stream self stream:Input_Stream =
|
||||
xls_format = if self.xls_format == Infer then False else self.xls_format
|
||||
Implements decoding the format from a stream.
|
||||
read_stream : Input_Stream -> File_Format_Metadata -> Any
|
||||
read_stream self stream:Input_Stream (metadata : File_Format_Metadata = File_Format_Metadata.no_information) =
|
||||
xls_format = if self.xls_format != Infer then self.xls_format else
|
||||
inferred_xls_format = xls_format_from_metadata metadata
|
||||
# If still unable to infer it, we default to False
|
||||
inferred_xls_format.if_nothing False
|
||||
|
||||
bad_format _ =
|
||||
message = case self.xls_format of
|
||||
@ -124,10 +124,41 @@ type Excel_Format
|
||||
write_table self file table on_existing_file match_columns on_problems =
|
||||
format = should_treat_as_xls_format self.xls_format file
|
||||
|
||||
r = case self.section of
|
||||
case self.section of
|
||||
Excel_Section.Sheet_Names -> Error.throw (Illegal_Argument.Error "Sheet_Names cannot be used for `write`.")
|
||||
Excel_Section.Range_Names -> Error.throw (Illegal_Argument.Error "Range_Names cannot be used for `write`.")
|
||||
Excel_Section.Workbook ->
|
||||
Excel_Writer.write_file file table on_existing_file (Excel_Section.Worksheet self.default_sheet) True match_columns on_problems format
|
||||
_ -> Excel_Writer.write_file file table on_existing_file self.section self.headers match_columns on_problems format
|
||||
r.if_not_error file
|
||||
|
||||
## PRIVATE
|
||||
Infers the xls format from the Content-Type.
|
||||
Returns Nothing if the content type is not a known Excel format.
|
||||
xls_format_from_content_type : Text -> Boolean | Nothing
|
||||
xls_format_from_content_type content_type =
|
||||
parts = content_type.split ";" . map .trim
|
||||
case parts.first of
|
||||
"application/vnd.ms-excel" -> True
|
||||
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" -> False
|
||||
_ -> Nothing
|
||||
|
||||
## PRIVATE
|
||||
Infers the xls format from the file extension.
|
||||
Returns Nothing if the content type is not a known Excel format.
|
||||
xls_format_from_file_extension : Text -> Boolean | Nothing
|
||||
xls_format_from_file_extension extension =
|
||||
case extension of
|
||||
".xlsx" -> False
|
||||
".xlsm" -> False
|
||||
".xls" -> True
|
||||
".xlt" -> True
|
||||
_ -> Nothing
|
||||
|
||||
## PRIVATE
|
||||
xls_format_from_metadata : File_Format_Metadata -> Boolean | Nothing
|
||||
xls_format_from_metadata metadata =
|
||||
from_content_type = metadata.content_type.if_not_nothing (xls_format_from_content_type metadata.content_type)
|
||||
from_content_type.if_nothing <|
|
||||
metadata.file_name.if_not_nothing <|
|
||||
extension = find_extension_from_name metadata.file_name
|
||||
xls_format_from_file_extension extension
|
||||
|
@ -1,8 +1,12 @@
|
||||
from Standard.Base import all
|
||||
import Standard.Base.Errors.File_Error.File_Error
|
||||
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
||||
import Standard.Base.Errors.Illegal_State.Illegal_State
|
||||
import Standard.Base.Metadata.Display
|
||||
import Standard.Base.Runtime.Managed_Resource.Managed_Resource
|
||||
import Standard.Base.Runtime.Ref.Ref
|
||||
import Standard.Base.System.Input_Stream.Input_Stream
|
||||
import Standard.Base.System.File.Advanced.Temporary_File.Temporary_File
|
||||
from Standard.Base.Data.Filter_Condition import sql_like_to_regex
|
||||
from Standard.Base.Metadata.Choice import Option
|
||||
from Standard.Base.Metadata.Widget import Single_Choice
|
||||
@ -13,8 +17,12 @@ import project.Excel.Excel_Section.Excel_Section
|
||||
import project.Internal.Excel_Reader
|
||||
import project.Internal.Java_Problems
|
||||
|
||||
polyglot java import java.io.File as Java_File
|
||||
polyglot java import org.apache.poi.ss.usermodel.Workbook
|
||||
polyglot java import org.enso.table.read.ExcelReader
|
||||
polyglot java import org.enso.table.excel.ExcelConnectionPool
|
||||
polyglot java import org.enso.table.excel.ExcelFileFormat
|
||||
polyglot java import org.enso.table.excel.ReadOnlyExcelConnection
|
||||
|
||||
type Excel_Workbook
|
||||
## PRIVATE
|
||||
@ -25,12 +33,24 @@ type Excel_Workbook
|
||||
- file: The file to load.
|
||||
- xls_format: Whether to use the old XLS format (default is XLSX).
|
||||
- headers: Whether to use the first row as headers (default is to infer).
|
||||
new : File | Text -> Boolean -> Boolean | Infer -> Excel_Workbook
|
||||
new : File | Text | Temporary_File -> Boolean -> Boolean | Infer -> Excel_Workbook
|
||||
new file xls_format=False headers=Infer =
|
||||
file_obj = File.new file . normalize
|
||||
File_Error.handle_java_exceptions file_obj <| Excel_Reader.handle_bad_format file_obj <|
|
||||
file_obj.with_input_stream [File_Access.Read] stream->
|
||||
Excel_Workbook.from_stream stream xls_format headers file_obj
|
||||
file_obj = case file of
|
||||
tmp : Temporary_File -> tmp
|
||||
other -> File.new other
|
||||
file_for_errors = if file_obj.is_a Temporary_File then Nothing else file_obj
|
||||
|
||||
continuation raw_file =
|
||||
format = if xls_format then ExcelFileFormat.XLS else ExcelFileFormat.XLSX
|
||||
File_Error.handle_java_exceptions raw_file <| Excel_Reader.handle_bad_format file_for_errors <| Illegal_State.handle_java_exception <|
|
||||
# The `java_file` depends on the liveness of the possible `Temporary_File` but that is ensured by storing the `file_obj` in the resulting workbook instance.
|
||||
java_file = Java_File.new raw_file.absolute.normalize.path
|
||||
excel_connection_resource = Managed_Resource.register (ExcelConnectionPool.INSTANCE.openReadOnlyConnection java_file format) close_connection
|
||||
Excel_Workbook.Value (Ref.new excel_connection_resource) file_obj xls_format headers
|
||||
|
||||
case file_obj of
|
||||
tmp : Temporary_File -> tmp.with_file continuation
|
||||
f : File -> continuation f
|
||||
|
||||
## PRIVATE
|
||||
ADVANCED
|
||||
@ -43,13 +63,12 @@ type Excel_Workbook
|
||||
- file: Optional file reference.
|
||||
from_stream : Input_Stream -> Boolean -> Boolean | Infer -> File | Nothing -> Excel_Workbook
|
||||
from_stream stream xls_format=False headers=Infer file=Nothing = Excel_Reader.handle_bad_format file <|
|
||||
stream.with_java_stream java_stream->
|
||||
workbook = ExcelReader.readWorkbook java_stream xls_format
|
||||
Excel_Workbook.Value workbook file xls_format headers
|
||||
temp_file = Temporary_File.from_stream_light stream
|
||||
Excel_Workbook.new temp_file xls_format headers
|
||||
|
||||
## PRIVATE
|
||||
Creates an Excel_Workbook connection.
|
||||
Value workbook:Workbook (file:(File|Nothing)) xls_format:Boolean headers:(Boolean|Infer)
|
||||
Value (excel_connection_resource_ref : Ref (Managed_Resource ReadOnlyExcelConnection)) (file:(File|Temporary_File|Nothing)) xls_format:Boolean headers:(Boolean|Infer)
|
||||
|
||||
## Returns the list of databases (or catalogs) for the connection.
|
||||
databases : Nothing
|
||||
@ -57,7 +76,11 @@ type Excel_Workbook
|
||||
|
||||
## Returns the name of the current database (or catalog).
|
||||
database : Text
|
||||
database self = if self.file.is_nothing then "" else self.file.path
|
||||
database self = case self.file of
|
||||
regular_file : File -> regular_file.path
|
||||
## A Temporary_File is still visualized as no path, because the fact that the workbook is stored as a file is
|
||||
just an implementation detail - it is coming form a stream so there is no logical file it is associated with.
|
||||
_ -> ""
|
||||
|
||||
## Returns a new Connection with the specified database set as default.
|
||||
|
||||
@ -92,22 +115,24 @@ type Excel_Workbook
|
||||
## GROUP Standard.Base.Metadata
|
||||
Gets the number of sheets.
|
||||
sheet_count : Integer
|
||||
sheet_count self = self.workbook.getNumberOfSheets
|
||||
sheet_count self = self.with_java_workbook .getNumberOfSheets
|
||||
|
||||
## GROUP Standard.Base.Metadata
|
||||
Gets the names of all the sheets.
|
||||
sheet_names : Vector Text
|
||||
sheet_names self = Vector.from_polyglot_array (ExcelReader.readSheetNames self.workbook)
|
||||
sheet_names self = self.with_java_workbook java_workbook->
|
||||
Vector.from_polyglot_array (ExcelReader.readSheetNames java_workbook)
|
||||
|
||||
## GROUP Standard.Base.Metadata
|
||||
Gets the number of named ranges.
|
||||
named_ranges_count : Integer
|
||||
named_ranges_count self = self.workbook.getNumberOfNames
|
||||
named_ranges_count self = self.with_java_workbook .getNumberOfNames
|
||||
|
||||
## GROUP Standard.Base.Metadata
|
||||
Gets the names of all the named ranges.
|
||||
named_ranges : Vector Text
|
||||
named_ranges self = Vector.from_polyglot_array (ExcelReader.readRangeNames self.workbook)
|
||||
named_ranges self = self.with_java_workbook java_workbook->
|
||||
Vector.from_polyglot_array (ExcelReader.readRangeNames java_workbook)
|
||||
|
||||
## Gets a list of the table types.
|
||||
table_types : Vector Text
|
||||
@ -167,9 +192,9 @@ type Excel_Workbook
|
||||
read self query (limit : Integer | Nothing = Nothing) =
|
||||
java_headers = Excel_Reader.make_java_headers self.headers
|
||||
java_table = Java_Problems.with_problem_aggregator Problem_Behavior.Report_Warning java_problem_aggregator->
|
||||
case query of
|
||||
_ : Excel_Range -> ExcelReader.readRange self.workbook query.java_range java_headers 0 limit java_problem_aggregator
|
||||
_ : Text -> ExcelReader.readRangeByName self.workbook query java_headers 0 limit java_problem_aggregator
|
||||
self.with_java_workbook java_workbook-> case query of
|
||||
_ : Excel_Range -> ExcelReader.readRange java_workbook query.java_range java_headers 0 limit java_problem_aggregator
|
||||
_ : Text -> ExcelReader.readRangeByName java_workbook query java_headers 0 limit java_problem_aggregator
|
||||
Table.Value java_table
|
||||
|
||||
## GROUP Standard.Base.Input
|
||||
@ -194,13 +219,14 @@ type Excel_Workbook
|
||||
names.at (sheet - 1)
|
||||
_ -> Error.throw (Illegal_Argument.Error "Worksheet must be either Text or an Integer.")
|
||||
java_table = Java_Problems.with_problem_aggregator Problem_Behavior.Report_Warning java_problem_aggregator->
|
||||
ExcelReader.readRangeByName self.workbook sheet_name java_headers skip_rows row_limit java_problem_aggregator
|
||||
self.with_java_workbook java_workbook->
|
||||
ExcelReader.readRangeByName java_workbook sheet_name java_headers skip_rows row_limit java_problem_aggregator
|
||||
Table.Value java_table
|
||||
Excel_Section.Cell_Range address skip_rows row_limit ->
|
||||
java_table = Java_Problems.with_problem_aggregator Problem_Behavior.Report_Warning java_problem_aggregator->
|
||||
case address of
|
||||
_ : Excel_Range -> ExcelReader.readRange self.workbook address.java_range java_headers skip_rows row_limit java_problem_aggregator
|
||||
_ : Text -> ExcelReader.readRangeByName self.workbook address java_headers skip_rows row_limit java_problem_aggregator
|
||||
self.with_java_workbook java_workbook-> case address of
|
||||
_ : Excel_Range -> ExcelReader.readRange java_workbook address.java_range java_headers skip_rows row_limit java_problem_aggregator
|
||||
_ : Text -> ExcelReader.readRangeByName java_workbook address java_headers skip_rows row_limit java_problem_aggregator
|
||||
Table.Value java_table
|
||||
|
||||
## ALIAS get, worksheet
|
||||
@ -213,3 +239,51 @@ type Excel_Workbook
|
||||
sheet : Text | Integer -> Table
|
||||
sheet self name:(Text | Integer) =
|
||||
self.read_section (Excel_Section.Worksheet name 0 Nothing)
|
||||
|
||||
## ADVANCED
|
||||
Closes the workbook, releasing any resources it holds.
|
||||
|
||||
This method may be used to release the underlying workbook file,
|
||||
e.g. to be able to delete it.
|
||||
|
||||
After this method is called, this instance is not usable any more - any
|
||||
operation on it will throw an `Illegal_State` error.
|
||||
close : Nothing
|
||||
close self =
|
||||
case self.excel_connection_resource_ref.get of
|
||||
Nothing -> Nothing
|
||||
resource ->
|
||||
resource.finalize
|
||||
self.excel_connection_resource_ref.put Nothing
|
||||
Nothing
|
||||
|
||||
## Returns a simple text description of the workbook.
|
||||
to_text : Text
|
||||
to_text self =
|
||||
associated_regular_file = case self.file of
|
||||
regular_file : File -> " (" + regular_file.name + ")"
|
||||
_ -> ""
|
||||
"Excel_Workbook"+associated_regular_file
|
||||
|
||||
## PRIVATE
|
||||
Provides a JS object representation for use in visualizations.
|
||||
to_js_object : JS_Object
|
||||
to_js_object self =
|
||||
headers = if self.headers == Infer then "Infer" else self.headers
|
||||
additional_fields = case self.file of
|
||||
regular_file : File -> [["file", regular_file.path]]
|
||||
_ -> []
|
||||
JS_Object.from_pairs <|
|
||||
[["type", "Excel_Workbook"], ["headers", headers], ["xls_format", self.xls_format]] + additional_fields
|
||||
|
||||
## PRIVATE
|
||||
with_java_workbook : (Workbook -> Any) -> Any ! Illegal_State
|
||||
with_java_workbook self f = Illegal_State.handle_java_exception <|
|
||||
case self.excel_connection_resource_ref.get of
|
||||
Nothing ->
|
||||
Error.throw (Illegal_State.Error "The workbook is being used after it was closed.")
|
||||
resource -> resource.with connection->
|
||||
connection.withWorkbook f
|
||||
|
||||
## PRIVATE
|
||||
close_connection c = c.close
|
||||
|
@ -40,8 +40,9 @@ write_file table format file on_existing_file match_columns on_problems =
|
||||
Existing_File_Behavior.Append ->
|
||||
append_to_file table format file match_columns on_problems
|
||||
_ ->
|
||||
on_existing_file.write file stream->
|
||||
write_to_stream table format stream on_problems related_file=file
|
||||
on_existing_file.write_handling_dry_run file effective_file-> stream->
|
||||
r = write_to_stream table format stream on_problems related_file=effective_file
|
||||
r.if_not_error effective_file
|
||||
|
||||
## PRIVATE
|
||||
Handles appending to an existing file, ensuring that the columns are matched
|
||||
@ -85,8 +86,9 @@ append_to_file table format file match_columns on_problems =
|
||||
False -> format.without_headers
|
||||
needs_leading_newline =
|
||||
metadata.has_any_content && metadata.ends_with_newline.not
|
||||
Existing_File_Behavior.Append.write file stream->
|
||||
write_to_stream reordered_table amended_format stream on_problems related_file=file separator_override=effective_line_separator needs_leading_newline=needs_leading_newline
|
||||
Existing_File_Behavior.Append.write_handling_dry_run file effective_file-> stream->
|
||||
r = write_to_stream reordered_table amended_format stream on_problems related_file=effective_file separator_override=effective_line_separator needs_leading_newline=needs_leading_newline
|
||||
r.if_not_error effective_file
|
||||
|
||||
## PRIVATE
|
||||
Returns a Text value representing the table in the delimited format.
|
||||
|
@ -9,9 +9,11 @@ import project.Excel.Excel_Section.Excel_Section
|
||||
import project.Internal.Java_Problems
|
||||
from project.Errors import Duplicate_Output_Column_Names, Empty_Sheet_Error, Invalid_Column_Names, Invalid_Location
|
||||
|
||||
polyglot java import java.io.File as Java_File
|
||||
polyglot java import org.apache.poi.poifs.filesystem.NotOLE2FileException
|
||||
polyglot java import org.apache.poi.UnsupportedFileFormatException
|
||||
polyglot java import org.enso.table.error.InvalidLocationException
|
||||
polyglot java import org.enso.table.excel.ExcelFileFormat
|
||||
polyglot java import org.enso.table.excel.ExcelHeaders
|
||||
polyglot java import org.enso.table.read.ExcelReader
|
||||
|
||||
@ -25,15 +27,16 @@ make_java_headers headers = case headers of
|
||||
|
||||
## PRIVATE
|
||||
Handle and map the Java errors when reading an Excel file
|
||||
handle_reader : File -> (Input_Stream -> (Table | Vector)) -> (Table | Vector)
|
||||
handle_reader : File -> (Java_File -> (Table | Vector)) -> (Table | Vector)
|
||||
handle_reader file reader =
|
||||
bad_argument caught_panic = Error.throw (Invalid_Location.Error caught_panic.payload.getCause)
|
||||
handle_bad_argument = Panic.catch InvalidLocationException handler=bad_argument
|
||||
|
||||
# TODO [RW] handle InvalidFormatException
|
||||
File_Error.handle_java_exceptions file <| handle_bad_argument <|
|
||||
handle_bad_format file <| Empty_Sheet_Error.handle_java_exception <|
|
||||
file.with_input_stream [File_Access.Read] stream->
|
||||
stream.with_java_stream reader
|
||||
java_file = Java_File.new file.path
|
||||
reader java_file
|
||||
|
||||
## PRIVATE
|
||||
Reads an input Excel file according to the provided section.
|
||||
@ -49,21 +52,22 @@ handle_reader file reader =
|
||||
otherwise reads in Excel 2007+ format.
|
||||
read_file : File -> Excel_Section -> (Boolean|Infer) -> Problem_Behavior -> Boolean -> (Table | Vector)
|
||||
read_file file section headers on_problems xls_format=False =
|
||||
reader stream = case section of
|
||||
file_format = if xls_format then ExcelFileFormat.XLS else ExcelFileFormat.XLSX
|
||||
reader java_file = case section of
|
||||
Excel_Section.Workbook -> Error.throw (Illegal_Argument.Error "Cannot read an entire workbook.")
|
||||
Excel_Section.Sheet_Names -> Vector.from_polyglot_array (ExcelReader.readSheetNames stream xls_format)
|
||||
Excel_Section.Range_Names -> Vector.from_polyglot_array (ExcelReader.readRangeNames stream xls_format)
|
||||
Excel_Section.Sheet_Names -> Vector.from_polyglot_array (ExcelReader.readSheetNames java_file file_format)
|
||||
Excel_Section.Range_Names -> Vector.from_polyglot_array (ExcelReader.readRangeNames java_file file_format)
|
||||
Excel_Section.Worksheet sheet skip_rows row_limit ->
|
||||
Java_Problems.with_problem_aggregator on_problems java_problem_aggregator->
|
||||
java_table = case sheet of
|
||||
_ : Integer -> ExcelReader.readSheetByIndex stream sheet (make_java_headers headers) skip_rows row_limit xls_format java_problem_aggregator
|
||||
_ : Text -> ExcelReader.readSheetByName stream sheet (make_java_headers headers) skip_rows row_limit xls_format java_problem_aggregator
|
||||
_ : Integer -> ExcelReader.readSheetByIndex java_file sheet (make_java_headers headers) skip_rows row_limit file_format java_problem_aggregator
|
||||
_ : Text -> ExcelReader.readSheetByName java_file sheet (make_java_headers headers) skip_rows row_limit file_format java_problem_aggregator
|
||||
Table.Value java_table
|
||||
Excel_Section.Cell_Range address skip_rows row_limit ->
|
||||
Java_Problems.with_problem_aggregator on_problems java_problem_aggregator->
|
||||
java_table = case address of
|
||||
_ : Excel_Range -> ExcelReader.readRange stream address.java_range (make_java_headers headers) skip_rows row_limit xls_format java_problem_aggregator
|
||||
_ : Text -> ExcelReader.readRangeByName stream address (make_java_headers headers) skip_rows row_limit xls_format java_problem_aggregator
|
||||
_ : Excel_Range -> ExcelReader.readRange java_file address.java_range (make_java_headers headers) skip_rows row_limit file_format java_problem_aggregator
|
||||
_ : Text -> ExcelReader.readRangeByName java_file address (make_java_headers headers) skip_rows row_limit file_format java_problem_aggregator
|
||||
Table.Value java_table
|
||||
|
||||
handle_reader file reader
|
||||
|
@ -1,6 +1,9 @@
|
||||
from Standard.Base import all
|
||||
import Standard.Base.Errors.File_Error.File_Error
|
||||
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
||||
import Standard.Base.Errors.Illegal_State.Illegal_State
|
||||
import Standard.Base.Runtime.Context
|
||||
import Standard.Base.Runtime.Managed_Resource.Managed_Resource
|
||||
|
||||
import project.Data.Match_Columns.Match_Columns
|
||||
import project.Data.Table.Table
|
||||
@ -9,23 +12,27 @@ import project.Excel.Excel_Section.Excel_Section
|
||||
import project.Internal.Excel_Reader
|
||||
from project.Errors import Column_Count_Mismatch, Column_Name_Mismatch, Existing_Data, Invalid_Location, Range_Exceeded
|
||||
|
||||
polyglot java import java.io.File as Java_File
|
||||
polyglot java import java.lang.IllegalStateException
|
||||
polyglot java import org.apache.poi.ss.usermodel.Workbook
|
||||
polyglot java import org.enso.base.DryRunFileManager
|
||||
polyglot java import org.enso.table.error.ExistingDataException
|
||||
polyglot java import org.enso.table.error.InvalidLocationException
|
||||
polyglot java import org.enso.table.error.RangeExceededException
|
||||
polyglot java import org.enso.table.excel.ExcelConnectionPool
|
||||
polyglot java import org.enso.table.excel.ExcelFileFormat
|
||||
polyglot java import org.enso.table.read.ExcelReader
|
||||
polyglot java import org.enso.table.write.ExcelWriter
|
||||
polyglot java import org.enso.table.write.ExistingDataMode
|
||||
polyglot java import org.enso.table.write.ExistingFileBehavior
|
||||
|
||||
## PRIVATE
|
||||
make_java_existing_data_mode : Existing_File_Behavior -> Match_Columns -> ExistingDataMode
|
||||
make_java_existing_data_mode on_existing_file match_columns = case on_existing_file of
|
||||
Existing_File_Behavior.Error -> ExistingDataMode.ERROR
|
||||
Existing_File_Behavior.Overwrite -> ExistingDataMode.REPLACE
|
||||
Existing_File_Behavior.Backup -> ExistingDataMode.REPLACE
|
||||
Existing_File_Behavior.Append -> case match_columns of
|
||||
Match_Columns.By_Name -> ExistingDataMode.APPEND_BY_NAME
|
||||
Match_Columns.By_Position -> ExistingDataMode.APPEND_BY_INDEX
|
||||
make_java_existing_file_behavior : Existing_File_Behavior -> ExistingFileBehavior
|
||||
make_java_existing_file_behavior on_existing_file = case on_existing_file of
|
||||
Existing_File_Behavior.Error -> ExistingFileBehavior.ERROR
|
||||
Existing_File_Behavior.Overwrite -> ExistingFileBehavior.OVERWRITE
|
||||
Existing_File_Behavior.Backup -> ExistingFileBehavior.BACKUP
|
||||
Existing_File_Behavior.Append -> ExistingFileBehavior.APPEND
|
||||
|
||||
## PRIVATE
|
||||
Writes a Table to an Excel file.
|
||||
@ -44,31 +51,81 @@ make_java_existing_data_mode on_existing_file match_columns = case on_existing_f
|
||||
- xls_format: If `true`, the file is written in the legacy XLS format.
|
||||
Otherwise, the file is written in the modern XLSX format.
|
||||
write_file : File -> Table -> Existing_File_Behavior -> Excel_Section -> (Boolean|Infer) -> Match_Columns -> Problem_Behavior -> Boolean -> File
|
||||
write_file file table on_existing_file section headers match_columns on_problems xls_format=False =
|
||||
_ = [on_problems]
|
||||
## If file does not exist or is empty then create a new workbook.
|
||||
workbook = if file.exists.not || (file.size == 0) then ExcelWriter.createWorkbook xls_format else
|
||||
Excel_Reader.handle_reader file stream->(ExcelReader.getWorkbook stream xls_format)
|
||||
write_file (file : File) (table : Table) (on_existing_file : Existing_File_Behavior) (section : Excel_Section) (headers : Boolean|Infer) (match_columns:Match_Columns) (on_problems:Problem_Behavior) (xls_format:Boolean) =
|
||||
_ = on_problems
|
||||
if (on_existing_file == Existing_File_Behavior.Error) && file.exists then Error.throw (File_Error.Already_Exists file) else
|
||||
java_file = file_as_java file
|
||||
file_format = if xls_format then ExcelFileFormat.XLS else ExcelFileFormat.XLSX
|
||||
|
||||
ExcelWriter.setEnsoToTextCallbackIfUnset (.to_text)
|
||||
modification_strategy = prepare_file_modification_strategy table section headers on_existing_file match_columns
|
||||
is_dry_run = Context.Output.is_enabled.not
|
||||
|
||||
modification_strategy.if_not_error <| Panic.recover File_Error <| File_Error.handle_java_exceptions file <| handle_writer <|
|
||||
possible_backup_file = if on_existing_file == Existing_File_Behavior.Backup then file_as_java (find_bak_file file) else Nothing
|
||||
possible_dry_run_file = if is_dry_run.not then Nothing else
|
||||
preexisting_dry_run_file = DryRunFileManager.preExistingTemporaryFile file.absolute.normalize.path
|
||||
preexisting_dry_run_file.if_not_nothing <|
|
||||
Java_File.new preexisting_dry_run_file
|
||||
accompanying_files = [possible_backup_file, possible_dry_run_file].filter (!= Nothing) . filter (!= java_file)
|
||||
|
||||
ExcelConnectionPool.INSTANCE.lockForWriting java_file file_format accompanying_files write_helper-> Context.Output.with_enabled <|
|
||||
temp_file = if is_dry_run then file.create_dry_run_file copy_original=False else
|
||||
find_temp_file file
|
||||
|
||||
## We 'sync' the temp_file to reflect the original target file - if it exists we copy its contents; if the source
|
||||
doesn't exist, we ensure that the temp file is not polluted with data from previous (dry-run) writes.
|
||||
Panic.rethrow <|
|
||||
if file.exists then (file.copy_to temp_file replace_existing=True) else (temp_file.delete_if_exists)
|
||||
write_helper.writeWorkbook (file_as_java temp_file) modification_strategy
|
||||
result_file = if is_dry_run then temp_file else
|
||||
needs_backup = on_existing_file == Existing_File_Behavior.Backup
|
||||
if needs_backup && file.exists then
|
||||
backup_file = find_bak_file file
|
||||
## We can move instead of copying, because the very next line overwrites the original file
|
||||
anyway, and a move should be more efficient.
|
||||
Panic.rethrow <| file.move_to backup_file replace_existing=True
|
||||
|
||||
Panic.rethrow <| temp_file.move_to file replace_existing=True
|
||||
file
|
||||
result_file
|
||||
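A rough Java sketch of the `Existing_File_Behavior.Backup`, non-dry-run branch of the workflow above: populate a sibling temporary file, run the write against it, move the original aside as a `.bak` file, then move the temporary file over the target. It uses placeholder naming and a generic write callback; only the order of operations mirrors the Enso code.

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.util.function.Consumer;

final class BackupThenReplaceSketch {
  // Writes through a temporary sibling file so that a failed write never corrupts the target.
  static void writeWithBackup(Path target, Consumer<Path> writeAction) throws IOException {
    Path temp = target.resolveSibling(target.getFileName() + ".temp." + System.nanoTime());
    Path backup = target.resolveSibling(target.getFileName() + ".bak");

    // 'Sync' the temp file with the current target: copy existing contents, or start clean.
    if (Files.exists(target)) {
      Files.copy(target, temp, StandardCopyOption.REPLACE_EXISTING);
    } else {
      Files.deleteIfExists(temp);
    }

    writeAction.accept(temp); // the actual workbook write happens against the temp file

    if (Files.exists(target)) {
      // Move (not copy) the original aside - the next line replaces it anyway.
      Files.move(target, backup, StandardCopyOption.REPLACE_EXISTING);
    }
    Files.move(temp, target, StandardCopyOption.REPLACE_EXISTING);
  }
}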
|
||||
## PRIVATE
|
||||
find_temp_file : File -> File
|
||||
find_temp_file base_file =
|
||||
parent = base_file.absolute.normalize.parent
|
||||
name = base_file.name
|
||||
go i =
|
||||
temp_file = parent / (name + ".temp." + System.nano_time.to_text)
|
||||
if temp_file.exists then go (i + 1) else temp_file
|
||||
go 0
|
||||
|
||||
## PRIVATE
|
||||
find_bak_file : File -> File
|
||||
find_bak_file base_file =
|
||||
parent = base_file.absolute.normalize.parent
|
||||
parent / (base_file.name + ".bak")
|
||||
|
||||
## PRIVATE
|
||||
file_as_java : File -> Java_File
|
||||
file_as_java file =
|
||||
Java_File.new file.absolute.normalize.path
|
||||
|
||||
## PRIVATE
|
||||
prepare_file_modification_strategy : Table -> Excel_Section -> Boolean|Infer -> Existing_File_Behavior -> Match_Columns -> (Workbook -> Nothing)
|
||||
prepare_file_modification_strategy table section headers on_existing_file match_columns =
|
||||
existing_data_mode = make_java_existing_data_mode on_existing_file match_columns
|
||||
java_headers = Excel_Reader.make_java_headers headers
|
||||
ExcelWriter.setEnsoToTextCallbackIfUnset (.to_text)
|
||||
result = handle_writer <| case section of
|
||||
case section of
|
||||
Excel_Section.Worksheet sheet skip_rows row_limit ->
|
||||
ExcelWriter.writeTableToSheet workbook sheet existing_data_mode skip_rows table.java_table row_limit java_headers
|
||||
Excel_Section.Cell_Range address skip_rows row_limit -> case address of
|
||||
Excel_Range.Value java_range -> ExcelWriter.writeTableToRange workbook java_range existing_data_mode skip_rows table.java_table row_limit java_headers
|
||||
_ : Text -> ExcelWriter.writeTableToRange workbook address existing_data_mode skip_rows table.java_table row_limit java_headers
|
||||
_ : Excel_Section -> Error.throw (Illegal_Argument.Error "Only a Worksheet or Cell_Range is allowed in write_file")
|
||||
|
||||
if result.is_error then result else
|
||||
write_stream stream = stream.with_java_stream java_stream->
|
||||
workbook.write java_stream
|
||||
case on_existing_file of
|
||||
Existing_File_Behavior.Append ->
|
||||
## Special handling - we have successfully added the extra sheet/range, so now overwrite the file using the Backup behavior.
|
||||
Existing_File_Behavior.Backup.write file write_stream
|
||||
_ -> on_existing_file.write file write_stream
|
||||
workbook_to_modify->
|
||||
ExcelWriter.writeTableToSheet workbook_to_modify sheet existing_data_mode skip_rows table.java_table row_limit java_headers
|
||||
Excel_Section.Cell_Range address skip_rows row_limit ->
|
||||
java_range = case address of
|
||||
Excel_Range.Value java_range -> java_range
|
||||
text : Text -> text
|
||||
workbook_to_modify->
|
||||
ExcelWriter.writeTableToRange workbook_to_modify java_range existing_data_mode skip_rows table.java_table row_limit java_headers
|
||||
|
||||
## PRIVATE
|
||||
Handle and map the Java errors when writing an Excel file
|
||||
@ -90,3 +147,13 @@ handle_writer ~writer =
|
||||
Column_Count_Mismatch.handle_java_exception <| handle_bad_location <|
|
||||
Illegal_Argument.handle_java_exception <| handle_range_exceeded <| handle_existing_data <|
|
||||
writer
|
||||
|
||||
## PRIVATE
|
||||
make_java_existing_data_mode : Existing_File_Behavior -> Match_Columns -> ExistingDataMode
|
||||
make_java_existing_data_mode on_existing_file match_columns = case on_existing_file of
|
||||
Existing_File_Behavior.Error -> ExistingDataMode.ERROR
|
||||
Existing_File_Behavior.Overwrite -> ExistingDataMode.REPLACE
|
||||
Existing_File_Behavior.Backup -> ExistingDataMode.REPLACE
|
||||
Existing_File_Behavior.Append -> case match_columns of
|
||||
Match_Columns.By_Name -> ExistingDataMode.APPEND_BY_NAME
|
||||
Match_Columns.By_Position -> ExistingDataMode.APPEND_BY_INDEX
|
||||
|
@ -39,4 +39,12 @@ public class DryRunFileManager {
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
public static String preExistingTemporaryFile(String path) {
|
||||
if (files.containsValue(path)) {
|
||||
return path;
|
||||
}
|
||||
|
||||
return files.get(path);
|
||||
}
|
||||
}
|
||||
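A hedged sketch of how a caller can use `preExistingTemporaryFile` when output is disabled: reuse the temporary file that was already registered for the target path, or otherwise allocate a fresh one. The `createFreshDryRunFile` fallback is hypothetical - it stands in for what the Enso side does via `file.create_dry_run_file`.

import java.io.File;
import org.enso.base.DryRunFileManager;

final class DryRunLookupSketch {
  // Resolves the file that a dry-run write should target for the given output path.
  static File resolveDryRunTarget(String targetPath) {
    String existing = DryRunFileManager.preExistingTemporaryFile(targetPath);
    if (existing != null) {
      // A dry-run temp file was already allocated for this target - keep writing there.
      return new File(existing);
    }
    return createFreshDryRunFile(targetPath);
  }

  // Hypothetical placeholder: the real logic registers a managed temporary file for targetPath.
  private static File createFreshDryRunFile(String targetPath) {
    throw new UnsupportedOperationException("illustrative stub");
  }
}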
|
@ -1,6 +1,6 @@
|
||||
package org.enso.table.error;
|
||||
|
||||
public class InvalidLocationException extends Exception {
|
||||
public class InvalidLocationException extends RuntimeException {
|
||||
public InvalidLocationException(String errorMessage) {
|
||||
super(errorMessage);
|
||||
}
|
||||
|
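The switch above from a checked `Exception` to a `RuntimeException` presumably exists so that reader code can throw `InvalidLocationException` from inside the `Function<Workbook, T>` callbacks used by the new connection pool, where checked exceptions cannot escape. A small sketch of that pattern; the lookup body mirrors the sheet-by-name code later in this diff:

import java.util.function.Function;
import org.apache.poi.ss.usermodel.Workbook;
import org.enso.table.error.InvalidLocationException;

final class UncheckedLocationErrorSketch {
  // Function.apply declares no checked exceptions, so only unchecked ones may escape the lambda.
  static Function<Workbook, Integer> sheetIndexLookup(String sheetName) {
    return workbook -> {
      int index = workbook.getSheetIndex(sheetName);
      if (index == -1) {
        throw new InvalidLocationException("Unknown sheet '" + sheetName + "'.");
      }
      return index;
    };
  }
}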
@ -0,0 +1,295 @@
|
||||
package org.enso.table.excel;
|
||||
|
||||
import org.apache.poi.UnsupportedFileFormatException;
|
||||
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
|
||||
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
|
||||
import org.apache.poi.openxml4j.exceptions.OpenXML4JRuntimeException;
|
||||
import org.apache.poi.openxml4j.opc.OPCPackage;
|
||||
import org.apache.poi.openxml4j.opc.PackageAccess;
|
||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||
import org.apache.poi.ss.usermodel.Workbook;
|
||||
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
||||
|
||||
import java.io.BufferedOutputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.nio.file.AccessMode;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.function.Function;
|
||||
|
||||
public class ExcelConnectionPool {
|
||||
public static final ExcelConnectionPool INSTANCE = new ExcelConnectionPool();
|
||||
|
||||
private ExcelConnectionPool() {
|
||||
}
|
||||
|
||||
public ReadOnlyExcelConnection openReadOnlyConnection(File file, ExcelFileFormat format) throws IOException {
|
||||
synchronized (this) {
|
||||
if (isCurrentlyWriting) {
|
||||
throw new IllegalStateException("Cannot open a read-only Excel connection while an Excel file is being " +
|
||||
"written to. This is a bug in the Table library.");
|
||||
}
|
||||
|
||||
if (!file.exists()) {
|
||||
throw new FileNotFoundException(file.toString());
|
||||
}
|
||||
|
||||
String key = getKeyForFile(file);
|
||||
ConnectionRecord existingRecord = records.get(key);
|
||||
if (existingRecord != null) {
|
||||
// Adapt the existing record
|
||||
if (existingRecord.format != format) {
|
||||
throw new ExcelFileFormatMismatchException("Requesting to open " + file + " as " + format + ", but it was " +
|
||||
"already opened as " + existingRecord.format + ".");
|
||||
}
|
||||
|
||||
existingRecord.refCount++;
|
||||
|
||||
return new ReadOnlyExcelConnection(this, key, existingRecord);
|
||||
} else {
|
||||
// Create the new record
|
||||
ConnectionRecord record = new ConnectionRecord();
|
||||
record.refCount = 1;
|
||||
record.file = file;
|
||||
record.format = format;
|
||||
record.workbook = openWorkbook(file, format, false);
|
||||
records.put(key, record);
|
||||
return new ReadOnlyExcelConnection(this, key, record);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static class WriteHelper {
|
||||
private final ExcelFileFormat format;
|
||||
|
||||
public WriteHelper(ExcelFileFormat format) {
|
||||
this.format = format;
|
||||
}
|
||||
|
||||
public <R> R writeWorkbook(File file, Function<Workbook, R> writeAction) throws IOException {
|
||||
boolean preExistingFile = file.exists() && Files.size(file.toPath()) > 0;
|
||||
|
||||
try (Workbook workbook = preExistingFile ? ExcelConnectionPool.openWorkbook(file, format, true) :
|
||||
createEmptyWorkbook(format)) {
|
||||
R result = writeAction.apply(workbook);
|
||||
|
||||
if (preExistingFile) {
|
||||
// Save the file in place.
|
||||
switch (workbook) {
|
||||
case HSSFWorkbook wb -> {
|
||||
wb.write();
|
||||
}
|
||||
case XSSFWorkbook wb -> {
|
||||
try {
|
||||
wb.write(null);
|
||||
} catch (OpenXML4JRuntimeException e) {
|
||||
// Ignore: Workaround for bug https://bz.apache.org/bugzilla/show_bug.cgi?id=59252
|
||||
}
|
||||
}
|
||||
default -> throw new IllegalStateException("Unknown workbook type: " + workbook.getClass());
|
||||
}
|
||||
} else {
|
||||
try (OutputStream fileOut = Files.newOutputStream(file.toPath())) {
|
||||
try (BufferedOutputStream workbookOut = new BufferedOutputStream(fileOut)) {
|
||||
workbook.write(workbookOut);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Executes a write action, ensuring that any other Excel connections are closed during the action, so that it can
|
||||
* modify the file. Any existing connections are re-opened after the operation finishes (regardless of whether it
|
||||
* succeeded or failed).
|
||||
* <p>
|
||||
* The action gets a {@link WriteHelper} object that can be used to open the workbook for reading or writing. The
|
||||
* action must take care to close that workbook before returning.
|
||||
* <p>
|
||||
* Additional files that should be closed during the write action can be specified in the {@code accompanyingFiles}
|
||||
* argument. These may be related temporary files that are written during the write operation and also need to get
|
||||
* 'unlocked' for the time of write.
|
||||
*/
|
||||
public <R> R lockForWriting(File file, ExcelFileFormat format, File[] accompanyingFiles,
|
||||
Function<WriteHelper, R> action) throws IOException {
|
||||
synchronized (this) {
|
||||
if (isCurrentlyWriting) {
|
||||
throw new IllegalStateException("Another Excel write is in progress on the same thread. This is a bug in the " +
|
||||
"Table library.");
|
||||
}
|
||||
|
||||
isCurrentlyWriting = true;
|
||||
try {
|
||||
String key = getKeyForFile(file);
|
||||
ArrayList<ConnectionRecord> recordsToReopen = new ArrayList<>(1 + accompanyingFiles.length);
|
||||
|
||||
try {
|
||||
// Close the existing connection, if any - to avoid the write operation failing due to the file being locked.
|
||||
ConnectionRecord existingRecord = records.get(key);
|
||||
if (existingRecord != null) {
|
||||
existingRecord.close();
|
||||
recordsToReopen.add(existingRecord);
|
||||
}
|
||||
|
||||
verifyIsWritable(file);
|
||||
|
||||
for (File accompanyingFile : accompanyingFiles) {
|
||||
String accompanyingKey = getKeyForFile(accompanyingFile);
|
||||
ConnectionRecord accompanyingRecord = records.get(accompanyingKey);
|
||||
if (accompanyingRecord != null) {
|
||||
accompanyingRecord.close();
|
||||
recordsToReopen.add(accompanyingRecord);
|
||||
}
|
||||
|
||||
verifyIsWritable(accompanyingFile);
|
||||
}
|
||||
|
||||
WriteHelper helper = new WriteHelper(format);
|
||||
return action.apply(helper);
|
||||
} finally {
|
||||
// Reopen the closed connections
|
||||
for (ConnectionRecord record : recordsToReopen) {
|
||||
record.reopen(false);
|
||||
}
|
||||
}
|
||||
|
||||
} finally {
|
||||
isCurrentlyWriting = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void verifyIsWritable(File file) throws IOException {
|
||||
Path path = file.toPath();
|
||||
|
||||
if (!Files.exists(path)) {
|
||||
// If the file does not exist, we assume that we can create it.
|
||||
return;
|
||||
}
|
||||
|
||||
path.getFileSystem().provider().checkAccess(path, AccessMode.WRITE, AccessMode.READ);
|
||||
}
|
||||
|
||||
private String getKeyForFile(File file) throws IOException {
|
||||
return file.getCanonicalPath();
|
||||
}
|
||||
|
||||
void release(ReadOnlyExcelConnection excelConnection) throws IOException {
|
||||
synchronized (this) {
|
||||
excelConnection.record.refCount--;
|
||||
if (excelConnection.record.refCount <= 0) {
|
||||
excelConnection.record.close();
|
||||
records.remove(excelConnection.key);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private final HashMap<String, ConnectionRecord> records = new HashMap<>();
|
||||
private boolean isCurrentlyWriting = false;
|
||||
|
||||
static class ConnectionRecord {
|
||||
private int refCount;
|
||||
private File file;
|
||||
private ExcelFileFormat format;
|
||||
private Workbook workbook;
|
||||
private IOException initializationException = null;
|
||||
|
||||
<T> T withWorkbook(Function<Workbook, T> action) throws IOException {
|
||||
synchronized (this) {
|
||||
return action.apply(accessCurrentWorkbook());
|
||||
}
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
synchronized (this) {
|
||||
if (workbook != null) {
|
||||
workbook.close();
|
||||
}
|
||||
|
||||
workbook = null;
|
||||
}
|
||||
}
|
||||
|
||||
void reopen(boolean throwOnFailure) throws IOException {
|
||||
synchronized (this) {
|
||||
if (workbook != null) {
|
||||
throw new IllegalStateException("The workbook is already open.");
|
||||
}
|
||||
|
||||
try {
|
||||
workbook = openWorkbook(file, format, false);
|
||||
} catch (IOException e) {
|
||||
initializationException = e;
|
||||
if (throwOnFailure) {
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private Workbook accessCurrentWorkbook() throws IOException {
|
||||
synchronized (this) {
|
||||
if (workbook == null) {
|
||||
if (initializationException != null) {
|
||||
throw initializationException;
|
||||
} else {
|
||||
throw new IllegalStateException("The workbook is used after being closed.");
|
||||
}
|
||||
}
|
||||
|
||||
return workbook;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static Workbook openWorkbook(File file, ExcelFileFormat format, boolean writeAccess) throws IOException {
|
||||
return switch (format) {
|
||||
case XLS -> {
|
||||
boolean readOnly = !writeAccess;
|
||||
POIFSFileSystem fs = new POIFSFileSystem(file, readOnly);
|
||||
try {
|
||||
// If the initialization succeeds, the POIFSFileSystem will be closed by the HSSFWorkbook::close.
|
||||
yield new HSSFWorkbook(fs);
|
||||
} catch (Exception e) {
|
||||
fs.close();
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
case XLSX -> {
|
||||
try {
|
||||
PackageAccess access = writeAccess ? PackageAccess.READ_WRITE : PackageAccess.READ;
|
||||
OPCPackage pkg = OPCPackage.open(file, access);
|
||||
try {
|
||||
yield new XSSFWorkbook(pkg);
|
||||
} catch (IOException e) {
|
||||
pkg.close();
|
||||
throw e;
|
||||
}
|
||||
} catch (InvalidFormatException e) {
|
||||
throw new IOException("Invalid format encountered when opening the file " + file + " as " + format + ".", e);
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
private static Workbook createEmptyWorkbook(ExcelFileFormat format) {
|
||||
return switch (format) {
|
||||
case XLS -> new HSSFWorkbook();
|
||||
case XLSX -> new XSSFWorkbook();
|
||||
};
|
||||
}
|
||||
|
||||
public static class ExcelFileFormatMismatchException extends UnsupportedFileFormatException {
|
||||
public ExcelFileFormatMismatchException(String message) {
|
||||
super(message);
|
||||
}
|
||||
}
|
||||
}
|
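A hedged usage sketch of the write path, roughly mirroring how the Enso `write_file` drives the pool: lock the target (plus any backup or dry-run companion files), then let the `WriteHelper` save the modified workbook. For brevity it writes straight to the target, whereas the Enso code routes the write through a temporary file, and the sheet modification is only a placeholder.

import java.io.File;
import java.io.IOException;
import java.io.UncheckedIOException;
import org.enso.table.excel.ExcelConnectionPool;
import org.enso.table.excel.ExcelFileFormat;

final class LockForWritingSketch {
  static void saveSheet(File target) throws IOException {
    Boolean done = ExcelConnectionPool.INSTANCE.lockForWriting(
        target,
        ExcelFileFormat.XLSX,
        new File[0], // no backup / dry-run companions in this sketch
        helper -> {
          try {
            return helper.writeWorkbook(target, workbook -> {
              // Placeholder for the real modification strategy, e.g. ExcelWriter.writeTableToSheet.
              workbook.createSheet("EnsoSheet");
              return Boolean.TRUE;
            });
          } catch (IOException e) {
            throw new UncheckedIOException(e); // Function.apply cannot throw checked IOException
          }
        });
    assert done;
  }
}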
@ -0,0 +1,6 @@
|
||||
package org.enso.table.excel;
|
||||
|
||||
public enum ExcelFileFormat {
|
||||
XLS,
|
||||
XLSX
|
||||
}
|
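For reference, the Enso bindings in this diff derive the enum constant from the pre-existing `xls_format : Boolean` flag; a trivial Java equivalent of that mapping:

import org.enso.table.excel.ExcelFileFormat;

final class FormatFlagSketch {
  // Mirrors the Enso-side mapping: the legacy xls_format boolean selects the enum constant.
  static ExcelFileFormat fromLegacyFlag(boolean xlsFormat) {
    return xlsFormat ? ExcelFileFormat.XLS : ExcelFileFormat.XLSX;
  }
}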
@ -0,0 +1,38 @@
|
||||
package org.enso.table.excel;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.function.Function;
|
||||
import org.apache.poi.ss.usermodel.Workbook;
|
||||
|
||||
public class ReadOnlyExcelConnection implements AutoCloseable {
|
||||
|
||||
private final ExcelConnectionPool myPool;
|
||||
final String key;
|
||||
ExcelConnectionPool.ConnectionRecord record;
|
||||
|
||||
ReadOnlyExcelConnection(
|
||||
ExcelConnectionPool myPool, String key, ExcelConnectionPool.ConnectionRecord record) {
|
||||
this.myPool = myPool;
|
||||
this.key = key;
|
||||
this.record = record;
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized void close() throws IOException {
|
||||
if (record == null) {
|
||||
// already closed
|
||||
return;
|
||||
}
|
||||
|
||||
myPool.release(this);
|
||||
record = null;
|
||||
}
|
||||
|
||||
public synchronized <T> T withWorkbook(Function<Workbook, T> f) throws IOException {
|
||||
if (record == null) {
|
||||
throw new IllegalStateException("ReadOnlyExcelConnection is being used after it was closed.");
|
||||
}
|
||||
|
||||
return record.withWorkbook(f);
|
||||
}
|
||||
}
|
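And the matching read-side usage, as a sketch: connections are reference-counted per canonical path, `withWorkbook` hands the caller the shared POI `Workbook`, and closing the connection releases the pool entry (a second `close` is a no-op thanks to the null check above).

import java.io.File;
import java.io.IOException;
import org.enso.table.excel.ExcelConnectionPool;
import org.enso.table.excel.ExcelFileFormat;
import org.enso.table.excel.ReadOnlyExcelConnection;

final class ReadConnectionSketch {
  static String[] sheetNames(File file) throws IOException {
    try (ReadOnlyExcelConnection connection =
        ExcelConnectionPool.INSTANCE.openReadOnlyConnection(file, ExcelFileFormat.XLSX)) {
      // The callback runs against the pooled workbook; other readers may share the same record.
      return connection.withWorkbook(workbook -> {
        String[] names = new String[workbook.getNumberOfSheets()];
        for (int i = 0; i < names.length; i++) {
          names[i] = workbook.getSheetName(i);
        }
        return names;
      });
    }
  }
}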
@ -1,16 +1,16 @@
|
||||
package org.enso.table.read;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.IntStream;
|
||||
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
|
||||
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
|
||||
import org.apache.poi.ss.usermodel.Name;
|
||||
import org.apache.poi.ss.usermodel.Workbook;
|
||||
import org.apache.poi.ss.util.CellReference;
|
||||
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
||||
import org.enso.table.data.column.builder.Builder;
|
||||
import org.enso.table.data.column.builder.InferredBuilder;
|
||||
import org.enso.table.data.column.storage.ObjectStorage;
|
||||
@ -18,38 +18,29 @@ import org.enso.table.data.table.Column;
|
||||
import org.enso.table.data.table.Table;
|
||||
import org.enso.table.error.EmptySheetException;
|
||||
import org.enso.table.error.InvalidLocationException;
|
||||
import org.enso.table.excel.ExcelConnectionPool;
|
||||
import org.enso.table.excel.ExcelFileFormat;
|
||||
import org.enso.table.excel.ExcelHeaders;
|
||||
import org.enso.table.excel.ExcelRange;
|
||||
import org.enso.table.excel.ExcelRow;
|
||||
import org.enso.table.excel.ExcelSheet;
|
||||
import org.enso.table.excel.ReadOnlyExcelConnection;
|
||||
import org.enso.table.problems.ProblemAggregator;
|
||||
import org.graalvm.polyglot.Context;
|
||||
|
||||
/** A table reader for MS Excel files. */
|
||||
public class ExcelReader {
|
||||
/**
|
||||
* Loads a workbook (either XLSX or XLS format) from the specified input stream.
|
||||
*
|
||||
* @param stream an {@link InputStream} allowing to read the XLS(X) file contents.
|
||||
* @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format).
|
||||
* @return a {@link Workbook} containing the specified data.
|
||||
* @throws IOException when the input stream cannot be read.
|
||||
*/
|
||||
public static Workbook readWorkbook(InputStream stream, boolean xls_format) throws IOException {
|
||||
return getWorkbook(stream, xls_format);
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads a list of sheet names for the specified XLSX/XLS file into an array.
|
||||
*
|
||||
* @param stream an {@link InputStream} allowing to read the XLS(X) file contents.
|
||||
* @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format).
|
||||
* @param file the {@link File} to load
|
||||
* @param format specifies the file format
|
||||
* @return a String[] containing the sheet names.
|
||||
* @throws IOException when the input stream cannot be read.
|
||||
* @throws IOException when the action fails
|
||||
*/
|
||||
public static String[] readSheetNames(InputStream stream, boolean xls_format) throws IOException {
|
||||
Workbook workbook = getWorkbook(stream, xls_format);
|
||||
return readSheetNames(workbook);
|
||||
public static String[] readSheetNames(File file, ExcelFileFormat format)
|
||||
throws IOException, InvalidFormatException {
|
||||
return withWorkbook(file, format, ExcelReader::readSheetNames);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -72,14 +63,14 @@ public class ExcelReader {
|
||||
/**
|
||||
* Reads a list of range names for the specified XLSX/XLS file into an array.
|
||||
*
|
||||
* @param stream an {@link InputStream} allowing to read the XLSX file contents.
|
||||
* @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format).
|
||||
* @param file the {@link File} to load
|
||||
* @param format specifies the file format
|
||||
* @return a String[] containing the range names.
|
||||
* @throws IOException when the input stream cannot be read.
|
||||
* @throws IOException when the action fails
|
||||
*/
|
||||
public static String[] readRangeNames(InputStream stream, boolean xls_format) throws IOException {
|
||||
Workbook workbook = getWorkbook(stream, xls_format);
|
||||
return readRangeNames(workbook);
|
||||
public static String[] readRangeNames(File file, ExcelFileFormat format)
|
||||
throws IOException, InvalidFormatException {
|
||||
return withWorkbook(file, format, ExcelReader::readRangeNames);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -96,26 +87,28 @@ public class ExcelReader {
|
||||
/**
|
||||
* Reads a sheet by name for the specified XLSX/XLS file into a table.
|
||||
*
|
||||
* @param stream an {@link InputStream} allowing to read the XLSX file contents.
|
||||
* @param file the {@link File} to load
|
||||
* @param sheetName the name of the sheet to read.
|
||||
* @param skip_rows skip rows from the top of the sheet.
|
||||
* @param row_limit maximum number of rows to read.
|
||||
* @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format).
|
||||
* @param format specifies the file format
|
||||
* @return a {@link Table} containing the specified data.
|
||||
* @throws IOException when the input stream cannot be read.
|
||||
* @throws InvalidLocationException when the sheet name is not found.
|
||||
*/
|
||||
public static Table readSheetByName(
|
||||
InputStream stream,
|
||||
File file,
|
||||
String sheetName,
|
||||
ExcelHeaders.HeaderBehavior headers,
|
||||
int skip_rows,
|
||||
Integer row_limit,
|
||||
boolean xls_format,
|
||||
ExcelFileFormat format,
|
||||
ProblemAggregator problemAggregator)
|
||||
throws IOException, InvalidLocationException {
|
||||
Workbook workbook = getWorkbook(stream, xls_format);
|
||||
|
||||
return withWorkbook(
|
||||
file,
|
||||
format,
|
||||
workbook -> {
|
||||
int sheetIndex = workbook.getSheetIndex(sheetName);
|
||||
if (sheetIndex == -1) {
|
||||
throw new InvalidLocationException("Unknown sheet '" + sheetName + "'.");
|
||||
@ -129,31 +122,34 @@ public class ExcelReader {
|
||||
skip_rows,
|
||||
row_limit == null ? Integer.MAX_VALUE : row_limit,
|
||||
problemAggregator);
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads a sheet by index for the specified XLSX/XLS file into a table.
|
||||
*
|
||||
* @param stream an {@link InputStream} allowing to read the XLSX file contents.
|
||||
* @param file the {@link File} to load
|
||||
* @param index the 1-based index to the sheet.
|
||||
* @param skip_rows skip rows from the top of the sheet.
|
||||
* @param row_limit maximum number of rows to read.
|
||||
* @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format).
|
||||
* @param format specifies the file format
|
||||
* @return a {@link Table} containing the specified data.
|
||||
* @throws IOException when the input stream cannot be read.
|
||||
* @throws InvalidLocationException when the sheet index is not valid.
|
||||
*/
|
||||
public static Table readSheetByIndex(
|
||||
InputStream stream,
|
||||
File file,
|
||||
int index,
|
||||
ExcelHeaders.HeaderBehavior headers,
|
||||
int skip_rows,
|
||||
Integer row_limit,
|
||||
boolean xls_format,
|
||||
ExcelFileFormat format,
|
||||
ProblemAggregator problemAggregator)
|
||||
throws IOException, InvalidLocationException {
|
||||
Workbook workbook = getWorkbook(stream, xls_format);
|
||||
|
||||
return withWorkbook(
|
||||
file,
|
||||
format,
|
||||
workbook -> {
|
||||
int sheetCount = workbook.getNumberOfSheets();
|
||||
if (index < 1 || index > sheetCount) {
|
||||
throw new InvalidLocationException(
|
||||
@ -168,34 +164,38 @@ public class ExcelReader {
|
||||
skip_rows,
|
||||
row_limit == null ? Integer.MAX_VALUE : row_limit,
|
||||
problemAggregator);
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads a range by sheet name, named range or address for the specified XLSX/XLS file into a
|
||||
* table.
|
||||
*
|
||||
* @param stream an {@link InputStream} allowing to read the XLSX file contents.
|
||||
* @param file the {@link File} to load
|
||||
* @param rangeNameOrAddress sheet name, range name or address to read.
|
||||
* @param headers specifies whether the first row should be used as headers.
|
||||
* @param skip_rows skip rows from the top of the range.
|
||||
* @param row_limit maximum number of rows to read.
|
||||
* @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format).
|
||||
* @param format specifies the file format
|
||||
* @return a {@link Table} containing the specified data.
|
||||
* @throws IOException when the input stream cannot be read.
|
||||
* @throws InvalidLocationException when the range name or address is not found.
|
||||
*/
|
||||
public static Table readRangeByName(
|
||||
InputStream stream,
|
||||
File file,
|
||||
String rangeNameOrAddress,
|
||||
ExcelHeaders.HeaderBehavior headers,
|
||||
int skip_rows,
|
||||
Integer row_limit,
|
||||
boolean xls_format,
|
||||
ExcelFileFormat format,
|
||||
ProblemAggregator problemAggregator)
|
||||
throws IOException, InvalidLocationException {
|
||||
Workbook workbook = getWorkbook(stream, xls_format);
|
||||
return readRangeByName(
|
||||
workbook, rangeNameOrAddress, headers, skip_rows, row_limit, problemAggregator);
|
||||
return withWorkbook(
|
||||
file,
|
||||
format,
|
||||
workbook ->
|
||||
readRangeByName(
|
||||
workbook, rangeNameOrAddress, headers, skip_rows, row_limit, problemAggregator));
|
||||
}
|
||||
|
||||
/**
|
||||
@ -245,42 +245,36 @@ public class ExcelReader {
|
||||
/**
|
||||
* Reads a range for the specified XLSX/XLS file into a table.
|
||||
*
|
||||
* @param stream an {@link InputStream} allowing to read the XLSX file contents.
|
||||
* @param file the {@link File} to load
|
||||
* @param excelRange the range to read.
|
||||
* @param skip_rows skip rows from the top of the range.
|
||||
* @param row_limit maximum number of rows to read.
|
||||
* @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format).
|
||||
* @param format specifies the file format
|
||||
* @return a {@link Table} containing the specified data.
|
||||
* @throws IOException when the input stream cannot be read.
|
||||
*/
|
||||
public static Table readRange(
|
||||
InputStream stream,
|
||||
File file,
|
||||
ExcelRange excelRange,
|
||||
ExcelHeaders.HeaderBehavior headers,
|
||||
int skip_rows,
|
||||
Integer row_limit,
|
||||
boolean xls_format,
|
||||
ExcelFileFormat format,
|
||||
ProblemAggregator problemAggregator)
|
||||
throws IOException, InvalidLocationException {
|
||||
return readRange(
|
||||
getWorkbook(stream, xls_format),
|
||||
excelRange,
|
||||
headers,
|
||||
skip_rows,
|
||||
row_limit,
|
||||
problemAggregator);
|
||||
return withWorkbook(
|
||||
file,
|
||||
format,
|
||||
workbook ->
|
||||
readRange(workbook, excelRange, headers, skip_rows, row_limit, problemAggregator));
|
||||
}
|
||||
|
||||
/**
|
||||
* Load a workbook into memory from an InputStream.
|
||||
*
|
||||
* @param stream an {@link InputStream} allowing to read the XLSX file contents.
|
||||
* @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format).
|
||||
* @return a {@link Workbook} containing the specified data.
|
||||
* @throws IOException when the input stream cannot be read or an incorrect format occurs.
|
||||
*/
|
||||
public static Workbook getWorkbook(InputStream stream, boolean xls_format) throws IOException {
|
||||
return xls_format ? new HSSFWorkbook(stream) : new XSSFWorkbook(stream);
|
||||
private static <T> T withWorkbook(File file, ExcelFileFormat format, Function<Workbook, T> action)
|
||||
throws IOException {
|
||||
try (ReadOnlyExcelConnection connection =
|
||||
ExcelConnectionPool.INSTANCE.openReadOnlyConnection(file, format)) {
|
||||
return connection.withWorkbook(action);
|
||||
}
|
||||
}
|
||||
|
||||
private static Table readRange(
|
||||
|
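A short usage sketch of the reworked reader facade above, assuming only the signatures visible in this diff: the new entry points take a backing `File` plus the `ExcelFileFormat` enum instead of an `InputStream` and a boolean flag.

import java.io.File;
import java.io.IOException;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.enso.table.excel.ExcelFileFormat;
import org.enso.table.read.ExcelReader;

final class ReaderApiSketch {
  // Lists the sheet names of a workbook file using the file-backed reader API.
  static String[] listSheets(String path, boolean legacyXls) throws IOException, InvalidFormatException {
    ExcelFileFormat format = legacyXls ? ExcelFileFormat.XLS : ExcelFileFormat.XLSX;
    return ExcelReader.readSheetNames(new File(path), format);
  }
}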
@ -145,15 +145,6 @@ public class ExcelWriter {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates an empty workbook.
|
||||
* @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format).
|
||||
* @return a {@link Workbook} containing the specified data.
|
||||
*/
|
||||
public static Workbook createWorkbook(boolean xls_format) {
|
||||
return xls_format ? new HSSFWorkbook() : new XSSFWorkbook();
|
||||
}
|
||||
|
||||
private static void appendRangeWithTable(Workbook workbook, ExcelRange range, ExistingDataMode existingDataMode, Table table, Long rowLimit, ExcelHeaders.HeaderBehavior headers, ExcelSheet sheet, ExcelRange expanded)
|
||||
throws RangeExceededException, ExistingDataException, ColumnNameMismatchException, ColumnCountMismatchException {
|
||||
Table mappedTable = switch (existingDataMode) {
|
||||
|
@ -0,0 +1,8 @@
|
||||
package org.enso.table.write;
|
||||
|
||||
public enum ExistingFileBehavior {
|
||||
OVERWRITE,
|
||||
BACKUP,
|
||||
APPEND,
|
||||
ERROR
|
||||
}
|
@ -954,7 +954,6 @@ spec setup =
|
||||
op c 1 . should_fail_with Invalid_Value_Type
|
||||
op a True . should_fail_with Invalid_Value_Type
|
||||
|
||||
Test.group prefix+"Column Operations - Text Replace" <|
|
||||
do_replace column term new_text case_sensitivity=Case_Sensitivity.Default only_first=False expected =
|
||||
case setup.is_database of
|
||||
True ->
|
||||
@ -1042,7 +1041,7 @@ spec setup =
|
||||
col = table_builder [["x", ['hello Hello', Nothing]]] . filter "x" (Filter_Condition.Is_Nothing) . at "x"
|
||||
do_replace col 'hello' 'bye' expected=[Nothing]
|
||||
|
||||
if setup.is_database then
|
||||
if setup.is_database then Test.group prefix+"replace: DB specific edge-cases" <|
|
||||
col = table_builder [["A", ["Alpha", "Bravo", "Charlie", "Delta", "Echo", "Foxtrot"]]] . at 'A'
|
||||
Test.specify "should not allow Case_Sensitivity.Insensitive with a non-default locale" <|
|
||||
locale = Locale.new "en" "GB" "UTF-8"
|
||||
|
@ -2,7 +2,10 @@ from Standard.Base import all
|
||||
import Standard.Base.Errors.Common.Dry_Run_Operation
|
||||
import Standard.Base.Errors.File_Error.File_Error
|
||||
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
||||
import Standard.Base.Errors.Illegal_State.Illegal_State
|
||||
import Standard.Base.Runtime.Context
|
||||
import Standard.Base.Runtime.Managed_Resource.Managed_Resource
|
||||
import Standard.Base.Runtime.Ref.Ref
|
||||
|
||||
from Standard.Table import Table, Match_Columns, Excel, Excel_Range, Data_Formatter, Sheet_Names, Range_Names, Worksheet, Cell_Range, Delimited, Excel_Workbook
|
||||
|
||||
@ -15,6 +18,8 @@ import Standard.Examples
|
||||
|
||||
import project.Util
|
||||
|
||||
polyglot java import org.enso.table_test_helpers.RandomHelpers
|
||||
|
||||
spec_fmt header file read_method sheet_count=5 =
|
||||
Test.group header <|
|
||||
Test.specify "should read a workbook in" <|
|
||||
@ -70,58 +75,67 @@ spec_fmt header file read_method sheet_count=5 =
|
||||
|
||||
spec_write suffix test_sheet_name =
|
||||
Test.group ("Write " + suffix + " Files") <|
|
||||
out = enso_project.data / ('out.' + suffix)
|
||||
out_bak = enso_project.data / ('out.' + suffix + '.bak')
|
||||
table = enso_project.data/'varied_column.csv' . read
|
||||
clothes = enso_project.data/'clothes.csv' . read
|
||||
sub_clothes = clothes.select_columns [0, 1]
|
||||
|
||||
counter = Ref.new 0
|
||||
create_out =
|
||||
i = counter.get + 1
|
||||
counter.put i
|
||||
f = enso_project.data / "transient" / ("out" + i.to_text + "." + suffix)
|
||||
Panic.rethrow f.delete_if_exists
|
||||
f
|
||||
|
||||
Test.specify 'should write a table to non-existent file as a new sheet with headers; and return the file object on success' <|
|
||||
out.delete_if_exists
|
||||
out = create_out
|
||||
table.write out on_problems=Report_Error . should_succeed . should_equal out
|
||||
written = out.read
|
||||
written.sheet_count . should_equal 1
|
||||
written.sheet_names . should_equal ['EnsoSheet']
|
||||
written.read 'EnsoSheet' . should_equal table
|
||||
out.delete_if_exists
|
||||
written.close
|
||||
out.delete_if_exists . should_succeed
|
||||
|
||||
Test.specify 'should write a table to non-existent file in append mode as a new sheet with headers' <|
|
||||
out.delete_if_exists
|
||||
out = create_out
|
||||
table.write out on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed
|
||||
written = out.read
|
||||
written.sheet_count . should_equal 1
|
||||
written.sheet_names . should_equal ['EnsoSheet']
|
||||
written.read 'EnsoSheet' . should_equal table
|
||||
out.delete_if_exists
|
||||
written.close
|
||||
out.delete_if_exists . should_succeed
|
||||
|
||||
Test.specify 'should write a table to existing file overriding EnsoSheet' <|
|
||||
out.delete_if_exists
|
||||
out = create_out
|
||||
table.write out on_problems=Report_Error . should_succeed
|
||||
table.write out on_problems=Report_Error . should_succeed
|
||||
written = out.read
|
||||
written.sheet_count . should_equal 1
|
||||
written.sheet_names . should_equal ['EnsoSheet']
|
||||
written.read 'EnsoSheet' . should_equal table
|
||||
out.delete_if_exists
|
||||
written_workbook = out.read
|
||||
written_workbook.sheet_count . should_equal 1
|
||||
written_workbook.sheet_names . should_equal ['EnsoSheet']
|
||||
written_workbook.read 'EnsoSheet' . should_equal table
|
||||
written_workbook.close
|
||||
out.delete_if_exists . should_succeed
|
||||
|
||||
Test.specify 'should write a table to existing file in overwrite mode as a new sheet with headers' <|
|
||||
out.delete_if_exists
|
||||
out = create_out
|
||||
(enso_project.data / test_sheet_name) . copy_to out
|
||||
table.write out (Excel (Worksheet "Another")) on_existing_file=Existing_File_Behavior.Overwrite on_problems=Report_Error . should_succeed
|
||||
written = out.read (Excel (Worksheet "Another"))
|
||||
written.should_equal table
|
||||
out.delete_if_exists
|
||||
out.delete_if_exists . should_succeed
|
||||
|
||||
Test.specify 'should write a table to existing file in overwrite mode as a new sheet without headers' <|
|
||||
out.delete_if_exists
|
||||
out = create_out
|
||||
(enso_project.data / test_sheet_name) . copy_to out
|
||||
table.write out (Excel (Worksheet "NoHeaders")) on_existing_file=Existing_File_Behavior.Overwrite on_problems=Report_Error . should_succeed
|
||||
written = out.read (Excel (Worksheet "NoHeaders"))
|
||||
written.should_equal (table.rename_columns ['A', 'B', 'C', 'D', 'E', 'F'])
|
||||
out.delete_if_exists
|
||||
out.delete_if_exists . should_succeed
|
||||
|
||||
Test.specify 'should create new sheets at the start if index is 0' <|
|
||||
out.delete_if_exists
|
||||
out = create_out
|
||||
table.write out (Excel (Worksheet 0)) on_problems=Report_Error . should_succeed
|
||||
clothes.write out (Excel (Worksheet 0)) on_problems=Report_Error . should_succeed
|
||||
read_1 = out.read (Excel (Worksheet "Sheet1"))
|
||||
@ -130,214 +144,318 @@ spec_write suffix test_sheet_name =
|
||||
read_2 . should_equal clothes
|
||||
read_3 = out.read (Excel (Sheet_Names))
|
||||
read_3 . should_equal ["Sheet2", "Sheet1"]
|
||||
out.delete_if_exists
|
||||
out.delete_if_exists . should_succeed
|
||||
|
||||
Test.specify 'should write a table to specific single cell location of an existing sheet' <|
|
||||
out.delete_if_exists
|
||||
out = create_out
|
||||
(enso_project.data / test_sheet_name) . copy_to out
|
||||
table.write out (Excel (Cell_Range "Another!G1")) on_problems=Report_Error . should_succeed
|
||||
written = out.read (Excel (Cell_Range "Another!G1"))
|
||||
written.should_equal table
|
||||
out.delete_if_exists
|
||||
out.delete_if_exists . should_succeed
|
||||
|
||||
Test.specify 'should clear out an existing fixed range and replace' <|
|
||||
out.delete_if_exists
|
||||
out = create_out
|
||||
(enso_project.data / test_sheet_name) . copy_to out
|
||||
sub_clothes.write out (Excel (Cell_Range "Another!A1:D20")) on_problems=Report_Error . should_succeed
|
||||
written = out.read (Excel (Cell_Range "Another!A1"))
|
||||
written.should_equal sub_clothes
|
||||
out.delete_if_exists
|
||||
out.delete_if_exists . should_succeed
|
||||
|
||||
Test.specify 'should clear out an existing range and replace' <|
|
||||
out.delete_if_exists
|
||||
out = create_out
|
||||
(enso_project.data / test_sheet_name) . copy_to out
|
||||
sub_clothes.write out (Excel (Cell_Range "Another!A1")) on_problems=Report_Error . should_succeed
|
||||
written = out.read (Excel (Cell_Range "Another!A1"))
|
||||
written.should_equal sub_clothes
|
||||
out.delete_if_exists
|
||||
out.delete_if_exists . should_succeed
|
||||
|
||||
Test.specify 'should result in Invalid_Location error if trying to write in a bad location' <|
|
||||
out.delete_if_exists
|
||||
out = create_out
|
||||
(enso_project.data / test_sheet_name) . copy_to out
|
||||
sub_clothes.write out (Excel (Cell_Range "DoesNotExist!A1")) . should_fail_with Invalid_Location
|
||||
sub_clothes.write out (Excel (Cell_Range "DoesNotExist!A1:B2")) . should_fail_with Invalid_Location
|
||||
sub_clothes.write out (Excel (Cell_Range "SillyRangeName")) . should_fail_with Invalid_Location
|
||||
out.delete_if_exists
|
||||
out.delete_if_exists . should_succeed
|
||||
|
||||
Test.specify 'should result in Range_Exceeded error if trying to write in too small a range' <|
|
||||
out.delete_if_exists
|
||||
out = create_out
|
||||
(enso_project.data / test_sheet_name) . copy_to out
|
||||
sub_clothes.write out (Excel (Cell_Range "Another!A1:B2")) . should_fail_with Range_Exceeded
|
||||
out.delete_if_exists
|
||||
out.delete_if_exists . should_succeed
|
||||
|
||||
Test.specify 'should result in Existing_Data error if in Error mode and trying to replace' <|
|
||||
out.delete_if_exists
|
||||
out = create_out
|
||||
(enso_project.data / test_sheet_name) . copy_to out
|
||||
lmd = out.last_modified_time
|
||||
sub_clothes.write out (Excel (Worksheet 1)) on_existing_file=Existing_File_Behavior.Error . should_fail_with Existing_Data
|
||||
sub_clothes.write out (Excel (Worksheet "Another")) on_existing_file=Existing_File_Behavior.Error . should_fail_with Existing_Data
|
||||
sub_clothes.write out (Excel (Cell_Range "Another!A1")) on_existing_file=Existing_File_Behavior.Error . should_fail_with Existing_Data
|
||||
sub_clothes.write out (Excel (Cell_Range "Sheet1!A9")) on_existing_file=Existing_File_Behavior.Error . should_fail_with Existing_Data
|
||||
out.last_modified_time.should_equal lmd
|
||||
out.delete_if_exists
|
||||
r1 = sub_clothes.write out (Excel (Worksheet 1)) on_existing_file=Existing_File_Behavior.Error
|
||||
r1.should_fail_with File_Error
|
||||
r1.catch.should_be_a File_Error.Already_Exists
|
||||
|
||||
Test.specify 'should not allow adding a new sheet if in Error mode and not clashing' <|
|
||||
out.delete_if_exists
|
||||
sub_clothes.write out (Excel (Worksheet "Another")) on_existing_file=Existing_File_Behavior.Error . should_fail_with File_Error
|
||||
sub_clothes.write out (Excel (Cell_Range "Another!A1")) on_existing_file=Existing_File_Behavior.Error . should_fail_with File_Error
|
||||
sub_clothes.write out (Excel (Cell_Range "Sheet1!A9")) on_existing_file=Existing_File_Behavior.Error . should_fail_with File_Error
|
||||
|
||||
Test.with_clue "the original file should remain unmodified: " <|
|
||||
out.last_modified_time.should_equal lmd
|
||||
out.delete_if_exists . should_succeed
|
||||
|
||||
Test.specify 'should not allow adding a new sheet if in Error mode, even if sheet is not clashing' <|
|
||||
out = create_out
|
||||
(enso_project.data / test_sheet_name) . copy_to out
|
||||
lmd = out.last_modified_time
|
||||
result = sub_clothes.write out (Excel (Worksheet "Testing")) on_existing_file=Existing_File_Behavior.Error
|
||||
result.should_fail_with File_Error
|
||||
result.catch.should_be_a File_Error.Already_Exists
|
||||
Test.with_clue "the original file should remain unmodified: " <|
|
||||
out.last_modified_time.should_equal lmd
|
||||
out.delete_if_exists
|
||||
out.delete_if_exists . should_succeed
|
||||
|
||||
Test.specify 'should write a table to non-existent file as a new sheet without headers' <|
|
||||
out.delete_if_exists
|
||||
out = create_out
|
||||
table.write out (Excel (Worksheet "Sheet1") headers=False) on_problems=Report_Error . should_succeed
|
||||
written = out.read
|
||||
written.sheet_count . should_equal 1
|
||||
written.sheet_names . should_equal ['Sheet1']
|
||||
written.read 'Sheet1' . should_equal (table.rename_columns ['A', 'B', 'C', 'D', 'E', 'F'])
|
||||
out.delete_if_exists
|
||||
|
||||
# We need to close the workbook to be able to delete it.
|
||||
written.close
|
||||
out.delete_if_exists . should_succeed
|
||||
|
||||
Test.specify 'should be able to append to a sheet by name' <|
|
||||
out.delete_if_exists
|
||||
out = create_out
|
||||
(enso_project.data / test_sheet_name) . copy_to out
|
||||
extra_another = Table.new [['AA', ['d', 'e']], ['BB',[4, 5]], ['CC',[True, False]], ['DD', ['2022-01-20', '2022-01-21']]]
|
||||
expected = Table.new [['AA', ['a','b','c','d', 'e']], ['BB',[1,2,3,4,5]], ['CC',[True, False, False, True, False]]]
|
||||
extra_another.write out (Excel (Worksheet "Another")) on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed
|
||||
written = out.read (Excel (Worksheet "Another")) . select_columns [0, 1, 2]
|
||||
written.should_equal expected
|
||||
out.delete_if_exists
|
||||
out.delete_if_exists . should_succeed
|
||||
|
||||
Test.specify 'should be able to append to a sheet by position' <|
|
||||
out.delete_if_exists
|
||||
out = create_out
|
||||
(enso_project.data / test_sheet_name) . copy_to out
|
||||
extra_another = Table.new [['A', ['d', 'e']], ['B',[4, 5]], ['C',[True, False]], ['D', ['2022-01-20', '2022-01-21']]]
|
||||
expected = Table.new [['AA', ['a','b','c','d', 'e']], ['BB',[1,2,3,4,5]], ['CC',[True, False, False, True, False]]]
|
||||
extra_another.write out (Excel (Worksheet "Another")) on_existing_file=Existing_File_Behavior.Append match_columns=Match_Columns.By_Position on_problems=Report_Error . should_succeed
|
||||
written = out.read (Excel (Worksheet "Another")) . select_columns [0, 1, 2]
|
||||
written.should_equal expected
|
||||
out.delete_if_exists
|
||||
out.delete_if_exists . should_succeed
|
||||
|
||||
Test.specify 'should be able to append to a sheet by name out of order' <|
|
||||
out.delete_if_exists
|
||||
out = create_out
|
||||
(enso_project.data / test_sheet_name) . copy_to out
|
||||
extra_another = Table.new [['CC',[True, False]], ['BB',[4, 5]], ['AA', ['d', 'e']], ['DD', ['2022-01-20', '2022-01-21']]]
|
||||
expected = Table.new [['AA', ['a','b','c','d', 'e']], ['BB',[1,2,3,4,5]], ['CC',[True, False, False, True, False]]]
|
||||
extra_another.write out (Excel (Worksheet "Another")) on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed
|
||||
written = out.read (Excel (Worksheet "Another")) . select_columns [0, 1, 2]
|
||||
written.should_equal expected
|
||||
out.delete_if_exists
|
||||
out.delete_if_exists . should_succeed
|
||||
|
||||
Test.specify 'should be able to append to a single cell by name' <|
|
||||
out.delete_if_exists
|
||||
out = create_out
|
||||
(enso_project.data / test_sheet_name) . copy_to out
|
||||
extra_another = Table.new [['AA', ['d', 'e']], ['BB',[4, 5]], ['CC',[True, False]], ['DD', ['2022-01-20', '2022-01-21']]]
|
||||
expected = Table.new [['AA', ['a','b','c','d', 'e']], ['BB',[1,2,3,4,5]], ['CC',[True, False, False, True, False]]]
|
||||
extra_another.write out (Excel (Cell_Range "Another!A1")) on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed
|
||||
written = out.read (Excel (Worksheet "Another")) . select_columns [0, 1, 2]
|
||||
written.should_equal expected
|
||||
out.delete_if_exists
|
||||
out.delete_if_exists . should_succeed
|
||||
|
||||
Test.specify 'should be able to append to a single cell by position' <|
|
||||
out.delete_if_exists
|
||||
out = create_out
|
||||
(enso_project.data / test_sheet_name) . copy_to out
|
||||
extra_another = Table.new [['A', ['d', 'e']], ['B',[4, 5]], ['C',[True, False]], ['D', ['2022-01-20', '2022-01-21']]]
|
||||
expected = Table.new [['AA', ['a','b','c','d', 'e']], ['BB',[1,2,3,4,5]], ['CC',[True, False, False, True, False]]]
|
||||
extra_another.write out (Excel (Cell_Range "Another!A1")) on_existing_file=Existing_File_Behavior.Append match_columns=Match_Columns.By_Position on_problems=Report_Error . should_succeed
|
||||
written = out.read (Excel (Worksheet "Another")) . select_columns [0, 1, 2]
|
||||
written.should_equal expected
|
||||
out.delete_if_exists
|
||||
out.delete_if_exists . should_succeed
|
||||
|
||||
Test.specify 'should be able to append to a single cell by name out of order' <|
|
||||
out.delete_if_exists
|
||||
out = create_out
|
||||
(enso_project.data / test_sheet_name) . copy_to out
|
||||
extra_another = Table.new [['CC',[True, False]], ['BB',[4, 5]], ['AA', ['d', 'e']], ['DD', ['2022-01-20', '2022-01-21']]]
|
||||
expected = Table.new [['AA', ['a','b','c','d', 'e']], ['BB',[1,2,3,4,5]], ['CC',[True, False, False, True, False]]]
|
||||
extra_another.write out (Excel (Cell_Range "Another!A1")) on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed
|
||||
written = out.read (Excel (Worksheet "Another")) . select_columns [0, 1, 2]
|
||||
written.should_equal expected
|
||||
out.delete_if_exists
|
||||
out.delete_if_exists . should_succeed
|
||||
|
||||
Test.specify 'should be able to append to a range by name' <|
|
||||
out.delete_if_exists
|
||||
out = create_out
|
||||
(enso_project.data / test_sheet_name) . copy_to out
|
||||
extra_another = Table.new [['AA', ['d', 'e']], ['BB', [4, 5]], ['CC', [True, False]], ['DD', ['2022-01-20', '2022-01-21']]]
|
||||
expected = Table.new [['AA', ['a', 'b', 'c', 'd', 'e']], ['BB', [1, 2, 3, 4, 5]], ['CC', [True, False, False, True, False]]]
|
||||
extra_another.write out (Excel (Cell_Range "Another!A1:D6")) on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed
|
||||
written = out.read (Excel (Worksheet "Another")) . select_columns [0, 1, 2]
|
||||
written.should_equal expected
|
||||
out.delete_if_exists
|
||||
out.delete_if_exists . should_succeed
|
||||
|
||||
Test.specify 'should be able to append to a range by position' <|
|
||||
out.delete_if_exists
|
||||
out = create_out
|
||||
(enso_project.data / test_sheet_name) . copy_to out
|
||||
extra_another = Table.new [['A', ['d', 'e']], ['B',[4, 5]], ['C',[True, False]], ['D', ['2022-01-20', '2022-01-21']]]
|
||||
expected = Table.new [['AA', ['a','b','c','d', 'e']], ['BB',[1,2,3,4,5]], ['CC',[True, False, False, True, False]]]
|
||||
extra_another.write out (Excel (Cell_Range "Another!A1:D6")) on_existing_file=Existing_File_Behavior.Append match_columns=Match_Columns.By_Position on_problems=Report_Error . should_succeed
|
||||
written = out.read (Excel (Worksheet "Another")) . select_columns [0, 1, 2]
|
||||
written.should_equal expected
|
||||
out.delete_if_exists
|
||||
out.delete_if_exists . should_succeed
|
||||
|
||||
Test.specify 'should be able to append to a range by name not in top left' <|
|
||||
out.delete_if_exists
|
||||
out = create_out
|
||||
(enso_project.data / test_sheet_name) . copy_to out
|
||||
extra_another = Table.new [['AA', ['d', 'e']], ['BB',[4, 5]], ['CC',[True, False]], ['DD', ['2022-01-20', '2022-01-21']]]
|
||||
expected = Table.new [['AA', ['f', 'g', 'h', 'd', 'e']], ['BB',[1, 2, 3, 4, 5]], ['CC',[True, False, False, True, False]]]
|
||||
extra_another.write out (Excel (Cell_Range "Random!K9")) on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed
|
||||
written = out.read (Excel (Cell_Range "Random!K9")) . select_columns [0, 1, 2]
|
||||
written.should_equal expected
|
||||
out.delete_if_exists
|
||||
out.delete_if_exists . should_succeed
|
||||
|
||||
Test.specify 'should be able to append to a range by name after deduplication of names' <|
|
||||
out.delete_if_exists
|
||||
out = create_out
|
||||
(enso_project.data / test_sheet_name) . copy_to out
|
||||
extra_another = Table.new [['AA', ['d', 'e']], ['BB',[4, 5]], ['AA 1',[True, False]], ['BB 1', ['2022-01-20', '2022-01-21']]]
|
||||
expected = Table.new [['AA', ['f', 'g', 'h', 'd', 'e']], ['BB',[1, 2, 3, 4, 5]], ['AA 1',[True, False, False, True, False]]]
|
||||
extra_another.write out (Excel (Cell_Range "Random!S3")) on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed
|
||||
written = out.read (Excel (Cell_Range "Random!S3")) . select_columns [0, 1, 2]
|
||||
written.should_equal expected
|
||||
out.delete_if_exists
|
||||
out.delete_if_exists . should_succeed
|
||||
|
||||
Test.specify 'should be able to append to a range by position not in top left' <|
|
||||
out.delete_if_exists
|
||||
out = create_out
|
||||
(enso_project.data / test_sheet_name) . copy_to out
|
||||
extra_another = Table.new [['A', ['d', 'e']], ['B',[4, 5]], ['C',[True, False]], ['D', ['2022-01-20', '2022-01-21']]]
|
||||
expected = Table.new [['AA', ['f', 'g', 'h', 'd', 'e']], ['BB',[1, 2, 3, 4, 5]], ['CC',[True, False, False, True, False]]]
|
||||
extra_another.write out (Excel (Cell_Range "Random!K9")) on_existing_file=Existing_File_Behavior.Append match_columns=Match_Columns.By_Position on_problems=Report_Error . should_succeed
|
||||
written = out.read (Excel (Cell_Range "Random!K9")) . select_columns [0, 1, 2]
|
||||
written.should_equal expected
|
||||
out.delete_if_exists
|
||||
out.delete_if_exists . should_succeed
|
||||
|
||||
Test.specify 'should be able to append to a range by name out of order' <|
|
||||
out.delete_if_exists
|
||||
out = create_out
|
||||
(enso_project.data / test_sheet_name) . copy_to out
|
||||
extra_another = Table.new [['CC',[True, False]], ['BB',[4, 5]], ['AA', ['d', 'e']], ['DD', ['2022-01-20', '2022-01-21']]]
|
||||
expected = Table.new [['AA', ['a','b','c','d', 'e']], ['BB',[1,2,3,4,5]], ['CC',[True, False, False, True, False]]]
|
||||
extra_another.write out (Excel (Cell_Range "Another!A1:D6")) on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed
|
||||
written = out.read (Excel (Worksheet "Another")) . select_columns [0, 1, 2]
|
||||
written.should_equal expected
|
||||
out.delete_if_exists
|
||||
out.delete_if_exists . should_succeed
|
||||
|
||||
Test.specify 'should be able to write to a new dry run file' <|
|
||||
out.delete_if_exists
|
||||
out = create_out
|
||||
temp = Context.Output.with_disabled <|
|
||||
result = table.write out on_problems=Report_Error . should_succeed
|
||||
Problems.expect_only_warning Dry_Run_Operation result
|
||||
result.exists.should_be_true
|
||||
|
||||
result.absolute.normalize.path . should_not_equal out.absolute.normalize.path
|
||||
|
||||
written = result.read
|
||||
written.sheet_count . should_equal 1
|
||||
written.sheet_names . should_equal ['EnsoSheet']
|
||||
written.read 'EnsoSheet' . should_equal table
|
||||
written.close
|
||||
result
|
||||
temp.delete_if_exists
|
||||
|
||||
Test.specify "should be able to write to a dry-run file, even if the dry-run workbook is open" <|
|
||||
out = create_out
|
||||
out.exists.should_be_false
|
||||
temp = Context.Output.with_disabled <|
|
||||
result = table.write out on_problems=Report_Error . should_succeed
|
||||
Problems.expect_only_warning Dry_Run_Operation result
|
||||
result.exists.should_be_true
|
||||
result
|
||||
temp.absolute.normalize.path . should_not_equal out.absolute.normalize.path
|
||||
out.exists.should_be_false
|
||||
|
||||
opened_temp = temp.read
|
||||
opened_temp.sheet_names . should_equal ['EnsoSheet']
|
||||
|
||||
temp2 = Context.Output.with_disabled <|
|
||||
result = table.write out (Excel (Worksheet "Another")) on_problems=Report_Error . should_succeed
|
||||
Problems.expect_only_warning Dry_Run_Operation result
|
||||
result.exists.should_be_true
|
||||
result
|
||||
|
||||
# The result should be written to the same dry-run file on second attempt.
|
||||
temp2.absolute.normalize.path . should_equal temp.absolute.normalize.path
|
||||
|
||||
## The write operation replaces the dry run file, basing off of the _original_ out file
(which was empty in this example), so we still only get one sheet.
A different example is tested below: if the subsequent write targets the returned
dry-run file object, then both updates are visible.
|
||||
opened_temp.sheet_names . should_equal ['Another']
|
||||
|
||||
opened_temp.close
|
||||
temp.delete_if_exists
|
||||
|
||||
Test.specify "should be able to write to a dry-run file multiple times if the dry-run file object is threaded through" <|
|
||||
out = create_out
|
||||
temp1 = Context.Output.with_disabled <|
|
||||
result = table.write out on_problems=Report_Error . should_succeed
|
||||
Problems.expect_only_warning Dry_Run_Operation result
|
||||
result.exists.should_be_true
|
||||
result
|
||||
temp1.absolute.normalize.path . should_not_equal out.absolute.normalize.path
|
||||
|
||||
opened_temp = temp1.read
|
||||
opened_temp.sheet_names . should_equal ['EnsoSheet']
|
||||
|
||||
temp2 = Context.Output.with_disabled <|
|
||||
result = table.write temp1 (Excel (Worksheet "Another")) on_problems=Report_Error . should_succeed
|
||||
Problems.expect_only_warning Dry_Run_Operation result
|
||||
result.exists.should_be_true
|
||||
result
|
||||
|
||||
# The result should be written to the same file though.
|
||||
temp2.absolute.normalize.path . should_equal temp1.absolute.normalize.path
|
||||
|
||||
# The write operation replaces the dry run file, basing off of the dry-run file itself - so both changes are visible.
|
||||
opened_temp.sheet_names . should_equal ['EnsoSheet', 'Another']
|
||||
|
||||
opened_temp.close
|
||||
temp1.delete_if_exists
|
||||
|
||||
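# Illustrative sketch (not part of the test suite): with the Output context disabled, a write
# produces a dry-run copy instead of touching the target. Passing the returned dry-run file
# into the next write, as the test above does, accumulates changes in that copy.
# `target_file` is a hypothetical placeholder; `table` follows the surrounding tests.
dry1 = Context.Output.with_disabled <| table.write target_file
dry2 = Context.Output.with_disabled <| table.write dry1 (Excel (Worksheet "Another"))
# dry2 and dry1 point at the same dry-run copy, which now contains both sheets.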
Test.specify "should be able to create a backup, even if it is currently open" <|
|
||||
out = create_out
|
||||
bak = out.parent / (out.name+".bak")
|
||||
|
||||
t1 = Table.new [["X", [1]]]
|
||||
t1.write out on_existing_file=Existing_File_Behavior.Backup on_problems=Report_Error . should_succeed
|
||||
bak.exists.should_be_false
|
||||
|
||||
t2 = Table.new [["X", [2]]]
|
||||
t2.write out on_existing_file=Existing_File_Behavior.Backup on_problems=Report_Error . should_succeed
|
||||
bak.exists.should_be_true
|
||||
|
||||
opened_out = out.read
|
||||
# We need to specify an explicit format for the backup, because the extension is changed:
|
||||
opened_backup = bak.read (Excel xls_format=(suffix=="xls"))
|
||||
|
||||
opened_out.read 'EnsoSheet' . should_equal t2
|
||||
opened_backup.read 'EnsoSheet' . should_equal t1
|
||||
|
||||
t3 = Table.new [["X", [3]]]
|
||||
t3.write out on_existing_file=Existing_File_Behavior.Backup on_problems=Report_Error . should_succeed
|
||||
|
||||
opened_out.read 'EnsoSheet' . should_equal t3
|
||||
# The backup should actually have been updated
|
||||
opened_backup.read 'EnsoSheet' . should_equal t2
|
||||
|
||||
opened_out.close
|
||||
opened_backup.close
|
||||
|
||||
out.delete_if_exists . should_succeed
|
||||
bak.delete_if_exists . should_succeed
|
||||
|
||||
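# Illustrative sketch (not part of the test suite): with Existing_File_Behavior.Backup the
# previous contents end up in "<name>.bak" next to the target, so the backup must be read
# with an explicit Excel format because the ".bak" extension no longer identifies it.
# `target_file` is a hypothetical placeholder; `xls_format=False` assumes an xlsx target.
backup_file = target_file.parent / (target_file.name + ".bak")
previous_version = backup_file.read (Excel xls_format=False)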
Test.specify 'should be able to write to an existing empty file' <|
|
||||
out.delete_if_exists
|
||||
out = create_out
|
||||
[].write_bytes out
|
||||
|
||||
out_bak.delete_if_exists
|
||||
out_bak = out.parent / (out.name+".bak")
|
||||
|
||||
table.write out on_problems=Report_Error . should_succeed . should_equal out
|
||||
written = out.read
|
||||
@ -345,73 +463,75 @@ spec_write suffix test_sheet_name =
|
||||
written.sheet_names . should_equal ['EnsoSheet']
|
||||
written.read 'EnsoSheet' . should_equal table
|
||||
|
||||
Test.with_clue "should have created a backup file: " <|
|
||||
out_bak.exists.should_be_true
|
||||
out_bak.size.should_equal 0
|
||||
|
||||
out.delete_if_exists
|
||||
out_bak.delete_if_exists
|
||||
written.close
|
||||
out.delete_if_exists . should_succeed
|
||||
out_bak.delete_if_exists . should_succeed
|
||||
|
||||
Test.specify 'should fail to append to a sheet by name if missing columns' <|
|
||||
out.delete_if_exists
|
||||
out = create_out
|
||||
(enso_project.data / test_sheet_name) . copy_to out
|
||||
extra_another = Table.new [['CC',[True, False]], ['BB',[4, 5]], ['AA', ['d', 'e']]]
|
||||
extra_another.write out (Excel (Worksheet "Another")) on_existing_file=Existing_File_Behavior.Append . should_fail_with Column_Name_Mismatch
|
||||
out.delete_if_exists
|
||||
out.delete_if_exists . should_succeed
|
||||
|
||||
Test.specify 'should fail to append to a sheet by name if extra columns' <|
|
||||
out.delete_if_exists
|
||||
out = create_out
|
||||
(enso_project.data / test_sheet_name) . copy_to out
|
||||
lmd = out.last_modified_time
|
||||
extra_another = Table.new [['CC',[True, False]], ['BB',[4, 5]], ['AA', ['d', 'e']], ['DD', ['2022-01-20', '2022-01-21']], ['EE', ['2022-01-20', '2022-01-21']]]
|
||||
extra_another.write out (Excel (Worksheet "Another")) on_existing_file=Existing_File_Behavior.Append . should_fail_with Column_Name_Mismatch
|
||||
out.last_modified_time.should_equal lmd
|
||||
out.delete_if_exists
|
||||
out.delete_if_exists . should_succeed
|
||||
|
||||
Test.specify 'should fail to append to a sheet by name if no headers' <|
|
||||
out.delete_if_exists
|
||||
out = create_out
|
||||
(enso_project.data / test_sheet_name) . copy_to out
|
||||
lmd = out.last_modified_time
|
||||
extra_another = Table.new [['CC',[True, False]], ['BB',[4, 5]], ['AA', ['d', 'e']], ['DD', ['2022-01-20', '2022-01-21']], ['EE', ['2022-01-20', '2022-01-21']]]
|
||||
extra_another.write out (Excel (Worksheet "NoHeaders")) on_existing_file=Existing_File_Behavior.Append . should_fail_with Illegal_Argument
|
||||
extra_another.write out (Excel (Worksheet "Another") False) on_existing_file=Existing_File_Behavior.Append . should_fail_with Illegal_Argument
|
||||
extra_another.write out (Excel (Worksheet "Another") headers=False) on_existing_file=Existing_File_Behavior.Append . should_fail_with Illegal_Argument
|
||||
out.last_modified_time.should_equal lmd
|
||||
out.delete_if_exists
|
||||
out.delete_if_exists . should_succeed
|
||||
|
||||
Test.specify 'should fail to append to a sheet by position if too few columns' <|
|
||||
out.delete_if_exists
|
||||
out = create_out
|
||||
(enso_project.data / test_sheet_name) . copy_to out
|
||||
lmd = out.last_modified_time
|
||||
extra_another = Table.new [['CC',[True, False]], ['BB',[4, 5]], ['AA', ['d', 'e']]]
|
||||
extra_another.write out (Excel (Worksheet "Another")) on_existing_file=Existing_File_Behavior.Append match_columns=Match_Columns.By_Position . should_fail_with Column_Count_Mismatch
|
||||
out.last_modified_time.should_equal lmd
|
||||
out.delete_if_exists
|
||||
out.delete_if_exists . should_succeed
|
||||
|
||||
Test.specify 'should fail to append to a sheet by position if too many columns' <|
|
||||
out.delete_if_exists
|
||||
out = create_out
|
||||
(enso_project.data / test_sheet_name) . copy_to out
|
||||
lmd = out.last_modified_time
|
||||
extra_another = Table.new [['CC',[True, False]], ['BB',[4, 5]], ['AA', ['d', 'e']], ['DD', ['2022-01-20', '2022-01-21']], ['EE', ['2022-01-20', '2022-01-21']]]
|
||||
extra_another.write out (Excel (Worksheet "Another")) on_existing_file=Existing_File_Behavior.Append match_columns=Match_Columns.By_Position . should_fail_with Column_Count_Mismatch
|
||||
out.last_modified_time.should_equal lmd
|
||||
out.delete_if_exists
|
||||
out.delete_if_exists . should_succeed
|
||||
|
||||
Test.specify 'should fail to append to a range by name if not large enough' <|
|
||||
out.delete_if_exists
|
||||
out = create_out
|
||||
(enso_project.data / test_sheet_name) . copy_to out
|
||||
lmd = out.last_modified_time
|
||||
extra_another = Table.new [['AA', ['d', 'e']], ['BB',[4, 5]], ['CC',[True, False]], ['DD', ['2022-01-20', '2022-01-21']]]
|
||||
extra_another.write out (Excel (Cell_Range "Another!A1:D5")) on_existing_file=Existing_File_Behavior.Append . should_fail_with Range_Exceeded
|
||||
out.last_modified_time.should_equal lmd
|
||||
out.delete_if_exists
|
||||
out.delete_if_exists . should_succeed
|
||||
|
||||
Test.specify 'should fail to append to a range by name if it hits another table' <|
|
||||
out.delete_if_exists
|
||||
out = create_out
|
||||
(enso_project.data / test_sheet_name) . copy_to out
|
||||
lmd = out.last_modified_time
|
||||
extra_another = Table.new [['AA', ['d', 'e']], ['BB',[4, 5]], ['CC',[True, False]], ['DD', ['2022-01-20', '2022-01-21']]]
|
||||
extra_another.write out (Excel (Cell_Range "Random!B3")) on_existing_file=Existing_File_Behavior.Append . should_fail_with Existing_Data
|
||||
out.last_modified_time.should_equal lmd
|
||||
out.delete_if_exists
|
||||
out.delete_if_exists . should_succeed
|
||||
|
||||
Test.specify "should fail if the target file is read-only" <|
|
||||
f = enso_project.data / "transient" / "permission."+suffix
|
||||
@ -419,19 +539,56 @@ spec_write suffix test_sheet_name =
|
||||
f.delete_if_exists
|
||||
|
||||
initial_data = Table.new [["Y", [10, 20, 30]]]
|
||||
initial_data.write f
|
||||
Util.set_writable f False
|
||||
initial_data.write f . should_succeed
|
||||
Util.set_writable f False . should_succeed
|
||||
|
||||
t1 = Table.new [["X", [1, 2, 3]]]
|
||||
[Existing_File_Behavior.Backup, Existing_File_Behavior.Overwrite, Existing_File_Behavior.Append].each behavior->
|
||||
[Existing_File_Behavior.Backup, Existing_File_Behavior.Overwrite, Existing_File_Behavior.Append].each behavior-> Test.with_clue behavior.to_text+": " <|
|
||||
f.exists . should_be_true
|
||||
|
||||
r1 = t1.write f (Excel (Worksheet "Another")) on_existing_file=behavior
|
||||
Test.with_clue "("+r1.catch.to_display_text+") " <|
|
||||
r1.should_fail_with File_Error
|
||||
r1.catch.should_be_a File_Error.Access_Denied
|
||||
f.read . read "EnsoSheet" . should_equal initial_data
|
||||
|
||||
read_table = Managed_Resource.bracket (f.read) (.close) workbook->
|
||||
workbook.read "EnsoSheet"
|
||||
read_table.should_equal initial_data
|
||||
|
||||
Util.set_writable f True
|
||||
f.delete
|
||||
|
||||
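# Illustrative note (not part of the test suite): Managed_Resource.bracket runs the release
# step even when the action fails, so the pattern above cannot leak the workbook handle.
# A minimal sketch with a hypothetical `some_file`:
sheet = Managed_Resource.bracket (some_file.read) (.close) workbook-> workbook.read "EnsoSheet"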
Test.specify "should allow to write to a workbook that is open, and reflect that changes when the sheet is read again" <|
|
||||
out = create_out
|
||||
table.write out on_problems=Report_Error . should_succeed
|
||||
|
||||
workbook = out.read (Excel headers=True)
|
||||
workbook.sheet_names.should_equal ["EnsoSheet"]
|
||||
workbook.to_text . should_equal "Excel_Workbook ("+out.name+")"
|
||||
|
||||
# We can have the workbook open multiple times in parallel too.
|
||||
w2 = out.read (Excel headers=True)
|
||||
|
||||
t1 = workbook.read "EnsoSheet"
|
||||
t1.should_equal table
|
||||
|
||||
[Existing_File_Behavior.Backup, Existing_File_Behavior.Overwrite].each behavior-> Test.with_clue behavior.to_text+": " <|
|
||||
t2 = Table.new [["X", [behavior.to_text, "B", "C", behavior.to_text+"..."]]]
|
||||
t2.write out on_existing_file=behavior . should_succeed
|
||||
|
||||
workbook.sheet_names.should_equal ["EnsoSheet"]
|
||||
|
||||
# If we read the table again, it has the new values in it:
|
||||
t3 = workbook.read "EnsoSheet"
|
||||
t3.should_equal t2
|
||||
|
||||
t4 = w2.read "EnsoSheet"
|
||||
t4.should_equal t2
|
||||
|
||||
workbook.close
|
||||
w2.close
|
||||
out.delete_if_exists . should_succeed
|
||||
|
||||
Test.specify "should fail if the parent directory does not exist" <|
|
||||
parent = enso_project.data / "transient" / "nonexistent"
|
||||
parent.exists.should_be_false
|
||||
@ -439,19 +596,34 @@ spec_write suffix test_sheet_name =
|
||||
f = parent / "foo."+suffix
|
||||
t1 = Table.new [["X", [1, 2, 3]]]
|
||||
r1 = t1.write f (Excel (Worksheet "Another"))
|
||||
Test.with_clue "("+r1.catch.to_display_text+") " <|
|
||||
r1.should_fail_with File_Error
|
||||
r1.catch.should_be_a File_Error.Not_Found
|
||||
|
||||
Test.specify "should allow to write and read-back Unicode characters" <|
|
||||
encodings = enso_project.data / "transient" / "encodings."+suffix
|
||||
encodings.delete_if_exists . should_succeed
|
||||
|
||||
t1 = Table.new [["A", ["A", "B", "😊", "D"]], ["B", [1, 2, 3, 4]]]
|
||||
t1.write encodings (Excel (Worksheet "Another"))
|
||||
t1.write encodings (Excel (Worksheet "Another")) . should_succeed
|
||||
t2 = encodings.read (Excel (Worksheet "Another"))
|
||||
t2.at "A" . to_vector . should_equal ["A", "B", "😊", "D"]
|
||||
encodings.delete
|
||||
|
||||
out.delete_if_exists
|
||||
out_bak.delete_if_exists
|
||||
Test.specify "should be able to overwrite a pre-existing empty file" <|
|
||||
empty = enso_project.data / "transient" / "empty."+suffix
|
||||
[Existing_File_Behavior.Backup, Existing_File_Behavior.Overwrite, Existing_File_Behavior.Append].each behavior-> Test.with_clue behavior.to_text+": " <|
|
||||
empty.delete_if_exists . should_succeed
|
||||
"".write empty
|
||||
empty.exists.should_be_true
|
||||
empty.size.should_equal 0
|
||||
|
||||
t1 = Table.new [["A", [behavior.to_text, "B", "C", "D"]], ["B", [1, 2, 3, 4]]]
|
||||
t1.write empty on_existing_file=behavior . should_succeed
|
||||
empty.exists.should_be_true
|
||||
|
||||
t2 = empty.read (Excel (Worksheet "EnsoSheet"))
|
||||
t2.should_equal t1
|
||||
|
||||
spec =
|
||||
Test.group 'Excel Range' <|
|
||||
@ -703,10 +875,24 @@ spec =
|
||||
result.catch.should_be_a File_Error.Not_Found
|
||||
|
||||
Test.specify "should handle wrong xls_format gracefully" <|
|
||||
xlsx_sheet_copy = enso_project.data / "transient" / "TestSheetCopy.xlsx"
|
||||
xlsx_sheet.copy_to xlsx_sheet_copy
|
||||
|
||||
# At first, it fails with File_Error
|
||||
r1 = xlsx_sheet.read (Excel (Cell_Range "Sheet1!A:C") xls_format=True)
|
||||
r1.should_fail_with File_Error
|
||||
r1.catch.should_be_a File_Error.Corrupted_Format
|
||||
|
||||
# If we now open it correctly
|
||||
r1_2 = xlsx_sheet.read
|
||||
r1_2.should_succeed
|
||||
|
||||
# And then wrong again
|
||||
r1_3 = xlsx_sheet.read (Excel (Cell_Range "Sheet1!A:C") xls_format=True)
|
||||
# It should still fail the same:
|
||||
r1_3.should_fail_with File_Error
|
||||
r1_3.catch.should_be_a File_Error.Corrupted_Format
|
||||
|
||||
r2 = xls_sheet.read (Excel (Cell_Range "Sheet1!A:C") xls_format=False)
|
||||
r2.should_fail_with File_Error
|
||||
r2.catch.should_be_a File_Error.Corrupted_Format
|
||||
@ -735,6 +921,71 @@ spec =
|
||||
|
||||
bad_file.delete
|
||||
|
||||
Test.specify "will fail if an operation is performed on a closed workbook" <|
|
||||
workbook = xlsx_sheet.read
|
||||
workbook.sheet_count . should_equal 4
|
||||
|
||||
workbook.close . should_equal Nothing
|
||||
|
||||
workbook.sheet_count . should_fail_with Illegal_State
|
||||
workbook.close . should_equal Nothing
|
||||
workbook.read "Sheet1" . should_fail_with Illegal_State
|
||||
|
||||
ci_pending = if Environment.get "CI" != Nothing then "This test takes a lot of time so it is disabled on CI."
|
||||
Test.specify "should be able to write and read a big XLSX file (>110MB)" pending=ci_pending <|
|
||||
n = 10^6
|
||||
IO.println "Generating big XLSX file "+Time_Of_Day.now.to_text
|
||||
rng = RandomHelpers.new 123
|
||||
v = Vector.new n _->
|
||||
rng.makeRandomString 190
|
||||
table = Table.new [["X", v]]
|
||||
big_file = enso_project.data / "transient" / "big.xlsx"
|
||||
big_file.delete_if_exists
|
||||
|
||||
table.write big_file on_existing_file=Existing_File_Behavior.Overwrite on_problems=Report_Error . should_succeed
|
||||
IO.println "Done "+Time_Of_Day.now.to_text
|
||||
|
||||
# Verify that the file is as big as we expected.
|
||||
size = big_file.size / (1024*1024)
|
||||
Test.with_clue "size="+size.to_text+"MB " <|
|
||||
(size > 110).should_be_true
|
||||
|
||||
workbook = big_file.read
|
||||
sheets = workbook.sheet_names
|
||||
sheets.length . should_equal 1
|
||||
read_table = workbook.read (sheets.at 0)
|
||||
read_table.row_count . should_equal n+1
|
||||
read_table.column_names.length . should_equal 1
|
||||
read_table.at 0 . at 0 . should_equal "X"
|
||||
read_table.at 0 . at 1 . should_equal (v.at 0)
|
||||
|
||||
Test.specify "should be able to write and read a big XLS file (>110MB)" pending=ci_pending <|
|
||||
IO.println "Generating big XLS file "+Time_Of_Day.now.to_text
|
||||
rng = RandomHelpers.new 123
|
||||
# Here we instead create a 2D table, because the XLS format is limited to 65536 rows (and 256 columns).
|
||||
rows = 65000
|
||||
cols = 20
|
||||
table = Table.new <| Vector.new cols i->
|
||||
v = Vector.new rows _-> rng.makeRandomString 100
|
||||
["col" + i.to_text, v]
|
||||
big_file = enso_project.data / "transient" / "big.xls"
|
||||
big_file.delete_if_exists
|
||||
|
||||
table.write big_file on_existing_file=Existing_File_Behavior.Overwrite on_problems=Report_Error . should_succeed
|
||||
IO.println "Done "+Time_Of_Day.now.to_text
|
||||
|
||||
# Verify that the file is as big as we expected.
|
||||
size = big_file.size / (1024*1024)
|
||||
Test.with_clue "size="+size.to_text+"MB " <|
|
||||
(size > 110).should_be_true
|
||||
|
||||
workbook = big_file.read
|
||||
sheets = workbook.sheet_names
|
||||
sheets.length . should_equal 1
|
||||
read_table = workbook.read (sheets.at 0)
|
||||
read_table.row_count . should_equal rows+1
|
||||
read_table.column_names.length . should_equal cols
|
||||
|
||||
spec_fmt 'XLSX reading' Examples.xlsx .read
|
||||
|
||||
spec_fmt 'XLS reading' Examples.xls .read
|
||||
@ -784,6 +1035,8 @@ spec =
|
||||
problems = [Duplicate_Output_Column_Names.Error ["DD"]]
|
||||
Problems.test_problem_handling action problems tester
|
||||
|
||||
# Clean up any leftovers from previous runs
|
||||
enso_project.data/"transient" . list "out*" . each .delete
|
||||
spec_write "xlsx" 'TestSheet.xlsx'
|
||||
spec_write "xls" 'TestSheetOld.xls'
|
||||
|
||||
|
60
test/Table_Tests/src/IO/Fetch_Spec.enso
Normal file
@ -0,0 +1,60 @@
|
||||
from Standard.Base import all
|
||||
import Standard.Base.Errors.File_Error.File_Error
|
||||
import Standard.Base.Runtime.Context
|
||||
|
||||
from Standard.Table import all
|
||||
import Standard.Table.Errors.Invalid_JSON_Format
|
||||
|
||||
from Standard.Test import Test, Test_Suite
|
||||
import Standard.Test.Extensions
|
||||
|
||||
import project.Util
|
||||
|
||||
main = Test_Suite.run_main spec
|
||||
|
||||
spec =
|
||||
## To run this test locally:
|
||||
$ sbt 'simple-httpbin/run localhost 8080'
|
||||
$ export ENSO_HTTP_TEST_HTTPBIN_URL=http://localhost:8080/
|
||||
base_url = Environment.get "ENSO_HTTP_TEST_HTTPBIN_URL"
|
||||
base_url_with_slash = base_url.if_not_nothing <|
|
||||
if base_url.ends_with "/" then base_url else base_url + "/"
|
||||
pending_has_url = if base_url != Nothing then Nothing else
|
||||
"The HTTP tests only run when the `ENSO_HTTP_TEST_HTTPBIN_URL` environment variable is set to URL of the httpbin server"
|
||||
|
||||
Test.group "fetching files using HTTP" pending=pending_has_url <|
|
||||
Test.specify "fetching json" <|
|
||||
r = Data.fetch base_url_with_slash+"testfiles/table.json"
|
||||
expected_table = Table.from_rows ["A", "B"] [[1, "x"], [3, "y"]]
|
||||
r.to Table . should_equal expected_table
|
||||
|
||||
Test.specify "fetching csv" <|
|
||||
r = Data.fetch base_url_with_slash+"testfiles/table.csv"
|
||||
expected_table = Table.from_rows ["A", "B"] [[1, "x"], [3, "y"]]
|
||||
r.to Table . should_equal expected_table
|
||||
|
||||
Test.specify "fetching xls" <|
|
||||
url = base_url_with_slash+"testfiles/table.xls"
|
||||
r = Data.fetch url
|
||||
expected_table = Table.from_rows ["A", "B"] [[1, "x"], [3, "y"]]
|
||||
|
||||
r.should_be_a Excel_Workbook
|
||||
r.sheet_names . should_equal ["MyTestSheet"]
|
||||
r.read "MyTestSheet" . should_equal expected_table
|
||||
|
||||
r2 = Data.fetch url try_auto_parse_response=False . decode (Excel (Excel_Section.Worksheet "MyTestSheet"))
|
||||
r2.should_be_a Table
|
||||
r2 . should_equal expected_table
|
||||
|
||||
Test.specify "fetching xlsx" <|
|
||||
url = base_url_with_slash+"testfiles/table.xlsx"
|
||||
r = Data.fetch url
|
||||
expected_table = Table.from_rows ["A", "B"] [[1, "x"], [3, "y"]]
|
||||
|
||||
r.should_be_a Excel_Workbook
|
||||
r.sheet_names . should_equal ["MyTestSheet"]
|
||||
r.read "MyTestSheet" . should_equal expected_table
|
||||
|
||||
r2 = Data.fetch url try_auto_parse_response=False . decode (Excel (Excel_Section.Worksheet "MyTestSheet"))
|
||||
r2.should_be_a Table
|
||||
r2 . should_equal expected_table
|
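# Illustrative sketch (not part of the tests): as the cases above show, fetching an Excel URL
# yields an Excel_Workbook, which can then be read sheet by sheet. `some_url` is a
# hypothetical placeholder.
workbook = Data.fetch some_url
first_sheet = workbook.read (workbook.sheet_names.at 0)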
@ -10,9 +10,10 @@ import Standard.Test.Extensions
|
||||
|
||||
import project.Util
|
||||
|
||||
spec = Test.group 'Various File Format support on Table' <|
|
||||
t1 = Table.new [["X", [1, 2, 3]]]
|
||||
spec =
|
||||
transient = enso_project.data / "transient"
|
||||
Test.group 'Various File Format support on Table' <|
|
||||
t1 = Table.new [["X", [1, 2, 3]]]
|
||||
simple_empty = enso_project.data/'simple_empty.csv' . read
|
||||
|
||||
Test.specify "should be able to be written as CSV, Excel" <|
|
||||
@ -79,7 +80,11 @@ spec = Test.group 'Various File Format support on Table' <|
|
||||
r2.catch.should_be_a File_Error.Unsupported_Output_Type
|
||||
r2.catch.format . should_equal my_format
|
||||
|
||||
write_test extension =
|
||||
write_tests extension = Test.group 'Writing to '+extension+' files' <|
|
||||
count result =
|
||||
if result . is_a Table then result.row_count else result.length
|
||||
|
||||
Test.specify "should write to a temporary "+extension+" file part of the data if context is disabled" <|
|
||||
f = transient / ("big." + extension)
|
||||
f.delete_if_exists
|
||||
f_bak = transient / ("big." + extension + ".bak")
|
||||
@ -94,25 +99,66 @@ spec = Test.group 'Various File Format support on Table' <|
|
||||
s.exists.should_be_true
|
||||
|
||||
r_data = s.read
|
||||
row_count = if r_data . is_a Table then r_data.row_count else r_data.length
|
||||
row_count . should_equal 1000
|
||||
count r_data . should_equal 1000
|
||||
s
|
||||
|
||||
f_bak.exists.should_be_false
|
||||
|
||||
f.exists.should_be_true
|
||||
f_data = f.read
|
||||
f_row_count = if f_data . is_a Table then f_data.row_count else f_data.length
|
||||
f_row_count . should_equal 1999
|
||||
count f_data . should_equal 1999
|
||||
|
||||
f.delete_if_exists
|
||||
f_bak.delete_if_exists
|
||||
r.delete_if_exists
|
||||
|
||||
Test.specify "should write to a temporary CSV file part of the data if context disabled" <|
|
||||
write_test "csv"
|
||||
Test.specify "should create a backup file if overwriting" <|
|
||||
f = transient / ("test." + extension)
|
||||
f.delete_if_exists
|
||||
f_bak = transient / ("test." + extension + ".bak")
|
||||
f_bak.delete_if_exists
|
||||
|
||||
Test.specify "should write to a temporary JSON file part of the data if context disabled" <|
|
||||
write_test "json"
|
||||
t1 = Table.new [["a", 0.up_to 10 . to_vector]]
|
||||
t1.write f . should_succeed . should_equal f
|
||||
f.exists.should_be_true
|
||||
f_bak.exists.should_be_false
|
||||
|
||||
t2 = Table.new [["a", 0.up_to 30 . to_vector]]
|
||||
t2.write f . should_succeed . should_equal f
|
||||
f.exists.should_be_true
|
||||
f_bak.exists.should_be_true
|
||||
|
||||
count f.read . should_equal 30
|
||||
|
||||
# ensure correct format is used for reading the .bak file
|
||||
format = Auto_Detect.get_reading_format f
|
||||
count (f_bak.read format) . should_equal 10
|
||||
|
||||
f.delete_if_exists
|
||||
f_bak.delete_if_exists
|
||||
|
||||
|
||||
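# Illustrative sketch (not part of the tests): the ".bak" suffix hides the original extension,
# so the format detected for the primary file is reused to read the backup, mirroring the
# check above. `f` follows the naming used in these tests.
detected_format = Auto_Detect.get_reading_format f
backup_contents = (f.parent / (f.name + ".bak")) . read detected_format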
Test.specify "should support appending" <|
|
||||
f = transient / ("test." + extension)
|
||||
f.delete_if_exists
|
||||
f_bak = transient / ("test." + extension + ".bak")
|
||||
f_bak.delete_if_exists
|
||||
|
||||
t1 = Table.new [["a", 0.up_to 10 . to_vector]]
|
||||
t1.write f on_existing_file=Existing_File_Behavior.Append . should_succeed . should_equal f
|
||||
f.exists.should_be_true
|
||||
f_bak.exists.should_be_false
|
||||
|
||||
t2 = Table.new [["a", 0.up_to 30 . to_vector]]
|
||||
t2.write f on_existing_file=Existing_File_Behavior.Append . should_succeed . should_equal f
|
||||
f.exists.should_be_true
|
||||
f_bak.exists.should_be_false
|
||||
|
||||
count f.read . should_equal 40
|
||||
|
||||
f.delete_if_exists
|
||||
|
||||
write_tests "csv"
|
||||
write_tests "json"
|
||||
|
||||
main = Test_Suite.run_main spec
|
||||
|
@ -6,6 +6,7 @@ import project.IO.Csv_Spec
|
||||
import project.IO.Delimited_Read_Spec
|
||||
import project.IO.Delimited_Write_Spec
|
||||
import project.IO.Excel_Spec
|
||||
import project.IO.Fetch_Spec
|
||||
import project.IO.Formats_Spec
|
||||
import project.IO.Json_Spec
|
||||
|
||||
@ -15,6 +16,7 @@ spec =
|
||||
Delimited_Write_Spec.spec
|
||||
Excel_Spec.spec
|
||||
Formats_Spec.spec
|
||||
Fetch_Spec.spec
|
||||
Json_Spec.spec
|
||||
|
||||
main = Test_Suite.run_main spec
|
||||
|
@ -0,0 +1,20 @@
|
||||
package org.enso.table_test_helpers;
|
||||
|
||||
import java.util.Random;
|
||||
|
||||
public class RandomHelpers {
|
||||
private final Random rng;
|
||||
|
||||
public RandomHelpers(int seed) {
|
||||
this.rng = new Random(seed);
|
||||
}
|
||||
|
||||
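// Produces a pseudo-random ASCII string of the requested length. Characters are drawn
// uniformly from the range 'A' to 'y', so besides letters the output can contain a few
// punctuation characters that sit between the upper- and lower-case ranges.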
public String makeRandomString(int length) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
int n = 'z' - 'A';
|
||||
for (int i = 0; i < length; i++) {
|
||||
sb.append((char) (rng.nextInt(n) + 'A'));
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
@ -81,6 +81,7 @@ import project.System.Process_Spec
|
||||
import project.System.Reporting_Stream_Decoder_Spec
|
||||
import project.System.Reporting_Stream_Encoder_Spec
|
||||
import project.System.System_Spec
|
||||
import project.System.Temporary_File_Spec
|
||||
|
||||
import project.Random_Spec
|
||||
|
||||
@ -97,6 +98,7 @@ main = Test_Suite.run_main <|
|
||||
Error_Spec.spec
|
||||
Environment_Spec.spec
|
||||
File_Spec.spec
|
||||
Temporary_File_Spec.spec
|
||||
File_Read_Spec.spec
|
||||
Reporting_Stream_Decoder_Spec.spec
|
||||
Reporting_Stream_Encoder_Spec.spec
|
||||
|
@ -173,6 +173,19 @@ spec =
|
||||
f.copy_to g . should_fail_with Forbidden_Operation
|
||||
g.exists.should_be_false
|
||||
|
||||
"A".write f on_existing_file=Existing_File_Behavior.Overwrite
|
||||
"B".write g on_existing_file=Existing_File_Behavior.Overwrite
|
||||
|
||||
r = f.copy_to g
|
||||
r.should_fail_with File_Error
|
||||
r.catch.should_be_a File_Error.Already_Exists
|
||||
f.read . should_equal "A"
|
||||
g.read . should_equal "B"
|
||||
|
||||
f.copy_to g replace_existing=True . should_succeed
|
||||
f.read . should_equal "A"
|
||||
g.read . should_equal "A"
|
||||
|
||||
f.delete_if_exists
|
||||
g.delete_if_exists
|
||||
|
||||
@ -193,6 +206,20 @@ spec =
|
||||
f.exists.should_be_false
|
||||
g.exists.should_be_true
|
||||
|
||||
"A".write f on_existing_file=Existing_File_Behavior.Overwrite
|
||||
"B".write g on_existing_file=Existing_File_Behavior.Overwrite
|
||||
r = f.move_to g
|
||||
r.should_fail_with File_Error
|
||||
r.catch.should_be_a File_Error.Already_Exists
|
||||
f.exists.should_be_true
|
||||
g.exists.should_be_true
|
||||
g.read . should_equal "B"
|
||||
|
||||
f.move_to g replace_existing=True . should_succeed
|
||||
f.exists.should_be_false
|
||||
g.exists.should_be_true
|
||||
g.read . should_equal "A"
|
||||
|
||||
f.delete_if_exists
|
||||
g.delete_if_exists
|
||||
|
||||
@ -448,7 +475,7 @@ spec =
|
||||
|
||||
Context.Output.with_enabled <| r.delete_if_exists
|
||||
|
||||
Test.specify "should perform a dry run creating and appending text to a file if Context.Output is disabled" <|
|
||||
Test.specify "if Context.Output is disabled, will always start from the file given - so the effects of previous dry run are not visible" <|
|
||||
f = transient / "dry_append.txt"
|
||||
f.delete_if_exists
|
||||
|
||||
@ -461,12 +488,38 @@ spec =
|
||||
Problems.expect_only_warning Dry_Run_Operation s
|
||||
s.exists.should_be_true
|
||||
|
||||
s.read_text.should_equal 'line 1!\nline 2!'
|
||||
# We only see the second line, because the base file `f` was not updated and the second append still starts from an empty file.
|
||||
s.read_text.should_equal '\nline 2!'
|
||||
|
||||
# But the dry run file for the same `f` target should be kept the same:
|
||||
s.should_equal r
|
||||
|
||||
f.exists.should_be_false
|
||||
|
||||
Context.Output.with_enabled <| r.delete_if_exists
|
||||
Context.Output.with_enabled r.delete_if_exists
|
||||
|
||||
Test.specify "if Context.Output is disabled, will append to the dry run file if the dry run file descriptor is passed as the write target" <|
|
||||
f = transient / "dry_append.txt"
|
||||
f.delete_if_exists
|
||||
|
||||
Context.Output.with_disabled <|
|
||||
dry_run_file = "line 1!".write f on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error
|
||||
Problems.expect_only_warning Dry_Run_Operation dry_run_file
|
||||
dry_run_file.exists.should_be_true
|
||||
|
||||
# Contrary to the example above, we write to the returned dry-run file rather than to `f`.
|
||||
s = '\nline 2!'.write dry_run_file on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error
|
||||
Problems.expect_only_warning Dry_Run_Operation s
|
||||
s.exists.should_be_true
|
||||
|
||||
# We see both updates, because we've threaded the resulting dry run file descriptor through:
|
||||
s.read_text.should_equal 'line 1!\nline 2!'
|
||||
# The returned file is also the same:
|
||||
s.should_equal dry_run_file
|
||||
|
||||
f.exists.should_be_false
|
||||
|
||||
Context.Output.with_enabled dry_run_file.delete_if_exists
|
||||
|
||||
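# Illustrative sketch (not part of the tests): the two dry-run append cases above differ only
# in the write target - appending to `f` always restarts from `f`'s real contents, while
# appending to the returned dry-run file accumulates the chunks. `some_file` is a
# hypothetical placeholder.
first = Context.Output.with_disabled <| "line 1!".write some_file on_existing_file=Existing_File_Behavior.Append
both = Context.Output.with_disabled <| '\nline 2!'.write first on_existing_file=Existing_File_Behavior.Append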
Test.specify "should allow to overwrite files" <|
|
||||
f = transient / "work.txt"
|
||||
|
124
test/Tests/src/System/Temporary_File_Spec.enso
Normal file
@ -0,0 +1,124 @@
|
||||
from Standard.Base import all
|
||||
import Standard.Base.Errors.File_Error.File_Error
|
||||
import Standard.Base.Errors.Illegal_State.Illegal_State
|
||||
import Standard.Base.System.File.Advanced.Temporary_File.Temporary_File
|
||||
import Standard.Base.System.Input_Stream.Input_Stream
|
||||
|
||||
from Standard.Test import Test, Test_Suite
|
||||
from Standard.Test.Execution_Context_Helpers import run_with_and_without_output
|
||||
import Standard.Test.Extensions
|
||||
|
||||
polyglot java import java.io.File as Java_File
|
||||
polyglot java import java.io.ByteArrayInputStream
|
||||
polyglot java import java.io.FileInputStream
|
||||
polyglot java import java.io.InputStream
|
||||
|
||||
main = Test_Suite.run_main spec
|
||||
|
||||
spec =
|
||||
Test.group "Temporary_File facility" <|
|
||||
Test.specify "should allow to create a new file and allow to dispose it manually" <|
|
||||
tmp = Temporary_File.new
|
||||
tmp.with_file f->
|
||||
"test".write f
|
||||
r = tmp.with_file f->
|
||||
f.read Plain_Text
|
||||
r.should_equal "test"
|
||||
|
||||
raw_file = tmp.unsafe_get
|
||||
raw_file.exists . should_be_true
|
||||
|
||||
tmp.dispose
|
||||
|
||||
Test.expect_panic Illegal_State <| tmp.with_file (f->f.read Plain_Text)
|
||||
raw_file.exists . should_be_false
|
||||
|
||||
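# Illustrative sketch (not part of the tests): the Temporary_File lifecycle exercised above -
# create the handle, access the backing file through with_file, then either dispose explicitly
# or let it be cleaned up once the handle becomes unreachable.
tmp = Temporary_File.new
tmp.with_file f-> "payload" . write f
tmp.dispose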
Test.specify "should allow to create a new file and allow to dispose it once the reference is dropped" <|
|
||||
f foo =
|
||||
# The tmp file is limited to the scope of the function.
|
||||
tmp = Temporary_File.new
|
||||
tmp.with_file f->
|
||||
"["+foo+"]" . write f
|
||||
|
||||
raw_file = tmp.unsafe_get
|
||||
raw_file.exists . should_be_true
|
||||
|
||||
res = tmp.with_file f->
|
||||
f.read Plain_Text
|
||||
[res, raw_file]
|
||||
|
||||
result = f "foobar"
|
||||
# At this point the `tmp` from within the function is ready for collection.
|
||||
result.first.should_equal "[foobar]"
|
||||
raw_file = result.second
|
||||
|
||||
repeat_gc count =
|
||||
if raw_file.exists.not then Nothing else
|
||||
if count <= 0 then Test.fail "The temporary file was not cleaned up after numerous GC retries. Perhaps this is a bug?" else
|
||||
if count % 100 == 0 then
|
||||
IO.println "Trying to GC the temporary file (still "+count.to_text+" to go), but the file ("+raw_file.to_text+") still exists... "
|
||||
Runtime.gc
|
||||
@Tail_Call repeat_gc count-1
|
||||
|
||||
repeat_gc 999
|
||||
raw_file.exists . should_be_false
|
||||
|
||||
Test.specify "should allow to materialize an input stream, regardless of Output Context settings" <|
|
||||
run_with_and_without_output <|
|
||||
stream = make_stream "test payload 1"
|
||||
tmp = Temporary_File.from_stream stream
|
||||
tmp.with_file f->
|
||||
f.read Plain_Text . should_equal "test payload 1"
|
||||
|
||||
Test.specify "will fail if materializing an already closed input stream" <|
|
||||
stream = Input_Stream.new (InputStream.nullInputStream) (File_Error.handle_java_exceptions Nothing)
|
||||
stream.close
|
||||
|
||||
Test.expect_panic File_Error <| Temporary_File.from_stream stream
|
||||
|
||||
Test.specify "should be able to be converted to text, and indicate if it was disposed" <|
|
||||
tmp = Temporary_File.new "pref" ".suf"
|
||||
tmp.to_text.should_contain "Temporary_File"
|
||||
tmp.to_text.should_contain "pref"
|
||||
tmp.to_text.should_contain ".suf"
|
||||
|
||||
tmp.dispose
|
||||
tmp.to_text.should_contain "(disposed)"
|
||||
tmp.to_text.should_contain "Temporary_File"
|
||||
tmp.to_text . should_not_contain "pref"
|
||||
tmp.to_text . should_not_contain "suf"
|
||||
|
||||
Test.specify "should allow to materialize an input stream that is already associated with a temporary file without copying it" <|
|
||||
tmp = Temporary_File.new
|
||||
tmp.with_file f->
|
||||
"test payload 3" . write f
|
||||
|
||||
java_file = Java_File.new tmp.unsafe_get.absolute.path
|
||||
stream = Input_Stream.new (FileInputStream.new java_file) (File_Error.handle_java_exceptions tmp.unsafe_get) associated_file=tmp
|
||||
|
||||
tmp2 = Temporary_File.from_stream_light stream
|
||||
# The returned tmp file should be the same one as the original.
|
||||
tmp2.should_be_a Temporary_File
|
||||
tmp2.unsafe_get.absolute.path . should_equal tmp.unsafe_get.absolute.path
|
||||
|
||||
# If the raw file is associated, the stream will return that File descriptor (not as a temporary file, but as a regular one):
|
||||
stream3 = Input_Stream.new (FileInputStream.new java_file) (File_Error.handle_java_exceptions tmp.unsafe_get) associated_file=tmp.unsafe_get
|
||||
f3 = Temporary_File.from_stream_light stream3
|
||||
f3.should_be_a File
|
||||
f3.absolute.path . should_equal tmp.unsafe_get.absolute.path
|
||||
|
||||
# But if there's no association, a new temporary file gets created:
|
||||
stream4 = Input_Stream.new (FileInputStream.new java_file) (File_Error.handle_java_exceptions tmp.unsafe_get) associated_file=Nothing
|
||||
tmp4 = Temporary_File.from_stream_light stream4
|
||||
tmp4.should_be_a Temporary_File
|
||||
tmp4.unsafe_get.absolute.path . should_not_equal tmp.unsafe_get.absolute.path
|
||||
|
||||
# The base variant of from_stream also always copies:
|
||||
stream5 = Input_Stream.new (FileInputStream.new java_file) (File_Error.handle_java_exceptions tmp.unsafe_get) associated_file=tmp
|
||||
tmp5 = Temporary_File.from_stream stream5
|
||||
tmp5.should_be_a Temporary_File
|
||||
tmp5.unsafe_get.absolute.path . should_not_equal tmp.unsafe_get.absolute.path
|
||||
|
||||
make_stream text =
|
||||
raw_stream = ByteArrayInputStream.new text.utf_8
|
||||
Input_Stream.new raw_stream (File_Error.handle_java_exceptions Nothing)
|
@ -2,9 +2,14 @@ package org.enso.shttp;
|
||||
|
||||
import com.sun.net.httpserver.HttpHandler;
|
||||
import com.sun.net.httpserver.HttpServer;
|
||||
import com.sun.net.httpserver.SimpleFileServer;
|
||||
import java.io.IOException;
|
||||
import java.net.InetSocketAddress;
|
||||
import java.net.URISyntaxException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.List;
|
||||
import java.util.stream.Stream;
|
||||
import sun.misc.Signal;
|
||||
import sun.misc.SignalHandler;
|
||||
|
||||
@ -31,7 +36,9 @@ public class SimpleHTTPBin {
|
||||
} catch (InterruptedException e) {
|
||||
e.printStackTrace();
|
||||
} finally {
|
||||
server.stop(0);
|
||||
System.out.println("Finalizing server...");
|
||||
server.stop(3);
|
||||
System.out.println("Server stopped.");
|
||||
}
|
||||
}
|
||||
|
||||
@ -59,17 +66,19 @@ public class SimpleHTTPBin {
|
||||
server.addHandler(path, new TestHandler());
|
||||
}
|
||||
|
||||
setupFileServer(server);
|
||||
|
||||
final SimpleHTTPBin server1 = server;
|
||||
SignalHandler stopServerHandler =
|
||||
(Signal sig) -> {
|
||||
System.out.println("Stopping server...");
|
||||
System.out.println("Stopping server... (interrupt)");
|
||||
server1.stop();
|
||||
};
|
||||
for (String signalName : List.of("TERM", "INT")) {
|
||||
Signal.handle(new Signal(signalName), stopServerHandler);
|
||||
}
|
||||
server.start();
|
||||
} catch (IOException e) {
|
||||
} catch (IOException | URISyntaxException e) {
|
||||
e.printStackTrace();
|
||||
} finally {
|
||||
if (server != null) {
|
||||
@ -93,4 +102,34 @@ public class SimpleHTTPBin {
|
||||
return running;
|
||||
}
|
||||
}
|
||||
|
||||
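// Locates the repository root by walking up from the running jar's location and mounts the
// tools/simple-httpbin/www-files directory under the /testfiles path using the JDK's
// built-in SimpleFileServer handler.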
private static void setupFileServer(SimpleHTTPBin server) throws URISyntaxException {
|
||||
Path myRuntimeJar =
|
||||
Path.of(SimpleHTTPBin.class.getProtectionDomain().getCodeSource().getLocation().toURI())
|
||||
.toAbsolutePath();
|
||||
Path projectRoot = findProjectRoot(myRuntimeJar);
|
||||
Path testFilesRoot = projectRoot.resolve(pathToWWW);
|
||||
System.out.println("Serving files from directory " + testFilesRoot);
|
||||
server.addHandler("/testfiles", SimpleFileServer.createFileHandler(testFilesRoot));
|
||||
}
|
||||
|
||||
private static Path findProjectRoot(Path startingPoint) {
|
||||
if (looksLikeProjectRoot(startingPoint)) {
|
||||
return startingPoint;
|
||||
} else {
|
||||
Path parent = startingPoint.getParent();
|
||||
if (parent == null) {
|
||||
throw new RuntimeException("Could not find project root");
|
||||
}
|
||||
|
||||
return findProjectRoot(parent);
|
||||
}
|
||||
}
|
||||
|
||||
private static final String pathToWWW = "tools/simple-httpbin/www-files";
|
||||
|
||||
private static boolean looksLikeProjectRoot(Path path) {
|
||||
return Stream.of("build.sbt", "tools", "project", pathToWWW)
|
||||
.allMatch(p -> Files.exists(path.resolve(p)));
|
||||
}
|
||||
}
|
||||
|
@ -19,9 +19,26 @@ public class TestHandler implements HttpHandler {
|
||||
private static final Set<String> ignoredHeaders = Set.of("Host");
|
||||
|
||||
private static final Pattern textEncodingRegex = Pattern.compile(".*; charset=([^;]+).*");
|
||||
private final boolean logRequests = false;
|
||||
|
||||
@Override
|
||||
public void handle(HttpExchange exchange) throws IOException {
|
||||
try {
|
||||
if (logRequests) {
|
||||
System.out.println(
|
||||
"Handling request: " + exchange.getRequestMethod() + " " + exchange.getRequestURI());
|
||||
}
|
||||
|
||||
doHandle(exchange);
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
throw e;
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
public void doHandle(HttpExchange exchange) throws IOException {
|
||||
boolean first = true;
|
||||
String contentType = null;
|
||||
String textEncoding = "UTF-8";
|
||||
|
3
tools/simple-httpbin/www-files/table.csv
Normal file
@ -0,0 +1,3 @@
|
||||
A,B
|
||||
1,x
|
||||
3,y
|
|
4
tools/simple-httpbin/www-files/table.json
Normal file
@ -0,0 +1,4 @@
|
||||
[
|
||||
{"A": 1, "B": "x"},
|
||||
{"A": 3, "B": "y"}
|
||||
]
|
BIN
tools/simple-httpbin/www-files/table.xls
Normal file
Binary file not shown.
BIN
tools/simple-httpbin/www-files/table.xlsx
Normal file
Binary file not shown.