From b5c995a7bfe7f0a58beee2d34686dedc04afb145 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Fri, 15 Dec 2023 01:02:15 +0100 Subject: [PATCH] Reworking Excel support to allow for reading of big files (#8403) - Closes #8111 by making sure that all Excel workbooks are read using a backing file (which should be more memory efficient). - If the workbook is being opened from an input stream, that stream is materialized to a `Temporary_File`. - Adds tests fetching Table formats from HTTP. - Extends `simple-httpbin` with ability to serve files for our tests. - Ensures that the `Infer` option on `Excel` format also works with streams, if content-type metadata is available (e.g. from HTTP headers). - Implements a `Temporary_File` facility that can be used to create a temporary file that is deleted once all references to the `Temporary_File` instance are GCed. --- .prettierignore | 1 + CHANGELOG.md | 2 + build.sbt | 18 +- .../AWS/0.0.0-dev/src/S3/S3_File.enso | 12 +- .../lib/Standard/Base/0.0.0-dev/src/Any.enso | 12 +- .../src/Data/Enso_Cloud/Enso_File.enso | 5 +- .../0.0.0-dev/src/Data/XML/XML_Format.enso | 8 +- .../Base/0.0.0-dev/src/Errors/File_Error.enso | 25 +- .../0.0.0-dev/src/Errors/Illegal_State.enso | 9 + .../Base/0.0.0-dev/src/Network/HTTP.enso | 5 +- .../src/Network/HTTP/Response_Body.enso | 34 +- .../Standard/Base/0.0.0-dev/src/Nothing.enso | 11 + .../Base/0.0.0-dev/src/System/File.enso | 29 +- .../System/File/Advanced/Temporary_File.enso | 106 +++++ .../System/File/Existing_File_Behavior.enso | 21 +- .../0.0.0-dev/src/System/File_Format.enso | 46 +- .../0.0.0-dev/src/System/Input_Stream.enso | 22 +- .../Table/0.0.0-dev/src/Data/Table.enso | 16 +- .../src/Delimited/Delimited_Format.enso | 11 +- .../0.0.0-dev/src/Excel/Excel_Format.enso | 65 ++- .../0.0.0-dev/src/Excel/Excel_Workbook.enso | 116 ++++- .../src/Internal/Delimited_Writer.enso | 10 +- .../0.0.0-dev/src/Internal/Excel_Reader.enso | 24 +- 
.../0.0.0-dev/src/Internal/Excel_Writer.enso | 125 +++-- .../java/org/enso/base/DryRunFileManager.java | 8 + .../table/error/InvalidLocationException.java | 2 +- .../enso/table/excel/ExcelConnectionPool.java | 295 ++++++++++++ .../org/enso/table/excel/ExcelFileFormat.java | 6 + .../table/excel/ReadOnlyExcelConnection.java | 38 ++ .../java/org/enso/table/read/ExcelReader.java | 178 ++++--- .../org/enso/table/write/ExcelWriter.java | 9 - .../table/write/ExistingFileBehavior.java | 8 + .../Column_Operations_Spec.enso | 179 ++++--- test/Table_Tests/src/IO/Excel_Spec.enso | 449 ++++++++++++++---- test/Table_Tests/src/IO/Fetch_Spec.enso | 60 +++ test/Table_Tests/src/IO/Formats_Spec.enso | 220 +++++---- test/Table_Tests/src/IO/Main.enso | 2 + .../table_test_helpers/RandomHelpers.java | 20 + test/Tests/src/Main.enso | 2 + test/Tests/src/System/File_Spec.enso | 59 ++- .../Tests/src/System/Temporary_File_Spec.enso | 124 +++++ .../java/org/enso/shttp/SimpleHTTPBin.java | 45 +- .../main/java/org/enso/shttp/TestHandler.java | 17 + tools/simple-httpbin/www-files/table.csv | 3 + tools/simple-httpbin/www-files/table.json | 4 + tools/simple-httpbin/www-files/table.xls | Bin 0 -> 5632 bytes tools/simple-httpbin/www-files/table.xlsx | Bin 0 -> 4853 bytes 47 files changed, 1917 insertions(+), 544 deletions(-) create mode 100644 distribution/lib/Standard/Base/0.0.0-dev/src/System/File/Advanced/Temporary_File.enso create mode 100644 std-bits/table/src/main/java/org/enso/table/excel/ExcelConnectionPool.java create mode 100644 std-bits/table/src/main/java/org/enso/table/excel/ExcelFileFormat.java create mode 100644 std-bits/table/src/main/java/org/enso/table/excel/ReadOnlyExcelConnection.java create mode 100644 std-bits/table/src/main/java/org/enso/table/write/ExistingFileBehavior.java create mode 100644 test/Table_Tests/src/IO/Fetch_Spec.enso create mode 100644 test/Tests/polyglot-sources/enso-test-java-helpers/src/main/java/org/enso/table_test_helpers/RandomHelpers.java create mode 
100644 test/Tests/src/System/Temporary_File_Spec.enso create mode 100644 tools/simple-httpbin/www-files/table.csv create mode 100644 tools/simple-httpbin/www-files/table.json create mode 100644 tools/simple-httpbin/www-files/table.xls create mode 100644 tools/simple-httpbin/www-files/table.xlsx diff --git a/.prettierignore b/.prettierignore index 362aaaddbc3..8795df42116 100644 --- a/.prettierignore +++ b/.prettierignore @@ -21,6 +21,7 @@ resources/python # The files in the `data` directory of our tests may have specific structure or # even be malformed on purpose, so we do not want to run prettier on them. test/**/data +tools/simple-httpbin/www-files # GUI **/scala-parser.js diff --git a/CHANGELOG.md b/CHANGELOG.md index bd0b63e96af..95563cb9750 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -594,6 +594,7 @@ - [Implemented truncate `Date_Time` for database backend (Postgres only).][8235] - [Initial Enso Cloud APIs.][8006] - [Errors thrown inside `map` are wrapped in `Map_Error`.][8307] +- [Support for loading big Excel files.][8403] [debug-shortcuts]: https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug @@ -850,6 +851,7 @@ [8150]: https://github.com/enso-org/enso/pull/8150 [8235]: https://github.com/enso-org/enso/pull/8235 [8307]: https://github.com/enso-org/enso/pull/8307 +[8403]: https://github.com/enso-org/enso/pull/8403 #### Enso Compiler diff --git a/build.sbt b/build.sbt index 37bc4b59e7e..2e6bae28ff2 100644 --- a/build.sbt +++ b/build.sbt @@ -1330,13 +1330,16 @@ lazy val truffleDslSuppressWarnsSetting = Seq( ) /** A setting to replace javac with Frgaal compiler, allowing to use latest Java features in the code - * and still compile down to JDK 11 + * and still compile down to JDK 17 */ -lazy val frgaalJavaCompilerSetting = Seq( +lazy val frgaalJavaCompilerSetting = + customFrgaalJavaCompilerSettings(targetJavaVersion) + +def customFrgaalJavaCompilerSettings(targetJdk: String) = Seq( Compile / compile / compilers := 
FrgaalJavaCompiler.compilers( (Compile / dependencyClasspath).value, compilers.value, - targetJavaVersion + targetJdk ), // This dependency is needed only so that developers don't download Frgaal manually. // Sadly it cannot be placed under plugins either because meta dependencies are not easily @@ -2731,11 +2734,16 @@ val allStdBits: Parser[String] = lazy val `simple-httpbin` = project .in(file("tools") / "simple-httpbin") .settings( - frgaalJavaCompilerSetting, + customFrgaalJavaCompilerSettings(targetJdk = "21"), + autoScalaLibrary := false, Compile / javacOptions ++= Seq("-Xlint:all"), + Compile / run / mainClass := Some("org.enso.shttp.SimpleHTTPBin"), + assembly / mainClass := (Compile / run / mainClass).value, libraryDependencies ++= Seq( "org.apache.commons" % "commons-text" % commonsTextVersion - ) + ), + (Compile / run / fork) := true, + (Compile / run / connectInput) := true ) .configs(Test) diff --git a/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_File.enso b/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_File.enso index 208576c2be7..a048a36b724 100644 --- a/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_File.enso +++ b/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_File.enso @@ -4,8 +4,10 @@ import Standard.Base.Errors.File_Error.File_Error import Standard.Base.Errors.Illegal_Argument.Illegal_Argument import Standard.Base.Errors.Unimplemented.Unimplemented import Standard.Base.System.File_Format.File_For_Read +import Standard.Base.System.File_Format.File_Format_Metadata import Standard.Base.System.Input_Stream.Input_Stream import Standard.Base.System.Output_Stream.Output_Stream +from Standard.Base.System.File import find_extension_from_name import project.AWS_Credential.AWS_Credential import project.Errors.S3_Error @@ -117,7 +119,9 @@ type S3_File Auto_Detect -> if self.is_directory then format.read self on_problems else response = S3.get_object self.bucket self.prefix self.credentials response.decode Auto_Detect - _ -> 
self.with_input_stream [File_Access.Read] format.read_stream + _ -> + metadata = File_Format_Metadata.Value file_name=self.name + self.with_input_stream [File_Access.Read] (stream-> format.read_stream stream metadata) ## ALIAS load bytes, open bytes ICON data_input @@ -187,11 +191,7 @@ type S3_File Returns the extension of the file. extension : Text extension self = if self.is_directory then Error.throw (S3_Error.Error "Directories do not have extensions." self.uri) else - name = self.name - last_dot = name.locate "." mode=Matching_Mode.Last - if last_dot.is_nothing then "" else - extension = name.drop (Index_Sub_Range.First last_dot.start) - if extension == "." then "" else extension + find_extension_from_name self.name ## GROUP Standard.Base.Input Lists files contained in the directory denoted by this file. diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Any.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Any.enso index 6572b25630d..fd823a70442 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Any.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Any.enso @@ -299,7 +299,7 @@ type Any is_nothing self = False ## GROUP Logical - If `self` is Nothing then returns `function`. + If `self` is Nothing then returns `other`. > Example If the value "Hello" is nothing return "". @@ -309,6 +309,16 @@ type Any if_nothing self ~other = const self other + ## If `self` is Nothing then returns Nothing, otherwise returns the result + of running the provided `action`. + + > Example + Transform a value only if it is not nothing. + + my_result.if_not_nothing <| my_result + 1 + if_not_nothing : Any -> Any + if_not_nothing self ~action = action + ## GROUP Errors Executes the provided handler on an error, or returns the value unchanged. 
diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Enso_Cloud/Enso_File.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Enso_Cloud/Enso_File.enso index 9d9b3b37e20..dbca3716d2a 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Enso_Cloud/Enso_File.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Enso_Cloud/Enso_File.enso @@ -17,6 +17,7 @@ import project.Network.HTTP.HTTP_Method.HTTP_Method import project.Nothing.Nothing import project.System.File.File_Access.File_Access import project.System.File_Format.File_For_Read +import project.System.File_Format.File_Format_Metadata import project.System.Input_Stream.Input_Stream import project.System.Output_Stream.Output_Stream from project.Data.Boolean import Boolean, False, True @@ -129,7 +130,9 @@ type Enso_File real_format = Auto_Detect.get_reading_format self if real_format == Nothing then Error.throw (File_Error.Unsupported_Type self) else self.read real_format on_problems - _ -> self.with_input_stream [File_Access.Read] format.read_stream + _ -> + metadata = File_Format_Metadata.Value file_name=self.name + self.with_input_stream [File_Access.Read] (stream-> format.read_stream stream metadata) ## ALIAS load bytes, open bytes ICON data_input diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/XML/XML_Format.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/XML/XML_Format.enso index 4a33154dc27..159ab88f949 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/XML/XML_Format.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/XML/XML_Format.enso @@ -6,6 +6,7 @@ import project.Network.URI.URI import project.Nothing.Nothing import project.System.File.File import project.System.File_Format.File_For_Read +import Standard.Base.System.File_Format.File_Format_Metadata import project.System.Input_Stream.Input_Stream from project.Data.Text.Extensions import all @@ -45,7 +46,8 @@ type XML_Format XML_Document.from_file file ## PRIVATE - Implements the 
`Data.parse` for this `File_Format` - read_stream : Input_Stream -> Any - read_stream self stream:Input_Stream = + Implements decoding the format from a stream. + read_stream : Input_Stream -> File_Format_Metadata -> Any + read_stream self stream:Input_Stream (metadata : File_Format_Metadata = File_Format_Metadata.no_information) = + _ = metadata XML_Document.from_stream stream diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Errors/File_Error.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Errors/File_Error.enso index 5425b6c0f60..b1c253036bd 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Errors/File_Error.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Errors/File_Error.enso @@ -8,6 +8,7 @@ import project.System.File.File import project.System.File_Format.File_For_Read import project.System.File_Format.File_Format +polyglot java import java.io.FileNotFoundException polyglot java import java.io.IOException polyglot java import java.nio.file.AccessDeniedException polyglot java import java.nio.file.FileAlreadyExistsException @@ -33,7 +34,7 @@ type File_Error Arguments: - file: The file that couldn't be read. - message: The message for the error. - IO_Error (file : File) (message : Text) + IO_Error (file : File | Nothing) (message : Text) ## Indicates that the given file's type is not supported. Unsupported_Type (file : File_For_Read) @@ -51,7 +52,9 @@ type File_Error to_display_text : Text to_display_text self = case self of File_Error.Not_Found file -> "The file at " + file.path + " does not exist." - File_Error.IO_Error file msg -> msg + " (" + file.path + ")." + File_Error.IO_Error file msg -> + suffix = if file.is_nothing then "" else " (" + file.path + ")." + msg + suffix File_Error.Already_Exists file -> "The file at "+file.path+" already exists." File_Error.Access_Denied file -> "Insufficient permissions to perform the desired operation on the file at "+file.path+"." 
File_Error.Unsupported_Type file -> "The "+file.path+" has a type that is not supported." @@ -65,7 +68,7 @@ type File_Error ## PRIVATE Utility method for running an action with Java exceptions mapping. - handle_java_exceptions file ~action = + handle_java_exceptions (file : File | Nothing) ~action = Panic.catch IOException action caught_panic-> File_Error.wrap_io_exception file caught_panic.payload @@ -78,8 +81,14 @@ type File_Error ## PRIVATE Converts a Java `IOException` into its Enso counterpart. - wrap_io_exception file io_exception = case io_exception of - _ : NoSuchFileException -> Error.throw (File_Error.Not_Found file) - _ : FileAlreadyExistsException -> Error.throw (File_Error.Already_Exists file) - _ : AccessDeniedException -> File_Error.access_denied file - _ -> Error.throw (File_Error.IO_Error file "An IO error has occurred: "+io_exception.to_text) + wrap_io_exception (file : File | Nothing) io_exception = + ## If the file is not known, all we can do is throw a generic IO error. + This will only usually matter on stream operations, where there is no relevant file - + and so the exceptions like `NoSuchFileException` should not occur in such context. + But instead of risking a Type_Error, we just throw the more generic IO_Error. 
+ if file.is_nothing then Error.throw (File_Error.IO_Error Nothing "An IO error has occurred: "+io_exception.to_text) else case io_exception of + _ : NoSuchFileException -> Error.throw (File_Error.Not_Found file) + _ : FileNotFoundException -> Error.throw (File_Error.Not_Found file) + _ : FileAlreadyExistsException -> Error.throw (File_Error.Already_Exists file) + _ : AccessDeniedException -> File_Error.access_denied file + _ -> Error.throw (File_Error.IO_Error file "An IO error has occurred: "+io_exception.to_text) diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Errors/Illegal_State.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Errors/Illegal_State.enso index d7c130d9de7..df801261340 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Errors/Illegal_State.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Errors/Illegal_State.enso @@ -1,5 +1,9 @@ import project.Data.Text.Text +import project.Error.Error import project.Nothing.Nothing +import project.Panic.Panic + +polyglot java import java.lang.IllegalStateException type Illegal_State ## PRIVATE @@ -19,3 +23,8 @@ type Illegal_State Provides a human-readable representation of the encoding error. to_display_text : Text to_display_text self = "Illegal State: " + self.message + + ## PRIVATE + Capture a Java `IllegalStateException` and convert it to an Enso dataflow error - `Illegal_State.Error`. 
+ handle_java_exception = + Panic.catch IllegalStateException handler=(cause-> Error.throw (Illegal_State.Error cause.payload.getMessage cause.payload)) diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Network/HTTP.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Network/HTTP.enso index cc9b535f5fe..5ce967ddf80 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Network/HTTP.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Network/HTTP.enso @@ -98,7 +98,10 @@ type HTTP if fetch_methods.contains req.method || Context.Output.is_enabled then action else Error.throw (Forbidden_Operation.Error ("Method " + req.method.to_text + " requests are forbidden as the Output context is disabled.")) handle_request_error = - Panic.catch JException handler=(cause-> Error.throw (Request_Error.Error 'IllegalArgumentException' cause.payload.getMessage)) + handler caught_panic = + exception = caught_panic.payload + Error.throw (Request_Error.Error (Meta.type_of exception . to_text) exception.getMessage) + Panic.catch JException handler=handler Panic.recover Any <| handle_request_error <| check_output_context <| headers = resolve_headers req diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Network/HTTP/Response_Body.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Network/HTTP/Response_Body.enso index 228ea2d4099..7b7280d3e77 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Network/HTTP/Response_Body.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Network/HTTP/Response_Body.enso @@ -16,6 +16,7 @@ import project.Network.URI.URI import project.Nothing.Nothing import project.Runtime.Context import project.Runtime.Managed_Resource.Managed_Resource +import project.System.File.Advanced.Temporary_File.Temporary_File import project.System.File.Existing_File_Behavior.Existing_File_Behavior import project.System.File.File import project.System.File.File_Access.File_Access @@ -23,6 +24,7 @@ import project.System.File.Write_Extensions import 
project.System.File_Format.Auto_Detect import project.System.File_Format.Bytes import project.System.File_Format.File_Format +import project.System.File_Format.File_Format_Metadata import project.System.File_Format.Plain_Text_Format import project.System.Input_Stream.Input_Stream from project.Data.Boolean import Boolean, False, True @@ -58,23 +60,23 @@ type Response_Body Raw_Stream (raw_stream:Input_Stream) (content_type:Text|Nothing) uri:URI ## PRIVATE - Byte_Array (bytes:Vector) (content_type:Text|Nothing) uri:URI + Materialized_Byte_Array (bytes:Vector) (content_type:Text|Nothing) uri:URI ## PRIVATE - Temporary_File (file_resource:Managed_Resource) (content_type:Text|Nothing) uri:URI + Materialized_Temporary_File (temporary_file:Temporary_File) (content_type:Text|Nothing) uri:URI ## PRIVATE with_stream : (Input_Stream -> Any ! HTTP_Error) -> Any ! HTTP_Error with_stream self action = case self of Response_Body.Raw_Stream raw_stream _ _ -> Managed_Resource.bracket raw_stream (_.close) action - Response_Body.Byte_Array bytes _ _ -> + Response_Body.Materialized_Byte_Array bytes _ _ -> byte_stream = Input_Stream.new (ByteArrayInputStream.new bytes) (HTTP_Error.handle_java_exceptions self.uri) Managed_Resource.bracket byte_stream (_.close) action - Response_Body.Temporary_File file_resource _ _ -> file_resource.with file-> + Response_Body.Materialized_Temporary_File temporary_file _ _ -> temporary_file.with_file file-> opts = [File_Access.Read.to_java] stream = HTTP_Error.handle_java_exceptions self.uri (file.input_stream_builtin opts) - file_stream = Input_Stream.new stream (HTTP_Error.handle_java_exceptions self.uri) + file_stream = Input_Stream.new stream (HTTP_Error.handle_java_exceptions self.uri) associated_file=temporary_file Managed_Resource.bracket (file_stream) (_.close) action ## PRIVATE @@ -88,23 +90,19 @@ type Response_Body body_stream.with_java_stream body_java_stream-> first_block = body_java_stream.readNBytes maximum_body_in_memory case 
first_block.length < maximum_body_in_memory of - True -> Response_Body.Byte_Array (Vector.from_polyglot_array first_block) self.content_type self.uri - False -> - file = File.create_temporary_file self.uri.host - - ## Write contents to temporary file - Context.Output.with_enabled <| + True -> Response_Body.Materialized_Byte_Array (Vector.from_polyglot_array first_block) self.content_type self.uri + False -> Context.Output.with_enabled <| + ## Write contents to a temporary file + temp_file = Temporary_File.new self.uri.host + r = temp_file.with_file file-> file.with_output_stream [File_Access.Write, File_Access.Create, File_Access.Truncate_Existing] output_stream-> output_stream.with_java_stream java_output_stream-> java_output_stream.write first_block body_java_stream.transferTo java_output_stream java_output_stream.flush Nothing - output_stream.close - - ## Have a file with the correct set up - resource = Managed_Resource.register file delete_file - Response_Body.Temporary_File resource self.content_type self.uri + r.if_not_error <| + Response_Body.Materialized_Temporary_File temp_file self.content_type self.uri _ -> self ## ALIAS parse @@ -128,7 +126,9 @@ type Response_Body _ -> type_obj = Meta.type_of format if can_decode type_obj . not then Error.throw (Illegal_Argument.Error type_obj.to_text+" cannot be used to decode from a stream. 
It must be saved to a file first.") else - self.with_stream format.read_stream + metadata = File_Format_Metadata.Value content_type=self.content_type + self.with_stream stream-> + format.read_stream stream metadata ## ALIAS bytes GROUP Input diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Nothing.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Nothing.enso index d428831263f..5ffeb007c7d 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Nothing.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Nothing.enso @@ -2,6 +2,7 @@ import project.Any.Any import project.Data.Numbers.Integer import project.Data.Text.Text from project.Data.Boolean import Boolean, False, True +from project.Function import const ## The type that has only a singleton value. Nothing in Enso is used as an universal value to indicate the lack of presence of a value. @@ -30,6 +31,16 @@ type Nothing if_nothing : Any -> Any if_nothing self ~function = function + ## If `self` is Nothing then returns Nothing, otherwise returns the result + of running the provided `action`. + + > Example + Transform a value only if it is not nothing. + + my_result.if_not_nothing <| my_result + 1 + if_not_nothing : Any -> Any + if_not_nothing self ~action = const Nothing action + ## Get a value for the key of the object. As `Nothing` has no keys, returns `if_missing`. diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/System/File.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/System/File.enso index 4e979155a16..0e3d9a677eb 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/System/File.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/System/File.enso @@ -90,17 +90,28 @@ type File ## PRIVATE Create a dry run temporary file which will be deleted when Enso exits. - For an absolute path the same temporary file is returned. + The same temporary file is returned for paths that point to the same + location (not accounting for symlinks). 
+ If this file is a temporary file that was generated by `create_dry_run_file` on another file, it is returned as-is. + + Arguments: + - copy_original: If `True`, the created dry run file is 'synchronized' + with the original file - the file is copied to the temporary file, or + if the original file does not exist - it is ensured that the temporary + file also does not exist. If `False`, no actions are taken. create_dry_run_file : Boolean -> File ! File_Error create_dry_run_file self copy_original=False = - temp_path = DryRunFileManager.getTemporaryFile self.absolute.path + temp_path = DryRunFileManager.getTemporaryFile self.absolute.normalize.path if temp_path.is_nothing then Error.throw (File_Error.IO_Error "Unable to create a temporary file.") else temp = File.new temp_path - if self.exists && copy_original then - Context.Output.with_enabled <| - self.copy_to temp replace_existing=True + if copy_original then Context.Output.with_enabled <| Panic.rethrow <| + case self.exists of + True -> + self.copy_to temp replace_existing=True + False -> + temp.delete_if_exists ## Attach a warning to the file that it is a dry run warning = Dry_Run_Operation.Warning "Only a dry run has occurred, with data written to a temporary file." @@ -803,3 +814,11 @@ get_child_widget file = children = file.list options = children.map c-> Option c.name c.name.pretty Widget.Single_Choice values=options display=Display.Always + +## PRIVATE +find_extension_from_name : Text -> Text +find_extension_from_name name = + last_dot = name.locate "." mode=Matching_Mode.Last + if last_dot.is_nothing then "" else + extension = name.drop (Index_Sub_Range.First last_dot.start) + if extension == "." 
then "" else extension diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/System/File/Advanced/Temporary_File.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/System/File/Advanced/Temporary_File.enso new file mode 100644 index 00000000000..6e55f053171 --- /dev/null +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/System/File/Advanced/Temporary_File.enso @@ -0,0 +1,106 @@ +import project.Any.Any +import project.Data.Text.Text +import project.Errors.File_Error.File_Error +import project.Errors.Illegal_State.Illegal_State +import project.Nothing.Nothing +import project.Panic.Panic +import project.Runtime.Context +import project.Runtime.Managed_Resource.Managed_Resource +import project.Runtime.Ref.Ref +import project.System.File.File +import project.System.Input_Stream.Input_Stream + +## PRIVATE + ADVANCED + A reference to a temporary file that will be deleted once all references to + it are gone. This is an advanced helper facility for library developers, not + aimed to be used by regular users. Most users should just use + `File.create_temporary_file` instead. + + This is helpful when implementing internal temporary files needed for some + operation, whose lifetime may have to be bound with some object (e.g. Excel + Workbook instance). + + The advantage of using `Temporary_File` over `File.create_temporary_file` is + that the file is likely to get cleaned up earlier than JVM exit. If helper + files are only cleaned at JVM exit, we run the risk of exhausting disk + storage if the operation is ran numerous times. With this approach, the risk + should be much lower, because the files are cleaned up sooner - as soon as + the first GC run after the file is no longer reachable. + + It has to be used with extra care, as anywhere where the internal File + reference is leaked without ensuring the lifetime of the `Temporary_File` + instance, that file reference may easily become invalid (i.e. its file may + get deleted). 
+type Temporary_File + ## PRIVATE + Instance (file_resource_reference : Ref (Nothing | Managed_Resource File)) + + ## PRIVATE + with_file : (File -> Any) -> Any + with_file self action = self.access_resource.with action + + ## PRIVATE + Gets the raw file reference. + Note that the underlying file may be deleted at any time, once `self` is + unreachable - so this method has to be used with extra care. + unsafe_get : File + unsafe_get self = self.with_file (f->f) + + ## PRIVATE + Deletes the temporary file, invalidating the reference. + dispose : Nothing + dispose self = + self.access_resource.finalize + self.file_resource_reference.put Nothing + + ## PRIVATE + access_resource : Managed_Resource File + access_resource self = case self.file_resource_reference.get of + Nothing -> Panic.throw (Illegal_State.Error "Using the Temporary_File after it has been disposed is not allowed.") + resource -> resource + + ## PRIVATE + to_text : Text + to_text self = case self.file_resource_reference.get of + Nothing -> "Temporary_File (disposed)" + resource -> "Temporary_File (" + (resource.with .to_text) + ")" + + ## PRIVATE + Creates a new empty `Temporary_File`. + new : Text -> Text -> Temporary_File + new prefix="temp" suffix=".tmp" = + # The file will be deleted on JVM exit, but we will try to delete it sooner. + file = File.create_temporary_file prefix suffix + resource = Managed_Resource.register file cleanup_tmp_file + Temporary_File.Instance (Ref.new resource) + + ## PRIVATE + Materializes the provided `Input_Stream` into a `Temporary_File`. + It will work the same regardless of whether the output context is enabled. 
+ from_stream : Input_Stream -> Temporary_File + from_stream stream = Context.Output.with_enabled <| + tmp_file = Temporary_File.new "enso-materialized-stream" ".tmp" + handler caught_panic = + tmp_file.dispose + Panic.throw caught_panic + Panic.catch Any handler=handler <| + tmp_file.with_file file-> + Panic.rethrow <| + File_Error.handle_java_exceptions Nothing <| + stream.write_to_file file + tmp_file + + ## PRIVATE + Materializes the provided `Input_Stream` into a `Temporary_File`. + If the stream is already backed by a temporary or regular file, that file is returned. + from_stream_light : Input_Stream -> Temporary_File | File + from_stream_light stream = + case stream.associated_file of + tmp : Temporary_File -> tmp + file : File -> file + _ -> Temporary_File.from_stream stream + +## PRIVATE +cleanup_tmp_file file = + file.delete_if_exists diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/System/File/Existing_File_Behavior.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/System/File/Existing_File_Behavior.enso index af2be17d97d..7231d3ec92e 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/System/File/Existing_File_Behavior.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/System/File/Existing_File_Behavior.enso @@ -77,6 +77,23 @@ type Existing_File_Behavior action output_stream . catch Any dataflow_error-> Panic.throw (Internal_Write_Operation_Errored.Error dataflow_error) + ## PRIVATE + A counterpart of `write` that will also handle the dry-run mode if the + Output context is disabled. It relies on a default logic for handling the + dry runs, which may not always be the right choice, depending on the file + format. + + The `action` takes 2 arguments - the effective file that will be written to + (the original file or the dry run file) and the output stream to write to. + write_handling_dry_run : File -> (File -> Output_Stream -> Any) -> Any ! 
File_Error + write_handling_dry_run self file action = + if Context.Output.is_enabled then self.write file (action file) else + effective_behavior = self.get_effective_behavior file is_enabled=False + needs_original = effective_behavior == Existing_File_Behavior.Append + dry_run_file = file.create_dry_run_file copy_original=needs_original + Context.Output.with_enabled <| + effective_behavior.write dry_run_file (action dry_run_file) + ## PRIVATE write_file_backing_up_old_one : File -> (Output_Stream -> Nothing) -> Nothing ! File_Error write_file_backing_up_old_one file action = recover_io_and_not_found <| @@ -100,7 +117,7 @@ write_file_backing_up_old_one file action = recover_io_and_not_found <| handle_internal_dataflow = Panic.catch Internal_Write_Operation_Errored handler=handle_write_failure_dataflow handle_internal_panic = Panic.catch Internal_Write_Operation_Panicked handler=handle_write_failure_panic handle_file_already_exists <| handle_internal_dataflow <| handle_internal_panic <| - Panic.rethrow <| + result = Panic.rethrow <| new_file.with_output_stream [File_Access.Write, File_Access.Create_New] output_stream-> result = Panic.catch Any (action output_stream) caught_panic-> Panic.throw (Internal_Write_Operation_Panicked.Panic caught_panic) @@ -118,6 +135,8 @@ write_file_backing_up_old_one file action = recover_io_and_not_found <| Panic.catch File_Error handler=not_found_handler <| Panic.rethrow <| file.move_to bak_file replace_existing=True Panic.rethrow <| new_file.move_to file + result + ## Here we manually check if the target file is writable. 
This is necessary, because the `Backup` scenario would go around the original file not being writable by moving it (which is permitted for read-only files too) and diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/System/File_Format.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/System/File_Format.enso index 7645dd419c1..7f83e17db59 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/System/File_Format.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/System/File_Format.enso @@ -1,5 +1,6 @@ import project.Any.Any import project.Data.Json.Json +import project.Data.Numbers.Integer import project.Data.Text.Encoding.Encoding import project.Data.Text.Text import project.Data.Vector.Vector @@ -155,9 +156,10 @@ type Plain_Text_Format file.read_text self.encoding on_problems ## PRIVATE - Implements the `Data.parse` for this `File_Format` - read_stream : Input_Stream -> Any - read_stream self stream:Input_Stream = + Implements decoding the format from a stream. + read_stream : Input_Stream -> File_Format_Metadata -> Any + read_stream self stream:Input_Stream (metadata : File_Format_Metadata = File_Format_Metadata.no_information) = + _ = metadata Text.from_bytes (stream.read_all_bytes) self.encoding ## A file format for reading or writing files as a sequence of bytes. @@ -191,9 +193,10 @@ type Bytes file.read_bytes ## PRIVATE - Implements the `Data.parse` for this `File_Format` - read_stream : Input_Stream -> Any - read_stream self stream:Input_Stream = + Implements decoding the format from a stream. + read_stream : Input_Stream -> File_Format_Metadata -> Any + read_stream self stream:Input_Stream (metadata : File_Format_Metadata = File_Format_Metadata.no_information) = + _ = metadata stream.read_all_bytes ## A file format for reading and writing files as JSON. 
@@ -232,9 +235,10 @@ type JSON_Format Error.throw (File_Error.Corrupted_Format file error.to_display_text error) ## PRIVATE - Implements the `Data.parse` for this `File_Format` - read_stream : Input_Stream -> Any - read_stream self stream:Input_Stream = + Implements decoding the format from a stream. + read_stream : Input_Stream -> File_Format_Metadata -> Any + read_stream self stream:Input_Stream (metadata : File_Format_Metadata = File_Format_Metadata.no_information) = + _ = metadata Text.from_bytes (stream.read_all_bytes) Encoding.utf_8 . parse_json ## A setting to infer the default behaviour of some option. @@ -245,11 +249,25 @@ type Infer type File_For_Read ## PRIVATE Arguments: - - `path` - the path or the URI of the file. - - `name` - the name of the file. - - `extension` - the extension of the file. - - `read_first_bytes` - a function that reads the first bytes of the file. - Value path:Text name:Text extension:Text (read_first_bytes:Function=(_->Nothing)) + - path: the path or the URI of the file. + - name: the name of the file. + - extension: the extension of the file. + - read_first_bytes: a function that reads the first bytes of the file. + - content_type: the content type of the file. + Value path:Text|Nothing name:Text|Nothing extension:Text|Nothing (read_first_bytes:(Integer -> Nothing | Vector Integer)=(_->Nothing)) ## PRIVATE File_For_Read.from (that:File) = File_For_Read.Value that.path that.name that.extension that.read_first_bytes + +## PRIVATE + Metadata that may aid `read_stream`. +type File_Format_Metadata + ## PRIVATE + Arguments: + - file_name: the name of the file. + - content_type: the content type of the file. 
+ Value (file_name : Text | Nothing = Nothing) (content_type : Text | Nothing = Nothing) + + ## PRIVATE + no_information : File_Format_Metadata + no_information = File_Format_Metadata.Value diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/System/Input_Stream.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/System/Input_Stream.enso index af246be8c19..93ce18f1782 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/System/Input_Stream.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/System/Input_Stream.enso @@ -6,6 +6,9 @@ import project.Errors.Encoding_Error.Encoding_Error import project.Errors.Problem_Behavior.Problem_Behavior import project.Nothing.Nothing import project.Runtime.Managed_Resource.Managed_Resource +import project.System.File.Advanced.Temporary_File.Temporary_File +import project.System.File.File +import project.System.File.File_Access.File_Access polyglot java import java.io.InputStream as Java_Input_Stream polyglot java import org.enso.base.encoding.ReportingStreamDecoder @@ -19,10 +22,10 @@ type Input_Stream Given a Java InputStream, wraps as a Managed_Resource and returns a new Input_Stream. - new : Java_Input_Stream -> Any -> Input_Stream - new java_stream error_handler = + new : Java_Input_Stream -> Any -> (Nothing | File | Temporary_File) -> Input_Stream + new java_stream error_handler associated_file=Nothing = resource = Managed_Resource.register java_stream close_stream - Input_Stream.Value resource error_handler + Input_Stream.Value resource error_handler associated_file ## PRIVATE An input stream, allowing for interactive reading of contents. @@ -31,7 +34,8 @@ type Input_Stream - stream_resource: The internal resource that represents the underlying stream. - error_handler: An error handler for IOExceptions thrown when reading. - Value stream_resource error_handler + - associated_file: The file associated with this stream, if any. 
+ Value stream_resource error_handler (associated_file:Nothing|File|Temporary_File) ## PRIVATE ADVANCED @@ -100,6 +104,16 @@ type Input_Stream problems = Vector.from_polyglot_array results.problems . map Encoding_Error.Error on_problems.attach_problems_after results.result problems + ## PRIVATE + Reads the contents of this stream into a given file. + write_to_file : File -> File + write_to_file self file = + result = self.with_java_stream java_input_stream-> + file.with_output_stream [File_Access.Create, File_Access.Truncate_Existing, File_Access.Write] output_stream-> + output_stream.with_java_stream java_output_stream-> + java_input_stream.transferTo java_output_stream + result.if_not_error file + ## PRIVATE Utility method for closing primitive Java streams. Provided to avoid accidental scope capture with `Managed_Resource` finalizers. diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso index 1a0586fee0b..649a828b37d 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso @@ -7,6 +7,7 @@ import Standard.Base.Data.Vector.No_Wrap import Standard.Base.Errors.Common.Additional_Warnings import Standard.Base.Errors.Common.Incomparable_Values import Standard.Base.Errors.Common.Index_Out_Of_Bounds +import Standard.Base.Errors.Common.No_Such_Method import Standard.Base.Errors.Common.Out_Of_Memory import Standard.Base.Errors.Common.Type_Error import Standard.Base.Errors.File_Error.File_Error @@ -2492,16 +2493,13 @@ type Table if base_format == Nothing then Error.throw (File_Error.Unsupported_Output_Type file Table) else self.write file format=base_format on_existing_file match_columns on_problems _ -> - methods = if format == JSON_Format then ["write_table"] else Meta.meta (Meta.type_of format) . methods - if methods.contains "write_table" . 
not then Error.throw (File_Error.Unsupported_Output_Type format Table) else - effective_existing_behaviour = on_existing_file.get_effective_behavior file - tgt_file = if Context.Output.is_enabled then file else - should_copy_file = on_existing_file==Existing_File_Behavior.Append - file.create_dry_run_file copy_original=should_copy_file - + handle_no_write_method caught_panic = + is_write = caught_panic.payload.method_name == "write_table" + if is_write.not then Panic.throw caught_panic else + Error.throw (File_Error.Unsupported_Output_Type format Table) + Panic.catch No_Such_Method handler=handle_no_write_method <| to_write = if Context.Output.is_enabled then self else self.take 1000 - Context.Output.with_enabled <| - format.write_table tgt_file to_write effective_existing_behaviour match_columns on_problems + format.write_table file to_write on_existing_file match_columns on_problems ## Creates a text representation of the table using the CSV format. to_csv : Text diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Delimited/Delimited_Format.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Delimited/Delimited_Format.enso index 01b5423a445..08d7246bd07 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Delimited/Delimited_Format.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Delimited/Delimited_Format.enso @@ -1,6 +1,7 @@ from Standard.Base import all import Standard.Base.Network.HTTP.Response.Response import Standard.Base.System.File_Format.File_For_Read +import Standard.Base.System.File_Format.File_Format_Metadata import Standard.Base.System.Input_Stream.Input_Stream from Standard.Base.Widget_Helpers import make_delimiter_selector @@ -98,9 +99,10 @@ type Delimited_Format Delimited_Reader.read_file self file on_problems ## PRIVATE - Implements the `Data.parse` for this `File_Format` - read_stream : Input_Stream -> Any - read_stream self stream:Input_Stream = + Implements decoding the format from a stream. 
+ read_stream : Input_Stream -> File_Format_Metadata -> Any + read_stream self stream:Input_Stream (metadata : File_Format_Metadata = File_Format_Metadata.no_information) = + _ = metadata Delimited_Reader.read_stream self stream on_problems=Report_Warning ## PRIVATE @@ -108,8 +110,7 @@ type Delimited_Format Implements the `Table.write` for this `File_Format`. write_table : File -> Table -> Existing_File_Behavior -> Match_Columns -> Problem_Behavior -> File write_table self file table on_existing_file match_columns on_problems = - r = Delimited_Writer.write_file table self file on_existing_file match_columns on_problems - r.if_not_error file + Delimited_Writer.write_file table self file on_existing_file match_columns on_problems ## PRIVATE Clone the instance with some properties overridden. diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Excel/Excel_Format.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Excel/Excel_Format.enso index 69967c52194..e463031cfff 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Excel/Excel_Format.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Excel/Excel_Format.enso @@ -1,7 +1,9 @@ from Standard.Base import all import Standard.Base.Errors.Illegal_Argument.Illegal_Argument import Standard.Base.System.File_Format.File_For_Read +import Standard.Base.System.File_Format.File_Format_Metadata import Standard.Base.System.Input_Stream.Input_Stream +from Standard.Base.System.File import find_extension_from_name import project.Data.Match_Columns.Match_Columns import project.Data.Table.Table @@ -15,12 +17,9 @@ import project.Internal.Excel_Writer should_treat_as_xls_format : (Boolean|Infer) -> File -> Boolean ! 
Illegal_Argument should_treat_as_xls_format xls_format file = if xls_format != Infer then xls_format else - case file.extension of - ".xlsx" -> False - ".xlsm" -> False - ".xls" -> True - ".xlt" -> True - _ -> Error.throw (Illegal_Argument.Error ("Unknown file extension for Excel file (" + file.extension + ")")) + inferred_xls_format = xls_format_from_file_extension file.extension + inferred_xls_format.if_nothing <| + Error.throw (Illegal_Argument.Error ("Unknown file extension for Excel file (" + file.extension + ")")) ## Read the file to a `Table` from an Excel file type Excel_Format @@ -67,11 +66,9 @@ type Excel_Format for_web : Text -> URI -> Excel_Format | Nothing for_web content_type uri = _ = [uri] - parts = content_type.split ";" . map .trim - case parts.first of - "application/vnd.ms-excel" -> Excel_Format.Excel xls_format=True - "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" -> Excel_Format.Excel xls_format=False - _ -> Nothing + inferred_xls_format = xls_format_from_content_type content_type + inferred_xls_format.if_not_nothing <| + Excel_Format.Excel xls_format=inferred_xls_format ## PRIVATE ADVANCED @@ -84,10 +81,13 @@ type Excel_Format _ -> Excel_Reader.read_file file self.section self.headers on_problems format ## PRIVATE - Implements the `Data.parse` for this `File_Format` - read_stream : Input_Stream -> Any - read_stream self stream:Input_Stream = - xls_format = if self.xls_format == Infer then False else self.xls_format + Implements decoding the format from a stream. 
+ read_stream : Input_Stream -> File_Format_Metadata -> Any + read_stream self stream:Input_Stream (metadata : File_Format_Metadata = File_Format_Metadata.no_information) = + xls_format = if self.xls_format != Infer then self.xls_format else + inferred_xls_format = xls_format_from_metadata metadata + # If still unable to infer it, we default to False + inferred_xls_format.if_nothing False bad_format _ = message = case self.xls_format of @@ -124,10 +124,41 @@ write_table self file table on_existing_file match_columns on_problems = format = should_treat_as_xls_format self.xls_format file - r = case self.section of + case self.section of Excel_Section.Sheet_Names -> Error.throw (Illegal_Argument.Error "Sheet_Names cannot be used for `write`.") Excel_Section.Range_Names -> Error.throw (Illegal_Argument.Error "Range_Names cannot be used for `write`.") Excel_Section.Workbook -> Excel_Writer.write_file file table on_existing_file (Excel_Section.Worksheet self.default_sheet) True match_columns on_problems format _ -> Excel_Writer.write_file file table on_existing_file self.section self.headers match_columns on_problems format - r.if_not_error file + +## PRIVATE + Infers the xls format from the Content-Type. + Returns Nothing if the content type is not a known Excel format. +xls_format_from_content_type : Text -> Boolean | Nothing +xls_format_from_content_type content_type = + parts = content_type.split ";" . map .trim + case parts.first of + "application/vnd.ms-excel" -> True + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" -> False + _ -> Nothing + +## PRIVATE + Infers the xls format from the file extension. + Returns Nothing if the extension is not a known Excel format. 
+xls_format_from_file_extension : Text -> Boolean | Nothing +xls_format_from_file_extension extension = + case extension of + ".xlsx" -> False + ".xlsm" -> False + ".xls" -> True + ".xlt" -> True + _ -> Nothing + +## PRIVATE +xls_format_from_metadata : File_Format_Metadata -> Boolean | Nothing +xls_format_from_metadata metadata = + from_content_type = metadata.content_type.if_not_nothing (xls_format_from_content_type metadata.content_type) + from_content_type.if_nothing <| + metadata.file_name.if_not_nothing <| + extension = find_extension_from_name metadata.file_name + xls_format_from_file_extension extension diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Excel/Excel_Workbook.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Excel/Excel_Workbook.enso index b5ffaafa86a..ac655104aa2 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Excel/Excel_Workbook.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Excel/Excel_Workbook.enso @@ -1,8 +1,12 @@ from Standard.Base import all import Standard.Base.Errors.File_Error.File_Error import Standard.Base.Errors.Illegal_Argument.Illegal_Argument +import Standard.Base.Errors.Illegal_State.Illegal_State import Standard.Base.Metadata.Display +import Standard.Base.Runtime.Managed_Resource.Managed_Resource +import Standard.Base.Runtime.Ref.Ref import Standard.Base.System.Input_Stream.Input_Stream +import Standard.Base.System.File.Advanced.Temporary_File.Temporary_File from Standard.Base.Data.Filter_Condition import sql_like_to_regex from Standard.Base.Metadata.Choice import Option from Standard.Base.Metadata.Widget import Single_Choice @@ -13,8 +17,12 @@ import project.Excel.Excel_Section.Excel_Section import project.Internal.Excel_Reader import project.Internal.Java_Problems +polyglot java import java.io.File as Java_File polyglot java import org.apache.poi.ss.usermodel.Workbook polyglot java import org.enso.table.read.ExcelReader +polyglot java import org.enso.table.excel.ExcelConnectionPool +polyglot 
java import org.enso.table.excel.ExcelFileFormat +polyglot java import org.enso.table.excel.ReadOnlyExcelConnection type Excel_Workbook ## PRIVATE @@ -25,12 +33,24 @@ type Excel_Workbook - file: The file to load. - xls_format: Whether to use the old XLS format (default is XLSX). - headers: Whether to use the first row as headers (default is to infer). - new : File | Text -> Boolean -> Boolean | Infer -> Excel_Workbook + new : File | Text | Temporary_File -> Boolean -> Boolean | Infer -> Excel_Workbook new file xls_format=False headers=Infer = - file_obj = File.new file . normalize - File_Error.handle_java_exceptions file_obj <| Excel_Reader.handle_bad_format file_obj <| - file_obj.with_input_stream [File_Access.Read] stream-> - Excel_Workbook.from_stream stream xls_format headers file_obj + file_obj = case file of + tmp : Temporary_File -> tmp + other -> File.new other + file_for_errors = if file_obj.is_a Temporary_File then Nothing else file_obj + + continuation raw_file = + format = if xls_format then ExcelFileFormat.XLS else ExcelFileFormat.XLSX + File_Error.handle_java_exceptions raw_file <| Excel_Reader.handle_bad_format file_for_errors <| Illegal_State.handle_java_exception <| + # The `java_file` depends on the liveness of the possible `Temporary_File` but that is ensured by storing the `file_obj` in the resulting workbook instance. + java_file = Java_File.new raw_file.absolute.normalize.path + excel_connection_resource = Managed_Resource.register (ExcelConnectionPool.INSTANCE.openReadOnlyConnection java_file format) close_connection + Excel_Workbook.Value (Ref.new excel_connection_resource) file_obj xls_format headers + + case file_obj of + tmp : Temporary_File -> tmp.with_file continuation + f : File -> continuation f ## PRIVATE ADVANCED @@ -43,13 +63,12 @@ type Excel_Workbook - file: Optional file reference. 
from_stream : Input_Stream -> Boolean -> Boolean | Infer -> File | Nothing -> Excel_Workbook from_stream stream xls_format=False headers=Infer file=Nothing = Excel_Reader.handle_bad_format file <| - stream.with_java_stream java_stream- workbook = ExcelReader.readWorkbook java_stream xls_format - Excel_Workbook.Value workbook file xls_format headers + temp_file = Temporary_File.from_stream_light stream + Excel_Workbook.new temp_file xls_format headers ## PRIVATE Creates an Excel_Workbook connection. - Value workbook:Workbook (file:(File|Nothing)) xls_format:Boolean headers:(Boolean|Infer) + Value (excel_connection_resource_ref : Ref (Managed_Resource ReadOnlyExcelConnection)) (file:(File|Temporary_File|Nothing)) xls_format:Boolean headers:(Boolean|Infer) ## Returns the list of databases (or catalogs) for the connection. databases : Nothing @@ -57,7 +76,11 @@ type Excel_Workbook ## Returns the name of the current database (or catalog). database : Text - database self = if self.file.is_nothing then "" else self.file.path + database self = case self.file of + regular_file : File -> regular_file.path + ## A Temporary_File is still visualized as no path, because the fact that the workbook is stored as a file is + just an implementation detail - it is coming from a stream so there is no logical file it is associated with. + _ -> "" ## Returns a new Connection with the specified database set as default. @@ -92,22 +115,24 @@ type Excel_Workbook ## GROUP Standard.Base.Metadata Gets the number of sheets. sheet_count : Integer - sheet_count self = self.workbook.getNumberOfSheets + sheet_count self = self.with_java_workbook .getNumberOfSheets ## GROUP Standard.Base.Metadata Gets the names of all the sheets. 
sheet_names : Vector Text - sheet_names self = Vector.from_polyglot_array (ExcelReader.readSheetNames self.workbook) + sheet_names self = self.with_java_workbook java_workbook-> + Vector.from_polyglot_array (ExcelReader.readSheetNames java_workbook) ## GROUP Standard.Base.Metadata Gets the number of named ranges. named_ranges_count : Integer - named_ranges_count self = self.workbook.getNumberOfNames + named_ranges_count self = self.with_java_workbook .getNumberOfNames ## GROUP Standard.Base.Metadata Gets the names of all the named ranges. named_ranges : Vector Text - named_ranges self = Vector.from_polyglot_array (ExcelReader.readRangeNames self.workbook) + named_ranges self = self.with_java_workbook java_workbook-> + Vector.from_polyglot_array (ExcelReader.readRangeNames java_workbook) ## Gets a list of the table types. table_types : Vector Text @@ -167,9 +192,9 @@ type Excel_Workbook read self query (limit : Integer | Nothing = Nothing) = java_headers = Excel_Reader.make_java_headers self.headers java_table = Java_Problems.with_problem_aggregator Problem_Behavior.Report_Warning java_problem_aggregator-> - case query of - _ : Excel_Range -> ExcelReader.readRange self.workbook query.java_range java_headers 0 limit java_problem_aggregator - _ : Text -> ExcelReader.readRangeByName self.workbook query java_headers 0 limit java_problem_aggregator + self.with_java_workbook java_workbook-> case query of + _ : Excel_Range -> ExcelReader.readRange java_workbook query.java_range java_headers 0 limit java_problem_aggregator + _ : Text -> ExcelReader.readRangeByName java_workbook query java_headers 0 limit java_problem_aggregator Table.Value java_table ## GROUP Standard.Base.Input @@ -194,13 +219,14 @@ type Excel_Workbook names.at (sheet - 1) _ -> Error.throw (Illegal_Argument.Error "Worksheet must be either Text or an Integer.") java_table = Java_Problems.with_problem_aggregator Problem_Behavior.Report_Warning java_problem_aggregator-> - ExcelReader.readRangeByName 
self.workbook sheet_name java_headers skip_rows row_limit java_problem_aggregator + self.with_java_workbook java_workbook-> + ExcelReader.readRangeByName java_workbook sheet_name java_headers skip_rows row_limit java_problem_aggregator Table.Value java_table Excel_Section.Cell_Range address skip_rows row_limit -> java_table = Java_Problems.with_problem_aggregator Problem_Behavior.Report_Warning java_problem_aggregator-> - case address of - _ : Excel_Range -> ExcelReader.readRange self.workbook address.java_range java_headers skip_rows row_limit java_problem_aggregator - _ : Text -> ExcelReader.readRangeByName self.workbook address java_headers skip_rows row_limit java_problem_aggregator + self.with_java_workbook java_workbook-> case address of + _ : Excel_Range -> ExcelReader.readRange java_workbook address.java_range java_headers skip_rows row_limit java_problem_aggregator + _ : Text -> ExcelReader.readRangeByName java_workbook address java_headers skip_rows row_limit java_problem_aggregator Table.Value java_table ## ALIAS get, worksheet @@ -213,3 +239,51 @@ type Excel_Workbook sheet : Text | Integer -> Table sheet self name:(Text | Integer) = self.read_section (Excel_Section.Worksheet name 0 Nothing) + + ## ADVANCED + Closes the workbook, releasing any resources it holds. + + This method may be used to release the underlying workbook file, + e.g. to be able to delete it. + + After this method is called, this instance is not usable any more - any + operation on it will throw an `Illegal_State` error. + close : Nothing + close self = + case self.excel_connection_resource_ref.get of + Nothing -> Nothing + resource -> + resource.finalize + self.excel_connection_resource_ref.put Nothing + Nothing + + ## Returns a simple text description of the workbook. 
+ to_text : Text + to_text self = + associated_regular_file = case self.file of + regular_file : File -> " (" + regular_file.name + ")" + _ -> "" + "Excel_Workbook"+associated_regular_file + + ## PRIVATE + Provides a JS object representation for use in visualizations. + to_js_object : JS_Object + to_js_object self = + headers = if self.headers == Infer then "Infer" else self.headers + additional_fields = case self.file of + regular_file : File -> [["file", regular_file.path]] + _ -> [] + JS_Object.from_pairs <| + [["type", "Excel_Workbook"], ["headers", headers], ["xls_format", self.xls_format]] + additional_fields + + ## PRIVATE + with_java_workbook : (Workbook -> Any) -> Any ! Illegal_State + with_java_workbook self f = Illegal_State.handle_java_exception <| + case self.excel_connection_resource_ref.get of + Nothing -> + Error.throw (Illegal_State.Error "The workbook is being used after it was closed.") + resource -> resource.with connection-> + connection.withWorkbook f + +## PRIVATE +close_connection c = c.close diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Delimited_Writer.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Delimited_Writer.enso index 1711a59c254..086e8dee2bd 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Delimited_Writer.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Delimited_Writer.enso @@ -40,8 +40,9 @@ write_file table format file on_existing_file match_columns on_problems = Existing_File_Behavior.Append -> append_to_file table format file match_columns on_problems _ -> - on_existing_file.write file stream-> - write_to_stream table format stream on_problems related_file=file + on_existing_file.write_handling_dry_run file effective_file-> stream-> + r = write_to_stream table format stream on_problems related_file=effective_file + r.if_not_error effective_file ## PRIVATE Handles appending to an existing file, ensuring that the columns are matched @@ -85,8 +86,9 @@ 
append_to_file table format file match_columns on_problems = False -> format.without_headers needs_leading_newline = metadata.has_any_content && metadata.ends_with_newline.not - Existing_File_Behavior.Append.write file stream-> - write_to_stream reordered_table amended_format stream on_problems related_file=file separator_override=effective_line_separator needs_leading_newline=needs_leading_newline + Existing_File_Behavior.Append.write_handling_dry_run file effective_file-> stream-> + r = write_to_stream reordered_table amended_format stream on_problems related_file=effective_file separator_override=effective_line_separator needs_leading_newline=needs_leading_newline + r.if_not_error effective_file ## PRIVATE Returns a Text value representing the table in the delimited format. diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Excel_Reader.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Excel_Reader.enso index 20d2ed9c1b3..8eb45f8cf78 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Excel_Reader.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Excel_Reader.enso @@ -9,9 +9,11 @@ import project.Excel.Excel_Section.Excel_Section import project.Internal.Java_Problems from project.Errors import Duplicate_Output_Column_Names, Empty_Sheet_Error, Invalid_Column_Names, Invalid_Location +polyglot java import java.io.File as Java_File polyglot java import org.apache.poi.poifs.filesystem.NotOLE2FileException polyglot java import org.apache.poi.UnsupportedFileFormatException polyglot java import org.enso.table.error.InvalidLocationException +polyglot java import org.enso.table.excel.ExcelFileFormat polyglot java import org.enso.table.excel.ExcelHeaders polyglot java import org.enso.table.read.ExcelReader @@ -25,15 +27,16 @@ make_java_headers headers = case headers of ## PRIVATE Handle and map the Java errors when reading an Excel file -handle_reader : File -> (Input_Stream -> (Table | Vector)) -> (Table | 
Vector) +handle_reader : File -> (Java_File -> (Table | Vector)) -> (Table | Vector) handle_reader file reader = bad_argument caught_panic = Error.throw (Invalid_Location.Error caught_panic.payload.getCause) handle_bad_argument = Panic.catch InvalidLocationException handler=bad_argument + # TODO [RW] handle InvalidFormatException File_Error.handle_java_exceptions file <| handle_bad_argument <| handle_bad_format file <| Empty_Sheet_Error.handle_java_exception <| - file.with_input_stream [File_Access.Read] stream-> - stream.with_java_stream reader + java_file = Java_File.new file.path + reader java_file ## PRIVATE Reads an input Excel file according to the provided section. @@ -49,21 +52,22 @@ handle_reader file reader = otherwise reads in Excel 2007+ format. read_file : File -> Excel_Section -> (Boolean|Infer) -> Problem_Behavior -> Boolean -> (Table | Vector) read_file file section headers on_problems xls_format=False = - reader stream = case section of + file_format = if xls_format then ExcelFileFormat.XLS else ExcelFileFormat.XLSX + reader java_file = case section of Excel_Section.Workbook -> Error.throw (Illegal_Argument.Error "Cannot read an entire workbook.") - Excel_Section.Sheet_Names -> Vector.from_polyglot_array (ExcelReader.readSheetNames stream xls_format) - Excel_Section.Range_Names -> Vector.from_polyglot_array (ExcelReader.readRangeNames stream xls_format) + Excel_Section.Sheet_Names -> Vector.from_polyglot_array (ExcelReader.readSheetNames java_file file_format) + Excel_Section.Range_Names -> Vector.from_polyglot_array (ExcelReader.readRangeNames java_file file_format) Excel_Section.Worksheet sheet skip_rows row_limit -> Java_Problems.with_problem_aggregator on_problems java_problem_aggregator-> java_table = case sheet of - _ : Integer -> ExcelReader.readSheetByIndex stream sheet (make_java_headers headers) skip_rows row_limit xls_format java_problem_aggregator - _ : Text -> ExcelReader.readSheetByName stream sheet (make_java_headers headers) 
skip_rows row_limit xls_format java_problem_aggregator + _ : Integer -> ExcelReader.readSheetByIndex java_file sheet (make_java_headers headers) skip_rows row_limit file_format java_problem_aggregator + _ : Text -> ExcelReader.readSheetByName java_file sheet (make_java_headers headers) skip_rows row_limit file_format java_problem_aggregator Table.Value java_table Excel_Section.Cell_Range address skip_rows row_limit -> Java_Problems.with_problem_aggregator on_problems java_problem_aggregator-> java_table = case address of - _ : Excel_Range -> ExcelReader.readRange stream address.java_range (make_java_headers headers) skip_rows row_limit xls_format java_problem_aggregator - _ : Text -> ExcelReader.readRangeByName stream address (make_java_headers headers) skip_rows row_limit xls_format java_problem_aggregator + _ : Excel_Range -> ExcelReader.readRange java_file address.java_range (make_java_headers headers) skip_rows row_limit file_format java_problem_aggregator + _ : Text -> ExcelReader.readRangeByName java_file address (make_java_headers headers) skip_rows row_limit file_format java_problem_aggregator Table.Value java_table handle_reader file reader diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Excel_Writer.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Excel_Writer.enso index 58ff4206296..9500593dd2e 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Excel_Writer.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Excel_Writer.enso @@ -1,6 +1,9 @@ from Standard.Base import all +import Standard.Base.Errors.File_Error.File_Error import Standard.Base.Errors.Illegal_Argument.Illegal_Argument import Standard.Base.Errors.Illegal_State.Illegal_State +import Standard.Base.Runtime.Context +import Standard.Base.Runtime.Managed_Resource.Managed_Resource import project.Data.Match_Columns.Match_Columns import project.Data.Table.Table @@ -9,23 +12,27 @@ import project.Excel.Excel_Section.Excel_Section import 
project.Internal.Excel_Reader from project.Errors import Column_Count_Mismatch, Column_Name_Mismatch, Existing_Data, Invalid_Location, Range_Exceeded +polyglot java import java.io.File as Java_File polyglot java import java.lang.IllegalStateException +polyglot java import org.apache.poi.ss.usermodel.Workbook +polyglot java import org.enso.base.DryRunFileManager polyglot java import org.enso.table.error.ExistingDataException polyglot java import org.enso.table.error.InvalidLocationException polyglot java import org.enso.table.error.RangeExceededException +polyglot java import org.enso.table.excel.ExcelConnectionPool +polyglot java import org.enso.table.excel.ExcelFileFormat polyglot java import org.enso.table.read.ExcelReader polyglot java import org.enso.table.write.ExcelWriter polyglot java import org.enso.table.write.ExistingDataMode +polyglot java import org.enso.table.write.ExistingFileBehavior ## PRIVATE -make_java_existing_data_mode : Existing_File_Behavior -> Match_Columns -> ExistingDataMode -make_java_existing_data_mode on_existing_file match_columns = case on_existing_file of - Existing_File_Behavior.Error -> ExistingDataMode.ERROR - Existing_File_Behavior.Overwrite -> ExistingDataMode.REPLACE - Existing_File_Behavior.Backup -> ExistingDataMode.REPLACE - Existing_File_Behavior.Append -> case match_columns of - Match_Columns.By_Name -> ExistingDataMode.APPEND_BY_NAME - Match_Columns.By_Position -> ExistingDataMode.APPEND_BY_INDEX +make_java_existing_file_behavior : Existing_File_Behavior -> ExistingFileBehavior +make_java_existing_file_behavior on_existing_file = case on_existing_file of + Existing_File_Behavior.Error -> ExistingFileBehavior.ERROR + Existing_File_Behavior.Overwrite -> ExistingFileBehavior.OVERWRITE + Existing_File_Behavior.Backup -> ExistingFileBehavior.BACKUP + Existing_File_Behavior.Append -> ExistingFileBehavior.APPEND ## PRIVATE Writes a Table to an Excel file. 
@@ -44,31 +51,81 @@ make_java_existing_data_mode on_existing_file match_columns = case on_existing_f - xls_format: If `true`, the file is written in the legacy XLS format. Otherwise, the file is written in the modern XLSX format. write_file : File -> Table -> Existing_File_Behavior -> Excel_Section -> (Boolean|Infer) -> Match_Columns -> Problem_Behavior -> Boolean -> File -write_file file table on_existing_file section headers match_columns on_problems xls_format=False = - _ = [on_problems] - ## If file does not exist or is empty then create a new workbook. - workbook = if file.exists.not || (file.size == 0) then ExcelWriter.createWorkbook xls_format else - Excel_Reader.handle_reader file stream->(ExcelReader.getWorkbook stream xls_format) +write_file (file : File) (table : Table) (on_existing_file : Existing_File_Behavior) (section : Excel_Section) (headers : Boolean|Infer) (match_columns:Match_Columns) (on_problems:Problem_Behavior) (xls_format:Boolean) = + _ = on_problems + if (on_existing_file == Existing_File_Behavior.Error) && file.exists then Error.throw (File_Error.Already_Exists file) else + java_file = file_as_java file + file_format = if xls_format then ExcelFileFormat.XLS else ExcelFileFormat.XLSX + ExcelWriter.setEnsoToTextCallbackIfUnset (.to_text) + modification_strategy = prepare_file_modification_strategy table section headers on_existing_file match_columns + is_dry_run = Context.Output.is_enabled.not + + modification_strategy.if_not_error <| Panic.recover File_Error <| File_Error.handle_java_exceptions file <| handle_writer <| + possible_backup_file = if on_existing_file == Existing_File_Behavior.Backup then file_as_java (find_bak_file file) else Nothing + possible_dry_run_file = if is_dry_run.not then Nothing else + preexisting_dry_run_file = DryRunFileManager.preExistingTemporaryFile file.absolute.normalize.path + preexisting_dry_run_file.if_not_nothing <| + Java_File.new preexisting_dry_run_file + accompanying_files = [possible_backup_file, 
possible_dry_run_file].filter (!= Nothing) . filter (!= java_file) + + ExcelConnectionPool.INSTANCE.lockForWriting java_file file_format accompanying_files write_helper-> Context.Output.with_enabled <| + temp_file = if is_dry_run then file.create_dry_run_file copy_original=False else + find_temp_file file + + ## We 'sync' the temp_file to reflect the original target file - if it exists we copy the contents, if the source + doesn't exist we also ensure that the temp file is not polluted with data from previous (dry-run) writes. + Panic.rethrow <| + if file.exists then (file.copy_to temp_file replace_existing=True) else (temp_file.delete_if_exists) + write_helper.writeWorkbook (file_as_java temp_file) modification_strategy + result_file = if is_dry_run then temp_file else + needs_backup = on_existing_file == Existing_File_Behavior.Backup + if needs_backup && file.exists then + backup_file = find_bak_file file + ## We can move instead of copying because right in next line we will overwrite the original file + anyway. And move should be more efficient. + Panic.rethrow <| file.move_to backup_file replace_existing=True + + Panic.rethrow <| temp_file.move_to file replace_existing=True + file + result_file + +## PRIVATE +find_temp_file : File -> File +find_temp_file base_file = + parent = base_file.absolute.normalize.parent + name = base_file.name + go i = + temp_file = parent / (name + ".temp." 
+ System.nano_time.to_text) + if temp_file.exists then go (i + 1) else temp_file + go 0 + +## PRIVATE +find_bak_file : File -> File +find_bak_file base_file = + parent = base_file.absolute.normalize.parent + parent / (base_file.name + ".bak") + +## PRIVATE +file_as_java : File -> Java_File +file_as_java file = + Java_File.new file.absolute.normalize.path + +## PRIVATE +prepare_file_modification_strategy : Table -> Excel_Section -> Boolean|Infer -> Existing_File_Behavior -> Match_Columns -> (Workbook -> Nothing) +prepare_file_modification_strategy table section headers on_existing_file match_columns = existing_data_mode = make_java_existing_data_mode on_existing_file match_columns java_headers = Excel_Reader.make_java_headers headers - ExcelWriter.setEnsoToTextCallbackIfUnset (.to_text) - result = handle_writer <| case section of + case section of Excel_Section.Worksheet sheet skip_rows row_limit -> - ExcelWriter.writeTableToSheet workbook sheet existing_data_mode skip_rows table.java_table row_limit java_headers - Excel_Section.Cell_Range address skip_rows row_limit -> case address of - Excel_Range.Value java_range -> ExcelWriter.writeTableToRange workbook java_range existing_data_mode skip_rows table.java_table row_limit java_headers - _ : Text -> ExcelWriter.writeTableToRange workbook address existing_data_mode skip_rows table.java_table row_limit java_headers - _ : Excel_Section -> Error.throw (Illegal_Argument.Error "Only a Worksheet or Cell_Range is allowed in write_file") - - if result.is_error then result else - write_stream stream = stream.with_java_stream java_stream-> - workbook.write java_stream - case on_existing_file of - Existing_File_Behavior.Append -> - ## Special handling - have successfully added the extra sheet/range so now overwrite file with backup. 
- Existing_File_Behavior.Backup.write file write_stream - _ -> on_existing_file.write file write_stream + workbook_to_modify-> + ExcelWriter.writeTableToSheet workbook_to_modify sheet existing_data_mode skip_rows table.java_table row_limit java_headers + Excel_Section.Cell_Range address skip_rows row_limit -> + java_range = case address of + Excel_Range.Value java_range -> java_range + text : Text -> text + workbook_to_modify-> + ExcelWriter.writeTableToRange workbook_to_modify java_range existing_data_mode skip_rows table.java_table row_limit java_headers ## PRIVATE Handle and map the Java errors when writing an Excel file @@ -90,3 +147,13 @@ handle_writer ~writer = Column_Count_Mismatch.handle_java_exception <| handle_bad_location <| Illegal_Argument.handle_java_exception <| handle_range_exceeded <| handle_existing_data <| writer + +## PRIVATE +make_java_existing_data_mode : Existing_File_Behavior -> Match_Columns -> ExistingDataMode +make_java_existing_data_mode on_existing_file match_columns = case on_existing_file of + Existing_File_Behavior.Error -> ExistingDataMode.ERROR + Existing_File_Behavior.Overwrite -> ExistingDataMode.REPLACE + Existing_File_Behavior.Backup -> ExistingDataMode.REPLACE + Existing_File_Behavior.Append -> case match_columns of + Match_Columns.By_Name -> ExistingDataMode.APPEND_BY_NAME + Match_Columns.By_Position -> ExistingDataMode.APPEND_BY_INDEX diff --git a/std-bits/base/src/main/java/org/enso/base/DryRunFileManager.java b/std-bits/base/src/main/java/org/enso/base/DryRunFileManager.java index f6136568a8d..b907d988ad3 100644 --- a/std-bits/base/src/main/java/org/enso/base/DryRunFileManager.java +++ b/std-bits/base/src/main/java/org/enso/base/DryRunFileManager.java @@ -39,4 +39,12 @@ public class DryRunFileManager { } }); } + + public static String preExistingTemporaryFile(String path) { + if (files.containsValue(path)) { + return path; + } + + return files.get(path); + } } diff --git 
a/std-bits/table/src/main/java/org/enso/table/error/InvalidLocationException.java b/std-bits/table/src/main/java/org/enso/table/error/InvalidLocationException.java index 387de4ff382..beea116d412 100644 --- a/std-bits/table/src/main/java/org/enso/table/error/InvalidLocationException.java +++ b/std-bits/table/src/main/java/org/enso/table/error/InvalidLocationException.java @@ -1,6 +1,6 @@ package org.enso.table.error; -public class InvalidLocationException extends Exception { +public class InvalidLocationException extends RuntimeException { public InvalidLocationException(String errorMessage) { super(errorMessage); } diff --git a/std-bits/table/src/main/java/org/enso/table/excel/ExcelConnectionPool.java b/std-bits/table/src/main/java/org/enso/table/excel/ExcelConnectionPool.java new file mode 100644 index 00000000000..41b32c2da2a --- /dev/null +++ b/std-bits/table/src/main/java/org/enso/table/excel/ExcelConnectionPool.java @@ -0,0 +1,295 @@ +package org.enso.table.excel; + +import org.apache.poi.UnsupportedFileFormatException; +import org.apache.poi.hssf.usermodel.HSSFWorkbook; +import org.apache.poi.openxml4j.exceptions.InvalidFormatException; +import org.apache.poi.openxml4j.exceptions.OpenXML4JRuntimeException; +import org.apache.poi.openxml4j.opc.OPCPackage; +import org.apache.poi.openxml4j.opc.PackageAccess; +import org.apache.poi.poifs.filesystem.POIFSFileSystem; +import org.apache.poi.ss.usermodel.Workbook; +import org.apache.poi.xssf.usermodel.XSSFWorkbook; + +import java.io.BufferedOutputStream; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.file.AccessMode; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.function.Function; + +public class ExcelConnectionPool { + public static final ExcelConnectionPool INSTANCE = new ExcelConnectionPool(); + + private ExcelConnectionPool() { + } + + 
public ReadOnlyExcelConnection openReadOnlyConnection(File file, ExcelFileFormat format) throws IOException { + synchronized (this) { + if (isCurrentlyWriting) { + throw new IllegalStateException("Cannot open a read-only Excel connection while an Excel file is being " + + "written to. This is a bug in the Table library."); + } + + if (!file.exists()) { + throw new FileNotFoundException(file.toString()); + } + + String key = getKeyForFile(file); + ConnectionRecord existingRecord = records.get(key); + if (existingRecord != null) { + // Adapt the existing record + if (existingRecord.format != format) { + throw new ExcelFileFormatMismatchException("Requesting to open " + file + " as " + format + ", but it was " + + "already opened as " + existingRecord.format + "."); + } + + existingRecord.refCount++; + + return new ReadOnlyExcelConnection(this, key, existingRecord); + } else { + // Create the new record + ConnectionRecord record = new ConnectionRecord(); + record.refCount = 1; + record.file = file; + record.format = format; + record.workbook = openWorkbook(file, format, false); + records.put(key, record); + return new ReadOnlyExcelConnection(this, key, record); + } + } + } + + public static class WriteHelper { + private final ExcelFileFormat format; + + public WriteHelper(ExcelFileFormat format) { + this.format = format; + } + + public R writeWorkbook(File file, Function writeAction) throws IOException { + boolean preExistingFile = file.exists() && Files.size(file.toPath()) > 0; + + try (Workbook workbook = preExistingFile ? ExcelConnectionPool.openWorkbook(file, format, true) : + createEmptyWorkbook(format)) { + R result = writeAction.apply(workbook); + + if (preExistingFile) { + // Save the file in place. 
+ switch (workbook) { + case HSSFWorkbook wb -> { + wb.write(); + } + case XSSFWorkbook wb -> { + try { + wb.write(null); + } catch (OpenXML4JRuntimeException e) { + // Ignore: Workaround for bug https://bz.apache.org/bugzilla/show_bug.cgi?id=59252 + } + } + default -> throw new IllegalStateException("Unknown workbook type: " + workbook.getClass()); + } + } else { + try (OutputStream fileOut = Files.newOutputStream(file.toPath())) { + try (BufferedOutputStream workbookOut = new BufferedOutputStream(fileOut)) { + workbook.write(workbookOut); + } + } + } + + return result; + } + } + } + + /** + * Executes a write action, ensuring that any other Excel connections are closed during the action, so that it can + * modify the file. Any existing connections are re-opened after the operation finishes (regardless of its success or + * error). + *

+ * The action gets a {@link WriteHelper} object that can be used to open the workbook for reading or writing. The + * action must take care to close that workbook before returning. + *

+ * Additional files that should be closed during the write action can be specified in the {@code accompanyingFiles} + * argument. These may be related temporary files that are written during the write operation and also need to get + * 'unlocked' for the time of write. + */ + public R lockForWriting(File file, ExcelFileFormat format, File[] accompanyingFiles, + Function action) throws IOException { + synchronized (this) { + if (isCurrentlyWriting) { + throw new IllegalStateException("Another Excel write is in progress on the same thread. This is a bug in the " + + "Table library."); + } + + isCurrentlyWriting = true; + try { + String key = getKeyForFile(file); + ArrayList recordsToReopen = new ArrayList<>(1 + accompanyingFiles.length); + + try { + // Close the existing connection, if any - to avoid the write operation failing due to the file being locked. + ConnectionRecord existingRecord = records.get(key); + if (existingRecord != null) { + existingRecord.close(); + recordsToReopen.add(existingRecord); + } + + verifyIsWritable(file); + + for (File accompanyingFile : accompanyingFiles) { + String accompanyingKey = getKeyForFile(accompanyingFile); + ConnectionRecord accompanyingRecord = records.get(accompanyingKey); + if (accompanyingRecord != null) { + accompanyingRecord.close(); + recordsToReopen.add(accompanyingRecord); + } + + verifyIsWritable(accompanyingFile); + } + + WriteHelper helper = new WriteHelper(format); + return action.apply(helper); + } finally { + // Reopen the closed connections + for (ConnectionRecord record : recordsToReopen) { + record.reopen(false); + } + } + + } finally { + isCurrentlyWriting = false; + } + } + } + + private void verifyIsWritable(File file) throws IOException { + Path path = file.toPath(); + + if (!Files.exists(path)) { + // If the file does not exist, we assume that we can create it. 
+ return; + } + + path.getFileSystem().provider().checkAccess(path, AccessMode.WRITE, AccessMode.READ); + } + + private String getKeyForFile(File file) throws IOException { + return file.getCanonicalPath(); + } + + void release(ReadOnlyExcelConnection excelConnection) throws IOException { + synchronized (this) { + excelConnection.record.refCount--; + if (excelConnection.record.refCount <= 0) { + excelConnection.record.close(); + records.remove(excelConnection.key); + } + } + } + + private final HashMap records = new HashMap<>(); + private boolean isCurrentlyWriting = false; + + static class ConnectionRecord { + private int refCount; + private File file; + private ExcelFileFormat format; + private Workbook workbook; + private IOException initializationException = null; + + T withWorkbook(Function action) throws IOException { + synchronized (this) { + return action.apply(accessCurrentWorkbook()); + } + } + + public void close() throws IOException { + synchronized (this) { + if (workbook != null) { + workbook.close(); + } + + workbook = null; + } + } + + void reopen(boolean throwOnFailure) throws IOException { + synchronized (this) { + if (workbook != null) { + throw new IllegalStateException("The workbook is already open."); + } + + try { + workbook = openWorkbook(file, format, false); + } catch (IOException e) { + initializationException = e; + if (throwOnFailure) { + throw e; + } + } + } + } + + private Workbook accessCurrentWorkbook() throws IOException { + synchronized (this) { + if (workbook == null) { + if (initializationException != null) { + throw initializationException; + } else { + throw new IllegalStateException("The workbook is used after being closed."); + } + } + + return workbook; + } + } + } + + private static Workbook openWorkbook(File file, ExcelFileFormat format, boolean writeAccess) throws IOException { + return switch (format) { + case XLS -> { + boolean readOnly = !writeAccess; + POIFSFileSystem fs = new POIFSFileSystem(file, readOnly); + try { 
+ // If the initialization succeeds, the POIFSFileSystem will be closed by the HSSFWorkbook::close. + yield new HSSFWorkbook(fs); + } catch (Exception e) { + fs.close(); + throw e; + } + } + case XLSX -> { + try { + PackageAccess access = writeAccess ? PackageAccess.READ_WRITE : PackageAccess.READ; + OPCPackage pkg = OPCPackage.open(file, access); + try { + yield new XSSFWorkbook(pkg); + } catch (IOException e) { + pkg.close(); + throw e; + } + } catch (InvalidFormatException e) { + throw new IOException("Invalid format encountered when opening the file " + file + " as " + format + ".", e); + } + } + }; + } + + private static Workbook createEmptyWorkbook(ExcelFileFormat format) { + return switch (format) { + case XLS -> new HSSFWorkbook(); + case XLSX -> new XSSFWorkbook(); + }; + } + + public static class ExcelFileFormatMismatchException extends UnsupportedFileFormatException { + public ExcelFileFormatMismatchException(String message) { + super(message); + } + } +} diff --git a/std-bits/table/src/main/java/org/enso/table/excel/ExcelFileFormat.java b/std-bits/table/src/main/java/org/enso/table/excel/ExcelFileFormat.java new file mode 100644 index 00000000000..b5681c258e8 --- /dev/null +++ b/std-bits/table/src/main/java/org/enso/table/excel/ExcelFileFormat.java @@ -0,0 +1,6 @@ +package org.enso.table.excel; + +public enum ExcelFileFormat { + XLS, + XLSX +} diff --git a/std-bits/table/src/main/java/org/enso/table/excel/ReadOnlyExcelConnection.java b/std-bits/table/src/main/java/org/enso/table/excel/ReadOnlyExcelConnection.java new file mode 100644 index 00000000000..60bc80e4d73 --- /dev/null +++ b/std-bits/table/src/main/java/org/enso/table/excel/ReadOnlyExcelConnection.java @@ -0,0 +1,38 @@ +package org.enso.table.excel; + +import java.io.IOException; +import java.util.function.Function; +import org.apache.poi.ss.usermodel.Workbook; + +public class ReadOnlyExcelConnection implements AutoCloseable { + + private final ExcelConnectionPool myPool; + final String key; + 
ExcelConnectionPool.ConnectionRecord record; + + ReadOnlyExcelConnection( + ExcelConnectionPool myPool, String key, ExcelConnectionPool.ConnectionRecord record) { + this.myPool = myPool; + this.key = key; + this.record = record; + } + + @Override + public synchronized void close() throws IOException { + if (record == null) { + // already closed + return; + } + + myPool.release(this); + record = null; + } + + public synchronized T withWorkbook(Function f) throws IOException { + if (record == null) { + throw new IllegalStateException("ReadOnlyExcelConnection is being used after it was closed."); + } + + return record.withWorkbook(f); + } +} diff --git a/std-bits/table/src/main/java/org/enso/table/read/ExcelReader.java b/std-bits/table/src/main/java/org/enso/table/read/ExcelReader.java index 657d1bbf0ed..4665ab13241 100644 --- a/std-bits/table/src/main/java/org/enso/table/read/ExcelReader.java +++ b/std-bits/table/src/main/java/org/enso/table/read/ExcelReader.java @@ -1,16 +1,16 @@ package org.enso.table.read; +import java.io.File; import java.io.IOException; -import java.io.InputStream; import java.util.ArrayList; import java.util.List; +import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.IntStream; -import org.apache.poi.hssf.usermodel.HSSFWorkbook; +import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import org.apache.poi.ss.usermodel.Name; import org.apache.poi.ss.usermodel.Workbook; import org.apache.poi.ss.util.CellReference; -import org.apache.poi.xssf.usermodel.XSSFWorkbook; import org.enso.table.data.column.builder.Builder; import org.enso.table.data.column.builder.InferredBuilder; import org.enso.table.data.column.storage.ObjectStorage; @@ -18,38 +18,29 @@ import org.enso.table.data.table.Column; import org.enso.table.data.table.Table; import org.enso.table.error.EmptySheetException; import org.enso.table.error.InvalidLocationException; +import org.enso.table.excel.ExcelConnectionPool; +import 
org.enso.table.excel.ExcelFileFormat; import org.enso.table.excel.ExcelHeaders; import org.enso.table.excel.ExcelRange; import org.enso.table.excel.ExcelRow; import org.enso.table.excel.ExcelSheet; +import org.enso.table.excel.ReadOnlyExcelConnection; import org.enso.table.problems.ProblemAggregator; import org.graalvm.polyglot.Context; /** A table reader for MS Excel files. */ public class ExcelReader { - /** - * Loads a workbook (either XLSX or XLS format from the specified input stream. - * - * @param stream an {@link InputStream} allowing to read the XLS(X) file contents. - * @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format). - * @return a {@link Workbook} containing the specified data. - * @throws IOException - when the input stream cannot be read. - */ - public static Workbook readWorkbook(InputStream stream, boolean xls_format) throws IOException { - return getWorkbook(stream, xls_format); - } - /** * Reads a list of sheet names for the specified XLSX/XLS file into an array. * - * @param stream an {@link InputStream} allowing to read the XLS(X) file contents. - * @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format). + * @param file the {@link File} to load + * @param format specifies the file format * @return a String[] containing the sheet names. - * @throws IOException when the input stream cannot be read. + * @throws IOException when the action fails */ - public static String[] readSheetNames(InputStream stream, boolean xls_format) throws IOException { - Workbook workbook = getWorkbook(stream, xls_format); - return readSheetNames(workbook); + public static String[] readSheetNames(File file, ExcelFileFormat format) + throws IOException, InvalidFormatException { + return withWorkbook(file, format, ExcelReader::readSheetNames); } /** @@ -72,14 +63,14 @@ public class ExcelReader { /** * Reads a list of range names for the specified XLSX/XLS file into an array. 
* - * @param stream an {@link InputStream} allowing to read the XLSX file contents. - * @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format). + * @param file the {@link File} to load + * @param format specifies the file format * @return a String[] containing the range names. - * @throws IOException when the input stream cannot be read. + * @throws IOException when the action fails */ - public static String[] readRangeNames(InputStream stream, boolean xls_format) throws IOException { - Workbook workbook = getWorkbook(stream, xls_format); - return readRangeNames(workbook); + public static String[] readRangeNames(File file, ExcelFileFormat format) + throws IOException, InvalidFormatException { + return withWorkbook(file, format, ExcelReader::readRangeNames); } /** @@ -96,106 +87,115 @@ public class ExcelReader { /** * Reads a sheet by name for the specified XLSX/XLS file into a table. * - * @param stream an {@link InputStream} allowing to read the XLSX file contents. + * @param file the {@link File} to load * @param sheetName the name of the sheet to read. * @param skip_rows skip rows from the top the sheet. * @param row_limit maximum number of rows to read. - * @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format). + * @param format specifies the file format * @return a {@link Table} containing the specified data. * @throws IOException when the input stream cannot be read. * @throws InvalidLocationException when the sheet name is not found. 
*/ public static Table readSheetByName( - InputStream stream, + File file, String sheetName, ExcelHeaders.HeaderBehavior headers, int skip_rows, Integer row_limit, - boolean xls_format, + ExcelFileFormat format, ProblemAggregator problemAggregator) throws IOException, InvalidLocationException { - Workbook workbook = getWorkbook(stream, xls_format); + return withWorkbook( + file, + format, + workbook -> { + int sheetIndex = workbook.getSheetIndex(sheetName); + if (sheetIndex == -1) { + throw new InvalidLocationException("Unknown sheet '" + sheetName + "'."); + } - int sheetIndex = workbook.getSheetIndex(sheetName); - if (sheetIndex == -1) { - throw new InvalidLocationException("Unknown sheet '" + sheetName + "'."); - } - - return readTable( - workbook, - sheetIndex, - null, - headers, - skip_rows, - row_limit == null ? Integer.MAX_VALUE : row_limit, - problemAggregator); + return readTable( + workbook, + sheetIndex, + null, + headers, + skip_rows, + row_limit == null ? Integer.MAX_VALUE : row_limit, + problemAggregator); + }); } /** * Reads a sheet by index for the specified XLSX/XLS file into a table. * - * @param stream an {@link InputStream} allowing to read the XLSX file contents. + * @param file the {@link File} to load * @param index the 1-based index to the sheet. * @param skip_rows skip rows from the top the sheet. * @param row_limit maximum number of rows to read. - * @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format). + * @param format specifies the file format * @return a {@link Table} containing the specified data. * @throws IOException when the input stream cannot be read. * @throws InvalidLocationException when the sheet index is not valid. 
*/ public static Table readSheetByIndex( - InputStream stream, + File file, int index, ExcelHeaders.HeaderBehavior headers, int skip_rows, Integer row_limit, - boolean xls_format, + ExcelFileFormat format, ProblemAggregator problemAggregator) throws IOException, InvalidLocationException { - Workbook workbook = getWorkbook(stream, xls_format); + return withWorkbook( + file, + format, + workbook -> { + int sheetCount = workbook.getNumberOfSheets(); + if (index < 1 || index > sheetCount) { + throw new InvalidLocationException( + "Sheet index is not in valid range (1 to " + sheetCount + " inclusive)."); + } - int sheetCount = workbook.getNumberOfSheets(); - if (index < 1 || index > sheetCount) { - throw new InvalidLocationException( - "Sheet index is not in valid range (1 to " + sheetCount + " inclusive)."); - } - - return readTable( - workbook, - index - 1, - null, - headers, - skip_rows, - row_limit == null ? Integer.MAX_VALUE : row_limit, - problemAggregator); + return readTable( + workbook, + index - 1, + null, + headers, + skip_rows, + row_limit == null ? Integer.MAX_VALUE : row_limit, + problemAggregator); + }); } /** * Reads a range by sheet name, named range or address for the specified XLSX/XLS file into a * table. * - * @param stream an {@link InputStream} allowing to read the XLSX file contents. + * @param file the {@link File} to load * @param rangeNameOrAddress sheet name, range name or address to read. * @param headers specifies whether the first row should be used as headers. * @param skip_rows skip rows from the top of the range. * @param row_limit maximum number of rows to read. - * @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format). + * @param format specifies the file format * @return a {@link Table} containing the specified data. * @throws IOException when the input stream cannot be read. * @throws InvalidLocationException when the range name or address is not found. 
*/ public static Table readRangeByName( - InputStream stream, + File file, String rangeNameOrAddress, ExcelHeaders.HeaderBehavior headers, int skip_rows, Integer row_limit, - boolean xls_format, + ExcelFileFormat format, ProblemAggregator problemAggregator) throws IOException, InvalidLocationException { - Workbook workbook = getWorkbook(stream, xls_format); - return readRangeByName( - workbook, rangeNameOrAddress, headers, skip_rows, row_limit, problemAggregator); + return withWorkbook( + file, + format, + workbook -> + readRangeByName( + workbook, rangeNameOrAddress, headers, skip_rows, row_limit, problemAggregator)); } /** @@ -245,42 +245,36 @@ public class ExcelReader { /** * Reads a range for the specified XLSX/XLS file into a table. * - * @param stream an {@link InputStream} allowing to read the XLSX file contents. + * @param file the {@link File} to load * @param excelRange the range to read. * @param skip_rows skip rows from the top of the range. * @param row_limit maximum number of rows to read. - * @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format). + * @param format specifies the file format * @return a {@link Table} containing the specified data. * @throws IOException when the input stream cannot be read. */ public static Table readRange( - InputStream stream, + File file, ExcelRange excelRange, ExcelHeaders.HeaderBehavior headers, int skip_rows, Integer row_limit, - boolean xls_format, + ExcelFileFormat format, ProblemAggregator problemAggregator) throws IOException, InvalidLocationException { - return readRange( - getWorkbook(stream, xls_format), - excelRange, - headers, - skip_rows, - row_limit, - problemAggregator); + return withWorkbook( + file, + format, + workbook -> + readRange(workbook, excelRange, headers, skip_rows, row_limit, problemAggregator)); } - /** - * Load a workbook into memory from an InputStream. - * - * @param stream an {@link InputStream} allowing to read the XLSX file contents. 
- * @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format). - * @return a {@link Workbook} containing the specified data. - * @throws IOException when the input stream cannot be read or an incorrect format occurs. - */ - public static Workbook getWorkbook(InputStream stream, boolean xls_format) throws IOException { - return xls_format ? new HSSFWorkbook(stream) : new XSSFWorkbook(stream); + private static T withWorkbook(File file, ExcelFileFormat format, Function action) + throws IOException { + try (ReadOnlyExcelConnection connection = + ExcelConnectionPool.INSTANCE.openReadOnlyConnection(file, format)) { + return connection.withWorkbook(action); + } } private static Table readRange( diff --git a/std-bits/table/src/main/java/org/enso/table/write/ExcelWriter.java b/std-bits/table/src/main/java/org/enso/table/write/ExcelWriter.java index 91951816d32..29e0ccdd909 100644 --- a/std-bits/table/src/main/java/org/enso/table/write/ExcelWriter.java +++ b/std-bits/table/src/main/java/org/enso/table/write/ExcelWriter.java @@ -145,15 +145,6 @@ public class ExcelWriter { } } - /** - * Creates an empty workbook. - * @param xls_format specifies whether the file is in Excel Binary Format (95-2003 format). - * @return a {@link Workbook} containing the specified data. - */ - public static Workbook createWorkbook(boolean xls_format) { - return xls_format ? 
new HSSFWorkbook() : new XSSFWorkbook(); - } - private static void appendRangeWithTable(Workbook workbook, ExcelRange range, ExistingDataMode existingDataMode, Table table, Long rowLimit, ExcelHeaders.HeaderBehavior headers, ExcelSheet sheet, ExcelRange expanded) throws RangeExceededException, ExistingDataException, ColumnNameMismatchException, ColumnCountMismatchException { Table mappedTable = switch (existingDataMode) { diff --git a/std-bits/table/src/main/java/org/enso/table/write/ExistingFileBehavior.java b/std-bits/table/src/main/java/org/enso/table/write/ExistingFileBehavior.java new file mode 100644 index 00000000000..6bdd5057521 --- /dev/null +++ b/std-bits/table/src/main/java/org/enso/table/write/ExistingFileBehavior.java @@ -0,0 +1,8 @@ +package org.enso.table.write; + +public enum ExistingFileBehavior { + OVERWRITE, + BACKUP, + APPEND, + ERROR +} diff --git a/test/Table_Tests/src/Common_Table_Operations/Column_Operations_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Column_Operations_Spec.enso index ce55a17d928..b4eb2c2d5e8 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Column_Operations_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Column_Operations_Spec.enso @@ -954,113 +954,112 @@ spec setup = op c 1 . should_fail_with Invalid_Value_Type op a True . should_fail_with Invalid_Value_Type - Test.group prefix+"Column Operations - Text Replace" <| - do_replace column term new_text case_sensitivity=Case_Sensitivity.Default only_first=False expected = - case setup.is_database of - True -> - input_type = Meta.type_of term - params = Replace_Params.Value input_type case_sensitivity only_first - supported_replace_params = setup.test_selection.supported_replace_params - supported_replace_params . should_be_a Set - are_params_supported = supported_replace_params.contains params - case are_params_supported of - True -> column.replace term new_text case_sensitivity only_first . to_vector . 
should_equal expected - False -> column.replace term new_text case_sensitivity only_first . should_fail_with Unsupported_Database_Operation - False -> - result = column.replace term new_text case_sensitivity only_first - result.value_type . should_equal Value_Type.Char - result . to_vector . should_equal expected + do_replace column term new_text case_sensitivity=Case_Sensitivity.Default only_first=False expected = + case setup.is_database of + True -> + input_type = Meta.type_of term + params = Replace_Params.Value input_type case_sensitivity only_first + supported_replace_params = setup.test_selection.supported_replace_params + supported_replace_params . should_be_a Set + are_params_supported = supported_replace_params.contains params + case are_params_supported of + True -> column.replace term new_text case_sensitivity only_first . to_vector . should_equal expected + False -> column.replace term new_text case_sensitivity only_first . should_fail_with Unsupported_Database_Operation + False -> + result = column.replace term new_text case_sensitivity only_first + result.value_type . should_equal Value_Type.Char + result . to_vector . should_equal expected - Test.group prefix+"replace: literal text pattern and replacement" <| - col0 = table_builder [["x", ['hello Hello', 'hello hello', 'HELLO HELLO']]] . at "x" - col1 = table_builder [["x", ['a[bcd] A[bCd] a[bcd]', 'abac ad Ab aCAd']]] . at "x" + Test.group prefix+"replace: literal text pattern and replacement" <| + col0 = table_builder [["x", ['hello Hello', 'hello hello', 'HELLO HELLO']]] . at "x" + col1 = table_builder [["x", ['a[bcd] A[bCd] a[bcd]', 'abac ad Ab aCAd']]] . 
at "x" - Test.specify "case_sensitivity=sensitive/default use_regex=false only_first=false" - do_replace col0 'hello' 'bye' expected=['bye Hello', 'bye bye', 'HELLO HELLO'] - do_replace col0 'hello' 'bye' case_sensitivity=Case_Sensitivity.Default expected=['bye Hello', 'bye bye', 'HELLO HELLO'] - do_replace col0 'hello' 'bye' case_sensitivity=Case_Sensitivity.Sensitive expected=['bye Hello', 'bye bye', 'HELLO HELLO'] - do_replace col1 'a[bcd]' 'hey' expected=['hey A[bCd] hey', 'abac ad Ab aCAd'] + Test.specify "case_sensitivity=sensitive/default use_regex=false only_first=false" + do_replace col0 'hello' 'bye' expected=['bye Hello', 'bye bye', 'HELLO HELLO'] + do_replace col0 'hello' 'bye' case_sensitivity=Case_Sensitivity.Default expected=['bye Hello', 'bye bye', 'HELLO HELLO'] + do_replace col0 'hello' 'bye' case_sensitivity=Case_Sensitivity.Sensitive expected=['bye Hello', 'bye bye', 'HELLO HELLO'] + do_replace col1 'a[bcd]' 'hey' expected=['hey A[bCd] hey', 'abac ad Ab aCAd'] - Test.specify "case_sensitivity=sensitive/default use_regex=false only_first=true" - do_replace col0 'hello' 'bye' only_first=True expected=['bye Hello', 'bye hello', 'HELLO HELLO'] - do_replace col1 'a[bcd]' 'hey' only_first=True expected=['hey A[bCd] a[bcd]', 'abac ad Ab aCAd'] + Test.specify "case_sensitivity=sensitive/default use_regex=false only_first=true" + do_replace col0 'hello' 'bye' only_first=True expected=['bye Hello', 'bye hello', 'HELLO HELLO'] + do_replace col1 'a[bcd]' 'hey' only_first=True expected=['hey A[bCd] a[bcd]', 'abac ad Ab aCAd'] - Test.specify "case_sensitivity=insensitive use_regex=false only_first=false" - do_replace col0 'hello' 'bye' case_sensitivity=Case_Sensitivity.Insensitive expected=['bye bye', 'bye bye', 'bye bye'] - do_replace col1 'a[bcd]' 'hey' case_sensitivity=Case_Sensitivity.Insensitive expected=['hey hey hey', 'abac ad Ab aCAd'] + Test.specify "case_sensitivity=insensitive use_regex=false only_first=false" + do_replace col0 'hello' 'bye' 
case_sensitivity=Case_Sensitivity.Insensitive expected=['bye bye', 'bye bye', 'bye bye'] + do_replace col1 'a[bcd]' 'hey' case_sensitivity=Case_Sensitivity.Insensitive expected=['hey hey hey', 'abac ad Ab aCAd'] - Test.specify "case_sensitivity=insensitive use_regex=false only_first=true" - do_replace col0 'hello' 'bye' case_sensitivity=Case_Sensitivity.Insensitive only_first=True expected=['bye Hello', 'bye hello', 'bye HELLO'] - do_replace col1 'a[bcd]' 'hey' case_sensitivity=Case_Sensitivity.Insensitive only_first=True expected=['hey A[bCd] a[bcd]', 'abac ad Ab aCAd'] + Test.specify "case_sensitivity=insensitive use_regex=false only_first=true" + do_replace col0 'hello' 'bye' case_sensitivity=Case_Sensitivity.Insensitive only_first=True expected=['bye Hello', 'bye hello', 'bye HELLO'] + do_replace col1 'a[bcd]' 'hey' case_sensitivity=Case_Sensitivity.Insensitive only_first=True expected=['hey A[bCd] a[bcd]', 'abac ad Ab aCAd'] - Test.group prefix+"replace: literal regex pattern and replacement" <| - col1 = table_builder [["x", ['a[bcd] A[bCd] a[bcd]', 'abac ad Ab aCAd']]] . at "x" + Test.group prefix+"replace: literal regex pattern and replacement" <| + col1 = table_builder [["x", ['a[bcd] A[bCd] a[bcd]', 'abac ad Ab aCAd']]] . 
at "x" - Test.specify "case_sensitivity=sensitive/default use_regex=True only_first=false" - do_replace col1 'a[bcd]'.to_regex 'hey' expected=['a[bcd] A[bCd] a[bcd]', 'heyhey hey Ab aCAd'] + Test.specify "case_sensitivity=sensitive/default use_regex=True only_first=false" + do_replace col1 'a[bcd]'.to_regex 'hey' expected=['a[bcd] A[bCd] a[bcd]', 'heyhey hey Ab aCAd'] - Test.specify "case_sensitivity=sensitive/default use_regex=True only_first=true" - do_replace col1 'a[bcd]'.to_regex 'hey' only_first=True expected=['a[bcd] A[bCd] a[bcd]', 'heyac ad Ab aCAd'] + Test.specify "case_sensitivity=sensitive/default use_regex=True only_first=true" + do_replace col1 'a[bcd]'.to_regex 'hey' only_first=True expected=['a[bcd] A[bCd] a[bcd]', 'heyac ad Ab aCAd'] - Test.specify "case_sensitivity=insensitive use_regex=True only_first=false" - do_replace col1 'a[bcd]'.to_regex 'hey' case_sensitivity=Case_Sensitivity.Insensitive expected=['a[bcd] A[bCd] a[bcd]', 'heyhey hey hey heyhey'] + Test.specify "case_sensitivity=insensitive use_regex=True only_first=false" + do_replace col1 'a[bcd]'.to_regex 'hey' case_sensitivity=Case_Sensitivity.Insensitive expected=['a[bcd] A[bCd] a[bcd]', 'heyhey hey hey heyhey'] - Test.specify "case_sensitivity=insensitive use_regex=True only_first=true" - do_replace col1 'a[bcd]'.to_regex 'hey' case_sensitivity=Case_Sensitivity.Insensitive only_first=True expected=['a[bcd] A[bCd] a[bcd]', 'heyac ad Ab aCAd'] + Test.specify "case_sensitivity=insensitive use_regex=True only_first=true" + do_replace col1 'a[bcd]'.to_regex 'hey' case_sensitivity=Case_Sensitivity.Insensitive only_first=True expected=['a[bcd] A[bCd] a[bcd]', 'heyac ad Ab aCAd'] - Test.specify "can properly escape complex regexes" <| - regex = "^([^\(]+)|(?\w\d[a-z])+$" - col = table_builder [["x", [regex]]] . at "x" - do_replace col regex "asdf" ["asdf"] + Test.specify "can properly escape complex regexes" <| + regex = "^([^\(]+)|(?\w\d[a-z])+$" + col = table_builder [["x", [regex]]] . 
at "x" + do_replace col regex "asdf" ["asdf"] - Test.group prefix+"replace: pattern and replacement columns" <| + Test.group prefix+"replace: pattern and replacement columns" <| + table = table_builder [["x", ['hello Hello', 'hello hello', 'HELLO HELLO', 'a[bcd] A[bCd] a[bcd]', 'abac ad Ab aCAd']], ["patterns", ['hello', 'hello', 'hello', 'a[bcd]', 'a[bcd]']], ["replacements", ['bye', 'bye', 'bye', 'hey', 'hey']]] + col = table.at "x" + patterns = table.at "patterns" + replacements = table.at "replacements" + + Test.specify "case_sensitivity=sensitive/default use_regex=false only_first=false" + do_replace col patterns replacements expected=['bye Hello', 'bye bye', 'HELLO HELLO', 'hey A[bCd] hey', 'abac ad Ab aCAd'] + do_replace col patterns replacements case_sensitivity=Case_Sensitivity.Default expected=['bye Hello', 'bye bye', 'HELLO HELLO', 'hey A[bCd] hey', 'abac ad Ab aCAd'] + do_replace col patterns replacements case_sensitivity=Case_Sensitivity.Sensitive expected=['bye Hello', 'bye bye', 'HELLO HELLO', 'hey A[bCd] hey', 'abac ad Ab aCAd'] + + Test.specify "case_sensitivity=sensitive/default use_regex=false only_first=true" + do_replace col patterns replacements only_first=True expected=['bye Hello', 'bye hello', 'HELLO HELLO', 'hey A[bCd] a[bcd]', 'abac ad Ab aCAd'] + + Test.specify "case_sensitivity=insensitive use_regex=false only_first=false" + do_replace col patterns replacements case_sensitivity=Case_Sensitivity.Insensitive expected=['bye bye', 'bye bye', 'bye bye', 'hey hey hey', 'abac ad Ab aCAd'] + + Test.specify "case_sensitivity=insensitive use_regex=false only_first=true" + do_replace col patterns replacements case_sensitivity=Case_Sensitivity.Insensitive only_first=True expected=['bye Hello', 'bye hello', 'bye HELLO', 'hey A[bCd] a[bcd]', 'abac ad Ab aCAd'] + + Test.group prefix+"replace: empty table and nothings" <| + Test.specify "should work on empty tables" <| + col = table_builder [["x", ['hello Hello']]] . 
filter "x" (Filter_Condition.Is_Nothing) . at "x" + do_replace col 'hello' 'bye' expected=[] + + Test.specify "should work on Nothing text column" <| + col = table_builder [["x", ['hello Hello', Nothing]]] . filter "x" (Filter_Condition.Is_Nothing) . at "x" + do_replace col 'hello' 'bye' expected=[Nothing] + + if setup.is_database then Test.group prefix+"replace: DB specific edge-cases" <| + col = table_builder [["A", ["Alpha", "Bravo", "Charlie", "Delta", "Echo", "Foxtrot"]]] . at 'A' + Test.specify "should not allow Case_Sensitivity.Insensitive with a non-default locale" <| + locale = Locale.new "en" "GB" "UTF-8" + col.replace 'asdf' 'zxcv' case_sensitivity=(Case_Sensitivity.Insensitive locale) . should_fail_with Illegal_Argument + + Test.specify "column name" <| table = table_builder [["x", ['hello Hello', 'hello hello', 'HELLO HELLO', 'a[bcd] A[bCd] a[bcd]', 'abac ad Ab aCAd']], ["patterns", ['hello', 'hello', 'hello', 'a[bcd]', 'a[bcd]']], ["replacements", ['bye', 'bye', 'bye', 'hey', 'hey']]] col = table.at "x" patterns = table.at "patterns" replacements = table.at "replacements" - Test.specify "case_sensitivity=sensitive/default use_regex=false only_first=false" - do_replace col patterns replacements expected=['bye Hello', 'bye bye', 'HELLO HELLO', 'hey A[bCd] hey', 'abac ad Ab aCAd'] - do_replace col patterns replacements case_sensitivity=Case_Sensitivity.Default expected=['bye Hello', 'bye bye', 'HELLO HELLO', 'hey A[bCd] hey', 'abac ad Ab aCAd'] - do_replace col patterns replacements case_sensitivity=Case_Sensitivity.Sensitive expected=['bye Hello', 'bye bye', 'HELLO HELLO', 'hey A[bCd] hey', 'abac ad Ab aCAd'] - - Test.specify "case_sensitivity=sensitive/default use_regex=false only_first=true" - do_replace col patterns replacements only_first=True expected=['bye Hello', 'bye hello', 'HELLO HELLO', 'hey A[bCd] a[bcd]', 'abac ad Ab aCAd'] - - Test.specify "case_sensitivity=insensitive use_regex=false only_first=false" - do_replace col patterns 
replacements case_sensitivity=Case_Sensitivity.Insensitive expected=['bye bye', 'bye bye', 'bye bye', 'hey hey hey', 'abac ad Ab aCAd'] - - Test.specify "case_sensitivity=insensitive use_regex=false only_first=true" - do_replace col patterns replacements case_sensitivity=Case_Sensitivity.Insensitive only_first=True expected=['bye Hello', 'bye hello', 'bye HELLO', 'hey A[bCd] a[bcd]', 'abac ad Ab aCAd'] - - Test.group prefix+"replace: empty table and nothings" <| - Test.specify "should work on empty tables" <| - col = table_builder [["x", ['hello Hello']]] . filter "x" (Filter_Condition.Is_Nothing) . at "x" - do_replace col 'hello' 'bye' expected=[] - - Test.specify "should work on Nothing text column" <| - col = table_builder [["x", ['hello Hello', Nothing]]] . filter "x" (Filter_Condition.Is_Nothing) . at "x" - do_replace col 'hello' 'bye' expected=[Nothing] - - if setup.is_database then - col = table_builder [["A", ["Alpha", "Bravo", "Charlie", "Delta", "Echo", "Foxtrot"]]] . at 'A' - Test.specify "should not allow Case_Sensitivity.Insensitive with a non-default locale" <| - locale = Locale.new "en" "GB" "UTF-8" - col.replace 'asdf' 'zxcv' case_sensitivity=(Case_Sensitivity.Insensitive locale) . should_fail_with Illegal_Argument - - Test.specify "column name" <| - table = table_builder [["x", ['hello Hello', 'hello hello', 'HELLO HELLO', 'a[bcd] A[bCd] a[bcd]', 'abac ad Ab aCAd']], ["patterns", ['hello', 'hello', 'hello', 'a[bcd]', 'a[bcd]']], ["replacements", ['bye', 'bye', 'bye', 'hey', 'hey']]] - col = table.at "x" - patterns = table.at "patterns" - replacements = table.at "replacements" - - supported_replace_params = setup.test_selection.supported_replace_params - if supported_replace_params.contains (Replace_Params.Value Text Case_Sensitivity.Default False) then - col.replace 'hello' 'bye' . name . 
should_equal 'replace([x], \'hello\', \'bye\')' - if supported_replace_params.contains (Replace_Params.Value Regex Case_Sensitivity.Default False) then - col.replace 'a[bcd]'.to_regex 'hey' . name . should_equal 'replace([x], \'a[bcd]\', \'hey\')' - if supported_replace_params.contains (Replace_Params.Value Column Case_Sensitivity.Default False) then - col.replace patterns replacements . name . should_equal 'replace([x], [patterns], [replacements])' + supported_replace_params = setup.test_selection.supported_replace_params + if supported_replace_params.contains (Replace_Params.Value Text Case_Sensitivity.Default False) then + col.replace 'hello' 'bye' . name . should_equal 'replace([x], \'hello\', \'bye\')' + if supported_replace_params.contains (Replace_Params.Value Regex Case_Sensitivity.Default False) then + col.replace 'a[bcd]'.to_regex 'hey' . name . should_equal 'replace([x], \'a[bcd]\', \'hey\')' + if supported_replace_params.contains (Replace_Params.Value Column Case_Sensitivity.Default False) then + col.replace patterns replacements . name . 
should_equal 'replace([x], [patterns], [replacements])' Test.group prefix+"Column Operations - Text Replace (in-memory only)" <| if setup.is_database.not then diff --git a/test/Table_Tests/src/IO/Excel_Spec.enso b/test/Table_Tests/src/IO/Excel_Spec.enso index 33eab8e847c..38fb45f479f 100644 --- a/test/Table_Tests/src/IO/Excel_Spec.enso +++ b/test/Table_Tests/src/IO/Excel_Spec.enso @@ -2,7 +2,10 @@ from Standard.Base import all import Standard.Base.Errors.Common.Dry_Run_Operation import Standard.Base.Errors.File_Error.File_Error import Standard.Base.Errors.Illegal_Argument.Illegal_Argument +import Standard.Base.Errors.Illegal_State.Illegal_State import Standard.Base.Runtime.Context +import Standard.Base.Runtime.Managed_Resource.Managed_Resource +import Standard.Base.Runtime.Ref.Ref from Standard.Table import Table, Match_Columns, Excel, Excel_Range, Data_Formatter, Sheet_Names, Range_Names, Worksheet, Cell_Range, Delimited, Excel_Workbook @@ -15,6 +18,8 @@ import Standard.Examples import project.Util +polyglot java import org.enso.table_test_helpers.RandomHelpers + spec_fmt header file read_method sheet_count=5 = Test.group header <| Test.specify "should read a workbook in" <| @@ -70,58 +75,67 @@ spec_fmt header file read_method sheet_count=5 = spec_write suffix test_sheet_name = Test.group ("Write " + suffix + " Files") <| - out = enso_project.data / ('out.' + suffix) - out_bak = enso_project.data / ('out.' + suffix + '.bak') table = enso_project.data/'varied_column.csv' . read clothes = enso_project.data/'clothes.csv' . read sub_clothes = clothes.select_columns [0, 1] + counter = Ref.new 0 + create_out = + i = counter.get + 1 + counter.put i + f = enso_project.data / "transient" / ("out" + i.to_text + "." + suffix) + Panic.rethrow f.delete_if_exists + f + Test.specify 'should write a table to non-existent file as a new sheet with headers; and return the file object on success' <| - out.delete_if_exists + out = create_out table.write out on_problems=Report_Error . 
should_succeed . should_equal out written = out.read written.sheet_count . should_equal 1 written.sheet_names . should_equal ['EnsoSheet'] written.read 'EnsoSheet' . should_equal table - out.delete_if_exists + written.close + out.delete_if_exists . should_succeed Test.specify 'should write a table to non-existent file in append mode as a new sheet with headers' <| - out.delete_if_exists + out = create_out table.write out on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed written = out.read written.sheet_count . should_equal 1 written.sheet_names . should_equal ['EnsoSheet'] written.read 'EnsoSheet' . should_equal table - out.delete_if_exists + written.close + out.delete_if_exists . should_succeed Test.specify 'should write a table to existing file overriding EnsoSheet' <| - out.delete_if_exists + out = create_out table.write out on_problems=Report_Error . should_succeed table.write out on_problems=Report_Error . should_succeed - written = out.read - written.sheet_count . should_equal 1 - written.sheet_names . should_equal ['EnsoSheet'] - written.read 'EnsoSheet' . should_equal table - out.delete_if_exists + written_workbook = out.read + written_workbook.sheet_count . should_equal 1 + written_workbook.sheet_names . should_equal ['EnsoSheet'] + written_workbook.read 'EnsoSheet' . should_equal table + written_workbook.close + out.delete_if_exists . should_succeed Test.specify 'should write a table to existing file in overwrite mode as a new sheet with headers' <| - out.delete_if_exists + out = create_out (enso_project.data / test_sheet_name) . copy_to out table.write out (Excel (Worksheet "Another")) on_existing_file=Existing_File_Behavior.Overwrite on_problems=Report_Error . should_succeed written = out.read (Excel (Worksheet "Another")) written.should_equal table - out.delete_if_exists + out.delete_if_exists . 
should_succeed Test.specify 'should write a table to existing file in overwrite mode as a new sheet without headers' <| - out.delete_if_exists + out = create_out (enso_project.data / test_sheet_name) . copy_to out table.write out (Excel (Worksheet "NoHeaders")) on_existing_file=Existing_File_Behavior.Overwrite on_problems=Report_Error . should_succeed written = out.read (Excel (Worksheet "NoHeaders")) written.should_equal (table.rename_columns ['A', 'B', 'C', 'D', 'E', 'F']) - out.delete_if_exists + out.delete_if_exists . should_succeed Test.specify 'should create new sheets at the start if index is 0' <| - out.delete_if_exists + out = create_out table.write out (Excel (Worksheet 0)) on_problems=Report_Error . should_succeed clothes.write out (Excel (Worksheet 0)) on_problems=Report_Error . should_succeed read_1 = out.read (Excel (Worksheet "Sheet1")) @@ -130,214 +144,318 @@ spec_write suffix test_sheet_name = read_2 . should_equal clothes read_3 = out.read (Excel (Sheet_Names)) read_3 . should_equal ["Sheet2", "Sheet1"] - out.delete_if_exists + out.delete_if_exists . should_succeed Test.specify 'should write a table to specific single cell location of an existing sheet' <| - out.delete_if_exists + out = create_out (enso_project.data / test_sheet_name) . copy_to out table.write out (Excel (Cell_Range "Another!G1")) on_problems=Report_Error . should_succeed written = out.read (Excel (Cell_Range "Another!G1")) written.should_equal table - out.delete_if_exists + out.delete_if_exists . should_succeed Test.specify 'should clear out an existing fixed range and replace' <| - out.delete_if_exists + out = create_out (enso_project.data / test_sheet_name) . copy_to out sub_clothes.write out (Excel (Cell_Range "Another!A1:D20")) on_problems=Report_Error . should_succeed written = out.read (Excel (Cell_Range "Another!A1")) written.should_equal sub_clothes - out.delete_if_exists + out.delete_if_exists . 
should_succeed Test.specify 'should clear out an existing range and replace' <| - out.delete_if_exists + out = create_out (enso_project.data / test_sheet_name) . copy_to out sub_clothes.write out (Excel (Cell_Range "Another!A1")) on_problems=Report_Error . should_succeed written = out.read (Excel (Cell_Range "Another!A1")) written.should_equal sub_clothes - out.delete_if_exists + out.delete_if_exists . should_succeed Test.specify 'should result in Invalid_Location error if trying to write in a bad location' <| - out.delete_if_exists + out = create_out (enso_project.data / test_sheet_name) . copy_to out sub_clothes.write out (Excel (Cell_Range "DoesNotExist!A1")) . should_fail_with Invalid_Location sub_clothes.write out (Excel (Cell_Range "DoesNotExist!A1:B2")) . should_fail_with Invalid_Location sub_clothes.write out (Excel (Cell_Range "SillyRangeName")) . should_fail_with Invalid_Location - out.delete_if_exists + out.delete_if_exists . should_succeed Test.specify 'should result in Range_Exceeded error if trying to write in too small a range' <| - out.delete_if_exists + out = create_out (enso_project.data / test_sheet_name) . copy_to out sub_clothes.write out (Excel (Cell_Range "Another!A1:B2")) . should_fail_with Range_Exceeded - out.delete_if_exists + out.delete_if_exists . should_succeed Test.specify 'should result in Existing_Data error if in Error mode and trying to replace' <| - out.delete_if_exists + out = create_out (enso_project.data / test_sheet_name) . copy_to out lmd = out.last_modified_time - sub_clothes.write out (Excel (Worksheet 1)) on_existing_file=Existing_File_Behavior.Error . should_fail_with Existing_Data - sub_clothes.write out (Excel (Worksheet "Another")) on_existing_file=Existing_File_Behavior.Error . should_fail_with Existing_Data - sub_clothes.write out (Excel (Cell_Range "Another!A1")) on_existing_file=Existing_File_Behavior.Error . 
should_fail_with Existing_Data - sub_clothes.write out (Excel (Cell_Range "Sheet1!A9")) on_existing_file=Existing_File_Behavior.Error . should_fail_with Existing_Data - out.last_modified_time.should_equal lmd - out.delete_if_exists + r1 = sub_clothes.write out (Excel (Worksheet 1)) on_existing_file=Existing_File_Behavior.Error + r1.should_fail_with File_Error + r1.catch.should_be_a File_Error.Already_Exists - Test.specify 'should not allow adding a new sheet if in Error mode and not clashing' <| - out.delete_if_exists + sub_clothes.write out (Excel (Worksheet "Another")) on_existing_file=Existing_File_Behavior.Error . should_fail_with File_Error + sub_clothes.write out (Excel (Cell_Range "Another!A1")) on_existing_file=Existing_File_Behavior.Error . should_fail_with File_Error + sub_clothes.write out (Excel (Cell_Range "Sheet1!A9")) on_existing_file=Existing_File_Behavior.Error . should_fail_with File_Error + + Test.with_clue "the original file should remain unmodified: " <| + out.last_modified_time.should_equal lmd + out.delete_if_exists . should_succeed + + Test.specify 'should not allow adding a new sheet if in Error mode, even if sheet is not clashing' <| + out = create_out (enso_project.data / test_sheet_name) . copy_to out lmd = out.last_modified_time result = sub_clothes.write out (Excel (Worksheet "Testing")) on_existing_file=Existing_File_Behavior.Error result.should_fail_with File_Error result.catch.should_be_a File_Error.Already_Exists - out.last_modified_time.should_equal lmd - out.delete_if_exists + Test.with_clue "the original file should remain unmodified: " <| + out.last_modified_time.should_equal lmd + out.delete_if_exists . should_succeed Test.specify 'should write a table to non-existent file as a new sheet without headers' <| - out.delete_if_exists + out = create_out table.write out (Excel (Worksheet "Sheet1") headers=False) on_problems=Report_Error . should_succeed written = out.read written.sheet_count . should_equal 1 written.sheet_names . 
should_equal ['Sheet1'] written.read 'Sheet1' . should_equal (table.rename_columns ['A', 'B', 'C', 'D', 'E', 'F']) - out.delete_if_exists + + # We need to close the workbook to be able to delete it. + written.close + out.delete_if_exists . should_succeed Test.specify 'should be able to append to a sheet by name' <| - out.delete_if_exists + out = create_out (enso_project.data / test_sheet_name) . copy_to out extra_another = Table.new [['AA', ['d', 'e']], ['BB',[4, 5]], ['CC',[True, False]], ['DD', ['2022-01-20', '2022-01-21']]] expected = Table.new [['AA', ['a','b','c','d', 'e']], ['BB',[1,2,3,4,5]], ['CC',[True, False, False, True, False]]] extra_another.write out (Excel (Worksheet "Another")) on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed written = out.read (Excel (Worksheet "Another")) . select_columns [0, 1, 2] written.should_equal expected - out.delete_if_exists + out.delete_if_exists . should_succeed Test.specify 'should be able to append to a sheet by position' <| - out.delete_if_exists + out = create_out (enso_project.data / test_sheet_name) . copy_to out extra_another = Table.new [['A', ['d', 'e']], ['B',[4, 5]], ['C',[True, False]], ['D', ['2022-01-20', '2022-01-21']]] expected = Table.new [['AA', ['a','b','c','d', 'e']], ['BB',[1,2,3,4,5]], ['CC',[True, False, False, True, False]]] extra_another.write out (Excel (Worksheet "Another")) on_existing_file=Existing_File_Behavior.Append match_columns=Match_Columns.By_Position on_problems=Report_Error . should_succeed written = out.read (Excel (Worksheet "Another")) . select_columns [0, 1, 2] written.should_equal expected - out.delete_if_exists + out.delete_if_exists . should_succeed Test.specify 'should be able to append to a sheet by name out of order' <| - out.delete_if_exists + out = create_out (enso_project.data / test_sheet_name) . 
copy_to out extra_another = Table.new [['CC',[True, False]], ['BB',[4, 5]], ['AA', ['d', 'e']], ['DD', ['2022-01-20', '2022-01-21']]] expected = Table.new [['AA', ['a','b','c','d', 'e']], ['BB',[1,2,3,4,5]], ['CC',[True, False, False, True, False]]] extra_another.write out (Excel (Worksheet "Another")) on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed written = out.read (Excel (Worksheet "Another")) . select_columns [0, 1, 2] written.should_equal expected - out.delete_if_exists + out.delete_if_exists . should_succeed Test.specify 'should be able to append to a single cell by name' <| - out.delete_if_exists + out = create_out (enso_project.data / test_sheet_name) . copy_to out extra_another = Table.new [['AA', ['d', 'e']], ['BB',[4, 5]], ['CC',[True, False]], ['DD', ['2022-01-20', '2022-01-21']]] expected = Table.new [['AA', ['a','b','c','d', 'e']], ['BB',[1,2,3,4,5]], ['CC',[True, False, False, True, False]]] extra_another.write out (Excel (Cell_Range "Another!A1")) on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed written = out.read (Excel (Worksheet "Another")) . select_columns [0, 1, 2] written.should_equal expected - out.delete_if_exists + out.delete_if_exists . should_succeed Test.specify 'should be able to append to a single cell by position' <| - out.delete_if_exists + out = create_out (enso_project.data / test_sheet_name) . copy_to out extra_another = Table.new [['A', ['d', 'e']], ['B',[4, 5]], ['C',[True, False]], ['D', ['2022-01-20', '2022-01-21']]] expected = Table.new [['AA', ['a','b','c','d', 'e']], ['BB',[1,2,3,4,5]], ['CC',[True, False, False, True, False]]] extra_another.write out (Excel (Cell_Range "Another!A1")) on_existing_file=Existing_File_Behavior.Append match_columns=Match_Columns.By_Position on_problems=Report_Error . should_succeed written = out.read (Excel (Worksheet "Another")) . 
select_columns [0, 1, 2] written.should_equal expected - out.delete_if_exists + out.delete_if_exists . should_succeed Test.specify 'should be able to append to a single cell by name out of order' <| - out.delete_if_exists + out = create_out (enso_project.data / test_sheet_name) . copy_to out extra_another = Table.new [['CC',[True, False]], ['BB',[4, 5]], ['AA', ['d', 'e']], ['DD', ['2022-01-20', '2022-01-21']]] expected = Table.new [['AA', ['a','b','c','d', 'e']], ['BB',[1,2,3,4,5]], ['CC',[True, False, False, True, False]]] extra_another.write out (Excel (Cell_Range "Another!A1")) on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed written = out.read (Excel (Worksheet "Another")) . select_columns [0, 1, 2] written.should_equal expected - out.delete_if_exists + out.delete_if_exists . should_succeed Test.specify 'should be able to append to a range by name' <| - out.delete_if_exists + out = create_out (enso_project.data / test_sheet_name) . copy_to out extra_another = Table.new [['AA', ['d', 'e']], ['BB', [4, 5]], ['CC', [True, False]], ['DD', ['2022-01-20', '2022-01-21']]] expected = Table.new [['AA', ['a', 'b', 'c', 'd', 'e']], ['BB', [1, 2, 3, 4, 5]], ['CC', [True, False, False, True, False]]] extra_another.write out (Excel (Cell_Range "Another!A1:D6")) on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed written = out.read (Excel (Worksheet "Another")) . select_columns [0, 1, 2] written.should_equal expected - out.delete_if_exists + out.delete_if_exists . should_succeed Test.specify 'should be able to append to a range by position' <| - out.delete_if_exists + out = create_out (enso_project.data / test_sheet_name) . 
copy_to out extra_another = Table.new [['A', ['d', 'e']], ['B',[4, 5]], ['C',[True, False]], ['D', ['2022-01-20', '2022-01-21']]] expected = Table.new [['AA', ['a','b','c','d', 'e']], ['BB',[1,2,3,4,5]], ['CC',[True, False, False, True, False]]] extra_another.write out (Excel (Cell_Range "Another!A1:D6")) on_existing_file=Existing_File_Behavior.Append match_columns=Match_Columns.By_Position on_problems=Report_Error . should_succeed written = out.read (Excel (Worksheet "Another")) . select_columns [0, 1, 2] written.should_equal expected - out.delete_if_exists + out.delete_if_exists . should_succeed Test.specify 'should be able to append to a range by name not in top left' <| - out.delete_if_exists + out = create_out (enso_project.data / test_sheet_name) . copy_to out extra_another = Table.new [['AA', ['d', 'e']], ['BB',[4, 5]], ['CC',[True, False]], ['DD', ['2022-01-20', '2022-01-21']]] expected = Table.new [['AA', ['f', 'g', 'h', 'd', 'e']], ['BB',[1, 2, 3, 4, 5]], ['CC',[True, False, False, True, False]]] extra_another.write out (Excel (Cell_Range "Random!K9")) on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed written = out.read (Excel (Cell_Range "Random!K9")) . select_columns [0, 1, 2] written.should_equal expected - out.delete_if_exists + out.delete_if_exists . should_succeed Test.specify 'should be able to append to a range by name after deduplication of names' <| - out.delete_if_exists + out = create_out (enso_project.data / test_sheet_name) . copy_to out extra_another = Table.new [['AA', ['d', 'e']], ['BB',[4, 5]], ['AA 1',[True, False]], ['BB 1', ['2022-01-20', '2022-01-21']]] expected = Table.new [['AA', ['f', 'g', 'h', 'd', 'e']], ['BB',[1, 2, 3, 4, 5]], ['AA 1',[True, False, False, True, False]]] extra_another.write out (Excel (Cell_Range "Random!S3")) on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed written = out.read (Excel (Cell_Range "Random!S3")) . 
select_columns [0, 1, 2] written.should_equal expected - out.delete_if_exists + out.delete_if_exists . should_succeed Test.specify 'should be able to append to a range by position not in top left' <| - out.delete_if_exists + out = create_out (enso_project.data / test_sheet_name) . copy_to out extra_another = Table.new [['A', ['d', 'e']], ['B',[4, 5]], ['C',[True, False]], ['D', ['2022-01-20', '2022-01-21']]] expected = Table.new [['AA', ['f', 'g', 'h', 'd', 'e']], ['BB',[1, 2, 3, 4, 5]], ['CC',[True, False, False, True, False]]] extra_another.write out (Excel (Cell_Range "Random!K9")) on_existing_file=Existing_File_Behavior.Append match_columns=Match_Columns.By_Position on_problems=Report_Error . should_succeed written = out.read (Excel (Cell_Range "Random!K9")) . select_columns [0, 1, 2] written.should_equal expected - out.delete_if_exists + out.delete_if_exists . should_succeed Test.specify 'should be able to append to a range by name out of order' <| - out.delete_if_exists + out = create_out (enso_project.data / test_sheet_name) . copy_to out extra_another = Table.new [['CC',[True, False]], ['BB',[4, 5]], ['AA', ['d', 'e']], ['DD', ['2022-01-20', '2022-01-21']]] expected = Table.new [['AA', ['a','b','c','d', 'e']], ['BB',[1,2,3,4,5]], ['CC',[True, False, False, True, False]]] extra_another.write out (Excel (Cell_Range "Another!A1:D6")) on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error . should_succeed written = out.read (Excel (Worksheet "Another")) . select_columns [0, 1, 2] written.should_equal expected - out.delete_if_exists + out.delete_if_exists . should_succeed Test.specify 'should be able to write to a new dry run file' <| - out.delete_if_exists + out = create_out temp = Context.Output.with_disabled <| result = table.write out on_problems=Report_Error . should_succeed Problems.expect_only_warning Dry_Run_Operation result result.exists.should_be_true + + result.absolute.normalize.path . 
should_not_equal out.absolute.normalize.path + written = result.read written.sheet_count . should_equal 1 written.sheet_names . should_equal ['EnsoSheet'] written.read 'EnsoSheet' . should_equal table + written.close result temp.delete_if_exists + Test.specify "should be able to write to a dry-run file, even if the dry-run workbook is open" <| + out = create_out + out.exists.should_be_false + temp = Context.Output.with_disabled <| + result = table.write out on_problems=Report_Error . should_succeed + Problems.expect_only_warning Dry_Run_Operation result + result.exists.should_be_true + result + temp.absolute.normalize.path . should_not_equal out.absolute.normalize.path + out.exists.should_be_false + + opened_temp = temp.read + opened_temp.sheet_names . should_equal ['EnsoSheet'] + + temp2 = Context.Output.with_disabled <| + result = table.write out (Excel (Worksheet "Another")) on_problems=Report_Error . should_succeed + Problems.expect_only_warning Dry_Run_Operation result + result.exists.should_be_true + result + + # The result should be written to the same dry-run file on second attempt. + temp2.absolute.normalize.path . should_equal temp.absolute.normalize.path + + ## The write operation replaces the dry run file, basing off of the _original_ out file + (which was empty in this example), so we still only get one sheet. + Different example is tested in the test below, if the subsequent file happens to the returned + dry-run object - then both updates are visible - see below. + opened_temp.sheet_names . should_equal ['Another'] + + opened_temp.close + temp.delete_if_exists + + Test.specify "should be able to write to a dry-run file multiple times if the dry-run file object is threaded through" <| + out = create_out + temp1 = Context.Output.with_disabled <| + result = table.write out on_problems=Report_Error . should_succeed + Problems.expect_only_warning Dry_Run_Operation result + result.exists.should_be_true + result + temp1.absolute.normalize.path . 
should_not_equal out.absolute.normalize.path + + opened_temp = temp1.read + opened_temp.sheet_names . should_equal ['EnsoSheet'] + + temp2 = Context.Output.with_disabled <| + result = table.write temp1 (Excel (Worksheet "Another")) on_problems=Report_Error . should_succeed + Problems.expect_only_warning Dry_Run_Operation result + result.exists.should_be_true + result + + # The result should be written to the same file though. + temp2.absolute.normalize.path . should_equal temp1.absolute.normalize.path + + # The write operation replaces the dry run file, basing off of the dry-run file itself - so both changes are visible. + opened_temp.sheet_names . should_equal ['EnsoSheet', 'Another'] + + opened_temp.close + temp1.delete_if_exists + + Test.specify "should be able to create a backup, even if it is currently open" <| + out = create_out + bak = out.parent / (out.name+".bak") + + t1 = Table.new [["X", [1]]] + t1.write out on_existing_file=Existing_File_Behavior.Backup on_problems=Report_Error . should_succeed + bak.exists.should_be_false + + t2 = Table.new [["X", [2]]] + t2.write out on_existing_file=Existing_File_Behavior.Backup on_problems=Report_Error . should_succeed + bak.exists.should_be_true + + opened_out = out.read + # We need to specify explicit format for the backup, because the extension is changed: + opened_backup = bak.read (Excel xls_format=(suffix=="xls")) + + opened_out.read 'EnsoSheet' . should_equal t2 + opened_backup.read 'EnsoSheet' . should_equal t1 + + t3 = Table.new [["X", [3]]] + t3.write out on_existing_file=Existing_File_Behavior.Backup on_problems=Report_Error . should_succeed + + opened_out.read 'EnsoSheet' . should_equal t3 + # The backup should actually have been updated + opened_backup.read 'EnsoSheet' . should_equal t2 + + opened_out.close + opened_backup.close + + out.delete_if_exists . should_succeed + bak.delete_if_exists . 
should_succeed + Test.specify 'should be able to write to an existing empty file' <| - out.delete_if_exists + out = create_out [].write_bytes out - out_bak.delete_if_exists + out_bak = out.parent / (out.name+".bak") table.write out on_problems=Report_Error . should_succeed . should_equal out written = out.read @@ -345,73 +463,75 @@ spec_write suffix test_sheet_name = written.sheet_names . should_equal ['EnsoSheet'] written.read 'EnsoSheet' . should_equal table - out_bak.exists.should_be_true - out_bak.size.should_equal 0 + Test.with_clue "should have created a backup file: " <| + out_bak.exists.should_be_true + out_bak.size.should_equal 0 - out.delete_if_exists - out_bak.delete_if_exists + written.close + out.delete_if_exists . should_succeed + out_bak.delete_if_exists . should_succeed Test.specify 'should fail to append to a sheet by name if missing columns' <| - out.delete_if_exists + out = create_out (enso_project.data / test_sheet_name) . copy_to out extra_another = Table.new [['CC',[True, False]], ['BB',[4, 5]], ['AA', ['d', 'e']]] extra_another.write out (Excel (Worksheet "Another")) on_existing_file=Existing_File_Behavior.Append . should_fail_with Column_Name_Mismatch - out.delete_if_exists + out.delete_if_exists . should_succeed Test.specify 'should fail to append to a sheet by name if extra columns' <| - out.delete_if_exists + out = create_out (enso_project.data / test_sheet_name) . copy_to out lmd = out.last_modified_time extra_another = Table.new [['CC',[True, False]], ['BB',[4, 5]], ['AA', ['d', 'e']], ['DD', ['2022-01-20', '2022-01-21']], ['EE', ['2022-01-20', '2022-01-21']]] extra_another.write out (Excel (Worksheet "Another")) on_existing_file=Existing_File_Behavior.Append . should_fail_with Column_Name_Mismatch out.last_modified_time.should_equal lmd - out.delete_if_exists + out.delete_if_exists . 
should_succeed Test.specify 'should fail to append to a sheet by name if no headers' <| - out.delete_if_exists + out = create_out (enso_project.data / test_sheet_name) . copy_to out lmd = out.last_modified_time extra_another = Table.new [['CC',[True, False]], ['BB',[4, 5]], ['AA', ['d', 'e']], ['DD', ['2022-01-20', '2022-01-21']], ['EE', ['2022-01-20', '2022-01-21']]] extra_another.write out (Excel (Worksheet "NoHeaders")) on_existing_file=Existing_File_Behavior.Append . should_fail_with Illegal_Argument - extra_another.write out (Excel (Worksheet "Another") False) on_existing_file=Existing_File_Behavior.Append . should_fail_with Illegal_Argument + extra_another.write out (Excel (Worksheet "Another") headers=False) on_existing_file=Existing_File_Behavior.Append . should_fail_with Illegal_Argument out.last_modified_time.should_equal lmd - out.delete_if_exists + out.delete_if_exists . should_succeed Test.specify 'should fail to append to a sheet by position if too few columns' <| - out.delete_if_exists + out = create_out (enso_project.data / test_sheet_name) . copy_to out lmd = out.last_modified_time extra_another = Table.new [['CC',[True, False]], ['BB',[4, 5]], ['AA', ['d', 'e']]] extra_another.write out (Excel (Worksheet "Another")) on_existing_file=Existing_File_Behavior.Append match_columns=Match_Columns.By_Position . should_fail_with Column_Count_Mismatch out.last_modified_time.should_equal lmd - out.delete_if_exists + out.delete_if_exists . should_succeed Test.specify 'should fail to append to a sheet by position if too many columns' <| - out.delete_if_exists + out = create_out (enso_project.data / test_sheet_name) . copy_to out lmd = out.last_modified_time extra_another = Table.new [['CC',[True, False]], ['BB',[4, 5]], ['AA', ['d', 'e']], ['DD', ['2022-01-20', '2022-01-21']], ['EE', ['2022-01-20', '2022-01-21']]] extra_another.write out (Excel (Worksheet "Another")) on_existing_file=Existing_File_Behavior.Append match_columns=Match_Columns.By_Position . 
should_fail_with Column_Count_Mismatch out.last_modified_time.should_equal lmd - out.delete_if_exists + out.delete_if_exists . should_succeed Test.specify 'should fail to append to a range by name if not large enough' <| - out.delete_if_exists + out = create_out (enso_project.data / test_sheet_name) . copy_to out lmd = out.last_modified_time extra_another = Table.new [['AA', ['d', 'e']], ['BB',[4, 5]], ['CC',[True, False]], ['DD', ['2022-01-20', '2022-01-21']]] extra_another.write out (Excel (Cell_Range "Another!A1:D5")) on_existing_file=Existing_File_Behavior.Append . should_fail_with Range_Exceeded out.last_modified_time.should_equal lmd - out.delete_if_exists + out.delete_if_exists . should_succeed Test.specify 'should fail to append to a range by name if it hits another table' <| - out.delete_if_exists + out = create_out (enso_project.data / test_sheet_name) . copy_to out lmd = out.last_modified_time extra_another = Table.new [['AA', ['d', 'e']], ['BB',[4, 5]], ['CC',[True, False]], ['DD', ['2022-01-20', '2022-01-21']]] extra_another.write out (Excel (Cell_Range "Random!B3")) on_existing_file=Existing_File_Behavior.Append . should_fail_with Existing_Data out.last_modified_time.should_equal lmd - out.delete_if_exists + out.delete_if_exists . should_succeed Test.specify "should fail if the target file is read-only" <| f = enso_project.data / "transient" / "permission."+suffix @@ -419,19 +539,56 @@ spec_write suffix test_sheet_name = f.delete_if_exists initial_data = Table.new [["Y", [10, 20, 30]]] - initial_data.write f - Util.set_writable f False + initial_data.write f . should_succeed + Util.set_writable f False . should_succeed t1 = Table.new [["X", [1, 2, 3]]] - [Existing_File_Behavior.Backup, Existing_File_Behavior.Overwrite, Existing_File_Behavior.Append].each behavior-> + [Existing_File_Behavior.Backup, Existing_File_Behavior.Overwrite, Existing_File_Behavior.Append].each behavior-> Test.with_clue behavior.to_text+": " <| + f.exists . 
should_be_true + r1 = t1.write f (Excel (Worksheet "Another")) on_existing_file=behavior - r1.should_fail_with File_Error - r1.catch.should_be_a File_Error.Access_Denied - f.read . read "EnsoSheet" . should_equal initial_data + Test.with_clue "("+r1.catch.to_display_text+") " <| + r1.should_fail_with File_Error + r1.catch.should_be_a File_Error.Access_Denied + + read_table = Managed_Resource.bracket (f.read) (.close) workbook-> + workbook.read "EnsoSheet" + read_table.should_equal initial_data Util.set_writable f True f.delete + Test.specify "should allow to write to a workbook that is open, and reflect that changes when the sheet is read again" <| + out = create_out + table.write out on_problems=Report_Error . should_succeed + + workbook = out.read (Excel headers=True) + workbook.sheet_names.should_equal ["EnsoSheet"] + workbook.to_text . should_equal "Excel_Workbook ("+out.name+")" + + # We can have the workbook open multiple times in parallel too. + w2 = out.read (Excel headers=True) + + t1 = workbook.read "EnsoSheet" + t1.should_equal table + + [Existing_File_Behavior.Backup, Existing_File_Behavior.Overwrite].each behavior-> Test.with_clue behavior.to_text+": " <| + t2 = Table.new [["X", [behavior.to_text, "B", "C", behavior.to_text+"..."]]] + t2.write out on_existing_file=behavior . should_succeed + + workbook.sheet_names.should_equal ["EnsoSheet"] + + # If we read the table again, it has the new values in it: + t3 = workbook.read "EnsoSheet" + t3.should_equal t2 + + t4 = w2.read "EnsoSheet" + t4.should_equal t2 + + workbook.close + w2.close + out.delete_if_exists . 
should_succeed + Test.specify "should fail if the parent directory does not exist" <| parent = enso_project.data / "transient" / "nonexistent" parent.exists.should_be_false @@ -439,19 +596,34 @@ spec_write suffix test_sheet_name = f = parent / "foo."+suffix t1 = Table.new [["X", [1, 2, 3]]] r1 = t1.write f (Excel (Worksheet "Another")) - r1.should_fail_with File_Error - r1.catch.should_be_a File_Error.Not_Found + Test.with_clue "("+r1.catch.to_display_text+") " <| + r1.should_fail_with File_Error + r1.catch.should_be_a File_Error.Not_Found Test.specify "should allow to write and read-back Unicode characters" <| encodings = enso_project.data / "transient" / "encodings."+suffix + encodings.delete_if_exists . should_succeed + t1 = Table.new [["A", ["A", "B", "😊", "D"]], ["B", [1, 2, 3, 4]]] - t1.write encodings (Excel (Worksheet "Another")) + t1.write encodings (Excel (Worksheet "Another")) . should_succeed t2 = encodings.read (Excel (Worksheet "Another")) t2.at "A" . to_vector . should_equal ["A", "B", "😊", "D"] encodings.delete - out.delete_if_exists - out_bak.delete_if_exists + Test.specify "should be able to overwrite a pre-existing empty file" <| + empty = enso_project.data / "transient" / "empty."+suffix + [Existing_File_Behavior.Backup, Existing_File_Behavior.Overwrite, Existing_File_Behavior.Append].each behavior-> Test.with_clue behavior.to_text+": " <| + empty.delete_if_exists . should_succeed + "".write empty + empty.exists.should_be_true + empty.size.should_equal 0 + + t1 = Table.new [["A", [behavior.to_text, "B", "C", "D"]], ["B", [1, 2, 3, 4]]] + t1.write empty on_existing_file=behavior . 
should_succeed + empty.exists.should_be_true + + t2 = empty.read (Excel (Worksheet "EnsoSheet")) + t2.should_equal t1 spec = Test.group 'Excel Range' <| @@ -703,10 +875,24 @@ spec = result.catch.should_be_a File_Error.Not_Found Test.specify "should handle wrong xls_format gracefully" <| + xlsx_sheet_copy = enso_project.data / "transient" / "TestSheetCopy.xlsx" + xlsx_sheet.copy_to xlsx_sheet_copy + + # At first, it fails with File_Error r1 = xlsx_sheet.read (Excel (Cell_Range "Sheet1!A:C") xls_format=True) r1.should_fail_with File_Error r1.catch.should_be_a File_Error.Corrupted_Format + # If we now open it correctly + r1_2 = xlsx_sheet.read + r1_2.should_succeed + + # And then wrong again + r1_3 = xlsx_sheet.read (Excel (Cell_Range "Sheet1!A:C") xls_format=True) + # It should still fail the same: + r1_3.should_fail_with File_Error + r1_3.catch.should_be_a File_Error.Corrupted_Format + r2 = xls_sheet.read (Excel (Cell_Range "Sheet1!A:C") xls_format=False) r2.should_fail_with File_Error r2.catch.should_be_a File_Error.Corrupted_Format @@ -735,6 +921,71 @@ spec = bad_file.delete + Test.specify "will fail if an operation is performed on a closed workbook" <| + workbook = xlsx_sheet.read + workbook.sheet_count . should_equal 4 + + workbook.close . should_equal Nothing + + workbook.sheet_count . should_fail_with Illegal_State + workbook.close . should_equal Nothing + workbook.read "Sheet1" . should_fail_with Illegal_State + + ci_pending = if Environment.get "CI" != Nothing then "This test takes a lot of time so it is disabled on CI." 
+ Test.specify "should be able to write and read a big XLSX file (>110MB)" pending=ci_pending <| + n = 10^6 + IO.println "Generating big XLSX file "+Time_Of_Day.now.to_text + rng = RandomHelpers.new 123 + v = Vector.new n _-> + rng.makeRandomString 190 + table = Table.new [["X", v]] + big_file = enso_project.data / "transient" / "big.xlsx" + big_file.delete_if_exists + + table.write big_file on_existing_file=Existing_File_Behavior.Overwrite on_problems=Report_Error . should_succeed + IO.println "Done "+Time_Of_Day.now.to_text + + # Verify that the file is as big as we expected. + size = big_file.size / (1024*1024) + Test.with_clue "size="+size.to_text+"MB " <| + (size > 110).should_be_true + + workbook = big_file.read + sheets = workbook.sheet_names + sheets.length . should_equal 1 + read_table = workbook.read (sheets.at 0) + read_table.row_count . should_equal n+1 + read_table.column_names.length . should_equal 1 + read_table.at 0 . at 0 . should_equal "X" + read_table.at 0 . at 1 . should_equal (v.at 0) + + Test.specify "should be able to write and read a big XLS file (>110MB)" pending=ci_pending <| + IO.println "Generating big XLS file "+Time_Of_Day.now.to_text + rng = RandomHelpers.new 123 + # Here we instead create a 2D table, because XLS has a limit of 65536 rows and 16k columns. + rows = 65000 + cols = 20 + table = Table.new <| Vector.new cols i-> + v = Vector.new rows _-> rng.makeRandomString 100 + ["col" + i.to_text, v] + big_file = enso_project.data / "transient" / "big.xls" + big_file.delete_if_exists + + table.write big_file on_existing_file=Existing_File_Behavior.Overwrite on_problems=Report_Error . should_succeed + IO.println "Done "+Time_Of_Day.now.to_text + + # Verify that the file is as big as we expected. + size = big_file.size / (1024*1024) + Test.with_clue "size="+size.to_text+"MB " <| + (size > 110).should_be_true + + workbook = big_file.read + sheets = workbook.sheet_names + sheets.length . 
should_equal 1 + read_table = workbook.read (sheets.at 0) + read_table.row_count . should_equal rows+1 + read_table.column_names.length . should_equal cols + spec_fmt 'XLSX reading' Examples.xlsx .read spec_fmt 'XLS reading' Examples.xls .read @@ -784,6 +1035,8 @@ spec = problems = [Duplicate_Output_Column_Names.Error ["DD"]] Problems.test_problem_handling action problems tester + # Cleanup any leftovers from previous runs + enso_project.data/"transient" . list "out*" . each .delete spec_write "xlsx" 'TestSheet.xlsx' spec_write "xls" 'TestSheetOld.xls' diff --git a/test/Table_Tests/src/IO/Fetch_Spec.enso b/test/Table_Tests/src/IO/Fetch_Spec.enso new file mode 100644 index 00000000000..e4183b03591 --- /dev/null +++ b/test/Table_Tests/src/IO/Fetch_Spec.enso @@ -0,0 +1,60 @@ +from Standard.Base import all +import Standard.Base.Errors.File_Error.File_Error +import Standard.Base.Runtime.Context + +from Standard.Table import all +import Standard.Table.Errors.Invalid_JSON_Format + +from Standard.Test import Test, Test_Suite +import Standard.Test.Extensions + +import project.Util + +main = Test_Suite.run_main spec + +spec = + ## To run this test locally: + $ sbt 'simple-httpbin/run localhost 8080' + $ export ENSO_HTTP_TEST_HTTPBIN_URL=http://localhost:8080/ + base_url = Environment.get "ENSO_HTTP_TEST_HTTPBIN_URL" + base_url_with_slash = base_url.if_not_nothing <| + if base_url.ends_with "/" then base_url else base_url + "/" + pending_has_url = if base_url != Nothing then Nothing else + "The HTTP tests only run when the `ENSO_HTTP_TEST_HTTPBIN_URL` environment variable is set to URL of the httpbin server" + + Test.group "fetching files using HTTP" pending=pending_has_url <| + Test.specify "fetching json" <| + r = Data.fetch base_url_with_slash+"testfiles/table.json" + expected_table = Table.from_rows ["A", "B"] [[1, "x"], [3, "y"]] + r.to Table . 
should_equal expected_table + + Test.specify "fetching csv" <| + r = Data.fetch base_url_with_slash+"testfiles/table.csv" + expected_table = Table.from_rows ["A", "B"] [[1, "x"], [3, "y"]] + r.to Table . should_equal expected_table + + Test.specify "fetching xls" <| + url = base_url_with_slash+"testfiles/table.xls" + r = Data.fetch url + expected_table = Table.from_rows ["A", "B"] [[1, "x"], [3, "y"]] + + r.should_be_a Excel_Workbook + r.sheet_names . should_equal ["MyTestSheet"] + r.read "MyTestSheet" . should_equal expected_table + + r2 = Data.fetch url try_auto_parse_response=False . decode (Excel (Excel_Section.Worksheet "MyTestSheet")) + r2.should_be_a Table + r2 . should_equal expected_table + + Test.specify "fetching xlsx" <| + url = base_url_with_slash+"testfiles/table.xlsx" + r = Data.fetch url + expected_table = Table.from_rows ["A", "B"] [[1, "x"], [3, "y"]] + + r.should_be_a Excel_Workbook + r.sheet_names . should_equal ["MyTestSheet"] + r.read "MyTestSheet" . should_equal expected_table + + r2 = Data.fetch url try_auto_parse_response=False . decode (Excel (Excel_Section.Worksheet "MyTestSheet")) + r2.should_be_a Table + r2 . should_equal expected_table diff --git a/test/Table_Tests/src/IO/Formats_Spec.enso b/test/Table_Tests/src/IO/Formats_Spec.enso index d539d3590dd..5ede91b108d 100644 --- a/test/Table_Tests/src/IO/Formats_Spec.enso +++ b/test/Table_Tests/src/IO/Formats_Spec.enso @@ -10,109 +10,155 @@ import Standard.Test.Extensions import project.Util -spec = Test.group 'Various File Format support on Table' <| - t1 = Table.new [["X", [1, 2, 3]]] +spec = transient = enso_project.data / "transient" - simple_empty = enso_project.data/'simple_empty.csv' . read - - Test.specify "should be able to be written as CSV, Excel" <| - f1 = transient / "test2.csv" - f2 = transient / "test3.xlsx" - [f1, f2].each f-> - f.delete_if_exists - t1.write f . 
should_succeed - f.exists.should_be_true - f.delete - - Test.specify "should be able to be written as JSON using Table.write" <| - f1 = transient / "test1.json" - f1.delete_if_exists - t1.write f1 . should_succeed - f1.exists.should_be_true - f1.delete - - Test.specify 'should write JSON tables' <| + Test.group 'Various File Format support on Table' <| + t1 = Table.new [["X", [1, 2, 3]]] simple_empty = enso_project.data/'simple_empty.csv' . read - out = transient / 'out.json' - out.delete_if_exists - simple_empty.write out . should_equal out - Table.from_objects (Json.parse out.read_text) ['a', 'b', 'c'] . should_equal simple_empty - out.delete_if_exists - Test.specify 'should append to JSON tables' <| - out = transient / 'out.json' - out.delete_if_exists - simple_empty.write out . should_equal out - simple_empty.write out on_existing_file=Existing_File_Behavior.Append . should_equal out - Table.from_objects (Json.parse out.read_text) ['a', 'b', 'c'] . row_count . should_equal 2*simple_empty.row_count - out.delete_if_exists + Test.specify "should be able to be written as CSV, Excel" <| + f1 = transient / "test2.csv" + f2 = transient / "test3.xlsx" + [f1, f2].each f-> + f.delete_if_exists + t1.write f . should_succeed + f.exists.should_be_true + f.delete - Test.specify 'should fail to append to JSON non-arrays' <| - out = transient / 'out.json' - out.delete_if_exists - '1'.write out - simple_empty.write out on_existing_file=Existing_File_Behavior.Append . should_fail_with Invalid_JSON_Format - out.delete_if_exists + Test.specify "should be able to be written as JSON using Table.write" <| + f1 = transient / "test1.json" + f1.delete_if_exists + t1.write f1 . should_succeed + f1.exists.should_be_true + f1.delete - '"Hello World"'.write out - simple_empty.write out on_existing_file=Existing_File_Behavior.Append . 
should_fail_with Invalid_JSON_Format - out.delete_if_exists + Test.specify 'should write JSON tables' <| + simple_empty = enso_project.data/'simple_empty.csv' . read + out = transient / 'out.json' + out.delete_if_exists + simple_empty.write out . should_equal out + Table.from_objects (Json.parse out.read_text) ['a', 'b', 'c'] . should_equal simple_empty + out.delete_if_exists - '{}'.write out - simple_empty.write out on_existing_file=Existing_File_Behavior.Append . should_fail_with Invalid_JSON_Format - out.delete_if_exists + Test.specify 'should append to JSON tables' <| + out = transient / 'out.json' + out.delete_if_exists + simple_empty.write out . should_equal out + simple_empty.write out on_existing_file=Existing_File_Behavior.Append . should_equal out + Table.from_objects (Json.parse out.read_text) ['a', 'b', 'c'] . row_count . should_equal 2*simple_empty.row_count + out.delete_if_exists - Test.specify "should fail gracefully when provided with an unsupported format" <| - f1 = (transient / "test4.unknown-format") - f1.delete_if_exists - r1 = t1.write f1 - r1.should_fail_with File_Error - r1.catch.should_be_a File_Error.Unsupported_Output_Type - r1.catch.format . should_equal f1 - r1.catch.to_display_text . should_equal "Values of type Table cannot be written as format test4.unknown-format." + Test.specify 'should fail to append to JSON non-arrays' <| + out = transient / 'out.json' + out.delete_if_exists + '1'.write out + simple_empty.write out on_existing_file=Existing_File_Behavior.Append . should_fail_with Invalid_JSON_Format + out.delete_if_exists - f2 = (transient / "test5.txt") - f2.delete_if_exists - my_format = Plain_Text Encoding.ascii - r2 = t1.write f2 my_format - r2.should_fail_with File_Error - r2.catch.should_be_a File_Error.Unsupported_Output_Type - r2.catch.format . should_equal my_format + '"Hello World"'.write out + simple_empty.write out on_existing_file=Existing_File_Behavior.Append . 
should_fail_with Invalid_JSON_Format + out.delete_if_exists - write_test extension = - f = transient / ("big." + extension) - f.delete_if_exists - f_bak = transient / ("big." + extension + ".bak") - f_bak.delete_if_exists + '{}'.write out + simple_empty.write out on_existing_file=Existing_File_Behavior.Append . should_fail_with Invalid_JSON_Format + out.delete_if_exists - big_table = Table.new [["a", 1.up_to 2000 . to_vector]] - big_table.write f + Test.specify "should fail gracefully when provided with an unsupported format" <| + f1 = (transient / "test4.unknown-format") + f1.delete_if_exists + r1 = t1.write f1 + r1.should_fail_with File_Error + r1.catch.should_be_a File_Error.Unsupported_Output_Type + r1.catch.format . should_equal f1 + r1.catch.to_display_text . should_equal "Values of type Table cannot be written as format test4.unknown-format." - new_table = Table.new [["a", 2000.up_to 4000 . to_vector]] - r = Context.Output.with_disabled <| - s = new_table.write f - s.exists.should_be_true + f2 = (transient / "test5.txt") + f2.delete_if_exists + my_format = Plain_Text Encoding.ascii + r2 = t1.write f2 my_format + r2.should_fail_with File_Error + r2.catch.should_be_a File_Error.Unsupported_Output_Type + r2.catch.format . should_equal my_format - r_data = s.read - row_count = if r_data . is_a Table then r_data.row_count else r_data.length - row_count . should_equal 1000 - s + write_tests extension = Test.group 'Writing to '+extension+' files' <| + count result = + if result . is_a Table then result.row_count else result.length - f_bak.exists.should_be_false + Test.specify "should write to a temporary "+extension+" file part of the data if context is disabled" <| + f = transient / ("big." + extension) + f.delete_if_exists + f_bak = transient / ("big." + extension + ".bak") + f_bak.delete_if_exists - f.exists.should_be_true - f_data = f.read - f_row_count = if f_data . is_a Table then f_data.row_count else f_data.length - f_row_count . 
should_equal 1999 + big_table = Table.new [["a", 1.up_to 2000 . to_vector]] + big_table.write f - f.delete_if_exists - f_bak.delete_if_exists - r.delete_if_exists + new_table = Table.new [["a", 2000.up_to 4000 . to_vector]] + r = Context.Output.with_disabled <| + s = new_table.write f + s.exists.should_be_true - Test.specify "should write to a temporary CSV file part of the data if context disabled" <| - write_test "csv" + r_data = s.read + count r_data . should_equal 1000 + s - Test.specify "should write to a temporary JSON file part of the data if context disabled" <| - write_test "json" + f_bak.exists.should_be_false + + f.exists.should_be_true + f_data = f.read + count f_data . should_equal 1999 + + f.delete_if_exists + f_bak.delete_if_exists + r.delete_if_exists + + Test.specify "should create a backup file if overwriting" <| + f = transient / ("test." + extension) + f.delete_if_exists + f_bak = transient / ("test." + extension + ".bak") + f_bak.delete_if_exists + + t1 = Table.new [["a", 0.up_to 10 . to_vector]] + t1.write f . should_succeed . should_equal f + f.exists.should_be_true + f_bak.exists.should_be_false + + t2 = Table.new [["a", 0.up_to 30 . to_vector]] + t2.write f . should_succeed . should_equal f + f.exists.should_be_true + f_bak.exists.should_be_true + + count f.read . should_equal 30 + + # ensure correct format is used for reading the .bak file + format = Auto_Detect.get_reading_format f + count (f_bak.read format) . should_equal 10 + + f.delete_if_exists + f_bak.delete_if_exists + + + Test.specify "should support appending" <| + f = transient / ("test." + extension) + f.delete_if_exists + f_bak = transient / ("test." + extension + ".bak") + f_bak.delete_if_exists + + t1 = Table.new [["a", 0.up_to 10 . to_vector]] + t1.write f on_existing_file=Existing_File_Behavior.Append . should_succeed . should_equal f + f.exists.should_be_true + f_bak.exists.should_be_false + + t2 = Table.new [["a", 0.up_to 30 . 
to_vector]] + t2.write f on_existing_file=Existing_File_Behavior.Append . should_succeed . should_equal f + f.exists.should_be_true + f_bak.exists.should_be_false + + count f.read . should_equal 40 + + f.delete_if_exists + + write_tests "csv" + write_tests "json" main = Test_Suite.run_main spec diff --git a/test/Table_Tests/src/IO/Main.enso b/test/Table_Tests/src/IO/Main.enso index ec621d751b0..bb69e1cd0d6 100644 --- a/test/Table_Tests/src/IO/Main.enso +++ b/test/Table_Tests/src/IO/Main.enso @@ -6,6 +6,7 @@ import project.IO.Csv_Spec import project.IO.Delimited_Read_Spec import project.IO.Delimited_Write_Spec import project.IO.Excel_Spec +import project.IO.Fetch_Spec import project.IO.Formats_Spec import project.IO.Json_Spec @@ -15,6 +16,7 @@ spec = Delimited_Write_Spec.spec Excel_Spec.spec Formats_Spec.spec + Fetch_Spec.spec Json_Spec.spec main = Test_Suite.run_main spec diff --git a/test/Tests/polyglot-sources/enso-test-java-helpers/src/main/java/org/enso/table_test_helpers/RandomHelpers.java b/test/Tests/polyglot-sources/enso-test-java-helpers/src/main/java/org/enso/table_test_helpers/RandomHelpers.java new file mode 100644 index 00000000000..2396cebae34 --- /dev/null +++ b/test/Tests/polyglot-sources/enso-test-java-helpers/src/main/java/org/enso/table_test_helpers/RandomHelpers.java @@ -0,0 +1,20 @@ +package org.enso.table_test_helpers; + +import java.util.Random; + +public class RandomHelpers { + private final Random rng; + + public RandomHelpers(int seed) { + this.rng = new Random(seed); + } + + public String makeRandomString(int length) { + StringBuilder sb = new StringBuilder(); + int n = 'z' - 'A'; + for (int i = 0; i < length; i++) { + sb.append((char) (rng.nextInt(n) + 'A')); + } + return sb.toString(); + } +} diff --git a/test/Tests/src/Main.enso b/test/Tests/src/Main.enso index 0cfca9e258a..bdba94bc519 100644 --- a/test/Tests/src/Main.enso +++ b/test/Tests/src/Main.enso @@ -81,6 +81,7 @@ import project.System.Process_Spec import 
project.System.Reporting_Stream_Decoder_Spec import project.System.Reporting_Stream_Encoder_Spec import project.System.System_Spec +import project.System.Temporary_File_Spec import project.Random_Spec @@ -97,6 +98,7 @@ main = Test_Suite.run_main <| Error_Spec.spec Environment_Spec.spec File_Spec.spec + Temporary_File_Spec.spec File_Read_Spec.spec Reporting_Stream_Decoder_Spec.spec Reporting_Stream_Encoder_Spec.spec diff --git a/test/Tests/src/System/File_Spec.enso b/test/Tests/src/System/File_Spec.enso index f6e63f97daa..2d85a487821 100644 --- a/test/Tests/src/System/File_Spec.enso +++ b/test/Tests/src/System/File_Spec.enso @@ -173,6 +173,19 @@ spec = f.copy_to g . should_fail_with Forbidden_Operation g.exists.should_be_false + "A".write f on_existing_file=Existing_File_Behavior.Overwrite + "B".write g on_existing_file=Existing_File_Behavior.Overwrite + + r = f.copy_to g + r.should_fail_with File_Error + r.catch.should_be_a File_Error.Already_Exists + f.read . should_equal "A" + g.read . should_equal "B" + + f.copy_to g replace_existing=True . should_succeed + f.read . should_equal "A" + g.read . should_equal "A" + f.delete_if_exists g.delete_if_exists @@ -193,6 +206,20 @@ spec = f.exists.should_be_false g.exists.should_be_true + "A".write f on_existing_file=Existing_File_Behavior.Overwrite + "B".write g on_existing_file=Existing_File_Behavior.Overwrite + r = f.move_to g + r.should_fail_with File_Error + r.catch.should_be_a File_Error.Already_Exists + f.exists.should_be_true + g.exists.should_be_true + g.read . should_equal "B" + + f.move_to g replace_existing=True . should_succeed + f.exists.should_be_false + g.exists.should_be_true + g.read . 
should_equal "A" + f.delete_if_exists g.delete_if_exists @@ -448,7 +475,7 @@ spec = Context.Output.with_enabled <| r.delete_if_exists - Test.specify "should perform a dry run creating and appending text to a file if Context.Output is disabled" <| + Test.specify "if Context.Output is disabled, will always start from the file given - so the effects of previous dry run are not visible" <| f = transient / "dry_append.txt" f.delete_if_exists @@ -461,12 +488,38 @@ spec = Problems.expect_only_warning Dry_Run_Operation s s.exists.should_be_true - s.read_text.should_equal 'line 1!\nline 2!' + # We only see the second line, because the base file `f` was not updated and the second append still starts from an empty file. + s.read_text.should_equal '\nline 2!' + + # But the dry run file for the same `f` target should be kept the same: s.should_equal r f.exists.should_be_false - Context.Output.with_enabled <| r.delete_if_exists + Context.Output.with_enabled r.delete_if_exists + + Test.specify "if Context.Output is disabled, will append to the dry run file if the dry run file descriptor is passed as the write target" <| + f = transient / "dry_append.txt" + f.delete_if_exists + + Context.Output.with_disabled <| + dry_run_file = "line 1!".write f on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error + Problems.expect_only_warning Dry_Run_Operation dry_run_file + dry_run_file.exists.should_be_true + + # Contrary to example above, we write to the returned file, not just `f`. + s = '\nline 2!'.write dry_run_file on_existing_file=Existing_File_Behavior.Append on_problems=Report_Error + Problems.expect_only_warning Dry_Run_Operation s + s.exists.should_be_true + + # We see both updates, because we've thread through the resulting dry run file descriptor: + s.read_text.should_equal 'line 1!\nline 2!' 
+ # The returned file is also the same: + s.should_equal dry_run_file + + f.exists.should_be_false + + Context.Output.with_enabled dry_run_file.delete_if_exists Test.specify "should allow to overwrite files" <| f = transient / "work.txt" diff --git a/test/Tests/src/System/Temporary_File_Spec.enso b/test/Tests/src/System/Temporary_File_Spec.enso new file mode 100644 index 00000000000..f991144bee4 --- /dev/null +++ b/test/Tests/src/System/Temporary_File_Spec.enso @@ -0,0 +1,124 @@ +from Standard.Base import all +import Standard.Base.Errors.File_Error.File_Error +import Standard.Base.Errors.Illegal_State.Illegal_State +import Standard.Base.System.File.Advanced.Temporary_File.Temporary_File +import Standard.Base.System.Input_Stream.Input_Stream + +from Standard.Test import Test, Test_Suite +from Standard.Test.Execution_Context_Helpers import run_with_and_without_output +import Standard.Test.Extensions + +polyglot java import java.io.File as Java_File +polyglot java import java.io.ByteArrayInputStream +polyglot java import java.io.FileInputStream +polyglot java import java.io.InputStream + +main = Test_Suite.run_main spec + +spec = + Test.group "Temporary_File facility" <| + Test.specify "should allow to create a new file and allow to dispose it manually" <| + tmp = Temporary_File.new + tmp.with_file f-> + "test".write f + r = tmp.with_file f-> + f.read Plain_Text + r.should_equal "test" + + raw_file = tmp.unsafe_get + raw_file.exists . should_be_true + + tmp.dispose + + Test.expect_panic Illegal_State <| tmp.with_file (f->f.read Plain_Text) + raw_file.exists . should_be_false + + Test.specify "should allow to create a new file and allow to dispose it once the reference is dropped" <| + f foo = + # The tmp file is limited to the scope of the function. + tmp = Temporary_File.new + tmp.with_file f-> + "["+foo+"]" . write f + + raw_file = tmp.unsafe_get + raw_file.exists . 
should_be_true + + res = tmp.with_file f-> + f.read Plain_Text + [res, raw_file] + + result = f "foobar" + # At this point the `tmp` from within the function is ready for collection. + result.first.should_equal "[foobar]" + raw_file = result.second + + repeat_gc count = + if raw_file.exists.not then Nothing else + if count <= 0 then Test.fail "The temporary file was not cleaned up after numerous GC retries. Perhaps this is a bug?" else + if count % 100 == 0 then + IO.println "Trying to GC the temporary file (still "+count.to_text+" to go), but the file ("+raw_file.to_text+") still exists... " + Runtime.gc + @Tail_Call repeat_gc count-1 + + repeat_gc 999 + raw_file.exists . should_be_false + + Test.specify "should allow to materialize an input stream, regardless of Output Context settings" <| + run_with_and_without_output <| + stream = make_stream "test payload 1" + tmp = Temporary_File.from_stream stream + tmp.with_file f-> + f.read Plain_Text . should_equal "test payload 1" + + Test.specify "will fail if materializing an already closed input stream" <| + stream = Input_Stream.new (InputStream.nullInputStream) (File_Error.handle_java_exceptions Nothing) + stream.close + + Test.expect_panic File_Error <| Temporary_File.from_stream stream + + Test.specify "should be able to be converted to text, and indicate if it was disposed" <| + tmp = Temporary_File.new "pref" ".suf" + tmp.to_text.should_contain "Temporary_File" + tmp.to_text.should_contain "pref" + tmp.to_text.should_contain ".suf" + + tmp.dispose + tmp.to_text.should_contain "(disposed)" + tmp.to_text.should_contain "Temporary_File" + tmp.to_text . should_not_contain "pref" + tmp.to_text . should_not_contain "suf" + + Test.specify "should allow to materialize an input stream that is already associated with a temporary file without copying it" <| + tmp = Temporary_File.new + tmp.with_file f-> + "test payload 3" . 
write f + + java_file = Java_File.new tmp.unsafe_get.absolute.path + stream = Input_Stream.new (FileInputStream.new java_file) (File_Error.handle_java_exceptions tmp.unsafe_get) associated_file=tmp + + tmp2 = Temporary_File.from_stream_light stream + # The returned tmp file should be the same one as original. + tmp2.should_be_a Temporary_File + tmp2.unsafe_get.absolute.path . should_equal tmp.unsafe_get.absolute.path + + # If the raw file is associated, the stream will return that File descriptor (not as temporary file, but regular one): + stream3 = Input_Stream.new (FileInputStream.new java_file) (File_Error.handle_java_exceptions tmp.unsafe_get) associated_file=tmp.unsafe_get + f3 = Temporary_File.from_stream_light stream3 + f3.should_be_a File + f3.absolute.path . should_equal tmp.unsafe_get.absolute.path + + # But if there's no association, a new temporary file gets created: + stream4 = Input_Stream.new (FileInputStream.new java_file) (File_Error.handle_java_exceptions tmp.unsafe_get) associated_file=Nothing + tmp4 = Temporary_File.from_stream_light stream4 + tmp4.should_be_a Temporary_File + tmp4.unsafe_get.absolute.path . should_not_equal tmp.unsafe_get.absolute.path + + # The base variant of from_stream also always copies: + stream5 = Input_Stream.new (FileInputStream.new java_file) (File_Error.handle_java_exceptions tmp.unsafe_get) associated_file=tmp + tmp5 = Temporary_File.from_stream stream5 + tmp5.should_be_a Temporary_File + tmp5.unsafe_get.absolute.path . 
should_not_equal tmp.unsafe_get.absolute.path + +make_stream text = + raw_stream = ByteArrayInputStream.new text.utf_8 + Input_Stream.new raw_stream (File_Error.handle_java_exceptions Nothing) diff --git a/tools/simple-httpbin/src/main/java/org/enso/shttp/SimpleHTTPBin.java b/tools/simple-httpbin/src/main/java/org/enso/shttp/SimpleHTTPBin.java index 863268e1fdb..9a6f612e10e 100644 --- a/tools/simple-httpbin/src/main/java/org/enso/shttp/SimpleHTTPBin.java +++ b/tools/simple-httpbin/src/main/java/org/enso/shttp/SimpleHTTPBin.java @@ -2,9 +2,14 @@ package org.enso.shttp; import com.sun.net.httpserver.HttpHandler; import com.sun.net.httpserver.HttpServer; +import com.sun.net.httpserver.SimpleFileServer; import java.io.IOException; import java.net.InetSocketAddress; +import java.net.URISyntaxException; +import java.nio.file.Files; +import java.nio.file.Path; import java.util.List; +import java.util.stream.Stream; import sun.misc.Signal; import sun.misc.SignalHandler; @@ -31,7 +36,9 @@ public class SimpleHTTPBin { } catch (InterruptedException e) { e.printStackTrace(); } finally { - server.stop(0); + System.out.println("Finalizing server..."); + server.stop(3); + System.out.println("Server stopped."); } } @@ -59,17 +66,19 @@ public class SimpleHTTPBin { server.addHandler(path, new TestHandler()); } + setupFileServer(server); + final SimpleHTTPBin server1 = server; SignalHandler stopServerHandler = (Signal sig) -> { - System.out.println("Stopping server..."); + System.out.println("Stopping server... 
(interrupt)"); server1.stop(); }; for (String signalName : List.of("TERM", "INT")) { Signal.handle(new Signal(signalName), stopServerHandler); } server.start(); - } catch (IOException e) { + } catch (IOException | URISyntaxException e) { e.printStackTrace(); } finally { if (server != null) { @@ -93,4 +102,34 @@ public class SimpleHTTPBin { return running; } } + + private static void setupFileServer(SimpleHTTPBin server) throws URISyntaxException { + Path myRuntimeJar = + Path.of(SimpleHTTPBin.class.getProtectionDomain().getCodeSource().getLocation().toURI()) + .toAbsolutePath(); + Path projectRoot = findProjectRoot(myRuntimeJar); + Path testFilesRoot = projectRoot.resolve(pathToWWW); + System.out.println("Serving files from directory " + testFilesRoot); + server.addHandler("/testfiles", SimpleFileServer.createFileHandler(testFilesRoot)); + } + + private static Path findProjectRoot(Path startingPoint) { + if (looksLikeProjectRoot(startingPoint)) { + return startingPoint; + } else { + Path parent = startingPoint.getParent(); + if (parent == null) { + throw new RuntimeException("Could not find project root"); + } + + return findProjectRoot(parent); + } + } + + private static final String pathToWWW = "tools/simple-httpbin/www-files"; + + private static boolean looksLikeProjectRoot(Path path) { + return Stream.of("build.sbt", "tools", "project", pathToWWW) + .allMatch(p -> Files.exists(path.resolve(p))); + } } diff --git a/tools/simple-httpbin/src/main/java/org/enso/shttp/TestHandler.java b/tools/simple-httpbin/src/main/java/org/enso/shttp/TestHandler.java index ba122f6612c..89cc197561a 100644 --- a/tools/simple-httpbin/src/main/java/org/enso/shttp/TestHandler.java +++ b/tools/simple-httpbin/src/main/java/org/enso/shttp/TestHandler.java @@ -19,9 +19,26 @@ public class TestHandler implements HttpHandler { private static final Set ignoredHeaders = Set.of("Host"); private static final Pattern textEncodingRegex = Pattern.compile(".*; charset=([^;]+).*"); + private final 
boolean logRequests = false; @Override public void handle(HttpExchange exchange) throws IOException { + try { + if (logRequests) { + System.out.println( + "Handling request: " + exchange.getRequestMethod() + " " + exchange.getRequestURI()); + } + + doHandle(exchange); + } catch (IOException e) { + e.printStackTrace(); + throw e; + } catch (Exception e) { + e.printStackTrace(); + } + } + + public void doHandle(HttpExchange exchange) throws IOException { boolean first = true; String contentType = null; String textEncoding = "UTF-8"; diff --git a/tools/simple-httpbin/www-files/table.csv b/tools/simple-httpbin/www-files/table.csv new file mode 100644 index 00000000000..f93106fd6f9 --- /dev/null +++ b/tools/simple-httpbin/www-files/table.csv @@ -0,0 +1,3 @@ +A,B +1,x +3,y diff --git a/tools/simple-httpbin/www-files/table.json b/tools/simple-httpbin/www-files/table.json new file mode 100644 index 00000000000..8bf556ec87d --- /dev/null +++ b/tools/simple-httpbin/www-files/table.json @@ -0,0 +1,4 @@ +[ + {"A": 1, "B": "x"}, + {"A": 3, "B": "y"} +] diff --git a/tools/simple-httpbin/www-files/table.xls b/tools/simple-httpbin/www-files/table.xls new file mode 100644 index 0000000000000000000000000000000000000000..66efa8d6f0e8405b133b98837d89d930f1606baa GIT binary patch literal 5632 zcmeHLU2IfU5T3i;Ew?|k+tQ+f)ax3T(jTSrAjV)9N>vgIOCX#4)@P$N4`a(4R#6%y|*uMFok(gkBr=m|p9~Of#1lr~M=H9i}Wf!)l!C*R@ zIcM&iIcMg~nKS2{J^f?N{K?NYT#y`iTuNjyQzoSzz5&|Ieg{Q-pvhazWHMPc$W^$5 zJh0%}$~E+35d%Q3`5GX3Wo~^3O=;ECWq6=R;`k-Sl1>>zosdbac=50v5#v=?OiypV zC`SHPGOPNRJa8*tvEA!`=5OZl5@7EBXRhb|F9%itD}if)RlsT>jsU3v)&kc7>wxvZ z^}xG;cLVPMZU9o}y>3nVKG2r?9dhfXS*%jG%L8R5RyI_~v5H%Q^vk1~1pbR+yZk2qZ+^@CVq0tASh*;ZE@kVmlh^Xzy zv5Owa`6PgM8+Vcic@7Fm@vUjYw2tlG6&gm1=G9SIP{}*Qe|;I}we| z>%N(hKQk7zWMS4KpN1*_a&dfmE1!HJK1awa zO$GR6H}H@9u%}XzVI54BXkC*k)w(w2*Lq!Qjn;LkK;|Wl-Dzan2_|Y}E& z$veS+e5eL{oU;^DnA8I79OMrF^?l?ne#Vkk7g~WRJKmS z@_ zHE(JehDqBLF1By>(2byvww|48J)3Ud;-ODU2*>ImCduEFv=}W$wtu!Z_<^qrjy3r3fKSS`AES>Cj3<1x@`b)E 
zBOa+u9M2itwb~d)xD78YI3{H9U}Pr-5x~IBq69SAL6gCV^Fm8`awF=lk%2^fJU*1P zIwuC~nDyABt$opcI~KFm54%Q&hMZ%=>9gz-mO4t8-P7r?h{+gvhM))Lr{DkM%Dw}^ z58pMVdGqJz=;~L2^c>r1IPYf>Al>l{koUCbf!u(DK;G950cqsL1(61qZbKabrmos# zDb1P?0e&$3^WKL;=a~ZJ^toSSq(R;Qh3uz3->AB?1K$2dnAe@sh{&<{vj08i+O2}D`-G(OzdL1<&`Oi zen+!Y<`v3eZo)U;YJ`CF%3;iZ6t`Z!{rBKsfFWHIa+0_W$H6HKjjn#C^pu|7vuFFinE(CIopklDM73Q1d*9#xwf^h6hcUDO literal 0 HcmV?d00001 diff --git a/tools/simple-httpbin/www-files/table.xlsx b/tools/simple-httpbin/www-files/table.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..6f9433566196798a658e40a966498f12d0a566cb GIT binary patch literal 4853 zcmaJ_2RK~a*4DcaBzlcbl;}bbEqaaK>tK`&(FqwrbS8T5A&8nVY6cO#chRCnA6>M6 z3)i@F48R{KstqS2x=}GmDu!)Npn|;E_7nzmP(#gHvxkhnUD(k8 zSfvjp*yy;9ivr84rPW8E&>a?-^7DlW#?Tsd!$KBXkm-v);JLFs*vh z)?i26`y;T2z0GF(Oz>mYiu-#?brQCkB9S;d_a;ZYU6>A%o{|L4>mpOvY`qX3p(}89 zx8_p$*l9M#8|qNx&e(}*MgD|`r1>$sGXhKSg3~sKoOG9DzE`J;XW{Ti89M|jij%w? zb=8uQI=>D?&M-tY<&6|q?orVOt$1*cOsD2H=g*3up;accVCn=Mz4EA=i>)Ri!?IK|JY z&ybOjTv3sb)c+PY_NEtg*I*b}26_ zC-C{!C%qDD{ies@3X9Ln8X5R%wQ<8c-$`;kMaXqoro?3E&dE$dM3z#YBHLOx%%POC7kHLj$-0fo;?-;#K0MRY-t1H*$+-mZ!QHV z|Nmj!)7EizphPb3v&=lpAq4RQxn5@WqP700m*^qrgAbN62#S;K5|Mv$TdrK$q%C2__bywBkT_e@DFbG9&(dOX;L(> zSFs*DNE^P(a!G8V!)n;0XtS$=5l8od!}!6aZ5BqYnb z|Abz$8`g6LSX;Zfa{s>ZUIV#NN5y$sn6Po@Bca=xorAt&I)70K!;7(L6g#RmJKW94 z`)uJ@?dA$ys(TwA&-}(rNTdihq=9{Sv~vv$co2WzVwrNwDVLaio($#IfKDpb#A&aS zL-W%kKQb~UMkW--)R6w3AlfR4A@MbNtw%PfXfZZ431h|KtdG1p*)5o}l4R77p$zvE zy(!7_$+P{nm~yN{#Ut5a))RC_YvX2;8oO!6(k8mfQy4OQQk+9VOu7ZqJ~dez`tlFdIU}lxj)s#n}&|pHV zc4FXS?^IO-W3p!M;xY#*@!^3;&&c6wb8o)jO4jtfm5g2d=~mrzI89Kh+DEyESCg23 z2<@9f5)*~Arb8J{Inx@#YPo>=LC8`YQ3AlSML2YlB8j$%Zf#>@IiRDteu(oT@{8tY z#VnjTks7AGFqSHazSv_Gn?_Z=c(oq8sBNp)T8ro(2K5atvhV&1-b!lmR09B8iBryV zGO?UE4Qc$8R%+(QDYlv)47$T~8* zrI)C*o>MiK`cdz6=!X|U@VmoL7@m%aA1=Kr$e_}Y5XnMZqY2*2=i;#T<{Ps~Sb&M9 z2|E@CK22v^6Fd|ag)*~C?UTkNgrfI37GRET!z;uj++}*tPoEwn{P+SBI&uRYT>{74 z&&u2UccaHSMK&bfZ(wlZoSJ3~sX>slkjiskj`TMfn0K=kjGv>QZBxCWtofYz);PMl 
zyNM0fB2`i?5dt%L@=m#o$FL-SC8^|9^-4C1qrk#M52e&zhmt~b;>?kyU<->q{}2ec zg|5AQ$mhhau+lgB6{-(1)TJEbS8|-#8}QW&b7)^(c&n86;fq5brg6;bTXPA;shT8q&?(1yK<|1g?!Bb4oKju284D! z#QrA+Ao-Vm0n9FtE(wsQ1TgbL z5{{1Kq-N@T8+I<G?^yO^6BH<(4-5-ROk~$XX?PUR%Lx@R14kPoIlA!cL0kL);|mDf6ZJbadQusn_o?FZa%7YiYb8%1ZxLpC$GD5g9Lh9qi3G!Es#ep`#xUrfme=bTx9P7;i z$iVo1)3d{M{>b&vHfcPJ;vBR$U)tTV8Uru3w-(5CdUIyy0Zh^5v_u~oh$jL_6}Szm z_J#GiEMPH@jq^GcRg1;WXslds?N2sMSs!>h-R$t8&Y6{5z((KAAk< z@UeSAG_ibxn$Z^X9NdpE4;)XI#g`=*s3#tDeFSG}3VL9Vn%dCpc;h-_PnW9uZ=N&m z?|n{|E&VwA-S>9^qqckMJ%osx2_XZ2KQjn@K(cbSR0lb`xN=)MgRFm-zM9=mi0n5U zb1=*icPwPH<4DX9EbC#&th@WyDsJ2TTuv}583((7 zG-H?sQrL5&_ZGn>g5kR~=wlk~cKU2d*~EJM{X35ev~e7sGc#SBd58V%~oEp8Gb(iBAj7?9R*rvb;6St7tX0-vIXbQrdA5f<4I!^ zy{k%+K{Kts6HzThTip|{iQ_TkqBR8p{{pnrm28L>%a@KEtL;wPpUy;4;Ea}pA$L`| zK49aMq~2|z%rmab@S+xf;&`7`ZPAK^h=}<(l~J{mF+F0A?|j40a-u!Hlky0~x97{F z2i5yRo8M@$qm_!o8cjXRx?LK9Owz?7)(WMDr@7APyCIO{+SrZ9ajP>|@+k#BHe$WO zpDF{vfJ+nAsT@=w^wU|inpqj89nuqbSo}c#*>3-Xy2Kb26Fb81cswX z)l)b&epZS|pMfzNQFV!GWALfLLR@Ce`0x~Akk~n5Kr|aYdT=A=cj@MGQaVxM#K%{r z0OzF_Pd+={5!@AaFssbnZ6R-hM#3$CBWP(s2TW$sV^NAYv|KP#ZK2i;1(d`WG&GG@ z>QEtxvWt?iPv3G<69hTo=tGul#mHR|Hq}}8Ix!W8x1|R7dqwq`pEE#J-KFo~&4p3N zkFS25SD=BI6$Up`wT^$-H~r0bR&pN*5wi(JoPQ5@T*Pet#R7k={J4HaR9?c69lx*` zACFJ>WUQ(hLkCiy`2{5PT$&3d^N_#v^8sTS6y;+^EBNpC3Q4DYC$c#VOOp?ib1pXE zh=@8T^U2M;bG$*>=%CyCpif7Uk-)O6c%%(@Ko{qPTuI3@VWa`mGZ9@h%crUfvHEW1 z6Xa-Yih5zJSTaG86B%68&53z<%qqvt*Kk*@Uq~%Gpv{oeUVFe3{+yX*%94k>)^9Mze2Y3}E z6Xl*6uP#|%#+X3tc)M08ai4rK+HxHVGIJ)|w?+-~vF8LtXn$p+aoMo9@v55_DDQ@! zgvHBY4$$Mrf=MQ+wS6apm;56;42^>zOYW(p`5;)Sqje{0uzp0PNg8-86RE6P_&wJ} zaRftUH$1ZXap0-Xx@h>DDERtn#k;-WL z)ySZEHSHA2tsZcae>CG_8NS-mV?4dxRT1Li_hV$$xbWe~xlnny>4O8#}@J z>j(aIjWsv1NHeG;M{@NT*P zi=6-TzPF37Q5NWwZMt?fqM#lB>yfI`182-m%RaZjCxc&$maY!I?ob~%>{|^Q3 BEGz&3 literal 0 HcmV?d00001