Tuning S3_File doc comments. (#10832)

- Review and update the doc comments of public functions in the AWS library.
- Reorder the functions to improve their order in the component browser (and in the online docs).
- Align some error handling.
- Fix bug with `list` on root S3.
- Hide `S3.get_object`, as its single-read stream makes it a poor fit for GUI use.
James Dunkerley 2024-08-16 18:38:43 +01:00 committed by GitHub
parent b442a38894
commit 2dbdcb29a1
7 changed files with 490 additions and 269 deletions

View File

@@ -38,7 +38,8 @@ type AWS_Credential
With_Configuration (base_credential : AWS_Credential) (default_region : AWS_Region)
## ICON cloud
Get a list of the available profiles
Returns a vector of the available profile names.
profile_names : Vector Text
profile_names = Vector.from_polyglot_array <|
ProfileReader.INSTANCE.getProfiles
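
A minimal usage sketch of the API documented above (assumes the `Standard.AWS` import path and at least one locally configured AWS profile):

    from Standard.AWS import AWS_Credential

    # Vector of profile names from the local AWS config, e.g. ["default"].
    example_profiles = AWS_Credential.profile_names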

View File

@@ -17,18 +17,20 @@ type S3_Path
## PRIVATE
parse (uri : Text) -> S3_Path ! Illegal_Argument =
if uri.starts_with S3.uri_prefix . not then Error.throw (Illegal_Argument.Error "An S3 path must start with `"+S3.uri_prefix+"`.") else
without_prefix = uri.drop S3.uri_prefix.length
first_slash_index = without_prefix.index_of S3_Path.delimiter
if first_slash_index == 0 then Error.throw (Illegal_Argument.Error "Invalid S3 path: empty bucket name.") else
if first_slash_index.is_nothing then S3_Path.Value without_prefix "" else
bucket = (without_prefix.take first_slash_index)
key = (without_prefix.drop first_slash_index+1)
normalized = Decomposed_S3_Path.parse key . normalize . key
S3_Path.Value bucket normalized
if uri.length == S3.uri_prefix.length then S3_Path.Value "" "" else
without_prefix = uri.drop S3.uri_prefix.length
first_slash_index = without_prefix.index_of S3_Path.delimiter
if first_slash_index == 0 then Error.throw (Illegal_Argument.Error "Invalid S3 path: empty bucket name.") else
if first_slash_index.is_nothing then S3_Path.Value without_prefix "" else
bucket = (without_prefix.take first_slash_index)
if bucket == "" then Error.throw (Illegal_Argument.Error "Invalid S3 path: empty bucket name with key name.") else
key = (without_prefix.drop first_slash_index+1)
normalized = Decomposed_S3_Path.parse key . normalize . key
S3_Path.Value bucket normalized
## PRIVATE
to_text self -> Text =
S3.uri_prefix + self.bucket + S3_Path.delimiter + self.key
S3.uri_prefix + (if self.bucket == "" then "" else self.bucket + S3_Path.delimiter + self.key)
## PRIVATE
to_display_text self -> Text = self.to_text.to_display_text

View File

@@ -29,30 +29,121 @@ polyglot java import software.amazon.awssdk.services.s3.model.S3Exception
polyglot java import software.amazon.awssdk.services.s3.S3Client
## ICON data_input
Gets the list of the S3 bucket names.
Arguments:
- credentials: AWS credentials. If not provided, the default credentials will
be used.
- credentials: The credentials to use to access S3. If not specified, the
default credentials are used.
Returns:
- A vector of bucket names (as Text).
! Error Conditions
- If the credentials are invalid or access to S3 is denied, then an
`AWS_SDK_Error` will be raised.
list_buckets : AWS_Credential -> Vector Text ! S3_Error
list_buckets credentials:AWS_Credential=AWS_Credential.Default = handle_s3_errors <|
list_buckets credentials:AWS_Credential=..Default = handle_s3_errors <|
client = make_client credentials
buckets = client.listBuckets.buckets
buckets.map b->b.name
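
A hedged sketch of `list_buckets` as documented (assumes default credentials with S3 access are configured; import paths assumed):

    import Standard.AWS.S3.S3
    from Standard.AWS import AWS_Credential

    # Vector of bucket names; an `AWS_SDK_Error` is raised if the credentials are rejected.
    example_buckets = S3.list_buckets AWS_Credential.Default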
## GROUP Standard.Base.Input
ICON data_input
## ICON data_input
Gets the list of the items inside a bucket.
Arguments:
- bucket: the name of the bucket.
- prefix: the prefix of keys to match.
- max_count: the maximum number of items to return. The default is 1000.
- credentials: The credentials to use to access the S3 bucket. If not
specified, the default credentials are used.
Returns:
- A vector of object keys (as Text, including the prefix).
! Error Conditions
- If the credentials are invalid or access to S3 is denied, then an
`AWS_SDK_Error` will be raised.
- If the bucket does not exist, an `S3_Bucket_Not_Found` error is thrown.
- If more items are available than the `max_count` parameter, a
`More_Records_Available` warning is attached to the result.
list_objects : Text -> Text -> AWS_Credential -> Integer -> Vector Text ! S3_Error
list_objects bucket prefix="" credentials:AWS_Credential=..Default max_count:Integer=1000 =
read_bucket bucket prefix credentials delimiter="" max_count=max_count . second
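
A sketch of a prefixed listing with a truncation cap (`my-bucket` and the prefix are hypothetical; import path assumed):

    import Standard.AWS.S3.S3

    # Up to 100 keys under `data/`; a `More_Records_Available` warning marks a truncated result.
    example_objects = S3.list_objects "my-bucket" prefix="data/" max_count=100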
## PRIVATE
ADVANCED
ICON data_input
Gets an object from an S3 bucket.
Returns a raw stream which can be read once.
Arguments:
- bucket: the name of the bucket.
- key: the key of the object.
- credentials: AWS credentials. If not provided, the default credentials will
be used.
list_objects : Text -> Text -> AWS_Credential -> Integer -> Vector Text ! S3_Error
list_objects bucket prefix="" credentials:AWS_Credential=AWS_Credential.Default max_count=1000 =
read_bucket bucket prefix credentials delimiter="" max_count=max_count . second
- delimiter: The delimiter to use for deducing the filename from the path.
get_object : Text -> Text -> AWS_Credential -> Text -> Response_Body ! S3_Error
get_object bucket key credentials:AWS_Credential=AWS_Credential.Default delimiter="/" = handle_s3_errors bucket=bucket key=key <|
request = GetObjectRequest.builder.bucket bucket . key key . build
client = make_client_for_bucket bucket credentials
response = client.getObject request
inner_response = response.response
s3_uri = URI.parse (uri_prefix + bucket + "/") / key
content_type = inner_response.contentType
name = filename_from_content_disposition inner_response.contentDisposition . if_nothing <|
key.split delimiter . last
metadata = File_Format_Metadata.Value path=key name=name content_type=content_type
input_stream = Input_Stream.new response (handle_io_errors s3_uri)
Response_Body.Raw_Stream input_stream metadata s3_uri
## ADVANCED
ICON data_input
Gets the metadata of a bucket or object.
Arguments:
- bucket: the name of the bucket.
- prefix: the prefix of keys to match.
- credentials: The credentials to use to access the S3 bucket. If not
specified, the default credentials are used.
Returns:
- A Dictionary of the associated metadata of a bucket or object.
! Error Conditions
- If the credentials are invalid or access to S3 is denied, then an
`AWS_SDK_Error` will be raised.
- If the bucket does not exist, an `S3_Bucket_Not_Found` error is thrown.
- If the object does not exist, an `S3_Key_Not_Found` error is thrown.
head : Text -> Text -> AWS_Credential -> Dictionary Text Any ! S3_Error
head bucket key="" credentials:AWS_Credential=AWS_Credential.Default =
response = raw_head bucket key credentials
pairs = response.sdkFields.map f-> [f.memberName, f.getValueOrDefault response]
Dictionary.from_vector pairs
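
A sketch of `head` in both forms described above (bucket and key names hypothetical):

    import Standard.AWS.S3.S3

    # Bucket metadata when only the bucket is given; object metadata
    # (e.g. ContentLength, ContentType) when a key is given too.
    example_bucket_meta = S3.head "my-bucket"
    example_object_meta = S3.head "my-bucket" "data/report.csv"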
## PRIVATE
Gets the raw metadata of a bucket or object.
Arguments:
- bucket: the name of the bucket.
- key: the key of the object.
- credentials: AWS credentials.
raw_head : Text -> Text -> AWS_Credential -> Dictionary Text Any ! S3_Error
raw_head bucket key credentials =
client = make_client_for_bucket bucket credentials
case key == "" of
True ->
request = HeadBucketRequest.builder.bucket bucket . build
handle_s3_errors bucket=bucket <| client.headBucket request
False ->
request = HeadObjectRequest.builder.bucket bucket . key key . build
handle_s3_errors bucket=bucket key=key <| client.headObject request
## PRIVATE
Reads an S3 bucket returning a pair of vectors, one with common prefixes and
@@ -87,67 +178,6 @@ read_bucket bucket prefix="" credentials:AWS_Credential=AWS_Credential.Default d
iterator request 0 [] [] True
## ADVANCED
ICON data_input
Gets the metadata of a bucket or object.
Arguments:
- bucket: the name of the bucket.
- key: the key of the object.
- credentials: AWS credentials. If not provided, the default credentials will
be used.
head : Text -> Text -> AWS_Credential -> Dictionary Text Any ! S3_Error
head bucket key="" credentials:AWS_Credential=AWS_Credential.Default =
response = raw_head bucket key credentials
pairs = response.sdkFields.map f-> [f.memberName, f.getValueOrDefault response]
Dictionary.from_vector pairs
## PRIVATE
Gets the raw metadata of a bucket or object.
Arguments:
- bucket: the name of the bucket.
- key: the key of the object.
- credentials: AWS credentials.
raw_head : Text -> Text -> AWS_Credential -> Dictionary Text Any ! S3_Error
raw_head bucket key credentials =
client = make_client_for_bucket bucket credentials
case key == "" of
True ->
request = HeadBucketRequest.builder.bucket bucket . build
handle_s3_errors bucket=bucket <| client.headBucket request
False ->
request = HeadObjectRequest.builder.bucket bucket . key key . build
handle_s3_errors bucket=bucket key=key <| client.headObject request
## ADVANCED
ICON data_input
Gets an object from an S3 bucket.
Returns a raw stream which can be read once.
Arguments:
- bucket: the name of the bucket.
- key: the key of the object.
- credentials: AWS credentials. If not provided, the default credentials will
be used.
- delimiter: The delimiter to use for deducing the filename from the path.
get_object : Text -> Text -> AWS_Credential -> Text -> Response_Body ! S3_Error
get_object bucket key credentials:AWS_Credential=AWS_Credential.Default delimiter="/" = handle_s3_errors bucket=bucket key=key <|
request = GetObjectRequest.builder.bucket bucket . key key . build
client = make_client_for_bucket bucket credentials
response = client.getObject request
inner_response = response.response
s3_uri = URI.parse (uri_prefix + bucket + "/") / key
content_type = inner_response.contentType
name = filename_from_content_disposition inner_response.contentDisposition . if_nothing <|
key.split delimiter . last
metadata = File_Format_Metadata.Value path=key name=name content_type=content_type
input_stream = Input_Stream.new response (handle_io_errors s3_uri)
Response_Body.Raw_Stream input_stream metadata s3_uri
## PRIVATE
put_object (bucket : Text) (key : Text) credentials:AWS_Credential=AWS_Credential.Default request_body = handle_s3_errors bucket=bucket key=key <|
client = make_client_for_bucket bucket credentials

View File

@@ -16,6 +16,7 @@ from Standard.Base.System.File import find_extension_from_name
from Standard.Base.System.File.Generic.File_Write_Strategy import generic_copy
import project.AWS_Credential.AWS_Credential
import project.Errors.S3_Bucket_Not_Found
import project.Errors.S3_Error
import project.Errors.S3_Key_Not_Found
import project.Internal.S3_File_Write_Strategy
@@ -26,7 +27,8 @@ import project.S3.S3
If the path ends with a slash, it is a folder. Otherwise, it is a file.
type S3_File
## ICON data_input
Given an S3 URI create a file representation.
Given an S3 URI, create a file representation.
Arguments:
- uri: The URI of the file.
@@ -34,63 +36,20 @@ type S3_File
If the path contains `.` or `..` segments, they will be normalized.
- credentials: The credentials to use when accessing the file.
If not specified, the default credentials are used.
Note, the credentials are not verified until the file is accessed.
Returns:
- An `S3_File` object representing the file.
! Error Conditions
- If the URI is not in the correct format, an `Illegal_Argument` error is
thrown.
new : Text -> AWS_Credential -> S3_File ! Illegal_Argument
new (uri : Text = S3.uri_prefix) credentials:AWS_Credential=AWS_Credential.Default =
new (uri : Text = S3.uri_prefix) credentials:AWS_Credential=..Default =
S3_File.Value (S3_Path.parse uri) credentials
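
A sketch of constructing a file handle (bucket and key hypothetical; import path assumed):

    from Standard.AWS import S3_File

    # `.` and `..` segments are normalized; credentials are only checked on first access.
    example_file = S3_File.new "s3://my-bucket/data/./report.csv"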
## PRIVATE
Value (s3_path : S3_Path) credentials:AWS_Credential
## GROUP Standard.Base.Metadata
ICON metadata
Gets the URI of this file
uri : Text
uri self -> Text = self.s3_path.to_text
## GROUP Standard.Base.Metadata
ICON metadata
Checks if the folder or file exists
exists : Boolean
exists self = if self.s3_path.bucket == "" then True else
if self.s3_path.is_root then translate_file_errors self <| S3.head self.s3_path.bucket "" self.credentials . is_error . not else
pair = translate_file_errors self <| S3.read_bucket self.s3_path.bucket self.s3_path.key self.credentials delimiter=S3_Path.delimiter max_count=1
pair.second.contains self.s3_path.key
## GROUP Standard.Base.Metadata
ICON metadata
Checks if this is a folder.
is_directory : Boolean
is_directory self = self.s3_path.is_directory
## GROUP Standard.Base.Metadata
ICON metadata
Checks if this is a regular file.
is_regular_file : Boolean
is_regular_file self = self.is_directory.not
## GROUP Standard.Base.Metadata
ICON metadata
Gets the size of a file in bytes.
size : Integer
size self =
if self.is_directory then Error.throw (S3_Error.Error "size can only be called on files." self.uri) else
content_length = translate_file_errors self <| S3.raw_head self.s3_path.bucket self.s3_path.key self.credentials . contentLength
if content_length.is_nothing then Error.throw (S3_Error.Error "ContentLength header is missing." self.uri) else content_length
## GROUP Output
ICON folder_add
Creates the directory represented by this file if it did not exist.
It also creates parent directories if they did not exist.
? S3 Handling of Directories
S3 does not have a native concept of directories.
create_directory : File
create_directory self =
## TODO Add more information about how S3 handles directories.
https://github.com/enso-org/enso/issues/9704
Unimplemented.throw "Creating S3 folders is currently not implemented."
private Value (s3_path : S3_Path) credentials:AWS_Credential
## PRIVATE
ADVANCED
@@ -130,14 +89,14 @@ type S3_File
Creates a new input stream for this file and runs the specified action
on it.
The created stream is automatically closed when `action` returns (even
if it returns exceptionally).
Arguments:
- open_options: A vector of `File_Access` objects determining how to open
the stream. These options set the access properties of the stream.
- action: A function that operates on the input stream and returns some
value. The value is returned from this method.
The created stream is automatically closed when `action` returns (even
if it returns exceptionally).
with_input_stream : Vector File_Access -> (Input_Stream -> Any ! File_Error) -> Any ! S3_Error | Illegal_Argument
with_input_stream self (open_options : Vector) action = if self.is_directory then Error.throw (Illegal_Argument.Error "S3 folders cannot be opened as a stream." self.uri) else
open_as_data_link = (open_options.contains Data_Link_Access.No_Follow . not) && (Data_Link.is_data_link self)
@@ -149,23 +108,35 @@ type S3_File
## ALIAS load, open
GROUP Standard.Base.Input
ICON data_input
Read a file using the specified file format
Arguments:
- format: A `File_Format` object used to read file into memory.
If `Auto_Detect` is specified, the provided file determines the specific
type and configures it appropriately. If there is no matching type then
a `File_Error.Unsupported_Type` error is returned.
type and configures it appropriately.
- on_problems: Specifies the behavior when a problem occurs during the
function.
By default, a warning is issued, but the operation proceeds.
If set to `Report_Error`, the operation fails with a dataflow error.
If set to `Ignore`, the operation proceeds without errors or warnings.
Returns:
- The contents of the file read using the specified `File_Format`.
! Error Conditions
- If the bucket does not exist, an `S3_Bucket_Not_Found` error is thrown.
- If the target is a directory, an `Illegal_Argument` error is thrown.
- If the file does not exist, a `File_Error.Not_Found` error is thrown.
- If the access to the object is forbidden, an `S3_Error.Access_Denied`
error is thrown.
- If using `Auto_Detect` and there is no matching type, a
`File_Error.Unsupported_Type` error is returned.
@format File_Format.default_widget
read : File_Format -> Problem_Behavior -> Any ! S3_Error
read self format=Auto_Detect (on_problems : Problem_Behavior = ..Report_Warning) =
if Data_Link.is_data_link self then Data_Link_Helpers.read_data_link self format on_problems else
if self.is_directory then Error.throw (Illegal_Argument.Error "Cannot `read` a directory, use `list`.") else
if self.is_directory then Error.throw (Illegal_Argument.Error "Cannot `read` a directory, use `list`.") else
if Data_Link.is_data_link self then Data_Link_Helpers.read_data_link self format on_problems else
case format of
Auto_Detect ->
response = translate_file_errors self <| S3.get_object self.s3_path.bucket self.s3_path.key self.credentials delimiter=S3_Path.delimiter
@@ -175,11 +146,61 @@ type S3_File
resolved_format = File_Format.resolve format
self.with_input_stream [File_Access.Read] (stream-> resolved_format.read_stream stream metadata)
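
A sketch of `read` with format auto-detection (names hypothetical; import path assumed):

    from Standard.AWS import S3_File

    # Auto_Detect resolves the format from the key, so a `.csv` object loads as a table.
    example_read = S3_File.new "s3://my-bucket/data/report.csv" . read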
## GROUP Standard.Base.Input
ICON data_input
Lists files contained in the directory denoted by this file.
Note, as S3 does not have a native notion of directories, this operation
will return an empty Vector if the folder does not exist.
Arguments:
- name_filter: A glob pattern that can be used to filter the returned
files. If it is not specified, all files are returned.
- recursive: Specifies whether the returned list of files should include
also files from the subdirectories. If set to `False` (the default),
only the immediate children of the listed directory are considered.
Returns:
- A vector of `S3_File` objects representing the files in the directory.
! Error Conditions
- If the bucket does not exist, an `S3_Bucket_Not_Found` error is thrown.
- If the target is a file, an `Illegal_Argument` error is thrown.
- If the access to the object is forbidden, an `S3_Error.Access_Denied`
error is thrown.
- If using `name_filter` or `recursive`, an `Unimplemented` error is
thrown as these are currently unsupported.
list : Text -> Boolean -> Vector S3_File
list self name_filter:Text="" recursive:Boolean=False =
check_name_filter action = if name_filter != "" then Unimplemented.throw "S3 listing with name filter is not currently implemented." else action
check_recursion action = if recursive then Unimplemented.throw "S3 listing with recursion is not currently implemented." else action
check_directory action = if self.is_directory.not then Error.throw (Illegal_Argument.Error "Cannot `list` a non-directory." self.uri) else action
check_directory <| check_recursion <| check_name_filter <|
if self.s3_path.bucket == "" then translate_file_errors self <| S3.list_buckets self.credentials . map bucket-> S3_File.Value (S3_Path.Value bucket "") self.credentials else
pair = translate_file_errors self <| S3.read_bucket self.s3_path.bucket self.s3_path.key self.credentials delimiter=S3_Path.delimiter
bucket = self.s3_path.bucket
sub_folders = pair.first . map key->
S3_File.Value (S3_Path.Value bucket key) self.credentials
files = pair.second . map key->
S3_File.Value (S3_Path.Value bucket key) self.credentials
sub_folders + files
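
A sketch tying this to the root-listing fix from the commit message (bucket name hypothetical):

    from Standard.AWS import S3_File

    # The root URI now lists buckets; a directory URI lists its immediate children.
    example_buckets = S3_File.new "s3://" . list
    example_children = S3_File.new "s3://my-bucket/data/" . list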
## ALIAS load bytes, open bytes
ICON data_input
Reads all bytes in this file into a byte vector.
Returns:
- The contents of the file as a vector of bytes.
! Error Conditions
- If the bucket does not exist, an `S3_Bucket_Not_Found` error is thrown.
- If the target is a directory, an `Illegal_Argument` error is thrown.
- If the file does not exist, a `File_Error.Not_Found` error is thrown.
- If the access to the object is forbidden, an `S3_Error.Access_Denied`
error is thrown.
read_bytes : Vector ! File_Error
read_bytes self =
read_bytes self = if self.is_directory then Error.throw (Illegal_Argument.Error "Cannot `read_bytes` of a directory.") else
self.read Bytes
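
A sketch of a whole-object binary read (key hypothetical):

    from Standard.AWS import S3_File

    # Reads the entire object into memory as a byte vector.
    example_bytes = S3_File.new "s3://my-bucket/images/logo.png" . read_bytes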
## ALIAS load text, open text
@@ -190,56 +211,43 @@ type S3_File
- encoding: The text encoding to decode the file with. Defaults to UTF-8.
- on_problems: Specifies the behavior when a problem occurs during the
function.
By default, a warning is issued, but the operation proceeds.
If set to `Report_Error`, the operation fails with a dataflow error.
If set to `Ignore`, the operation proceeds without errors or warnings.
Returns:
- The contents of the file as `Text` decoded with the specified encoding.
! Error Conditions
- If the bucket does not exist, an `S3_Bucket_Not_Found` error is thrown.
- If the target is a directory, an `Illegal_Argument` error is thrown.
- If the file does not exist, a `File_Error.Not_Found` error is thrown.
- If the access to the object is forbidden, an `S3_Error.Access_Denied`
error is thrown.
- If there is a problem decoding the byte stream to text, then an
`Encoding_Error` will be raised following the `on_problems` behavior.
@encoding Encoding.default_widget
read_text : Encoding -> Problem_Behavior -> Text ! File_Error
read_text self (encoding : Encoding = Encoding.default) (on_problems : Problem_Behavior = ..Report_Warning) =
self.read (Plain_Text_Format.Plain_Text encoding) on_problems
if self.is_directory then Error.throw (Illegal_Argument.Error "Cannot `read_text` of a directory.") else
self.read (Plain_Text_Format.Plain_Text encoding) on_problems
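
A sketch of `read_text` with an explicit encoding (key hypothetical):

    from Standard.Base import all
    from Standard.AWS import S3_File

    # Equivalent to the default, shown explicitly for illustration.
    example_text = S3_File.new "s3://my-bucket/notes.txt" . read_text Encoding.utf_8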
## ICON data_output
Deletes the object.
Copies the S3 file to the specified destination.
Arguments:
- recursive: If the target is a non-empty directory, it will only be
removed if this is set to `True`. Defaults to `False`, meaning that the
operation will fail if the directory is not empty. This option has no
effect for files or data links.
delete : Boolean -> Nothing
delete self (recursive : Boolean = False) =
if self.exists.not then Error.throw (File_Error.Not_Found self) else
self.delete_if_exists recursive
## ICON data_output
Deletes the file if it had existed.
Arguments:
- recursive: If the target is a non-empty directory, it will only be
removed if this is set to `True`. Defaults to `False`, meaning that the
operation will fail if the directory is not empty. This option has no
effect for files or data links.
delete_if_exists : Boolean -> Nothing
delete_if_exists self (recursive : Boolean = False) =
Context.Output.if_enabled disabled_message="Deleting an S3_File is forbidden as the Output context is disabled." panic=False <|
case self.is_directory of
True ->
# This is a temporary simplified implementation to ensure cleaning up after tests
# TODO improve recursive deletion for S3 folders: https://github.com/enso-org/enso/issues/9704
children = self.list
if children.is_empty.not && recursive.not then Error.throw (File_Error.Directory_Not_Empty self) else
r = children.map child-> child.delete_if_exists recursive
r.if_not_error self
False -> translate_file_errors self <| S3.delete_object self.s3_path.bucket self.s3_path.key self.credentials . if_not_error Nothing
## ICON data_output
Copies the file to the specified destination.
Arguments:
- destination: the destination to move the file to.
- destination: the destination to copy the file to.
- replace_existing: specifies if the operation should proceed if the
destination file already exists. Defaults to `False`.
copy_to : File_Like -> Boolean -> Any ! File_Error
Returns:
- The destination file if the operation was successful.
! Error Conditions
- Unless `replace_existing` is set to `True`, if the destination file
already exists, a `File_Error` is thrown.
- If the source is a directory, an `S3_Error` will occur as this is not
currently supported.
- If the Output operations are disabled, a `Forbidden_Operation` panic
will occur.
copy_to : File_Like -> Boolean -> Any ! S3_Error | File_Error
copy_to self (destination : File_Like) (replace_existing : Boolean = False) = Data_Link_Helpers.disallow_links_in_copy self destination <|
if self.is_directory then Error.throw (S3_Error.Error "Copying S3 folders is currently not implemented." self.uri) else
Context.Output.if_enabled disabled_message="Copying an S3_File is forbidden as the Output context is disabled." panic=False <|
@@ -257,33 +265,125 @@ type S3_File
## ICON data_output
Moves the file to the specified destination.
Arguments:
- destination: the destination to move the file to.
- replace_existing: specifies if the operation should proceed if the
destination file already exists. Defaults to `False`.
Returns:
- The destination file if the operation was successful.
! Error Conditions
- Unless `replace_existing` is set to `True`, if the destination file
already exists, a `File_Error` is thrown.
- If the source is a directory, an `S3_Error` will occur as this is not
currently supported.
- If the Output operations are disabled, a `Forbidden_Operation` panic
will occur.
! S3 Move is a Copy and Delete
Since S3 does not support moving files, this operation is implemented
as a copy followed by delete. Keep in mind that the space usage of the
file will briefly be doubled and that the operation may not be as fast
as a local move often is.
move_to : File_Like -> Boolean -> Any ! File_Error
move_to self (destination : File_Like) (replace_existing : Boolean = False) =
if self.is_directory then Error.throw (S3_Error.Error "Moving S3 folders is currently not implemented." self.uri) else
Data_Link_Helpers.disallow_links_in_move self destination <|
Context.Output.if_enabled disabled_message="File moving is forbidden as the Output context is disabled." panic=False <|
r = self.copy_to destination replace_existing=replace_existing
r.if_not_error <|
# If source and destination are the same, we do not want to delete the file
if destination.underlying == self then r else
self.delete.if_not_error r
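
A sketch of the copy-and-delete move described above (paths hypothetical; requires the Output context to be enabled):

    from Standard.AWS import S3_File

    # Copies to the destination, then deletes the source on success.
    source = S3_File.new "s3://my-bucket/staging/report.csv"
    example_moved = source.move_to (S3_File.new "s3://my-bucket/archive/report.csv")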
## ICON data_output
Deletes the object.
Arguments:
- destination: the destination to move the file to.
- replace_existing: specifies if the operation should proceed if the
destination file already exists. Defaults to `False`.
move_to : File_Like -> Boolean -> Any ! File_Error
move_to self (destination : File_Like) (replace_existing : Boolean = False) = Data_Link_Helpers.disallow_links_in_move self destination <|
Context.Output.if_enabled disabled_message="File moving is forbidden as the Output context is disabled." panic=False <|
r = self.copy_to destination replace_existing=replace_existing
r.if_not_error <|
# If source and destination are the same, we do not want to delete the file
if destination.underlying == self then r else
self.delete.if_not_error r
- recursive: If the target is a non-empty directory, it will only be
removed if this is set to `True`. Defaults to `False`, meaning that the
operation will fail if the directory is not empty. This option has no
effect for files or data links.
! Error Conditions
- If the target is a directory and `recursive` is `False`, a
`File_Error.Directory_Not_Empty` error is thrown.
- If the bucket or file does not exist, a `File_Error.Not_Found` error is
thrown.
- If the access to the object is forbidden, an `S3_Error.Access_Denied`
error is thrown.
- If the Output operations are disabled, a `Forbidden_Operation` panic
will occur.
delete : Boolean -> Nothing
delete self (recursive : Boolean = False) =
if self.exists.not then Error.throw (File_Error.Not_Found self) else
self.delete_if_exists recursive
## ICON data_output
Deletes the file if it had existed.
Arguments:
- recursive: If the target is a non-empty directory, it will only be
removed if this is set to `True`. Defaults to `False`, meaning that the
operation will fail if the directory is not empty. This option has no
effect for files or data links.
! Error Conditions
- If the target is a directory and `recursive` is `False`, a
`File_Error.Directory_Not_Empty` error is thrown.
- If the bucket does not exist, an `S3_Bucket_Not_Found` error is thrown.
- If the access to the object is forbidden, an `S3_Error.Access_Denied`
error is thrown.
- If the Output operations are disabled, a `Forbidden_Operation` panic
will occur.
delete_if_exists : Boolean -> Nothing
delete_if_exists self (recursive : Boolean = False) =
Context.Output.if_enabled disabled_message="Deleting an S3_File is forbidden as the Output context is disabled." panic=False <|
case self.is_directory of
True ->
# This is a temporary simplified implementation to ensure cleaning up after tests
# TODO improve recursive deletion for S3 folders: https://github.com/enso-org/enso/issues/9704
children = self.list
if children.is_empty.not && recursive.not then Error.throw (File_Error.Directory_Not_Empty self) else
r = children.map child-> child.delete_if_exists recursive
r.if_not_error self
False -> translate_file_errors self <| S3.delete_object self.s3_path.bucket self.s3_path.key self.credentials . if_not_error Nothing
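
A sketch contrasting the two delete variants (key hypothetical; Output context must be enabled):

    from Standard.AWS import S3_File

    # `delete_if_exists` succeeds even if the object is already gone;
    # `delete` would raise `File_Error.Not_Found` instead.
    example_cleanup = S3_File.new "s3://my-bucket/tmp/scratch.txt" . delete_if_exists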
## GROUP Output
ICON folder_add
Creates the directory represented by this file if it did not exist.
It also creates parent directories if they did not exist.
Returns:
- An `Unimplemented` error as this operation is not currently supported.
! S3 Directory Handling
Note that regular S3 buckets do not have a 'native' notion of
directories; instead, they are emulated using prefixes and a delimiter
(in Enso, the delimiter is set to "/").
The trailing slash determines if the given path is treated as a
directory or as a regular file.
create_directory : File
create_directory self =
## TODO Add more information about how S3 handles directories.
https://github.com/enso-org/enso/issues/9704
Unimplemented.throw "Creating S3 folders is currently not implemented."
## GROUP Standard.Base.Operators
ICON folder
Join two path segments together, normalizing the `..` and `.` subpaths.
Join two path segments together, normalizing the `..` and `.` sub-paths.
Arguments:
- subpath: The path to join to the path of `self`.
Returns:
- An `S3_File` representing the joined and normalised path, with the same
credentials.
! S3 Directory Handling
Note that regular S3 buckets do not have a 'native' notion of
@@ -308,9 +408,27 @@ type S3_File
Join two or more path segments together, normalizing the `..` and `.` subpaths.
Arguments:
- subpaths: The path segment or segments to join to the path of `self`.
- sub-paths: The path segment or segments to join to the path of `self`.
See `/` for more information about S3 directory handling.
Returns:
- An `S3_File` representing the joined and normalised path, with the same
credentials.
! S3 Directory Handling
Note that regular S3 buckets do not have a 'native' notion of
directories; instead, they are emulated using prefixes and a delimiter
(in Enso, the delimiter is set to "/").
The trailing slash determines if the given path is treated as a
directory or as a regular file.
However, for ease-of-use, if a path without a trailing slash is used
with the `/` operator, it will be accepted and the sub-paths will be
resolved, even though such a path would not be treated as a directory
by any other operations.
See: https://docs.aws.amazon.com/AmazonS3/latest/userguide/using-folders.html
join : (Vector | Text) -> S3_File
join self (subpaths : Vector | Text) =
vec = Vector.unify_vector_or_element subpaths
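
A sketch of path resolution with `/` and `join` (names hypothetical):

    from Standard.AWS import S3_File

    # A parent without a trailing slash is accepted, and `..` segments are normalized.
    example_child = S3_File.new "s3://my-bucket" / "data" / ".." / "reports" / "q3.csv"
    example_join = S3_File.new "s3://my-bucket" . join ["data", "reports"]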
@@ -319,7 +437,141 @@ type S3_File
## GROUP Standard.Base.Metadata
ICON metadata
Returns the name of this file.
name : Text
name self = self.s3_path.file_name
## GROUP Standard.Base.Metadata
ICON metadata
Returns the extension of the file.
extension : Text
extension self = if self.is_directory then Error.throw (S3_Error.Error "Directories do not have extensions." self.uri) else
find_extension_from_name self.name
## GROUP Standard.Base.Metadata
ICON metadata
Returns the path of this file.
path : Text
path self = self.s3_path.to_text
## GROUP Standard.Base.Metadata
ICON metadata
Returns the URI of this file
The URI is in the form `s3://bucket/path/to/file`.
uri : Text
uri self -> Text = self.s3_path.to_text
## GROUP Standard.Base.Metadata
ICON metadata
Checks if the folder or file exists
Returns:
- `True` if the bucket or object exists, `False` otherwise.
! Error Conditions
- If the credential is invalid, an `AWS_SDK_Error` is thrown.
- If access is denied to the bucket, an `S3_Error` is thrown.
exists : Boolean
exists self = if self.s3_path.bucket == "" then True else
raw_result = if self.s3_path.is_root then translate_file_errors self <| S3.head self.s3_path.bucket "" self.credentials . is_error . not else
pair = translate_file_errors self <| S3.read_bucket self.s3_path.bucket self.s3_path.key self.credentials delimiter=S3_Path.delimiter max_count=1
pair.second.contains self.s3_path.key
raw_result.catch S3_Bucket_Not_Found _->False
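
A sketch of the new `exists` behavior (names hypothetical):

    from Standard.AWS import S3_File

    # Now returns `False` for a missing bucket as well as for a missing key,
    # instead of failing with `S3_Bucket_Not_Found`.
    example_exists = S3_File.new "s3://my-bucket/data/report.csv" . exists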
## GROUP Standard.Base.Metadata
ICON metadata
Returns the size of a file in bytes.
! Error Conditions
- If the `S3_File` represents a directory, an `S3_Error` error is thrown.
- If the bucket or object does not exist, a `File_Error.Not_Found` is
thrown.
- If the object is not accessible, an `S3_Error` is thrown.
size : Integer
size self =
if self.is_directory then Error.throw (S3_Error.Error "size can only be called on files." self.uri) else
content_length = translate_file_errors self <| S3.raw_head self.s3_path.bucket self.s3_path.key self.credentials . contentLength
if content_length.is_nothing then Error.throw (S3_Error.Error "ContentLength header is missing." self.uri) else content_length
## GROUP Standard.Base.Metadata
ICON metadata
Gets the creation time of a file.
Returns:
- An `S3_Error` error as only the last modified time is available for S3
objects.
creation_time : Date_Time ! File_Error
creation_time self =
Error.throw (S3_Error.Error "Creation time is not available for S3 files, consider using `last_modified_time` instead." self.uri)
## GROUP Standard.Base.Metadata
ICON metadata
Returns the last modified time of a file.
! Error Conditions
- If the `S3_File` represents a directory, an `S3_Error` error is thrown.
- If the bucket or object does not exist, a `File_Error.Not_Found` is
thrown.
- If the object is not accessible, an `S3_Error` is thrown.
last_modified_time : Date_Time ! File_Error
last_modified_time self =
if self.is_directory then Error.throw (S3_Error.Error "`last_modified_time` can only be called on files." self.uri) else
instant = translate_file_errors self <| S3.raw_head self.s3_path.bucket self.s3_path.key self.credentials . lastModified
if instant.is_nothing then Error.throw (S3_Error.Error "Missing information for: lastModified" self.uri) else
instant.at_zone Time_Zone.system
## GROUP Standard.Base.Metadata
ICON metadata
Checks if this is a folder.
Returns:
- `True` if the S3 path represents a folder, `False` otherwise.
! S3 Directory Handling
Note that regular S3 buckets do not have a 'native' notion of
directories; instead, they are emulated using prefixes and a delimiter
(in Enso, the delimiter is set to "/").
The trailing slash determines if the given path is treated as a
directory or as a regular file.
is_directory : Boolean
is_directory self = self.s3_path.is_directory
## GROUP Standard.Base.Metadata
ICON metadata
Checks if this is a regular file.
Returns:
- `True` if the S3 path represents a file, `False` otherwise.
! S3 Directory Handling
Note that regular S3 buckets do not have a 'native' notion of
directories; instead, they are emulated using prefixes and a delimiter
(in Enso, the delimiter is set to "/").
The trailing slash determines if the given path is treated as a
directory or as a regular file.
is_regular_file : Boolean
is_regular_file self = self.is_directory.not
## GROUP Standard.Base.Metadata
ICON metadata
Resolves the parent of this file.
Returns:
- The parent of this file as an `S3_File` object, or `Nothing` if this
is a top-level path.
parent : S3_File | Nothing
parent self =
parent_path = self.s3_path.parent
@@ -328,78 +580,14 @@ type S3_File
## GROUP Standard.Base.Metadata
ICON metadata
Checks if `self` is a descendant of `other`.
Returns:
- `True` if `self` is a descendant of `other`, `False` otherwise.
is_descendant_of : S3_File -> Boolean
is_descendant_of self other = self.s3_path.is_descendant_of other.s3_path
## GROUP Standard.Base.Metadata
ICON metadata
Returns the path of this file.
path : Text
path self = self.s3_path.to_text
## GROUP Standard.Base.Metadata
ICON metadata
Returns the name of this file.
name : Text
name self = self.s3_path.file_name
## GROUP Standard.Base.Metadata
ICON metadata
Returns the extension of the file.
extension : Text
extension self = if self.is_directory then Error.throw (S3_Error.Error "Directories do not have extensions." self.uri) else
find_extension_from_name self.name
## GROUP Standard.Base.Metadata
ICON metadata
Gets the creation time of a file.
creation_time : Date_Time ! File_Error
creation_time self =
Error.throw (S3_Error.Error "Creation time is not available for S3 files, consider using `last_modified_time` instead." self.uri)
## GROUP Standard.Base.Metadata
ICON metadata
Gets the last modified time of a file.
last_modified_time : Date_Time ! File_Error
last_modified_time self =
if self.is_directory then Error.throw (S3_Error.Error "`last_modified_time` can only be called on files." self.uri) else
instant = translate_file_errors self <| S3.raw_head self.s3_path.bucket self.s3_path.key self.credentials . lastModified
if instant.is_nothing then Error.throw (S3_Error.Error "Missing information for: lastModified" self.uri) else
instant.at_zone Time_Zone.system
## GROUP Standard.Base.Input
ICON data_input
Lists files contained in the directory denoted by this file.
Arguments:
- name_filter: A glob pattern that can be used to filter the returned
files. If it is not specified, all files are returned.
- recursive: Specifies whether the returned list of files should include
also files from the subdirectories. If set to `False` (the default),
only the immediate children of the listed directory are considered.
The `name_filter` can contain the following special characters:
If `recursive` is set to True and a `name_filter` does not contain `**`,
it will be automatically prefixed with `**/` to allow matching files in
subdirectories.
list : Text -> Boolean -> Vector S3_File
list self name_filter:Text="" recursive:Boolean=False =
check_name_filter action = if name_filter != "" then Unimplemented.throw "S3 listing with name filter is not currently implemented." else action
check_recursion action = if recursive then Unimplemented.throw "S3 listing with recursion is not currently implemented." else action
check_directory action = if self.is_directory.not then Error.throw (S3_Error.Error "Cannot `list` a non-directory." self.uri) else action
check_directory <| check_recursion <| check_name_filter <|
if self.s3_path.bucket == "" then translate_file_errors self <| S3.list_buckets self.credentials . map bucket-> S3_File.Value bucket "" self.credentials else
pair = translate_file_errors self <| S3.read_bucket self.s3_path.bucket self.s3_path.key self.credentials delimiter=S3_Path.delimiter
bucket = self.s3_path.bucket
sub_folders = pair.first . map key->
S3_File.Value (S3_Path.Value bucket key) self.credentials
files = pair.second . map key->
S3_File.Value (S3_Path.Value bucket key) self.credentials
sub_folders + files
## PRIVATE
File_Format_Metadata.from (that : S3_File) = File_Format_Metadata.Value that.uri that.name (that.extension.catch _->Nothing)

View File

@@ -264,8 +264,8 @@ Text.characters self =
example_find_insensitive =
## This matches `aBc` @ character 11
"aabbbbccccaaBcaaaa".find "a[ab]c" Case_Sensitivity.Insensitive
Text.find : Text -> Case_Sensitivity -> Match | Nothing ! Regex_Syntax_Error | Illegal_Argument
Text.find self pattern:(Regex|Text)=".*" case_sensitivity=Case_Sensitivity.Sensitive =
Text.find : (Regex | Text) -> Case_Sensitivity -> Match | Nothing ! Regex_Syntax_Error | Illegal_Argument
Text.find self pattern:(Regex | Text)=".*" case_sensitivity=Case_Sensitivity.Sensitive =
case_insensitive = case_sensitivity.is_case_insensitive_in_memory
compiled_pattern = Regex.compile pattern case_insensitive=case_insensitive
compiled_pattern.match self
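
A sketch of the widened signature, passing a precompiled pattern instead of `Text`:

    from Standard.Base import all

    # Accepts a `Regex` as well as `Text`; matches `abc` at character 1.
    example_find_regex = "aabcbb".find (Regex.compile "a[ab]c")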

View File

@@ -51,8 +51,8 @@ type Regex
! Error Conditions
- If an empty regex is used, `find` throws an `Illegal_Argument` error.
compile : Text -> Boolean -> Regex ! Regex_Syntax_Error | Illegal_Argument
compile expression:(Regex|Text) case_insensitive:Boolean=False = case expression of
compile : (Regex | Text) -> Boolean -> Regex ! Regex_Syntax_Error | Illegal_Argument
compile expression:(Regex | Text) case_insensitive:Boolean=False = case expression of
_ : Regex -> if case_insensitive == expression.case_insensitive then expression else
expression.recompile (if case_insensitive then Case_Sensitivity.In_Sensitive else Case_Sensitivity.Sensitive)
_ : Text ->
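
A sketch of the `Regex`-accepting branch shown above:

    from Standard.Base import all

    pattern = Regex.compile "a+"
    # Returned unchanged: the case sensitivity already matches.
    example_same = Regex.compile pattern
    # Recompiled, because the requested case sensitivity differs.
    example_insensitive = Regex.compile pattern case_insensitive=True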

View File

@@ -327,7 +327,7 @@ add_specs suite_builder =
# But the file cannot be listed:
err = new_file.list
err.should_fail_with S3_Error
err.should_fail_with Illegal_Argument
err.catch.to_display_text . should_contain "Cannot `list` a non-directory."
new_file.delete . should_succeed