mirror of
https://github.com/enso-org/enso.git
synced 2024-11-22 03:32:23 +03:00
Initial implementation of Data.read_many
(#11490)
- Part of #11311 - Adds ability to read a list of files (Vector, Column, Table) into a Vector. - Reading into a Table of objects or merged will come in a next PR.
This commit is contained in:
parent
67db825587
commit
e76fe907d3
@ -49,11 +49,14 @@
|
||||
programmatically.][11255]
|
||||
- [DB_Table may be saved as a Data Link.][11371]
|
||||
- [Support for dates before 1900 in Excel and signed AWS requests.][11373]
|
||||
- [Added `Data.read_many` that allows reading a list of files in a single
|
||||
operation.][11490]
|
||||
|
||||
[11235]: https://github.com/enso-org/enso/pull/11235
|
||||
[11255]: https://github.com/enso-org/enso/pull/11255
|
||||
[11371]: https://github.com/enso-org/enso/pull/11371
|
||||
[11373]: https://github.com/enso-org/enso/pull/11373
|
||||
[11490]: https://github.com/enso-org/enso/pull/11490
|
||||
|
||||
#### Enso Language & Runtime
|
||||
|
||||
|
@ -1,5 +1,7 @@
|
||||
import project.Any.Any
|
||||
import project.Data.Pair.Pair
|
||||
import project.Data.Read.Many_Files_List.Many_Files_List
|
||||
import project.Data.Read.Return_As.Return_As
|
||||
import project.Data.Text.Encoding.Encoding
|
||||
import project.Data.Text.Text
|
||||
import project.Data.Vector.Vector
|
||||
@ -27,7 +29,7 @@ import project.System.File.Generic.Writable_File.Writable_File
|
||||
from project.Data.Boolean import Boolean, False, True
|
||||
from project.Meta.Enso_Project import enso_project
|
||||
from project.Metadata.Choice import Option
|
||||
from project.Metadata.Widget import Folder_Browse, Text_Input
|
||||
from project.Metadata.Widget import Folder_Browse, Text_Input, Vector_Editor
|
||||
from project.System.File_Format import Auto_Detect, File_Format
|
||||
|
||||
## ALIAS load, open
|
||||
@ -92,6 +94,59 @@ read path=(Missing_Argument.throw "path") format=Auto_Detect (on_problems : Prob
|
||||
if file_obj.is_directory then Error.throw (Illegal_Argument.Error "Cannot `read` a directory, use `Data.list`.") else
|
||||
file_obj.read format on_problems
|
||||
|
||||
## ALIAS load, open
|
||||
GROUP Input
|
||||
ICON data_input
|
||||
Reads a list of files into Enso.
|
||||
|
||||
Arguments:
|
||||
- paths: A list of files to load. It can be a Vector, Column or Table of
|
||||
files, paths or URIs to fetch. If a Table is provided, it must either
|
||||
contain a single column or a column called `path` (case insensitive).
|
||||
- format: A `File_Format` object used to read files into memory.
|
||||
If `Auto_Detect` is specified, each file determines the specific
|
||||
type and configures it appropriately. If there is no matching type then
|
||||
a `File_Error.Unsupported_Type` error is returned.
|
||||
- return: Specifies the shape of the data to return.
|
||||
- on_problems: Specifies the behavior when a problem occurs during the
|
||||
function.
|
||||
By default, if one of the files fails to load, a warning is issued and the
|
||||
entry for that file becomes `Nothing`, but the operation proceeds.
|
||||
If set to `Report_Error`, the operation fails with a dataflow error on the
|
||||
first failing file.
|
||||
If set to `Ignore`, the operation proceeds without errors or warnings,
|
||||
replacing files that fail to load with `Nothing`.
|
||||
|
||||
! Request Caching
|
||||
|
||||
Responses to HTTP data requests are cached, and additional requests for the
|
||||
same resources will use the cache, saving a round-trip call to the remote
|
||||
server. Two resources are considered the same if the URIs and request
|
||||
headers are the same. Header order does not affect sameness.
|
||||
|
||||
The cache respects the "max-age" and "Age" response headers; see
|
||||
`Data.fetch` for more details.
|
||||
|
||||
The cached values are retained as long as the project remains open. Closing
|
||||
a project will clear the cache.
|
||||
|
||||
> Example
|
||||
Read all CSV files from a directory into a Vector of tables.
|
||||
|
||||
from Standard.Table import all
|
||||
import Standard.Examples
|
||||
|
||||
files = Data.list name_filter="*.csv"
|
||||
example_csv_dir_to_table = Data.read_many files
|
||||
@paths (Vector_Editor item_editor=Text_Input item_default='""')
|
||||
@format File_Format.default_widget
|
||||
read_many : Many_Files_List -> File_Format -> Return_As -> Problem_Behavior -> Any ! File_Error
read_many (paths : Many_Files_List = Missing_Argument.throw "paths") format=Auto_Detect return=..Vector (on_problems : Problem_Behavior = ..Report_Warning) =
    # `return` may arrive as an unresolved constructor (e.g. `..Vector`), so it
    # is resolved to a concrete return mode first.
    return_as = Return_As.resolve return
    # Every entry is read with the same format and problem handling; `map`
    # receives `on_problems` as well, so it also governs failures of
    # individual entries.
    read_one path = Data.read path format on_problems
    loaded_objects = paths.paths_to_load.map on_problems=on_problems read_one
    return_as.make_return paths loaded_objects
|
||||
|
||||
## ALIAS load text, open text
|
||||
GROUP Input
|
||||
ICON data_input
|
||||
|
@ -0,0 +1,22 @@
|
||||
import project.Data.Text.Text
|
||||
import project.Data.Vector.Vector
|
||||
|
||||
## A uniform description of a list of files that can be read.

   Values of various types (e.g. Vector, Column) are converted to this type so
   that they can be passed to `Data.read_many`.
type Many_Files_List
    ## PRIVATE
       Arguments:
       - original_value: The value that this list was converted from.
       - paths_to_load: The Vector of entries that should be read.
    Value original_value paths_to_load:Vector

    ## PRIVATE
       Text representation, based on the original value.
    to_text self -> Text =
        source_text = self.original_value.to_text
        "Many_Files_List "+source_text

    ## PRIVATE
       Display text representation, based on the original value.
    to_display_text self -> Text =
        source_text = self.original_value.to_display_text
        "Many_Files_List "+source_text

## PRIVATE
   A Vector serves both as the original value and as the list of entries to
   load.
Many_Files_List.from (that : Vector) =
    Many_Files_List.Value original_value=that paths_to_load=that
|
@ -0,0 +1,78 @@
|
||||
import project.Any.Any
|
||||
import project.Data.Text.Text
|
||||
import project.Data.Read.Many_Files_List.Many_Files_List
|
||||
import project.Data.Vector.Vector
|
||||
import project.Error.Error
|
||||
import project.Errors.Common.Type_Error
|
||||
import project.Errors.Illegal_Argument.Illegal_Argument
|
||||
import project.Function.Function
|
||||
import project.Metadata.Display
|
||||
import project.Metadata.Widget
|
||||
import project.Nothing.Nothing
|
||||
import project.Panic.Panic
|
||||
from project.Data.Boolean import Boolean, False, True
|
||||
from project.Metadata.Choice import Option
|
||||
from project.Metadata.Widget import Single_Choice
|
||||
|
||||
polyglot java import org.enso.base.read.ReadManyReturnSPI
|
||||
|
||||
# Collects the type objects reported by `ReadManyReturnSPI.get_types` (called
# with `refresh` set to `False`) and exposes them as an Enso Vector.
private _get_known_return_classes -> Vector =
    registered_types = ReadManyReturnSPI.get_types False
    Vector.from_polyglot_array registered_types
|
||||
|
||||
## A common interface that represents ways to return a list of files that have
   been read.
type Return_As
    ## PRIVATE
       Wraps a concrete return mode; all operations delegate to `underlying`.
    Instance underlying

    ## PRIVATE
    to_text self -> Text = self.underlying.to_text

    ## PRIVATE
    to_display_text self -> Text = self.underlying.to_display_text

    ## PRIVATE
       Builds the final result from the loaded objects by delegating to the
       wrapped return mode.

       Arguments:
       - input: The list of files that was requested.
       - objects: The objects loaded for the entries of `input`.
    make_return self (input : Many_Files_List) (objects : Vector Any) =
        self.underlying.make_return input objects

    ## PRIVATE
       Resolve an unresolved constructor to the actual type.

       A `Function` is offered to each known return type in turn; the first
       one whose `resolve` does not return `Nothing` wins. If none accepts
       it, an `Illegal_Argument` error is returned. A value that is already
       a `Return_As` is returned unchanged; anything else panics with a
       `Type_Error`.
    private resolve value = case value of
        _ : Function ->
            types = _get_known_return_classes
            try_next idx =
                if idx >= types.length then Error.throw (Illegal_Argument.Error "Expected Return_As, but got a function.") else
                    resolved = (types.at idx).resolve value
                    if resolved.is_nothing then @Tail_Call try_next (idx + 1) else resolved
            try_next 0
        _ : Return_As -> value
        _ -> Panic.throw (Type_Error.Error Return_As value "Expected `return` to be a Return_As type, but got {got}.")

    ## PRIVATE
       A dropdown listing the options contributed by every known return type.
    default_widget : Widget
    default_widget =
        # Each type contributes a Vector of options, so the results have to be
        # flattened into one list; a plain `map` would pass a Vector of
        # Vectors as the dropdown values.
        options = _get_known_return_classes.flat_map .get_dropdown_options
        Single_Choice display=Display.Always values=options
|
||||
|
||||
## PRIVATE
   The return modes provided by the Base library.
type Return_As_Base
    ## Will return a Vector of objects that were loaded.
       The order of the returned Vector is the same as in the input.
    Vector

    ## PRIVATE
       The dropdown entries offered for this type.
    get_dropdown_options : Vector Option
    get_dropdown_options =
        vector_option = Option "Vector" "..Vector"
        [vector_option]

    ## PRIVATE
       Tries to interpret `value` as a `Return_As_Base`; yields `Nothing` if
       the type check panics with a `Type_Error`.
    resolve value =
        not_this_type _ = Nothing
        Panic.catch Type_Error (value : Return_As_Base) not_this_type

    ## PRIVATE
       Returns the loaded objects unchanged; the input list is not needed.
    make_return self (input : Many_Files_List) (objects : Vector Any) =
        _ = input
        objects

## PRIVATE
   Wraps a Base return mode in the common `Return_As` interface.
Return_As.from (that : Return_As_Base) = Return_As.Instance underlying=that
|
@ -1,4 +1,5 @@
|
||||
from Standard.Base import all
|
||||
import Standard.Base.Data.Read.Many_Files_List.Many_Files_List
|
||||
import Standard.Base.Errors.Common.Index_Out_Of_Bounds
|
||||
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
||||
import Standard.Base.Errors.Illegal_State.Illegal_State
|
||||
@ -2168,3 +2169,8 @@ Vector.from (that:DB_Column) =
|
||||
|
||||
## PRIVATE
|
||||
Cleansable_Text.from (that:DB_Column) = Cleansable_Text.Value (pattern->replace_with-> (that.text_replace (regex pattern) replace_with).rename that.name)
|
||||
|
||||
## PRIVATE
   Database columns are not accepted as a file list: this conversion always
   fails with an `Illegal_Argument` error.
Many_Files_List.from (that : DB_Column) =
    _ = that
    message = "`read_many` cannot be used with Database columns. Materialize the column into memory using `.read` first."
    Error.throw (Illegal_Argument.Error message)
|
||||
|
@ -1,6 +1,7 @@
|
||||
from Standard.Base import all
|
||||
import Standard.Base.Data.Array_Proxy.Array_Proxy
|
||||
import Standard.Base.Data.Filter_Condition as Filter_Condition_Module
|
||||
import Standard.Base.Data.Read.Many_Files_List.Many_Files_List
|
||||
import Standard.Base.Data.Time.Errors.Date_Time_Format_Parse_Error
|
||||
import Standard.Base.Data.Vector.Builder
|
||||
import Standard.Base.Errors.Common.Additional_Warnings
|
||||
@ -3120,3 +3121,8 @@ make_literal_table connection column_vectors column_names alias =
|
||||
connection.dialect.make_cast base_column sql_type infer_type_from_database
|
||||
|
||||
DB_Table.Value alias connection internal_columns context
|
||||
|
||||
## PRIVATE
   Database tables are not accepted as a file list: this conversion always
   fails with an `Illegal_Argument` error.
Many_Files_List.from (that : DB_Table) =
    _ = that
    message = "`read_many` cannot be used with Database tables. Materialize the table into memory using `.read` first."
    Error.throw (Illegal_Argument.Error message)
|
||||
|
@ -19,4 +19,3 @@ export project.Extensions.Upload_In_Memory_Table.update_rows
|
||||
export project.SQL_Query.SQL_Query
|
||||
|
||||
export project.Update_Action.Update_Action
|
||||
|
||||
|
@ -1,5 +1,6 @@
|
||||
from Standard.Base import all
|
||||
import Standard.Base.Data.Array_Proxy.Array_Proxy
|
||||
import Standard.Base.Data.Read.Many_Files_List.Many_Files_List
|
||||
import Standard.Base.Data.Vector.No_Wrap
|
||||
import Standard.Base.Errors.Common.Arithmetic_Error
|
||||
import Standard.Base.Errors.Common.Incomparable_Values
|
||||
@ -22,6 +23,7 @@ import project.Internal.Column_Ops
|
||||
import project.Internal.Date_Time_Helpers
|
||||
import project.Internal.Java_Problems
|
||||
import project.Internal.Parse_Values_Helper
|
||||
import project.Internal.Read_Many_Helpers
|
||||
import project.Internal.Storage
|
||||
import project.Internal.Value_Type_Helpers
|
||||
import project.Internal.Widget_Helpers
|
||||
@ -2927,3 +2929,8 @@ apply_unary_map column:Column new_name:Text function expected_result_type:Value_
|
||||
Java_Problems.with_map_operation_problem_aggregator column.name Problem_Behavior.Report_Warning java_problem_aggregator->
|
||||
map_column = UnaryOperation.mapFunction column.java_column function nothing_unchanged storage_type new_name java_problem_aggregator
|
||||
Column.Value map_column
|
||||
|
||||
## PRIVATE
   Uses the values of the column as the list of entries to load, after the
   column's value type has been validated by `Read_Many_Helpers`.
Many_Files_List.from (that : Column) =
    Read_Many_Helpers.ensure_column_type_valid_to_be_files_list that (Many_Files_List.Value that that.to_vector)
|
||||
|
@ -0,0 +1,30 @@
|
||||
private
|
||||
|
||||
from Standard.Base import all
|
||||
import Standard.Base.Data.Read.Many_Files_List.Many_Files_List
|
||||
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
||||
|
||||
import project.Column.Column
|
||||
import project.Errors.Invalid_Value_Type
|
||||
import project.Table.Table
|
||||
import project.Value_Type.Value_Type
|
||||
|
||||
# Picks the column of a Table that lists the files to read: the only column of
# a single-column table, otherwise the column named `path` (matched case
# insensitively). Fails with `Illegal_Argument` if no such column exists or if
# several columns match.
find_files_list_in_table (that : Table) -> Many_Files_List =
    single_column = that.column_count == 1
    files_column = case single_column of
        True -> that.at 0
        False ->
            candidates = that.select_columns "path" case_sensitivity=..Insensitive on_problems=..Report_Error
            no_candidates = candidates.is_error || (candidates.column_count == 0)
            case no_candidates of
                True -> Error.throw (Illegal_Argument.Error "To use a Table as file list, it must be a single column or contain a `path` column (case insensitive).")
                False ->
                    is_ambiguous = candidates.column_count > 1
                    if is_ambiguous then Error.throw (Illegal_Argument.Error "Multiple 'paths' column candidates found: "+candidates.column_names.to_display_text+".") else
                        candidates.at 0
    ensure_column_type_valid_to_be_files_list files_column <|
        Many_Files_List.Value that files_column.to_vector
|
||||
|
||||
# Runs `action` only if the column's value type can hold a list of files;
# otherwise fails with `Invalid_Value_Type.Column`.
ensure_column_type_valid_to_be_files_list (column : Column) ~action =
    column_type = column.value_type
    case column_type of
        # Columns containing File objects will be Mixed
        Value_Type.Mixed -> action
        # Columns containing paths as Text will be Char
        Value_Type.Char _ _ -> action
        _ -> Error.throw (Invalid_Value_Type.Column "Text or Mixed" column_type column.name)
|
@ -55,4 +55,3 @@ export project.Table.Table
|
||||
export project.Value_Type.Auto
|
||||
export project.Value_Type.Bits
|
||||
export project.Value_Type.Value_Type
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
from Standard.Base import all
|
||||
import Standard.Base.Data.Array_Proxy.Array_Proxy
|
||||
import Standard.Base.Data.Filter_Condition as Filter_Condition_Module
|
||||
import Standard.Base.Data.Read.Many_Files_List.Many_Files_List
|
||||
import Standard.Base.Data.Time.Errors.Date_Time_Format_Parse_Error
|
||||
import Standard.Base.Data.Vector.No_Wrap
|
||||
import Standard.Base.Errors.Common.Additional_Warnings
|
||||
@ -47,6 +48,7 @@ import project.Internal.Lookup_Helpers
|
||||
import project.Internal.Lookup_Helpers.Lookup_Column
|
||||
import project.Internal.Parse_Values_Helper
|
||||
import project.Internal.Problem_Builder.Problem_Builder
|
||||
import project.Internal.Read_Many_Helpers
|
||||
import project.Internal.Replace_Helpers
|
||||
import project.Internal.Split_Tokenize
|
||||
import project.Internal.Table_Helpers
|
||||
@ -3881,3 +3883,7 @@ make_fill_nothing_default_widget table cache=Nothing =
|
||||
## PRIVATE
|
||||
Helper method for internal use to make a Table from a Java Table.
|
||||
from_java_table java_table = Table.Value java_table
|
||||
|
||||
## PRIVATE
   Locates the column listing the files inside the table; the lookup is
   delegated to `Read_Many_Helpers.find_files_list_in_table`.
Many_Files_List.from (that : Table) = Read_Many_Helpers.find_files_list_in_table that
|
||||
|
@ -0,0 +1,14 @@
|
||||
package org.enso.base.read;
|
||||
|
||||
@org.openide.util.lookup.ServiceProvider(service = ReadManyReturnSPI.class)
|
||||
public class BaseReadManyReturnSPI extends ReadManyReturnSPI {
|
||||
@Override
|
||||
protected String getModuleName() {
|
||||
return "Standard.Base.Data.Read.Return_As";
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getTypeName() {
|
||||
return "Return_As_Base";
|
||||
}
|
||||
}
|
@ -0,0 +1,25 @@
|
||||
package org.enso.base.read;
|
||||
|
||||
import java.util.ServiceLoader;
|
||||
import org.enso.base.polyglot.EnsoMeta;
|
||||
import org.graalvm.polyglot.Value;
|
||||
|
||||
public abstract class ReadManyReturnSPI {
|
||||
private static final ServiceLoader<ReadManyReturnSPI> loader =
|
||||
ServiceLoader.load(ReadManyReturnSPI.class, ReadManyReturnSPI.class.getClassLoader());
|
||||
|
||||
public static Value[] get_types(boolean refresh) {
|
||||
if (refresh) {
|
||||
loader.reload();
|
||||
}
|
||||
return loader.stream().map(provider -> provider.get().getTypeObject()).toArray(Value[]::new);
|
||||
}
|
||||
|
||||
public Value getTypeObject() {
|
||||
return EnsoMeta.getType(getModuleName(), getTypeName());
|
||||
}
|
||||
|
||||
protected abstract String getModuleName();
|
||||
|
||||
protected abstract String getTypeName();
|
||||
}
|
@ -200,6 +200,12 @@ add_specs suite_builder =
|
||||
r = Data.read (URI.from url_get)
|
||||
r.should_be_a JS_Object
|
||||
|
||||
group_builder.specify "can use URI or Text URLs in Data.read_many" <|
|
||||
r = Data.read_many [URI.from url_get, url_get]
|
||||
r.should_be_a Vector
|
||||
r.at 0 . should_be_a JS_Object
|
||||
r.at 1 . should_be_a JS_Object
|
||||
|
||||
group_builder.specify "works if HTTP is uppercase" <| Test.with_retries <|
|
||||
r = Data.fetch (url_get.replace "http" "HTTP")
|
||||
r.should_be_a JS_Object
|
||||
|
@ -1,9 +1,12 @@
|
||||
from Standard.Base import all
|
||||
import Standard.Base.Data.Vector.Map_Error
|
||||
import Standard.Base.Errors.Encoding_Error.Encoding_Error
|
||||
import Standard.Base.Errors.File_Error.File_Error
|
||||
|
||||
from Standard.Test import all
|
||||
|
||||
## Test helper wrapping a suspended (`~`) value, so that the wrapped
   expression (e.g. reading `sample.json` as plain text) is evaluated when
   `get` is accessed rather than when the group is defined.
type Lazy_Ref
|
||||
Value ~get
|
||||
|
||||
add_specs suite_builder =
|
||||
sample_xxx = enso_project.data / "sample.xxx"
|
||||
@ -85,6 +88,48 @@ add_specs suite_builder =
|
||||
patterns.should_contain "*.txt"
|
||||
patterns.should_contain "*.json"
|
||||
|
||||
suite_builder.group "Data.read_many" group_builder->
|
||||
js_object = JS_Object.from_pairs [["arr", [1, 2, 3]], ["num", 42.5], ["not", Nothing]]
|
||||
js_as_text = Lazy_Ref.Value <|
|
||||
(enso_project.data / "sample.json") . read ..Plain_Text
|
||||
group_builder.specify "should allow to read a list of files and return them as vector" <|
|
||||
files = [enso_project.data / "sample.json", enso_project.data / "helloworld.txt"]
|
||||
|
||||
# Read all files using Auto_Detect - each file is read according to its inferred format.
|
||||
r1 = Data.read_many files
|
||||
r1.should_equal [js_object, "Hello World!"]
|
||||
|
||||
# Read all files using a specified format.
|
||||
r2 = Data.read_many files format=..Plain_Text
|
||||
r2.should_be_a Vector
|
||||
r2.should_equal [js_as_text.get, "Hello World!"]
|
||||
|
||||
group_builder.specify "should work with paths as Text" <|
|
||||
files = [enso_project.data / "sample.json", enso_project.data / "helloworld.txt"]
|
||||
paths = files.map .path
|
||||
r1 = Data.read_many paths return=..Vector
|
||||
r1.should_equal [js_object, "Hello World!"]
|
||||
|
||||
three_files = [enso_project.data / "sample.json", enso_project.data / "nonexistent.txt", enso_project.data / "helloworld.txt"]
|
||||
group_builder.specify "should allow to Report_Error if any file fails to load" <|
|
||||
r1 = Data.read_many three_files return=..Vector on_problems=..Report_Error
|
||||
# The error reports as File_Error
|
||||
r1.should_fail_with File_Error
|
||||
# But it's actually Map_Error with index metadata
|
||||
r1.should_fail_with unwrap_errors=False Map_Error
|
||||
r1.catch.index . should_equal 1
|
||||
r1.catch.inner_error.should_be_a File_Error.Not_Found
|
||||
|
||||
group_builder.specify "should allow to Ignore errors if any file fails to load" <|
|
||||
r1 = Data.read_many three_files return=..Vector on_problems=..Ignore
|
||||
r1.should_equal [js_object, Nothing, "Hello World!"]
|
||||
Problems.assume_no_problems r1
|
||||
|
||||
group_builder.specify "should allow to continue loading if errors are encountered, but report them as warnings" <|
|
||||
r1 = Data.read_many three_files return=..Vector on_problems=..Report_Warning
|
||||
r1.should_equal [js_object, Nothing, "Hello World!"]
|
||||
Problems.expect_only_warning File_Error r1
|
||||
|
||||
main filter=Nothing =
|
||||
suite = Test.build suite_builder->
|
||||
add_specs suite_builder
|
||||
|
@ -11,6 +11,7 @@ import project.IO.Excel_Spec
|
||||
import project.IO.Fetch_Spec
|
||||
import project.IO.Formats_Spec
|
||||
import project.IO.Json_Spec
|
||||
import project.IO.Read_Many_Spec
|
||||
|
||||
add_specs suite_builder =
|
||||
Cloud_Spec.add_specs suite_builder
|
||||
@ -22,6 +23,7 @@ add_specs suite_builder =
|
||||
Data_Link_Formats_Spec.add_specs suite_builder
|
||||
Fetch_Spec.add_specs suite_builder
|
||||
Json_Spec.add_specs suite_builder
|
||||
Read_Many_Spec.add_specs suite_builder
|
||||
|
||||
main filter=Nothing =
|
||||
suite = Test.build suite_builder->
|
||||
|
86
test/Table_Tests/src/IO/Read_Many_Spec.enso
Normal file
86
test/Table_Tests/src/IO/Read_Many_Spec.enso
Normal file
@ -0,0 +1,86 @@
|
||||
from Standard.Base import all
|
||||
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
||||
|
||||
from Standard.Table import all
|
||||
from Standard.Table.Errors import Invalid_Value_Type
|
||||
from Standard.Database import all
|
||||
|
||||
from Standard.Test import all
|
||||
|
||||
from project.Util import all
|
||||
|
||||
# Entry point for running this spec file on its own; `filter` is passed on to
# the test runner.
main filter=Nothing =
    suite = Test.build builder->
        add_specs builder
    suite.run_with_filter filter
|
||||
|
||||
## Test helper wrapping a suspended (`~`) value, so that the wrapped
   expression (e.g. reading the sample table) is evaluated when `get` is
   accessed rather than when the group is defined.
type Lazy_Ref
|
||||
Value ~get
|
||||
|
||||
# Specs for `Data.read_many` when the list of files is given as a Column, a
# Table, or a Database table/column.
add_specs suite_builder =
    suite_builder.group "Data.read_many" group_builder->
        # One File and one Text path
        files_vector = [enso_project.data / "empty.txt", (enso_project.data / "sample.tsv") . path]
        sample_table = Lazy_Ref.Value <|
            (enso_project.data / "sample.tsv") . read
        # Shared assertion: an empty text followed by the sample table.
        check_loaded_vector v =
            v.should_be_a Vector
            v.length . should_equal 2
            v.at 0 . should_equal ""
            v.at 1 . should_equal sample_table.get
        group_builder.specify "should read files listed in a Column" <|
            column = Column.from_vector "Col" files_vector
            ## TODO for next PR:
               test that if `return` is not specified, it will return as a Table when a Column is provided
            r1 = Data.read_many column return=..Vector
            check_loaded_vector r1
            Problems.assume_no_problems r1

        group_builder.specify "should read files listed in a single column Table" <|
            table1 = Table.new [["Some column", files_vector]]
            r1 = Data.read_many table1 return=..Vector
            # TODO like above
            check_loaded_vector r1
            # Same check as in the Column and `path` column cases.
            Problems.assume_no_problems r1

        group_builder.specify "should read files listed in a Table with `path` column" <|
            table2 = Table.new [["X", [1, 2]], ["path", files_vector]]
            r2 = Data.read_many table2 return=..Vector
            # TODO like above
            check_loaded_vector r2
            Problems.assume_no_problems r2

            # Test that this is really case insensitive
            table3 = Table.new [["X", [1, 2]], ["pAtH", files_vector]]
            r3 = Data.read_many table3 return=..Vector
            check_loaded_vector r3
            Problems.assume_no_problems r3

        group_builder.specify "will fail if no `path` column can be found or its ambiguous" <|
            table1 = Table.new [["X", [1, 2]], ["Y", files_vector]]
            r1 = Data.read_many table1 return=..Vector
            r1.should_fail_with Illegal_Argument

            table2 = Table.new [["X", [1, 2]], ["path", files_vector], ["Path", [3, 4]]]
            r2 = Data.read_many table2 return=..Vector
            r2.should_fail_with Illegal_Argument

        group_builder.specify "fails if a DB Table or Column is provided, telling to materialize first to in-memory" <|
            connection = Database.connect SQLite.In_Memory
            # The database table is built from plain Text paths.
            paths_vector = files_vector.map x-> case x of
                f : File -> f.path
                p : Text -> p

            table = (Table.new [["path", paths_vector]]).select_into_database_table connection "test_table" temporary=True
            r = Data.read_many table return=..Vector
            r.should_fail_with Illegal_Argument

            col = table.at "path"
            r2 = Data.read_many col return=..Vector
            r2.should_fail_with Illegal_Argument

        group_builder.specify "fails if a column of invalid type is provided" <|
            table = Table.new [["path", [1, 2]], ["X", [33, 44]]]

            Data.read_many table . should_fail_with Invalid_Value_Type
            Data.read_many (table.at "path") . should_fail_with Invalid_Value_Type
            Data.read_many (table.select_columns ["X"]) . should_fail_with Invalid_Value_Type
|
Loading…
Reference in New Issue
Block a user