Support for Table output in Data.read_many (#11546)

- Closes #11311
Radosław Waśko 2024-11-29 12:06:30 +00:00 committed by GitHub
parent 1476c47fa9
commit dc55b4e9ce
35 changed files with 1249 additions and 319 deletions

View File

@ -29,7 +29,7 @@ import project.System.File.Generic.Writable_File.Writable_File
from project.Data.Boolean import Boolean, False, True
from project.Meta.Enso_Project import enso_project
from project.Metadata.Choice import Option
from project.Metadata.Widget import Folder_Browse, Text_Input, Vector_Editor
from project.Metadata.Widget import File_Browse, Folder_Browse, Text_Input, Vector_Editor
from project.System.File_Format import Auto_Detect, File_Format
## ALIAS load, open
@ -138,14 +138,15 @@ read path=(Missing_Argument.throw "path") format=Auto_Detect (on_problems : Prob
files = Data.list name_filter="*.csv"
example_csv_dir_to_table = Data.read_many files
@paths (Vector_Editor item_editor=Text_Input item_default='""' display=..Always)
@paths (Vector_Editor item_editor=File_Browse item_default='""' display=..Always)
@format File_Format.default_widget
@return Return_As.default_widget
read_many : Many_Files_List -> File_Format -> Return_As -> Problem_Behavior -> Any ! File_Error
read_many (paths : Many_Files_List = Missing_Argument.throw "paths") format=Auto_Detect return=..Vector (on_problems : Problem_Behavior = ..Report_Warning) =
read_many (paths : Many_Files_List = Missing_Argument.throw "paths") format=Auto_Detect return=..As_Merged_Table (on_problems : Problem_Behavior = ..Report_Warning) =
return_as = Return_As.resolve return
loaded_objects = paths.paths_to_load.map on_problems=on_problems path->
Data.read path format on_problems
return_as.make_return paths loaded_objects
return_as.make_return paths loaded_objects on_problems
## ALIAS load text, open text
GROUP Input
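
A minimal usage sketch of the new default, assuming a directory of CSV files and that Standard.Table is imported (the merged-table return requires it); file names are hypothetical:

from Standard.Base import all
from Standard.Table import all

example_read_many =
    files = Data.list name_filter="*.csv"
    # New default: all loaded tables are merged into one Table via union.
    merged = Data.read_many files
    # The previous behaviour remains available as an explicit option.
    as_vector = Data.read_many files return=..As_Vector
    [merged, as_vector]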

View File

@ -5,6 +5,7 @@ import project.Data.Vector.Vector
import project.Error.Error
import project.Errors.Common.Type_Error
import project.Errors.Illegal_Argument.Illegal_Argument
import project.Errors.Problem_Behavior.Problem_Behavior
import project.Function.Function
import project.Metadata.Display
import project.Metadata.Widget
@ -32,16 +33,24 @@ type Return_As
to_display_text self -> Text = self.underlying.to_display_text
## PRIVATE
make_return self (input : Many_Files_List) (objects : Vector Any) =
self.underlying.make_return input objects
make_return self (input : Many_Files_List) (objects : Vector Any) (on_problems : Problem_Behavior) =
self.underlying.make_return input objects on_problems
## PRIVATE
Resolve an unresolved constructor to the actual type.
private resolve value = case value of
_ : Function ->
# Result when we fail to resolve an autoscoped constructor.
symbol_not_found =
is_the_default = (Return_As_Table_Mock.resolve value).is_nothing.not
case is_the_default of
True ->
Error.throw (Illegal_Argument.Error "The method returns a merged Table by default, but Standard.Table is not imported. Add an import, or use `As_Vector`.")
False ->
Error.throw (Illegal_Argument.Error "Expected Return_As, but got a function.")
types = _get_known_return_classes
try_next idx =
if idx >= types.length then Error.throw (Illegal_Argument.Error "Expected Return_As, but got a function.") else
if idx >= types.length then symbol_not_found else
resolved = (types.at idx).resolve value
if resolved.is_nothing then @Tail_Call try_next (idx + 1) else resolved
try_next 0
@ -50,28 +59,37 @@ type Return_As
## PRIVATE
default_widget display:Display=..When_Modified -> Widget =
options = _get_known_return_classes.map .get_dropdown_options
options = _get_known_return_classes.flat_map .get_dropdown_options
Single_Choice display=display values=options
## PRIVATE
type Return_As_Base
## Will return a Vector of objects that were loaded.
The order of the returned Vector is the same as in the input.
Vector
As_Vector
## PRIVATE
get_dropdown_options : Vector Option
get_dropdown_options = [Option "Vector" "..Vector"]
get_dropdown_options = [Option "As Vector" "..As_Vector"]
## PRIVATE
resolve value =
Panic.catch Type_Error (value:Return_As_Base) _->Nothing
## PRIVATE
make_return self (input : Many_Files_List) (objects : Vector Any) =
_ = input
make_return self (input : Many_Files_List) (objects : Vector Any) (on_problems : Problem_Behavior) =
_ = [input, on_problems]
objects
## PRIVATE
Return_As.from (that : Return_As_Base) =
Return_As.Instance that
## PRIVATE
A mock type used only to check the autoscoped `As_Merged_Table` constructor.
TODO If we get support for `Unresolved_Constructor` in `Meta`, this type may be removed.
type Return_As_Table_Mock
private As_Merged_Table columns_to_keep=Nothing match=Nothing
private resolve value =
Panic.catch Type_Error (value:Return_As_Table_Mock) _->Nothing
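
A sketch of the failure mode this mock enables, assuming a project that does not import Standard.Table:

from Standard.Base import all
# Note: Standard.Table is deliberately not imported here.

example_missing_import =
    files = Data.list name_filter="*.csv"
    # The default `..As_Merged_Table` cannot be resolved without
    # Standard.Table, so this fails with the dedicated Illegal_Argument
    # message suggesting an import or `..As_Vector`.
    Data.read_many files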

View File

@ -38,7 +38,7 @@ import Standard.Table.Internal.Table_Ref.Table_Ref
import Standard.Table.Internal.Unique_Name_Strategy.Unique_Name_Strategy
import Standard.Table.Internal.Value_Type_Helpers
import Standard.Table.Internal.Widget_Helpers
import Standard.Table.Match_Columns as Match_Columns_Helpers
import Standard.Table.Internal.Match_Columns_Helpers
import Standard.Table.Row.Row
import Standard.Table.Rows_To_Read.Rows_To_Read
import Standard.Table.Value_Type.By_Type
@ -2461,18 +2461,21 @@ type DB_Table
true, a single row is output with `Nothing` for the aggregates column; if
false, no row is output at all.
The following aggregate values are supported:
The following values are considered sequence-like:
- `Array`
- `Vector`
- `List`
- `Range`
- `Date_Range`
- `Pair`
- `Table`
- `Table` (interpreted as a sequence of `Row`s)
- `Column`
Any other values are treated as non-aggregate values, and their rows are kept
unchanged.
Some aggregates, like dictionaries (`Dictionary`, `JS_Object`) will
expand into two columns - one for key and one for the value.
If a value is not an aggregate, or has no defined way of being expanded,
it is left as a single row containing that value unchanged.
In in-memory tables, it is permitted to mix values of different types.
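
A sketch of the sequence expansion on an in-memory table (the same example appears in the expansion helpers later in this diff):

from Standard.Table import all

example_expand =
    table = Table.new [["aaa", [1, 2]], ["bbb", [[30, 31], [40, 41]]]]
    # Each Vector in "bbb" becomes one row per element; "aaa" values repeat.
    table.expand_to_rows "bbb"
    # => Table.new [["aaa", [1, 1, 2, 2]], ["bbb", [30, 31, 40, 41]]]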

View File

@ -1,5 +1,6 @@
from Standard.Base import Text, Vector
from Standard.Base.Metadata import Display, make_single_choice, Widget
import Standard.Base.Errors.Common.Missing_Argument
## Specifies which columns to keep in a union operation.
type Columns_To_Keep
@ -19,7 +20,7 @@ type Columns_To_Keep
If a table does not have a column that is specified in the list, it is
padded with `Nothing` and a problem is reported.
In_List (column_names : Vector Text)
In_List (column_names : Vector Text = Missing_Argument.throw "column_names")
## PRIVATE
Same as `In_Any`, but it will warn about columns that are not present in

View File

@ -16,56 +16,74 @@ import project.Table.Table
type Convertible_To_Rows
## PRIVATE
Arguments:
- original_value: The original value from before conversion.
- length: The number of rows in the table.
- getter: Get the value for a specified row.
- columns: The names for the columns when the object is expanded.
These will be added to the name of the input column.
Value length:Integer (getter : Integer->Any) (columns:Vector=["Value"])
- is_sequence: Whether the object is a sequence-like aggregate. This
affects the `sequences_only` option of the `expand_to_rows` method.
Value original_value:Any length:Integer (getter : Integer->Any) (columns:Vector=["Value"]) (is_sequence:Boolean=True)
## PRIVATE
Return the iterator values as a `Vector`.
to_vector : Vector Any
to_vector self = 0.up_to self.length . map self.getter
## PRIVATE
Aligns this conversion depending on whether we want to expand only
sequences or all kinds of objects.
If `sequence_only` is `False`, or the conversion reports `is_sequence=True`, no changes are made.
However, if `sequence_only` is `True` and the conversion reports `is_sequence=False`,
the conversion is replaced with a single row - same as the `Any` fallback.
align_sequence_only self sequence_only:Boolean -> Convertible_To_Rows =
if sequence_only.not then self else
if self.is_sequence then self else
_conversion_from_value_as_single_row self.original_value
## PRIVATE
Convertible_To_Rows.from that:Table =
rows = that.rows
Convertible_To_Rows.from rows
## PRIVATE
Convertible_To_Rows.from that:Column = Convertible_To_Rows.Value that.length (that.get _)
Convertible_To_Rows.from that:Column = Convertible_To_Rows.Value that that.length (that.get _)
## PRIVATE
Convertible_To_Rows.from that:Vector = Convertible_To_Rows.Value that.length (that.get _)
Convertible_To_Rows.from that:Vector = Convertible_To_Rows.Value that that.length (that.get _)
## PRIVATE
Convertible_To_Rows.from that:Array = Convertible_To_Rows.Value that.length (that.get _)
Convertible_To_Rows.from that:Array = Convertible_To_Rows.Value that that.length (that.get _)
## PRIVATE
Convertible_To_Rows.from that:List = Convertible_To_Rows.from that.to_vector
## PRIVATE
Convertible_To_Rows.from that:Range = Convertible_To_Rows.Value that.length (that.get _)
Convertible_To_Rows.from that:Range = Convertible_To_Rows.Value that that.length (that.get _)
## PRIVATE
Convertible_To_Rows.from that:Pair = Convertible_To_Rows.Value that.length (that.get _)
Convertible_To_Rows.from that:Pair = Convertible_To_Rows.Value that that.length (that.get _)
## PRIVATE
Convertible_To_Rows.from that:Date_Range = Convertible_To_Rows.Value that.length (that.get _)
Convertible_To_Rows.from that:Date_Range = Convertible_To_Rows.Value that that.length (that.get _)
## PRIVATE
Convertible_To_Rows.from that:Dictionary =
vals = that.to_vector.map p-> Key_Value.Pair p.first p.second
Convertible_To_Rows.Value vals.length (vals.get _) ["Key", "Value"]
Convertible_To_Rows.Value that vals.length (vals.get _) ["Key", "Value"] is_sequence=False
## PRIVATE
Convertible_To_Rows.from that:JS_Object =
vals = that.map_with_key k->v-> Key_Value.Pair k v
Convertible_To_Rows.Value vals.length (vals.get _) ["Key", "Value"]
Convertible_To_Rows.Value that vals.length (vals.get _) ["Key", "Value"] is_sequence=False
## PRIVATE
Convertible_To_Rows.from (that:Any) =
Convertible_To_Rows.Value 1 (n-> if n==0 then that else Nothing)
_conversion_from_value_as_single_row that
private _conversion_from_value_as_single_row value =
Convertible_To_Rows.Value value 1 (n-> if n==0 then value else Nothing) is_sequence=False
## PRIVATE
type Key_Value
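
A sketch of the alignment behaviour, assuming the conversions above (the module path in the import is an assumption for illustration):

from Standard.Base import all
# Assumed module path, for illustration only:
import Standard.Table.Internal.Convertible_To_Rows.Convertible_To_Rows

example_align =
    seq = Convertible_To_Rows.from [1, 2, 3]
    dict = Convertible_To_Rows.from (Dictionary.from_vector [["a", 1], ["b", 2]])
    # A sequence is unaffected; the dictionary reports is_sequence=False,
    # so it collapses to a single row when only sequences may expand.
    [seq.align_sequence_only True . length, dict.align_sequence_only True . length]
    # => [3, 1]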

View File

@ -509,7 +509,7 @@ type No_Common_Type
Create a human-readable version of the error.
to_display_text : Text
to_display_text self =
types = self.types.map .to_display_text . join ", "
types = self.types.distinct.map .to_display_text . join ", "
prefix = "No common type was found for types: "+types
location = case self.related_column_name of
column_name : Text -> " when unifying column ["+column_name+"]"
@ -517,8 +517,7 @@ type No_Common_Type
suffix_type = case self of
No_Common_Type.Error _ _ -> "."
No_Common_Type.Warning_Convert_To_Text _ _ -> ", so the values were converted to text."
suffix_mixed = " If you want to have mixed types instead, please cast one of the columns to `Mixed` beforehand."
prefix + location + suffix_type + suffix_mixed
prefix + location + suffix_type
## PRIVATE
to_text self -> Text =
@ -889,3 +888,12 @@ type Mixing_Date_Time_Types
"Mixing Date and Date_Time values"+location+": the Date values have been automatically converted to Date_Time by adding a time of 00:00 in the default time-zone."
Mixing_Date_Time_Types.Implicit_Time_Zone _ ->
"Mixing Date_Time values with and without timezone"+location+". A default timezone has been assumed where it was missing."
## Indicates that a table with no rows has been returned.
type No_Rows
## PRIVATE
The message should be used to provide additional context.
private Warning message:Text
## PRIVATE
to_display_text self -> Text = self.message

View File

@ -1,4 +1,6 @@
from Standard.Base import all
import Standard.Base.Data.Vector.No_Wrap
import Standard.Base.Errors.Common.Type_Error
import Standard.Base.Errors.Deprecated.Deprecated
import Standard.Base.Errors.File_Error.File_Error
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
@ -13,18 +15,23 @@ from Standard.Base.Data.Filter_Condition import sql_like_to_regex
from Standard.Base.Metadata.Choice import Option
from Standard.Base.Metadata.Widget import Multiple_Choice, Single_Choice
import project.Column.Column
import project.Columns_To_Keep.Columns_To_Keep
import project.Excel.Excel_Range.Excel_Range
import project.Headers.Headers
import project.Internal.Excel_Reader
import project.Internal.Excel_Section.Excel_Section
import project.Internal.Read_Many_As_Merged_Table_Strategy.Read_Many_As_Merged_Table_Strategy
import project.Internal.Read_Many_As_Merged_Table_Strategy.Read_Many_As_Table_Result
import project.Internal.Java_Problems
import project.Internal.Problem_Builder.Problem_Builder
import project.Match_Columns.Match_Columns
import project.Return_As_Table.Return_As_Table
import project.Rows_To_Read.Rows_To_Read
import project.Table.Table
from project.Errors import Empty_Sheet
from project.Errors import Empty_Sheet, No_Rows
from project.Internal.Excel_Reader import handle_invalid_location
from project.Internal.Table_Helpers import duplicate_rows
polyglot java import java.io.File as Java_File
polyglot java import org.apache.poi.ss.usermodel.Workbook
@ -312,6 +319,11 @@ type Excel_Workbook
_ -> ""
"Excel_Workbook"+associated_regular_file
## PRIVATE
Returns a short human-readable text representation of the workbook.
to_display_text : Text
to_display_text self = self.to_text
## PRIVATE
Provides a JS object representation for use in visualizations.
to_js_object : JS_Object
@ -342,30 +354,29 @@ type Excel_Workbook
problem.
- headers: whether to use the first row as headers (default is
`Detect_Headers`) for each sheet.
- return: how to return the tables. Defaults to `Merged_Table` meaning
- return: how to return the tables. Defaults to `As_Merged_Table` meaning
the tables are merged into a single table.
- on_problems: how to handle problems during reading. Defaults to
`Report_Warning`.
@sheet_names build_sheet_selector
read_many : Vector Text -> Headers -> Return_As -> Problem_Behavior -> Table
read_many self sheet_names:Vector=self.sheet_names (headers:Headers=..Detect_Headers) (return:Return_As=..Merged_Table) (on_problems:Problem_Behavior=..Report_Warning) =
read_many : Vector Text -> Headers -> Return_As_Table -> Problem_Behavior -> Table
read_many self sheet_names:Vector=self.sheet_names (headers:Headers=..Detect_Headers) (return=..As_Merged_Table) (on_problems:Problem_Behavior=..Report_Warning) =
resolved_return = _resolve_return_as_compatibility return
if sheet_names.is_empty then Error.throw (Illegal_Argument.Error "No sheets selected.") else
tables = sheet_names.map on_problems=on_problems address-> self.read address headers on_problems=on_problems
case return of
Return_As.Table_Of_Tables -> Table.new [["Sheet Name", sheet_names], ["Table", tables]]
Return_As.Merged_Table columns_to_keep match ->
first_tbl = tables.find t-> t != Nothing
if first_tbl == Nothing then Error.throw (Illegal_Argument.Error "No valid sheets found.") else
unique = first_tbl.column_naming_helper.create_unique_name_strategy
tables.each tbl-> if tbl != Nothing then unique.mark_used tbl.column_names
new_column_name = unique.make_unique "Sheet Name"
with_names = tables.zip sheet_names tbl->name-> if tbl == Nothing then Nothing else tbl.set name new_column_name . reorder_columns [new_column_name]
result = Table.from_union (with_names.filter Filter_Condition.Not_Nothing) columns_to_keep=columns_to_keep match_columns=match
problem_builder = Problem_Builder.new
problem_builder.report_unique_name_strategy unique
problem_builder.attach_problems_after on_problems result
case resolved_return of
Return_As_Table.With_New_Column ->
tables_or_nothing = sheet_names.map on_problems=on_problems address->
self.read address headers on_problems=on_problems
Table.new [["Sheet Name", sheet_names], ["Table", tables_or_nothing]]
Return_As_Table.As_Merged_Table columns_to_keep match ->
names_and_tables = _read_sheets self sheet_names headers on_problems
only_valid = names_and_tables.filter p-> p.second.is_nothing.not
no_valid_sheets = only_valid.is_empty
if no_valid_sheets then Error.throw (Illegal_Argument.Error "No valid sheets found.") else
valid_names = only_valid.map .first
valid_tables = only_valid.map .second
unified_tables = Table.from_union valid_tables columns_to_keep=columns_to_keep match_columns=match on_problems=on_problems
(_sheet_names_table valid_names valid_tables).zip unified_tables right_prefix="" on_problems=on_problems
## PRIVATE
close_connection c = c.close
@ -375,17 +386,80 @@ build_sheet_selector workbook:Excel_Workbook display:Display=Display.Always -> W
names = workbook.sheet_names.map n-> Option n n.pretty
Multiple_Choice display=display values=names
## How to merge sheets into a single table.
## PRIVATE
DEPRECATED
How to merge sheets into a single table.
This type is deprecated and has been superseded by `Return_As_Table`.
It is kept only for compatibility reasons.
type Return_As
## Each sheet is returned as a row.
## DEPRECATED
Each sheet is returned as a row.
This option is deprecated, because it has been renamed. Please use the
equivalent `With_New_Column` instead.
Table_Of_Tables
## All sheets are merged into a single table. A union operation is performed.
## DEPRECATED
All sheets are merged into a single table. A union operation is performed.
This option is deprecated, because it has been renamed. Please use the
equivalent `As_Merged_Table` instead.
Merged_Table (columns_to_keep : Columns_To_Keep = Columns_To_Keep.In_Any) (match : Match_Columns = Match_Columns.By_Name)
## PRIVATE
Creates a query widget for the `read` method.
## A helper method that ensures that we can take the new `Return_As_Table` type
as well as the deprecated `Return_As` type, together with autoscoping.
With autoscoping, the `As_Merged_Table` constructor is ambiguous, so plain
autoscoping would not resolve it; this helper works around that.
In case of ambiguity, the new-style `Return_As_Table.As_Merged_Table`
variant is preferred.
private _resolve_return_as_compatibility return =
Panic.catch Type_Error (return:Return_As_Table) (_->Nothing) . if_nothing <|
old_style = Panic.catch Type_Error (return:Return_As) (_->Nothing)
if old_style.is_nothing then Error.throw (Illegal_Argument.Error "Valid values for `return` are: `..With_New_Column` or `..As_Merged_Table`. Instead, got `"+return.to_display_text+"`.") else
case old_style of
Return_As.Table_Of_Tables ->
Warning.attach (Deprecated.Warning "Standard.Table.Excel.Excel_Workbook.Return_As" "Table_Of_Tables" "Deprecated: use `..With_New_Column` instead.") <|
Return_As_Table.With_New_Column
Return_As.Merged_Table columns_to_keep match ->
Warning.attach (Deprecated.Warning "Standard.Table.Excel.Excel_Workbook.Return_As" "Merged_Table" "Deprecated: use `..As_Merged_Table` instead.") <|
Return_As_Table.As_Merged_Table columns_to_keep match
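
A sketch of the shim from the caller's side (hypothetical workbook value; the constructor names come from the code above):

from Standard.Base import all
from Standard.Table import all

example_compat workbook:Excel_Workbook =
    # The new-style name resolves directly via Return_As_Table.
    merged = workbook.read_many return=..As_Merged_Table
    # The deprecated name still resolves, with a Deprecated warning attached.
    legacy = workbook.read_many return=..Table_Of_Tables
    [merged, legacy]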
## Creates a query widget for the `read` method.
private _query_widget wb:Excel_Workbook display:Display=..Always -> Widget =
default_address = "'" + wb.sheet_names.first.replace "'" "''" + "'!A1:E5"
options = [Option "<By Index>" "1", Option "<Range>" "Excel_Range.from_address "+default_address.pretty] + (wb.tables.at "Name" . to_vector . map t-> Option t t.pretty)
Single_Choice display=display values=options
private _read_sheets workbook:Excel_Workbook (sheet_names : Vector Text) headers:Headers on_problems:Problem_Behavior -> Vector (Pair Text (Table | Nothing)) =
sheet_names.map on_problems=No_Wrap.Value address->
table = workbook.read address headers on_problems=on_problems
r = Pair.new address table
# If in Report_Error mode, we forward the original error without wrapping; otherwise we replace the failure with a warning and skip the sheet.
if on_problems == Problem_Behavior.Report_Error then r else
r.catch Any error->
on_problems.attach_problem_after (Pair.new address Nothing) <|
(No_Rows.Warning "The sheet "+address.to_display_text+" failed to load, so it is not included in the `As_Merged_Table` result of `read_many`. The error was: "+error.to_display_text)
private _sheet_names_table (sheet_names : Vector Text) (loaded_tables : Vector Table) -> Table =
table = Column.from_vector "Sheet Name" sheet_names . to_table
counts = loaded_tables.map t-> t.row_count
duplicate_rows table counts
## PRIVATE
Loads all sheets in the Excel workbook and merges them into a single table.
Read_Many_As_Merged_Table_Strategy.from (that:Excel_Workbook) =
callback path columns_to_keep match_columns on_problems =
tables = _read_sheets that that.sheet_names ..Detect_Headers on_problems
only_valid = tables.filter p-> p.second.is_nothing.not
case only_valid.is_empty of
True ->
on_problems.attach_problem_after Read_Many_As_Table_Result.No_Data <|
(No_Rows.Warning "The workbook "+path.to_display_text+" failed to load any sheets, so it is not included in the `As_Merged_Table` result of `read_many`.")
False ->
sheet_names = only_valid.map .first
valid_tables = only_valid.map .second
metadata = _sheet_names_table sheet_names valid_tables
data = Table.from_union valid_tables columns_to_keep match_columns on_problems
Read_Many_As_Table_Result.Table metadata=metadata data=data
Read_Many_As_Merged_Table_Strategy.Value callback

View File

@ -95,9 +95,9 @@ XML_Element.to_table self =
headers = Examples.simple_table_json_headers
Table.from_objects json headers
@fields (Widget.Vector_Editor item_editor=Widget.Text_Input item_default='""')
Table.from_objects : Any -> Vector | Nothing -> Table
Table.from_objects : Any -> Vector | Nothing -> Boolean -> Table
Table.from_objects value (fields : Vector | Nothing = Nothing) =
Expand_Objects_Helpers.create_table_from_objects value fields
Expand_Objects_Helpers.create_table_from_objects value fields treat_dictionary_as_sequence=False
## GROUP Standard.Base.Conversions
ICON convert
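
A minimal sketch of the unchanged public behaviour (hypothetical inline objects):

from Standard.Base import all
from Standard.Table import all

example_from_objects =
    objects = [JS_Object.from_pairs [["a", 1], ["b", 2]], JS_Object.from_pairs [["a", 3]]]
    # Each listed field becomes a column; one row per object.
    Table.from_objects objects ["a", "b"]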

View File

@ -1,3 +1,5 @@
private
from Standard.Base import all
import Standard.Base.Data.Vector.Builder
import Standard.Base.Errors.Common.Index_Out_Of_Bounds
@ -79,20 +81,27 @@ expand_column (table : Table) (column : Text | Integer) (fields : (Vector Text)
table = Table.new [["aaa", [1, 2]], ["bbb", [[30, 31], [40, 41]]]]
# => Table.new [["aaa", [1, 1, 2, 2]], ["bbb", [30, 31, 40, 41]]]
expand_to_rows : Table -> Text | Integer -> Boolean -> Table ! Type_Error | No_Such_Column | Index_Out_Of_Bounds
expand_to_rows table column:(Text | Integer) at_least_one_row=False = if column.is_a Integer then expand_to_rows table (table.at column).name at_least_one_row else
expand_to_rows table column:(Text | Integer) at_least_one_row=False sequences_only=False = if column.is_a Integer then expand_to_rows table (table.at column).name at_least_one_row else
row_expander : Any -> Vector
row_expander value:Convertible_To_Rows = value.to_vector
row_expander value:Convertible_To_Rows =
value
. align_sequence_only sequences_only
. to_vector
column_names : Any -> Vector
column_names value:Convertible_To_Rows = value.columns.map name-> if name=="Value" then column else column+" "+name
column_names value:Convertible_To_Rows =
value
. align_sequence_only sequences_only
. columns
. map name-> if name=="Value" then column else column+" "+name
Java_Problems.with_problem_aggregator Problem_Behavior.Report_Warning java_problem_aggregator->
builder size = make_inferred_builder size java_problem_aggregator
Fan_Out.fan_out_to_rows table column row_expander column_names at_least_one_row column_builder=builder
## PRIVATE
create_table_from_objects : Convertible_To_Rows -> (Vector Text | Nothing) -> Table
create_table_from_objects (value : Convertible_To_Rows) (fields : Vector | Nothing) = if fields.is_nothing.not && fields.is_empty then Error.throw (Illegal_Argument.Error "The fields parameter cannot be empty.") else
create_table_from_objects (base_value : Convertible_To_Rows) (fields : Vector | Nothing) (treat_dictionary_as_sequence : Boolean = False) -> Table = if fields.is_nothing.not && fields.is_empty then Error.throw (Illegal_Argument.Error "The fields parameter cannot be empty.") else
expand_sequences_only = treat_dictionary_as_sequence.not
value = base_value.align_sequence_only expand_sequences_only
len = value.length
Java_Problems.with_problem_aggregator Problem_Behavior.Report_Warning java_problem_aggregator->

View File

@ -0,0 +1,149 @@
from Standard.Base import all
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
import Standard.Base.Runtime.State
from Standard.Base.Runtime import assert
import project.Columns_To_Keep.Columns_To_Keep
import project.Constants.Report_Unmatched
import project.Match_Columns.Match_Columns
from project.Errors import Column_Count_Mismatch, No_Output_Columns, Unmatched_Columns
## PRIVATE
A helper that encapsulates the common backend-agnostic logic of matching
columns in `Table.union`.
It matches columns according to the provided matching settings and returns a
list of column sets to be merged.
Each column set consists of a name of the resulting column and a list of
indices for columns in corresponding tables that will be merged to form this
result column. The first column index corresponds to the first table in the
input and so on. If no column corresponding to a given column set was matched
in a particular table, its entry will contain `Nothing` instead.
The column sets are returned in the order in which the corresponding result
columns should appear in the resulting table.
The method assumes at least one table is provided in its input.
match_columns tables matching_mode columns_to_keep problem_builder =
assert tables.not_empty
case matching_mode of
Match_Columns.By_Name -> _match_columns_by_name tables columns_to_keep problem_builder
Match_Columns.By_Position -> _match_columns_by_position tables columns_to_keep problem_builder
## PRIVATE
private _match_columns_by_name tables columns_to_keep problem_builder = case columns_to_keep of
Columns_To_Keep.In_List list -> if list.is_empty then Error.throw (Illegal_Argument.Error "The list of columns to keep cannot be empty.") else
output_column_names = list.distinct
column_counts = _find_column_counts tables
all_tables_count = tables.length
unmatched_column_names = output_column_names.filter name->
column_counts.get name 0 < all_tables_count
if unmatched_column_names.not_empty then
problem_builder.report_other_warning (Unmatched_Columns.Error unmatched_column_names)
_build_column_set_by_name tables output_column_names
Columns_To_Keep.In_All ->
column_counts = _find_column_counts tables
# This will only include columns that were present in all tables.
all_tables_count = tables.length
common_column_names = tables.first.column_names.filter name->
column_counts.at name == all_tables_count
if common_column_names.is_empty then Error.throw (No_Output_Columns.Error "Unmatched columns are set to be dropped, but no common column names were found.") else
dropped_column_names = tables.map .column_names
. flatten
. filter (name-> column_counts.at name < all_tables_count)
. distinct
if dropped_column_names.not_empty then
problem_builder.report_other_warning (Unmatched_Columns.Error dropped_column_names)
_build_column_set_by_name tables common_column_names
_ ->
output_column_names = _distinct_columns_in_appearance_order tables
report_missing = case columns_to_keep of
Columns_To_Keep.In_Any -> False
Columns_To_Keep.In_Any_Warn_On_Missing -> True
if report_missing then
column_counts = _find_column_counts tables
all_tables_count = tables.length
## We iterate over output column names to get a deterministic
order of unmatched columns.
unmatched_column_names = output_column_names.filter name->
column_counts.get name 0 < all_tables_count
if unmatched_column_names.not_empty then
problem_builder.report_other_warning (Unmatched_Columns.Error unmatched_column_names)
_build_column_set_by_name tables output_column_names
## Common logic for computing the final result of by-name matching.
Once the set of output column names is determined, we compute the
`Column_Set` by finding the corresponding column indices in each table (if found).
private _build_column_set_by_name tables output_column_names =
output_column_names.map name->
column_indices = tables.map table->
# TODO this gets O(N^2), we should optimize
table.column_names.index_of name
Column_Set.Value name column_indices
private _match_columns_by_position tables columns_to_keep problem_builder = case columns_to_keep of
Columns_To_Keep.In_List _ ->
Error.throw (Illegal_Argument.Error "The In_List option for `columns_to_keep` cannot be used together with `By_Position` matching.")
_ ->
column_counts = tables.map table-> table.columns.length
minmax = column_counts.compute_bulk [Statistic.Minimum, Statistic.Maximum]
min = minmax.first
max = minmax.second
columns_to_take = case columns_to_keep of
Columns_To_Keep.In_All -> min
Columns_To_Keep.In_Any -> max
Columns_To_Keep.In_Any_Warn_On_Missing -> max
has_unmatched_columns = min != max
if has_unmatched_columns then
should_report_unmatched = case columns_to_keep of
Columns_To_Keep.In_All -> True
Columns_To_Keep.In_Any -> False
Columns_To_Keep.In_Any_Warn_On_Missing -> True
# TODO should we rephrase the wording of the error? should it depend on In_Any_Warn_On_Missing vs In_All?
if should_report_unmatched then
problem_builder.report_other_warning (Column_Count_Mismatch.Error max min)
name_source = case columns_to_keep of
Columns_To_Keep.In_All -> tables.first
_ ->
# We find the first table that has all the columns present.
tables.find table-> table.columns.length == columns_to_take
column_sets = Vector.new columns_to_take i->
name = name_source.at i . name
column_ids = tables.map table->
column_count = table.columns.length
if i >= column_count then Nothing else i
Column_Set.Value name column_ids
column_sets
## PRIVATE
type Column_Set
## PRIVATE
Value (name : Text) (column_indices : Vector Integer)
private resolve_columns self (all_tables : Vector) = self.column_indices.zip all_tables i-> parent_table->
case i of
Nothing -> Nothing
_ : Integer -> parent_table.at i
## Returns a map indicating in how many tables a column with a given name appeared.
private _find_column_counts tables =
tables.fold Dictionary.empty current->table->
table.columns.fold current counts-> column->
name=column.name
new_count = counts.get name 0 + 1
counts.insert name new_count
## PRIVATE
Returns a list of distinct column names, in the order of first appearance,
starting from the first table.
private _distinct_columns_in_appearance_order tables =
Vector.build names_builder->
tables.fold Dictionary.empty current-> table->
table.columns.fold current seen_names-> column->
name = column.name
if seen_names.contains_key name then seen_names else
names_builder.append name
seen_names.insert name True
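
A sketch of the by-name matching semantics through the public API (hypothetical tables):

from Standard.Table import all

example_in_list =
    t1 = Table.new [["A", [1]], ["B", [2]]]
    t2 = Table.new [["B", [3]], ["C", [4]]]
    # Keeps exactly the listed columns. "A" is missing from t2 and "C"
    # from t1, so those cells are padded with Nothing and an
    # Unmatched_Columns warning is reported.
    Table.from_union [t1, t2] columns_to_keep=(..In_List ["A", "C"])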

View File

@ -0,0 +1,56 @@
from Standard.Base import all
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
import project.Columns_To_Keep.Columns_To_Keep
import project.Errors.No_Rows
import project.Extensions.Table_Conversions
import project.Match_Columns.Match_Columns
import project.Table.Table
## PRIVATE
Determines how an object should be converted into a table for merging during
a `read_many` operation with `As_Merged_Table` return mode.
type Read_Many_As_Merged_Table_Strategy
## PRIVATE
Value (into_table : Any -> Columns_To_Keep -> Match_Columns -> Problem_Behavior -> Read_Many_As_Table_Result)
## PRIVATE
type Read_Many_As_Table_Result
## PRIVATE
Table metadata:Nothing|Table data:Table
## PRIVATE
No_Data
## PRIVATE
Read_Many_As_Merged_Table_Strategy.from (that : Table) =
callback path _ _ on_problems =
table = if that.row_count > 0 then that else
on_problems.attach_problem_after that <|
(No_Rows.Warning "The "+path.to_display_text+" loaded as a table with 0 rows, so it did not contribute any rows to the `As_Merged_Table` result of `read_many`.")
Read_Many_As_Table_Result.Table metadata=Nothing data=table
Read_Many_As_Merged_Table_Strategy.Value callback
## PRIVATE
Read_Many_As_Merged_Table_Strategy.from (that : Vector) =
callback path _ _ on_problems =
if that.not_empty then Read_Many_As_Table_Result.Table metadata=Nothing data=(_interpret_as_table that path on_problems) else
on_problems.attach_problem_after Read_Many_As_Table_Result.No_Data <|
(No_Rows.Warning "The "+path.to_display_text+" loaded as an empty array, so it is not included in the `As_Merged_Table` result of `read_many`.")
Read_Many_As_Merged_Table_Strategy.Value callback
## PRIVATE
The fallback strategy for converting a generic object into a table.
Custom data types may implement a conversion to override this strategy, in the same way as the conversions above.
Read_Many_As_Merged_Table_Strategy.from (that : Any) =
callback path _ _ on_problems =
Read_Many_As_Table_Result.Table metadata=Nothing data=(_interpret_as_table that path on_problems)
Read_Many_As_Merged_Table_Strategy.Value callback
private _interpret_as_table (object : Any) path on_problems =
Table.from_objects object . catch Illegal_Argument error->
problem = Illegal_Argument.Error "Problem converting "+object.to_display_text+" (loaded from "+path.to_display_text+") to a Table: "+error.to_display_text cause=error
on_problems.attach_problem_before problem <|
# If it was an array that failed to load, let's expand it to rows; otherwise we keep a single row for the unknown object.
vec = if object.is_a Vector then object else [object]
Table.new [["Value", vec]]
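
A hedged sketch of a custom type opting into its own merge strategy, mirroring the conversions above (the type is hypothetical, and the import paths are assumptions, since this module is internal):

from Standard.Base import all
from Standard.Table import all
# Assumed module paths, for illustration only:
import Standard.Table.Internal.Read_Many_As_Merged_Table_Strategy.Read_Many_As_Merged_Table_Strategy
import Standard.Table.Internal.Read_Many_As_Merged_Table_Strategy.Read_Many_As_Table_Result

type My_Log_File
    # Hypothetical payload: one entry per log line.
    Value (entries : Vector Text)

Read_Many_As_Merged_Table_Strategy.from (that : My_Log_File) =
    callback path _ _ on_problems =
        _ = [path, on_problems]
        data = Table.new [["Entry", that.entries]]
        Read_Many_As_Table_Result.Table metadata=Nothing data=data
    Read_Many_As_Merged_Table_Strategy.Value callback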

View File

@ -6,8 +6,13 @@ import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
import project.Column.Column
import project.Errors.Invalid_Value_Type
import project.Internal.Column_Naming_Helper.Column_Naming_Helper
import project.Internal.Read_Many_As_Merged_Table_Strategy.Read_Many_As_Merged_Table_Strategy
import project.Internal.Read_Many_As_Merged_Table_Strategy.Read_Many_As_Table_Result
import project.Return_As_Table.Return_As_Table
import project.Table.Table
import project.Value_Type.Value_Type
from project.Internal.Table_Helpers import duplicate_rows
find_files_list_in_table (that : Table) -> Many_Files_List =
found_column = if that.column_count == 1 then that.at 0 else
@ -28,3 +33,83 @@ ensure_column_type_valid_to_be_files_list (column : Column) ~action =
_ -> False
if is_expected_type then action else
Error.throw (Invalid_Value_Type.Column "Text or Mixed" column.value_type column.name)
make_return (return_shape : Return_As_Table) (input : Many_Files_List) (objects : Vector Any) (on_problems : Problem_Behavior) -> Table =
base_table = _input_as_table input
case return_shape of
Return_As_Table.With_New_Column ->
_add_objects_column base_table objects
Return_As_Table.As_Merged_Table columns_to_keep match_columns ->
tables = input.paths_to_load.zip objects path-> object->
strategy = Read_Many_As_Merged_Table_Strategy.from object
strategy.into_table path columns_to_keep match_columns on_problems
_merge_input_and_tables base_table tables columns_to_keep match_columns on_problems
_input_as_table (input : Many_Files_List) -> Table =
case input.original_value of
table : Table -> table
column : Column -> column.to_table
## Fallback - any unknown input shape is treated the same as a
Vector input: we just extract the list of files from it
_ ->
Table.new [["Path", input.paths_to_load]]
## Expands each table from `tables_for_rows` and computes their union, also
duplicating the corresponding rows from `input_table`.
_merge_input_and_tables (input_table : Table) (tables_for_rows : Vector Read_Many_As_Table_Result) columns_to_keep match_columns on_problems -> Table =
counts = tables_for_rows.map table-> case table of
Read_Many_As_Table_Result.No_Data -> 0
Read_Many_As_Table_Result.Table _ data -> data.row_count
adapted_tables = _inherit_warnings_from_vector tables_for_rows <|
tables_for_rows.filter r-> r != Read_Many_As_Table_Result.No_Data
## If there are no tables to merge, union would fail.
Each entry then contributes 0 rows, so we need to return an empty table -
one with the same structure as the input table.
if adapted_tables.is_empty then (input_table.take 0) else
unified_metadata = _unify_metadata adapted_tables on_problems
unified_data = Table.from_union (adapted_tables.map .data) columns_to_keep match_columns on_problems
multiplicated_inputs = duplicate_rows input_table counts
Runtime.assert (unified_data.row_count == multiplicated_inputs.row_count)
Runtime.assert (unified_metadata.is_nothing || (unified_metadata.row_count == unified_data.row_count))
first_pass = if unified_metadata.is_nothing then multiplicated_inputs else
multiplicated_inputs.zip unified_metadata right_prefix=""
first_pass.zip unified_data right_prefix=""
## Unifies provided metadata tables, knowing that some tables may have no
metadata - in such a case we want to insert as many Nothing rows for metadata
as there are rows in the corresponding data table.
_unify_metadata (tables : Vector Read_Many_As_Table_Result) (on_problems : Problem_Behavior) -> Table | Nothing =
has_no_metadata = tables.all r-> r.metadata.is_nothing
if has_no_metadata then Nothing else
unique = Column_Naming_Helper.in_memory.create_unique_name_strategy
tables.each r->
if r.metadata.is_nothing.not then unique.mark_used r.metadata.column_names
# A dummy column, because we cannot create a table with 0 columns; it will be removed after the union. We find a unique name for it to avoid conflicts.
dummy_column_name = unique.make_unique "_Internal_Placeholder_Column_"
tables_for_union = tables.map r->
if r.metadata.is_nothing.not then r.metadata else
Table.new [Column.from_repeated_item dummy_column_name Nothing r.data.row_count]
# Metadata are always merged by-name and columns that appear only in some tables are kept.
unified_metadata = Table.from_union tables_for_union ..In_Any ..By_Name on_problems
unified_metadata.remove_columns [dummy_column_name] on_problems=..Ignore
_make_table_with_one_null_row (empty_table : Table) -> Table =
Runtime.assert (empty_table.row_count == 0) "The table must be empty."
Table.new (empty_table.columns.map c-> Column.from_vector c.name [Nothing] c.value_type)
_add_objects_column (base_table : Table) (objects : Vector Any) -> Table =
unique_naming = base_table.column_naming_helper.create_unique_name_strategy
unique_naming.mark_used base_table.column_names
objects_column_name = unique_naming.make_unique "Value"
base_table.set (Column.from_vector objects_column_name objects) as=objects_column_name set_mode=..Add
## Workaround for bug https://github.com/enso-org/enso/issues/11570
TODO: Remove the workaround once #11570 is closed.
private _inherit_warnings_from_vector vector:Vector ~action =
result = action
Warning.set result (Warning.get_all vector + Warning.get_all result)

View File

@ -10,7 +10,7 @@ import project.Expression.Expression
import project.Internal.Column_Naming_Helper.Column_Naming_Helper
import project.Internal.Problem_Builder.Problem_Builder
import project.Internal.Value_Type_Helpers
import project.Match_Columns.Column_Set
import project.Internal.Match_Columns_Helpers.Column_Set
import project.Position.Position
import project.Set_Mode.Set_Mode
import project.Sort_Column.Sort_Column
@ -18,8 +18,10 @@ import project.Table.Table
import project.Value_Type.By_Type
import project.Value_Type.Value_Type
from project.Errors import Ambiguous_Column_Rename, Column_Type_Mismatch, Invalid_Aggregate_Column, Missing_Input_Columns, No_Common_Type, No_Input_Columns_Selected, No_Output_Columns, No_Such_Column, Too_Many_Column_Names_Provided
from project.Table import from_java_table
polyglot java import java.util.HashSet
polyglot java import org.enso.table.data.mask.OrderMask
type Table_Column_Helper
## PRIVATE
@ -584,3 +586,15 @@ replace_columns_with_transformed_columns table selectors transformer error_on_mi
columns = internal_columns.map table.columns_helper.make_column
new_columns = columns.map on_problems=No_Wrap.Value transformer
replace_columns_with_columns table columns new_columns
## Takes a table and a list of counts and returns a new table with the rows
duplicated according to the counts.
The vector should have the same number of elements as the number of rows in the table.
duplicate_rows (table : Table) (counts : Vector Integer) -> Table =
Runtime.assert (table.row_count == counts.length) "The number of counts ("+counts.length.to_text+") must match the number of rows in the table ("+table.row_count.to_text+")."
mask_array = counts
. map_with_index ix-> count-> Vector.fill count ix
. flatten
mask = OrderMask.fromArray mask_array
from_java_table (table.java_table.applyMask mask)
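
A small sketch of the counts semantics (hypothetical table):

from Standard.Base import all
from Standard.Table import all
import Standard.Table.Internal.Table_Helpers

example_duplicate =
    table = Table.new [["name", ["a", "b", "c"]]]
    # Row "a" appears twice, "b" is dropped, "c" appears once.
    Table_Helpers.duplicate_rows table [2, 0, 1]
    # => rows: "a", "a", "c"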

View File

@ -0,0 +1,52 @@
private
from Standard.Base import all
import project.Column.Column
import project.Columns_To_Keep.Columns_To_Keep
import project.Internal.Java_Problems
import project.Internal.Match_Columns_Helpers
import project.Internal.Problem_Builder.Problem_Builder
import project.Internal.Table_Helpers
import project.Internal.Table_Helpers.Union_Result_Type
import project.Match_Columns.Match_Columns
import project.Table.Table
import project.Value_Type.Value_Type
from project.Column import make_storage_builder_for_type
## The implementation of common logic that is used by `Table.from_union`.
The caller should ensure that `tables` contains only `Table` instances or `Nothing`.
Each `Nothing` is replaced by a single all-nothing row, not introducing any new columns.
The `tables` vector must contain at least one non-nothing entry.
make_union (tables : Vector) (columns_to_keep : Columns_To_Keep) (match_columns : Match_Columns) (on_problems : Problem_Behavior) =
problem_builder = Problem_Builder.new
matched_column_sets = Match_Columns_Helpers.match_columns tables match_columns columns_to_keep problem_builder
result_row_count = tables.fold 0 c-> t-> c + t.row_count
merged_columns = matched_column_sets.map column_set->
case Table_Helpers.unify_result_type_for_union column_set tables problem_builder of
Union_Result_Type.Common_Type common_type ->
_concat_columns column_set tables common_type result_row_count needs_cast=False on_problems
Union_Result_Type.Fallback_To_Text ->
_concat_columns column_set tables Value_Type.Char result_row_count needs_cast=True on_problems
Union_Result_Type.No_Types_To_Unify ->
Column.from_repeated_item column_set.name Nothing result_row_count
if merged_columns.is_empty then problem_builder.raise_no_output_columns_with_cause else
problem_builder.attach_problems_before on_problems <|
Table.new merged_columns
## A helper that efficiently concatenates storages of in-memory columns.
private _concat_columns column_set tables result_type result_row_count needs_cast on_problems =
Java_Problems.with_problem_aggregator on_problems java_problem_aggregator->
storage_builder = make_storage_builder_for_type result_type on_problems initial_size=result_row_count java_problem_aggregator
column_set.column_indices.zip tables i-> parent_table->
case i of
Nothing ->
null_row_count = parent_table.row_count
storage_builder.appendNulls null_row_count
_ : Integer ->
column = parent_table.at i
converted = if needs_cast then column.cast result_type on_problems=..Report_Error else column
storage = converted.java_column.getStorage
storage_builder.appendBulkStorage storage
sealed_storage = storage_builder.seal
Column.from_storage column_set.name sealed_storage
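
For context, a sketch of the text fallback path through the public entry point (hypothetical tables):

from Standard.Table import all

example_union_fallback =
    t1 = Table.new [["A", [1, 2]]]
    t2 = Table.new [["A", ["x"]]]
    # Integer and Text have no common type, so the values are converted
    # to text and a No_Common_Type warning is attached.
    Table.from_union [t1, t2]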

View File

@ -1,12 +1,3 @@
from Standard.Base import all
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
import Standard.Base.Runtime.State
from Standard.Base.Runtime import assert
import project.Columns_To_Keep.Columns_To_Keep
import project.Constants.Report_Unmatched
from project.Errors import Column_Count_Mismatch, No_Output_Columns, Unmatched_Columns
## Specifies a column matching strategy.
type Match_Columns
## Columns are matched by Name.
@ -16,146 +7,3 @@ type Match_Columns
Note: column names are not compared.
By_Position
## PRIVATE
A helper that encapsulates the common backend-agnostic logic of matching
columns in `Table.union`.
It matches columns according to the provided matching settings and returns a
list of column sets to be merged.
Each column set consists of a name of the resulting column and a list of
indices for columns in corresponding tables that will be merged to form this
result column. The first column index corresponds to the first table in the
input and so on. If no column corresponding to a given column set was matched
in a particular table, its entry will contain `Nothing` instead.
The column sets are returned in the order in which the corresponding result
columns should appear in the resulting table.
The method assumes at least one table is provided in its input.
match_columns tables matching_mode columns_to_keep problem_builder =
assert tables.not_empty
case matching_mode of
Match_Columns.By_Name -> match_columns_by_name tables columns_to_keep problem_builder
Match_Columns.By_Position -> match_columns_by_position tables columns_to_keep problem_builder
## PRIVATE
match_columns_by_name tables columns_to_keep problem_builder = case columns_to_keep of
Columns_To_Keep.In_List list -> if list.is_empty then Error.throw (Illegal_Argument.Error "The list of columns to keep cannot be empty.") else
output_column_names = list.distinct
column_counts = find_column_counts tables
all_tables_count = tables.length
unmatched_column_names = output_column_names.filter name->
column_counts.get name 0 < all_tables_count
if unmatched_column_names.not_empty then
problem_builder.report_other_warning (Unmatched_Columns.Error unmatched_column_names)
build_column_set_by_name tables output_column_names
Columns_To_Keep.In_All ->
column_counts = find_column_counts tables
# This will only include columns that were present in all tables.
all_tables_count = tables.length
common_column_names = tables.first.column_names.filter name->
column_counts.at name == all_tables_count
if common_column_names.is_empty then Error.throw (No_Output_Columns.Error "Unmatched columns are set to be dropped, but no common column names were found.") else
dropped_column_names = tables.map .column_names
. flatten
. filter (name-> column_counts.at name < all_tables_count)
. distinct
if dropped_column_names.not_empty then
problem_builder.report_other_warning (Unmatched_Columns.Error dropped_column_names)
build_column_set_by_name tables common_column_names
_ ->
output_column_names = distinct_columns_in_appearance_order tables
report_missing = case columns_to_keep of
Columns_To_Keep.In_Any -> False
Columns_To_Keep.In_Any_Warn_On_Missing -> True
if report_missing then
column_counts = find_column_counts tables
all_tables_count = tables.length
## We iterate over output column names to get a deterministic
order of unmatched columns.
unmatched_column_names = output_column_names.filter name->
column_counts.get name 0 < all_tables_count
if unmatched_column_names.not_empty then
problem_builder.report_other_warning (Unmatched_Columns.Error unmatched_column_names)
build_column_set_by_name tables output_column_names
## PRIVATE
Common logic for computing the final result of by-name matching.
Once the set of output column names is determined, we compute the
`Column_Set` by finding the corresponding column indices in each table (if found).
build_column_set_by_name tables output_column_names =
output_column_names.map name->
column_indices = tables.map table->
# TODO this gets O(N^2), we should optimize
table.column_names.index_of name
Column_Set.Value name column_indices
## PRIVATE
match_columns_by_position tables columns_to_keep problem_builder = case columns_to_keep of
Columns_To_Keep.In_List _ ->
Error.throw (Illegal_Argument.Error "The In_List option for `columns_to_keep` cannot be used together with `By_Position` matching.")
_ ->
column_counts = tables.map table-> table.columns.length
minmax = column_counts.compute_bulk [Statistic.Minimum, Statistic.Maximum]
min = minmax.first
max = minmax.second
columns_to_take = case columns_to_keep of
Columns_To_Keep.In_All -> min
Columns_To_Keep.In_Any -> max
Columns_To_Keep.In_Any_Warn_On_Missing -> max
has_unmatched_columns = min != max
if has_unmatched_columns then
should_report_unmatched = case columns_to_keep of
Columns_To_Keep.In_All -> True
Columns_To_Keep.In_Any -> False
Columns_To_Keep.In_Any_Warn_On_Missing -> True
# TODO should we rephrase the wording of the error? should it depend on In_Any_Warn_On_Missing vs In_All?
if should_report_unmatched then
problem_builder.report_other_warning (Column_Count_Mismatch.Error max min)
name_source = case columns_to_keep of
Columns_To_Keep.In_All -> tables.first
_ ->
# We find the first table that has all the columns present.
tables.find table-> table.columns.length == columns_to_take
column_sets = Vector.new columns_to_take i->
name = name_source.at i . name
column_ids = tables.map table->
column_count = table.columns.length
if i >= column_count then Nothing else i
Column_Set.Value name column_ids
column_sets
type Column_Set
## PRIVATE
Value (name : Text) (column_indices : Vector Integer)
## PRIVATE
resolve_columns self (all_tables : Vector) = self.column_indices.zip all_tables i-> parent_table->
case i of
Nothing -> Nothing
_ : Integer -> parent_table.at i
## PRIVATE
Returns a map indicating in how many tables a column with a given name appeared.
find_column_counts tables =
tables.fold Dictionary.empty current->table->
table.columns.fold current counts-> column->
name=column.name
new_count = counts.get name 0 + 1
counts.insert name new_count
## PRIVATE
Returns a list of distinct column names, in the order of first appearance,
starting from the first table.
distinct_columns_in_appearance_order tables =
Vector.build names_builder->
tables.fold Dictionary.empty current-> table->
table.columns.fold current seen_names-> column->
name = column.name
if seen_names.contains_key name then seen_names else
names_builder.append name
seen_names.insert name True

View File

@ -0,0 +1,47 @@
from Standard.Base import all
import Standard.Base.Data.Read.Many_Files_List.Many_Files_List
import Standard.Base.Data.Read.Return_As.Return_As
import Standard.Base.Errors.Common.Type_Error
from Standard.Base.Metadata.Choice import Option
import project.Columns_To_Keep.Columns_To_Keep
import project.Internal.Read_Many_Helpers
import project.Match_Columns.Match_Columns
type Return_As_Table
## Returns a table with a new column `Value` containing the objects loaded
from each file.
When the source for files to load was a table, all columns from the
original table are also retained. In case of name clashes, the newly
added columns will get a suffix.
When the source was a simple Vector, the returned table will also contain
a `Path` column.
With_New_Column
## All files are interpreted as tables and then merged into a single table
by a union operation.
Each file is loaded according to the provided/detected format. If the
format reads it as something other than a table, it is expanded in
the same way as `Table.from_objects`.
@columns_to_keep Columns_To_Keep.default_widget
As_Merged_Table (columns_to_keep : Columns_To_Keep = Columns_To_Keep.In_Any) (match : Match_Columns = Match_Columns.By_Name)
## PRIVATE
get_dropdown_options : Vector Option
get_dropdown_options =
[Option "With New Column" "..With_New_Column", Option "As Merged Table" "..As_Merged_Table"]
## PRIVATE
resolve value =
Panic.catch Type_Error (value:Return_As_Table) _->Nothing
## PRIVATE
make_return self (input : Many_Files_List) (objects : Vector Any) (on_problems : Problem_Behavior) =
Read_Many_Helpers.make_return self input objects on_problems
## PRIVATE
Return_As.from (that : Return_As_Table) =
Return_As.Instance that
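
A sketch of the non-merging variant (hypothetical file listing):

from Standard.Base import all
from Standard.Table import all

example_with_new_column =
    files = Data.list name_filter="*.json"
    # One row per file: a Path column (since the input is a plain Vector)
    # plus a new Value column holding each loaded object.
    Data.read_many files return=..With_New_Column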

View File

@ -54,13 +54,12 @@ import project.Internal.Replace_Helpers
import project.Internal.Split_Tokenize
import project.Internal.Table_Helpers
import project.Internal.Table_Helpers.Table_Column_Helper
import project.Internal.Table_Helpers.Union_Result_Type
import project.Internal.Table_Ref.Table_Ref
import project.Internal.Union
import project.Internal.Value_Type_Helpers
import project.Internal.Widget_Helpers
import project.Join_Condition.Join_Condition
import project.Join_Kind.Join_Kind
import project.Match_Columns as Match_Columns_Helpers
import project.Match_Columns.Match_Columns
import project.Position.Position
import project.Prefix_Name.Prefix_Name
@ -72,7 +71,6 @@ import project.Sort_Column.Sort_Column
import project.Value_Type.Auto
import project.Value_Type.By_Type
import project.Value_Type.Value_Type
from project.Column import make_storage_builder_for_type
from project.Errors import all
from project.Internal.Filter_Condition_Helpers import make_filter_column
from project.Internal.Lookup_Helpers import make_java_lookup_column_description
@ -2025,18 +2023,21 @@ type Table
true, a single row is output with `Nothing` for the aggregates column; if
false, no row is output at all.
The following aggregate values are supported:
The following values are considered sequence-like:
- `Array`
- `Vector`
- `List`
- `Range`
- `Date_Range`
- `Pair`
- `Table`
- `Table` (interpreted as a sequence of `Row`s)
- `Column`
Any other values are treated as non-aggregate values, and their rows are kept
unchanged.
Some aggregates, like dictionaries will expand into two columns - one for
key and one for the value.
If a value is not an aggregate, or has no defined way of being expanded,
it is left as a single row containing that value unchanged.
In in-memory tables, it is permitted to mix values of different types.
@ -2048,7 +2049,7 @@ type Table
@column Widget_Helpers.make_column_name_selector
expand_to_rows : Text | Integer -> Boolean -> Table ! Type_Error | No_Such_Column | Index_Out_Of_Bounds
expand_to_rows self column at_least_one_row:Boolean=False =
Expand_Objects_Helpers.expand_to_rows self column at_least_one_row
Expand_Objects_Helpers.expand_to_rows self column at_least_one_row sequences_only=False
## ALIAS filter rows, where
GROUP Standard.Base.Selections
@ -2920,10 +2921,10 @@ type Table
? Column Renaming
If columns from the two tables have colliding names, a prefix (by
default `Right_`) is added to the name of the column from the right
default `Right `) is added to the name of the column from the right
table. The left column remains unchanged. It is possible that the new
name will be in use; in this case it will be resolved using the normal
renaming strategy - adding subsequent `_1`, `_2` etc.
renaming strategy - adding subsequent ` 1`, ` 2` etc.
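
A tiny sketch of the updated naming (hypothetical tables):

from Standard.Table import all

example_zip_rename =
    t1 = Table.new [["A", [1]]]
    t2 = Table.new [["A", [2]]]
    # The clashing right-hand column is renamed to "Right A".
    t1.zip t2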
? Row Ordering For In-Memory Tables
@ -3765,24 +3766,7 @@ type Table
from_union (tables : Vector) (columns_to_keep : Columns_To_Keep = ..In_Any_Warn_On_Missing) (match_columns : Match_Columns = ..By_Name) (on_problems : Problem_Behavior = ..Report_Warning) =
all_tables = (tables.map t-> Table.from t)
if all_tables.is_empty then Error.throw (Illegal_Argument.Error "`Table.from_union` needs at least 1 input table.") else
## We keep separate problem builders, because if we are reporting `No_Output_Columns`,
we only want to add a cause coming from unification; matching reports problems that would not fit this error.
problem_builder_for_matching = Problem_Builder.new
problem_builder_for_unification = Problem_Builder.new
matched_column_sets = Match_Columns_Helpers.match_columns all_tables match_columns columns_to_keep problem_builder_for_matching
result_row_count = all_tables.fold 0 c-> t-> c + t.row_count
merged_columns = matched_column_sets.map column_set->
case Table_Helpers.unify_result_type_for_union column_set all_tables problem_builder_for_unification of
Union_Result_Type.Common_Type common_type ->
concat_columns column_set all_tables common_type result_row_count needs_cast=False on_problems
Union_Result_Type.Fallback_To_Text ->
concat_columns column_set all_tables Value_Type.Char result_row_count needs_cast=True on_problems
Union_Result_Type.No_Types_To_Unify ->
Column.from_repeated_item column_set.name Nothing result_row_count
problem_builder_for_matching.attach_problems_before on_problems <|
problem_builder_for_unification.attach_problems_before on_problems <|
if merged_columns.is_empty then problem_builder_for_unification.raise_no_output_columns_with_cause else
Table.new merged_columns
Union.make_union all_tables columns_to_keep match_columns on_problems
## PRIVATE
pretty : Text
@ -3805,24 +3789,6 @@ make_join_helpers left_table right_table =
Java_Join_Between.new left.java_column right_lower.java_column right_upper.java_column
Join_Helpers.Join_Condition_Resolver.Value (left_table.at _) (right_table.at _) make_equals make_equals_ignore_case make_between
## PRIVATE
A helper that efficiently concatenates storages of in-memory columns.
concat_columns column_set all_tables result_type result_row_count needs_cast on_problems =
Java_Problems.with_problem_aggregator on_problems java_problem_aggregator->
storage_builder = make_storage_builder_for_type result_type on_problems initial_size=result_row_count java_problem_aggregator
column_set.column_indices.zip all_tables i-> parent_table->
case i of
Nothing ->
null_row_count = parent_table.row_count
storage_builder.appendNulls null_row_count
_ : Integer ->
column = parent_table.at i
converted = if needs_cast then column.cast result_type on_problems=..Report_Error else column
storage = converted.java_column.getStorage
storage_builder.appendBulkStorage storage
sealed_storage = storage_builder.seal
Column.from_storage column_set.name sealed_storage
## PRIVATE
Conversion method to a Table from a Column.
Table.from (that:Column) = that.to_table

View File

@ -39,7 +39,7 @@ test_problem_handling action expected_problems result_checker (unwrap_errors : B
Test.with_clue "The warnings were "+warnings.to_text+'.\n' <|
(if ignore_warning_cardinality then warnings.distinct else warnings)
. map unwrap_maybe
. should_equal_ignoring_order expected_problems frames_to_skip=5
. should_equal_ignoring_order expected_problems frames_to_skip=8
test_advanced_problem_handling action error_checker warnings_checker result_checker frames_to_skip=1
## UNSTABLE

View File

@ -4,6 +4,15 @@ import java.util.ServiceLoader;
import org.enso.base.polyglot.EnsoMeta;
import org.graalvm.polyglot.Value;
/**
* An SPI for specifying return types to the `read_many` operation.
*
* <p>The `read_many` operation can take return types provided by various libraries. This SPI
* ensures that it is aware of all the available types from the loaded libraries. If a library
* registers a return type here, it will be available for autoscoping resolution and will appear in
* the dropdown. Registered types must provide methods `get_dropdown_options`, `resolve` and
* `make_return`. See `Standard.Base.Data.Read.Return_As` for examples.
*/
public abstract class ReadManyReturnSPI {
private static final ServiceLoader<ReadManyReturnSPI> loader =
ServiceLoader.load(ReadManyReturnSPI.class, ReadManyReturnSPI.class.getClassLoader());

View File

@ -13,6 +13,9 @@ public interface OrderMask {
* storage at the {@code idx}-th position. It may return {@link
* org.enso.table.data.storage.Storage.NOT_FOUND_INDEX}, in which case a missing value should be
* inserted at this position.
*
* <p>Indices may appear zero or multiple times in the mask, meaning that the corresponding rows
* will be dropped or duplicated.
*/
int get(int idx);
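// Illustration (not library code): for a 3-row storage, a mask returning
// {0, 0, 2} for positions 0..2 duplicates row 0 and drops row 1, while a
// mask returning {2, 1, 0} reverses the rows.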

View File

@ -0,0 +1,16 @@
package org.enso.table.read;
import org.enso.base.read.ReadManyReturnSPI;
@org.openide.util.lookup.ServiceProvider(service = ReadManyReturnSPI.class)
public class TableReadManyReturnSPI extends ReadManyReturnSPI {
@Override
protected String getModuleName() {
return "Standard.Table.Return_As_Table";
}
@Override
protected String getTypeName() {
return "Return_As_Table";
}
}
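For illustration, the Enso side of this contract could look roughly like the minimal sketch below. All names here (`Return_As_Custom`, the option label, the trivial method bodies) are invented for the example - the SPI only requires that the registered type provides `get_dropdown_options`, `resolve` and `make_return`; see `Standard.Base.Data.Read.Return_As` for the real implementations.

    from Standard.Base import all
    import Standard.Base.Errors.Common.Type_Error
    from Standard.Base.Metadata.Choice import Option

    type Return_As_Custom
        ## A custom way of returning the loaded files.
        As_Custom

        ## PRIVATE
           The options to show in the `read_many` dropdown.
        get_dropdown_options = [Option "As Custom" "..As_Custom"]

        ## PRIVATE
           Resolves an autoscoped constructor to this type, or returns Nothing if it does not match.
        resolve value =
            Panic.catch Type_Error (value:Return_As_Custom) _->Nothing

        ## PRIVATE
           Builds the final result from the objects loaded from each file; this sketch returns them unchanged.
        make_return input loaded_objects on_problems =
            _ = [input, on_problems]
            loaded_objects

A library registering such a type would also ship a `ReadManyReturnSPI` subclass pointing at its module, just like `TableReadManyReturnSPI` does for `Standard.Table.Return_As_Table`.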

View File

@ -92,7 +92,11 @@ import project.System.System_Spec
import project.System.Temporary_File_Spec
import project.Random_Spec
import project.Widget_Helpers_Spec
## Workaround for bug https://github.com/enso-org/enso/issues/11707
The Standard.Table import should be removed once the bug is fixed.
import Standard.Table
main filter=Nothing =
suite = Test.build suite_builder->
@ -177,6 +181,5 @@ main filter=Nothing =
Random_Spec.add_specs suite_builder
XML_Spec.add_specs suite_builder
Decimal_Spec.add_specs suite_builder
Widget_Helpers_Spec.add_specs suite_builder
suite.run_with_filter filter

View File

@ -226,6 +226,11 @@ add_specs suite_builder setup:Cloud_Tests_Setup = suite_builder.group "Enso Clou
r.should_fail_with File_Error
r.catch.should_be_a File_Error.Not_Found
group_builder.specify "read_many should work with Cloud files" <|
paths = [test_root.get / "test_file.json", test_root.get / "test-directory/another.txt"]
r = Data.read_many paths return=..As_Vector
r.should_equal [[1, 2, 3, "foo"], "Hello Another!"]
group_builder.specify "should be able to open a file as input stream" <|
test_file = test_root.get / "test_file.json"
test_file.exists . should_be_true

View File

@ -201,7 +201,7 @@ add_specs suite_builder =
r.should_be_a JS_Object
group_builder.specify "can use URI or Text URLs in Data.read_many" <|
r = Data.read_many [URI.from url_get, url_get]
r = Data.read_many [URI.from url_get, url_get] return=..As_Vector
r.should_be_a Vector
r.at 0 . should_be_a JS_Object
r.at 1 . should_be_a JS_Object

View File

@ -2,6 +2,7 @@ from Standard.Base import all
import Standard.Base.Data.Vector.Map_Error
import Standard.Base.Errors.Encoding_Error.Encoding_Error
import Standard.Base.Errors.File_Error.File_Error
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
from Standard.Test import all
@ -96,23 +97,37 @@ add_specs suite_builder =
files = [enso_project.data / "sample.json", enso_project.data / "helloworld.txt"]
# Read all files using Auto_Detect - each file is read according to its inferred format.
r1 = Data.read_many files
# If the return type is `As_Vector` and the input is a Vector, the result is also a Vector:
r1 = Data.read_many files return=..As_Vector
r1.should_equal [js_object, "Hello World!"]
# Read all files using a specified format.
r2 = Data.read_many files format=..Plain_Text
r2 = Data.read_many files format=..Plain_Text return=..As_Vector
r2.should_be_a Vector
r2.should_equal [js_as_text.get, "Hello World!"]
## Workaround for https://github.com/enso-org/enso/issues/11707
This pending check should be removed once it is fixed.
is_table_imported = File_Format.all.map .to_text . contains "Delimited_Format"
table_import_pending = if is_table_imported then "Base_Tests should not import Table, but they sometimes do as a workaround for #11707. This test can only run if Table is not imported."
group_builder.specify "would default to returning as merged Table, but will raise a helpful error if Standard.Table is not loaded" pending=table_import_pending <|
Runtime.assert is_table_imported.not "This test assumes that Base_Tests does not import Standard.Table."
files = [enso_project.data / "sample.json"]
r1 = Data.read_many files
r1.should_fail_with Illegal_Argument
r1.catch.to_display_text.should_contain "not imported"
r1.catch.to_display_text.should_contain "Standard.Table"
r1.catch.to_display_text.should_contain "As_Vector"
group_builder.specify "should work with paths as Text" <|
files = [enso_project.data / "sample.json", enso_project.data / "helloworld.txt"]
paths = files.map .path
r1 = Data.read_many paths return=..Vector
r1 = Data.read_many paths return=..As_Vector
r1.should_equal [js_object, "Hello World!"]
three_files = [enso_project.data / "sample.json", enso_project.data / "nonexistent.txt", enso_project.data / "helloworld.txt"]
group_builder.specify "should allow to Report_Error if any file fails to load" <|
r1 = Data.read_many three_files return=..Vector on_problems=..Report_Error
r1 = Data.read_many three_files return=..As_Vector on_problems=..Report_Error
# The error reports as File_Error
r1.should_fail_with File_Error
# But it's actually Map_Error with index metadata
@ -121,15 +136,20 @@ add_specs suite_builder =
r1.catch.inner_error.should_be_a File_Error.Not_Found
group_builder.specify "should allow to Ignore errors if any file fails to load" <|
r1 = Data.read_many three_files return=..Vector on_problems=..Ignore
r1 = Data.read_many three_files return=..As_Vector on_problems=..Ignore
r1.should_equal [js_object, Nothing, "Hello World!"]
Problems.assume_no_problems r1
group_builder.specify "should allow to continue loading if errors are encountered, but report them as warnings" <|
r1 = Data.read_many three_files return=..Vector on_problems=..Report_Warning
r1 = Data.read_many three_files return=..As_Vector on_problems=..Report_Warning
r1.should_equal [js_object, Nothing, "Hello World!"]
Problems.expect_only_warning File_Error r1
group_builder.specify "should return empty vector if no files were provided" <|
r1 = Data.read_many [] return=..As_Vector
r1.should_equal []
Problems.assume_no_problems r1
main filter=Nothing =
suite = Test.build suite_builder->
add_specs suite_builder

Binary file not shown.

View File

@ -1,3 +1,2 @@
*.csv*
rootCA.crt
*.xls*
*
!.gitignore

View File

@ -144,6 +144,17 @@ run_union_tests group_builder setup call_union =
m.at "C" . to_vector . should_equal [Nothing, Nothing, Nothing, "d", "e", "f", "g", "h", "i"]
m.at "D" . to_vector . should_equal [Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, 0]
group_builder.specify "table with 0 rows will still influence the output structure with its columns" <|
t1 = table1.get
t2 = table2.get.filter "A" (..Greater 100)
table = call_union [t1, t2]
m = materialize_and_maybe_sort table
expect_column_names ["A", "B", "C"] m
m.at "A" . to_vector . should_equal [1, 2, 3]
m.at "B" . to_vector . should_equal ["a", "b", "c"]
m.at "C" . to_vector . should_equal [Nothing, Nothing, Nothing]
group_builder.specify "should drop unmatched columns and warn, if In_All is selected" <|
t1 = table1.get # A, B
t2 = table2.get # C, A
@ -519,6 +530,37 @@ run_union_tests group_builder setup call_union =
problems = [No_Common_Type.Warning_Convert_To_Text [Value_Type.Time, Value_Type.Date] "D"]
Problems.test_problem_handling action problems tester
# This test is not run on DB as all-null columns will not be Mixed there.
if setup.is_database.not then group_builder.specify "if no common type can be found, will fall back to converting all types to text and warn (all-null columns edge case)" pending="TODO Value_Type.Null #6281" <|
# If a column is all-null, it will often have Mixed type. But that should not force the union into the Mixed type - it should still fall back to text.
t1 = table_builder [["A", [Nothing]]]
t2 = table_builder [["A", [2, 3]]]
t3 = table_builder [["A", [Nothing, Nothing]]]
t4 = table_builder [["A", ['a', 'b']]]
t5 = table_builder [["A", [Nothing]]]
t1.at "A" . value_type . should_equal Value_Type.Mixed
setup.expect_integer_type <| t2.at "A"
t = call_union [t1, t2, t3, t4, t5]
expect_column_names ["A"] t
t.at "A" . to_vector . should_equal [Nothing, '2', '3', Nothing, Nothing, 'a', 'b', Nothing]
t.at "A" . value_type . is_text . should_be_true
if setup.is_database.not then group_builder.specify "all-Nothing column should not influence result type, unless it had a type explicitly given to it" pending="TODO Value_Type.Null #6281" <|
t1 = table_builder [["A", [Nothing]]]
t2 = table_builder [["A", [2, 3]]]
t3 = call_union [t1, t2]
expect_column_names ["A"] t3
t3.at "A" . to_vector . should_equal [Nothing, 2, 3]
setup.expect_integer_type <| t3.at "A"
t4 = call_union [(t1.cast "A" Value_Type.Char), t2]
expect_column_names ["A"] t4
t4.at "A" . to_vector . should_equal [Nothing, '2', '3']
t4.at "A" . value_type . should_equal Value_Type.Char
group_builder.specify "will use the _output_ column name in the warnings when matching by position (so input names may differ)" <|
t1 = table_builder [["A", [1]]]
t2 = table_builder [["B", ["a"]]]

View File

@ -1,6 +1,7 @@
from Standard.Base import all
import Standard.Base.Errors.Common.Dry_Run_Operation
import Standard.Base.Errors.Common.Missing_Argument
import Standard.Base.Errors.Deprecated.Deprecated
import Standard.Base.Errors.File_Error.File_Error
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
import Standard.Base.Errors.Illegal_State.Illegal_State
@ -9,9 +10,9 @@ import Standard.Base.Runtime.Managed_Resource.Managed_Resource
import Standard.Base.Runtime.Ref.Ref
from Standard.Table import Table, Match_Columns, Excel_Format, Excel_Range, Data_Formatter, Delimited_Format, Excel_Workbook, Value_Type
from Standard.Table.Errors import Invalid_Column_Names, Duplicate_Output_Column_Names, Invalid_Location, Range_Exceeded, Existing_Data, Column_Count_Mismatch, Column_Name_Mismatch, Empty_Sheet, No_Rows, No_Common_Type
from Standard.Table.Extensions.Excel_Extensions import all
from Standard.Table.Errors import Invalid_Column_Names, Duplicate_Output_Column_Names, Invalid_Location, Range_Exceeded, Existing_Data, Column_Count_Mismatch, Column_Name_Mismatch, Empty_Sheet
import Standard.Table.Excel.Excel_Workbook.Return_As as Old_Return_As
from Standard.Test import all
@ -19,6 +20,8 @@ from Standard.Test import all
import Standard.Examples
import project.Util
from project.Common_Table_Operations.Util import within_table
from project.IO.Read_Many_Spec import with_temp_dir
polyglot java import org.enso.table_test_helpers.RandomHelpers
@ -103,22 +106,45 @@ spec_fmt suite_builder header file read_method sheet_count=5 =
group_builder.specify "should let you read all sheets into a single table" <|
wb = read_method file
action = wb.read_many on_problems=_
tester table =
table.row_count . should_equal 25
table.column_names . should_equal ["Sheet Name", "Name", "Quantity", "Price", "A", "B", "C", "D", "E", "Student Name", "Enrolment Date", "Item", "Price 1"]
problems = [Empty_Sheet.Error, Duplicate_Output_Column_Names.Error ["Price"]]
Problems.test_problem_handling action problems tester ignore_warning_cardinality=True
r1 = wb.read_many on_problems=..Report_Error
r1.should_fail_with Empty_Sheet
r2 = wb.read_many on_problems=..Report_Warning
r2.row_count . should_equal 25
r2.column_names . should_equal ["Sheet Name", "Name", "Quantity", "Price", "A", "B", "C", "D", "E", "Student Name", "Enrolment Date", "Item", "Price 1"]
# We also check how many rows were loaded from each sheet. There should be no entries for the "Empty" sheet.
r2.at "Sheet Name" . to_vector . should_equal <|
(Vector.fill 6 "Simple") + (Vector.fill 7 "Strange Dimensions") + (Vector.fill 6 "Dates") + (Vector.fill 6 "Duplicate Columns")
w1 = Problems.expect_warning No_Rows r2
w1.to_display_text . should_contain "The sheet Empty failed to load"
w1.to_display_text . should_contain "There is no data in the sheet."
w2 = Problems.expect_warning Duplicate_Output_Column_Names r2
w2.column_names . should_equal ["Price"]
group_builder.specify "should let you read all sheets into a table of tables" <|
wb = read_method file
action = wb.read_many return=..Table_Of_Tables on_problems=_
action = wb.read_many return=..With_New_Column on_problems=_
tester table =
table.row_count . should_equal 5
table.column_names . should_equal ["Sheet Name", "Table"]
problems = [Empty_Sheet.Error, Duplicate_Output_Column_Names.Error ["Price"]]
Problems.test_problem_handling action problems tester ignore_warning_cardinality=True
group_builder.specify "should still support the old options for compatibility" <|
wb = read_method file
table1 = wb.read_many return=..Table_Of_Tables
table1.row_count . should_equal 5
table1.column_names . should_equal ["Sheet Name", "Table"]
Problems.expect_warning Deprecated table1
table2 = wb.read_many return=Old_Return_As.Merged_Table
table2.row_count . should_equal 25
Problems.expect_warning Deprecated table2
table3 = wb.read_many return=..Merged_Table
table3.row_count . should_equal 25
Problems.expect_warning Deprecated table3
group_builder.specify "should let you read some sheets from xlsx" <|
wb = read_method file
single_table = wb.read_many ["Simple", "Dates"]
@ -132,8 +158,7 @@ spec_fmt suite_builder header file read_method sheet_count=5 =
single_table.row_count . should_equal 12
single_table.column_names . should_equal ["Sheet Name", "Name", "Quantity", "Price", "Student Name", "Enrolment Date"]
single_table.has_warnings.should_be_true
warning = Problems.expect_only_warning Invalid_Location single_table
warning.location . should_equal "Not A Sheet"
warning = Problems.expect_only_warning No_Rows single_table
warning.to_display_text . should_contain "Unknown sheet or range name or invalid address: 'Not A Sheet'."
type Spec_Write_Data
@ -972,6 +997,160 @@ add_specs suite_builder =
table_4 = xlsx_sheet.read (..Sheet "Sheet1" row_limit=6)
table_4.row_count . should_equal 6
group_builder.specify "should let you `read_many` Excel sheets and other tabular files into a Table" <|
with_temp_dir base_dir->
(Table.new [["A", [1, 2]], ["B", [3, 4]]]).write (base_dir / "1.tsv") . should_succeed
(Table.new [["A", [10, 20]], ["B", [30, 40]]]).write (base_dir / "2.xlsx") . should_succeed
f3 = base_dir / "3.xlsx"
(Table.new [["A", [100]], ["B", [200]], ["C", [300]]]).write f3 format=(..Sheet "nr 1") on_existing_file=..Overwrite . should_succeed
(Table.new [["A", [400, 500, 600]]]).write f3 format=(..Sheet "nr 2") on_existing_file=..Append . should_succeed
files = Data.list base_dir . sort on=(.name)
r1 = Data.read_many files return=..With_New_Column
r1.should_be_a Table
Problems.assume_no_problems r1
within_table r1 <|
r1.column_names . should_equal ["Path", "Value"]
r1.row_count . should_equal 3
r1.at "Value" . at 0 . should_be_a Table
r1.at "Value" . at 1 . should_be_a Excel_Workbook
r1.at "Value" . at 2 . should_be_a Excel_Workbook
r2 = Data.read_many files return=..As_Merged_Table
r2.should_be_a Table
Problems.assume_no_problems r2
within_table r2 <|
r2.column_names . should_equal ["Path", "Sheet Name", "A", "B", "C"]
# We transform the Path to just the file name for easier testing
rows = (r2.set (r2.at "Path" . map .name) "Path").rows.map .to_vector
rows.at 0 . should_equal ["1.tsv", Nothing, 1, 3, Nothing]
rows.at 1 . should_equal ["1.tsv", Nothing, 2, 4, Nothing]
rows.at 2 . should_equal ["2.xlsx", "EnsoSheet", 10, 30, Nothing]
rows.at 3 . should_equal ["2.xlsx", "EnsoSheet", 20, 40, Nothing]
rows.at 4 . should_equal ["3.xlsx", "nr 1", 100, 200, 300]
rows.at 5 . should_equal ["3.xlsx", "nr 2", 400, Nothing, Nothing]
rows.at 6 . should_equal ["3.xlsx", "nr 2", 500, Nothing, Nothing]
rows.at 7 . should_equal ["3.xlsx", "nr 2", 600, Nothing, Nothing]
# Test loading only Excel files and an alternate matching mode to weed out edge cases
r3 = Data.read_many (Data.list base_dir name_filter="*.xlsx" . sort on=(.name)) return=..As_Merged_Table
r3.should_be_a Table
Problems.assume_no_problems r3
within_table r3 <|
r3.column_names . should_equal ["Path", "Sheet Name", "A", "B", "C"]
r3.at "Sheet Name" . to_vector . should_equal ["EnsoSheet", "EnsoSheet", "nr 1", "nr 2", "nr 2", "nr 2"]
r3.at "A" . to_vector . should_equal [10, 20, 100, 400, 500, 600]
group_builder.specify "during `read_many`, should not mix metadata columns with data columns with same name or when matching by position" <|
with_temp_dir base_dir->
(Table.new [["Z", [1, 2]], ["Sheet Name", ['data column', 'data column']]]).write (base_dir / "1.tsv") . should_succeed
(Table.new [["Z", [10]], ["X", [20]]]).write (base_dir / "2.xlsx") . should_succeed
f3 = base_dir / "3.xlsx"
(Table.new [["X", [100]], ["Y", [200]], ["Z", [300]]]).write f3 format=(..Sheet "nr 1") on_existing_file=..Overwrite . should_succeed
(Table.new [["Sheet Name", [400, 500, 600]]]).write f3 format=(..Sheet "nr 2") on_existing_file=..Append . should_succeed
files = Data.list base_dir . sort on=(.name)
input = Table.new [["Path", files], ["Sheet Name", ["input 1", "input 2", "input 3"]]]
r1 = Data.read_many input
r1.should_be_a Table
within_table r1 <|
# We transform the Path to just the file name for easier testing
rows = (r1.set (r1.at "Path" . map .name) "Path").rows.map .to_vector
# Each Sheet Name column comes out as a separate column: 1 - input, 2 - metadata, 3 - data
# The order of columns is as they appear in the input, and they are matched by name
r1.column_names . should_equal [ "Path", "Sheet Name", "Sheet Name 1", "Z", "Sheet Name 2", "X", "Y"]
rows.at 0 . should_equal [ "1.tsv", "input 1", Nothing, 1, "data column", Nothing, Nothing]
rows.at 1 . should_equal [ "1.tsv", "input 1", Nothing, 2, "data column", Nothing, Nothing]
rows.at 2 . should_equal ["2.xlsx", "input 2", "EnsoSheet", 10, Nothing, 20, Nothing]
rows.at 3 . should_equal ["3.xlsx", "input 3", "nr 1", 300, Nothing, 100, 200]
rows.at 4 . should_equal ["3.xlsx", "input 3", "nr 2", Nothing, "400", Nothing, Nothing]
rows.at 5 . should_equal ["3.xlsx", "input 3", "nr 2", Nothing, "500", Nothing, Nothing]
rows.at 6 . should_equal ["3.xlsx", "input 3", "nr 2", Nothing, "600", Nothing, Nothing]
Problems.expect_warning Duplicate_Output_Column_Names r1
Problems.expect_warning No_Common_Type r1
r2 = Data.read_many input return=(..As_Merged_Table match=..By_Position)
r2.should_be_a Table
within_table r2 <|
rows = (r2.set (r2.at "Path" . map .name) "Path").rows.map .to_vector
# Two Sheet Name columns come out as separate: 1 - input, 2 - metadata; the third one (data) gets renamed due to positional matching
# The column names come from the first table that had all the columns - in this case, first sheet of 3.xlsx
r2.column_names . should_equal [ "Path", "Sheet Name", "Sheet Name 1", "X", "Y", "Z"]
rows.at 0 . should_equal [ "1.tsv", "input 1", Nothing, 1, "data column", Nothing]
rows.at 1 . should_equal [ "1.tsv", "input 1", Nothing, 2, "data column", Nothing]
rows.at 2 . should_equal ["2.xlsx", "input 2", "EnsoSheet", 10, "20", Nothing]
rows.at 3 . should_equal ["3.xlsx", "input 3", "nr 1", 100, "200", 300]
rows.at 4 . should_equal ["3.xlsx", "input 3", "nr 2", 400, Nothing, Nothing]
rows.at 5 . should_equal ["3.xlsx", "input 3", "nr 2", 500, Nothing, Nothing]
rows.at 6 . should_equal ["3.xlsx", "input 3", "nr 2", 600, Nothing, Nothing]
Problems.expect_warning Duplicate_Output_Column_Names r2
Problems.expect_warning No_Common_Type r2
r3 = Data.read_many input return=(..As_Merged_Table columns_to_keep=..In_All match=..By_Position)
r3.should_be_a Table
within_table r3 <|
rows = (r3.set (r3.at "Path" . map .name) "Path").rows.map .to_vector
# Same as with `r2`, but now we keep only columns that are present in all tables; the column names come from the first table (so we get column Z).
# But the `Sheet Name` metadata column is still kept, as its matching is independent of data.
r3.column_names . should_equal [ "Path", "Sheet Name", "Sheet Name 1", "Z"]
rows.at 0 . should_equal [ "1.tsv", "input 1", Nothing, 1]
rows.at 1 . should_equal [ "1.tsv", "input 1", Nothing, 2]
rows.at 2 . should_equal ["2.xlsx", "input 2", "EnsoSheet", 10]
rows.at 3 . should_equal ["3.xlsx", "input 3", "nr 1", 100]
rows.at 4 . should_equal ["3.xlsx", "input 3", "nr 2", 400]
rows.at 5 . should_equal ["3.xlsx", "input 3", "nr 2", 500]
rows.at 6 . should_equal ["3.xlsx", "input 3", "nr 2", 600]
Problems.expect_warning Duplicate_Output_Column_Names r3
Problems.expect_warning Column_Count_Mismatch r3
group_builder.specify "during `read_many`, should correctly handle empty sheets" <|
with_temp_dir base_dir->
tsv_file = base_dir / "1.tsv"
(Table.new [["A", [1, 2]], ["B", [3, 4]]]).write tsv_file . should_succeed
xls_file = Examples.xls
r = Data.read_many [tsv_file, xls_file] return=..As_Merged_Table
r.should_be_a Table
r.row_count . should_equal 2+25
r.column_names . should_equal ["Path", "Sheet Name", "A", "B", "Name", "Quantity", "Price", "C", "D", "E", "Student Name", "Enrolment Date", "Item", "Price 1"]
# The first two rows come from the TSV, the rest from Excel sheets
r.at "Path" . to_vector . map .name . should_equal <|
(Vector.fill 2 tsv_file.name) + (Vector.fill 25 xls_file.name)
r.at "Sheet Name" . to_vector . should_equal <|
(Vector.fill 2 Nothing) + (Vector.fill 6 "Simple") + (Vector.fill 7 "Strange Dimensions") + (Vector.fill 6 "Dates") + (Vector.fill 6 "Duplicate Columns")
w = Problems.expect_warning No_Rows r
w.to_display_text . should_contain "Empty"
empty_xls_file = enso_project.data / "empty-sheets.xlsx"
r2 = Data.read_many [tsv_file, empty_xls_file] return=..As_Merged_Table
r2.should_be_a Table
r2.row_count . should_equal 2
# No Sheet Name column, because in the end no data from the Excel file made it into the result
r2.column_names . should_equal ["Path", "A", "B"]
Problems.expect_warning No_Rows r2
Problems.get_attached_warnings r2
. map .to_display_text
. find (..Contains "failed to load any sheets")
. should_succeed
# But when not expanding rows, the workbook with all-empty sheets is still loaded normally into a single cell
r3 = Data.read_many [tsv_file, empty_xls_file] return=..With_New_Column
r3.should_be_a Table
r3.at "Path" . to_vector . map .name . should_equal [tsv_file.name, empty_xls_file.name]
r3.at "Value" . at 0 . should_be_a Table
r3.at "Value" . at 1 . should_be_a Excel_Workbook
suite_builder.group "Problems" group_builder->
group_builder.specify "should report a user-friendly error message when format is missing a required argument" <|
r = xlsx_sheet.read (..Range)

View File

@ -2,12 +2,13 @@ from Standard.Base import all
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
from Standard.Table import all
from Standard.Table.Errors import Invalid_Value_Type
from Standard.Table.Errors import Invalid_Value_Type, No_Rows
from Standard.Database import all
from Standard.Test import all
from project.Util import all
from project.Common_Table_Operations.Util import within_table
main filter=Nothing =
suite = Test.build suite_builder->
@ -23,45 +24,68 @@ add_specs suite_builder =
files_vector = [enso_project.data / "empty.txt", (enso_project.data / "sample.tsv") . path]
sample_table = Lazy_Ref.Value <|
(enso_project.data / "sample.tsv") . read
check_loaded_vector v =
v.should_be_a Vector
v.length . should_equal 2
v.at 0 . should_equal ""
v.at 1 . should_equal sample_table.get
check_common_columns table =
table.at "Value" . to_vector . should_equal ["" , Nothing, Nothing]
table.at "a" . to_vector . should_equal [Nothing, 1, 4]
table.at "b" . to_vector . should_equal [Nothing, 2, 5]
table.at "c" . to_vector . should_equal [Nothing, 3, 6]
check_returned_vector vec =
vec.should_be_a Vector
vec.length . should_equal 2
vec.first . should_equal ""
vec.second . should_equal sample_table.get
group_builder.specify "should read files listed in a Column" <|
column = Column.from_vector "Col" files_vector
## TODO for next PR:
test that if `return` is not specified, it will return as a Table when a Column is provided
r1 = Data.read_many column return=..Vector
check_loaded_vector r1
r1 = Data.read_many column return=..As_Vector
check_returned_vector r1
Problems.assume_no_problems r1
r2 = Data.read_many column return=..With_New_Column
r2.should_be_a Table
r2.column_names . should_equal ["Col", "Value"]
r2.at "Col" . to_vector . should_equal files_vector
check_returned_vector (r2.at "Value" . to_vector)
group_builder.specify "should read files listed in a single column Table" <|
table1 = Table.new [["Some column", files_vector]]
r1 = Data.read_many table1 return=..Vector
# TODO like above
check_loaded_vector r1
r1 = Data.read_many table1 return=..As_Vector
check_returned_vector r1
r2 = Data.read_many table1 return=..With_New_Column
r2.should_be_a Table
r2.column_names . should_equal ["Some column", "Value"]
r2.at "Some column" . to_vector . should_equal files_vector
check_returned_vector (r2.at "Value" . to_vector)
group_builder.specify "should read files listed in a Table with `path` column" <|
table2 = Table.new [["X", [1, 2]], ["path", files_vector]]
r2 = Data.read_many table2 return=..Vector
# TODO like above
check_loaded_vector r2
Problems.assume_no_problems r2
table1 = Table.new [["X", [1, 2]], ["path", files_vector]]
r1 = Data.read_many table1 return=..As_Vector
check_returned_vector r1
Problems.assume_no_problems r1
r2 = Data.read_many table1 return=..With_New_Column
r2.should_be_a Table
r2.column_names . should_equal ["X", "path", "Value"]
r2.at "X" . to_vector . should_equal [1, 2]
r2.at "path" . to_vector . should_equal files_vector
check_returned_vector (r2.at "Value" . to_vector)
# Test that this is really case insensitive
table3 = Table.new [["X", [1, 2]], ["pAtH", files_vector]]
r3 = Data.read_many table3 return=..Vector
check_loaded_vector r3
Problems.assume_no_problems r3
r3 = Data.read_many table3 return=..With_New_Column
r3.should_be_a Table
r3.column_names . should_equal ["X", "pAtH", "Value"]
check_returned_vector (r3.at "Value" . to_vector)
group_builder.specify "will fail if no `path` column can be found or its ambiguous" <|
group_builder.specify "should fail if no `path` column can be found or its ambiguous" <|
table1 = Table.new [["X", [1, 2]], ["Y", files_vector]]
r1 = Data.read_many table1 return=..Vector
r1 = Data.read_many table1 return=..As_Vector
r1.should_fail_with Illegal_Argument
table2 = Table.new [["X", [1, 2]], ["path", files_vector], ["Path", [3, 4]]]
r2 = Data.read_many table2 return=..Vector
r2 = Data.read_many table2 return=..As_Vector
r2.should_fail_with Illegal_Argument
group_builder.specify "fails if a DB Table or Column is provided, telling to materialize first to in-memory" <|
@ -71,11 +95,11 @@ add_specs suite_builder =
p : Text -> p
table = (Table.new [["path", paths_vector]]).select_into_database_table connection "test_table" temporary=True
r = Data.read_many table return=..Vector
r = Data.read_many table return=..As_Vector
r.should_fail_with Illegal_Argument
col = table.at "path"
r2 = Data.read_many col return=..Vector
r2 = Data.read_many col return=..As_Vector
r2.should_fail_with Illegal_Argument
group_builder.specify "fails if a column of invalid type is provided" <|
@ -84,3 +108,284 @@ add_specs suite_builder =
Data.read_many table . should_fail_with Invalid_Value_Type
Data.read_many (table.at "path") . should_fail_with Invalid_Value_Type
Data.read_many (table.select_columns ["X"]) . should_fail_with Invalid_Value_Type
group_builder.specify "should return a merged table by default" <|
r1 = Data.read_many (Column.from_vector "my column" files_vector)
r1.should_be_a Table
r1.column_names . should_equal ["my column", "Value", "a", "b", "c"]
r1.at "my column" . to_vector . should_equal [files_vector.first, files_vector.second, files_vector.second]
check_common_columns r1
r2 = Data.read_many (Table.new [["X", [100, 200]], ["Path", files_vector], ["Y", [300, 400]]])
r2.should_be_a Table
r2.column_names . should_equal ["X", "Path", "Y", "Value", "a", "b", "c"]
# The second row is duplicated because it gets expanded along with the loaded table, which has 2 rows
r2.at "X" . to_vector . should_equal [100, 200, 200]
r2.at "Y" . to_vector . should_equal [300, 400, 400]
check_common_columns r2
r3 = Data.read_many files_vector
r3.should_be_a Table
r3.column_names . should_equal ["Path", "Value", "a", "b", "c"]
check_common_columns r3
group_builder.specify "if input is a Vector, the default can be overridden to return a new column" <|
r1 = Data.read_many files_vector return=..With_New_Column
r1.should_be_a Table
r1.column_names . should_equal ["Path", "Value"]
r1.at "Path" . to_vector . should_equal files_vector
check_returned_vector (r1.at "Value" . to_vector)
group_builder.specify "should merge files that read as non-Table values into a Table using reasonable defaults" <|
with_temp_dir base_dir->
# raw JS Object - we want it to expand to a single row - same as if it were in a 1-element array
(JS_Object.from_pairs [["a", 1], ["b", 2]]).to_json.write (base_dir / "1_js_object.json")
# array of JS objects
[JS_Object.from_pairs [["a", 30], ["b", 40], ["c", "foobar"]], JS_Object.from_pairs [["a", 50], ["b", 60]]].to_json.write (base_dir / "2_js_array.json")
# JS array of numbers
[100, 200, 300].to_json.write (base_dir / "3_js_numbers.json")
# a Table
(Table.new [["a", [-1, -2]], ["d", [-4, -5]]]).write (base_dir / "4_table.tsv")
# a plain text value
"Hi!".write (base_dir / "5_plain_text.txt")
# JS null
"null".write (base_dir / "6_js_null.json")
# a JS string
'"str"'.write (base_dir / "7_js_string.json")
files = Data.list base_dir . sort on=(.name)
r = Data.read_many files
r.should_be_a Table
within_table r <|
# We transform the Path to just the file name
rows = (r.set (r.at "Path" . map .name) "Path").rows.map .to_vector
# TODO Once 6281 is done we should replace 100 with "100" etc.
null = Nothing
r.column_names . should_equal ["Path", "a", "b", "c", "Value", "d"]
rows.at 0 . should_equal ["1_js_object.json", 1, 2, null, null, null]
rows.at 1 . should_equal ["2_js_array.json", 30, 40, "foobar", null, null]
rows.at 2 . should_equal ["2_js_array.json", 50, 60, null, null, null]
rows.at 3 . should_equal ["3_js_numbers.json", null, null, null, 100, null]
rows.at 4 . should_equal ["3_js_numbers.json", null, null, null, 200, null]
rows.at 5 . should_equal ["3_js_numbers.json", null, null, null, 300, null]
rows.at 6 . should_equal ["4_table.tsv", -1, null, null, null, -4]
rows.at 7 . should_equal ["4_table.tsv", -2, null, null, null, -5]
rows.at 8 . should_equal ["5_plain_text.txt", null, null, null, "Hi!", null]
rows.at 9 . should_equal ["6_js_null.json", null, null, null, null, null]
rows.at 10 . should_equal ["7_js_string.json", null, null, null, "str", null]
r.at "a" . value_type . should_equal Value_Type.Integer
r.at "b" . value_type . should_equal Value_Type.Integer
r.at "c" . value_type . should_equal Value_Type.Char
# TODO change to Char once 6281 is done
r.at "Value" . value_type . should_equal Value_Type.Mixed
r.at "d" . value_type . should_equal Value_Type.Integer
# Marker so we do not forget
group_builder.specify "TODO" pending="Once 6281 is done we should update the test above and others." Nothing
group_builder.specify "should warn when a file loads as empty array and not include it in the As_Merged_Table result" <|
# But such array should not influence the columns present:
with_temp_dir base_dir->
'{"a": 1}'.write (base_dir / "1_js_object.json")
"[]".write (base_dir / "2_empty_array.json")
'[{"a": 2, "b": "..."}]'.write (base_dir / "3_js_object.json")
files = Data.list base_dir . sort on=(.name)
r1 = Data.read_many files return=..With_New_Column
r1.should_be_a Table
Problems.assume_no_problems r1
r1.row_count . should_equal 3
r1 . at "Path" . map .name . to_vector . should_equal ["1_js_object.json", "2_empty_array.json", "3_js_object.json"]
r1.at "Value" . at 1 . should_equal []
r2 = Data.read_many files
r2.should_be_a Table
w2 = Problems.expect_only_warning No_Rows r2
w2.to_display_text . should_contain "2_empty_array.json"
w2.to_display_text . should_contain "loaded as an empty array, so it is not included in the `As_Merged_Table` result of `read_many`."
within_table r2 <|
r2.column_names . should_equal ["Path", "a", "b"]
r2.row_count . should_equal 2
r2.at "Path" . map .name . to_vector . should_equal ["1_js_object.json", "3_js_object.json"]
r2.at "a" . to_vector . should_equal [1, 2]
r2.at "b" . to_vector . should_equal [Nothing, "..."]
group_builder.specify "should warn when a Table loads as 0-rows and is not included in As_Merged_Table result, but it should still influence the result columns" <|
with_temp_dir base_dir->
'A,B'.write (base_dir / "1_empty_table.csv")
'B,C\n1,2'.write (base_dir / "2_table.csv")
files = Data.list base_dir . sort on=(.name)
r1 = Data.read_many files format=(..Delimited ',' headers=True) return=..With_New_Column
r1.should_be_a Table
Problems.assume_no_problems r1
r1.row_count . should_equal 2
r1.at "Path" . map .name . to_vector . should_equal ["1_empty_table.csv", "2_table.csv"]
empty_table1 = r1.at "Value" . at 0
empty_table1.should_be_a Table
empty_table1.row_count . should_equal 0
empty_table1.column_names . should_equal ["A", "B"]
r2 = Data.read_many files format=(..Delimited ',' headers=True) return=..As_Merged_Table
r2.should_be_a Table
# TODO: once 6281 is done, change expect_only_warning
w2 = Problems.expect_warning No_Rows r2
w2.to_display_text . should_contain "1_empty_table.csv"
w2.to_display_text . should_contain "loaded as a table with 0 rows, so it did not contribute any rows to the `As_Merged_Table` result of `read_many`."
within_table r2 <|
## But it DOES influence the columns present.
That is because the column structure is a 'structural' property:
we want the structure of the result to be the same regardless of whether it has 0 or more rows.
If the workflow is run next time with this table having some rows, it is better that the structure is preserved.
Otherwise, a workflow that was running fine could stop working once a file changes to contain no rows, as some column could no longer be found.
r2.column_names . should_equal ["Path", "A", "B", "C"]
r2.row_count . should_equal 1
r2.at "Path" . map .name . to_vector . should_equal ["2_table.csv"]
r2.at "A" . to_vector . should_equal [Nothing]
# TODO: once 6281 is done, change '1' to 1
r2.at "B" . to_vector . should_equal ['1']
r2.at "C" . to_vector . should_equal [2]
group_builder.specify "should allow to customize how the tables are merged" <|
with_temp_dir base_dir->
'{"a": 1, "b": 2}'.write (base_dir / "1_js_object.json")
'{"b": 3, "c": 4}'.write (base_dir / "2_js_object.json")
files = Data.list base_dir . sort on=(.name)
r1 = Data.read_many files return=(..As_Merged_Table columns_to_keep=..In_All)
r1.should_be_a Table
within_table r1 <|
r1.column_names . should_equal ["Path", "b"]
r1.at "b" . to_vector . should_equal [2, 3]
r2 = Data.read_many files return=(..As_Merged_Table columns_to_keep=(..In_List ["a", "c"]))
r2.should_be_a Table
within_table r2 <|
r2.column_names . should_equal ["Path", "a", "c"]
r2.at "a" . to_vector . should_equal [1, Nothing]
r2.at "c" . to_vector . should_equal [Nothing, 4]
r3 = Data.read_many files return=(..As_Merged_Table match=..By_Position)
r3.should_be_a Table
within_table r3 <|
r3.column_names . should_equal ["Path", "a", "b"]
r3.at "a" . to_vector . should_equal [1, 3]
r3.at "b" . to_vector . should_equal [2, 4]
group_builder.specify "should fallback to Char if no common type can be found for primitive values" <|
with_temp_dir base_dir->
'{"a": 1}'.write (base_dir / "1_js_object.json")
'{"a": "str"}'.write (base_dir / "2_js_object.json")
files = Data.list base_dir . sort on=(.name)
r = Data.read_many files
r.should_be_a Table
r.column_names . should_equal ["Path", "a"]
r.at "a" . value_type . should_equal Value_Type.Char
r.at "a" . to_vector . should_equal ["1", "str"]
group_builder.specify "but should keep Mixed type if more complex types are found, like dictionary" <|
with_temp_dir base_dir->
'{"a": {}}'.write (base_dir / "1_js_object.json")
'{"a": []}'.write (base_dir / "2_js_object.json")
files = Data.list base_dir . sort on=(.name)
r = Data.read_many files
r.should_be_a Table
r.column_names . should_equal ["Path", "a"]
r.at "a" . value_type . should_equal Value_Type.Mixed
r.at "a" . to_vector . should_equal [JS_Object.from_pairs [], []]
group_builder.specify "has sane behaviour if no files were provided" <|
col = Column.from_vector "C" []
Data.read_many col return=..As_Vector . should_equal []
t = Data.read_many col
t.should_be_a Table
t.row_count . should_equal 0
t.column_names . should_equal ["C"]
t2 = Data.read_many []
t2.should_be_a Table
t2.row_count . should_equal 0
t2.column_names . should_equal ["Path"]
t3 = Data.read_many [] return=..With_New_Column
t3.should_be_a Table
t3.row_count . should_equal 0
t3.column_names . should_equal ["Path", "Value"]
group_builder.specify "should have sane behaviour if all files are weird" <|
with_temp_dir base_dir->
'{}'.write (base_dir / "1_js_object.json")
'[{}, {}]'.write (base_dir / "2_js_array.json")
files = Data.list base_dir . sort on=(.name)
r = Data.read_many files
r.should_be_a Table
Problems.expect_warning Illegal_Argument r
within_table r <|
r.column_names . should_equal ["Path", "Value"]
empty = JS_Object.from_pairs []
r.at "Value" . to_vector . should_equal [empty, empty, empty]
with_temp_dir base_dir->
'[]'.write (base_dir / "1_empty_array.json")
'[]'.write (base_dir / "2_empty_array.json")
files = Data.list base_dir . sort on=(.name)
r1 = Data.read_many files return=..With_New_Column
r1.should_be_a Table
r1.row_count . should_equal 2
r1.column_names . should_equal ["Path", "Value"]
Problems.assume_no_problems r1
r2 = Data.read_many files
r2.should_be_a Table
r2.row_count . should_equal 0
r2.column_names . should_equal ["Path"]
Problems.expect_only_warning No_Rows r2
group_builder.specify "should rename duplicated columns, keeping columns from the input unchanged" <|
tmp_file = enso_project.data / "transient" / "table.csv"
(Table.new [["Path", [1]], ["Col", [2]]]).write tmp_file on_existing_file=..Overwrite . should_succeed
Panic.with_finalizer tmp_file.delete <|
col = Column.from_vector "Col" [tmp_file.path]
r = Data.read_many col return=..As_Merged_Table
r.column_names . should_equal ["Col", "Path", "Col 1"]
r.at "Col" . to_vector . should_equal [tmp_file.path]
r.at "Path" . to_vector . should_equal [1]
r.at "Col 1" . to_vector . should_equal [2]
table = Table.new [["Path", [tmp_file.path]], ["Col", ["X"]], ["Value", ["Y"]]]
r2 = Data.read_many table return=..As_Merged_Table
r2.column_names . should_equal ["Path", "Col", "Value", "Path 1", "Col 1"]
r2.at "Path" . to_vector . should_equal [tmp_file.path]
r2.at "Col" . to_vector . should_equal ["X"]
r2.at "Value" . to_vector . should_equal ["Y"]
r2.at "Path 1" . to_vector . should_equal [1]
r2.at "Col 1" . to_vector . should_equal [2]
r3 = Data.read_many table return=..With_New_Column
r3.column_names . should_equal ["Path", "Col", "Value", "Value 1"]
r3.at "Path" . to_vector . should_equal [tmp_file.path]
r3.at "Col" . to_vector . should_equal ["X"]
r3.at "Value" . to_vector . should_equal ["Y"]
r3.at "Value 1" . first . should_be_a Table
private with_temp_dir callback =
base_dir = enso_project.data / "transient" / "read_many_test"
base_dir.delete_if_exists recursive=True
base_dir.create_directory . should_succeed
Panic.with_finalizer (base_dir.delete recursive=True) (callback base_dir)

View File

@ -75,12 +75,12 @@ add_specs suite_builder =
suite_builder.group "from_objects with JSON (single values)" group_builder->
group_builder.specify "Generates a single-row table from a JSON object" <|
expected = Table.new [["Key", ["first", "last", "age"]], ["Value", ["Mary", "Smith", 23]]]
expected = Table.new [["first", ["Mary"]], ["last", ["Smith"]], ["age", [23]]]
Table.from_objects (data.uniform_json.at 0) . should_equal expected
group_builder.specify "works fine even if requested fields are duplicated" <|
expected = Table.new [["Key", ["first", "last", "age"]], ["Value", ["Mary", "Smith", 23]]]
Table.from_objects (data.uniform_json.at 0) ["Key", "Value", "Key", "Key"] . should_equal expected
expected = Table.new [["first", ["Mary"]], ["last", ["Smith"]]]
Table.from_objects (data.uniform_json.at 0) ["first", "last", "first", "first"] . should_equal expected
suite_builder.group "from_objects with uniform JSON vector" group_builder->
group_builder.specify "Generates a table from a vector of JSON objects" <|
@ -472,4 +472,3 @@ main filter=Nothing =
suite = Test.build suite_builder->
add_specs suite_builder
suite.run_with_filter filter

View File

@ -30,4 +30,3 @@ main filter=Nothing =
Visualization_Spec.add_specs suite_builder
Widgets_Spec.add_specs suite_builder
suite.run_with_filter filter

View File

@ -12,7 +12,7 @@ main filter=Nothing =
add_specs suite_builder =
suite_builder.group "Can run each helper" group_builder->
suite_builder.group "Widget_Helpers run" group_builder->
group_builder.specify "make_regex_text_widget" <|
w = make_regex_text_widget
j = (Widgets.get_widget_json w) . to_text

View File

@ -6,12 +6,14 @@ import project.Widgets.Database_Widgets_Spec
import project.Widgets.File_Format_Widgets_Spec
import project.Widgets.Table_Widgets_Spec
import project.Widgets.Text_Widgets_Spec
import project.Widgets.Widget_Helpers_Spec
add_specs suite_builder =
Table_Widgets_Spec.add_specs suite_builder
Database_Widgets_Spec.add_specs suite_builder
File_Format_Widgets_Spec.add_specs suite_builder
Text_Widgets_Spec.add_specs suite_builder
Widget_Helpers_Spec.add_specs suite_builder
main filter=Nothing =
suite = Test.build suite_builder->