mirror of
https://github.com/enso-org/enso.git
synced 2024-12-19 04:01:46 +03:00
parent
1476c47fa9
commit
dc55b4e9ce
@ -29,7 +29,7 @@ import project.System.File.Generic.Writable_File.Writable_File
|
||||
from project.Data.Boolean import Boolean, False, True
|
||||
from project.Meta.Enso_Project import enso_project
|
||||
from project.Metadata.Choice import Option
|
||||
from project.Metadata.Widget import Folder_Browse, Text_Input, Vector_Editor
|
||||
from project.Metadata.Widget import File_Browse, Folder_Browse, Text_Input, Vector_Editor
|
||||
from project.System.File_Format import Auto_Detect, File_Format
|
||||
|
||||
## ALIAS load, open
|
||||
@ -138,14 +138,15 @@ read path=(Missing_Argument.throw "path") format=Auto_Detect (on_problems : Prob
|
||||
|
||||
files = Data.list name_filter="*.csv"
|
||||
example_csv_dir_to_table = Data.read_many files
|
||||
@paths (Vector_Editor item_editor=File_Browse item_default='""' display=..Always)
@format File_Format.default_widget
@return Return_As.default_widget
read_many : Many_Files_List -> File_Format -> Return_As -> Problem_Behavior -> Any ! File_Error
read_many (paths : Many_Files_List = Missing_Argument.throw "paths") format=Auto_Detect return=..As_Merged_Table (on_problems : Problem_Behavior = ..Report_Warning) =
    # `return` may still be an unresolved constructor, so it is resolved to a
    # concrete `Return_As` before anything is read.
    return_as = Return_As.resolve return
    # Every path is read through `Data.read` with the same format and the same
    # problem handling.
    loaded_objects = paths.paths_to_load.map on_problems=on_problems path->
        Data.read path format on_problems
    # The selected strategy assembles the final result; it also receives
    # `on_problems`.
    return_as.make_return paths loaded_objects on_problems
|
||||
|
||||
## ALIAS load text, open text
|
||||
GROUP Input
|
||||
|
@ -5,6 +5,7 @@ import project.Data.Vector.Vector
|
||||
import project.Error.Error
|
||||
import project.Errors.Common.Type_Error
|
||||
import project.Errors.Illegal_Argument.Illegal_Argument
|
||||
import project.Errors.Problem_Behavior.Problem_Behavior
|
||||
import project.Function.Function
|
||||
import project.Metadata.Display
|
||||
import project.Metadata.Widget
|
||||
@ -32,16 +33,24 @@ type Return_As
|
||||
to_display_text self -> Text = self.underlying.to_display_text
|
||||
|
||||
## PRIVATE
   Builds the final result by delegating to the wrapped `underlying` value.

   Arguments:
   - input: The list of files that was requested.
   - objects: The objects that were loaded.
   - on_problems: The problem behavior, passed on to the underlying
     `make_return`.
make_return self (input : Many_Files_List) (objects : Vector Any) (on_problems : Problem_Behavior) =
    self.underlying.make_return input objects on_problems
|
||||
|
||||
## PRIVATE
|
||||
Resolve an unresolved constructor to the actual type.
|
||||
private resolve value = case value of
|
||||
_ : Function ->
|
||||
# Result when we fail to resolve an autoscoped constructor.
|
||||
symbol_not_found =
|
||||
is_the_default = (Return_As_Table_Mock.resolve value).is_nothing.not
|
||||
case is_the_default of
|
||||
True ->
|
||||
Error.throw (Illegal_Argument.Error "The method returns a merged Table by default, but Standard.Table is not imported. Add an import, or use `As_Vector`.")
|
||||
False ->
|
||||
Error.throw (Illegal_Argument.Error "Expected Return_As, but got a function.")
|
||||
types = _get_known_return_classes
|
||||
try_next idx =
|
||||
if idx >= types.length then Error.throw (Illegal_Argument.Error "Expected Return_As, but got a function.") else
|
||||
if idx >= types.length then symbol_not_found else
|
||||
resolved = (types.at idx).resolve value
|
||||
if resolved.is_nothing then @Tail_Call try_next (idx + 1) else resolved
|
||||
try_next 0
|
||||
@ -50,28 +59,37 @@ type Return_As
|
||||
|
||||
## PRIVATE
   Builds a single-choice widget offering the dropdown options of every
   known return class.
default_widget display:Display=..When_Modified -> Widget =
    # Each class contributes a vector of options, hence `flat_map`.
    options = _get_known_return_classes.flat_map .get_dropdown_options
    Single_Choice display=display values=options
|
||||
|
||||
## PRIVATE
type Return_As_Base
    ## Will return a Vector of objects that were loaded.
       The order of the returned Vector is the same as in the input.
    As_Vector

    ## PRIVATE
       The dropdown entries offered for this type.
    get_dropdown_options : Vector Option
    get_dropdown_options = [Option "As Vector" "..As_Vector"]

    ## PRIVATE
       Tries to interpret `value` as a `Return_As_Base`.
       Returns `Nothing` if the type check panics with a `Type_Error`.
    resolve value =
        Panic.catch Type_Error (value:Return_As_Base) _->Nothing

    ## PRIVATE
       Returns the loaded objects unchanged.
    make_return self (input : Many_Files_List) (objects : Vector Any) (on_problems : Problem_Behavior) =
        # Neither argument is needed here; the binding marks them as used.
        _ = [input, on_problems]
        objects
|
||||
|
||||
## PRIVATE
   Wraps a `Return_As_Base` value in a `Return_As.Instance`.
Return_As.from (that : Return_As_Base) = Return_As.Instance that
|
||||
|
||||
## PRIVATE
   A mock type used only to check the autoscoped `As_Merged_Table` constructor.

   TODO If we get support for `Unresolved_Constructor` in `Meta`, this type may be removed.
type Return_As_Table_Mock
    private As_Merged_Table columns_to_keep=Nothing match=Nothing

    ## PRIVATE
       Tries to interpret `value` as a `Return_As_Table_Mock`.
       Returns `Nothing` if the type check panics with a `Type_Error`.
    private resolve value =
        Panic.catch Type_Error (value:Return_As_Table_Mock) (_ -> Nothing)
|
||||
|
@ -38,7 +38,7 @@ import Standard.Table.Internal.Table_Ref.Table_Ref
|
||||
import Standard.Table.Internal.Unique_Name_Strategy.Unique_Name_Strategy
|
||||
import Standard.Table.Internal.Value_Type_Helpers
|
||||
import Standard.Table.Internal.Widget_Helpers
|
||||
import Standard.Table.Match_Columns as Match_Columns_Helpers
|
||||
import Standard.Table.Internal.Match_Columns_Helpers
|
||||
import Standard.Table.Row.Row
|
||||
import Standard.Table.Rows_To_Read.Rows_To_Read
|
||||
import Standard.Table.Value_Type.By_Type
|
||||
@ -2461,18 +2461,21 @@ type DB_Table
|
||||
true, a single row is output with `Nothing` for the aggregates column; if
|
||||
false, no row is output at all.
|
||||
|
||||
The following aggregate values are supported:
|
||||
The following values are considered sequence-like:
|
||||
- `Array`
|
||||
- `Vector`
|
||||
- `List`
|
||||
- `Range`
|
||||
- `Date_Range`
|
||||
- `Pair`
|
||||
- `Table`
|
||||
- `Table` (interpreted as a sequence of `Row`s)
|
||||
- `Column`
|
||||
|
||||
Any other values are treated as non-aggregate values, and their rows are kept
|
||||
unchanged.
|
||||
Some aggregates, like dictionaries (`Dictionary`, `JS_Object`) will
|
||||
expand into two columns - one for key and one for the value.
|
||||
|
||||
If a value is not an aggregate, or has no defined way of being expanded,
|
||||
it is left as a single row containing that value unchanged.
|
||||
|
||||
In in-memory tables, it is permitted to mix values of different types.
|
||||
|
||||
|
@ -1,5 +1,6 @@
|
||||
from Standard.Base import Text, Vector
|
||||
from Standard.Base.Metadata import Display, make_single_choice, Widget
|
||||
import Standard.Base.Errors.Common.Missing_Argument
|
||||
|
||||
## Specifies which columns to keep in a union operation.
|
||||
type Columns_To_Keep
|
||||
@ -19,7 +20,7 @@ type Columns_To_Keep
|
||||
|
||||
If a table does not have a column that is specified in the list, it is
|
||||
padded with `Nothing` and a problem is reported.
|
||||
In_List (column_names : Vector Text)
|
||||
In_List (column_names : Vector Text = Missing_Argument.throw "column_names")
|
||||
|
||||
## PRIVATE
|
||||
Same as `In_Any`, but it will warn about columns that are not present in
|
||||
|
@ -16,56 +16,74 @@ import project.Table.Table
|
||||
type Convertible_To_Rows
    ## PRIVATE
       Arguments:
       - original_value: The original value from before conversion.
       - length: The number of rows in the table.
       - getter: Get the value for a specified row.
       - columns: The names for the columns when object is expanded.
         These will be added to the name of the input column.
       - is_sequence: Whether the object is a sequence-like aggregate. This
         decides how `align_sequence_only` treats the conversion.
    Value original_value:Any length:Integer (getter : Integer->Any) (columns:Vector=["Value"]) (is_sequence:Boolean=True)

    ## PRIVATE
       Return the iterator values as a `Vector`.
    to_vector : Vector Any
    to_vector self = 0.up_to self.length . map self.getter

    ## PRIVATE
       Aligns this conversion depending on if we want to only expand sequences
       or all kinds of objects.

       If `sequence_only` is `False` or the conversion reports
       `is_sequence=True`, no changes are made.
       However, if `sequence_only` is `True` and the conversion reports
       `is_sequence=False`, the conversion is replaced with a single row - same
       as the `Any` fallback.
    align_sequence_only self sequence_only:Boolean -> Convertible_To_Rows =
        if sequence_only.not then self else
            if self.is_sequence then self else
                _conversion_from_value_as_single_row self.original_value
|
||||
|
||||
## PRIVATE
   A `Table` is converted through its rows.
Convertible_To_Rows.from that:Table =
    rows = that.rows
    Convertible_To_Rows.from rows

## PRIVATE
Convertible_To_Rows.from that:Column = Convertible_To_Rows.Value that that.length (that.get _)

## PRIVATE
Convertible_To_Rows.from that:Vector = Convertible_To_Rows.Value that that.length (that.get _)

## PRIVATE
Convertible_To_Rows.from that:Array = Convertible_To_Rows.Value that that.length (that.get _)

## PRIVATE
   A `List` is converted through its `Vector` form.
Convertible_To_Rows.from that:List = Convertible_To_Rows.from that.to_vector

## PRIVATE
Convertible_To_Rows.from that:Range = Convertible_To_Rows.Value that that.length (that.get _)

## PRIVATE
Convertible_To_Rows.from that:Pair = Convertible_To_Rows.Value that that.length (that.get _)

## PRIVATE
Convertible_To_Rows.from that:Date_Range = Convertible_To_Rows.Value that that.length (that.get _)

## PRIVATE
   A `Dictionary` yields one `Key_Value.Pair` per entry, in "Key" and "Value"
   columns. It is not marked as a sequence.
Convertible_To_Rows.from that:Dictionary =
    vals = that.to_vector.map p-> Key_Value.Pair p.first p.second
    Convertible_To_Rows.Value that vals.length (vals.get _) ["Key", "Value"] is_sequence=False

## PRIVATE
   A `JS_Object` yields one `Key_Value.Pair` per field, in "Key" and "Value"
   columns. It is not marked as a sequence.
Convertible_To_Rows.from that:JS_Object =
    vals = that.map_with_key k->v-> Key_Value.Pair k v
    Convertible_To_Rows.Value that vals.length (vals.get _) ["Key", "Value"] is_sequence=False

## PRIVATE
   Fallback: any other value becomes a single row holding that value.
Convertible_To_Rows.from (that:Any) =
    _conversion_from_value_as_single_row that

## PRIVATE
   Builds a one-row, non-sequence conversion whose only row is `value`.
private _conversion_from_value_as_single_row value =
    Convertible_To_Rows.Value value 1 (n-> if n==0 then value else Nothing) is_sequence=False
|
||||
|
||||
## PRIVATE
|
||||
type Key_Value
|
||||
|
@ -509,7 +509,7 @@ type No_Common_Type
|
||||
Create a human-readable version of the error.
|
||||
to_display_text : Text
|
||||
to_display_text self =
|
||||
types = self.types.map .to_display_text . join ", "
|
||||
types = self.types.distinct.map .to_display_text . join ", "
|
||||
prefix = "No common type was found for types: "+types
|
||||
location = case self.related_column_name of
|
||||
column_name : Text -> " when unifying column ["+column_name+"]"
|
||||
@ -517,8 +517,7 @@ type No_Common_Type
|
||||
suffix_type = case self of
|
||||
No_Common_Type.Error _ _ -> "."
|
||||
No_Common_Type.Warning_Convert_To_Text _ _ -> ", so the values were converted to text."
|
||||
suffix_mixed = " If you want to have mixed types instead, please cast one of the columns to `Mixed` beforehand."
|
||||
prefix + location + suffix_type + suffix_mixed
|
||||
prefix + location + suffix_type
|
||||
|
||||
## PRIVATE
|
||||
to_text self -> Text =
|
||||
@ -889,3 +888,12 @@ type Mixing_Date_Time_Types
|
||||
"Mixing Date and Date_Time values"+location+": the Date values have been automatically converted to Date_Time by adding a time of 00:00 in the default time-zone."
|
||||
Mixing_Date_Time_Types.Implicit_Time_Zone _ ->
|
||||
"Mixing Date_Time values with and without timezone"+location+". A default timezone has been assumed where it was missing."
|
||||
|
||||
## Indicates that a table with no rows has been returned.
type No_Rows
    ## PRIVATE
       Arguments:
       - message: Text giving additional context about the problem.
    private Warning message:Text

    ## PRIVATE
       The display text is the stored message itself.
    to_display_text self -> Text = self.message
|
||||
|
@ -1,4 +1,6 @@
|
||||
from Standard.Base import all
|
||||
import Standard.Base.Data.Vector.No_Wrap
|
||||
import Standard.Base.Errors.Common.Type_Error
|
||||
import Standard.Base.Errors.Deprecated.Deprecated
|
||||
import Standard.Base.Errors.File_Error.File_Error
|
||||
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
||||
@ -13,18 +15,23 @@ from Standard.Base.Data.Filter_Condition import sql_like_to_regex
|
||||
from Standard.Base.Metadata.Choice import Option
|
||||
from Standard.Base.Metadata.Widget import Multiple_Choice, Single_Choice
|
||||
|
||||
import project.Column.Column
|
||||
import project.Columns_To_Keep.Columns_To_Keep
|
||||
import project.Excel.Excel_Range.Excel_Range
|
||||
import project.Headers.Headers
|
||||
import project.Internal.Excel_Reader
|
||||
import project.Internal.Excel_Section.Excel_Section
|
||||
import project.Internal.Read_Many_As_Merged_Table_Strategy.Read_Many_As_Merged_Table_Strategy
|
||||
import project.Internal.Read_Many_As_Merged_Table_Strategy.Read_Many_As_Table_Result
|
||||
import project.Internal.Java_Problems
|
||||
import project.Internal.Problem_Builder.Problem_Builder
|
||||
import project.Match_Columns.Match_Columns
|
||||
import project.Return_As_Table.Return_As_Table
|
||||
import project.Rows_To_Read.Rows_To_Read
|
||||
import project.Table.Table
|
||||
from project.Errors import Empty_Sheet
|
||||
from project.Errors import Empty_Sheet, No_Rows
|
||||
from project.Internal.Excel_Reader import handle_invalid_location
|
||||
from project.Internal.Table_Helpers import duplicate_rows
|
||||
|
||||
polyglot java import java.io.File as Java_File
|
||||
polyglot java import org.apache.poi.ss.usermodel.Workbook
|
||||
@ -312,6 +319,11 @@ type Excel_Workbook
|
||||
_ -> ""
|
||||
"Excel_Workbook"+associated_regular_file
|
||||
|
||||
## PRIVATE
   A short human-readable description of the workbook; it is the same text
   that `to_text` produces.
to_display_text : Text
to_display_text self = self.to_text
|
||||
|
||||
## PRIVATE
|
||||
Provides a JS object representation for use in visualizations.
|
||||
to_js_object : JS_Object
|
||||
@ -342,30 +354,29 @@ type Excel_Workbook
|
||||
problem.
|
||||
- headers: whether to use the first row as headers (default is
|
||||
`Detect_Headers`) for each sheet.
|
||||
- return: how to return the tables. Defaults to `Merged_Table` meaning
|
||||
- return: how to return the tables. Defaults to `As_Merged_Table` meaning
|
||||
the tables are merged into a single table.
|
||||
- on_problems: how to handle problems during reading. Defaults to
|
||||
`Report_Warning`.
|
||||
@sheet_names build_sheet_selector
read_many : Vector Text -> Headers -> Return_As_Table -> Problem_Behavior -> Table
read_many self sheet_names:Vector=self.sheet_names (headers:Headers=..Detect_Headers) (return=..As_Merged_Table) (on_problems:Problem_Behavior=..Report_Warning) =
    # Accepts both `Return_As_Table` and the deprecated `Return_As` values.
    resolved_return = _resolve_return_as_compatibility return
    if sheet_names.is_empty then Error.throw (Illegal_Argument.Error "No sheets selected.") else
        case resolved_return of
            Return_As_Table.With_New_Column ->
                # One row per requested sheet, with the loaded table as a cell.
                tables_or_nothing = sheet_names.map on_problems=on_problems address->
                    self.read address headers on_problems=on_problems
                Table.new [["Sheet Name", sheet_names], ["Table", tables_or_nothing]]
            Return_As_Table.As_Merged_Table columns_to_keep match ->
                # `_read_sheets` pairs each sheet name with its table, or with
                # `Nothing`; only the pairs holding a table are merged.
                names_and_tables = _read_sheets self sheet_names headers on_problems
                only_valid = names_and_tables.filter p-> p.second.is_nothing.not
                no_valid_sheets = only_valid.is_empty
                if no_valid_sheets then Error.throw (Illegal_Argument.Error "No valid sheets found.") else
                    valid_names = only_valid.map .first
                    valid_tables = only_valid.map .second
                    unified_tables = Table.from_union valid_tables columns_to_keep=columns_to_keep match_columns=match on_problems=on_problems
                    # The sheet-name table is zipped in front of the unioned data.
                    (_sheet_names_table valid_names valid_tables).zip unified_tables right_prefix="" on_problems=on_problems
|
||||
|
||||
## PRIVATE
|
||||
close_connection c = c.close
|
||||
@ -375,17 +386,80 @@ build_sheet_selector workbook:Excel_Workbook display:Display=Display.Always -> W
|
||||
names = workbook.sheet_names.map n-> Option n n.pretty
|
||||
Multiple_Choice display=display values=names
|
||||
|
||||
## PRIVATE
   DEPRECATED

   How to merge sheets into a single table.
   This type is deprecated and has been superseded by `Return_As_Table`.
   It is kept only for compatibility reasons.
type Return_As
    ## DEPRECATED
       Each sheet is returned as a row.

       This option is deprecated, because it has been renamed. Please use the
       equivalent `With_New_Column` instead.
    Table_Of_Tables

    ## DEPRECATED
       All sheets are merged into a single table. A union operation is performed.

       This option is deprecated, because it has been renamed. Please use the
       equivalent `As_Merged_Table` instead.
    Merged_Table (columns_to_keep : Columns_To_Keep = Columns_To_Keep.In_Any) (match : Match_Columns = Match_Columns.By_Name)
|
||||
|
||||
## A helper method that ensures that we can take the new `Return_As_Table` type
   as well as the deprecated `Return_As` type, together with autoscoping.
   With autoscoping, the `As_Merged_Table` constructor is ambiguous, so plain
   autoscoping would not work and we do some workarounds to make it work.
   In case of the ambiguity, the new-style `Return_As_Table.As_Merged_Table`
   variant is preferred.

   The result is always a `Return_As_Table`. A value given in the deprecated
   style is translated and gets a `Deprecated` warning attached. A value that
   matches neither type results in an `Illegal_Argument` error.
private _resolve_return_as_compatibility return =
    # The new type is tried first, so it wins whenever both would match.
    new_style = Panic.catch Type_Error (return:Return_As_Table) (_->Nothing)
    new_style.if_nothing <|
        old_style = Panic.catch Type_Error (return:Return_As) (_->Nothing)
        if old_style.is_nothing then Error.throw (Illegal_Argument.Error "Valid values for `return` are: `..With_New_Column` or `..As_Merged_Table`. Instead, got `"+return.to_display_text+"`.") else
            case old_style of
                Return_As.Table_Of_Tables ->
                    Warning.attach (Deprecated.Warning "Standard.Table.Excel.Excel_Workbook.Return_As" "Table_Of_Tables" "Deprecated: use `..With_New_Column` instead.") <|
                        Return_As_Table.With_New_Column
                Return_As.Merged_Table columns_to_keep match ->
                    Warning.attach (Deprecated.Warning "Standard.Table.Excel.Excel_Workbook.Return_As" "Merged_Table" "Deprecated: use `..As_Merged_Table` instead.") <|
                        Return_As_Table.As_Merged_Table columns_to_keep match
|
||||
|
||||
## Creates a query widget for the `read` method.

   The choices are a by-index entry, a range entry pre-filled with an address
   on the first sheet, and one entry per value of the "Name" column of
   `wb.tables`.
private _query_widget wb:Excel_Workbook display:Display=..Always -> Widget =
    # Single quotes in the sheet name are doubled before it is quoted.
    default_address = "'" + wb.sheet_names.first.replace "'" "''" + "'!A1:E5"
    fixed_options = [Option "<By Index>" "1", Option "<Range>" "Excel_Range.from_address "+default_address.pretty]
    table_options = wb.tables.at "Name" . to_vector . map t-> Option t t.pretty
    Single_Choice display=display values=(fixed_options + table_options)
|
||||
|
||||
## PRIVATE
   Reads every requested sheet and pairs its name with the loaded table.

   Unless `on_problems` is `Report_Error`, a sheet that fails to load is
   paired with `Nothing` and the failure is reported as a `No_Rows` problem.
private _read_sheets workbook:Excel_Workbook (sheet_names : Vector Text) headers:Headers on_problems:Problem_Behavior -> Vector (Pair Text (Table | Nothing)) =
    sheet_names.map on_problems=No_Wrap.Value sheet_name->
        loaded = workbook.read sheet_name headers on_problems=on_problems
        name_and_table = Pair.new sheet_name loaded
        # In Report_Error mode the original error is forwarded without
        # wrapping; otherwise a failure is replaced by a `Nothing` entry with
        # the problem attached.
        case on_problems == Problem_Behavior.Report_Error of
            True -> name_and_table
            False -> name_and_table.catch Any error->
                on_problems.attach_problem_after (Pair.new sheet_name Nothing) <|
                    (No_Rows.Warning "The sheet "+sheet_name.to_display_text+" failed to load, so it is not included in the `As_Merged_Table` result of `read_many`. The error was: "+error.to_display_text)
|
||||
|
||||
## PRIVATE
   Builds a one-column "Sheet Name" table and passes it to `duplicate_rows`
   together with the row count of each loaded table.
private _sheet_names_table (sheet_names : Vector Text) (loaded_tables : Vector Table) -> Table =
    names_table = Column.from_vector "Sheet Name" sheet_names . to_table
    row_counts = loaded_tables.map .row_count
    duplicate_rows names_table row_counts
|
||||
|
||||
## PRIVATE
   Loads all sheets in the Excel workbook and merges them into a single table.

   If no sheet could be loaded, the result is `No_Data` with a `No_Rows`
   problem attached. Otherwise the sheet names form the metadata and the union
   of the loaded sheets forms the data.
Read_Many_As_Merged_Table_Strategy.from (that:Excel_Workbook) =
    callback path columns_to_keep match_columns on_problems =
        names_and_tables = _read_sheets that that.sheet_names ..Detect_Headers on_problems
        loaded = names_and_tables.filter p-> p.second.is_nothing.not
        case loaded.is_empty of
            False ->
                loaded_names = loaded.map .first
                loaded_tables = loaded.map .second
                metadata = _sheet_names_table loaded_names loaded_tables
                data = Table.from_union loaded_tables columns_to_keep match_columns on_problems
                Read_Many_As_Table_Result.Table metadata=metadata data=data
            True ->
                on_problems.attach_problem_after Read_Many_As_Table_Result.No_Data <|
                    (No_Rows.Warning "The workbook "+path.to_display_text+" failed to load any sheets, so it is not included in the `As_Merged_Table` result of `read_many`.")
    Read_Many_As_Merged_Table_Strategy.Value callback
|
||||
|
@ -95,9 +95,9 @@ XML_Element.to_table self =
|
||||
headers = Examples.simple_table_json_headers
|
||||
Table.from_objects json headers
|
||||
@fields (Widget.Vector_Editor item_editor=Widget.Text_Input item_default='""')
Table.from_objects : Any -> Vector | Nothing -> Table
Table.from_objects value (fields : Vector | Nothing = Nothing) =
    # The definition takes only `value` and `fields`, so the signature above
    # lists no `Boolean` argument. Dictionaries are not treated as sequences.
    Expand_Objects_Helpers.create_table_from_objects value fields treat_dictionary_as_sequence=False
|
||||
|
||||
## GROUP Standard.Base.Conversions
|
||||
ICON convert
|
||||
|
@ -1,3 +1,5 @@
|
||||
private
|
||||
|
||||
from Standard.Base import all
|
||||
import Standard.Base.Data.Vector.Builder
|
||||
import Standard.Base.Errors.Common.Index_Out_Of_Bounds
|
||||
@ -79,20 +81,27 @@ expand_column (table : Table) (column : Text | Integer) (fields : (Vector Text)
|
||||
table = Table.new [["aaa", [1, 2]], ["bbb", [[30, 31], [40, 41]]]]
|
||||
# => Table.new [["aaa", [1, 1, 2, 2]], ["bbb", [30, 31, 40, 41]]]
|
||||
expand_to_rows : Table -> Text | Integer -> Boolean -> Boolean -> Table ! Type_Error | No_Such_Column | Index_Out_Of_Bounds
expand_to_rows table column:(Text | Integer) at_least_one_row=False sequences_only=False = if column.is_a Integer then expand_to_rows table (table.at column).name at_least_one_row sequences_only else
    # A column given by index is resolved to its name above. `sequences_only`
    # is forwarded there, so both ways of selecting the column behave alike.

    # Both helpers align the conversion with `sequences_only` first, so that
    # the expanded values and the column names come from the same conversion.
    row_expander : Any -> Vector
    row_expander value:Convertible_To_Rows =
        value
            . align_sequence_only sequences_only
            . to_vector

    # A column called "Value" keeps the input column's name; any other name is
    # appended to it after a space.
    column_names : Any -> Vector
    column_names value:Convertible_To_Rows =
        value
            . align_sequence_only sequences_only
            . columns
            . map name-> if name=="Value" then column else column+" "+name

    Java_Problems.with_problem_aggregator Problem_Behavior.Report_Warning java_problem_aggregator->
        builder size = make_inferred_builder size java_problem_aggregator
        Fan_Out.fan_out_to_rows table column row_expander column_names at_least_one_row column_builder=builder
|
||||
|
||||
## PRIVATE
|
||||
create_table_from_objects : Convertible_To_Rows -> (Vector Text | Nothing) -> Table
|
||||
create_table_from_objects (value : Convertible_To_Rows) (fields : Vector | Nothing) = if fields.is_nothing.not && fields.is_empty then Error.throw (Illegal_Argument.Error "The fields parameter cannot be empty.") else
|
||||
create_table_from_objects (base_value : Convertible_To_Rows) (fields : Vector | Nothing) (treat_dictionary_as_sequence : Boolean = False) -> Table = if fields.is_nothing.not && fields.is_empty then Error.throw (Illegal_Argument.Error "The fields parameter cannot be empty.") else
|
||||
expand_sequences_only = treat_dictionary_as_sequence.not
|
||||
value = base_value.align_sequence_only expand_sequences_only
|
||||
len = value.length
|
||||
|
||||
Java_Problems.with_problem_aggregator Problem_Behavior.Report_Warning java_problem_aggregator->
|
||||
|
@ -0,0 +1,149 @@
|
||||
from Standard.Base import all
|
||||
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
||||
import Standard.Base.Runtime.State
|
||||
from Standard.Base.Runtime import assert
|
||||
|
||||
import project.Columns_To_Keep.Columns_To_Keep
|
||||
import project.Constants.Report_Unmatched
|
||||
import project.Match_Columns.Match_Columns
|
||||
from project.Errors import Column_Count_Mismatch, No_Output_Columns, Unmatched_Columns
|
||||
|
||||
## PRIVATE
   A helper that encapsulates the common backend-agnostic logic of matching
   columns in `Table.union`.

   It matches columns according to the provided matching settings and returns a
   list of column sets to be merged.

   Each column set consists of the name of the resulting column and a list of
   indices of the columns in the corresponding tables that will be merged to
   form this result column. The first column index corresponds to the first
   table in the input and so on. If no column corresponding to a given column
   set was matched in a particular table, its entry will contain `Nothing`
   instead.

   The column sets are returned in the order in which the corresponding result
   columns should appear in the resulting table.

   The method assumes at least one table is provided in its input.
match_columns tables matching_mode columns_to_keep problem_builder =
    assert tables.not_empty
    # Each matching mode has its own helper.
    case matching_mode of
        Match_Columns.By_Position -> _match_columns_by_position tables columns_to_keep problem_builder
        Match_Columns.By_Name -> _match_columns_by_name tables columns_to_keep problem_builder
|
||||
|
||||
## PRIVATE
   Matches columns by name. How the output column names are chosen depends on
   `columns_to_keep`:
   - `In_List`: the distinct names from the given list, which must not be
     empty.
   - `In_All`: the names of the first table that are present in every table.
     It is an error if there are none.
   - otherwise: all distinct names in order of appearance.

   Columns that are missing from some table are reported through
   `problem_builder`, except in the `In_Any` mode.
private _match_columns_by_name tables columns_to_keep problem_builder = case columns_to_keep of
    Columns_To_Keep.In_List list -> if list.is_empty then Error.throw (Illegal_Argument.Error "The list of columns to keep cannot be empty.") else
        requested_names = list.distinct
        occurrences = _find_column_counts tables
        table_count = tables.length
        # A requested column is unmatched if it occurs in fewer tables than
        # there are.
        missing_somewhere = requested_names.filter name->
            occurrences.get name 0 < table_count
        if missing_somewhere.not_empty then
            problem_builder.report_other_warning (Unmatched_Columns.Error missing_somewhere)
        _build_column_set_by_name tables requested_names
    Columns_To_Keep.In_All ->
        occurrences = _find_column_counts tables
        table_count = tables.length
        # Only the columns that are present in all tables are kept.
        shared_names = tables.first.column_names.filter name->
            occurrences.at name == table_count
        if shared_names.is_empty then Error.throw (No_Output_Columns.Error "Unmatched columns are set to be dropped, but no common column names were found.") else
            dropped_names = tables.map .column_names
                . flatten
                . filter (name-> occurrences.at name < table_count)
                . distinct
            if dropped_names.not_empty then
                problem_builder.report_other_warning (Unmatched_Columns.Error dropped_names)
            _build_column_set_by_name tables shared_names
    _ ->
        all_names = _distinct_columns_in_appearance_order tables
        warn_on_missing = case columns_to_keep of
            Columns_To_Keep.In_Any_Warn_On_Missing -> True
            Columns_To_Keep.In_Any -> False
        if warn_on_missing then
            occurrences = _find_column_counts tables
            table_count = tables.length
            ## Filtering the output column names keeps the order of the
               unmatched columns deterministic.
            missing_somewhere = all_names.filter name->
                occurrences.get name 0 < table_count
            if missing_somewhere.not_empty then
                problem_builder.report_other_warning (Unmatched_Columns.Error missing_somewhere)
        _build_column_set_by_name tables all_names
|
||||
|
||||
## Common logic for computing the final result of by-name matching.
   For every output column name, a `Column_Set` is built from the result of
   looking that name up among the column names of each table.
private _build_column_set_by_name tables output_column_names =
    output_column_names.map column_name->
        # TODO this gets O(N^2), we should optimize
        locate table = table.column_names.index_of column_name
        Column_Set.Value column_name (tables.map locate)
|
||||
|
||||
## Matches the columns of the given tables by their position.

   Arguments:
   - tables: the tables to match; `tables.first` is read, so at least one
     table is expected.
   - columns_to_keep: the `Columns_To_Keep` setting. `In_List` is rejected
     with an `Illegal_Argument` error.
   - problem_builder: receives a `Column_Count_Mismatch` warning when the
     tables differ in column count, unless `columns_to_keep` is `In_Any`.

   Returns a vector of `Column_Set`s, one for each output position.
private _match_columns_by_position tables columns_to_keep problem_builder = case columns_to_keep of
    Columns_To_Keep.In_List _ ->
        Error.throw (Illegal_Argument.Error "The In_List option for `columns_to_keep` cannot be used together with `By_Position` matching.")
    _ ->
        column_counts = tables.map table-> table.columns.length
        minmax = column_counts.compute_bulk [Statistic.Minimum, Statistic.Maximum]
        min = minmax.first
        max = minmax.second
        # `In_All` keeps only positions present in every table; the other modes keep every position.
        columns_to_take = case columns_to_keep of
            Columns_To_Keep.In_All -> min
            Columns_To_Keep.In_Any -> max
            Columns_To_Keep.In_Any_Warn_On_Missing -> max
        has_unmatched_columns = min != max
        if has_unmatched_columns then
            should_report_unmatched = case columns_to_keep of
                Columns_To_Keep.In_All -> True
                Columns_To_Keep.In_Any -> False
                Columns_To_Keep.In_Any_Warn_On_Missing -> True
            # TODO should we rephrase the wording of the error? should it depend on In_Any_Warn_On_Missing vs In_All?
            if should_report_unmatched then
                problem_builder.report_other_warning (Column_Count_Mismatch.Error max min)

        # The table whose columns provide the names of the output columns.
        name_source = case columns_to_keep of
            Columns_To_Keep.In_All -> tables.first
            _ ->
                # We find the first table that has all the columns present.
                tables.find table-> table.columns.length == columns_to_take

        column_sets = Vector.new columns_to_take i->
            name = name_source.at i . name
            # A table with too few columns contributes `Nothing` at this position.
            column_ids = tables.map table->
                column_count = table.columns.length
                if i >= column_count then Nothing else i
            Column_Set.Value name column_ids
        column_sets
|
||||
|
||||
## PRIVATE
   A single output column of a matching operation: its name together with,
   for each input table, the index of the matched column (or `Nothing`).
type Column_Set
    ## PRIVATE
    Value (name : Text) (column_indices : Vector Integer)

    ## Pairs every stored index with the table at the same position in
       `all_tables` and yields that table's column at the index, or `Nothing`
       where the index is `Nothing`.
    private resolve_columns self (all_tables : Vector) =
        self.column_indices.zip all_tables index-> table-> case index of
            Nothing -> Nothing
            _ : Integer -> table.at index
|
||||
|
||||
## Returns a dictionary mapping each column name to the number of times a
   column with that name was encountered across the given tables.
private _find_column_counts tables =
    tables.fold Dictionary.empty counts_so_far-> table->
        table.columns.fold counts_so_far acc-> column->
            column_name = column.name
            acc.insert column_name (acc.get column_name 0 + 1)
|
||||
|
||||
## PRIVATE
   Collects every distinct column name across the tables, ordered by where the
   name is first seen when walking the tables (and their columns) in order.
private _distinct_columns_in_appearance_order tables =
    Vector.build builder->
        tables.fold Dictionary.empty seen_so_far-> table->
            table.columns.fold seen_so_far seen-> column->
                column_name = column.name
                already_seen = seen.contains_key column_name
                if already_seen then seen else
                    builder.append column_name
                    seen.insert column_name True
|
@ -0,0 +1,56 @@
|
||||
from Standard.Base import all
|
||||
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
||||
|
||||
import project.Columns_To_Keep.Columns_To_Keep
|
||||
import project.Errors.No_Rows
|
||||
import project.Extensions.Table_Conversions
|
||||
import project.Match_Columns.Match_Columns
|
||||
import project.Table.Table
|
||||
|
||||
## PRIVATE
   Determines how an object should be converted into a table for merging during
   a `read_many` operation with `As_Merged_Table` return mode.

   Instances are obtained through the `Read_Many_As_Merged_Table_Strategy.from`
   conversions defined for the loaded object's type.
type Read_Many_As_Merged_Table_Strategy
    ## PRIVATE
       Wraps the conversion callback.

       Arguments:
       - into_table: a function of four arguments returning a
         `Read_Many_As_Table_Result`. The conversions in this module name the
         first argument `path` and call `to_display_text` on it, ignore the
         `Columns_To_Keep` and `Match_Columns` arguments, and use the
         `Problem_Behavior` to report problems.
    Value (into_table : Any -> Columns_To_Keep -> Match_Columns -> Problem_Behavior -> Read_Many_As_Table_Result)
|
||||
|
||||
## PRIVATE
   The outcome of converting a single loaded object for an `As_Merged_Table`
   merge.
type Read_Many_As_Table_Result
    ## PRIVATE
       The object was converted to a table.

       Arguments:
       - metadata: an optional table of metadata, or `Nothing`.
       - data: the table holding the converted data.
    Table metadata:Nothing|Table data:Table

    ## PRIVATE
       The object produced no data to merge.
    No_Data
|
||||
|
||||
## PRIVATE
   Strategy for objects that were loaded as a `Table`: the table itself is used
   as the data, without metadata. When it has no rows, a `No_Rows` warning
   naming the path is attached to it according to `on_problems`.
Read_Many_As_Merged_Table_Strategy.from (that : Table) =
    convert path _ _ on_problems =
        has_rows = that.row_count > 0
        data_table = if has_rows then that else
            on_problems.attach_problem_after that (No_Rows.Warning "The "+path.to_display_text+" loaded as a table with 0 rows, so it did not contribute any rows to the `As_Merged_Table` result of `read_many`.")
        Read_Many_As_Table_Result.Table metadata=Nothing data=data_table
    Read_Many_As_Merged_Table_Strategy.Value convert
|
||||
|
||||
## PRIVATE
   Strategy for objects that were loaded as a `Vector`: a non-empty vector is
   interpreted as a table; an empty one yields `No_Data` with a `No_Rows`
   warning naming the path, attached according to `on_problems`.
Read_Many_As_Merged_Table_Strategy.from (that : Vector) =
    convert path _ _ on_problems = case that.not_empty of
        True ->
            data_table = _interpret_as_table that path on_problems
            Read_Many_As_Table_Result.Table metadata=Nothing data=data_table
        False ->
            on_problems.attach_problem_after Read_Many_As_Table_Result.No_Data (No_Rows.Warning "The "+path.to_display_text+" loaded as an empty array, so it is not included in the `As_Merged_Table` result of `read_many`.")
    Read_Many_As_Merged_Table_Strategy.Value convert
|
||||
|
||||
## PRIVATE
   The fallback strategy, used for any object without a more specific
   conversion: the object is interpreted as a table, with no metadata.
   Custom data types may define their own conversion to override it, as is
   done for `Table` and `Vector`.
Read_Many_As_Merged_Table_Strategy.from (that : Any) =
    convert path _ _ on_problems =
        data_table = _interpret_as_table that path on_problems
        Read_Many_As_Table_Result.Table metadata=Nothing data=data_table
    Read_Many_As_Merged_Table_Strategy.Value convert
|
||||
|
||||
## Converts a loaded object into a table using `Table.from_objects`.

   If the conversion fails with `Illegal_Argument`, a new `Illegal_Argument`
   problem (naming the object and the path, with the original error as its
   cause) is handed to `on_problems`, and the fallback result is a table with
   a single `Value` column.

   Arguments:
   - object: the loaded object to convert.
   - path: where the object was loaded from; only used in the problem message.
   - on_problems: the `Problem_Behavior` used to report a failed conversion.
private _interpret_as_table (object : Any) path on_problems =
    Table.from_objects object . catch Illegal_Argument error->
        problem = Illegal_Argument.Error "Problem converting "+object.to_display_text+" (loaded from "+path.to_display_text+") to a Table: "+error.to_display_text cause=error
        on_problems.attach_problem_before problem <|
            # If it was an array that failed to load, let's expand it to rows, otherwise have 1 row for the unknown object.
            vec = if object.is_a Vector then object else [object]
            Table.new [["Value", vec]]
|
@ -6,8 +6,13 @@ import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
||||
|
||||
import project.Column.Column
|
||||
import project.Errors.Invalid_Value_Type
|
||||
import project.Internal.Column_Naming_Helper.Column_Naming_Helper
|
||||
import project.Internal.Read_Many_As_Merged_Table_Strategy.Read_Many_As_Merged_Table_Strategy
|
||||
import project.Internal.Read_Many_As_Merged_Table_Strategy.Read_Many_As_Table_Result
|
||||
import project.Return_As_Table.Return_As_Table
|
||||
import project.Table.Table
|
||||
import project.Value_Type.Value_Type
|
||||
from project.Internal.Table_Helpers import duplicate_rows
|
||||
|
||||
find_files_list_in_table (that : Table) -> Many_Files_List =
|
||||
found_column = if that.column_count == 1 then that.at 0 else
|
||||
@ -28,3 +33,83 @@ ensure_column_type_valid_to_be_files_list (column : Column) ~action =
|
||||
_ -> False
|
||||
if is_expected_type then action else
|
||||
Error.throw (Invalid_Value_Type.Column "Text or Mixed" column.value_type column.name)
|
||||
|
||||
## Builds the table returned by `read_many` for the given `Return_As_Table`
   shape.

   Arguments:
   - return_shape: `With_New_Column` appends the loaded objects as a new
     column of the input table; `As_Merged_Table` converts every object using
     its `Read_Many_As_Merged_Table_Strategy` and merges the results.
   - input: the list of files; its paths are paired with `objects`.
   - objects: the objects loaded for the corresponding paths.
   - on_problems: passed to the conversion and merging steps.
make_return (return_shape : Return_As_Table) (input : Many_Files_List) (objects : Vector Any) (on_problems : Problem_Behavior) -> Table =
    input_table = _input_as_table input
    case return_shape of
        Return_As_Table.With_New_Column -> _add_objects_column input_table objects
        Return_As_Table.As_Merged_Table columns_to_keep match_columns ->
            converted = input.paths_to_load.zip objects path-> loaded_object->
                Read_Many_As_Merged_Table_Strategy.from loaded_object . into_table path columns_to_keep match_columns on_problems
            _merge_input_and_tables input_table converted columns_to_keep match_columns on_problems
|
||||
|
||||
## Represents the original input of `read_many` as a table: a `Table` is used
   directly, a `Column` is turned into a one-column table, and anything else
   becomes a table with a single `Path` column listing the paths to load.
_input_as_table (input : Many_Files_List) -> Table = case input.original_value of
    original_table : Table -> original_table
    original_column : Column -> original_column.to_table
    # Fallback - any unknown input shape is treated the same as Vector
    # input - we just extract the list of files from it.
    _ -> Table.new [["Path", input.paths_to_load]]
|
||||
|
||||
## Expands each table from `tables_for_rows` and computes their union, also
   duplicating the corresponding rows from `input_table`.

   Arguments:
   - input_table: the table describing the inputs; its rows are duplicated
     according to the row counts of the corresponding results.
   - tables_for_rows: one `Read_Many_As_Table_Result` for each input.
   - columns_to_keep, match_columns, on_problems: forwarded to
     `Table.from_union` for the data tables.

   Returns the duplicated input rows zipped with the unified metadata (if
   any) and the unified data. If every result is `No_Data`, returns
   `input_table.take 0`.
_merge_input_and_tables (input_table : Table) (tables_for_rows : Vector Read_Many_As_Table_Result) columns_to_keep match_columns on_problems -> Table =
    # How many output rows each input contributes; `No_Data` contributes none.
    counts = tables_for_rows.map table-> case table of
        Read_Many_As_Table_Result.No_Data -> 0
        Read_Many_As_Table_Result.Table _ data -> data.row_count
    # Warnings attached to the original vector are re-attached to the filtered one.
    adapted_tables = _inherit_warnings_from_vector tables_for_rows <|
        tables_for_rows.filter r-> r != Read_Many_As_Table_Result.No_Data

    ## If there are no tables to merge, union would fail.
       Each entry is then contributing 0 rows, so we need to return an empty table.
       But it should have the same structure as the input table.
    if adapted_tables.is_empty then (input_table.take 0) else
        unified_metadata = _unify_metadata adapted_tables on_problems
        unified_data = Table.from_union (adapted_tables.map .data) columns_to_keep match_columns on_problems

        multiplicated_inputs = duplicate_rows input_table counts
        # All parts must have the same number of rows to be zipped together.
        Runtime.assert (unified_data.row_count == multiplicated_inputs.row_count)
        Runtime.assert (unified_metadata.is_nothing || (unified_metadata.row_count == unified_data.row_count))

        # NOTE(review): `right_prefix=""` presumably leaves clashing column names to the default renaming - confirm against `Table.zip`.
        first_pass = if unified_metadata.is_nothing then multiplicated_inputs else
            multiplicated_inputs.zip unified_metadata right_prefix=""
        first_pass.zip unified_data right_prefix=""
|
||||
|
||||
## Unifies provided metadata tables, knowing that some tables may have no
   metadata - in such case we want to insert as many Nothing rows for metadata
   as there are rows in the corresponding data table.

   Arguments:
   - tables: the results to unify; both `metadata` and `data` are read from
     every entry, so `No_Data` entries are expected to have been removed.
   - on_problems: forwarded to `Table.from_union`.

   Returns `Nothing` if none of the tables has metadata, otherwise the union
   of the metadata tables.
_unify_metadata (tables : Vector Read_Many_As_Table_Result) (on_problems : Problem_Behavior) -> Table | Nothing =
    has_no_metadata = tables.all r-> r.metadata.is_nothing
    if has_no_metadata then Nothing else
        # Register all existing metadata column names, so the placeholder name below cannot clash with them.
        unique = Column_Naming_Helper.in_memory.create_unique_name_strategy
        tables.each r->
            if r.metadata.is_nothing.not then unique.mark_used r.metadata.column_names

        # A dummy column because we cannot create a table with 0 columns, it will be removed after union. We find a unique name for it to avoid conflicts.
        dummy_column_name = unique.make_unique "_Internal_Placeholder_Column_"
        tables_for_union = tables.map r->
            if r.metadata.is_nothing.not then r.metadata else
                Table.new [Column.from_repeated_item dummy_column_name Nothing r.data.row_count]

        # Metadata are always merged by-name and columns that appear only in some tables are kept.
        unified_metadata = Table.from_union tables_for_union ..In_Any ..By_Name on_problems
        unified_metadata.remove_columns [dummy_column_name] on_problems=..Ignore
|
||||
|
||||
## Builds a table with the same column names and value types as
   `empty_table`, containing a single row where every value is `Nothing`.
   The given table is asserted to have no rows.
_make_table_with_one_null_row (empty_table : Table) -> Table =
    Runtime.assert (empty_table.row_count == 0) "The table must be empty."
    null_columns = empty_table.columns.map column->
        Column.from_vector column.name [Nothing] column.value_type
    Table.new null_columns
|
||||
|
||||
## Adds the loaded objects to `base_table` as a new column. The column is
   named `Value`, made unique with respect to the names already present in
   the table.
_add_objects_column (base_table : Table) (objects : Vector Any) -> Table =
    name_strategy = base_table.column_naming_helper.create_unique_name_strategy
    name_strategy.mark_used base_table.column_names
    new_column_name = name_strategy.make_unique "Value"
    new_column = Column.from_vector new_column_name objects
    base_table.set new_column as=new_column_name set_mode=..Add
|
||||
|
||||
## Workaround for bug https://github.com/enso-org/enso/issues/11570
   Evaluates `action` and returns its result carrying both the warnings of
   `vector` and the warnings of the result itself.
   TODO: Remove workaround once #11570 is closed.
private _inherit_warnings_from_vector vector:Vector ~action =
    computed = action
    combined_warnings = Warning.get_all vector + Warning.get_all computed
    Warning.set computed combined_warnings
|
||||
|
@ -10,7 +10,7 @@ import project.Expression.Expression
|
||||
import project.Internal.Column_Naming_Helper.Column_Naming_Helper
|
||||
import project.Internal.Problem_Builder.Problem_Builder
|
||||
import project.Internal.Value_Type_Helpers
|
||||
import project.Match_Columns.Column_Set
|
||||
import project.Internal.Match_Columns_Helpers.Column_Set
|
||||
import project.Position.Position
|
||||
import project.Set_Mode.Set_Mode
|
||||
import project.Sort_Column.Sort_Column
|
||||
@ -18,8 +18,10 @@ import project.Table.Table
|
||||
import project.Value_Type.By_Type
|
||||
import project.Value_Type.Value_Type
|
||||
from project.Errors import Ambiguous_Column_Rename, Column_Type_Mismatch, Invalid_Aggregate_Column, Missing_Input_Columns, No_Common_Type, No_Input_Columns_Selected, No_Output_Columns, No_Such_Column, Too_Many_Column_Names_Provided
|
||||
from project.Table import from_java_table
|
||||
|
||||
polyglot java import java.util.HashSet
|
||||
polyglot java import org.enso.table.data.mask.OrderMask
|
||||
|
||||
type Table_Column_Helper
|
||||
## PRIVATE
|
||||
@ -584,3 +586,15 @@ replace_columns_with_transformed_columns table selectors transformer error_on_mi
|
||||
columns = internal_columns.map table.columns_helper.make_column
|
||||
new_columns = columns.map on_problems=No_Wrap.Value transformer
|
||||
replace_columns_with_columns table columns new_columns
|
||||
|
||||
|
||||
## Returns a new table in which the row at each position of `table` is
   repeated as many times as the entry at the same position of `counts`.
   `counts` is asserted to have exactly one entry per row of the table.
duplicate_rows (table : Table) (counts : Vector Integer) -> Table =
    Runtime.assert (table.row_count == counts.length) "The number of counts ("+counts.length.to_text+") must match the number of rows in the table ("+table.row_count.to_text+")."
    # For each row, its index repeated `count` times; flattened into one list of row indices.
    repeated_indices = counts.map_with_index row_index-> repetitions-> Vector.fill repetitions row_index
    order_mask = OrderMask.fromArray repeated_indices.flatten
    masked_java_table = table.java_table.applyMask order_mask
    from_java_table masked_java_table
|
||||
|
@ -0,0 +1,52 @@
|
||||
private
|
||||
|
||||
from Standard.Base import all
|
||||
|
||||
import project.Column.Column
|
||||
import project.Columns_To_Keep.Columns_To_Keep
|
||||
import project.Internal.Java_Problems
|
||||
import project.Internal.Match_Columns_Helpers
|
||||
import project.Internal.Problem_Builder.Problem_Builder
|
||||
import project.Internal.Table_Helpers
|
||||
import project.Internal.Table_Helpers.Union_Result_Type
|
||||
import project.Match_Columns.Match_Columns
|
||||
import project.Table.Table
|
||||
import project.Value_Type.Value_Type
|
||||
from project.Column import make_storage_builder_for_type
|
||||
|
||||
## The implementation of common logic that is used by `Table.from_union`.
   The caller should ensure that `tables` contains only `Table` instances or `Nothing`.
   Each `Nothing` is replaced by a single all-nothing row, not introducing any new columns.
   The `tables` vector must contain at least one non-nothing entry.

   NOTE(review): the implementation below calls `row_count` on every entry of
   `tables` and has no branch for `Nothing` entries - confirm whether the
   `Nothing` handling described above is performed elsewhere or is outdated.

   Arguments:
   - tables: the tables to union.
   - columns_to_keep, match_columns: passed to the column matching helper.
   - on_problems: how collected problems are attached to the result.
make_union (tables : Vector) (columns_to_keep : Columns_To_Keep) (match_columns : Match_Columns) (on_problems : Problem_Behavior) =
    problem_builder = Problem_Builder.new
    matched_column_sets = Match_Columns_Helpers.match_columns tables match_columns columns_to_keep problem_builder
    result_row_count = tables.fold 0 c-> t-> c + t.row_count
    # One merged column is built for every matched column set.
    merged_columns = matched_column_sets.map column_set->
        case Table_Helpers.unify_result_type_for_union column_set tables problem_builder of
            Union_Result_Type.Common_Type common_type ->
                _concat_columns column_set tables common_type result_row_count needs_cast=False on_problems
            Union_Result_Type.Fallback_To_Text ->
                _concat_columns column_set tables Value_Type.Char result_row_count needs_cast=True on_problems
            Union_Result_Type.No_Types_To_Unify ->
                Column.from_repeated_item column_set.name Nothing result_row_count
    if merged_columns.is_empty then problem_builder.raise_no_output_columns_with_cause else
        problem_builder.attach_problems_before on_problems <|
            Table.new merged_columns
|
||||
|
||||
## A helper that efficiently concatenates storages of in-memory columns.

   Arguments:
   - column_set: provides the result column name and, for each table, the
     index of the column to append (or `Nothing`).
   - tables: the tables, in the same order as `column_set.column_indices`.
   - result_type: the value type of the storage builder and the cast target.
   - result_row_count: the initial size requested for the storage builder.
   - needs_cast: if set, every column is cast to `result_type` before being
     appended.
   - on_problems: passed to the problem aggregator and the storage builder.
private _concat_columns column_set tables result_type result_row_count needs_cast on_problems =
    Java_Problems.with_problem_aggregator on_problems java_problem_aggregator->
        storage_builder = make_storage_builder_for_type result_type on_problems initial_size=result_row_count java_problem_aggregator
        column_set.column_indices.zip tables i-> parent_table->
            case i of
                # The table has no matching column: fill its rows with nulls.
                Nothing ->
                    null_row_count = parent_table.row_count
                    storage_builder.appendNulls null_row_count
                _ : Integer ->
                    column = parent_table.at i
                    converted = if needs_cast then column.cast result_type on_problems=..Report_Error else column
                    storage = converted.java_column.getStorage
                    storage_builder.appendBulkStorage storage
        sealed_storage = storage_builder.seal
        Column.from_storage column_set.name sealed_storage
|
@ -1,12 +1,3 @@
|
||||
from Standard.Base import all
|
||||
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
||||
import Standard.Base.Runtime.State
|
||||
from Standard.Base.Runtime import assert
|
||||
|
||||
import project.Columns_To_Keep.Columns_To_Keep
|
||||
import project.Constants.Report_Unmatched
|
||||
from project.Errors import Column_Count_Mismatch, No_Output_Columns, Unmatched_Columns
|
||||
|
||||
## Specifies a column matching strategy.
|
||||
type Match_Columns
|
||||
## Columns are matched by Name.
|
||||
@ -16,146 +7,3 @@ type Match_Columns
|
||||
|
||||
Note: column names are not compared.
|
||||
By_Position
|
||||
|
||||
## PRIVATE
|
||||
A helper that encapsulates the common backend-agnostic logic of matching
|
||||
columns in `Table.union`.
|
||||
|
||||
It matches columns according to the provided matching settings and returns a
|
||||
list of column sets to be merged.
|
||||
|
||||
Each column set consists of a name of the resulting column and a list of
|
||||
indices for columns in corresponding tables that will be merged to form this
|
||||
result column. The first column index corresponds to the first table in the
|
||||
input and so on. If no column corresponding to a given column set was matched
|
||||
in a particular table, its entry will contain `Nothing` instead.
|
||||
|
||||
The column sets are returned in the order in which the corresponding result
|
||||
columns should appear in the resulting table.
|
||||
|
||||
The method assumes at least one table is provided in its input.
|
||||
match_columns tables matching_mode columns_to_keep problem_builder =
|
||||
assert tables.not_empty
|
||||
case matching_mode of
|
||||
Match_Columns.By_Name -> match_columns_by_name tables columns_to_keep problem_builder
|
||||
Match_Columns.By_Position -> match_columns_by_position tables columns_to_keep problem_builder
|
||||
|
||||
## PRIVATE
|
||||
match_columns_by_name tables columns_to_keep problem_builder = case columns_to_keep of
|
||||
Columns_To_Keep.In_List list -> if list.is_empty then Error.throw (Illegal_Argument.Error "The list of columns to keep cannot be empty.") else
|
||||
output_column_names = list.distinct
|
||||
column_counts = find_column_counts tables
|
||||
all_tables_count = tables.length
|
||||
unmatched_column_names = output_column_names.filter name->
|
||||
column_counts.get name 0 < all_tables_count
|
||||
if unmatched_column_names.not_empty then
|
||||
problem_builder.report_other_warning (Unmatched_Columns.Error unmatched_column_names)
|
||||
build_column_set_by_name tables output_column_names
|
||||
Columns_To_Keep.In_All ->
|
||||
column_counts = find_column_counts tables
|
||||
# This will only include columns that were present in all tables.
|
||||
all_tables_count = tables.length
|
||||
common_column_names = tables.first.column_names.filter name->
|
||||
column_counts.at name == all_tables_count
|
||||
if common_column_names.is_empty then Error.throw (No_Output_Columns.Error "Unmatched columns are set to be dropped, but no common column names were found.") else
|
||||
dropped_column_names = tables.map .column_names
|
||||
. flatten
|
||||
. filter (name-> column_counts.at name < all_tables_count)
|
||||
. distinct
|
||||
if dropped_column_names.not_empty then
|
||||
problem_builder.report_other_warning (Unmatched_Columns.Error dropped_column_names)
|
||||
build_column_set_by_name tables common_column_names
|
||||
_ ->
|
||||
output_column_names = distinct_columns_in_appearance_order tables
|
||||
report_missing = case columns_to_keep of
|
||||
Columns_To_Keep.In_Any -> False
|
||||
Columns_To_Keep.In_Any_Warn_On_Missing -> True
|
||||
if report_missing then
|
||||
column_counts = find_column_counts tables
|
||||
all_tables_count = tables.length
|
||||
## We iterate over output column names to get deterministic
|
||||
order of unmatched columns.
|
||||
unmatched_column_names = output_column_names.filter name->
|
||||
column_counts.get name 0 < all_tables_count
|
||||
if unmatched_column_names.not_empty then
|
||||
problem_builder.report_other_warning (Unmatched_Columns.Error unmatched_column_names)
|
||||
build_column_set_by_name tables output_column_names
|
||||
|
||||
## PRIVATE
|
||||
Common logic for computing the final result of by-name matching.
|
||||
Once the set of output column names is determined, we compute the
|
||||
`Column_Set` by finding the corresponding column indices in each table (if found).
|
||||
build_column_set_by_name tables output_column_names =
|
||||
output_column_names.map name->
|
||||
column_indices = tables.map table->
|
||||
# TODO this gets O(N^2), we should optimize
|
||||
table.column_names.index_of name
|
||||
Column_Set.Value name column_indices
|
||||
|
||||
## PRIVATE
|
||||
match_columns_by_position tables columns_to_keep problem_builder = case columns_to_keep of
|
||||
Columns_To_Keep.In_List _ ->
|
||||
Error.throw (Illegal_Argument.Error "The In_List option for `columns_to_keep` cannot be used together with `By_Position` matching.")
|
||||
_ ->
|
||||
column_counts = tables.map table-> table.columns.length
|
||||
minmax = column_counts.compute_bulk [Statistic.Minimum, Statistic.Maximum]
|
||||
min = minmax.first
|
||||
max = minmax.second
|
||||
columns_to_take = case columns_to_keep of
|
||||
Columns_To_Keep.In_All -> min
|
||||
Columns_To_Keep.In_Any -> max
|
||||
Columns_To_Keep.In_Any_Warn_On_Missing -> max
|
||||
has_unmatched_columns = min != max
|
||||
if has_unmatched_columns then
|
||||
should_report_unmatched = case columns_to_keep of
|
||||
Columns_To_Keep.In_All -> True
|
||||
Columns_To_Keep.In_Any -> False
|
||||
Columns_To_Keep.In_Any_Warn_On_Missing -> True
|
||||
# TODO should we rephrase the wording of the error? should it depend on In_Any_Warn_On_Missing vs In_All?
|
||||
if should_report_unmatched then
|
||||
problem_builder.report_other_warning (Column_Count_Mismatch.Error max min)
|
||||
|
||||
name_source = case columns_to_keep of
|
||||
Columns_To_Keep.In_All -> tables.first
|
||||
_ ->
|
||||
# We find the first table that has all the columns present.
|
||||
tables.find table-> table.columns.length == columns_to_take
|
||||
|
||||
column_sets = Vector.new columns_to_take i->
|
||||
name = name_source.at i . name
|
||||
column_ids = tables.map table->
|
||||
column_count = table.columns.length
|
||||
if i >= column_count then Nothing else i
|
||||
Column_Set.Value name column_ids
|
||||
column_sets
|
||||
|
||||
type Column_Set
|
||||
## PRIVATE
|
||||
Value (name : Text) (column_indices : Vector Integer)
|
||||
|
||||
## PRIVATE
|
||||
resolve_columns self (all_tables : Vector) = self.column_indices.zip all_tables i-> parent_table->
|
||||
case i of
|
||||
Nothing -> Nothing
|
||||
_ : Integer -> parent_table.at i
|
||||
|
||||
## PRIVATE
|
||||
Returns a map indicating in how many tables did a column with a given name appear.
|
||||
find_column_counts tables =
|
||||
tables.fold Dictionary.empty current->table->
|
||||
table.columns.fold current counts-> column->
|
||||
name=column.name
|
||||
new_count = counts.get name 0 + 1
|
||||
counts.insert name new_count
|
||||
|
||||
## PRIVATE
|
||||
Returns a list of distinct column names, in the order of first appearance,
|
||||
starting from the first table.
|
||||
distinct_columns_in_appearance_order tables =
|
||||
Vector.build names_builder->
|
||||
tables.fold Dictionary.empty current-> table->
|
||||
table.columns.fold current seen_names-> column->
|
||||
name = column.name
|
||||
if seen_names.contains_key name then seen_names else
|
||||
names_builder.append name
|
||||
seen_names.insert name True
|
||||
|
@ -0,0 +1,47 @@
|
||||
from Standard.Base import all
|
||||
import Standard.Base.Data.Read.Many_Files_List.Many_Files_List
|
||||
import Standard.Base.Data.Read.Return_As.Return_As
|
||||
import Standard.Base.Errors.Common.Type_Error
|
||||
from Standard.Base.Metadata.Choice import Option
|
||||
|
||||
import project.Columns_To_Keep.Columns_To_Keep
|
||||
import project.Internal.Read_Many_Helpers
|
||||
import project.Match_Columns.Match_Columns
|
||||
|
||||
## The return modes of `read_many` that produce a `Table`.
type Return_As_Table
    ## Returns a table with a new column `Value` containing the objects loaded
       from each file.

       When the source for files to load was a table, all columns from the
       original table are also retained. In case of name clashes, the newly
       added columns will get a suffix.

       When the source was a simple Vector, the returned table will also contain
       a `Path` column.
    With_New_Column

    ## All files are interpreted as tables and then merged into a single table
       by a union operation.

       Each file is loaded according to the provided/detected format. If the
       format reads it as something else than a table, then it is expanded in
       the same way as `Table.from_objects`.

       Arguments:
       - columns_to_keep: which columns are kept in the union; defaults to
         `Columns_To_Keep.In_Any`.
       - match: how columns are matched between tables; defaults to
         `Match_Columns.By_Name`.
    @columns_to_keep Columns_To_Keep.default_widget
    As_Merged_Table (columns_to_keep : Columns_To_Keep = Columns_To_Keep.In_Any) (match : Match_Columns = Match_Columns.By_Name)

    ## PRIVATE
       The dropdown options for the two return modes of this type, each
       expressed as an autoscoped constructor.
    get_dropdown_options : Vector Option
    get_dropdown_options =
        [Option "With New Column" "..With_New_Column", Option "As Merged Table" "..As_Merged_Table"]

    ## PRIVATE
       Checks `value` against `Return_As_Table`, returning `Nothing` if the
       type ascription panics with a `Type_Error`.
    resolve value =
        Panic.catch Type_Error (value:Return_As_Table) _->Nothing

    ## PRIVATE
       Builds the result for this return mode by delegating to
       `Read_Many_Helpers.make_return`.
    make_return self (input : Many_Files_List) (objects : Vector Any) (on_problems : Problem_Behavior) =
        Read_Many_Helpers.make_return self input objects on_problems
|
||||
|
||||
## PRIVATE
   Conversion that wraps a `Return_As_Table` into a `Return_As.Instance`.
Return_As.from (that : Return_As_Table) = Return_As.Instance that
|
@ -54,13 +54,12 @@ import project.Internal.Replace_Helpers
|
||||
import project.Internal.Split_Tokenize
|
||||
import project.Internal.Table_Helpers
|
||||
import project.Internal.Table_Helpers.Table_Column_Helper
|
||||
import project.Internal.Table_Helpers.Union_Result_Type
|
||||
import project.Internal.Table_Ref.Table_Ref
|
||||
import project.Internal.Union
|
||||
import project.Internal.Value_Type_Helpers
|
||||
import project.Internal.Widget_Helpers
|
||||
import project.Join_Condition.Join_Condition
|
||||
import project.Join_Kind.Join_Kind
|
||||
import project.Match_Columns as Match_Columns_Helpers
|
||||
import project.Match_Columns.Match_Columns
|
||||
import project.Position.Position
|
||||
import project.Prefix_Name.Prefix_Name
|
||||
@ -72,7 +71,6 @@ import project.Sort_Column.Sort_Column
|
||||
import project.Value_Type.Auto
|
||||
import project.Value_Type.By_Type
|
||||
import project.Value_Type.Value_Type
|
||||
from project.Column import make_storage_builder_for_type
|
||||
from project.Errors import all
|
||||
from project.Internal.Filter_Condition_Helpers import make_filter_column
|
||||
from project.Internal.Lookup_Helpers import make_java_lookup_column_description
|
||||
@ -2025,18 +2023,21 @@ type Table
|
||||
true, a single row is output with `Nothing` for the aggregates column; if
|
||||
false, no row is output at all.
|
||||
|
||||
The following aggregate values are supported:
|
||||
The following values are considered sequence-like:
|
||||
- `Array`
|
||||
- `Vector`
|
||||
- `List`
|
||||
- `Range`
|
||||
- `Date_Range`
|
||||
- `Pair`
|
||||
- `Table`
|
||||
- `Table` (interpreted as a sequence of `Row`s)
|
||||
- `Column`
|
||||
|
||||
Any other values are treated as non-aggregate values, and their rows are kept
|
||||
unchanged.
|
||||
Some aggregates, like dictionaries will expand into two columns - one for
|
||||
key and one for the value.
|
||||
|
||||
If a value is not an aggregate, or has no defined way of being expanded,
|
||||
it is left as a single row containing that value unchanged.
|
||||
|
||||
In in-memory tables, it is permitted to mix values of different types.
|
||||
|
||||
@ -2048,7 +2049,7 @@ type Table
|
||||
@column Widget_Helpers.make_column_name_selector
|
||||
expand_to_rows : Text | Integer -> Boolean -> Table ! Type_Error | No_Such_Column | Index_Out_Of_Bounds
|
||||
expand_to_rows self column at_least_one_row:Boolean=False =
|
||||
Expand_Objects_Helpers.expand_to_rows self column at_least_one_row
|
||||
Expand_Objects_Helpers.expand_to_rows self column at_least_one_row sequences_only=False
|
||||
|
||||
## ALIAS filter rows, where
|
||||
GROUP Standard.Base.Selections
|
||||
@ -2920,10 +2921,10 @@ type Table
|
||||
? Column Renaming
|
||||
|
||||
If columns from the two tables have colliding names, a prefix (by
|
||||
default `Right_`) is added to the name of the column from the right
|
||||
default `Right `) is added to the name of the column from the right
|
||||
table. The left column remains unchanged. It is possible that the new
|
||||
name will be in use, in this case it will be resolved using the normal
|
||||
renaming strategy - adding subsequent `_1`, `_2` etc.
|
||||
renaming strategy - adding subsequent ` 1`, ` 2` etc.
|
||||
|
||||
? Row Ordering For In-Memory Tables
|
||||
|
||||
@ -3765,24 +3766,7 @@ type Table
|
||||
from_union (tables : Vector) (columns_to_keep : Columns_To_Keep = ..In_Any_Warn_On_Missing) (match_columns : Match_Columns = ..By_Name) (on_problems : Problem_Behavior = ..Report_Warning) =
|
||||
all_tables = (tables.map t-> Table.from t)
|
||||
if all_tables.is_empty then Error.throw (Illegal_Argument.Error "`Table.from_union` needs at least 1 input table.") else
|
||||
## We keep separate problem builders, because if we are reporting `No_Output_Columns`,
|
||||
we only want to add a cause coming from unification; matching reports problems that would not fit this error.
|
||||
problem_builder_for_matching = Problem_Builder.new
|
||||
problem_builder_for_unification = Problem_Builder.new
|
||||
matched_column_sets = Match_Columns_Helpers.match_columns all_tables match_columns columns_to_keep problem_builder_for_matching
|
||||
result_row_count = all_tables.fold 0 c-> t-> c + t.row_count
|
||||
merged_columns = matched_column_sets.map column_set->
|
||||
case Table_Helpers.unify_result_type_for_union column_set all_tables problem_builder_for_unification of
|
||||
Union_Result_Type.Common_Type common_type ->
|
||||
concat_columns column_set all_tables common_type result_row_count needs_cast=False on_problems
|
||||
Union_Result_Type.Fallback_To_Text ->
|
||||
concat_columns column_set all_tables Value_Type.Char result_row_count needs_cast=True on_problems
|
||||
Union_Result_Type.No_Types_To_Unify ->
|
||||
Column.from_repeated_item column_set.name Nothing result_row_count
|
||||
problem_builder_for_matching.attach_problems_before on_problems <|
|
||||
problem_builder_for_unification.attach_problems_before on_problems <|
|
||||
if merged_columns.is_empty then problem_builder_for_unification.raise_no_output_columns_with_cause else
|
||||
Table.new merged_columns
|
||||
Union.make_union all_tables columns_to_keep match_columns on_problems
|
||||
|
||||
## PRIVATE
|
||||
pretty : Text
|
||||
@ -3805,24 +3789,6 @@ make_join_helpers left_table right_table =
|
||||
Java_Join_Between.new left.java_column right_lower.java_column right_upper.java_column
|
||||
Join_Helpers.Join_Condition_Resolver.Value (left_table.at _) (right_table.at _) make_equals make_equals_ignore_case make_between
|
||||
|
||||
## PRIVATE
|
||||
A helper that efficiently concatenates storages of in-memory columns.
|
||||
concat_columns column_set all_tables result_type result_row_count needs_cast on_problems =
|
||||
Java_Problems.with_problem_aggregator on_problems java_problem_aggregator->
|
||||
storage_builder = make_storage_builder_for_type result_type on_problems initial_size=result_row_count java_problem_aggregator
|
||||
column_set.column_indices.zip all_tables i-> parent_table->
|
||||
case i of
|
||||
Nothing ->
|
||||
null_row_count = parent_table.row_count
|
||||
storage_builder.appendNulls null_row_count
|
||||
_ : Integer ->
|
||||
column = parent_table.at i
|
||||
converted = if needs_cast then column.cast result_type on_problems=..Report_Error else column
|
||||
storage = converted.java_column.getStorage
|
||||
storage_builder.appendBulkStorage storage
|
||||
sealed_storage = storage_builder.seal
|
||||
Column.from_storage column_set.name sealed_storage
|
||||
|
||||
## PRIVATE
|
||||
Conversion method to a Table from a Column.
|
||||
Table.from (that:Column) = that.to_table
|
||||
|
@ -39,7 +39,7 @@ test_problem_handling action expected_problems result_checker (unwrap_errors : B
|
||||
Test.with_clue "The warnings were "+warnings.to_text+'.\n' <|
|
||||
(if ignore_warning_cardinality then warnings.distinct else warnings)
|
||||
. map unwrap_maybe
|
||||
. should_equal_ignoring_order expected_problems frames_to_skip=5
|
||||
. should_equal_ignoring_order expected_problems frames_to_skip=8
|
||||
test_advanced_problem_handling action error_checker warnings_checker result_checker frames_to_skip=1
|
||||
|
||||
## UNSTABLE
|
||||
|
@ -4,6 +4,15 @@ import java.util.ServiceLoader;
|
||||
import org.enso.base.polyglot.EnsoMeta;
|
||||
import org.graalvm.polyglot.Value;
|
||||
|
||||
/**
|
||||
* An SPI for specifying return types to the `read_many` operation.
|
||||
*
|
||||
* <p>The `read_many` operation can take return types provided from various libraries. This SPI
|
||||
* ensures that it can be aware of all the available types from the loaded libraries. If a library
|
||||
* registers a return type here, it will be available for autoscoping resolution and will appear in
|
||||
* the dropdown. Registered types must provide methods `get_dropdown_options`, `resolve` and
|
||||
* `make_return`. See `Standard.Base.Data.Read.Return_As` for examples.
|
||||
*/
|
||||
public abstract class ReadManyReturnSPI {
|
||||
private static final ServiceLoader<ReadManyReturnSPI> loader =
|
||||
ServiceLoader.load(ReadManyReturnSPI.class, ReadManyReturnSPI.class.getClassLoader());
|
||||
|
@ -13,6 +13,9 @@ public interface OrderMask {
|
||||
* storage at the {@code idx}-th position. It may return {@link
|
||||
* org.enso.table.data.storage.Storage.NOT_FOUND_INDEX}, in which case a missing value should be
|
||||
* inserted at this position.
|
||||
*
|
||||
* <p>Indices may appear zero or multiple times in the mask - meaning rows that will be gone or
|
||||
* duplicated.
|
||||
*/
|
||||
int get(int idx);
|
||||
|
||||
|
@ -0,0 +1,16 @@
|
||||
package org.enso.table.read;
|
||||
|
||||
import org.enso.base.read.ReadManyReturnSPI;
|
||||
|
||||
@org.openide.util.lookup.ServiceProvider(service = ReadManyReturnSPI.class)
|
||||
public class TableReadManyReturnSPI extends ReadManyReturnSPI {
|
||||
@Override
|
||||
protected String getModuleName() {
|
||||
return "Standard.Table.Return_As_Table";
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String getTypeName() {
|
||||
return "Return_As_Table";
|
||||
}
|
||||
}
|
@ -92,7 +92,11 @@ import project.System.System_Spec
|
||||
import project.System.Temporary_File_Spec
|
||||
|
||||
import project.Random_Spec
|
||||
import project.Widget_Helpers_Spec
|
||||
|
||||
## Workaround for bug https://github.com/enso-org/enso/issues/11707
|
||||
The Standard.Table import should be removed once the bug is fixed.
|
||||
import Standard.Table
|
||||
|
||||
|
||||
main filter=Nothing =
|
||||
suite = Test.build suite_builder->
|
||||
@ -177,6 +181,5 @@ main filter=Nothing =
|
||||
Random_Spec.add_specs suite_builder
|
||||
XML_Spec.add_specs suite_builder
|
||||
Decimal_Spec.add_specs suite_builder
|
||||
Widget_Helpers_Spec.add_specs suite_builder
|
||||
|
||||
suite.run_with_filter filter
|
||||
|
@ -226,6 +226,11 @@ add_specs suite_builder setup:Cloud_Tests_Setup = suite_builder.group "Enso Clou
|
||||
r.should_fail_with File_Error
|
||||
r.catch.should_be_a File_Error.Not_Found
|
||||
|
||||
group_builder.specify "read_many should work with Cloud files" <|
|
||||
paths = [test_root.get / "test_file.json", test_root.get / "test-directory/another.txt"]
|
||||
r = Data.read_many paths return=..As_Vector
|
||||
r.should_equal [[1, 2, 3, "foo"], "Hello Another!"]
|
||||
|
||||
group_builder.specify "should be able to open a file as input stream" <|
|
||||
test_file = test_root.get / "test_file.json"
|
||||
test_file.exists . should_be_true
|
||||
|
@ -201,7 +201,7 @@ add_specs suite_builder =
|
||||
r.should_be_a JS_Object
|
||||
|
||||
group_builder.specify "can use URI or Text URLs in Data.read_many" <|
|
||||
r = Data.read_many [URI.from url_get, url_get]
|
||||
r = Data.read_many [URI.from url_get, url_get] return=..As_Vector
|
||||
r.should_be_a Vector
|
||||
r.at 0 . should_be_a JS_Object
|
||||
r.at 1 . should_be_a JS_Object
|
||||
|
@ -2,6 +2,7 @@ from Standard.Base import all
|
||||
import Standard.Base.Data.Vector.Map_Error
|
||||
import Standard.Base.Errors.Encoding_Error.Encoding_Error
|
||||
import Standard.Base.Errors.File_Error.File_Error
|
||||
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
||||
|
||||
from Standard.Test import all
|
||||
|
||||
@ -96,23 +97,37 @@ add_specs suite_builder =
|
||||
files = [enso_project.data / "sample.json", enso_project.data / "helloworld.txt"]
|
||||
|
||||
# Read all files using Auto_Detect - each file is read according to its inferred format.
|
||||
r1 = Data.read_many files
|
||||
# The return type is given explicitly: the default (a merged Table) is only available when Standard.Table is imported.
|
||||
r1 = Data.read_many files return=..As_Vector
|
||||
r1.should_equal [js_object, "Hello World!"]
|
||||
|
||||
# Read all files using a specified format.
|
||||
r2 = Data.read_many files format=..Plain_Text
|
||||
r2 = Data.read_many files format=..Plain_Text return=..As_Vector
|
||||
r2.should_be_a Vector
|
||||
r2.should_equal [js_as_text.get, "Hello World!"]
|
||||
|
||||
## Workaround for https://github.com/enso-org/enso/issues/11707
|
||||
This pending check should be removed once it is fixed.
|
||||
is_table_imported = File_Format.all.map .to_text . contains "Delimited_Format"
|
||||
table_import_pending = if is_table_imported then "Base_Tests should not import Table, but they sometimes do as workaround for #11707. This test can only run if Table is not imported."
|
||||
group_builder.specify "would default to returning as merged Table, but will raise a helpful error if Standard.Table is not loaded" pending=table_import_pending <|
|
||||
Runtime.assert is_table_imported.not "This test assumes that Base_Tests does not import Standard.Table."
|
||||
files = [enso_project.data / "sample.json"]
|
||||
r1 = Data.read_many files
|
||||
r1.should_fail_with Illegal_Argument
|
||||
r1.catch.to_display_text.should_contain "not imported"
|
||||
r1.catch.to_display_text.should_contain "Standard.Table"
|
||||
r1.catch.to_display_text.should_contain "As_Vector"
|
||||
|
||||
group_builder.specify "should work with paths as Text" <|
|
||||
files = [enso_project.data / "sample.json", enso_project.data / "helloworld.txt"]
|
||||
paths = files.map .path
|
||||
r1 = Data.read_many paths return=..Vector
|
||||
r1 = Data.read_many paths return=..As_Vector
|
||||
r1.should_equal [js_object, "Hello World!"]
|
||||
|
||||
three_files = [enso_project.data / "sample.json", enso_project.data / "nonexistent.txt", enso_project.data / "helloworld.txt"]
|
||||
group_builder.specify "should allow to Report_Error if any file fails to load" <|
|
||||
r1 = Data.read_many three_files return=..Vector on_problems=..Report_Error
|
||||
r1 = Data.read_many three_files return=..As_Vector on_problems=..Report_Error
|
||||
# The error reports as File_Error
|
||||
r1.should_fail_with File_Error
|
||||
# But it's actually Map_Error with index metadata
|
||||
@ -121,15 +136,20 @@ add_specs suite_builder =
|
||||
r1.catch.inner_error.should_be_a File_Error.Not_Found
|
||||
|
||||
group_builder.specify "should allow to Ignore errors if any file fails to load" <|
|
||||
r1 = Data.read_many three_files return=..Vector on_problems=..Ignore
|
||||
r1 = Data.read_many three_files return=..As_Vector on_problems=..Ignore
|
||||
r1.should_equal [js_object, Nothing, "Hello World!"]
|
||||
Problems.assume_no_problems r1
|
||||
|
||||
group_builder.specify "should allow to continue loading if errors are encountered, but report them as warnings" <|
|
||||
r1 = Data.read_many three_files return=..Vector on_problems=..Report_Warning
|
||||
r1 = Data.read_many three_files return=..As_Vector on_problems=..Report_Warning
|
||||
r1.should_equal [js_object, Nothing, "Hello World!"]
|
||||
Problems.expect_only_warning File_Error r1
|
||||
|
||||
group_builder.specify "should return empty vector if no files were provided" <|
|
||||
r1 = Data.read_many [] return=..As_Vector
|
||||
r1.should_equal []
|
||||
Problems.assume_no_problems r1
|
||||
|
||||
main filter=Nothing =
|
||||
suite = Test.build suite_builder->
|
||||
add_specs suite_builder
|
||||
|
BIN
test/Table_Tests/data/empty-sheets.xlsx
Normal file
BIN
test/Table_Tests/data/empty-sheets.xlsx
Normal file
Binary file not shown.
5
test/Table_Tests/data/transient/.gitignore
vendored
5
test/Table_Tests/data/transient/.gitignore
vendored
@ -1,3 +1,2 @@
|
||||
*.csv*
|
||||
rootCA.crt
|
||||
*.xls*
|
||||
*
|
||||
!.gitignore
|
||||
|
Binary file not shown.
@ -144,6 +144,17 @@ run_union_tests group_builder setup call_union =
|
||||
m.at "C" . to_vector . should_equal [Nothing, Nothing, Nothing, "d", "e", "f", "g", "h", "i"]
|
||||
m.at "D" . to_vector . should_equal [Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, 0]
|
||||
|
||||
group_builder.specify "table with 0 rows will still influence the output structure with its columns" <|
|
||||
t1 = table1.get
|
||||
t2 = table2.get.filter "A" (..Greater 100)
|
||||
|
||||
table = call_union [t1, t2]
|
||||
m = materialize_and_maybe_sort table
|
||||
expect_column_names ["A", "B", "C"] m
|
||||
m.at "A" . to_vector . should_equal [1, 2, 3]
|
||||
m.at "B" . to_vector . should_equal ["a", "b", "c"]
|
||||
m.at "C" . to_vector . should_equal [Nothing, Nothing, Nothing]
|
||||
|
||||
group_builder.specify "should drop unmatched columns and warn, if In_All is selected" <|
|
||||
t1 = table1.get # A, B
|
||||
t2 = table2.get # C, A
|
||||
@ -519,6 +530,37 @@ run_union_tests group_builder setup call_union =
|
||||
problems = [No_Common_Type.Warning_Convert_To_Text [Value_Type.Time, Value_Type.Date] "D"]
|
||||
Problems.test_problem_handling action problems tester
|
||||
|
||||
# This test is not run on DB as all-null columns will not be Mixed there.
|
||||
if setup.is_database.not then group_builder.specify "if no common type can be found, will fall back to converting all types to text and warn (all-null columns edge case)" pending="TODO Value_Type.Null #6281" <|
|
||||
# If a column is all-null, it will often have Mixed type. But that should not prevent the union from falling back to text when the remaining columns have no common type.
|
||||
t1 = table_builder [["A", [Nothing]]]
|
||||
t2 = table_builder [["A", [2, 3]]]
|
||||
t3 = table_builder [["A", [Nothing, Nothing]]]
|
||||
t4 = table_builder [["A", ['a', 'b']]]
|
||||
t5 = table_builder [["A", [Nothing]]]
|
||||
|
||||
t1.at "A" . value_type . should_equal Value_Type.Mixed
|
||||
setup.expect_integer_type <| t2.at "A"
|
||||
|
||||
t = call_union [t1, t2, t3, t4, t5]
|
||||
expect_column_names ["A"] t
|
||||
t.at "A" . to_vector . should_equal [Nothing, '2', '3', Nothing, Nothing, 'a', 'b', Nothing]
|
||||
t.at "A" . value_type . is_text . should_be_true
|
||||
|
||||
if setup.is_database.not then group_builder.specify "all-Nothing column should not influence result type, unless it had a type explicitly given to it" pending="TODO Value_Type.Null #6281" <|
|
||||
t1 = table_builder [["A", [Nothing]]]
|
||||
t2 = table_builder [["A", [2, 3]]]
|
||||
|
||||
t3 = call_union [t1, t2]
|
||||
expect_column_names ["A"] t3
|
||||
t3.at "A" . to_vector . should_equal [Nothing, 2, 3]
|
||||
setup.expect_integer_type <| t3.at "A"
|
||||
|
||||
t4 = call_union [(t1.cast "A" Value_Type.Char), t2]
|
||||
expect_column_names ["A"] t4
|
||||
t4.at "A" . to_vector . should_equal [Nothing, '2', '3']
|
||||
t4.at "A" . value_type . should_equal Value_Type.Char
|
||||
|
||||
group_builder.specify "will use the _output_ column name in the warnings when matching by position (so input names may differ)" <|
|
||||
t1 = table_builder [["A", [1]]]
|
||||
t2 = table_builder [["B", ["a"]]]
|
||||
|
@ -1,6 +1,7 @@
|
||||
from Standard.Base import all
|
||||
import Standard.Base.Errors.Common.Dry_Run_Operation
|
||||
import Standard.Base.Errors.Common.Missing_Argument
|
||||
import Standard.Base.Errors.Deprecated.Deprecated
|
||||
import Standard.Base.Errors.File_Error.File_Error
|
||||
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
||||
import Standard.Base.Errors.Illegal_State.Illegal_State
|
||||
@ -9,9 +10,9 @@ import Standard.Base.Runtime.Managed_Resource.Managed_Resource
|
||||
import Standard.Base.Runtime.Ref.Ref
|
||||
|
||||
from Standard.Table import Table, Match_Columns, Excel_Format, Excel_Range, Data_Formatter, Delimited_Format, Excel_Workbook, Value_Type
|
||||
from Standard.Table.Errors import Invalid_Column_Names, Duplicate_Output_Column_Names, Invalid_Location, Range_Exceeded, Existing_Data, Column_Count_Mismatch, Column_Name_Mismatch, Empty_Sheet, No_Rows, No_Common_Type
|
||||
from Standard.Table.Extensions.Excel_Extensions import all
|
||||
|
||||
from Standard.Table.Errors import Invalid_Column_Names, Duplicate_Output_Column_Names, Invalid_Location, Range_Exceeded, Existing_Data, Column_Count_Mismatch, Column_Name_Mismatch, Empty_Sheet
|
||||
import Standard.Table.Excel.Excel_Workbook.Return_As as Old_Return_As
|
||||
|
||||
from Standard.Test import all
|
||||
|
||||
@ -19,6 +20,8 @@ from Standard.Test import all
|
||||
import Standard.Examples
|
||||
|
||||
import project.Util
|
||||
from project.Common_Table_Operations.Util import within_table
|
||||
from project.IO.Read_Many_Spec import with_temp_dir
|
||||
|
||||
polyglot java import org.enso.table_test_helpers.RandomHelpers
|
||||
|
||||
@ -103,22 +106,45 @@ spec_fmt suite_builder header file read_method sheet_count=5 =
|
||||
|
||||
group_builder.specify "should let you read all sheets into a single table" <|
|
||||
wb = read_method file
|
||||
action = wb.read_many on_problems=_
|
||||
tester table =
|
||||
table.row_count . should_equal 25
|
||||
table.column_names . should_equal ["Sheet Name", "Name", "Quantity", "Price", "A", "B", "C", "D", "E", "Student Name", "Enrolment Date", "Item", "Price 1"]
|
||||
problems = [Empty_Sheet.Error, Duplicate_Output_Column_Names.Error ["Price"]]
|
||||
Problems.test_problem_handling action problems tester ignore_warning_cardinality=True
|
||||
r1 = wb.read_many on_problems=..Report_Error
|
||||
r1.should_fail_with Empty_Sheet
|
||||
|
||||
r2 = wb.read_many on_problems=..Report_Warning
|
||||
r2.row_count . should_equal 25
|
||||
r2.column_names . should_equal ["Sheet Name", "Name", "Quantity", "Price", "A", "B", "C", "D", "E", "Student Name", "Enrolment Date", "Item", "Price 1"]
|
||||
# We also verify how many rows were loaded from each sheet. There should be no entries for the "Empty" sheet.
|
||||
r2.at "Sheet Name" . to_vector . should_equal <|
|
||||
(Vector.fill 6 "Simple") + (Vector.fill 7 "Strange Dimensions") + (Vector.fill 6 "Dates") + (Vector.fill 6 "Duplicate Columns")
|
||||
w1 = Problems.expect_warning No_Rows r2
|
||||
w1.to_display_text . should_contain "The sheet Empty failed to load"
|
||||
w1.to_display_text . should_contain "There is no data in the sheet."
|
||||
w2 = Problems.expect_warning Duplicate_Output_Column_Names r2
|
||||
w2.column_names . should_equal ["Price"]
|
||||
|
||||
group_builder.specify "should let you read all sheets into a table of tables" <|
|
||||
wb = read_method file
|
||||
action = wb.read_many return=..Table_Of_Tables on_problems=_
|
||||
action = wb.read_many return=..With_New_Column on_problems=_
|
||||
tester table =
|
||||
table.row_count . should_equal 5
|
||||
table.column_names . should_equal ["Sheet Name", "Table"]
|
||||
problems = [Empty_Sheet.Error, Duplicate_Output_Column_Names.Error ["Price"]]
|
||||
Problems.test_problem_handling action problems tester ignore_warning_cardinality=True
|
||||
|
||||
group_builder.specify "should still support the old options for compatibility" <|
|
||||
wb = read_method file
|
||||
table1 = wb.read_many return=..Table_Of_Tables
|
||||
table1.row_count . should_equal 5
|
||||
table1.column_names . should_equal ["Sheet Name", "Table"]
|
||||
Problems.expect_warning Deprecated table1
|
||||
|
||||
table2 = wb.read_many return=Old_Return_As.Merged_Table
|
||||
table2.row_count . should_equal 25
|
||||
Problems.expect_warning Deprecated table2
|
||||
|
||||
table3 = wb.read_many return=..Merged_Table
|
||||
table3.row_count . should_equal 25
|
||||
Problems.expect_warning Deprecated table3
|
||||
|
||||
group_builder.specify "should let you read some sheets from xlsx" <|
|
||||
wb = read_method file
|
||||
single_table = wb.read_many ["Simple", "Dates"]
|
||||
@ -132,8 +158,7 @@ spec_fmt suite_builder header file read_method sheet_count=5 =
|
||||
single_table.row_count . should_equal 12
|
||||
single_table.column_names . should_equal ["Sheet Name", "Name", "Quantity", "Price", "Student Name", "Enrolment Date"]
|
||||
single_table.has_warnings.should_be_true
|
||||
warning = Problems.expect_only_warning Invalid_Location single_table
|
||||
warning.location . should_equal "Not A Sheet"
|
||||
warning = Problems.expect_only_warning No_Rows single_table
|
||||
warning.to_display_text . should_contain "Unknown sheet or range name or invalid address: 'Not A Sheet'."
|
||||
|
||||
type Spec_Write_Data
|
||||
@ -972,6 +997,160 @@ add_specs suite_builder =
|
||||
table_4 = xlsx_sheet.read (..Sheet "Sheet1" row_limit=6)
|
||||
table_4.row_count . should_equal 6
|
||||
|
||||
group_builder.specify "should let you `read_many` Excel sheets and other tabular files into a Table" <|
|
||||
with_temp_dir base_dir->
|
||||
(Table.new [["A", [1, 2]], ["B", [3, 4]]]).write (base_dir / "1.tsv") . should_succeed
|
||||
(Table.new [["A", [10, 20]], ["B", [30, 40]]]).write (base_dir / "2.xlsx") . should_succeed
|
||||
|
||||
f3 = base_dir / "3.xlsx"
|
||||
(Table.new [["A", [100]], ["B", [200]], ["C", [300]]]).write f3 format=(..Sheet "nr 1") on_existing_file=..Overwrite . should_succeed
|
||||
(Table.new [["A", [400, 500, 600]]]).write f3 format=(..Sheet "nr 2") on_existing_file=..Append . should_succeed
|
||||
|
||||
files = Data.list base_dir . sort on=(.name)
|
||||
|
||||
r1 = Data.read_many files return=..With_New_Column
|
||||
r1.should_be_a Table
|
||||
Problems.assume_no_problems r1
|
||||
within_table r1 <|
|
||||
r1.column_names . should_equal ["Path", "Value"]
|
||||
r1.row_count . should_equal 3
|
||||
r1.at "Value" . at 0 . should_be_a Table
|
||||
r1.at "Value" . at 1 . should_be_a Excel_Workbook
|
||||
r1.at "Value" . at 2 . should_be_a Excel_Workbook
|
||||
|
||||
r2 = Data.read_many files return=..As_Merged_Table
|
||||
r2.should_be_a Table
|
||||
Problems.assume_no_problems r2
|
||||
within_table r2 <|
|
||||
r2.column_names . should_equal ["Path", "Sheet Name", "A", "B", "C"]
|
||||
|
||||
# We transform the Path to just file name for easier testing
|
||||
rows = (r2.set (r2.at "Path" . map .name) "Path").rows.map .to_vector
|
||||
rows.at 0 . should_equal ["1.tsv", Nothing, 1, 3, Nothing]
|
||||
rows.at 1 . should_equal ["1.tsv", Nothing, 2, 4, Nothing]
|
||||
rows.at 2 . should_equal ["2.xlsx", "EnsoSheet", 10, 30, Nothing]
|
||||
rows.at 3 . should_equal ["2.xlsx", "EnsoSheet", 20, 40, Nothing]
|
||||
rows.at 4 . should_equal ["3.xlsx", "nr 1", 100, 200, 300]
|
||||
rows.at 5 . should_equal ["3.xlsx", "nr 2", 400, Nothing, Nothing]
|
||||
rows.at 6 . should_equal ["3.xlsx", "nr 2", 500, Nothing, Nothing]
|
||||
rows.at 7 . should_equal ["3.xlsx", "nr 2", 600, Nothing, Nothing]
|
||||
|
||||
# Test loading only Excel files and alternate matching mode to weed out edge cases
|
||||
r3 = Data.read_many (Data.list base_dir name_filter="*.xlsx" . sort on=(.name)) return=..As_Merged_Table
|
||||
r3.should_be_a Table
|
||||
Problems.assume_no_problems r3
|
||||
within_table r3 <|
|
||||
r3.column_names . should_equal ["Path", "Sheet Name", "A", "B", "C"]
|
||||
r3.at "Sheet Name" . to_vector . should_equal ["EnsoSheet", "EnsoSheet", "nr 1", "nr 2", "nr 2", "nr 2"]
|
||||
r3.at "A" . to_vector . should_equal [10, 20, 100, 400, 500, 600]
|
||||
|
||||
group_builder.specify "during `read_many`, should not mix metadata columns with data columns with same name or when matching by position" <|
|
||||
with_temp_dir base_dir->
|
||||
(Table.new [["Z", [1, 2]], ["Sheet Name", ['data column', 'data column']]]).write (base_dir / "1.tsv") . should_succeed
|
||||
(Table.new [["Z", [10]], ["X", [20]]]).write (base_dir / "2.xlsx") . should_succeed
|
||||
|
||||
f3 = base_dir / "3.xlsx"
|
||||
(Table.new [["X", [100]], ["Y", [200]], ["Z", [300]]]).write f3 format=(..Sheet "nr 1") on_existing_file=..Overwrite . should_succeed
|
||||
(Table.new [["Sheet Name", [400, 500, 600]]]).write f3 format=(..Sheet "nr 2") on_existing_file=..Append . should_succeed
|
||||
|
||||
files = Data.list base_dir . sort on=(.name)
|
||||
input = Table.new [["Path", files], ["Sheet Name", ["input 1", "input 2", "input 3"]]]
|
||||
r1 = Data.read_many input
|
||||
r1.should_be_a Table
|
||||
within_table r1 <|
|
||||
# We transform the Path to just file name for easier testing
|
||||
rows = (r1.set (r1.at "Path" . map .name) "Path").rows.map .to_vector
|
||||
|
||||
# Each Sheet Name column comes out as separate: 1 - input, 2 - metadata, 3 - data
|
||||
# The order of columns is as they appear in the input, and they are matched by name
|
||||
r1.column_names . should_equal [ "Path", "Sheet Name", "Sheet Name 1", "Z", "Sheet Name 2", "X", "Y"]
|
||||
rows.at 0 . should_equal [ "1.tsv", "input 1", Nothing, 1, "data column", Nothing, Nothing]
|
||||
rows.at 1 . should_equal [ "1.tsv", "input 1", Nothing, 2, "data column", Nothing, Nothing]
|
||||
rows.at 2 . should_equal ["2.xlsx", "input 2", "EnsoSheet", 10, Nothing, 20, Nothing]
|
||||
rows.at 3 . should_equal ["3.xlsx", "input 3", "nr 1", 300, Nothing, 100, 200]
|
||||
rows.at 4 . should_equal ["3.xlsx", "input 3", "nr 2", Nothing, "400", Nothing, Nothing]
|
||||
rows.at 5 . should_equal ["3.xlsx", "input 3", "nr 2", Nothing, "500", Nothing, Nothing]
|
||||
rows.at 6 . should_equal ["3.xlsx", "input 3", "nr 2", Nothing, "600", Nothing, Nothing]
|
||||
|
||||
Problems.expect_warning Duplicate_Output_Column_Names r1
|
||||
Problems.expect_warning No_Common_Type r1
|
||||
|
||||
r2 = Data.read_many input return=(..As_Merged_Table match=..By_Position)
|
||||
r2.should_be_a Table
|
||||
within_table r2 <|
|
||||
rows = (r2.set (r2.at "Path" . map .name) "Path").rows.map .to_vector
|
||||
|
||||
# Two Sheet Name columns come out as separate: 1 - input, 2 - metadata; the third one (data) gets renamed due to positional matching
|
||||
# The column names come from the first table that had all the columns - in this case, first sheet of 3.xlsx
|
||||
r2.column_names . should_equal [ "Path", "Sheet Name", "Sheet Name 1", "X", "Y", "Z"]
|
||||
rows.at 0 . should_equal [ "1.tsv", "input 1", Nothing, 1, "data column", Nothing]
|
||||
rows.at 1 . should_equal [ "1.tsv", "input 1", Nothing, 2, "data column", Nothing]
|
||||
rows.at 2 . should_equal ["2.xlsx", "input 2", "EnsoSheet", 10, "20", Nothing]
|
||||
rows.at 3 . should_equal ["3.xlsx", "input 3", "nr 1", 100, "200", 300]
|
||||
rows.at 4 . should_equal ["3.xlsx", "input 3", "nr 2", 400, Nothing, Nothing]
|
||||
rows.at 5 . should_equal ["3.xlsx", "input 3", "nr 2", 500, Nothing, Nothing]
|
||||
rows.at 6 . should_equal ["3.xlsx", "input 3", "nr 2", 600, Nothing, Nothing]
|
||||
|
||||
Problems.expect_warning Duplicate_Output_Column_Names r2
|
||||
Problems.expect_warning No_Common_Type r2
|
||||
|
||||
r3 = Data.read_many input return=(..As_Merged_Table columns_to_keep=..In_All match=..By_Position)
|
||||
r3.should_be_a Table
|
||||
within_table r3 <|
|
||||
rows = (r3.set (r3.at "Path" . map .name) "Path").rows.map .to_vector
|
||||
|
||||
# Same as with `r2`, but now we keep only columns that are present in all tables, then the column names come from the first table (so we get column Z).
|
||||
# But the `Sheet Name` metadata column is still kept, as its matching is independent of data.
|
||||
r3.column_names . should_equal [ "Path", "Sheet Name", "Sheet Name 1", "Z"]
|
||||
rows.at 0 . should_equal [ "1.tsv", "input 1", Nothing, 1]
|
||||
rows.at 1 . should_equal [ "1.tsv", "input 1", Nothing, 2]
|
||||
rows.at 2 . should_equal ["2.xlsx", "input 2", "EnsoSheet", 10]
|
||||
rows.at 3 . should_equal ["3.xlsx", "input 3", "nr 1", 100]
|
||||
rows.at 4 . should_equal ["3.xlsx", "input 3", "nr 2", 400]
|
||||
rows.at 5 . should_equal ["3.xlsx", "input 3", "nr 2", 500]
|
||||
rows.at 6 . should_equal ["3.xlsx", "input 3", "nr 2", 600]
|
||||
|
||||
Problems.expect_warning Duplicate_Output_Column_Names r3
|
||||
Problems.expect_warning Column_Count_Mismatch r3
|
||||
|
||||
group_builder.specify "during `read_many`, should correctly handle empty sheets" <|
|
||||
with_temp_dir base_dir->
|
||||
tsv_file = base_dir / "1.tsv"
|
||||
(Table.new [["A", [1, 2]], ["B", [3, 4]]]).write tsv_file . should_succeed
|
||||
xls_file = Examples.xls
|
||||
|
||||
r = Data.read_many [tsv_file, xls_file] return=..As_Merged_Table
|
||||
r.should_be_a Table
|
||||
r.row_count . should_equal 2+25
|
||||
r.column_names . should_equal ["Path", "Sheet Name", "A", "B", "Name", "Quantity", "Price", "C", "D", "E", "Student Name", "Enrolment Date", "Item", "Price 1"]
|
||||
# First two rows come from TSV, the rest from Excel sheets
|
||||
r.at "Path" . to_vector . map .name . should_equal <|
|
||||
(Vector.fill 2 tsv_file.name) + (Vector.fill 25 xls_file.name)
|
||||
r.at "Sheet Name" . to_vector . should_equal <|
|
||||
(Vector.fill 2 Nothing) + (Vector.fill 6 "Simple") + (Vector.fill 7 "Strange Dimensions") + (Vector.fill 6 "Dates") + (Vector.fill 6 "Duplicate Columns")
|
||||
|
||||
w = Problems.expect_warning No_Rows r
|
||||
w.to_display_text . should_contain "Empty"
|
||||
|
||||
empty_xls_file = enso_project.data / "empty-sheets.xlsx"
|
||||
r2 = Data.read_many [tsv_file, empty_xls_file] return=..As_Merged_Table
|
||||
r2.should_be_a Table
|
||||
r2.row_count . should_equal 2
|
||||
# No `Sheet Name` column, because in the end no data from Excel made it into the result
|
||||
r2.column_names . should_equal ["Path", "A", "B"]
|
||||
Problems.expect_warning No_Rows r2
|
||||
Problems.get_attached_warnings r2
|
||||
. map .to_display_text
|
||||
. find (..Contains "failed to load any sheets")
|
||||
. should_succeed
|
||||
|
||||
# But when not expanding rows, the workbook with all-empty sheets is normally loaded into a cell
|
||||
r3 = Data.read_many [tsv_file, empty_xls_file] return=..With_New_Column
|
||||
r3.should_be_a Table
|
||||
r3.at "Path" . to_vector . map .name . should_equal [tsv_file.name, empty_xls_file.name]
|
||||
r3.at "Value" . at 0 . should_be_a Table
|
||||
r3.at "Value" . at 1 . should_be_a Excel_Workbook
|
||||
|
||||
suite_builder.group "Problems" group_builder->
|
||||
group_builder.specify "should report a user-friendly error message when format is missing a required argument" <|
|
||||
r = xlsx_sheet.read (..Range)
|
||||
|
@ -2,12 +2,13 @@ from Standard.Base import all
|
||||
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
|
||||
|
||||
from Standard.Table import all
|
||||
from Standard.Table.Errors import Invalid_Value_Type
|
||||
from Standard.Table.Errors import Invalid_Value_Type, No_Rows
|
||||
from Standard.Database import all
|
||||
|
||||
from Standard.Test import all
|
||||
|
||||
from project.Util import all
|
||||
from project.Common_Table_Operations.Util import within_table
|
||||
|
||||
main filter=Nothing =
|
||||
suite = Test.build suite_builder->
|
||||
@ -23,45 +24,68 @@ add_specs suite_builder =
|
||||
files_vector = [enso_project.data / "empty.txt", (enso_project.data / "sample.tsv") . path]
|
||||
sample_table = Lazy_Ref.Value <|
|
||||
(enso_project.data / "sample.tsv") . read
|
||||
check_loaded_vector v =
|
||||
v.should_be_a Vector
|
||||
v.length . should_equal 2
|
||||
v.at 0 . should_equal ""
|
||||
v.at 1 . should_equal sample_table.get
|
||||
|
||||
check_common_columns table =
|
||||
table.at "Value" . to_vector . should_equal ["" , Nothing, Nothing]
|
||||
table.at "a" . to_vector . should_equal [Nothing, 1, 4]
|
||||
table.at "b" . to_vector . should_equal [Nothing, 2, 5]
|
||||
table.at "c" . to_vector . should_equal [Nothing, 3, 6]
|
||||
check_returned_vector vec =
|
||||
vec.should_be_a Vector
|
||||
vec.length . should_equal 2
|
||||
vec.first . should_equal ""
|
||||
vec.second . should_equal sample_table.get
|
||||
|
||||
group_builder.specify "should read files listed in a Column" <|
|
||||
column = Column.from_vector "Col" files_vector
|
||||
## TODO for next PR:
|
||||
test that if `return` is not specified, it will return as a Table when a Column is provided
|
||||
r1 = Data.read_many column return=..Vector
|
||||
check_loaded_vector r1
|
||||
r1 = Data.read_many column return=..As_Vector
|
||||
check_returned_vector r1
|
||||
Problems.assume_no_problems r1
|
||||
|
||||
r2 = Data.read_many column return=..With_New_Column
|
||||
r2.should_be_a Table
|
||||
r2.column_names . should_equal ["Col", "Value"]
|
||||
r2.at "Col" . to_vector . should_equal files_vector
|
||||
check_returned_vector (r2.at "Value" . to_vector)
|
||||
|
||||
group_builder.specify "should read files listed in a single column Table" <|
|
||||
table1 = Table.new [["Some column", files_vector]]
|
||||
r1 = Data.read_many table1 return=..Vector
|
||||
# TODO like above
|
||||
check_loaded_vector r1
|
||||
r1 = Data.read_many table1 return=..As_Vector
|
||||
check_returned_vector r1
|
||||
|
||||
r2 = Data.read_many table1 return=..With_New_Column
|
||||
r2.should_be_a Table
|
||||
r2.column_names . should_equal ["Some column", "Value"]
|
||||
r2.at "Some column" . to_vector . should_equal files_vector
|
||||
check_returned_vector (r2.at "Value" . to_vector)
|
||||
|
||||
group_builder.specify "should read files listed in a Table with `path` column" <|
|
||||
table2 = Table.new [["X", [1, 2]], ["path", files_vector]]
|
||||
r2 = Data.read_many table2 return=..Vector
|
||||
# TODO like above
|
||||
check_loaded_vector r2
|
||||
Problems.assume_no_problems r2
|
||||
table1 = Table.new [["X", [1, 2]], ["path", files_vector]]
|
||||
r1 = Data.read_many table1 return=..As_Vector
|
||||
check_returned_vector r1
|
||||
Problems.assume_no_problems r1
|
||||
|
||||
r2 = Data.read_many table1 return=..With_New_Column
|
||||
r2.should_be_a Table
|
||||
r2.column_names . should_equal ["X", "path", "Value"]
|
||||
r2.at "X" . to_vector . should_equal [1, 2]
|
||||
r2.at "path" . to_vector . should_equal files_vector
|
||||
check_returned_vector (r2.at "Value" . to_vector)
|
||||
|
||||
# Test that this is really case insensitive
|
||||
table3 = Table.new [["X", [1, 2]], ["pAtH", files_vector]]
|
||||
r3 = Data.read_many table3 return=..Vector
|
||||
check_loaded_vector r3
|
||||
Problems.assume_no_problems r3
|
||||
r3 = Data.read_many table3 return=..With_New_Column
|
||||
r3.should_be_a Table
|
||||
r3.column_names . should_equal ["X", "pAtH", "Value"]
|
||||
check_returned_vector (r3.at "Value" . to_vector)
|
||||
|
||||
group_builder.specify "will fail if no `path` column can be found or its ambiguous" <|
|
||||
group_builder.specify "should fail if no `path` column can be found or its ambiguous" <|
|
||||
table1 = Table.new [["X", [1, 2]], ["Y", files_vector]]
|
||||
r1 = Data.read_many table1 return=..Vector
|
||||
r1 = Data.read_many table1 return=..As_Vector
|
||||
r1.should_fail_with Illegal_Argument
|
||||
|
||||
table2 = Table.new [["X", [1, 2]], ["path", files_vector], ["Path", [3, 4]]]
|
||||
r2 = Data.read_many table2 return=..Vector
|
||||
r2 = Data.read_many table2 return=..As_Vector
|
||||
r2.should_fail_with Illegal_Argument
|
||||
|
||||
group_builder.specify "fails if a DB Table or Column is provided, telling to materialize first to in-memory" <|
|
||||
@ -71,11 +95,11 @@ add_specs suite_builder =
|
||||
p : Text -> p
|
||||
|
||||
table = (Table.new [["path", paths_vector]]).select_into_database_table connection "test_table" temporary=True
|
||||
r = Data.read_many table return=..Vector
|
||||
r = Data.read_many table return=..As_Vector
|
||||
r.should_fail_with Illegal_Argument
|
||||
|
||||
col = table.at "path"
|
||||
r2 = Data.read_many col return=..Vector
|
||||
r2 = Data.read_many col return=..As_Vector
|
||||
r2.should_fail_with Illegal_Argument
|
||||
|
||||
group_builder.specify "fails if a column of invalid type is provided" <|
|
||||
@ -84,3 +108,284 @@ add_specs suite_builder =
|
||||
Data.read_many table . should_fail_with Invalid_Value_Type
|
||||
Data.read_many (table.at "path") . should_fail_with Invalid_Value_Type
|
||||
Data.read_many (table.select_columns ["X"]) . should_fail_with Invalid_Value_Type
|
||||
|
||||
group_builder.specify "should return a merged table by default" <|
|
||||
r1 = Data.read_many (Column.from_vector "my column" files_vector)
|
||||
r1.should_be_a Table
|
||||
r1.column_names . should_equal ["my column", "Value", "a", "b", "c"]
|
||||
r1.at "my column" . to_vector . should_equal [files_vector.first, files_vector.second, files_vector.second]
|
||||
check_common_columns r1
|
||||
|
||||
r2 = Data.read_many (Table.new [["X", [100, 200]], ["Path", files_vector], ["Y", [300, 400]]])
|
||||
r2.should_be_a Table
|
||||
r2.column_names . should_equal ["X", "Path", "Y", "Value", "a", "b", "c"]
|
||||
# The second row is duplicated because it is expanded alongside the loaded table, which has 2 rows
|
||||
r2.at "X" . to_vector . should_equal [100, 200, 200]
|
||||
r2.at "Y" . to_vector . should_equal [300, 400, 400]
|
||||
check_common_columns r2
|
||||
|
||||
r3 = Data.read_many files_vector
|
||||
r3.should_be_a Table
|
||||
r3.column_names . should_equal ["Path", "Value", "a", "b", "c"]
|
||||
check_common_columns r3
|
||||
|
||||
group_builder.specify "if input is a Vector, the default can be overridden to return a new column" <|
|
||||
r1 = Data.read_many files_vector return=..With_New_Column
|
||||
r1.should_be_a Table
|
||||
r1.column_names . should_equal ["Path", "Value"]
|
||||
r1.at "Path" . to_vector . should_equal files_vector
|
||||
check_returned_vector (r1.at "Value" . to_vector)
|
||||
|
||||
group_builder.specify "should merge files that read as non-Table values into a Table using reasonable defaults" <|
|
||||
with_temp_dir base_dir->
|
||||
# raw JS Object - we want it to expand to a single row - same as if it were in a 1-element array
|
||||
(JS_Object.from_pairs [["a", 1], ["b", 2]]).to_json.write (base_dir / "1_js_object.json")
|
||||
|
||||
# array of JS objects
|
||||
[JS_Object.from_pairs [["a", 30], ["b", 40], ["c", "foobar"]], JS_Object.from_pairs [["a", 50], ["b", 60]]].to_json.write (base_dir / "2_js_array.json")
|
||||
|
||||
# JS array of numbers
|
||||
[100, 200, 300].to_json.write (base_dir / "3_js_numbers.json")
|
||||
|
||||
# a Table
|
||||
(Table.new [["a", [-1, -2]], ["d", [-4, -5]]]).write (base_dir / "4_table.tsv")
|
||||
|
||||
# a plain text value
|
||||
"Hi!".write (base_dir / "5_plain_text.txt")
|
||||
|
||||
# JS null
|
||||
"null".write (base_dir / "6_js_null.json")
|
||||
|
||||
# a JS string
|
||||
'"str"'.write (base_dir / "7_js_string.json")
|
||||
|
||||
files = Data.list base_dir . sort on=(.name)
|
||||
IO.println (Meta.type_of files.first)
|
||||
r = Data.read_many files
|
||||
r.should_be_a Table
|
||||
|
||||
within_table r <|
|
||||
# We reduce each Path to just its file name
|
||||
rows = (r.set (r.at "Path" . map .name) "Path").rows.map .to_vector
|
||||
|
||||
# TODO Once 6281 is done we should replace 100 with "100" etc.
|
||||
null = Nothing
|
||||
r.column_names . should_equal ["Path", "a", "b", "c", "Value", "d"]
|
||||
rows.at 0 . should_equal ["1_js_object.json", 1, 2, null, null, null]
|
||||
rows.at 1 . should_equal ["2_js_array.json", 30, 40, "foobar", null, null]
|
||||
rows.at 2 . should_equal ["2_js_array.json", 50, 60, null, null, null]
|
||||
rows.at 3 . should_equal ["3_js_numbers.json", null, null, null, 100, null]
|
||||
rows.at 4 . should_equal ["3_js_numbers.json", null, null, null, 200, null]
|
||||
rows.at 5 . should_equal ["3_js_numbers.json", null, null, null, 300, null]
|
||||
rows.at 6 . should_equal ["4_table.tsv", -1, null, null, null, -4]
|
||||
rows.at 7 . should_equal ["4_table.tsv", -2, null, null, null, -5]
|
||||
rows.at 8 . should_equal ["5_plain_text.txt", null, null, null, "Hi!", null]
|
||||
rows.at 9 . should_equal ["6_js_null.json", null, null, null, null, null]
|
||||
rows.at 10 . should_equal ["7_js_string.json", null, null, null, "str", null]
|
||||
|
||||
r.at "a" . value_type . should_equal Value_Type.Integer
|
||||
r.at "b" . value_type . should_equal Value_Type.Integer
|
||||
r.at "c" . value_type . should_equal Value_Type.Char
|
||||
# TODO change to Char once 6281 is done
|
||||
r.at "Value" . value_type . should_equal Value_Type.Mixed
|
||||
r.at "d" . value_type . should_equal Value_Type.Integer
|
||||
|
||||
# Marker to not forget
|
||||
group_builder.specify "TODO" pending="Once 6281 is done we should update the test above and others." Nothing
|
||||
|
||||
group_builder.specify "should warn when a file loads as empty array and not include it in the As_Merged_Table result" <|
|
||||
# Such an empty array should also not influence the columns present in the result:
|
||||
with_temp_dir base_dir->
|
||||
'{"a": 1}'.write (base_dir / "1_js_object.json")
|
||||
"[]".write (base_dir / "2_empty_array.json")
|
||||
'[{"a": 2, "b": "..."}]'.write (base_dir / "3_js_object.json")
|
||||
|
||||
files = Data.list base_dir . sort on=(.name)
|
||||
|
||||
r1 = Data.read_many files return=..With_New_Column
|
||||
r1.should_be_a Table
|
||||
Problems.assume_no_problems r1
|
||||
r1.row_count . should_equal 3
|
||||
r1 . at "Path" . map .name . to_vector . should_equal ["1_js_object.json", "2_empty_array.json", "3_js_object.json"]
|
||||
r1.at "Value" . at 1 . should_equal []
|
||||
|
||||
r2 = Data.read_many files
|
||||
r2.should_be_a Table
|
||||
w2 = Problems.expect_only_warning No_Rows r2
|
||||
w2.to_display_text . should_contain "2_empty_array.json"
|
||||
w2.to_display_text . should_contain "loaded as an empty array, so it is not included in the `As_Merged_Table` result of `read_many`."
|
||||
within_table r2 <|
|
||||
r2.column_names . should_equal ["Path", "a", "b"]
|
||||
r2.row_count . should_equal 2
|
||||
r2.at "Path" . map .name . to_vector . should_equal ["1_js_object.json", "3_js_object.json"]
|
||||
r2.at "a" . to_vector . should_equal [1, 2]
|
||||
r2.at "b" . to_vector . should_equal [Nothing, "..."]
|
||||
|
||||
group_builder.specify "should warn when a Table loads as 0-rows and is not included in As_Merged_Table result, but it should still influence the result columns" <|
|
||||
with_temp_dir base_dir->
|
||||
'A,B'.write (base_dir / "1_empty_table.csv")
|
||||
'B,C\n1,2'.write (base_dir / "2_table.csv")
|
||||
|
||||
files = Data.list base_dir . sort on=(.name)
|
||||
|
||||
r1 = Data.read_many files format=(..Delimited ',' headers=True) return=..With_New_Column
|
||||
r1.should_be_a Table
|
||||
Problems.assume_no_problems r1
|
||||
r1.row_count . should_equal 2
|
||||
r1.at "Path" . map .name . to_vector . should_equal ["1_empty_table.csv", "2_table.csv"]
|
||||
empty_table1 = r1.at "Value" . at 0
|
||||
empty_table1.should_be_a Table
|
||||
empty_table1.row_count . should_equal 0
|
||||
empty_table1.column_names . should_equal ["A", "B"]
|
||||
|
||||
r2 = Data.read_many files format=(..Delimited ',' headers=True) return=..As_Merged_Table
|
||||
r2.should_be_a Table
|
||||
# TODO: once 6281 is done, change this to `expect_only_warning`
|
||||
w2 = Problems.expect_warning No_Rows r2
|
||||
w2.to_display_text . should_contain "1_empty_table.csv"
|
||||
w2.to_display_text . should_contain "loaded as a table with 0 rows, so it did not contribute any rows to the `As_Merged_Table` result of `read_many`."
|
||||
within_table r2 <|
|
||||
## But it DOES influence the columns present.
|
||||
That is because the column structure is a 'structural' property,
|
||||
we want the structure of the result to be the same regardless of whether it has 0 or more rows.
|
||||
If the workflow is run next time with this table having some rows, it is better that the structure is preserved.
|
||||
Otherwise, a workflow that is running fine could stop working once a file is changed to contain no rows, as some column could no longer be found.
|
||||
r2.column_names . should_equal ["Path", "A", "B", "C"]
|
||||
r2.row_count . should_equal 1
|
||||
r2.at "Path" . map .name . to_vector . should_equal ["2_table.csv"]
|
||||
r2.at "A" . to_vector . should_equal [Nothing]
|
||||
# TODO: once 6281 is done, change '1' to 1
|
||||
r2.at "B" . to_vector . should_equal ['1']
|
||||
r2.at "C" . to_vector . should_equal [2]
|
||||
|
||||
group_builder.specify "should allow to customize how the tables are merged" <|
|
||||
with_temp_dir base_dir->
|
||||
'{"a": 1, "b": 2}'.write (base_dir / "1_js_object.json")
|
||||
'{"b": 3, "c": 4}'.write (base_dir / "2_js_object.json")
|
||||
|
||||
files = Data.list base_dir . sort on=(.name)
|
||||
|
||||
r1 = Data.read_many files return=(..As_Merged_Table columns_to_keep=..In_All)
|
||||
r1.should_be_a Table
|
||||
within_table r1 <|
|
||||
r1.column_names . should_equal ["Path", "b"]
|
||||
r1.at "b" . to_vector . should_equal [2, 3]
|
||||
|
||||
r2 = Data.read_many files return=(..As_Merged_Table columns_to_keep=(..In_List ["a", "c"]))
|
||||
r2.should_be_a Table
|
||||
within_table r2 <|
|
||||
r2.column_names . should_equal ["Path", "a", "c"]
|
||||
r2.at "a" . to_vector . should_equal [1, Nothing]
|
||||
r2.at "c" . to_vector . should_equal [Nothing, 4]
|
||||
|
||||
r3 = Data.read_many files return=(..As_Merged_Table match=..By_Position)
|
||||
r3.should_be_a Table
|
||||
within_table r3 <|
|
||||
r3.column_names . should_equal ["Path", "a", "b"]
|
||||
r3.at "a" . to_vector . should_equal [1, 3]
|
||||
r3.at "b" . to_vector . should_equal [2, 4]
|
||||
|
||||
group_builder.specify "should fallback to Char if no common type can be found for primitive values" <|
|
||||
with_temp_dir base_dir->
|
||||
'{"a": 1}'.write (base_dir / "1_js_object.json")
|
||||
'{"a": "str"}'.write (base_dir / "2_js_object.json")
|
||||
files = Data.list base_dir . sort on=(.name)
|
||||
r = Data.read_many files
|
||||
r.should_be_a Table
|
||||
r.column_names . should_equal ["Path", "a"]
|
||||
r.at "a" . value_type . should_equal Value_Type.Char
|
||||
r.at "a" . to_vector . should_equal ["1", "str"]
|
||||
|
||||
group_builder.specify "but should keep Mixed type if more complex types are found, like dictionary" <|
|
||||
with_temp_dir base_dir->
|
||||
'{"a": {}}'.write (base_dir / "1_js_object.json")
|
||||
'{"a": []}'.write (base_dir / "2_js_object.json")
|
||||
files = Data.list base_dir . sort on=(.name)
|
||||
r = Data.read_many files
|
||||
r.should_be_a Table
|
||||
r.column_names . should_equal ["Path", "a"]
|
||||
r.at "a" . value_type . should_equal Value_Type.Mixed
|
||||
r.at "a" . to_vector . should_equal [JS_Object.from_pairs [], []]
|
||||
|
||||
group_builder.specify "has sane behaviour if no files were provided" <|
|
||||
col = Column.from_vector "C" []
|
||||
Data.read_many col return=..As_Vector . should_equal []
|
||||
|
||||
t = Data.read_many col
|
||||
t.should_be_a Table
|
||||
t.row_count . should_equal 0
|
||||
t.column_names . should_equal ["C"]
|
||||
|
||||
t2 = Data.read_many []
|
||||
t2.should_be_a Table
|
||||
t2.row_count . should_equal 0
|
||||
t2.column_names . should_equal ["Path"]
|
||||
|
||||
t3 = Data.read_many [] return=..With_New_Column
|
||||
t3.should_be_a Table
|
||||
t3.row_count . should_equal 0
|
||||
t3.column_names . should_equal ["Path", "Value"]
|
||||
|
||||
group_builder.specify "should have sane behaviour if all files are weird" <|
|
||||
with_temp_dir base_dir->
|
||||
'{}'.write (base_dir / "1_js_object.json")
|
||||
'[{}, {}]'.write (base_dir / "2_js_array.json")
|
||||
|
||||
files = Data.list base_dir . sort on=(.name)
|
||||
r = Data.read_many files
|
||||
r.should_be_a Table
|
||||
Problems.expect_warning Illegal_Argument r
|
||||
within_table r <|
|
||||
r.column_names . should_equal ["Path", "Value"]
|
||||
empty = JS_Object.from_pairs []
|
||||
r.at "Value" . to_vector . should_equal [empty, empty, empty]
|
||||
|
||||
with_temp_dir base_dir->
|
||||
'[]'.write (base_dir / "1_empty_array.json")
|
||||
'[]'.write (base_dir / "2_empty_array.json")
|
||||
|
||||
files = Data.list base_dir . sort on=(.name)
|
||||
r1 = Data.read_many files return=..With_New_Column
|
||||
r1.should_be_a Table
|
||||
r1.row_count . should_equal 2
|
||||
r1.column_names . should_equal ["Path", "Value"]
|
||||
Problems.assume_no_problems r1
|
||||
|
||||
r2 = Data.read_many files
|
||||
r2.should_be_a Table
|
||||
r2.row_count . should_equal 0
|
||||
r2.column_names . should_equal ["Path"]
|
||||
Problems.expect_only_warning No_Rows r2
|
||||
|
||||
group_builder.specify "should rename duplicated columns, keeping columns from the input unchanged" <|
|
||||
tmp_file = enso_project.data / "transient" / "table.csv"
|
||||
(Table.new [["Path", [1]], ["Col", [2]]]).write tmp_file on_existing_file=..Overwrite . should_succeed
|
||||
Panic.with_finalizer tmp_file.delete <|
|
||||
col = Column.from_vector "Col" [tmp_file.path]
|
||||
r = Data.read_many col return=..As_Merged_Table
|
||||
r.column_names . should_equal ["Col", "Path", "Col 1"]
|
||||
r.at "Col" . to_vector . should_equal [tmp_file.path]
|
||||
r.at "Path" . to_vector . should_equal [1]
|
||||
r.at "Col 1" . to_vector . should_equal [2]
|
||||
|
||||
table = Table.new [["Path", [tmp_file.path]], ["Col", ["X"]], ["Value", ["Y"]]]
|
||||
r2 = Data.read_many table return=..As_Merged_Table
|
||||
r2.column_names . should_equal ["Path", "Col", "Value", "Path 1", "Col 1"]
|
||||
r2.at "Path" . to_vector . should_equal [tmp_file.path]
|
||||
r2.at "Col" . to_vector . should_equal ["X"]
|
||||
r2.at "Value" . to_vector . should_equal ["Y"]
|
||||
r2.at "Path 1" . to_vector . should_equal [1]
|
||||
r2.at "Col 1" . to_vector . should_equal [2]
|
||||
|
||||
r3 = Data.read_many table return=..With_New_Column
|
||||
r3.column_names . should_equal ["Path", "Col", "Value", "Value 1"]
|
||||
r3.at "Path" . to_vector . should_equal [tmp_file.path]
|
||||
r3.at "Col" . to_vector . should_equal ["X"]
|
||||
r3.at "Value" . to_vector . should_equal ["Y"]
|
||||
r3.at "Value 1" . first . should_be_a Table
|
||||
|
||||
private with_temp_dir callback =
|
||||
base_dir = enso_project.data / "transient" / "read_many_test"
|
||||
base_dir.delete_if_exists recursive=True
|
||||
base_dir.create_directory . should_succeed
|
||||
Panic.with_finalizer (base_dir.delete recursive=True) (callback base_dir)
|
||||
|
@ -75,12 +75,12 @@ add_specs suite_builder =
|
||||
|
||||
suite_builder.group "from_objects with JSON (single values)" group_builder->
|
||||
group_builder.specify "Generates a single-row table from a JSON object" <|
|
||||
expected = Table.new [["Key", ["first", "last", "age"]], ["Value", ["Mary", "Smith", 23]]]
|
||||
expected = Table.new [["first", ["Mary"]], ["last", ["Smith"]], ["age", [23]]]
|
||||
Table.from_objects (data.uniform_json.at 0) . should_equal expected
|
||||
|
||||
group_builder.specify "works fine even if requested fields are duplicated" <|
|
||||
expected = Table.new [["Key", ["first", "last", "age"]], ["Value", ["Mary", "Smith", 23]]]
|
||||
Table.from_objects (data.uniform_json.at 0) ["Key", "Value", "Key", "Key"] . should_equal expected
|
||||
expected = Table.new [["first", ["Mary"]], ["last", ["Smith"]]]
|
||||
Table.from_objects (data.uniform_json.at 0) ["first", "last", "first", "first"] . should_equal expected
|
||||
|
||||
suite_builder.group "from_objects with uniform JSON vector" group_builder->
|
||||
group_builder.specify "Generates a table from a vector of JSON objects" <|
|
||||
@ -472,4 +472,3 @@ main filter=Nothing =
|
||||
suite = Test.build suite_builder->
|
||||
add_specs suite_builder
|
||||
suite.run_with_filter filter
|
||||
|
||||
|
@ -30,4 +30,3 @@ main filter=Nothing =
|
||||
Visualization_Spec.add_specs suite_builder
|
||||
Widgets_Spec.add_specs suite_builder
|
||||
suite.run_with_filter filter
|
||||
|
||||
|
@ -12,7 +12,7 @@ main filter=Nothing =
|
||||
|
||||
|
||||
add_specs suite_builder =
|
||||
suite_builder.group "Can run each helper" group_builder->
|
||||
suite_builder.group "Widget_Helpers run" group_builder->
|
||||
group_builder.specify "make_regex_text_widget" <|
|
||||
w = make_regex_text_widget
|
||||
j = (Widgets.get_widget_json w) . to_text
|
@ -6,12 +6,14 @@ import project.Widgets.Database_Widgets_Spec
|
||||
import project.Widgets.File_Format_Widgets_Spec
|
||||
import project.Widgets.Table_Widgets_Spec
|
||||
import project.Widgets.Text_Widgets_Spec
|
||||
import project.Widgets.Widget_Helpers_Spec
|
||||
|
||||
add_specs suite_builder =
|
||||
Table_Widgets_Spec.add_specs suite_builder
|
||||
Database_Widgets_Spec.add_specs suite_builder
|
||||
File_Format_Widgets_Spec.add_specs suite_builder
|
||||
Text_Widgets_Spec.add_specs suite_builder
|
||||
Widget_Helpers_Spec.add_specs suite_builder
|
||||
|
||||
main filter=Nothing =
|
||||
suite = Test.build suite_builder->
|
||||
|
Loading…
Reference in New Issue
Block a user